//===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPUBaseInfo.h"
#include "AMDGPU.h"
#include "AMDGPUAsmUtils.h"
#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/TargetParser/TargetParser.h"
#include <optional>

#define GET_INSTRINFO_NAMED_OPS
#define GET_INSTRMAP_INFO
#include "AMDGPUGenInstrInfo.inc"

static llvm::cl::opt<unsigned> DefaultAMDHSACodeObjectVersion(
    "amdhsa-code-object-version", llvm::cl::Hidden,
    llvm::cl::init(llvm::AMDGPU::AMDHSA_COV5),
    llvm::cl::desc("Set default AMDHSA Code Object Version (module flag "
                   "or asm directive still take priority if present)"));

namespace {

/// \returns Bit mask for given bit \p Shift and bit \p Width.
unsigned getBitMask(unsigned Shift, unsigned Width) {
  return ((1 << Width) - 1) << Shift;
}

/// Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
///
/// \returns Packed \p Dst.
unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
  unsigned Mask = getBitMask(Shift, Width);
  return ((Src << Shift) & Mask) | (Dst & ~Mask);
}

/// Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
///
/// \returns Unpacked bits.
unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
  return (Src & getBitMask(Shift, Width)) >> Shift;
}
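
// Illustrative example: packing a value into a bit field and unpacking it
// again is lossless as long as the value fits the field. For a 4-bit field
// at bit 8:
//   unsigned Enc = packBits(/*Src=*/0xA, /*Dst=*/0, /*Shift=*/8, /*Width=*/4);
//   // Enc == 0xA00; bits of Dst outside the field are preserved.
//   unsigned Val = unpackBits(Enc, /*Shift=*/8, /*Width=*/4); // Val == 0xA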

/// \returns Vmcnt bit shift (lower bits).
unsigned getVmcntBitShiftLo(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 10 : 0;
}

/// \returns Vmcnt bit width (lower bits).
unsigned getVmcntBitWidthLo(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 6 : 4;
}

/// \returns Expcnt bit shift.
unsigned getExpcntBitShift(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 0 : 4;
}

/// \returns Expcnt bit width.
unsigned getExpcntBitWidth(unsigned VersionMajor) { return 3; }

/// \returns Lgkmcnt bit shift.
unsigned getLgkmcntBitShift(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 4 : 8;
}

/// \returns Lgkmcnt bit width.
unsigned getLgkmcntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 10 ? 6 : 4;
}

/// \returns Vmcnt bit shift (higher bits).
unsigned getVmcntBitShiftHi(unsigned VersionMajor) { return 14; }

/// \returns Vmcnt bit width (higher bits).
unsigned getVmcntBitWidthHi(unsigned VersionMajor) {
  return (VersionMajor == 9 || VersionMajor == 10) ? 2 : 0;
}
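
// Worked example: on gfx9/gfx10 the 6-bit vmcnt value is split across the
// waitcnt immediate, vmcnt[3:0] at bit 0 and vmcnt[5:4] at bit 14, so a
// vmcnt of 53 (0b110101) is stored as low bits 0b0101 and high bits 0b11.
// decodeVmcnt() later in this file reassembles the two pieces.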

/// \returns Loadcnt bit width
unsigned getLoadcntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 6 : 0;
}

/// \returns Samplecnt bit width.
unsigned getSamplecntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 6 : 0;
}

/// \returns Bvhcnt bit width.
unsigned getBvhcntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 3 : 0;
}

/// \returns Dscnt bit width.
unsigned getDscntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 6 : 0;
}

/// \returns Dscnt bit shift in combined S_WAIT instructions.
unsigned getDscntBitShift(unsigned VersionMajor) { return 0; }

/// \returns Storecnt or Vscnt bit width, depending on VersionMajor.
unsigned getStorecntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 10 ? 6 : 0;
}

/// \returns Kmcnt bit width.
unsigned getKmcntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 5 : 0;
}

/// \returns shift for Loadcnt/Storecnt in combined S_WAIT instructions.
unsigned getLoadcntStorecntBitShift(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 8 : 0;
}

/// \returns VmVsrc bit width
inline unsigned getVmVsrcBitWidth() { return 3; }

/// \returns VmVsrc bit shift
inline unsigned getVmVsrcBitShift() { return 2; }

/// \returns VaVdst bit width
inline unsigned getVaVdstBitWidth() { return 4; }

/// \returns VaVdst bit shift
inline unsigned getVaVdstBitShift() { return 12; }

/// \returns SaSdst bit width
inline unsigned getSaSdstBitWidth() { return 1; }

/// \returns SaSdst bit shift
inline unsigned getSaSdstBitShift() { return 0; }

} // end namespace anonymous

namespace llvm {

namespace AMDGPU {

/// \returns True if \p STI is AMDHSA.
bool isHsaAbi(const MCSubtargetInfo &STI) {
  return STI.getTargetTriple().getOS() == Triple::AMDHSA;
}

unsigned getAMDHSACodeObjectVersion(const Module &M) {
  if (auto Ver = mdconst::extract_or_null<ConstantInt>(
          M.getModuleFlag("amdgpu_code_object_version"))) {
    return (unsigned)Ver->getZExtValue() / 100;
  }

  return getDefaultAMDHSACodeObjectVersion();
}

unsigned getDefaultAMDHSACodeObjectVersion() {
  return DefaultAMDHSACodeObjectVersion;
}

unsigned getAMDHSACodeObjectVersion(unsigned ABIVersion) {
  switch (ABIVersion) {
  case ELF::ELFABIVERSION_AMDGPU_HSA_V4:
    return 4;
  case ELF::ELFABIVERSION_AMDGPU_HSA_V5:
    return 5;
  default:
    return getDefaultAMDHSACodeObjectVersion();
  }
}

uint8_t getELFABIVersion(const Triple &T, unsigned CodeObjectVersion) {
  if (T.getOS() != Triple::AMDHSA)
    return 0;

  switch (CodeObjectVersion) {
  case 4:
    return ELF::ELFABIVERSION_AMDGPU_HSA_V4;
  case 5:
    return ELF::ELFABIVERSION_AMDGPU_HSA_V5;
  case 6:
    return ELF::ELFABIVERSION_AMDGPU_HSA_V6;
  default:
    report_fatal_error("Unsupported AMDHSA Code Object Version " +
                       Twine(CodeObjectVersion));
  }
}

unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion) {
  switch (CodeObjectVersion) {
  case AMDHSA_COV4:
    return 48;
  case AMDHSA_COV5:
  case AMDHSA_COV6:
  default:
    return AMDGPU::ImplicitArg::MULTIGRID_SYNC_ARG_OFFSET;
  }
}

// FIXME: All such magic numbers about the ABI should be in a
// central TD file.
unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion) {
  switch (CodeObjectVersion) {
  case AMDHSA_COV4:
    return 24;
  case AMDHSA_COV5:
  case AMDHSA_COV6:
  default:
    return AMDGPU::ImplicitArg::HOSTCALL_PTR_OFFSET;
  }
}

unsigned getDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion) {
  switch (CodeObjectVersion) {
  case AMDHSA_COV4:
    return 32;
  case AMDHSA_COV5:
  case AMDHSA_COV6:
  default:
    return AMDGPU::ImplicitArg::DEFAULT_QUEUE_OFFSET;
  }
}

unsigned getCompletionActionImplicitArgPosition(unsigned CodeObjectVersion) {
  switch (CodeObjectVersion) {
  case AMDHSA_COV4:
    return 40;
  case AMDHSA_COV5:
  case AMDHSA_COV6:
  default:
    return AMDGPU::ImplicitArg::COMPLETION_ACTION_OFFSET;
  }
}

#define GET_MIMGBaseOpcodesTable_IMPL
#define GET_MIMGDimInfoTable_IMPL
#define GET_MIMGInfoTable_IMPL
#define GET_MIMGLZMappingTable_IMPL
#define GET_MIMGMIPMappingTable_IMPL
#define GET_MIMGBiasMappingTable_IMPL
#define GET_MIMGOffsetMappingTable_IMPL
#define GET_MIMGG16MappingTable_IMPL
#define GET_MAIInstInfoTable_IMPL
#include "AMDGPUGenSearchableTables.inc"

int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
                  unsigned VDataDwords, unsigned VAddrDwords) {
  const MIMGInfo *Info = getMIMGOpcodeHelper(BaseOpcode, MIMGEncoding,
                                             VDataDwords, VAddrDwords);
  return Info ? Info->Opcode : -1;
}

const MIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc) {
  const MIMGInfo *Info = getMIMGInfo(Opc);
  return Info ? getMIMGBaseOpcodeInfo(Info->BaseOpcode) : nullptr;
}

int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels) {
  const MIMGInfo *OrigInfo = getMIMGInfo(Opc);
  const MIMGInfo *NewInfo =
      getMIMGOpcodeHelper(OrigInfo->BaseOpcode, OrigInfo->MIMGEncoding,
                          NewChannels, OrigInfo->VAddrDwords);
  return NewInfo ? NewInfo->Opcode : -1;
}

unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode,
                           const MIMGDimInfo *Dim, bool IsA16,
                           bool IsG16Supported) {
  unsigned AddrWords = BaseOpcode->NumExtraArgs;
  unsigned AddrComponents = (BaseOpcode->Coordinates ? Dim->NumCoords : 0) +
                            (BaseOpcode->LodOrClampOrMip ? 1 : 0);
  if (IsA16)
    AddrWords += divideCeil(AddrComponents, 2);
  else
    AddrWords += AddrComponents;

  // Note: For subtargets that support A16 but not G16, enabling A16 also
  // enables 16 bit gradients.
  // For subtargets that support A16 (operand) and G16 (done with a different
  // instruction encoding), they are independent.

  if (BaseOpcode->Gradients) {
    if ((IsA16 && !IsG16Supported) || BaseOpcode->G16)
      // There are two gradients per coordinate, we pack them separately.
      // For the 3d case,
      // we get (dy/du, dx/du) (-, dz/du) (dy/dv, dx/dv) (-, dz/dv)
      AddrWords += alignTo<2>(Dim->NumGradients / 2);
    else
      AddrWords += Dim->NumGradients;
  }
  return AddrWords;
}
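
// Worked example: for a 2D sample_d with packed 16-bit addresses (IsA16) on
// a target without G16, the two coordinates pack into one dword and the four
// gradients pack as alignTo<2>(4 / 2) == 2 dwords, so AddrWords becomes
// NumExtraArgs + 1 + 2.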

struct MUBUFInfo {
  uint16_t Opcode;
  uint16_t BaseOpcode;
  uint8_t elements;
  bool has_vaddr;
  bool has_srsrc;
  bool has_soffset;
  bool IsBufferInv;
};

struct MTBUFInfo {
  uint16_t Opcode;
  uint16_t BaseOpcode;
  uint8_t elements;
  bool has_vaddr;
  bool has_srsrc;
  bool has_soffset;
};

struct SMInfo {
  uint16_t Opcode;
  bool IsBuffer;
};

struct VOPInfo {
  uint16_t Opcode;
  bool IsSingle;
};

struct VOPC64DPPInfo {
  uint16_t Opcode;
};

struct VOPCDPPAsmOnlyInfo {
  uint16_t Opcode;
};

struct VOP3CDPPAsmOnlyInfo {
  uint16_t Opcode;
};

struct VOPDComponentInfo {
  uint16_t BaseVOP;
  uint16_t VOPDOp;
  bool CanBeVOPDX;
};

struct VOPDInfo {
  uint16_t Opcode;
  uint16_t OpX;
  uint16_t OpY;
  uint16_t Subtarget;
};

struct VOPTrue16Info {
  uint16_t Opcode;
  bool IsTrue16;
};

#define GET_MTBUFInfoTable_DECL
#define GET_MTBUFInfoTable_IMPL
#define GET_MUBUFInfoTable_DECL
#define GET_MUBUFInfoTable_IMPL
#define GET_SMInfoTable_DECL
#define GET_SMInfoTable_IMPL
#define GET_VOP1InfoTable_DECL
#define GET_VOP1InfoTable_IMPL
#define GET_VOP2InfoTable_DECL
#define GET_VOP2InfoTable_IMPL
#define GET_VOP3InfoTable_DECL
#define GET_VOP3InfoTable_IMPL
#define GET_VOPC64DPPTable_DECL
#define GET_VOPC64DPPTable_IMPL
#define GET_VOPC64DPP8Table_DECL
#define GET_VOPC64DPP8Table_IMPL
#define GET_VOPCAsmOnlyInfoTable_DECL
#define GET_VOPCAsmOnlyInfoTable_IMPL
#define GET_VOP3CAsmOnlyInfoTable_DECL
#define GET_VOP3CAsmOnlyInfoTable_IMPL
#define GET_VOPDComponentTable_DECL
#define GET_VOPDComponentTable_IMPL
#define GET_VOPDPairs_DECL
#define GET_VOPDPairs_IMPL
#define GET_VOPTrue16Table_DECL
#define GET_VOPTrue16Table_IMPL
#define GET_WMMAOpcode2AddrMappingTable_DECL
#define GET_WMMAOpcode2AddrMappingTable_IMPL
#define GET_WMMAOpcode3AddrMappingTable_DECL
#define GET_WMMAOpcode3AddrMappingTable_IMPL
#include "AMDGPUGenSearchableTables.inc"

int getMTBUFBaseOpcode(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFInfoFromOpcode(Opc);
  return Info ? Info->BaseOpcode : -1;
}

int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements) {
  const MTBUFInfo *Info = getMTBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
  return Info ? Info->Opcode : -1;
}

int getMTBUFElements(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
  return Info ? Info->elements : 0;
}

bool getMTBUFHasVAddr(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
  return Info ? Info->has_vaddr : false;
}

bool getMTBUFHasSrsrc(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
  return Info ? Info->has_srsrc : false;
}

bool getMTBUFHasSoffset(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
  return Info ? Info->has_soffset : false;
}

int getMUBUFBaseOpcode(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFInfoFromOpcode(Opc);
  return Info ? Info->BaseOpcode : -1;
}

int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements) {
  const MUBUFInfo *Info = getMUBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
  return Info ? Info->Opcode : -1;
}

int getMUBUFElements(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info ? Info->elements : 0;
}

bool getMUBUFHasVAddr(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info ? Info->has_vaddr : false;
}

bool getMUBUFHasSrsrc(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info ? Info->has_srsrc : false;
}

bool getMUBUFHasSoffset(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info ? Info->has_soffset : false;
}

bool getMUBUFIsBufferInv(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info ? Info->IsBufferInv : false;
}

bool getSMEMIsBuffer(unsigned Opc) {
  const SMInfo *Info = getSMEMOpcodeHelper(Opc);
  return Info ? Info->IsBuffer : false;
}

bool getVOP1IsSingle(unsigned Opc) {
  const VOPInfo *Info = getVOP1OpcodeHelper(Opc);
  return Info ? Info->IsSingle : false;
}

bool getVOP2IsSingle(unsigned Opc) {
  const VOPInfo *Info = getVOP2OpcodeHelper(Opc);
  return Info ? Info->IsSingle : false;
}

bool getVOP3IsSingle(unsigned Opc) {
  const VOPInfo *Info = getVOP3OpcodeHelper(Opc);
  return Info ? Info->IsSingle : false;
}

bool isVOPC64DPP(unsigned Opc) {
  return isVOPC64DPPOpcodeHelper(Opc) || isVOPC64DPP8OpcodeHelper(Opc);
}

bool isVOPCAsmOnly(unsigned Opc) {
  return isVOPCAsmOnlyOpcodeHelper(Opc) || isVOP3CAsmOnlyOpcodeHelper(Opc);
}

bool getMAIIsDGEMM(unsigned Opc) {
  const MAIInstInfo *Info = getMAIInstInfoHelper(Opc);
  return Info ? Info->is_dgemm : false;
}

bool getMAIIsGFX940XDL(unsigned Opc) {
  const MAIInstInfo *Info = getMAIInstInfoHelper(Opc);
  return Info ? Info->is_gfx940_xdl : false;
}

unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST) {
  if (ST.hasFeature(AMDGPU::FeatureGFX12Insts))
    return SIEncodingFamily::GFX12;
  if (ST.hasFeature(AMDGPU::FeatureGFX11Insts))
    return SIEncodingFamily::GFX11;
  llvm_unreachable("Subtarget generation does not support VOPD!");
}

CanBeVOPD getCanBeVOPD(unsigned Opc) {
  const VOPDComponentInfo *Info = getVOPDComponentHelper(Opc);
  if (Info)
    return {Info->CanBeVOPDX, true};
  else
    return {false, false};
}

unsigned getVOPDOpcode(unsigned Opc) {
  const VOPDComponentInfo *Info = getVOPDComponentHelper(Opc);
  return Info ? Info->VOPDOp : ~0u;
}

bool isVOPD(unsigned Opc) {
  return AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0X);
}

bool isMAC(unsigned Opc) {
  return Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
         Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
         Opc == AMDGPU::V_MAC_F32_e64_vi ||
         Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
         Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
         Opc == AMDGPU::V_MAC_F16_e64_vi ||
         Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
         Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
         Opc == AMDGPU::V_FMAC_F32_e64_gfx11 ||
         Opc == AMDGPU::V_FMAC_F32_e64_gfx12 ||
         Opc == AMDGPU::V_FMAC_F32_e64_vi ||
         Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
         Opc == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 ||
         Opc == AMDGPU::V_FMAC_F16_e64_gfx10 ||
         Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx11 ||
         Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx12 ||
         Opc == AMDGPU::V_DOT2C_F32_F16_e64_vi ||
         Opc == AMDGPU::V_DOT2C_I32_I16_e64_vi ||
         Opc == AMDGPU::V_DOT4C_I32_I8_e64_vi ||
         Opc == AMDGPU::V_DOT8C_I32_I4_e64_vi;
}

bool isPermlane16(unsigned Opc) {
  return Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
         Opc == AMDGPU::V_PERMLANEX16_B32_gfx10 ||
         Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx11 ||
         Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx11 ||
         Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx12 ||
         Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx12 ||
         Opc == AMDGPU::V_PERMLANE16_VAR_B32_e64_gfx12 ||
         Opc == AMDGPU::V_PERMLANEX16_VAR_B32_e64_gfx12;
}

bool isCvt_F32_Fp8_Bf8_e64(unsigned Opc) {
  return Opc == AMDGPU::V_CVT_F32_BF8_e64_gfx12 ||
         Opc == AMDGPU::V_CVT_F32_FP8_e64_gfx12 ||
         Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp_gfx12 ||
         Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp_gfx12 ||
         Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp8_gfx12 ||
         Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp8_gfx12 ||
         Opc == AMDGPU::V_CVT_PK_F32_BF8_e64_gfx12 ||
         Opc == AMDGPU::V_CVT_PK_F32_FP8_e64_gfx12;
}

bool isGenericAtomic(unsigned Opc) {
  return Opc == AMDGPU::G_AMDGPU_ATOMIC_FMIN ||
         Opc == AMDGPU::G_AMDGPU_ATOMIC_FMAX ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SWAP ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_ADD ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMIN ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMIN ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMAX ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMAX ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_AND ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_OR ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_XOR ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_INC ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMIN ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMAX ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_CMPSWAP ||
         Opc == AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG;
}

bool isTrue16Inst(unsigned Opc) {
  const VOPTrue16Info *Info = getTrue16OpcodeHelper(Opc);
  return Info ? Info->IsTrue16 : false;
}

unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc) {
  const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom2AddrOpcode(Opc);
  return Info ? Info->Opcode3Addr : ~0u;
}

unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc) {
  const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom3AddrOpcode(Opc);
  return Info ? Info->Opcode2Addr : ~0u;
}

// Wrapper for Tablegen'd function. enum Subtarget is not defined in any
// header files, so we need to wrap it in a function that takes unsigned
// instead.
int getMCOpcode(uint16_t Opcode, unsigned Gen) {
  return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen));
}

int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily) {
  const VOPDInfo *Info =
      getVOPDInfoFromComponentOpcodes(OpX, OpY, EncodingFamily);
  return Info ? Info->Opcode : -1;
}

std::pair<unsigned, unsigned> getVOPDComponents(unsigned VOPDOpcode) {
  const VOPDInfo *Info = getVOPDOpcodeHelper(VOPDOpcode);
  assert(Info);
  auto OpX = getVOPDBaseFromComponent(Info->OpX);
  auto OpY = getVOPDBaseFromComponent(Info->OpY);
  assert(OpX && OpY);
  return {OpX->BaseVOP, OpY->BaseVOP};
}

namespace VOPD {

ComponentProps::ComponentProps(const MCInstrDesc &OpDesc) {
  assert(OpDesc.getNumDefs() == Component::DST_NUM);

  auto TiedIdx = OpDesc.getOperandConstraint(Component::SRC2, MCOI::TIED_TO);
  assert(TiedIdx == -1 || TiedIdx == Component::DST);
  HasSrc2Acc = TiedIdx != -1;

  SrcOperandsNum = OpDesc.getNumOperands() - OpDesc.getNumDefs();
  assert(SrcOperandsNum <= Component::MAX_SRC_NUM);

  auto OperandsNum = OpDesc.getNumOperands();
  unsigned CompOprIdx;
  for (CompOprIdx = Component::SRC1; CompOprIdx < OperandsNum; ++CompOprIdx) {
    if (OpDesc.operands()[CompOprIdx].OperandType == AMDGPU::OPERAND_KIMM32) {
      MandatoryLiteralIdx = CompOprIdx;
      break;
    }
  }
}

unsigned ComponentInfo::getIndexInParsedOperands(unsigned CompOprIdx) const {
  assert(CompOprIdx < Component::MAX_OPR_NUM);

  if (CompOprIdx == Component::DST)
    return getIndexOfDstInParsedOperands();

  auto CompSrcIdx = CompOprIdx - Component::DST_NUM;
  if (CompSrcIdx < getCompParsedSrcOperandsNum())
    return getIndexOfSrcInParsedOperands(CompSrcIdx);

  // The specified operand does not exist.
  return 0;
}

std::optional<unsigned> InstInfo::getInvalidCompOperandIndex(
    std::function<unsigned(unsigned, unsigned)> GetRegIdx, bool SkipSrc) const {

  auto OpXRegs = getRegIndices(ComponentIndex::X, GetRegIdx);
  auto OpYRegs = getRegIndices(ComponentIndex::Y, GetRegIdx);

  const unsigned CompOprNum =
      SkipSrc ? Component::DST_NUM : Component::MAX_OPR_NUM;
  unsigned CompOprIdx;
  for (CompOprIdx = 0; CompOprIdx < CompOprNum; ++CompOprIdx) {
    unsigned BanksMasks = VOPD_VGPR_BANK_MASKS[CompOprIdx];
    if (OpXRegs[CompOprIdx] && OpYRegs[CompOprIdx] &&
        ((OpXRegs[CompOprIdx] & BanksMasks) ==
         (OpYRegs[CompOprIdx] & BanksMasks)))
      return CompOprIdx;
  }

  return {};
}

// Return an array of VGPR registers [DST,SRC0,SRC1,SRC2] used
// by the specified component. If an operand is unused
// or is not a VGPR, the corresponding value is 0.
//
// GetRegIdx(Component, MCOperandIdx) must return a VGPR register index
// for the specified component and MC operand. The callback must return 0
// if the operand is not a register or not a VGPR.
InstInfo::RegIndices InstInfo::getRegIndices(
    unsigned CompIdx,
    std::function<unsigned(unsigned, unsigned)> GetRegIdx) const {
  assert(CompIdx < COMPONENTS_NUM);

  const auto &Comp = CompInfo[CompIdx];
  InstInfo::RegIndices RegIndices;

  RegIndices[DST] = GetRegIdx(CompIdx, Comp.getIndexOfDstInMCOperands());

  for (unsigned CompOprIdx : {SRC0, SRC1, SRC2}) {
    unsigned CompSrcIdx = CompOprIdx - DST_NUM;
    RegIndices[CompOprIdx] =
        Comp.hasRegSrcOperand(CompSrcIdx)
            ? GetRegIdx(CompIdx, Comp.getIndexOfSrcInMCOperands(CompSrcIdx))
            : 0;
  }
  return RegIndices;
}
} // namespace VOPD

VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY) {
  return VOPD::InstInfo(OpX, OpY);
}

VOPD::InstInfo getVOPDInstInfo(unsigned VOPDOpcode,
                               const MCInstrInfo *InstrInfo) {
  auto [OpX, OpY] = getVOPDComponents(VOPDOpcode);
  const auto &OpXDesc = InstrInfo->get(OpX);
  const auto &OpYDesc = InstrInfo->get(OpY);
  VOPD::ComponentInfo OpXInfo(OpXDesc, VOPD::ComponentKind::COMPONENT_X);
  VOPD::ComponentInfo OpYInfo(OpYDesc, OpXInfo);
  return VOPD::InstInfo(OpXInfo, OpYInfo);
}

namespace IsaInfo {

AMDGPUTargetID::AMDGPUTargetID(const MCSubtargetInfo &STI)
    : STI(STI), XnackSetting(TargetIDSetting::Any),
      SramEccSetting(TargetIDSetting::Any) {
  if (!STI.getFeatureBits().test(FeatureSupportsXNACK))
    XnackSetting = TargetIDSetting::Unsupported;
  if (!STI.getFeatureBits().test(FeatureSupportsSRAMECC))
    SramEccSetting = TargetIDSetting::Unsupported;
}

void AMDGPUTargetID::setTargetIDFromFeaturesString(StringRef FS) {
  // Check if xnack or sramecc is explicitly enabled or disabled. In the
  // absence of the target features we assume we must generate code that can
  // run in any environment.
  SubtargetFeatures Features(FS);
  std::optional<bool> XnackRequested;
  std::optional<bool> SramEccRequested;

  for (const std::string &Feature : Features.getFeatures()) {
    if (Feature == "+xnack")
      XnackRequested = true;
    else if (Feature == "-xnack")
      XnackRequested = false;
    else if (Feature == "+sramecc")
      SramEccRequested = true;
    else if (Feature == "-sramecc")
      SramEccRequested = false;
  }

  bool XnackSupported = isXnackSupported();
  bool SramEccSupported = isSramEccSupported();

  if (XnackRequested) {
    if (XnackSupported) {
      XnackSetting =
          *XnackRequested ? TargetIDSetting::On : TargetIDSetting::Off;
    } else {
      // If a specific xnack setting was requested and this GPU does not
      // support xnack, emit a warning. The setting remains "Unsupported".
      if (*XnackRequested) {
        errs() << "warning: xnack 'On' was requested for a processor that does "
                  "not support it!\n";
      } else {
        errs() << "warning: xnack 'Off' was requested for a processor that "
                  "does not support it!\n";
      }
    }
  }

  if (SramEccRequested) {
    if (SramEccSupported) {
      SramEccSetting =
          *SramEccRequested ? TargetIDSetting::On : TargetIDSetting::Off;
    } else {
      // If a specific sramecc setting was requested and this GPU does not
      // support sramecc, emit a warning. The setting remains "Unsupported".
      if (*SramEccRequested) {
        errs() << "warning: sramecc 'On' was requested for a processor that "
                  "does not support it!\n";
      } else {
        errs() << "warning: sramecc 'Off' was requested for a processor that "
                  "does not support it!\n";
      }
    }
  }
}

static TargetIDSetting
getTargetIDSettingFromFeatureString(StringRef FeatureString) {
  if (FeatureString.ends_with("-"))
    return TargetIDSetting::Off;
  if (FeatureString.ends_with("+"))
    return TargetIDSetting::On;

  llvm_unreachable("Malformed feature string");
}

void AMDGPUTargetID::setTargetIDFromTargetIDStream(StringRef TargetID) {
  SmallVector<StringRef, 3> TargetIDSplit;
  TargetID.split(TargetIDSplit, ':');

  for (const auto &FeatureString : TargetIDSplit) {
    if (FeatureString.starts_with("xnack"))
      XnackSetting = getTargetIDSettingFromFeatureString(FeatureString);
    if (FeatureString.starts_with("sramecc"))
      SramEccSetting = getTargetIDSettingFromFeatureString(FeatureString);
  }
}

std::string AMDGPUTargetID::toString() const {
  std::string StringRep;
  raw_string_ostream StreamRep(StringRep);

  auto TargetTriple = STI.getTargetTriple();
  auto Version = getIsaVersion(STI.getCPU());

  StreamRep << TargetTriple.getArchName() << '-'
            << TargetTriple.getVendorName() << '-'
            << TargetTriple.getOSName() << '-'
            << TargetTriple.getEnvironmentName() << '-';

  std::string Processor;
  // TODO: Following else statement is present here because we used various
  // alias names for GPUs up until GFX9 (e.g. 'fiji' is same as 'gfx803').
  // Remove once all aliases are removed from GCNProcessors.td.
  if (Version.Major >= 9)
    Processor = STI.getCPU().str();
  else
    Processor = (Twine("gfx") + Twine(Version.Major) + Twine(Version.Minor) +
                 Twine(Version.Stepping))
                    .str();

  std::string Features;
  if (STI.getTargetTriple().getOS() == Triple::AMDHSA) {
    // sramecc.
    if (getSramEccSetting() == TargetIDSetting::Off)
      Features += ":sramecc-";
    else if (getSramEccSetting() == TargetIDSetting::On)
      Features += ":sramecc+";
    // xnack.
    if (getXnackSetting() == TargetIDSetting::Off)
      Features += ":xnack-";
    else if (getXnackSetting() == TargetIDSetting::On)
      Features += ":xnack+";
  }

  StreamRep << Processor << Features;

  StreamRep.flush();
  return StringRep;
}

unsigned getWavefrontSize(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureWavefrontSize16))
    return 16;
  if (STI->getFeatureBits().test(FeatureWavefrontSize32))
    return 32;

  return 64;
}

unsigned getLocalMemorySize(const MCSubtargetInfo *STI) {
  unsigned BytesPerCU = 0;
  if (STI->getFeatureBits().test(FeatureLocalMemorySize32768))
    BytesPerCU = 32768;
  if (STI->getFeatureBits().test(FeatureLocalMemorySize65536))
    BytesPerCU = 65536;

  // "Per CU" really means "per whatever functional block the waves of a
  // workgroup must share". So the effective local memory size is doubled in
  // WGP mode on gfx10.
  if (isGFX10Plus(*STI) && !STI->getFeatureBits().test(FeatureCuMode))
    BytesPerCU *= 2;

  return BytesPerCU;
}

unsigned getAddressableLocalMemorySize(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureLocalMemorySize32768))
    return 32768;
  if (STI->getFeatureBits().test(FeatureLocalMemorySize65536))
    return 65536;
  return 0;
}

unsigned getEUsPerCU(const MCSubtargetInfo *STI) {
  // "Per CU" really means "per whatever functional block the waves of a
  // workgroup must share". For gfx10 in CU mode this is the CU, which contains
  // two SIMDs.
  if (isGFX10Plus(*STI) && STI->getFeatureBits().test(FeatureCuMode))
    return 2;
  // Pre-gfx10 a CU contains four SIMDs. For gfx10 in WGP mode the WGP contains
  // two CUs, so a total of four SIMDs.
  return 4;
}

unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
                               unsigned FlatWorkGroupSize) {
  assert(FlatWorkGroupSize != 0);
  if (STI->getTargetTriple().getArch() != Triple::amdgcn)
    return 8;
  unsigned MaxWaves = getMaxWavesPerEU(STI) * getEUsPerCU(STI);
  unsigned N = getWavesPerWorkGroup(STI, FlatWorkGroupSize);
  if (N == 1) {
    // Single-wave workgroups don't consume barrier resources.
    return MaxWaves;
  }

  unsigned MaxBarriers = 16;
  if (isGFX10Plus(*STI) && !STI->getFeatureBits().test(FeatureCuMode))
    MaxBarriers = 32;

  return std::min(MaxWaves / N, MaxBarriers);
}

unsigned getMinWavesPerEU(const MCSubtargetInfo *STI) {
  return 1;
}

unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI) {
  // FIXME: Need to take scratch memory into account.
  if (isGFX90A(*STI))
    return 8;
  if (!isGFX10Plus(*STI))
    return 10;
  return hasGFX10_3Insts(*STI) ? 16 : 20;
}

unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI,
                                   unsigned FlatWorkGroupSize) {
  return divideCeil(getWavesPerWorkGroup(STI, FlatWorkGroupSize),
                    getEUsPerCU(STI));
}

unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI) {
  return 1;
}

unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI) {
  // Some subtargets allow encoding 2048, but this isn't tested or supported.
  return 1024;
}

unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
                              unsigned FlatWorkGroupSize) {
  return divideCeil(FlatWorkGroupSize, getWavefrontSize(STI));
}

unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return getAddressableNumSGPRs(STI);
  if (Version.Major >= 8)
    return 16;
  return 8;
}

unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI) {
  return 8;
}

unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 8)
    return 800;
  return 512;
}

unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureSGPRInitBug))
    return FIXED_NUM_SGPRS_FOR_INIT_BUG;

  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return 106;
  if (Version.Major >= 8)
    return 102;
  return 104;
}

unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return 0;

  if (WavesPerEU >= getMaxWavesPerEU(STI))
    return 0;

  unsigned MinNumSGPRs = getTotalNumSGPRs(STI) / (WavesPerEU + 1);
  if (STI->getFeatureBits().test(FeatureTrapHandler))
    MinNumSGPRs -= std::min(MinNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
  MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(STI)) + 1;
  return std::min(MinNumSGPRs, getAddressableNumSGPRs(STI));
}

unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
                        bool Addressable) {
  assert(WavesPerEU != 0);

  unsigned AddressableNumSGPRs = getAddressableNumSGPRs(STI);
  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return Addressable ? AddressableNumSGPRs : 108;
  if (Version.Major >= 8 && !Addressable)
    AddressableNumSGPRs = 112;
  unsigned MaxNumSGPRs = getTotalNumSGPRs(STI) / WavesPerEU;
  if (STI->getFeatureBits().test(FeatureTrapHandler))
    MaxNumSGPRs -= std::min(MaxNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
  MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(STI));
  return std::min(MaxNumSGPRs, AddressableNumSGPRs);
}

unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed) {
  unsigned ExtraSGPRs = 0;
  if (VCCUsed)
    ExtraSGPRs = 2;

  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return ExtraSGPRs;

  if (Version.Major < 8) {
    if (FlatScrUsed)
      ExtraSGPRs = 4;
  } else {
    if (XNACKUsed)
      ExtraSGPRs = 4;

    if (FlatScrUsed ||
        STI->getFeatureBits().test(AMDGPU::FeatureArchitectedFlatScratch))
      ExtraSGPRs = 6;
  }

  return ExtraSGPRs;
}

unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed) {
  return getNumExtraSGPRs(STI, VCCUsed, FlatScrUsed,
                          STI->getFeatureBits().test(AMDGPU::FeatureXNACK));
}

unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs) {
  NumSGPRs = alignTo(std::max(1u, NumSGPRs), getSGPREncodingGranule(STI));
  // SGPRBlocks is actual number of SGPR blocks minus 1.
  return NumSGPRs / getSGPREncodingGranule(STI) - 1;
}
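
// Worked example: with the SGPR encoding granule of 8, a kernel using 37
// SGPRs is first rounded up to 40, and the encoded block count is
// 40 / 8 - 1 == 4.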

unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
                             std::optional<bool> EnableWavefrontSize32) {
  if (STI->getFeatureBits().test(FeatureGFX90AInsts))
    return 8;

  bool IsWave32 = EnableWavefrontSize32 ?
      *EnableWavefrontSize32 :
      STI->getFeatureBits().test(FeatureWavefrontSize32);

  if (STI->getFeatureBits().test(FeatureGFX11FullVGPRs))
    return IsWave32 ? 24 : 12;

  if (hasGFX10_3Insts(*STI))
    return IsWave32 ? 16 : 8;

  return IsWave32 ? 8 : 4;
}

unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
                                std::optional<bool> EnableWavefrontSize32) {
  if (STI->getFeatureBits().test(FeatureGFX90AInsts))
    return 8;

  bool IsWave32 = EnableWavefrontSize32 ?
      *EnableWavefrontSize32 :
      STI->getFeatureBits().test(FeatureWavefrontSize32);

  return IsWave32 ? 8 : 4;
}

unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureGFX90AInsts))
    return 512;
  if (!isGFX10Plus(*STI))
    return 256;
  bool IsWave32 = STI->getFeatureBits().test(FeatureWavefrontSize32);
  if (STI->getFeatureBits().test(FeatureGFX11FullVGPRs))
    return IsWave32 ? 1536 : 768;
  return IsWave32 ? 1024 : 512;
}

unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo *STI) { return 256; }

unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureGFX90AInsts))
    return 512;
  return getAddressableNumArchVGPRs(STI);
}

unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI,
                                      unsigned NumVGPRs) {
  unsigned MaxWaves = getMaxWavesPerEU(STI);
  unsigned Granule = getVGPRAllocGranule(STI);
  if (NumVGPRs < Granule)
    return MaxWaves;
  unsigned RoundedRegs = alignTo(NumVGPRs, Granule);
  return std::min(std::max(getTotalNumVGPRs(STI) / RoundedRegs, 1u), MaxWaves);
}
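
// Worked example: with a granule of 4 and 256 total VGPRs (wave64, pre-gfx10
// numbers), a kernel using 65 VGPRs rounds up to 68 and gets
// min(max(256 / 68, 1), MaxWaves) == 3 waves per EU.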

unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  unsigned MaxWavesPerEU = getMaxWavesPerEU(STI);
  if (WavesPerEU >= MaxWavesPerEU)
    return 0;

  unsigned TotNumVGPRs = getTotalNumVGPRs(STI);
  unsigned AddrsableNumVGPRs = getAddressableNumVGPRs(STI);
  unsigned Granule = getVGPRAllocGranule(STI);
  unsigned MaxNumVGPRs = alignDown(TotNumVGPRs / WavesPerEU, Granule);

  if (MaxNumVGPRs == alignDown(TotNumVGPRs / MaxWavesPerEU, Granule))
    return 0;

  unsigned MinWavesPerEU = getNumWavesPerEUWithNumVGPRs(STI, AddrsableNumVGPRs);
  if (WavesPerEU < MinWavesPerEU)
    return getMinNumVGPRs(STI, MinWavesPerEU);

  unsigned MaxNumVGPRsNext = alignDown(TotNumVGPRs / (WavesPerEU + 1), Granule);
  unsigned MinNumVGPRs = 1 + std::min(MaxNumVGPRs - Granule, MaxNumVGPRsNext);
  return std::min(MinNumVGPRs, AddrsableNumVGPRs);
}

unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(STI) / WavesPerEU,
                                   getVGPRAllocGranule(STI));
  unsigned AddressableNumVGPRs = getAddressableNumVGPRs(STI);
  return std::min(MaxNumVGPRs, AddressableNumVGPRs);
}

unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs,
                          std::optional<bool> EnableWavefrontSize32) {
  NumVGPRs = alignTo(std::max(1u, NumVGPRs),
                     getVGPREncodingGranule(STI, EnableWavefrontSize32));
  // VGPRBlocks is actual number of VGPR blocks minus 1.
  return NumVGPRs / getVGPREncodingGranule(STI, EnableWavefrontSize32) - 1;
}
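
// Worked example: with a VGPR encoding granule of 4, a kernel using 10 VGPRs
// rounds up to 12 and encodes as 12 / 4 - 1 == 2.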

} // end namespace IsaInfo

void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
                               const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());

  memset(&Header, 0, sizeof(Header));

  Header.amd_kernel_code_version_major = 1;
  Header.amd_kernel_code_version_minor = 2;
  Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
  Header.amd_machine_version_major = Version.Major;
  Header.amd_machine_version_minor = Version.Minor;
  Header.amd_machine_version_stepping = Version.Stepping;
  Header.kernel_code_entry_byte_offset = sizeof(Header);
  Header.wavefront_size = 6;

  // If the code object does not support indirect functions, then the value
  // must be 0xffffffff.
  Header.call_convention = -1;

  // These alignment values are specified in powers of two, so alignment =
  // 2^n. The minimum alignment is 2^4 = 16.
  Header.kernarg_segment_alignment = 4;
  Header.group_segment_alignment = 4;
  Header.private_segment_alignment = 4;

  if (Version.Major >= 10) {
    if (STI->getFeatureBits().test(FeatureWavefrontSize32)) {
      Header.wavefront_size = 5;
      Header.code_properties |= AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
    }
    Header.compute_pgm_resource_registers |=
        S_00B848_WGP_MODE(STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1) |
        S_00B848_MEM_ORDERED(1);
  }
}

amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(
    const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());

  amdhsa::kernel_descriptor_t KD;
  memset(&KD, 0, sizeof(KD));

  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64,
                  amdhsa::FLOAT_DENORM_MODE_FLUSH_NONE);
  if (Version.Major >= 12) {
    AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                    amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, 0);
    AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                    amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_DISABLE_PERF, 0);
  } else {
    AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                    amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, 1);
    AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                    amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, 1);
  }
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc2,
                  amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, 1);
  if (Version.Major >= 10) {
    AMDHSA_BITS_SET(KD.kernel_code_properties,
                    amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
                    STI->getFeatureBits().test(FeatureWavefrontSize32) ? 1 : 0);
    AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                    amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE,
                    STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1);
    AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                    amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, 1);
  }
  if (AMDGPU::isGFX90A(*STI)) {
    AMDHSA_BITS_SET(KD.compute_pgm_rsrc3,
                    amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
                    STI->getFeatureBits().test(FeatureTgSplit) ? 1 : 0);
  }
  return KD;
}

bool isGroupSegment(const GlobalValue *GV) {
  return GV->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
}

bool isGlobalSegment(const GlobalValue *GV) {
  return GV->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
}

bool isReadOnlySegment(const GlobalValue *GV) {
  unsigned AS = GV->getAddressSpace();
  return AS == AMDGPUAS::CONSTANT_ADDRESS ||
         AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
}

bool shouldEmitConstantsToTextSection(const Triple &TT) {
  return TT.getArch() == Triple::r600;
}

std::pair<unsigned, unsigned>
getIntegerPairAttribute(const Function &F, StringRef Name,
                        std::pair<unsigned, unsigned> Default,
                        bool OnlyFirstRequired) {
  Attribute A = F.getFnAttribute(Name);
  if (!A.isStringAttribute())
    return Default;

  LLVMContext &Ctx = F.getContext();
  std::pair<unsigned, unsigned> Ints = Default;
  std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(',');
  if (Strs.first.trim().getAsInteger(0, Ints.first)) {
    Ctx.emitError("can't parse first integer attribute " + Name);
    return Default;
  }
  if (Strs.second.trim().getAsInteger(0, Ints.second)) {
    if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
      Ctx.emitError("can't parse second integer attribute " + Name);
      return Default;
    }
  }

  return Ints;
}
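
// Illustrative example: for a function carrying the string attribute
// "amdgpu-flat-work-group-size"="1,256" this returns {1, 256}; with
// OnlyFirstRequired set, a lone "64" parses as {64, Default.second}.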

unsigned getVmcntBitMask(const IsaVersion &Version) {
  return (1 << (getVmcntBitWidthLo(Version.Major) +
                getVmcntBitWidthHi(Version.Major))) -
         1;
}

unsigned getLoadcntBitMask(const IsaVersion &Version) {
  return (1 << getLoadcntBitWidth(Version.Major)) - 1;
}

unsigned getSamplecntBitMask(const IsaVersion &Version) {
  return (1 << getSamplecntBitWidth(Version.Major)) - 1;
}

unsigned getBvhcntBitMask(const IsaVersion &Version) {
  return (1 << getBvhcntBitWidth(Version.Major)) - 1;
}

unsigned getExpcntBitMask(const IsaVersion &Version) {
  return (1 << getExpcntBitWidth(Version.Major)) - 1;
}

unsigned getLgkmcntBitMask(const IsaVersion &Version) {
  return (1 << getLgkmcntBitWidth(Version.Major)) - 1;
}

unsigned getDscntBitMask(const IsaVersion &Version) {
  return (1 << getDscntBitWidth(Version.Major)) - 1;
}

unsigned getKmcntBitMask(const IsaVersion &Version) {
  return (1 << getKmcntBitWidth(Version.Major)) - 1;
}

unsigned getStorecntBitMask(const IsaVersion &Version) {
  return (1 << getStorecntBitWidth(Version.Major)) - 1;
}

unsigned getWaitcntBitMask(const IsaVersion &Version) {
  unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(Version.Major),
                                getVmcntBitWidthLo(Version.Major));
  unsigned Expcnt = getBitMask(getExpcntBitShift(Version.Major),
                               getExpcntBitWidth(Version.Major));
  unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(Version.Major),
                                getLgkmcntBitWidth(Version.Major));
  unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(Version.Major),
                                getVmcntBitWidthHi(Version.Major));
  return VmcntLo | Expcnt | Lgkmcnt | VmcntHi;
}

unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt) {
  unsigned VmcntLo = unpackBits(Waitcnt, getVmcntBitShiftLo(Version.Major),
                                getVmcntBitWidthLo(Version.Major));
  unsigned VmcntHi = unpackBits(Waitcnt, getVmcntBitShiftHi(Version.Major),
                                getVmcntBitWidthHi(Version.Major));
  return VmcntLo | VmcntHi << getVmcntBitWidthLo(Version.Major);
}

unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getExpcntBitShift(Version.Major),
                    getExpcntBitWidth(Version.Major));
}

unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getLgkmcntBitShift(Version.Major),
                    getLgkmcntBitWidth(Version.Major));
}

void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
                   unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
  Vmcnt = decodeVmcnt(Version, Waitcnt);
  Expcnt = decodeExpcnt(Version, Waitcnt);
  Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
}

Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded) {
  Waitcnt Decoded;
  Decoded.LoadCnt = decodeVmcnt(Version, Encoded);
  Decoded.ExpCnt = decodeExpcnt(Version, Encoded);
  Decoded.DsCnt = decodeLgkmcnt(Version, Encoded);
  return Decoded;
}

unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
                     unsigned Vmcnt) {
  Waitcnt = packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(Version.Major),
                     getVmcntBitWidthLo(Version.Major));
  return packBits(Vmcnt >> getVmcntBitWidthLo(Version.Major), Waitcnt,
                  getVmcntBitShiftHi(Version.Major),
                  getVmcntBitWidthHi(Version.Major));
}

unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
                      unsigned Expcnt) {
  return packBits(Expcnt, Waitcnt, getExpcntBitShift(Version.Major),
                  getExpcntBitWidth(Version.Major));
}

unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
                       unsigned Lgkmcnt) {
  return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(Version.Major),
                  getLgkmcntBitWidth(Version.Major));
}

unsigned encodeWaitcnt(const IsaVersion &Version,
                       unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
  unsigned Waitcnt = getWaitcntBitMask(Version);
  Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
  Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
  Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
  return Waitcnt;
}

unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded) {
  return encodeWaitcnt(Version, Decoded.LoadCnt, Decoded.ExpCnt, Decoded.DsCnt);
}
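
// Worked example, assuming the gfx9 layout (vmcnt[3:0] at bit 0, expcnt at
// bit 4, lgkmcnt at bit 8, vmcnt[5:4] at bit 14):
//   encodeWaitcnt(V, /*Vmcnt=*/0, /*Expcnt=*/7, /*Lgkmcnt=*/15) == 0x0F70
// Vmcnt is cleared while the other counters stay at their no-wait maxima,
// which is how "s_waitcnt vmcnt(0)" is encoded.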

static unsigned getCombinedCountBitMask(const IsaVersion &Version,
                                        bool IsStore) {
  unsigned Dscnt = getBitMask(getDscntBitShift(Version.Major),
                              getDscntBitWidth(Version.Major));
  if (IsStore) {
    unsigned Storecnt = getBitMask(getLoadcntStorecntBitShift(Version.Major),
                                   getStorecntBitWidth(Version.Major));
    return Dscnt | Storecnt;
  } else {
    unsigned Loadcnt = getBitMask(getLoadcntStorecntBitShift(Version.Major),
                                  getLoadcntBitWidth(Version.Major));
    return Dscnt | Loadcnt;
  }
}

Waitcnt decodeLoadcntDscnt(const IsaVersion &Version, unsigned LoadcntDscnt) {
  Waitcnt Decoded;
  Decoded.LoadCnt =
      unpackBits(LoadcntDscnt, getLoadcntStorecntBitShift(Version.Major),
                 getLoadcntBitWidth(Version.Major));
  Decoded.DsCnt = unpackBits(LoadcntDscnt, getDscntBitShift(Version.Major),
                             getDscntBitWidth(Version.Major));
  return Decoded;
}

Waitcnt decodeStorecntDscnt(const IsaVersion &Version, unsigned StorecntDscnt) {
  Waitcnt Decoded;
  Decoded.StoreCnt =
      unpackBits(StorecntDscnt, getLoadcntStorecntBitShift(Version.Major),
                 getStorecntBitWidth(Version.Major));
  Decoded.DsCnt = unpackBits(StorecntDscnt, getDscntBitShift(Version.Major),
                             getDscntBitWidth(Version.Major));
  return Decoded;
}

static unsigned encodeLoadcnt(const IsaVersion &Version, unsigned Waitcnt,
                              unsigned Loadcnt) {
  return packBits(Loadcnt, Waitcnt, getLoadcntStorecntBitShift(Version.Major),
                  getLoadcntBitWidth(Version.Major));
}

static unsigned encodeStorecnt(const IsaVersion &Version, unsigned Waitcnt,
                               unsigned Storecnt) {
  return packBits(Storecnt, Waitcnt, getLoadcntStorecntBitShift(Version.Major),
                  getStorecntBitWidth(Version.Major));
}

static unsigned encodeDscnt(const IsaVersion &Version, unsigned Waitcnt,
                            unsigned Dscnt) {
  return packBits(Dscnt, Waitcnt, getDscntBitShift(Version.Major),
                  getDscntBitWidth(Version.Major));
}

static unsigned encodeLoadcntDscnt(const IsaVersion &Version, unsigned Loadcnt,
                                   unsigned Dscnt) {
  unsigned Waitcnt = getCombinedCountBitMask(Version, false);
  Waitcnt = encodeLoadcnt(Version, Waitcnt, Loadcnt);
  Waitcnt = encodeDscnt(Version, Waitcnt, Dscnt);
  return Waitcnt;
}

unsigned encodeLoadcntDscnt(const IsaVersion &Version, const Waitcnt &Decoded) {
  return encodeLoadcntDscnt(Version, Decoded.LoadCnt, Decoded.DsCnt);
}

static unsigned encodeStorecntDscnt(const IsaVersion &Version,
                                    unsigned Storecnt, unsigned Dscnt) {
  unsigned Waitcnt = getCombinedCountBitMask(Version, true);
  Waitcnt = encodeStorecnt(Version, Waitcnt, Storecnt);
  Waitcnt = encodeDscnt(Version, Waitcnt, Dscnt);
  return Waitcnt;
}

unsigned encodeStorecntDscnt(const IsaVersion &Version,
                             const Waitcnt &Decoded) {
  return encodeStorecntDscnt(Version, Decoded.StoreCnt, Decoded.DsCnt);
}
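
// Worked example: on gfx12 the combined immediate keeps dscnt in bits [5:0]
// and loadcnt in bits [13:8], so encoding a loadcnt of 1 together with a
// dscnt of 2 yields (1 << 8) | 2 == 0x102.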

//===----------------------------------------------------------------------===//
// Custom Operands.
//
// A table of custom operands shall describe "primary" operand names
// first followed by aliases if any. It is not required but recommended
// to arrange operands so that operand encoding matches operand position
// in the table. This will make disassembly a bit more efficient.
// Unused slots in the table shall have an empty name.
//
//===----------------------------------------------------------------------===//

template <class T>
static bool isValidOpr(int Idx, const CustomOperand<T> OpInfo[], int OpInfoSize,
                       T Context) {
  return 0 <= Idx && Idx < OpInfoSize && !OpInfo[Idx].Name.empty() &&
         (!OpInfo[Idx].Cond || OpInfo[Idx].Cond(Context));
}

template <class T>
static int getOprIdx(std::function<bool(const CustomOperand<T> &)> Test,
                     const CustomOperand<T> OpInfo[], int OpInfoSize,
                     T Context) {
  int InvalidIdx = OPR_ID_UNKNOWN;
  for (int Idx = 0; Idx < OpInfoSize; ++Idx) {
    if (Test(OpInfo[Idx])) {
      if (!OpInfo[Idx].Cond || OpInfo[Idx].Cond(Context))
        return Idx;
      InvalidIdx = OPR_ID_UNSUPPORTED;
    }
  }
  return InvalidIdx;
}

template <class T>
static int getOprIdx(const StringRef Name, const CustomOperand<T> OpInfo[],
                     int OpInfoSize, T Context) {
  auto Test = [=](const CustomOperand<T> &Op) { return Op.Name == Name; };
  return getOprIdx<T>(Test, OpInfo, OpInfoSize, Context);
}

template <class T>
static int getOprIdx(int Id, const CustomOperand<T> OpInfo[], int OpInfoSize,
                     T Context, bool QuickCheck = true) {
  auto Test = [=](const CustomOperand<T> &Op) {
    return Op.Encoding == Id && !Op.Name.empty();
  };
  // This is an optimization that should work in most cases.
  // As a side effect, it may cause selection of an alias
  // instead of a primary operand name in case of sparse tables.
  if (QuickCheck && isValidOpr<T>(Id, OpInfo, OpInfoSize, Context) &&
      OpInfo[Id].Encoding == Id) {
    return Id;
  }
  return getOprIdx<T>(Test, OpInfo, OpInfoSize, Context);
}

//===----------------------------------------------------------------------===//
// Custom Operand Values
//===----------------------------------------------------------------------===//

static unsigned getDefaultCustomOperandEncoding(const CustomOperandVal *Opr,
                                                int Size,
                                                const MCSubtargetInfo &STI) {
  unsigned Enc = 0;
  for (int Idx = 0; Idx < Size; ++Idx) {
    const auto &Op = Opr[Idx];
    if (Op.isSupported(STI))
      Enc |= Op.encode(Op.Default);
  }
  return Enc;
}

static bool isSymbolicCustomOperandEncoding(const CustomOperandVal *Opr,
                                            int Size, unsigned Code,
                                            bool &HasNonDefaultVal,
                                            const MCSubtargetInfo &STI) {
  unsigned UsedOprMask = 0;
  HasNonDefaultVal = false;
  for (int Idx = 0; Idx < Size; ++Idx) {
    const auto &Op = Opr[Idx];
    if (!Op.isSupported(STI))
      continue;
    UsedOprMask |= Op.getMask();
    unsigned Val = Op.decode(Code);
    if (!Op.isValid(Val))
      return false;
    HasNonDefaultVal |= (Val != Op.Default);
  }
  return (Code & ~UsedOprMask) == 0;
}

static bool decodeCustomOperand(const CustomOperandVal *Opr, int Size,
                                unsigned Code, int &Idx, StringRef &Name,
                                unsigned &Val, bool &IsDefault,
                                const MCSubtargetInfo &STI) {
  while (Idx < Size) {
    const auto &Op = Opr[Idx++];
    if (Op.isSupported(STI)) {
      Name = Op.Name;
      Val = Op.decode(Code);
      IsDefault = (Val == Op.Default);
      return true;
    }
  }

  return false;
}

static int encodeCustomOperandVal(const CustomOperandVal &Op,
                                  int64_t InputVal) {
  if (InputVal < 0 || InputVal > Op.Max)
    return OPR_VAL_INVALID;
  return Op.encode(InputVal);
}

static int encodeCustomOperand(const CustomOperandVal *Opr, int Size,
                               const StringRef Name, int64_t InputVal,
                               unsigned &UsedOprMask,
                               const MCSubtargetInfo &STI) {
  int InvalidId = OPR_ID_UNKNOWN;
  for (int Idx = 0; Idx < Size; ++Idx) {
    const auto &Op = Opr[Idx];
    if (Op.Name == Name) {
      if (!Op.isSupported(STI)) {
        InvalidId = OPR_ID_UNSUPPORTED;
        continue;
      }
      auto OprMask = Op.getMask();
      if (OprMask & UsedOprMask)
        return OPR_ID_DUPLICATE;
      UsedOprMask |= OprMask;
      return encodeCustomOperandVal(Op, InputVal);
    }
  }
  return InvalidId;
}

//===----------------------------------------------------------------------===//
// DepCtr
//===----------------------------------------------------------------------===//

namespace DepCtr {

int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI) {
  static int Default = -1;
  if (Default == -1)
    Default = getDefaultCustomOperandEncoding(DepCtrInfo, DEP_CTR_SIZE, STI);
  return Default;
}

bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal,
                              const MCSubtargetInfo &STI) {
  return isSymbolicCustomOperandEncoding(DepCtrInfo, DEP_CTR_SIZE, Code,
                                         HasNonDefaultVal, STI);
}

bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val,
                  bool &IsDefault, const MCSubtargetInfo &STI) {
  return decodeCustomOperand(DepCtrInfo, DEP_CTR_SIZE, Code, Id, Name, Val,
                             IsDefault, STI);
}

int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask,
                 const MCSubtargetInfo &STI) {
  return encodeCustomOperand(DepCtrInfo, DEP_CTR_SIZE, Name, Val, UsedOprMask,
                             STI);
}

unsigned decodeFieldVmVsrc(unsigned Encoded) {
  return unpackBits(Encoded, getVmVsrcBitShift(), getVmVsrcBitWidth());
}

unsigned decodeFieldVaVdst(unsigned Encoded) {
  return unpackBits(Encoded, getVaVdstBitShift(), getVaVdstBitWidth());
}

unsigned decodeFieldSaSdst(unsigned Encoded) {
  return unpackBits(Encoded, getSaSdstBitShift(), getSaSdstBitWidth());
}

unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc) {
  return packBits(VmVsrc, Encoded, getVmVsrcBitShift(), getVmVsrcBitWidth());
}

unsigned encodeFieldVmVsrc(unsigned VmVsrc) {
  return encodeFieldVmVsrc(0xffff, VmVsrc);
}

unsigned encodeFieldVaVdst(unsigned Encoded, unsigned VaVdst) {
  return packBits(VaVdst, Encoded, getVaVdstBitShift(), getVaVdstBitWidth());
}

unsigned encodeFieldVaVdst(unsigned VaVdst) {
  return encodeFieldVaVdst(0xffff, VaVdst);
}

unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst) {
  return packBits(SaSdst, Encoded, getSaSdstBitShift(), getSaSdstBitWidth());
}

unsigned encodeFieldSaSdst(unsigned SaSdst) {
  return encodeFieldSaSdst(0xffff, SaSdst);
}
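
// Worked example: vm_vsrc occupies bits [4:2] of the depctr immediate, so
// encodeFieldVmVsrc(/*VmVsrc=*/0) packs a zero into the all-ones default and
// returns 0xffe3, the familiar "s_waitcnt_depctr vm_vsrc(0)" immediate.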

} // namespace DepCtr

//===----------------------------------------------------------------------===//
// hwreg
//===----------------------------------------------------------------------===//

namespace Hwreg {

int64_t getHwregId(const StringRef Name, const MCSubtargetInfo &STI) {
  int Idx = getOprIdx<const MCSubtargetInfo &>(Name, Opr, OPR_SIZE, STI);
  return (Idx < 0) ? Idx : Opr[Idx].Encoding;
}

bool isValidHwreg(int64_t Id) { return 0 <= Id && isUInt<HwregId::Width>(Id); }

bool isValidHwregOffset(int64_t Offset) {
  return 0 <= Offset && isUInt<HwregOffset::Width>(Offset);
}

bool isValidHwregWidth(int64_t Width) {
  return 0 <= (Width - 1) && isUInt<HwregSize::Width>(Width - 1);
}

StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI) {
  int Idx = getOprIdx<const MCSubtargetInfo &>(Id, Opr, OPR_SIZE, STI);
  return (Idx < 0) ? "" : Opr[Idx].Name;
}

} // namespace Hwreg

//===----------------------------------------------------------------------===//
// exp tgt
//===----------------------------------------------------------------------===//

namespace Exp {

struct ExpTgt {
  StringLiteral Name;
  unsigned Tgt;
  unsigned MaxIndex;
};

static constexpr ExpTgt ExpTgtInfo[] = {
    {{"null"},           ET_NULL,            ET_NULL_MAX_IDX},
    {{"mrtz"},           ET_MRTZ,            ET_MRTZ_MAX_IDX},
    {{"prim"},           ET_PRIM,            ET_PRIM_MAX_IDX},
    {{"mrt"},            ET_MRT0,            ET_MRT_MAX_IDX},
    {{"pos"},            ET_POS0,            ET_POS_MAX_IDX},
    {{"dual_src_blend"}, ET_DUAL_SRC_BLEND0, ET_DUAL_SRC_BLEND_MAX_IDX},
    {{"param"},          ET_PARAM0,          ET_PARAM_MAX_IDX},
};

bool getTgtName(unsigned Id, StringRef &Name, int &Index) {
  for (const ExpTgt &Val : ExpTgtInfo) {
    if (Val.Tgt <= Id && Id <= Val.Tgt + Val.MaxIndex) {
      Index = (Val.MaxIndex == 0) ? -1 : (Id - Val.Tgt);
      Name = Val.Name;
      return true;
    }
  }
  return false;
}

unsigned getTgtId(const StringRef Name) {

  for (const ExpTgt &Val : ExpTgtInfo) {
    if (Val.MaxIndex == 0 && Name == Val.Name)
      return Val.Tgt;

    if (Val.MaxIndex > 0 && Name.starts_with(Val.Name)) {
      StringRef Suffix = Name.drop_front(Val.Name.size());

      unsigned Id;
      if (Suffix.getAsInteger(10, Id) || Id > Val.MaxIndex)
        return ET_INVALID;

      // Disallow leading zeroes
      if (Suffix.size() > 1 && Suffix[0] == '0')
        return ET_INVALID;

      return Val.Tgt + Id;
    }
  }
  return ET_INVALID;
}
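
// Illustrative example: getTgtId("pos3") resolves to ET_POS0 + 3, while
// "pos03" is rejected for its leading zero, and any index beyond the entry's
// MaxIndex returns ET_INVALID.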

bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI) {
  switch (Id) {
  case ET_NULL:
    return !isGFX11Plus(STI);
  case ET_POS4:
  case ET_PRIM:
    return isGFX10Plus(STI);
  case ET_DUAL_SRC_BLEND0:
  case ET_DUAL_SRC_BLEND1:
    return isGFX11Plus(STI);
  default:
    if (Id >= ET_PARAM0 && Id <= ET_PARAM31)
      return !isGFX11Plus(STI);
    return true;
  }
}

} // namespace Exp

//===----------------------------------------------------------------------===//
// MTBUF Format
//===----------------------------------------------------------------------===//

namespace MTBUFFormat {

int64_t getDfmt(const StringRef Name) {
  for (int Id = DFMT_MIN; Id <= DFMT_MAX; ++Id) {
    if (Name == DfmtSymbolic[Id])
      return Id;
  }
  return DFMT_UNDEF;
}

StringRef getDfmtName(unsigned Id) {
  assert(Id <= DFMT_MAX);
  return DfmtSymbolic[Id];
}

static StringLiteral const *getNfmtLookupTable(const MCSubtargetInfo &STI) {
  if (isSI(STI) || isCI(STI))
    return NfmtSymbolicSICI;
  if (isVI(STI) || isGFX9(STI))
    return NfmtSymbolicVI;
  return NfmtSymbolicGFX10;
}

int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI) {
  auto lookupTable = getNfmtLookupTable(STI);
  for (int Id = NFMT_MIN; Id <= NFMT_MAX; ++Id) {
    if (Name == lookupTable[Id])
      return Id;
  }
  return NFMT_UNDEF;
}

StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI) {
  assert(Id <= NFMT_MAX);
  return getNfmtLookupTable(STI)[Id];
}

bool isValidDfmtNfmt(unsigned Id, const MCSubtargetInfo &STI) {
  unsigned Dfmt;
  unsigned Nfmt;
  decodeDfmtNfmt(Id, Dfmt, Nfmt);
  return isValidNfmt(Nfmt, STI);
}

bool isValidNfmt(unsigned Id, const MCSubtargetInfo &STI) {
  return !getNfmtName(Id, STI).empty();
}

int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt) {
  return (Dfmt << DFMT_SHIFT) | (Nfmt << NFMT_SHIFT);
}

void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt) {
  Dfmt = (Format >> DFMT_SHIFT) & DFMT_MASK;
  Nfmt = (Format >> NFMT_SHIFT) & NFMT_MASK;
}
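
// Worked example, assuming the pre-gfx10 layout (dfmt in bits [3:0], nfmt in
// bits [6:4]): encodeDfmtNfmt(4, 2) == 0x24, and decodeDfmtNfmt(0x24, ...)
// recovers Dfmt == 4 and Nfmt == 2.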

int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI) {
  if (isGFX11Plus(STI)) {
    for (int Id = UfmtGFX11::UFMT_FIRST; Id <= UfmtGFX11::UFMT_LAST; ++Id) {
      if (Name == UfmtSymbolicGFX11[Id])
        return Id;
    }
  } else {
    for (int Id = UfmtGFX10::UFMT_FIRST; Id <= UfmtGFX10::UFMT_LAST; ++Id) {
      if (Name == UfmtSymbolicGFX10[Id])
        return Id;
    }
  }
  return UFMT_UNDEF;
}

StringRef getUnifiedFormatName(unsigned Id, const MCSubtargetInfo &STI) {
  if (isValidUnifiedFormat(Id, STI))
    return isGFX10(STI) ? UfmtSymbolicGFX10[Id] : UfmtSymbolicGFX11[Id];
  return "";
}

bool isValidUnifiedFormat(unsigned Id, const MCSubtargetInfo &STI) {
  return isGFX10(STI) ? Id <= UfmtGFX10::UFMT_LAST : Id <= UfmtGFX11::UFMT_LAST;
}

int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt,
                             const MCSubtargetInfo &STI) {
  int64_t Fmt = encodeDfmtNfmt(Dfmt, Nfmt);
  if (isGFX11Plus(STI)) {
    for (int Id = UfmtGFX11::UFMT_FIRST; Id <= UfmtGFX11::UFMT_LAST; ++Id) {
      if (Fmt == DfmtNfmt2UFmtGFX11[Id])
        return Id;
    }
  } else {
    for (int Id = UfmtGFX10::UFMT_FIRST; Id <= UfmtGFX10::UFMT_LAST; ++Id) {
      if (Fmt == DfmtNfmt2UFmtGFX10[Id])
        return Id;
    }
  }
  return UFMT_UNDEF;
}

bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI) {
  return isGFX10Plus(STI) ? (Val <= UFMT_MAX) : (Val <= DFMT_NFMT_MAX);
}
1901
1902unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI) {
1903 if (isGFX10Plus(STI))
1904 return UFMT_DEFAULT;
1905 return DFMT_NFMT_DEFAULT;
1906}
1907
1908} // namespace MTBUFFormat
1909
1910//===----------------------------------------------------------------------===//
1911// SendMsg
1912//===----------------------------------------------------------------------===//
1913
1914namespace SendMsg {
1915
1916static uint64_t getMsgIdMask(const MCSubtargetInfo &STI) {
1917 return isGFX11Plus(STI) ? ID_MASK_GFX11Plus_ : ID_MASK_PreGFX11_;
1918}
1919
1920int64_t getMsgId(const StringRef Name, const MCSubtargetInfo &STI) {
1921 int Idx = getOprIdx<const MCSubtargetInfo &>(Name, Msg, MSG_SIZE, STI);
1922 return (Idx < 0) ? Idx : Msg[Idx].Encoding;
1923}
1924
1925bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI) {
1926 return (MsgId & ~(getMsgIdMask(STI))) == 0;
1927}
1928
1929StringRef getMsgName(int64_t MsgId, const MCSubtargetInfo &STI) {
1930 int Idx = getOprIdx<const MCSubtargetInfo &>(MsgId, Msg, MSG_SIZE, STI);
1931 return (Idx < 0) ? "" : Msg[Idx].Name;
1932}
1933
1934int64_t getMsgOpId(int64_t MsgId, const StringRef Name) {
1935 const char* const *S = (MsgId == ID_SYSMSG) ? OpSysSymbolic : OpGsSymbolic;
1936 const int F = (MsgId == ID_SYSMSG) ? OP_SYS_FIRST_ : OP_GS_FIRST_;
1937 const int L = (MsgId == ID_SYSMSG) ? OP_SYS_LAST_ : OP_GS_LAST_;
1938 for (int i = F; i < L; ++i) {
1939 if (Name == S[i]) {
1940 return i;
1941 }
1942 }
1943 return OP_UNKNOWN_;
1944}
1945
1946bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI,
1947 bool Strict) {
1948 assert(isValidMsgId(MsgId, STI));
1949
1950 if (!Strict)
1951 return 0 <= OpId && isUInt<OP_WIDTH_>(OpId);
1952
1953 if (MsgId == ID_SYSMSG)
1954 return OP_SYS_FIRST_ <= OpId && OpId < OP_SYS_LAST_;
1955 if (!isGFX11Plus(STI)) {
1956 switch (MsgId) {
1957 case ID_GS_PreGFX11:
1958 return (OP_GS_FIRST_ <= OpId && OpId < OP_GS_LAST_) && OpId != OP_GS_NOP;
1959 case ID_GS_DONE_PreGFX11:
1960 return OP_GS_FIRST_ <= OpId && OpId < OP_GS_LAST_;
1961 }
1962 }
1963 return OpId == OP_NONE_;
1964}
1965
1966StringRef getMsgOpName(int64_t MsgId, int64_t OpId,
1967 const MCSubtargetInfo &STI) {
1968 assert(msgRequiresOp(MsgId, STI));
1969 return (MsgId == ID_SYSMSG)? OpSysSymbolic[OpId] : OpGsSymbolic[OpId];
1970}
1971
1972bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId,
1973 const MCSubtargetInfo &STI, bool Strict) {
1974 assert(isValidMsgOp(MsgId, OpId, STI, Strict));
1975
1976 if (!Strict)
1977 return 0 <= StreamId && isUInt<STREAM_ID_WIDTH_>(StreamId);
1978
1979 if (!isGFX11Plus(STI)) {
1980 switch (MsgId) {
1981 case ID_GS_PreGFX11:
1982 return STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_;
1983 case ID_GS_DONE_PreGFX11:
1984 return (OpId == OP_GS_NOP) ?
1985 (StreamId == STREAM_ID_NONE_) :
1986 (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_);
1987 }
1988 }
1989 return StreamId == STREAM_ID_NONE_;
1990}
1991
1992bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI) {
1993 return MsgId == ID_SYSMSG ||
1994 (!isGFX11Plus(STI) &&
1995 (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11));
1996}
1997
1998bool msgSupportsStream(int64_t MsgId, int64_t OpId,
1999 const MCSubtargetInfo &STI) {
2000 return !isGFX11Plus(STI) &&
2001 (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11) &&
2002 OpId != OP_GS_NOP;
2003}
2004
2005void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId,
2006 uint16_t &StreamId, const MCSubtargetInfo &STI) {
2007 MsgId = Val & getMsgIdMask(STI);
2008 if (isGFX11Plus(STI)) {
2009 OpId = 0;
2010 StreamId = 0;
2011 } else {
2012 OpId = (Val & OP_MASK_) >> OP_SHIFT_;
2013 StreamId = (Val & STREAM_ID_MASK_) >> STREAM_ID_SHIFT_;
2014 }
2015}
2016
2017uint64_t encodeMsg(uint64_t MsgId,
2018 uint64_t OpId,
2019 uint64_t StreamId) {
2020 return MsgId | (OpId << OP_SHIFT_) | (StreamId << STREAM_ID_SHIFT_);
2021}
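
// Illustrative sketch (not part of the original source): encodeMsg packs the
// three sendmsg fields and decodeMsg reverses it, with the caveat that on
// GFX11+ the operation and stream fields no longer exist:
//
//   uint16_t M, O, S;
//   decodeMsg(encodeMsg(MsgId, OpId, StreamId), M, O, S, STI);
//   // pre-GFX11: M/O/S equal the in-range inputs; GFX11+: O == S == 0.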
2022
2023} // namespace SendMsg
2024
2025//===----------------------------------------------------------------------===//
2026//
2027//===----------------------------------------------------------------------===//
2028
2029unsigned getInitialPSInputAddr(const Function &F) {
2030 return F.getFnAttributeAsParsedInteger("InitialPSInputAddr", 0);
2031}
2032
2033bool getHasColorExport(const Function &F) {
2034 // As a safe default always respond as if PS has color exports.
2035 return F.getFnAttributeAsParsedInteger(
2036 "amdgpu-color-export",
2037 F.getCallingConv() == CallingConv::AMDGPU_PS ? 1 : 0) != 0;
2038}
2039
2040bool getHasDepthExport(const Function &F) {
2041 return F.getFnAttributeAsParsedInteger("amdgpu-depth-export", 0) != 0;
2042}
2043
2044bool isShader(CallingConv::ID cc) {
2045 switch(cc) {
2046 case CallingConv::AMDGPU_VS:
2047 case CallingConv::AMDGPU_LS:
2048 case CallingConv::AMDGPU_HS:
2049 case CallingConv::AMDGPU_ES:
2050 case CallingConv::AMDGPU_GS:
2051 case CallingConv::AMDGPU_PS:
2052 case CallingConv::AMDGPU_CS_Chain:
2053 case CallingConv::AMDGPU_CS_ChainPreserve:
2054 case CallingConv::AMDGPU_CS:
2055 return true;
2056 default:
2057 return false;
2058 }
2059}
2060
2061bool isGraphics(CallingConv::ID cc) {
2062 return isShader(cc) || cc == CallingConv::AMDGPU_Gfx;
2063}
2064
2065bool isCompute(CallingConv::ID cc) {
2066 return !isGraphics(cc) || cc == CallingConv::AMDGPU_CS;
2067}
2068
2069bool isEntryFunctionCC(CallingConv::ID CC) {
2070 switch (CC) {
2071 case CallingConv::AMDGPU_KERNEL:
2072 case CallingConv::SPIR_KERNEL:
2073 case CallingConv::AMDGPU_VS:
2074 case CallingConv::AMDGPU_GS:
2075 case CallingConv::AMDGPU_PS:
2076 case CallingConv::AMDGPU_CS:
2077 case CallingConv::AMDGPU_ES:
2078 case CallingConv::AMDGPU_HS:
2079 case CallingConv::AMDGPU_LS:
2080 return true;
2081 default:
2082 return false;
2083 }
2084}
2085
2086bool isModuleEntryFunctionCC(CallingConv::ID CC) {
2087 switch (CC) {
2088 case CallingConv::AMDGPU_Gfx:
2089 return true;
2090 default:
2091 return isEntryFunctionCC(CC) || isChainCC(CC);
2092 }
2093}
2094
2095bool isChainCC(CallingConv::ID CC) {
2096 switch (CC) {
2097 case CallingConv::AMDGPU_CS_Chain:
2098 case CallingConv::AMDGPU_CS_ChainPreserve:
2099 return true;
2100 default:
2101 return false;
2102 }
2103}
2104
2105bool isKernelCC(const Function *Func) {
2106 return AMDGPU::isModuleEntryFunctionCC(Func->getCallingConv());
2107}
2108
2109bool hasXNACK(const MCSubtargetInfo &STI) {
2110 return STI.hasFeature(AMDGPU::FeatureXNACK);
2111}
2112
2113bool hasSRAMECC(const MCSubtargetInfo &STI) {
2114 return STI.hasFeature(AMDGPU::FeatureSRAMECC);
2115}
2116
2117bool hasMIMG_R128(const MCSubtargetInfo &STI) {
2118 return STI.hasFeature(AMDGPU::FeatureMIMG_R128) && !STI.hasFeature(AMDGPU::FeatureR128A16);
2119}
2120
2121bool hasA16(const MCSubtargetInfo &STI) {
2122 return STI.hasFeature(AMDGPU::FeatureA16);
2123}
2124
2125bool hasG16(const MCSubtargetInfo &STI) {
2126 return STI.hasFeature(AMDGPU::FeatureG16);
2127}
2128
2129bool hasPackedD16(const MCSubtargetInfo &STI) {
2130 return !STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem) && !isCI(STI) &&
2131 !isSI(STI);
2132}
2133
2134bool hasGDS(const MCSubtargetInfo &STI) {
2135 return STI.hasFeature(AMDGPU::FeatureGDS);
2136}
2137
2138unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler) {
2139 auto Version = getIsaVersion(STI.getCPU());
2140 if (Version.Major == 10)
2141 return Version.Minor >= 3 ? 13 : 5;
2142 if (Version.Major == 11)
2143 return 5;
2144 if (Version.Major >= 12)
2145 return HasSampler ? 4 : 5;
2146 return 0;
2147}
2148
2149unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI) { return 16; }
2150
2151bool isSI(const MCSubtargetInfo &STI) {
2152 return STI.hasFeature(AMDGPU::FeatureSouthernIslands);
2153}
2154
2155bool isCI(const MCSubtargetInfo &STI) {
2156 return STI.hasFeature(AMDGPU::FeatureSeaIslands);
2157}
2158
2159bool isVI(const MCSubtargetInfo &STI) {
2160 return STI.hasFeature(AMDGPU::FeatureVolcanicIslands);
2161}
2162
2163bool isGFX9(const MCSubtargetInfo &STI) {
2164 return STI.hasFeature(AMDGPU::FeatureGFX9);
2165}
2166
2167bool isGFX9_GFX10(const MCSubtargetInfo &STI) {
2168 return isGFX9(STI) || isGFX10(STI);
2169}
2170
2171bool isGFX9_GFX10_GFX11(const MCSubtargetInfo &STI) {
2172 return isGFX9(STI) || isGFX10(STI) || isGFX11(STI);
2173}
2174
2175bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI) {
2176 return isVI(STI) || isGFX9(STI) || isGFX10(STI);
2177}
2178
2179bool isGFX8Plus(const MCSubtargetInfo &STI) {
2180 return isVI(STI) || isGFX9Plus(STI);
2181}
2182
2183bool isGFX9Plus(const MCSubtargetInfo &STI) {
2184 return isGFX9(STI) || isGFX10Plus(STI);
2185}
2186
2187bool isGFX10(const MCSubtargetInfo &STI) {
2188 return STI.hasFeature(AMDGPU::FeatureGFX10);
2189}
2190
2191bool isGFX10_GFX11(const MCSubtargetInfo &STI) {
2192 return isGFX10(STI) || isGFX11(STI);
2193}
2194
2195bool isGFX10Plus(const MCSubtargetInfo &STI) {
2196 return isGFX10(STI) || isGFX11Plus(STI);
2197}
2198
2199bool isGFX11(const MCSubtargetInfo &STI) {
2200 return STI.hasFeature(AMDGPU::FeatureGFX11);
2201}
2202
2203bool isGFX11Plus(const MCSubtargetInfo &STI) {
2204 return isGFX11(STI) || isGFX12Plus(STI);
2205}
2206
2207bool isGFX12(const MCSubtargetInfo &STI) {
2208 return STI.hasFeature(AMDGPU::FeatureGFX12);
2209}
2210
2211bool isGFX12Plus(const MCSubtargetInfo &STI) { return isGFX12(STI); }
2212
2213bool isNotGFX12Plus(const MCSubtargetInfo &STI) { return !isGFX12Plus(STI); }
2214
2215bool isNotGFX11Plus(const MCSubtargetInfo &STI) {
2216 return !isGFX11Plus(STI);
2217}
2218
2219bool isNotGFX10Plus(const MCSubtargetInfo &STI) {
2220 return isSI(STI) || isCI(STI) || isVI(STI) || isGFX9(STI);
2221}
2222
2223bool isGFX10Before1030(const MCSubtargetInfo &STI) {
2224 return isGFX10(STI) && !AMDGPU::isGFX10_BEncoding(STI);
2225}
2226
2227bool isGCN3Encoding(const MCSubtargetInfo &STI) {
2228 return STI.hasFeature(AMDGPU::FeatureGCN3Encoding);
2229}
2230
2231bool isGFX10_AEncoding(const MCSubtargetInfo &STI) {
2232 return STI.hasFeature(AMDGPU::FeatureGFX10_AEncoding);
2233}
2234
2235bool isGFX10_BEncoding(const MCSubtargetInfo &STI) {
2236 return STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding);
2237}
2238
2239bool hasGFX10_3Insts(const MCSubtargetInfo &STI) {
2240 return STI.hasFeature(AMDGPU::FeatureGFX10_3Insts);
2241}
2242
2243bool isGFX10_3_GFX11(const MCSubtargetInfo &STI) {
2244 return isGFX10_BEncoding(STI) && !isGFX12Plus(STI);
2245}
2246
2247bool isGFX90A(const MCSubtargetInfo &STI) {
2248 return STI.hasFeature(AMDGPU::FeatureGFX90AInsts);
2249}
2250
2251bool isGFX940(const MCSubtargetInfo &STI) {
2252 return STI.hasFeature(AMDGPU::FeatureGFX940Insts);
2253}
2254
2255bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI) {
2256 return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch);
2257}
2258
2259bool hasMAIInsts(const MCSubtargetInfo &STI) {
2260 return STI.hasFeature(AMDGPU::FeatureMAIInsts);
2261}
2262
2263bool hasVOPD(const MCSubtargetInfo &STI) {
2264 return STI.hasFeature(AMDGPU::FeatureVOPD);
2265}
2266
2267bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI) {
2268 return STI.hasFeature(AMDGPU::FeatureDPPSrc1SGPR);
2269}
2270
2271unsigned hasKernargPreload(const MCSubtargetInfo &STI) {
2272 return STI.hasFeature(AMDGPU::FeatureKernargPreload);
2273}
2274
2275int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR,
2276 int32_t ArgNumVGPR) {
2277 if (has90AInsts && ArgNumAGPR)
2278 return alignTo(ArgNumVGPR, 4) + ArgNumAGPR;
2279 return std::max(ArgNumVGPR, ArgNumAGPR);
2280}
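
// Worked example (not part of the original source): with gfx90a instructions,
// AGPRs are allocated after the VGPRs, which are rounded up to an allocation
// granule of 4, so getTotalNumVGPRs(true, /*AGPR*/5, /*VGPR*/6) is
// alignTo(6, 4) + 5 == 13. Without gfx90a instructions the two register
// files alias, giving std::max(6, 5) == 6.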
2281
2282bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI) {
2283 const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID);
2284 const unsigned FirstSubReg = TRI->getSubReg(Reg, AMDGPU::sub0);
2285 return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) ||
2286 Reg == AMDGPU::SCC;
2287}
2288
2289bool isHi(unsigned Reg, const MCRegisterInfo &MRI) {
2290 return MRI.getEncodingValue(Reg) & AMDGPU::HWEncoding::IS_HI;
2291}
2292
2293#define MAP_REG2REG \
2294 using namespace AMDGPU; \
2295 switch(Reg) { \
2296 default: return Reg; \
2297 CASE_CI_VI(FLAT_SCR) \
2298 CASE_CI_VI(FLAT_SCR_LO) \
2299 CASE_CI_VI(FLAT_SCR_HI) \
2300 CASE_VI_GFX9PLUS(TTMP0) \
2301 CASE_VI_GFX9PLUS(TTMP1) \
2302 CASE_VI_GFX9PLUS(TTMP2) \
2303 CASE_VI_GFX9PLUS(TTMP3) \
2304 CASE_VI_GFX9PLUS(TTMP4) \
2305 CASE_VI_GFX9PLUS(TTMP5) \
2306 CASE_VI_GFX9PLUS(TTMP6) \
2307 CASE_VI_GFX9PLUS(TTMP7) \
2308 CASE_VI_GFX9PLUS(TTMP8) \
2309 CASE_VI_GFX9PLUS(TTMP9) \
2310 CASE_VI_GFX9PLUS(TTMP10) \
2311 CASE_VI_GFX9PLUS(TTMP11) \
2312 CASE_VI_GFX9PLUS(TTMP12) \
2313 CASE_VI_GFX9PLUS(TTMP13) \
2314 CASE_VI_GFX9PLUS(TTMP14) \
2315 CASE_VI_GFX9PLUS(TTMP15) \
2316 CASE_VI_GFX9PLUS(TTMP0_TTMP1) \
2317 CASE_VI_GFX9PLUS(TTMP2_TTMP3) \
2318 CASE_VI_GFX9PLUS(TTMP4_TTMP5) \
2319 CASE_VI_GFX9PLUS(TTMP6_TTMP7) \
2320 CASE_VI_GFX9PLUS(TTMP8_TTMP9) \
2321 CASE_VI_GFX9PLUS(TTMP10_TTMP11) \
2322 CASE_VI_GFX9PLUS(TTMP12_TTMP13) \
2323 CASE_VI_GFX9PLUS(TTMP14_TTMP15) \
2324 CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3) \
2325 CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7) \
2326 CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11) \
2327 CASE_VI_GFX9PLUS(TTMP12_TTMP13_TTMP14_TTMP15) \
2328 CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \
2329 CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \
2330 CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
2331 CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
2332 CASE_GFXPRE11_GFX11PLUS(M0) \
2333 CASE_GFXPRE11_GFX11PLUS(SGPR_NULL) \
2334 CASE_GFXPRE11_GFX11PLUS_TO(SGPR_NULL64, SGPR_NULL) \
2335 }
2336
2337#define CASE_CI_VI(node) \
2338 assert(!isSI(STI)); \
2339 case node: return isCI(STI) ? node##_ci : node##_vi;
2340
2341#define CASE_VI_GFX9PLUS(node) \
2342 case node: return isGFX9Plus(STI) ? node##_gfx9plus : node##_vi;
2343
2344#define CASE_GFXPRE11_GFX11PLUS(node) \
2345 case node: return isGFX11Plus(STI) ? node##_gfx11plus : node##_gfxpre11;
2346
2347#define CASE_GFXPRE11_GFX11PLUS_TO(node, result) \
2348 case node: return isGFX11Plus(STI) ? result##_gfx11plus : result##_gfxpre11;
2349
2350unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
2351 if (STI.getTargetTriple().getArch() == Triple::r600)
2352 return Reg;
2353 MAP_REG2REG
2354}
2355
2356#undef CASE_CI_VI
2357#undef CASE_VI_GFX9PLUS
2358#undef CASE_GFXPRE11_GFX11PLUS
2359#undef CASE_GFXPRE11_GFX11PLUS_TO
2360
2361#define CASE_CI_VI(node) case node##_ci: case node##_vi: return node;
2362#define CASE_VI_GFX9PLUS(node) case node##_vi: case node##_gfx9plus: return node;
2363#define CASE_GFXPRE11_GFX11PLUS(node) case node##_gfx11plus: case node##_gfxpre11: return node;
2364#define CASE_GFXPRE11_GFX11PLUS_TO(node, result)
2365
2366unsigned mc2PseudoReg(unsigned Reg) {
2367 MAP_REG2REG
2368}
2369
2370bool isInlineValue(unsigned Reg) {
2371 switch (Reg) {
2372 case AMDGPU::SRC_SHARED_BASE_LO:
2373 case AMDGPU::SRC_SHARED_BASE:
2374 case AMDGPU::SRC_SHARED_LIMIT_LO:
2375 case AMDGPU::SRC_SHARED_LIMIT:
2376 case AMDGPU::SRC_PRIVATE_BASE_LO:
2377 case AMDGPU::SRC_PRIVATE_BASE:
2378 case AMDGPU::SRC_PRIVATE_LIMIT_LO:
2379 case AMDGPU::SRC_PRIVATE_LIMIT:
2380 case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2381 return true;
2382 case AMDGPU::SRC_VCCZ:
2383 case AMDGPU::SRC_EXECZ:
2384 case AMDGPU::SRC_SCC:
2385 return true;
2386 case AMDGPU::SGPR_NULL:
2387 return true;
2388 default:
2389 return false;
2390 }
2391}
2392
2393#undef CASE_CI_VI
2394#undef CASE_VI_GFX9PLUS
2395#undef CASE_GFXPRE11_GFX11PLUS
2396#undef CASE_GFXPRE11_GFX11PLUS_TO
2397#undef MAP_REG2REG
2398
2399bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2400 assert(OpNo < Desc.NumOperands);
2401 unsigned OpType = Desc.operands()[OpNo].OperandType;
2402 return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
2403 OpType <= AMDGPU::OPERAND_SRC_LAST;
2404}
2405
2406bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2407 assert(OpNo < Desc.NumOperands);
2408 unsigned OpType = Desc.operands()[OpNo].OperandType;
2409 return OpType >= AMDGPU::OPERAND_KIMM_FIRST &&
2410 OpType <= AMDGPU::OPERAND_KIMM_LAST;
2411}
2412
2413bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2414 assert(OpNo < Desc.NumOperands);
2415 unsigned OpType = Desc.operands()[OpNo].OperandType;
2416 switch (OpType) {
2417 case AMDGPU::OPERAND_REG_IMM_FP32:
2418 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2419 case AMDGPU::OPERAND_REG_IMM_FP64:
2420 case AMDGPU::OPERAND_REG_IMM_FP16:
2421 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2422 case AMDGPU::OPERAND_REG_IMM_V2FP16:
2423 case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2424 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2425 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2426 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2427 case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2428 case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2429 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2430 case AMDGPU::OPERAND_REG_IMM_V2FP32:
2431 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2432 case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2433 return true;
2434 default:
2435 return false;
2436 }
2437}
2438
2439bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2440 assert(OpNo < Desc.NumOperands);
2441 unsigned OpType = Desc.operands()[OpNo].OperandType;
2442 return (OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
2443 OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST) ||
2444 (OpType >= AMDGPU::OPERAND_REG_INLINE_AC_FIRST &&
2445 OpType <= AMDGPU::OPERAND_REG_INLINE_AC_LAST);
2446}
2447
2448// Avoid using MCRegisterClass::getSize, since that function will go away
2449// (move from MC* level to Target* level). Return size in bits.
2450unsigned getRegBitWidth(unsigned RCID) {
2451 switch (RCID) {
2452 case AMDGPU::SGPR_LO16RegClassID:
2453 case AMDGPU::AGPR_LO16RegClassID:
2454 return 16;
2455 case AMDGPU::SGPR_32RegClassID:
2456 case AMDGPU::VGPR_32RegClassID:
2457 case AMDGPU::VRegOrLds_32RegClassID:
2458 case AMDGPU::AGPR_32RegClassID:
2459 case AMDGPU::VS_32RegClassID:
2460 case AMDGPU::AV_32RegClassID:
2461 case AMDGPU::SReg_32RegClassID:
2462 case AMDGPU::SReg_32_XM0RegClassID:
2463 case AMDGPU::SRegOrLds_32RegClassID:
2464 return 32;
2465 case AMDGPU::SGPR_64RegClassID:
2466 case AMDGPU::VS_64RegClassID:
2467 case AMDGPU::SReg_64RegClassID:
2468 case AMDGPU::VReg_64RegClassID:
2469 case AMDGPU::AReg_64RegClassID:
2470 case AMDGPU::SReg_64_XEXECRegClassID:
2471 case AMDGPU::VReg_64_Align2RegClassID:
2472 case AMDGPU::AReg_64_Align2RegClassID:
2473 case AMDGPU::AV_64RegClassID:
2474 case AMDGPU::AV_64_Align2RegClassID:
2475 return 64;
2476 case AMDGPU::SGPR_96RegClassID:
2477 case AMDGPU::SReg_96RegClassID:
2478 case AMDGPU::VReg_96RegClassID:
2479 case AMDGPU::AReg_96RegClassID:
2480 case AMDGPU::VReg_96_Align2RegClassID:
2481 case AMDGPU::AReg_96_Align2RegClassID:
2482 case AMDGPU::AV_96RegClassID:
2483 case AMDGPU::AV_96_Align2RegClassID:
2484 return 96;
2485 case AMDGPU::SGPR_128RegClassID:
2486 case AMDGPU::SReg_128RegClassID:
2487 case AMDGPU::VReg_128RegClassID:
2488 case AMDGPU::AReg_128RegClassID:
2489 case AMDGPU::VReg_128_Align2RegClassID:
2490 case AMDGPU::AReg_128_Align2RegClassID:
2491 case AMDGPU::AV_128RegClassID:
2492 case AMDGPU::AV_128_Align2RegClassID:
2493 return 128;
2494 case AMDGPU::SGPR_160RegClassID:
2495 case AMDGPU::SReg_160RegClassID:
2496 case AMDGPU::VReg_160RegClassID:
2497 case AMDGPU::AReg_160RegClassID:
2498 case AMDGPU::VReg_160_Align2RegClassID:
2499 case AMDGPU::AReg_160_Align2RegClassID:
2500 case AMDGPU::AV_160RegClassID:
2501 case AMDGPU::AV_160_Align2RegClassID:
2502 return 160;
2503 case AMDGPU::SGPR_192RegClassID:
2504 case AMDGPU::SReg_192RegClassID:
2505 case AMDGPU::VReg_192RegClassID:
2506 case AMDGPU::AReg_192RegClassID:
2507 case AMDGPU::VReg_192_Align2RegClassID:
2508 case AMDGPU::AReg_192_Align2RegClassID:
2509 case AMDGPU::AV_192RegClassID:
2510 case AMDGPU::AV_192_Align2RegClassID:
2511 return 192;
2512 case AMDGPU::SGPR_224RegClassID:
2513 case AMDGPU::SReg_224RegClassID:
2514 case AMDGPU::VReg_224RegClassID:
2515 case AMDGPU::AReg_224RegClassID:
2516 case AMDGPU::VReg_224_Align2RegClassID:
2517 case AMDGPU::AReg_224_Align2RegClassID:
2518 case AMDGPU::AV_224RegClassID:
2519 case AMDGPU::AV_224_Align2RegClassID:
2520 return 224;
2521 case AMDGPU::SGPR_256RegClassID:
2522 case AMDGPU::SReg_256RegClassID:
2523 case AMDGPU::VReg_256RegClassID:
2524 case AMDGPU::AReg_256RegClassID:
2525 case AMDGPU::VReg_256_Align2RegClassID:
2526 case AMDGPU::AReg_256_Align2RegClassID:
2527 case AMDGPU::AV_256RegClassID:
2528 case AMDGPU::AV_256_Align2RegClassID:
2529 return 256;
2530 case AMDGPU::SGPR_288RegClassID:
2531 case AMDGPU::SReg_288RegClassID:
2532 case AMDGPU::VReg_288RegClassID:
2533 case AMDGPU::AReg_288RegClassID:
2534 case AMDGPU::VReg_288_Align2RegClassID:
2535 case AMDGPU::AReg_288_Align2RegClassID:
2536 case AMDGPU::AV_288RegClassID:
2537 case AMDGPU::AV_288_Align2RegClassID:
2538 return 288;
2539 case AMDGPU::SGPR_320RegClassID:
2540 case AMDGPU::SReg_320RegClassID:
2541 case AMDGPU::VReg_320RegClassID:
2542 case AMDGPU::AReg_320RegClassID:
2543 case AMDGPU::VReg_320_Align2RegClassID:
2544 case AMDGPU::AReg_320_Align2RegClassID:
2545 case AMDGPU::AV_320RegClassID:
2546 case AMDGPU::AV_320_Align2RegClassID:
2547 return 320;
2548 case AMDGPU::SGPR_352RegClassID:
2549 case AMDGPU::SReg_352RegClassID:
2550 case AMDGPU::VReg_352RegClassID:
2551 case AMDGPU::AReg_352RegClassID:
2552 case AMDGPU::VReg_352_Align2RegClassID:
2553 case AMDGPU::AReg_352_Align2RegClassID:
2554 case AMDGPU::AV_352RegClassID:
2555 case AMDGPU::AV_352_Align2RegClassID:
2556 return 352;
2557 case AMDGPU::SGPR_384RegClassID:
2558 case AMDGPU::SReg_384RegClassID:
2559 case AMDGPU::VReg_384RegClassID:
2560 case AMDGPU::AReg_384RegClassID:
2561 case AMDGPU::VReg_384_Align2RegClassID:
2562 case AMDGPU::AReg_384_Align2RegClassID:
2563 case AMDGPU::AV_384RegClassID:
2564 case AMDGPU::AV_384_Align2RegClassID:
2565 return 384;
2566 case AMDGPU::SGPR_512RegClassID:
2567 case AMDGPU::SReg_512RegClassID:
2568 case AMDGPU::VReg_512RegClassID:
2569 case AMDGPU::AReg_512RegClassID:
2570 case AMDGPU::VReg_512_Align2RegClassID:
2571 case AMDGPU::AReg_512_Align2RegClassID:
2572 case AMDGPU::AV_512RegClassID:
2573 case AMDGPU::AV_512_Align2RegClassID:
2574 return 512;
2575 case AMDGPU::SGPR_1024RegClassID:
2576 case AMDGPU::SReg_1024RegClassID:
2577 case AMDGPU::VReg_1024RegClassID:
2578 case AMDGPU::AReg_1024RegClassID:
2579 case AMDGPU::VReg_1024_Align2RegClassID:
2580 case AMDGPU::AReg_1024_Align2RegClassID:
2581 case AMDGPU::AV_1024RegClassID:
2582 case AMDGPU::AV_1024_Align2RegClassID:
2583 return 1024;
2584 default:
2585 llvm_unreachable("Unexpected register class");
2586 }
2587}
2588
2589unsigned getRegBitWidth(const MCRegisterClass &RC) {
2590 return getRegBitWidth(RC.getID());
2591}
2592
2593unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
2594 unsigned OpNo) {
2595 assert(OpNo < Desc.NumOperands);
2596 unsigned RCID = Desc.operands()[OpNo].RegClass;
2597 return getRegBitWidth(RCID) / 8;
2598}
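
// Illustrative sketch (not part of the original source): the byte size falls
// out of the register class width, e.g. an operand constrained to VReg_64
// yields getRegBitWidth(AMDGPU::VReg_64RegClassID) / 8 == 8 bytes.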
2599
2600bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
2601 if (isInlinableIntLiteral(Literal))
2602 return true;
2603
2604 uint64_t Val = static_cast<uint64_t>(Literal);
2605 return (Val == llvm::bit_cast<uint64_t>(0.0)) ||
2606 (Val == llvm::bit_cast<uint64_t>(1.0)) ||
2607 (Val == llvm::bit_cast<uint64_t>(-1.0)) ||
2608 (Val == llvm::bit_cast<uint64_t>(0.5)) ||
2609 (Val == llvm::bit_cast<uint64_t>(-0.5)) ||
2610 (Val == llvm::bit_cast<uint64_t>(2.0)) ||
2611 (Val == llvm::bit_cast<uint64_t>(-2.0)) ||
2612 (Val == llvm::bit_cast<uint64_t>(4.0)) ||
2613 (Val == llvm::bit_cast<uint64_t>(-4.0)) ||
2614 (Val == 0x3fc45f306dc9c882 && HasInv2Pi);
2615}
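
// Example (not part of the original source): the bit pattern of 1.0,
// 0x3FF0000000000000, is in the list above and is inlinable, while an
// arbitrary double such as 1.1 is not:
//
//   isInlinableLiteral64(llvm::bit_cast<uint64_t>(1.0), true);  // true
//   isInlinableLiteral64(llvm::bit_cast<uint64_t>(1.1), true);  // false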
2616
2617bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
2618 if (isInlinableIntLiteral(Literal))
2619 return true;
2620
2621 // The actual type of the operand does not seem to matter as long
2622 // as the bits match one of the inline immediate values. For example:
2623 //
2624 // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
2625 // so it is a legal inline immediate.
2626 //
2627 // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
2628 // floating-point, so it is a legal inline immediate.
2629
2630 uint32_t Val = static_cast<uint32_t>(Literal);
2631 return (Val == llvm::bit_cast<uint32_t>(0.0f)) ||
2632 (Val == llvm::bit_cast<uint32_t>(1.0f)) ||
2633 (Val == llvm::bit_cast<uint32_t>(-1.0f)) ||
2634 (Val == llvm::bit_cast<uint32_t>(0.5f)) ||
2635 (Val == llvm::bit_cast<uint32_t>(-0.5f)) ||
2636 (Val == llvm::bit_cast<uint32_t>(2.0f)) ||
2637 (Val == llvm::bit_cast<uint32_t>(-2.0f)) ||
2638 (Val == llvm::bit_cast<uint32_t>(4.0f)) ||
2639 (Val == llvm::bit_cast<uint32_t>(-4.0f)) ||
2640 (Val == 0x3e22f983 && HasInv2Pi);
2641}
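
// Example (not part of the original source): 0x3F800000 is the bit pattern
// of 1.0f, so it is inlinable however the instruction interprets the operand,
// while flipping the low bit leaves no matching inline value:
//
//   isInlinableLiteral32(0x3F800000, true);  // true
//   isInlinableLiteral32(0x3F800001, true);  // false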
2642
2643bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi) {
2644 if (!HasInv2Pi)
2645 return false;
2646 if (isInlinableIntLiteral(Literal))
2647 return true;
2648 uint16_t Val = static_cast<uint16_t>(Literal);
2649 return Val == 0x3F00 || // 0.5
2650 Val == 0xBF00 || // -0.5
2651 Val == 0x3F80 || // 1.0
2652 Val == 0xBF80 || // -1.0
2653 Val == 0x4000 || // 2.0
2654 Val == 0xC000 || // -2.0
2655 Val == 0x4080 || // 4.0
2656 Val == 0xC080 || // -4.0
2657 Val == 0x3E22; // 1.0 / (2.0 * pi)
2658}
2659
2660bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
2661 if (!HasInv2Pi)
2662 return false;
2663
2664 if (isInlinableIntLiteral(Literal))
2665 return true;
2666
2667 uint16_t Val = static_cast<uint16_t>(Literal);
2668 return Val == 0x3C00 || // 1.0
2669 Val == 0xBC00 || // -1.0
2670 Val == 0x3800 || // 0.5
2671 Val == 0xB800 || // -0.5
2672 Val == 0x4000 || // 2.0
2673 Val == 0xC000 || // -2.0
2674 Val == 0x4400 || // 4.0
2675 Val == 0xC400 || // -4.0
2676 Val == 0x3118; // 1/2pi
2677}
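
// Example (not part of the original source): the same bit pattern can be
// inline in one 16-bit format but not the other; 0x3C00 is 1.0 in IEEE half
// but is not one of the BF16 inline values (BF16 1.0 is 0x3F80):
//
//   isInlinableLiteral16(0x3C00, true);    // true
//   isInlinableLiteralBF16(0x3C00, true);  // false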
2678
2679std::optional<unsigned> getInlineEncodingV216(bool IsFloat, uint32_t Literal) {
2680 // Unfortunately, the Instruction Set Architecture Reference Guide is
2681 // misleading about how the inline operands work for (packed) 16-bit
2682 // instructions. In a nutshell, the actual HW behavior is:
2683 //
2684 // - integer encodings (-16 .. 64) are always produced as sign-extended
2685 // 32-bit values
2686 // - float encodings are produced as:
2687 // - for F16 instructions: corresponding half-precision float values in
2688 // the LSBs, 0 in the MSBs
2689 // - for UI16 instructions: corresponding single-precision float value
2690 int32_t Signed = static_cast<int32_t>(Literal);
2691 if (Signed >= 0 && Signed <= 64)
2692 return 128 + Signed;
2693
2694 if (Signed >= -16 && Signed <= -1)
2695 return 192 + std::abs(Signed);
2696
2697 if (IsFloat) {
2698 // clang-format off
2699 switch (Literal) {
2700 case 0x3800: return 240; // 0.5
2701 case 0xB800: return 241; // -0.5
2702 case 0x3C00: return 242; // 1.0
2703 case 0xBC00: return 243; // -1.0
2704 case 0x4000: return 244; // 2.0
2705 case 0xC000: return 245; // -2.0
2706 case 0x4400: return 246; // 4.0
2707 case 0xC400: return 247; // -4.0
2708 case 0x3118: return 248; // 1.0 / (2.0 * pi)
2709 default: break;
2710 }
2711 // clang-format on
2712 } else {
2713 // clang-format off
2714 switch (Literal) {
2715 case 0x3F000000: return 240; // 0.5
2716 case 0xBF000000: return 241; // -0.5
2717 case 0x3F800000: return 242; // 1.0
2718 case 0xBF800000: return 243; // -1.0
2719 case 0x40000000: return 244; // 2.0
2720 case 0xC0000000: return 245; // -2.0
2721 case 0x40800000: return 246; // 4.0
2722 case 0xC0800000: return 247; // -4.0
2723 case 0x3E22F983: return 248; // 1.0 / (2.0 * pi)
2724 default: break;
2725 }
2726 // clang-format on
2727 }
2728
2729 return {};
2730}
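
// Example (not part of the original source): for a packed F16 operand the
// float table above only matches when the high half of the 32-bit literal is
// zero, so 1.0h in the low half is inline but 1.0h in both halves is not:
//
//   getInlineEncodingV216(true, 0x3C00);      // 242
//   getInlineEncodingV216(true, 0x3C003C00);  // std::nullopt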
2731
2732// Encoding of the literal as an inline constant for a V_PK_*_IU16 instruction
2733// or nullopt.
2734std::optional<unsigned> getInlineEncodingV2I16(uint32_t Literal) {
2735 return getInlineEncodingV216(false, Literal);
2736}
2737
2738// Encoding of the literal as an inline constant for a V_PK_*_BF16 instruction
2739// or nullopt.
2740std::optional<unsigned> getInlineEncodingV2BF16(uint32_t Literal) {
2741 int32_t Signed = static_cast<int32_t>(Literal);
2742 if (Signed >= 0 && Signed <= 64)
2743 return 128 + Signed;
2744
2745 if (Signed >= -16 && Signed <= -1)
2746 return 192 + std::abs(Signed);
2747
2748 // clang-format off
2749 switch (Literal) {
2750 case 0x3F00: return 240; // 0.5
2751 case 0xBF00: return 241; // -0.5
2752 case 0x3F80: return 242; // 1.0
2753 case 0xBF80: return 243; // -1.0
2754 case 0x4000: return 244; // 2.0
2755 case 0xC000: return 245; // -2.0
2756 case 0x4080: return 246; // 4.0
2757 case 0xC080: return 247; // -4.0
2758 case 0x3E22: return 248; // 1.0 / (2.0 * pi)
2759 default: break;
2760 }
2761 // clang-format on
2762
2763 return std::nullopt;
2764}
2765
2766// Encoding of the literal as an inline constant for a V_PK_*_F16 instruction
2767// or nullopt.
2768std::optional<unsigned> getInlineEncodingV2F16(uint32_t Literal) {
2769 return getInlineEncodingV216(true, Literal);
2770}
2771
2772// Whether the given literal can be inlined for a V_PK_* instruction.
2773bool isInlinableLiteralV216(uint32_t Literal, uint8_t OpType) {
2774 switch (OpType) {
2775 case AMDGPU::OPERAND_REG_IMM_V2INT16:
2776 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2777 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2778 return getInlineEncodingV216(false, Literal).has_value();
2779 case AMDGPU::OPERAND_REG_IMM_V2FP16:
2780 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2781 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2782 return getInlineEncodingV216(true, Literal).has_value();
2783 case AMDGPU::OPERAND_REG_IMM_V2BF16:
2784 case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
2785 case AMDGPU::OPERAND_REG_INLINE_AC_V2BF16:
2786 return isInlinableLiteralV2BF16(Literal);
2787 default:
2788 llvm_unreachable("bad packed operand type");
2789 }
2790}
2791
2792// Whether the given literal can be inlined for a V_PK_*_IU16 instruction.
2793bool isInlinableLiteralV2I16(uint32_t Literal) {
2794 return getInlineEncodingV2I16(Literal).has_value();
2795}
2796
2797// Whether the given literal can be inlined for a V_PK_*_BF16 instruction.
2798bool isInlinableLiteralV2BF16(uint32_t Literal) {
2799 return getInlineEncodingV2BF16(Literal).has_value();
2800}
2801
2802// Whether the given literal can be inlined for a V_PK_*_F16 instruction.
2803bool isInlinableLiteralV2F16(uint32_t Literal) {
2804 return getInlineEncodingV2F16(Literal).has_value();
2805}
2806
2807bool isValid32BitLiteral(uint64_t Val, bool IsFP64) {
2808 if (IsFP64)
2809 return !(Val & 0xffffffffu);
2810
2811 return isUInt<32>(Val) || isInt<32>(Val);
2812}
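
// Example (not part of the original source): an FP64 literal is encoded as
// the high 32 bits of the double with the low half implied zero, so only
// values whose low 32 bits are clear are representable:
//
//   isValid32BitLiteral(0x3FF0000000000000, /*IsFP64=*/true);  // true (1.0)
//   isValid32BitLiteral(0x3FF0000000000001, /*IsFP64=*/true);  // false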
2813
2814bool isArgPassedInSGPR(const Argument *A) {
2815 const Function *F = A->getParent();
2816
2817 // Arguments to compute shaders are never a source of divergence.
2818 CallingConv::ID CC = F->getCallingConv();
2819 switch (CC) {
2820 case CallingConv::AMDGPU_KERNEL:
2821 case CallingConv::SPIR_KERNEL:
2822 return true;
2823 case CallingConv::AMDGPU_VS:
2824 case CallingConv::AMDGPU_LS:
2825 case CallingConv::AMDGPU_HS:
2826 case CallingConv::AMDGPU_ES:
2827 case CallingConv::AMDGPU_GS:
2828 case CallingConv::AMDGPU_PS:
2829 case CallingConv::AMDGPU_CS:
2830 case CallingConv::AMDGPU_Gfx:
2831 case CallingConv::AMDGPU_CS_Chain:
2832 case CallingConv::AMDGPU_CS_ChainPreserve:
2833 // For non-compute shaders, SGPR inputs are marked with either inreg or
2834 // byval. Everything else is in VGPRs.
2835 return A->hasAttribute(Attribute::InReg) ||
2836 A->hasAttribute(Attribute::ByVal);
2837 default:
2838 // TODO: treat i1 as divergent?
2839 return A->hasAttribute(Attribute::InReg);
2840 }
2841}
2842
2843bool isArgPassedInSGPR(const CallBase *CB, unsigned ArgNo) {
2844 // Arguments to compute shaders are never a source of divergence.
2845 CallingConv::ID CC = CB->getCallingConv();
2846 switch (CC) {
2847 case CallingConv::AMDGPU_KERNEL:
2848 case CallingConv::SPIR_KERNEL:
2849 return true;
2850 case CallingConv::AMDGPU_VS:
2851 case CallingConv::AMDGPU_LS:
2852 case CallingConv::AMDGPU_HS:
2853 case CallingConv::AMDGPU_ES:
2854 case CallingConv::AMDGPU_GS:
2855 case CallingConv::AMDGPU_PS:
2856 case CallingConv::AMDGPU_CS:
2857 case CallingConv::AMDGPU_Gfx:
2858 case CallingConv::AMDGPU_CS_Chain:
2859 case CallingConv::AMDGPU_CS_ChainPreserve:
2860 // For non-compute shaders, SGPR inputs are marked with either inreg or
2861 // byval. Everything else is in VGPRs.
2862 return CB->paramHasAttr(ArgNo, Attribute::InReg) ||
2863 CB->paramHasAttr(ArgNo, Attribute::ByVal);
2864 default:
2865 return CB->paramHasAttr(ArgNo, Attribute::InReg);
2866 }
2867}
2868
2869static bool hasSMEMByteOffset(const MCSubtargetInfo &ST) {
2870 return isGCN3Encoding(ST) || isGFX10Plus(ST);
2871}
2872
2873static bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST) {
2874 return isGFX9Plus(ST);
2875}
2876
2877bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
2878 int64_t EncodedOffset) {
2879 if (isGFX12Plus(ST))
2880 return isUInt<23>(EncodedOffset);
2881
2882 return hasSMEMByteOffset(ST) ? isUInt<20>(EncodedOffset)
2883 : isUInt<8>(EncodedOffset);
2884}
2885
2886bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST,
2887 int64_t EncodedOffset,
2888 bool IsBuffer) {
2889 if (isGFX12Plus(ST))
2890 return isInt<24>(EncodedOffset);
2891
2892 return !IsBuffer &&
2893 hasSMRDSignedImmOffset(ST) &&
2894 isInt<21>(EncodedOffset);
2895}
2896
2897static bool isDwordAligned(uint64_t ByteOffset) {
2898 return (ByteOffset & 3) == 0;
2899}
2900
2901uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST,
2902 uint64_t ByteOffset) {
2903 if (hasSMEMByteOffset(ST))
2904 return ByteOffset;
2905
2906 assert(isDwordAligned(ByteOffset));
2907 return ByteOffset >> 2;
2908}
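
// Illustrative sketch (not part of the original source): subtargets without
// SMEM byte offsets encode SMRD immediates in dwords, so a dword-aligned
// byte offset of 16 converts to 4; byte-offset subtargets return it as-is:
//
//   convertSMRDOffsetUnits(ST, 16);  // 4, or 16 where hasSMEMByteOffset(ST)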
2909
2910std::optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
2911 int64_t ByteOffset, bool IsBuffer) {
2912 if (isGFX12Plus(ST)) // 24 bit signed offsets
2913 return isInt<24>(ByteOffset) ? std::optional<int64_t>(ByteOffset)
2914 : std::nullopt;
2915
2916 // The signed version is always a byte offset.
2917 if (!IsBuffer && hasSMRDSignedImmOffset(ST)) {
2918 assert(hasSMEMByteOffset(ST));
2919 return isInt<20>(ByteOffset) ? std::optional<int64_t>(ByteOffset)
2920 : std::nullopt;
2921 }
2922
2923 if (!isDwordAligned(ByteOffset) && !hasSMEMByteOffset(ST))
2924 return std::nullopt;
2925
2926 int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
2927 return isLegalSMRDEncodedUnsignedOffset(ST, EncodedOffset)
2928 ? std::optional<int64_t>(EncodedOffset)
2929 : std::nullopt;
2930}
2931
2932std::optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
2933 int64_t ByteOffset) {
2934 if (!isCI(ST) || !isDwordAligned(ByteOffset))
2935 return std::nullopt;
2936
2937 int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
2938 return isUInt<32>(EncodedOffset) ? std::optional<int64_t>(EncodedOffset)
2939 : std::nullopt;
2940}
2941
2942unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST) {
2943 if (AMDGPU::isGFX10(ST))
2944 return 12;
2945
2946 if (AMDGPU::isGFX12(ST))
2947 return 24;
2948 return 13;
2949}
2950
2951namespace {
2952
2953struct SourceOfDivergence {
2954 unsigned Intr;
2955};
2956const SourceOfDivergence *lookupSourceOfDivergence(unsigned Intr);
2957
2958struct AlwaysUniform {
2959 unsigned Intr;
2960};
2961const AlwaysUniform *lookupAlwaysUniform(unsigned Intr);
2962
2963#define GET_SourcesOfDivergence_IMPL
2964#define GET_UniformIntrinsics_IMPL
2965#define GET_Gfx9BufferFormat_IMPL
2966#define GET_Gfx10BufferFormat_IMPL
2967#define GET_Gfx11PlusBufferFormat_IMPL
2968#include "AMDGPUGenSearchableTables.inc"
2969
2970} // end anonymous namespace
2971
2972bool isIntrinsicSourceOfDivergence(unsigned IntrID) {
2973 return lookupSourceOfDivergence(IntrID);
2974}
2975
2976bool isIntrinsicAlwaysUniform(unsigned IntrID) {
2977 return lookupAlwaysUniform(IntrID);
2978}
2979
2980const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
2981 uint8_t NumComponents,
2982 uint8_t NumFormat,
2983 const MCSubtargetInfo &STI) {
2984 return isGFX11Plus(STI)
2985 ? getGfx11PlusBufferFormatInfo(BitsPerComp, NumComponents,
2986 NumFormat)
2987 : isGFX10(STI) ? getGfx10BufferFormatInfo(BitsPerComp,
2988 NumComponents, NumFormat)
2989 : getGfx9BufferFormatInfo(BitsPerComp,
2990 NumComponents, NumFormat);
2991}
2992
2993const GcnBufferFormatInfo *getGcnBufferFormatInfo(unsigned Format,
2994 const MCSubtargetInfo &STI) {
2995 return isGFX11Plus(STI) ? getGfx11PlusBufferFormatInfo(Format)
2996 : isGFX10(STI) ? getGfx10BufferFormatInfo(Format)
2997 : getGfx9BufferFormatInfo(Format);
2998}
2999
3000bool hasAny64BitVGPROperands(const MCInstrDesc &OpDesc) {
3001 for (auto OpName : { OpName::vdst, OpName::src0, OpName::src1,
3002 OpName::src2 }) {
3003 int Idx = getNamedOperandIdx(OpDesc.getOpcode(), OpName);
3004 if (Idx == -1)
3005 continue;
3006
3007 if (OpDesc.operands()[Idx].RegClass == AMDGPU::VReg_64RegClassID ||
3008 OpDesc.operands()[Idx].RegClass == AMDGPU::VReg_64_Align2RegClassID)
3009 return true;
3010 }
3011
3012 return false;
3013}
3014
3015bool isDPALU_DPP(const MCInstrDesc &OpDesc) {
3016 return hasAny64BitVGPROperands(OpDesc);
3017}
3018
3019} // namespace AMDGPU
3020
3021raw_ostream &operator<<(raw_ostream &OS,
3022 const AMDGPU::IsaInfo::TargetIDSetting S) {
3023 switch (S) {
3024 case (AMDGPU::IsaInfo::TargetIDSetting::Unsupported):
3025 OS << "Unsupported";
3026 break;
3027 case (AMDGPU::IsaInfo::TargetIDSetting::Any):
3028 OS << "Any";
3029 break;
3030 case (AMDGPU::IsaInfo::TargetIDSetting::Off):
3031 OS << "Off";
3032 break;
3033 case (AMDGPU::IsaInfo::TargetIDSetting::On):
3034 OS << "On";
3035 break;
3036 }
3037 return OS;
3038}
3039
3040} // namespace llvm
unsigned const MachineRegisterInfo * MRI
#define MAP_REG2REG
unsigned Intr
static llvm::cl::opt< unsigned > DefaultAMDHSACodeObjectVersion("amdhsa-code-object-version", llvm::cl::Hidden, llvm::cl::init(llvm::AMDGPU::AMDHSA_COV5), llvm::cl::desc("Set default AMDHSA Code Object Version (module flag " "or asm directive still take priority if present)"))
Provides AMDGPU specific target descriptions.
AMDHSA kernel descriptor definitions.
#define AMDHSA_BITS_SET(DST, MSK, VAL)
@ AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32
This file contains the simple types necessary to represent the attributes associated with functions a...
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
std::string Name
uint64_t Size
#define F(x, y, z)
Definition: MD5.cpp:55
unsigned const TargetRegisterInfo * TRI
unsigned Reg
LLVMContext & Context
const SmallVectorImpl< MachineOperand > & Cond
#define S_00B848_MEM_ORDERED(x)
Definition: SIDefines.h:1152
#define S_00B848_WGP_MODE(x)
Definition: SIDefines.h:1149
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
raw_pwrite_stream & OS
void setTargetIDFromFeaturesString(StringRef FS)
TargetIDSetting getXnackSetting() const
AMDGPUTargetID(const MCSubtargetInfo &STI)
void setTargetIDFromTargetIDStream(StringRef TargetID)
TargetIDSetting getSramEccSetting() const
unsigned getIndexInParsedOperands(unsigned CompOprIdx) const
unsigned getIndexOfDstInParsedOperands() const
unsigned getIndexOfSrcInParsedOperands(unsigned CompSrcIdx) const
unsigned getCompParsedSrcOperandsNum() const
std::optional< unsigned > getInvalidCompOperandIndex(std::function< unsigned(unsigned, unsigned)> GetRegIdx, bool SkipSrc=false) const
std::array< unsigned, Component::MAX_OPR_NUM > RegIndices
Definition: Any.h:28
This class represents an incoming formal argument to a Function.
Definition: Argument.h:28
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1457
CallingConv::ID getCallingConv() const
Definition: InstrTypes.h:1754
bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Determine whether the argument or parameter has the given attribute.
This class represents an Operation in the Expression.
Encoding
Size and signedness of expression operations' operands.
constexpr bool test(unsigned I) const
unsigned getAddressSpace() const
Definition: GlobalValue.h:205
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
void emitError(uint64_t LocCookie, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:237
ArrayRef< MCOperandInfo > operands() const
Definition: MCInstrDesc.h:239
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
Definition: MCInstrDesc.h:248
int getOperandConstraint(unsigned OpNum, MCOI::OperandConstraint Constraint) const
Returns the value of the specified operand constraint if it is present.
Definition: MCInstrDesc.h:219
unsigned getOpcode() const
Return the opcode number for this descriptor.
Definition: MCInstrDesc.h:230
Interface to description of machine instruction set.
Definition: MCInstrInfo.h:26
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition: MCInstrInfo.h:63
MCRegisterClass - Base class of TargetRegisterClass.
unsigned getID() const
getID() - Return the register class ID number.
bool contains(MCRegister Reg) const
contains - Return true if the specified register is included in this register class.
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
Generic base class for all target subtargets.
bool hasFeature(unsigned Feature) const
const Triple & getTargetTriple() const
const FeatureBitset & getFeatureBits() const
StringRef getCPU() const
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Definition: StringRef.h:849
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition: StringRef.h:696
bool getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition: StringRef.h:466
std::string str() const
str - Get the contents as an std::string.
Definition: StringRef.h:222
constexpr bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:134
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:137
bool ends_with(StringRef Suffix) const
Check if this string ends with the given Suffix.
Definition: StringRef.h:271
Manages the enabling and disabling of subtarget specific features.
const std::vector< std::string > & getFeatures() const
Returns the vector of individual subtarget features.
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
OSType getOS() const
Get the parsed operating system type of this triple.
Definition: Triple.h:370
ArchType getArch() const
Get the parsed architecture type of this triple.
Definition: Triple.h:361
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:660
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ CONSTANT_ADDRESS_32BIT
Address space for 32-bit constant memory.
@ LOCAL_ADDRESS
Address space for local memory.
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val, bool &IsDefault, const MCSubtargetInfo &STI)
unsigned encodeFieldVaVdst(unsigned Encoded, unsigned VaVdst)
unsigned decodeFieldSaSdst(unsigned Encoded)
unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc)
int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask, const MCSubtargetInfo &STI)
unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst)
const CustomOperandVal DepCtrInfo[]
bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal, const MCSubtargetInfo &STI)
unsigned decodeFieldVaVdst(unsigned Encoded)
int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI)
unsigned decodeFieldVmVsrc(unsigned Encoded)
bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI)
static constexpr ExpTgt ExpTgtInfo[]
bool getTgtName(unsigned Id, StringRef &Name, int &Index)
unsigned getTgtId(const StringRef Name)
constexpr uint32_t VersionMajor
HSA metadata major version.
bool isValidHwreg(int64_t Id)
const CustomOperand< const MCSubtargetInfo & > Opr[]
bool isValidHwregOffset(int64_t Offset)
bool isValidHwregWidth(int64_t Width)
int64_t getHwregId(const StringRef Name, const MCSubtargetInfo &STI)
StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI)
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI)
unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getWavefrontSize(const MCSubtargetInfo *STI)
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI)
unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI)
unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs, std::optional< bool > EnableWavefrontSize32)
unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed, bool FlatScrUsed, bool XNACKUsed)
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
unsigned getLocalMemorySize(const MCSubtargetInfo *STI)
unsigned getAddressableLocalMemorySize(const MCSubtargetInfo *STI)
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
unsigned getEUsPerCU(const MCSubtargetInfo *STI)
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI)
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI)
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
static TargetIDSetting getTargetIDSettingFromFeatureString(StringRef FeatureString)
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI)
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, bool Addressable)
unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs)
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI)
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI)
unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI, unsigned NumVGPRs)
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo *STI)
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI)
StringLiteral const UfmtSymbolicGFX11[]
bool isValidUnifiedFormat(unsigned Id, const MCSubtargetInfo &STI)
unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI)
StringRef getUnifiedFormatName(unsigned Id, const MCSubtargetInfo &STI)
unsigned const DfmtNfmt2UFmtGFX10[]
StringLiteral const DfmtSymbolic[]
static StringLiteral const * getNfmtLookupTable(const MCSubtargetInfo &STI)
bool isValidNfmt(unsigned Id, const MCSubtargetInfo &STI)
StringLiteral const NfmtSymbolicGFX10[]
bool isValidDfmtNfmt(unsigned Id, const MCSubtargetInfo &STI)
int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt, const MCSubtargetInfo &STI)
StringRef getDfmtName(unsigned Id)
int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt)
int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI)
bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI)
StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI)
unsigned const DfmtNfmt2UFmtGFX11[]
StringLiteral const NfmtSymbolicVI[]
StringLiteral const NfmtSymbolicSICI[]
int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI)
int64_t getDfmt(const StringRef Name)
StringLiteral const UfmtSymbolicGFX10[]
void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt)
StringRef getMsgOpName(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI)
uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId)
StringRef getMsgName(int64_t MsgId, const MCSubtargetInfo &STI)
bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI)
int64_t getMsgId(const StringRef Name, const MCSubtargetInfo &STI)
void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId, uint16_t &StreamId, const MCSubtargetInfo &STI)
bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI)
int64_t getMsgOpId(int64_t MsgId, const StringRef Name)
const char *const OpGsSymbolic[OP_GS_LAST_]
bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, const MCSubtargetInfo &STI, bool Strict)
const char *const OpSysSymbolic[OP_SYS_LAST_]
static uint64_t getMsgIdMask(const MCSubtargetInfo &STI)
bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI)
const CustomOperand< const MCSubtargetInfo & > Msg[]
bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI, bool Strict)
constexpr unsigned VOPD_VGPR_BANK_MASKS[]
constexpr unsigned COMPONENTS_NUM
bool isGCN3Encoding(const MCSubtargetInfo &STI)
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
bool isGFX10_BEncoding(const MCSubtargetInfo &STI)
bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi)
bool isGFX10_GFX11(const MCSubtargetInfo &STI)
bool isInlinableLiteralV216(uint32_t Literal, uint8_t OpType)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc, unsigned OpNo)
Get size of register operand.
void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt)
Decodes Vmcnt, Expcnt and Lgkmcnt from given Waitcnt for given isa Version, and writes decoded values...
uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset)
Convert ByteOffset to dwords if the subtarget uses dword SMRD immediate offsets.
static unsigned encodeStorecnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Storecnt)
static bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST)
static bool hasSMEMByteOffset(const MCSubtargetInfo &ST)
bool isVOPCAsmOnly(unsigned Opc)
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding, unsigned VDataDwords, unsigned VAddrDwords)
bool getMTBUFHasSrsrc(unsigned Opc)
std::optional< int64_t > getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST, int64_t ByteOffset)
static bool isSymbolicCustomOperandEncoding(const CustomOperandVal *Opr, int Size, unsigned Code, bool &HasNonDefaultVal, const MCSubtargetInfo &STI)
bool isGFX10Before1030(const MCSubtargetInfo &STI)
bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo)
Does this operand support only inlinable literals?
unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc)
const int OPR_ID_UNSUPPORTED
bool shouldEmitConstantsToTextSection(const Triple &TT)
bool isInlinableLiteralV2I16(uint32_t Literal)
int getMTBUFElements(unsigned Opc)
static int encodeCustomOperandVal(const CustomOperandVal &Op, int64_t InputVal)
int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR)
bool isGFX10(const MCSubtargetInfo &STI)
LLVM_READONLY int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx)
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, const MCSubtargetInfo *STI)
bool isInlinableLiteralV2BF16(uint32_t Literal)
unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI)
std::optional< unsigned > getInlineEncodingV216(bool IsFloat, uint32_t Literal)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.
unsigned mc2PseudoReg(unsigned Reg)
Convert hardware register Reg to a pseudo register.
bool hasA16(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset, bool IsBuffer)
bool isGFX12Plus(const MCSubtargetInfo &STI)
unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler)
CanBeVOPD getCanBeVOPD(unsigned Opc)
static int getOprIdx(std::function< bool(const CustomOperand< T > &)> Test, const CustomOperand< T > OpInfo[], int OpInfoSize, T Context)
bool hasPackedD16(const MCSubtargetInfo &STI)
unsigned getStorecntBitMask(const IsaVersion &Version)
bool isGFX940(const MCSubtargetInfo &STI)
bool isEntryFunctionCC(CallingConv::ID CC)
bool isInlinableLiteralV2F16(uint32_t Literal)
bool isHsaAbi(const MCSubtargetInfo &STI)
bool isGFX11(const MCSubtargetInfo &STI)
const int OPR_VAL_INVALID
bool getSMEMIsBuffer(unsigned Opc)
bool isGFX10_3_GFX11(const MCSubtargetInfo &STI)
bool isGroupSegment(const GlobalValue *GV)
IsaVersion getIsaVersion(StringRef GPU)
bool getMTBUFHasSoffset(unsigned Opc)
bool hasXNACK(const MCSubtargetInfo &STI)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
static unsigned getCombinedCountBitMask(const IsaVersion &Version, bool IsStore)
unsigned getVOPDOpcode(unsigned Opc)
bool isDPALU_DPP(const MCInstrDesc &OpDesc)
unsigned encodeWaitcnt(const IsaVersion &Version, unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt)
Encodes Vmcnt, Expcnt and Lgkmcnt into Waitcnt for given isa Version.
bool isVOPC64DPP(unsigned Opc)
int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements)
bool isCompute(CallingConv::ID cc)
bool getMAIIsGFX940XDL(unsigned Opc)
bool isSI(const MCSubtargetInfo &STI)
unsigned getDefaultAMDHSACodeObjectVersion()
bool isReadOnlySegment(const GlobalValue *GV)
bool isArgPassedInSGPR(const Argument *A)
bool isIntrinsicAlwaysUniform(unsigned IntrID)
int getMUBUFBaseOpcode(unsigned Opc)
unsigned getAMDHSACodeObjectVersion(const Module &M)
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt)
unsigned getWaitcntBitMask(const IsaVersion &Version)
bool getVOP3IsSingle(unsigned Opc)
bool isGFX9(const MCSubtargetInfo &STI)
bool getVOP1IsSingle(unsigned Opc)
static bool isDwordAligned(uint64_t ByteOffset)
unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST)
bool isGFX10_AEncoding(const MCSubtargetInfo &STI)
bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this a KImm operand?
bool getHasColorExport(const Function &F)
int getMTBUFBaseOpcode(unsigned Opc)
bool isChainCC(CallingConv::ID CC)
bool isGFX90A(const MCSubtargetInfo &STI)
unsigned getSamplecntBitMask(const IsaVersion &Version)
unsigned getDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion)
bool hasSRAMECC(const MCSubtargetInfo &STI)
bool getHasDepthExport(const Function &F)
static bool isValidOpr(int Idx, const CustomOperand< T > OpInfo[], int OpInfoSize, T Context)
bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI)
bool getMUBUFHasVAddr(unsigned Opc)
int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily)
bool isTrue16Inst(unsigned Opc)
bool hasAny64BitVGPROperands(const MCInstrDesc &OpDesc)
std::pair< unsigned, unsigned > getVOPDComponents(unsigned VOPDOpcode)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
bool isGFX12(const MCSubtargetInfo &STI)
unsigned getInitialPSInputAddr(const Function &F)
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Expcnt)
bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this an AMDGPU specific source operand? These include registers, inline constants,...
unsigned getKmcntBitMask(const IsaVersion &Version)
unsigned getVmcntBitMask(const IsaVersion &Version)
bool isNotGFX10Plus(const MCSubtargetInfo &STI)
bool hasMAIInsts(const MCSubtargetInfo &STI)
bool isIntrinsicSourceOfDivergence(unsigned IntrID)
bool isKernelCC(const Function *Func)
bool isGenericAtomic(unsigned Opc)
Waitcnt decodeStorecntDscnt(const IsaVersion &Version, unsigned StorecntDscnt)
bool isGFX8Plus(const MCSubtargetInfo &STI)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, uint64_t NamedIdx)
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the values intended for floating point values.
unsigned getLgkmcntBitMask(const IsaVersion &Version)
std::optional< int64_t > getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset, bool IsBuffer)
bool isSGPR(unsigned Reg, const MCRegisterInfo *TRI)
Is Reg - scalar register.
bool isHi(unsigned Reg, const MCRegisterInfo &MRI)
unsigned getBvhcntBitMask(const IsaVersion &Version)
bool hasMIMG_R128(const MCSubtargetInfo &STI)
bool hasGFX10_3Insts(const MCSubtargetInfo &STI)
bool hasG16(const MCSubtargetInfo &STI)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements)
unsigned getExpcntBitMask(const IsaVersion &Version)
bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI)
bool getMUBUFHasSoffset(unsigned Opc)
bool isNotGFX11Plus(const MCSubtargetInfo &STI)
bool isGFX11Plus(const MCSubtargetInfo &STI)
std::optional< unsigned > getInlineEncodingV2F16(uint32_t Literal)
bool isInlineValue(unsigned Reg)
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this floating-point operand?
bool isShader(CallingConv::ID cc)
unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion)
static unsigned getDefaultCustomOperandEncoding(const CustomOperandVal *Opr, int Size, const MCSubtargetInfo &STI)
static unsigned encodeLoadcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Loadcnt)
bool isGFX10Plus(const MCSubtargetInfo &STI)
unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI)
If Reg is a pseudo reg, return the correct hardware register given STI otherwise return Reg.
static bool decodeCustomOperand(const CustomOperandVal *Opr, int Size, unsigned Code, int &Idx, StringRef &Name, unsigned &Val, bool &IsDefault, const MCSubtargetInfo &STI)
bool isGlobalSegment(const GlobalValue *GV)
@ OPERAND_KIMM_LAST
Definition: SIDefines.h:269
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
Definition: SIDefines.h:234
@ OPERAND_REG_INLINE_C_LAST
Definition: SIDefines.h:260
@ OPERAND_REG_IMM_V2FP16
Definition: SIDefines.h:211
@ OPERAND_REG_INLINE_C_FP64
Definition: SIDefines.h:223
@ OPERAND_REG_INLINE_C_V2BF16
Definition: SIDefines.h:225
@ OPERAND_REG_IMM_V2INT16
Definition: SIDefines.h:212
@ OPERAND_REG_INLINE_AC_V2FP16
Definition: SIDefines.h:246
@ OPERAND_SRC_FIRST
Definition: SIDefines.h:265
@ OPERAND_REG_IMM_V2BF16
Definition: SIDefines.h:210
@ OPERAND_REG_INLINE_AC_FIRST
Definition: SIDefines.h:262
@ OPERAND_KIMM_FIRST
Definition: SIDefines.h:268
@ OPERAND_REG_IMM_FP16
Definition: SIDefines.h:206
@ OPERAND_REG_IMM_FP64
Definition: SIDefines.h:204
@ OPERAND_REG_INLINE_C_V2FP16
Definition: SIDefines.h:226
@ OPERAND_REG_INLINE_AC_V2INT16
Definition: SIDefines.h:244
@ OPERAND_REG_INLINE_AC_FP16
Definition: SIDefines.h:241
@ OPERAND_REG_INLINE_AC_FP32
Definition: SIDefines.h:242
@ OPERAND_REG_INLINE_AC_V2BF16
Definition: SIDefines.h:245
@ OPERAND_REG_IMM_FP32
Definition: SIDefines.h:203
@ OPERAND_REG_INLINE_C_FIRST
Definition: SIDefines.h:259
@ OPERAND_REG_INLINE_C_FP32
Definition: SIDefines.h:222
@ OPERAND_REG_INLINE_AC_LAST
Definition: SIDefines.h:263
@ OPERAND_REG_INLINE_C_V2INT16
Definition: SIDefines.h:224
@ OPERAND_REG_IMM_V2FP32
Definition: SIDefines.h:214
@ OPERAND_REG_INLINE_AC_FP64
Definition: SIDefines.h:243
@ OPERAND_REG_INLINE_C_FP16
Definition: SIDefines.h:221
@ OPERAND_REG_INLINE_C_V2FP32
Definition: SIDefines.h:228
@ OPERAND_REG_IMM_FP32_DEFERRED
Definition: SIDefines.h:209
@ OPERAND_SRC_LAST
Definition: SIDefines.h:266
@ OPERAND_REG_IMM_FP16_DEFERRED
Definition: SIDefines.h:208
bool hasGDS(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset)
bool isGFX9Plus(const MCSubtargetInfo &STI)
bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI)
const int OPR_ID_DUPLICATE
bool isVOPD(unsigned Opc)
VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Vmcnt)
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt)
bool isCvt_F32_Fp8_Bf8_e64(unsigned Opc)
Waitcnt decodeLoadcntDscnt(const IsaVersion &Version, unsigned LoadcntDscnt)
std::optional< unsigned > getInlineEncodingV2I16(uint32_t Literal)
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
static unsigned encodeStorecntDscnt(const IsaVersion &Version, unsigned Storecnt, unsigned Dscnt)
int getMCOpcode(uint16_t Opcode, unsigned Gen)
const MIMGBaseOpcodeInfo * getMIMGBaseOpcode(unsigned Opc)
bool isVI(const MCSubtargetInfo &STI)
bool getMUBUFIsBufferInv(unsigned Opc)
std::optional< unsigned > getInlineEncodingV2BF16(uint32_t Literal)
static int encodeCustomOperand(const CustomOperandVal *Opr, int Size, const StringRef Name, int64_t InputVal, unsigned &UsedOprMask, const MCSubtargetInfo &STI)
unsigned hasKernargPreload(const MCSubtargetInfo &STI)
bool isMAC(unsigned Opc)
bool isCI(const MCSubtargetInfo &STI)
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Lgkmcnt)
amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(const MCSubtargetInfo *STI)
bool getVOP2IsSingle(unsigned Opc)
bool getMAIIsDGEMM(unsigned Opc)
Returns true if MAI operation is a double precision GEMM.
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
const int OPR_ID_UNKNOWN
unsigned getCompletionActionImplicitArgPosition(unsigned CodeObjectVersion)
int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels)
bool isModuleEntryFunctionCC(CallingConv::ID CC)
bool isNotGFX12Plus(const MCSubtargetInfo &STI)
bool getMTBUFHasVAddr(unsigned Opc)
unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt)
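The waitcnt encode/decode helpers above are typically used in pairs, starting from the all-ones wait mask. A minimal sketch, assuming STI is an MCSubtargetInfo pointer for an AMDGPU target:
  llvm::AMDGPU::IsaVersion IV = llvm::AMDGPU::getIsaVersion(STI->getCPU());
  unsigned Enc = llvm::AMDGPU::getWaitcntBitMask(IV);    // all counters: "no wait"
  Enc = llvm::AMDGPU::encodeVmcnt(IV, Enc, /*Vmcnt=*/0); // wait for all VMEM ops
  unsigned Vm = llvm::AMDGPU::decodeVmcnt(IV, Enc);      // Vm == 0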
uint8_t getELFABIVersion(const Triple &T, unsigned CodeObjectVersion)
std::pair< unsigned, unsigned > getIntegerPairAttribute(const Function &F, StringRef Name, std::pair< unsigned, unsigned > Default, bool OnlyFirstRequired)
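A sketch of the pair-attribute helper above; the attribute value and the default are illustrative assumptions:
  // Parses a "num,num" string attribute, returning Default if it is absent
  // or malformed (assumes F carries "amdgpu-flat-work-group-size"="1,256").
  std::pair<unsigned, unsigned> FlatWGS = llvm::AMDGPU::getIntegerPairAttribute(
      F, "amdgpu-flat-work-group-size", /*Default=*/{1, 1024},
      /*OnlyFirstRequired=*/false); // yields {1, 256}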
unsigned getLoadcntBitMask(const IsaVersion &Version)
bool hasVOPD(const MCSubtargetInfo &STI)
static unsigned encodeDscnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Dscnt)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Returns true if this 64-bit literal can be encoded as an inline constant.
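Concretely, per the ISA's inline-constant table: integers from -16 to 64 and the FP bit patterns ±0.5, ±1.0, ±2.0, ±4.0 (plus 1/(2π) when HasInv2Pi) encode for free; anything else needs a literal. A sketch:
  #include "llvm/Support/MathExtras.h" // for DoubleToBits
  bool A = llvm::AMDGPU::isInlinableLiteral64(42, /*HasInv2Pi=*/true); // true: in -16..64
  bool B = llvm::AMDGPU::isInlinableLiteral64(
      llvm::DoubleToBits(1.0), /*HasInv2Pi=*/true); // true: +1.0 is inline
  bool C = llvm::AMDGPU::isInlinableLiteral64(
      llvm::DoubleToBits(3.0), /*HasInv2Pi=*/true); // false: needs a literal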
unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion)
bool isGFX9_GFX10_GFX11(const MCSubtargetInfo &STI)
bool isGFX9_GFX10(const MCSubtargetInfo &STI)
int getMUBUFElements(unsigned Opc)
static unsigned encodeLoadcntDscnt(const IsaVersion &Version, unsigned Loadcnt, unsigned Dscnt)
const GcnBufferFormatInfo * getGcnBufferFormatInfo(uint8_t BitsPerComp, uint8_t NumComponents, uint8_t NumFormat, const MCSubtargetInfo &STI)
bool isGraphics(CallingConv::ID cc)
unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc)
bool isPermlane16(unsigned Opc)
bool getMUBUFHasSrsrc(unsigned Opc)
unsigned getDscntBitMask(const IsaVersion &Version)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
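A sketch of what Mask() computes, using a hypothetical bitmask enum (LLVM_MARK_AS_BITMASK_ENUM opts the enum into the BitmaskEnum utilities):
  #include "llvm/ADT/BitmaskEnum.h"
  enum class DemoFlags : unsigned { // hypothetical enum, for illustration only
    A = 1, B = 2, C = 4,
    LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/C)
  };
  // Every bit up to the high-order bit of C (0b100) is set:
  static_assert(llvm::BitmaskEnumDetail::Mask<DemoFlags>() == 0b111);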
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition: CallingConv.h:24
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
Definition: CallingConv.h:197
@ AMDGPU_VS
Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tess...
Definition: CallingConv.h:188
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
Definition: CallingConv.h:200
@ AMDGPU_Gfx
Used for AMD graphics targets.
Definition: CallingConv.h:232
@ AMDGPU_CS_ChainPreserve
Used on AMDGPUs to give the middle-end more control over argument placement.
Definition: CallingConv.h:249
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
Definition: CallingConv.h:206
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
Definition: CallingConv.h:191
@ AMDGPU_CS_Chain
Used on AMDGPUs to give the middle-end more control over argument placement.
Definition: CallingConv.h:245
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
Definition: CallingConv.h:194
@ SPIR_KERNEL
Used for SPIR kernel functions.
Definition: CallingConv.h:144
@ AMDGPU_ES
Used for AMDPAL shader stage before geometry shader if geometry is in use.
Definition: CallingConv.h:218
@ AMDGPU_LS
Used for AMDPAL vertex shader if tessellation is in use.
Definition: CallingConv.h:213
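The predicates earlier in this index (isGraphics, isModuleEntryFunctionCC) classify the calling conventions listed above; a small sketch:
  bool Gfx = llvm::AMDGPU::isGraphics(llvm::CallingConv::AMDGPU_PS);    // true
  bool Mod = llvm::AMDGPU::isModuleEntryFunctionCC(
      llvm::CallingConv::AMDGPU_KERNEL);                                // true
  bool Not = llvm::AMDGPU::isGraphics(llvm::CallingConv::SPIR_KERNEL); // false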
@ ELFABIVERSION_AMDGPU_HSA_V4
Definition: ELF.h:378
@ ELFABIVERSION_AMDGPU_HSA_V5
Definition: ELF.h:379
@ ELFABIVERSION_AMDGPU_HSA_V6
Definition: ELF.h:380
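getELFABIVersion (above) maps a code object version on an AMDHSA triple to one of these ELF ABI version values; a sketch:
  uint8_t ABIVer = llvm::AMDGPU::getELFABIVersion(
      llvm::Triple("amdgcn-amd-amdhsa"), /*CodeObjectVersion=*/5);
  // ABIVer == llvm::ELF::ELFABIVERSION_AMDGPU_HSA_V5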
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition: MathExtras.h:417
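A quick worked example of the rounding-up division:
  uint64_t Chunks = llvm::divideCeil(10, 4); // == 3: three 4-byte chunks cover 10 bytes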
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:156
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
uint64_t alignTo(uint64_t Size, Align A)
Returns Size rounded up to the nearest multiple of A: the smallest multiple of A large enough to hold Size bytes.
Definition: Alignment.h:155
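A quick worked example of the round-up behavior:
  uint64_t Padded = llvm::alignTo(/*Size=*/20, llvm::Align(16)); // == 32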
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition: APFixedPoint.h:293
uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the largest uint64_t that is less than or equal to Value and congruent to Skew modulo Align.
Definition: MathExtras.h:428
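A quick worked example of the round-down behavior, with and without a skew:
  uint64_t Down  = llvm::alignDown(/*Value=*/37, /*Align=*/8);             // == 32
  uint64_t Skewd = llvm::alignDown(/*Value=*/37, /*Align=*/8, /*Skew=*/5); // == 37 (37 mod 8 == 5)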
@ AlwaysUniform
The result values are always uniform.
@ Default
The result values are uniform if and only if all operands are uniform.
#define N
AMD Kernel Code Object (amd_kernel_code_t).
bool(* Cond)(T Context)
Instruction set architecture version.
Definition: TargetParser.h:125
Represents the counter values to wait for in an s_waitcnt instruction.
Description of the encoding of one expression Op.