LLVM 23.0.0git
AMDGPUBaseInfo.cpp
Go to the documentation of this file.
1//===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "AMDGPUBaseInfo.h"
10#include "AMDGPU.h"
11#include "AMDGPUAsmUtils.h"
12#include "AMDKernelCodeT.h"
17#include "llvm/IR/Attributes.h"
18#include "llvm/IR/Constants.h"
19#include "llvm/IR/Function.h"
20#include "llvm/IR/GlobalValue.h"
21#include "llvm/IR/IntrinsicsAMDGPU.h"
22#include "llvm/IR/IntrinsicsR600.h"
23#include "llvm/IR/LLVMContext.h"
24#include "llvm/IR/Metadata.h"
25#include "llvm/MC/MCInstrInfo.h"
30#include <optional>
31
32#define GET_INSTRINFO_NAMED_OPS
33#define GET_INSTRMAP_INFO
34#include "AMDGPUGenInstrInfo.inc"
35
37 "amdhsa-code-object-version", llvm::cl::Hidden,
39 llvm::cl::desc("Set default AMDHSA Code Object Version (module flag "
40 "or asm directive still take priority if present)"));
41
namespace {

/// \returns Bit mask for given bit \p Shift and bit \p Width.
///
/// The mask is built with unsigned arithmetic so that a \p Width of 31 or of
/// the full width of `unsigned` does not overflow a signed intermediate or
/// shift by the full type width. A \p Width of zero yields 0. \p Shift must be
/// smaller than the number of bits in `unsigned`.
unsigned getBitMask(unsigned Shift, unsigned Width) {
  constexpr unsigned NumBits = sizeof(unsigned) * 8;
  // Shifting by NumBits or more is undefined, so saturate to "all bits set".
  const unsigned FieldMask = Width >= NumBits ? ~0u : (1u << Width) - 1u;
  return FieldMask << Shift;
}

/// Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
///
/// Bits of \p Src that do not fit into the field are discarded; bits of
/// \p Dst outside the field are preserved.
///
/// \returns Packed \p Dst.
unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
  unsigned Mask = getBitMask(Shift, Width);
  return ((Src << Shift) & Mask) | (Dst & ~Mask);
}

/// Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
///
/// \returns Unpacked bits.
unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
  return (Src & getBitMask(Shift, Width)) >> Shift;
}

/// \returns Vmcnt bit shift (lower bits): 10 for \p VersionMajor >= 11,
/// otherwise 0.
unsigned getVmcntBitShiftLo(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 10 : 0;
}

/// \returns Vmcnt bit width (lower bits): 6 for \p VersionMajor >= 11,
/// otherwise 4.
unsigned getVmcntBitWidthLo(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 6 : 4;
}

/// \returns Expcnt bit shift: 0 for \p VersionMajor >= 11, otherwise 4.
unsigned getExpcntBitShift(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 0 : 4;
}

/// \returns Expcnt bit width (3 for every version).
unsigned getExpcntBitWidth(unsigned VersionMajor) { return 3; }

/// \returns Lgkmcnt bit shift: 4 for \p VersionMajor >= 11, otherwise 8.
unsigned getLgkmcntBitShift(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 4 : 8;
}

/// \returns Lgkmcnt bit width: 6 for \p VersionMajor >= 10, otherwise 4.
unsigned getLgkmcntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 10 ? 6 : 4;
}

/// \returns Vmcnt bit shift (higher bits); 14 for every version.
unsigned getVmcntBitShiftHi(unsigned VersionMajor) { return 14; }

/// \returns Vmcnt bit width (higher bits): 2 for \p VersionMajor 9 and 10,
/// otherwise 0 (no high part).
unsigned getVmcntBitWidthHi(unsigned VersionMajor) {
  return (VersionMajor == 9 || VersionMajor == 10) ? 2 : 0;
}

/// \returns Loadcnt bit width: 6 for \p VersionMajor >= 12, otherwise 0.
unsigned getLoadcntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 6 : 0;
}

/// \returns Samplecnt bit width: 6 for \p VersionMajor >= 12, otherwise 0.
unsigned getSamplecntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 6 : 0;
}

/// \returns Bvhcnt bit width: 3 for \p VersionMajor >= 12, otherwise 0.
unsigned getBvhcntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 3 : 0;
}

/// \returns Dscnt bit width: 6 for \p VersionMajor >= 12, otherwise 0.
unsigned getDscntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 6 : 0;
}

/// \returns Dscnt bit shift in combined S_WAIT instructions (always 0).
unsigned getDscntBitShift(unsigned VersionMajor) { return 0; }

/// \returns Storecnt or Vscnt bit width, depending on VersionMajor: 6 for
/// \p VersionMajor >= 10, otherwise 0.
unsigned getStorecntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 10 ? 6 : 0;
}

/// \returns Kmcnt bit width: 5 for \p VersionMajor >= 12, otherwise 0.
unsigned getKmcntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 5 : 0;
}

/// \returns Xcnt bit width: 6 for version 12.5 exactly, otherwise 0.
unsigned getXcntBitWidth(unsigned VersionMajor, unsigned VersionMinor) {
  return VersionMajor == 12 && VersionMinor == 5 ? 6 : 0;
}

/// \returns Asynccnt bit width: 6 for version 12.5 exactly, otherwise 0.
unsigned getAsynccntBitWidth(unsigned VersionMajor, unsigned VersionMinor) {
  return VersionMajor == 12 && VersionMinor == 5 ? 6 : 0;
}

/// \returns shift for Loadcnt/Storecnt in combined S_WAIT instructions: 8 for
/// \p VersionMajor >= 12, otherwise 0.
unsigned getLoadcntStorecntBitShift(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 8 : 0;
}

/// \returns VaSdst bit width
inline unsigned getVaSdstBitWidth() { return 3; }

/// \returns VaSdst bit shift
inline unsigned getVaSdstBitShift() { return 9; }

/// \returns VmVsrc bit width
inline unsigned getVmVsrcBitWidth() { return 3; }

/// \returns VmVsrc bit shift
inline unsigned getVmVsrcBitShift() { return 2; }

/// \returns VaVdst bit width
inline unsigned getVaVdstBitWidth() { return 4; }

/// \returns VaVdst bit shift
inline unsigned getVaVdstBitShift() { return 12; }

/// \returns VaVcc bit width
inline unsigned getVaVccBitWidth() { return 1; }

/// \returns VaVcc bit shift
inline unsigned getVaVccBitShift() { return 1; }

/// \returns SaSdst bit width
inline unsigned getSaSdstBitWidth() { return 1; }

/// \returns SaSdst bit shift
inline unsigned getSaSdstBitShift() { return 0; }

/// \returns VaSsrc bit width
inline unsigned getVaSsrcBitWidth() { return 1; }

/// \returns VaSsrc bit shift
inline unsigned getVaSsrcBitShift() { return 8; }

/// \returns HoldCnt bit width: 1 when (\p VersionMajor, \p VersionMinor) is
/// at least 10.3 (compared lexicographically), otherwise 0.
inline unsigned getHoldCntWidth(unsigned VersionMajor, unsigned VersionMinor) {
  static constexpr const unsigned MinMajor = 10;
  static constexpr const unsigned MinMinor = 3;
  return std::tie(VersionMajor, VersionMinor) >= std::tie(MinMajor, MinMinor)
             ? 1
             : 0;
}

/// \returns HoldCnt bit shift
inline unsigned getHoldCntBitShift() { return 7; }

} // end anonymous namespace
197
198namespace llvm {
199
200namespace AMDGPU {
201
202/// \returns true if the target supports signed immediate offset for SMRD
203/// instructions.
205 return isGFX9Plus(ST);
206}
207
208/// \returns True if \p STI is AMDHSA.
209bool isHsaAbi(const MCSubtargetInfo &STI) {
210 return STI.getTargetTriple().getOS() == Triple::AMDHSA;
211}
212
215 M.getModuleFlag("amdhsa_code_object_version"))) {
216 return (unsigned)Ver->getZExtValue() / 100;
217 }
218
220}
221
225
226unsigned getAMDHSACodeObjectVersion(unsigned ABIVersion) {
227 switch (ABIVersion) {
229 return 4;
231 return 5;
233 return 6;
234 default:
236 }
237}
238
239uint8_t getELFABIVersion(const Triple &T, unsigned CodeObjectVersion) {
240 if (T.getOS() != Triple::AMDHSA)
241 return 0;
242
243 switch (CodeObjectVersion) {
244 case 4:
246 case 5:
248 case 6:
250 default:
251 report_fatal_error("Unsupported AMDHSA Code Object Version " +
252 Twine(CodeObjectVersion));
253 }
254}
255
256unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion) {
257 switch (CodeObjectVersion) {
258 case AMDHSA_COV4:
259 return 48;
260 case AMDHSA_COV5:
261 case AMDHSA_COV6:
262 default:
264 }
265}
266
267// FIXME: All such magic numbers about the ABI should be in a
268// central TD file.
269unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion) {
270 switch (CodeObjectVersion) {
271 case AMDHSA_COV4:
272 return 24;
273 case AMDHSA_COV5:
274 case AMDHSA_COV6:
275 default:
277 }
278}
279
280unsigned getDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion) {
281 switch (CodeObjectVersion) {
282 case AMDHSA_COV4:
283 return 32;
284 case AMDHSA_COV5:
285 case AMDHSA_COV6:
286 default:
288 }
289}
290
291unsigned getCompletionActionImplicitArgPosition(unsigned CodeObjectVersion) {
292 switch (CodeObjectVersion) {
293 case AMDHSA_COV4:
294 return 40;
295 case AMDHSA_COV5:
296 case AMDHSA_COV6:
297 default:
299 }
300}
301
302#define GET_MIMGBaseOpcodesTable_IMPL
303#define GET_MIMGDimInfoTable_IMPL
304#define GET_MIMGInfoTable_IMPL
305#define GET_MIMGLZMappingTable_IMPL
306#define GET_MIMGMIPMappingTable_IMPL
307#define GET_MIMGBiasMappingTable_IMPL
308#define GET_MIMGOffsetMappingTable_IMPL
309#define GET_MIMGG16MappingTable_IMPL
310#define GET_MAIInstInfoTable_IMPL
311#define GET_WMMAInstInfoTable_IMPL
312#include "AMDGPUGenSearchableTables.inc"
313
314int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
315 unsigned VDataDwords, unsigned VAddrDwords) {
316 const MIMGInfo *Info =
317 getMIMGOpcodeHelper(BaseOpcode, MIMGEncoding, VDataDwords, VAddrDwords);
318 return Info ? Info->Opcode : -1;
319}
320
322 const MIMGInfo *Info = getMIMGInfo(Opc);
323 return Info ? getMIMGBaseOpcodeInfo(Info->BaseOpcode) : nullptr;
324}
325
326int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels) {
327 const MIMGInfo *OrigInfo = getMIMGInfo(Opc);
328 const MIMGInfo *NewInfo =
329 getMIMGOpcodeHelper(OrigInfo->BaseOpcode, OrigInfo->MIMGEncoding,
330 NewChannels, OrigInfo->VAddrDwords);
331 return NewInfo ? NewInfo->Opcode : -1;
332}
333
334unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode,
335 const MIMGDimInfo *Dim, bool IsA16,
336 bool IsG16Supported) {
337 unsigned AddrWords = BaseOpcode->NumExtraArgs;
338 unsigned AddrComponents = (BaseOpcode->Coordinates ? Dim->NumCoords : 0) +
339 (BaseOpcode->LodOrClampOrMip ? 1 : 0);
340 if (IsA16)
341 AddrWords += divideCeil(AddrComponents, 2);
342 else
343 AddrWords += AddrComponents;
344
345 // Note: For subtargets that support A16 but not G16, enabling A16 also
346 // enables 16 bit gradients.
347 // For subtargets that support A16 (operand) and G16 (done with a different
348 // instruction encoding), they are independent.
349
350 if (BaseOpcode->Gradients) {
351 if ((IsA16 && !IsG16Supported) || BaseOpcode->G16)
352 // There are two gradients per coordinate, we pack them separately.
353 // For the 3d case,
354 // we get (dy/du, dx/du) (-, dz/du) (dy/dv, dx/dv) (-, dz/dv)
355 AddrWords += alignTo<2>(Dim->NumGradients / 2);
356 else
357 AddrWords += Dim->NumGradients;
358 }
359 return AddrWords;
360}
361
372
381
386
391
395
399
403
408
416
421
427
428#define GET_FP4FP8DstByteSelTable_DECL
429#define GET_FP4FP8DstByteSelTable_IMPL
430
435
441
442#define GET_DPMACCInstructionTable_DECL
443#define GET_DPMACCInstructionTable_IMPL
444#define GET_MTBUFInfoTable_DECL
445#define GET_MTBUFInfoTable_IMPL
446#define GET_MUBUFInfoTable_DECL
447#define GET_MUBUFInfoTable_IMPL
448#define GET_SMInfoTable_DECL
449#define GET_SMInfoTable_IMPL
450#define GET_VOP1InfoTable_DECL
451#define GET_VOP1InfoTable_IMPL
452#define GET_VOP2InfoTable_DECL
453#define GET_VOP2InfoTable_IMPL
454#define GET_VOP3InfoTable_DECL
455#define GET_VOP3InfoTable_IMPL
456#define GET_VOPC64DPPTable_DECL
457#define GET_VOPC64DPPTable_IMPL
458#define GET_VOPC64DPP8Table_DECL
459#define GET_VOPC64DPP8Table_IMPL
460#define GET_VOPCAsmOnlyInfoTable_DECL
461#define GET_VOPCAsmOnlyInfoTable_IMPL
462#define GET_VOP3CAsmOnlyInfoTable_DECL
463#define GET_VOP3CAsmOnlyInfoTable_IMPL
464#define GET_VOPDComponentTable_DECL
465#define GET_VOPDComponentTable_IMPL
466#define GET_VOPDPairs_DECL
467#define GET_VOPDPairs_IMPL
468#define GET_VOPDXTable_DECL
469#define GET_VOPDXTable_IMPL
470#define GET_VOPDYTable_DECL
471#define GET_VOPDYTable_IMPL
472#define GET_VOPTrue16Table_DECL
473#define GET_VOPTrue16Table_IMPL
474#define GET_True16D16Table_IMPL
475#define GET_WMMAOpcode2AddrMappingTable_DECL
476#define GET_WMMAOpcode2AddrMappingTable_IMPL
477#define GET_WMMAOpcode3AddrMappingTable_DECL
478#define GET_WMMAOpcode3AddrMappingTable_IMPL
479#define GET_getMFMA_F8F6F4_WithSize_DECL
480#define GET_getMFMA_F8F6F4_WithSize_IMPL
481#define GET_isMFMA_F8F6F4Table_IMPL
482#define GET_isCvtScaleF32_F32F16ToF8F4Table_IMPL
483
484#include "AMDGPUGenSearchableTables.inc"
485
486int getMTBUFBaseOpcode(unsigned Opc) {
487 const MTBUFInfo *Info = getMTBUFInfoFromOpcode(Opc);
488 return Info ? Info->BaseOpcode : -1;
489}
490
491int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements) {
492 const MTBUFInfo *Info =
493 getMTBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
494 return Info ? Info->Opcode : -1;
495}
496
497int getMTBUFElements(unsigned Opc) {
498 const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
499 return Info ? Info->elements : 0;
500}
501
502bool getMTBUFHasVAddr(unsigned Opc) {
503 const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
504 return Info && Info->has_vaddr;
505}
506
507bool getMTBUFHasSrsrc(unsigned Opc) {
508 const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
509 return Info && Info->has_srsrc;
510}
511
512bool getMTBUFHasSoffset(unsigned Opc) {
513 const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
514 return Info && Info->has_soffset;
515}
516
517int getMUBUFBaseOpcode(unsigned Opc) {
518 const MUBUFInfo *Info = getMUBUFInfoFromOpcode(Opc);
519 return Info ? Info->BaseOpcode : -1;
520}
521
522int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements) {
523 const MUBUFInfo *Info =
524 getMUBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
525 return Info ? Info->Opcode : -1;
526}
527
528int getMUBUFElements(unsigned Opc) {
529 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
530 return Info ? Info->elements : 0;
531}
532
533bool getMUBUFHasVAddr(unsigned Opc) {
534 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
535 return Info && Info->has_vaddr;
536}
537
538bool getMUBUFHasSrsrc(unsigned Opc) {
539 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
540 return Info && Info->has_srsrc;
541}
542
543bool getMUBUFHasSoffset(unsigned Opc) {
544 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
545 return Info && Info->has_soffset;
546}
547
548bool getMUBUFIsBufferInv(unsigned Opc) {
549 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
550 return Info && Info->IsBufferInv;
551}
552
553bool getMUBUFTfe(unsigned Opc) {
554 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
555 return Info && Info->tfe;
556}
557
558bool getSMEMIsBuffer(unsigned Opc) {
559 const SMInfo *Info = getSMEMOpcodeHelper(Opc);
560 return Info && Info->IsBuffer;
561}
562
563bool getVOP1IsSingle(unsigned Opc) {
564 const VOPInfo *Info = getVOP1OpcodeHelper(Opc);
565 return !Info || Info->IsSingle;
566}
567
568bool getVOP2IsSingle(unsigned Opc) {
569 const VOPInfo *Info = getVOP2OpcodeHelper(Opc);
570 return !Info || Info->IsSingle;
571}
572
573bool getVOP3IsSingle(unsigned Opc) {
574 const VOPInfo *Info = getVOP3OpcodeHelper(Opc);
575 return !Info || Info->IsSingle;
576}
577
578bool isVOPC64DPP(unsigned Opc) {
579 return isVOPC64DPPOpcodeHelper(Opc) || isVOPC64DPP8OpcodeHelper(Opc);
580}
581
582bool isVOPCAsmOnly(unsigned Opc) { return isVOPCAsmOnlyOpcodeHelper(Opc); }
583
584bool getMAIIsDGEMM(unsigned Opc) {
585 const MAIInstInfo *Info = getMAIInstInfoHelper(Opc);
586 return Info && Info->is_dgemm;
587}
588
589bool getMAIIsGFX940XDL(unsigned Opc) {
590 const MAIInstInfo *Info = getMAIInstInfoHelper(Opc);
591 return Info && Info->is_gfx940_xdl;
592}
593
594bool getWMMAIsXDL(unsigned Opc) {
595 const WMMAInstInfo *Info = getWMMAInstInfoHelper(Opc);
596 return Info ? Info->is_wmma_xdl : false;
597}
598
599bool getHasMatrixScale(unsigned Opc) {
600 const WMMAInstInfo *Info = getWMMAInstInfoHelper(Opc);
601 return Info && Info->HasMatrixScale;
602}
603
605 switch (EncodingVal) {
608 return 6;
610 return 4;
613 default:
614 return 8;
615 }
616
617 llvm_unreachable("covered switch over mfma scale formats");
618}
619
621 unsigned BLGP,
622 unsigned F8F8Opcode) {
623 uint8_t SrcANumRegs = mfmaScaleF8F6F4FormatToNumRegs(CBSZ);
624 uint8_t SrcBNumRegs = mfmaScaleF8F6F4FormatToNumRegs(BLGP);
625 return getMFMA_F8F6F4_InstWithNumRegs(SrcANumRegs, SrcBNumRegs, F8F8Opcode);
626}
627
629 switch (Fmt) {
632 return 16;
635 return 12;
637 return 8;
638 }
639
640 llvm_unreachable("covered switch over wmma scale formats");
641}
642
644 unsigned FmtB,
645 unsigned F8F8Opcode) {
646 uint8_t SrcANumRegs = wmmaScaleF8F6F4FormatToNumRegs(FmtA);
647 uint8_t SrcBNumRegs = wmmaScaleF8F6F4FormatToNumRegs(FmtB);
648 return getMFMA_F8F6F4_InstWithNumRegs(SrcANumRegs, SrcBNumRegs, F8F8Opcode);
649}
650
651bool isValidWMMAScaleFmtCombination(unsigned AFmt, unsigned AScale,
652 unsigned BFmt, unsigned BScale) {
653 auto isValid = [](unsigned Fmt, unsigned Scale) -> bool {
654 switch (Fmt) {
659 if (Scale != WMMA::MATRIX_SCALE_FMT_E8)
660 return false;
661 break;
663 if (Scale != WMMA::MATRIX_SCALE_FMT_E8 &&
666 return false;
667 break;
668 }
669 return true;
670 };
671
672 if (!isValid(AFmt, AScale) || !isValid(BFmt, BScale))
673 return false;
674
675 if (AFmt == WMMA::MATRIX_FMT_FP4 && BFmt == WMMA::MATRIX_FMT_FP4 &&
676 AScale != BScale)
677 return false;
678
679 return true;
680}
681
683 if (ST.hasFeature(AMDGPU::FeatureGFX13Insts))
685 if (ST.hasFeature(AMDGPU::FeatureGFX1250Insts))
687 if (ST.hasFeature(AMDGPU::FeatureGFX12Insts))
689 if (ST.hasFeature(AMDGPU::FeatureGFX11_7Insts))
691 if (ST.hasFeature(AMDGPU::FeatureGFX11Insts))
693 llvm_unreachable("Subtarget generation does not support VOPD!");
694}
695
/// Builds the lookup key for the VOPD X/Y table: bit 0 holds \p VOPD3,
/// \p Subtarget starts at bit 1 and \p VOPDOp starts at bit 5. The fields are
/// OR-ed together without masking.
static constexpr unsigned getVOPDXYKey(unsigned VOPDOp, unsigned Subtarget,
                                       bool VOPD3) {
  const unsigned OpField = VOPDOp << 5;
  const unsigned SubtargetField = Subtarget << 1;
  const unsigned VOPD3Bit = VOPD3 ? 1u : 0u;
  return OpField | SubtargetField | VOPD3Bit;
}
700
701// TODO: Ideally, the table should be emitted by the TableGen backend, however
702// this is currently not supported, so the direct lookup table is generated
703// manually here.
704constexpr unsigned VOPDXYKeyBits = 11;
705static constexpr std::array<CanBeVOPD, 1 << VOPDXYKeyBits> buildVOPDXYLookup() {
706 std::array<CanBeVOPD, 1 << VOPDXYKeyBits> Table{};
707 for (auto &E : Table)
708 E = {false, false};
709 for (const auto &E : VOPDXTable)
710 Table[getVOPDXYKey(E.VOPDOp, E.Subtarget, E.VOPD3)].X = true;
711 for (const auto &E : VOPDYTable)
712 Table[getVOPDXYKey(E.VOPDOp, E.Subtarget, E.VOPD3)].Y = true;
713 return Table;
714}
715
717
718CanBeVOPD getCanBeVOPD(unsigned Opc, unsigned EncodingFamily, bool VOPD3) {
719 bool IsConvertibleToBitOp = VOPD3 ? getBitOp2(Opc) : 0;
720 Opc = IsConvertibleToBitOp ? (unsigned)AMDGPU::V_BITOP3_B32_e64 : Opc;
721 // Normalize through VOPDComponentTable so that e32 and e64 variants
722 // of the same logical opcode all share a single entry.
723 const VOPDComponentInfo *Info = getVOPDComponentHelper(Opc);
724 if (!Info)
725 return {false, false};
726 return VOPDXYLookup[getVOPDXYKey(Info->VOPDOp, EncodingFamily, VOPD3)];
727}
728
729unsigned getVOPDOpcode(unsigned Opc, bool VOPD3) {
730 bool IsConvertibleToBitOp = VOPD3 ? getBitOp2(Opc) : 0;
731 Opc = IsConvertibleToBitOp ? (unsigned)AMDGPU::V_BITOP3_B32_e64 : Opc;
732 const VOPDComponentInfo *Info = getVOPDComponentHelper(Opc);
733 return Info ? Info->VOPDOp : ~0u;
734}
735
736bool isVOPD(unsigned Opc) {
737 return AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0X);
738}
739
740bool isMAC(unsigned Opc) {
741 return Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
742 Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
743 Opc == AMDGPU::V_MAC_F32_e64_vi ||
744 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
745 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
746 Opc == AMDGPU::V_MAC_F16_e64_vi ||
747 Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
748 Opc == AMDGPU::V_FMAC_F64_e64_gfx12 ||
749 Opc == AMDGPU::V_FMAC_F64_e64_gfx13 ||
750 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
751 Opc == AMDGPU::V_FMAC_F32_e64_gfx11 ||
752 Opc == AMDGPU::V_FMAC_F32_e64_gfx12 ||
753 Opc == AMDGPU::V_FMAC_F32_e64_gfx13 ||
754 Opc == AMDGPU::V_FMAC_F32_e64_vi ||
755 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
756 Opc == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 ||
757 Opc == AMDGPU::V_FMAC_F16_e64_gfx10 ||
758 Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx11 ||
759 Opc == AMDGPU::V_FMAC_F16_fake16_e64_gfx11 ||
760 Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx12 ||
761 Opc == AMDGPU::V_FMAC_F16_fake16_e64_gfx12 ||
762 Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx13 ||
763 Opc == AMDGPU::V_FMAC_F16_fake16_e64_gfx13 ||
764 Opc == AMDGPU::V_DOT2C_F32_F16_e64_vi ||
765 Opc == AMDGPU::V_DOT2C_F32_BF16_e64_vi ||
766 Opc == AMDGPU::V_DOT2C_I32_I16_e64_vi ||
767 Opc == AMDGPU::V_DOT4C_I32_I8_e64_vi ||
768 Opc == AMDGPU::V_DOT8C_I32_I4_e64_vi;
769}
770
771bool isPermlane16(unsigned Opc) {
772 return Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
773 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10 ||
774 Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx11 ||
775 Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx11 ||
776 Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx12 ||
777 Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx13 ||
778 Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx12 ||
779 Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx13 ||
780 Opc == AMDGPU::V_PERMLANE16_VAR_B32_e64_gfx12 ||
781 Opc == AMDGPU::V_PERMLANE16_VAR_B32_e64_gfx13 ||
782 Opc == AMDGPU::V_PERMLANEX16_VAR_B32_e64_gfx12 ||
783 Opc == AMDGPU::V_PERMLANEX16_VAR_B32_e64_gfx13;
784}
785
787 return Opc == AMDGPU::V_CVT_F32_BF8_e64_gfx12 ||
788 Opc == AMDGPU::V_CVT_F32_FP8_e64_gfx12 ||
789 Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp_gfx12 ||
790 Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp_gfx12 ||
791 Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp8_gfx12 ||
792 Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp8_gfx12 ||
793 Opc == AMDGPU::V_CVT_PK_F32_BF8_fake16_e64_gfx12 ||
794 Opc == AMDGPU::V_CVT_PK_F32_FP8_fake16_e64_gfx12 ||
795 Opc == AMDGPU::V_CVT_PK_F32_BF8_t16_e64_gfx12 ||
796 Opc == AMDGPU::V_CVT_PK_F32_FP8_t16_e64_gfx12;
797}
798
799bool isGenericAtomic(unsigned Opc) {
800 return Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SWAP ||
801 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_ADD ||
802 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB ||
803 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMIN ||
804 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMIN ||
805 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMAX ||
806 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMAX ||
807 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_AND ||
808 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_OR ||
809 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_XOR ||
810 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_INC ||
811 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC ||
812 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD ||
813 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMIN ||
814 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMAX ||
815 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_CMPSWAP ||
816 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB_CLAMP_U32 ||
817 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_COND_SUB_U32 ||
818 Opc == AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG;
819}
820
821bool isAsyncStore(unsigned Opc) {
822 return Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B8_gfx1250 ||
823 Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B32_gfx1250 ||
824 Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B64_gfx1250 ||
825 Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B128_gfx1250 ||
826 Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B8_SADDR_gfx1250 ||
827 Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B32_SADDR_gfx1250 ||
828 Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B64_SADDR_gfx1250 ||
829 Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B128_SADDR_gfx1250;
830}
831
832bool isTensorStore(unsigned Opc) {
833 return Opc == TENSOR_STORE_FROM_LDS_d2_gfx1250 ||
834 Opc == TENSOR_STORE_FROM_LDS_d4_gfx1250;
835}
836
837unsigned getTemporalHintType(const MCInstrDesc TID) {
840 unsigned Opc = TID.getOpcode();
841 // Async and Tensor store should have the temporal hint type of TH_TYPE_STORE
842 if (TID.mayStore() &&
843 (isAsyncStore(Opc) || isTensorStore(Opc) || !TID.mayLoad()))
844 return CPol::TH_TYPE_STORE;
845
846 // This will default to returning TH_TYPE_LOAD when neither MayStore nor
847 // MayLoad flag is present which is the case with instructions like
848 // image_get_resinfo.
849 return CPol::TH_TYPE_LOAD;
850}
851
852bool isTrue16Inst(unsigned Opc) {
853 const VOPTrue16Info *Info = getTrue16OpcodeHelper(Opc);
854 return Info && Info->IsTrue16;
855}
856
858 const FP4FP8DstByteSelInfo *Info = getFP4FP8DstByteSelHelper(Opc);
859 if (!Info)
860 return FPType::None;
861 if (Info->HasFP8DstByteSel)
862 return FPType::FP8;
863 if (Info->HasFP4DstByteSel)
864 return FPType::FP4;
865
866 return FPType::None;
867}
868
869bool isDPMACCInstruction(unsigned Opc) {
870 const DPMACCInstructionInfo *Info = getDPMACCInstructionHelper(Opc);
871 return Info && Info->IsDPMACCInstruction;
872}
873
874unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc) {
875 const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom2AddrOpcode(Opc);
876 return Info ? Info->Opcode3Addr : ~0u;
877}
878
879unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc) {
880 const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom3AddrOpcode(Opc);
881 return Info ? Info->Opcode2Addr : ~0u;
882}
883
884// Wrapper for Tablegen'd function. enum Subtarget is not defined in any
885// header files, so we need to wrap it in a function that takes unsigned
886// instead.
887int32_t getMCOpcode(uint32_t Opcode, unsigned Gen) {
888 return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen));
889}
890
891unsigned getBitOp2(unsigned Opc) {
892 switch (Opc) {
893 default:
894 return 0;
895 case AMDGPU::V_AND_B32_e32:
896 return 0x40;
897 case AMDGPU::V_OR_B32_e32:
898 return 0x54;
899 case AMDGPU::V_XOR_B32_e32:
900 return 0x14;
901 case AMDGPU::V_XNOR_B32_e32:
902 return 0x41;
903 }
904}
905
906int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily,
907 bool VOPD3) {
908 bool IsConvertibleToBitOp = VOPD3 ? getBitOp2(OpY) : 0;
909 OpY = IsConvertibleToBitOp ? (unsigned)AMDGPU::V_BITOP3_B32_e64 : OpY;
910 const VOPDInfo *Info =
911 getVOPDInfoFromComponentOpcodes(OpX, OpY, EncodingFamily, VOPD3);
912 return Info ? Info->Opcode : -1;
913}
914
915std::pair<unsigned, unsigned> getVOPDComponents(unsigned VOPDOpcode) {
916 const VOPDInfo *Info = getVOPDOpcodeHelper(VOPDOpcode);
917 assert(Info);
918 const auto *OpX = getVOPDBaseFromComponent(Info->OpX);
919 const auto *OpY = getVOPDBaseFromComponent(Info->OpY);
920 assert(OpX && OpY);
921 return {OpX->BaseVOP, OpY->BaseVOP};
922}
923
924namespace VOPD {
925
926ComponentProps::ComponentProps(const MCInstrDesc &OpDesc, bool VOP3Layout) {
928
931 auto TiedIdx = OpDesc.getOperandConstraint(Component::SRC2, MCOI::TIED_TO);
932 assert(TiedIdx == -1 || TiedIdx == Component::DST);
933 HasSrc2Acc = TiedIdx != -1;
934 Opcode = OpDesc.getOpcode();
935
936 IsVOP3 = VOP3Layout || (OpDesc.TSFlags & SIInstrFlags::VOP3);
937 SrcOperandsNum = AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src2) ? 3
938 : AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm) ? 3
939 : AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src1) ? 2
940 : 1;
941 assert(SrcOperandsNum <= Component::MAX_SRC_NUM);
942
943 if (Opcode == AMDGPU::V_CNDMASK_B32_e32 ||
944 Opcode == AMDGPU::V_CNDMASK_B32_e64) {
945 // CNDMASK is an awkward exception, it has FP modifiers, but not FP
946 // operands.
947 NumVOPD3Mods = 2;
948 if (IsVOP3)
949 SrcOperandsNum = 3;
950 } else if (isSISrcFPOperand(OpDesc,
951 getNamedOperandIdx(Opcode, OpName::src0))) {
952 // All FP VOPD instructions have Neg modifiers for all operands except
953 // for tied src2.
954 NumVOPD3Mods = SrcOperandsNum;
955 if (HasSrc2Acc)
956 --NumVOPD3Mods;
957 }
958
959 if (OpDesc.TSFlags & SIInstrFlags::VOP3)
960 return;
961
962 auto OperandsNum = OpDesc.getNumOperands();
963 unsigned CompOprIdx;
964 for (CompOprIdx = Component::SRC1; CompOprIdx < OperandsNum; ++CompOprIdx) {
965 if (OpDesc.operands()[CompOprIdx].OperandType == AMDGPU::OPERAND_KIMM32) {
966 MandatoryLiteralIdx = CompOprIdx;
967 break;
968 }
969 }
970}
971
973 return getNamedOperandIdx(Opcode, OpName::bitop3);
974}
975
976unsigned ComponentInfo::getIndexInParsedOperands(unsigned CompOprIdx) const {
977 assert(CompOprIdx < Component::MAX_OPR_NUM);
978
979 if (CompOprIdx == Component::DST)
981
982 auto CompSrcIdx = CompOprIdx - Component::DST_NUM;
983 if (CompSrcIdx < getCompParsedSrcOperandsNum())
984 return getIndexOfSrcInParsedOperands(CompSrcIdx);
985
986 // The specified operand does not exist.
987 return 0;
988}
989
991 std::function<MCRegister(unsigned, unsigned)> GetRegIdx,
992 const MCRegisterInfo &MRI, bool SkipSrc, bool AllowSameVGPR,
993 bool VOPD3) const {
994
995 auto OpXRegs = getRegIndices(ComponentIndex::X, GetRegIdx,
996 CompInfo[ComponentIndex::X].isVOP3());
997 auto OpYRegs = getRegIndices(ComponentIndex::Y, GetRegIdx,
998 CompInfo[ComponentIndex::Y].isVOP3());
999
1000 const auto banksOverlap = [&MRI](MCRegister X, MCRegister Y,
1001 unsigned BanksMask) -> bool {
1002 MCRegister BaseX = MRI.getSubReg(X, AMDGPU::sub0);
1003 MCRegister BaseY = MRI.getSubReg(Y, AMDGPU::sub0);
1004 if (!BaseX)
1005 BaseX = X;
1006 if (!BaseY)
1007 BaseY = Y;
1008 if ((BaseX.id() & BanksMask) == (BaseY.id() & BanksMask))
1009 return true;
1010 if (BaseX != X /* This is 64-bit register */ &&
1011 ((BaseX.id() + 1) & BanksMask) == (BaseY.id() & BanksMask))
1012 return true;
1013 if (BaseY != Y &&
1014 (BaseX.id() & BanksMask) == ((BaseY.id() + 1) & BanksMask))
1015 return true;
1016
1017 // If both are 64-bit bank conflict will be detected yet while checking
1018 // the first subreg.
1019 return false;
1020 };
1021
1022 unsigned CompOprIdx;
1023 for (CompOprIdx = 0; CompOprIdx < Component::MAX_OPR_NUM; ++CompOprIdx) {
1024 unsigned BanksMasks = VOPD3 ? VOPD3_VGPR_BANK_MASKS[CompOprIdx]
1025 : VOPD_VGPR_BANK_MASKS[CompOprIdx];
1026 if (!OpXRegs[CompOprIdx] || !OpYRegs[CompOprIdx])
1027 continue;
1028
1029 if (getVGPREncodingMSBs(OpXRegs[CompOprIdx], MRI) !=
1030 getVGPREncodingMSBs(OpYRegs[CompOprIdx], MRI))
1031 return CompOprIdx;
1032
1033 if (SkipSrc && CompOprIdx >= Component::DST_NUM)
1034 continue;
1035
1036 if (CompOprIdx < Component::DST_NUM) {
1037 // Even if we do not check vdst parity, vdst operands still shall not
1038 // overlap.
1039 if (MRI.regsOverlap(OpXRegs[CompOprIdx], OpYRegs[CompOprIdx]))
1040 return CompOprIdx;
1041 if (VOPD3) // No need to check dst parity.
1042 continue;
1043 }
1044
1045 if (banksOverlap(OpXRegs[CompOprIdx], OpYRegs[CompOprIdx], BanksMasks) &&
1046 (!AllowSameVGPR || CompOprIdx < Component::DST_NUM ||
1047 OpXRegs[CompOprIdx] != OpYRegs[CompOprIdx]))
1048 return CompOprIdx;
1049 }
1050
1051 return {};
1052}
1053
1054// Return an array of VGPR registers [DST,SRC0,SRC1,SRC2] used
1055// by the specified component. If an operand is unused
1056// or is not a VGPR, the corresponding value is 0.
1057//
1058// GetRegIdx(Component, MCOperandIdx) must return a VGPR register index
1059// for the specified component and MC operand. The callback must return 0
1060// if the operand is not a register or not a VGPR.
1062InstInfo::getRegIndices(unsigned CompIdx,
1063 std::function<MCRegister(unsigned, unsigned)> GetRegIdx,
1064 bool VOPD3) const {
1065 assert(CompIdx < COMPONENTS_NUM);
1066
1067 const auto &Comp = CompInfo[CompIdx];
1069
1070 RegIndices[DST] = GetRegIdx(CompIdx, Comp.getIndexOfDstInMCOperands());
1071
1072 for (unsigned CompOprIdx : {SRC0, SRC1, SRC2}) {
1073 unsigned CompSrcIdx = CompOprIdx - DST_NUM;
1074 RegIndices[CompOprIdx] =
1075 Comp.hasRegSrcOperand(CompSrcIdx)
1076 ? GetRegIdx(CompIdx,
1077 Comp.getIndexOfSrcInMCOperands(CompSrcIdx, VOPD3))
1078 : MCRegister();
1079 }
1080 return RegIndices;
1081}
1082
1083} // namespace VOPD
1084
1086 return VOPD::InstInfo(OpX, OpY);
1087}
1088
1090 const MCInstrInfo *InstrInfo) {
1091 auto [OpX, OpY] = getVOPDComponents(VOPDOpcode);
1092 const auto &OpXDesc = InstrInfo->get(OpX);
1093 const auto &OpYDesc = InstrInfo->get(OpY);
1094 bool VOPD3 = InstrInfo->get(VOPDOpcode).TSFlags & SIInstrFlags::VOPD3;
1096 VOPD::ComponentInfo OpYInfo(OpYDesc, OpXInfo, VOPD3);
1097 return VOPD::InstInfo(OpXInfo, OpYInfo);
1098}
1099
1100namespace IsaInfo {
1101
1103 StringRef FeatureString)
1104 : STI(STI), XnackSetting(STI.getFeatureBits().test(FeatureSupportsXNACK)
1107 SramEccSetting(STI.getFeatureBits().test(FeatureSupportsSRAMECC)
1110
1111 // Check if xnack or sramecc is explicitly enabled or disabled. In the
1112 // absence of the target features we assume we must generate code that can run
1113 // in any environment.
1114 SubtargetFeatures Features(FeatureString);
1115 std::optional<bool> XnackRequested;
1116 std::optional<bool> SramEccRequested;
1117
1118 for (const std::string &Feature : Features.getFeatures()) {
1119 if (Feature == "+xnack")
1120 XnackRequested = true;
1121 else if (Feature == "-xnack")
1122 XnackRequested = false;
1123 else if (Feature == "+sramecc")
1124 SramEccRequested = true;
1125 else if (Feature == "-sramecc")
1126 SramEccRequested = false;
1127 }
1128
1129 // Only allow changing xnack setting if the target supports on/off modes.
1130 // Targets without on/off mode support keep their initial setting (Any).
1131
1132 bool XnackSupported = STI.getFeatureBits().test(FeatureXNACKOnOffModes);
1133 bool SramEccSupported = isSramEccSupported();
1134
1135 if (XnackRequested) {
1136 if (XnackSupported) {
1137 XnackSetting =
1138 *XnackRequested ? TargetIDSetting::On : TargetIDSetting::Off;
1139 } else {
1140 // If a specific xnack setting was requested and this GPU does not support
1141 // xnack emit a warning. Setting will remain set to "Unsupported".
1142 if (*XnackRequested) {
1143 errs() << "warning: xnack 'On' was requested for a processor that does "
1144 "not support it!\n";
1145 } else {
1146 errs() << "warning: xnack 'Off' was requested for a processor that "
1147 "does not support it!\n";
1148 }
1149 }
1150 }
1151
1152 if (SramEccRequested) {
1153 if (SramEccSupported) {
1154 SramEccSetting =
1155 *SramEccRequested ? TargetIDSetting::On : TargetIDSetting::Off;
1156 } else {
1157 // If a specific sramecc setting was requested and this GPU does not
1158 // support sramecc emit a warning. Setting will remain set to
1159 // "Unsupported".
1160 if (*SramEccRequested) {
1161 errs() << "warning: sramecc 'On' was requested for a processor that "
1162 "does not support it!\n";
1163 } else {
1164 errs() << "warning: sramecc 'Off' was requested for a processor that "
1165 "does not support it!\n";
1166 }
1167 }
1168 }
1169}
1170
1171static TargetIDSetting
1173 if (FeatureString.ends_with("-"))
1174 return TargetIDSetting::Off;
1175 if (FeatureString.ends_with("+"))
1176 return TargetIDSetting::On;
1177
1178 llvm_unreachable("Malformed feature string");
1179}
1180
1182 SmallVector<StringRef, 3> TargetIDSplit;
1183 TargetID.split(TargetIDSplit, ':');
1184
1185 for (const auto &FeatureString : TargetIDSplit) {
1186 if (FeatureString.starts_with("xnack"))
1187 XnackSetting = getTargetIDSettingFromFeatureString(FeatureString);
1188 if (FeatureString.starts_with("sramecc"))
1189 SramEccSetting = getTargetIDSettingFromFeatureString(FeatureString);
1190 }
1191}
1192
1193void AMDGPUTargetID::print(raw_ostream &StreamRep) const {
1194 const Triple &TargetTriple = STI.getTargetTriple();
1195 auto Version = getIsaVersion(STI.getCPU());
1196
1197 StreamRep << TargetTriple.getArchName() << '-' << TargetTriple.getVendorName()
1198 << '-' << TargetTriple.getOSName() << '-'
1199 << TargetTriple.getEnvironmentName() << '-';
1200
1201 std::string Processor;
1202 // TODO: Following else statement is present here because we used various
1203 // alias names for GPUs up until GFX9 (e.g. 'fiji' is same as 'gfx803').
1204 // Remove once all aliases are removed from GCNProcessors.td.
1205 if (Version.Major >= 9)
1206 Processor = STI.getCPU().str();
1207 else
1208 Processor = (Twine("gfx") + Twine(Version.Major) + Twine(Version.Minor) +
1209 Twine(Version.Stepping))
1210 .str();
1211
1212 std::string Features;
1213 if (TargetTriple.getOS() == Triple::AMDHSA) {
1214 // sramecc.
1216 Features += ":sramecc-";
1218 Features += ":sramecc+";
1219 // xnack.
1221 Features += ":xnack-";
1223 Features += ":xnack+";
1224 }
1225
1226 StreamRep << Processor << Features;
1227}
1228
1229std::string AMDGPUTargetID::toString() const {
1230 std::string Str;
1231 raw_string_ostream OS(Str);
1232 OS << *this;
1233 return Str;
1234}
1235
1237 if (STI.getFeatureBits().test(FeatureInstCacheLineSize128))
1238 return 128;
1239 if (STI.getFeatureBits().test(FeatureInstCacheLineSize64))
1240 return 64;
1241 return 64;
1242}
1243
1244unsigned getWavefrontSize(const MCSubtargetInfo &STI) {
1245 if (STI.getFeatureBits().test(FeatureWavefrontSize16))
1246 return 16;
1247 if (STI.getFeatureBits().test(FeatureWavefrontSize32))
1248 return 32;
1249
1250 return 64;
1251}
1252
1254 unsigned BytesPerCU = getAddressableLocalMemorySize(STI);
1255
1256 // "Per CU" really means "per whatever functional block the waves of a
1257 // workgroup must share". So the effective local memory size is doubled in
1258 // WGP mode on gfx10.
1259 if (isGFX10Plus(STI) && !STI.getFeatureBits().test(FeatureCuMode))
1260 BytesPerCU *= 2;
1261
1262 return BytesPerCU;
1263}
1264
1266 if (STI.getFeatureBits().test(FeatureAddressableLocalMemorySize32768))
1267 return 32768;
1268 if (STI.getFeatureBits().test(FeatureAddressableLocalMemorySize65536))
1269 return 65536;
1270 if (STI.getFeatureBits().test(FeatureAddressableLocalMemorySize163840))
1271 return 163840;
1272 if (STI.getFeatureBits().test(FeatureAddressableLocalMemorySize327680))
1273 return 327680;
1274 return 32768;
1275}
1276
1277unsigned getEUsPerCU(const MCSubtargetInfo &STI) {
1278 // "Per CU" really means "per whatever functional block the waves of a
1279 // workgroup must share".
1280
1281 // GFX12.5 only supports CU mode, which contains four SIMDs.
1282 if (isGFX1250(STI)) {
1283 assert(STI.getFeatureBits().test(FeatureCuMode));
1284 return 4;
1285 }
1286
1287 // For gfx10 in CU mode the functional block is the CU, which contains
1288 // two SIMDs.
1289 if (isGFX10Plus(STI) && STI.getFeatureBits().test(FeatureCuMode))
1290 return 2;
1291
1292 // Pre-gfx10 a CU contains four SIMDs. For gfx10 in WGP mode the WGP
1293 // contains two CUs, so a total of four SIMDs.
1294 return 4;
1295}
1296
1298 unsigned FlatWorkGroupSize) {
1299 assert(FlatWorkGroupSize != 0);
1300 if (!STI.getTargetTriple().isAMDGCN())
1301 return 8;
1302 unsigned MaxWaves = getMaxWavesPerEU(STI) * getEUsPerCU(STI);
1303 unsigned N = getWavesPerWorkGroup(STI, FlatWorkGroupSize);
1304 if (N == 1) {
1305 // Single-wave workgroups don't consume barrier resources.
1306 return MaxWaves;
1307 }
1308
1309 unsigned MaxBarriers = 16;
1310 if (isGFX10Plus(STI) && !STI.getFeatureBits().test(FeatureCuMode))
1311 MaxBarriers = 32;
1312
1313 return std::min(MaxWaves / N, MaxBarriers);
1314}
1315
1316unsigned getMinWavesPerEU(const MCSubtargetInfo &STI) { return 1; }
1317
1318unsigned getMaxWavesPerEU(const MCSubtargetInfo &STI) {
1319 // FIXME: Need to take scratch memory into account.
1320 if (isGFX90A(STI))
1321 return 8;
1322 if (!isGFX10Plus(STI))
1323 return 10;
1324 return hasGFX10_3Insts(STI) ? 16 : 20;
1325}
1326
1328 unsigned FlatWorkGroupSize) {
1329 return divideCeil(getWavesPerWorkGroup(STI, FlatWorkGroupSize),
1330 getEUsPerCU(STI));
1331}
1332
1333unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo &STI) { return 1; }
1334
1336 unsigned FlatWorkGroupSize) {
1337 return divideCeil(FlatWorkGroupSize, getWavefrontSize(STI));
1338}
1339
1342 if (Version.Major >= 10)
1343 return getAddressableNumSGPRs(STI);
1344 if (Version.Major >= 8)
1345 return 16;
1346 return 8;
1347}
1348
1349unsigned getSGPREncodingGranule(const MCSubtargetInfo &STI) { return 8; }
1350
1351unsigned getTotalNumSGPRs(const MCSubtargetInfo &STI) {
1353 if (Version.Major >= 8)
1354 return 800;
1355 return 512;
1356}
1357
1359 if (STI.getFeatureBits().test(FeatureSGPRInitBug))
1361
1363 if (Version.Major >= 10)
1364 return 106;
1365 if (Version.Major >= 8)
1366 return 102;
1367 return 104;
1368}
1369
1370// Per-wave SGPRs reserved for the trap handler when enabled.
1371static unsigned getSGPRTrapHandlerReserve(const MCSubtargetInfo &STI) {
1372 return STI.getFeatureBits().test(FeatureTrapHandler) ? TRAP_NUM_SGPRS : 0;
1373}
1374
1375// Per-wave SGPR budget (before the addressable clamp): take off the trap
1376// reserve, round down to \p Granule. Shared by getMinNumSGPRs() and
1377// getMaxNumSGPRs(); getOccupancyWithNumSGPRs() is the closed-form algebraic
1378// inverse of this same budget (it does not call this helper), so the two encode
1379// one model.
1380static unsigned getSGPRBudgetPerWave(unsigned TotalNumSGPRs,
1381 unsigned WavesPerEU, unsigned TrapReserve,
1382 unsigned Granule) {
1383 assert(WavesPerEU != 0 && Granule != 0);
1384 unsigned Budget = TotalNumSGPRs / WavesPerEU;
1385 Budget -= std::min(Budget, TrapReserve);
1386 return alignDown(Budget, Granule);
1387}
1388
1389unsigned getMinNumSGPRs(const MCSubtargetInfo &STI, unsigned WavesPerEU) {
1390 assert(WavesPerEU != 0);
1391
1393 if (Version.Major >= 10)
1394 return 0;
1395
1396 if (WavesPerEU >= getMaxWavesPerEU(STI))
1397 return 0;
1398
1399 unsigned MinNumSGPRs =
1400 getSGPRBudgetPerWave(getTotalNumSGPRs(STI), WavesPerEU + 1,
1402 getSGPRAllocGranule(STI)) +
1403 1;
1404 return std::min(MinNumSGPRs, getAddressableNumSGPRs(STI));
1405}
1406
1407unsigned getMaxNumSGPRs(const MCSubtargetInfo &STI, unsigned WavesPerEU,
1408 bool Addressable) {
1409 assert(WavesPerEU != 0);
1410
1411 unsigned AddressableNumSGPRs = getAddressableNumSGPRs(STI);
1413 if (Version.Major >= 10)
1414 return Addressable ? AddressableNumSGPRs : 108;
1415 if (Version.Major >= 8 && !Addressable)
1416 AddressableNumSGPRs = 112;
1417 unsigned MaxNumSGPRs = getSGPRBudgetPerWave(getTotalNumSGPRs(STI), WavesPerEU,
1419 getSGPRAllocGranule(STI));
1420 return std::min(MaxNumSGPRs, AddressableNumSGPRs);
1421}
1422
1424 // From GFX10 on the SGPR file is large enough that SGPRs never limit
1425 // occupancy. Kept as one capability so callers don't each test the version.
1426 return getIsaVersion(STI.getCPU()).Major < 10;
1427}
1428
1429unsigned getNumExtraSGPRs(const MCSubtargetInfo &STI, bool VCCUsed,
1430 bool FlatScrUsed, bool XNACKUsed) {
1431 unsigned ExtraSGPRs = 0;
1432 if (VCCUsed)
1433 ExtraSGPRs = 2;
1434
1436 if (Version.Major >= 10)
1437 return ExtraSGPRs;
1438
1439 if (Version.Major < 8) {
1440 if (FlatScrUsed)
1441 ExtraSGPRs = 4;
1442 } else {
1443 if (XNACKUsed)
1444 ExtraSGPRs = 4;
1445
1446 if (FlatScrUsed ||
1447 STI.getFeatureBits().test(AMDGPU::FeatureArchitectedFlatScratch))
1448 ExtraSGPRs = 6;
1449 }
1450
1451 return ExtraSGPRs;
1452}
1453
1454unsigned getNumExtraSGPRs(const MCSubtargetInfo &STI, bool VCCUsed,
1455 bool FlatScrUsed) {
1456 return getNumExtraSGPRs(STI, VCCUsed, FlatScrUsed,
1457 STI.getFeatureBits().test(AMDGPU::FeatureXNACK));
1458}
1459
1460static unsigned getGranulatedNumRegisterBlocks(unsigned NumRegs,
1461 unsigned Granule) {
1462 return divideCeil(std::max(1u, NumRegs), Granule);
1463}
1464
1465unsigned getNumSGPRBlocks(const MCSubtargetInfo &STI, unsigned NumSGPRs) {
1466 // SGPRBlocks is actual number of SGPR blocks minus 1.
1468 1;
1469}
1470
1472 unsigned DynamicVGPRBlockSize,
1473 std::optional<bool> EnableWavefrontSize32) {
1474 if (STI.getFeatureBits().test(FeatureGFX90AInsts))
1475 return 8;
1476
1477 if (DynamicVGPRBlockSize != 0)
1478 return DynamicVGPRBlockSize;
1479
1480 bool IsWave32 = EnableWavefrontSize32
1481 ? *EnableWavefrontSize32
1482 : STI.getFeatureBits().test(FeatureWavefrontSize32);
1483
1484 if (STI.getFeatureBits().test(Feature1536VGPRs))
1485 return IsWave32 ? 24 : 12;
1486
1487 if (hasGFX10_3Insts(STI))
1488 return IsWave32 ? 16 : 8;
1489
1490 return IsWave32 ? 8 : 4;
1491}
1492
1494 std::optional<bool> EnableWavefrontSize32) {
1495 if (STI.getFeatureBits().test(FeatureGFX90AInsts))
1496 return 8;
1497
1498 bool IsWave32 = EnableWavefrontSize32
1499 ? *EnableWavefrontSize32
1500 : STI.getFeatureBits().test(FeatureWavefrontSize32);
1501
1502 if (STI.getFeatureBits().test(Feature1024AddressableVGPRs))
1503 return IsWave32 ? 16 : 8;
1504
1505 return IsWave32 ? 8 : 4;
1506}
1507
/// \returns The allocation granule for architected VGPRs, which is 4.
unsigned getArchVGPRAllocGranule() {
  return 4;
}
1509
1510unsigned getTotalNumVGPRs(const MCSubtargetInfo &STI) {
1511 if (STI.getFeatureBits().test(FeatureGFX90AInsts))
1512 return 512;
1513 if (!isGFX10Plus(STI))
1514 return 256;
1515 bool IsWave32 = STI.getFeatureBits().test(FeatureWavefrontSize32);
1516 if (STI.getFeatureBits().test(Feature1536VGPRs))
1517 return IsWave32 ? 1536 : 768;
1518 return IsWave32 ? 1024 : 512;
1519}
1520
1522 const auto &Features = STI.getFeatureBits();
1523 if (Features.test(Feature1024AddressableVGPRs))
1524 return Features.test(FeatureWavefrontSize32) ? 1024 : 512;
1525 return 256;
1526}
1527
1529 unsigned DynamicVGPRBlockSize) {
1530 const auto &Features = STI.getFeatureBits();
1531 if (Features.test(FeatureGFX90AInsts))
1532 return 512;
1533
1534 if (DynamicVGPRBlockSize != 0) {
1535 // On GFX12 we can allocate at most MaxDynamicVGPRBlocks blocks of VGPRs.
1536 return MaxDynamicVGPRBlocks *
1537 getVGPRAllocGranule(STI, DynamicVGPRBlockSize);
1538 }
1539 return getAddressableNumArchVGPRs(STI);
1540}
1541
1543 unsigned NumVGPRs,
1544 unsigned DynamicVGPRBlockSize) {
1546 NumVGPRs, getVGPRAllocGranule(STI, DynamicVGPRBlockSize),
1548}
1549
1550unsigned getNumWavesPerEUWithNumVGPRs(unsigned NumVGPRs, unsigned Granule,
1551 unsigned MaxWaves,
1552 unsigned TotalNumVGPRs) {
1553 if (NumVGPRs < Granule)
1554 return MaxWaves;
1555 unsigned RoundedRegs = alignTo(NumVGPRs, Granule);
1556 return std::min(std::max(TotalNumVGPRs / RoundedRegs, 1u), MaxWaves);
1557}
1558
1559unsigned getOccupancyWithNumSGPRs(unsigned SGPRs, unsigned MaxWaves,
1560 unsigned TotalNumSGPRs, unsigned Granule,
1561 unsigned TrapReserve) {
1562 // Closed-form inverse of getMaxNumSGPRs(): the budget condition
1563 // SGPRs <= alignDown(TotalNumSGPRs / W - TrapReserve, Granule)
1564 // solves to W <= TotalNumSGPRs / (alignTo(SGPRs, Granule) + TrapReserve).
1565 unsigned PerWave = alignTo(SGPRs, Granule) + TrapReserve;
1566 return PerWave ? std::clamp(TotalNumSGPRs / PerWave, 1u, MaxWaves) : MaxWaves;
1567}
1568
1569unsigned getOccupancyWithNumSGPRs(const MCSubtargetInfo &STI, unsigned SGPRs) {
1570 unsigned MaxWaves = getMaxWavesPerEU(STI);
1571
1572 if (!isSGPROccupancyLimited(STI))
1573 return MaxWaves;
1574
1575 return getOccupancyWithNumSGPRs(SGPRs, MaxWaves, getTotalNumSGPRs(STI),
1578}
1579
1580unsigned getMinNumVGPRs(const MCSubtargetInfo &STI, unsigned WavesPerEU,
1581 unsigned DynamicVGPRBlockSize) {
1582 assert(WavesPerEU != 0);
1583
1584 // In dynamic VGPR mode, (static) occupancy does not depend on VGPR usage,
1585 // so getMaxNumVGPRs does not depend on WavesPerEU, and thus we need to return
1586 // zero because there is no nonzero VGPR usage N where going below N
1587 // achieves higher (static) occupancy.
1588 bool DynamicVGPREnabled = (DynamicVGPRBlockSize != 0);
1589 if (DynamicVGPREnabled)
1590 return 0;
1591
1592 unsigned MaxWavesPerEU = getMaxWavesPerEU(STI);
1593 if (WavesPerEU >= MaxWavesPerEU)
1594 return 0;
1595
1596 unsigned TotNumVGPRs = getTotalNumVGPRs(STI);
1597 unsigned AddrsableNumVGPRs =
1598 getAddressableNumVGPRs(STI, DynamicVGPRBlockSize);
1599 unsigned Granule = getVGPRAllocGranule(STI, DynamicVGPRBlockSize);
1600 unsigned MaxNumVGPRs = alignDown(TotNumVGPRs / WavesPerEU, Granule);
1601
1602 if (MaxNumVGPRs == alignDown(TotNumVGPRs / MaxWavesPerEU, Granule))
1603 return 0;
1604
1605 unsigned MinWavesPerEU = getNumWavesPerEUWithNumVGPRs(STI, AddrsableNumVGPRs,
1606 DynamicVGPRBlockSize);
1607 if (WavesPerEU < MinWavesPerEU)
1608 return getMinNumVGPRs(STI, MinWavesPerEU, DynamicVGPRBlockSize);
1609
1610 unsigned MaxNumVGPRsNext = alignDown(TotNumVGPRs / (WavesPerEU + 1), Granule);
1611 unsigned MinNumVGPRs = 1 + std::min(MaxNumVGPRs - Granule, MaxNumVGPRsNext);
1612 return std::min(MinNumVGPRs, AddrsableNumVGPRs);
1613}
1614
1615unsigned getMaxNumVGPRs(const MCSubtargetInfo &STI, unsigned WavesPerEU,
1616 unsigned DynamicVGPRBlockSize) {
1617 assert(WavesPerEU != 0);
1618
1619 // In dynamic VGPR mode, WavesPerEU does not imply a VGPR limit.
1620 bool DynamicVGPREnabled = (DynamicVGPRBlockSize != 0);
1621 unsigned MaxNumVGPRs =
1622 DynamicVGPREnabled
1623 ? getTotalNumVGPRs(STI)
1624 : alignDown(getTotalNumVGPRs(STI) / WavesPerEU,
1625 getVGPRAllocGranule(STI, DynamicVGPRBlockSize));
1626 unsigned AddressableNumVGPRs =
1627 getAddressableNumVGPRs(STI, DynamicVGPRBlockSize);
1628 return std::min(MaxNumVGPRs, AddressableNumVGPRs);
1629}
1630
1631unsigned getEncodedNumVGPRBlocks(const MCSubtargetInfo &STI, unsigned NumVGPRs,
1632 std::optional<bool> EnableWavefrontSize32) {
1634 NumVGPRs, getVGPREncodingGranule(STI, EnableWavefrontSize32)) -
1635 1;
1636}
1637
1639 unsigned NumVGPRs,
1640 unsigned DynamicVGPRBlockSize,
1641 std::optional<bool> EnableWavefrontSize32) {
1643 NumVGPRs,
1644 getVGPRAllocGranule(STI, DynamicVGPRBlockSize, EnableWavefrontSize32));
1645}
1646} // end namespace IsaInfo
1647
1649 const MCSubtargetInfo &STI) {
1651 KernelCode.amd_kernel_code_version_major = 1;
1652 KernelCode.amd_kernel_code_version_minor = 2;
1653 KernelCode.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
1654 KernelCode.amd_machine_version_major = Version.Major;
1655 KernelCode.amd_machine_version_minor = Version.Minor;
1656 KernelCode.amd_machine_version_stepping = Version.Stepping;
1658 if (STI.getFeatureBits().test(FeatureWavefrontSize32)) {
1659 KernelCode.wavefront_size = 5;
1661 } else {
1662 KernelCode.wavefront_size = 6;
1663 }
1664
1665 // If the code object does not support indirect functions, then the value must
1666 // be 0xffffffff.
1667 KernelCode.call_convention = -1;
1668
1669 // These alignment values are specified in powers of two, so alignment =
1670 // 2^n. The minimum alignment is 2^4 = 16.
1671 KernelCode.kernarg_segment_alignment = 4;
1672 KernelCode.group_segment_alignment = 4;
1673 KernelCode.private_segment_alignment = 4;
1674
1675 if (Version.Major >= 10) {
1676 KernelCode.compute_pgm_resource_registers |=
1677 S_00B848_WGP_MODE(STI.getFeatureBits().test(FeatureCuMode) ? 0 : 1) |
1679 }
1680}
1681
1684}
1685
1688}
1689
1691 unsigned AS = GV->getAddressSpace();
1692 return AS == AMDGPUAS::CONSTANT_ADDRESS ||
1694}
1695
1697 return TT.getArch() == Triple::r600;
1698}
1699
/// \returns True if \p C is one of the register-kind prefix characters
/// accepted by the physical register name parser: 'v', 's' or 'a'.
static bool isValidRegPrefix(char C) {
  switch (C) {
  case 'v':
  case 's':
  case 'a':
    return true;
  default:
    return false;
  }
}
1703
1704std::tuple<char, unsigned, unsigned> parseAsmPhysRegName(StringRef RegName) {
1705 char Kind = RegName.front();
1706 if (!isValidRegPrefix(Kind))
1707 return {};
1708
1709 RegName = RegName.drop_front();
1710 if (RegName.consume_front("[")) {
1711 unsigned Idx, End;
1712 bool Failed = RegName.consumeInteger(10, Idx);
1713 Failed |= !RegName.consume_front(":");
1714 Failed |= RegName.consumeInteger(10, End);
1715 Failed |= !RegName.consume_back("]");
1716 if (!Failed) {
1717 unsigned NumRegs = End - Idx + 1;
1718 if (NumRegs > 1)
1719 return {Kind, Idx, NumRegs};
1720 }
1721 } else {
1722 unsigned Idx;
1723 bool Failed = RegName.getAsInteger(10, Idx);
1724 if (!Failed)
1725 return {Kind, Idx, 1};
1726 }
1727
1728 return {};
1729}
1730
1731std::tuple<char, unsigned, unsigned>
1733 StringRef RegName = Constraint;
1734 if (!RegName.consume_front("{") || !RegName.consume_back("}"))
1735 return {};
1737}
1738
1739std::pair<unsigned, unsigned>
1741 std::pair<unsigned, unsigned> Default,
1742 bool OnlyFirstRequired) {
1743 if (auto Attr = getIntegerPairAttribute(F, Name, OnlyFirstRequired))
1744 return {Attr->first, Attr->second.value_or(Default.second)};
1745 return Default;
1746}
1747
1748std::optional<std::pair<unsigned, std::optional<unsigned>>>
1750 bool OnlyFirstRequired) {
1751 Attribute A = F.getFnAttribute(Name);
1752 if (!A.isStringAttribute())
1753 return std::nullopt;
1754
1755 LLVMContext &Ctx = F.getContext();
1756 std::pair<unsigned, std::optional<unsigned>> Ints;
1757 std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(',');
1758 if (Strs.first.trim().getAsInteger(0, Ints.first)) {
1759 Ctx.emitError("can't parse first integer attribute " + Name);
1760 return std::nullopt;
1761 }
1762 unsigned Second = 0;
1763 if (Strs.second.trim().getAsInteger(0, Second)) {
1764 if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
1765 Ctx.emitError("can't parse second integer attribute " + Name);
1766 return std::nullopt;
1767 }
1768 } else {
1769 Ints.second = Second;
1770 }
1771
1772 return Ints;
1773}
1774
1776 unsigned Size,
1777 unsigned DefaultVal) {
1778 std::optional<SmallVector<unsigned>> R =
1780 return R.has_value() ? *R : SmallVector<unsigned>(Size, DefaultVal);
1781}
1782
1783std::optional<SmallVector<unsigned>>
1785 assert(Size > 2);
1786 LLVMContext &Ctx = F.getContext();
1787
1788 Attribute A = F.getFnAttribute(Name);
1789 if (!A.isValid())
1790 return std::nullopt;
1791 if (!A.isStringAttribute()) {
1792 Ctx.emitError(Name + " is not a string attribute");
1793 return std::nullopt;
1794 }
1795
1797
1798 StringRef S = A.getValueAsString();
1799 unsigned i = 0;
1800 for (; !S.empty() && i < Size; i++) {
1801 std::pair<StringRef, StringRef> Strs = S.split(',');
1802 unsigned IntVal;
1803 if (Strs.first.trim().getAsInteger(0, IntVal)) {
1804 Ctx.emitError("can't parse integer attribute " + Strs.first + " in " +
1805 Name);
1806 return std::nullopt;
1807 }
1808 Vals[i] = IntVal;
1809 S = Strs.second;
1810 }
1811
1812 if (!S.empty() || i < Size) {
1813 Ctx.emitError("attribute " + Name +
1814 " has incorrect number of integers; expected " +
1816 return std::nullopt;
1817 }
1818 return Vals;
1819}
1820
1821bool hasValueInRangeLikeMetadata(const MDNode &MD, int64_t Val) {
1822 assert((MD.getNumOperands() % 2 == 0) && "invalid number of operands!");
1823 for (unsigned I = 0, E = MD.getNumOperands() / 2; I != E; ++I) {
1824 auto Low =
1825 mdconst::extract<ConstantInt>(MD.getOperand(2 * I + 0))->getValue();
1826 auto High =
1827 mdconst::extract<ConstantInt>(MD.getOperand(2 * I + 1))->getValue();
1828 // There are two types of [A; B) ranges:
1829 // A < B, e.g. [4; 5) which is a range that only includes 4.
1830 // A > B, e.g. [5; 4) which is a range that wraps around and includes
1831 // everything except 4.
1832 if (Low.ult(High)) {
1833 if (Low.ule(Val) && High.ugt(Val))
1834 return true;
1835 } else {
1836 if (Low.uge(Val) && High.ult(Val))
1837 return true;
1838 }
1839 }
1840
1841 return false;
1842}
1843
1845 return (1 << (getVmcntBitWidthLo(Version.Major) +
1846 getVmcntBitWidthHi(Version.Major))) -
1847 1;
1848}
1849
1851 return (1 << getLoadcntBitWidth(Version.Major)) - 1;
1852}
1853
1855 return (1 << getSamplecntBitWidth(Version.Major)) - 1;
1856}
1857
1859 return (1 << getBvhcntBitWidth(Version.Major)) - 1;
1860}
1861
1863 return (1 << getExpcntBitWidth(Version.Major)) - 1;
1864}
1865
1867 return (1 << getLgkmcntBitWidth(Version.Major)) - 1;
1868}
1869
1871 return (1 << getDscntBitWidth(Version.Major)) - 1;
1872}
1873
1875 return (1 << getKmcntBitWidth(Version.Major)) - 1;
1876}
1877
1879 return (1 << getXcntBitWidth(Version.Major, Version.Minor)) - 1;
1880}
1881
1883 return (1 << getAsynccntBitWidth(Version.Major, Version.Minor)) - 1;
1884}
1885
1887 return (1 << getStorecntBitWidth(Version.Major)) - 1;
1888}
1889
1891 unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(Version.Major),
1892 getVmcntBitWidthLo(Version.Major));
1893 unsigned Expcnt = getBitMask(getExpcntBitShift(Version.Major),
1894 getExpcntBitWidth(Version.Major));
1895 unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(Version.Major),
1896 getLgkmcntBitWidth(Version.Major));
1897 unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(Version.Major),
1898 getVmcntBitWidthHi(Version.Major));
1899 return VmcntLo | Expcnt | Lgkmcnt | VmcntHi;
1900}
1901
1902unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt) {
1903 unsigned VmcntLo = unpackBits(Waitcnt, getVmcntBitShiftLo(Version.Major),
1904 getVmcntBitWidthLo(Version.Major));
1905 unsigned VmcntHi = unpackBits(Waitcnt, getVmcntBitShiftHi(Version.Major),
1906 getVmcntBitWidthHi(Version.Major));
1907 return VmcntLo | VmcntHi << getVmcntBitWidthLo(Version.Major);
1908}
1909
1910unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt) {
1911 return unpackBits(Waitcnt, getExpcntBitShift(Version.Major),
1912 getExpcntBitWidth(Version.Major));
1913}
1914
1915unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt) {
1916 return unpackBits(Waitcnt, getLgkmcntBitShift(Version.Major),
1917 getLgkmcntBitWidth(Version.Major));
1918}
1919
1920unsigned decodeLoadcnt(const IsaVersion &Version, unsigned Waitcnt) {
1921 return unpackBits(Waitcnt, getLoadcntStorecntBitShift(Version.Major),
1922 getLoadcntBitWidth(Version.Major));
1923}
1924
1925unsigned decodeStorecnt(const IsaVersion &Version, unsigned Waitcnt) {
1926 return unpackBits(Waitcnt, getLoadcntStorecntBitShift(Version.Major),
1927 getStorecntBitWidth(Version.Major));
1928}
1929
1930unsigned decodeDscnt(const IsaVersion &Version, unsigned Waitcnt) {
1931 return unpackBits(Waitcnt, getDscntBitShift(Version.Major),
1932 getDscntBitWidth(Version.Major));
1933}
1934
1935void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned &Vmcnt,
1936 unsigned &Expcnt, unsigned &Lgkmcnt) {
1937 Vmcnt = decodeVmcnt(Version, Waitcnt);
1938 Expcnt = decodeExpcnt(Version, Waitcnt);
1939 Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
1940}
1941
1942unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
1943 unsigned Vmcnt) {
1944 Waitcnt = packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(Version.Major),
1945 getVmcntBitWidthLo(Version.Major));
1946 return packBits(Vmcnt >> getVmcntBitWidthLo(Version.Major), Waitcnt,
1947 getVmcntBitShiftHi(Version.Major),
1948 getVmcntBitWidthHi(Version.Major));
1949}
1950
1951unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
1952 unsigned Expcnt) {
1953 return packBits(Expcnt, Waitcnt, getExpcntBitShift(Version.Major),
1954 getExpcntBitWidth(Version.Major));
1955}
1956
1957unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
1958 unsigned Lgkmcnt) {
1959 return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(Version.Major),
1960 getLgkmcntBitWidth(Version.Major));
1961}
1962
1963unsigned encodeWaitcnt(const IsaVersion &Version, unsigned Vmcnt,
1964 unsigned Expcnt, unsigned Lgkmcnt) {
1965 unsigned Waitcnt = getWaitcntBitMask(Version);
1967 Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
1968 Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
1969 return Waitcnt;
1970}
1971
1973 bool IsStore) {
1974 unsigned Dscnt = getBitMask(getDscntBitShift(Version.Major),
1975 getDscntBitWidth(Version.Major));
1976 if (IsStore) {
1977 unsigned Storecnt = getBitMask(getLoadcntStorecntBitShift(Version.Major),
1978 getStorecntBitWidth(Version.Major));
1979 return Dscnt | Storecnt;
1980 }
1981 unsigned Loadcnt = getBitMask(getLoadcntStorecntBitShift(Version.Major),
1982 getLoadcntBitWidth(Version.Major));
1983 return Dscnt | Loadcnt;
1984}
1985
1986static unsigned encodeLoadcnt(const IsaVersion &Version, unsigned Waitcnt,
1987 unsigned Loadcnt) {
1988 return packBits(Loadcnt, Waitcnt, getLoadcntStorecntBitShift(Version.Major),
1989 getLoadcntBitWidth(Version.Major));
1990}
1991
1992static unsigned encodeStorecnt(const IsaVersion &Version, unsigned Waitcnt,
1993 unsigned Storecnt) {
1994 return packBits(Storecnt, Waitcnt, getLoadcntStorecntBitShift(Version.Major),
1995 getStorecntBitWidth(Version.Major));
1996}
1997
1998static unsigned encodeDscnt(const IsaVersion &Version, unsigned Waitcnt,
1999 unsigned Dscnt) {
2000 return packBits(Dscnt, Waitcnt, getDscntBitShift(Version.Major),
2001 getDscntBitWidth(Version.Major));
2002}
2003
2004unsigned encodeLoadcntDscnt(const IsaVersion &Version, unsigned Loadcnt,
2005 unsigned Dscnt) {
2006 unsigned Waitcnt = getCombinedCountBitMask(Version, false);
2007 Waitcnt = encodeLoadcnt(Version, Waitcnt, Loadcnt);
2009 return Waitcnt;
2010}
2011
2012unsigned encodeStorecntDscnt(const IsaVersion &Version, unsigned Storecnt,
2013 unsigned Dscnt) {
2014 unsigned Waitcnt = getCombinedCountBitMask(Version, true);
2015 Waitcnt = encodeStorecnt(Version, Waitcnt, Storecnt);
2017 return Waitcnt;
2018}
2019
2020//===----------------------------------------------------------------------===//
2021// Custom Operand Values
2022//===----------------------------------------------------------------------===//
2023
2025 int Size,
2026 const MCSubtargetInfo &STI) {
2027 unsigned Enc = 0;
2028 for (int Idx = 0; Idx < Size; ++Idx) {
2029 const auto &Op = Opr[Idx];
2030 if (Op.isSupported(STI))
2031 Enc |= Op.encode(Op.Default);
2032 }
2033 return Enc;
2034}
2035
2037 int Size, unsigned Code,
2038 bool &HasNonDefaultVal,
2039 const MCSubtargetInfo &STI) {
2040 unsigned UsedOprMask = 0;
2041 HasNonDefaultVal = false;
2042 for (int Idx = 0; Idx < Size; ++Idx) {
2043 const auto &Op = Opr[Idx];
2044 if (!Op.isSupported(STI))
2045 continue;
2046 UsedOprMask |= Op.getMask();
2047 unsigned Val = Op.decode(Code);
2048 if (!Op.isValid(Val))
2049 return false;
2050 HasNonDefaultVal |= (Val != Op.Default);
2051 }
2052 return (Code & ~UsedOprMask) == 0;
2053}
2054
2055static bool decodeCustomOperand(const CustomOperandVal *Opr, int Size,
2056 unsigned Code, int &Idx, StringRef &Name,
2057 unsigned &Val, bool &IsDefault,
2058 const MCSubtargetInfo &STI) {
2059 while (Idx < Size) {
2060 const auto &Op = Opr[Idx++];
2061 if (Op.isSupported(STI)) {
2062 Name = Op.Name;
2063 Val = Op.decode(Code);
2064 IsDefault = (Val == Op.Default);
2065 return true;
2066 }
2067 }
2068
2069 return false;
2070}
2071
2073 int64_t InputVal) {
2074 if (InputVal < 0 || InputVal > Op.Max)
2075 return OPR_VAL_INVALID;
2076 return Op.encode(InputVal);
2077}
2078
2079static int encodeCustomOperand(const CustomOperandVal *Opr, int Size,
2080 const StringRef Name, int64_t InputVal,
2081 unsigned &UsedOprMask,
2082 const MCSubtargetInfo &STI) {
2083 int InvalidId = OPR_ID_UNKNOWN;
2084 for (int Idx = 0; Idx < Size; ++Idx) {
2085 const auto &Op = Opr[Idx];
2086 if (Op.Name == Name) {
2087 if (!Op.isSupported(STI)) {
2088 InvalidId = OPR_ID_UNSUPPORTED;
2089 continue;
2090 }
2091 auto OprMask = Op.getMask();
2092 if (OprMask & UsedOprMask)
2093 return OPR_ID_DUPLICATE;
2094 UsedOprMask |= OprMask;
2095 return encodeCustomOperandVal(Op, InputVal);
2096 }
2097 }
2098 return InvalidId;
2099}
2100
2101//===----------------------------------------------------------------------===//
2102// DepCtr
2103//===----------------------------------------------------------------------===//
2104
2105namespace DepCtr {
2106
2108 static int Default = -1;
2109 if (Default == -1)
2111 return Default;
2112}
2113
2114bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal,
2115 const MCSubtargetInfo &STI) {
2117 HasNonDefaultVal, STI);
2118}
2119
2120bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val,
2121 bool &IsDefault, const MCSubtargetInfo &STI) {
2122 return decodeCustomOperand(DepCtrInfo, DEP_CTR_SIZE, Code, Id, Name, Val,
2123 IsDefault, STI);
2124}
2125
2126int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask,
2127 const MCSubtargetInfo &STI) {
2128 return encodeCustomOperand(DepCtrInfo, DEP_CTR_SIZE, Name, Val, UsedOprMask,
2129 STI);
2130}
2131
2132unsigned getVaVdstBitMask() { return (1 << getVaVdstBitWidth()) - 1; }
2133
2134unsigned getVaSdstBitMask() { return (1 << getVaSdstBitWidth()) - 1; }
2135
2136unsigned getVaSsrcBitMask() { return (1 << getVaSsrcBitWidth()) - 1; }
2137
2139 return (1 << getHoldCntWidth(Version.Major, Version.Minor)) - 1;
2140}
2141
2142unsigned getVmVsrcBitMask() { return (1 << getVmVsrcBitWidth()) - 1; }
2143
2144unsigned getVaVccBitMask() { return (1 << getVaVccBitWidth()) - 1; }
2145
2146unsigned getSaSdstBitMask() { return (1 << getSaSdstBitWidth()) - 1; }
2147
2148unsigned decodeFieldVmVsrc(unsigned Encoded) {
2149 return unpackBits(Encoded, getVmVsrcBitShift(), getVmVsrcBitWidth());
2150}
2151
2152unsigned decodeFieldVaVdst(unsigned Encoded) {
2153 return unpackBits(Encoded, getVaVdstBitShift(), getVaVdstBitWidth());
2154}
2155
2156unsigned decodeFieldSaSdst(unsigned Encoded) {
2157 return unpackBits(Encoded, getSaSdstBitShift(), getSaSdstBitWidth());
2158}
2159
2160unsigned decodeFieldVaSdst(unsigned Encoded) {
2161 return unpackBits(Encoded, getVaSdstBitShift(), getVaSdstBitWidth());
2162}
2163
2164unsigned decodeFieldVaVcc(unsigned Encoded) {
2165 return unpackBits(Encoded, getVaVccBitShift(), getVaVccBitWidth());
2166}
2167
2168unsigned decodeFieldVaSsrc(unsigned Encoded) {
2169 return unpackBits(Encoded, getVaSsrcBitShift(), getVaSsrcBitWidth());
2170}
2171
2172unsigned decodeFieldHoldCnt(unsigned Encoded, const IsaVersion &Version) {
2173 return unpackBits(Encoded, getHoldCntBitShift(),
2174 getHoldCntWidth(Version.Major, Version.Minor));
2175}
2176
2177unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc) {
2178 return packBits(VmVsrc, Encoded, getVmVsrcBitShift(), getVmVsrcBitWidth());
2179}
2180
2181unsigned encodeFieldVmVsrc(unsigned VmVsrc, const MCSubtargetInfo &STI) {
2182 unsigned Encoded = getDefaultDepCtrEncoding(STI);
2183 return encodeFieldVmVsrc(Encoded, VmVsrc);
2184}
2185
2186unsigned encodeFieldVaVdst(unsigned Encoded, unsigned VaVdst) {
2187 return packBits(VaVdst, Encoded, getVaVdstBitShift(), getVaVdstBitWidth());
2188}
2189
2190unsigned encodeFieldVaVdst(unsigned VaVdst, const MCSubtargetInfo &STI) {
2191 unsigned Encoded = getDefaultDepCtrEncoding(STI);
2192 return encodeFieldVaVdst(Encoded, VaVdst);
2193}
2194
2195unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst) {
2196 return packBits(SaSdst, Encoded, getSaSdstBitShift(), getSaSdstBitWidth());
2197}
2198
2199unsigned encodeFieldSaSdst(unsigned SaSdst, const MCSubtargetInfo &STI) {
2200 unsigned Encoded = getDefaultDepCtrEncoding(STI);
2201 return encodeFieldSaSdst(Encoded, SaSdst);
2202}
2203
2204unsigned encodeFieldVaSdst(unsigned Encoded, unsigned VaSdst) {
2205 return packBits(VaSdst, Encoded, getVaSdstBitShift(), getVaSdstBitWidth());
2206}
2207
2208unsigned encodeFieldVaSdst(unsigned VaSdst, const MCSubtargetInfo &STI) {
2209 unsigned Encoded = getDefaultDepCtrEncoding(STI);
2210 return encodeFieldVaSdst(Encoded, VaSdst);
2211}
2212
2213unsigned encodeFieldVaVcc(unsigned Encoded, unsigned VaVcc) {
2214 return packBits(VaVcc, Encoded, getVaVccBitShift(), getVaVccBitWidth());
2215}
2216
2217unsigned encodeFieldVaVcc(unsigned VaVcc, const MCSubtargetInfo &STI) {
2218 unsigned Encoded = getDefaultDepCtrEncoding(STI);
2219 return encodeFieldVaVcc(Encoded, VaVcc);
2220}
2221
2222unsigned encodeFieldVaSsrc(unsigned Encoded, unsigned VaSsrc) {
2223 return packBits(VaSsrc, Encoded, getVaSsrcBitShift(), getVaSsrcBitWidth());
2224}
2225
2226unsigned encodeFieldVaSsrc(unsigned VaSsrc, const MCSubtargetInfo &STI) {
2227 unsigned Encoded = getDefaultDepCtrEncoding(STI);
2228 return encodeFieldVaSsrc(Encoded, VaSsrc);
2229}
2230
2231unsigned encodeFieldHoldCnt(unsigned Encoded, unsigned HoldCnt,
2232 const IsaVersion &Version) {
2233 return packBits(HoldCnt, Encoded, getHoldCntBitShift(),
2234 getHoldCntWidth(Version.Major, Version.Minor));
2235}
2236
2237unsigned encodeFieldHoldCnt(unsigned HoldCnt, const MCSubtargetInfo &STI) {
2238 unsigned Encoded = getDefaultDepCtrEncoding(STI);
2239 return encodeFieldHoldCnt(Encoded, HoldCnt, getIsaVersion(STI.getCPU()));
2240}
2241
2242} // namespace DepCtr
2243
2244//===----------------------------------------------------------------------===//
2245// exp tgt
2246//===----------------------------------------------------------------------===//
2247
2248namespace Exp {
2249
2250struct ExpTgt {
2252 unsigned Tgt;
2253 unsigned MaxIndex;
2254};
2255
2256// clang-format off
// Table of export targets searched by getTgtName() and getTgtId(). Each entry
// is {name, first target id, maximum index}. Both lookups scan the table in
// order and stop at the first matching entry. An entry whose maximum index is
// 0 is matched by its exact name and reported without an index.
2257static constexpr ExpTgt ExpTgtInfo[] = {
2258 {{"null"}, ET_NULL, ET_NULL_MAX_IDX},
2259 {{"mrtz"}, ET_MRTZ, ET_MRTZ_MAX_IDX},
2260 {{"prim"}, ET_PRIM, ET_PRIM_MAX_IDX},
2261 {{"mrt"}, ET_MRT0, ET_MRT_MAX_IDX},
2262 {{"pos"}, ET_POS0, ET_POS_MAX_IDX},
2263 {{"dual_src_blend"},ET_DUAL_SRC_BLEND0, ET_DUAL_SRC_BLEND_MAX_IDX},
2264 {{"param"}, ET_PARAM0, ET_PARAM_MAX_IDX},
2265};
2266// clang-format on
2267
2268bool getTgtName(unsigned Id, StringRef &Name, int &Index) {
2269 for (const ExpTgt &Val : ExpTgtInfo) {
2270 if (Val.Tgt <= Id && Id <= Val.Tgt + Val.MaxIndex) {
2271 Index = (Val.MaxIndex == 0) ? -1 : (Id - Val.Tgt);
2272 Name = Val.Name;
2273 return true;
2274 }
2275 }
2276 return false;
2277}
2278
2279unsigned getTgtId(const StringRef Name) {
2280
2281 for (const ExpTgt &Val : ExpTgtInfo) {
2282 if (Val.MaxIndex == 0 && Name == Val.Name)
2283 return Val.Tgt;
2284
2285 if (Val.MaxIndex > 0 && Name.starts_with(Val.Name)) {
2286 StringRef Suffix = Name.drop_front(Val.Name.size());
2287
2288 unsigned Id;
2289 if (Suffix.getAsInteger(10, Id) || Id > Val.MaxIndex)
2290 return ET_INVALID;
2291
2292 // Disable leading zeroes
2293 if (Suffix.size() > 1 && Suffix[0] == '0')
2294 return ET_INVALID;
2295
2296 return Val.Tgt + Id;
2297 }
2298 }
2299 return ET_INVALID;
2300}
2301
2302bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI) {
2303 switch (Id) {
2304 case ET_NULL:
2305 return !isGFX11Plus(STI);
2306 case ET_POS4:
2307 case ET_PRIM:
2308 return isGFX10Plus(STI);
2309 case ET_DUAL_SRC_BLEND0:
2310 case ET_DUAL_SRC_BLEND1:
2311 return isGFX11Plus(STI);
2312 default:
2313 if (Id >= ET_PARAM0 && Id <= ET_PARAM31)
2314 return !isGFX11Plus(STI) || isGFX13Plus(STI);
2315 return true;
2316 }
2317}
2318
2319} // namespace Exp
2320
2321//===----------------------------------------------------------------------===//
2322// MTBUF Format
2323//===----------------------------------------------------------------------===//
2324
2325namespace MTBUFFormat {
2326
2327int64_t getDfmt(const StringRef Name) {
2328 for (int Id = DFMT_MIN; Id <= DFMT_MAX; ++Id) {
2329 if (Name == DfmtSymbolic[Id])
2330 return Id;
2331 }
2332 return DFMT_UNDEF;
2333}
2334
2336 assert(Id <= DFMT_MAX);
2337 return DfmtSymbolic[Id];
2338}
2339
2341 if (isSI(STI) || isCI(STI))
2342 return NfmtSymbolicSICI;
2343 if (isVI(STI) || isGFX9(STI))
2344 return NfmtSymbolicVI;
2345 return NfmtSymbolicGFX10;
2346}
2347
2348int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI) {
2349 const auto *lookupTable = getNfmtLookupTable(STI);
2350 for (int Id = NFMT_MIN; Id <= NFMT_MAX; ++Id) {
2351 if (Name == lookupTable[Id])
2352 return Id;
2353 }
2354 return NFMT_UNDEF;
2355}
2356
2357StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI) {
2358 assert(Id <= NFMT_MAX);
2359 return getNfmtLookupTable(STI)[Id];
2360}
2361
2362bool isValidDfmtNfmt(unsigned Id, const MCSubtargetInfo &STI) {
2363 unsigned Dfmt;
2364 unsigned Nfmt;
2365 decodeDfmtNfmt(Id, Dfmt, Nfmt);
2366 return isValidNfmt(Nfmt, STI);
2367}
2368
2369bool isValidNfmt(unsigned Id, const MCSubtargetInfo &STI) {
2370 return !getNfmtName(Id, STI).empty();
2371}
2372
2373int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt) {
2374 return (Dfmt << DFMT_SHIFT) | (Nfmt << NFMT_SHIFT);
2375}
2376
2377void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt) {
2378 Dfmt = (Format >> DFMT_SHIFT) & DFMT_MASK;
2379 Nfmt = (Format >> NFMT_SHIFT) & NFMT_MASK;
2380}
2381
2382int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI) {
2383 if (isGFX11Plus(STI)) {
2384 for (int Id = UfmtGFX11::UFMT_FIRST; Id <= UfmtGFX11::UFMT_LAST; ++Id) {
2385 if (Name == UfmtSymbolicGFX11[Id])
2386 return Id;
2387 }
2388 } else {
2389 for (int Id = UfmtGFX10::UFMT_FIRST; Id <= UfmtGFX10::UFMT_LAST; ++Id) {
2390 if (Name == UfmtSymbolicGFX10[Id])
2391 return Id;
2392 }
2393 }
2394 return UFMT_UNDEF;
2395}
2396
2398 if (isValidUnifiedFormat(Id, STI))
2399 return isGFX10(STI) ? UfmtSymbolicGFX10[Id] : UfmtSymbolicGFX11[Id];
2400 return "";
2401}
2402
2403bool isValidUnifiedFormat(unsigned Id, const MCSubtargetInfo &STI) {
2404 return isGFX10(STI) ? Id <= UfmtGFX10::UFMT_LAST : Id <= UfmtGFX11::UFMT_LAST;
2405}
2406
2407int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt,
2408 const MCSubtargetInfo &STI) {
2409 int64_t Fmt = encodeDfmtNfmt(Dfmt, Nfmt);
2410 if (isGFX11Plus(STI)) {
2411 for (int Id = UfmtGFX11::UFMT_FIRST; Id <= UfmtGFX11::UFMT_LAST; ++Id) {
2412 if (Fmt == DfmtNfmt2UFmtGFX11[Id])
2413 return Id;
2414 }
2415 } else {
2416 for (int Id = UfmtGFX10::UFMT_FIRST; Id <= UfmtGFX10::UFMT_LAST; ++Id) {
2417 if (Fmt == DfmtNfmt2UFmtGFX10[Id])
2418 return Id;
2419 }
2420 }
2421 return UFMT_UNDEF;
2422}
2423
2424bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI) {
2425 return isGFX10Plus(STI) ? (Val <= UFMT_MAX) : (Val <= DFMT_NFMT_MAX);
2426}
2427
2429 if (isGFX10Plus(STI))
2430 return UFMT_DEFAULT;
2431 return DFMT_NFMT_DEFAULT;
2432}
2433
2434} // namespace MTBUFFormat
2435
2436//===----------------------------------------------------------------------===//
2437// SendMsg
2438//===----------------------------------------------------------------------===//
2439
2440namespace SendMsg {
2441
2445
2446bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI) {
2447 return (MsgId & ~(getMsgIdMask(STI))) == 0;
2448}
2449
2450bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI,
2451 bool Strict) {
2452 assert(isValidMsgId(MsgId, STI));
2453
2454 if (!Strict)
2455 return 0 <= OpId && isUInt<OP_WIDTH_>(OpId);
2456
2457 if (msgRequiresOp(MsgId, STI)) {
2458 if (MsgId == ID_GS_PreGFX11 && OpId == OP_GS_NOP)
2459 return false;
2460
2461 return !getMsgOpName(MsgId, OpId, STI).empty();
2462 }
2463
2464 return OpId == OP_NONE_;
2465}
2466
2467bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId,
2468 const MCSubtargetInfo &STI, bool Strict) {
2469 assert(isValidMsgOp(MsgId, OpId, STI, Strict));
2470
2471 if (!Strict)
2473
2474 if (!isGFX11Plus(STI)) {
2475 switch (MsgId) {
2476 case ID_GS_PreGFX11:
2479 return (OpId == OP_GS_NOP)
2482 }
2483 }
2484 return StreamId == STREAM_ID_NONE_;
2485}
2486
2487bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI) {
2488 return MsgId == ID_SYSMSG ||
2489 (!isGFX11Plus(STI) &&
2490 (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11));
2491}
2492
2493bool msgSupportsStream(int64_t MsgId, int64_t OpId,
2494 const MCSubtargetInfo &STI) {
2495 return !isGFX11Plus(STI) &&
2496 (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11) &&
2497 OpId != OP_GS_NOP;
2498}
2499
2500void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId,
2501 uint16_t &StreamId, const MCSubtargetInfo &STI) {
2502 MsgId = Val & getMsgIdMask(STI);
2503 if (isGFX11Plus(STI)) {
2504 OpId = 0;
2505 StreamId = 0;
2506 } else {
2507 OpId = (Val & OP_MASK_) >> OP_SHIFT_;
2509 }
2510}
2511
2513 return MsgId | (OpId << OP_SHIFT_) | (StreamId << STREAM_ID_SHIFT_);
2514}
2515
2516bool msgDoesNotUseM0(int64_t MsgId, const MCSubtargetInfo &STI) {
2517 // Explicitly list message types that are known to not use m0.
2518 // This is safer than excluding only GS_ALLOC_REQ, in case new message
2519 // types are added in the future that do use m0.
2520 if (isGFX11Plus(STI)) {
2521 switch (MsgId) {
2523 return true;
2524 default:
2525 break;
2526 }
2527 }
2528 switch (MsgId) {
2529 case ID_SAVEWAVE:
2530 case ID_STALL_WAVE_GEN:
2531 case ID_HALT_WAVES:
2532 case ID_ORDERED_PS_DONE:
2534 case ID_GET_DOORBELL:
2535 case ID_GET_DDID:
2536 case ID_SYSMSG:
2537 return true;
2538 default:
2539 return false;
2540 }
2541}
2542
2543} // namespace SendMsg
2544
2545//===----------------------------------------------------------------------===//
2546//
2547//===----------------------------------------------------------------------===//
2548
2550 return F.getFnAttributeAsParsedInteger("InitialPSInputAddr", 0);
2551}
2552
2554 // As a safe default always respond as if PS has color exports.
2555 return F.getFnAttributeAsParsedInteger(
2556 "amdgpu-color-export",
2557 F.getCallingConv() == CallingConv::AMDGPU_PS ? 1 : 0) != 0;
2558}
2559
2561 return F.getFnAttributeAsParsedInteger("amdgpu-depth-export", 0) != 0;
2562}
2563
2565 unsigned BlockSize =
2566 F.getFnAttributeAsParsedInteger("amdgpu-dynamic-vgpr-block-size", 0);
2567
2568 if (BlockSize == 16 || BlockSize == 32)
2569 return BlockSize;
2570
2571 return 0;
2572}
2573
2574bool hasXNACK(const MCSubtargetInfo &STI) {
2575 return STI.hasFeature(AMDGPU::FeatureXNACK);
2576}
2577
2579 return STI.hasFeature(AMDGPU::FeatureMIMG_R128) &&
2580 !STI.hasFeature(AMDGPU::FeatureR128A16);
2581}
2582
2583bool hasA16(const MCSubtargetInfo &STI) {
2584 return STI.hasFeature(AMDGPU::FeatureA16);
2585}
2586
2587bool hasG16(const MCSubtargetInfo &STI) {
2588 return STI.hasFeature(AMDGPU::FeatureG16);
2589}
2590
2592 return !STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem) && !isCI(STI) &&
2593 !isSI(STI);
2594}
2595
2596bool hasGDS(const MCSubtargetInfo &STI) {
2597 return STI.hasFeature(AMDGPU::FeatureGDS);
2598}
2599
2600unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler) {
2601 auto Version = getIsaVersion(STI.getCPU());
2602 if (Version.Major == 10)
2603 return Version.Minor >= 3 ? 13 : 5;
2604 if (Version.Major == 11)
2605 return 5;
2606 if (Version.Major >= 12)
2607 return HasSampler ? 4 : 5;
2608 return 0;
2609}
2610
2612 if (isGFX1250Plus(STI))
2613 return 32;
2614 return 16;
2615}
2616
2617bool isSI(const MCSubtargetInfo &STI) {
2618 return STI.hasFeature(AMDGPU::FeatureSouthernIslands);
2619}
2620
2621bool isCI(const MCSubtargetInfo &STI) {
2622 return STI.hasFeature(AMDGPU::FeatureSeaIslands);
2623}
2624
2625bool isVI(const MCSubtargetInfo &STI) {
2626 return STI.hasFeature(AMDGPU::FeatureVolcanicIslands);
2627}
2628
2629bool isGFX9(const MCSubtargetInfo &STI) {
2630 return STI.hasFeature(AMDGPU::FeatureGFX9);
2631}
2632
2634 return isGFX9(STI) || isGFX10(STI);
2635}
2636
2638 return isGFX9(STI) || isGFX10(STI) || isGFX11(STI);
2639}
2640
2642 return isVI(STI) || isGFX9(STI) || isGFX10(STI);
2643}
2644
2645bool isGFX8Plus(const MCSubtargetInfo &STI) {
2646 return isVI(STI) || isGFX9Plus(STI);
2647}
2648
2649bool isGFX9Plus(const MCSubtargetInfo &STI) {
2650 return isGFX9(STI) || isGFX10Plus(STI);
2651}
2652
2653bool isNotGFX9Plus(const MCSubtargetInfo &STI) { return !isGFX9Plus(STI); }
2654
2655bool isGFX10(const MCSubtargetInfo &STI) {
2656 return STI.hasFeature(AMDGPU::FeatureGFX10);
2657}
2658
2660 return isGFX10(STI) || isGFX11(STI);
2661}
2662
2664 return isGFX10(STI) || isGFX11Plus(STI);
2665}
2666
2667bool isGFX11(const MCSubtargetInfo &STI) {
2668 return STI.hasFeature(AMDGPU::FeatureGFX11);
2669}
2670
2672 return isGFX11(STI) || isGFX12Plus(STI);
2673}
2674
2675bool isGFX12(const MCSubtargetInfo &STI) {
2676 return STI.getFeatureBits()[AMDGPU::FeatureGFX12];
2677}
2678
2680 return isGFX12(STI) || isGFX13Plus(STI);
2681}
2682
2683bool isNotGFX12Plus(const MCSubtargetInfo &STI) { return !isGFX12Plus(STI); }
2684
2685bool isGFX1250(const MCSubtargetInfo &STI) {
2686 return STI.getFeatureBits()[AMDGPU::FeatureGFX1250Insts] && !isGFX13(STI);
2687}
2688
2690 return STI.getFeatureBits()[AMDGPU::FeatureGFX1250Insts];
2691}
2692
2693bool isGFX13(const MCSubtargetInfo &STI) {
2694 return STI.getFeatureBits()[AMDGPU::FeatureGFX13];
2695}
2696
2697bool isGFX13Plus(const MCSubtargetInfo &STI) { return isGFX13(STI); }
2698
2700 if (isGFX1250(STI))
2701 return false;
2702 return isGFX10Plus(STI);
2703}
2704
2705bool isNotGFX11Plus(const MCSubtargetInfo &STI) { return !isGFX11Plus(STI); }
2706
2708 return isSI(STI) || isCI(STI) || isVI(STI) || isGFX9(STI);
2709}
2710
2712 return isGFX10(STI) && !AMDGPU::isGFX10_BEncoding(STI);
2713}
2714
2716 return STI.hasFeature(AMDGPU::FeatureGCN3Encoding);
2717}
2718
2720 return STI.hasFeature(AMDGPU::FeatureGFX10_AEncoding);
2721}
2722
2724 return STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding);
2725}
2726
2728 return STI.hasFeature(AMDGPU::FeatureGFX10_3Insts);
2729}
2730
2732 return isGFX10_BEncoding(STI) && !isGFX12Plus(STI);
2733}
2734
2735bool isGFX90A(const MCSubtargetInfo &STI) {
2736 return STI.hasFeature(AMDGPU::FeatureGFX90AInsts);
2737}
2738
2739bool isGFX940(const MCSubtargetInfo &STI) {
2740 return STI.hasFeature(AMDGPU::FeatureGFX940Insts);
2741}
2742
2744 return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch);
2745}
2746
2748 return STI.hasFeature(AMDGPU::FeatureMAIInsts);
2749}
2750
2751bool hasVOPD(const MCSubtargetInfo &STI) {
2752 return STI.hasFeature(AMDGPU::FeatureVOPDInsts);
2753}
2754
2756 return STI.hasFeature(AMDGPU::FeatureDPPSrc1SGPR);
2757}
2758
2760 return STI.hasFeature(AMDGPU::FeatureKernargPreload);
2761}
2762
2763int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR,
2764 int32_t ArgNumVGPR) {
2765 if (has90AInsts && ArgNumAGPR)
2766 return alignTo(ArgNumVGPR, 4) + ArgNumAGPR;
2767 return std::max(ArgNumVGPR, ArgNumAGPR);
2768}
2769
2771 const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID);
2772 const MCRegister FirstSubReg = TRI->getSubReg(Reg, AMDGPU::sub0);
2773 return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) ||
2774 Reg == AMDGPU::SCC;
2775}
2776
2780
// Expands to a `using namespace AMDGPU;` directive followed by a switch over
// Reg.id(). The default case returns Reg unchanged; every other case is
// produced by one of the CASE_* helper macros, which must be defined before
// this macro is expanded. The expansion site must have `Reg` in scope, plus
// whatever the active CASE_* definitions reference.
2781#define MAP_REG2REG \
2782 using namespace AMDGPU; \
2783 switch (Reg.id()) { \
2784 default: \
2785 return Reg; \
2786 CASE_CI_VI(FLAT_SCR) \
2787 CASE_CI_VI(FLAT_SCR_LO) \
2788 CASE_CI_VI(FLAT_SCR_HI) \
2789 CASE_VI_GFX9PLUS(TTMP0) \
2790 CASE_VI_GFX9PLUS(TTMP1) \
2791 CASE_VI_GFX9PLUS(TTMP2) \
2792 CASE_VI_GFX9PLUS(TTMP3) \
2793 CASE_VI_GFX9PLUS(TTMP4) \
2794 CASE_VI_GFX9PLUS(TTMP5) \
2795 CASE_VI_GFX9PLUS(TTMP6) \
2796 CASE_VI_GFX9PLUS(TTMP7) \
2797 CASE_VI_GFX9PLUS(TTMP8) \
2798 CASE_VI_GFX9PLUS(TTMP9) \
2799 CASE_VI_GFX9PLUS(TTMP10) \
2800 CASE_VI_GFX9PLUS(TTMP11) \
2801 CASE_VI_GFX9PLUS(TTMP12) \
2802 CASE_VI_GFX9PLUS(TTMP13) \
2803 CASE_VI_GFX9PLUS(TTMP14) \
2804 CASE_VI_GFX9PLUS(TTMP15) \
2805 CASE_VI_GFX9PLUS(TTMP0_TTMP1) \
2806 CASE_VI_GFX9PLUS(TTMP2_TTMP3) \
2807 CASE_VI_GFX9PLUS(TTMP4_TTMP5) \
2808 CASE_VI_GFX9PLUS(TTMP6_TTMP7) \
2809 CASE_VI_GFX9PLUS(TTMP8_TTMP9) \
2810 CASE_VI_GFX9PLUS(TTMP10_TTMP11) \
2811 CASE_VI_GFX9PLUS(TTMP12_TTMP13) \
2812 CASE_VI_GFX9PLUS(TTMP14_TTMP15) \
2813 CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3) \
2814 CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7) \
2815 CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11) \
2816 CASE_VI_GFX9PLUS(TTMP12_TTMP13_TTMP14_TTMP15) \
2817 CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \
2818 CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \
2819 CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
2820 CASE_VI_GFX9PLUS( \
2821 TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
2822 CASE_GFXPRE11_GFX11PLUS(M0) \
2823 CASE_GFXPRE11_GFX11PLUS(SGPR_NULL) \
2824 CASE_GFXPRE11_GFX11PLUS_TO(SGPR_NULL64, SGPR_NULL) \
2825 }
2826
// Case generators for the pseudo -> subtarget-specific direction of
// MAP_REG2REG. Each one emits a single `case` that returns the register
// variant matching the subtarget; `STI` must be in scope where they expand.

// Selects the _ci or _vi variant of \p node. The assertion is placed after
// the case label so that it actually runs: a statement sitting between the
// previous case's `return` and this label is unreachable inside the switch.
#define CASE_CI_VI(node)                                                       \
  case node:                                                                   \
    assert(!isSI(STI));                                                        \
    return isCI(STI) ? node##_ci : node##_vi;

// Selects the _gfx9plus variant of \p node on GFX9+ and the _vi variant
// otherwise.
#define CASE_VI_GFX9PLUS(node)                                                 \
  case node:                                                                   \
    return isGFX9Plus(STI) ? node##_gfx9plus : node##_vi;

// Selects the _gfx11plus variant of \p node on GFX11+ and the _gfxpre11
// variant otherwise.
#define CASE_GFXPRE11_GFX11PLUS(node)                                          \
  case node:                                                                   \
    return isGFX11Plus(STI) ? node##_gfx11plus : node##_gfxpre11;

// Like CASE_GFXPRE11_GFX11PLUS, but matches \p node and returns the variants
// of a different register, \p result.
#define CASE_GFXPRE11_GFX11PLUS_TO(node, result)                               \
  case node:                                                                   \
    return isGFX11Plus(STI) ? result##_gfx11plus : result##_gfxpre11;
2843
2845 if (STI.getTargetTriple().getArch() == Triple::r600)
2846 return Reg;
2848}
2849
2850#undef CASE_CI_VI
2851#undef CASE_VI_GFX9PLUS
2852#undef CASE_GFXPRE11_GFX11PLUS
2853#undef CASE_GFXPRE11_GFX11PLUS_TO
2854
// Case generators for the reverse direction of MAP_REG2REG: each one maps
// both subtarget-specific variants of \p node back to \p node itself.

#define CASE_CI_VI(node)                                                       \
  case node##_vi:                                                              \
  case node##_ci:                                                              \
    return node;

#define CASE_VI_GFX9PLUS(node)                                                 \
  case node##_gfx9plus:                                                        \
  case node##_vi:                                                              \
    return node;

#define CASE_GFXPRE11_GFX11PLUS(node)                                          \
  case node##_gfxpre11:                                                        \
  case node##_gfx11plus:                                                       \
    return node;

// Expands to nothing in this direction, so no case is emitted for it.
#define CASE_GFXPRE11_GFX11PLUS_TO(node, result)
2868
2870
2872 switch (Reg.id()) {
2873 case AMDGPU::SRC_SHARED_BASE_LO:
2874 case AMDGPU::SRC_SHARED_BASE:
2875 case AMDGPU::SRC_SHARED_LIMIT_LO:
2876 case AMDGPU::SRC_SHARED_LIMIT:
2877 case AMDGPU::SRC_PRIVATE_BASE_LO:
2878 case AMDGPU::SRC_PRIVATE_BASE:
2879 case AMDGPU::SRC_PRIVATE_LIMIT_LO:
2880 case AMDGPU::SRC_PRIVATE_LIMIT:
2881 case AMDGPU::SRC_FLAT_SCRATCH_BASE_LO:
2882 case AMDGPU::SRC_FLAT_SCRATCH_BASE_HI:
2883 case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2884 return true;
2885 case AMDGPU::SRC_VCCZ:
2886 case AMDGPU::SRC_EXECZ:
2887 case AMDGPU::SRC_SCC:
2888 return true;
2889 case AMDGPU::SGPR_NULL:
2890 return true;
2891 default:
2892 return false;
2893 }
2894}
2895
2896#undef CASE_CI_VI
2897#undef CASE_VI_GFX9PLUS
2898#undef CASE_GFXPRE11_GFX11PLUS
2899#undef CASE_GFXPRE11_GFX11PLUS_TO
2900#undef MAP_REG2REG
2901
2902bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2903 assert(OpNo < Desc.NumOperands);
2904 unsigned OpType = Desc.operands()[OpNo].OperandType;
2905 return OpType >= AMDGPU::OPERAND_KIMM_FIRST &&
2906 OpType <= AMDGPU::OPERAND_KIMM_LAST;
2907}
2908
2909bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2910 assert(OpNo < Desc.NumOperands);
2911 unsigned OpType = Desc.operands()[OpNo].OperandType;
2912 switch (OpType) {
2927 return true;
2928 default:
2929 return false;
2930 }
2931}
2932
2933bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2934 assert(OpNo < Desc.NumOperands);
2935 unsigned OpType = Desc.operands()[OpNo].OperandType;
2936 return (OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
2940}
2941
2942// Avoid using MCRegisterClass::getSize, since that function will go away
2943// (move from MC* level to Target* level). Return size in bits.
/// Maps the register class id \p RCID to a fixed size in bits.
///
/// The cases are grouped by size, from 16 up to 1024 bits. Any class id that
/// is not listed reaches llvm_unreachable(), so a newly added register class
/// needs a case here before it can be queried.
///
/// \returns the size in bits of registers in the class \p RCID.
2944unsigned getRegBitWidth(unsigned RCID) {
2945 switch (RCID) {
2946 case AMDGPU::VGPR_16RegClassID:
2947 case AMDGPU::VGPR_16_Lo128RegClassID:
2948 case AMDGPU::SGPR_LO16RegClassID:
2949 case AMDGPU::AGPR_LO16RegClassID:
2950 return 16;
2951 case AMDGPU::SGPR_32RegClassID:
2952 case AMDGPU::VGPR_32RegClassID:
2953 case AMDGPU::VGPR_32_Lo256RegClassID:
2954 case AMDGPU::VRegOrLds_32RegClassID:
2955 case AMDGPU::AGPR_32RegClassID:
2956 case AMDGPU::VS_32RegClassID:
2957 case AMDGPU::AV_32RegClassID:
2958 case AMDGPU::SReg_32RegClassID:
2959 case AMDGPU::SReg_32_XM0RegClassID:
2960 case AMDGPU::SRegOrLds_32RegClassID:
2961 return 32;
2962 case AMDGPU::SGPR_64RegClassID:
2963 case AMDGPU::VS_64RegClassID:
2964 case AMDGPU::SReg_64RegClassID:
2965 case AMDGPU::VReg_64RegClassID:
2966 case AMDGPU::AReg_64RegClassID:
2967 case AMDGPU::SReg_64_XEXECRegClassID:
2968 case AMDGPU::VReg_64_Align2RegClassID:
2969 case AMDGPU::AReg_64_Align2RegClassID:
2970 case AMDGPU::AV_64RegClassID:
2971 case AMDGPU::AV_64_Align2RegClassID:
2972 case AMDGPU::VReg_64_Lo256_Align2RegClassID:
2973 case AMDGPU::VS_64_Lo256RegClassID:
2974 return 64;
2975 case AMDGPU::SGPR_96RegClassID:
2976 case AMDGPU::SReg_96RegClassID:
2977 case AMDGPU::VReg_96RegClassID:
2978 case AMDGPU::AReg_96RegClassID:
2979 case AMDGPU::VReg_96_Align2RegClassID:
2980 case AMDGPU::AReg_96_Align2RegClassID:
2981 case AMDGPU::AV_96RegClassID:
2982 case AMDGPU::AV_96_Align2RegClassID:
2983 case AMDGPU::VReg_96_Lo256_Align2RegClassID:
2984 return 96;
2985 case AMDGPU::SGPR_128RegClassID:
2986 case AMDGPU::SReg_128RegClassID:
2987 case AMDGPU::VReg_128RegClassID:
2988 case AMDGPU::AReg_128RegClassID:
2989 case AMDGPU::VReg_128_Align2RegClassID:
2990 case AMDGPU::AReg_128_Align2RegClassID:
2991 case AMDGPU::AV_128RegClassID:
2992 case AMDGPU::AV_128_Align2RegClassID:
2993 case AMDGPU::SReg_128_XNULLRegClassID:
2994 case AMDGPU::VReg_128_Lo256_Align2RegClassID:
2995 return 128;
2996 case AMDGPU::SGPR_160RegClassID:
2997 case AMDGPU::SReg_160RegClassID:
2998 case AMDGPU::VReg_160RegClassID:
2999 case AMDGPU::AReg_160RegClassID:
3000 case AMDGPU::VReg_160_Align2RegClassID:
3001 case AMDGPU::AReg_160_Align2RegClassID:
3002 case AMDGPU::AV_160RegClassID:
3003 case AMDGPU::AV_160_Align2RegClassID:
3004 case AMDGPU::VReg_160_Lo256_Align2RegClassID:
3005 return 160;
3006 case AMDGPU::SGPR_192RegClassID:
3007 case AMDGPU::SReg_192RegClassID:
3008 case AMDGPU::VReg_192RegClassID:
3009 case AMDGPU::AReg_192RegClassID:
3010 case AMDGPU::VReg_192_Align2RegClassID:
3011 case AMDGPU::AReg_192_Align2RegClassID:
3012 case AMDGPU::AV_192RegClassID:
3013 case AMDGPU::AV_192_Align2RegClassID:
3014 case AMDGPU::VReg_192_Lo256_Align2RegClassID:
3015 return 192;
3016 case AMDGPU::SGPR_224RegClassID:
3017 case AMDGPU::SReg_224RegClassID:
3018 case AMDGPU::VReg_224RegClassID:
3019 case AMDGPU::AReg_224RegClassID:
3020 case AMDGPU::VReg_224_Align2RegClassID:
3021 case AMDGPU::AReg_224_Align2RegClassID:
3022 case AMDGPU::AV_224RegClassID:
3023 case AMDGPU::AV_224_Align2RegClassID:
3024 case AMDGPU::VReg_224_Lo256_Align2RegClassID:
3025 return 224;
3026 case AMDGPU::SGPR_256RegClassID:
3027 case AMDGPU::SReg_256RegClassID:
3028 case AMDGPU::VReg_256RegClassID:
3029 case AMDGPU::AReg_256RegClassID:
3030 case AMDGPU::VReg_256_Align2RegClassID:
3031 case AMDGPU::AReg_256_Align2RegClassID:
3032 case AMDGPU::AV_256RegClassID:
3033 case AMDGPU::AV_256_Align2RegClassID:
3034 case AMDGPU::SReg_256_XNULLRegClassID:
3035 case AMDGPU::VReg_256_Lo256_Align2RegClassID:
3036 return 256;
3037 case AMDGPU::SGPR_288RegClassID:
3038 case AMDGPU::SReg_288RegClassID:
3039 case AMDGPU::VReg_288RegClassID:
3040 case AMDGPU::AReg_288RegClassID:
3041 case AMDGPU::VReg_288_Align2RegClassID:
3042 case AMDGPU::AReg_288_Align2RegClassID:
3043 case AMDGPU::AV_288RegClassID:
3044 case AMDGPU::AV_288_Align2RegClassID:
3045 case AMDGPU::VReg_288_Lo256_Align2RegClassID:
3046 return 288;
3047 case AMDGPU::SGPR_320RegClassID:
3048 case AMDGPU::SReg_320RegClassID:
3049 case AMDGPU::VReg_320RegClassID:
3050 case AMDGPU::AReg_320RegClassID:
3051 case AMDGPU::VReg_320_Align2RegClassID:
3052 case AMDGPU::AReg_320_Align2RegClassID:
3053 case AMDGPU::AV_320RegClassID:
3054 case AMDGPU::AV_320_Align2RegClassID:
3055 case AMDGPU::VReg_320_Lo256_Align2RegClassID:
3056 return 320;
3057 case AMDGPU::SGPR_352RegClassID:
3058 case AMDGPU::SReg_352RegClassID:
3059 case AMDGPU::VReg_352RegClassID:
3060 case AMDGPU::AReg_352RegClassID:
3061 case AMDGPU::VReg_352_Align2RegClassID:
3062 case AMDGPU::AReg_352_Align2RegClassID:
3063 case AMDGPU::AV_352RegClassID:
3064 case AMDGPU::AV_352_Align2RegClassID:
3065 case AMDGPU::VReg_352_Lo256_Align2RegClassID:
3066 return 352;
3067 case AMDGPU::SGPR_384RegClassID:
3068 case AMDGPU::SReg_384RegClassID:
3069 case AMDGPU::VReg_384RegClassID:
3070 case AMDGPU::AReg_384RegClassID:
3071 case AMDGPU::VReg_384_Align2RegClassID:
3072 case AMDGPU::AReg_384_Align2RegClassID:
3073 case AMDGPU::AV_384RegClassID:
3074 case AMDGPU::AV_384_Align2RegClassID:
3075 case AMDGPU::VReg_384_Lo256_Align2RegClassID:
3076 return 384;
3077 case AMDGPU::SGPR_512RegClassID:
3078 case AMDGPU::SReg_512RegClassID:
3079 case AMDGPU::VReg_512RegClassID:
3080 case AMDGPU::AReg_512RegClassID:
3081 case AMDGPU::VReg_512_Align2RegClassID:
3082 case AMDGPU::AReg_512_Align2RegClassID:
3083 case AMDGPU::AV_512RegClassID:
3084 case AMDGPU::AV_512_Align2RegClassID:
3085 case AMDGPU::VReg_512_Lo256_Align2RegClassID:
3086 return 512;
3087 case AMDGPU::SGPR_1024RegClassID:
3088 case AMDGPU::SReg_1024RegClassID:
3089 case AMDGPU::VReg_1024RegClassID:
3090 case AMDGPU::AReg_1024RegClassID:
3091 case AMDGPU::VReg_1024_Align2RegClassID:
3092 case AMDGPU::AReg_1024_Align2RegClassID:
3093 case AMDGPU::AV_1024RegClassID:
3094 case AMDGPU::AV_1024_Align2RegClassID:
3095 case AMDGPU::VReg_1024_Lo256_Align2RegClassID:
3096 return 1024;
3097 default:
3098 llvm_unreachable("Unexpected register class");
3099 }
3100}
3101
3102unsigned getRegBitWidth(const MCRegisterClass &RC) {
3103 return getRegBitWidth(RC.getID());
3104}
3105
3106bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
3108 return true;
3109
3110 uint64_t Val = static_cast<uint64_t>(Literal);
3111 return (Val == llvm::bit_cast<uint64_t>(0.0)) ||
3112 (Val == llvm::bit_cast<uint64_t>(1.0)) ||
3113 (Val == llvm::bit_cast<uint64_t>(-1.0)) ||
3114 (Val == llvm::bit_cast<uint64_t>(0.5)) ||
3115 (Val == llvm::bit_cast<uint64_t>(-0.5)) ||
3116 (Val == llvm::bit_cast<uint64_t>(2.0)) ||
3117 (Val == llvm::bit_cast<uint64_t>(-2.0)) ||
3118 (Val == llvm::bit_cast<uint64_t>(4.0)) ||
3119 (Val == llvm::bit_cast<uint64_t>(-4.0)) ||
3120 (Val == 0x3fc45f306dc9c882 && HasInv2Pi);
3121}
3122
3123bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
3125 return true;
3126
3127 // The actual type of the operand does not seem to matter as long
3128 // as the bits match one of the inline immediate values. For example:
3129 //
3130 // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
3131 // so it is a legal inline immediate.
3132 //
3133 // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
3134 // floating-point, so it is a legal inline immediate.
3135
3136 uint32_t Val = static_cast<uint32_t>(Literal);
3137 return (Val == llvm::bit_cast<uint32_t>(0.0f)) ||
3138 (Val == llvm::bit_cast<uint32_t>(1.0f)) ||
3139 (Val == llvm::bit_cast<uint32_t>(-1.0f)) ||
3140 (Val == llvm::bit_cast<uint32_t>(0.5f)) ||
3141 (Val == llvm::bit_cast<uint32_t>(-0.5f)) ||
3142 (Val == llvm::bit_cast<uint32_t>(2.0f)) ||
3143 (Val == llvm::bit_cast<uint32_t>(-2.0f)) ||
3144 (Val == llvm::bit_cast<uint32_t>(4.0f)) ||
3145 (Val == llvm::bit_cast<uint32_t>(-4.0f)) ||
3146 (Val == 0x3e22f983 && HasInv2Pi);
3147}
3148
3149bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi) {
3150 if (!HasInv2Pi)
3151 return false;
3153 return true;
3154 uint16_t Val = static_cast<uint16_t>(Literal);
3155 return Val == 0x3F00 || // 0.5
3156 Val == 0xBF00 || // -0.5
3157 Val == 0x3F80 || // 1.0
3158 Val == 0xBF80 || // -1.0
3159 Val == 0x4000 || // 2.0
3160 Val == 0xC000 || // -2.0
3161 Val == 0x4080 || // 4.0
3162 Val == 0xC080 || // -4.0
3163 Val == 0x3E22; // 1.0 / (2.0 * pi)
3164}
3165
3166bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi) {
3167 return isInlinableLiteral32(Literal, HasInv2Pi);
3168}
3169
3170bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi) {
3171 if (!HasInv2Pi)
3172 return false;
3174 return true;
3175 uint16_t Val = static_cast<uint16_t>(Literal);
3176 return Val == 0x3C00 || // 1.0
3177 Val == 0xBC00 || // -1.0
3178 Val == 0x3800 || // 0.5
3179 Val == 0xB800 || // -0.5
3180 Val == 0x4000 || // 2.0
3181 Val == 0xC000 || // -2.0
3182 Val == 0x4400 || // 4.0
3183 Val == 0xC400 || // -4.0
3184 Val == 0x3118; // 1/2pi
3185}
3186
/// Computes the inline-constant encoding of \p Literal for a packed 16-bit
/// instruction.
///
/// Integers 0..64 map to 128..192 and integers -1..-16 map to 193..208, with
/// \p Literal interpreted as a sign-extended 32-bit value. The nine float
/// constants map to 240..248: when \p IsFloat is set they are matched as
/// half-precision bit patterns in the low 16 bits (high bits zero), otherwise
/// as single-precision bit patterns.
///
/// \returns the encoding, or std::nullopt if \p Literal is not inlinable.
std::optional<unsigned> getInlineEncodingV216(bool IsFloat, uint32_t Literal) {
  // Unfortunately, the Instruction Set Architecture Reference Guide is
  // misleading about how the inline operands work for (packed) 16-bit
  // instructions. In a nutshell, the actual HW behavior is:
  //
  //  - integer encodings (-16 .. 64) are always produced as sign-extended
  //    32-bit values
  //  - float encodings are produced as:
  //    - for F16 instructions: corresponding half-precision float values in
  //      the LSBs, 0 in the MSBs
  //    - for UI16 instructions: corresponding single-precision float value
  const int32_t AsInt = static_cast<int32_t>(Literal);
  if (AsInt >= -16 && AsInt <= 64)
    return AsInt >= 0 ? 128 + AsInt : 192 - AsInt;

  // Bit patterns in encoding order 240..248:
  // 0.5, -0.5, 1.0, -1.0, 2.0, -2.0, 4.0, -4.0, 1.0 / (2.0 * pi).
  constexpr unsigned NumFloatConstants = 9;
  static constexpr uint32_t HalfBits[NumFloatConstants] = {
      0x3800, 0xB800, 0x3C00, 0xBC00, 0x4000, 0xC000, 0x4400, 0xC400, 0x3118};
  static constexpr uint32_t SingleBits[NumFloatConstants] = {
      0x3F000000, 0xBF000000, 0x3F800000, 0xBF800000, 0x40000000,
      0xC0000000, 0x40800000, 0xC0800000, 0x3E22F983};

  const uint32_t *Patterns = IsFloat ? HalfBits : SingleBits;
  for (unsigned I = 0; I != NumFloatConstants; ++I) {
    if (Patterns[I] == Literal)
      return 240 + I;
  }

  return std::nullopt;
}
3239
3240// Encoding of the literal as an inline constant for a V_PK_*_IU16 instruction
3241// or nullopt.
3242std::optional<unsigned> getInlineEncodingV2I16(uint32_t Literal) {
3243 return getInlineEncodingV216(false, Literal);
3244}
3245
// Inline-constant encoding of \p Literal for a V_PK_*_BF16 instruction, or
// nullopt if it has none. Integers 0..64 map to 128..192, integers -1..-16 map
// to 193..208, and the nine bfloat16 constants map to 240..248.
std::optional<unsigned> getInlineEncodingV2BF16(uint32_t Literal) {
  const int32_t AsInt = static_cast<int32_t>(Literal);
  if (AsInt >= 0 && AsInt <= 64)
    return 128 + AsInt;
  if (AsInt < 0 && AsInt >= -16)
    return 192 - AsInt;

  // Bit patterns in encoding order 240..248:
  // 0.5, -0.5, 1.0, -1.0, 2.0, -2.0, 4.0, -4.0, 1.0 / (2.0 * pi).
  static constexpr uint32_t BF16Bits[] = {0x3F00, 0xBF00, 0x3F80,
                                          0xBF80, 0x4000, 0xC000,
                                          0x4080, 0xC080, 0x3E22};
  unsigned Encoding = 240;
  for (const uint32_t Bits : BF16Bits) {
    if (Bits == Literal)
      return Encoding;
    ++Encoding;
  }

  return std::nullopt;
}
3273
3274// Encoding of the literal as an inline constant for a V_PK_*_F16 instruction
3275// or nullopt.
3276std::optional<unsigned> getInlineEncodingV2F16(uint32_t Literal) {
3277 return getInlineEncodingV216(true, Literal);
3278}
3279
3280// Encoding of the literal as an inline constant for V_PK_FMAC_F16 instruction
3281// or nullopt. This accounts for different inline constant behavior:
3282// - Pre-GFX11: fp16 inline constants have the value in low 16 bits, 0 in high
3283// - GFX11+: fp16 inline constants are duplicated into both halves
3285 bool IsGFX11Plus) {
3286 // Pre-GFX11 behavior: f16 in low bits, 0 in high bits
3287 if (!IsGFX11Plus)
3288 return getInlineEncodingV216(/*IsFloat=*/true, Literal);
3289
3290 // GFX11+ behavior: f16 duplicated in both halves
3291 // First, check for sign-extended integer inline constants (-16 to 64)
3292 // These work the same across all generations
3293 int32_t Signed = static_cast<int32_t>(Literal);
3294 if (Signed >= 0 && Signed <= 64)
3295 return 128 + Signed;
3296
3297 if (Signed >= -16 && Signed <= -1)
3298 return 192 + std::abs(Signed);
3299
3300 // For float inline constants on GFX11+, both halves must be equal
3301 uint16_t Lo = static_cast<uint16_t>(Literal);
3302 uint16_t Hi = static_cast<uint16_t>(Literal >> 16);
3303 if (Lo != Hi)
3304 return std::nullopt;
3305 return getInlineEncodingV216(/*IsFloat=*/true, Lo);
3306}
3307
3308// Whether the given literal can be inlined for a V_PK_* instruction.
3310 switch (OpType) {
3313 return getInlineEncodingV216(false, Literal).has_value();
3316 return getInlineEncodingV216(true, Literal).has_value();
3318 llvm_unreachable("OPERAND_REG_IMM_V2FP16_SPLAT is not supported");
3323 return false;
3324 default:
3325 llvm_unreachable("bad packed operand type");
3326 }
3327}
3328
3329// Whether the given literal can be inlined for a V_PK_*_IU16 instruction.
3333
3334// Whether the given literal can be inlined for a V_PK_*_BF16 instruction.
3338
3339// Whether the given literal can be inlined for a V_PK_*_F16 instruction.
3343
3344// Whether the given literal can be inlined for V_PK_FMAC_F16 instruction.
3346 return getPKFMACF16InlineEncoding(Literal, IsGFX11Plus).has_value();
3347}
3348
3349bool isValid32BitLiteral(uint64_t Val, bool IsFP64) {
3350 if (IsFP64)
3351 return !Lo_32(Val);
3352
3353 return isUInt<32>(Val) || isInt<32>(Val);
3354}
3355
3356int64_t encode32BitLiteral(int64_t Imm, OperandType Type, bool IsLit) {
3357 switch (Type) {
3358 default:
3359 break;
3364 return Imm & 0xffff;
3378 return Lo_32(Imm);
3381 return IsLit ? Imm : Hi_32(Imm);
3382 }
3383 return Imm;
3384}
3385
3387 const Function *F = A->getParent();
3388
3389 // Arguments to compute shaders are never a source of divergence.
3390 CallingConv::ID CC = F->getCallingConv();
3391 switch (CC) {
3394 return true;
3405 // For non-compute shaders, SGPR inputs are marked with either inreg or
3406 // byval. Everything else is in VGPRs.
3407 return A->hasAttribute(Attribute::InReg) ||
3408 A->hasAttribute(Attribute::ByVal);
3409 default:
3410 // TODO: treat i1 as divergent?
3411 return A->hasAttribute(Attribute::InReg);
3412 }
3413}
3414
3415bool isArgPassedInSGPR(const CallBase *CB, unsigned ArgNo) {
3416 // Arguments to compute shaders are never a source of divergence.
3418 switch (CC) {
3421 return true;
3432 // For non-compute shaders, SGPR inputs are marked with either inreg or
3433 // byval. Everything else is in VGPRs.
3434 return CB->paramHasAttr(ArgNo, Attribute::InReg) ||
3435 CB->paramHasAttr(ArgNo, Attribute::ByVal);
3436 default:
3437 return CB->paramHasAttr(ArgNo, Attribute::InReg);
3438 }
3439}
3440
3441static bool hasSMEMByteOffset(const MCSubtargetInfo &ST) {
3442 return isGCN3Encoding(ST) || isGFX10Plus(ST);
3443}
3444
3446 int64_t EncodedOffset) {
3447 if (isGFX12Plus(ST))
3448 return isUInt<23>(EncodedOffset);
3449
3450 return hasSMEMByteOffset(ST) ? isUInt<20>(EncodedOffset)
3451 : isUInt<8>(EncodedOffset);
3452}
3453
3455 int64_t EncodedOffset, bool IsBuffer) {
3456 if (isGFX12Plus(ST)) {
3457 if (IsBuffer && EncodedOffset < 0)
3458 return false;
3459 return isInt<24>(EncodedOffset);
3460 }
3461
3462 return !IsBuffer && hasSMRDSignedImmOffset(ST) && isInt<21>(EncodedOffset);
3463}
3464
/// \returns true if \p ByteOffset is a multiple of four.
static bool isDwordAligned(uint64_t ByteOffset) {
  return ByteOffset % 4 == 0;
}
3468
3470 uint64_t ByteOffset) {
3471 if (hasSMEMByteOffset(ST))
3472 return ByteOffset;
3473
3474 assert(isDwordAligned(ByteOffset));
3475 return ByteOffset >> 2;
3476}
3477
3478std::optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
3479 int64_t ByteOffset, bool IsBuffer,
3480 bool HasSOffset) {
3481 // For unbuffered smem loads, it is illegal for the Immediate Offset to be
3482 // negative if the resulting (Offset + (M0 or SOffset or zero) is negative.
3483 // Handle case where SOffset is not present.
3484 if (!IsBuffer && !HasSOffset && ByteOffset < 0 && hasSMRDSignedImmOffset(ST))
3485 return std::nullopt;
3486
3487 if (isGFX12Plus(ST)) // 24 bit signed offsets
3488 return isInt<24>(ByteOffset) ? std::optional<int64_t>(ByteOffset)
3489 : std::nullopt;
3490
3491 // The signed version is always a byte offset.
3492 if (!IsBuffer && hasSMRDSignedImmOffset(ST)) {
3494 return isInt<20>(ByteOffset) ? std::optional<int64_t>(ByteOffset)
3495 : std::nullopt;
3496 }
3497
3498 if (!isDwordAligned(ByteOffset) && !hasSMEMByteOffset(ST))
3499 return std::nullopt;
3500
3501 int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
3502 return isLegalSMRDEncodedUnsignedOffset(ST, EncodedOffset)
3503 ? std::optional<int64_t>(EncodedOffset)
3504 : std::nullopt;
3505}
3506
3507std::optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
3508 int64_t ByteOffset) {
3509 if (!isCI(ST) || !isDwordAligned(ByteOffset))
3510 return std::nullopt;
3511
3512 int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
3513 return isUInt<32>(EncodedOffset) ? std::optional<int64_t>(EncodedOffset)
3514 : std::nullopt;
3515}
3516
3518 if (ST.getFeatureBits().test(FeatureFlatOffsetBits12))
3519 return 12;
3520 if (ST.getFeatureBits().test(FeatureFlatOffsetBits24))
3521 return 24;
3522 return 13;
3523}
3524
3525namespace {
3526
// Row type returned by lookupSourceOfDivergence; Intr is the intrinsic ID used
// as the lookup key.
3527struct SourceOfDivergence {
3528 unsigned Intr;
3529};
// Declared here only; no definition is visible in this file.
// NOTE(review): presumably defined by the AMDGPUGenSearchableTables.inc
// include below -- confirm. Callers in this file treat a null result as
// "not a source of divergence".
3530const SourceOfDivergence *lookupSourceOfDivergence(unsigned Intr);
3531
// Row type returned by lookupAlwaysUniform; Intr is the intrinsic ID used as
// the lookup key.
3532struct AlwaysUniform {
3533 unsigned Intr;
3534};
// Declared here only; callers in this file treat a null result as
// "not always uniform".
3535const AlwaysUniform *lookupAlwaysUniform(unsigned Intr);
3536
// NOTE(review): these GET_*_IMPL macros presumably select which table
// implementations the include below emits; they must stay defined before the
// #include -- confirm against the generated file.
3537#define GET_SourcesOfDivergence_IMPL
3538#define GET_UniformIntrinsics_IMPL
3539#define GET_Gfx9BufferFormat_IMPL
3540#define GET_Gfx10BufferFormat_IMPL
3541#define GET_Gfx11PlusBufferFormat_IMPL
3542
3543#include "AMDGPUGenSearchableTables.inc"
3544
3545} // end anonymous namespace
3546
3547bool isIntrinsicSourceOfDivergence(unsigned IntrID) {
3548 return lookupSourceOfDivergence(IntrID);
3549}
3550
3551bool isIntrinsicAlwaysUniform(unsigned IntrID) {
3552 return lookupAlwaysUniform(IntrID);
3553}
3554
3556 uint8_t NumComponents,
3557 uint8_t NumFormat,
3558 const MCSubtargetInfo &STI) {
3559 return isGFX11Plus(STI) ? getGfx11PlusBufferFormatInfo(
3560 BitsPerComp, NumComponents, NumFormat)
3561 : isGFX10(STI)
3562 ? getGfx10BufferFormatInfo(BitsPerComp, NumComponents, NumFormat)
3563 : getGfx9BufferFormatInfo(BitsPerComp, NumComponents, NumFormat);
3564}
3565
3567 const MCSubtargetInfo &STI) {
3568 return isGFX11Plus(STI) ? getGfx11PlusBufferFormatInfo(Format)
3569 : isGFX10(STI) ? getGfx10BufferFormatInfo(Format)
3570 : getGfx9BufferFormatInfo(Format);
3571}
3572
3574 const MCRegisterInfo &MRI) {
3575 const unsigned VGPRClasses[] = {
3576 AMDGPU::VGPR_16RegClassID, AMDGPU::VGPR_32RegClassID,
3577 AMDGPU::VReg_64RegClassID, AMDGPU::VReg_96RegClassID,
3578 AMDGPU::VReg_128RegClassID, AMDGPU::VReg_160RegClassID,
3579 AMDGPU::VReg_192RegClassID, AMDGPU::VReg_224RegClassID,
3580 AMDGPU::VReg_256RegClassID, AMDGPU::VReg_288RegClassID,
3581 AMDGPU::VReg_320RegClassID, AMDGPU::VReg_352RegClassID,
3582 AMDGPU::VReg_384RegClassID, AMDGPU::VReg_512RegClassID,
3583 AMDGPU::VReg_1024RegClassID};
3584
3585 for (unsigned RCID : VGPRClasses) {
3586 const MCRegisterClass &RC = MRI.getRegClass(RCID);
3587 if (RC.contains(Reg))
3588 return &RC;
3589 }
3590
3591 return nullptr;
3592}
3593
3595 unsigned Enc = MRI.getEncodingValue(Reg);
3596 unsigned Idx = Enc & AMDGPU::HWEncoding::REG_IDX_MASK;
3597 return Idx >> 8;
3598}
3599
3601 const MCRegisterInfo &MRI) {
3602 unsigned Enc = MRI.getEncodingValue(Reg);
3603 unsigned Idx = Enc & AMDGPU::HWEncoding::REG_IDX_MASK;
3604 if (Idx >= 0x100)
3605 return MCRegister();
3606
3607 const MCRegisterClass *RC = getVGPRPhysRegClass(Reg, MRI);
3608 if (!RC)
3609 return MCRegister();
3610
3611 Idx |= MSBs << 8;
3612 if (RC->getID() == AMDGPU::VGPR_16RegClassID) {
3613 // This class has 2048 registers with interleaved lo16 and hi16.
3614 Idx *= 2;
3616 ++Idx;
3617 }
3618
3619 return RC->getRegister(Idx);
3620}
3621
3622static std::optional<unsigned>
3623convertSetRegImmToVgprMSBs(unsigned Imm, unsigned Simm16,
3624 bool HasSetregVGPRMSBFixup) {
3625 constexpr unsigned VGPRMSBShift =
3627
3628 auto [HwRegId, Offset, Size] = Hwreg::HwregEncoding::decode(Simm16);
3629 if (HwRegId != Hwreg::ID_MODE ||
3630 (!HasSetregVGPRMSBFixup && (Offset + Size) < VGPRMSBShift))
3631 return {};
3632 // If there is SetregVGPRMSBFixup then Offset is ignored.
3633 if (!HasSetregVGPRMSBFixup)
3634 Imm <<= Offset;
3635 Imm = (Imm & Hwreg::VGPR_MSB_MASK) >> VGPRMSBShift;
3636 if (!HasSetregVGPRMSBFixup)
3638 return llvm::rotr<uint8_t>(static_cast<uint8_t>(Imm), /*R=*/2);
3639}
3640
3641std::optional<unsigned> convertSetRegImmToVgprMSBs(const MachineInstr &MI,
3642 bool HasSetregVGPRMSBFixup) {
3643 assert(MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32);
3644 return convertSetRegImmToVgprMSBs(MI.getOperand(0).getImm(),
3645 MI.getOperand(1).getImm(),
3646 HasSetregVGPRMSBFixup);
3647}
3648
3649std::optional<unsigned> convertSetRegImmToVgprMSBs(const MCInst &MI,
3650 bool HasSetregVGPRMSBFixup) {
3651 assert(MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32_gfx12);
3652 return convertSetRegImmToVgprMSBs(MI.getOperand(0).getImm(),
3653 MI.getOperand(1).getImm(),
3654 HasSetregVGPRMSBFixup);
3655}
3656
3657std::pair<const AMDGPU::OpName *, const AMDGPU::OpName *>
3659 static const AMDGPU::OpName VOPOps[4] = {
3660 AMDGPU::OpName::src0, AMDGPU::OpName::src1, AMDGPU::OpName::src2,
3661 AMDGPU::OpName::vdst};
3662 static const AMDGPU::OpName VDSOps[4] = {
3663 AMDGPU::OpName::addr, AMDGPU::OpName::data0, AMDGPU::OpName::data1,
3664 AMDGPU::OpName::vdst};
3665 static const AMDGPU::OpName FLATOps[4] = {
3666 AMDGPU::OpName::vaddr, AMDGPU::OpName::vdata,
3667 AMDGPU::OpName::NUM_OPERAND_NAMES, AMDGPU::OpName::vdst};
3668 static const AMDGPU::OpName BUFOps[4] = {
3669 AMDGPU::OpName::vaddr, AMDGPU::OpName::NUM_OPERAND_NAMES,
3670 AMDGPU::OpName::NUM_OPERAND_NAMES, AMDGPU::OpName::vdata};
3671 static const AMDGPU::OpName VIMGOps[4] = {
3672 AMDGPU::OpName::vaddr0, AMDGPU::OpName::vaddr1, AMDGPU::OpName::vaddr2,
3673 AMDGPU::OpName::vdata};
3674
3675 // For VOPD instructions MSB of a corresponding Y component operand VGPR
3676 // address is supposed to match X operand, otherwise VOPD shall not be
3677 // combined.
3678 static const AMDGPU::OpName VOPDOpsX[4] = {
3679 AMDGPU::OpName::src0X, AMDGPU::OpName::vsrc1X, AMDGPU::OpName::vsrc2X,
3680 AMDGPU::OpName::vdstX};
3681 static const AMDGPU::OpName VOPDOpsY[4] = {
3682 AMDGPU::OpName::src0Y, AMDGPU::OpName::vsrc1Y, AMDGPU::OpName::vsrc2Y,
3683 AMDGPU::OpName::vdstY};
3684
3685 // VOP2 MADMK instructions use src0, imm, src1 scheme.
3686 static const AMDGPU::OpName VOP2MADMKOps[4] = {
3687 AMDGPU::OpName::src0, AMDGPU::OpName::NUM_OPERAND_NAMES,
3688 AMDGPU::OpName::src1, AMDGPU::OpName::vdst};
3689 static const AMDGPU::OpName VOPDFMAMKOpsX[4] = {
3690 AMDGPU::OpName::src0X, AMDGPU::OpName::NUM_OPERAND_NAMES,
3691 AMDGPU::OpName::vsrc1X, AMDGPU::OpName::vdstX};
3692 static const AMDGPU::OpName VOPDFMAMKOpsY[4] = {
3693 AMDGPU::OpName::src0Y, AMDGPU::OpName::NUM_OPERAND_NAMES,
3694 AMDGPU::OpName::vsrc1Y, AMDGPU::OpName::vdstY};
3695
3696 unsigned TSFlags = Desc.TSFlags;
3697
3698 if (TSFlags &
3701 switch (Desc.getOpcode()) {
3702 // LD_SCALE operands ignore MSB.
3703 case AMDGPU::V_WMMA_LD_SCALE_PAIRED_B32:
3704 case AMDGPU::V_WMMA_LD_SCALE_PAIRED_B32_gfx1250:
3705 case AMDGPU::V_WMMA_LD_SCALE16_PAIRED_B64:
3706 case AMDGPU::V_WMMA_LD_SCALE16_PAIRED_B64_gfx1250:
3707 return {};
3708 case AMDGPU::V_FMAMK_F16:
3709 case AMDGPU::V_FMAMK_F16_t16:
3710 case AMDGPU::V_FMAMK_F16_t16_gfx12:
3711 case AMDGPU::V_FMAMK_F16_fake16:
3712 case AMDGPU::V_FMAMK_F16_fake16_gfx12:
3713 case AMDGPU::V_FMAMK_F32:
3714 case AMDGPU::V_FMAMK_F32_gfx12:
3715 case AMDGPU::V_FMAMK_F64:
3716 case AMDGPU::V_FMAMK_F64_gfx1250:
3717 return {VOP2MADMKOps, nullptr};
3718 default:
3719 break;
3720 }
3721 return {VOPOps, nullptr};
3722 }
3723
3724 if (TSFlags & SIInstrFlags::DS)
3725 return {VDSOps, nullptr};
3726
3727 if (TSFlags & SIInstrFlags::FLAT)
3728 return {FLATOps, nullptr};
3729
3730 if (TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))
3731 return {BUFOps, nullptr};
3732
3733 if (TSFlags & SIInstrFlags::VIMAGE)
3734 return {VIMGOps, nullptr};
3735
3736 if (AMDGPU::isVOPD(Desc.getOpcode())) {
3737 auto [OpX, OpY] = getVOPDComponents(Desc.getOpcode());
3738 return {(OpX == AMDGPU::V_FMAMK_F32) ? VOPDFMAMKOpsX : VOPDOpsX,
3739 (OpY == AMDGPU::V_FMAMK_F32) ? VOPDFMAMKOpsY : VOPDOpsY};
3740 }
3741
3742 assert(!(TSFlags & SIInstrFlags::MIMG));
3743
3744 if (TSFlags & (SIInstrFlags::VSAMPLE | SIInstrFlags::EXP))
3745 llvm_unreachable("Sample and export VGPR lowering is not implemented and"
3746 " these instructions are not expected on gfx1250");
3747
3748 return {};
3749}
3750
3751bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode) {
3752 uint64_t TSFlags = MII.get(Opcode).TSFlags;
3753
3754 if (TSFlags & SIInstrFlags::SMRD)
3755 return !getSMEMIsBuffer(Opcode);
3756 if (!(TSFlags & SIInstrFlags::FLAT))
3757 return false;
3758
3759 // Only SV and SVS modes are supported.
3760 if (TSFlags & SIInstrFlags::FlatScratch)
3761 return hasNamedOperand(Opcode, OpName::vaddr);
3762
3763 // Only GVS mode is supported.
3764 return hasNamedOperand(Opcode, OpName::vaddr) &&
3765 hasNamedOperand(Opcode, OpName::saddr);
3766
3767 return false;
3768}
3769
3770bool hasAny64BitVGPROperands(const MCInstrDesc &OpDesc, const MCInstrInfo &MII,
3771 const MCSubtargetInfo &ST) {
3772 for (auto OpName : {OpName::vdst, OpName::src0, OpName::src1, OpName::src2}) {
3773 int Idx = getNamedOperandIdx(OpDesc.getOpcode(), OpName);
3774 if (Idx == -1)
3775 continue;
3776
3777 const MCOperandInfo &OpInfo = OpDesc.operands()[Idx];
3778 int16_t RegClass = MII.getOpRegClassID(
3779 OpInfo, ST.getHwMode(MCSubtargetInfo::HwMode_RegInfo));
3780 if (RegClass == AMDGPU::VReg_64RegClassID ||
3781 RegClass == AMDGPU::VReg_64_Align2RegClassID)
3782 return true;
3783 }
3784
3785 return false;
3786}
3787
3788bool isDPALU_DPP32BitOpc(unsigned Opc) {
3789 switch (Opc) {
3790 case AMDGPU::V_MUL_LO_U32_e64:
3791 case AMDGPU::V_MUL_LO_U32_e64_dpp:
3792 case AMDGPU::V_MUL_LO_U32_e64_dpp_gfx1250:
3793 case AMDGPU::V_MUL_HI_U32_e64:
3794 case AMDGPU::V_MUL_HI_U32_e64_dpp:
3795 case AMDGPU::V_MUL_HI_U32_e64_dpp_gfx1250:
3796 case AMDGPU::V_MUL_HI_I32_e64:
3797 case AMDGPU::V_MUL_HI_I32_e64_dpp:
3798 case AMDGPU::V_MUL_HI_I32_e64_dpp_gfx1250:
3799 case AMDGPU::V_MAD_U32_e64:
3800 case AMDGPU::V_MAD_U32_e64_dpp:
3801 case AMDGPU::V_MAD_U32_e64_dpp_gfx1250:
3802 return true;
3803 default:
3804 return false;
3805 }
3806}
3807
3808bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCInstrInfo &MII,
3809 const MCSubtargetInfo &ST) {
3810 if (!ST.hasFeature(AMDGPU::FeatureDPALU_DPP))
3811 return false;
3812
3813 if (isDPALU_DPP32BitOpc(OpDesc.getOpcode()))
3814 return ST.hasFeature(AMDGPU::FeatureGFX1250Insts);
3815
3816 return hasAny64BitVGPROperands(OpDesc, MII, ST);
3817}
3818
3820 if (ST.getFeatureBits().test(FeatureAddressableLocalMemorySize32768))
3821 return 64;
3822 if (ST.getFeatureBits().test(FeatureAddressableLocalMemorySize65536))
3823 return 128;
3824 if (ST.getFeatureBits().test(FeatureAddressableLocalMemorySize163840))
3825 return 320;
3826 if (ST.getFeatureBits().test(FeatureAddressableLocalMemorySize327680))
3827 return 512;
3828 return 64; // In sync with getAddressableLocalMemorySize
3829}
3830
3831bool isPackedFP32Inst(unsigned Opc) {
3832 switch (Opc) {
3833 case AMDGPU::V_PK_ADD_F32:
3834 case AMDGPU::V_PK_ADD_F32_gfx12:
3835 case AMDGPU::V_PK_MUL_F32:
3836 case AMDGPU::V_PK_MUL_F32_gfx12:
3837 case AMDGPU::V_PK_FMA_F32:
3838 case AMDGPU::V_PK_FMA_F32_gfx12:
3839 return true;
3840 default:
3841 return false;
3842 }
3843}
3844
3845bool isPacked64BitInst(unsigned Opc) {
3846 switch (Opc) {
3847 case AMDGPU::V_PK_ADD_F64:
3848 case AMDGPU::V_PK_ADD_F64_gfx1250:
3849 case AMDGPU::V_PK_MUL_F64:
3850 case AMDGPU::V_PK_MUL_F64_gfx1250:
3851 case AMDGPU::V_PK_FMA_F64:
3852 case AMDGPU::V_PK_FMA_F64_gfx1250:
3853 case AMDGPU::V_PK_MAX_NUM_F64:
3854 case AMDGPU::V_PK_MAX_NUM_F64_gfx1250:
3855 case AMDGPU::V_PK_MIN_NUM_F64:
3856 case AMDGPU::V_PK_MIN_NUM_F64_gfx1250:
3857 case AMDGPU::V_PK_ADD_NC_U64:
3858 case AMDGPU::V_PK_ADD_NC_U64_gfx1250:
3859 case AMDGPU::V_PK_SUB_NC_U64:
3860 case AMDGPU::V_PK_SUB_NC_U64_gfx1250:
3861 case AMDGPU::V_PK_LSHL_ADD_U64:
3862 case AMDGPU::V_PK_LSHL_ADD_U64_gfx1250:
3863 return true;
3864 default:
3865 return false;
3866 }
3867}
3868
3871}
3872
3873const std::array<unsigned, 3> &ClusterDimsAttr::getDims() const {
3874 assert(isFixedDims() && "expect kind to be FixedDims");
3875 return Dims;
3876}
3877
3878std::string ClusterDimsAttr::to_string() const {
3879 SmallString<10> Buffer;
3880 raw_svector_ostream OS(Buffer);
3881
3882 switch (getKind()) {
3883 case Kind::Unknown:
3884 return "";
3885 case Kind::NoCluster: {
3886 OS << EncoNoCluster << ',' << EncoNoCluster << ',' << EncoNoCluster;
3887 return Buffer.c_str();
3888 }
3889 case Kind::VariableDims: {
3890 OS << EncoVariableDims << ',' << EncoVariableDims << ','
3891 << EncoVariableDims;
3892 return Buffer.c_str();
3893 }
3894 case Kind::FixedDims: {
3895 OS << Dims[0] << ',' << Dims[1] << ',' << Dims[2];
3896 return Buffer.c_str();
3897 }
3898 }
3899 llvm_unreachable("Unknown ClusterDimsAttr kind");
3900}
3901
3903 std::optional<SmallVector<unsigned>> Attr =
3904 getIntegerVecAttribute(F, "amdgpu-cluster-dims", /*Size=*/3);
3906
3907 if (!Attr.has_value())
3908 AttrKind = Kind::Unknown;
3909 else if (all_of(*Attr, equal_to(EncoNoCluster)))
3910 AttrKind = Kind::NoCluster;
3911 else if (all_of(*Attr, equal_to(EncoVariableDims)))
3912 AttrKind = Kind::VariableDims;
3913
3914 ClusterDimsAttr A(AttrKind);
3915 if (AttrKind == Kind::FixedDims)
3916 A.Dims = {(*Attr)[0], (*Attr)[1], (*Attr)[2]};
3917
3918 return A;
3919}
3920
3921} // namespace AMDGPU
3922
3925 switch (S) {
3927 OS << "Unsupported";
3928 break;
3930 OS << "Any";
3931 break;
3933 OS << "Off";
3934 break;
3936 OS << "On";
3937 break;
3938 }
3939 return OS;
3940}
3941
3942} // namespace llvm
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static llvm::cl::opt< unsigned > DefaultAMDHSACodeObjectVersion("amdhsa-code-object-version", llvm::cl::Hidden, llvm::cl::init(llvm::AMDGPU::AMDHSA_COV6), llvm::cl::desc("Set default AMDHSA Code Object Version (module flag " "or asm directive still take priority if present)"))
#define MAP_REG2REG
Provides AMDGPU specific target descriptions.
MC layer struct for AMDGPUMCKernelCodeT, provides MCExpr functionality where required.
@ AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32
This file contains the simple types necessary to represent the attributes associated with functions a...
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
IRTranslator LLVM IR MI
#define RegName(no)
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
This file contains the declarations for metadata subclasses.
#define T
modulo schedule test
uint64_t High
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
#define S_00B848_MEM_ORDERED(x)
Definition SIDefines.h:1466
#define S_00B848_WGP_MODE(x)
Definition SIDefines.h:1463
#define S_00B848_FWD_PROGRESS(x)
Definition SIDefines.h:1469
This file contains some functions that are useful when dealing with strings.
static const int BlockSize
Definition TarWriter.cpp:33
static ClusterDimsAttr get(const Function &F)
const std::array< unsigned, 3 > & getDims() const
TargetIDSetting getXnackSetting() const
void print(raw_ostream &OS) const
Write string representation to OS.
void setTargetIDFromTargetIDStream(StringRef TargetID)
TargetIDSetting getSramEccSetting() const
AMDGPUTargetID(const MCSubtargetInfo &STI, StringRef FeatureString)
unsigned getIndexInParsedOperands(unsigned CompOprIdx) const
unsigned getIndexOfSrcInParsedOperands(unsigned CompSrcIdx) const
std::optional< unsigned > getInvalidCompOperandIndex(std::function< MCRegister(unsigned, unsigned)> GetRegIdx, const MCRegisterInfo &MRI, bool SkipSrc=false, bool AllowSameVGPR=false, bool VOPD3=false) const
std::array< MCRegister, Component::MAX_OPR_NUM > RegIndices
Represents the counter values to wait for in an s_waitcnt instruction.
This class represents an incoming formal argument to a Function.
Definition Argument.h:32
Functions, function parameters, and return types can have attributes to indicate how they should be t...
Definition Attributes.h:105
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
CallingConv::ID getCallingConv() const
LLVM_ABI bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Determine whether the argument or parameter has the given attribute.
constexpr bool test(unsigned I) const
unsigned getAddressSpace() const
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
Instances of this class represent a single low-level machine instruction.
Definition MCInst.h:188
Describe properties that are true of each instruction in the target description file.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
ArrayRef< MCOperandInfo > operands() const
bool mayStore() const
Return true if this instruction could possibly modify memory.
bool mayLoad() const
Return true if this instruction could possibly read memory.
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
int getOperandConstraint(unsigned OpNum, MCOI::OperandConstraint Constraint) const
Returns the value of the specified operand constraint if it is present.
unsigned getOpcode() const
Return the opcode number for this descriptor.
Interface to description of machine instruction set.
Definition MCInstrInfo.h:27
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition MCInstrInfo.h:90
int16_t getOpRegClassID(const MCOperandInfo &OpInfo, unsigned HwModeId) const
Return the ID of the register class to use for OpInfo, for the active HwMode HwModeId.
Definition MCInstrInfo.h:80
This holds information about one operand of a machine instruction, indicating the register class for ...
Definition MCInstrDesc.h:86
MCRegisterClass - Base class of TargetRegisterClass.
unsigned getID() const
getID() - Return the register class ID number.
MCRegister getRegister(unsigned i) const
getRegister - Return the specified register in the class.
bool contains(MCRegister Reg) const
contains - Return true if the specified register is included in this register class.
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
bool regsOverlap(MCRegister RegA, MCRegister RegB) const
Returns true if the two registers are equal or alias each other.
uint16_t getEncodingValue(MCRegister Reg) const
Returns the encoding for Reg.
const MCRegisterClass & getRegClass(unsigned i) const
Returns the register class associated with the enumeration value.
MCRegister getSubReg(MCRegister Reg, unsigned Idx) const
Returns the physical register number of sub-register "Index" for physical register RegNo.
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
constexpr unsigned id() const
Definition MCRegister.h:82
Generic base class for all target subtargets.
bool hasFeature(unsigned Feature) const
const Triple & getTargetTriple() const
const FeatureBitset & getFeatureBits() const
StringRef getCPU() const
Metadata node.
Definition Metadata.h:1069
const MDOperand & getOperand(unsigned I) const
Definition Metadata.h:1433
unsigned getNumOperands() const
Return number of MDNode operands.
Definition Metadata.h:1439
Representation of each machine instruction.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
const char * c_str()
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Definition StringRef.h:882
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition StringRef.h:730
bool getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition StringRef.h:490
std::string str() const
Get the contents as an std::string.
Definition StringRef.h:222
constexpr bool empty() const
Check if the string is empty.
Definition StringRef.h:141
constexpr size_t size() const
Get the string size.
Definition StringRef.h:144
bool ends_with(StringRef Suffix) const
Check if this string ends with the given Suffix.
Definition StringRef.h:270
Manages the enabling and disabling of subtarget specific features.
const std::vector< std::string > & getFeatures() const
Returns the vector of individual subtarget features.
Triple - Helper class for working with autoconf configuration names.
Definition Triple.h:47
LLVM_ABI StringRef getVendorName() const
Get the vendor (second) component of the triple.
Definition Triple.cpp:1659
LLVM_ABI StringRef getOSName() const
Get the operating system (third) component of the triple.
Definition Triple.cpp:1664
OSType getOS() const
Get the parsed operating system type of this triple.
Definition Triple.h:445
ArchType getArch() const
Get the parsed architecture type of this triple.
Definition Triple.h:436
LLVM_ABI StringRef getEnvironmentName() const
Get the optional environment (fourth) component of the triple, or "" if empty.
Definition Triple.cpp:1670
bool isAMDGCN() const
Tests whether the target is AMDGCN.
Definition Triple.h:908
LLVM_ABI StringRef getArchName() const
Get the architecture (first) component of the triple.
Definition Triple.cpp:1655
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
A raw_ostream that writes to an std::string.
A raw_ostream that writes to an SmallVector or SmallString.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ CONSTANT_ADDRESS_32BIT
Address space for 32-bit constant memory.
@ LOCAL_ADDRESS
Address space for local memory.
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
unsigned decodeFieldVaVcc(unsigned Encoded)
unsigned encodeFieldVaVcc(unsigned Encoded, unsigned VaVcc)
unsigned decodeFieldHoldCnt(unsigned Encoded, const IsaVersion &Version)
bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val, bool &IsDefault, const MCSubtargetInfo &STI)
unsigned encodeFieldHoldCnt(unsigned Encoded, unsigned HoldCnt, const IsaVersion &Version)
unsigned encodeFieldVaSsrc(unsigned Encoded, unsigned VaSsrc)
unsigned encodeFieldVaVdst(unsigned Encoded, unsigned VaVdst)
unsigned decodeFieldSaSdst(unsigned Encoded)
unsigned getHoldCntBitMask(const IsaVersion &Version)
unsigned decodeFieldVaSdst(unsigned Encoded)
unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc)
unsigned decodeFieldVaSsrc(unsigned Encoded)
int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask, const MCSubtargetInfo &STI)
unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst)
const CustomOperandVal DepCtrInfo[]
bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal, const MCSubtargetInfo &STI)
unsigned decodeFieldVaVdst(unsigned Encoded)
int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI)
unsigned decodeFieldVmVsrc(unsigned Encoded)
unsigned encodeFieldVaSdst(unsigned Encoded, unsigned VaSdst)
bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI)
static constexpr ExpTgt ExpTgtInfo[]
bool getTgtName(unsigned Id, StringRef &Name, int &Index)
unsigned getTgtId(const StringRef Name)
constexpr uint32_t VersionMinor
HSA metadata minor version.
constexpr uint32_t VersionMajor
HSA metadata major version.
unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo &STI, unsigned NumVGPRs, unsigned DynamicVGPRBlockSize)
unsigned getSGPRAllocGranule(const MCSubtargetInfo &STI)
unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo &STI)
bool isSGPROccupancyLimited(const MCSubtargetInfo &STI)
unsigned getArchVGPRAllocGranule()
For subtargets with a unified VGPR file and mixed ArchVGPR/AGPR usage, returns the allocation granule...
unsigned getEUsPerCU(const MCSubtargetInfo &STI)
unsigned getTotalNumSGPRs(const MCSubtargetInfo &STI)
unsigned getAddressableNumSGPRs(const MCSubtargetInfo &STI)
static unsigned getSGPRTrapHandlerReserve(const MCSubtargetInfo &STI)
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo &STI)
unsigned getAddressableLocalMemorySize(const MCSubtargetInfo &STI)
unsigned getVGPREncodingGranule(const MCSubtargetInfo &STI, std::optional< bool > EnableWavefrontSize32)
unsigned getEncodedNumVGPRBlocks(const MCSubtargetInfo &STI, unsigned NumVGPRs, std::optional< bool > EnableWavefrontSize32)
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo &STI, unsigned FlatWorkGroupSize)
unsigned getMinNumSGPRs(const MCSubtargetInfo &STI, unsigned WavesPerEU)
unsigned getMaxNumSGPRs(const MCSubtargetInfo &STI, unsigned WavesPerEU, bool Addressable)
unsigned getWavefrontSize(const MCSubtargetInfo &STI)
unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo &STI, unsigned FlatWorkGroupSize)
unsigned getInstCacheLineSize(const MCSubtargetInfo &STI)
static constexpr unsigned MaxDynamicVGPRBlocks
Maximum number of VGPR blocks that can be allocated in dynamic VGPR mode.
unsigned getSGPREncodingGranule(const MCSubtargetInfo &STI)
static unsigned getSGPRBudgetPerWave(unsigned TotalNumSGPRs, unsigned WavesPerEU, unsigned TrapReserve, unsigned Granule)
unsigned getTotalNumVGPRs(const MCSubtargetInfo &STI)
unsigned getMinNumVGPRs(const MCSubtargetInfo &STI, unsigned WavesPerEU, unsigned DynamicVGPRBlockSize)
unsigned getAddressableNumVGPRs(const MCSubtargetInfo &STI, unsigned DynamicVGPRBlockSize)
unsigned getWavesPerWorkGroup(const MCSubtargetInfo &STI, unsigned FlatWorkGroupSize)
unsigned getAllocatedNumVGPRBlocks(const MCSubtargetInfo &STI, unsigned NumVGPRs, unsigned DynamicVGPRBlockSize, std::optional< bool > EnableWavefrontSize32)
static TargetIDSetting getTargetIDSettingFromFeatureString(StringRef FeatureString)
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs, unsigned MaxWaves, unsigned TotalNumSGPRs, unsigned Granule, unsigned TrapReserve)
unsigned getNumSGPRBlocks(const MCSubtargetInfo &STI, unsigned NumSGPRs)
unsigned getMaxWavesPerEU(const MCSubtargetInfo &STI)
unsigned getNumExtraSGPRs(const MCSubtargetInfo &STI, bool VCCUsed, bool FlatScrUsed, bool XNACKUsed)
unsigned getLocalMemorySize(const MCSubtargetInfo &STI)
unsigned getMaxNumVGPRs(const MCSubtargetInfo &STI, unsigned WavesPerEU, unsigned DynamicVGPRBlockSize)
static unsigned getGranulatedNumRegisterBlocks(unsigned NumRegs, unsigned Granule)
unsigned getVGPRAllocGranule(const MCSubtargetInfo &STI, unsigned DynamicVGPRBlockSize, std::optional< bool > EnableWavefrontSize32)
unsigned getMinWavesPerEU(const MCSubtargetInfo &STI)
StringLiteral const UfmtSymbolicGFX11[]
bool isValidUnifiedFormat(unsigned Id, const MCSubtargetInfo &STI)
unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI)
StringRef getUnifiedFormatName(unsigned Id, const MCSubtargetInfo &STI)
unsigned const DfmtNfmt2UFmtGFX10[]
StringLiteral const DfmtSymbolic[]
static StringLiteral const * getNfmtLookupTable(const MCSubtargetInfo &STI)
bool isValidNfmt(unsigned Id, const MCSubtargetInfo &STI)
StringLiteral const NfmtSymbolicGFX10[]
bool isValidDfmtNfmt(unsigned Id, const MCSubtargetInfo &STI)
int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt, const MCSubtargetInfo &STI)
StringRef getDfmtName(unsigned Id)
int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt)
int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI)
bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI)
StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI)
unsigned const DfmtNfmt2UFmtGFX11[]
StringLiteral const NfmtSymbolicVI[]
StringLiteral const NfmtSymbolicSICI[]
int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI)
int64_t getDfmt(const StringRef Name)
StringLiteral const UfmtSymbolicGFX10[]
void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt)
uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId)
bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI)
void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId, uint16_t &StreamId, const MCSubtargetInfo &STI)
bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI)
bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, const MCSubtargetInfo &STI, bool Strict)
bool msgDoesNotUseM0(int64_t MsgId, const MCSubtargetInfo &STI)
Returns true if the message does not use the m0 operand.
StringRef getMsgOpName(int64_t MsgId, uint64_t Encoding, const MCSubtargetInfo &STI)
Map from an encoding to the symbolic name for a sendmsg operation.
static uint64_t getMsgIdMask(const MCSubtargetInfo &STI)
bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI)
bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI, bool Strict)
constexpr unsigned VOPD_VGPR_BANK_MASKS[]
constexpr unsigned COMPONENTS_NUM
constexpr unsigned VOPD3_VGPR_BANK_MASKS[]
bool isPackedFP32Inst(unsigned Opc)
bool isGCN3Encoding(const MCSubtargetInfo &STI)
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
bool isGFX10_BEncoding(const MCSubtargetInfo &STI)
bool isInlineValue(MCRegister Reg)
bool isGFX10_GFX11(const MCSubtargetInfo &STI)
bool isInlinableLiteralV216(uint32_t Literal, uint8_t OpType)
bool isPKFMACF16InlineConstant(uint32_t Literal, bool IsGFX11Plus)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
bool isSGPR(MCRegister Reg, const MCRegisterInfo *TRI)
Is Reg a scalar register?
uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset)
Convert ByteOffset to dwords if the subtarget uses dword SMRD immediate offsets.
static unsigned encodeStorecnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Storecnt)
MCRegister getMCReg(MCRegister Reg, const MCSubtargetInfo &STI)
If Reg is a pseudo reg, return the correct hardware register given STI otherwise return Reg.
static bool hasSMEMByteOffset(const MCSubtargetInfo &ST)
bool isVOPCAsmOnly(unsigned Opc)
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding, unsigned VDataDwords, unsigned VAddrDwords)
bool getMTBUFHasSrsrc(unsigned Opc)
std::optional< int64_t > getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST, int64_t ByteOffset)
bool getWMMAIsXDL(unsigned Opc)
static std::optional< unsigned > convertSetRegImmToVgprMSBs(unsigned Imm, unsigned Simm16, bool HasSetregVGPRMSBFixup)
uint8_t wmmaScaleF8F6F4FormatToNumRegs(unsigned Fmt)
static bool isSymbolicCustomOperandEncoding(const CustomOperandVal *Opr, int Size, unsigned Code, bool &HasNonDefaultVal, const MCSubtargetInfo &STI)
bool isGFX10Before1030(const MCSubtargetInfo &STI)
bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo)
Does this operand support only inlinable literals?
unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc)
const int OPR_ID_UNSUPPORTED
void initDefaultAMDKernelCodeT(AMDGPUMCKernelCodeT &KernelCode, const MCSubtargetInfo &STI)
bool shouldEmitConstantsToTextSection(const Triple &TT)
bool isInlinableLiteralV2I16(uint32_t Literal)
bool isDPMACCInstruction(unsigned Opc)
int getMTBUFElements(unsigned Opc)
bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI)
static int encodeCustomOperandVal(const CustomOperandVal &Op, int64_t InputVal)
unsigned getTemporalHintType(const MCInstrDesc TID)
int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR)
bool isGFX10(const MCSubtargetInfo &STI)
static constexpr std::array< CanBeVOPD, 1<< VOPDXYKeyBits > buildVOPDXYLookup()
bool isInlinableLiteralV2BF16(uint32_t Literal)
unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI)
std::optional< unsigned > getInlineEncodingV216(bool IsFloat, uint32_t Literal)
FPType getFPDstSelType(unsigned Opc)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.
bool hasA16(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset, bool IsBuffer)
bool isGFX12Plus(const MCSubtargetInfo &STI)
unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler)
const MCRegisterClass * getVGPRPhysRegClass(MCRegister Reg, const MCRegisterInfo &MRI)
unsigned encodeLoadcntDscnt(const IsaVersion &Version, const Waitcnt &Decoded)
bool getHasMatrixScale(unsigned Opc)
bool hasPackedD16(const MCSubtargetInfo &STI)
unsigned getStorecntBitMask(const IsaVersion &Version)
unsigned getLdsDwGranularity(const MCSubtargetInfo &ST)
bool isGFX940(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
bool isHsaAbi(const MCSubtargetInfo &STI)
bool isGFX11(const MCSubtargetInfo &STI)
const int OPR_VAL_INVALID
bool getSMEMIsBuffer(unsigned Opc)
bool isGFX10_3_GFX11(const MCSubtargetInfo &STI)
bool isGFX13(const MCSubtargetInfo &STI)
unsigned getAsynccntBitMask(const IsaVersion &Version)
bool hasValueInRangeLikeMetadata(const MDNode &MD, int64_t Val)
Checks if Val is inside MD, a !range-like metadata.
uint8_t mfmaScaleF8F6F4FormatToNumRegs(unsigned EncodingVal)
constexpr unsigned VOPDXYKeyBits
unsigned getVOPDOpcode(unsigned Opc, bool VOPD3)
bool isGroupSegment(const GlobalValue *GV)
LLVM_ABI IsaVersion getIsaVersion(StringRef GPU)
bool getMTBUFHasSoffset(unsigned Opc)
bool hasXNACK(const MCSubtargetInfo &STI)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
static unsigned getCombinedCountBitMask(const IsaVersion &Version, bool IsStore)
CanBeVOPD getCanBeVOPD(unsigned Opc, unsigned EncodingFamily, bool VOPD3)
bool isVOPC64DPP(unsigned Opc)
int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements)
bool getMAIIsGFX940XDL(unsigned Opc)
bool isSI(const MCSubtargetInfo &STI)
unsigned getDefaultAMDHSACodeObjectVersion()
bool isReadOnlySegment(const GlobalValue *GV)
Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded)
bool isArgPassedInSGPR(const Argument *A)
bool isIntrinsicAlwaysUniform(unsigned IntrID)
int getMUBUFBaseOpcode(unsigned Opc)
unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded)
unsigned getAMDHSACodeObjectVersion(const Module &M)
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt)
unsigned getWaitcntBitMask(const IsaVersion &Version)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
bool getVOP3IsSingle(unsigned Opc)
bool isGFX9(const MCSubtargetInfo &STI)
bool isDPALU_DPP32BitOpc(unsigned Opc)
bool getVOP1IsSingle(unsigned Opc)
static bool isDwordAligned(uint64_t ByteOffset)
unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST)
bool isGFX10_AEncoding(const MCSubtargetInfo &STI)
bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this a KImm operand?
bool getHasColorExport(const Function &F)
int getMTBUFBaseOpcode(unsigned Opc)
bool isGFX90A(const MCSubtargetInfo &STI)
unsigned getSamplecntBitMask(const IsaVersion &Version)
unsigned getDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion)
std::tuple< char, unsigned, unsigned > parseAsmPhysRegName(StringRef RegName)
Returns a valid charcode or 0 in the first entry if this is a valid physical register name.
bool getHasDepthExport(const Function &F)
bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI)
bool getMUBUFHasVAddr(unsigned Opc)
bool isTrue16Inst(unsigned Opc)
unsigned getVGPREncodingMSBs(MCRegister Reg, const MCRegisterInfo &MRI)
std::pair< unsigned, unsigned > getVOPDComponents(unsigned VOPDOpcode)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
bool isGFX12(const MCSubtargetInfo &STI)
unsigned getInitialPSInputAddr(const Function &F)
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Expcnt)
bool isAsyncStore(unsigned Opc)
unsigned getDynamicVGPRBlockSize(const Function &F)
unsigned getKmcntBitMask(const IsaVersion &Version)
MCRegister getVGPRWithMSBs(MCRegister Reg, unsigned MSBs, const MCRegisterInfo &MRI)
If Reg is a low VGPR return a corresponding high VGPR with MSBs set.
unsigned getVmcntBitMask(const IsaVersion &Version)
bool isNotGFX10Plus(const MCSubtargetInfo &STI)
bool hasMAIInsts(const MCSubtargetInfo &STI)
unsigned getBitOp2(unsigned Opc)
bool isIntrinsicSourceOfDivergence(unsigned IntrID)
unsigned getXcntBitMask(const IsaVersion &Version)
bool isGenericAtomic(unsigned Opc)
const MFMA_F8F6F4_Info * getWMMA_F8F6F4_WithFormatArgs(unsigned FmtA, unsigned FmtB, unsigned F8F8Opcode)
bool isGFX8Plus(const MCSubtargetInfo &STI)
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the values intended for floating point values.
unsigned getLgkmcntBitMask(const IsaVersion &Version)
bool getMUBUFTfe(unsigned Opc)
unsigned getBvhcntBitMask(const IsaVersion &Version)
bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST)
bool hasMIMG_R128(const MCSubtargetInfo &STI)
bool hasGFX10_3Insts(const MCSubtargetInfo &STI)
unsigned decodeDscnt(const IsaVersion &Version, unsigned Waitcnt)
std::pair< const AMDGPU::OpName *, const AMDGPU::OpName * > getVGPRLoweringOperandTables(const MCInstrDesc &Desc)
bool hasG16(const MCSubtargetInfo &STI)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements)
bool isGFX13Plus(const MCSubtargetInfo &STI)
unsigned getExpcntBitMask(const IsaVersion &Version)
bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI)
int32_t getMCOpcode(uint32_t Opcode, unsigned Gen)
bool getMUBUFHasSoffset(unsigned Opc)
bool isNotGFX11Plus(const MCSubtargetInfo &STI)
bool isGFX11Plus(const MCSubtargetInfo &STI)
std::optional< unsigned > getInlineEncodingV2F16(uint32_t Literal)
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this a floating-point operand?
std::tuple< char, unsigned, unsigned > parseAsmConstraintPhysReg(StringRef Constraint)
Returns a valid charcode or 0 in the first entry if this is a valid physical register constraint.
unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion)
static unsigned getDefaultCustomOperandEncoding(const CustomOperandVal *Opr, int Size, const MCSubtargetInfo &STI)
bool isPackedFP32or64BitInst(unsigned Opc)
static unsigned encodeLoadcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Loadcnt)
bool isGFX10Plus(const MCSubtargetInfo &STI)
static bool decodeCustomOperand(const CustomOperandVal *Opr, int Size, unsigned Code, int &Idx, StringRef &Name, unsigned &Val, bool &IsDefault, const MCSubtargetInfo &STI)
static bool isValidRegPrefix(char C)
std::optional< int64_t > getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset, bool IsBuffer, bool HasSOffset)
bool isGlobalSegment(const GlobalValue *GV)
int64_t encode32BitLiteral(int64_t Imm, OperandType Type, bool IsLit)
bool isValidWMMAScaleFmtCombination(unsigned AFmt, unsigned AScale, unsigned BFmt, unsigned BScale)
@ OPERAND_REG_IMM_V2FP64
Definition SIDefines.h:430
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
Definition SIDefines.h:448
@ OPERAND_REG_INLINE_C_LAST
Definition SIDefines.h:471
@ OPERAND_REG_IMM_V2FP16
Definition SIDefines.h:423
@ OPERAND_REG_INLINE_C_FP64
Definition SIDefines.h:439
@ OPERAND_REG_INLINE_C_BF16
Definition SIDefines.h:436
@ OPERAND_REG_INLINE_C_V2BF16
Definition SIDefines.h:441
@ OPERAND_REG_IMM_V2INT16
Definition SIDefines.h:425
@ OPERAND_REG_IMM_BF16
Definition SIDefines.h:420
@ OPERAND_REG_IMM_INT32
Operands with register, 32-bit, or 64-bit immediate.
Definition SIDefines.h:415
@ OPERAND_REG_IMM_V2BF16
Definition SIDefines.h:422
@ OPERAND_REG_INLINE_AC_FIRST
Definition SIDefines.h:473
@ OPERAND_REG_IMM_FP16
Definition SIDefines.h:421
@ OPERAND_REG_IMM_V2FP16_SPLAT
Definition SIDefines.h:424
@ OPERAND_REG_IMM_NOINLINE_V2FP16
Definition SIDefines.h:427
@ OPERAND_REG_IMM_FP64
Definition SIDefines.h:419
@ OPERAND_REG_INLINE_C_V2FP16
Definition SIDefines.h:442
@ OPERAND_REG_INLINE_AC_INT32
Operands with an AccVGPR register or inline constant.
Definition SIDefines.h:453
@ OPERAND_REG_INLINE_AC_FP32
Definition SIDefines.h:454
@ OPERAND_REG_IMM_V2INT32
Definition SIDefines.h:428
@ OPERAND_REG_IMM_FP32
Definition SIDefines.h:418
@ OPERAND_REG_INLINE_C_FIRST
Definition SIDefines.h:470
@ OPERAND_REG_INLINE_C_FP32
Definition SIDefines.h:438
@ OPERAND_REG_INLINE_AC_LAST
Definition SIDefines.h:474
@ OPERAND_REG_INLINE_C_INT32
Definition SIDefines.h:434
@ OPERAND_REG_INLINE_C_V2INT16
Definition SIDefines.h:440
@ OPERAND_REG_IMM_V2FP32
Definition SIDefines.h:429
@ OPERAND_REG_INLINE_AC_FP64
Definition SIDefines.h:455
@ OPERAND_REG_INLINE_C_FP16
Definition SIDefines.h:437
@ OPERAND_INLINE_SPLIT_BARRIER_INT32
Definition SIDefines.h:445
std::optional< unsigned > getPKFMACF16InlineEncoding(uint32_t Literal, bool IsGFX11Plus)
bool isNotGFX9Plus(const MCSubtargetInfo &STI)
bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCInstrInfo &MII, const MCSubtargetInfo &ST)
static constexpr unsigned getVOPDXYKey(unsigned VOPDOp, unsigned Subtarget, bool VOPD3)
constexpr auto VOPDXYLookup
bool hasGDS(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset)
bool isGFX9Plus(const MCSubtargetInfo &STI)
bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI)
const int OPR_ID_DUPLICATE
bool isVOPD(unsigned Opc)
VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Vmcnt)
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt)
bool isCvt_F32_Fp8_Bf8_e64(unsigned Opc)
std::optional< unsigned > getInlineEncodingV2I16(uint32_t Literal)
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
unsigned encodeStorecntDscnt(const IsaVersion &Version, const Waitcnt &Decoded)
bool isGFX1250(const MCSubtargetInfo &STI)
const MIMGBaseOpcodeInfo * getMIMGBaseOpcode(unsigned Opc)
bool isVI(const MCSubtargetInfo &STI)
bool isTensorStore(unsigned Opc)
bool getMUBUFIsBufferInv(unsigned Opc)
bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode)
MCRegister mc2PseudoReg(MCRegister Reg)
Convert hardware register Reg to a pseudo register.
std::optional< unsigned > getInlineEncodingV2BF16(uint32_t Literal)
static int encodeCustomOperand(const CustomOperandVal *Opr, int Size, const StringRef Name, int64_t InputVal, unsigned &UsedOprMask, const MCSubtargetInfo &STI)
unsigned hasKernargPreload(const MCSubtargetInfo &STI)
bool supportsWGP(const MCSubtargetInfo &STI)
bool isMAC(unsigned Opc)
bool isCI(const MCSubtargetInfo &STI)
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Lgkmcnt)
bool getVOP2IsSingle(unsigned Opc)
bool getMAIIsDGEMM(unsigned Opc)
Returns true if MAI operation is a double precision GEMM.
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
const int OPR_ID_UNKNOWN
unsigned getCompletionActionImplicitArgPosition(unsigned CodeObjectVersion)
SmallVector< unsigned > getIntegerVecAttribute(const Function &F, StringRef Name, unsigned Size, unsigned DefaultVal)
bool isPacked64BitInst(unsigned Opc)
unsigned decodeStorecnt(const IsaVersion &Version, unsigned Waitcnt)
bool isGFX1250Plus(const MCSubtargetInfo &STI)
int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels)
bool isNotGFX12Plus(const MCSubtargetInfo &STI)
bool getMTBUFHasVAddr(unsigned Opc)
unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt)
uint8_t getELFABIVersion(const Triple &T, unsigned CodeObjectVersion)
std::pair< unsigned, unsigned > getIntegerPairAttribute(const Function &F, StringRef Name, std::pair< unsigned, unsigned > Default, bool OnlyFirstRequired)
unsigned getLoadcntBitMask(const IsaVersion &Version)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
bool hasVOPD(const MCSubtargetInfo &STI)
int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily, bool VOPD3)
static unsigned encodeDscnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Dscnt)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
const MFMA_F8F6F4_Info * getMFMA_F8F6F4_WithFormatArgs(unsigned CBSZ, unsigned BLGP, unsigned F8F8Opcode)
unsigned decodeLoadcnt(const IsaVersion &Version, unsigned Waitcnt)
unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion)
bool isGFX9_GFX10_GFX11(const MCSubtargetInfo &STI)
bool isGFX9_GFX10(const MCSubtargetInfo &STI)
int getMUBUFElements(unsigned Opc)
const GcnBufferFormatInfo * getGcnBufferFormatInfo(uint8_t BitsPerComp, uint8_t NumComponents, uint8_t NumFormat, const MCSubtargetInfo &STI)
unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc)
bool isPermlane16(unsigned Opc)
bool getMUBUFHasSrsrc(unsigned Opc)
unsigned getDscntBitMask(const IsaVersion &Version)
bool hasAny64BitVGPROperands(const MCInstrDesc &OpDesc, const MCInstrInfo &MII, const MCSubtargetInfo &ST)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
@ AMDGPU_VS
Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tess...
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ AMDGPU_Gfx
Used for AMD graphics targets.
@ AMDGPU_CS_ChainPreserve
Used on AMDGPUs to give the middle-end more control over argument placement.
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
@ AMDGPU_CS_Chain
Used on AMDGPUs to give the middle-end more control over argument placement.
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
@ SPIR_KERNEL
Used for SPIR kernel functions.
@ AMDGPU_ES
Used for AMDPAL shader stage before geometry shader if geometry is in use.
@ AMDGPU_LS
Used for AMDPAL vertex shader if tessellation is in use.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ ELFABIVERSION_AMDGPU_HSA_V4
Definition ELF.h:384
@ ELFABIVERSION_AMDGPU_HSA_V5
Definition ELF.h:385
@ ELFABIVERSION_AMDGPU_HSA_V6
Definition ELF.h:386
initializer< Ty > init(const Ty &Val)
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract_or_null(Y &&MD)
Extract a Value from Metadata, allowing null.
Definition Metadata.h:683
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
Definition Metadata.h:668
This is an optimization pass for GlobalISel generic memory operations.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
Definition Threading.h:280
@ Offset
Definition DWP.cpp:558
constexpr T rotr(T V, int R)
Definition bit.h:399
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition Error.h:198
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer that is less than or equal to Value and congruent to Skew mod Align.
Definition MathExtras.h:546
std::string utostr(uint64_t X, bool isNeg=false)
constexpr auto equal_to(T &&Arg)
Functor variant of std::equal_to that can be used as a UnaryPredicate in functional algorithms like a...
Definition STLExtras.h:2172
Op::Description Desc
FunctionAddr VTableAddr uintptr_t uintptr_t Version
Definition InstrProf.h:334
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
Definition MathExtras.h:150
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
constexpr uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
Definition MathExtras.h:155
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition MathExtras.h:394
To bit_cast(const From &from) noexcept
Definition bit.h:90
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
constexpr int countr_zero_constexpr(T Val)
Count the number of 0's from the least significant bit to the most significant, stopping at the first 1.
Definition bit.h:190
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
Definition MathExtras.h:77
@ AlwaysUniform
The result value is always uniform.
Definition Uniformity.h:23
@ Default
The result value is uniform if and only if all operands are uniform.
Definition Uniformity.h:20
#define N
AMD Kernel Code Object (amd_kernel_code_t).
static std::tuple< typename Fields::ValueType... > decode(uint64_t Encoded)
Instruction set architecture version.