LLVM 23.0.0git
AMDGPUBaseInfo.cpp
Go to the documentation of this file.
1//===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "AMDGPUBaseInfo.h"
10#include "AMDGPU.h"
11#include "AMDGPUAsmUtils.h"
12#include "AMDKernelCodeT.h"
17#include "llvm/IR/Attributes.h"
18#include "llvm/IR/Constants.h"
19#include "llvm/IR/Function.h"
20#include "llvm/IR/GlobalValue.h"
21#include "llvm/IR/IntrinsicsAMDGPU.h"
22#include "llvm/IR/IntrinsicsR600.h"
23#include "llvm/IR/LLVMContext.h"
24#include "llvm/IR/Metadata.h"
25#include "llvm/MC/MCInstrInfo.h"
30#include <optional>
31
32#define GET_INSTRINFO_NAMED_OPS
33#define GET_INSTRMAP_INFO
34#include "AMDGPUGenInstrInfo.inc"
35
37 "amdhsa-code-object-version", llvm::cl::Hidden,
39 llvm::cl::desc("Set default AMDHSA Code Object Version (module flag "
40 "or asm directive still take priority if present)"));
41
namespace {

/// \returns Bit mask covering \p Width bits starting at bit \p Shift.
unsigned getBitMask(unsigned Shift, unsigned Width) {
  // Use an unsigned literal: with a plain (signed) 1 the shift could move a
  // bit into the sign position when Width reaches 31+, which is a
  // signed-overflow hazard. All current callers stay well below that, but
  // the unsigned form is unconditionally safe.
  return ((1u << Width) - 1) << Shift;
}

/// Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
///
/// \returns Packed \p Dst.
unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
  unsigned Mask = getBitMask(Shift, Width);
  return ((Src << Shift) & Mask) | (Dst & ~Mask);
}

/// Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
///
/// \returns Unpacked bits.
unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
  return (Src & getBitMask(Shift, Width)) >> Shift;
}

/// \returns Vmcnt bit shift (lower bits).
unsigned getVmcntBitShiftLo(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 10 : 0;
}

/// \returns Vmcnt bit width (lower bits).
unsigned getVmcntBitWidthLo(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 6 : 4;
}

/// \returns Expcnt bit shift.
unsigned getExpcntBitShift(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 0 : 4;
}

/// \returns Expcnt bit width.
unsigned getExpcntBitWidth(unsigned VersionMajor) { return 3; }

/// \returns Lgkmcnt bit shift.
unsigned getLgkmcntBitShift(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 4 : 8;
}

/// \returns Lgkmcnt bit width.
unsigned getLgkmcntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 10 ? 6 : 4;
}

/// \returns Vmcnt bit shift (higher bits).
unsigned getVmcntBitShiftHi(unsigned VersionMajor) { return 14; }

/// \returns Vmcnt bit width (higher bits). Only gfx9/gfx10 carry the extra
/// high Vmcnt bits.
unsigned getVmcntBitWidthHi(unsigned VersionMajor) {
  return (VersionMajor == 9 || VersionMajor == 10) ? 2 : 0;
}

/// \returns Loadcnt bit width (0 when the counter does not exist pre-gfx12).
unsigned getLoadcntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 6 : 0;
}

/// \returns Samplecnt bit width.
unsigned getSamplecntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 6 : 0;
}

/// \returns Bvhcnt bit width.
unsigned getBvhcntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 3 : 0;
}

/// \returns Dscnt bit width.
unsigned getDscntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 6 : 0;
}

/// \returns Dscnt bit shift in combined S_WAIT instructions.
unsigned getDscntBitShift(unsigned VersionMajor) { return 0; }

/// \returns Storecnt or Vscnt bit width, depending on VersionMajor.
unsigned getStorecntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 10 ? 6 : 0;
}

/// \returns Kmcnt bit width.
unsigned getKmcntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 5 : 0;
}

/// \returns Xcnt bit width. Only gfx12.5 has this counter.
unsigned getXcntBitWidth(unsigned VersionMajor, unsigned VersionMinor) {
  return VersionMajor == 12 && VersionMinor == 5 ? 6 : 0;
}

/// \returns Asynccnt bit width. Only gfx12.5 has this counter.
unsigned getAsynccntBitWidth(unsigned VersionMajor, unsigned VersionMinor) {
  return VersionMajor == 12 && VersionMinor == 5 ? 6 : 0;
}

/// \returns shift for Loadcnt/Storecnt in combined S_WAIT instructions.
unsigned getLoadcntStorecntBitShift(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 8 : 0;
}

/// \returns VaSdst bit width
inline unsigned getVaSdstBitWidth() { return 3; }

/// \returns VaSdst bit shift
inline unsigned getVaSdstBitShift() { return 9; }

/// \returns VmVsrc bit width
inline unsigned getVmVsrcBitWidth() { return 3; }

/// \returns VmVsrc bit shift
inline unsigned getVmVsrcBitShift() { return 2; }

/// \returns VaVdst bit width
inline unsigned getVaVdstBitWidth() { return 4; }

/// \returns VaVdst bit shift
inline unsigned getVaVdstBitShift() { return 12; }

/// \returns VaVcc bit width
inline unsigned getVaVccBitWidth() { return 1; }

/// \returns VaVcc bit shift
inline unsigned getVaVccBitShift() { return 1; }

/// \returns SaSdst bit width
inline unsigned getSaSdstBitWidth() { return 1; }

/// \returns SaSdst bit shift
inline unsigned getSaSdstBitShift() { return 0; }

/// \returns VaSsrc width
inline unsigned getVaSsrcBitWidth() { return 1; }

/// \returns VaSsrc bit shift
inline unsigned getVaSsrcBitShift() { return 8; }

/// \returns HoldCnt bit width (1 from gfx10.3 on, 0 before that).
/// Note: the previous comment said "bit shift"; the function returns a width.
inline unsigned getHoldCntWidth(unsigned VersionMajor, unsigned VersionMinor) {
  // Available on version >= 10.3 (lexicographic major.minor compare).
  if (VersionMajor > 10 || (VersionMajor == 10 && VersionMinor >= 3))
    return 1;
  return 0;
}

/// \returns HoldCnt bit shift
inline unsigned getHoldCntBitShift() { return 7; }

} // end anonymous namespace
197
198namespace llvm {
199
200namespace AMDGPU {
201
202/// \returns true if the target supports signed immediate offset for SMRD
203/// instructions.
205 return isGFX9Plus(ST);
206}
207
208/// \returns True if \p STI is AMDHSA.
209bool isHsaAbi(const MCSubtargetInfo &STI) {
210 return STI.getTargetTriple().getOS() == Triple::AMDHSA;
211}
212
215 M.getModuleFlag("amdhsa_code_object_version"))) {
216 return (unsigned)Ver->getZExtValue() / 100;
217 }
218
220}
221
225
226unsigned getAMDHSACodeObjectVersion(unsigned ABIVersion) {
227 switch (ABIVersion) {
229 return 4;
231 return 5;
233 return 6;
234 default:
236 }
237}
238
239uint8_t getELFABIVersion(const Triple &T, unsigned CodeObjectVersion) {
240 if (T.getOS() != Triple::AMDHSA)
241 return 0;
242
243 switch (CodeObjectVersion) {
244 case 4:
246 case 5:
248 case 6:
250 default:
251 report_fatal_error("Unsupported AMDHSA Code Object Version " +
252 Twine(CodeObjectVersion));
253 }
254}
255
256unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion) {
257 switch (CodeObjectVersion) {
258 case AMDHSA_COV4:
259 return 48;
260 case AMDHSA_COV5:
261 case AMDHSA_COV6:
262 default:
264 }
265}
266
267// FIXME: All such magic numbers about the ABI should be in a
268// central TD file.
269unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion) {
270 switch (CodeObjectVersion) {
271 case AMDHSA_COV4:
272 return 24;
273 case AMDHSA_COV5:
274 case AMDHSA_COV6:
275 default:
277 }
278}
279
280unsigned getDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion) {
281 switch (CodeObjectVersion) {
282 case AMDHSA_COV4:
283 return 32;
284 case AMDHSA_COV5:
285 case AMDHSA_COV6:
286 default:
288 }
289}
290
291unsigned getCompletionActionImplicitArgPosition(unsigned CodeObjectVersion) {
292 switch (CodeObjectVersion) {
293 case AMDHSA_COV4:
294 return 40;
295 case AMDHSA_COV5:
296 case AMDHSA_COV6:
297 default:
299 }
300}
301
302#define GET_MIMGBaseOpcodesTable_IMPL
303#define GET_MIMGDimInfoTable_IMPL
304#define GET_MIMGInfoTable_IMPL
305#define GET_MIMGLZMappingTable_IMPL
306#define GET_MIMGMIPMappingTable_IMPL
307#define GET_MIMGBiasMappingTable_IMPL
308#define GET_MIMGOffsetMappingTable_IMPL
309#define GET_MIMGG16MappingTable_IMPL
310#define GET_MAIInstInfoTable_IMPL
311#define GET_WMMAInstInfoTable_IMPL
312#include "AMDGPUGenSearchableTables.inc"
313
314int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
315 unsigned VDataDwords, unsigned VAddrDwords) {
316 const MIMGInfo *Info =
317 getMIMGOpcodeHelper(BaseOpcode, MIMGEncoding, VDataDwords, VAddrDwords);
318 return Info ? Info->Opcode : -1;
319}
320
322 const MIMGInfo *Info = getMIMGInfo(Opc);
323 return Info ? getMIMGBaseOpcodeInfo(Info->BaseOpcode) : nullptr;
324}
325
326int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels) {
327 const MIMGInfo *OrigInfo = getMIMGInfo(Opc);
328 const MIMGInfo *NewInfo =
329 getMIMGOpcodeHelper(OrigInfo->BaseOpcode, OrigInfo->MIMGEncoding,
330 NewChannels, OrigInfo->VAddrDwords);
331 return NewInfo ? NewInfo->Opcode : -1;
332}
333
334unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode,
335 const MIMGDimInfo *Dim, bool IsA16,
336 bool IsG16Supported) {
337 unsigned AddrWords = BaseOpcode->NumExtraArgs;
338 unsigned AddrComponents = (BaseOpcode->Coordinates ? Dim->NumCoords : 0) +
339 (BaseOpcode->LodOrClampOrMip ? 1 : 0);
340 if (IsA16)
341 AddrWords += divideCeil(AddrComponents, 2);
342 else
343 AddrWords += AddrComponents;
344
345 // Note: For subtargets that support A16 but not G16, enabling A16 also
346 // enables 16 bit gradients.
347 // For subtargets that support A16 (operand) and G16 (done with a different
348 // instruction encoding), they are independent.
349
350 if (BaseOpcode->Gradients) {
351 if ((IsA16 && !IsG16Supported) || BaseOpcode->G16)
352 // There are two gradients per coordinate, we pack them separately.
353 // For the 3d case,
354 // we get (dy/du, dx/du) (-, dz/du) (dy/dv, dx/dv) (-, dz/dv)
355 AddrWords += alignTo<2>(Dim->NumGradients / 2);
356 else
357 AddrWords += Dim->NumGradients;
358 }
359 return AddrWords;
360}
361
372
381
386
391
395
399
403
410
418
423
424#define GET_FP4FP8DstByteSelTable_DECL
425#define GET_FP4FP8DstByteSelTable_IMPL
426
431
437
438#define GET_DPMACCInstructionTable_DECL
439#define GET_DPMACCInstructionTable_IMPL
440#define GET_MTBUFInfoTable_DECL
441#define GET_MTBUFInfoTable_IMPL
442#define GET_MUBUFInfoTable_DECL
443#define GET_MUBUFInfoTable_IMPL
444#define GET_SMInfoTable_DECL
445#define GET_SMInfoTable_IMPL
446#define GET_VOP1InfoTable_DECL
447#define GET_VOP1InfoTable_IMPL
448#define GET_VOP2InfoTable_DECL
449#define GET_VOP2InfoTable_IMPL
450#define GET_VOP3InfoTable_DECL
451#define GET_VOP3InfoTable_IMPL
452#define GET_VOPC64DPPTable_DECL
453#define GET_VOPC64DPPTable_IMPL
454#define GET_VOPC64DPP8Table_DECL
455#define GET_VOPC64DPP8Table_IMPL
456#define GET_VOPCAsmOnlyInfoTable_DECL
457#define GET_VOPCAsmOnlyInfoTable_IMPL
458#define GET_VOP3CAsmOnlyInfoTable_DECL
459#define GET_VOP3CAsmOnlyInfoTable_IMPL
460#define GET_VOPDComponentTable_DECL
461#define GET_VOPDComponentTable_IMPL
462#define GET_VOPDPairs_DECL
463#define GET_VOPDPairs_IMPL
464#define GET_VOPTrue16Table_DECL
465#define GET_VOPTrue16Table_IMPL
466#define GET_True16D16Table_IMPL
467#define GET_WMMAOpcode2AddrMappingTable_DECL
468#define GET_WMMAOpcode2AddrMappingTable_IMPL
469#define GET_WMMAOpcode3AddrMappingTable_DECL
470#define GET_WMMAOpcode3AddrMappingTable_IMPL
471#define GET_getMFMA_F8F6F4_WithSize_DECL
472#define GET_getMFMA_F8F6F4_WithSize_IMPL
473#define GET_isMFMA_F8F6F4Table_IMPL
474#define GET_isCvtScaleF32_F32F16ToF8F4Table_IMPL
475
476#include "AMDGPUGenSearchableTables.inc"
477
478int getMTBUFBaseOpcode(unsigned Opc) {
479 const MTBUFInfo *Info = getMTBUFInfoFromOpcode(Opc);
480 return Info ? Info->BaseOpcode : -1;
481}
482
483int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements) {
484 const MTBUFInfo *Info =
485 getMTBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
486 return Info ? Info->Opcode : -1;
487}
488
489int getMTBUFElements(unsigned Opc) {
490 const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
491 return Info ? Info->elements : 0;
492}
493
494bool getMTBUFHasVAddr(unsigned Opc) {
495 const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
496 return Info && Info->has_vaddr;
497}
498
499bool getMTBUFHasSrsrc(unsigned Opc) {
500 const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
501 return Info && Info->has_srsrc;
502}
503
504bool getMTBUFHasSoffset(unsigned Opc) {
505 const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
506 return Info && Info->has_soffset;
507}
508
509int getMUBUFBaseOpcode(unsigned Opc) {
510 const MUBUFInfo *Info = getMUBUFInfoFromOpcode(Opc);
511 return Info ? Info->BaseOpcode : -1;
512}
513
514int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements) {
515 const MUBUFInfo *Info =
516 getMUBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
517 return Info ? Info->Opcode : -1;
518}
519
520int getMUBUFElements(unsigned Opc) {
521 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
522 return Info ? Info->elements : 0;
523}
524
525bool getMUBUFHasVAddr(unsigned Opc) {
526 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
527 return Info && Info->has_vaddr;
528}
529
530bool getMUBUFHasSrsrc(unsigned Opc) {
531 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
532 return Info && Info->has_srsrc;
533}
534
535bool getMUBUFHasSoffset(unsigned Opc) {
536 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
537 return Info && Info->has_soffset;
538}
539
540bool getMUBUFIsBufferInv(unsigned Opc) {
541 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
542 return Info && Info->IsBufferInv;
543}
544
545bool getMUBUFTfe(unsigned Opc) {
546 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
547 return Info && Info->tfe;
548}
549
550bool getSMEMIsBuffer(unsigned Opc) {
551 const SMInfo *Info = getSMEMOpcodeHelper(Opc);
552 return Info && Info->IsBuffer;
553}
554
555bool getVOP1IsSingle(unsigned Opc) {
556 const VOPInfo *Info = getVOP1OpcodeHelper(Opc);
557 return !Info || Info->IsSingle;
558}
559
560bool getVOP2IsSingle(unsigned Opc) {
561 const VOPInfo *Info = getVOP2OpcodeHelper(Opc);
562 return !Info || Info->IsSingle;
563}
564
565bool getVOP3IsSingle(unsigned Opc) {
566 const VOPInfo *Info = getVOP3OpcodeHelper(Opc);
567 return !Info || Info->IsSingle;
568}
569
570bool isVOPC64DPP(unsigned Opc) {
571 return isVOPC64DPPOpcodeHelper(Opc) || isVOPC64DPP8OpcodeHelper(Opc);
572}
573
574bool isVOPCAsmOnly(unsigned Opc) { return isVOPCAsmOnlyOpcodeHelper(Opc); }
575
576bool getMAIIsDGEMM(unsigned Opc) {
577 const MAIInstInfo *Info = getMAIInstInfoHelper(Opc);
578 return Info && Info->is_dgemm;
579}
580
581bool getMAIIsGFX940XDL(unsigned Opc) {
582 const MAIInstInfo *Info = getMAIInstInfoHelper(Opc);
583 return Info && Info->is_gfx940_xdl;
584}
585
586bool getWMMAIsXDL(unsigned Opc) {
587 const WMMAInstInfo *Info = getWMMAInstInfoHelper(Opc);
588 return Info ? Info->is_wmma_xdl : false;
589}
590
591bool getHasMatrixScale(unsigned Opc) {
592 const WMMAInstInfo *Info = getWMMAInstInfoHelper(Opc);
593 return Info && Info->HasMatrixScale;
594}
595
597 switch (EncodingVal) {
600 return 6;
602 return 4;
605 default:
606 return 8;
607 }
608
609 llvm_unreachable("covered switch over mfma scale formats");
610}
611
613 unsigned BLGP,
614 unsigned F8F8Opcode) {
615 uint8_t SrcANumRegs = mfmaScaleF8F6F4FormatToNumRegs(CBSZ);
616 uint8_t SrcBNumRegs = mfmaScaleF8F6F4FormatToNumRegs(BLGP);
617 return getMFMA_F8F6F4_InstWithNumRegs(SrcANumRegs, SrcBNumRegs, F8F8Opcode);
618}
619
621 switch (Fmt) {
624 return 16;
627 return 12;
629 return 8;
630 }
631
632 llvm_unreachable("covered switch over wmma scale formats");
633}
634
636 unsigned FmtB,
637 unsigned F8F8Opcode) {
638 uint8_t SrcANumRegs = wmmaScaleF8F6F4FormatToNumRegs(FmtA);
639 uint8_t SrcBNumRegs = wmmaScaleF8F6F4FormatToNumRegs(FmtB);
640 return getMFMA_F8F6F4_InstWithNumRegs(SrcANumRegs, SrcBNumRegs, F8F8Opcode);
641}
642
644 if (ST.hasFeature(AMDGPU::FeatureGFX13Insts))
646 if (ST.hasFeature(AMDGPU::FeatureGFX1250Insts))
648 if (ST.hasFeature(AMDGPU::FeatureGFX12Insts))
650 if (ST.hasFeature(AMDGPU::FeatureGFX11_7Insts))
652 if (ST.hasFeature(AMDGPU::FeatureGFX11Insts))
654 llvm_unreachable("Subtarget generation does not support VOPD!");
655}
656
/// \returns Whether \p Opc may serve as the X and/or Y component of a VOPD
/// instruction in \p EncodingFamily. \p VOPD3 selects the VOPD3 layout; in
/// that layout, logical ops convertible via getBitOp2 are queried as
/// V_BITOP3_B32_e64 instead.
CanBeVOPD getCanBeVOPD(unsigned Opc, unsigned EncodingFamily, bool VOPD3) {
  // For VOPD3, AND/OR/XOR/XNOR can be represented by V_BITOP3_B32 with an
  // immediate (non-zero getBitOp2 result), so look that opcode up instead.
  bool IsConvertibleToBitOp = VOPD3 ? getBitOp2(Opc) : 0;
  Opc = IsConvertibleToBitOp ? (unsigned)AMDGPU::V_BITOP3_B32_e64 : Opc;
  const VOPDComponentInfo *Info = getVOPDComponentHelper(Opc);
  if (Info) {
    // Check that Opc can be used as VOPDY for this encoding. V_MOV_B32 as a
    // VOPDX is just a placeholder here, it is supported on all encodings.
    // TODO: This can be optimized by creating tables of supported VOPDY
    // opcodes per encoding.
    unsigned VOPDMov = AMDGPU::getVOPDOpcode(AMDGPU::V_MOV_B32_e32, VOPD3);
    bool CanBeVOPDX;
    if (VOPD3) {
      // VOPD3 X-slot support must be probed per encoding family by pairing
      // Opc with the always-available MOV.
      CanBeVOPDX = getVOPDFull(AMDGPU::getVOPDOpcode(Opc, VOPD3), VOPDMov,
                               EncodingFamily, VOPD3) != -1;
    } else {
      // The list of VOPDX opcodes is currently the same in all encoding
      // families, so we do not need a family-specific check.
      CanBeVOPDX = Info->CanBeVOPDX;
    }
    bool CanBeVOPDY = getVOPDFull(VOPDMov, AMDGPU::getVOPDOpcode(Opc, VOPD3),
                                  EncodingFamily, VOPD3) != -1;
    return {CanBeVOPDX, CanBeVOPDY};
  }

  return {false, false};
}
683
684unsigned getVOPDOpcode(unsigned Opc, bool VOPD3) {
685 bool IsConvertibleToBitOp = VOPD3 ? getBitOp2(Opc) : 0;
686 Opc = IsConvertibleToBitOp ? (unsigned)AMDGPU::V_BITOP3_B32_e64 : Opc;
687 const VOPDComponentInfo *Info = getVOPDComponentHelper(Opc);
688 return Info ? Info->VOPDOp : ~0u;
689}
690
/// \returns True if \p Opc is a VOPD instruction, identified by the presence
/// of the X-component src0X operand (only VOPD encodings define it).
bool isVOPD(unsigned Opc) {
  return AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0X);
}
694
695bool isMAC(unsigned Opc) {
696 return Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
697 Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
698 Opc == AMDGPU::V_MAC_F32_e64_vi ||
699 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
700 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
701 Opc == AMDGPU::V_MAC_F16_e64_vi ||
702 Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
703 Opc == AMDGPU::V_FMAC_F64_e64_gfx12 ||
704 Opc == AMDGPU::V_FMAC_F64_e64_gfx13 ||
705 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
706 Opc == AMDGPU::V_FMAC_F32_e64_gfx11 ||
707 Opc == AMDGPU::V_FMAC_F32_e64_gfx12 ||
708 Opc == AMDGPU::V_FMAC_F32_e64_gfx13 ||
709 Opc == AMDGPU::V_FMAC_F32_e64_vi ||
710 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
711 Opc == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 ||
712 Opc == AMDGPU::V_FMAC_F16_e64_gfx10 ||
713 Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx11 ||
714 Opc == AMDGPU::V_FMAC_F16_fake16_e64_gfx11 ||
715 Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx12 ||
716 Opc == AMDGPU::V_FMAC_F16_fake16_e64_gfx12 ||
717 Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx13 ||
718 Opc == AMDGPU::V_FMAC_F16_fake16_e64_gfx13 ||
719 Opc == AMDGPU::V_DOT2C_F32_F16_e64_vi ||
720 Opc == AMDGPU::V_DOT2C_F32_BF16_e64_vi ||
721 Opc == AMDGPU::V_DOT2C_I32_I16_e64_vi ||
722 Opc == AMDGPU::V_DOT4C_I32_I8_e64_vi ||
723 Opc == AMDGPU::V_DOT8C_I32_I4_e64_vi;
724}
725
726bool isPermlane16(unsigned Opc) {
727 return Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
728 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10 ||
729 Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx11 ||
730 Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx11 ||
731 Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx12 ||
732 Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx12 ||
733 Opc == AMDGPU::V_PERMLANE16_VAR_B32_e64_gfx12 ||
734 Opc == AMDGPU::V_PERMLANEX16_VAR_B32_e64_gfx12;
735}
736
738 return Opc == AMDGPU::V_CVT_F32_BF8_e64_gfx12 ||
739 Opc == AMDGPU::V_CVT_F32_FP8_e64_gfx12 ||
740 Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp_gfx12 ||
741 Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp_gfx12 ||
742 Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp8_gfx12 ||
743 Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp8_gfx12 ||
744 Opc == AMDGPU::V_CVT_PK_F32_BF8_fake16_e64_gfx12 ||
745 Opc == AMDGPU::V_CVT_PK_F32_FP8_fake16_e64_gfx12 ||
746 Opc == AMDGPU::V_CVT_PK_F32_BF8_t16_e64_gfx12 ||
747 Opc == AMDGPU::V_CVT_PK_F32_FP8_t16_e64_gfx12;
748}
749
750bool isGenericAtomic(unsigned Opc) {
751 return Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SWAP ||
752 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_ADD ||
753 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB ||
754 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMIN ||
755 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMIN ||
756 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMAX ||
757 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMAX ||
758 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_AND ||
759 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_OR ||
760 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_XOR ||
761 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_INC ||
762 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC ||
763 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD ||
764 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMIN ||
765 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMAX ||
766 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_CMPSWAP ||
767 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB_CLAMP_U32 ||
768 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_COND_SUB_U32 ||
769 Opc == AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG;
770}
771
772bool isAsyncStore(unsigned Opc) {
773 return Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B8_gfx1250 ||
774 Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B32_gfx1250 ||
775 Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B64_gfx1250 ||
776 Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B128_gfx1250 ||
777 Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B8_SADDR_gfx1250 ||
778 Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B32_SADDR_gfx1250 ||
779 Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B64_SADDR_gfx1250 ||
780 Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B128_SADDR_gfx1250;
781}
782
783bool isTensorStore(unsigned Opc) {
784 return Opc == TENSOR_STORE_FROM_LDS_d2_gfx1250 ||
785 Opc == TENSOR_STORE_FROM_LDS_d4_gfx1250;
786}
787
788unsigned getTemporalHintType(const MCInstrDesc TID) {
791 unsigned Opc = TID.getOpcode();
792 // Async and Tensor store should have the temporal hint type of TH_TYPE_STORE
793 if (TID.mayStore() &&
794 (isAsyncStore(Opc) || isTensorStore(Opc) || !TID.mayLoad()))
795 return CPol::TH_TYPE_STORE;
796
797 // This will default to returning TH_TYPE_LOAD when neither MayStore nor
798 // MayLoad flag is present which is the case with instructions like
799 // image_get_resinfo.
800 return CPol::TH_TYPE_LOAD;
801}
802
803bool isTrue16Inst(unsigned Opc) {
804 const VOPTrue16Info *Info = getTrue16OpcodeHelper(Opc);
805 return Info && Info->IsTrue16;
806}
807
809 const FP4FP8DstByteSelInfo *Info = getFP4FP8DstByteSelHelper(Opc);
810 if (!Info)
811 return FPType::None;
812 if (Info->HasFP8DstByteSel)
813 return FPType::FP8;
814 if (Info->HasFP4DstByteSel)
815 return FPType::FP4;
816
817 return FPType::None;
818}
819
820bool isDPMACCInstruction(unsigned Opc) {
821 const DPMACCInstructionInfo *Info = getDPMACCInstructionHelper(Opc);
822 return Info && Info->IsDPMACCInstruction;
823}
824
825unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc) {
826 const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom2AddrOpcode(Opc);
827 return Info ? Info->Opcode3Addr : ~0u;
828}
829
830unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc) {
831 const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom3AddrOpcode(Opc);
832 return Info ? Info->Opcode2Addr : ~0u;
833}
834
// Wrapper for Tablegen'd function. enum Subtarget is not defined in any
// header files, so we need to wrap it in a function that takes unsigned
// instead.
/// \returns The MC opcode for pseudo \p Opcode in subtarget generation
/// \p Gen (cast to the TableGen'd Subtarget enum).
int32_t getMCOpcode(uint32_t Opcode, unsigned Gen) {
  return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen));
}
841
/// \returns A non-zero immediate with which the two-operand logical op
/// \p Opc can be re-expressed as V_BITOP3_B32_e64 (see the conversions in
/// getCanBeVOPD/getVOPDOpcode/getVOPDFull), or 0 if \p Opc is not one of
/// the convertible ops. The constants are presumably the BITOP3 truth-table
/// encodings of AND/OR/XOR/XNOR — confirm against the ISA manual.
unsigned getBitOp2(unsigned Opc) {
  switch (Opc) {
  default:
    return 0;
  case AMDGPU::V_AND_B32_e32:
    return 0x40;
  case AMDGPU::V_OR_B32_e32:
    return 0x54;
  case AMDGPU::V_XOR_B32_e32:
    return 0x14;
  case AMDGPU::V_XNOR_B32_e32:
    return 0x41;
  }
}
856
857int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily,
858 bool VOPD3) {
859 bool IsConvertibleToBitOp = VOPD3 ? getBitOp2(OpY) : 0;
860 OpY = IsConvertibleToBitOp ? (unsigned)AMDGPU::V_BITOP3_B32_e64 : OpY;
861 const VOPDInfo *Info =
862 getVOPDInfoFromComponentOpcodes(OpX, OpY, EncodingFamily, VOPD3);
863 return Info ? Info->Opcode : -1;
864}
865
866std::pair<unsigned, unsigned> getVOPDComponents(unsigned VOPDOpcode) {
867 const VOPDInfo *Info = getVOPDOpcodeHelper(VOPDOpcode);
868 assert(Info);
869 const auto *OpX = getVOPDBaseFromComponent(Info->OpX);
870 const auto *OpY = getVOPDBaseFromComponent(Info->OpY);
871 assert(OpX && OpY);
872 return {OpX->BaseVOP, OpY->BaseVOP};
873}
874
875namespace VOPD {
876
877ComponentProps::ComponentProps(const MCInstrDesc &OpDesc, bool VOP3Layout) {
879
882 auto TiedIdx = OpDesc.getOperandConstraint(Component::SRC2, MCOI::TIED_TO);
883 assert(TiedIdx == -1 || TiedIdx == Component::DST);
884 HasSrc2Acc = TiedIdx != -1;
885 Opcode = OpDesc.getOpcode();
886
887 IsVOP3 = VOP3Layout || (OpDesc.TSFlags & SIInstrFlags::VOP3);
888 SrcOperandsNum = AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src2) ? 3
889 : AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm) ? 3
890 : AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src1) ? 2
891 : 1;
892 assert(SrcOperandsNum <= Component::MAX_SRC_NUM);
893
894 if (Opcode == AMDGPU::V_CNDMASK_B32_e32 ||
895 Opcode == AMDGPU::V_CNDMASK_B32_e64) {
896 // CNDMASK is an awkward exception, it has FP modifiers, but not FP
897 // operands.
898 NumVOPD3Mods = 2;
899 if (IsVOP3)
900 SrcOperandsNum = 3;
901 } else if (isSISrcFPOperand(OpDesc,
902 getNamedOperandIdx(Opcode, OpName::src0))) {
903 // All FP VOPD instructions have Neg modifiers for all operands except
904 // for tied src2.
905 NumVOPD3Mods = SrcOperandsNum;
906 if (HasSrc2Acc)
907 --NumVOPD3Mods;
908 }
909
910 if (OpDesc.TSFlags & SIInstrFlags::VOP3)
911 return;
912
913 auto OperandsNum = OpDesc.getNumOperands();
914 unsigned CompOprIdx;
915 for (CompOprIdx = Component::SRC1; CompOprIdx < OperandsNum; ++CompOprIdx) {
916 if (OpDesc.operands()[CompOprIdx].OperandType == AMDGPU::OPERAND_KIMM32) {
917 MandatoryLiteralIdx = CompOprIdx;
918 break;
919 }
920 }
921}
922
924 return getNamedOperandIdx(Opcode, OpName::bitop3);
925}
926
927unsigned ComponentInfo::getIndexInParsedOperands(unsigned CompOprIdx) const {
928 assert(CompOprIdx < Component::MAX_OPR_NUM);
929
930 if (CompOprIdx == Component::DST)
932
933 auto CompSrcIdx = CompOprIdx - Component::DST_NUM;
934 if (CompSrcIdx < getCompParsedSrcOperandsNum())
935 return getIndexOfSrcInParsedOperands(CompSrcIdx);
936
937 // The specified operand does not exist.
938 return 0;
939}
940
942 std::function<MCRegister(unsigned, unsigned)> GetRegIdx,
943 const MCRegisterInfo &MRI, bool SkipSrc, bool AllowSameVGPR,
944 bool VOPD3) const {
945
946 auto OpXRegs = getRegIndices(ComponentIndex::X, GetRegIdx,
947 CompInfo[ComponentIndex::X].isVOP3());
948 auto OpYRegs = getRegIndices(ComponentIndex::Y, GetRegIdx,
949 CompInfo[ComponentIndex::Y].isVOP3());
950
951 const auto banksOverlap = [&MRI](MCRegister X, MCRegister Y,
952 unsigned BanksMask) -> bool {
953 MCRegister BaseX = MRI.getSubReg(X, AMDGPU::sub0);
954 MCRegister BaseY = MRI.getSubReg(Y, AMDGPU::sub0);
955 if (!BaseX)
956 BaseX = X;
957 if (!BaseY)
958 BaseY = Y;
959 if ((BaseX.id() & BanksMask) == (BaseY.id() & BanksMask))
960 return true;
961 if (BaseX != X /* This is 64-bit register */ &&
962 ((BaseX.id() + 1) & BanksMask) == (BaseY.id() & BanksMask))
963 return true;
964 if (BaseY != Y &&
965 (BaseX.id() & BanksMask) == ((BaseY.id() + 1) & BanksMask))
966 return true;
967
968 // If both are 64-bit bank conflict will be detected yet while checking
969 // the first subreg.
970 return false;
971 };
972
973 unsigned CompOprIdx;
974 for (CompOprIdx = 0; CompOprIdx < Component::MAX_OPR_NUM; ++CompOprIdx) {
975 unsigned BanksMasks = VOPD3 ? VOPD3_VGPR_BANK_MASKS[CompOprIdx]
976 : VOPD_VGPR_BANK_MASKS[CompOprIdx];
977 if (!OpXRegs[CompOprIdx] || !OpYRegs[CompOprIdx])
978 continue;
979
980 if (getVGPREncodingMSBs(OpXRegs[CompOprIdx], MRI) !=
981 getVGPREncodingMSBs(OpYRegs[CompOprIdx], MRI))
982 return CompOprIdx;
983
984 if (SkipSrc && CompOprIdx >= Component::DST_NUM)
985 continue;
986
987 if (CompOprIdx < Component::DST_NUM) {
988 // Even if we do not check vdst parity, vdst operands still shall not
989 // overlap.
990 if (MRI.regsOverlap(OpXRegs[CompOprIdx], OpYRegs[CompOprIdx]))
991 return CompOprIdx;
992 if (VOPD3) // No need to check dst parity.
993 continue;
994 }
995
996 if (banksOverlap(OpXRegs[CompOprIdx], OpYRegs[CompOprIdx], BanksMasks) &&
997 (!AllowSameVGPR || CompOprIdx < Component::DST_NUM ||
998 OpXRegs[CompOprIdx] != OpYRegs[CompOprIdx]))
999 return CompOprIdx;
1000 }
1001
1002 return {};
1003}
1004
1005// Return an array of VGPR registers [DST,SRC0,SRC1,SRC2] used
1006// by the specified component. If an operand is unused
1007// or is not a VGPR, the corresponding value is 0.
1008//
1009// GetRegIdx(Component, MCOperandIdx) must return a VGPR register index
1010// for the specified component and MC operand. The callback must return 0
1011// if the operand is not a register or not a VGPR.
1013InstInfo::getRegIndices(unsigned CompIdx,
1014 std::function<MCRegister(unsigned, unsigned)> GetRegIdx,
1015 bool VOPD3) const {
1016 assert(CompIdx < COMPONENTS_NUM);
1017
1018 const auto &Comp = CompInfo[CompIdx];
1020
1021 RegIndices[DST] = GetRegIdx(CompIdx, Comp.getIndexOfDstInMCOperands());
1022
1023 for (unsigned CompOprIdx : {SRC0, SRC1, SRC2}) {
1024 unsigned CompSrcIdx = CompOprIdx - DST_NUM;
1025 RegIndices[CompOprIdx] =
1026 Comp.hasRegSrcOperand(CompSrcIdx)
1027 ? GetRegIdx(CompIdx,
1028 Comp.getIndexOfSrcInMCOperands(CompSrcIdx, VOPD3))
1029 : MCRegister();
1030 }
1031 return RegIndices;
1032}
1033
1034} // namespace VOPD
1035
1037 return VOPD::InstInfo(OpX, OpY);
1038}
1039
1041 const MCInstrInfo *InstrInfo) {
1042 auto [OpX, OpY] = getVOPDComponents(VOPDOpcode);
1043 const auto &OpXDesc = InstrInfo->get(OpX);
1044 const auto &OpYDesc = InstrInfo->get(OpY);
1045 bool VOPD3 = InstrInfo->get(VOPDOpcode).TSFlags & SIInstrFlags::VOPD3;
1047 VOPD::ComponentInfo OpYInfo(OpYDesc, OpXInfo, VOPD3);
1048 return VOPD::InstInfo(OpXInfo, OpYInfo);
1049}
1050
1051namespace IsaInfo {
1052
1054 : STI(STI), XnackSetting(TargetIDSetting::Any),
1055 SramEccSetting(TargetIDSetting::Any) {
1056 if (!STI.getFeatureBits().test(FeatureSupportsXNACK))
1057 XnackSetting = TargetIDSetting::Unsupported;
1058 if (!STI.getFeatureBits().test(FeatureSupportsSRAMECC))
1059 SramEccSetting = TargetIDSetting::Unsupported;
1060}
1061
1063 // Check if xnack or sramecc is explicitly enabled or disabled. In the
1064 // absence of the target features we assume we must generate code that can run
1065 // in any environment.
1066 SubtargetFeatures Features(FS);
1067 std::optional<bool> XnackRequested;
1068 std::optional<bool> SramEccRequested;
1069
1070 for (const std::string &Feature : Features.getFeatures()) {
1071 if (Feature == "+xnack")
1072 XnackRequested = true;
1073 else if (Feature == "-xnack")
1074 XnackRequested = false;
1075 else if (Feature == "+sramecc")
1076 SramEccRequested = true;
1077 else if (Feature == "-sramecc")
1078 SramEccRequested = false;
1079 }
1080
1081 bool XnackSupported = isXnackSupported();
1082 bool SramEccSupported = isSramEccSupported();
1083
1084 if (XnackRequested) {
1085 if (XnackSupported) {
1086 XnackSetting =
1087 *XnackRequested ? TargetIDSetting::On : TargetIDSetting::Off;
1088 } else {
1089 // If a specific xnack setting was requested and this GPU does not support
1090 // xnack emit a warning. Setting will remain set to "Unsupported".
1091 if (*XnackRequested) {
1092 errs() << "warning: xnack 'On' was requested for a processor that does "
1093 "not support it!\n";
1094 } else {
1095 errs() << "warning: xnack 'Off' was requested for a processor that "
1096 "does not support it!\n";
1097 }
1098 }
1099 }
1100
1101 if (SramEccRequested) {
1102 if (SramEccSupported) {
1103 SramEccSetting =
1104 *SramEccRequested ? TargetIDSetting::On : TargetIDSetting::Off;
1105 } else {
1106 // If a specific sramecc setting was requested and this GPU does not
1107 // support sramecc emit a warning. Setting will remain set to
1108 // "Unsupported".
1109 if (*SramEccRequested) {
1110 errs() << "warning: sramecc 'On' was requested for a processor that "
1111 "does not support it!\n";
1112 } else {
1113 errs() << "warning: sramecc 'Off' was requested for a processor that "
1114 "does not support it!\n";
1115 }
1116 }
1117 }
1118}
1119
1120static TargetIDSetting
1122 if (FeatureString.ends_with("-"))
1123 return TargetIDSetting::Off;
1124 if (FeatureString.ends_with("+"))
1125 return TargetIDSetting::On;
1126
1127 llvm_unreachable("Malformed feature string");
1128}
1129
1131 SmallVector<StringRef, 3> TargetIDSplit;
1132 TargetID.split(TargetIDSplit, ':');
1133
1134 for (const auto &FeatureString : TargetIDSplit) {
1135 if (FeatureString.starts_with("xnack"))
1136 XnackSetting = getTargetIDSettingFromFeatureString(FeatureString);
1137 if (FeatureString.starts_with("sramecc"))
1138 SramEccSetting = getTargetIDSettingFromFeatureString(FeatureString);
1139 }
1140}
1141
1142void AMDGPUTargetID::print(raw_ostream &StreamRep) const {
1143 const Triple &TargetTriple = STI.getTargetTriple();
1144 auto Version = getIsaVersion(STI.getCPU());
1145
1146 StreamRep << TargetTriple.getArchName() << '-' << TargetTriple.getVendorName()
1147 << '-' << TargetTriple.getOSName() << '-'
1148 << TargetTriple.getEnvironmentName() << '-';
1149
1150 std::string Processor;
1151 // TODO: Following else statement is present here because we used various
1152 // alias names for GPUs up until GFX9 (e.g. 'fiji' is same as 'gfx803').
1153 // Remove once all aliases are removed from GCNProcessors.td.
1154 if (Version.Major >= 9)
1155 Processor = STI.getCPU().str();
1156 else
1157 Processor = (Twine("gfx") + Twine(Version.Major) + Twine(Version.Minor) +
1158 Twine(Version.Stepping))
1159 .str();
1160
1161 std::string Features;
1162 if (TargetTriple.getOS() == Triple::AMDHSA) {
1163 // sramecc.
1165 Features += ":sramecc-";
1167 Features += ":sramecc+";
1168 // xnack.
1170 Features += ":xnack-";
1172 Features += ":xnack+";
1173 }
1174
1175 StreamRep << Processor << Features;
1176}
1177
1178std::string AMDGPUTargetID::toString() const {
1179 std::string Str;
1180 raw_string_ostream OS(Str);
1181 OS << *this;
1182 return Str;
1183}
1184
1185unsigned getWavefrontSize(const MCSubtargetInfo *STI) {
1186 if (STI->getFeatureBits().test(FeatureWavefrontSize16))
1187 return 16;
1188 if (STI->getFeatureBits().test(FeatureWavefrontSize32))
1189 return 32;
1190
1191 return 64;
1192}
1193
1195 unsigned BytesPerCU = getAddressableLocalMemorySize(STI);
1196
1197 // "Per CU" really means "per whatever functional block the waves of a
1198 // workgroup must share". So the effective local memory size is doubled in
1199 // WGP mode on gfx10.
1200 if (isGFX10Plus(*STI) && !STI->getFeatureBits().test(FeatureCuMode))
1201 BytesPerCU *= 2;
1202
1203 return BytesPerCU;
1204}
1205
1207 if (STI->getFeatureBits().test(FeatureAddressableLocalMemorySize32768))
1208 return 32768;
1209 if (STI->getFeatureBits().test(FeatureAddressableLocalMemorySize65536))
1210 return 65536;
1211 if (STI->getFeatureBits().test(FeatureAddressableLocalMemorySize163840))
1212 return 163840;
1213 if (STI->getFeatureBits().test(FeatureAddressableLocalMemorySize327680))
1214 return 327680;
1215 return 32768;
1216}
1217
1218unsigned getEUsPerCU(const MCSubtargetInfo *STI) {
1219 // "Per CU" really means "per whatever functional block the waves of a
1220 // workgroup must share".
1221
1222 // GFX12.5 only supports CU mode, which contains four SIMDs.
1223 if (isGFX1250(*STI)) {
1224 assert(STI->getFeatureBits().test(FeatureCuMode));
1225 return 4;
1226 }
1227
1228 // For gfx10 in CU mode the functional block is the CU, which contains
1229 // two SIMDs.
1230 if (isGFX10Plus(*STI) && STI->getFeatureBits().test(FeatureCuMode))
1231 return 2;
1232
1233 // Pre-gfx10 a CU contains four SIMDs. For gfx10 in WGP mode the WGP
1234 // contains two CUs, so a total of four SIMDs.
1235 return 4;
1236}
1237
1239 unsigned FlatWorkGroupSize) {
1240 assert(FlatWorkGroupSize != 0);
1241 if (!STI->getTargetTriple().isAMDGCN())
1242 return 8;
1243 unsigned MaxWaves = getMaxWavesPerEU(STI) * getEUsPerCU(STI);
1244 unsigned N = getWavesPerWorkGroup(STI, FlatWorkGroupSize);
1245 if (N == 1) {
1246 // Single-wave workgroups don't consume barrier resources.
1247 return MaxWaves;
1248 }
1249
1250 unsigned MaxBarriers = 16;
1251 if (isGFX10Plus(*STI) && !STI->getFeatureBits().test(FeatureCuMode))
1252 MaxBarriers = 32;
1253
1254 return std::min(MaxWaves / N, MaxBarriers);
1255}
1256
// Minimum number of waves per execution unit; 1 for every subtarget.
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI) { return 1; }
1258
1259unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI) {
1260 // FIXME: Need to take scratch memory into account.
1261 if (isGFX90A(*STI))
1262 return 8;
1263 if (!isGFX10Plus(*STI))
1264 return 10;
1265 return hasGFX10_3Insts(*STI) ? 16 : 20;
1266}
1267
1269 unsigned FlatWorkGroupSize) {
1270 return divideCeil(getWavesPerWorkGroup(STI, FlatWorkGroupSize),
1271 getEUsPerCU(STI));
1272}
1273
// Minimum flat workgroup size; 1 for every subtarget.
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI) { return 1; }
1275
1277 unsigned FlatWorkGroupSize) {
1278 return divideCeil(FlatWorkGroupSize, getWavefrontSize(STI));
1279}
1280
1283 if (Version.Major >= 10)
1284 return getAddressableNumSGPRs(STI);
1285 if (Version.Major >= 8)
1286 return 16;
1287 return 8;
1288}
1289
// Granule used when encoding SGPR counts; 8 registers for every subtarget.
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI) { return 8; }
1291
1292unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI) {
1294 if (Version.Major >= 8)
1295 return 800;
1296 return 512;
1297}
1298
1300 if (STI->getFeatureBits().test(FeatureSGPRInitBug))
1302
1304 if (Version.Major >= 10)
1305 return 106;
1306 if (Version.Major >= 8)
1307 return 102;
1308 return 104;
1309}
1310
1311unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
1312 assert(WavesPerEU != 0);
1313
1315 if (Version.Major >= 10)
1316 return 0;
1317
1318 if (WavesPerEU >= getMaxWavesPerEU(STI))
1319 return 0;
1320
1321 unsigned MinNumSGPRs = getTotalNumSGPRs(STI) / (WavesPerEU + 1);
1322 if (STI->getFeatureBits().test(FeatureTrapHandler))
1323 MinNumSGPRs -= std::min(MinNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
1324 MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(STI)) + 1;
1325 return std::min(MinNumSGPRs, getAddressableNumSGPRs(STI));
1326}
1327
1328unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
1329 bool Addressable) {
1330 assert(WavesPerEU != 0);
1331
1332 unsigned AddressableNumSGPRs = getAddressableNumSGPRs(STI);
1334 if (Version.Major >= 10)
1335 return Addressable ? AddressableNumSGPRs : 108;
1336 if (Version.Major >= 8 && !Addressable)
1337 AddressableNumSGPRs = 112;
1338 unsigned MaxNumSGPRs = getTotalNumSGPRs(STI) / WavesPerEU;
1339 if (STI->getFeatureBits().test(FeatureTrapHandler))
1340 MaxNumSGPRs -= std::min(MaxNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
1341 MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(STI));
1342 return std::min(MaxNumSGPRs, AddressableNumSGPRs);
1343}
1344
1345unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
1346 bool FlatScrUsed, bool XNACKUsed) {
1347 unsigned ExtraSGPRs = 0;
1348 if (VCCUsed)
1349 ExtraSGPRs = 2;
1350
1352 if (Version.Major >= 10)
1353 return ExtraSGPRs;
1354
1355 if (Version.Major < 8) {
1356 if (FlatScrUsed)
1357 ExtraSGPRs = 4;
1358 } else {
1359 if (XNACKUsed)
1360 ExtraSGPRs = 4;
1361
1362 if (FlatScrUsed ||
1363 STI->getFeatureBits().test(AMDGPU::FeatureArchitectedFlatScratch))
1364 ExtraSGPRs = 6;
1365 }
1366
1367 return ExtraSGPRs;
1368}
1369
// Convenience overload: derives the XNACK-used flag from the subtarget's
// FeatureXNACK bit instead of taking it as an explicit argument.
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed) {
  return getNumExtraSGPRs(STI, VCCUsed, FlatScrUsed,
                          STI->getFeatureBits().test(AMDGPU::FeatureXNACK));
}
1375
/// \returns the number of whole allocation granules needed to hold
/// \p NumRegs registers; even a request for zero registers occupies one
/// register, so at least one granule is always counted.
static unsigned getGranulatedNumRegisterBlocks(unsigned NumRegs,
                                               unsigned Granule) {
  unsigned Effective = std::max(1u, NumRegs);
  return (Effective + Granule - 1) / Granule;
}
1380
1381unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs) {
1382 // SGPRBlocks is actual number of SGPR blocks minus 1.
1384 1;
1385}
1386
1388 unsigned DynamicVGPRBlockSize,
1389 std::optional<bool> EnableWavefrontSize32) {
1390 if (STI->getFeatureBits().test(FeatureGFX90AInsts))
1391 return 8;
1392
1393 if (DynamicVGPRBlockSize != 0)
1394 return DynamicVGPRBlockSize;
1395
1396 bool IsWave32 = EnableWavefrontSize32
1397 ? *EnableWavefrontSize32
1398 : STI->getFeatureBits().test(FeatureWavefrontSize32);
1399
1400 if (STI->getFeatureBits().test(Feature1536VGPRs))
1401 return IsWave32 ? 24 : 12;
1402
1403 if (hasGFX10_3Insts(*STI))
1404 return IsWave32 ? 16 : 8;
1405
1406 return IsWave32 ? 8 : 4;
1407}
1408
1410 std::optional<bool> EnableWavefrontSize32) {
1411 if (STI->getFeatureBits().test(FeatureGFX90AInsts))
1412 return 8;
1413
1414 bool IsWave32 = EnableWavefrontSize32
1415 ? *EnableWavefrontSize32
1416 : STI->getFeatureBits().test(FeatureWavefrontSize32);
1417
1418 if (STI->getFeatureBits().test(Feature1024AddressableVGPRs))
1419 return IsWave32 ? 16 : 8;
1420
1421 return IsWave32 ? 8 : 4;
1422}
1423
/// \returns the allocation granule, in registers, for architected VGPRs.
unsigned getArchVGPRAllocGranule() {
  constexpr unsigned Granule = 4;
  return Granule;
}
1425
1426unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) {
1427 if (STI->getFeatureBits().test(FeatureGFX90AInsts))
1428 return 512;
1429 if (!isGFX10Plus(*STI))
1430 return 256;
1431 bool IsWave32 = STI->getFeatureBits().test(FeatureWavefrontSize32);
1432 if (STI->getFeatureBits().test(Feature1536VGPRs))
1433 return IsWave32 ? 1536 : 768;
1434 return IsWave32 ? 1024 : 512;
1435}
1436
1438 const auto &Features = STI->getFeatureBits();
1439 if (Features.test(Feature1024AddressableVGPRs))
1440 return Features.test(FeatureWavefrontSize32) ? 1024 : 512;
1441 return 256;
1442}
1443
1445 unsigned DynamicVGPRBlockSize) {
1446 const auto &Features = STI->getFeatureBits();
1447 if (Features.test(FeatureGFX90AInsts))
1448 return 512;
1449
1450 if (DynamicVGPRBlockSize != 0)
1451 // On GFX12 we can allocate at most 8 blocks of VGPRs.
1452 return 8 * getVGPRAllocGranule(STI, DynamicVGPRBlockSize);
1453 return getAddressableNumArchVGPRs(STI);
1454}
1455
1457 unsigned NumVGPRs,
1458 unsigned DynamicVGPRBlockSize) {
1460 NumVGPRs, getVGPRAllocGranule(STI, DynamicVGPRBlockSize),
1462}
1463
/// \returns how many waves can run on one EU when each wave uses \p NumVGPRs
/// registers, given allocation granule \p Granule, the hardware wave limit
/// \p MaxWaves, and \p TotalNumVGPRs physical registers.
unsigned getNumWavesPerEUWithNumVGPRs(unsigned NumVGPRs, unsigned Granule,
                                      unsigned MaxWaves,
                                      unsigned TotalNumVGPRs) {
  // Below one granule, register pressure never limits occupancy.
  if (NumVGPRs < Granule)
    return MaxWaves;
  // Round the per-wave request up to a whole allocation granule.
  unsigned Rounded = ((NumVGPRs + Granule - 1) / Granule) * Granule;
  unsigned Waves = std::max(TotalNumVGPRs / Rounded, 1u);
  return std::min(Waves, MaxWaves);
}
1472
1473unsigned getOccupancyWithNumSGPRs(unsigned SGPRs, unsigned MaxWaves,
1475 if (Gen >= AMDGPUSubtarget::GFX10)
1476 return MaxWaves;
1477
1479 if (SGPRs <= 80)
1480 return 10;
1481 if (SGPRs <= 88)
1482 return 9;
1483 if (SGPRs <= 100)
1484 return 8;
1485 return 7;
1486 }
1487 if (SGPRs <= 48)
1488 return 10;
1489 if (SGPRs <= 56)
1490 return 9;
1491 if (SGPRs <= 64)
1492 return 8;
1493 if (SGPRs <= 72)
1494 return 7;
1495 if (SGPRs <= 80)
1496 return 6;
1497 return 5;
1498}
1499
// Smallest VGPR count a wave must stay at-or-below so that exactly
// \p WavesPerEU waves (rather than fewer) fit on one EU. Returns 0 when no
// such nonzero threshold exists.
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
                        unsigned DynamicVGPRBlockSize) {
  assert(WavesPerEU != 0);

  // In dynamic VGPR mode, (static) occupancy does not depend on VGPR usage,
  // so getMaxNumVGPRs does not depend on WavesPerEU, and thus we need to return
  // zero because there is no nonzero VGPR usage N where going below N
  // achieves higher (static) occupancy.
  bool DynamicVGPREnabled = (DynamicVGPRBlockSize != 0);
  if (DynamicVGPREnabled)
    return 0;

  // At (or above) the hardware maximum there is no higher occupancy to gain.
  unsigned MaxWavesPerEU = getMaxWavesPerEU(STI);
  if (WavesPerEU >= MaxWavesPerEU)
    return 0;

  unsigned TotNumVGPRs = getTotalNumVGPRs(STI);
  unsigned AddrsableNumVGPRs =
      getAddressableNumVGPRs(STI, DynamicVGPRBlockSize);
  unsigned Granule = getVGPRAllocGranule(STI, DynamicVGPRBlockSize);
  unsigned MaxNumVGPRs = alignDown(TotNumVGPRs / WavesPerEU, Granule);

  // If the budget at WavesPerEU equals the budget at maximum occupancy,
  // reducing usage cannot raise occupancy further.
  if (MaxNumVGPRs == alignDown(TotNumVGPRs / MaxWavesPerEU, Granule))
    return 0;

  // If WavesPerEU is below what the addressable-register limit already
  // guarantees, answer for the guaranteed occupancy level instead.
  unsigned MinWavesPerEU = getNumWavesPerEUWithNumVGPRs(STI, AddrsableNumVGPRs,
                                                        DynamicVGPRBlockSize);
  if (WavesPerEU < MinWavesPerEU)
    return getMinNumVGPRs(STI, MinWavesPerEU, DynamicVGPRBlockSize);

  // One register past the budget of (WavesPerEU + 1) waves, clamped to stay
  // within the current budget and the addressable range.
  unsigned MaxNumVGPRsNext = alignDown(TotNumVGPRs / (WavesPerEU + 1), Granule);
  unsigned MinNumVGPRs = 1 + std::min(MaxNumVGPRs - Granule, MaxNumVGPRsNext);
  return std::min(MinNumVGPRs, AddrsableNumVGPRs);
}
1534
1535unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
1536 unsigned DynamicVGPRBlockSize) {
1537 assert(WavesPerEU != 0);
1538
1539 // In dynamic VGPR mode, WavesPerEU does not imply a VGPR limit.
1540 bool DynamicVGPREnabled = (DynamicVGPRBlockSize != 0);
1541 unsigned MaxNumVGPRs =
1542 DynamicVGPREnabled
1543 ? getTotalNumVGPRs(STI)
1544 : alignDown(getTotalNumVGPRs(STI) / WavesPerEU,
1545 getVGPRAllocGranule(STI, DynamicVGPRBlockSize));
1546 unsigned AddressableNumVGPRs =
1547 getAddressableNumVGPRs(STI, DynamicVGPRBlockSize);
1548 return std::min(MaxNumVGPRs, AddressableNumVGPRs);
1549}
1550
1551unsigned getEncodedNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs,
1552 std::optional<bool> EnableWavefrontSize32) {
1554 NumVGPRs, getVGPREncodingGranule(STI, EnableWavefrontSize32)) -
1555 1;
1556}
1557
1559 unsigned NumVGPRs,
1560 unsigned DynamicVGPRBlockSize,
1561 std::optional<bool> EnableWavefrontSize32) {
1563 NumVGPRs,
1564 getVGPRAllocGranule(STI, DynamicVGPRBlockSize, EnableWavefrontSize32));
1565}
1566} // end namespace IsaInfo
1567
1569 const MCSubtargetInfo *STI) {
1571 KernelCode.amd_kernel_code_version_major = 1;
1572 KernelCode.amd_kernel_code_version_minor = 2;
1573 KernelCode.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
1574 KernelCode.amd_machine_version_major = Version.Major;
1575 KernelCode.amd_machine_version_minor = Version.Minor;
1576 KernelCode.amd_machine_version_stepping = Version.Stepping;
1578 if (STI->getFeatureBits().test(FeatureWavefrontSize32)) {
1579 KernelCode.wavefront_size = 5;
1581 } else {
1582 KernelCode.wavefront_size = 6;
1583 }
1584
1585 // If the code object does not support indirect functions, then the value must
1586 // be 0xffffffff.
1587 KernelCode.call_convention = -1;
1588
1589 // These alignment values are specified in powers of two, so alignment =
1590 // 2^n. The minimum alignment is 2^4 = 16.
1591 KernelCode.kernarg_segment_alignment = 4;
1592 KernelCode.group_segment_alignment = 4;
1593 KernelCode.private_segment_alignment = 4;
1594
1595 if (Version.Major >= 10) {
1596 KernelCode.compute_pgm_resource_registers |=
1597 S_00B848_WGP_MODE(STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1) |
1599 }
1600}
1601
1604}
1605
1608}
1609
1611 unsigned AS = GV->getAddressSpace();
1612 return AS == AMDGPUAS::CONSTANT_ADDRESS ||
1614}
1615
1617 return TT.getArch() == Triple::r600;
1618}
1619
/// \returns true if \p C is a recognized physical register prefix
/// (vector 'v', scalar 's', or accumulator 'a').
static bool isValidRegPrefix(char C) {
  switch (C) {
  case 'v':
  case 's':
  case 'a':
    return true;
  default:
    return false;
  }
}
1623
1624std::tuple<char, unsigned, unsigned> parseAsmPhysRegName(StringRef RegName) {
1625 char Kind = RegName.front();
1626 if (!isValidRegPrefix(Kind))
1627 return {};
1628
1629 RegName = RegName.drop_front();
1630 if (RegName.consume_front("[")) {
1631 unsigned Idx, End;
1632 bool Failed = RegName.consumeInteger(10, Idx);
1633 Failed |= !RegName.consume_front(":");
1634 Failed |= RegName.consumeInteger(10, End);
1635 Failed |= !RegName.consume_back("]");
1636 if (!Failed) {
1637 unsigned NumRegs = End - Idx + 1;
1638 if (NumRegs > 1)
1639 return {Kind, Idx, NumRegs};
1640 }
1641 } else {
1642 unsigned Idx;
1643 bool Failed = RegName.getAsInteger(10, Idx);
1644 if (!Failed)
1645 return {Kind, Idx, 1};
1646 }
1647
1648 return {};
1649}
1650
1651std::tuple<char, unsigned, unsigned>
1653 StringRef RegName = Constraint;
1654 if (!RegName.consume_front("{") || !RegName.consume_back("}"))
1655 return {};
1657}
1658
1659std::pair<unsigned, unsigned>
1661 std::pair<unsigned, unsigned> Default,
1662 bool OnlyFirstRequired) {
1663 if (auto Attr = getIntegerPairAttribute(F, Name, OnlyFirstRequired))
1664 return {Attr->first, Attr->second.value_or(Default.second)};
1665 return Default;
1666}
1667
1668std::optional<std::pair<unsigned, std::optional<unsigned>>>
1670 bool OnlyFirstRequired) {
1671 Attribute A = F.getFnAttribute(Name);
1672 if (!A.isStringAttribute())
1673 return std::nullopt;
1674
1675 LLVMContext &Ctx = F.getContext();
1676 std::pair<unsigned, std::optional<unsigned>> Ints;
1677 std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(',');
1678 if (Strs.first.trim().getAsInteger(0, Ints.first)) {
1679 Ctx.emitError("can't parse first integer attribute " + Name);
1680 return std::nullopt;
1681 }
1682 unsigned Second = 0;
1683 if (Strs.second.trim().getAsInteger(0, Second)) {
1684 if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
1685 Ctx.emitError("can't parse second integer attribute " + Name);
1686 return std::nullopt;
1687 }
1688 } else {
1689 Ints.second = Second;
1690 }
1691
1692 return Ints;
1693}
1694
1696 unsigned Size,
1697 unsigned DefaultVal) {
1698 std::optional<SmallVector<unsigned>> R =
1700 return R.has_value() ? *R : SmallVector<unsigned>(Size, DefaultVal);
1701}
1702
1703std::optional<SmallVector<unsigned>>
1705 assert(Size > 2);
1706 LLVMContext &Ctx = F.getContext();
1707
1708 Attribute A = F.getFnAttribute(Name);
1709 if (!A.isValid())
1710 return std::nullopt;
1711 if (!A.isStringAttribute()) {
1712 Ctx.emitError(Name + " is not a string attribute");
1713 return std::nullopt;
1714 }
1715
1717
1718 StringRef S = A.getValueAsString();
1719 unsigned i = 0;
1720 for (; !S.empty() && i < Size; i++) {
1721 std::pair<StringRef, StringRef> Strs = S.split(',');
1722 unsigned IntVal;
1723 if (Strs.first.trim().getAsInteger(0, IntVal)) {
1724 Ctx.emitError("can't parse integer attribute " + Strs.first + " in " +
1725 Name);
1726 return std::nullopt;
1727 }
1728 Vals[i] = IntVal;
1729 S = Strs.second;
1730 }
1731
1732 if (!S.empty() || i < Size) {
1733 Ctx.emitError("attribute " + Name +
1734 " has incorrect number of integers; expected " +
1736 return std::nullopt;
1737 }
1738 return Vals;
1739}
1740
1741bool hasValueInRangeLikeMetadata(const MDNode &MD, int64_t Val) {
1742 assert((MD.getNumOperands() % 2 == 0) && "invalid number of operands!");
1743 for (unsigned I = 0, E = MD.getNumOperands() / 2; I != E; ++I) {
1744 auto Low =
1745 mdconst::extract<ConstantInt>(MD.getOperand(2 * I + 0))->getValue();
1746 auto High =
1747 mdconst::extract<ConstantInt>(MD.getOperand(2 * I + 1))->getValue();
1748 // There are two types of [A; B) ranges:
1749 // A < B, e.g. [4; 5) which is a range that only includes 4.
1750 // A > B, e.g. [5; 4) which is a range that wraps around and includes
1751 // everything except 4.
1752 if (Low.ult(High)) {
1753 if (Low.ule(Val) && High.ugt(Val))
1754 return true;
1755 } else {
1756 if (Low.uge(Val) && High.ult(Val))
1757 return true;
1758 }
1759 }
1760
1761 return false;
1762}
1763
1765 return (1 << (getVmcntBitWidthLo(Version.Major) +
1766 getVmcntBitWidthHi(Version.Major))) -
1767 1;
1768}
1769
1771 return (1 << getLoadcntBitWidth(Version.Major)) - 1;
1772}
1773
1775 return (1 << getSamplecntBitWidth(Version.Major)) - 1;
1776}
1777
1779 return (1 << getBvhcntBitWidth(Version.Major)) - 1;
1780}
1781
1783 return (1 << getExpcntBitWidth(Version.Major)) - 1;
1784}
1785
1787 return (1 << getLgkmcntBitWidth(Version.Major)) - 1;
1788}
1789
1791 return (1 << getDscntBitWidth(Version.Major)) - 1;
1792}
1793
1795 return (1 << getKmcntBitWidth(Version.Major)) - 1;
1796}
1797
1799 return (1 << getXcntBitWidth(Version.Major, Version.Minor)) - 1;
1800}
1801
1803 return (1 << getAsynccntBitWidth(Version.Major, Version.Minor)) - 1;
1804}
1805
1807 return (1 << getStorecntBitWidth(Version.Major)) - 1;
1808}
1809
1811 bool HasExtendedWaitCounts = IV.Major >= 12;
1812 if (HasExtendedWaitCounts) {
1815 } else {
1818 }
1828}
1829
1831 unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(Version.Major),
1832 getVmcntBitWidthLo(Version.Major));
1833 unsigned Expcnt = getBitMask(getExpcntBitShift(Version.Major),
1834 getExpcntBitWidth(Version.Major));
1835 unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(Version.Major),
1836 getLgkmcntBitWidth(Version.Major));
1837 unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(Version.Major),
1838 getVmcntBitWidthHi(Version.Major));
1839 return VmcntLo | Expcnt | Lgkmcnt | VmcntHi;
1840}
1841
1842unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt) {
1843 unsigned VmcntLo = unpackBits(Waitcnt, getVmcntBitShiftLo(Version.Major),
1844 getVmcntBitWidthLo(Version.Major));
1845 unsigned VmcntHi = unpackBits(Waitcnt, getVmcntBitShiftHi(Version.Major),
1846 getVmcntBitWidthHi(Version.Major));
1847 return VmcntLo | VmcntHi << getVmcntBitWidthLo(Version.Major);
1848}
1849
// Extract the EXPCNT field from an encoded s_waitcnt value.
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getExpcntBitShift(Version.Major),
                    getExpcntBitWidth(Version.Major));
}
1854
// Extract the LGKMCNT field from an encoded s_waitcnt value.
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getLgkmcntBitShift(Version.Major),
                    getLgkmcntBitWidth(Version.Major));
}
1859
// Extract the LOADCNT field (shares its shift with STORECNT) from an encoded
// wait-count value.
unsigned decodeLoadcnt(const IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getLoadcntStorecntBitShift(Version.Major),
                    getLoadcntBitWidth(Version.Major));
}
1864
// Extract the STORECNT field (shares its shift with LOADCNT) from an encoded
// wait-count value.
unsigned decodeStorecnt(const IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getLoadcntStorecntBitShift(Version.Major),
                    getStorecntBitWidth(Version.Major));
}
1869
// Extract the DSCNT field from an encoded wait-count value.
unsigned decodeDscnt(const IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getDscntBitShift(Version.Major),
                    getDscntBitWidth(Version.Major));
}
1874
// Split an encoded s_waitcnt value into its VMCNT, EXPCNT, and LGKMCNT
// fields (written to the output parameters).
void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned &Vmcnt,
                   unsigned &Expcnt, unsigned &Lgkmcnt) {
  Vmcnt = decodeVmcnt(Version, Waitcnt);
  Expcnt = decodeExpcnt(Version, Waitcnt);
  Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
}
1881
// Pack \p Vmcnt into \p Waitcnt, splitting it across the separate low and
// high bit ranges of the VMCNT field. \returns the updated encoding.
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
                     unsigned Vmcnt) {
  Waitcnt = packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(Version.Major),
                     getVmcntBitWidthLo(Version.Major));
  return packBits(Vmcnt >> getVmcntBitWidthLo(Version.Major), Waitcnt,
                  getVmcntBitShiftHi(Version.Major),
                  getVmcntBitWidthHi(Version.Major));
}
1890
// Pack \p Expcnt into the EXPCNT field of \p Waitcnt. \returns the updated
// encoding.
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
                      unsigned Expcnt) {
  return packBits(Expcnt, Waitcnt, getExpcntBitShift(Version.Major),
                  getExpcntBitWidth(Version.Major));
}
1896
// Pack \p Lgkmcnt into the LGKMCNT field of \p Waitcnt. \returns the updated
// encoding.
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
                       unsigned Lgkmcnt) {
  return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(Version.Major),
                  getLgkmcntBitWidth(Version.Major));
}
1902
1903unsigned encodeWaitcnt(const IsaVersion &Version, unsigned Vmcnt,
1904 unsigned Expcnt, unsigned Lgkmcnt) {
1905 unsigned Waitcnt = getWaitcntBitMask(Version);
1907 Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
1908 Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
1909 return Waitcnt;
1910}
1911
1913 bool IsStore) {
1914 unsigned Dscnt = getBitMask(getDscntBitShift(Version.Major),
1915 getDscntBitWidth(Version.Major));
1916 if (IsStore) {
1917 unsigned Storecnt = getBitMask(getLoadcntStorecntBitShift(Version.Major),
1918 getStorecntBitWidth(Version.Major));
1919 return Dscnt | Storecnt;
1920 }
1921 unsigned Loadcnt = getBitMask(getLoadcntStorecntBitShift(Version.Major),
1922 getLoadcntBitWidth(Version.Major));
1923 return Dscnt | Loadcnt;
1924}
1925
// Pack \p Loadcnt into the shared LOADCNT/STORECNT bit position of
// \p Waitcnt. \returns the updated encoding.
static unsigned encodeLoadcnt(const IsaVersion &Version, unsigned Waitcnt,
                              unsigned Loadcnt) {
  return packBits(Loadcnt, Waitcnt, getLoadcntStorecntBitShift(Version.Major),
                  getLoadcntBitWidth(Version.Major));
}
1931
// Pack \p Storecnt into the shared LOADCNT/STORECNT bit position of
// \p Waitcnt. \returns the updated encoding.
static unsigned encodeStorecnt(const IsaVersion &Version, unsigned Waitcnt,
                               unsigned Storecnt) {
  return packBits(Storecnt, Waitcnt, getLoadcntStorecntBitShift(Version.Major),
                  getStorecntBitWidth(Version.Major));
}
1937
// Pack \p Dscnt into the DSCNT field of \p Waitcnt. \returns the updated
// encoding.
static unsigned encodeDscnt(const IsaVersion &Version, unsigned Waitcnt,
                            unsigned Dscnt) {
  return packBits(Dscnt, Waitcnt, getDscntBitShift(Version.Major),
                  getDscntBitWidth(Version.Major));
}
1943
1944unsigned encodeLoadcntDscnt(const IsaVersion &Version, unsigned Loadcnt,
1945 unsigned Dscnt) {
1946 unsigned Waitcnt = getCombinedCountBitMask(Version, false);
1947 Waitcnt = encodeLoadcnt(Version, Waitcnt, Loadcnt);
1949 return Waitcnt;
1950}
1951
1952unsigned encodeStorecntDscnt(const IsaVersion &Version, unsigned Storecnt,
1953 unsigned Dscnt) {
1954 unsigned Waitcnt = getCombinedCountBitMask(Version, true);
1955 Waitcnt = encodeStorecnt(Version, Waitcnt, Storecnt);
1957 return Waitcnt;
1958}
1959
1960//===----------------------------------------------------------------------===//
1961// Custom Operand Values
1962//===----------------------------------------------------------------------===//
1963
1965 int Size,
1966 const MCSubtargetInfo &STI) {
1967 unsigned Enc = 0;
1968 for (int Idx = 0; Idx < Size; ++Idx) {
1969 const auto &Op = Opr[Idx];
1970 if (Op.isSupported(STI))
1971 Enc |= Op.encode(Op.Default);
1972 }
1973 return Enc;
1974}
1975
1977 int Size, unsigned Code,
1978 bool &HasNonDefaultVal,
1979 const MCSubtargetInfo &STI) {
1980 unsigned UsedOprMask = 0;
1981 HasNonDefaultVal = false;
1982 for (int Idx = 0; Idx < Size; ++Idx) {
1983 const auto &Op = Opr[Idx];
1984 if (!Op.isSupported(STI))
1985 continue;
1986 UsedOprMask |= Op.getMask();
1987 unsigned Val = Op.decode(Code);
1988 if (!Op.isValid(Val))
1989 return false;
1990 HasNonDefaultVal |= (Val != Op.Default);
1991 }
1992 return (Code & ~UsedOprMask) == 0;
1993}
1994
// Decode the next custom operand supported on \p STI, advancing \p Idx past
// any unsupported entries as a side effect. On success writes the operand's
// name, decoded value, and whether that value equals the table default.
// \returns false once the table is exhausted.
static bool decodeCustomOperand(const CustomOperandVal *Opr, int Size,
                                unsigned Code, int &Idx, StringRef &Name,
                                unsigned &Val, bool &IsDefault,
                                const MCSubtargetInfo &STI) {
  while (Idx < Size) {
    const auto &Op = Opr[Idx++];
    if (Op.isSupported(STI)) {
      Name = Op.Name;
      Val = Op.decode(Code);
      IsDefault = (Val == Op.Default);
      return true;
    }
  }

  return false;
}
2011
2013 int64_t InputVal) {
2014 if (InputVal < 0 || InputVal > Op.Max)
2015 return OPR_VAL_INVALID;
2016 return Op.encode(InputVal);
2017}
2018
// Encode \p InputVal for the custom operand called \p Name, recording its
// bits in \p UsedOprMask. \returns the encoded value on success,
// OPR_ID_UNKNOWN if no entry matches the name, OPR_ID_UNSUPPORTED if every
// matching entry is unavailable on \p STI, or OPR_ID_DUPLICATE if the
// operand's bits were already consumed.
static int encodeCustomOperand(const CustomOperandVal *Opr, int Size,
                               const StringRef Name, int64_t InputVal,
                               unsigned &UsedOprMask,
                               const MCSubtargetInfo &STI) {
  int InvalidId = OPR_ID_UNKNOWN;
  for (int Idx = 0; Idx < Size; ++Idx) {
    const auto &Op = Opr[Idx];
    if (Op.Name == Name) {
      if (!Op.isSupported(STI)) {
        // Remember the miss but keep scanning: a later entry with the same
        // name may be supported on this subtarget.
        InvalidId = OPR_ID_UNSUPPORTED;
        continue;
      }
      auto OprMask = Op.getMask();
      if (OprMask & UsedOprMask)
        return OPR_ID_DUPLICATE;
      UsedOprMask |= OprMask;
      return encodeCustomOperandVal(Op, InputVal);
    }
  }
  return InvalidId;
}
2040
2041//===----------------------------------------------------------------------===//
2042// DepCtr
2043//===----------------------------------------------------------------------===//
2044
2045namespace DepCtr {
2046
2048 static int Default = -1;
2049 if (Default == -1)
2051 return Default;
2052}
2053
2054bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal,
2055 const MCSubtargetInfo &STI) {
2057 HasNonDefaultVal, STI);
2058}
2059
// Decode the next s_delay_alu/DepCtr operand from \p Code; thin wrapper over
// decodeCustomOperand with the DepCtr table.
bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val,
                  bool &IsDefault, const MCSubtargetInfo &STI) {
  return decodeCustomOperand(DepCtrInfo, DEP_CTR_SIZE, Code, Id, Name, Val,
                             IsDefault, STI);
}
2065
// Encode a named DepCtr operand value; thin wrapper over encodeCustomOperand
// with the DepCtr table.
int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask,
                 const MCSubtargetInfo &STI) {
  return encodeCustomOperand(DepCtrInfo, DEP_CTR_SIZE, Name, Val, UsedOprMask,
                             STI);
}
2071
// All-ones mask of the VA_VDST field's width.
unsigned getVaVdstBitMask() { return (1 << getVaVdstBitWidth()) - 1; }
2073
// All-ones mask of the VA_SDST field's width.
unsigned getVaSdstBitMask() { return (1 << getVaSdstBitWidth()) - 1; }
2075
// All-ones mask of the VA_SSRC field's width.
unsigned getVaSsrcBitMask() { return (1 << getVaSsrcBitWidth()) - 1; }
2077
2079 return (1 << getHoldCntWidth(Version.Major, Version.Minor)) - 1;
2080}
2081
// All-ones mask of the VM_VSRC field's width.
unsigned getVmVsrcBitMask() { return (1 << getVmVsrcBitWidth()) - 1; }
2083
// All-ones mask of the VA_VCC field's width.
unsigned getVaVccBitMask() { return (1 << getVaVccBitWidth()) - 1; }
2085
// All-ones mask of the SA_SDST field's width.
unsigned getSaSdstBitMask() { return (1 << getSaSdstBitWidth()) - 1; }
2087
// Extract the VM_VSRC field from an encoded DepCtr value.
unsigned decodeFieldVmVsrc(unsigned Encoded) {
  return unpackBits(Encoded, getVmVsrcBitShift(), getVmVsrcBitWidth());
}
2091
// Extract the VA_VDST field from an encoded DepCtr value.
unsigned decodeFieldVaVdst(unsigned Encoded) {
  return unpackBits(Encoded, getVaVdstBitShift(), getVaVdstBitWidth());
}
2095
// Extract the SA_SDST field from an encoded DepCtr value.
unsigned decodeFieldSaSdst(unsigned Encoded) {
  return unpackBits(Encoded, getSaSdstBitShift(), getSaSdstBitWidth());
}
2099
// Extract the VA_SDST field from an encoded DepCtr value.
unsigned decodeFieldVaSdst(unsigned Encoded) {
  return unpackBits(Encoded, getVaSdstBitShift(), getVaSdstBitWidth());
}
2103
// Extract the VA_VCC field from an encoded DepCtr value.
unsigned decodeFieldVaVcc(unsigned Encoded) {
  return unpackBits(Encoded, getVaVccBitShift(), getVaVccBitWidth());
}
2107
// Extract the VA_SSRC field from an encoded DepCtr value.
unsigned decodeFieldVaSsrc(unsigned Encoded) {
  return unpackBits(Encoded, getVaSsrcBitShift(), getVaSsrcBitWidth());
}
2111
// Extract the HOLD_CNT field from an encoded DepCtr value; the field's width
// depends on the ISA version.
unsigned decodeFieldHoldCnt(unsigned Encoded, const IsaVersion &Version) {
  return unpackBits(Encoded, getHoldCntBitShift(),
                    getHoldCntWidth(Version.Major, Version.Minor));
}
2116
// Pack \p VmVsrc into the VM_VSRC field of \p Encoded.
unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc) {
  return packBits(VmVsrc, Encoded, getVmVsrcBitShift(), getVmVsrcBitWidth());
}
2120
// Convenience overload: pack \p VmVsrc into the subtarget's default DepCtr
// encoding.
unsigned encodeFieldVmVsrc(unsigned VmVsrc, const MCSubtargetInfo &STI) {
  unsigned Encoded = getDefaultDepCtrEncoding(STI);
  return encodeFieldVmVsrc(Encoded, VmVsrc);
}
2125
// Pack \p VaVdst into the VA_VDST field of \p Encoded.
unsigned encodeFieldVaVdst(unsigned Encoded, unsigned VaVdst) {
  return packBits(VaVdst, Encoded, getVaVdstBitShift(), getVaVdstBitWidth());
}
2129
// Convenience overload: pack \p VaVdst into the subtarget's default DepCtr
// encoding.
unsigned encodeFieldVaVdst(unsigned VaVdst, const MCSubtargetInfo &STI) {
  unsigned Encoded = getDefaultDepCtrEncoding(STI);
  return encodeFieldVaVdst(Encoded, VaVdst);
}
2134
// Pack \p SaSdst into the SA_SDST field of \p Encoded.
unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst) {
  return packBits(SaSdst, Encoded, getSaSdstBitShift(), getSaSdstBitWidth());
}
2138
// Convenience overload: pack \p SaSdst into the subtarget's default DepCtr
// encoding.
unsigned encodeFieldSaSdst(unsigned SaSdst, const MCSubtargetInfo &STI) {
  unsigned Encoded = getDefaultDepCtrEncoding(STI);
  return encodeFieldSaSdst(Encoded, SaSdst);
}
2143
// Pack \p VaSdst into the VA_SDST field of \p Encoded.
unsigned encodeFieldVaSdst(unsigned Encoded, unsigned VaSdst) {
  return packBits(VaSdst, Encoded, getVaSdstBitShift(), getVaSdstBitWidth());
}
2147
// Convenience overload: pack \p VaSdst into the subtarget's default DepCtr
// encoding.
unsigned encodeFieldVaSdst(unsigned VaSdst, const MCSubtargetInfo &STI) {
  unsigned Encoded = getDefaultDepCtrEncoding(STI);
  return encodeFieldVaSdst(Encoded, VaSdst);
}
2152
// Pack \p VaVcc into the VA_VCC field of \p Encoded.
unsigned encodeFieldVaVcc(unsigned Encoded, unsigned VaVcc) {
  return packBits(VaVcc, Encoded, getVaVccBitShift(), getVaVccBitWidth());
}
2156
// Convenience overload: pack \p VaVcc into the subtarget's default DepCtr
// encoding.
unsigned encodeFieldVaVcc(unsigned VaVcc, const MCSubtargetInfo &STI) {
  unsigned Encoded = getDefaultDepCtrEncoding(STI);
  return encodeFieldVaVcc(Encoded, VaVcc);
}
2161
// Pack \p VaSsrc into the VA_SSRC field of \p Encoded.
unsigned encodeFieldVaSsrc(unsigned Encoded, unsigned VaSsrc) {
  return packBits(VaSsrc, Encoded, getVaSsrcBitShift(), getVaSsrcBitWidth());
}
2165
// Convenience overload: pack \p VaSsrc into the subtarget's default DepCtr
// encoding.
unsigned encodeFieldVaSsrc(unsigned VaSsrc, const MCSubtargetInfo &STI) {
  unsigned Encoded = getDefaultDepCtrEncoding(STI);
  return encodeFieldVaSsrc(Encoded, VaSsrc);
}
2170
// Pack \p HoldCnt into the HOLD_CNT field of \p Encoded; the field's width
// depends on the ISA version.
unsigned encodeFieldHoldCnt(unsigned Encoded, unsigned HoldCnt,
                            const IsaVersion &Version) {
  return packBits(HoldCnt, Encoded, getHoldCntBitShift(),
                  getHoldCntWidth(Version.Major, Version.Minor));
}
2176
2177unsigned encodeFieldHoldCnt(unsigned HoldCnt, const MCSubtargetInfo &STI) {
2178 unsigned Encoded = getDefaultDepCtrEncoding(STI);
2179 return encodeFieldHoldCnt(Encoded, HoldCnt, getIsaVersion(STI.getCPU()));
2180}
2181
2182} // namespace DepCtr
2183
2184//===----------------------------------------------------------------------===//
2185// exp tgt
2186//===----------------------------------------------------------------------===//
2187
2188namespace Exp {
2189
2190struct ExpTgt {
2192 unsigned Tgt;
2193 unsigned MaxIndex;
2194};
2195
// clang-format off
// Export target name table: {printable name, first target id, highest
// allowed numeric suffix}. A MaxIndex of 0 means the mnemonic takes no
// numeric suffix (e.g. "null"); otherwise "name0".."name<MaxIndex>" map to
// ids Tgt..Tgt+MaxIndex (see getTgtName/getTgtId below).
static constexpr ExpTgt ExpTgtInfo[] = {
  {{"null"},          ET_NULL,            ET_NULL_MAX_IDX},
  {{"mrtz"},          ET_MRTZ,            ET_MRTZ_MAX_IDX},
  {{"prim"},          ET_PRIM,            ET_PRIM_MAX_IDX},
  {{"mrt"},           ET_MRT0,            ET_MRT_MAX_IDX},
  {{"pos"},           ET_POS0,            ET_POS_MAX_IDX},
  {{"dual_src_blend"},ET_DUAL_SRC_BLEND0, ET_DUAL_SRC_BLEND_MAX_IDX},
  {{"param"},         ET_PARAM0,          ET_PARAM_MAX_IDX},
};
// clang-format on
2207
2208bool getTgtName(unsigned Id, StringRef &Name, int &Index) {
2209 for (const ExpTgt &Val : ExpTgtInfo) {
2210 if (Val.Tgt <= Id && Id <= Val.Tgt + Val.MaxIndex) {
2211 Index = (Val.MaxIndex == 0) ? -1 : (Id - Val.Tgt);
2212 Name = Val.Name;
2213 return true;
2214 }
2215 }
2216 return false;
2217}
2218
2219unsigned getTgtId(const StringRef Name) {
2220
2221 for (const ExpTgt &Val : ExpTgtInfo) {
2222 if (Val.MaxIndex == 0 && Name == Val.Name)
2223 return Val.Tgt;
2224
2225 if (Val.MaxIndex > 0 && Name.starts_with(Val.Name)) {
2226 StringRef Suffix = Name.drop_front(Val.Name.size());
2227
2228 unsigned Id;
2229 if (Suffix.getAsInteger(10, Id) || Id > Val.MaxIndex)
2230 return ET_INVALID;
2231
2232 // Disable leading zeroes
2233 if (Suffix.size() > 1 && Suffix[0] == '0')
2234 return ET_INVALID;
2235
2236 return Val.Tgt + Id;
2237 }
2238 }
2239 return ET_INVALID;
2240}
2241
2242bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI) {
2243 switch (Id) {
2244 case ET_NULL:
2245 return !isGFX11Plus(STI);
2246 case ET_POS4:
2247 case ET_PRIM:
2248 return isGFX10Plus(STI);
2249 case ET_DUAL_SRC_BLEND0:
2250 case ET_DUAL_SRC_BLEND1:
2251 return isGFX11Plus(STI);
2252 default:
2253 if (Id >= ET_PARAM0 && Id <= ET_PARAM31)
2254 return !isGFX11Plus(STI) || isGFX13Plus(STI);
2255 return true;
2256 }
2257}
2258
2259} // namespace Exp
2260
2261//===----------------------------------------------------------------------===//
2262// MTBUF Format
2263//===----------------------------------------------------------------------===//
2264
2265namespace MTBUFFormat {
2266
2267int64_t getDfmt(const StringRef Name) {
2268 for (int Id = DFMT_MIN; Id <= DFMT_MAX; ++Id) {
2269 if (Name == DfmtSymbolic[Id])
2270 return Id;
2271 }
2272 return DFMT_UNDEF;
2273}
2274
2276 assert(Id <= DFMT_MAX);
2277 return DfmtSymbolic[Id];
2278}
2279
2281 if (isSI(STI) || isCI(STI))
2282 return NfmtSymbolicSICI;
2283 if (isVI(STI) || isGFX9(STI))
2284 return NfmtSymbolicVI;
2285 return NfmtSymbolicGFX10;
2286}
2287
2288int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI) {
2289 const auto *lookupTable = getNfmtLookupTable(STI);
2290 for (int Id = NFMT_MIN; Id <= NFMT_MAX; ++Id) {
2291 if (Name == lookupTable[Id])
2292 return Id;
2293 }
2294 return NFMT_UNDEF;
2295}
2296
2297StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI) {
2298 assert(Id <= NFMT_MAX);
2299 return getNfmtLookupTable(STI)[Id];
2300}
2301
2302bool isValidDfmtNfmt(unsigned Id, const MCSubtargetInfo &STI) {
2303 unsigned Dfmt;
2304 unsigned Nfmt;
2305 decodeDfmtNfmt(Id, Dfmt, Nfmt);
2306 return isValidNfmt(Nfmt, STI);
2307}
2308
2309bool isValidNfmt(unsigned Id, const MCSubtargetInfo &STI) {
2310 return !getNfmtName(Id, STI).empty();
2311}
2312
2313int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt) {
2314 return (Dfmt << DFMT_SHIFT) | (Nfmt << NFMT_SHIFT);
2315}
2316
2317void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt) {
2318 Dfmt = (Format >> DFMT_SHIFT) & DFMT_MASK;
2319 Nfmt = (Format >> NFMT_SHIFT) & NFMT_MASK;
2320}
2321
2322int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI) {
2323 if (isGFX11Plus(STI)) {
2324 for (int Id = UfmtGFX11::UFMT_FIRST; Id <= UfmtGFX11::UFMT_LAST; ++Id) {
2325 if (Name == UfmtSymbolicGFX11[Id])
2326 return Id;
2327 }
2328 } else {
2329 for (int Id = UfmtGFX10::UFMT_FIRST; Id <= UfmtGFX10::UFMT_LAST; ++Id) {
2330 if (Name == UfmtSymbolicGFX10[Id])
2331 return Id;
2332 }
2333 }
2334 return UFMT_UNDEF;
2335}
2336
2338 if (isValidUnifiedFormat(Id, STI))
2339 return isGFX10(STI) ? UfmtSymbolicGFX10[Id] : UfmtSymbolicGFX11[Id];
2340 return "";
2341}
2342
2343bool isValidUnifiedFormat(unsigned Id, const MCSubtargetInfo &STI) {
2344 return isGFX10(STI) ? Id <= UfmtGFX10::UFMT_LAST : Id <= UfmtGFX11::UFMT_LAST;
2345}
2346
2347int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt,
2348 const MCSubtargetInfo &STI) {
2349 int64_t Fmt = encodeDfmtNfmt(Dfmt, Nfmt);
2350 if (isGFX11Plus(STI)) {
2351 for (int Id = UfmtGFX11::UFMT_FIRST; Id <= UfmtGFX11::UFMT_LAST; ++Id) {
2352 if (Fmt == DfmtNfmt2UFmtGFX11[Id])
2353 return Id;
2354 }
2355 } else {
2356 for (int Id = UfmtGFX10::UFMT_FIRST; Id <= UfmtGFX10::UFMT_LAST; ++Id) {
2357 if (Fmt == DfmtNfmt2UFmtGFX10[Id])
2358 return Id;
2359 }
2360 }
2361 return UFMT_UNDEF;
2362}
2363
2364bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI) {
2365 return isGFX10Plus(STI) ? (Val <= UFMT_MAX) : (Val <= DFMT_NFMT_MAX);
2366}
2367
2369 if (isGFX10Plus(STI))
2370 return UFMT_DEFAULT;
2371 return DFMT_NFMT_DEFAULT;
2372}
2373
2374} // namespace MTBUFFormat
2375
2376//===----------------------------------------------------------------------===//
2377// SendMsg
2378//===----------------------------------------------------------------------===//
2379
2380namespace SendMsg {
2381
2385
2386bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI) {
2387 return (MsgId & ~(getMsgIdMask(STI))) == 0;
2388}
2389
2390bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI,
2391 bool Strict) {
2392 assert(isValidMsgId(MsgId, STI));
2393
2394 if (!Strict)
2395 return 0 <= OpId && isUInt<OP_WIDTH_>(OpId);
2396
2397 if (msgRequiresOp(MsgId, STI)) {
2398 if (MsgId == ID_GS_PreGFX11 && OpId == OP_GS_NOP)
2399 return false;
2400
2401 return !getMsgOpName(MsgId, OpId, STI).empty();
2402 }
2403
2404 return OpId == OP_NONE_;
2405}
2406
2407bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId,
2408 const MCSubtargetInfo &STI, bool Strict) {
2409 assert(isValidMsgOp(MsgId, OpId, STI, Strict));
2410
2411 if (!Strict)
2413
2414 if (!isGFX11Plus(STI)) {
2415 switch (MsgId) {
2416 case ID_GS_PreGFX11:
2419 return (OpId == OP_GS_NOP)
2422 }
2423 }
2424 return StreamId == STREAM_ID_NONE_;
2425}
2426
2427bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI) {
2428 return MsgId == ID_SYSMSG ||
2429 (!isGFX11Plus(STI) &&
2430 (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11));
2431}
2432
2433bool msgSupportsStream(int64_t MsgId, int64_t OpId,
2434 const MCSubtargetInfo &STI) {
2435 return !isGFX11Plus(STI) &&
2436 (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11) &&
2437 OpId != OP_GS_NOP;
2438}
2439
2440void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId,
2441 uint16_t &StreamId, const MCSubtargetInfo &STI) {
2442 MsgId = Val & getMsgIdMask(STI);
2443 if (isGFX11Plus(STI)) {
2444 OpId = 0;
2445 StreamId = 0;
2446 } else {
2447 OpId = (Val & OP_MASK_) >> OP_SHIFT_;
2449 }
2450}
2451
2453 return MsgId | (OpId << OP_SHIFT_) | (StreamId << STREAM_ID_SHIFT_);
2454}
2455
2456bool msgDoesNotUseM0(int64_t MsgId, const MCSubtargetInfo &STI) {
2457 // Explicitly list message types that are known to not use m0.
2458 // This is safer than excluding only GS_ALLOC_REQ, in case new message
2459 // types are added in the future that do use m0.
2460 if (isGFX11Plus(STI)) {
2461 switch (MsgId) {
2463 return true;
2464 default:
2465 break;
2466 }
2467 }
2468 switch (MsgId) {
2469 case ID_SAVEWAVE:
2470 case ID_STALL_WAVE_GEN:
2471 case ID_HALT_WAVES:
2472 case ID_ORDERED_PS_DONE:
2474 case ID_GET_DOORBELL:
2475 case ID_GET_DDID:
2476 case ID_SYSMSG:
2477 return true;
2478 default:
2479 return false;
2480 }
2481}
2482
2483} // namespace SendMsg
2484
2485//===----------------------------------------------------------------------===//
2486//
2487//===----------------------------------------------------------------------===//
2488
2490 return F.getFnAttributeAsParsedInteger("InitialPSInputAddr", 0);
2491}
2492
2494 // As a safe default always respond as if PS has color exports.
2495 return F.getFnAttributeAsParsedInteger(
2496 "amdgpu-color-export",
2497 F.getCallingConv() == CallingConv::AMDGPU_PS ? 1 : 0) != 0;
2498}
2499
2501 return F.getFnAttributeAsParsedInteger("amdgpu-depth-export", 0) != 0;
2502}
2503
2505 unsigned BlockSize =
2506 F.getFnAttributeAsParsedInteger("amdgpu-dynamic-vgpr-block-size", 0);
2507
2508 if (BlockSize == 16 || BlockSize == 32)
2509 return BlockSize;
2510
2511 return 0;
2512}
2513
2514bool hasXNACK(const MCSubtargetInfo &STI) {
2515 return STI.hasFeature(AMDGPU::FeatureXNACK);
2516}
2517
2519 return STI.hasFeature(AMDGPU::FeatureMIMG_R128) &&
2520 !STI.hasFeature(AMDGPU::FeatureR128A16);
2521}
2522
2523bool hasA16(const MCSubtargetInfo &STI) {
2524 return STI.hasFeature(AMDGPU::FeatureA16);
2525}
2526
2527bool hasG16(const MCSubtargetInfo &STI) {
2528 return STI.hasFeature(AMDGPU::FeatureG16);
2529}
2530
2532 return !STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem) && !isCI(STI) &&
2533 !isSI(STI);
2534}
2535
2536bool hasGDS(const MCSubtargetInfo &STI) {
2537 return STI.hasFeature(AMDGPU::FeatureGDS);
2538}
2539
2540unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler) {
2541 auto Version = getIsaVersion(STI.getCPU());
2542 if (Version.Major == 10)
2543 return Version.Minor >= 3 ? 13 : 5;
2544 if (Version.Major == 11)
2545 return 5;
2546 if (Version.Major >= 12)
2547 return HasSampler ? 4 : 5;
2548 return 0;
2549}
2550
2552 if (isGFX1250Plus(STI))
2553 return 32;
2554 return 16;
2555}
2556
2557bool isSI(const MCSubtargetInfo &STI) {
2558 return STI.hasFeature(AMDGPU::FeatureSouthernIslands);
2559}
2560
2561bool isCI(const MCSubtargetInfo &STI) {
2562 return STI.hasFeature(AMDGPU::FeatureSeaIslands);
2563}
2564
2565bool isVI(const MCSubtargetInfo &STI) {
2566 return STI.hasFeature(AMDGPU::FeatureVolcanicIslands);
2567}
2568
2569bool isGFX9(const MCSubtargetInfo &STI) {
2570 return STI.hasFeature(AMDGPU::FeatureGFX9);
2571}
2572
2574 return isGFX9(STI) || isGFX10(STI);
2575}
2576
2578 return isGFX9(STI) || isGFX10(STI) || isGFX11(STI);
2579}
2580
2582 return isVI(STI) || isGFX9(STI) || isGFX10(STI);
2583}
2584
2585bool isGFX8Plus(const MCSubtargetInfo &STI) {
2586 return isVI(STI) || isGFX9Plus(STI);
2587}
2588
2589bool isGFX9Plus(const MCSubtargetInfo &STI) {
2590 return isGFX9(STI) || isGFX10Plus(STI);
2591}
2592
2593bool isNotGFX9Plus(const MCSubtargetInfo &STI) { return !isGFX9Plus(STI); }
2594
2595bool isGFX10(const MCSubtargetInfo &STI) {
2596 return STI.hasFeature(AMDGPU::FeatureGFX10);
2597}
2598
2600 return isGFX10(STI) || isGFX11(STI);
2601}
2602
2604 return isGFX10(STI) || isGFX11Plus(STI);
2605}
2606
2607bool isGFX11(const MCSubtargetInfo &STI) {
2608 return STI.hasFeature(AMDGPU::FeatureGFX11);
2609}
2610
2612 return isGFX11(STI) || isGFX12Plus(STI);
2613}
2614
2615bool isGFX12(const MCSubtargetInfo &STI) {
2616 return STI.getFeatureBits()[AMDGPU::FeatureGFX12];
2617}
2618
2620 return isGFX12(STI) || isGFX13Plus(STI);
2621}
2622
2623bool isNotGFX12Plus(const MCSubtargetInfo &STI) { return !isGFX12Plus(STI); }
2624
2625bool isGFX1250(const MCSubtargetInfo &STI) {
2626 return STI.getFeatureBits()[AMDGPU::FeatureGFX1250Insts] && !isGFX13(STI);
2627}
2628
2630 return STI.getFeatureBits()[AMDGPU::FeatureGFX1250Insts];
2631}
2632
2633bool isGFX13(const MCSubtargetInfo &STI) {
2634 return STI.getFeatureBits()[AMDGPU::FeatureGFX13];
2635}
2636
2637bool isGFX13Plus(const MCSubtargetInfo &STI) { return isGFX13(STI); }
2638
2640 if (isGFX1250(STI))
2641 return false;
2642 return isGFX10Plus(STI);
2643}
2644
2645bool isNotGFX11Plus(const MCSubtargetInfo &STI) { return !isGFX11Plus(STI); }
2646
2648 return isSI(STI) || isCI(STI) || isVI(STI) || isGFX9(STI);
2649}
2650
2652 return isGFX10(STI) && !AMDGPU::isGFX10_BEncoding(STI);
2653}
2654
2656 return STI.hasFeature(AMDGPU::FeatureGCN3Encoding);
2657}
2658
2660 return STI.hasFeature(AMDGPU::FeatureGFX10_AEncoding);
2661}
2662
2664 return STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding);
2665}
2666
2668 return STI.hasFeature(AMDGPU::FeatureGFX10_3Insts);
2669}
2670
2672 return isGFX10_BEncoding(STI) && !isGFX12Plus(STI);
2673}
2674
2675bool isGFX90A(const MCSubtargetInfo &STI) {
2676 return STI.hasFeature(AMDGPU::FeatureGFX90AInsts);
2677}
2678
2679bool isGFX940(const MCSubtargetInfo &STI) {
2680 return STI.hasFeature(AMDGPU::FeatureGFX940Insts);
2681}
2682
2684 return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch);
2685}
2686
2688 return STI.hasFeature(AMDGPU::FeatureMAIInsts);
2689}
2690
2691bool hasVOPD(const MCSubtargetInfo &STI) {
2692 return STI.hasFeature(AMDGPU::FeatureVOPDInsts);
2693}
2694
2696 return STI.hasFeature(AMDGPU::FeatureDPPSrc1SGPR);
2697}
2698
2700 return STI.hasFeature(AMDGPU::FeatureKernargPreload);
2701}
2702
2703int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR,
2704 int32_t ArgNumVGPR) {
2705 if (has90AInsts && ArgNumAGPR)
2706 return alignTo(ArgNumVGPR, 4) + ArgNumAGPR;
2707 return std::max(ArgNumVGPR, ArgNumAGPR);
2708}
2709
2711 const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID);
2712 const MCRegister FirstSubReg = TRI->getSubReg(Reg, AMDGPU::sub0);
2713 return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) ||
2714 Reg == AMDGPU::SCC;
2715}
2716
2720
2721#define MAP_REG2REG \
2722 using namespace AMDGPU; \
2723 switch (Reg.id()) { \
2724 default: \
2725 return Reg; \
2726 CASE_CI_VI(FLAT_SCR) \
2727 CASE_CI_VI(FLAT_SCR_LO) \
2728 CASE_CI_VI(FLAT_SCR_HI) \
2729 CASE_VI_GFX9PLUS(TTMP0) \
2730 CASE_VI_GFX9PLUS(TTMP1) \
2731 CASE_VI_GFX9PLUS(TTMP2) \
2732 CASE_VI_GFX9PLUS(TTMP3) \
2733 CASE_VI_GFX9PLUS(TTMP4) \
2734 CASE_VI_GFX9PLUS(TTMP5) \
2735 CASE_VI_GFX9PLUS(TTMP6) \
2736 CASE_VI_GFX9PLUS(TTMP7) \
2737 CASE_VI_GFX9PLUS(TTMP8) \
2738 CASE_VI_GFX9PLUS(TTMP9) \
2739 CASE_VI_GFX9PLUS(TTMP10) \
2740 CASE_VI_GFX9PLUS(TTMP11) \
2741 CASE_VI_GFX9PLUS(TTMP12) \
2742 CASE_VI_GFX9PLUS(TTMP13) \
2743 CASE_VI_GFX9PLUS(TTMP14) \
2744 CASE_VI_GFX9PLUS(TTMP15) \
2745 CASE_VI_GFX9PLUS(TTMP0_TTMP1) \
2746 CASE_VI_GFX9PLUS(TTMP2_TTMP3) \
2747 CASE_VI_GFX9PLUS(TTMP4_TTMP5) \
2748 CASE_VI_GFX9PLUS(TTMP6_TTMP7) \
2749 CASE_VI_GFX9PLUS(TTMP8_TTMP9) \
2750 CASE_VI_GFX9PLUS(TTMP10_TTMP11) \
2751 CASE_VI_GFX9PLUS(TTMP12_TTMP13) \
2752 CASE_VI_GFX9PLUS(TTMP14_TTMP15) \
2753 CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3) \
2754 CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7) \
2755 CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11) \
2756 CASE_VI_GFX9PLUS(TTMP12_TTMP13_TTMP14_TTMP15) \
2757 CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \
2758 CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \
2759 CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
2760 CASE_VI_GFX9PLUS( \
2761 TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
2762 CASE_GFXPRE11_GFX11PLUS(M0) \
2763 CASE_GFXPRE11_GFX11PLUS(SGPR_NULL) \
2764 CASE_GFXPRE11_GFX11PLUS_TO(SGPR_NULL64, SGPR_NULL) \
2765 }
2766
2767#define CASE_CI_VI(node) \
2768 assert(!isSI(STI)); \
2769 case node: \
2770 return isCI(STI) ? node##_ci : node##_vi;
2771
2772#define CASE_VI_GFX9PLUS(node) \
2773 case node: \
2774 return isGFX9Plus(STI) ? node##_gfx9plus : node##_vi;
2775
2776#define CASE_GFXPRE11_GFX11PLUS(node) \
2777 case node: \
2778 return isGFX11Plus(STI) ? node##_gfx11plus : node##_gfxpre11;
2779
2780#define CASE_GFXPRE11_GFX11PLUS_TO(node, result) \
2781 case node: \
2782 return isGFX11Plus(STI) ? result##_gfx11plus : result##_gfxpre11;
2783
2785 if (STI.getTargetTriple().getArch() == Triple::r600)
2786 return Reg;
2788}
2789
2790#undef CASE_CI_VI
2791#undef CASE_VI_GFX9PLUS
2792#undef CASE_GFXPRE11_GFX11PLUS
2793#undef CASE_GFXPRE11_GFX11PLUS_TO
2794
2795#define CASE_CI_VI(node) \
2796 case node##_ci: \
2797 case node##_vi: \
2798 return node;
2799#define CASE_VI_GFX9PLUS(node) \
2800 case node##_vi: \
2801 case node##_gfx9plus: \
2802 return node;
2803#define CASE_GFXPRE11_GFX11PLUS(node) \
2804 case node##_gfx11plus: \
2805 case node##_gfxpre11: \
2806 return node;
2807#define CASE_GFXPRE11_GFX11PLUS_TO(node, result)
2808
2810
2812 switch (Reg.id()) {
2813 case AMDGPU::SRC_SHARED_BASE_LO:
2814 case AMDGPU::SRC_SHARED_BASE:
2815 case AMDGPU::SRC_SHARED_LIMIT_LO:
2816 case AMDGPU::SRC_SHARED_LIMIT:
2817 case AMDGPU::SRC_PRIVATE_BASE_LO:
2818 case AMDGPU::SRC_PRIVATE_BASE:
2819 case AMDGPU::SRC_PRIVATE_LIMIT_LO:
2820 case AMDGPU::SRC_PRIVATE_LIMIT:
2821 case AMDGPU::SRC_FLAT_SCRATCH_BASE_LO:
2822 case AMDGPU::SRC_FLAT_SCRATCH_BASE_HI:
2823 case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2824 return true;
2825 case AMDGPU::SRC_VCCZ:
2826 case AMDGPU::SRC_EXECZ:
2827 case AMDGPU::SRC_SCC:
2828 return true;
2829 case AMDGPU::SGPR_NULL:
2830 return true;
2831 default:
2832 return false;
2833 }
2834}
2835
2836#undef CASE_CI_VI
2837#undef CASE_VI_GFX9PLUS
2838#undef CASE_GFXPRE11_GFX11PLUS
2839#undef CASE_GFXPRE11_GFX11PLUS_TO
2840#undef MAP_REG2REG
2841
2842bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2843 assert(OpNo < Desc.NumOperands);
2844 unsigned OpType = Desc.operands()[OpNo].OperandType;
2845 return OpType >= AMDGPU::OPERAND_KIMM_FIRST &&
2846 OpType <= AMDGPU::OPERAND_KIMM_LAST;
2847}
2848
2849bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2850 assert(OpNo < Desc.NumOperands);
2851 unsigned OpType = Desc.operands()[OpNo].OperandType;
2852 switch (OpType) {
2866 return true;
2867 default:
2868 return false;
2869 }
2870}
2871
2872bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2873 assert(OpNo < Desc.NumOperands);
2874 unsigned OpType = Desc.operands()[OpNo].OperandType;
2875 return (OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
2879}
2880
2881// Avoid using MCRegisterClass::getSize, since that function will go away
2882// (move from MC* level to Target* level). Return size in bits.
unsigned getRegBitWidth(unsigned RCID) {
  // Grouped by width; each case group lists every register class of that
  // width so the mapping stays exhaustive. Unknown classes are a bug.
  switch (RCID) {
  case AMDGPU::VGPR_16RegClassID:
  case AMDGPU::VGPR_16_Lo128RegClassID:
  case AMDGPU::SGPR_LO16RegClassID:
  case AMDGPU::AGPR_LO16RegClassID:
    return 16;
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::VGPR_32RegClassID:
  case AMDGPU::VGPR_32_Lo256RegClassID:
  case AMDGPU::VRegOrLds_32RegClassID:
  case AMDGPU::AGPR_32RegClassID:
  case AMDGPU::VS_32RegClassID:
  case AMDGPU::AV_32RegClassID:
  case AMDGPU::SReg_32RegClassID:
  case AMDGPU::SReg_32_XM0RegClassID:
  case AMDGPU::SRegOrLds_32RegClassID:
    return 32;
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::VS_64RegClassID:
  case AMDGPU::SReg_64RegClassID:
  case AMDGPU::VReg_64RegClassID:
  case AMDGPU::AReg_64RegClassID:
  case AMDGPU::SReg_64_XEXECRegClassID:
  case AMDGPU::VReg_64_Align2RegClassID:
  case AMDGPU::AReg_64_Align2RegClassID:
  case AMDGPU::AV_64RegClassID:
  case AMDGPU::AV_64_Align2RegClassID:
  case AMDGPU::VReg_64_Lo256_Align2RegClassID:
  case AMDGPU::VS_64_Lo256RegClassID:
    return 64;
  case AMDGPU::SGPR_96RegClassID:
  case AMDGPU::SReg_96RegClassID:
  case AMDGPU::VReg_96RegClassID:
  case AMDGPU::AReg_96RegClassID:
  case AMDGPU::VReg_96_Align2RegClassID:
  case AMDGPU::AReg_96_Align2RegClassID:
  case AMDGPU::AV_96RegClassID:
  case AMDGPU::AV_96_Align2RegClassID:
  case AMDGPU::VReg_96_Lo256_Align2RegClassID:
    return 96;
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::SReg_128RegClassID:
  case AMDGPU::VReg_128RegClassID:
  case AMDGPU::AReg_128RegClassID:
  case AMDGPU::VReg_128_Align2RegClassID:
  case AMDGPU::AReg_128_Align2RegClassID:
  case AMDGPU::AV_128RegClassID:
  case AMDGPU::AV_128_Align2RegClassID:
  case AMDGPU::SReg_128_XNULLRegClassID:
  case AMDGPU::VReg_128_Lo256_Align2RegClassID:
    return 128;
  case AMDGPU::SGPR_160RegClassID:
  case AMDGPU::SReg_160RegClassID:
  case AMDGPU::VReg_160RegClassID:
  case AMDGPU::AReg_160RegClassID:
  case AMDGPU::VReg_160_Align2RegClassID:
  case AMDGPU::AReg_160_Align2RegClassID:
  case AMDGPU::AV_160RegClassID:
  case AMDGPU::AV_160_Align2RegClassID:
  case AMDGPU::VReg_160_Lo256_Align2RegClassID:
    return 160;
  case AMDGPU::SGPR_192RegClassID:
  case AMDGPU::SReg_192RegClassID:
  case AMDGPU::VReg_192RegClassID:
  case AMDGPU::AReg_192RegClassID:
  case AMDGPU::VReg_192_Align2RegClassID:
  case AMDGPU::AReg_192_Align2RegClassID:
  case AMDGPU::AV_192RegClassID:
  case AMDGPU::AV_192_Align2RegClassID:
  case AMDGPU::VReg_192_Lo256_Align2RegClassID:
    return 192;
  case AMDGPU::SGPR_224RegClassID:
  case AMDGPU::SReg_224RegClassID:
  case AMDGPU::VReg_224RegClassID:
  case AMDGPU::AReg_224RegClassID:
  case AMDGPU::VReg_224_Align2RegClassID:
  case AMDGPU::AReg_224_Align2RegClassID:
  case AMDGPU::AV_224RegClassID:
  case AMDGPU::AV_224_Align2RegClassID:
  case AMDGPU::VReg_224_Lo256_Align2RegClassID:
    return 224;
  case AMDGPU::SGPR_256RegClassID:
  case AMDGPU::SReg_256RegClassID:
  case AMDGPU::VReg_256RegClassID:
  case AMDGPU::AReg_256RegClassID:
  case AMDGPU::VReg_256_Align2RegClassID:
  case AMDGPU::AReg_256_Align2RegClassID:
  case AMDGPU::AV_256RegClassID:
  case AMDGPU::AV_256_Align2RegClassID:
  case AMDGPU::SReg_256_XNULLRegClassID:
  case AMDGPU::VReg_256_Lo256_Align2RegClassID:
    return 256;
  case AMDGPU::SGPR_288RegClassID:
  case AMDGPU::SReg_288RegClassID:
  case AMDGPU::VReg_288RegClassID:
  case AMDGPU::AReg_288RegClassID:
  case AMDGPU::VReg_288_Align2RegClassID:
  case AMDGPU::AReg_288_Align2RegClassID:
  case AMDGPU::AV_288RegClassID:
  case AMDGPU::AV_288_Align2RegClassID:
  case AMDGPU::VReg_288_Lo256_Align2RegClassID:
    return 288;
  case AMDGPU::SGPR_320RegClassID:
  case AMDGPU::SReg_320RegClassID:
  case AMDGPU::VReg_320RegClassID:
  case AMDGPU::AReg_320RegClassID:
  case AMDGPU::VReg_320_Align2RegClassID:
  case AMDGPU::AReg_320_Align2RegClassID:
  case AMDGPU::AV_320RegClassID:
  case AMDGPU::AV_320_Align2RegClassID:
  case AMDGPU::VReg_320_Lo256_Align2RegClassID:
    return 320;
  case AMDGPU::SGPR_352RegClassID:
  case AMDGPU::SReg_352RegClassID:
  case AMDGPU::VReg_352RegClassID:
  case AMDGPU::AReg_352RegClassID:
  case AMDGPU::VReg_352_Align2RegClassID:
  case AMDGPU::AReg_352_Align2RegClassID:
  case AMDGPU::AV_352RegClassID:
  case AMDGPU::AV_352_Align2RegClassID:
  case AMDGPU::VReg_352_Lo256_Align2RegClassID:
    return 352;
  case AMDGPU::SGPR_384RegClassID:
  case AMDGPU::SReg_384RegClassID:
  case AMDGPU::VReg_384RegClassID:
  case AMDGPU::AReg_384RegClassID:
  case AMDGPU::VReg_384_Align2RegClassID:
  case AMDGPU::AReg_384_Align2RegClassID:
  case AMDGPU::AV_384RegClassID:
  case AMDGPU::AV_384_Align2RegClassID:
  case AMDGPU::VReg_384_Lo256_Align2RegClassID:
    return 384;
  case AMDGPU::SGPR_512RegClassID:
  case AMDGPU::SReg_512RegClassID:
  case AMDGPU::VReg_512RegClassID:
  case AMDGPU::AReg_512RegClassID:
  case AMDGPU::VReg_512_Align2RegClassID:
  case AMDGPU::AReg_512_Align2RegClassID:
  case AMDGPU::AV_512RegClassID:
  case AMDGPU::AV_512_Align2RegClassID:
  case AMDGPU::VReg_512_Lo256_Align2RegClassID:
    return 512;
  case AMDGPU::SGPR_1024RegClassID:
  case AMDGPU::SReg_1024RegClassID:
  case AMDGPU::VReg_1024RegClassID:
  case AMDGPU::AReg_1024RegClassID:
  case AMDGPU::VReg_1024_Align2RegClassID:
  case AMDGPU::AReg_1024_Align2RegClassID:
  case AMDGPU::AV_1024RegClassID:
  case AMDGPU::AV_1024_Align2RegClassID:
  case AMDGPU::VReg_1024_Lo256_Align2RegClassID:
    return 1024;
  default:
    llvm_unreachable("Unexpected register class");
  }
}
3040
/// Convenience overload: \returns the width in bits of registers in \p RC.
unsigned getRegBitWidth(const MCRegisterClass &RC) {
  return getRegBitWidth(RC.getID());
}
3044
3045bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
3047 return true;
3048
3049 uint64_t Val = static_cast<uint64_t>(Literal);
3050 return (Val == llvm::bit_cast<uint64_t>(0.0)) ||
3051 (Val == llvm::bit_cast<uint64_t>(1.0)) ||
3052 (Val == llvm::bit_cast<uint64_t>(-1.0)) ||
3053 (Val == llvm::bit_cast<uint64_t>(0.5)) ||
3054 (Val == llvm::bit_cast<uint64_t>(-0.5)) ||
3055 (Val == llvm::bit_cast<uint64_t>(2.0)) ||
3056 (Val == llvm::bit_cast<uint64_t>(-2.0)) ||
3057 (Val == llvm::bit_cast<uint64_t>(4.0)) ||
3058 (Val == llvm::bit_cast<uint64_t>(-4.0)) ||
3059 (Val == 0x3fc45f306dc9c882 && HasInv2Pi);
3060}
3061
3062bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
3064 return true;
3065
3066 // The actual type of the operand does not seem to matter as long
3067 // as the bits match one of the inline immediate values. For example:
3068 //
3069 // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
3070 // so it is a legal inline immediate.
3071 //
3072 // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
3073 // floating-point, so it is a legal inline immediate.
3074
3075 uint32_t Val = static_cast<uint32_t>(Literal);
3076 return (Val == llvm::bit_cast<uint32_t>(0.0f)) ||
3077 (Val == llvm::bit_cast<uint32_t>(1.0f)) ||
3078 (Val == llvm::bit_cast<uint32_t>(-1.0f)) ||
3079 (Val == llvm::bit_cast<uint32_t>(0.5f)) ||
3080 (Val == llvm::bit_cast<uint32_t>(-0.5f)) ||
3081 (Val == llvm::bit_cast<uint32_t>(2.0f)) ||
3082 (Val == llvm::bit_cast<uint32_t>(-2.0f)) ||
3083 (Val == llvm::bit_cast<uint32_t>(4.0f)) ||
3084 (Val == llvm::bit_cast<uint32_t>(-4.0f)) ||
3085 (Val == 0x3e22f983 && HasInv2Pi);
3086}
3087
3088bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi) {
3089 if (!HasInv2Pi)
3090 return false;
3092 return true;
3093 uint16_t Val = static_cast<uint16_t>(Literal);
3094 return Val == 0x3F00 || // 0.5
3095 Val == 0xBF00 || // -0.5
3096 Val == 0x3F80 || // 1.0
3097 Val == 0xBF80 || // -1.0
3098 Val == 0x4000 || // 2.0
3099 Val == 0xC000 || // -2.0
3100 Val == 0x4080 || // 4.0
3101 Val == 0xC080 || // -4.0
3102 Val == 0x3E22; // 1.0 / (2.0 * pi)
3103}
3104
/// i16 literals are handled identically to 32-bit integer literals for
/// inline-constant purposes.
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi) {
  return isInlinableLiteral32(Literal, HasInv2Pi);
}
3108
3109bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi) {
3110 if (!HasInv2Pi)
3111 return false;
3113 return true;
3114 uint16_t Val = static_cast<uint16_t>(Literal);
3115 return Val == 0x3C00 || // 1.0
3116 Val == 0xBC00 || // -1.0
3117 Val == 0x3800 || // 0.5
3118 Val == 0xB800 || // -0.5
3119 Val == 0x4000 || // 2.0
3120 Val == 0xC000 || // -2.0
3121 Val == 0x4400 || // 4.0
3122 Val == 0xC400 || // -4.0
3123 Val == 0x3118; // 1/2pi
3124}
3125
/// \returns the inline-constant encoding of \p Literal for a packed 16-bit
/// operand (\p IsFloat selects the F16 interpretation, otherwise UI16), or
/// nullopt if \p Literal has no inline encoding.
std::optional<unsigned> getInlineEncodingV216(bool IsFloat, uint32_t Literal) {
  // Unfortunately, the Instruction Set Architecture Reference Guide is
  // misleading about how the inline operands work for (packed) 16-bit
  // instructions. In a nutshell, the actual HW behavior is:
  //
  // - integer encodings (-16 .. 64) are always produced as sign-extended
  //   32-bit values
  // - float encodings are produced as:
  //   - for F16 instructions: corresponding half-precision float values in
  //     the LSBs, 0 in the MSBs
  //   - for UI16 instructions: corresponding single-precision float value
  const int32_t Signed = static_cast<int32_t>(Literal);
  if (Signed >= 0 && Signed <= 64)
    return 128 + Signed;

  if (Signed >= -16 && Signed <= -1)
    return 192 - Signed;

  // Bit patterns for 0.5, -0.5, 1.0, -1.0, 2.0, -2.0, 4.0, -4.0 and
  // 1/(2*pi), in encoding order 240..248, as half-precision and
  // single-precision floats respectively.
  static constexpr uint32_t HalfBits[] = {0x3800, 0xB800, 0x3C00,
                                          0xBC00, 0x4000, 0xC000,
                                          0x4400, 0xC400, 0x3118};
  static constexpr uint32_t SingleBits[] = {0x3F000000, 0xBF000000, 0x3F800000,
                                            0xBF800000, 0x40000000, 0xC0000000,
                                            0x40800000, 0xC0800000, 0x3E22F983};

  const uint32_t *Bits = IsFloat ? HalfBits : SingleBits;
  for (unsigned I = 0; I != 9; ++I)
    if (Literal == Bits[I])
      return 240 + I;

  return {};
}
3178
// Encoding of the literal as an inline constant for a V_PK_*_IU16 instruction
// or nullopt. Integer packed operands use the UI16 (non-float) rules.
std::optional<unsigned> getInlineEncodingV2I16(uint32_t Literal) {
  return getInlineEncodingV216(false, Literal);
}
3184
// Encoding of the literal as an inline constant for a V_PK_*_BF16 instruction
// or nullopt.
std::optional<unsigned> getInlineEncodingV2BF16(uint32_t Literal) {
  // Sign-extended integer encodings -16..64 come first.
  const int32_t Signed = static_cast<int32_t>(Literal);
  if (Signed >= 0 && Signed <= 64)
    return 128 + Signed;

  if (Signed >= -16 && Signed <= -1)
    return 192 - Signed;

  // bf16 bit patterns for 0.5, -0.5, 1.0, -1.0, 2.0, -2.0, 4.0, -4.0 and
  // 1/(2*pi), in encoding order 240..248.
  static constexpr uint32_t BF16Bits[] = {0x3F00, 0xBF00, 0x3F80,
                                          0xBF80, 0x4000, 0xC000,
                                          0x4080, 0xC080, 0x3E22};
  for (unsigned I = 0; I != 9; ++I)
    if (Literal == BF16Bits[I])
      return 240 + I;

  return std::nullopt;
}
3212
// Encoding of the literal as an inline constant for a V_PK_*_F16 instruction
// or nullopt. Float packed operands use the F16 rules.
std::optional<unsigned> getInlineEncodingV2F16(uint32_t Literal) {
  return getInlineEncodingV216(true, Literal);
}
3218
3219// Encoding of the literal as an inline constant for V_PK_FMAC_F16 instruction
3220// or nullopt. This accounts for different inline constant behavior:
3221// - Pre-GFX11: fp16 inline constants have the value in low 16 bits, 0 in high
3222// - GFX11+: fp16 inline constants are duplicated into both halves
3224 bool IsGFX11Plus) {
3225 // Pre-GFX11 behavior: f16 in low bits, 0 in high bits
3226 if (!IsGFX11Plus)
3227 return getInlineEncodingV216(/*IsFloat=*/true, Literal);
3228
3229 // GFX11+ behavior: f16 duplicated in both halves
3230 // First, check for sign-extended integer inline constants (-16 to 64)
3231 // These work the same across all generations
3232 int32_t Signed = static_cast<int32_t>(Literal);
3233 if (Signed >= 0 && Signed <= 64)
3234 return 128 + Signed;
3235
3236 if (Signed >= -16 && Signed <= -1)
3237 return 192 + std::abs(Signed);
3238
3239 // For float inline constants on GFX11+, both halves must be equal
3240 uint16_t Lo = static_cast<uint16_t>(Literal);
3241 uint16_t Hi = static_cast<uint16_t>(Literal >> 16);
3242 if (Lo != Hi)
3243 return std::nullopt;
3244 return getInlineEncodingV216(/*IsFloat=*/true, Lo);
3245}
3246
// Whether the given literal can be inlined for a V_PK_* instruction.
  switch (OpType) {
  // Packed 16-bit integer operand kinds use the integer inline encoding.
    return getInlineEncodingV216(false, Literal).has_value();
  // Packed fp16 operand kinds use the float inline encoding.
    return getInlineEncodingV216(true, Literal).has_value();
    llvm_unreachable("OPERAND_REG_IMM_V2FP16_SPLAT is not supported");
  // Operand kinds with no packed inline encoding.
    return false;
  default:
    llvm_unreachable("bad packed operand type");
  }
}
3267
3268// Whether the given literal can be inlined for a V_PK_*_IU16 instruction.
3272
3273// Whether the given literal can be inlined for a V_PK_*_BF16 instruction.
3277
3278// Whether the given literal can be inlined for a V_PK_*_F16 instruction.
3282
// Whether the given literal can be inlined for a V_PK_FMAC_F16 instruction.
// Thin predicate over getPKFMACF16InlineEncoding.
  return getPKFMACF16InlineEncoding(Literal, IsGFX11Plus).has_value();
}
3287
3288bool isValid32BitLiteral(uint64_t Val, bool IsFP64) {
3289 if (IsFP64)
3290 return !Lo_32(Val);
3291
3292 return isUInt<32>(Val) || isInt<32>(Val);
3293}
3294
int64_t encode32BitLiteral(int64_t Imm, OperandType Type, bool IsLit) {
  switch (Type) {
  default:
    break;
  // 16-bit operand kinds: only the low 16 bits of the literal are encoded.
    return Imm & 0xffff;
  // 32-bit operand kinds: encode the low 32 bits.
    return Lo_32(Imm);
  // A forced literal keeps the value as-is; otherwise the high half is
  // encoded.
    return IsLit ? Imm : Hi_32(Imm);
  }
  return Imm;
}
3323
  const Function *F = A->getParent();

  // Arguments to compute shaders are never a source of divergence.
  CallingConv::ID CC = F->getCallingConv();
  switch (CC) {
  // Compute-like calling conventions: every argument is uniform.
    return true;
  // For non-compute shaders, SGPR inputs are marked with either inreg or
  // byval. Everything else is in VGPRs.
    return A->hasAttribute(Attribute::InReg) ||
           A->hasAttribute(Attribute::ByVal);
  default:
    // TODO: treat i1 as divergent?
    return A->hasAttribute(Attribute::InReg);
  }
}
3352
// Call-site variant: whether argument \p ArgNo of call \p CB is passed in
// an SGPR (i.e. is uniform).
bool isArgPassedInSGPR(const CallBase *CB, unsigned ArgNo) {
  // Arguments to compute shaders are never a source of divergence.
  switch (CC) {
    return true;
  // For non-compute shaders, SGPR inputs are marked with either inreg or
  // byval. Everything else is in VGPRs.
    return CB->paramHasAttr(ArgNo, Attribute::InReg) ||
           CB->paramHasAttr(ArgNo, Attribute::ByVal);
  default:
    return CB->paramHasAttr(ArgNo, Attribute::InReg);
  }
}
3378
3379static bool hasSMEMByteOffset(const MCSubtargetInfo &ST) {
3380 return isGCN3Encoding(ST) || isGFX10Plus(ST);
3381}
3382
                                     int64_t EncodedOffset) {
  // GFX12+ supports a 23-bit unsigned immediate offset.
  if (isGFX12Plus(ST))
    return isUInt<23>(EncodedOffset);

  // Byte-offset targets allow 20 bits; older dword-offset targets allow 8.
  return hasSMEMByteOffset(ST) ? isUInt<20>(EncodedOffset)
                               : isUInt<8>(EncodedOffset);
}
3391
                                   int64_t EncodedOffset, bool IsBuffer) {
  if (isGFX12Plus(ST)) {
    // GFX12+: buffer accesses may not use a negative offset; otherwise the
    // offset is a 24-bit signed immediate.
    if (IsBuffer && EncodedOffset < 0)
      return false;
    return isInt<24>(EncodedOffset);
  }

  // Pre-GFX12: only non-buffer accesses on targets with signed immediate
  // offsets may use the (21-bit) signed encoding.
  return !IsBuffer && hasSMRDSignedImmOffset(ST) && isInt<21>(EncodedOffset);
}
3402
// True if \p ByteOffset is a multiple of a dword (4 bytes).
static bool isDwordAligned(uint64_t ByteOffset) {
  return ByteOffset % 4 == 0;
}
3406
                               uint64_t ByteOffset) {
  // Targets with byte-granular SMEM offsets use the byte offset directly.
  if (hasSMEMByteOffset(ST))
    return ByteOffset;

  // Otherwise the hardware takes a dword offset.
  assert(isDwordAligned(ByteOffset));
  return ByteOffset >> 2;
}
3415
std::optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
                                            int64_t ByteOffset, bool IsBuffer,
                                            bool HasSOffset) {
  // For unbuffered smem loads, it is illegal for the Immediate Offset to be
  // negative if the resulting (Offset + (M0 or SOffset or zero) is negative.
  // Handle case where SOffset is not present.
  if (!IsBuffer && !HasSOffset && ByteOffset < 0 && hasSMRDSignedImmOffset(ST))
    return std::nullopt;

  if (isGFX12Plus(ST)) // 24 bit signed offsets
    return isInt<24>(ByteOffset) ? std::optional<int64_t>(ByteOffset)
                                 : std::nullopt;

  // The signed version is always a byte offset.
  if (!IsBuffer && hasSMRDSignedImmOffset(ST)) {
    return isInt<20>(ByteOffset) ? std::optional<int64_t>(ByteOffset)
                                 : std::nullopt;
  }

  // Dword-offset targets can only encode dword-aligned byte offsets.
  if (!isDwordAligned(ByteOffset) && !hasSMEMByteOffset(ST))
    return std::nullopt;

  int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
  return isLegalSMRDEncodedUnsignedOffset(ST, EncodedOffset)
             ? std::optional<int64_t>(EncodedOffset)
             : std::nullopt;
}
3444
3445std::optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
3446 int64_t ByteOffset) {
3447 if (!isCI(ST) || !isDwordAligned(ByteOffset))
3448 return std::nullopt;
3449
3450 int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
3451 return isUInt<32>(EncodedOffset) ? std::optional<int64_t>(EncodedOffset)
3452 : std::nullopt;
3453}
3454
  // Width in bits of the flat-instruction immediate offset, selected by
  // subtarget feature.
  if (ST.getFeatureBits().test(FeatureFlatOffsetBits12))
    return 12;
  if (ST.getFeatureBits().test(FeatureFlatOffsetBits24))
    return 24;
  // Default when neither feature is set.
  return 13;
}
3462
namespace {

// Key types for the TableGen'erated lookup tables included below.
struct SourceOfDivergence {
  unsigned Intr;
};
const SourceOfDivergence *lookupSourceOfDivergence(unsigned Intr);

struct AlwaysUniform {
  unsigned Intr;
};
const AlwaysUniform *lookupAlwaysUniform(unsigned Intr);

// Pull in the generated table implementations: divergence sources, uniform
// intrinsics, and the per-generation buffer-format tables.
#define GET_SourcesOfDivergence_IMPL
#define GET_UniformIntrinsics_IMPL
#define GET_Gfx9BufferFormat_IMPL
#define GET_Gfx10BufferFormat_IMPL
#define GET_Gfx11PlusBufferFormat_IMPL

#include "AMDGPUGenSearchableTables.inc"

} // end anonymous namespace
3484
3485bool isIntrinsicSourceOfDivergence(unsigned IntrID) {
3486 return lookupSourceOfDivergence(IntrID);
3487}
3488
3489bool isIntrinsicAlwaysUniform(unsigned IntrID) {
3490 return lookupAlwaysUniform(IntrID);
3491}
3492
                                                  uint8_t NumComponents,
                                                  uint8_t NumFormat,
                                                  const MCSubtargetInfo &STI) {
  // Dispatch to the buffer-format table matching the subtarget generation.
  return isGFX11Plus(STI) ? getGfx11PlusBufferFormatInfo(
                                BitsPerComp, NumComponents, NumFormat)
         : isGFX10(STI)
             ? getGfx10BufferFormatInfo(BitsPerComp, NumComponents, NumFormat)
             : getGfx9BufferFormatInfo(BitsPerComp, NumComponents, NumFormat);
}
3503
                                                  const MCSubtargetInfo &STI) {
  // Lookup by unified format value, per subtarget generation.
  return isGFX11Plus(STI) ? getGfx11PlusBufferFormatInfo(Format)
         : isGFX10(STI)   ? getGfx10BufferFormatInfo(Format)
                          : getGfx9BufferFormatInfo(Format);
}
3510
                                                 const MCRegisterInfo &MRI) {
  // All VGPR register classes, searched in order; the first class that
  // contains Reg is returned.
  const unsigned VGPRClasses[] = {
      AMDGPU::VGPR_16RegClassID,  AMDGPU::VGPR_32RegClassID,
      AMDGPU::VReg_64RegClassID,  AMDGPU::VReg_96RegClassID,
      AMDGPU::VReg_128RegClassID, AMDGPU::VReg_160RegClassID,
      AMDGPU::VReg_192RegClassID, AMDGPU::VReg_224RegClassID,
      AMDGPU::VReg_256RegClassID, AMDGPU::VReg_288RegClassID,
      AMDGPU::VReg_320RegClassID, AMDGPU::VReg_352RegClassID,
      AMDGPU::VReg_384RegClassID, AMDGPU::VReg_512RegClassID,
      AMDGPU::VReg_1024RegClassID};

  for (unsigned RCID : VGPRClasses) {
    const MCRegisterClass &RC = MRI.getRegClass(RCID);
    if (RC.contains(Reg))
      return &RC;
  }

  // Reg is not a VGPR.
  return nullptr;
}
3531
  unsigned Enc = MRI.getEncodingValue(Reg);
  unsigned Idx = Enc & AMDGPU::HWEncoding::REG_IDX_MASK;
  // Bits above the low 8 of the register index are the MSBs.
  return Idx >> 8;
}
3537
                            const MCRegisterInfo &MRI) {
  unsigned Enc = MRI.getEncodingValue(Reg);
  unsigned Idx = Enc & AMDGPU::HWEncoding::REG_IDX_MASK;
  // Registers whose index already exceeds 8 bits cannot take extra MSBs.
  if (Idx >= 0x100)
    return MCRegister();

  const MCRegisterClass *RC = getVGPRPhysRegClass(Reg, MRI);
  if (!RC)
    return MCRegister();

  // Splice the MSBs above the low 8 index bits.
  Idx |= MSBs << 8;
  if (RC->getID() == AMDGPU::VGPR_16RegClassID) {
    // This class has 2048 registers with interleaved lo16 and hi16.
    Idx *= 2;
    ++Idx;
  }

  return RC->getRegister(Idx);
}
3559
// Decode an S_SETREG_IMM32_B32 immediate pair into the VGPR MSB value it
// writes into the MODE register, or std::nullopt if it does not target the
// VGPR MSB field.
static std::optional<unsigned>
convertSetRegImmToVgprMSBs(unsigned Imm, unsigned Simm16,
                           bool HasSetregVGPRMSBFixup) {
  constexpr unsigned VGPRMSBShift =

  auto [HwRegId, Offset, Size] = Hwreg::HwregEncoding::decode(Simm16);
  if (HwRegId != Hwreg::ID_MODE ||
      (!HasSetregVGPRMSBFixup && (Offset + Size) < VGPRMSBShift))
    return {};
  // If there is SetregVGPRMSBFixup then Offset is ignored.
  if (!HasSetregVGPRMSBFixup)
    Imm <<= Offset;
  Imm = (Imm & Hwreg::VGPR_MSB_MASK) >> VGPRMSBShift;
  if (!HasSetregVGPRMSBFixup)
  // With the fixup, the hardware expects the value rotated right by two.
  return llvm::rotr<uint8_t>(static_cast<uint8_t>(Imm), /*R=*/2);
}
3578
3579std::optional<unsigned> convertSetRegImmToVgprMSBs(const MachineInstr &MI,
3580 bool HasSetregVGPRMSBFixup) {
3581 assert(MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32);
3582 return convertSetRegImmToVgprMSBs(MI.getOperand(0).getImm(),
3583 MI.getOperand(1).getImm(),
3584 HasSetregVGPRMSBFixup);
3585}
3586
3587std::optional<unsigned> convertSetRegImmToVgprMSBs(const MCInst &MI,
3588 bool HasSetregVGPRMSBFixup) {
3589 assert(MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32_gfx12);
3590 return convertSetRegImmToVgprMSBs(MI.getOperand(0).getImm(),
3591 MI.getOperand(1).getImm(),
3592 HasSetregVGPRMSBFixup);
3593}
3594
std::pair<const AMDGPU::OpName *, const AMDGPU::OpName *>
  // Per-encoding operand-name tables: three source-like slots plus the
  // destination. NOTE(review): NUM_OPERAND_NAMES appears to act as an
  // empty-slot sentinel — confirm against OpName's declaration.
  static const AMDGPU::OpName VOPOps[4] = {
      AMDGPU::OpName::src0, AMDGPU::OpName::src1, AMDGPU::OpName::src2,
      AMDGPU::OpName::vdst};
  static const AMDGPU::OpName VDSOps[4] = {
      AMDGPU::OpName::addr, AMDGPU::OpName::data0, AMDGPU::OpName::data1,
      AMDGPU::OpName::vdst};
  static const AMDGPU::OpName FLATOps[4] = {
      AMDGPU::OpName::vaddr, AMDGPU::OpName::vdata,
      AMDGPU::OpName::NUM_OPERAND_NAMES, AMDGPU::OpName::vdst};
  static const AMDGPU::OpName BUFOps[4] = {
      AMDGPU::OpName::vaddr, AMDGPU::OpName::NUM_OPERAND_NAMES,
      AMDGPU::OpName::NUM_OPERAND_NAMES, AMDGPU::OpName::vdata};
  static const AMDGPU::OpName VIMGOps[4] = {
      AMDGPU::OpName::vaddr0, AMDGPU::OpName::vaddr1, AMDGPU::OpName::vaddr2,
      AMDGPU::OpName::vdata};

  // For VOPD instructions MSB of a corresponding Y component operand VGPR
  // address is supposed to match X operand, otherwise VOPD shall not be
  // combined.
  static const AMDGPU::OpName VOPDOpsX[4] = {
      AMDGPU::OpName::src0X, AMDGPU::OpName::vsrc1X, AMDGPU::OpName::vsrc2X,
      AMDGPU::OpName::vdstX};
  static const AMDGPU::OpName VOPDOpsY[4] = {
      AMDGPU::OpName::src0Y, AMDGPU::OpName::vsrc1Y, AMDGPU::OpName::vsrc2Y,
      AMDGPU::OpName::vdstY};

  // VOP2 MADMK instructions use src0, imm, src1 scheme.
  static const AMDGPU::OpName VOP2MADMKOps[4] = {
      AMDGPU::OpName::src0, AMDGPU::OpName::NUM_OPERAND_NAMES,
      AMDGPU::OpName::src1, AMDGPU::OpName::vdst};
  static const AMDGPU::OpName VOPDFMAMKOpsX[4] = {
      AMDGPU::OpName::src0X, AMDGPU::OpName::NUM_OPERAND_NAMES,
      AMDGPU::OpName::vsrc1X, AMDGPU::OpName::vdstX};
  static const AMDGPU::OpName VOPDFMAMKOpsY[4] = {
      AMDGPU::OpName::src0Y, AMDGPU::OpName::NUM_OPERAND_NAMES,
      AMDGPU::OpName::vsrc1Y, AMDGPU::OpName::vdstY};

  unsigned TSFlags = Desc.TSFlags;

  if (TSFlags &
    switch (Desc.getOpcode()) {
    // LD_SCALE operands ignore MSB.
    case AMDGPU::V_WMMA_LD_SCALE_PAIRED_B32:
    case AMDGPU::V_WMMA_LD_SCALE_PAIRED_B32_gfx1250:
    case AMDGPU::V_WMMA_LD_SCALE16_PAIRED_B64:
    case AMDGPU::V_WMMA_LD_SCALE16_PAIRED_B64_gfx1250:
      return {};
    // FMAMK variants follow the MADMK src0, imm, src1 operand order.
    case AMDGPU::V_FMAMK_F16:
    case AMDGPU::V_FMAMK_F16_t16:
    case AMDGPU::V_FMAMK_F16_t16_gfx12:
    case AMDGPU::V_FMAMK_F16_fake16:
    case AMDGPU::V_FMAMK_F16_fake16_gfx12:
    case AMDGPU::V_FMAMK_F32:
    case AMDGPU::V_FMAMK_F32_gfx12:
    case AMDGPU::V_FMAMK_F64:
    case AMDGPU::V_FMAMK_F64_gfx1250:
      return {VOP2MADMKOps, nullptr};
    default:
      break;
    }
    return {VOPOps, nullptr};
  }

  if (TSFlags & SIInstrFlags::DS)
    return {VDSOps, nullptr};

  if (TSFlags & SIInstrFlags::FLAT)
    return {FLATOps, nullptr};

  if (TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))
    return {BUFOps, nullptr};

  if (TSFlags & SIInstrFlags::VIMAGE)
    return {VIMGOps, nullptr};

  if (AMDGPU::isVOPD(Desc.getOpcode())) {
    auto [OpX, OpY] = getVOPDComponents(Desc.getOpcode());
    return {(OpX == AMDGPU::V_FMAMK_F32) ? VOPDFMAMKOpsX : VOPDOpsX,
            (OpY == AMDGPU::V_FMAMK_F32) ? VOPDFMAMKOpsY : VOPDOpsY};
  }

  assert(!(TSFlags & SIInstrFlags::MIMG));

  if (TSFlags & (SIInstrFlags::VSAMPLE | SIInstrFlags::EXP))
    llvm_unreachable("Sample and export VGPR lowering is not implemented and"
                     " these instructions are not expected on gfx1250");

  return {};
}
3688
3689bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode) {
3690 uint64_t TSFlags = MII.get(Opcode).TSFlags;
3691
3692 if (TSFlags & SIInstrFlags::SMRD)
3693 return !getSMEMIsBuffer(Opcode);
3694 if (!(TSFlags & SIInstrFlags::FLAT))
3695 return false;
3696
3697 // Only SV and SVS modes are supported.
3698 if (TSFlags & SIInstrFlags::FlatScratch)
3699 return hasNamedOperand(Opcode, OpName::vaddr);
3700
3701 // Only GVS mode is supported.
3702 return hasNamedOperand(Opcode, OpName::vaddr) &&
3703 hasNamedOperand(Opcode, OpName::saddr);
3704
3705 return false;
3706}
3707
3708bool hasAny64BitVGPROperands(const MCInstrDesc &OpDesc, const MCInstrInfo &MII,
3709 const MCSubtargetInfo &ST) {
3710 for (auto OpName : {OpName::vdst, OpName::src0, OpName::src1, OpName::src2}) {
3711 int Idx = getNamedOperandIdx(OpDesc.getOpcode(), OpName);
3712 if (Idx == -1)
3713 continue;
3714
3715 const MCOperandInfo &OpInfo = OpDesc.operands()[Idx];
3716 int16_t RegClass = MII.getOpRegClassID(
3717 OpInfo, ST.getHwMode(MCSubtargetInfo::HwMode_RegInfo));
3718 if (RegClass == AMDGPU::VReg_64RegClassID ||
3719 RegClass == AMDGPU::VReg_64_Align2RegClassID)
3720 return true;
3721 }
3722
3723 return false;
3724}
3725
3726bool isDPALU_DPP32BitOpc(unsigned Opc) {
3727 switch (Opc) {
3728 case AMDGPU::V_MUL_LO_U32_e64:
3729 case AMDGPU::V_MUL_LO_U32_e64_dpp:
3730 case AMDGPU::V_MUL_LO_U32_e64_dpp_gfx1250:
3731 case AMDGPU::V_MUL_HI_U32_e64:
3732 case AMDGPU::V_MUL_HI_U32_e64_dpp:
3733 case AMDGPU::V_MUL_HI_U32_e64_dpp_gfx1250:
3734 case AMDGPU::V_MUL_HI_I32_e64:
3735 case AMDGPU::V_MUL_HI_I32_e64_dpp:
3736 case AMDGPU::V_MUL_HI_I32_e64_dpp_gfx1250:
3737 case AMDGPU::V_MAD_U32_e64:
3738 case AMDGPU::V_MAD_U32_e64_dpp:
3739 case AMDGPU::V_MAD_U32_e64_dpp_gfx1250:
3740 return true;
3741 default:
3742 return false;
3743 }
3744}
3745
3746bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCInstrInfo &MII,
3747 const MCSubtargetInfo &ST) {
3748 if (!ST.hasFeature(AMDGPU::FeatureDPALU_DPP))
3749 return false;
3750
3751 if (isDPALU_DPP32BitOpc(OpDesc.getOpcode()))
3752 return ST.hasFeature(AMDGPU::FeatureGFX1250Insts);
3753
3754 return hasAny64BitVGPROperands(OpDesc, MII, ST);
3755}
3756
  // Each returned value equals the corresponding addressable local-memory
  // size divided by 512. NOTE(review): confirm the intended unit at the
  // function's declaration.
  if (ST.getFeatureBits().test(FeatureAddressableLocalMemorySize32768))
    return 64;
  if (ST.getFeatureBits().test(FeatureAddressableLocalMemorySize65536))
    return 128;
  if (ST.getFeatureBits().test(FeatureAddressableLocalMemorySize163840))
    return 320;
  if (ST.getFeatureBits().test(FeatureAddressableLocalMemorySize327680))
    return 512;
  return 64; // In sync with getAddressableLocalMemorySize
}
3768
3769bool isPackedFP32Inst(unsigned Opc) {
3770 switch (Opc) {
3771 case AMDGPU::V_PK_ADD_F32:
3772 case AMDGPU::V_PK_ADD_F32_gfx12:
3773 case AMDGPU::V_PK_MUL_F32:
3774 case AMDGPU::V_PK_MUL_F32_gfx12:
3775 case AMDGPU::V_PK_FMA_F32:
3776 case AMDGPU::V_PK_FMA_F32_gfx12:
3777 return true;
3778 default:
3779 return false;
3780 }
3781}
3782
// Accessor for the fixed cluster dimensions; only valid when the attribute
// kind is FixedDims (asserted).
const std::array<unsigned, 3> &ClusterDimsAttr::getDims() const {
  assert(isFixedDims() && "expect kind to be FixedDims");
  return Dims;
}
3787
3788std::string ClusterDimsAttr::to_string() const {
3789 SmallString<10> Buffer;
3790 raw_svector_ostream OS(Buffer);
3791
3792 switch (getKind()) {
3793 case Kind::Unknown:
3794 return "";
3795 case Kind::NoCluster: {
3796 OS << EncoNoCluster << ',' << EncoNoCluster << ',' << EncoNoCluster;
3797 return Buffer.c_str();
3798 }
3799 case Kind::VariableDims: {
3800 OS << EncoVariableDims << ',' << EncoVariableDims << ','
3801 << EncoVariableDims;
3802 return Buffer.c_str();
3803 }
3804 case Kind::FixedDims: {
3805 OS << Dims[0] << ',' << Dims[1] << ',' << Dims[2];
3806 return Buffer.c_str();
3807 }
3808 }
3809 llvm_unreachable("Unknown ClusterDimsAttr kind");
3810}
3811
  // Read the "amdgpu-cluster-dims" function attribute as a 3-element
  // integer vector, if present.
  std::optional<SmallVector<unsigned>> Attr =
      getIntegerVecAttribute(F, "amdgpu-cluster-dims", /*Size=*/3);

  if (!Attr.has_value())
    AttrKind = Kind::Unknown;
  else if (all_of(*Attr, equal_to(EncoNoCluster)))
    AttrKind = Kind::NoCluster;
  else if (all_of(*Attr, equal_to(EncoVariableDims)))
    AttrKind = Kind::VariableDims;

  // Only FixedDims carries the concrete dimension values.
  ClusterDimsAttr A(AttrKind);
  if (AttrKind == Kind::FixedDims)
    A.Dims = {(*Attr)[0], (*Attr)[1], (*Attr)[2]};

  return A;
}
3830
3831} // namespace AMDGPU
3832
  // Print the setting's symbolic name.
  switch (S) {
    OS << "Unsupported";
    break;
    OS << "Any";
    break;
    OS << "Off";
    break;
    OS << "On";
    break;
  }
  return OS;
}
3851
3852} // namespace llvm
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static llvm::cl::opt< unsigned > DefaultAMDHSACodeObjectVersion("amdhsa-code-object-version", llvm::cl::Hidden, llvm::cl::init(llvm::AMDGPU::AMDHSA_COV6), llvm::cl::desc("Set default AMDHSA Code Object Version (module flag " "or asm directive still take priority if present)"))
#define MAP_REG2REG
Provides AMDGPU specific target descriptions.
MC layer struct for AMDGPUMCKernelCodeT, provides MCExpr functionality where required.
@ AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32
This file contains the simple types necessary to represent the attributes associated with functions a...
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
IRTranslator LLVM IR MI
#define RegName(no)
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
This file contains the declarations for metadata subclasses.
#define T
uint64_t High
if(PassOpts->AAPipeline)
#define S_00B848_MEM_ORDERED(x)
Definition SIDefines.h:1252
#define S_00B848_WGP_MODE(x)
Definition SIDefines.h:1249
#define S_00B848_FWD_PROGRESS(x)
Definition SIDefines.h:1255
This file contains some functions that are useful when dealing with strings.
static const int BlockSize
Definition TarWriter.cpp:33
static const uint32_t IV[8]
Definition blake3_impl.h:83
static ClusterDimsAttr get(const Function &F)
const std::array< unsigned, 3 > & getDims() const
TargetIDSetting getXnackSetting() const
void print(raw_ostream &OS) const
Write string representation to OS.
AMDGPUTargetID(const MCSubtargetInfo &STI)
void setTargetIDFromTargetIDStream(StringRef TargetID)
TargetIDSetting getSramEccSetting() const
unsigned getIndexInParsedOperands(unsigned CompOprIdx) const
unsigned getIndexOfSrcInParsedOperands(unsigned CompSrcIdx) const
std::optional< unsigned > getInvalidCompOperandIndex(std::function< MCRegister(unsigned, unsigned)> GetRegIdx, const MCRegisterInfo &MRI, bool SkipSrc=false, bool AllowSameVGPR=false, bool VOPD3=false) const
std::array< MCRegister, Component::MAX_OPR_NUM > RegIndices
Represents the counter values to wait for in an s_waitcnt instruction.
This class represents an incoming formal argument to a Function.
Definition Argument.h:32
Functions, function parameters, and return types can have attributes to indicate how they should be t...
Definition Attributes.h:105
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
CallingConv::ID getCallingConv() const
LLVM_ABI bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Determine whether the argument or parameter has the given attribute.
constexpr bool test(unsigned I) const
unsigned getAddressSpace() const
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
Instances of this class represent a single low-level machine instruction.
Definition MCInst.h:188
Describe properties that are true of each instruction in the target description file.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
ArrayRef< MCOperandInfo > operands() const
bool mayStore() const
Return true if this instruction could possibly modify memory.
bool mayLoad() const
Return true if this instruction could possibly read memory.
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
int getOperandConstraint(unsigned OpNum, MCOI::OperandConstraint Constraint) const
Returns the value of the specified operand constraint if it is present.
unsigned getOpcode() const
Return the opcode number for this descriptor.
Interface to description of machine instruction set.
Definition MCInstrInfo.h:27
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition MCInstrInfo.h:90
int16_t getOpRegClassID(const MCOperandInfo &OpInfo, unsigned HwModeId) const
Return the ID of the register class to use for OpInfo, for the active HwMode HwModeId.
Definition MCInstrInfo.h:80
This holds information about one operand of a machine instruction, indicating the register class for ...
Definition MCInstrDesc.h:86
MCRegisterClass - Base class of TargetRegisterClass.
unsigned getID() const
getID() - Return the register class ID number.
MCRegister getRegister(unsigned i) const
getRegister - Return the specified register in the class.
bool contains(MCRegister Reg) const
contains - Return true if the specified register is included in this register class.
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
bool regsOverlap(MCRegister RegA, MCRegister RegB) const
Returns true if the two registers are equal or alias each other.
uint16_t getEncodingValue(MCRegister Reg) const
Returns the encoding for Reg.
const MCRegisterClass & getRegClass(unsigned i) const
Returns the register class associated with the enumeration value.
MCRegister getSubReg(MCRegister Reg, unsigned Idx) const
Returns the physical register number of sub-register "Index" for physical register RegNo.
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
constexpr unsigned id() const
Definition MCRegister.h:82
Generic base class for all target subtargets.
bool hasFeature(unsigned Feature) const
const Triple & getTargetTriple() const
const FeatureBitset & getFeatureBits() const
StringRef getCPU() const
Metadata node.
Definition Metadata.h:1080
const MDOperand & getOperand(unsigned I) const
Definition Metadata.h:1444
unsigned getNumOperands() const
Return number of MDNode operands.
Definition Metadata.h:1450
Representation of each machine instruction.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
const char * c_str()
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Definition StringRef.h:882
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition StringRef.h:730
bool getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition StringRef.h:490
std::string str() const
Get the contents as an std::string.
Definition StringRef.h:222
constexpr bool empty() const
Check if the string is empty.
Definition StringRef.h:141
constexpr size_t size() const
Get the string size.
Definition StringRef.h:144
bool ends_with(StringRef Suffix) const
Check if this string ends with the given Suffix.
Definition StringRef.h:270
Manages the enabling and disabling of subtarget specific features.
const std::vector< std::string > & getFeatures() const
Returns the vector of individual subtarget features.
Triple - Helper class for working with autoconf configuration names.
Definition Triple.h:47
LLVM_ABI StringRef getVendorName() const
Get the vendor (second) component of the triple.
Definition Triple.cpp:1659
LLVM_ABI StringRef getOSName() const
Get the operating system (third) component of the triple.
Definition Triple.cpp:1664
OSType getOS() const
Get the parsed operating system type of this triple.
Definition Triple.h:445
ArchType getArch() const
Get the parsed architecture type of this triple.
Definition Triple.h:436
LLVM_ABI StringRef getEnvironmentName() const
Get the optional environment (fourth) component of the triple, or "" if empty.
Definition Triple.cpp:1670
bool isAMDGCN() const
Tests whether the target is AMDGCN.
Definition Triple.h:908
LLVM_ABI StringRef getArchName() const
Get the architecture (first) component of the triple.
Definition Triple.cpp:1655
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
A raw_ostream that writes to an std::string.
A raw_ostream that writes to an SmallVector or SmallString.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ CONSTANT_ADDRESS_32BIT
Address space for 32-bit constant memory.
@ LOCAL_ADDRESS
Address space for local memory.
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
unsigned decodeFieldVaVcc(unsigned Encoded)
unsigned encodeFieldVaVcc(unsigned Encoded, unsigned VaVcc)
unsigned decodeFieldHoldCnt(unsigned Encoded, const IsaVersion &Version)
bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val, bool &IsDefault, const MCSubtargetInfo &STI)
unsigned encodeFieldHoldCnt(unsigned Encoded, unsigned HoldCnt, const IsaVersion &Version)
unsigned encodeFieldVaSsrc(unsigned Encoded, unsigned VaSsrc)
unsigned encodeFieldVaVdst(unsigned Encoded, unsigned VaVdst)
unsigned decodeFieldSaSdst(unsigned Encoded)
unsigned getHoldCntBitMask(const IsaVersion &Version)
unsigned decodeFieldVaSdst(unsigned Encoded)
unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc)
unsigned decodeFieldVaSsrc(unsigned Encoded)
int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask, const MCSubtargetInfo &STI)
unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst)
const CustomOperandVal DepCtrInfo[]
bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal, const MCSubtargetInfo &STI)
unsigned decodeFieldVaVdst(unsigned Encoded)
int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI)
unsigned decodeFieldVmVsrc(unsigned Encoded)
unsigned encodeFieldVaSdst(unsigned Encoded, unsigned VaSdst)
bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI)
static constexpr ExpTgt ExpTgtInfo[]
bool getTgtName(unsigned Id, StringRef &Name, int &Index)
unsigned getTgtId(const StringRef Name)
constexpr uint32_t VersionMinor
HSA metadata minor version.
constexpr uint32_t VersionMajor
HSA metadata major version.
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI)
unsigned getArchVGPRAllocGranule()
For subtargets with a unified VGPR file and mixed ArchVGPR/AGPR usage, returns the allocation granule...
unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getWavefrontSize(const MCSubtargetInfo *STI)
unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI, unsigned NumVGPRs, unsigned DynamicVGPRBlockSize)
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI)
unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed, bool FlatScrUsed, bool XNACKUsed)
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
unsigned getLocalMemorySize(const MCSubtargetInfo *STI)
unsigned getAddressableLocalMemorySize(const MCSubtargetInfo *STI)
unsigned getEUsPerCU(const MCSubtargetInfo *STI)
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI)
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
static TargetIDSetting getTargetIDSettingFromFeatureString(StringRef FeatureString)
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI)
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI, unsigned DynamicVGPRBlockSize, std::optional< bool > EnableWavefrontSize32)
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, bool Addressable)
unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs)
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI)
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, unsigned DynamicVGPRBlockSize)
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI)
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, unsigned DynamicVGPRBlockSize)
unsigned getAllocatedNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs, unsigned DynamicVGPRBlockSize, std::optional< bool > EnableWavefrontSize32)
unsigned getEncodedNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs, std::optional< bool > EnableWavefrontSize32)
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs, unsigned MaxWaves, AMDGPUSubtarget::Generation Gen)
static unsigned getGranulatedNumRegisterBlocks(unsigned NumRegs, unsigned Granule)
unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo *STI)
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI)
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI, unsigned DynamicVGPRBlockSize)
StringLiteral const UfmtSymbolicGFX11[]
bool isValidUnifiedFormat(unsigned Id, const MCSubtargetInfo &STI)
unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI)
StringRef getUnifiedFormatName(unsigned Id, const MCSubtargetInfo &STI)
unsigned const DfmtNfmt2UFmtGFX10[]
StringLiteral const DfmtSymbolic[]
static StringLiteral const * getNfmtLookupTable(const MCSubtargetInfo &STI)
bool isValidNfmt(unsigned Id, const MCSubtargetInfo &STI)
StringLiteral const NfmtSymbolicGFX10[]
bool isValidDfmtNfmt(unsigned Id, const MCSubtargetInfo &STI)
int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt, const MCSubtargetInfo &STI)
StringRef getDfmtName(unsigned Id)
int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt)
int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI)
bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI)
StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI)
unsigned const DfmtNfmt2UFmtGFX11[]
StringLiteral const NfmtSymbolicVI[]
StringLiteral const NfmtSymbolicSICI[]
int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI)
int64_t getDfmt(const StringRef Name)
StringLiteral const UfmtSymbolicGFX10[]
void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt)
uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId)
bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI)
void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId, uint16_t &StreamId, const MCSubtargetInfo &STI)
bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI)
bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, const MCSubtargetInfo &STI, bool Strict)
bool msgDoesNotUseM0(int64_t MsgId, const MCSubtargetInfo &STI)
Returns true if the message does not use the m0 operand.
StringRef getMsgOpName(int64_t MsgId, uint64_t Encoding, const MCSubtargetInfo &STI)
Map from an encoding to the symbolic name for a sendmsg operation.
static uint64_t getMsgIdMask(const MCSubtargetInfo &STI)
bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI)
bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI, bool Strict)
constexpr unsigned VOPD_VGPR_BANK_MASKS[]
constexpr unsigned COMPONENTS_NUM
constexpr unsigned VOPD3_VGPR_BANK_MASKS[]
bool isPackedFP32Inst(unsigned Opc)
bool isGCN3Encoding(const MCSubtargetInfo &STI)
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
bool isGFX10_BEncoding(const MCSubtargetInfo &STI)
bool isInlineValue(MCRegister Reg)
bool isGFX10_GFX11(const MCSubtargetInfo &STI)
bool isInlinableLiteralV216(uint32_t Literal, uint8_t OpType)
bool isPKFMACF16InlineConstant(uint32_t Literal, bool IsGFX11Plus)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
bool isSGPR(MCRegister Reg, const MCRegisterInfo *TRI)
Is Reg - scalar register.
uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset)
Convert ByteOffset to dwords if the subtarget uses dword SMRD immediate offsets.
static unsigned encodeStorecnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Storecnt)
MCRegister getMCReg(MCRegister Reg, const MCSubtargetInfo &STI)
If Reg is a pseudo reg, return the correct hardware register given STI otherwise return Reg.
static bool hasSMEMByteOffset(const MCSubtargetInfo &ST)
bool isVOPCAsmOnly(unsigned Opc)
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding, unsigned VDataDwords, unsigned VAddrDwords)
bool getMTBUFHasSrsrc(unsigned Opc)
std::optional< int64_t > getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST, int64_t ByteOffset)
bool getWMMAIsXDL(unsigned Opc)
static std::optional< unsigned > convertSetRegImmToVgprMSBs(unsigned Imm, unsigned Simm16, bool HasSetregVGPRMSBFixup)
uint8_t wmmaScaleF8F6F4FormatToNumRegs(unsigned Fmt)
static bool isSymbolicCustomOperandEncoding(const CustomOperandVal *Opr, int Size, unsigned Code, bool &HasNonDefaultVal, const MCSubtargetInfo &STI)
bool isGFX10Before1030(const MCSubtargetInfo &STI)
bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo)
Does this operand support only inlinable literals?
unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc)
const int OPR_ID_UNSUPPORTED
bool shouldEmitConstantsToTextSection(const Triple &TT)
bool isInlinableLiteralV2I16(uint32_t Literal)
bool isDPMACCInstruction(unsigned Opc)
int getMTBUFElements(unsigned Opc)
bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI)
static int encodeCustomOperandVal(const CustomOperandVal &Op, int64_t InputVal)
unsigned getTemporalHintType(const MCInstrDesc TID)
int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR)
bool isGFX10(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2BF16(uint32_t Literal)
unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI)
std::optional< unsigned > getInlineEncodingV216(bool IsFloat, uint32_t Literal)
FPType getFPDstSelType(unsigned Opc)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.
bool hasA16(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset, bool IsBuffer)
bool isGFX12Plus(const MCSubtargetInfo &STI)
unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler)
const MCRegisterClass * getVGPRPhysRegClass(MCRegister Reg, const MCRegisterInfo &MRI)
unsigned encodeLoadcntDscnt(const IsaVersion &Version, const Waitcnt &Decoded)
bool getHasMatrixScale(unsigned Opc)
bool hasPackedD16(const MCSubtargetInfo &STI)
unsigned getStorecntBitMask(const IsaVersion &Version)
unsigned getLdsDwGranularity(const MCSubtargetInfo &ST)
bool isGFX940(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
bool isHsaAbi(const MCSubtargetInfo &STI)
bool isGFX11(const MCSubtargetInfo &STI)
const int OPR_VAL_INVALID
bool getSMEMIsBuffer(unsigned Opc)
bool isGFX10_3_GFX11(const MCSubtargetInfo &STI)
bool isGFX13(const MCSubtargetInfo &STI)
unsigned getAsynccntBitMask(const IsaVersion &Version)
bool hasValueInRangeLikeMetadata(const MDNode &MD, int64_t Val)
Checks if Val is inside MD, a !range-like metadata.
uint8_t mfmaScaleF8F6F4FormatToNumRegs(unsigned EncodingVal)
unsigned getVOPDOpcode(unsigned Opc, bool VOPD3)
bool isGroupSegment(const GlobalValue *GV)
LLVM_ABI IsaVersion getIsaVersion(StringRef GPU)
bool getMTBUFHasSoffset(unsigned Opc)
bool hasXNACK(const MCSubtargetInfo &STI)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
static unsigned getCombinedCountBitMask(const IsaVersion &Version, bool IsStore)
CanBeVOPD getCanBeVOPD(unsigned Opc, unsigned EncodingFamily, bool VOPD3)
bool isVOPC64DPP(unsigned Opc)
int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements)
bool getMAIIsGFX940XDL(unsigned Opc)
bool isSI(const MCSubtargetInfo &STI)
unsigned getDefaultAMDHSACodeObjectVersion()
bool isReadOnlySegment(const GlobalValue *GV)
Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded)
bool isArgPassedInSGPR(const Argument *A)
bool isIntrinsicAlwaysUniform(unsigned IntrID)
int getMUBUFBaseOpcode(unsigned Opc)
unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded)
unsigned getAMDHSACodeObjectVersion(const Module &M)
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt)
unsigned getWaitcntBitMask(const IsaVersion &Version)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
bool getVOP3IsSingle(unsigned Opc)
bool isGFX9(const MCSubtargetInfo &STI)
bool isDPALU_DPP32BitOpc(unsigned Opc)
bool getVOP1IsSingle(unsigned Opc)
static bool isDwordAligned(uint64_t ByteOffset)
unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST)
bool isGFX10_AEncoding(const MCSubtargetInfo &STI)
bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this a KImm operand?
bool getHasColorExport(const Function &F)
int getMTBUFBaseOpcode(unsigned Opc)
bool isGFX90A(const MCSubtargetInfo &STI)
unsigned getSamplecntBitMask(const IsaVersion &Version)
unsigned getDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion)
std::tuple< char, unsigned, unsigned > parseAsmPhysRegName(StringRef RegName)
Returns a valid charcode or 0 in the first entry if this is a valid physical register name.
bool getHasDepthExport(const Function &F)
bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI)
bool getMUBUFHasVAddr(unsigned Opc)
bool isTrue16Inst(unsigned Opc)
unsigned getVGPREncodingMSBs(MCRegister Reg, const MCRegisterInfo &MRI)
std::pair< unsigned, unsigned > getVOPDComponents(unsigned VOPDOpcode)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
bool isGFX12(const MCSubtargetInfo &STI)
unsigned getInitialPSInputAddr(const Function &F)
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Expcnt)
bool isAsyncStore(unsigned Opc)
unsigned getDynamicVGPRBlockSize(const Function &F)
unsigned getKmcntBitMask(const IsaVersion &Version)
MCRegister getVGPRWithMSBs(MCRegister Reg, unsigned MSBs, const MCRegisterInfo &MRI)
If Reg is a low VGPR return a corresponding high VGPR with MSBs set.
unsigned getVmcntBitMask(const IsaVersion &Version)
bool isNotGFX10Plus(const MCSubtargetInfo &STI)
bool hasMAIInsts(const MCSubtargetInfo &STI)
unsigned getBitOp2(unsigned Opc)
bool isIntrinsicSourceOfDivergence(unsigned IntrID)
unsigned getXcntBitMask(const IsaVersion &Version)
bool isGenericAtomic(unsigned Opc)
const MFMA_F8F6F4_Info * getWMMA_F8F6F4_WithFormatArgs(unsigned FmtA, unsigned FmtB, unsigned F8F8Opcode)
bool isGFX8Plus(const MCSubtargetInfo &STI)
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the values intended for floating point values.
unsigned getLgkmcntBitMask(const IsaVersion &Version)
bool getMUBUFTfe(unsigned Opc)
unsigned getBvhcntBitMask(const IsaVersion &Version)
bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST)
bool hasMIMG_R128(const MCSubtargetInfo &STI)
bool hasGFX10_3Insts(const MCSubtargetInfo &STI)
unsigned decodeDscnt(const IsaVersion &Version, unsigned Waitcnt)
std::pair< const AMDGPU::OpName *, const AMDGPU::OpName * > getVGPRLoweringOperandTables(const MCInstrDesc &Desc)
bool hasG16(const MCSubtargetInfo &STI)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements)
bool isGFX13Plus(const MCSubtargetInfo &STI)
unsigned getExpcntBitMask(const IsaVersion &Version)
bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI)
int32_t getMCOpcode(uint32_t Opcode, unsigned Gen)
bool getMUBUFHasSoffset(unsigned Opc)
bool isNotGFX11Plus(const MCSubtargetInfo &STI)
bool isGFX11Plus(const MCSubtargetInfo &STI)
std::optional< unsigned > getInlineEncodingV2F16(uint32_t Literal)
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this floating-point operand?
std::tuple< char, unsigned, unsigned > parseAsmConstraintPhysReg(StringRef Constraint)
Returns a valid charcode or 0 in the first entry if this is a valid physical register constraint.
unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion)
static unsigned getDefaultCustomOperandEncoding(const CustomOperandVal *Opr, int Size, const MCSubtargetInfo &STI)
static unsigned encodeLoadcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Loadcnt)
bool isGFX10Plus(const MCSubtargetInfo &STI)
static bool decodeCustomOperand(const CustomOperandVal *Opr, int Size, unsigned Code, int &Idx, StringRef &Name, unsigned &Val, bool &IsDefault, const MCSubtargetInfo &STI)
static bool isValidRegPrefix(char C)
std::optional< int64_t > getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset, bool IsBuffer, bool HasSOffset)
bool isGlobalSegment(const GlobalValue *GV)
int64_t encode32BitLiteral(int64_t Imm, OperandType Type, bool IsLit)
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
Definition SIDefines.h:234
@ OPERAND_REG_INLINE_C_LAST
Definition SIDefines.h:257
@ OPERAND_REG_IMM_V2FP16
Definition SIDefines.h:211
@ OPERAND_REG_INLINE_C_FP64
Definition SIDefines.h:225
@ OPERAND_REG_INLINE_C_BF16
Definition SIDefines.h:222
@ OPERAND_REG_INLINE_C_V2BF16
Definition SIDefines.h:227
@ OPERAND_REG_IMM_V2INT16
Definition SIDefines.h:213
@ OPERAND_REG_IMM_BF16
Definition SIDefines.h:208
@ OPERAND_REG_IMM_INT32
Operands with register, 32-bit, or 64-bit immediate.
Definition SIDefines.h:203
@ OPERAND_REG_IMM_V2BF16
Definition SIDefines.h:210
@ OPERAND_REG_INLINE_AC_FIRST
Definition SIDefines.h:259
@ OPERAND_REG_IMM_FP16
Definition SIDefines.h:209
@ OPERAND_REG_IMM_V2FP16_SPLAT
Definition SIDefines.h:212
@ OPERAND_REG_IMM_NOINLINE_V2FP16
Definition SIDefines.h:214
@ OPERAND_REG_IMM_FP64
Definition SIDefines.h:207
@ OPERAND_REG_INLINE_C_V2FP16
Definition SIDefines.h:228
@ OPERAND_REG_INLINE_AC_INT32
Operands with an AccVGPR register or inline constant.
Definition SIDefines.h:239
@ OPERAND_REG_INLINE_AC_FP32
Definition SIDefines.h:240
@ OPERAND_REG_IMM_V2INT32
Definition SIDefines.h:215
@ OPERAND_REG_IMM_FP32
Definition SIDefines.h:206
@ OPERAND_REG_INLINE_C_FIRST
Definition SIDefines.h:256
@ OPERAND_REG_INLINE_C_FP32
Definition SIDefines.h:224
@ OPERAND_REG_INLINE_AC_LAST
Definition SIDefines.h:260
@ OPERAND_REG_INLINE_C_INT32
Definition SIDefines.h:220
@ OPERAND_REG_INLINE_C_V2INT16
Definition SIDefines.h:226
@ OPERAND_REG_IMM_V2FP32
Definition SIDefines.h:216
@ OPERAND_REG_INLINE_AC_FP64
Definition SIDefines.h:241
@ OPERAND_REG_INLINE_C_FP16
Definition SIDefines.h:223
@ OPERAND_INLINE_SPLIT_BARRIER_INT32
Definition SIDefines.h:231
std::optional< unsigned > getPKFMACF16InlineEncoding(uint32_t Literal, bool IsGFX11Plus)
void initDefaultAMDKernelCodeT(AMDGPUMCKernelCodeT &KernelCode, const MCSubtargetInfo *STI)
bool isNotGFX9Plus(const MCSubtargetInfo &STI)
bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCInstrInfo &MII, const MCSubtargetInfo &ST)
bool hasGDS(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset)
bool isGFX9Plus(const MCSubtargetInfo &STI)
bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI)
const int OPR_ID_DUPLICATE
bool isVOPD(unsigned Opc)
VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Vmcnt)
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt)
bool isCvt_F32_Fp8_Bf8_e64(unsigned Opc)
std::optional< unsigned > getInlineEncodingV2I16(uint32_t Literal)
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
unsigned encodeStorecntDscnt(const IsaVersion &Version, const Waitcnt &Decoded)
bool isGFX1250(const MCSubtargetInfo &STI)
const MIMGBaseOpcodeInfo * getMIMGBaseOpcode(unsigned Opc)
bool isVI(const MCSubtargetInfo &STI)
bool isTensorStore(unsigned Opc)
bool getMUBUFIsBufferInv(unsigned Opc)
bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode)
MCRegister mc2PseudoReg(MCRegister Reg)
Convert hardware register Reg to a pseudo register.
std::optional< unsigned > getInlineEncodingV2BF16(uint32_t Literal)
static int encodeCustomOperand(const CustomOperandVal *Opr, int Size, const StringRef Name, int64_t InputVal, unsigned &UsedOprMask, const MCSubtargetInfo &STI)
unsigned hasKernargPreload(const MCSubtargetInfo &STI)
bool supportsWGP(const MCSubtargetInfo &STI)
bool isMAC(unsigned Opc)
bool isCI(const MCSubtargetInfo &STI)
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Lgkmcnt)
bool getVOP2IsSingle(unsigned Opc)
bool getMAIIsDGEMM(unsigned Opc)
Returns true if MAI operation is a double precision GEMM.
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
const int OPR_ID_UNKNOWN
unsigned getCompletionActionImplicitArgPosition(unsigned CodeObjectVersion)
SmallVector< unsigned > getIntegerVecAttribute(const Function &F, StringRef Name, unsigned Size, unsigned DefaultVal)
unsigned decodeStorecnt(const IsaVersion &Version, unsigned Waitcnt)
bool isGFX1250Plus(const MCSubtargetInfo &STI)
int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels)
bool isNotGFX12Plus(const MCSubtargetInfo &STI)
bool getMTBUFHasVAddr(unsigned Opc)
unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt)
uint8_t getELFABIVersion(const Triple &T, unsigned CodeObjectVersion)
std::pair< unsigned, unsigned > getIntegerPairAttribute(const Function &F, StringRef Name, std::pair< unsigned, unsigned > Default, bool OnlyFirstRequired)
unsigned getLoadcntBitMask(const IsaVersion &Version)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
bool hasVOPD(const MCSubtargetInfo &STI)
int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily, bool VOPD3)
static unsigned encodeDscnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Dscnt)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
const MFMA_F8F6F4_Info * getMFMA_F8F6F4_WithFormatArgs(unsigned CBSZ, unsigned BLGP, unsigned F8F8Opcode)
unsigned decodeLoadcnt(const IsaVersion &Version, unsigned Waitcnt)
unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion)
bool isGFX9_GFX10_GFX11(const MCSubtargetInfo &STI)
bool isGFX9_GFX10(const MCSubtargetInfo &STI)
int getMUBUFElements(unsigned Opc)
const GcnBufferFormatInfo * getGcnBufferFormatInfo(uint8_t BitsPerComp, uint8_t NumComponents, uint8_t NumFormat, const MCSubtargetInfo &STI)
unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc)
bool isPermlane16(unsigned Opc)
bool getMUBUFHasSrsrc(unsigned Opc)
unsigned getDscntBitMask(const IsaVersion &Version)
bool hasAny64BitVGPROperands(const MCInstrDesc &OpDesc, const MCInstrInfo &MII, const MCSubtargetInfo &ST)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
@ AMDGPU_VS
Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tess...
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ AMDGPU_Gfx
Used for AMD graphics targets.
@ AMDGPU_CS_ChainPreserve
Used on AMDGPUs to give the middle-end more control over argument placement.
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
@ AMDGPU_CS_Chain
Used on AMDGPUs to give the middle-end more control over argument placement.
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
@ SPIR_KERNEL
Used for SPIR kernel functions.
@ AMDGPU_ES
Used for AMDPAL shader stage before geometry shader if geometry is in use.
@ AMDGPU_LS
Used for AMDPAL vertex shader if tessellation is in use.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ ELFABIVERSION_AMDGPU_HSA_V4
Definition ELF.h:384
@ ELFABIVERSION_AMDGPU_HSA_V5
Definition ELF.h:385
@ ELFABIVERSION_AMDGPU_HSA_V6
Definition ELF.h:386
initializer< Ty > init(const Ty &Val)
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract_or_null(Y &&MD)
Extract a Value from Metadata, allowing null.
Definition Metadata.h:683
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
Definition Metadata.h:668
This is an optimization pass for GlobalISel generic memory operations.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
Definition Threading.h:280
@ Offset
Definition DWP.cpp:557
constexpr T rotr(T V, int R)
Definition bit.h:399
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition Error.h:198
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value and is Skew mod Align.
Definition MathExtras.h:546
std::string utostr(uint64_t X, bool isNeg=false)
constexpr auto equal_to(T &&Arg)
Functor variant of std::equal_to that can be used as a UnaryPredicate in functional algorithms like a...
Definition STLExtras.h:2172
Op::Description Desc
FunctionAddr VTableAddr uintptr_t uintptr_t Version
Definition InstrProf.h:334
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
Definition MathExtras.h:150
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
constexpr uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
Definition MathExtras.h:155
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition MathExtras.h:394
To bit_cast(const From &from) noexcept
Definition bit.h:90
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
constexpr int countr_zero_constexpr(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:190
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
Definition MathExtras.h:77
@ AlwaysUniform
The result value is always uniform.
Definition Uniformity.h:23
@ Default
The result value is uniform if and only if all operands are uniform.
Definition Uniformity.h:20
#define N
AMD Kernel Code Object (amd_kernel_code_t).
static std::tuple< typename Fields::ValueType... > decode(uint64_t Encoded)
Instruction set architecture version.