LLVM 19.0.0git
AMDGPUBaseInfo.cpp
1//===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "AMDGPUBaseInfo.h"
10#include "AMDGPU.h"
11#include "AMDGPUAsmUtils.h"
12#include "AMDKernelCodeT.h"
16#include "llvm/IR/Attributes.h"
17#include "llvm/IR/Constants.h"
18#include "llvm/IR/Function.h"
19#include "llvm/IR/GlobalValue.h"
20#include "llvm/IR/IntrinsicsAMDGPU.h"
21#include "llvm/IR/IntrinsicsR600.h"
22#include "llvm/IR/LLVMContext.h"
23#include "llvm/MC/MCInstrInfo.h"
29#include <optional>
30
31#define GET_INSTRINFO_NAMED_OPS
32#define GET_INSTRMAP_INFO
33#include "AMDGPUGenInstrInfo.inc"
34
36 "amdhsa-code-object-version", llvm::cl::Hidden,
38 llvm::cl::desc("Set default AMDHSA Code Object Version (module flag "
39 "or asm directive still take priority if present)"));
40
41namespace {
42
43/// \returns Bit mask for given bit \p Shift and bit \p Width.
44unsigned getBitMask(unsigned Shift, unsigned Width) {
45 return ((1 << Width) - 1) << Shift;
46}
47
48/// Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
49///
50/// \returns Packed \p Dst.
51unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
52 unsigned Mask = getBitMask(Shift, Width);
53 return ((Src << Shift) & Mask) | (Dst & ~Mask);
54}
55
56/// Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
57///
58/// \returns Unpacked bits.
59unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
60 return (Src & getBitMask(Shift, Width)) >> Shift;
61}
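// Editorial sketch (not in the upstream file): how the three helpers above
// compose. packBits() rewrites only the selected field of Dst and preserves
// every other bit; unpackBits() recovers it. Shift/Width here are arbitrary.
//
//   unsigned Dst = 0xffffu;
//   Dst = packBits(/*Src=*/5u, Dst, /*Shift=*/4, /*Width=*/3); // field [6:4]
//   assert(unpackBits(Dst, /*Shift=*/4, /*Width=*/3) == 5u);   // Dst == 0xffdf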
62
63/// \returns Vmcnt bit shift (lower bits).
64unsigned getVmcntBitShiftLo(unsigned VersionMajor) {
65 return VersionMajor >= 11 ? 10 : 0;
66}
67
68/// \returns Vmcnt bit width (lower bits).
69unsigned getVmcntBitWidthLo(unsigned VersionMajor) {
70 return VersionMajor >= 11 ? 6 : 4;
71}
72
73/// \returns Expcnt bit shift.
74unsigned getExpcntBitShift(unsigned VersionMajor) {
75 return VersionMajor >= 11 ? 0 : 4;
76}
77
78/// \returns Expcnt bit width.
79unsigned getExpcntBitWidth(unsigned VersionMajor) { return 3; }
80
81/// \returns Lgkmcnt bit shift.
82unsigned getLgkmcntBitShift(unsigned VersionMajor) {
83 return VersionMajor >= 11 ? 4 : 8;
84}
85
86/// \returns Lgkmcnt bit width.
87unsigned getLgkmcntBitWidth(unsigned VersionMajor) {
88 return VersionMajor >= 10 ? 6 : 4;
89}
90
91/// \returns Vmcnt bit shift (higher bits).
92unsigned getVmcntBitShiftHi(unsigned VersionMajor) { return 14; }
93
94/// \returns Vmcnt bit width (higher bits).
95unsigned getVmcntBitWidthHi(unsigned VersionMajor) {
96 return (VersionMajor == 9 || VersionMajor == 10) ? 2 : 0;
97}
98
99/// \returns Loadcnt bit width
100unsigned getLoadcntBitWidth(unsigned VersionMajor) {
101 return VersionMajor >= 12 ? 6 : 0;
102}
103
104/// \returns Samplecnt bit width.
105unsigned getSamplecntBitWidth(unsigned VersionMajor) {
106 return VersionMajor >= 12 ? 6 : 0;
107}
108
109/// \returns Bvhcnt bit width.
110unsigned getBvhcntBitWidth(unsigned VersionMajor) {
111 return VersionMajor >= 12 ? 3 : 0;
112}
113
114/// \returns Dscnt bit width.
115unsigned getDscntBitWidth(unsigned VersionMajor) {
116 return VersionMajor >= 12 ? 6 : 0;
117}
118
119/// \returns Dscnt bit shift in combined S_WAIT instructions.
120unsigned getDscntBitShift(unsigned VersionMajor) { return 0; }
121
122/// \returns Storecnt or Vscnt bit width, depending on VersionMajor.
123unsigned getStorecntBitWidth(unsigned VersionMajor) {
124 return VersionMajor >= 10 ? 6 : 0;
125}
126
127/// \returns Kmcnt bit width.
128unsigned getKmcntBitWidth(unsigned VersionMajor) {
129 return VersionMajor >= 12 ? 5 : 0;
130}
131
132/// \returns shift for Loadcnt/Storecnt in combined S_WAIT instructions.
133unsigned getLoadcntStorecntBitShift(unsigned VersionMajor) {
134 return VersionMajor >= 12 ? 8 : 0;
135}
136
137/// \returns VmVsrc bit width
138inline unsigned getVmVsrcBitWidth() { return 3; }
139
140/// \returns VmVsrc bit shift
141inline unsigned getVmVsrcBitShift() { return 2; }
142
143/// \returns VaVdst bit width
144inline unsigned getVaVdstBitWidth() { return 4; }
145
146/// \returns VaVdst bit shift
147inline unsigned getVaVdstBitShift() { return 12; }
148
149/// \returns SaSdst bit width
150inline unsigned getSaSdstBitWidth() { return 1; }
151
152/// \returns SaSdst bit shift
153inline unsigned getSaSdstBitShift() { return 0; }
154
155} // end namespace anonymous
156
157namespace llvm {
158
159namespace AMDGPU {
160
161/// \returns True if \p STI is AMDHSA.
162bool isHsaAbi(const MCSubtargetInfo &STI) {
163 return STI.getTargetTriple().getOS() == Triple::AMDHSA;
164}
165
166unsigned getAMDHSACodeObjectVersion(const Module &M) {
167 if (auto Ver = mdconst::extract_or_null<ConstantInt>(
168 M.getModuleFlag("amdhsa_code_object_version"))) {
169 return (unsigned)Ver->getZExtValue() / 100;
170 }
171
172 return getDefaultAMDHSACodeObjectVersion();
173}
174
175unsigned getDefaultAMDHSACodeObjectVersion() {
176 return DefaultAMDHSACodeObjectVersion;
177}
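// Editorial note: the module flag stores 100 * version, so a module built
// for code object v5 carries
//   !0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
// and getAMDHSACodeObjectVersion(M) yields 500 / 100 = 5; without the flag,
// the -amdhsa-code-object-version default above is used.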
178
179unsigned getAMDHSACodeObjectVersion(unsigned ABIVersion) {
180 switch (ABIVersion) {
181 case ELF::ELFABIVERSION_AMDGPU_HSA_V4:
182 return 4;
183 case ELF::ELFABIVERSION_AMDGPU_HSA_V5:
184 return 5;
185 default:
186 return getDefaultAMDHSACodeObjectVersion();
187 }
188}
189
190uint8_t getELFABIVersion(const Triple &T, unsigned CodeObjectVersion) {
191 if (T.getOS() != Triple::AMDHSA)
192 return 0;
193
194 switch (CodeObjectVersion) {
195 case 4:
196 return ELF::ELFABIVERSION_AMDGPU_HSA_V4;
197 case 5:
198 return ELF::ELFABIVERSION_AMDGPU_HSA_V5;
199 case 6:
200 return ELF::ELFABIVERSION_AMDGPU_HSA_V6;
201 default:
202 report_fatal_error("Unsupported AMDHSA Code Object Version " +
203 Twine(CodeObjectVersion));
204 }
205}
206
207unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion) {
208 switch (CodeObjectVersion) {
209 case AMDHSA_COV4:
210 return 48;
211 case AMDHSA_COV5:
212 case AMDHSA_COV6:
213 default:
214 return AMDGPU::ImplicitArg::MULTIGRID_SYNC_ARG_OFFSET;
215 }
216}
217
218
219// FIXME: All such magic numbers about the ABI should be in a
220// central TD file.
221unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion) {
222 switch (CodeObjectVersion) {
223 case AMDHSA_COV4:
224 return 24;
225 case AMDHSA_COV5:
226 case AMDHSA_COV6:
227 default:
228 return AMDGPU::ImplicitArg::HOSTCALL_PTR_OFFSET;
229 }
230}
231
232unsigned getDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion) {
233 switch (CodeObjectVersion) {
234 case AMDHSA_COV4:
235 return 32;
236 case AMDHSA_COV5:
237 case AMDHSA_COV6:
238 default:
239 return AMDGPU::ImplicitArg::DEFAULT_QUEUE_OFFSET;
240 }
241}
242
243unsigned getCompletionActionImplicitArgPosition(unsigned CodeObjectVersion) {
244 switch (CodeObjectVersion) {
245 case AMDHSA_COV4:
246 return 40;
247 case AMDHSA_COV5:
248 case AMDHSA_COV6:
249 default:
250 return AMDGPU::ImplicitArg::COMPLETION_ACTION_OFFSET;
251 }
252}
253
254#define GET_MIMGBaseOpcodesTable_IMPL
255#define GET_MIMGDimInfoTable_IMPL
256#define GET_MIMGInfoTable_IMPL
257#define GET_MIMGLZMappingTable_IMPL
258#define GET_MIMGMIPMappingTable_IMPL
259#define GET_MIMGBiasMappingTable_IMPL
260#define GET_MIMGOffsetMappingTable_IMPL
261#define GET_MIMGG16MappingTable_IMPL
262#define GET_MAIInstInfoTable_IMPL
263#include "AMDGPUGenSearchableTables.inc"
264
265int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
266 unsigned VDataDwords, unsigned VAddrDwords) {
267 const MIMGInfo *Info = getMIMGOpcodeHelper(BaseOpcode, MIMGEncoding,
268 VDataDwords, VAddrDwords);
269 return Info ? Info->Opcode : -1;
270}
271
272const MIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc) {
273 const MIMGInfo *Info = getMIMGInfo(Opc);
274 return Info ? getMIMGBaseOpcodeInfo(Info->BaseOpcode) : nullptr;
275}
276
277int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels) {
278 const MIMGInfo *OrigInfo = getMIMGInfo(Opc);
279 const MIMGInfo *NewInfo =
280 getMIMGOpcodeHelper(OrigInfo->BaseOpcode, OrigInfo->MIMGEncoding,
281 NewChannels, OrigInfo->VAddrDwords);
282 return NewInfo ? NewInfo->Opcode : -1;
283}
284
285unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode,
286 const MIMGDimInfo *Dim, bool IsA16,
287 bool IsG16Supported) {
288 unsigned AddrWords = BaseOpcode->NumExtraArgs;
289 unsigned AddrComponents = (BaseOpcode->Coordinates ? Dim->NumCoords : 0) +
290 (BaseOpcode->LodOrClampOrMip ? 1 : 0);
291 if (IsA16)
292 AddrWords += divideCeil(AddrComponents, 2);
293 else
294 AddrWords += AddrComponents;
295
296 // Note: For subtargets that support A16 but not G16, enabling A16 also
297 // enables 16 bit gradients.
298 // For subtargets that support A16 (operand) and G16 (done with a different
299 // instruction encoding), they are independent.
300
301 if (BaseOpcode->Gradients) {
302 if ((IsA16 && !IsG16Supported) || BaseOpcode->G16)
303 // There are two gradients per coordinate, we pack them separately.
304 // For the 3d case,
305 // we get (dy/du, dx/du) (-, dz/du) (dy/dv, dx/dv) (-, dz/dv)
306 AddrWords += alignTo<2>(Dim->NumGradients / 2);
307 else
308 AddrWords += Dim->NumGradients;
309 }
310 return AddrWords;
311}
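// Worked example (editorial): a 2D sample with gradients, A16 enabled, on a
// subtarget without G16 support. With Dim->NumCoords == 2 the packed
// coordinates contribute divideCeil(2, 2) == 1 dword, and the 16-bit
// gradients (Dim->NumGradients == 4) contribute alignTo<2>(4 / 2) == 2
// dwords, on top of the base opcode's NumExtraArgs.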
312
313struct MUBUFInfo {
314 uint16_t Opcode;
315 uint16_t BaseOpcode;
316 uint8_t elements;
317 bool has_vaddr;
318 bool has_srsrc;
319 bool has_soffset;
320 bool IsBufferInv;
321};
322
323struct MTBUFInfo {
324 uint16_t Opcode;
325 uint16_t BaseOpcode;
326 uint8_t elements;
327 bool has_vaddr;
328 bool has_srsrc;
329 bool has_soffset;
330};
331
332struct SMInfo {
333 uint16_t Opcode;
334 bool IsBuffer;
335};
336
337struct VOPInfo {
338 uint16_t Opcode;
339 bool IsSingle;
340};
341
342struct VOPC64DPPInfo {
343 uint16_t Opcode;
344};
345
346struct VOPCDPPAsmOnlyInfo {
347 uint16_t Opcode;
348};
349
350struct VOP3CDPPAsmOnlyInfo {
351 uint16_t Opcode;
352};
353
354struct VOPDComponentInfo {
355 uint16_t BaseVOP;
356 uint16_t VOPDOp;
357 bool CanBeVOPDX;
358};
359
360struct VOPDInfo {
361 uint16_t Opcode;
362 uint16_t OpX;
363 uint16_t OpY;
364 uint16_t Subtarget;
365};
366
367struct VOPTrue16Info {
368 uint16_t Opcode;
369 bool IsTrue16;
370};
371
372#define GET_MTBUFInfoTable_DECL
373#define GET_MTBUFInfoTable_IMPL
374#define GET_MUBUFInfoTable_DECL
375#define GET_MUBUFInfoTable_IMPL
376#define GET_SMInfoTable_DECL
377#define GET_SMInfoTable_IMPL
378#define GET_VOP1InfoTable_DECL
379#define GET_VOP1InfoTable_IMPL
380#define GET_VOP2InfoTable_DECL
381#define GET_VOP2InfoTable_IMPL
382#define GET_VOP3InfoTable_DECL
383#define GET_VOP3InfoTable_IMPL
384#define GET_VOPC64DPPTable_DECL
385#define GET_VOPC64DPPTable_IMPL
386#define GET_VOPC64DPP8Table_DECL
387#define GET_VOPC64DPP8Table_IMPL
388#define GET_VOPCAsmOnlyInfoTable_DECL
389#define GET_VOPCAsmOnlyInfoTable_IMPL
390#define GET_VOP3CAsmOnlyInfoTable_DECL
391#define GET_VOP3CAsmOnlyInfoTable_IMPL
392#define GET_VOPDComponentTable_DECL
393#define GET_VOPDComponentTable_IMPL
394#define GET_VOPDPairs_DECL
395#define GET_VOPDPairs_IMPL
396#define GET_VOPTrue16Table_DECL
397#define GET_VOPTrue16Table_IMPL
398#define GET_WMMAOpcode2AddrMappingTable_DECL
399#define GET_WMMAOpcode2AddrMappingTable_IMPL
400#define GET_WMMAOpcode3AddrMappingTable_DECL
401#define GET_WMMAOpcode3AddrMappingTable_IMPL
402#include "AMDGPUGenSearchableTables.inc"
403
404int getMTBUFBaseOpcode(unsigned Opc) {
405 const MTBUFInfo *Info = getMTBUFInfoFromOpcode(Opc);
406 return Info ? Info->BaseOpcode : -1;
407}
408
409int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements) {
410 const MTBUFInfo *Info = getMTBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
411 return Info ? Info->Opcode : -1;
412}
413
414int getMTBUFElements(unsigned Opc) {
415 const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
416 return Info ? Info->elements : 0;
417}
418
419bool getMTBUFHasVAddr(unsigned Opc) {
420 const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
421 return Info ? Info->has_vaddr : false;
422}
423
424bool getMTBUFHasSrsrc(unsigned Opc) {
425 const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
426 return Info ? Info->has_srsrc : false;
427}
428
429bool getMTBUFHasSoffset(unsigned Opc) {
430 const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
431 return Info ? Info->has_soffset : false;
432}
433
434int getMUBUFBaseOpcode(unsigned Opc) {
435 const MUBUFInfo *Info = getMUBUFInfoFromOpcode(Opc);
436 return Info ? Info->BaseOpcode : -1;
437}
438
439int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements) {
440 const MUBUFInfo *Info = getMUBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
441 return Info ? Info->Opcode : -1;
442}
443
444int getMUBUFElements(unsigned Opc) {
445 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
446 return Info ? Info->elements : 0;
447}
448
449bool getMUBUFHasVAddr(unsigned Opc) {
450 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
451 return Info ? Info->has_vaddr : false;
452}
453
454bool getMUBUFHasSrsrc(unsigned Opc) {
455 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
456 return Info ? Info->has_srsrc : false;
457}
458
459bool getMUBUFHasSoffset(unsigned Opc) {
460 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
461 return Info ? Info->has_soffset : false;
462}
463
464bool getMUBUFIsBufferInv(unsigned Opc) {
465 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
466 return Info ? Info->IsBufferInv : false;
467}
468
469bool getSMEMIsBuffer(unsigned Opc) {
470 const SMInfo *Info = getSMEMOpcodeHelper(Opc);
471 return Info ? Info->IsBuffer : false;
472}
473
474bool getVOP1IsSingle(unsigned Opc) {
475 const VOPInfo *Info = getVOP1OpcodeHelper(Opc);
476 return Info ? Info->IsSingle : false;
477}
478
479bool getVOP2IsSingle(unsigned Opc) {
480 const VOPInfo *Info = getVOP2OpcodeHelper(Opc);
481 return Info ? Info->IsSingle : false;
482}
483
484bool getVOP3IsSingle(unsigned Opc) {
485 const VOPInfo *Info = getVOP3OpcodeHelper(Opc);
486 return Info ? Info->IsSingle : false;
487}
488
489bool isVOPC64DPP(unsigned Opc) {
490 return isVOPC64DPPOpcodeHelper(Opc) || isVOPC64DPP8OpcodeHelper(Opc);
491}
492
493bool isVOPCAsmOnly(unsigned Opc) {
494 return isVOPCAsmOnlyOpcodeHelper(Opc) || isVOP3CAsmOnlyOpcodeHelper(Opc);
495}
496
497bool getMAIIsDGEMM(unsigned Opc) {
498 const MAIInstInfo *Info = getMAIInstInfoHelper(Opc);
499 return Info ? Info->is_dgemm : false;
500}
501
502bool getMAIIsGFX940XDL(unsigned Opc) {
503 const MAIInstInfo *Info = getMAIInstInfoHelper(Opc);
504 return Info ? Info->is_gfx940_xdl : false;
505}
506
507unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST) {
508 if (ST.hasFeature(AMDGPU::FeatureGFX12Insts))
509 return SIEncodingFamily::GFX12;
510 if (ST.hasFeature(AMDGPU::FeatureGFX11Insts))
511 return SIEncodingFamily::GFX11;
512 llvm_unreachable("Subtarget generation does not support VOPD!");
513}
514
515CanBeVOPD getCanBeVOPD(unsigned Opc) {
516 const VOPDComponentInfo *Info = getVOPDComponentHelper(Opc);
517 if (Info)
518 return {Info->CanBeVOPDX, true};
519 else
520 return {false, false};
521}
522
523unsigned getVOPDOpcode(unsigned Opc) {
524 const VOPDComponentInfo *Info = getVOPDComponentHelper(Opc);
525 return Info ? Info->VOPDOp : ~0u;
526}
527
528bool isVOPD(unsigned Opc) {
529 return AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0X);
530}
531
532bool isMAC(unsigned Opc) {
533 return Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
534 Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
535 Opc == AMDGPU::V_MAC_F32_e64_vi ||
536 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
537 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
538 Opc == AMDGPU::V_MAC_F16_e64_vi ||
539 Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
540 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
541 Opc == AMDGPU::V_FMAC_F32_e64_gfx11 ||
542 Opc == AMDGPU::V_FMAC_F32_e64_gfx12 ||
543 Opc == AMDGPU::V_FMAC_F32_e64_vi ||
544 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
545 Opc == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 ||
546 Opc == AMDGPU::V_FMAC_F16_e64_gfx10 ||
547 Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx11 ||
548 Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx12 ||
549 Opc == AMDGPU::V_DOT2C_F32_F16_e64_vi ||
550 Opc == AMDGPU::V_DOT2C_I32_I16_e64_vi ||
551 Opc == AMDGPU::V_DOT4C_I32_I8_e64_vi ||
552 Opc == AMDGPU::V_DOT8C_I32_I4_e64_vi;
553}
554
555bool isPermlane16(unsigned Opc) {
556 return Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
557 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10 ||
558 Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx11 ||
559 Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx11 ||
560 Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx12 ||
561 Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx12 ||
562 Opc == AMDGPU::V_PERMLANE16_VAR_B32_e64_gfx12 ||
563 Opc == AMDGPU::V_PERMLANEX16_VAR_B32_e64_gfx12;
564}
565
566bool isCvt_F32_Fp8_Bf8_e64(unsigned Opc) {
567 return Opc == AMDGPU::V_CVT_F32_BF8_e64_gfx12 ||
568 Opc == AMDGPU::V_CVT_F32_FP8_e64_gfx12 ||
569 Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp_gfx12 ||
570 Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp_gfx12 ||
571 Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp8_gfx12 ||
572 Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp8_gfx12 ||
573 Opc == AMDGPU::V_CVT_PK_F32_BF8_e64_gfx12 ||
574 Opc == AMDGPU::V_CVT_PK_F32_FP8_e64_gfx12;
575}
576
577bool isGenericAtomic(unsigned Opc) {
578 return Opc == AMDGPU::G_AMDGPU_ATOMIC_FMIN ||
579 Opc == AMDGPU::G_AMDGPU_ATOMIC_FMAX ||
580 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SWAP ||
581 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_ADD ||
582 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB ||
583 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMIN ||
584 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMIN ||
585 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMAX ||
586 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMAX ||
587 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_AND ||
588 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_OR ||
589 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_XOR ||
590 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_INC ||
591 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC ||
592 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD ||
593 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMIN ||
594 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMAX ||
595 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_CMPSWAP ||
596 Opc == AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG;
597}
598
599bool isTrue16Inst(unsigned Opc) {
600 const VOPTrue16Info *Info = getTrue16OpcodeHelper(Opc);
601 return Info ? Info->IsTrue16 : false;
602}
603
604unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc) {
605 const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom2AddrOpcode(Opc);
606 return Info ? Info->Opcode3Addr : ~0u;
607}
608
609unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc) {
610 const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom3AddrOpcode(Opc);
611 return Info ? Info->Opcode2Addr : ~0u;
612}
613
614// Wrapper for Tablegen'd function. enum Subtarget is not defined in any
615// header files, so we need to wrap it in a function that takes unsigned
616// instead.
617int getMCOpcode(uint16_t Opcode, unsigned Gen) {
618 return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen));
619}
620
621int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily) {
622 const VOPDInfo *Info =
623 getVOPDInfoFromComponentOpcodes(OpX, OpY, EncodingFamily);
624 return Info ? Info->Opcode : -1;
625}
626
627std::pair<unsigned, unsigned> getVOPDComponents(unsigned VOPDOpcode) {
628 const VOPDInfo *Info = getVOPDOpcodeHelper(VOPDOpcode);
629 assert(Info);
630 auto OpX = getVOPDBaseFromComponent(Info->OpX);
631 auto OpY = getVOPDBaseFromComponent(Info->OpY);
632 assert(OpX && OpY);
633 return {OpX->BaseVOP, OpY->BaseVOP};
634}
635
636namespace VOPD {
637
640
641ComponentProps::ComponentProps(const MCInstrDesc &OpDesc) {
642 assert(OpDesc.getNumDefs() == Component::DST_NUM);
643 auto TiedIdx = OpDesc.getOperandConstraint(Component::SRC2, MCOI::TIED_TO);
644 assert(TiedIdx == -1 || TiedIdx == Component::DST);
645 HasSrc2Acc = TiedIdx != -1;
646
647 SrcOperandsNum = OpDesc.getNumOperands() - OpDesc.getNumDefs();
648 assert(SrcOperandsNum <= Component::MAX_SRC_NUM);
649
650 auto OperandsNum = OpDesc.getNumOperands();
651 unsigned CompOprIdx;
652 for (CompOprIdx = Component::SRC1; CompOprIdx < OperandsNum; ++CompOprIdx) {
653 if (OpDesc.operands()[CompOprIdx].OperandType == AMDGPU::OPERAND_KIMM32) {
654 MandatoryLiteralIdx = CompOprIdx;
655 break;
656 }
657 }
658}
659
660unsigned ComponentInfo::getIndexInParsedOperands(unsigned CompOprIdx) const {
661 assert(CompOprIdx < Component::MAX_OPR_NUM);
662
663 if (CompOprIdx == Component::DST)
664 return getIndexOfDstInParsedOperands();
665
666 auto CompSrcIdx = CompOprIdx - Component::DST_NUM;
667 if (CompSrcIdx < getCompParsedSrcOperandsNum())
668 return getIndexOfSrcInParsedOperands(CompSrcIdx);
669
670 // The specified operand does not exist.
671 return 0;
672}
673
674std::optional<unsigned> InstInfo::getInvalidCompOperandIndex(
675 std::function<unsigned(unsigned, unsigned)> GetRegIdx, bool SkipSrc) const {
676
677 auto OpXRegs = getRegIndices(ComponentIndex::X, GetRegIdx);
678 auto OpYRegs = getRegIndices(ComponentIndex::Y, GetRegIdx);
679
680 const unsigned CompOprNum =
681 SkipSrc ? Component::DST_NUM : Component::MAX_OPR_NUM;
682 unsigned CompOprIdx;
683 for (CompOprIdx = 0; CompOprIdx < CompOprNum; ++CompOprIdx) {
684 unsigned BanksMasks = VOPD_VGPR_BANK_MASKS[CompOprIdx];
685 if (OpXRegs[CompOprIdx] && OpYRegs[CompOprIdx] &&
686 ((OpXRegs[CompOprIdx] & BanksMasks) ==
687 (OpYRegs[CompOprIdx] & BanksMasks)))
688 return CompOprIdx;
689 }
690
691 return {};
692}
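// Editorial note on the check above: a VOPD X/Y pair is illegal when both
// components read a VGPR from the same bank in the same operand position.
// For example, with a bank mask of 0x3, v0 and v4 map to the same bank
// (0 == (4 & 3)) and conflict, while v0 and v1 land in different banks.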
693
694// Return an array of VGPR registers [DST,SRC0,SRC1,SRC2] used
695// by the specified component. If an operand is unused
696// or is not a VGPR, the corresponding value is 0.
697//
698// GetRegIdx(Component, MCOperandIdx) must return a VGPR register index
699// for the specified component and MC operand. The callback must return 0
700// if the operand is not a register or not a VGPR.
701InstInfo::RegIndices InstInfo::getRegIndices(
702 unsigned CompIdx,
703 std::function<unsigned(unsigned, unsigned)> GetRegIdx) const {
704 assert(CompIdx < COMPONENTS_NUM);
705
706 const auto &Comp = CompInfo[CompIdx];
707 InstInfo::RegIndices RegIndices;
708
709 RegIndices[DST] = GetRegIdx(CompIdx, Comp.getIndexOfDstInMCOperands());
710
711 for (unsigned CompOprIdx : {SRC0, SRC1, SRC2}) {
712 unsigned CompSrcIdx = CompOprIdx - DST_NUM;
713 RegIndices[CompOprIdx] =
714 Comp.hasRegSrcOperand(CompSrcIdx)
715 ? GetRegIdx(CompIdx, Comp.getIndexOfSrcInMCOperands(CompSrcIdx))
716 : 0;
717 }
718 return RegIndices;
719}
720
721} // namespace VOPD
722
723VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY) {
724 return VOPD::InstInfo(OpX, OpY);
725}
726
727VOPD::InstInfo getVOPDInstInfo(unsigned VOPDOpcode,
728 const MCInstrInfo *InstrInfo) {
729 auto [OpX, OpY] = getVOPDComponents(VOPDOpcode);
730 const auto &OpXDesc = InstrInfo->get(OpX);
731 const auto &OpYDesc = InstrInfo->get(OpY);
732 VOPD::ComponentInfo OpXInfo(OpXDesc, VOPD::ComponentKind::COMPONENT_X);
733 VOPD::ComponentInfo OpYInfo(OpYDesc, OpXInfo);
734 return VOPD::InstInfo(OpXInfo, OpYInfo);
735}
736
737namespace IsaInfo {
738
739AMDGPUTargetID::AMDGPUTargetID(const MCSubtargetInfo &STI)
740 : STI(STI), XnackSetting(TargetIDSetting::Any),
741 SramEccSetting(TargetIDSetting::Any) {
742 if (!STI.getFeatureBits().test(FeatureSupportsXNACK))
743 XnackSetting = TargetIDSetting::Unsupported;
744 if (!STI.getFeatureBits().test(FeatureSupportsSRAMECC))
745 SramEccSetting = TargetIDSetting::Unsupported;
746}
747
748void AMDGPUTargetID::setTargetIDFromFeaturesString(StringRef FS) {
749 // Check if xnack or sramecc is explicitly enabled or disabled. In the
750 // absence of the target features we assume we must generate code that can run
751 // in any environment.
752 SubtargetFeatures Features(FS);
753 std::optional<bool> XnackRequested;
754 std::optional<bool> SramEccRequested;
755
756 for (const std::string &Feature : Features.getFeatures()) {
757 if (Feature == "+xnack")
758 XnackRequested = true;
759 else if (Feature == "-xnack")
760 XnackRequested = false;
761 else if (Feature == "+sramecc")
762 SramEccRequested = true;
763 else if (Feature == "-sramecc")
764 SramEccRequested = false;
765 }
766
767 bool XnackSupported = isXnackSupported();
768 bool SramEccSupported = isSramEccSupported();
769
770 if (XnackRequested) {
771 if (XnackSupported) {
772 XnackSetting =
773 *XnackRequested ? TargetIDSetting::On : TargetIDSetting::Off;
774 } else {
775 // If a specific xnack setting was requested and this GPU does not support
776 // xnack emit a warning. Setting will remain set to "Unsupported".
777 if (*XnackRequested) {
778 errs() << "warning: xnack 'On' was requested for a processor that does "
779 "not support it!\n";
780 } else {
781 errs() << "warning: xnack 'Off' was requested for a processor that "
782 "does not support it!\n";
783 }
784 }
785 }
786
787 if (SramEccRequested) {
788 if (SramEccSupported) {
789 SramEccSetting =
790 *SramEccRequested ? TargetIDSetting::On : TargetIDSetting::Off;
791 } else {
792 // If a specific sramecc setting was requested and this GPU does not
793 // support sramecc emit a warning. Setting will remain set to
794 // "Unsupported".
795 if (*SramEccRequested) {
796 errs() << "warning: sramecc 'On' was requested for a processor that "
797 "does not support it!\n";
798 } else {
799 errs() << "warning: sramecc 'Off' was requested for a processor that "
800 "does not support it!\n";
801 }
802 }
803 }
804}
805
806static TargetIDSetting
807getTargetIDSettingFromFeatureString(StringRef FeatureString) {
808 if (FeatureString.ends_with("-"))
809 return TargetIDSetting::Off;
810 if (FeatureString.ends_with("+"))
811 return TargetIDSetting::On;
812
813 llvm_unreachable("Malformed feature string");
814}
815
816void AMDGPUTargetID::setTargetIDFromTargetIDStream(StringRef TargetID) {
817 SmallVector<StringRef, 3> TargetIDSplit;
818 TargetID.split(TargetIDSplit, ':');
819
820 for (const auto &FeatureString : TargetIDSplit) {
821 if (FeatureString.starts_with("xnack"))
822 XnackSetting = getTargetIDSettingFromFeatureString(FeatureString);
823 if (FeatureString.starts_with("sramecc"))
824 SramEccSetting = getTargetIDSettingFromFeatureString(FeatureString);
825 }
826}
827
828std::string AMDGPUTargetID::toString() const {
829 std::string StringRep;
830 raw_string_ostream StreamRep(StringRep);
831
832 auto TargetTriple = STI.getTargetTriple();
833 auto Version = getIsaVersion(STI.getCPU());
834
835 StreamRep << TargetTriple.getArchName() << '-'
836 << TargetTriple.getVendorName() << '-'
837 << TargetTriple.getOSName() << '-'
838 << TargetTriple.getEnvironmentName() << '-';
839
840 std::string Processor;
841 // TODO: Following else statement is present here because we used various
842 // alias names for GPUs up until GFX9 (e.g. 'fiji' is same as 'gfx803').
843 // Remove once all aliases are removed from GCNProcessors.td.
844 if (Version.Major >= 9)
845 Processor = STI.getCPU().str();
846 else
847 Processor = (Twine("gfx") + Twine(Version.Major) + Twine(Version.Minor) +
848 Twine(Version.Stepping))
849 .str();
850
851 std::string Features;
852 if (STI.getTargetTriple().getOS() == Triple::AMDHSA) {
853 // sramecc.
854 if (getSramEccSetting() == TargetIDSetting::Off)
855 Features += ":sramecc-";
856 else if (getSramEccSetting() == TargetIDSetting::On)
857 Features += ":sramecc+";
858 // xnack.
859 if (getXnackSetting() == TargetIDSetting::Off)
860 Features += ":xnack-";
861 else if (getXnackSetting() == TargetIDSetting::On)
862 Features += ":xnack+";
863 }
864
865 StreamRep << Processor << Features;
866
867 StreamRep.flush();
868 return StringRep;
869}
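// Example (editorial): for a gfx90a HSA target with sramecc enabled and
// xnack disabled, toString() produces
//   amdgcn-amd-amdhsa--gfx90a:sramecc+:xnack-
// (the doubled '-' reflects the empty environment component of the triple).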
870
871unsigned getWavefrontSize(const MCSubtargetInfo *STI) {
872 if (STI->getFeatureBits().test(FeatureWavefrontSize16))
873 return 16;
874 if (STI->getFeatureBits().test(FeatureWavefrontSize32))
875 return 32;
876
877 return 64;
878}
879
880unsigned getLocalMemorySize(const MCSubtargetInfo *STI) {
881 unsigned BytesPerCU = 0;
882 if (STI->getFeatureBits().test(FeatureLocalMemorySize32768))
883 BytesPerCU = 32768;
884 if (STI->getFeatureBits().test(FeatureLocalMemorySize65536))
885 BytesPerCU = 65536;
886
887 // "Per CU" really means "per whatever functional block the waves of a
888 // workgroup must share". So the effective local memory size is doubled in
889 // WGP mode on gfx10.
890 if (isGFX10Plus(*STI) && !STI->getFeatureBits().test(FeatureCuMode))
891 BytesPerCU *= 2;
892
893 return BytesPerCU;
894}
895
896unsigned getAddressableLocalMemorySize(const MCSubtargetInfo *STI) {
897 if (STI->getFeatureBits().test(FeatureLocalMemorySize32768))
898 return 32768;
899 if (STI->getFeatureBits().test(FeatureLocalMemorySize65536))
900 return 65536;
901 return 0;
902}
903
904unsigned getEUsPerCU(const MCSubtargetInfo *STI) {
905 // "Per CU" really means "per whatever functional block the waves of a
906 // workgroup must share". For gfx10 in CU mode this is the CU, which contains
907 // two SIMDs.
908 if (isGFX10Plus(*STI) && STI->getFeatureBits().test(FeatureCuMode))
909 return 2;
910 // Pre-gfx10 a CU contains four SIMDs. For gfx10 in WGP mode the WGP contains
911 // two CUs, so a total of four SIMDs.
912 return 4;
913}
914
915unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
916 unsigned FlatWorkGroupSize) {
917 assert(FlatWorkGroupSize != 0);
918 if (STI->getTargetTriple().getArch() != Triple::amdgcn)
919 return 8;
920 unsigned MaxWaves = getMaxWavesPerEU(STI) * getEUsPerCU(STI);
921 unsigned N = getWavesPerWorkGroup(STI, FlatWorkGroupSize);
922 if (N == 1) {
923 // Single-wave workgroups don't consume barrier resources.
924 return MaxWaves;
925 }
926
927 unsigned MaxBarriers = 16;
928 if (isGFX10Plus(*STI) && !STI->getFeatureBits().test(FeatureCuMode))
929 MaxBarriers = 32;
930
931 return std::min(MaxWaves / N, MaxBarriers);
932}
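// Worked example (editorial): gfx10 in WGP mode, wave32, with
// FlatWorkGroupSize = 256. Each group needs N = 256 / 32 = 8 waves,
// MaxWaves = getMaxWavesPerEU() * getEUsPerCU() = 20 * 4 = 80, and WGP mode
// raises the barrier limit to 32, so the result is min(80 / 8, 32) = 10.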
933
934unsigned getMinWavesPerEU(const MCSubtargetInfo *STI) {
935 return 1;
936}
937
938unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI) {
939 // FIXME: Need to take scratch memory into account.
940 if (isGFX90A(*STI))
941 return 8;
942 if (!isGFX10Plus(*STI))
943 return 10;
944 return hasGFX10_3Insts(*STI) ? 16 : 20;
945}
946
947unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI,
948 unsigned FlatWorkGroupSize) {
949 return divideCeil(getWavesPerWorkGroup(STI, FlatWorkGroupSize),
950 getEUsPerCU(STI));
951}
952
953unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI) {
954 return 1;
955}
956
957unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI) {
958 // Some subtargets allow encoding 2048, but this isn't tested or supported.
959 return 1024;
960}
961
962unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
963 unsigned FlatWorkGroupSize) {
964 return divideCeil(FlatWorkGroupSize, getWavefrontSize(STI));
965}
966
967unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI) {
968 IsaVersion Version = getIsaVersion(STI->getCPU());
969 if (Version.Major >= 10)
970 return getAddressableNumSGPRs(STI);
971 if (Version.Major >= 8)
972 return 16;
973 return 8;
974}
975
976unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI) {
977 return 8;
978}
979
980unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI) {
981 IsaVersion Version = getIsaVersion(STI->getCPU());
982 if (Version.Major >= 8)
983 return 800;
984 return 512;
985}
986
987unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI) {
988 if (STI->getFeatureBits().test(FeatureSGPRInitBug))
989 return FIXED_NUM_SGPRS_FOR_INIT_BUG;
990
991 IsaVersion Version = getIsaVersion(STI->getCPU());
992 if (Version.Major >= 10)
993 return 106;
994 if (Version.Major >= 8)
995 return 102;
996 return 104;
997}
998
999unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
1000 assert(WavesPerEU != 0);
1001
1002 IsaVersion Version = getIsaVersion(STI->getCPU());
1003 if (Version.Major >= 10)
1004 return 0;
1005
1006 if (WavesPerEU >= getMaxWavesPerEU(STI))
1007 return 0;
1008
1009 unsigned MinNumSGPRs = getTotalNumSGPRs(STI) / (WavesPerEU + 1);
1010 if (STI->getFeatureBits().test(FeatureTrapHandler))
1011 MinNumSGPRs -= std::min(MinNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
1012 MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(STI)) + 1;
1013 return std::min(MinNumSGPRs, getAddressableNumSGPRs(STI));
1014}
1015
1016unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
1017 bool Addressable) {
1018 assert(WavesPerEU != 0);
1019
1020 unsigned AddressableNumSGPRs = getAddressableNumSGPRs(STI);
1021 IsaVersion Version = getIsaVersion(STI->getCPU());
1022 if (Version.Major >= 10)
1023 return Addressable ? AddressableNumSGPRs : 108;
1024 if (Version.Major >= 8 && !Addressable)
1025 AddressableNumSGPRs = 112;
1026 unsigned MaxNumSGPRs = getTotalNumSGPRs(STI) / WavesPerEU;
1027 if (STI->getFeatureBits().test(FeatureTrapHandler))
1028 MaxNumSGPRs -= std::min(MaxNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
1029 MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(STI));
1030 return std::min(MaxNumSGPRs, AddressableNumSGPRs);
1031}
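// Worked example (editorial, ignoring the trap-handler reservation): gfx9 at
// WavesPerEU = 8 gives MaxNumSGPRs = 800 / 8 = 100, aligned down to the
// 16-register granule = 96, then clamped to the 102 addressable SGPRs.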
1032
1033unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
1034 bool FlatScrUsed, bool XNACKUsed) {
1035 unsigned ExtraSGPRs = 0;
1036 if (VCCUsed)
1037 ExtraSGPRs = 2;
1038
1039 IsaVersion Version = getIsaVersion(STI->getCPU());
1040 if (Version.Major >= 10)
1041 return ExtraSGPRs;
1042
1043 if (Version.Major < 8) {
1044 if (FlatScrUsed)
1045 ExtraSGPRs = 4;
1046 } else {
1047 if (XNACKUsed)
1048 ExtraSGPRs = 4;
1049
1050 if (FlatScrUsed ||
1051 STI->getFeatureBits().test(AMDGPU::FeatureArchitectedFlatScratch))
1052 ExtraSGPRs = 6;
1053 }
1054
1055 return ExtraSGPRs;
1056}
1057
1058unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
1059 bool FlatScrUsed) {
1060 return getNumExtraSGPRs(STI, VCCUsed, FlatScrUsed,
1061 STI->getFeatureBits().test(AMDGPU::FeatureXNACK));
1062}
1063
1064static unsigned getGranulatedNumRegisterBlocks(unsigned NumRegs,
1065 unsigned Granule) {
1066 return divideCeil(std::max(1u, NumRegs), Granule);
1067}
1068
1069unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs) {
1070 // SGPRBlocks is actual number of SGPR blocks minus 1.
1071 return getGranulatedNumRegisterBlocks(NumSGPRs, getSGPREncodingGranule(STI)) -
1072 1;
1073}
1074
1075unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
1076 std::optional<bool> EnableWavefrontSize32) {
1077 if (STI->getFeatureBits().test(FeatureGFX90AInsts))
1078 return 8;
1079
1080 bool IsWave32 = EnableWavefrontSize32 ?
1081 *EnableWavefrontSize32 :
1082 STI->getFeatureBits().test(FeatureWavefrontSize32);
1083
1084 if (STI->getFeatureBits().test(FeatureGFX11FullVGPRs))
1085 return IsWave32 ? 24 : 12;
1086
1087 if (hasGFX10_3Insts(*STI))
1088 return IsWave32 ? 16 : 8;
1089
1090 return IsWave32 ? 8 : 4;
1091}
1092
1093unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
1094 std::optional<bool> EnableWavefrontSize32) {
1095 if (STI->getFeatureBits().test(FeatureGFX90AInsts))
1096 return 8;
1097
1098 bool IsWave32 = EnableWavefrontSize32 ?
1099 *EnableWavefrontSize32 :
1100 STI->getFeatureBits().test(FeatureWavefrontSize32);
1101
1102 return IsWave32 ? 8 : 4;
1103}
1104
1105unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) {
1106 if (STI->getFeatureBits().test(FeatureGFX90AInsts))
1107 return 512;
1108 if (!isGFX10Plus(*STI))
1109 return 256;
1110 bool IsWave32 = STI->getFeatureBits().test(FeatureWavefrontSize32);
1111 if (STI->getFeatureBits().test(FeatureGFX11FullVGPRs))
1112 return IsWave32 ? 1536 : 768;
1113 return IsWave32 ? 1024 : 512;
1114}
1115
1116unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo *STI) { return 256; }
1117
1118unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI) {
1119 if (STI->getFeatureBits().test(FeatureGFX90AInsts))
1120 return 512;
1121 return getAddressableNumArchVGPRs(STI);
1122}
1123
1124static unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI,
1125 unsigned NumVGPRs) {
1126 unsigned MaxWaves = getMaxWavesPerEU(STI);
1127 unsigned Granule = getVGPRAllocGranule(STI);
1128 if (NumVGPRs < Granule)
1129 return MaxWaves;
1130 unsigned RoundedRegs = alignTo(NumVGPRs, Granule);
1131 return std::min(std::max(getTotalNumVGPRs(STI) / RoundedRegs, 1u), MaxWaves);
1132}
1133
1134unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
1135 assert(WavesPerEU != 0);
1136
1137 unsigned MaxWavesPerEU = getMaxWavesPerEU(STI);
1138 if (WavesPerEU >= MaxWavesPerEU)
1139 return 0;
1140
1141 unsigned TotNumVGPRs = getTotalNumVGPRs(STI);
1142 unsigned AddrsableNumVGPRs = getAddressableNumVGPRs(STI);
1143 unsigned Granule = getVGPRAllocGranule(STI);
1144 unsigned MaxNumVGPRs = alignDown(TotNumVGPRs / WavesPerEU, Granule);
1145
1146 if (MaxNumVGPRs == alignDown(TotNumVGPRs / MaxWavesPerEU, Granule))
1147 return 0;
1148
1149 unsigned MinWavesPerEU = getNumWavesPerEUWithNumVGPRs(STI, AddrsableNumVGPRs);
1150 if (WavesPerEU < MinWavesPerEU)
1151 return getMinNumVGPRs(STI, MinWavesPerEU);
1152
1153 unsigned MaxNumVGPRsNext = alignDown(TotNumVGPRs / (WavesPerEU + 1), Granule);
1154 unsigned MinNumVGPRs = 1 + std::min(MaxNumVGPRs - Granule, MaxNumVGPRsNext);
1155 return std::min(MinNumVGPRs, AddrsableNumVGPRs);
1156}
1157
1158unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
1159 assert(WavesPerEU != 0);
1160
1161 unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(STI) / WavesPerEU,
1162 getVGPRAllocGranule(STI));
1163 unsigned AddressableNumVGPRs = getAddressableNumVGPRs(STI);
1164 return std::min(MaxNumVGPRs, AddressableNumVGPRs);
1165}
1166
1167unsigned getEncodedNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs,
1168 std::optional<bool> EnableWavefrontSize32) {
1169 return getGranulatedNumRegisterBlocks(
1170 NumVGPRs, getVGPREncodingGranule(STI, EnableWavefrontSize32)) -
1171 1;
1172}
1173
1174unsigned getAllocatedNumVGPRBlocks(const MCSubtargetInfo *STI,
1175 unsigned NumVGPRs,
1176 std::optional<bool> EnableWavefrontSize32) {
1177 return getGranulatedNumRegisterBlocks(
1178 NumVGPRs, getVGPRAllocGranule(STI, EnableWavefrontSize32));
1179}
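// Worked example (editorial): gfx10 wave32 (without gfx10.3 or gfx11 full
// VGPRs) uses an allocation granule of 8, so NumVGPRs = 37 occupies
// getGranulatedNumRegisterBlocks(37, 8) = 5 granules; the field produced by
// getEncodedNumVGPRBlocks() stores that count minus one.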
1180} // end namespace IsaInfo
1181
1182void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
1183 const MCSubtargetInfo *STI) {
1184 IsaVersion Version = getIsaVersion(STI->getCPU());
1185
1186 memset(&Header, 0, sizeof(Header));
1187
1188 Header.amd_kernel_code_version_major = 1;
1189 Header.amd_kernel_code_version_minor = 2;
1190 Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
1191 Header.amd_machine_version_major = Version.Major;
1192 Header.amd_machine_version_minor = Version.Minor;
1193 Header.amd_machine_version_stepping = Version.Stepping;
1194 Header.kernel_code_entry_byte_offset = sizeof(Header);
1195 Header.wavefront_size = 6;
1196
1197 // If the code object does not support indirect functions, then the value must
1198 // be 0xffffffff.
1199 Header.call_convention = -1;
1200
1201 // These alignment values are specified in powers of two, so alignment =
1202 // 2^n. The minimum alignment is 2^4 = 16.
1203 Header.kernarg_segment_alignment = 4;
1204 Header.group_segment_alignment = 4;
1205 Header.private_segment_alignment = 4;
1206
1207 if (Version.Major >= 10) {
1208 if (STI->getFeatureBits().test(FeatureWavefrontSize32)) {
1209 Header.wavefront_size = 5;
1210 Header.code_properties |= AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
1211 }
1212 Header.compute_pgm_resource_registers |=
1213 S_00B848_WGP_MODE(STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1) |
1214 S_00B848_MEM_ORDERED(1);
1215 }
1216}
1217
1218amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(
1219 const MCSubtargetInfo *STI) {
1220 IsaVersion Version = getIsaVersion(STI->getCPU());
1221
1222 amdhsa::kernel_descriptor_t KD;
1223 memset(&KD, 0, sizeof(KD));
1224
1225 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
1226 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64,
1227 amdhsa::FLOAT_DENORM_MODE_FLUSH_NONE);
1228 if (Version.Major >= 12) {
1229 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
1230 amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, 0);
1231 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
1232 amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_DISABLE_PERF, 0);
1233 } else {
1234 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
1235 amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, 1);
1236 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
1237 amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, 1);
1238 }
1239 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2,
1240 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, 1);
1241 if (Version.Major >= 10) {
1242 AMDHSA_BITS_SET(KD.kernel_code_properties,
1243 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
1244 STI->getFeatureBits().test(FeatureWavefrontSize32) ? 1 : 0);
1245 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
1246 amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE,
1247 STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1);
1248 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
1249 amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, 1);
1250 }
1251 if (AMDGPU::isGFX90A(*STI)) {
1252 AMDHSA_BITS_SET(KD.compute_pgm_rsrc3,
1253 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
1254 STI->getFeatureBits().test(FeatureTgSplit) ? 1 : 0);
1255 }
1256 return KD;
1257}
1258
1259bool isGroupSegment(const GlobalValue *GV) {
1260 return GV->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
1261}
1262
1263bool isGlobalSegment(const GlobalValue *GV) {
1264 return GV->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
1265}
1266
1267bool isReadOnlySegment(const GlobalValue *GV) {
1268 unsigned AS = GV->getAddressSpace();
1269 return AS == AMDGPUAS::CONSTANT_ADDRESS ||
1270 AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
1271}
1272
1273bool shouldEmitConstantsToTextSection(const Triple &TT) {
1274 return TT.getArch() == Triple::r600;
1275}
1276
1277std::pair<unsigned, unsigned>
1278getIntegerPairAttribute(const Function &F, StringRef Name,
1279 std::pair<unsigned, unsigned> Default,
1280 bool OnlyFirstRequired) {
1281 Attribute A = F.getFnAttribute(Name);
1282 if (!A.isStringAttribute())
1283 return Default;
1284
1285 LLVMContext &Ctx = F.getContext();
1286 std::pair<unsigned, unsigned> Ints = Default;
1287 std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(',');
1288 if (Strs.first.trim().getAsInteger(0, Ints.first)) {
1289 Ctx.emitError("can't parse first integer attribute " + Name);
1290 return Default;
1291 }
1292 if (Strs.second.trim().getAsInteger(0, Ints.second)) {
1293 if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
1294 Ctx.emitError("can't parse second integer attribute " + Name);
1295 return Default;
1296 }
1297 }
1298
1299 return Ints;
1300}
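// Example (editorial): an attribute such as
//   "amdgpu-flat-work-group-size"="1,256"
// parses to {1, 256}; a malformed string like "1,x" reports an error through
// the LLVMContext and falls back to the caller-supplied Default.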
1301
1302SmallVector<unsigned> getIntegerVecAttribute(const Function &F, StringRef Name,
1303 unsigned Size) {
1304 assert(Size > 2);
1305 SmallVector<unsigned> Default(Size, 0);
1306
1307 Attribute A = F.getFnAttribute(Name);
1308 if (!A.isStringAttribute())
1309 return Default;
1310
1311 SmallVector<unsigned> Vals(Size, 0);
1312
1313 LLVMContext &Ctx = F.getContext();
1314
1315 StringRef S = A.getValueAsString();
1316 unsigned i = 0;
1317 for (; !S.empty() && i < Size; i++) {
1318 std::pair<StringRef, StringRef> Strs = S.split(',');
1319 unsigned IntVal;
1320 if (Strs.first.trim().getAsInteger(0, IntVal)) {
1321 Ctx.emitError("can't parse integer attribute " + Strs.first + " in " +
1322 Name);
1323 return Default;
1324 }
1325 Vals[i] = IntVal;
1326 S = Strs.second;
1327 }
1328
1329 if (!S.empty() || i < Size) {
1330 Ctx.emitError("attribute " + Name +
1331 " has incorrect number of integers; expected " +
1332 llvm::utostr(Size));
1333 return Default;
1334 }
1335 return Vals;
1336}
1337
1338unsigned getVmcntBitMask(const IsaVersion &Version) {
1339 return (1 << (getVmcntBitWidthLo(Version.Major) +
1340 getVmcntBitWidthHi(Version.Major))) -
1341 1;
1342}
1343
1344unsigned getLoadcntBitMask(const IsaVersion &Version) {
1345 return (1 << getLoadcntBitWidth(Version.Major)) - 1;
1346}
1347
1348unsigned getSamplecntBitMask(const IsaVersion &Version) {
1349 return (1 << getSamplecntBitWidth(Version.Major)) - 1;
1350}
1351
1352unsigned getBvhcntBitMask(const IsaVersion &Version) {
1353 return (1 << getBvhcntBitWidth(Version.Major)) - 1;
1354}
1355
1356unsigned getExpcntBitMask(const IsaVersion &Version) {
1357 return (1 << getExpcntBitWidth(Version.Major)) - 1;
1358}
1359
1360unsigned getLgkmcntBitMask(const IsaVersion &Version) {
1361 return (1 << getLgkmcntBitWidth(Version.Major)) - 1;
1362}
1363
1364unsigned getDscntBitMask(const IsaVersion &Version) {
1365 return (1 << getDscntBitWidth(Version.Major)) - 1;
1366}
1367
1368unsigned getKmcntBitMask(const IsaVersion &Version) {
1369 return (1 << getKmcntBitWidth(Version.Major)) - 1;
1370}
1371
1372unsigned getStorecntBitMask(const IsaVersion &Version) {
1373 return (1 << getStorecntBitWidth(Version.Major)) - 1;
1374}
1375
1376unsigned getWaitcntBitMask(const IsaVersion &Version) {
1377 unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(Version.Major),
1378 getVmcntBitWidthLo(Version.Major));
1379 unsigned Expcnt = getBitMask(getExpcntBitShift(Version.Major),
1380 getExpcntBitWidth(Version.Major));
1381 unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(Version.Major),
1382 getLgkmcntBitWidth(Version.Major));
1383 unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(Version.Major),
1384 getVmcntBitWidthHi(Version.Major));
1385 return VmcntLo | Expcnt | Lgkmcnt | VmcntHi;
1386}
1387
1388unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt) {
1389 unsigned VmcntLo = unpackBits(Waitcnt, getVmcntBitShiftLo(Version.Major),
1390 getVmcntBitWidthLo(Version.Major));
1391 unsigned VmcntHi = unpackBits(Waitcnt, getVmcntBitShiftHi(Version.Major),
1392 getVmcntBitWidthHi(Version.Major));
1393 return VmcntLo | VmcntHi << getVmcntBitWidthLo(Version.Major);
1394}
1395
1396unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt) {
1397 return unpackBits(Waitcnt, getExpcntBitShift(Version.Major),
1398 getExpcntBitWidth(Version.Major));
1399}
1400
1401unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt) {
1402 return unpackBits(Waitcnt, getLgkmcntBitShift(Version.Major),
1403 getLgkmcntBitWidth(Version.Major));
1404}
1405
1406void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
1407 unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
1408 Vmcnt = decodeVmcnt(Version, Waitcnt);
1409 Expcnt = decodeExpcnt(Version, Waitcnt);
1410 Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
1411}
1412
1413Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded) {
1414 Waitcnt Decoded;
1415 Decoded.LoadCnt = decodeVmcnt(Version, Encoded);
1416 Decoded.ExpCnt = decodeExpcnt(Version, Encoded);
1417 Decoded.DsCnt = decodeLgkmcnt(Version, Encoded);
1418 return Decoded;
1419}
1420
1421unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
1422 unsigned Vmcnt) {
1423 Waitcnt = packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(Version.Major),
1424 getVmcntBitWidthLo(Version.Major));
1425 return packBits(Vmcnt >> getVmcntBitWidthLo(Version.Major), Waitcnt,
1426 getVmcntBitShiftHi(Version.Major),
1427 getVmcntBitWidthHi(Version.Major));
1428}
1429
1430unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
1431 unsigned Expcnt) {
1432 return packBits(Expcnt, Waitcnt, getExpcntBitShift(Version.Major),
1433 getExpcntBitWidth(Version.Major));
1434}
1435
1436unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
1437 unsigned Lgkmcnt) {
1438 return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(Version.Major),
1439 getLgkmcntBitWidth(Version.Major));
1440}
1441
1442unsigned encodeWaitcnt(const IsaVersion &Version,
1443 unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
1444 unsigned Waitcnt = getWaitcntBitMask(Version);
1445 Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
1446 Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
1447 Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
1448 return Waitcnt;
1449}
1450
1451unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded) {
1452 return encodeWaitcnt(Version, Decoded.LoadCnt, Decoded.ExpCnt, Decoded.DsCnt);
1453}
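// Worked example (editorial): on gfx9, vmcnt occupies bits [3:0] plus the
// high bits [15:14], expcnt bits [6:4], and lgkmcnt bits [11:8]. Encoding
// vmcnt = 17 (0b10001) stores 0b0001 in the low field and 0b01 in the high
// field, and decodeVmcnt() reassembles the original value:
//   unsigned Enc = encodeWaitcnt(Version, /*Vmcnt=*/17, /*Expcnt=*/0,
//                                /*Lgkmcnt=*/0);
//   assert(decodeVmcnt(Version, Enc) == 17);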
1454
1455static unsigned getCombinedCountBitMask(const IsaVersion &Version,
1456 bool IsStore) {
1457 unsigned Dscnt = getBitMask(getDscntBitShift(Version.Major),
1458 getDscntBitWidth(Version.Major));
1459 if (IsStore) {
1460 unsigned Storecnt = getBitMask(getLoadcntStorecntBitShift(Version.Major),
1461 getStorecntBitWidth(Version.Major));
1462 return Dscnt | Storecnt;
1463 } else {
1464 unsigned Loadcnt = getBitMask(getLoadcntStorecntBitShift(Version.Major),
1465 getLoadcntBitWidth(Version.Major));
1466 return Dscnt | Loadcnt;
1467 }
1468}
1469
1470Waitcnt decodeLoadcntDscnt(const IsaVersion &Version, unsigned LoadcntDscnt) {
1471 Waitcnt Decoded;
1472 Decoded.LoadCnt =
1473 unpackBits(LoadcntDscnt, getLoadcntStorecntBitShift(Version.Major),
1474 getLoadcntBitWidth(Version.Major));
1475 Decoded.DsCnt = unpackBits(LoadcntDscnt, getDscntBitShift(Version.Major),
1476 getDscntBitWidth(Version.Major));
1477 return Decoded;
1478}
1479
1480Waitcnt decodeStorecntDscnt(const IsaVersion &Version, unsigned StorecntDscnt) {
1481 Waitcnt Decoded;
1482 Decoded.StoreCnt =
1483 unpackBits(StorecntDscnt, getLoadcntStorecntBitShift(Version.Major),
1484 getStorecntBitWidth(Version.Major));
1485 Decoded.DsCnt = unpackBits(StorecntDscnt, getDscntBitShift(Version.Major),
1486 getDscntBitWidth(Version.Major));
1487 return Decoded;
1488}
1489
1490static unsigned encodeLoadcnt(const IsaVersion &Version, unsigned Waitcnt,
1491 unsigned Loadcnt) {
1492 return packBits(Loadcnt, Waitcnt, getLoadcntStorecntBitShift(Version.Major),
1493 getLoadcntBitWidth(Version.Major));
1494}
1495
1496static unsigned encodeStorecnt(const IsaVersion &Version, unsigned Waitcnt,
1497 unsigned Storecnt) {
1498 return packBits(Storecnt, Waitcnt, getLoadcntStorecntBitShift(Version.Major),
1499 getStorecntBitWidth(Version.Major));
1500}
1501
1502static unsigned encodeDscnt(const IsaVersion &Version, unsigned Waitcnt,
1503 unsigned Dscnt) {
1504 return packBits(Dscnt, Waitcnt, getDscntBitShift(Version.Major),
1505 getDscntBitWidth(Version.Major));
1506}
1507
1508static unsigned encodeLoadcntDscnt(const IsaVersion &Version, unsigned Loadcnt,
1509 unsigned Dscnt) {
1510 unsigned Waitcnt = getCombinedCountBitMask(Version, false);
1511 Waitcnt = encodeLoadcnt(Version, Waitcnt, Loadcnt);
1512 Waitcnt = encodeDscnt(Version, Waitcnt, Dscnt);
1513 return Waitcnt;
1514}
1515
1516unsigned encodeLoadcntDscnt(const IsaVersion &Version, const Waitcnt &Decoded) {
1517 return encodeLoadcntDscnt(Version, Decoded.LoadCnt, Decoded.DsCnt);
1518}
1519
1520static unsigned encodeStorecntDscnt(const IsaVersion &Version,
1521 unsigned Storecnt, unsigned Dscnt) {
1522 unsigned Waitcnt = getCombinedCountBitMask(Version, true);
1523 Waitcnt = encodeStorecnt(Version, Waitcnt, Storecnt);
1524 Waitcnt = encodeDscnt(Version, Waitcnt, Dscnt);
1525 return Waitcnt;
1526}
1527
1528unsigned encodeStorecntDscnt(const IsaVersion &Version,
1529 const Waitcnt &Decoded) {
1530 return encodeStorecntDscnt(Version, Decoded.StoreCnt, Decoded.DsCnt);
1531}
1532
1533//===----------------------------------------------------------------------===//
1534// Custom Operands.
1535//
1536// A table of custom operands shall describe "primary" operand names
1537// first followed by aliases if any. It is not required but recommended
1538// to arrange operands so that operand encoding match operand position
1539// in the table. This will make disassembly a bit more efficient.
1540// Unused slots in the table shall have an empty name.
1541//
1542//===----------------------------------------------------------------------===//
1543
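// Editorial illustration of the convention described above, with
// hypothetical rows: encoding 0 in slot 0 and encoding 1 in slot 1 let the
// QuickCheck path of getOprIdx() below probe OpInfo[Id] directly, while an
// alias placed after the primary names is only found by the linear scan.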
1544template <class T>
1545static bool isValidOpr(int Idx, const CustomOperand<T> OpInfo[], int OpInfoSize,
1546 T Context) {
1547 return 0 <= Idx && Idx < OpInfoSize && !OpInfo[Idx].Name.empty() &&
1548 (!OpInfo[Idx].Cond || OpInfo[Idx].Cond(Context));
1549}
1550
1551template <class T>
1552static int getOprIdx(std::function<bool(const CustomOperand<T> &)> Test,
1553 const CustomOperand<T> OpInfo[], int OpInfoSize,
1554 T Context) {
1555 int InvalidIdx = OPR_ID_UNKNOWN;
1556 for (int Idx = 0; Idx < OpInfoSize; ++Idx) {
1557 if (Test(OpInfo[Idx])) {
1558 if (!OpInfo[Idx].Cond || OpInfo[Idx].Cond(Context))
1559 return Idx;
1560 InvalidIdx = OPR_ID_UNSUPPORTED;
1561 }
1562 }
1563 return InvalidIdx;
1564}
1565
1566template <class T>
1567static int getOprIdx(const StringRef Name, const CustomOperand<T> OpInfo[],
1568 int OpInfoSize, T Context) {
1569 auto Test = [=](const CustomOperand<T> &Op) { return Op.Name == Name; };
1570 return getOprIdx<T>(Test, OpInfo, OpInfoSize, Context);
1571}
1572
1573template <class T>
1574static int getOprIdx(int Id, const CustomOperand<T> OpInfo[], int OpInfoSize,
1575 T Context, bool QuickCheck = true) {
1576 auto Test = [=](const CustomOperand<T> &Op) {
1577 return Op.Encoding == Id && !Op.Name.empty();
1578 };
1579 // This is an optimization that should work in most cases.
1580 // As a side effect, it may cause selection of an alias
1581 // instead of a primary operand name in case of sparse tables.
1582 if (QuickCheck && isValidOpr<T>(Id, OpInfo, OpInfoSize, Context) &&
1583 OpInfo[Id].Encoding == Id) {
1584 return Id;
1585 }
1586 return getOprIdx<T>(Test, OpInfo, OpInfoSize, Context);
1587}
1588
1589//===----------------------------------------------------------------------===//
1590// Custom Operand Values
1591//===----------------------------------------------------------------------===//
1592
1593static unsigned getDefaultCustomOperandEncoding(const CustomOperandVal *Opr,
1594 int Size,
1595 const MCSubtargetInfo &STI) {
1596 unsigned Enc = 0;
1597 for (int Idx = 0; Idx < Size; ++Idx) {
1598 const auto &Op = Opr[Idx];
1599 if (Op.isSupported(STI))
1600 Enc |= Op.encode(Op.Default);
1601 }
1602 return Enc;
1603}
1604
1605static bool isSymbolicCustomOperandEncoding(const CustomOperandVal *Opr,
1606 int Size, unsigned Code,
1607 bool &HasNonDefaultVal,
1608 const MCSubtargetInfo &STI) {
1609 unsigned UsedOprMask = 0;
1610 HasNonDefaultVal = false;
1611 for (int Idx = 0; Idx < Size; ++Idx) {
1612 const auto &Op = Opr[Idx];
1613 if (!Op.isSupported(STI))
1614 continue;
1615 UsedOprMask |= Op.getMask();
1616 unsigned Val = Op.decode(Code);
1617 if (!Op.isValid(Val))
1618 return false;
1619 HasNonDefaultVal |= (Val != Op.Default);
1620 }
1621 return (Code & ~UsedOprMask) == 0;
1622}
1623
1624static bool decodeCustomOperand(const CustomOperandVal *Opr, int Size,
1625 unsigned Code, int &Idx, StringRef &Name,
1626 unsigned &Val, bool &IsDefault,
1627 const MCSubtargetInfo &STI) {
1628 while (Idx < Size) {
1629 const auto &Op = Opr[Idx++];
1630 if (Op.isSupported(STI)) {
1631 Name = Op.Name;
1632 Val = Op.decode(Code);
1633 IsDefault = (Val == Op.Default);
1634 return true;
1635 }
1636 }
1637
1638 return false;
1639}
1640
1641static int encodeCustomOperandVal(const CustomOperandVal &Op,
1642 int64_t InputVal) {
1643 if (InputVal < 0 || InputVal > Op.Max)
1644 return OPR_VAL_INVALID;
1645 return Op.encode(InputVal);
1646}
1647
1648static int encodeCustomOperand(const CustomOperandVal *Opr, int Size,
1649 const StringRef Name, int64_t InputVal,
1650 unsigned &UsedOprMask,
1651 const MCSubtargetInfo &STI) {
1652 int InvalidId = OPR_ID_UNKNOWN;
1653 for (int Idx = 0; Idx < Size; ++Idx) {
1654 const auto &Op = Opr[Idx];
1655 if (Op.Name == Name) {
1656 if (!Op.isSupported(STI)) {
1657 InvalidId = OPR_ID_UNSUPPORTED;
1658 continue;
1659 }
1660 auto OprMask = Op.getMask();
1661 if (OprMask & UsedOprMask)
1662 return OPR_ID_DUPLICATE;
1663 UsedOprMask |= OprMask;
1664 return encodeCustomOperandVal(Op, InputVal);
1665 }
1666 }
1667 return InvalidId;
1668}
1669
1670//===----------------------------------------------------------------------===//
1671// DepCtr
1672//===----------------------------------------------------------------------===//
1673
1674namespace DepCtr {
1675
1676int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI) {
1677 static int Default = -1;
1678 if (Default == -1)
1679 Default = getDefaultCustomOperandEncoding(DepCtrInfo, DEP_CTR_SIZE, STI);
1680 return Default;
1681}
1682
1683bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal,
1684 const MCSubtargetInfo &STI) {
1685 return isSymbolicCustomOperandEncoding(DepCtrInfo, DEP_CTR_SIZE, Code,
1686 HasNonDefaultVal, STI);
1687}
1688
1689bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val,
1690 bool &IsDefault, const MCSubtargetInfo &STI) {
1691 return decodeCustomOperand(DepCtrInfo, DEP_CTR_SIZE, Code, Id, Name, Val,
1692 IsDefault, STI);
1693}
1694
1695int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask,
1696 const MCSubtargetInfo &STI) {
1697 return encodeCustomOperand(DepCtrInfo, DEP_CTR_SIZE, Name, Val, UsedOprMask,
1698 STI);
1699}
1700
1701unsigned decodeFieldVmVsrc(unsigned Encoded) {
1702 return unpackBits(Encoded, getVmVsrcBitShift(), getVmVsrcBitWidth());
1703}
1704
1705unsigned decodeFieldVaVdst(unsigned Encoded) {
1706 return unpackBits(Encoded, getVaVdstBitShift(), getVaVdstBitWidth());
1707}
1708
1709unsigned decodeFieldSaSdst(unsigned Encoded) {
1710 return unpackBits(Encoded, getSaSdstBitShift(), getSaSdstBitWidth());
1711}
1712
1713unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc) {
1714 return packBits(VmVsrc, Encoded, getVmVsrcBitShift(), getVmVsrcBitWidth());
1715}
1716
1717unsigned encodeFieldVmVsrc(unsigned VmVsrc) {
1718 return encodeFieldVmVsrc(0xffff, VmVsrc);
1719}
1720
1721unsigned encodeFieldVaVdst(unsigned Encoded, unsigned VaVdst) {
1722 return packBits(VaVdst, Encoded, getVaVdstBitShift(), getVaVdstBitWidth());
1723}
1724
1725unsigned encodeFieldVaVdst(unsigned VaVdst) {
1726 return encodeFieldVaVdst(0xffff, VaVdst);
1727}
1728
1729unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst) {
1730 return packBits(SaSdst, Encoded, getSaSdstBitShift(), getSaSdstBitWidth());
1731}
1732
1733unsigned encodeFieldSaSdst(unsigned SaSdst) {
1734 return encodeFieldSaSdst(0xffff, SaSdst);
1735}
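// Example (editorial): building an s_waitcnt_depctr immediate that waits
// for va_vdst = 0 while leaving every other field at its "no wait" value,
// exactly as the single-argument overload above does:
//   unsigned Enc = encodeFieldVaVdst(0xffff, 0);  // clears bits [15:12]
//   assert(decodeFieldVaVdst(Enc) == 0);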
1736
1737} // namespace DepCtr
1738
1739//===----------------------------------------------------------------------===//
1740// hwreg
1741//===----------------------------------------------------------------------===//
1742
1743namespace Hwreg {
1744
1745int64_t getHwregId(const StringRef Name, const MCSubtargetInfo &STI) {
1746 int Idx = getOprIdx<const MCSubtargetInfo &>(Name, Opr, OPR_SIZE, STI);
1747 return (Idx < 0) ? Idx : Opr[Idx].Encoding;
1748}
1749
1750StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI) {
1751 int Idx = getOprIdx<const MCSubtargetInfo &>(Id, Opr, OPR_SIZE, STI);
1752 return (Idx < 0) ? "" : Opr[Idx].Name;
1753}
1754
1755} // namespace Hwreg
1756
1757//===----------------------------------------------------------------------===//
1758// exp tgt
1759//===----------------------------------------------------------------------===//
1760
1761namespace Exp {
1762
1763struct ExpTgt {
1764 StringLiteral Name;
1765 unsigned Tgt;
1766 unsigned MaxIndex;
1767};
1768
1769static constexpr ExpTgt ExpTgtInfo[] = {
1770 {{"null"}, ET_NULL, ET_NULL_MAX_IDX},
1771 {{"mrtz"}, ET_MRTZ, ET_MRTZ_MAX_IDX},
1772 {{"prim"}, ET_PRIM, ET_PRIM_MAX_IDX},
1773 {{"mrt"}, ET_MRT0, ET_MRT_MAX_IDX},
1774 {{"pos"}, ET_POS0, ET_POS_MAX_IDX},
1775 {{"dual_src_blend"}, ET_DUAL_SRC_BLEND0, ET_DUAL_SRC_BLEND_MAX_IDX},
1776 {{"param"}, ET_PARAM0, ET_PARAM_MAX_IDX},
1777};
1778
1779bool getTgtName(unsigned Id, StringRef &Name, int &Index) {
1780 for (const ExpTgt &Val : ExpTgtInfo) {
1781 if (Val.Tgt <= Id && Id <= Val.Tgt + Val.MaxIndex) {
1782 Index = (Val.MaxIndex == 0) ? -1 : (Id - Val.Tgt);
1783 Name = Val.Name;
1784 return true;
1785 }
1786 }
1787 return false;
1788}
1789
1790unsigned getTgtId(const StringRef Name) {
1791
1792 for (const ExpTgt &Val : ExpTgtInfo) {
1793 if (Val.MaxIndex == 0 && Name == Val.Name)
1794 return Val.Tgt;
1795
1796 if (Val.MaxIndex > 0 && Name.starts_with(Val.Name)) {
1797 StringRef Suffix = Name.drop_front(Val.Name.size());
1798
1799 unsigned Id;
1800 if (Suffix.getAsInteger(10, Id) || Id > Val.MaxIndex)
1801 return ET_INVALID;
1802
1803 // Disable leading zeroes
1804 if (Suffix.size() > 1 && Suffix[0] == '0')
1805 return ET_INVALID;
1806
1807 return Val.Tgt + Id;
1808 }
1809 }
1810 return ET_INVALID;
1811}
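// Example (editorial): getTgtId("pos3") matches the "pos" row, parses the
// suffix, and returns ET_POS0 + 3; getTgtName() reverses the mapping, and
// "pos03" is rejected by the leading-zero check above.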
1812
1813bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI) {
1814 switch (Id) {
1815 case ET_NULL:
1816 return !isGFX11Plus(STI);
1817 case ET_POS4:
1818 case ET_PRIM:
1819 return isGFX10Plus(STI);
1820 case ET_DUAL_SRC_BLEND0:
1821 case ET_DUAL_SRC_BLEND1:
1822 return isGFX11Plus(STI);
1823 default:
1824 if (Id >= ET_PARAM0 && Id <= ET_PARAM31)
1825 return !isGFX11Plus(STI);
1826 return true;
1827 }
1828}
1829
1830} // namespace Exp
1831
1832//===----------------------------------------------------------------------===//
1833// MTBUF Format
1834//===----------------------------------------------------------------------===//
1835
1836namespace MTBUFFormat {
1837
1838int64_t getDfmt(const StringRef Name) {
1839 for (int Id = DFMT_MIN; Id <= DFMT_MAX; ++Id) {
1840 if (Name == DfmtSymbolic[Id])
1841 return Id;
1842 }
1843 return DFMT_UNDEF;
1844}
1845
1846StringRef getDfmtName(unsigned Id) {
1847 assert(Id <= DFMT_MAX);
1848 return DfmtSymbolic[Id];
1849}
1850
1851static StringLiteral const *getNfmtLookupTable(const MCSubtargetInfo &STI) {
1852 if (isSI(STI) || isCI(STI))
1853 return NfmtSymbolicSICI;
1854 if (isVI(STI) || isGFX9(STI))
1855 return NfmtSymbolicVI;
1856 return NfmtSymbolicGFX10;
1857}
1858
1859int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI) {
1860 auto lookupTable = getNfmtLookupTable(STI);
1861 for (int Id = NFMT_MIN; Id <= NFMT_MAX; ++Id) {
1862 if (Name == lookupTable[Id])
1863 return Id;
1864 }
1865 return NFMT_UNDEF;
1866}
1867
1868StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI) {
1869 assert(Id <= NFMT_MAX);
1870 return getNfmtLookupTable(STI)[Id];
1871}
1872
1873bool isValidDfmtNfmt(unsigned Id, const MCSubtargetInfo &STI) {
1874 unsigned Dfmt;
1875 unsigned Nfmt;
1876 decodeDfmtNfmt(Id, Dfmt, Nfmt);
1877 return isValidNfmt(Nfmt, STI);
1878}
1879
1880bool isValidNfmt(unsigned Id, const MCSubtargetInfo &STI) {
1881 return !getNfmtName(Id, STI).empty();
1882}
1883
1884int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt) {
1885 return (Dfmt << DFMT_SHIFT) | (Nfmt << NFMT_SHIFT);
1886}
1887
1888void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt) {
1889 Dfmt = (Format >> DFMT_SHIFT) & DFMT_MASK;
1890 Nfmt = (Format >> NFMT_SHIFT) & NFMT_MASK;
1891}
1892
1893int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI) {
1894 if (isGFX11Plus(STI)) {
1895 for (int Id = UfmtGFX11::UFMT_FIRST; Id <= UfmtGFX11::UFMT_LAST; ++Id) {
1896 if (Name == UfmtSymbolicGFX11[Id])
1897 return Id;
1898 }
1899 } else {
1900 for (int Id = UfmtGFX10::UFMT_FIRST; Id <= UfmtGFX10::UFMT_LAST; ++Id) {
1901 if (Name == UfmtSymbolicGFX10[Id])
1902 return Id;
1903 }
1904 }
1905 return UFMT_UNDEF;
1906}
1907
1908StringRef getUnifiedFormatName(unsigned Id, const MCSubtargetInfo &STI) {
1909 if (isValidUnifiedFormat(Id, STI))
1910 return isGFX10(STI) ? UfmtSymbolicGFX10[Id] : UfmtSymbolicGFX11[Id];
1911 return "";
1912}
1913
1914bool isValidUnifiedFormat(unsigned Id, const MCSubtargetInfo &STI) {
1915 return isGFX10(STI) ? Id <= UfmtGFX10::UFMT_LAST : Id <= UfmtGFX11::UFMT_LAST;
1916}
1917
1918int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt,
1919 const MCSubtargetInfo &STI) {
1920 int64_t Fmt = encodeDfmtNfmt(Dfmt, Nfmt);
1921 if (isGFX11Plus(STI)) {
1922 for (int Id = UfmtGFX11::UFMT_FIRST; Id <= UfmtGFX11::UFMT_LAST; ++Id) {
1923 if (Fmt == DfmtNfmt2UFmtGFX11[Id])
1924 return Id;
1925 }
1926 } else {
1927 for (int Id = UfmtGFX10::UFMT_FIRST; Id <= UfmtGFX10::UFMT_LAST; ++Id) {
1928 if (Fmt == DfmtNfmt2UFmtGFX10[Id])
1929 return Id;
1930 }
1931 }
1932 return UFMT_UNDEF;
1933}
1934
1935bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI) {
1936 return isGFX10Plus(STI) ? (Val <= UFMT_MAX) : (Val <= DFMT_NFMT_MAX);
1937}
1938
1939unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI) {
1940 if (isGFX10Plus(STI))
1941 return UFMT_DEFAULT;
1942 return DFMT_NFMT_DEFAULT;
1943}
1944
1945} // namespace MTBUFFormat
1946
1947//===----------------------------------------------------------------------===//
1948// SendMsg
1949//===----------------------------------------------------------------------===//
1950
1951namespace SendMsg {
1952
1953static uint64_t getMsgIdMask(const MCSubtargetInfo &STI) {
1954 return isGFX11Plus(STI) ? ID_MASK_GFX11Plus_ : ID_MASK_PreGFX11_;
1955}
1956
1957int64_t getMsgId(const StringRef Name, const MCSubtargetInfo &STI) {
1958 int Idx = getOprIdx<const MCSubtargetInfo &>(Name, Msg, MSG_SIZE, STI);
1959 return (Idx < 0) ? Idx : Msg[Idx].Encoding;
1960}
1961
1962bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI) {
1963 return (MsgId & ~(getMsgIdMask(STI))) == 0;
1964}
1965
1966StringRef getMsgName(int64_t MsgId, const MCSubtargetInfo &STI) {
1967 int Idx = getOprIdx<const MCSubtargetInfo &>(MsgId, Msg, MSG_SIZE, STI);
1968 return (Idx < 0) ? "" : Msg[Idx].Name;
1969}
1970
1971int64_t getMsgOpId(int64_t MsgId, const StringRef Name) {
1972 const char* const *S = (MsgId == ID_SYSMSG) ? OpSysSymbolic : OpGsSymbolic;
1973 const int F = (MsgId == ID_SYSMSG) ? OP_SYS_FIRST_ : OP_GS_FIRST_;
1974 const int L = (MsgId == ID_SYSMSG) ? OP_SYS_LAST_ : OP_GS_LAST_;
1975 for (int i = F; i < L; ++i) {
1976 if (Name == S[i]) {
1977 return i;
1978 }
1979 }
1980 return OP_UNKNOWN_;
1981}
1982
1983bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI,
1984 bool Strict) {
1985 assert(isValidMsgId(MsgId, STI));
1986
1987 if (!Strict)
1988 return 0 <= OpId && isUInt<OP_WIDTH_>(OpId);
1989
1990 if (MsgId == ID_SYSMSG)
1991 return OP_SYS_FIRST_ <= OpId && OpId < OP_SYS_LAST_;
1992 if (!isGFX11Plus(STI)) {
1993 switch (MsgId) {
1994 case ID_GS_PreGFX11:
1995 return (OP_GS_FIRST_ <= OpId && OpId < OP_GS_LAST_) && OpId != OP_GS_NOP;
1996 case ID_GS_DONE_PreGFX11:
1997 return OP_GS_FIRST_ <= OpId && OpId < OP_GS_LAST_;
1998 }
1999 }
2000 return OpId == OP_NONE_;
2001}
2002
2003StringRef getMsgOpName(int64_t MsgId, int64_t OpId,
2004 const MCSubtargetInfo &STI) {
2005 assert(msgRequiresOp(MsgId, STI));
2006 return (MsgId == ID_SYSMSG) ? OpSysSymbolic[OpId] : OpGsSymbolic[OpId];
2007}
2008
2009bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId,
2010 const MCSubtargetInfo &STI, bool Strict) {
2011 assert(isValidMsgOp(MsgId, OpId, STI, Strict));
2012
2013 if (!Strict)
2014 return 0 <= StreamId && isUInt<STREAM_ID_WIDTH_>(StreamId);
2015
2016 if (!isGFX11Plus(STI)) {
2017 switch (MsgId) {
2018 case ID_GS_PreGFX11:
2019 return STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_;
2020 case ID_GS_DONE_PreGFX11:
2021 return (OpId == OP_GS_NOP) ?
2022 (StreamId == STREAM_ID_NONE_) :
2023 (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_);
2024 }
2025 }
2026 return StreamId == STREAM_ID_NONE_;
2027}
2028
2029bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI) {
2030 return MsgId == ID_SYSMSG ||
2031 (!isGFX11Plus(STI) &&
2032 (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11));
2033}
2034
2035bool msgSupportsStream(int64_t MsgId, int64_t OpId,
2036 const MCSubtargetInfo &STI) {
2037 return !isGFX11Plus(STI) &&
2038 (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11) &&
2039 OpId != OP_GS_NOP;
2040}
2041
2042void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId,
2043 uint16_t &StreamId, const MCSubtargetInfo &STI) {
2044 MsgId = Val & getMsgIdMask(STI);
2045 if (isGFX11Plus(STI)) {
2046 OpId = 0;
2047 StreamId = 0;
2048 } else {
2049 OpId = (Val & OP_MASK_) >> OP_SHIFT_;
2050 StreamId = (Val & STREAM_ID_MASK_) >> STREAM_ID_SHIFT_;
2051 }
2052}
2053
2054uint64_t encodeMsg(uint64_t MsgId,
2055 uint64_t OpId,
2056 uint64_t StreamId) {
2057 return MsgId | (OpId << OP_SHIFT_) | (StreamId << STREAM_ID_SHIFT_);
2058}
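// Round-trip sketch (annotation, not upstream code): on pre-GFX11 targets,
// encodeMsg(ID_GS_PreGFX11, OP_GS_EMIT, 1) ORs the three fields at their
// shifts, and decodeMsg masks them back out with getMsgIdMask(STI), OP_MASK_
// and STREAM_ID_MASK_, recovering the same (MsgId, OpId, StreamId) triple.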
2059
2060} // namespace SendMsg
2061
2062//===----------------------------------------------------------------------===//
2063//
2064//===----------------------------------------------------------------------===//
2065
2067 return F.getFnAttributeAsParsedInteger("InitialPSInputAddr", 0);
2068}
2069
2070bool getHasColorExport(const Function &F) {
2071 // As a safe default always respond as if PS has color exports.
2072 return F.getFnAttributeAsParsedInteger(
2073 "amdgpu-color-export",
2074 F.getCallingConv() == CallingConv::AMDGPU_PS ? 1 : 0) != 0;
2075}
2076
2078 return F.getFnAttributeAsParsedInteger("amdgpu-depth-export", 0) != 0;
2079}
2080
2081bool isShader(CallingConv::ID cc) {
2082 switch (cc) {
2083 case CallingConv::AMDGPU_VS:
2084 case CallingConv::AMDGPU_LS:
2085 case CallingConv::AMDGPU_HS:
2086 case CallingConv::AMDGPU_ES:
2087 case CallingConv::AMDGPU_GS:
2088 case CallingConv::AMDGPU_PS:
2089 case CallingConv::AMDGPU_CS_Chain:
2090 case CallingConv::AMDGPU_CS_ChainPreserve:
2091 case CallingConv::AMDGPU_CS:
2092 return true;
2093 default:
2094 return false;
2095 }
2096}
2097
2098bool isGraphics(CallingConv::ID cc) {
2099 return isShader(cc) || cc == CallingConv::AMDGPU_Gfx;
2100}
2101
2102bool isCompute(CallingConv::ID cc) {
2103 return !isGraphics(cc) || cc == CallingConv::AMDGPU_CS;
2104}
2105
2106bool isEntryFunctionCC(CallingConv::ID CC) {
2107 switch (CC) {
2108 case CallingConv::AMDGPU_KERNEL:
2109 case CallingConv::SPIR_KERNEL:
2110 case CallingConv::AMDGPU_VS:
2111 case CallingConv::AMDGPU_GS:
2112 case CallingConv::AMDGPU_PS:
2113 case CallingConv::AMDGPU_CS:
2114 case CallingConv::AMDGPU_ES:
2115 case CallingConv::AMDGPU_HS:
2116 case CallingConv::AMDGPU_LS:
2117 return true;
2118 default:
2119 return false;
2120 }
2121}
2122
2123bool isModuleEntryFunctionCC(CallingConv::ID CC) {
2124 switch (CC) {
2125 case CallingConv::AMDGPU_Gfx:
2126 return true;
2127 default:
2128 return isEntryFunctionCC(CC) || isChainCC(CC);
2129 }
2130}
2131
2132bool isChainCC(CallingConv::ID CC) {
2133 switch (CC) {
2134 case CallingConv::AMDGPU_CS_Chain:
2135 case CallingConv::AMDGPU_CS_ChainPreserve:
2136 return true;
2137 default:
2138 return false;
2139 }
2140}
2141
2142bool isKernelCC(const Function *Func) {
2143 return AMDGPU::isModuleEntryFunctionCC(Func->getCallingConv());
2144}
2145
2146bool hasXNACK(const MCSubtargetInfo &STI) {
2147 return STI.hasFeature(AMDGPU::FeatureXNACK);
2148}
2149
2150bool hasSRAMECC(const MCSubtargetInfo &STI) {
2151 return STI.hasFeature(AMDGPU::FeatureSRAMECC);
2152}
2153
2154bool hasMIMG_R128(const MCSubtargetInfo &STI) {
2155 return STI.hasFeature(AMDGPU::FeatureMIMG_R128) && !STI.hasFeature(AMDGPU::FeatureR128A16);
2156}
2157
2158bool hasA16(const MCSubtargetInfo &STI) {
2159 return STI.hasFeature(AMDGPU::FeatureA16);
2160}
2161
2162bool hasG16(const MCSubtargetInfo &STI) {
2163 return STI.hasFeature(AMDGPU::FeatureG16);
2164}
2165
2166bool hasPackedD16(const MCSubtargetInfo &STI) {
2167 return !STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem) && !isCI(STI) &&
2168 !isSI(STI);
2169}
2170
2171bool hasGDS(const MCSubtargetInfo &STI) {
2172 return STI.hasFeature(AMDGPU::FeatureGDS);
2173}
2174
2175unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler) {
2176 auto Version = getIsaVersion(STI.getCPU());
2177 if (Version.Major == 10)
2178 return Version.Minor >= 3 ? 13 : 5;
2179 if (Version.Major == 11)
2180 return 5;
2181 if (Version.Major >= 12)
2182 return HasSampler ? 4 : 5;
2183 return 0;
2184}
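// Annotation: a result of 0 means the subtarget has no NSA encoding
// (pre-GFX10). For example, GFX10.3 allows up to 13 address dwords, GFX11
// narrows this to 5, and GFX12 depends on whether a sampler is present.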
2185
2186unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI) { return 16; }
2187
2188bool isSI(const MCSubtargetInfo &STI) {
2189 return STI.hasFeature(AMDGPU::FeatureSouthernIslands);
2190}
2191
2192bool isCI(const MCSubtargetInfo &STI) {
2193 return STI.hasFeature(AMDGPU::FeatureSeaIslands);
2194}
2195
2196bool isVI(const MCSubtargetInfo &STI) {
2197 return STI.hasFeature(AMDGPU::FeatureVolcanicIslands);
2198}
2199
2200bool isGFX9(const MCSubtargetInfo &STI) {
2201 return STI.hasFeature(AMDGPU::FeatureGFX9);
2202}
2203
2204bool isGFX9_GFX10(const MCSubtargetInfo &STI) {
2205 return isGFX9(STI) || isGFX10(STI);
2206}
2207
2208bool isGFX9_GFX10_GFX11(const MCSubtargetInfo &STI) {
2209 return isGFX9(STI) || isGFX10(STI) || isGFX11(STI);
2210}
2211
2212bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI) {
2213 return isVI(STI) || isGFX9(STI) || isGFX10(STI);
2214}
2215
2216bool isGFX8Plus(const MCSubtargetInfo &STI) {
2217 return isVI(STI) || isGFX9Plus(STI);
2218}
2219
2220bool isGFX9Plus(const MCSubtargetInfo &STI) {
2221 return isGFX9(STI) || isGFX10Plus(STI);
2222}
2223
2224bool isGFX10(const MCSubtargetInfo &STI) {
2225 return STI.hasFeature(AMDGPU::FeatureGFX10);
2226}
2227
2228bool isGFX10_GFX11(const MCSubtargetInfo &STI) {
2229 return isGFX10(STI) || isGFX11(STI);
2230}
2231
2232bool isGFX10Plus(const MCSubtargetInfo &STI) {
2233 return isGFX10(STI) || isGFX11Plus(STI);
2234}
2235
2236bool isGFX11(const MCSubtargetInfo &STI) {
2237 return STI.hasFeature(AMDGPU::FeatureGFX11);
2238}
2239
2240bool isGFX11Plus(const MCSubtargetInfo &STI) {
2241 return isGFX11(STI) || isGFX12Plus(STI);
2242}
2243
2244bool isGFX12(const MCSubtargetInfo &STI) {
2245 return STI.getFeatureBits()[AMDGPU::FeatureGFX12];
2246}
2247
2248bool isGFX12Plus(const MCSubtargetInfo &STI) { return isGFX12(STI); }
2249
2250bool isNotGFX12Plus(const MCSubtargetInfo &STI) { return !isGFX12Plus(STI); }
2251
2252bool isNotGFX11Plus(const MCSubtargetInfo &STI) {
2253 return !isGFX11Plus(STI);
2254}
2255
2256bool isNotGFX10Plus(const MCSubtargetInfo &STI) {
2257 return isSI(STI) || isCI(STI) || isVI(STI) || isGFX9(STI);
2258}
2259
2260bool isGFX10Before1030(const MCSubtargetInfo &STI) {
2261 return isGFX10(STI) && !AMDGPU::isGFX10_BEncoding(STI);
2262}
2263
2264bool isGCN3Encoding(const MCSubtargetInfo &STI) {
2265 return STI.hasFeature(AMDGPU::FeatureGCN3Encoding);
2266}
2267
2268bool isGFX10_AEncoding(const MCSubtargetInfo &STI) {
2269 return STI.hasFeature(AMDGPU::FeatureGFX10_AEncoding);
2270}
2271
2272bool isGFX10_BEncoding(const MCSubtargetInfo &STI) {
2273 return STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding);
2274}
2275
2276bool hasGFX10_3Insts(const MCSubtargetInfo &STI) {
2277 return STI.hasFeature(AMDGPU::FeatureGFX10_3Insts);
2278}
2279
2280bool isGFX10_3_GFX11(const MCSubtargetInfo &STI) {
2281 return isGFX10_BEncoding(STI) && !isGFX12Plus(STI);
2282}
2283
2284bool isGFX90A(const MCSubtargetInfo &STI) {
2285 return STI.hasFeature(AMDGPU::FeatureGFX90AInsts);
2286}
2287
2288bool isGFX940(const MCSubtargetInfo &STI) {
2289 return STI.hasFeature(AMDGPU::FeatureGFX940Insts);
2290}
2291
2292bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI) {
2293 return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch);
2294}
2295
2296bool hasMAIInsts(const MCSubtargetInfo &STI) {
2297 return STI.hasFeature(AMDGPU::FeatureMAIInsts);
2298}
2299
2300bool hasVOPD(const MCSubtargetInfo &STI) {
2301 return STI.hasFeature(AMDGPU::FeatureVOPD);
2302}
2303
2304bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI) {
2305 return STI.hasFeature(AMDGPU::FeatureDPPSrc1SGPR);
2306}
2307
2308unsigned hasKernargPreload(const MCSubtargetInfo &STI) {
2309 return STI.hasFeature(AMDGPU::FeatureKernargPreload);
2310}
2311
2312int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR,
2313 int32_t ArgNumVGPR) {
2314 if (has90AInsts && ArgNumAGPR)
2315 return alignTo(ArgNumVGPR, 4) + ArgNumAGPR;
2316 return std::max(ArgNumVGPR, ArgNumAGPR);
2317}
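// Worked example (annotation): with 90A insts, ArgNumVGPR == 6 and
// ArgNumAGPR == 8 give alignTo(6, 4) + 8 == 16, since AGPRs are allocated
// after the VGPRs at a 4-register boundary; without 90A insts the two files
// alias, so the total is just max(6, 8) == 8.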
2318
2319bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI) {
2320 const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID);
2321 const unsigned FirstSubReg = TRI->getSubReg(Reg, AMDGPU::sub0);
2322 return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) ||
2323 Reg == AMDGPU::SCC;
2324}
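// Annotation: for a register tuple the sub0 component decides the class, so
// a 64-bit pair whose first 32-bit subregister lies in SReg_32 is reported as
// an SGPR; SCC is special-cased as a scalar as well.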
2325
2326bool isHi(unsigned Reg, const MCRegisterInfo &MRI) {
2327 return MRI.getEncodingValue(Reg) & AMDGPU::HWEncoding::IS_HI;
2328}
2329
2330#define MAP_REG2REG \
2331 using namespace AMDGPU; \
2332 switch(Reg) { \
2333 default: return Reg; \
2334 CASE_CI_VI(FLAT_SCR) \
2335 CASE_CI_VI(FLAT_SCR_LO) \
2336 CASE_CI_VI(FLAT_SCR_HI) \
2337 CASE_VI_GFX9PLUS(TTMP0) \
2338 CASE_VI_GFX9PLUS(TTMP1) \
2339 CASE_VI_GFX9PLUS(TTMP2) \
2340 CASE_VI_GFX9PLUS(TTMP3) \
2341 CASE_VI_GFX9PLUS(TTMP4) \
2342 CASE_VI_GFX9PLUS(TTMP5) \
2343 CASE_VI_GFX9PLUS(TTMP6) \
2344 CASE_VI_GFX9PLUS(TTMP7) \
2345 CASE_VI_GFX9PLUS(TTMP8) \
2346 CASE_VI_GFX9PLUS(TTMP9) \
2347 CASE_VI_GFX9PLUS(TTMP10) \
2348 CASE_VI_GFX9PLUS(TTMP11) \
2349 CASE_VI_GFX9PLUS(TTMP12) \
2350 CASE_VI_GFX9PLUS(TTMP13) \
2351 CASE_VI_GFX9PLUS(TTMP14) \
2352 CASE_VI_GFX9PLUS(TTMP15) \
2353 CASE_VI_GFX9PLUS(TTMP0_TTMP1) \
2354 CASE_VI_GFX9PLUS(TTMP2_TTMP3) \
2355 CASE_VI_GFX9PLUS(TTMP4_TTMP5) \
2356 CASE_VI_GFX9PLUS(TTMP6_TTMP7) \
2357 CASE_VI_GFX9PLUS(TTMP8_TTMP9) \
2358 CASE_VI_GFX9PLUS(TTMP10_TTMP11) \
2359 CASE_VI_GFX9PLUS(TTMP12_TTMP13) \
2360 CASE_VI_GFX9PLUS(TTMP14_TTMP15) \
2361 CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3) \
2362 CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7) \
2363 CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11) \
2364 CASE_VI_GFX9PLUS(TTMP12_TTMP13_TTMP14_TTMP15) \
2365 CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \
2366 CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \
2367 CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
2368 CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
2369 CASE_GFXPRE11_GFX11PLUS(M0) \
2370 CASE_GFXPRE11_GFX11PLUS(SGPR_NULL) \
2371 CASE_GFXPRE11_GFX11PLUS_TO(SGPR_NULL64, SGPR_NULL) \
2372 }
2373
2374#define CASE_CI_VI(node) \
2375 assert(!isSI(STI)); \
2376 case node: return isCI(STI) ? node##_ci : node##_vi;
2377
2378#define CASE_VI_GFX9PLUS(node) \
2379 case node: return isGFX9Plus(STI) ? node##_gfx9plus : node##_vi;
2380
2381#define CASE_GFXPRE11_GFX11PLUS(node) \
2382 case node: return isGFX11Plus(STI) ? node##_gfx11plus : node##_gfxpre11;
2383
2384#define CASE_GFXPRE11_GFX11PLUS_TO(node, result) \
2385 case node: return isGFX11Plus(STI) ? result##_gfx11plus : result##_gfxpre11;
2386
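// Annotation: MAP_REG2REG is expanded twice with different CASE_* macro
// definitions. The set above maps each pseudo register to its
// subtarget-specific MC register (e.g. TTMP0 -> TTMP0_gfx9plus on GFX9+);
// after the #undefs below, the macros are redefined to invert the mapping
// for mc2PseudoReg.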
2387unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
2388 if (STI.getTargetTriple().getArch() == Triple::r600)
2389 return Reg;
2390 MAP_REG2REG
2391}
2392
2393#undef CASE_CI_VI
2394#undef CASE_VI_GFX9PLUS
2395#undef CASE_GFXPRE11_GFX11PLUS
2396#undef CASE_GFXPRE11_GFX11PLUS_TO
2397
2398#define CASE_CI_VI(node) case node##_ci: case node##_vi: return node;
2399#define CASE_VI_GFX9PLUS(node) case node##_vi: case node##_gfx9plus: return node;
2400#define CASE_GFXPRE11_GFX11PLUS(node) case node##_gfx11plus: case node##_gfxpre11: return node;
2401#define CASE_GFXPRE11_GFX11PLUS_TO(node, result)
2402
2403unsigned mc2PseudoReg(unsigned Reg) {
2404 MAP_REG2REG
2405}
2406
2407bool isInlineValue(unsigned Reg) {
2408 switch (Reg) {
2409 case AMDGPU::SRC_SHARED_BASE_LO:
2410 case AMDGPU::SRC_SHARED_BASE:
2411 case AMDGPU::SRC_SHARED_LIMIT_LO:
2412 case AMDGPU::SRC_SHARED_LIMIT:
2413 case AMDGPU::SRC_PRIVATE_BASE_LO:
2414 case AMDGPU::SRC_PRIVATE_BASE:
2415 case AMDGPU::SRC_PRIVATE_LIMIT_LO:
2416 case AMDGPU::SRC_PRIVATE_LIMIT:
2417 case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2418 return true;
2419 case AMDGPU::SRC_VCCZ:
2420 case AMDGPU::SRC_EXECZ:
2421 case AMDGPU::SRC_SCC:
2422 return true;
2423 case AMDGPU::SGPR_NULL:
2424 return true;
2425 default:
2426 return false;
2427 }
2428}
2429
2430#undef CASE_CI_VI
2431#undef CASE_VI_GFX9PLUS
2432#undef CASE_GFXPRE11_GFX11PLUS
2433#undef CASE_GFXPRE11_GFX11PLUS_TO
2434#undef MAP_REG2REG
2435
2436bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2437 assert(OpNo < Desc.NumOperands);
2438 unsigned OpType = Desc.operands()[OpNo].OperandType;
2439 return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
2440 OpType <= AMDGPU::OPERAND_SRC_LAST;
2441}
2442
2443bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2444 assert(OpNo < Desc.NumOperands);
2445 unsigned OpType = Desc.operands()[OpNo].OperandType;
2446 return OpType >= AMDGPU::OPERAND_KIMM_FIRST &&
2447 OpType <= AMDGPU::OPERAND_KIMM_LAST;
2448}
2449
2450bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2451 assert(OpNo < Desc.NumOperands);
2452 unsigned OpType = Desc.operands()[OpNo].OperandType;
2453 switch (OpType) {
2454 case AMDGPU::OPERAND_REG_IMM_FP32:
2455 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2456 case AMDGPU::OPERAND_REG_IMM_FP64:
2457 case AMDGPU::OPERAND_REG_IMM_FP16:
2458 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2459 case AMDGPU::OPERAND_REG_IMM_V2FP16:
2460 case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2461 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2462 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2463 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2464 case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2465 case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2466 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2467 case AMDGPU::OPERAND_REG_IMM_V2FP32:
2468 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2469 case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2470 return true;
2471 default:
2472 return false;
2473 }
2474}
2475
2476bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2477 assert(OpNo < Desc.NumOperands);
2478 unsigned OpType = Desc.operands()[OpNo].OperandType;
2479 return (OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
2480 OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST) ||
2481 (OpType >= AMDGPU::OPERAND_REG_INLINE_AC_FIRST &&
2482 OpType <= AMDGPU::OPERAND_REG_INLINE_AC_LAST);
2483}
2484
2485// Avoid using MCRegisterClass::getSize, since that function will go away
2486// (move from MC* level to Target* level). Return size in bits.
2487unsigned getRegBitWidth(unsigned RCID) {
2488 switch (RCID) {
2489 case AMDGPU::SGPR_LO16RegClassID:
2490 case AMDGPU::AGPR_LO16RegClassID:
2491 return 16;
2492 case AMDGPU::SGPR_32RegClassID:
2493 case AMDGPU::VGPR_32RegClassID:
2494 case AMDGPU::VRegOrLds_32RegClassID:
2495 case AMDGPU::AGPR_32RegClassID:
2496 case AMDGPU::VS_32RegClassID:
2497 case AMDGPU::AV_32RegClassID:
2498 case AMDGPU::SReg_32RegClassID:
2499 case AMDGPU::SReg_32_XM0RegClassID:
2500 case AMDGPU::SRegOrLds_32RegClassID:
2501 return 32;
2502 case AMDGPU::SGPR_64RegClassID:
2503 case AMDGPU::VS_64RegClassID:
2504 case AMDGPU::SReg_64RegClassID:
2505 case AMDGPU::VReg_64RegClassID:
2506 case AMDGPU::AReg_64RegClassID:
2507 case AMDGPU::SReg_64_XEXECRegClassID:
2508 case AMDGPU::VReg_64_Align2RegClassID:
2509 case AMDGPU::AReg_64_Align2RegClassID:
2510 case AMDGPU::AV_64RegClassID:
2511 case AMDGPU::AV_64_Align2RegClassID:
2512 return 64;
2513 case AMDGPU::SGPR_96RegClassID:
2514 case AMDGPU::SReg_96RegClassID:
2515 case AMDGPU::VReg_96RegClassID:
2516 case AMDGPU::AReg_96RegClassID:
2517 case AMDGPU::VReg_96_Align2RegClassID:
2518 case AMDGPU::AReg_96_Align2RegClassID:
2519 case AMDGPU::AV_96RegClassID:
2520 case AMDGPU::AV_96_Align2RegClassID:
2521 return 96;
2522 case AMDGPU::SGPR_128RegClassID:
2523 case AMDGPU::SReg_128RegClassID:
2524 case AMDGPU::VReg_128RegClassID:
2525 case AMDGPU::AReg_128RegClassID:
2526 case AMDGPU::VReg_128_Align2RegClassID:
2527 case AMDGPU::AReg_128_Align2RegClassID:
2528 case AMDGPU::AV_128RegClassID:
2529 case AMDGPU::AV_128_Align2RegClassID:
2530 return 128;
2531 case AMDGPU::SGPR_160RegClassID:
2532 case AMDGPU::SReg_160RegClassID:
2533 case AMDGPU::VReg_160RegClassID:
2534 case AMDGPU::AReg_160RegClassID:
2535 case AMDGPU::VReg_160_Align2RegClassID:
2536 case AMDGPU::AReg_160_Align2RegClassID:
2537 case AMDGPU::AV_160RegClassID:
2538 case AMDGPU::AV_160_Align2RegClassID:
2539 return 160;
2540 case AMDGPU::SGPR_192RegClassID:
2541 case AMDGPU::SReg_192RegClassID:
2542 case AMDGPU::VReg_192RegClassID:
2543 case AMDGPU::AReg_192RegClassID:
2544 case AMDGPU::VReg_192_Align2RegClassID:
2545 case AMDGPU::AReg_192_Align2RegClassID:
2546 case AMDGPU::AV_192RegClassID:
2547 case AMDGPU::AV_192_Align2RegClassID:
2548 return 192;
2549 case AMDGPU::SGPR_224RegClassID:
2550 case AMDGPU::SReg_224RegClassID:
2551 case AMDGPU::VReg_224RegClassID:
2552 case AMDGPU::AReg_224RegClassID:
2553 case AMDGPU::VReg_224_Align2RegClassID:
2554 case AMDGPU::AReg_224_Align2RegClassID:
2555 case AMDGPU::AV_224RegClassID:
2556 case AMDGPU::AV_224_Align2RegClassID:
2557 return 224;
2558 case AMDGPU::SGPR_256RegClassID:
2559 case AMDGPU::SReg_256RegClassID:
2560 case AMDGPU::VReg_256RegClassID:
2561 case AMDGPU::AReg_256RegClassID:
2562 case AMDGPU::VReg_256_Align2RegClassID:
2563 case AMDGPU::AReg_256_Align2RegClassID:
2564 case AMDGPU::AV_256RegClassID:
2565 case AMDGPU::AV_256_Align2RegClassID:
2566 return 256;
2567 case AMDGPU::SGPR_288RegClassID:
2568 case AMDGPU::SReg_288RegClassID:
2569 case AMDGPU::VReg_288RegClassID:
2570 case AMDGPU::AReg_288RegClassID:
2571 case AMDGPU::VReg_288_Align2RegClassID:
2572 case AMDGPU::AReg_288_Align2RegClassID:
2573 case AMDGPU::AV_288RegClassID:
2574 case AMDGPU::AV_288_Align2RegClassID:
2575 return 288;
2576 case AMDGPU::SGPR_320RegClassID:
2577 case AMDGPU::SReg_320RegClassID:
2578 case AMDGPU::VReg_320RegClassID:
2579 case AMDGPU::AReg_320RegClassID:
2580 case AMDGPU::VReg_320_Align2RegClassID:
2581 case AMDGPU::AReg_320_Align2RegClassID:
2582 case AMDGPU::AV_320RegClassID:
2583 case AMDGPU::AV_320_Align2RegClassID:
2584 return 320;
2585 case AMDGPU::SGPR_352RegClassID:
2586 case AMDGPU::SReg_352RegClassID:
2587 case AMDGPU::VReg_352RegClassID:
2588 case AMDGPU::AReg_352RegClassID:
2589 case AMDGPU::VReg_352_Align2RegClassID:
2590 case AMDGPU::AReg_352_Align2RegClassID:
2591 case AMDGPU::AV_352RegClassID:
2592 case AMDGPU::AV_352_Align2RegClassID:
2593 return 352;
2594 case AMDGPU::SGPR_384RegClassID:
2595 case AMDGPU::SReg_384RegClassID:
2596 case AMDGPU::VReg_384RegClassID:
2597 case AMDGPU::AReg_384RegClassID:
2598 case AMDGPU::VReg_384_Align2RegClassID:
2599 case AMDGPU::AReg_384_Align2RegClassID:
2600 case AMDGPU::AV_384RegClassID:
2601 case AMDGPU::AV_384_Align2RegClassID:
2602 return 384;
2603 case AMDGPU::SGPR_512RegClassID:
2604 case AMDGPU::SReg_512RegClassID:
2605 case AMDGPU::VReg_512RegClassID:
2606 case AMDGPU::AReg_512RegClassID:
2607 case AMDGPU::VReg_512_Align2RegClassID:
2608 case AMDGPU::AReg_512_Align2RegClassID:
2609 case AMDGPU::AV_512RegClassID:
2610 case AMDGPU::AV_512_Align2RegClassID:
2611 return 512;
2612 case AMDGPU::SGPR_1024RegClassID:
2613 case AMDGPU::SReg_1024RegClassID:
2614 case AMDGPU::VReg_1024RegClassID:
2615 case AMDGPU::AReg_1024RegClassID:
2616 case AMDGPU::VReg_1024_Align2RegClassID:
2617 case AMDGPU::AReg_1024_Align2RegClassID:
2618 case AMDGPU::AV_1024RegClassID:
2619 case AMDGPU::AV_1024_Align2RegClassID:
2620 return 1024;
2621 default:
2622 llvm_unreachable("Unexpected register class");
2623 }
2624}
2625
2626unsigned getRegBitWidth(const MCRegisterClass &RC) {
2627 return getRegBitWidth(RC.getID());
2628}
2629
2630unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
2631 unsigned OpNo) {
2632 assert(OpNo < Desc.NumOperands);
2633 unsigned RCID = Desc.operands()[OpNo].RegClass;
2634 return getRegBitWidth(RCID) / 8;
2635}
2636
2637bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
2638 if (isInlinableIntLiteral(Literal))
2639 return true;
2640
2641 uint64_t Val = static_cast<uint64_t>(Literal);
2642 return (Val == llvm::bit_cast<uint64_t>(0.0)) ||
2643 (Val == llvm::bit_cast<uint64_t>(1.0)) ||
2644 (Val == llvm::bit_cast<uint64_t>(-1.0)) ||
2645 (Val == llvm::bit_cast<uint64_t>(0.5)) ||
2646 (Val == llvm::bit_cast<uint64_t>(-0.5)) ||
2647 (Val == llvm::bit_cast<uint64_t>(2.0)) ||
2648 (Val == llvm::bit_cast<uint64_t>(-2.0)) ||
2649 (Val == llvm::bit_cast<uint64_t>(4.0)) ||
2650 (Val == llvm::bit_cast<uint64_t>(-4.0)) ||
2651 (Val == 0x3fc45f306dc9c882 && HasInv2Pi);
2652}
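// Example (annotation): llvm::bit_cast<uint64_t>(1.0) == 0x3FF0000000000000,
// so that pattern is inlinable, while an arbitrary double such as 3.0
// (0x4008000000000000) is not in the list and must be emitted as a literal.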
2653
2654bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
2655 if (isInlinableIntLiteral(Literal))
2656 return true;
2657
2658 // The actual type of the operand does not seem to matter as long
2659 // as the bits match one of the inline immediate values. For example:
2660 //
2661 // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
2662 // so it is a legal inline immediate.
2663 //
2664 // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
2665 // floating-point, so it is a legal inline immediate.
2666
2667 uint32_t Val = static_cast<uint32_t>(Literal);
2668 return (Val == llvm::bit_cast<uint32_t>(0.0f)) ||
2669 (Val == llvm::bit_cast<uint32_t>(1.0f)) ||
2670 (Val == llvm::bit_cast<uint32_t>(-1.0f)) ||
2671 (Val == llvm::bit_cast<uint32_t>(0.5f)) ||
2672 (Val == llvm::bit_cast<uint32_t>(-0.5f)) ||
2673 (Val == llvm::bit_cast<uint32_t>(2.0f)) ||
2674 (Val == llvm::bit_cast<uint32_t>(-2.0f)) ||
2675 (Val == llvm::bit_cast<uint32_t>(4.0f)) ||
2676 (Val == llvm::bit_cast<uint32_t>(-4.0f)) ||
2677 (Val == 0x3e22f983 && HasInv2Pi);
2678}
2679
2680bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi) {
2681 if (!HasInv2Pi)
2682 return false;
2683 if (isInlinableIntLiteral(Literal))
2684 return true;
2685 uint16_t Val = static_cast<uint16_t>(Literal);
2686 return Val == 0x3F00 || // 0.5
2687 Val == 0xBF00 || // -0.5
2688 Val == 0x3F80 || // 1.0
2689 Val == 0xBF80 || // -1.0
2690 Val == 0x4000 || // 2.0
2691 Val == 0xC000 || // -2.0
2692 Val == 0x4080 || // 4.0
2693 Val == 0xC080 || // -4.0
2694 Val == 0x3E22; // 1.0 / (2.0 * pi)
2695}
2696
2697bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi) {
2698 return isInlinableLiteral32(Literal, HasInv2Pi);
2699}
2700
2701bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi) {
2702 if (!HasInv2Pi)
2703 return false;
2704 if (isInlinableIntLiteral(Literal))
2705 return true;
2706 uint16_t Val = static_cast<uint16_t>(Literal);
2707 return Val == 0x3C00 || // 1.0
2708 Val == 0xBC00 || // -1.0
2709 Val == 0x3800 || // 0.5
2710 Val == 0xB800 || // -0.5
2711 Val == 0x4000 || // 2.0
2712 Val == 0xC000 || // -2.0
2713 Val == 0x4400 || // 4.0
2714 Val == 0xC400 || // -4.0
2715 Val == 0x3118; // 1/2pi
2716}
2717
2718std::optional<unsigned> getInlineEncodingV216(bool IsFloat, uint32_t Literal) {
2719 // Unfortunately, the Instruction Set Architecture Reference Guide is
2720 // misleading about how the inline operands work for (packed) 16-bit
2721 // instructions. In a nutshell, the actual HW behavior is:
2722 //
2723 // - integer encodings (-16 .. 64) are always produced as sign-extended
2724 // 32-bit values
2725 // - float encodings are produced as:
2726 // - for F16 instructions: corresponding half-precision float values in
2727 // the LSBs, 0 in the MSBs
2728 // - for UI16 instructions: corresponding single-precision float value
2729 int32_t Signed = static_cast<int32_t>(Literal);
2730 if (Signed >= 0 && Signed <= 64)
2731 return 128 + Signed;
2732
2733 if (Signed >= -16 && Signed <= -1)
2734 return 192 + std::abs(Signed);
2735
2736 if (IsFloat) {
2737 // clang-format off
2738 switch (Literal) {
2739 case 0x3800: return 240; // 0.5
2740 case 0xB800: return 241; // -0.5
2741 case 0x3C00: return 242; // 1.0
2742 case 0xBC00: return 243; // -1.0
2743 case 0x4000: return 244; // 2.0
2744 case 0xC000: return 245; // -2.0
2745 case 0x4400: return 246; // 4.0
2746 case 0xC400: return 247; // -4.0
2747 case 0x3118: return 248; // 1.0 / (2.0 * pi)
2748 default: break;
2749 }
2750 // clang-format on
2751 } else {
2752 // clang-format off
2753 switch (Literal) {
2754 case 0x3F000000: return 240; // 0.5
2755 case 0xBF000000: return 241; // -0.5
2756 case 0x3F800000: return 242; // 1.0
2757 case 0xBF800000: return 243; // -1.0
2758 case 0x40000000: return 244; // 2.0
2759 case 0xC0000000: return 245; // -2.0
2760 case 0x40800000: return 246; // 4.0
2761 case 0xC0800000: return 247; // -4.0
2762 case 0x3E22F983: return 248; // 1.0 / (2.0 * pi)
2763 default: break;
2764 }
2765 // clang-format on
2766 }
2767
2768 return {};
2769}
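// Usage sketch (annotation): getInlineEncodingV216(true, 0x3C00) yields 242
// (1.0 as f16); the integer 7 yields 128 + 7 == 135 in either mode; a pattern
// such as 0x12345678 matches nothing and yields std::nullopt.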
2770
2771// Encoding of the literal as an inline constant for a V_PK_*_IU16 instruction
2772// or nullopt.
2773std::optional<unsigned> getInlineEncodingV2I16(uint32_t Literal) {
2774 return getInlineEncodingV216(false, Literal);
2775}
2776
2777// Encoding of the literal as an inline constant for a V_PK_*_BF16 instruction
2778// or nullopt.
2779std::optional<unsigned> getInlineEncodingV2BF16(uint32_t Literal) {
2780 int32_t Signed = static_cast<int32_t>(Literal);
2781 if (Signed >= 0 && Signed <= 64)
2782 return 128 + Signed;
2783
2784 if (Signed >= -16 && Signed <= -1)
2785 return 192 + std::abs(Signed);
2786
2787 // clang-format off
2788 switch (Literal) {
2789 case 0x3F00: return 240; // 0.5
2790 case 0xBF00: return 241; // -0.5
2791 case 0x3F80: return 242; // 1.0
2792 case 0xBF80: return 243; // -1.0
2793 case 0x4000: return 244; // 2.0
2794 case 0xC000: return 245; // -2.0
2795 case 0x4080: return 246; // 4.0
2796 case 0xC080: return 247; // -4.0
2797 case 0x3E22: return 248; // 1.0 / (2.0 * pi)
2798 default: break;
2799 }
2800 // clang-format on
2801
2802 return std::nullopt;
2803}
2804
2805// Encoding of the literal as an inline constant for a V_PK_*_F16 instruction
2806// or nullopt.
2807std::optional<unsigned> getInlineEncodingV2F16(uint32_t Literal) {
2808 return getInlineEncodingV216(true, Literal);
2809}
2810
2811// Whether the given literal can be inlined for a V_PK_* instruction.
2812bool isInlinableLiteralV216(uint32_t Literal, uint8_t OpType) {
2813 switch (OpType) {
2814 case AMDGPU::OPERAND_REG_IMM_V2INT16:
2815 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2816 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2817 return getInlineEncodingV216(false, Literal).has_value();
2818 case AMDGPU::OPERAND_REG_IMM_V2FP16:
2819 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2820 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2821 return getInlineEncodingV216(true, Literal).has_value();
2822 case AMDGPU::OPERAND_REG_IMM_V2BF16:
2823 case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
2824 case AMDGPU::OPERAND_REG_INLINE_AC_V2BF16:
2825 return isInlinableLiteralV2BF16(Literal);
2826 default:
2827 llvm_unreachable("bad packed operand type");
2828 }
2829}
2830
2831// Whether the given literal can be inlined for a V_PK_*_IU16 instruction.
2832bool isInlinableLiteralV2I16(uint32_t Literal) {
2833 return getInlineEncodingV2I16(Literal).has_value();
2834}
2835
2836// Whether the given literal can be inlined for a V_PK_*_BF16 instruction.
2837bool isInlinableLiteralV2BF16(uint32_t Literal) {
2838 return getInlineEncodingV2BF16(Literal).has_value();
2839}
2840
2841// Whether the given literal can be inlined for a V_PK_*_F16 instruction.
2842bool isInlinableLiteralV2F16(uint32_t Literal) {
2843 return getInlineEncodingV2F16(Literal).has_value();
2844}
2845
2846bool isValid32BitLiteral(uint64_t Val, bool IsFP64) {
2847 if (IsFP64)
2848 return !(Val & 0xffffffffu);
2849
2850 return isUInt<32>(Val) || isInt<32>(Val);
2851}
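// Annotation: an FP64 immediate encodes only its high 32 bits, so Val must
// have a zero low half (0x3FF0000000000000, i.e. 1.0, passes;
// 0x3FF0000000000001 does not). Otherwise any value representable as a
// signed or unsigned 32-bit integer is accepted.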
2852
2853bool isArgPassedInSGPR(const Argument *A) {
2854 const Function *F = A->getParent();
2855
2856 // Arguments to compute shaders are never a source of divergence.
2857 CallingConv::ID CC = F->getCallingConv();
2858 switch (CC) {
2859 case CallingConv::AMDGPU_KERNEL:
2860 case CallingConv::SPIR_KERNEL:
2861 return true;
2862 case CallingConv::AMDGPU_VS:
2863 case CallingConv::AMDGPU_LS:
2864 case CallingConv::AMDGPU_HS:
2865 case CallingConv::AMDGPU_ES:
2866 case CallingConv::AMDGPU_GS:
2867 case CallingConv::AMDGPU_PS:
2868 case CallingConv::AMDGPU_CS:
2869 case CallingConv::AMDGPU_Gfx:
2870 case CallingConv::AMDGPU_CS_Chain:
2871 case CallingConv::AMDGPU_CS_ChainPreserve:
2872 // For non-compute shaders, SGPR inputs are marked with either inreg or
2873 // byval. Everything else is in VGPRs.
2874 return A->hasAttribute(Attribute::InReg) ||
2875 A->hasAttribute(Attribute::ByVal);
2876 default:
2877 // TODO: treat i1 as divergent?
2878 return A->hasAttribute(Attribute::InReg);
2879 }
2880}
2881
2882bool isArgPassedInSGPR(const CallBase *CB, unsigned ArgNo) {
2883 // Arguments to compute shaders are never a source of divergence.
2884 CallingConv::ID CC = CB->getCallingConv();
2885 switch (CC) {
2886 case CallingConv::AMDGPU_KERNEL:
2887 case CallingConv::SPIR_KERNEL:
2888 return true;
2889 case CallingConv::AMDGPU_VS:
2890 case CallingConv::AMDGPU_LS:
2891 case CallingConv::AMDGPU_HS:
2892 case CallingConv::AMDGPU_ES:
2893 case CallingConv::AMDGPU_GS:
2894 case CallingConv::AMDGPU_PS:
2895 case CallingConv::AMDGPU_CS:
2896 case CallingConv::AMDGPU_Gfx:
2897 case CallingConv::AMDGPU_CS_Chain:
2898 case CallingConv::AMDGPU_CS_ChainPreserve:
2899 // For non-compute shaders, SGPR inputs are marked with either inreg or
2900 // byval. Everything else is in VGPRs.
2901 return CB->paramHasAttr(ArgNo, Attribute::InReg) ||
2902 CB->paramHasAttr(ArgNo, Attribute::ByVal);
2903 default:
2904 return CB->paramHasAttr(ArgNo, Attribute::InReg);
2905 }
2906}
2907
2908static bool hasSMEMByteOffset(const MCSubtargetInfo &ST) {
2909 return isGCN3Encoding(ST) || isGFX10Plus(ST);
2910}
2911
2912static bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST) {
2913 return isGFX9Plus(ST);
2914}
2915
2916bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
2917 int64_t EncodedOffset) {
2918 if (isGFX12Plus(ST))
2919 return isUInt<23>(EncodedOffset);
2920
2921 return hasSMEMByteOffset(ST) ? isUInt<20>(EncodedOffset)
2922 : isUInt<8>(EncodedOffset);
2923}
2924
2925bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST,
2926 int64_t EncodedOffset,
2927 bool IsBuffer) {
2928 if (isGFX12Plus(ST))
2929 return isInt<24>(EncodedOffset);
2930
2931 return !IsBuffer &&
2932 hasSMRDSignedImmOffset(ST) &&
2933 isInt<21>(EncodedOffset);
2934}
2935
2936static bool isDwordAligned(uint64_t ByteOffset) {
2937 return (ByteOffset & 3) == 0;
2938}
2939
2940uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST,
2941 uint64_t ByteOffset) {
2942 if (hasSMEMByteOffset(ST))
2943 return ByteOffset;
2944
2945 assert(isDwordAligned(ByteOffset));
2946 return ByteOffset >> 2;
2947}
2948
2949std::optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
2950 int64_t ByteOffset, bool IsBuffer) {
2951 if (isGFX12Plus(ST)) // 24 bit signed offsets
2952 return isInt<24>(ByteOffset) ? std::optional<int64_t>(ByteOffset)
2953 : std::nullopt;
2954
2955 // The signed version is always a byte offset.
2956 if (!IsBuffer && hasSMRDSignedImmOffset(ST)) {
2957 // Assume the sign bit is never used.
2958 return isInt<20>(ByteOffset) ? std::optional<int64_t>(ByteOffset)
2959 : std::nullopt;
2960 }
2961
2962 if (!isDwordAligned(ByteOffset) && !hasSMEMByteOffset(ST))
2963 return std::nullopt;
2964
2965 int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
2966 return isLegalSMRDEncodedUnsignedOffset(ST, EncodedOffset)
2967 ? std::optional<int64_t>(EncodedOffset)
2968 : std::nullopt;
2969}
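// Worked example (annotation): on a GCN3-encoding target SMEM offsets are in
// bytes, so ByteOffset 400 is returned unchanged if isUInt<20> holds; on SI,
// convertSMRDOffsetUnits first divides by 4, so 400 encodes as 100, which
// must fit the 8-bit unsigned field.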
2970
2971std::optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
2972 int64_t ByteOffset) {
2973 if (!isCI(ST) || !isDwordAligned(ByteOffset))
2974 return std::nullopt;
2975
2976 int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
2977 return isUInt<32>(EncodedOffset) ? std::optional<int64_t>(EncodedOffset)
2978 : std::nullopt;
2979}
2980
2981unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST) {
2982 if (AMDGPU::isGFX10(ST))
2983 return 12;
2984
2985 if (AMDGPU::isGFX12(ST))
2986 return 24;
2987 return 13;
2988}
2989
2990namespace {
2991
2992struct SourceOfDivergence {
2993 unsigned Intr;
2994};
2995const SourceOfDivergence *lookupSourceOfDivergence(unsigned Intr);
2996
2997struct AlwaysUniform {
2998 unsigned Intr;
2999};
3000const AlwaysUniform *lookupAlwaysUniform(unsigned Intr);
3001
3002#define GET_SourcesOfDivergence_IMPL
3003#define GET_UniformIntrinsics_IMPL
3004#define GET_Gfx9BufferFormat_IMPL
3005#define GET_Gfx10BufferFormat_IMPL
3006#define GET_Gfx11PlusBufferFormat_IMPL
3007#include "AMDGPUGenSearchableTables.inc"
3008
3009} // end anonymous namespace
3010
3011bool isIntrinsicSourceOfDivergence(unsigned IntrID) {
3012 return lookupSourceOfDivergence(IntrID);
3013}
3014
3015bool isIntrinsicAlwaysUniform(unsigned IntrID) {
3016 return lookupAlwaysUniform(IntrID);
3017}
3018
3019const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
3020 uint8_t NumComponents,
3021 uint8_t NumFormat,
3022 const MCSubtargetInfo &STI) {
3023 return isGFX11Plus(STI)
3024 ? getGfx11PlusBufferFormatInfo(BitsPerComp, NumComponents,
3025 NumFormat)
3026 : isGFX10(STI) ? getGfx10BufferFormatInfo(BitsPerComp,
3027 NumComponents, NumFormat)
3028 : getGfx9BufferFormatInfo(BitsPerComp,
3029 NumComponents, NumFormat);
3030}
3031
3032const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
3033 const MCSubtargetInfo &STI) {
3034 return isGFX11Plus(STI) ? getGfx11PlusBufferFormatInfo(Format)
3035 : isGFX10(STI) ? getGfx10BufferFormatInfo(Format)
3036 : getGfx9BufferFormatInfo(Format);
3037}
3038
3039bool hasAny64BitVGPROperands(const MCInstrDesc &OpDesc) {
3040 for (auto OpName : { OpName::vdst, OpName::src0, OpName::src1,
3041 OpName::src2 }) {
3042 int Idx = getNamedOperandIdx(OpDesc.getOpcode(), OpName);
3043 if (Idx == -1)
3044 continue;
3045
3046 if (OpDesc.operands()[Idx].RegClass == AMDGPU::VReg_64RegClassID ||
3047 OpDesc.operands()[Idx].RegClass == AMDGPU::VReg_64_Align2RegClassID)
3048 return true;
3049 }
3050
3051 return false;
3052}
3053
3054bool isDPALU_DPP(const MCInstrDesc &OpDesc) {
3055 return hasAny64BitVGPROperands(OpDesc);
3056}
3057
3058unsigned getLdsDwGranularity(const MCSubtargetInfo &ST) {
3059 // Currently this is 128 for all subtargets
3060 return 128;
3061}
3062
3063} // namespace AMDGPU
3064
3065raw_ostream &operator<<(raw_ostream &OS,
3066 const AMDGPU::IsaInfo::TargetIDSetting S) {
3067 switch (S) {
3068 case AMDGPU::IsaInfo::TargetIDSetting::Unsupported:
3069 OS << "Unsupported";
3070 break;
3071 case AMDGPU::IsaInfo::TargetIDSetting::Any:
3072 OS << "Any";
3073 break;
3074 case AMDGPU::IsaInfo::TargetIDSetting::Off:
3075 OS << "Off";
3076 break;
3077 case AMDGPU::IsaInfo::TargetIDSetting::On:
3078 OS << "On";
3079 break;
3080 }
3081 return OS;
3082}
3083
3084} // namespace llvm
unsigned const MachineRegisterInfo * MRI
#define MAP_REG2REG
unsigned Intr
static llvm::cl::opt< unsigned > DefaultAMDHSACodeObjectVersion("amdhsa-code-object-version", llvm::cl::Hidden, llvm::cl::init(llvm::AMDGPU::AMDHSA_COV5), llvm::cl::desc("Set default AMDHSA Code Object Version (module flag " "or asm directive still take priority if present)"))
Provides AMDGPU specific target descriptions.
AMDHSA kernel descriptor definitions.
#define AMDHSA_BITS_SET(DST, MSK, VAL)
@ AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32
This file contains the simple types necessary to represent the attributes associated with functions a...
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
std::string Name
uint64_t Size
#define F(x, y, z)
Definition: MD5.cpp:55
unsigned const TargetRegisterInfo * TRI
unsigned Reg
LLVMContext & Context
const SmallVectorImpl< MachineOperand > & Cond
#define S_00B848_MEM_ORDERED(x)
Definition: SIDefines.h:1153
#define S_00B848_WGP_MODE(x)
Definition: SIDefines.h:1150
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
raw_pwrite_stream & OS
This file contains some functions that are useful when dealing with strings.
void setTargetIDFromFeaturesString(StringRef FS)
TargetIDSetting getXnackSetting() const
AMDGPUTargetID(const MCSubtargetInfo &STI)
void setTargetIDFromTargetIDStream(StringRef TargetID)
TargetIDSetting getSramEccSetting() const
unsigned getIndexInParsedOperands(unsigned CompOprIdx) const
unsigned getIndexOfDstInParsedOperands() const
unsigned getIndexOfSrcInParsedOperands(unsigned CompSrcIdx) const
unsigned getCompParsedSrcOperandsNum() const
std::optional< unsigned > getInvalidCompOperandIndex(std::function< unsigned(unsigned, unsigned)> GetRegIdx, bool SkipSrc=false) const
std::array< unsigned, Component::MAX_OPR_NUM > RegIndices
Definition: Any.h:28
This class represents an incoming formal argument to a Function.
Definition: Argument.h:28
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1455
CallingConv::ID getCallingConv() const
Definition: InstrTypes.h:1761
bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Determine whether the argument or parameter has the given attribute.
This class represents an Operation in the Expression.
Encoding
Size and signedness of expression operations' operands.
constexpr bool test(unsigned I) const
unsigned getAddressSpace() const
Definition: GlobalValue.h:205
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
void emitError(uint64_t LocCookie, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:237
ArrayRef< MCOperandInfo > operands() const
Definition: MCInstrDesc.h:239
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
Definition: MCInstrDesc.h:248
int getOperandConstraint(unsigned OpNum, MCOI::OperandConstraint Constraint) const
Returns the value of the specified operand constraint if it is present.
Definition: MCInstrDesc.h:219
unsigned getOpcode() const
Return the opcode number for this descriptor.
Definition: MCInstrDesc.h:230
Interface to description of machine instruction set.
Definition: MCInstrInfo.h:26
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition: MCInstrInfo.h:63
MCRegisterClass - Base class of TargetRegisterClass.
unsigned getID() const
getID() - Return the register class ID number.
bool contains(MCRegister Reg) const
contains - Return true if the specified register is included in this register class.
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
Generic base class for all target subtargets.
bool hasFeature(unsigned Feature) const
const Triple & getTargetTriple() const
const FeatureBitset & getFeatureBits() const
StringRef getCPU() const
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Definition: StringRef.h:849
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition: StringRef.h:696
bool getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition: StringRef.h:466
std::string str() const
str - Get the contents as an std::string.
Definition: StringRef.h:222
constexpr bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:134
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:137
bool ends_with(StringRef Suffix) const
Check if this string ends with the given Suffix.
Definition: StringRef.h:271
Manages the enabling and disabling of subtarget specific features.
const std::vector< std::string > & getFeatures() const
Returns the vector of individual subtarget features.
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
OSType getOS() const
Get the parsed operating system type of this triple.
Definition: Triple.h:370
ArchType getArch() const
Get the parsed architecture type of this triple.
Definition: Triple.h:361
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:660
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ CONSTANT_ADDRESS_32BIT
Address space for 32-bit constant memory.
@ LOCAL_ADDRESS
Address space for local memory.
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val, bool &IsDefault, const MCSubtargetInfo &STI)
unsigned encodeFieldVaVdst(unsigned Encoded, unsigned VaVdst)
unsigned decodeFieldSaSdst(unsigned Encoded)
unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc)
int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask, const MCSubtargetInfo &STI)
unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst)
const CustomOperandVal DepCtrInfo[]
bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal, const MCSubtargetInfo &STI)
unsigned decodeFieldVaVdst(unsigned Encoded)
int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI)
unsigned decodeFieldVmVsrc(unsigned Encoded)
bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI)
static constexpr ExpTgt ExpTgtInfo[]
bool getTgtName(unsigned Id, StringRef &Name, int &Index)
unsigned getTgtId(const StringRef Name)
constexpr uint32_t VersionMajor
HSA metadata major version.
const CustomOperand< const MCSubtargetInfo & > Opr[]
int64_t getHwregId(const StringRef Name, const MCSubtargetInfo &STI)
StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI)
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI)
unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getWavefrontSize(const MCSubtargetInfo *STI)
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI)
unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI)
unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed, bool FlatScrUsed, bool XNACKUsed)
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
unsigned getLocalMemorySize(const MCSubtargetInfo *STI)
unsigned getAddressableLocalMemorySize(const MCSubtargetInfo *STI)
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
unsigned getEUsPerCU(const MCSubtargetInfo *STI)
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI)
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI)
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
static TargetIDSetting getTargetIDSettingFromFeatureString(StringRef FeatureString)
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI)
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, bool Addressable)
unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs)
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI)
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI)
unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI, unsigned NumVGPRs)
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
unsigned getEncodedNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs, std::optional< bool > EnableWavefrontSize32)
static unsigned getGranulatedNumRegisterBlocks(unsigned NumRegs, unsigned Granule)
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo *STI)
unsigned getAllocatedNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs, std::optional< bool > EnableWavefrontSize32)
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI)
StringLiteral const UfmtSymbolicGFX11[]
bool isValidUnifiedFormat(unsigned Id, const MCSubtargetInfo &STI)
unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI)
StringRef getUnifiedFormatName(unsigned Id, const MCSubtargetInfo &STI)
unsigned const DfmtNfmt2UFmtGFX10[]
StringLiteral const DfmtSymbolic[]
static StringLiteral const * getNfmtLookupTable(const MCSubtargetInfo &STI)
bool isValidNfmt(unsigned Id, const MCSubtargetInfo &STI)
StringLiteral const NfmtSymbolicGFX10[]
bool isValidDfmtNfmt(unsigned Id, const MCSubtargetInfo &STI)
int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt, const MCSubtargetInfo &STI)
StringRef getDfmtName(unsigned Id)
int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt)
int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI)
bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI)
StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI)
unsigned const DfmtNfmt2UFmtGFX11[]
StringLiteral const NfmtSymbolicVI[]
StringLiteral const NfmtSymbolicSICI[]
int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI)
int64_t getDfmt(const StringRef Name)
StringLiteral const UfmtSymbolicGFX10[]
void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt)
StringRef getMsgOpName(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI)
uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId)
StringRef getMsgName(int64_t MsgId, const MCSubtargetInfo &STI)
bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI)
int64_t getMsgId(const StringRef Name, const MCSubtargetInfo &STI)
void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId, uint16_t &StreamId, const MCSubtargetInfo &STI)
bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI)
int64_t getMsgOpId(int64_t MsgId, const StringRef Name)
const char *const OpGsSymbolic[OP_GS_LAST_]
bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, const MCSubtargetInfo &STI, bool Strict)
const char *const OpSysSymbolic[OP_SYS_LAST_]
static uint64_t getMsgIdMask(const MCSubtargetInfo &STI)
bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI)
const CustomOperand< const MCSubtargetInfo & > Msg[]
bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI, bool Strict)
constexpr unsigned VOPD_VGPR_BANK_MASKS[]
constexpr unsigned COMPONENTS_NUM
bool isGCN3Encoding(const MCSubtargetInfo &STI)
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
bool isGFX10_BEncoding(const MCSubtargetInfo &STI)
bool isGFX10_GFX11(const MCSubtargetInfo &STI)
bool isInlinableLiteralV216(uint32_t Literal, uint8_t OpType)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc, unsigned OpNo)
Get size of register operand.
void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt)
Decodes Vmcnt, Expcnt and Lgkmcnt from given Waitcnt for given isa Version, and writes decoded values...
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
SmallVector< unsigned > getIntegerVecAttribute(const Function &F, StringRef Name, unsigned Size)
uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset)
Convert ByteOffset to dwords if the subtarget uses dword SMRD immediate offsets.
static unsigned encodeStorecnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Storecnt)
static bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST)
static bool hasSMEMByteOffset(const MCSubtargetInfo &ST)
bool isVOPCAsmOnly(unsigned Opc)
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding, unsigned VDataDwords, unsigned VAddrDwords)
bool getMTBUFHasSrsrc(unsigned Opc)
std::optional< int64_t > getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST, int64_t ByteOffset)
static bool isSymbolicCustomOperandEncoding(const CustomOperandVal *Opr, int Size, unsigned Code, bool &HasNonDefaultVal, const MCSubtargetInfo &STI)
bool isGFX10Before1030(const MCSubtargetInfo &STI)
bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo)
Does this operand support only inlinable literals?
unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc)
const int OPR_ID_UNSUPPORTED
bool shouldEmitConstantsToTextSection(const Triple &TT)
bool isInlinableLiteralV2I16(uint32_t Literal)
int getMTBUFElements(unsigned Opc)
static int encodeCustomOperandVal(const CustomOperandVal &Op, int64_t InputVal)
int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR)
bool isGFX10(const MCSubtargetInfo &STI)
LLVM_READONLY int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx)
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, const MCSubtargetInfo *STI)
bool isInlinableLiteralV2BF16(uint32_t Literal)
unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI)
std::optional< unsigned > getInlineEncodingV216(bool IsFloat, uint32_t Literal)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.
unsigned mc2PseudoReg(unsigned Reg)
Convert hardware register Reg to a pseudo register.
bool hasA16(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset, bool IsBuffer)
bool isGFX12Plus(const MCSubtargetInfo &STI)
unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler)
CanBeVOPD getCanBeVOPD(unsigned Opc)
static int getOprIdx(std::function< bool(const CustomOperand< T > &)> Test, const CustomOperand< T > OpInfo[], int OpInfoSize, T Context)
bool hasPackedD16(const MCSubtargetInfo &STI)
unsigned getStorecntBitMask(const IsaVersion &Version)
unsigned getLdsDwGranularity(const MCSubtargetInfo &ST)
bool isGFX940(const MCSubtargetInfo &STI)
bool isEntryFunctionCC(CallingConv::ID CC)
bool isInlinableLiteralV2F16(uint32_t Literal)
bool isHsaAbi(const MCSubtargetInfo &STI)
bool isGFX11(const MCSubtargetInfo &STI)
const int OPR_VAL_INVALID
bool getSMEMIsBuffer(unsigned Opc)
bool isGFX10_3_GFX11(const MCSubtargetInfo &STI)
bool isGroupSegment(const GlobalValue *GV)
IsaVersion getIsaVersion(StringRef GPU)
bool getMTBUFHasSoffset(unsigned Opc)
bool hasXNACK(const MCSubtargetInfo &STI)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
static unsigned getCombinedCountBitMask(const IsaVersion &Version, bool IsStore)
unsigned getVOPDOpcode(unsigned Opc)
bool isDPALU_DPP(const MCInstrDesc &OpDesc)
unsigned encodeWaitcnt(const IsaVersion &Version, unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt)
Encodes Vmcnt, Expcnt and Lgkmcnt into Waitcnt for given isa Version.
bool isVOPC64DPP(unsigned Opc)
int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements)
bool isCompute(CallingConv::ID cc)
bool getMAIIsGFX940XDL(unsigned Opc)
bool isSI(const MCSubtargetInfo &STI)
unsigned getDefaultAMDHSACodeObjectVersion()
bool isReadOnlySegment(const GlobalValue *GV)
bool isArgPassedInSGPR(const Argument *A)
bool isIntrinsicAlwaysUniform(unsigned IntrID)
int getMUBUFBaseOpcode(unsigned Opc)
unsigned getAMDHSACodeObjectVersion(const Module &M)
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt)
unsigned getWaitcntBitMask(const IsaVersion &Version)
bool getVOP3IsSingle(unsigned Opc)
bool isGFX9(const MCSubtargetInfo &STI)
bool getVOP1IsSingle(unsigned Opc)
static bool isDwordAligned(uint64_t ByteOffset)
unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST)
bool isGFX10_AEncoding(const MCSubtargetInfo &STI)
bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this a KImm operand?
bool getHasColorExport(const Function &F)
int getMTBUFBaseOpcode(unsigned Opc)
bool isChainCC(CallingConv::ID CC)
bool isGFX90A(const MCSubtargetInfo &STI)
unsigned getSamplecntBitMask(const IsaVersion &Version)
unsigned getDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion)
bool hasSRAMECC(const MCSubtargetInfo &STI)
bool getHasDepthExport(const Function &F)
static bool isValidOpr(int Idx, const CustomOperand< T > OpInfo[], int OpInfoSize, T Context)
bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI)
bool getMUBUFHasVAddr(unsigned Opc)
int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily)
bool isTrue16Inst(unsigned Opc)
bool hasAny64BitVGPROperands(const MCInstrDesc &OpDesc)
std::pair< unsigned, unsigned > getVOPDComponents(unsigned VOPDOpcode)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
bool isGFX12(const MCSubtargetInfo &STI)
unsigned getInitialPSInputAddr(const Function &F)
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Expcnt)
bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this an AMDGPU specific source operand? These include registers, inline constants,...
unsigned getKmcntBitMask(const IsaVersion &Version)
unsigned getVmcntBitMask(const IsaVersion &Version)
bool isNotGFX10Plus(const MCSubtargetInfo &STI)
bool hasMAIInsts(const MCSubtargetInfo &STI)
bool isIntrinsicSourceOfDivergence(unsigned IntrID)
bool isKernelCC(const Function *Func)
bool isGenericAtomic(unsigned Opc)
Waitcnt decodeStorecntDscnt(const IsaVersion &Version, unsigned StorecntDscnt)
bool isGFX8Plus(const MCSubtargetInfo &STI)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, uint64_t NamedIdx)
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the values intended for floating point values.
unsigned getLgkmcntBitMask(const IsaVersion &Version)
std::optional< int64_t > getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset, bool IsBuffer)
bool isSGPR(unsigned Reg, const MCRegisterInfo *TRI)
Is Reg - scalar register.
bool isHi(unsigned Reg, const MCRegisterInfo &MRI)
unsigned getBvhcntBitMask(const IsaVersion &Version)
bool hasMIMG_R128(const MCSubtargetInfo &STI)
bool hasGFX10_3Insts(const MCSubtargetInfo &STI)
bool hasG16(const MCSubtargetInfo &STI)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements)
unsigned getExpcntBitMask(const IsaVersion &Version)
bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI)
bool getMUBUFHasSoffset(unsigned Opc)
bool isNotGFX11Plus(const MCSubtargetInfo &STI)
bool isGFX11Plus(const MCSubtargetInfo &STI)
std::optional< unsigned > getInlineEncodingV2F16(uint32_t Literal)
bool isInlineValue(unsigned Reg)
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this floating-point operand?
bool isShader(CallingConv::ID cc)
unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion)
static unsigned getDefaultCustomOperandEncoding(const CustomOperandVal *Opr, int Size, const MCSubtargetInfo &STI)
static unsigned encodeLoadcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Loadcnt)
bool isGFX10Plus(const MCSubtargetInfo &STI)
unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI)
If Reg is a pseudo reg, return the correct hardware register given STI otherwise return Reg.
static bool decodeCustomOperand(const CustomOperandVal *Opr, int Size, unsigned Code, int &Idx, StringRef &Name, unsigned &Val, bool &IsDefault, const MCSubtargetInfo &STI)
bool isGlobalSegment(const GlobalValue *GV)
@ OPERAND_KIMM_LAST
Definition: SIDefines.h:269
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
Definition: SIDefines.h:234
@ OPERAND_REG_INLINE_C_LAST
Definition: SIDefines.h:260
@ OPERAND_REG_IMM_V2FP16
Definition: SIDefines.h:211
@ OPERAND_REG_INLINE_C_FP64
Definition: SIDefines.h:223
@ OPERAND_REG_INLINE_C_V2BF16
Definition: SIDefines.h:225
@ OPERAND_REG_IMM_V2INT16
Definition: SIDefines.h:212
@ OPERAND_REG_INLINE_AC_V2FP16
Definition: SIDefines.h:246
@ OPERAND_SRC_FIRST
Definition: SIDefines.h:265
@ OPERAND_REG_IMM_V2BF16
Definition: SIDefines.h:210
@ OPERAND_REG_INLINE_AC_FIRST
Definition: SIDefines.h:262
@ OPERAND_KIMM_FIRST
Definition: SIDefines.h:268
@ OPERAND_REG_IMM_FP16
Definition: SIDefines.h:206
@ OPERAND_REG_IMM_FP64
Definition: SIDefines.h:204
@ OPERAND_REG_INLINE_C_V2FP16
Definition: SIDefines.h:226
@ OPERAND_REG_INLINE_AC_V2INT16
Definition: SIDefines.h:244
@ OPERAND_REG_INLINE_AC_FP16
Definition: SIDefines.h:241
@ OPERAND_REG_INLINE_AC_FP32
Definition: SIDefines.h:242
@ OPERAND_REG_INLINE_AC_V2BF16
Definition: SIDefines.h:245
@ OPERAND_REG_IMM_FP32
Definition: SIDefines.h:203
@ OPERAND_REG_INLINE_C_FIRST
Definition: SIDefines.h:259
@ OPERAND_REG_INLINE_C_FP32
Definition: SIDefines.h:222
@ OPERAND_REG_INLINE_AC_LAST
Definition: SIDefines.h:263
@ OPERAND_REG_INLINE_C_V2INT16
Definition: SIDefines.h:224
@ OPERAND_REG_IMM_V2FP32
Definition: SIDefines.h:214
@ OPERAND_REG_INLINE_AC_FP64
Definition: SIDefines.h:243
@ OPERAND_REG_INLINE_C_FP16
Definition: SIDefines.h:221
@ OPERAND_REG_INLINE_C_V2FP32
Definition: SIDefines.h:228
@ OPERAND_REG_IMM_FP32_DEFERRED
Definition: SIDefines.h:209
@ OPERAND_SRC_LAST
Definition: SIDefines.h:266
@ OPERAND_REG_IMM_FP16_DEFERRED
Definition: SIDefines.h:208
bool hasGDS(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset)
bool isGFX9Plus(const MCSubtargetInfo &STI)
bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI)
const int OPR_ID_DUPLICATE
bool isVOPD(unsigned Opc)
VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Vmcnt)
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt)
bool isCvt_F32_Fp8_Bf8_e64(unsigned Opc)
Waitcnt decodeLoadcntDscnt(const IsaVersion &Version, unsigned LoadcntDscnt)
std::optional< unsigned > getInlineEncodingV2I16(uint32_t Literal)
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
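For example (a sketch; the helper name is illustrative), converting a class's width to bytes:
  unsigned regSizeInBytes(const llvm::TargetRegisterClass &RC) {
    return llvm::AMDGPU::getRegBitWidth(RC) / 8; // e.g. VGPR_32: 32 bits -> 4
  }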
static unsigned encodeStorecntDscnt(const IsaVersion &Version, unsigned Storecnt, unsigned Dscnt)
int getMCOpcode(uint16_t Opcode, unsigned Gen)
const MIMGBaseOpcodeInfo * getMIMGBaseOpcode(unsigned Opc)
bool isVI(const MCSubtargetInfo &STI)
bool getMUBUFIsBufferInv(unsigned Opc)
std::optional< unsigned > getInlineEncodingV2BF16(uint32_t Literal)
static int encodeCustomOperand(const CustomOperandVal *Opr, int Size, const StringRef Name, int64_t InputVal, unsigned &UsedOprMask, const MCSubtargetInfo &STI)
unsigned hasKernargPreload(const MCSubtargetInfo &STI)
bool isMAC(unsigned Opc)
bool isCI(const MCSubtargetInfo &STI)
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Lgkmcnt)
amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(const MCSubtargetInfo *STI)
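A minimal sketch, assuming an initialized MCSubtargetInfo STI; not the authoritative emission path.
  llvm::amdhsa::kernel_descriptor_t KD =
      llvm::AMDGPU::getDefaultAmdhsaKernelDescriptor(&STI);
  // Fields start from subtarget-appropriate defaults; callers override
  // them before the descriptor is emitted.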
bool getVOP2IsSingle(unsigned Opc)
bool getMAIIsDGEMM(unsigned Opc)
Returns true if the MAI operation is a double-precision GEMM.
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
const int OPR_ID_UNKNOWN
unsigned getCompletionActionImplicitArgPosition(unsigned CodeObjectVersion)
int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels)
bool isModuleEntryFunctionCC(CallingConv::ID CC)
bool isNotGFX12Plus(const MCSubtargetInfo &STI)
bool getMTBUFHasVAddr(unsigned Opc)
unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt)
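A sketch of round-tripping one counter through these encode/decode helpers, assuming the gfx900 target:
  llvm::AMDGPU::IsaVersion V = llvm::AMDGPU::getIsaVersion("gfx900");
  unsigned Wait = llvm::AMDGPU::getWaitcntBitMask(V); // every counter at max
  Wait = llvm::AMDGPU::encodeVmcnt(V, Wait, 0);       // wait until vmcnt == 0
  unsigned Vm = llvm::AMDGPU::decodeVmcnt(V, Wait);   // Vm == 0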
uint8_t getELFABIVersion(const Triple &T, unsigned CodeObjectVersion)
std::pair< unsigned, unsigned > getIntegerPairAttribute(const Function &F, StringRef Name, std::pair< unsigned, unsigned > Default, bool OnlyFirstRequired)
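For example (a sketch; the default values are illustrative), reading the "amdgpu-flat-work-group-size" function attribute, which stores a "min,max" pair:
  std::pair<unsigned, unsigned> FlatWGSize =
      llvm::AMDGPU::getIntegerPairAttribute(F, "amdgpu-flat-work-group-size",
                                            /*Default=*/{1, 1024},
                                            /*OnlyFirstRequired=*/false);
  // Falls back to the Default pair when the attribute is absent or
  // cannot be parsed.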
unsigned getLoadcntBitMask(const IsaVersion &Version)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
bool hasVOPD(const MCSubtargetInfo &STI)
static unsigned encodeDscnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Dscnt)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable?
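A worked example: +1.0 is one of the AMDGPU inline constants, so its 64-bit pattern needs no literal slot, while an arbitrary value such as 3.14 does (DoubleToBits is from llvm/Support/MathExtras.h):
  bool A = llvm::AMDGPU::isInlinableLiteral64(
      static_cast<int64_t>(llvm::DoubleToBits(1.0)), /*HasInv2Pi=*/true);  // true
  bool B = llvm::AMDGPU::isInlinableLiteral64(
      static_cast<int64_t>(llvm::DoubleToBits(3.14)), /*HasInv2Pi=*/true); // false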
unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion)
bool isGFX9_GFX10_GFX11(const MCSubtargetInfo &STI)
bool isGFX9_GFX10(const MCSubtargetInfo &STI)
int getMUBUFElements(unsigned Opc)
static unsigned encodeLoadcntDscnt(const IsaVersion &Version, unsigned Loadcnt, unsigned Dscnt)
const GcnBufferFormatInfo * getGcnBufferFormatInfo(uint8_t BitsPerComp, uint8_t NumComponents, uint8_t NumFormat, const MCSubtargetInfo &STI)
bool isGraphics(CallingConv::ID cc)
unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc)
bool isPermlane16(unsigned Opc)
bool getMUBUFHasSrsrc(unsigned Opc)
unsigned getDscntBitMask(const IsaVersion &Version)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
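A sketch of the bitmask-enum idiom this helper supports; the Flags enumerators are hypothetical.
  #include "llvm/ADT/BitmaskEnum.h"
  enum class Flags : unsigned {
    A = 1 << 0,
    B = 1 << 1,
    C = 1 << 2, // largest enumerator; its high-order bit is 0b100
    LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/C)
  };
  // Mask<Flags>() yields 0b111: ones through the high-order bit of C.
  // (Inside your own namespace, also invoke
  // LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE() so |, &, ^ are found.)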
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition: CallingConv.h:24
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
Definition: CallingConv.h:197
@ AMDGPU_VS
Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tessellation and geometry are not in use, or otherwise copy shader if one is needed).
Definition: CallingConv.h:188
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
Definition: CallingConv.h:200
@ AMDGPU_Gfx
Used for AMD graphics targets.
Definition: CallingConv.h:232
@ AMDGPU_CS_ChainPreserve
Used on AMDGPUs to give the middle-end more control over argument placement.
Definition: CallingConv.h:249
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
Definition: CallingConv.h:206
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
Definition: CallingConv.h:191
@ AMDGPU_CS_Chain
Used on AMDGPUs to give the middle-end more control over argument placement.
Definition: CallingConv.h:245
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
Definition: CallingConv.h:194
@ SPIR_KERNEL
Used for SPIR kernel functions.
Definition: CallingConv.h:144
@ AMDGPU_ES
Used for AMDPAL shader stage before geometry shader if geometry is in use.
Definition: CallingConv.h:218
@ AMDGPU_LS
Used for AMDPAL vertex shader if tessellation is in use.
Definition: CallingConv.h:213
@ ELFABIVERSION_AMDGPU_HSA_V4
Definition: ELF.h:378
@ ELFABIVERSION_AMDGPU_HSA_V5
Definition: ELF.h:379
@ ELFABIVERSION_AMDGPU_HSA_V6
Definition: ELF.h:380
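A sketch tying these constants to getELFABIVersion above, assuming an amdhsa triple:
  llvm::Triple T("amdgcn-amd-amdhsa");
  uint8_t ABI = llvm::AMDGPU::getELFABIVersion(T, /*CodeObjectVersion=*/5);
  // ABI == llvm::ELF::ELFABIVERSION_AMDGPU_HSA_V5; code object versions
  // 4 and 6 map to the V4 and V6 constants in the same way.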
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition: MathExtras.h:417
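Worked example:
  unsigned Groups = llvm::divideCeil(10, 3); // 4: three full groups + one partial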
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:156
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
uint64_t alignTo(uint64_t Size, Align A)
Returns the smallest multiple of A large enough to store Size bytes.
Definition: Alignment.h:155
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition: APFixedPoint.h:293
uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the largest uint64_t less than or equal to Value that is congruent to Skew modulo Align.
Definition: MathExtras.h:428
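A worked example contrasting alignTo and alignDown, including the Skew parameter:
  uint64_t Up     = llvm::alignTo(10, llvm::Align(8));  // 16: next multiple of 8
  uint64_t Down   = llvm::alignDown(10, 8);             // 8: previous multiple of 8
  uint64_t Skewed = llvm::alignDown(10, 8, /*Skew=*/3); // 3: largest value <= 10
                                                        //    that is 3 mod 8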
@ AlwaysUniform
The result values are always uniform.
@ Default
The result values are uniform if and only if all operands are uniform.
#define N
AMD Kernel Code Object (amd_kernel_code_t).
bool(* Cond)(T Context)
Instruction set architecture version.
Definition: TargetParser.h:125
Represents the counter values to wait for in an s_waitcnt instruction.
Description of the encoding of one expression Op.