1//===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "AMDGPUBaseInfo.h"
10#include "AMDGPU.h"
11#include "AMDGPUAsmUtils.h"
12#include "AMDKernelCodeT.h"
13#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
14#include "llvm/ADT/StringExtras.h"
15#include "llvm/BinaryFormat/ELF.h"
16#include "llvm/IR/Attributes.h"
17#include "llvm/IR/Constants.h"
18#include "llvm/IR/Function.h"
19#include "llvm/IR/GlobalValue.h"
20#include "llvm/IR/IntrinsicsAMDGPU.h"
21#include "llvm/IR/IntrinsicsR600.h"
22#include "llvm/IR/LLVMContext.h"
23#include "llvm/MC/MCInstrInfo.h"
24#include "llvm/MC/MCRegisterInfo.h"
25#include "llvm/MC/MCSubtargetInfo.h"
26#include "llvm/Support/AMDHSAKernelDescriptor.h"
27#include "llvm/Support/CommandLine.h"
28#include "llvm/TargetParser/TargetParser.h"
29#include <optional>
30
31#define GET_INSTRINFO_NAMED_OPS
32#define GET_INSTRMAP_INFO
33#include "AMDGPUGenInstrInfo.inc"
34
35static llvm::cl::opt<unsigned>
36 AmdhsaCodeObjectVersion("amdhsa-code-object-version", llvm::cl::Hidden,
37 llvm::cl::desc("AMDHSA Code Object Version"),
38 llvm::cl::init(4));
39
40namespace {
41
42/// \returns Bit mask for given bit \p Shift and bit \p Width.
43unsigned getBitMask(unsigned Shift, unsigned Width) {
44 return ((1 << Width) - 1) << Shift;
45}
46
47/// Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
48///
49/// \returns Packed \p Dst.
50unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
51 unsigned Mask = getBitMask(Shift, Width);
52 return ((Src << Shift) & Mask) | (Dst & ~Mask);
53}
54
55/// Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
56///
57/// \returns Unpacked bits.
58unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
59 return (Src & getBitMask(Shift, Width)) >> Shift;
60}
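
// Illustrative sketch (not part of the original source): packing a 4-bit
// field at bit offset 8 and reading it back with the helpers above.
//   unsigned Enc = packBits(/*Src=*/0xA, /*Dst=*/0, /*Shift=*/8, /*Width=*/4);
//   // Enc == 0xA00
//   unsigned Val = unpackBits(Enc, /*Shift=*/8, /*Width=*/4);
//   // Val == 0xA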
61
62/// \returns Vmcnt bit shift (lower bits).
63unsigned getVmcntBitShiftLo(unsigned VersionMajor) {
64 return VersionMajor >= 11 ? 10 : 0;
65}
66
67/// \returns Vmcnt bit width (lower bits).
68unsigned getVmcntBitWidthLo(unsigned VersionMajor) {
69 return VersionMajor >= 11 ? 6 : 4;
70}
71
72/// \returns Expcnt bit shift.
73unsigned getExpcntBitShift(unsigned VersionMajor) {
74 return VersionMajor >= 11 ? 0 : 4;
75}
76
77/// \returns Expcnt bit width.
78unsigned getExpcntBitWidth(unsigned VersionMajor) { return 3; }
79
80/// \returns Lgkmcnt bit shift.
81unsigned getLgkmcntBitShift(unsigned VersionMajor) {
82 return VersionMajor >= 11 ? 4 : 8;
83}
84
85/// \returns Lgkmcnt bit width.
86unsigned getLgkmcntBitWidth(unsigned VersionMajor) {
87 return VersionMajor >= 10 ? 6 : 4;
88}
89
90/// \returns Vmcnt bit shift (higher bits).
91unsigned getVmcntBitShiftHi(unsigned VersionMajor) { return 14; }
92
93/// \returns Vmcnt bit width (higher bits).
94unsigned getVmcntBitWidthHi(unsigned VersionMajor) {
95 return (VersionMajor == 9 || VersionMajor == 10) ? 2 : 0;
96}
97
98} // end namespace anonymous
99
100namespace llvm {
101
102namespace AMDGPU {
103
104std::optional<uint8_t> getHsaAbiVersion(const MCSubtargetInfo *STI) {
105 if (STI && STI->getTargetTriple().getOS() != Triple::AMDHSA)
106 return std::nullopt;
107
108 switch (AmdhsaCodeObjectVersion) {
109 case 2:
110 return ELF::ELFABIVERSION_AMDGPU_HSA_V2;
111 case 3:
112 return ELF::ELFABIVERSION_AMDGPU_HSA_V3;
113 case 4:
114 return ELF::ELFABIVERSION_AMDGPU_HSA_V4;
115 case 5:
116 return ELF::ELFABIVERSION_AMDGPU_HSA_V5;
117 default:
118 report_fatal_error(Twine("Unsupported AMDHSA Code Object Version ") +
119 Twine(AmdhsaCodeObjectVersion));
120 }
121}
122
123bool isHsaAbiVersion2(const MCSubtargetInfo *STI) {
124 if (std::optional<uint8_t> HsaAbiVer = getHsaAbiVersion(STI))
125 return *HsaAbiVer == ELF::ELFABIVERSION_AMDGPU_HSA_V2;
126 return false;
127}
128
129bool isHsaAbiVersion3(const MCSubtargetInfo *STI) {
130 if (std::optional<uint8_t> HsaAbiVer = getHsaAbiVersion(STI))
131 return *HsaAbiVer == ELF::ELFABIVERSION_AMDGPU_HSA_V3;
132 return false;
133}
134
135bool isHsaAbiVersion4(const MCSubtargetInfo *STI) {
136 if (std::optional<uint8_t> HsaAbiVer = getHsaAbiVersion(STI))
137 return *HsaAbiVer == ELF::ELFABIVERSION_AMDGPU_HSA_V4;
138 return false;
139}
140
141bool isHsaAbiVersion5(const MCSubtargetInfo *STI) {
142 if (std::optional<uint8_t> HsaAbiVer = getHsaAbiVersion(STI))
143 return *HsaAbiVer == ELF::ELFABIVERSION_AMDGPU_HSA_V5;
144 return false;
145}
146
147bool isHsaAbiVersion3AndAbove(const MCSubtargetInfo *STI) {
148 return isHsaAbiVersion3(STI) || isHsaAbiVersion4(STI) ||
149 isHsaAbiVersion5(STI);
150}
151
152unsigned getAmdhsaCodeObjectVersion() {
153 return AmdhsaCodeObjectVersion;
154}
155
156unsigned getCodeObjectVersion(const Module &M) {
157 if (auto Ver = mdconst::extract_or_null<ConstantInt>(
158 M.getModuleFlag("amdgpu_code_object_version"))) {
159 return (unsigned)Ver->getZExtValue() / 100;
160 }
161
162 // Default code object version.
163 return AMDHSA_COV4;
164}
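
// For illustration (assumed IR, not from this file): a module carrying
//   !llvm.module.flags = !{!0}
//   !0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
// makes getCodeObjectVersion return 5, since the flag stores the code object
// version scaled by 100.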
165
166unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion) {
167 switch (CodeObjectVersion) {
168 case AMDHSA_COV2:
169 case AMDHSA_COV3:
170 case AMDHSA_COV4:
171 return 48;
172 case AMDHSA_COV5:
173 default:
174 return AMDGPU::ImplicitArg::MULTIGRID_SYNC_ARG_OFFSET;
175 }
176}
177
178
179// FIXME: All such magic numbers about the ABI should be in a
180// central TD file.
181unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion) {
182 switch (CodeObjectVersion) {
183 case AMDHSA_COV2:
184 case AMDHSA_COV3:
185 case AMDHSA_COV4:
186 return 24;
187 case AMDHSA_COV5:
188 default:
189 return AMDGPU::ImplicitArg::HOSTCALL_PTR_OFFSET;
190 }
191}
192
193unsigned getDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion) {
194 switch (CodeObjectVersion) {
195 case AMDHSA_COV2:
196 case AMDHSA_COV3:
197 case AMDHSA_COV4:
198 return 32;
199 case AMDHSA_COV5:
200 default:
201 return AMDGPU::ImplicitArg::DEFAULT_QUEUE_OFFSET;
202 }
203}
204
205unsigned getCompletionActionImplicitArgPosition(unsigned CodeObjectVersion) {
206 switch (CodeObjectVersion) {
207 case AMDHSA_COV2:
208 case AMDHSA_COV3:
209 case AMDHSA_COV4:
210 return 40;
211 case AMDHSA_COV5:
212 default:
213 return AMDGPU::ImplicitArg::COMPLETION_ACTION_OFFSET;
214 }
215}
216
217#define GET_MIMGBaseOpcodesTable_IMPL
218#define GET_MIMGDimInfoTable_IMPL
219#define GET_MIMGInfoTable_IMPL
220#define GET_MIMGLZMappingTable_IMPL
221#define GET_MIMGMIPMappingTable_IMPL
222#define GET_MIMGBiasMappingTable_IMPL
223#define GET_MIMGOffsetMappingTable_IMPL
224#define GET_MIMGG16MappingTable_IMPL
225#define GET_MAIInstInfoTable_IMPL
226#include "AMDGPUGenSearchableTables.inc"
227
228int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
229 unsigned VDataDwords, unsigned VAddrDwords) {
230 const MIMGInfo *Info = getMIMGOpcodeHelper(BaseOpcode, MIMGEncoding,
231 VDataDwords, VAddrDwords);
232 return Info ? Info->Opcode : -1;
233}
234
235const MIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc) {
236 const MIMGInfo *Info = getMIMGInfo(Opc);
237 return Info ? getMIMGBaseOpcodeInfo(Info->BaseOpcode) : nullptr;
238}
239
240int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels) {
241 const MIMGInfo *OrigInfo = getMIMGInfo(Opc);
242 const MIMGInfo *NewInfo =
243 getMIMGOpcodeHelper(OrigInfo->BaseOpcode, OrigInfo->MIMGEncoding,
244 NewChannels, OrigInfo->VAddrDwords);
245 return NewInfo ? NewInfo->Opcode : -1;
246}
247
248unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode,
249 const MIMGDimInfo *Dim, bool IsA16,
250 bool IsG16Supported) {
251 unsigned AddrWords = BaseOpcode->NumExtraArgs;
252 unsigned AddrComponents = (BaseOpcode->Coordinates ? Dim->NumCoords : 0) +
253 (BaseOpcode->LodOrClampOrMip ? 1 : 0);
254 if (IsA16)
255 AddrWords += divideCeil(AddrComponents, 2);
256 else
257 AddrWords += AddrComponents;
258
259 // Note: For subtargets that support A16 but not G16, enabling A16 also
260 // enables 16 bit gradients.
261 // For subtargets that support A16 (operand) and G16 (done with a different
262 // instruction encoding), they are independent.
263
264 if (BaseOpcode->Gradients) {
265 if ((IsA16 && !IsG16Supported) || BaseOpcode->G16)
266 // There are two gradients per coordinate, we pack them separately.
267 // For the 3d case,
268 // we get (dy/du, dx/du) (-, dz/du) (dy/dv, dx/dv) (-, dz/dv)
269 AddrWords += alignTo<2>(Dim->NumGradients / 2);
270 else
271 AddrWords += Dim->NumGradients;
272 }
273 return AddrWords;
274}
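
// Worked example (hypothetical operand counts): a 2D gradient sample with
// NumExtraArgs = 0, Coordinates = true (Dim->NumCoords = 2, NumGradients = 4)
// and no LOD/clamp/mip. With full-rate addresses this needs 2 + 4 = 6 words;
// when both coordinates and gradients are 16 bit (IsA16 with !IsG16Supported,
// or G16), the coordinates pack into divideCeil(2, 2) = 1 word and the
// gradients into alignTo<2>(4 / 2) = 2 words, for 3 words total.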
275
276struct MUBUFInfo {
277 uint16_t Opcode;
278 uint16_t BaseOpcode;
279 uint8_t elements;
280 bool has_vaddr;
281 bool has_srsrc;
282 bool has_soffset;
283 bool IsBufferInv;
284};
285
286struct MTBUFInfo {
287 uint16_t Opcode;
288 uint16_t BaseOpcode;
289 uint8_t elements;
290 bool has_vaddr;
291 bool has_srsrc;
292 bool has_soffset;
293};
294
295struct SMInfo {
296 uint16_t Opcode;
297 bool IsBuffer;
298};
299
300struct VOPInfo {
301 uint16_t Opcode;
302 bool IsSingle;
303};
304
305struct VOPC64DPPInfo {
306 uint16_t Opcode;
307};
308
309struct VOPDComponentInfo {
310 uint16_t BaseVOP;
311 uint16_t VOPDOp;
312 bool CanBeVOPDX;
313};
314
315struct VOPDInfo {
316 uint16_t Opcode;
317 uint16_t OpX;
318 uint16_t OpY;
319};
320
321struct VOPTrue16Info {
322 uint16_t Opcode;
323 bool IsTrue16;
324};
325
326#define GET_MTBUFInfoTable_DECL
327#define GET_MTBUFInfoTable_IMPL
328#define GET_MUBUFInfoTable_DECL
329#define GET_MUBUFInfoTable_IMPL
330#define GET_SMInfoTable_DECL
331#define GET_SMInfoTable_IMPL
332#define GET_VOP1InfoTable_DECL
333#define GET_VOP1InfoTable_IMPL
334#define GET_VOP2InfoTable_DECL
335#define GET_VOP2InfoTable_IMPL
336#define GET_VOP3InfoTable_DECL
337#define GET_VOP3InfoTable_IMPL
338#define GET_VOPC64DPPTable_DECL
339#define GET_VOPC64DPPTable_IMPL
340#define GET_VOPC64DPP8Table_DECL
341#define GET_VOPC64DPP8Table_IMPL
342#define GET_VOPDComponentTable_DECL
343#define GET_VOPDComponentTable_IMPL
344#define GET_VOPDPairs_DECL
345#define GET_VOPDPairs_IMPL
346#define GET_VOPTrue16Table_DECL
347#define GET_VOPTrue16Table_IMPL
348#define GET_WMMAOpcode2AddrMappingTable_DECL
349#define GET_WMMAOpcode2AddrMappingTable_IMPL
350#define GET_WMMAOpcode3AddrMappingTable_DECL
351#define GET_WMMAOpcode3AddrMappingTable_IMPL
352#include "AMDGPUGenSearchableTables.inc"
353
354int getMTBUFBaseOpcode(unsigned Opc) {
355 const MTBUFInfo *Info = getMTBUFInfoFromOpcode(Opc);
356 return Info ? Info->BaseOpcode : -1;
357}
358
359int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements) {
360 const MTBUFInfo *Info = getMTBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
361 return Info ? Info->Opcode : -1;
362}
363
364int getMTBUFElements(unsigned Opc) {
365 const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
366 return Info ? Info->elements : 0;
367}
368
369bool getMTBUFHasVAddr(unsigned Opc) {
370 const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
371 return Info ? Info->has_vaddr : false;
372}
373
374bool getMTBUFHasSrsrc(unsigned Opc) {
375 const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
376 return Info ? Info->has_srsrc : false;
377}
378
379bool getMTBUFHasSoffset(unsigned Opc) {
380 const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
381 return Info ? Info->has_soffset : false;
382}
383
384int getMUBUFBaseOpcode(unsigned Opc) {
385 const MUBUFInfo *Info = getMUBUFInfoFromOpcode(Opc);
386 return Info ? Info->BaseOpcode : -1;
387}
388
389int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements) {
390 const MUBUFInfo *Info = getMUBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
391 return Info ? Info->Opcode : -1;
392}
393
394int getMUBUFElements(unsigned Opc) {
395 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
396 return Info ? Info->elements : 0;
397}
398
399bool getMUBUFHasVAddr(unsigned Opc) {
400 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
401 return Info ? Info->has_vaddr : false;
402}
403
404bool getMUBUFHasSrsrc(unsigned Opc) {
405 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
406 return Info ? Info->has_srsrc : false;
407}
408
409bool getMUBUFHasSoffset(unsigned Opc) {
410 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
411 return Info ? Info->has_soffset : false;
412}
413
414bool getMUBUFIsBufferInv(unsigned Opc) {
415 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
416 return Info ? Info->IsBufferInv : false;
417}
418
419bool getSMEMIsBuffer(unsigned Opc) {
420 const SMInfo *Info = getSMEMOpcodeHelper(Opc);
421 return Info ? Info->IsBuffer : false;
422}
423
424bool getVOP1IsSingle(unsigned Opc) {
425 const VOPInfo *Info = getVOP1OpcodeHelper(Opc);
426 return Info ? Info->IsSingle : false;
427}
428
429bool getVOP2IsSingle(unsigned Opc) {
430 const VOPInfo *Info = getVOP2OpcodeHelper(Opc);
431 return Info ? Info->IsSingle : false;
432}
433
434bool getVOP3IsSingle(unsigned Opc) {
435 const VOPInfo *Info = getVOP3OpcodeHelper(Opc);
436 return Info ? Info->IsSingle : false;
437}
438
439bool isVOPC64DPP(unsigned Opc) {
440 return isVOPC64DPPOpcodeHelper(Opc) || isVOPC64DPP8OpcodeHelper(Opc);
441}
442
443bool getMAIIsDGEMM(unsigned Opc) {
444 const MAIInstInfo *Info = getMAIInstInfoHelper(Opc);
445 return Info ? Info->is_dgemm : false;
446}
447
448bool getMAIIsGFX940XDL(unsigned Opc) {
449 const MAIInstInfo *Info = getMAIInstInfoHelper(Opc);
450 return Info ? Info->is_gfx940_xdl : false;
451}
452
453CanBeVOPD getCanBeVOPD(unsigned Opc) {
454 const VOPDComponentInfo *Info = getVOPDComponentHelper(Opc);
455 if (Info)
456 return {Info->CanBeVOPDX, true};
457 else
458 return {false, false};
459}
460
461unsigned getVOPDOpcode(unsigned Opc) {
462 const VOPDComponentInfo *Info = getVOPDComponentHelper(Opc);
463 return Info ? Info->VOPDOp : ~0u;
464}
465
466bool isVOPD(unsigned Opc) {
467 return AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0X);
468}
469
470bool isMAC(unsigned Opc) {
471 return Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
472 Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
473 Opc == AMDGPU::V_MAC_F32_e64_vi ||
474 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
475 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
476 Opc == AMDGPU::V_MAC_F16_e64_vi ||
477 Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
478 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
479 Opc == AMDGPU::V_FMAC_F32_e64_gfx11 ||
480 Opc == AMDGPU::V_FMAC_F32_e64_vi ||
481 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
482 Opc == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 ||
483 Opc == AMDGPU::V_FMAC_F16_e64_gfx10 ||
484 Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx11 ||
485 Opc == AMDGPU::V_DOT2C_F32_F16_e64_vi ||
486 Opc == AMDGPU::V_DOT2C_I32_I16_e64_vi ||
487 Opc == AMDGPU::V_DOT4C_I32_I8_e64_vi ||
488 Opc == AMDGPU::V_DOT8C_I32_I4_e64_vi;
489}
490
491bool isPermlane16(unsigned Opc) {
492 return Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
493 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10 ||
494 Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx11 ||
495 Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx11;
496}
497
498bool isTrue16Inst(unsigned Opc) {
499 const VOPTrue16Info *Info = getTrue16OpcodeHelper(Opc);
500 return Info ? Info->IsTrue16 : false;
501}
502
503unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc) {
504 const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom2AddrOpcode(Opc);
505 return Info ? Info->Opcode3Addr : ~0u;
506}
507
508unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc) {
509 const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom3AddrOpcode(Opc);
510 return Info ? Info->Opcode2Addr : ~0u;
511}
512
513// Wrapper for Tablegen'd function. enum Subtarget is not defined in any
514// header files, so we need to wrap it in a function that takes unsigned
515// instead.
516int getMCOpcode(uint16_t Opcode, unsigned Gen) {
517 return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen));
518}
519
520int getVOPDFull(unsigned OpX, unsigned OpY) {
521 const VOPDInfo *Info = getVOPDInfoFromComponentOpcodes(OpX, OpY);
522 return Info ? Info->Opcode : -1;
523}
524
525std::pair<unsigned, unsigned> getVOPDComponents(unsigned VOPDOpcode) {
526 const VOPDInfo *Info = getVOPDOpcodeHelper(VOPDOpcode);
527 assert(Info);
528 auto OpX = getVOPDBaseFromComponent(Info->OpX);
529 auto OpY = getVOPDBaseFromComponent(Info->OpY);
530 assert(OpX && OpY);
531 return {OpX->BaseVOP, OpY->BaseVOP};
532}
533
534namespace VOPD {
535
536ComponentProps::ComponentProps(const MCInstrDesc &OpDesc) {
537 assert(OpDesc.getNumDefs() == Component::DST_NUM);
538
539 assert(OpDesc.getOperandConstraint(Component::SRC0, MCOI::TIED_TO) == -1);
540 assert(OpDesc.getOperandConstraint(Component::SRC1, MCOI::TIED_TO) == -1);
541 auto TiedIdx = OpDesc.getOperandConstraint(Component::SRC2, MCOI::TIED_TO);
542 assert(TiedIdx == -1 || TiedIdx == Component::DST);
543 HasSrc2Acc = TiedIdx != -1;
544
545 SrcOperandsNum = OpDesc.getNumOperands() - OpDesc.getNumDefs();
546 assert(SrcOperandsNum <= Component::MAX_SRC_NUM);
547
548 auto OperandsNum = OpDesc.getNumOperands();
549 unsigned CompOprIdx;
550 for (CompOprIdx = Component::SRC1; CompOprIdx < OperandsNum; ++CompOprIdx) {
551 if (OpDesc.operands()[CompOprIdx].OperandType == AMDGPU::OPERAND_KIMM32) {
552 MandatoryLiteralIdx = CompOprIdx;
553 break;
554 }
555 }
556}
557
558unsigned ComponentInfo::getIndexInParsedOperands(unsigned CompOprIdx) const {
559 assert(CompOprIdx < Component::MAX_OPR_NUM);
560
561 if (CompOprIdx == Component::DST)
562 return getIndexOfDstInParsedOperands();
563
564 auto CompSrcIdx = CompOprIdx - Component::DST_NUM;
565 if (CompSrcIdx < getCompParsedSrcOperandsNum())
566 return getIndexOfSrcInParsedOperands(CompSrcIdx);
567
568 // The specified operand does not exist.
569 return 0;
570}
571
572std::optional<unsigned> InstInfo::getInvalidCompOperandIndex(
573 std::function<unsigned(unsigned, unsigned)> GetRegIdx) const {
574
575 auto OpXRegs = getRegIndices(ComponentIndex::X, GetRegIdx);
576 auto OpYRegs = getRegIndices(ComponentIndex::Y, GetRegIdx);
577
578 unsigned CompOprIdx;
579 for (CompOprIdx = 0; CompOprIdx < Component::MAX_OPR_NUM; ++CompOprIdx) {
580 unsigned BanksNum = BANKS_NUM[CompOprIdx];
581 if (OpXRegs[CompOprIdx] && OpYRegs[CompOprIdx] &&
582 (OpXRegs[CompOprIdx] % BanksNum == OpYRegs[CompOprIdx] % BanksNum))
583 return CompOprIdx;
584 }
585
586 return {};
587}
588
589// Return an array of VGPR registers [DST,SRC0,SRC1,SRC2] used
590// by the specified component. If an operand is unused
591// or is not a VGPR, the corresponding value is 0.
592//
593// GetRegIdx(Component, MCOperandIdx) must return a VGPR register index
594// for the specified component and MC operand. The callback must return 0
595// if the operand is not a register or not a VGPR.
596InstInfo::RegIndices InstInfo::getRegIndices(
597 unsigned CompIdx,
598 std::function<unsigned(unsigned, unsigned)> GetRegIdx) const {
599 assert(CompIdx < COMPONENTS_NUM);
600
601 const auto &Comp = CompInfo[CompIdx];
602 InstInfo::RegIndices RegIndices;
603
604 RegIndices[DST] = GetRegIdx(CompIdx, Comp.getIndexOfDstInMCOperands());
605
606 for (unsigned CompOprIdx : {SRC0, SRC1, SRC2}) {
607 unsigned CompSrcIdx = CompOprIdx - DST_NUM;
608 RegIndices[CompOprIdx] =
609 Comp.hasRegSrcOperand(CompSrcIdx)
610 ? GetRegIdx(CompIdx, Comp.getIndexOfSrcInMCOperands(CompSrcIdx))
611 : 0;
612 }
613 return RegIndices;
614}
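
// Illustrative callback sketch (names are assumptions, not this file's API):
// the GetRegIdx argument maps (component, MC operand index) to a VGPR index,
// returning 0 for anything that is not a VGPR.
//   auto GetRegIdx = [&](unsigned CompIdx, unsigned MCOprIdx) -> unsigned {
//     const MCOperand &Op = Inst[CompIdx]->getOperand(MCOprIdx);
//     return Op.isReg() ? vgprIndexOrZero(Op.getReg()) : 0;
//   };
//   auto BadCompOprIdx = VOPDInstInfo.getInvalidCompOperandIndex(GetRegIdx);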
615
616} // namespace VOPD
617
618VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY) {
619 return VOPD::InstInfo(OpX, OpY);
620}
621
622VOPD::InstInfo getVOPDInstInfo(unsigned VOPDOpcode,
623 const MCInstrInfo *InstrInfo) {
624 auto [OpX, OpY] = getVOPDComponents(VOPDOpcode);
625 const auto &OpXDesc = InstrInfo->get(OpX);
626 const auto &OpYDesc = InstrInfo->get(OpY);
627 VOPD::ComponentInfo OpXInfo(OpXDesc, VOPD::ComponentKind::COMPONENT_X);
628 VOPD::ComponentInfo OpYInfo(OpYDesc, OpXInfo);
629 return VOPD::InstInfo(OpXInfo, OpYInfo);
630}
631
632namespace IsaInfo {
633
634AMDGPUTargetID::AMDGPUTargetID(const MCSubtargetInfo &STI)
635 : STI(STI), XnackSetting(TargetIDSetting::Any),
636 SramEccSetting(TargetIDSetting::Any), CodeObjectVersion(0) {
637 if (!STI.getFeatureBits().test(FeatureSupportsXNACK))
638 XnackSetting = TargetIDSetting::Unsupported;
639 if (!STI.getFeatureBits().test(FeatureSupportsSRAMECC))
640 SramEccSetting = TargetIDSetting::Unsupported;
641}
642
643void AMDGPUTargetID::setTargetIDFromFeaturesString(StringRef FS) {
644 // Check if xnack or sramecc is explicitly enabled or disabled. In the
645 // absence of the target features we assume we must generate code that can run
646 // in any environment.
647 SubtargetFeatures Features(FS);
648 std::optional<bool> XnackRequested;
649 std::optional<bool> SramEccRequested;
650
651 for (const std::string &Feature : Features.getFeatures()) {
652 if (Feature == "+xnack")
653 XnackRequested = true;
654 else if (Feature == "-xnack")
655 XnackRequested = false;
656 else if (Feature == "+sramecc")
657 SramEccRequested = true;
658 else if (Feature == "-sramecc")
659 SramEccRequested = false;
660 }
661
662 bool XnackSupported = isXnackSupported();
663 bool SramEccSupported = isSramEccSupported();
664
665 if (XnackRequested) {
666 if (XnackSupported) {
667 XnackSetting =
668 *XnackRequested ? TargetIDSetting::On : TargetIDSetting::Off;
669 } else {
670 // If a specific xnack setting was requested and this GPU does not
671 // support xnack, emit a warning. The setting remains "Unsupported".
672 if (*XnackRequested) {
673 errs() << "warning: xnack 'On' was requested for a processor that does "
674 "not support it!\n";
675 } else {
676 errs() << "warning: xnack 'Off' was requested for a processor that "
677 "does not support it!\n";
678 }
679 }
680 }
681
682 if (SramEccRequested) {
683 if (SramEccSupported) {
684 SramEccSetting =
685 *SramEccRequested ? TargetIDSetting::On : TargetIDSetting::Off;
686 } else {
687 // If a specific sramecc setting was requested and this GPU does not
688 // support sramecc, emit a warning. The setting remains
689 // "Unsupported".
690 if (*SramEccRequested) {
691 errs() << "warning: sramecc 'On' was requested for a processor that "
692 "does not support it!\n";
693 } else {
694 errs() << "warning: sramecc 'Off' was requested for a processor that "
695 "does not support it!\n";
696 }
697 }
698 }
699}
700
701static TargetIDSetting
702getTargetIDSettingFromFeatureString(StringRef FeatureString) {
703 if (FeatureString.endswith("-"))
704 return TargetIDSetting::Off;
705 if (FeatureString.endswith("+"))
706 return TargetIDSetting::On;
707
708 llvm_unreachable("Malformed feature string");
709}
710
711void AMDGPUTargetID::setTargetIDFromTargetIDStream(StringRef TargetID) {
712 SmallVector<StringRef, 3> TargetIDSplit;
713 TargetID.split(TargetIDSplit, ':');
714
715 for (const auto &FeatureString : TargetIDSplit) {
716 if (FeatureString.startswith("xnack"))
717 XnackSetting = getTargetIDSettingFromFeatureString(FeatureString);
718 if (FeatureString.startswith("sramecc"))
719 SramEccSetting = getTargetIDSettingFromFeatureString(FeatureString);
720 }
721}
722
723std::string AMDGPUTargetID::toString() const {
724 std::string StringRep;
725 raw_string_ostream StreamRep(StringRep);
726
727 auto TargetTriple = STI.getTargetTriple();
728 auto Version = getIsaVersion(STI.getCPU());
729
730 StreamRep << TargetTriple.getArchName() << '-'
731 << TargetTriple.getVendorName() << '-'
732 << TargetTriple.getOSName() << '-'
733 << TargetTriple.getEnvironmentName() << '-';
734
735 std::string Processor;
736 // TODO: The following else branch is only needed because we used various
737 // alias names for GPUs up until GFX9 (e.g. 'fiji' is the same as 'gfx803').
738 // Remove it once all aliases are removed from GCNProcessors.td.
739 if (Version.Major >= 9)
740 Processor = STI.getCPU().str();
741 else
742 Processor = (Twine("gfx") + Twine(Version.Major) + Twine(Version.Minor) +
743 Twine(Version.Stepping))
744 .str();
745
746 std::string Features;
747 if (STI.getTargetTriple().getOS() == Triple::AMDHSA) {
748 switch (CodeObjectVersion) {
749 case AMDGPU::AMDHSA_COV2:
750 // Code object V2 only supported specific processors and had fixed
751 // settings for the XNACK.
752 if (Processor == "gfx600") {
753 } else if (Processor == "gfx601") {
754 } else if (Processor == "gfx602") {
755 } else if (Processor == "gfx700") {
756 } else if (Processor == "gfx701") {
757 } else if (Processor == "gfx702") {
758 } else if (Processor == "gfx703") {
759 } else if (Processor == "gfx704") {
760 } else if (Processor == "gfx705") {
761 } else if (Processor == "gfx801") {
762 if (!isXnackOnOrAny())
763 report_fatal_error(
764 "AMD GPU code object V2 does not support processor " +
765 Twine(Processor) + " without XNACK");
766 } else if (Processor == "gfx802") {
767 } else if (Processor == "gfx803") {
768 } else if (Processor == "gfx805") {
769 } else if (Processor == "gfx810") {
770 if (!isXnackOnOrAny())
771 report_fatal_error(
772 "AMD GPU code object V2 does not support processor " +
773 Twine(Processor) + " without XNACK");
774 } else if (Processor == "gfx900") {
775 if (isXnackOnOrAny())
776 Processor = "gfx901";
777 } else if (Processor == "gfx902") {
778 if (isXnackOnOrAny())
779 Processor = "gfx903";
780 } else if (Processor == "gfx904") {
781 if (isXnackOnOrAny())
782 Processor = "gfx905";
783 } else if (Processor == "gfx906") {
784 if (isXnackOnOrAny())
785 Processor = "gfx907";
786 } else if (Processor == "gfx90c") {
787 if (isXnackOnOrAny())
788 report_fatal_error(
789 "AMD GPU code object V2 does not support processor " +
790 Twine(Processor) + " with XNACK being ON or ANY");
791 } else {
792 report_fatal_error(
793 "AMD GPU code object V2 does not support processor " +
794 Twine(Processor));
795 }
796 break;
797 case AMDGPU::AMDHSA_COV3:
798 // xnack.
799 if (isXnackOnOrAny())
800 Features += "+xnack";
801 // In code object v2 and v3, "sramecc" feature was spelled with a
802 // hyphen ("sram-ecc").
803 if (isSramEccOnOrAny())
804 Features += "+sram-ecc";
805 break;
806 case AMDGPU::AMDHSA_COV4:
807 case AMDGPU::AMDHSA_COV5:
808 // sramecc.
809 if (getSramEccSetting() == TargetIDSetting::Off)
810 Features += ":sramecc-";
811 else if (getSramEccSetting() == TargetIDSetting::On)
812 Features += ":sramecc+";
813 // xnack.
814 if (getXnackSetting() == TargetIDSetting::Off)
815 Features += ":xnack-";
816 else if (getXnackSetting() == TargetIDSetting::On)
817 Features += ":xnack+";
818 break;
819 default:
820 break;
821 }
822 }
823
824 StreamRep << Processor << Features;
825
826 StreamRep.flush();
827 return StringRep;
828}
829
830unsigned getWavefrontSize(const MCSubtargetInfo *STI) {
831 if (STI->getFeatureBits().test(FeatureWavefrontSize16))
832 return 16;
833 if (STI->getFeatureBits().test(FeatureWavefrontSize32))
834 return 32;
835
836 return 64;
837}
838
839unsigned getLocalMemorySize(const MCSubtargetInfo *STI) {
840 unsigned BytesPerCU = 0;
841 if (STI->getFeatureBits().test(FeatureLocalMemorySize32768))
842 BytesPerCU = 32768;
843 if (STI->getFeatureBits().test(FeatureLocalMemorySize65536))
844 BytesPerCU = 65536;
845
846 // "Per CU" really means "per whatever functional block the waves of a
847 // workgroup must share". So the effective local memory size is doubled in
848 // WGP mode on gfx10.
849 if (isGFX10Plus(*STI) && !STI->getFeatureBits().test(FeatureCuMode))
850 BytesPerCU *= 2;
851
852 return BytesPerCU;
853}
854
855unsigned getAddressableLocalMemorySize(const MCSubtargetInfo *STI) {
856 if (STI->getFeatureBits().test(FeatureLocalMemorySize32768))
857 return 32768;
858 if (STI->getFeatureBits().test(FeatureLocalMemorySize65536))
859 return 65536;
860 return 0;
861}
862
863unsigned getEUsPerCU(const MCSubtargetInfo *STI) {
864 // "Per CU" really means "per whatever functional block the waves of a
865 // workgroup must share". For gfx10 in CU mode this is the CU, which contains
866 // two SIMDs.
867 if (isGFX10Plus(*STI) && STI->getFeatureBits().test(FeatureCuMode))
868 return 2;
869 // Pre-gfx10 a CU contains four SIMDs. For gfx10 in WGP mode the WGP contains
870 // two CUs, so a total of four SIMDs.
871 return 4;
872}
873
874unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
875 unsigned FlatWorkGroupSize) {
876 assert(FlatWorkGroupSize != 0);
877 if (STI->getTargetTriple().getArch() != Triple::amdgcn)
878 return 8;
879 unsigned MaxWaves = getMaxWavesPerEU(STI) * getEUsPerCU(STI);
880 unsigned N = getWavesPerWorkGroup(STI, FlatWorkGroupSize);
881 if (N == 1) {
882 // Single-wave workgroups don't consume barrier resources.
883 return MaxWaves;
884 }
885
886 unsigned MaxBarriers = 16;
887 if (isGFX10Plus(*STI) && !STI->getFeatureBits().test(FeatureCuMode))
888 MaxBarriers = 32;
889
890 return std::min(MaxWaves / N, MaxBarriers);
891}
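
// Worked example (pre-gfx10 numbers): with wave64 and FlatWorkGroupSize = 256,
// N = 4 waves per group, MaxWaves = 10 waves/EU * 4 EUs/CU = 40, and 16
// barriers, so min(40 / 4, 16) = 10 workgroups can share a CU.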
892
893unsigned getMinWavesPerEU(const MCSubtargetInfo *STI) {
894 return 1;
895}
896
897unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI) {
898 // FIXME: Need to take scratch memory into account.
899 if (isGFX90A(*STI))
900 return 8;
901 if (!isGFX10Plus(*STI))
902 return 10;
903 return hasGFX10_3Insts(*STI) ? 16 : 20;
904}
905
906unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI,
907 unsigned FlatWorkGroupSize) {
908 return divideCeil(getWavesPerWorkGroup(STI, FlatWorkGroupSize),
909 getEUsPerCU(STI));
910}
911
912unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI) {
913 return 1;
914}
915
916unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI) {
917 // Some subtargets allow encoding 2048, but this isn't tested or supported.
918 return 1024;
919}
920
921unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
922 unsigned FlatWorkGroupSize) {
923 return divideCeil(FlatWorkGroupSize, getWavefrontSize(STI));
924}
925
926unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI) {
927 IsaVersion Version = getIsaVersion(STI->getCPU());
928 if (Version.Major >= 10)
929 return getAddressableNumSGPRs(STI);
930 if (Version.Major >= 8)
931 return 16;
932 return 8;
933}
934
935unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI) {
936 return 8;
937}
938
939unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI) {
940 IsaVersion Version = getIsaVersion(STI->getCPU());
941 if (Version.Major >= 8)
942 return 800;
943 return 512;
944}
945
946unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI) {
947 if (STI->getFeatureBits().test(FeatureSGPRInitBug))
948 return FIXED_NUM_SGPRS_FOR_INIT_BUG;
949
950 IsaVersion Version = getIsaVersion(STI->getCPU());
951 if (Version.Major >= 10)
952 return 106;
953 if (Version.Major >= 8)
954 return 102;
955 return 104;
956}
957
958unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
959 assert(WavesPerEU != 0);
960
961 IsaVersion Version = getIsaVersion(STI->getCPU());
962 if (Version.Major >= 10)
963 return 0;
964
965 if (WavesPerEU >= getMaxWavesPerEU(STI))
966 return 0;
967
968 unsigned MinNumSGPRs = getTotalNumSGPRs(STI) / (WavesPerEU + 1);
969 if (STI->getFeatureBits().test(FeatureTrapHandler))
970 MinNumSGPRs -= std::min(MinNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
971 MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(STI)) + 1;
972 return std::min(MinNumSGPRs, getAddressableNumSGPRs(STI));
973}
974
975unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
976 bool Addressable) {
977 assert(WavesPerEU != 0);
978
979 unsigned AddressableNumSGPRs = getAddressableNumSGPRs(STI);
980 IsaVersion Version = getIsaVersion(STI->getCPU());
981 if (Version.Major >= 10)
982 return Addressable ? AddressableNumSGPRs : 108;
983 if (Version.Major >= 8 && !Addressable)
984 AddressableNumSGPRs = 112;
985 unsigned MaxNumSGPRs = getTotalNumSGPRs(STI) / WavesPerEU;
986 if (STI->getFeatureBits().test(FeatureTrapHandler))
987 MaxNumSGPRs -= std::min(MaxNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
988 MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(STI));
989 return std::min(MaxNumSGPRs, AddressableNumSGPRs);
990}
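
// Worked example (gfx9, no trap handler): getTotalNumSGPRs = 800, so at
// WavesPerEU = 8 each wave may use alignDown(800 / 8, 16) = 96 SGPRs, which
// is below the 102 addressable SGPRs on gfx9.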
991
992unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
993 bool FlatScrUsed, bool XNACKUsed) {
994 unsigned ExtraSGPRs = 0;
995 if (VCCUsed)
996 ExtraSGPRs = 2;
997
998 IsaVersion Version = getIsaVersion(STI->getCPU());
999 if (Version.Major >= 10)
1000 return ExtraSGPRs;
1001
1002 if (Version.Major < 8) {
1003 if (FlatScrUsed)
1004 ExtraSGPRs = 4;
1005 } else {
1006 if (XNACKUsed)
1007 ExtraSGPRs = 4;
1008
1009 if (FlatScrUsed ||
1010 STI->getFeatureBits().test(AMDGPU::FeatureArchitectedFlatScratch))
1011 ExtraSGPRs = 6;
1012 }
1013
1014 return ExtraSGPRs;
1015}
1016
1017unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
1018 bool FlatScrUsed) {
1019 return getNumExtraSGPRs(STI, VCCUsed, FlatScrUsed,
1020 STI->getFeatureBits().test(AMDGPU::FeatureXNACK));
1021}
1022
1023unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs) {
1024 NumSGPRs = alignTo(std::max(1u, NumSGPRs), getSGPREncodingGranule(STI));
1025 // SGPRBlocks is actual number of SGPR blocks minus 1.
1026 return NumSGPRs / getSGPREncodingGranule(STI) - 1;
1027}
1028
1029unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
1030 std::optional<bool> EnableWavefrontSize32) {
1031 if (STI->getFeatureBits().test(FeatureGFX90AInsts))
1032 return 8;
1033
1034 bool IsWave32 = EnableWavefrontSize32 ?
1035 *EnableWavefrontSize32 :
1036 STI->getFeatureBits().test(FeatureWavefrontSize32);
1037
1038 if (STI->getFeatureBits().test(FeatureGFX11FullVGPRs))
1039 return IsWave32 ? 24 : 12;
1040
1041 if (hasGFX10_3Insts(*STI))
1042 return IsWave32 ? 16 : 8;
1043
1044 return IsWave32 ? 8 : 4;
1045}
1046
1047unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
1048 std::optional<bool> EnableWavefrontSize32) {
1049 if (STI->getFeatureBits().test(FeatureGFX90AInsts))
1050 return 8;
1051
1052 bool IsWave32 = EnableWavefrontSize32 ?
1053 *EnableWavefrontSize32 :
1054 STI->getFeatureBits().test(FeatureWavefrontSize32);
1055
1056 return IsWave32 ? 8 : 4;
1057}
1058
1059unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) {
1060 if (STI->getFeatureBits().test(FeatureGFX90AInsts))
1061 return 512;
1062 if (!isGFX10Plus(*STI))
1063 return 256;
1064 bool IsWave32 = STI->getFeatureBits().test(FeatureWavefrontSize32);
1065 if (STI->getFeatureBits().test(FeatureGFX11FullVGPRs))
1066 return IsWave32 ? 1536 : 768;
1067 return IsWave32 ? 1024 : 512;
1068}
1069
1070unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI) {
1071 if (STI->getFeatureBits().test(FeatureGFX90AInsts))
1072 return 512;
1073 return 256;
1074}
1075
1076unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI,
1077 unsigned NumVGPRs) {
1078 unsigned MaxWaves = getMaxWavesPerEU(STI);
1079 unsigned Granule = getVGPRAllocGranule(STI);
1080 if (NumVGPRs < Granule)
1081 return MaxWaves;
1082 unsigned RoundedRegs = alignTo(NumVGPRs, Granule);
1083 return std::min(std::max(getTotalNumVGPRs(STI) / RoundedRegs, 1u), MaxWaves);
1084}
1085
1086unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
1087 assert(WavesPerEU != 0);
1088
1089 unsigned MaxWavesPerEU = getMaxWavesPerEU(STI);
1090 if (WavesPerEU >= MaxWavesPerEU)
1091 return 0;
1092
1093 unsigned TotNumVGPRs = getTotalNumVGPRs(STI);
1094 unsigned AddrsableNumVGPRs = getAddressableNumVGPRs(STI);
1095 unsigned Granule = getVGPRAllocGranule(STI);
1096 unsigned MaxNumVGPRs = alignDown(TotNumVGPRs / WavesPerEU, Granule);
1097
1098 if (MaxNumVGPRs == alignDown(TotNumVGPRs / MaxWavesPerEU, Granule))
1099 return 0;
1100
1101 unsigned MinWavesPerEU = getNumWavesPerEUWithNumVGPRs(STI, AddrsableNumVGPRs);
1102 if (WavesPerEU < MinWavesPerEU)
1103 return getMinNumVGPRs(STI, MinWavesPerEU);
1104
1105 unsigned MaxNumVGPRsNext = alignDown(TotNumVGPRs / (WavesPerEU + 1), Granule);
1106 unsigned MinNumVGPRs = 1 + std::min(MaxNumVGPRs - Granule, MaxNumVGPRsNext);
1107 return std::min(MinNumVGPRs, AddrsableNumVGPRs);
1108}
1109
1110unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
1111 assert(WavesPerEU != 0);
1112
1113 unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(STI) / WavesPerEU,
1114 getVGPRAllocGranule(STI));
1115 unsigned AddressableNumVGPRs = getAddressableNumVGPRs(STI);
1116 return std::min(MaxNumVGPRs, AddressableNumVGPRs);
1117}
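
// Worked example (gfx900, wave64): getTotalNumVGPRs = 256 and the allocation
// granule is 4, so at WavesPerEU = 5 each wave may use
// alignDown(256 / 5, 4) = 48 VGPRs.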
1118
1119unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs,
1120 std::optional<bool> EnableWavefrontSize32) {
1121 NumVGPRs = alignTo(std::max(1u, NumVGPRs),
1122 getVGPREncodingGranule(STI, EnableWavefrontSize32));
1123 // VGPRBlocks is actual number of VGPR blocks minus 1.
1124 return NumVGPRs / getVGPREncodingGranule(STI, EnableWavefrontSize32) - 1;
1125}
1126
1127} // end namespace IsaInfo
1128
1129void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
1130 const MCSubtargetInfo *STI) {
1131 IsaVersion Version = getIsaVersion(STI->getCPU());
1132
1133 memset(&Header, 0, sizeof(Header));
1134
1135 Header.amd_kernel_code_version_major = 1;
1136 Header.amd_kernel_code_version_minor = 2;
1137 Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
1138 Header.amd_machine_version_major = Version.Major;
1139 Header.amd_machine_version_minor = Version.Minor;
1140 Header.amd_machine_version_stepping = Version.Stepping;
1141 Header.kernel_code_entry_byte_offset = sizeof(Header);
1142 Header.wavefront_size = 6;
1143
1144 // If the code object does not support indirect functions, then the value must
1145 // be 0xffffffff.
1146 Header.call_convention = -1;
1147
1148 // These alignment values are specified in powers of two, so alignment =
1149 // 2^n. The minimum alignment is 2^4 = 16.
1150 Header.kernarg_segment_alignment = 4;
1151 Header.group_segment_alignment = 4;
1152 Header.private_segment_alignment = 4;
1153
1154 if (Version.Major >= 10) {
1155 if (STI->getFeatureBits().test(FeatureWavefrontSize32)) {
1156 Header.wavefront_size = 5;
1157 Header.code_properties |= AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
1158 }
1159 Header.compute_pgm_resource_registers |=
1160 S_00B848_WGP_MODE(STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1) |
1162 }
1163}
1164
1165amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(
1166 const MCSubtargetInfo *STI) {
1167 IsaVersion Version = getIsaVersion(STI->getCPU());
1168
1169 amdhsa::kernel_descriptor_t KD;
1170 memset(&KD, 0, sizeof(KD));
1171
1172 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
1173 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64,
1174 amdhsa::FLOAT_DENORM_MODE_FLUSH_NONE);
1175 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
1176 amdhsa::COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, 1);
1177 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
1178 amdhsa::COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 1);
1179 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2,
1180 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, 1);
1181 if (Version.Major >= 10) {
1182 AMDHSA_BITS_SET(KD.kernel_code_properties,
1183 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
1184 STI->getFeatureBits().test(FeatureWavefrontSize32) ? 1 : 0);
1185 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
1186 amdhsa::COMPUTE_PGM_RSRC1_WGP_MODE,
1187 STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1);
1188 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
1189 amdhsa::COMPUTE_PGM_RSRC1_MEM_ORDERED, 1);
1190 }
1191 if (AMDGPU::isGFX90A(*STI)) {
1192 AMDHSA_BITS_SET(KD.compute_pgm_rsrc3,
1193 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
1194 STI->getFeatureBits().test(FeatureTgSplit) ? 1 : 0);
1195 }
1196 return KD;
1197}
1198
1199bool isGroupSegment(const GlobalValue *GV) {
1200 return GV->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
1201}
1202
1203bool isGlobalSegment(const GlobalValue *GV) {
1204 return GV->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
1205}
1206
1207bool isReadOnlySegment(const GlobalValue *GV) {
1208 unsigned AS = GV->getAddressSpace();
1209 return AS == AMDGPUAS::CONSTANT_ADDRESS ||
1210 AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
1211}
1212
1213bool shouldEmitConstantsToTextSection(const Triple &TT) {
1214 return TT.getArch() == Triple::r600;
1215}
1216
1217std::pair<int, int> getIntegerPairAttribute(const Function &F,
1218 StringRef Name,
1219 std::pair<int, int> Default,
1220 bool OnlyFirstRequired) {
1221 Attribute A = F.getFnAttribute(Name);
1222 if (!A.isStringAttribute())
1223 return Default;
1224
1225 LLVMContext &Ctx = F.getContext();
1226 std::pair<int, int> Ints = Default;
1227 std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(',');
1228 if (Strs.first.trim().getAsInteger(0, Ints.first)) {
1229 Ctx.emitError("can't parse first integer attribute " + Name);
1230 return Default;
1231 }
1232 if (Strs.second.trim().getAsInteger(0, Ints.second)) {
1233 if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
1234 Ctx.emitError("can't parse second integer attribute " + Name);
1235 return Default;
1236 }
1237 }
1238
1239 return Ints;
1240}
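
// Illustrative use (attribute value assumed): for a function carrying
// "amdgpu-flat-work-group-size"="128,256",
//   getIntegerPairAttribute(F, "amdgpu-flat-work-group-size", {1, 1024})
// yields {128, 256}; a malformed string emits an error and returns the
// default pair.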
1241
1242unsigned getVmcntBitMask(const IsaVersion &Version) {
1243 return (1 << (getVmcntBitWidthLo(Version.Major) +
1244 getVmcntBitWidthHi(Version.Major))) -
1245 1;
1246}
1247
1248unsigned getExpcntBitMask(const IsaVersion &Version) {
1249 return (1 << getExpcntBitWidth(Version.Major)) - 1;
1250}
1251
1252unsigned getLgkmcntBitMask(const IsaVersion &Version) {
1253 return (1 << getLgkmcntBitWidth(Version.Major)) - 1;
1254}
1255
1256unsigned getWaitcntBitMask(const IsaVersion &Version) {
1257 unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(Version.Major),
1258 getVmcntBitWidthLo(Version.Major));
1259 unsigned Expcnt = getBitMask(getExpcntBitShift(Version.Major),
1260 getExpcntBitWidth(Version.Major));
1261 unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(Version.Major),
1262 getLgkmcntBitWidth(Version.Major));
1263 unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(Version.Major),
1264 getVmcntBitWidthHi(Version.Major));
1265 return VmcntLo | Expcnt | Lgkmcnt | VmcntHi;
1266}
1267
1268unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt) {
1269 unsigned VmcntLo = unpackBits(Waitcnt, getVmcntBitShiftLo(Version.Major),
1270 getVmcntBitWidthLo(Version.Major));
1271 unsigned VmcntHi = unpackBits(Waitcnt, getVmcntBitShiftHi(Version.Major),
1272 getVmcntBitWidthHi(Version.Major));
1273 return VmcntLo | VmcntHi << getVmcntBitWidthLo(Version.Major);
1274}
1275
1276unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt) {
1277 return unpackBits(Waitcnt, getExpcntBitShift(Version.Major),
1278 getExpcntBitWidth(Version.Major));
1279}
1280
1281unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt) {
1282 return unpackBits(Waitcnt, getLgkmcntBitShift(Version.Major),
1283 getLgkmcntBitWidth(Version.Major));
1284}
1285
1286void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
1287 unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
1288 Vmcnt = decodeVmcnt(Version, Waitcnt);
1289 Expcnt = decodeExpcnt(Version, Waitcnt);
1290 Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
1291}
1292
1293Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded) {
1294 Waitcnt Decoded;
1295 Decoded.VmCnt = decodeVmcnt(Version, Encoded);
1296 Decoded.ExpCnt = decodeExpcnt(Version, Encoded);
1297 Decoded.LgkmCnt = decodeLgkmcnt(Version, Encoded);
1298 return Decoded;
1299}
1300
1301unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
1302 unsigned Vmcnt) {
1303 Waitcnt = packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(Version.Major),
1304 getVmcntBitWidthLo(Version.Major));
1305 return packBits(Vmcnt >> getVmcntBitWidthLo(Version.Major), Waitcnt,
1306 getVmcntBitShiftHi(Version.Major),
1307 getVmcntBitWidthHi(Version.Major));
1308}
1309
1310unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
1311 unsigned Expcnt) {
1312 return packBits(Expcnt, Waitcnt, getExpcntBitShift(Version.Major),
1313 getExpcntBitWidth(Version.Major));
1314}
1315
1316unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
1317 unsigned Lgkmcnt) {
1318 return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(Version.Major),
1319 getLgkmcntBitWidth(Version.Major));
1320}
1321
1322unsigned encodeWaitcnt(const IsaVersion &Version,
1323 unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
1324 unsigned Waitcnt = getWaitcntBitMask(Version);
1325 Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
1326 Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
1327 Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
1328 return Waitcnt;
1329}
1330
1331unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded) {
1332 return encodeWaitcnt(Version, Decoded.VmCnt, Decoded.ExpCnt, Decoded.LgkmCnt);
1333}
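
// Worked example (gfx9 field layout: vmcnt in bits 3:0 and 15:14, expcnt in
// bits 6:4, lgkmcnt in bits 11:8): encodeWaitcnt(Version, /*Vmcnt=*/0,
// /*Expcnt=*/7, /*Lgkmcnt=*/0) produces 0x0070, the familiar
//   s_waitcnt vmcnt(0) lgkmcnt(0)
// encoding, in which expcnt stays at its no-wait maximum.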
1334
1335//===----------------------------------------------------------------------===//
1336// Custom Operands.
1337//
1338// A table of custom operands shall list "primary" operand names first,
1339// followed by aliases if any. It is not required but recommended to
1340// arrange operands so that an operand's encoding matches its position
1341// in the table; this makes disassembly a bit more efficient.
1342// Unused slots in the table shall have an empty name.
1343//
1344//===----------------------------------------------------------------------===//
1345
1346template <class T>
1347static bool isValidOpr(int Idx, const CustomOperand<T> OpInfo[], int OpInfoSize,
1348 T Context) {
1349 return 0 <= Idx && Idx < OpInfoSize && !OpInfo[Idx].Name.empty() &&
1350 (!OpInfo[Idx].Cond || OpInfo[Idx].Cond(Context));
1351}
1352
1353template <class T>
1354static int getOprIdx(std::function<bool(const CustomOperand<T> &)> Test,
1355 const CustomOperand<T> OpInfo[], int OpInfoSize,
1356 T Context) {
1357 int InvalidIdx = OPR_ID_UNKNOWN;
1358 for (int Idx = 0; Idx < OpInfoSize; ++Idx) {
1359 if (Test(OpInfo[Idx])) {
1360 if (!OpInfo[Idx].Cond || OpInfo[Idx].Cond(Context))
1361 return Idx;
1362 InvalidIdx = OPR_ID_UNSUPPORTED;
1363 }
1364 }
1365 return InvalidIdx;
1366}
1367
1368template <class T>
1369static int getOprIdx(const StringRef Name, const CustomOperand<T> OpInfo[],
1370 int OpInfoSize, T Context) {
1371 auto Test = [=](const CustomOperand<T> &Op) { return Op.Name == Name; };
1372 return getOprIdx<T>(Test, OpInfo, OpInfoSize, Context);
1373}
1374
1375template <class T>
1376static int getOprIdx(int Id, const CustomOperand<T> OpInfo[], int OpInfoSize,
1377 T Context, bool QuickCheck = true) {
1378 auto Test = [=](const CustomOperand<T> &Op) {
1379 return Op.Encoding == Id && !Op.Name.empty();
1380 };
1381 // This is an optimization that should work in most cases.
1382 // As a side effect, it may cause selection of an alias
1383 // instead of a primary operand name in case of sparse tables.
1384 if (QuickCheck && isValidOpr<T>(Id, OpInfo, OpInfoSize, Context) &&
1385 OpInfo[Id].Encoding == Id) {
1386 return Id;
1387 }
1388 return getOprIdx<T>(Test, OpInfo, OpInfoSize, Context);
1389}
1390
1391//===----------------------------------------------------------------------===//
1392// Custom Operand Values
1393//===----------------------------------------------------------------------===//
1394
1395static unsigned getDefaultCustomOperandEncoding(const CustomOperandVal *Opr,
1396 int Size,
1397 const MCSubtargetInfo &STI) {
1398 unsigned Enc = 0;
1399 for (int Idx = 0; Idx < Size; ++Idx) {
1400 const auto &Op = Opr[Idx];
1401 if (Op.isSupported(STI))
1402 Enc |= Op.encode(Op.Default);
1403 }
1404 return Enc;
1405}
1406
1407static bool isSymbolicCustomOperandEncoding(const CustomOperandVal *Opr,
1408 int Size, unsigned Code,
1409 bool &HasNonDefaultVal,
1410 const MCSubtargetInfo &STI) {
1411 unsigned UsedOprMask = 0;
1412 HasNonDefaultVal = false;
1413 for (int Idx = 0; Idx < Size; ++Idx) {
1414 const auto &Op = Opr[Idx];
1415 if (!Op.isSupported(STI))
1416 continue;
1417 UsedOprMask |= Op.getMask();
1418 unsigned Val = Op.decode(Code);
1419 if (!Op.isValid(Val))
1420 return false;
1421 HasNonDefaultVal |= (Val != Op.Default);
1422 }
1423 return (Code & ~UsedOprMask) == 0;
1424}
1425
1426static bool decodeCustomOperand(const CustomOperandVal *Opr, int Size,
1427 unsigned Code, int &Idx, StringRef &Name,
1428 unsigned &Val, bool &IsDefault,
1429 const MCSubtargetInfo &STI) {
1430 while (Idx < Size) {
1431 const auto &Op = Opr[Idx++];
1432 if (Op.isSupported(STI)) {
1433 Name = Op.Name;
1434 Val = Op.decode(Code);
1435 IsDefault = (Val == Op.Default);
1436 return true;
1437 }
1438 }
1439
1440 return false;
1441}
1442
1443static int encodeCustomOperandVal(const CustomOperandVal &Op,
1444 int64_t InputVal) {
1445 if (InputVal < 0 || InputVal > Op.Max)
1446 return OPR_VAL_INVALID;
1447 return Op.encode(InputVal);
1448}
1449
1450static int encodeCustomOperand(const CustomOperandVal *Opr, int Size,
1451 const StringRef Name, int64_t InputVal,
1452 unsigned &UsedOprMask,
1453 const MCSubtargetInfo &STI) {
1454 int InvalidId = OPR_ID_UNKNOWN;
1455 for (int Idx = 0; Idx < Size; ++Idx) {
1456 const auto &Op = Opr[Idx];
1457 if (Op.Name == Name) {
1458 if (!Op.isSupported(STI)) {
1459 InvalidId = OPR_ID_UNSUPPORTED;
1460 continue;
1461 }
1462 auto OprMask = Op.getMask();
1463 if (OprMask & UsedOprMask)
1464 return OPR_ID_DUPLICATE;
1465 UsedOprMask |= OprMask;
1466 return encodeCustomOperandVal(Op, InputVal);
1467 }
1468 }
1469 return InvalidId;
1470}
1471
1472//===----------------------------------------------------------------------===//
1473// DepCtr
1474//===----------------------------------------------------------------------===//
1475
1476namespace DepCtr {
1477
1478int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI) {
1479 static int Default = -1;
1480 if (Default == -1)
1481 Default = getDefaultCustomOperandEncoding(DepCtrInfo, DEP_CTR_SIZE, STI);
1482 return Default;
1483}
1484
1485bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal,
1486 const MCSubtargetInfo &STI) {
1487 return isSymbolicCustomOperandEncoding(DepCtrInfo, DEP_CTR_SIZE, Code,
1488 HasNonDefaultVal, STI);
1489}
1490
1491bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val,
1492 bool &IsDefault, const MCSubtargetInfo &STI) {
1493 return decodeCustomOperand(DepCtrInfo, DEP_CTR_SIZE, Code, Id, Name, Val,
1494 IsDefault, STI);
1495}
1496
1497int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask,
1498 const MCSubtargetInfo &STI) {
1499 return encodeCustomOperand(DepCtrInfo, DEP_CTR_SIZE, Name, Val, UsedOprMask,
1500 STI);
1501}
1502
1503} // namespace DepCtr
1504
1505//===----------------------------------------------------------------------===//
1506// hwreg
1507//===----------------------------------------------------------------------===//
1508
1509namespace Hwreg {
1510
1511int64_t getHwregId(const StringRef Name, const MCSubtargetInfo &STI) {
1512 int Idx = getOprIdx<const MCSubtargetInfo &>(Name, Opr, OPR_SIZE, STI);
1513 return (Idx < 0) ? Idx : Opr[Idx].Encoding;
1514}
1515
1516bool isValidHwreg(int64_t Id) {
1517 return 0 <= Id && isUInt<ID_WIDTH_>(Id);
1518}
1519
1520bool isValidHwregOffset(int64_t Offset) {
1521 return 0 <= Offset && isUInt<OFFSET_WIDTH_>(Offset);
1522}
1523
1524bool isValidHwregWidth(int64_t Width) {
1525 return 0 <= (Width - 1) && isUInt<WIDTH_M1_WIDTH_>(Width - 1);
1526}
1527
1528uint64_t encodeHwreg(uint64_t Id, uint64_t Offset, uint64_t Width) {
1529 return (Id << ID_SHIFT_) |
1530 (Offset << OFFSET_SHIFT_) |
1531 ((Width - 1) << WIDTH_M1_SHIFT_);
1532}
1533
1534StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI) {
1535 int Idx = getOprIdx<const MCSubtargetInfo &>(Id, Opr, OPR_SIZE, STI);
1536 return (Idx < 0) ? "" : Opr[Idx].Name;
1537}
1538
1539void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width) {
1540 Id = (Val & ID_MASK_) >> ID_SHIFT_;
1541 Offset = (Val & OFFSET_MASK_) >> OFFSET_SHIFT_;
1542 Width = ((Val & WIDTH_M1_MASK_) >> WIDTH_M1_SHIFT_) + 1;
1543}
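
// Roundtrip sketch (register id 1 is illustrative): encodeHwreg(1, 0, 32)
// yields (1 << ID_SHIFT_) | (0 << OFFSET_SHIFT_) | (31 << WIDTH_M1_SHIFT_),
// and decodeHwreg recovers Id = 1, Offset = 0, Width = 32.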
1544
1545} // namespace Hwreg
1546
1547//===----------------------------------------------------------------------===//
1548// exp tgt
1549//===----------------------------------------------------------------------===//
1550
1551namespace Exp {
1552
1553struct ExpTgt {
1554 StringLiteral Name;
1555 unsigned Tgt;
1556 unsigned MaxIndex;
1557};
1558
1559static constexpr ExpTgt ExpTgtInfo[] = {
1560 {{"null"}, ET_NULL, ET_NULL_MAX_IDX},
1561 {{"mrtz"}, ET_MRTZ, ET_MRTZ_MAX_IDX},
1562 {{"prim"}, ET_PRIM, ET_PRIM_MAX_IDX},
1563 {{"mrt"}, ET_MRT0, ET_MRT_MAX_IDX},
1564 {{"pos"}, ET_POS0, ET_POS_MAX_IDX},
1565 {{"dual_src_blend"}, ET_DUAL_SRC_BLEND0, ET_DUAL_SRC_BLEND_MAX_IDX},
1566 {{"param"}, ET_PARAM0, ET_PARAM_MAX_IDX},
1567};
1568
1569bool getTgtName(unsigned Id, StringRef &Name, int &Index) {
1570 for (const ExpTgt &Val : ExpTgtInfo) {
1571 if (Val.Tgt <= Id && Id <= Val.Tgt + Val.MaxIndex) {
1572 Index = (Val.MaxIndex == 0) ? -1 : (Id - Val.Tgt);
1573 Name = Val.Name;
1574 return true;
1575 }
1576 }
1577 return false;
1578}
1579
1580unsigned getTgtId(const StringRef Name) {
1581
1582 for (const ExpTgt &Val : ExpTgtInfo) {
1583 if (Val.MaxIndex == 0 && Name == Val.Name)
1584 return Val.Tgt;
1585
1586 if (Val.MaxIndex > 0 && Name.startswith(Val.Name)) {
1587 StringRef Suffix = Name.drop_front(Val.Name.size());
1588
1589 unsigned Id;
1590 if (Suffix.getAsInteger(10, Id) || Id > Val.MaxIndex)
1591 return ET_INVALID;
1592
1593 // Reject leading zeroes.
1594 if (Suffix.size() > 1 && Suffix[0] == '0')
1595 return ET_INVALID;
1596
1597 return Val.Tgt + Id;
1598 }
1599 }
1600 return ET_INVALID;
1601}
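
// Examples of the name/index mapping above: getTgtId("mrt0") == ET_MRT0,
// getTgtId("pos2") == ET_POS0 + 2, while "mrt08" (leading zero) and "pos5"
// (beyond ET_POS_MAX_IDX) both return ET_INVALID.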
1602
1603bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI) {
1604 switch (Id) {
1605 case ET_NULL:
1606 return !isGFX11Plus(STI);
1607 case ET_POS4:
1608 case ET_PRIM:
1609 return isGFX10Plus(STI);
1610 case ET_DUAL_SRC_BLEND0:
1611 case ET_DUAL_SRC_BLEND1:
1612 return isGFX11Plus(STI);
1613 default:
1614 if (Id >= ET_PARAM0 && Id <= ET_PARAM31)
1615 return !isGFX11Plus(STI);
1616 return true;
1617 }
1618}
1619
1620} // namespace Exp
1621
1622//===----------------------------------------------------------------------===//
1623// MTBUF Format
1624//===----------------------------------------------------------------------===//
1625
1626namespace MTBUFFormat {
1627
1628int64_t getDfmt(const StringRef Name) {
1629 for (int Id = DFMT_MIN; Id <= DFMT_MAX; ++Id) {
1630 if (Name == DfmtSymbolic[Id])
1631 return Id;
1632 }
1633 return DFMT_UNDEF;
1634}
1635
1636StringRef getDfmtName(unsigned Id) {
1637 assert(Id <= DFMT_MAX);
1638 return DfmtSymbolic[Id];
1639}
1640
1641static StringLiteral const *getNfmtLookupTable(const MCSubtargetInfo &STI) {
1642 if (isSI(STI) || isCI(STI))
1643 return NfmtSymbolicSICI;
1644 if (isVI(STI) || isGFX9(STI))
1645 return NfmtSymbolicVI;
1646 return NfmtSymbolicGFX10;
1647}
1648
1649int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI) {
1650 auto lookupTable = getNfmtLookupTable(STI);
1651 for (int Id = NFMT_MIN; Id <= NFMT_MAX; ++Id) {
1652 if (Name == lookupTable[Id])
1653 return Id;
1654 }
1655 return NFMT_UNDEF;
1656}
1657
1658StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI) {
1659 assert(Id <= NFMT_MAX);
1660 return getNfmtLookupTable(STI)[Id];
1661}
1662
1663bool isValidDfmtNfmt(unsigned Id, const MCSubtargetInfo &STI) {
1664 unsigned Dfmt;
1665 unsigned Nfmt;
1666 decodeDfmtNfmt(Id, Dfmt, Nfmt);
1667 return isValidNfmt(Nfmt, STI);
1668}
1669
1670bool isValidNfmt(unsigned Id, const MCSubtargetInfo &STI) {
1671 return !getNfmtName(Id, STI).empty();
1672}
1673
1674int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt) {
1675 return (Dfmt << DFMT_SHIFT) | (Nfmt << NFMT_SHIFT);
1676}
1677
1678void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt) {
1679 Dfmt = (Format >> DFMT_SHIFT) & DFMT_MASK;
1680 Nfmt = (Format >> NFMT_SHIFT) & NFMT_MASK;
1681}
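
// Roundtrip sketch (format values illustrative): with Dfmt = 4 and Nfmt = 2,
// encodeDfmtNfmt(4, 2) == (4 << DFMT_SHIFT) | (2 << NFMT_SHIFT), and
// decodeDfmtNfmt recovers Dfmt == 4 and Nfmt == 2.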
1682
1683int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI) {
1684 if (isGFX11Plus(STI)) {
1685 for (int Id = UfmtGFX11::UFMT_FIRST; Id <= UfmtGFX11::UFMT_LAST; ++Id) {
1686 if (Name == UfmtSymbolicGFX11[Id])
1687 return Id;
1688 }
1689 } else {
1690 for (int Id = UfmtGFX10::UFMT_FIRST; Id <= UfmtGFX10::UFMT_LAST; ++Id) {
1691 if (Name == UfmtSymbolicGFX10[Id])
1692 return Id;
1693 }
1694 }
1695 return UFMT_UNDEF;
1696}
1697
1698StringRef getUnifiedFormatName(unsigned Id, const MCSubtargetInfo &STI) {
1699 if (isValidUnifiedFormat(Id, STI))
1700 return isGFX10(STI) ? UfmtSymbolicGFX10[Id] : UfmtSymbolicGFX11[Id];
1701 return "";
1702}
1703
1704bool isValidUnifiedFormat(unsigned Id, const MCSubtargetInfo &STI) {
1705 return isGFX10(STI) ? Id <= UfmtGFX10::UFMT_LAST : Id <= UfmtGFX11::UFMT_LAST;
1706}
1707
1708int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt,
1709 const MCSubtargetInfo &STI) {
1710 int64_t Fmt = encodeDfmtNfmt(Dfmt, Nfmt);
1711 if (isGFX11Plus(STI)) {
1712 for (int Id = UfmtGFX11::UFMT_FIRST; Id <= UfmtGFX11::UFMT_LAST; ++Id) {
1713 if (Fmt == DfmtNfmt2UFmtGFX11[Id])
1714 return Id;
1715 }
1716 } else {
1717 for (int Id = UfmtGFX10::UFMT_FIRST; Id <= UfmtGFX10::UFMT_LAST; ++Id) {
1718 if (Fmt == DfmtNfmt2UFmtGFX10[Id])
1719 return Id;
1720 }
1721 }
1722 return UFMT_UNDEF;
1723}
1724
1725bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI) {
1726 return isGFX10Plus(STI) ? (Val <= UFMT_MAX) : (Val <= DFMT_NFMT_MAX);
1727}
1728
1729unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI) {
1730 if (isGFX10Plus(STI))
1731 return UFMT_DEFAULT;
1732 return DFMT_NFMT_DEFAULT;
1733}
1734
1735} // namespace MTBUFFormat
1736
1737//===----------------------------------------------------------------------===//
1738// SendMsg
1739//===----------------------------------------------------------------------===//
1740
1741namespace SendMsg {
1742
1743static uint64_t getMsgIdMask(const MCSubtargetInfo &STI) {
1744 return isGFX11Plus(STI) ? ID_MASK_GFX11Plus_ : ID_MASK_PreGFX11_;
1745}
1746
1747int64_t getMsgId(const StringRef Name, const MCSubtargetInfo &STI) {
1748 int Idx = getOprIdx<const MCSubtargetInfo &>(Name, Msg, MSG_SIZE, STI);
1749 return (Idx < 0) ? Idx : Msg[Idx].Encoding;
1750}
1751
1752bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI) {
1753 return (MsgId & ~(getMsgIdMask(STI))) == 0;
1754}
1755
1756StringRef getMsgName(int64_t MsgId, const MCSubtargetInfo &STI) {
1757 int Idx = getOprIdx<const MCSubtargetInfo &>(MsgId, Msg, MSG_SIZE, STI);
1758 return (Idx < 0) ? "" : Msg[Idx].Name;
1759}
1760
1761int64_t getMsgOpId(int64_t MsgId, const StringRef Name) {
1762 const char* const *S = (MsgId == ID_SYSMSG) ? OpSysSymbolic : OpGsSymbolic;
1763 const int F = (MsgId == ID_SYSMSG) ? OP_SYS_FIRST_ : OP_GS_FIRST_;
1764 const int L = (MsgId == ID_SYSMSG) ? OP_SYS_LAST_ : OP_GS_LAST_;
1765 for (int i = F; i < L; ++i) {
1766 if (Name == S[i]) {
1767 return i;
1768 }
1769 }
1770 return OP_UNKNOWN_;
1771}
1772
1773bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI,
1774 bool Strict) {
1775 assert(isValidMsgId(MsgId, STI));
1776
1777 if (!Strict)
1778 return 0 <= OpId && isUInt<OP_WIDTH_>(OpId);
1779
1780 if (MsgId == ID_SYSMSG)
1781 return OP_SYS_FIRST_ <= OpId && OpId < OP_SYS_LAST_;
1782 if (!isGFX11Plus(STI)) {
1783 switch (MsgId) {
1784 case ID_GS_PreGFX11:
1785 return (OP_GS_FIRST_ <= OpId && OpId < OP_GS_LAST_) && OpId != OP_GS_NOP;
1786 case ID_GS_DONE_PreGFX11:
1787 return OP_GS_FIRST_ <= OpId && OpId < OP_GS_LAST_;
1788 }
1789 }
1790 return OpId == OP_NONE_;
1791}
1792
1793StringRef getMsgOpName(int64_t MsgId, int64_t OpId,
1794 const MCSubtargetInfo &STI) {
1795 assert(msgRequiresOp(MsgId, STI));
1796 return (MsgId == ID_SYSMSG)? OpSysSymbolic[OpId] : OpGsSymbolic[OpId];
1797}
1798
1799bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId,
1800 const MCSubtargetInfo &STI, bool Strict) {
1801 assert(isValidMsgOp(MsgId, OpId, STI, Strict));
1802
1803 if (!Strict)
1804 return 0 <= StreamId && isUInt<STREAM_ID_WIDTH_>(StreamId);
1805
1806 if (!isGFX11Plus(STI)) {
1807 switch (MsgId) {
1808 case ID_GS_PreGFX11:
1809 return STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_;
1810 case ID_GS_DONE_PreGFX11:
1811 return (OpId == OP_GS_NOP) ?
1812 (StreamId == STREAM_ID_NONE_) :
1813 (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_);
1814 }
1815 }
1816 return StreamId == STREAM_ID_NONE_;
1817}
1818
1819bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI) {
1820 return MsgId == ID_SYSMSG ||
1821 (!isGFX11Plus(STI) &&
1822 (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11));
1823}
1824
1825bool msgSupportsStream(int64_t MsgId, int64_t OpId,
1826 const MCSubtargetInfo &STI) {
1827 return !isGFX11Plus(STI) &&
1828 (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11) &&
1829 OpId != OP_GS_NOP;
1830}
1831
1832void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId,
1833 uint16_t &StreamId, const MCSubtargetInfo &STI) {
1834 MsgId = Val & getMsgIdMask(STI);
1835 if (isGFX11Plus(STI)) {
1836 OpId = 0;
1837 StreamId = 0;
1838 } else {
1839 OpId = (Val & OP_MASK_) >> OP_SHIFT_;
1840 StreamId = (Val & STREAM_ID_MASK_) >> STREAM_ID_SHIFT_;
1841 }
1842}
1843
1844uint64_t encodeMsg(uint64_t MsgId,
1845 uint64_t OpId,
1846 uint64_t StreamId) {
1847 return MsgId | (OpId << OP_SHIFT_) | (StreamId << STREAM_ID_SHIFT_);
1848}
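
// Roundtrip sketch: for a pre-GFX11 target, encodeMsg(MsgId, OpId, StreamId)
// ORs the fields at OP_SHIFT_ and STREAM_ID_SHIFT_, and decodeMsg recovers
// the same triple as long as the values pass the isValidMsg* checks above.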
1849
1850} // namespace SendMsg
1851
1852//===----------------------------------------------------------------------===//
1853//
1854//===----------------------------------------------------------------------===//
1855
1856unsigned getInitialPSInputAddr(const Function &F) {
1857 return F.getFnAttributeAsParsedInteger("InitialPSInputAddr", 0);
1858}
1859
1860bool getHasColorExport(const Function &F) {
1861 // As a safe default always respond as if PS has color exports.
1862 return F.getFnAttributeAsParsedInteger(
1863 "amdgpu-color-export",
1864 F.getCallingConv() == CallingConv::AMDGPU_PS ? 1 : 0) != 0;
1865}
1866
1867bool getHasDepthExport(const Function &F) {
1868 return F.getFnAttributeAsParsedInteger("amdgpu-depth-export", 0) != 0;
1869}
1870
1871bool isShader(CallingConv::ID cc) {
1872 switch(cc) {
1873 case CallingConv::AMDGPU_VS:
1874 case CallingConv::AMDGPU_LS:
1875 case CallingConv::AMDGPU_HS:
1876 case CallingConv::AMDGPU_ES:
1877 case CallingConv::AMDGPU_GS:
1878 case CallingConv::AMDGPU_PS:
1879 case CallingConv::AMDGPU_CS:
1880 return true;
1881 default:
1882 return false;
1883 }
1884}
1885
1886bool isGraphics(CallingConv::ID cc) {
1887 return isShader(cc) || cc == CallingConv::AMDGPU_Gfx;
1888}
1889
1890bool isCompute(CallingConv::ID cc) {
1891 return !isGraphics(cc) || cc == CallingConv::AMDGPU_CS;
1892}
1893
1894bool isEntryFunctionCC(CallingConv::ID CC) {
1895 switch (CC) {
1896 case CallingConv::AMDGPU_KERNEL:
1897 case CallingConv::SPIR_KERNEL:
1898 case CallingConv::AMDGPU_VS:
1899 case CallingConv::AMDGPU_GS:
1900 case CallingConv::AMDGPU_PS:
1901 case CallingConv::AMDGPU_CS:
1902 case CallingConv::AMDGPU_ES:
1903 case CallingConv::AMDGPU_HS:
1904 case CallingConv::AMDGPU_LS:
1905 return true;
1906 default:
1907 return false;
1908 }
1909}
1910
1911bool isModuleEntryFunctionCC(CallingConv::ID CC) {
1912 switch (CC) {
1913 case CallingConv::AMDGPU_Gfx:
1914 return true;
1915 default:
1916 return isEntryFunctionCC(CC);
1917 }
1918}
1919
1920bool isKernelCC(const Function *Func) {
1921 return AMDGPU::isModuleEntryFunctionCC(Func->getCallingConv());
1922}
1923
1924bool hasXNACK(const MCSubtargetInfo &STI) {
1925 return STI.hasFeature(AMDGPU::FeatureXNACK);
1926}
1927
1928bool hasSRAMECC(const MCSubtargetInfo &STI) {
1929 return STI.hasFeature(AMDGPU::FeatureSRAMECC);
1930}
1931
1932bool hasMIMG_R128(const MCSubtargetInfo &STI) {
1933 return STI.hasFeature(AMDGPU::FeatureMIMG_R128) && !STI.hasFeature(AMDGPU::FeatureR128A16);
1934}
1935
1936bool hasA16(const MCSubtargetInfo &STI) {
1937 return STI.hasFeature(AMDGPU::FeatureA16);
1938}
1939
1940bool hasG16(const MCSubtargetInfo &STI) {
1941 return STI.hasFeature(AMDGPU::FeatureG16);
1942}
1943
1944bool hasPackedD16(const MCSubtargetInfo &STI) {
1945 return !STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem) && !isCI(STI) &&
1946 !isSI(STI);
1947}
1948
1949unsigned getNSAMaxSize(const MCSubtargetInfo &STI) {
1950 auto Version = getIsaVersion(STI.getCPU());
1951 if (Version.Major == 10)
1952 return Version.Minor >= 3 ? 13 : 5;
1953 if (Version.Major == 11)
1954 return 5;
1955 return 0;
1956}
1957
1958bool isSI(const MCSubtargetInfo &STI) {
1959 return STI.hasFeature(AMDGPU::FeatureSouthernIslands);
1960}
1961
1962bool isCI(const MCSubtargetInfo &STI) {
1963 return STI.hasFeature(AMDGPU::FeatureSeaIslands);
1964}
1965
1966bool isVI(const MCSubtargetInfo &STI) {
1967 return STI.hasFeature(AMDGPU::FeatureVolcanicIslands);
1968}
1969
1970bool isGFX9(const MCSubtargetInfo &STI) {
1971 return STI.hasFeature(AMDGPU::FeatureGFX9);
1972}
1973
1974bool isGFX9_GFX10(const MCSubtargetInfo &STI) {
1975 return isGFX9(STI) || isGFX10(STI);
1976}
1977
1978bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI) {
1979 return isVI(STI) || isGFX9(STI) || isGFX10(STI);
1980}
1981
1982bool isGFX8Plus(const MCSubtargetInfo &STI) {
1983 return isVI(STI) || isGFX9Plus(STI);
1984}
1985
1986bool isGFX9Plus(const MCSubtargetInfo &STI) {
1987 return isGFX9(STI) || isGFX10Plus(STI);
1988}
1989
1990bool isGFX10(const MCSubtargetInfo &STI) {
1991 return STI.hasFeature(AMDGPU::FeatureGFX10);
1992}
1993
1994bool isGFX10Plus(const MCSubtargetInfo &STI) {
1995 return isGFX10(STI) || isGFX11Plus(STI);
1996}
1997
1998bool isGFX11(const MCSubtargetInfo &STI) {
1999 return STI.hasFeature(AMDGPU::FeatureGFX11);
2000}
2001
2002bool isGFX11Plus(const MCSubtargetInfo &STI) {
2003 return isGFX11(STI);
2004}
2005
2006bool isNotGFX11Plus(const MCSubtargetInfo &STI) {
2007 return !isGFX11Plus(STI);
2008}
2009
2010bool isNotGFX10Plus(const MCSubtargetInfo &STI) {
2011 return isSI(STI) || isCI(STI) || isVI(STI) || isGFX9(STI);
2012}
2013
2014bool isGFX10Before1030(const MCSubtargetInfo &STI) {
2015 return isGFX10(STI) && !AMDGPU::isGFX10_BEncoding(STI);
2016}
2017
2018bool isGCN3Encoding(const MCSubtargetInfo &STI) {
2019 return STI.hasFeature(AMDGPU::FeatureGCN3Encoding);
2020}
2021
2022bool isGFX10_AEncoding(const MCSubtargetInfo &STI) {
2023 return STI.hasFeature(AMDGPU::FeatureGFX10_AEncoding);
2024}
2025
2026bool isGFX10_BEncoding(const MCSubtargetInfo &STI) {
2027 return STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding);
2028}
2029
2030bool hasGFX10_3Insts(const MCSubtargetInfo &STI) {
2031 return STI.hasFeature(AMDGPU::FeatureGFX10_3Insts);
2032}
2033
2034bool isGFX90A(const MCSubtargetInfo &STI) {
2035 return STI.hasFeature(AMDGPU::FeatureGFX90AInsts);
2036}
2037
2038bool isGFX940(const MCSubtargetInfo &STI) {
2039 return STI.hasFeature(AMDGPU::FeatureGFX940Insts);
2040}
2041
2042bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI) {
2043 return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch);
2044}
2045
2046bool hasMAIInsts(const MCSubtargetInfo &STI) {
2047 return STI.hasFeature(AMDGPU::FeatureMAIInsts);
2048}
2049
2050bool hasVOPD(const MCSubtargetInfo &STI) {
2051 return STI.hasFeature(AMDGPU::FeatureVOPD);
2052}
2053
2054int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR,
2055 int32_t ArgNumVGPR) {
2056 if (has90AInsts && ArgNumAGPR)
2057 return alignTo(ArgNumVGPR, 4) + ArgNumAGPR;
2058 return std::max(ArgNumVGPR, ArgNumAGPR);
2059}
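// Worked example (illustrative): with gfx90a insts, ArgNumVGPR = 6 and
// ArgNumAGPR = 4 give alignTo(6, 4) + 4 = 12, since the AGPRs start at a
// 4-aligned boundary after the VGPRs; otherwise the result is
// std::max(6, 4) = 6 because the two counts occupy separate register files.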
2060
2061bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI) {
2062 const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID);
2063 const unsigned FirstSubReg = TRI->getSubReg(Reg, AMDGPU::sub0);
2064 return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) ||
2065 Reg == AMDGPU::SCC;
2066}
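// For example (illustrative): TRI->getSubReg(AMDGPU::SGPR4_SGPR5, sub0) is
// SGPR4, which is in SReg_32, so isSGPR returns true; a VGPR has no such
// scalar subregister and is rejected.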
2067
2068#define MAP_REG2REG \
2069 using namespace AMDGPU; \
2070 switch(Reg) { \
2071 default: return Reg; \
2072 CASE_CI_VI(FLAT_SCR) \
2073 CASE_CI_VI(FLAT_SCR_LO) \
2074 CASE_CI_VI(FLAT_SCR_HI) \
2075 CASE_VI_GFX9PLUS(TTMP0) \
2076 CASE_VI_GFX9PLUS(TTMP1) \
2077 CASE_VI_GFX9PLUS(TTMP2) \
2078 CASE_VI_GFX9PLUS(TTMP3) \
2079 CASE_VI_GFX9PLUS(TTMP4) \
2080 CASE_VI_GFX9PLUS(TTMP5) \
2081 CASE_VI_GFX9PLUS(TTMP6) \
2082 CASE_VI_GFX9PLUS(TTMP7) \
2083 CASE_VI_GFX9PLUS(TTMP8) \
2084 CASE_VI_GFX9PLUS(TTMP9) \
2085 CASE_VI_GFX9PLUS(TTMP10) \
2086 CASE_VI_GFX9PLUS(TTMP11) \
2087 CASE_VI_GFX9PLUS(TTMP12) \
2088 CASE_VI_GFX9PLUS(TTMP13) \
2089 CASE_VI_GFX9PLUS(TTMP14) \
2090 CASE_VI_GFX9PLUS(TTMP15) \
2091 CASE_VI_GFX9PLUS(TTMP0_TTMP1) \
2092 CASE_VI_GFX9PLUS(TTMP2_TTMP3) \
2093 CASE_VI_GFX9PLUS(TTMP4_TTMP5) \
2094 CASE_VI_GFX9PLUS(TTMP6_TTMP7) \
2095 CASE_VI_GFX9PLUS(TTMP8_TTMP9) \
2096 CASE_VI_GFX9PLUS(TTMP10_TTMP11) \
2097 CASE_VI_GFX9PLUS(TTMP12_TTMP13) \
2098 CASE_VI_GFX9PLUS(TTMP14_TTMP15) \
2099 CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3) \
2100 CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7) \
2101 CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11) \
2102 CASE_VI_GFX9PLUS(TTMP12_TTMP13_TTMP14_TTMP15) \
2103 CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \
2104 CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \
2105 CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
2106 CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
2107 CASE_GFXPRE11_GFX11PLUS(M0) \
2108 CASE_GFXPRE11_GFX11PLUS(SGPR_NULL) \
2109 CASE_GFXPRE11_GFX11PLUS_TO(SGPR_NULL64, SGPR_NULL) \
2110 }
2111
2112#define CASE_CI_VI(node) \
2113 assert(!isSI(STI)); \
2114 case node: return isCI(STI) ? node##_ci : node##_vi;
2115
2116#define CASE_VI_GFX9PLUS(node) \
2117 case node: return isGFX9Plus(STI) ? node##_gfx9plus : node##_vi;
2118
2119#define CASE_GFXPRE11_GFX11PLUS(node) \
2120 case node: return isGFX11Plus(STI) ? node##_gfx11plus : node##_gfxpre11;
2121
2122#define CASE_GFXPRE11_GFX11PLUS_TO(node, result) \
2123 case node: return isGFX11Plus(STI) ? result##_gfx11plus : result##_gfxpre11;
2124
2125unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
2126 if (STI.getTargetTriple().getArch() == Triple::r600)
2127 return Reg;
2128 MAP_REG2REG
2129}
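// For example (illustrative): getMCReg(AMDGPU::TTMP0, STI) resolves to
// TTMP0_gfx9plus on GFX9+ targets and TTMP0_vi on VI, following the
// CASE_VI_GFX9PLUS mapping above; mc2PseudoReg below inverts this mapping.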
2130
2131#undef CASE_CI_VI
2132#undef CASE_VI_GFX9PLUS
2133#undef CASE_GFXPRE11_GFX11PLUS
2134#undef CASE_GFXPRE11_GFX11PLUS_TO
2135
2136#define CASE_CI_VI(node) case node##_ci: case node##_vi: return node;
2137#define CASE_VI_GFX9PLUS(node) case node##_vi: case node##_gfx9plus: return node;
2138#define CASE_GFXPRE11_GFX11PLUS(node) case node##_gfx11plus: case node##_gfxpre11: return node;
2139#define CASE_GFXPRE11_GFX11PLUS_TO(node, result)
2140
2141unsigned mc2PseudoReg(unsigned Reg) {
2142 MAP_REG2REG
2143}
2144
2145bool isInlineValue(unsigned Reg) {
2146 switch (Reg) {
2147 case AMDGPU::SRC_SHARED_BASE_LO:
2148 case AMDGPU::SRC_SHARED_BASE:
2149 case AMDGPU::SRC_SHARED_LIMIT_LO:
2150 case AMDGPU::SRC_SHARED_LIMIT:
2151 case AMDGPU::SRC_PRIVATE_BASE_LO:
2152 case AMDGPU::SRC_PRIVATE_BASE:
2153 case AMDGPU::SRC_PRIVATE_LIMIT_LO:
2154 case AMDGPU::SRC_PRIVATE_LIMIT:
2155 case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2156 return true;
2157 case AMDGPU::SRC_VCCZ:
2158 case AMDGPU::SRC_EXECZ:
2159 case AMDGPU::SRC_SCC:
2160 return true;
2161 case AMDGPU::SGPR_NULL:
2162 return true;
2163 default:
2164 return false;
2165 }
2166}
2167
2168#undef CASE_CI_VI
2169#undef CASE_VI_GFX9PLUS
2170#undef CASE_GFXPRE11_GFX11PLUS
2171#undef CASE_GFXPRE11_GFX11PLUS_TO
2172#undef MAP_REG2REG
2173
2174bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2175 assert(OpNo < Desc.NumOperands);
2176 unsigned OpType = Desc.operands()[OpNo].OperandType;
2177 return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
2178 OpType <= AMDGPU::OPERAND_SRC_LAST;
2179}
2180
2181bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2182 assert(OpNo < Desc.NumOperands);
2183 unsigned OpType = Desc.operands()[OpNo].OperandType;
2184 return OpType >= AMDGPU::OPERAND_KIMM_FIRST &&
2185 OpType <= AMDGPU::OPERAND_KIMM_LAST;
2186}
2187
2188bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2189 assert(OpNo < Desc.NumOperands);
2190 unsigned OpType = Desc.operands()[OpNo].OperandType;
2191 switch (OpType) {
2192 case AMDGPU::OPERAND_REG_IMM_FP32:
2193 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2194 case AMDGPU::OPERAND_REG_IMM_FP64:
2195 case AMDGPU::OPERAND_REG_IMM_FP16:
2196 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2197 case AMDGPU::OPERAND_REG_IMM_V2FP16:
2198 case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2199 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2200 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2201 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2202 case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2203 case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2204 case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2205 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2206 case AMDGPU::OPERAND_REG_IMM_V2INT16:
2207 case AMDGPU::OPERAND_REG_IMM_V2FP32:
2208 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2209 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2210 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2211 return true;
2212 default:
2213 return false;
2214 }
2215}
2216
2217bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2218 assert(OpNo < Desc.NumOperands);
2219 unsigned OpType = Desc.operands()[OpNo].OperandType;
2220 return OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
2221 OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST;
2222}
2223
2224// Avoid using MCRegisterClass::getSize, since that function will go away
2225// (move from MC* level to Target* level). Return size in bits.
2226unsigned getRegBitWidth(unsigned RCID) {
2227 switch (RCID) {
2228 case AMDGPU::VGPR_LO16RegClassID:
2229 case AMDGPU::VGPR_HI16RegClassID:
2230 case AMDGPU::SGPR_LO16RegClassID:
2231 case AMDGPU::AGPR_LO16RegClassID:
2232 return 16;
2233 case AMDGPU::SGPR_32RegClassID:
2234 case AMDGPU::VGPR_32RegClassID:
2235 case AMDGPU::VRegOrLds_32RegClassID:
2236 case AMDGPU::AGPR_32RegClassID:
2237 case AMDGPU::VS_32RegClassID:
2238 case AMDGPU::AV_32RegClassID:
2239 case AMDGPU::SReg_32RegClassID:
2240 case AMDGPU::SReg_32_XM0RegClassID:
2241 case AMDGPU::SRegOrLds_32RegClassID:
2242 return 32;
2243 case AMDGPU::SGPR_64RegClassID:
2244 case AMDGPU::VS_64RegClassID:
2245 case AMDGPU::SReg_64RegClassID:
2246 case AMDGPU::VReg_64RegClassID:
2247 case AMDGPU::AReg_64RegClassID:
2248 case AMDGPU::SReg_64_XEXECRegClassID:
2249 case AMDGPU::VReg_64_Align2RegClassID:
2250 case AMDGPU::AReg_64_Align2RegClassID:
2251 case AMDGPU::AV_64RegClassID:
2252 case AMDGPU::AV_64_Align2RegClassID:
2253 return 64;
2254 case AMDGPU::SGPR_96RegClassID:
2255 case AMDGPU::SReg_96RegClassID:
2256 case AMDGPU::VReg_96RegClassID:
2257 case AMDGPU::AReg_96RegClassID:
2258 case AMDGPU::VReg_96_Align2RegClassID:
2259 case AMDGPU::AReg_96_Align2RegClassID:
2260 case AMDGPU::AV_96RegClassID:
2261 case AMDGPU::AV_96_Align2RegClassID:
2262 return 96;
2263 case AMDGPU::SGPR_128RegClassID:
2264 case AMDGPU::SReg_128RegClassID:
2265 case AMDGPU::VReg_128RegClassID:
2266 case AMDGPU::AReg_128RegClassID:
2267 case AMDGPU::VReg_128_Align2RegClassID:
2268 case AMDGPU::AReg_128_Align2RegClassID:
2269 case AMDGPU::AV_128RegClassID:
2270 case AMDGPU::AV_128_Align2RegClassID:
2271 return 128;
2272 case AMDGPU::SGPR_160RegClassID:
2273 case AMDGPU::SReg_160RegClassID:
2274 case AMDGPU::VReg_160RegClassID:
2275 case AMDGPU::AReg_160RegClassID:
2276 case AMDGPU::VReg_160_Align2RegClassID:
2277 case AMDGPU::AReg_160_Align2RegClassID:
2278 case AMDGPU::AV_160RegClassID:
2279 case AMDGPU::AV_160_Align2RegClassID:
2280 return 160;
2281 case AMDGPU::SGPR_192RegClassID:
2282 case AMDGPU::SReg_192RegClassID:
2283 case AMDGPU::VReg_192RegClassID:
2284 case AMDGPU::AReg_192RegClassID:
2285 case AMDGPU::VReg_192_Align2RegClassID:
2286 case AMDGPU::AReg_192_Align2RegClassID:
2287 case AMDGPU::AV_192RegClassID:
2288 case AMDGPU::AV_192_Align2RegClassID:
2289 return 192;
2290 case AMDGPU::SGPR_224RegClassID:
2291 case AMDGPU::SReg_224RegClassID:
2292 case AMDGPU::VReg_224RegClassID:
2293 case AMDGPU::AReg_224RegClassID:
2294 case AMDGPU::VReg_224_Align2RegClassID:
2295 case AMDGPU::AReg_224_Align2RegClassID:
2296 case AMDGPU::AV_224RegClassID:
2297 case AMDGPU::AV_224_Align2RegClassID:
2298 return 224;
2299 case AMDGPU::SGPR_256RegClassID:
2300 case AMDGPU::SReg_256RegClassID:
2301 case AMDGPU::VReg_256RegClassID:
2302 case AMDGPU::AReg_256RegClassID:
2303 case AMDGPU::VReg_256_Align2RegClassID:
2304 case AMDGPU::AReg_256_Align2RegClassID:
2305 case AMDGPU::AV_256RegClassID:
2306 case AMDGPU::AV_256_Align2RegClassID:
2307 return 256;
2308 case AMDGPU::SGPR_288RegClassID:
2309 case AMDGPU::SReg_288RegClassID:
2310 case AMDGPU::VReg_288RegClassID:
2311 case AMDGPU::AReg_288RegClassID:
2312 case AMDGPU::VReg_288_Align2RegClassID:
2313 case AMDGPU::AReg_288_Align2RegClassID:
2314 case AMDGPU::AV_288RegClassID:
2315 case AMDGPU::AV_288_Align2RegClassID:
2316 return 288;
2317 case AMDGPU::SGPR_320RegClassID:
2318 case AMDGPU::SReg_320RegClassID:
2319 case AMDGPU::VReg_320RegClassID:
2320 case AMDGPU::AReg_320RegClassID:
2321 case AMDGPU::VReg_320_Align2RegClassID:
2322 case AMDGPU::AReg_320_Align2RegClassID:
2323 case AMDGPU::AV_320RegClassID:
2324 case AMDGPU::AV_320_Align2RegClassID:
2325 return 320;
2326 case AMDGPU::SGPR_352RegClassID:
2327 case AMDGPU::SReg_352RegClassID:
2328 case AMDGPU::VReg_352RegClassID:
2329 case AMDGPU::AReg_352RegClassID:
2330 case AMDGPU::VReg_352_Align2RegClassID:
2331 case AMDGPU::AReg_352_Align2RegClassID:
2332 case AMDGPU::AV_352RegClassID:
2333 case AMDGPU::AV_352_Align2RegClassID:
2334 return 352;
2335 case AMDGPU::SGPR_384RegClassID:
2336 case AMDGPU::SReg_384RegClassID:
2337 case AMDGPU::VReg_384RegClassID:
2338 case AMDGPU::AReg_384RegClassID:
2339 case AMDGPU::VReg_384_Align2RegClassID:
2340 case AMDGPU::AReg_384_Align2RegClassID:
2341 case AMDGPU::AV_384RegClassID:
2342 case AMDGPU::AV_384_Align2RegClassID:
2343 return 384;
2344 case AMDGPU::SGPR_512RegClassID:
2345 case AMDGPU::SReg_512RegClassID:
2346 case AMDGPU::VReg_512RegClassID:
2347 case AMDGPU::AReg_512RegClassID:
2348 case AMDGPU::VReg_512_Align2RegClassID:
2349 case AMDGPU::AReg_512_Align2RegClassID:
2350 case AMDGPU::AV_512RegClassID:
2351 case AMDGPU::AV_512_Align2RegClassID:
2352 return 512;
2353 case AMDGPU::SGPR_1024RegClassID:
2354 case AMDGPU::SReg_1024RegClassID:
2355 case AMDGPU::VReg_1024RegClassID:
2356 case AMDGPU::AReg_1024RegClassID:
2357 case AMDGPU::VReg_1024_Align2RegClassID:
2358 case AMDGPU::AReg_1024_Align2RegClassID:
2359 case AMDGPU::AV_1024RegClassID:
2360 case AMDGPU::AV_1024_Align2RegClassID:
2361 return 1024;
2362 default:
2363 llvm_unreachable("Unexpected register class");
2364 }
2365}
2366
2367unsigned getRegBitWidth(const MCRegisterClass &RC) {
2368 return getRegBitWidth(RC.getID());
2369}
2370
2371unsigned getRegBitWidth(const TargetRegisterClass &RC) {
2372 return getRegBitWidth(RC.getID());
2373}
2374
2375unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
2376 unsigned OpNo) {
2377 assert(OpNo < Desc.NumOperands);
2378 unsigned RCID = Desc.operands()[OpNo].RegClass;
2379 return getRegBitWidth(RCID) / 8;
2380}
2381
2382bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
2383 if (isInlinableIntLiteral(Literal))
2384 return true;
2385
2386 uint64_t Val = static_cast<uint64_t>(Literal);
2387 return (Val == llvm::bit_cast<uint64_t>(0.0)) ||
2388 (Val == llvm::bit_cast<uint64_t>(1.0)) ||
2389 (Val == llvm::bit_cast<uint64_t>(-1.0)) ||
2390 (Val == llvm::bit_cast<uint64_t>(0.5)) ||
2391 (Val == llvm::bit_cast<uint64_t>(-0.5)) ||
2392 (Val == llvm::bit_cast<uint64_t>(2.0)) ||
2393 (Val == llvm::bit_cast<uint64_t>(-2.0)) ||
2394 (Val == llvm::bit_cast<uint64_t>(4.0)) ||
2395 (Val == llvm::bit_cast<uint64_t>(-4.0)) ||
2396 (Val == 0x3fc45f306dc9c882 && HasInv2Pi);
2397}
2398
2399bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
2400 if (isInlinableIntLiteral(Literal))
2401 return true;
2402
2403 // The actual type of the operand does not seem to matter as long
2404 // as the bits match one of the inline immediate values. For example:
2405 //
2406 // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
2407 // so it is a legal inline immediate.
2408 //
2409 // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
2410 // floating-point, so it is a legal inline immediate.
2411
2412 uint32_t Val = static_cast<uint32_t>(Literal);
2413 return (Val == llvm::bit_cast<uint32_t>(0.0f)) ||
2414 (Val == llvm::bit_cast<uint32_t>(1.0f)) ||
2415 (Val == llvm::bit_cast<uint32_t>(-1.0f)) ||
2416 (Val == llvm::bit_cast<uint32_t>(0.5f)) ||
2417 (Val == llvm::bit_cast<uint32_t>(-0.5f)) ||
2418 (Val == llvm::bit_cast<uint32_t>(2.0f)) ||
2419 (Val == llvm::bit_cast<uint32_t>(-2.0f)) ||
2420 (Val == llvm::bit_cast<uint32_t>(4.0f)) ||
2421 (Val == llvm::bit_cast<uint32_t>(-4.0f)) ||
2422 (Val == 0x3e22f983 && HasInv2Pi);
2423}
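// For example (illustrative): 0x3f800000 (1.0f) is inlinable, 0x3e22f983
// (1/2pi) is inlinable only when HasInv2Pi, and 0x3f800001 is not inlinable
// and must be encoded as a 32-bit literal.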
2424
2425bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
2426 if (!HasInv2Pi)
2427 return false;
2428
2429 if (isInlinableIntLiteral(Literal))
2430 return true;
2431
2432 uint16_t Val = static_cast<uint16_t>(Literal);
2433 return Val == 0x3C00 || // 1.0
2434 Val == 0xBC00 || // -1.0
2435 Val == 0x3800 || // 0.5
2436 Val == 0xB800 || // -0.5
2437 Val == 0x4000 || // 2.0
2438 Val == 0xC000 || // -2.0
2439 Val == 0x4400 || // 4.0
2440 Val == 0xC400 || // -4.0
2441 Val == 0x3118; // 1/2pi
2442}
2443
2444bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi) {
2445 assert(HasInv2Pi);
2446
2447 if (isInt<16>(Literal) || isUInt<16>(Literal)) {
2448 int16_t Trunc = static_cast<int16_t>(Literal);
2449 return AMDGPU::isInlinableLiteral16(Trunc, HasInv2Pi);
2450 }
2451 if (!(Literal & 0xffff))
2452 return AMDGPU::isInlinableLiteral16(Literal >> 16, HasInv2Pi);
2453
2454 int16_t Lo16 = static_cast<int16_t>(Literal);
2455 int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
2456 return Lo16 == Hi16 && isInlinableLiteral16(Lo16, HasInv2Pi);
2457}
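// For example (illustrative): 0x40004000 packs 2.0 into both halves and is
// inlinable, while 0x40003C00 (halves 2.0 and 1.0) is not: the halves differ
// and the low half is nonzero.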
2458
2459bool isInlinableIntLiteralV216(int32_t Literal) {
2460 int16_t Lo16 = static_cast<int16_t>(Literal);
2461 if (isInt<16>(Literal) || isUInt<16>(Literal))
2462 return isInlinableIntLiteral(Lo16);
2463
2464 int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
2465 if (!(Literal & 0xffff))
2466 return isInlinableIntLiteral(Hi16);
2467 return Lo16 == Hi16 && isInlinableIntLiteral(Lo16);
2468}
2469
2470bool isFoldableLiteralV216(int32_t Literal, bool HasInv2Pi) {
2471 assert(HasInv2Pi);
2472
2473 int16_t Lo16 = static_cast<int16_t>(Literal);
2474 if (isInt<16>(Literal) || isUInt<16>(Literal))
2475 return true;
2476
2477 int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
2478 if (!(Literal & 0xffff))
2479 return true;
2480 return Lo16 == Hi16;
2481}
2482
2483bool isArgPassedInSGPR(const Argument *A) {
2484 const Function *F = A->getParent();
2485
2486 // Arguments to compute shaders are never a source of divergence.
2487 CallingConv::ID CC = F->getCallingConv();
2488 switch (CC) {
2489 case CallingConv::AMDGPU_KERNEL:
2490 case CallingConv::SPIR_KERNEL:
2491 return true;
2492 case CallingConv::AMDGPU_VS:
2493 case CallingConv::AMDGPU_LS:
2494 case CallingConv::AMDGPU_HS:
2495 case CallingConv::AMDGPU_ES:
2496 case CallingConv::AMDGPU_GS:
2497 case CallingConv::AMDGPU_PS:
2498 case CallingConv::AMDGPU_CS:
2499 case CallingConv::AMDGPU_Gfx:
2500 // For non-compute shaders, SGPR inputs are marked with either inreg or
2501 // byval. Everything else is in VGPRs.
2502 return A->hasAttribute(Attribute::InReg) ||
2503 A->hasAttribute(Attribute::ByVal);
2504 default:
2505 // TODO: Should calls support inreg for SGPR inputs?
2506 return false;
2507 }
2508}
2509
2510bool isArgPassedInSGPR(const CallBase *CB, unsigned ArgNo) {
2511 // Arguments to compute shaders are never a source of divergence.
2512 CallingConv::ID CC = CB->getCallingConv();
2513 switch (CC) {
2514 case CallingConv::AMDGPU_KERNEL:
2515 case CallingConv::SPIR_KERNEL:
2516 return true;
2517 case CallingConv::AMDGPU_VS:
2518 case CallingConv::AMDGPU_LS:
2519 case CallingConv::AMDGPU_HS:
2520 case CallingConv::AMDGPU_ES:
2521 case CallingConv::AMDGPU_GS:
2522 case CallingConv::AMDGPU_PS:
2523 case CallingConv::AMDGPU_CS:
2524 case CallingConv::AMDGPU_Gfx:
2525 // For non-compute shaders, SGPR inputs are marked with either inreg or
2526 // byval. Everything else is in VGPRs.
2527 return CB->paramHasAttr(ArgNo, Attribute::InReg) ||
2528 CB->paramHasAttr(ArgNo, Attribute::ByVal);
2529 default:
2530 // TODO: Should calls support inreg for SGPR inputs?
2531 return false;
2532 }
2533}
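// For example (illustrative): an inreg parameter of an AMDGPU_Gfx callee is
// reported as passed in an SGPR, while a plain parameter of the same callee
// is treated as a VGPR input.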
2534
2535static bool hasSMEMByteOffset(const MCSubtargetInfo &ST) {
2536 return isGCN3Encoding(ST) || isGFX10Plus(ST);
2537}
2538
2539static bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST) {
2540 return isGFX9Plus(ST);
2541}
2542
2543bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
2544 int64_t EncodedOffset) {
2545 return hasSMEMByteOffset(ST) ? isUInt<20>(EncodedOffset)
2546 : isUInt<8>(EncodedOffset);
2547}
2548
2549bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST,
2550 int64_t EncodedOffset,
2551 bool IsBuffer) {
2552 return !IsBuffer &&
2553 hasSMRDSignedImmOffset(ST) &&
2554 isInt<21>(EncodedOffset);
2555}
2556
2557static bool isDwordAligned(uint64_t ByteOffset) {
2558 return (ByteOffset & 3) == 0;
2559}
2560
2561uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST,
2562 uint64_t ByteOffset) {
2563 if (hasSMEMByteOffset(ST))
2564 return ByteOffset;
2565
2566 assert(isDwordAligned(ByteOffset));
2567 return ByteOffset >> 2;
2568}
2569
2570std::optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
2571 int64_t ByteOffset, bool IsBuffer) {
2572 // The signed version is always a byte offset.
2573 if (!IsBuffer && hasSMRDSignedImmOffset(ST)) {
2574 assert(hasSMEMByteOffset(ST));
2575 return isInt<20>(ByteOffset) ? std::optional<int64_t>(ByteOffset)
2576 : std::nullopt;
2577 }
2578
2579 if (!isDwordAligned(ByteOffset) && !hasSMEMByteOffset(ST))
2580 return std::nullopt;
2581
2582 int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
2583 return isLegalSMRDEncodedUnsignedOffset(ST, EncodedOffset)
2584 ? std::optional<int64_t>(EncodedOffset)
2585 : std::nullopt;
2586}
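// For example (illustrative): on SI a byte offset of 40 is dword-aligned and
// encodes as the dword offset 10, whereas GCN3 and GFX10+ targets encode the
// byte offset 40 directly (see hasSMEMByteOffset above).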
2587
2588std::optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
2589 int64_t ByteOffset) {
2590 if (!isCI(ST) || !isDwordAligned(ByteOffset))
2591 return std::nullopt;
2592
2593 int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
2594 return isUInt<32>(EncodedOffset) ? std::optional<int64_t>(EncodedOffset)
2595 : std::nullopt;
2596}
2597
2598unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST) {
2599 // Address offset is 12-bit signed for GFX10, 13-bit for GFX9 and GFX11+.
2600 if (AMDGPU::isGFX10(ST))
2601 return 12;
2602
2603 return 13;
2604}
2605
2606namespace {
2607
2608struct SourceOfDivergence {
2609 unsigned Intr;
2610};
2611const SourceOfDivergence *lookupSourceOfDivergence(unsigned Intr);
2612
2613struct AlwaysUniform {
2614 unsigned Intr;
2615};
2616const AlwaysUniform *lookupAlwaysUniform(unsigned Intr);
2617
2618#define GET_SourcesOfDivergence_IMPL
2619#define GET_UniformIntrinsics_IMPL
2620#define GET_Gfx9BufferFormat_IMPL
2621#define GET_Gfx10BufferFormat_IMPL
2622#define GET_Gfx11PlusBufferFormat_IMPL
2623#include "AMDGPUGenSearchableTables.inc"
2624
2625} // end anonymous namespace
2626
2627bool isIntrinsicSourceOfDivergence(unsigned IntrID) {
2628 return lookupSourceOfDivergence(IntrID);
2629}
2630
2631bool isIntrinsicAlwaysUniform(unsigned IntrID) {
2632 return lookupAlwaysUniform(IntrID);
2633}
2634
2635const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
2636 uint8_t NumComponents,
2637 uint8_t NumFormat,
2638 const MCSubtargetInfo &STI) {
2639 return isGFX11Plus(STI)
2640 ? getGfx11PlusBufferFormatInfo(BitsPerComp, NumComponents,
2641 NumFormat)
2642 : isGFX10(STI) ? getGfx10BufferFormatInfo(BitsPerComp,
2643 NumComponents, NumFormat)
2644 : getGfx9BufferFormatInfo(BitsPerComp,
2645 NumComponents, NumFormat);
2646}
2647
2648const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
2649 const MCSubtargetInfo &STI) {
2650 return isGFX11Plus(STI) ? getGfx11PlusBufferFormatInfo(Format)
2651 : isGFX10(STI) ? getGfx10BufferFormatInfo(Format)
2652 : getGfx9BufferFormatInfo(Format);
2653}
2654
2655} // namespace AMDGPU
2656
2657raw_ostream &operator<<(raw_ostream &OS,
2658 const AMDGPU::IsaInfo::TargetIDSetting S) {
2659 switch (S) {
2660 case (AMDGPU::IsaInfo::TargetIDSetting::Unsupported):
2661 OS << "Unsupported";
2662 break;
2663 case (AMDGPU::IsaInfo::TargetIDSetting::Any):
2664 OS << "Any";
2665 break;
2666 case (AMDGPU::IsaInfo::TargetIDSetting::Off):
2667 OS << "Off";
2668 break;
2669 case (AMDGPU::IsaInfo::TargetIDSetting::On):
2670 OS << "On";
2671 break;
2672 }
2673 return OS;
2674}
2675
2676} // namespace llvm