20#include "llvm/IR/IntrinsicsAMDGPU.h"
21#include "llvm/IR/IntrinsicsR600.h"
31#define GET_INSTRINFO_NAMED_OPS
32#define GET_INSTRMAP_INFO
33#include "AMDGPUGenInstrInfo.inc"
/// Build a bit mask selecting \p Width consecutive bits starting at \p Shift.
///
/// \param Shift  bit position of the field's least significant bit.
/// \param Width  number of bits in the field. A full 32-bit width yields an
///               all-ones field; the original expression (1 << Width) would
///               be undefined behavior in that case.
/// \return mask with bits [Shift, Shift + Width) set.
unsigned getBitMask(unsigned Shift, unsigned Width) {
  // Use an unsigned literal and special-case a full-width field so the
  // left shift never exceeds the bit width of the operand (which would be
  // undefined behavior); for every Width < 32 this matches the original
  // ((1 << Width) - 1) << Shift exactly.
  unsigned FieldOnes = Width >= 32 ? ~0u : ((1u << Width) - 1u);
  return FieldOnes << Shift;
}
50unsigned packBits(
unsigned Src,
unsigned Dst,
unsigned Shift,
unsigned Width) {
51 unsigned Mask = getBitMask(Shift, Width);
52 return ((Src << Shift) & Mask) | (Dst & ~Mask);
58unsigned unpackBits(
unsigned Src,
unsigned Shift,
unsigned Width) {
59 return (Src & getBitMask(Shift, Width)) >> Shift;
63unsigned getVmcntBitShiftLo(
unsigned VersionMajor) {
68unsigned getVmcntBitWidthLo(
unsigned VersionMajor) {
73unsigned getExpcntBitShift(
unsigned VersionMajor) {
/// \return the number of bits used to encode the expcnt field of a waitcnt
/// value. The width is 3 for every generation handled here; \p VersionMajor
/// is accepted only for symmetry with the other bit-field helpers.
unsigned getExpcntBitWidth(unsigned VersionMajor) {
  (void)VersionMajor; // Width is generation-independent.
  return 3;
}
81unsigned getLgkmcntBitShift(
unsigned VersionMajor) {
86unsigned getLgkmcntBitWidth(
unsigned VersionMajor) {
/// \return the bit position of the high portion of the vmcnt field within a
/// waitcnt encoding. The shift is 14 for every generation handled here;
/// \p VersionMajor is accepted only for symmetry with the other helpers.
unsigned getVmcntBitShiftHi(unsigned VersionMajor) {
  (void)VersionMajor; // Shift is generation-independent.
  return 14;
}
/// \return the number of bits in the high portion of the vmcnt field.
/// Only major versions 9 and 10 carry a 2-bit high part; on every other
/// generation the high part does not exist (width 0).
unsigned getVmcntBitWidthHi(unsigned VersionMajor) {
  switch (VersionMajor) {
  case 9:
  case 10:
    return 2;
  default:
    return 0;
  }
}
157 if (
auto Ver = mdconst::extract_or_null<ConstantInt>(
158 M.getModuleFlag(
"amdgpu_code_object_version"))) {
159 return (
unsigned)Ver->getZExtValue() / 100;
167 switch (CodeObjectVersion) {
182 switch (CodeObjectVersion) {
194 switch (CodeObjectVersion) {
206 switch (CodeObjectVersion) {
217#define GET_MIMGBaseOpcodesTable_IMPL
218#define GET_MIMGDimInfoTable_IMPL
219#define GET_MIMGInfoTable_IMPL
220#define GET_MIMGLZMappingTable_IMPL
221#define GET_MIMGMIPMappingTable_IMPL
222#define GET_MIMGBiasMappingTable_IMPL
223#define GET_MIMGOffsetMappingTable_IMPL
224#define GET_MIMGG16MappingTable_IMPL
225#define GET_MAIInstInfoTable_IMPL
226#include "AMDGPUGenSearchableTables.inc"
229 unsigned VDataDwords,
unsigned VAddrDwords) {
230 const MIMGInfo *
Info = getMIMGOpcodeHelper(BaseOpcode, MIMGEncoding,
231 VDataDwords, VAddrDwords);
245 return NewInfo ? NewInfo->
Opcode : -1;
250 bool IsG16Supported) {
257 AddrWords += AddrComponents;
265 if ((IsA16 && !IsG16Supported) || BaseOpcode->
G16)
326#define GET_MTBUFInfoTable_DECL
327#define GET_MTBUFInfoTable_IMPL
328#define GET_MUBUFInfoTable_DECL
329#define GET_MUBUFInfoTable_IMPL
330#define GET_SMInfoTable_DECL
331#define GET_SMInfoTable_IMPL
332#define GET_VOP1InfoTable_DECL
333#define GET_VOP1InfoTable_IMPL
334#define GET_VOP2InfoTable_DECL
335#define GET_VOP2InfoTable_IMPL
336#define GET_VOP3InfoTable_DECL
337#define GET_VOP3InfoTable_IMPL
338#define GET_VOPC64DPPTable_DECL
339#define GET_VOPC64DPPTable_IMPL
340#define GET_VOPC64DPP8Table_DECL
341#define GET_VOPC64DPP8Table_IMPL
342#define GET_VOPDComponentTable_DECL
343#define GET_VOPDComponentTable_IMPL
344#define GET_VOPDPairs_DECL
345#define GET_VOPDPairs_IMPL
346#define GET_VOPTrue16Table_DECL
347#define GET_VOPTrue16Table_IMPL
348#define GET_WMMAOpcode2AddrMappingTable_DECL
349#define GET_WMMAOpcode2AddrMappingTable_IMPL
350#define GET_WMMAOpcode3AddrMappingTable_DECL
351#define GET_WMMAOpcode3AddrMappingTable_IMPL
352#include "AMDGPUGenSearchableTables.inc"
356 return Info ?
Info->BaseOpcode : -1;
360 const MTBUFInfo *
Info = getMTBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
371 return Info ?
Info->has_vaddr :
false;
376 return Info ?
Info->has_srsrc :
false;
381 return Info ?
Info->has_soffset :
false;
386 return Info ?
Info->BaseOpcode : -1;
390 const MUBUFInfo *
Info = getMUBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
401 return Info ?
Info->has_vaddr :
false;
406 return Info ?
Info->has_srsrc :
false;
411 return Info ?
Info->has_soffset :
false;
416 return Info ?
Info->IsBufferInv :
false;
420 const SMInfo *
Info = getSMEMOpcodeHelper(Opc);
421 return Info ?
Info->IsBuffer :
false;
426 return Info ?
Info->IsSingle :
false;
431 return Info ?
Info->IsSingle :
false;
436 return Info ?
Info->IsSingle :
false;
440 return isVOPC64DPPOpcodeHelper(Opc) || isVOPC64DPP8OpcodeHelper(Opc);
445 return Info ?
Info->is_dgemm :
false;
450 return Info ?
Info->is_gfx940_xdl :
false;
456 return {
Info->CanBeVOPDX,
true};
458 return {
false,
false};
471 return Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
472 Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
473 Opc == AMDGPU::V_MAC_F32_e64_vi ||
474 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
475 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
476 Opc == AMDGPU::V_MAC_F16_e64_vi ||
477 Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
478 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
479 Opc == AMDGPU::V_FMAC_F32_e64_gfx11 ||
480 Opc == AMDGPU::V_FMAC_F32_e64_vi ||
481 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
482 Opc == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 ||
483 Opc == AMDGPU::V_FMAC_F16_e64_gfx10 ||
484 Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx11 ||
485 Opc == AMDGPU::V_DOT2C_F32_F16_e64_vi ||
486 Opc == AMDGPU::V_DOT2C_I32_I16_e64_vi ||
487 Opc == AMDGPU::V_DOT4C_I32_I8_e64_vi ||
488 Opc == AMDGPU::V_DOT8C_I32_I4_e64_vi;
492 return Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
493 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10 ||
494 Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx11 ||
495 Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx11;
500 return Info ?
Info->IsTrue16 :
false;
505 return Info ?
Info->Opcode3Addr : ~0u;
510 return Info ?
Info->Opcode2Addr : ~0u;
517 return getMCOpcodeGen(Opcode,
static_cast<Subtarget
>(Gen));
521 const VOPDInfo *
Info = getVOPDInfoFromComponentOpcodes(OpX, OpY);
528 auto OpX = getVOPDBaseFromComponent(
Info->OpX);
529 auto OpY = getVOPDBaseFromComponent(
Info->OpY);
531 return {OpX->BaseVOP, OpY->BaseVOP};
543 HasSrc2Acc = TiedIdx != -1;
550 for (CompOprIdx =
Component::SRC1; CompOprIdx < OperandsNum; ++CompOprIdx) {
552 MandatoryLiteralIdx = CompOprIdx;
573 std::function<
unsigned(
unsigned,
unsigned)> GetRegIdx)
const {
580 unsigned BanksNum =
BANKS_NUM[CompOprIdx];
581 if (OpXRegs[CompOprIdx] && OpYRegs[CompOprIdx] &&
582 (OpXRegs[CompOprIdx] % BanksNum == OpYRegs[CompOprIdx] % BanksNum))
598 std::function<
unsigned(
unsigned,
unsigned)> GetRegIdx)
const {
601 const auto &Comp = CompInfo[CompIdx];
604 RegIndices[
DST] = GetRegIdx(CompIdx, Comp.getIndexOfDstInMCOperands());
607 unsigned CompSrcIdx = CompOprIdx -
DST_NUM;
609 Comp.hasRegSrcOperand(CompSrcIdx)
610 ? GetRegIdx(CompIdx, Comp.getIndexOfSrcInMCOperands(CompSrcIdx))
625 const auto &OpXDesc =
InstrInfo->get(OpX);
626 const auto &OpYDesc =
InstrInfo->get(OpY);
648 std::optional<bool> XnackRequested;
649 std::optional<bool> SramEccRequested;
651 for (
const std::string &Feature : Features.
getFeatures()) {
652 if (Feature ==
"+xnack")
653 XnackRequested =
true;
654 else if (Feature ==
"-xnack")
655 XnackRequested =
false;
656 else if (Feature ==
"+sramecc")
657 SramEccRequested =
true;
658 else if (Feature ==
"-sramecc")
659 SramEccRequested =
false;
665 if (XnackRequested) {
666 if (XnackSupported) {
672 if (*XnackRequested) {
673 errs() <<
"warning: xnack 'On' was requested for a processor that does "
676 errs() <<
"warning: xnack 'Off' was requested for a processor that "
677 "does not support it!\n";
682 if (SramEccRequested) {
683 if (SramEccSupported) {
690 if (*SramEccRequested) {
691 errs() <<
"warning: sramecc 'On' was requested for a processor that "
692 "does not support it!\n";
694 errs() <<
"warning: sramecc 'Off' was requested for a processor that "
695 "does not support it!\n";
713 TargetID.
split(TargetIDSplit,
':');
715 for (
const auto &FeatureString : TargetIDSplit) {
716 if (FeatureString.startswith(
"xnack"))
718 if (FeatureString.startswith(
"sramecc"))
724 std::string StringRep;
730 StreamRep << TargetTriple.getArchName() <<
'-'
731 << TargetTriple.getVendorName() <<
'-'
732 << TargetTriple.getOSName() <<
'-'
733 << TargetTriple.getEnvironmentName() <<
'-';
735 std::string Processor;
739 if (Version.Major >= 9)
742 Processor = (
Twine(
"gfx") +
Twine(Version.Major) +
Twine(Version.Minor) +
743 Twine(Version.Stepping))
746 std::string Features;
748 switch (CodeObjectVersion) {
752 if (Processor ==
"gfx600") {
753 }
else if (Processor ==
"gfx601") {
754 }
else if (Processor ==
"gfx602") {
755 }
else if (Processor ==
"gfx700") {
756 }
else if (Processor ==
"gfx701") {
757 }
else if (Processor ==
"gfx702") {
758 }
else if (Processor ==
"gfx703") {
759 }
else if (Processor ==
"gfx704") {
760 }
else if (Processor ==
"gfx705") {
761 }
else if (Processor ==
"gfx801") {
764 "AMD GPU code object V2 does not support processor " +
765 Twine(Processor) +
" without XNACK");
766 }
else if (Processor ==
"gfx802") {
767 }
else if (Processor ==
"gfx803") {
768 }
else if (Processor ==
"gfx805") {
769 }
else if (Processor ==
"gfx810") {
772 "AMD GPU code object V2 does not support processor " +
773 Twine(Processor) +
" without XNACK");
774 }
else if (Processor ==
"gfx900") {
776 Processor =
"gfx901";
777 }
else if (Processor ==
"gfx902") {
779 Processor =
"gfx903";
780 }
else if (Processor ==
"gfx904") {
782 Processor =
"gfx905";
783 }
else if (Processor ==
"gfx906") {
785 Processor =
"gfx907";
786 }
else if (Processor ==
"gfx90c") {
789 "AMD GPU code object V2 does not support processor " +
790 Twine(Processor) +
" with XNACK being ON or ANY");
793 "AMD GPU code object V2 does not support processor " +
800 Features +=
"+xnack";
804 Features +=
"+sram-ecc";
810 Features +=
":sramecc-";
812 Features +=
":sramecc+";
815 Features +=
":xnack-";
817 Features +=
":xnack+";
824 StreamRep << Processor << Features;
840 unsigned BytesPerCU = 0;
875 unsigned FlatWorkGroupSize) {
876 assert(FlatWorkGroupSize != 0);
886 unsigned MaxBarriers = 16;
890 return std::min(MaxWaves /
N, MaxBarriers);
907 unsigned FlatWorkGroupSize) {
922 unsigned FlatWorkGroupSize) {
928 if (Version.Major >= 10)
930 if (Version.Major >= 8)
941 if (Version.Major >= 8)
951 if (Version.Major >= 10)
953 if (Version.Major >= 8)
962 if (Version.Major >= 10)
981 if (Version.Major >= 10)
982 return Addressable ? AddressableNumSGPRs : 108;
983 if (Version.Major >= 8 && !Addressable)
984 AddressableNumSGPRs = 112;
989 return std::min(MaxNumSGPRs, AddressableNumSGPRs);
993 bool FlatScrUsed,
bool XNACKUsed) {
994 unsigned ExtraSGPRs = 0;
999 if (Version.Major >= 10)
1002 if (Version.Major < 8) {
1030 std::optional<bool> EnableWavefrontSize32) {
1034 bool IsWave32 = EnableWavefrontSize32 ?
1035 *EnableWavefrontSize32 :
1039 return IsWave32 ? 24 : 12;
1042 return IsWave32 ? 16 : 8;
1044 return IsWave32 ? 8 : 4;
1048 std::optional<bool> EnableWavefrontSize32) {
1052 bool IsWave32 = EnableWavefrontSize32 ?
1053 *EnableWavefrontSize32 :
1056 return IsWave32 ? 8 : 4;
1066 return IsWave32 ? 1536 : 768;
1067 return IsWave32 ? 1024 : 512;
1077 unsigned NumVGPRs) {
1080 if (NumVGPRs < Granule)
1082 unsigned RoundedRegs =
alignTo(NumVGPRs, Granule);
1083 return std::min(std::max(
getTotalNumVGPRs(STI) / RoundedRegs, 1u), MaxWaves);
1090 if (WavesPerEU >= MaxWavesPerEU)
1096 unsigned MaxNumVGPRs =
alignDown(TotNumVGPRs / WavesPerEU, Granule);
1098 if (MaxNumVGPRs ==
alignDown(TotNumVGPRs / MaxWavesPerEU, Granule))
1102 if (WavesPerEU < MinWavesPerEU)
1105 unsigned MaxNumVGPRsNext =
alignDown(TotNumVGPRs / (WavesPerEU + 1), Granule);
1106 unsigned MinNumVGPRs = 1 + std::min(MaxNumVGPRs - Granule, MaxNumVGPRsNext);
1107 return std::min(MinNumVGPRs, AddrsableNumVGPRs);
1116 return std::min(MaxNumVGPRs, AddressableNumVGPRs);
1120 std::optional<bool> EnableWavefrontSize32) {
1121 NumVGPRs =
alignTo(std::max(1u, NumVGPRs),
1133 memset(&Header, 0,
sizeof(Header));
1135 Header.amd_kernel_code_version_major = 1;
1136 Header.amd_kernel_code_version_minor = 2;
1137 Header.amd_machine_kind = 1;
1138 Header.amd_machine_version_major = Version.Major;
1139 Header.amd_machine_version_minor = Version.Minor;
1140 Header.amd_machine_version_stepping = Version.Stepping;
1141 Header.kernel_code_entry_byte_offset =
sizeof(Header);
1142 Header.wavefront_size = 6;
1146 Header.call_convention = -1;
1150 Header.kernarg_segment_alignment = 4;
1151 Header.group_segment_alignment = 4;
1152 Header.private_segment_alignment = 4;
1154 if (Version.Major >= 10) {
1156 Header.wavefront_size = 5;
1159 Header.compute_pgm_resource_registers |=
1170 memset(&KD, 0,
sizeof(KD));
1173 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64,
1176 amdhsa::COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, 1);
1178 amdhsa::COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 1);
1180 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, 1);
1181 if (Version.Major >= 10) {
1183 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
1186 amdhsa::COMPUTE_PGM_RSRC1_WGP_MODE,
1189 amdhsa::COMPUTE_PGM_RSRC1_MEM_ORDERED, 1);
1193 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
1220 bool OnlyFirstRequired) {
1222 if (!
A.isStringAttribute())
1226 std::pair<int, int> Ints =
Default;
1227 std::pair<StringRef, StringRef> Strs =
A.getValueAsString().split(
',');
1228 if (Strs.first.trim().getAsInteger(0, Ints.first)) {
1229 Ctx.
emitError(
"can't parse first integer attribute " +
Name);
1232 if (Strs.second.trim().getAsInteger(0, Ints.second)) {
1233 if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
1234 Ctx.
emitError(
"can't parse second integer attribute " +
Name);
1243 return (1 << (getVmcntBitWidthLo(Version.Major) +
1244 getVmcntBitWidthHi(Version.Major))) -
1249 return (1 << getExpcntBitWidth(Version.Major)) - 1;
1253 return (1 << getLgkmcntBitWidth(Version.Major)) - 1;
1257 unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(Version.Major),
1258 getVmcntBitWidthLo(Version.Major));
1259 unsigned Expcnt = getBitMask(getExpcntBitShift(Version.Major),
1260 getExpcntBitWidth(Version.Major));
1261 unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(Version.Major),
1262 getLgkmcntBitWidth(Version.Major));
1263 unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(Version.Major),
1264 getVmcntBitWidthHi(Version.Major));
1265 return VmcntLo | Expcnt | Lgkmcnt | VmcntHi;
1269 unsigned VmcntLo = unpackBits(
Waitcnt, getVmcntBitShiftLo(Version.Major),
1270 getVmcntBitWidthLo(Version.Major));
1271 unsigned VmcntHi = unpackBits(
Waitcnt, getVmcntBitShiftHi(Version.Major),
1272 getVmcntBitWidthHi(Version.Major));
1273 return VmcntLo | VmcntHi << getVmcntBitWidthLo(Version.Major);
1277 return unpackBits(
Waitcnt, getExpcntBitShift(Version.Major),
1278 getExpcntBitWidth(Version.Major));
1282 return unpackBits(
Waitcnt, getLgkmcntBitShift(Version.Major),
1283 getLgkmcntBitWidth(Version.Major));
1287 unsigned &Vmcnt,
unsigned &Expcnt,
unsigned &Lgkmcnt) {
1303 Waitcnt = packBits(Vmcnt,
Waitcnt, getVmcntBitShiftLo(Version.Major),
1304 getVmcntBitWidthLo(Version.Major));
1305 return packBits(Vmcnt >> getVmcntBitWidthLo(Version.Major),
Waitcnt,
1306 getVmcntBitShiftHi(Version.Major),
1307 getVmcntBitWidthHi(Version.Major));
1312 return packBits(Expcnt,
Waitcnt, getExpcntBitShift(Version.Major),
1313 getExpcntBitWidth(Version.Major));
1318 return packBits(Lgkmcnt,
Waitcnt, getLgkmcntBitShift(Version.Major),
1319 getLgkmcntBitWidth(Version.Major));
1323 unsigned Vmcnt,
unsigned Expcnt,
unsigned Lgkmcnt) {
1358 for (
int Idx = 0;
Idx < OpInfoSize; ++
Idx) {
1370 int OpInfoSize,
T Context) {
1372 return getOprIdx<T>(
Test, OpInfo, OpInfoSize,
Context);
1377 T Context,
bool QuickCheck =
true) {
1379 return Op.Encoding == Id && !Op.Name.empty();
1384 if (QuickCheck && isValidOpr<T>(Id, OpInfo, OpInfoSize,
Context) &&
1388 return getOprIdx<T>(
Test, OpInfo, OpInfoSize,
Context);
1400 const auto &Op = Opr[
Idx];
1401 if (Op.isSupported(STI))
1402 Enc |= Op.encode(Op.Default);
1408 int Size,
unsigned Code,
1409 bool &HasNonDefaultVal,
1411 unsigned UsedOprMask = 0;
1412 HasNonDefaultVal =
false;
1414 const auto &Op = Opr[
Idx];
1415 if (!Op.isSupported(STI))
1417 UsedOprMask |= Op.getMask();
1418 unsigned Val = Op.decode(Code);
1419 if (!Op.isValid(Val))
1421 HasNonDefaultVal |= (Val != Op.Default);
1423 return (Code & ~UsedOprMask) == 0;
1428 unsigned &Val,
bool &IsDefault,
1431 const auto &Op = Opr[
Idx++];
1432 if (Op.isSupported(STI)) {
1434 Val = Op.decode(Code);
1435 IsDefault = (Val == Op.Default);
1445 if (InputVal < 0 || InputVal > Op.Max)
1447 return Op.encode(InputVal);
1452 unsigned &UsedOprMask,
1456 const auto &Op = Opr[
Idx];
1457 if (Op.Name ==
Name) {
1458 if (!Op.isSupported(STI)) {
1462 auto OprMask = Op.getMask();
1463 if (OprMask & UsedOprMask)
1465 UsedOprMask |= OprMask;
1488 HasNonDefaultVal, STI);
1517 return 0 <=
Id && isUInt<ID_WIDTH_>(
Id);
1525 return 0 <= (
Width - 1) && isUInt<WIDTH_M1_WIDTH_>(
Width - 1);
1571 if (Val.Tgt <= Id && Id <= Val.Tgt + Val.MaxIndex) {
1572 Index = (Val.MaxIndex == 0) ? -1 : (Id - Val.Tgt);
1583 if (Val.MaxIndex == 0 &&
Name == Val.Name)
1586 if (Val.MaxIndex > 0 &&
Name.startswith(Val.Name)) {
1594 if (Suffix.
size() > 1 && Suffix[0] ==
'0')
1597 return Val.Tgt + Id;
1626namespace MTBUFFormat {
1652 if (
Name == lookupTable[Id])
1757 int Idx = getOprIdx<const MCSubtargetInfo &>(MsgId,
Msg,
MSG_SIZE, STI);
1765 for (
int i =
F; i < L; ++i) {
1778 return 0 <= OpId && isUInt<OP_WIDTH_>(OpId);
1857 return F.getFnAttributeAsParsedInteger(
"InitialPSInputAddr", 0);
1862 return F.getFnAttributeAsParsedInteger(
1863 "amdgpu-color-export",
1868 return F.getFnAttributeAsParsedInteger(
"amdgpu-depth-export", 0) != 0;
1929 return STI.
hasFeature(AMDGPU::FeatureSRAMECC);
1945 return !STI.
hasFeature(AMDGPU::FeatureUnpackedD16VMem) && !
isCI(STI) &&
1951 if (Version.Major == 10)
1952 return Version.Minor >= 3 ? 13 : 5;
1953 if (Version.Major == 11)
1959 return STI.
hasFeature(AMDGPU::FeatureSouthernIslands);
1963 return STI.
hasFeature(AMDGPU::FeatureSeaIslands);
1967 return STI.
hasFeature(AMDGPU::FeatureVolcanicIslands);
2019 return STI.
hasFeature(AMDGPU::FeatureGCN3Encoding);
2023 return STI.
hasFeature(AMDGPU::FeatureGFX10_AEncoding);
2027 return STI.
hasFeature(AMDGPU::FeatureGFX10_BEncoding);
2031 return STI.
hasFeature(AMDGPU::FeatureGFX10_3Insts);
2035 return STI.
hasFeature(AMDGPU::FeatureGFX90AInsts);
2039 return STI.
hasFeature(AMDGPU::FeatureGFX940Insts);
2043 return STI.
hasFeature(AMDGPU::FeatureArchitectedFlatScratch);
2047 return STI.
hasFeature(AMDGPU::FeatureMAIInsts);
2055 int32_t ArgNumVGPR) {
2056 if (has90AInsts && ArgNumAGPR)
2057 return alignTo(ArgNumVGPR, 4) + ArgNumAGPR;
2058 return std::max(ArgNumVGPR, ArgNumAGPR);
2063 const unsigned FirstSubReg =
TRI->getSubReg(
Reg, AMDGPU::sub0);
2064 return SGPRClass.
contains(FirstSubReg != 0 ? FirstSubReg :
Reg) ||
2068#define MAP_REG2REG \
2069 using namespace AMDGPU; \
2071 default: return Reg; \
2072 CASE_CI_VI(FLAT_SCR) \
2073 CASE_CI_VI(FLAT_SCR_LO) \
2074 CASE_CI_VI(FLAT_SCR_HI) \
2075 CASE_VI_GFX9PLUS(TTMP0) \
2076 CASE_VI_GFX9PLUS(TTMP1) \
2077 CASE_VI_GFX9PLUS(TTMP2) \
2078 CASE_VI_GFX9PLUS(TTMP3) \
2079 CASE_VI_GFX9PLUS(TTMP4) \
2080 CASE_VI_GFX9PLUS(TTMP5) \
2081 CASE_VI_GFX9PLUS(TTMP6) \
2082 CASE_VI_GFX9PLUS(TTMP7) \
2083 CASE_VI_GFX9PLUS(TTMP8) \
2084 CASE_VI_GFX9PLUS(TTMP9) \
2085 CASE_VI_GFX9PLUS(TTMP10) \
2086 CASE_VI_GFX9PLUS(TTMP11) \
2087 CASE_VI_GFX9PLUS(TTMP12) \
2088 CASE_VI_GFX9PLUS(TTMP13) \
2089 CASE_VI_GFX9PLUS(TTMP14) \
2090 CASE_VI_GFX9PLUS(TTMP15) \
2091 CASE_VI_GFX9PLUS(TTMP0_TTMP1) \
2092 CASE_VI_GFX9PLUS(TTMP2_TTMP3) \
2093 CASE_VI_GFX9PLUS(TTMP4_TTMP5) \
2094 CASE_VI_GFX9PLUS(TTMP6_TTMP7) \
2095 CASE_VI_GFX9PLUS(TTMP8_TTMP9) \
2096 CASE_VI_GFX9PLUS(TTMP10_TTMP11) \
2097 CASE_VI_GFX9PLUS(TTMP12_TTMP13) \
2098 CASE_VI_GFX9PLUS(TTMP14_TTMP15) \
2099 CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3) \
2100 CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7) \
2101 CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11) \
2102 CASE_VI_GFX9PLUS(TTMP12_TTMP13_TTMP14_TTMP15) \
2103 CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \
2104 CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \
2105 CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
2106 CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
2107 CASE_GFXPRE11_GFX11PLUS(M0) \
2108 CASE_GFXPRE11_GFX11PLUS(SGPR_NULL) \
2109 CASE_GFXPRE11_GFX11PLUS_TO(SGPR_NULL64, SGPR_NULL) \
2112#define CASE_CI_VI(node) \
2113 assert(!isSI(STI)); \
2114 case node: return isCI(STI) ? node##_ci : node##_vi;
2116#define CASE_VI_GFX9PLUS(node) \
2117 case node: return isGFX9Plus(STI) ? node##_gfx9plus : node##_vi;
2119#define CASE_GFXPRE11_GFX11PLUS(node) \
2120 case node: return isGFX11Plus(STI) ? node##_gfx11plus : node##_gfxpre11;
2122#define CASE_GFXPRE11_GFX11PLUS_TO(node, result) \
2123 case node: return isGFX11Plus(STI) ? result##_gfx11plus : result##_gfxpre11;
2132#undef CASE_VI_GFX9PLUS
2133#undef CASE_GFXPRE11_GFX11PLUS
2134#undef CASE_GFXPRE11_GFX11PLUS_TO
2136#define CASE_CI_VI(node) case node##_ci: case node##_vi: return node;
2137#define CASE_VI_GFX9PLUS(node) case node##_vi: case node##_gfx9plus: return node;
2138#define CASE_GFXPRE11_GFX11PLUS(node) case node##_gfx11plus: case node##_gfxpre11: return node;
2139#define CASE_GFXPRE11_GFX11PLUS_TO(node, result)
2147 case AMDGPU::SRC_SHARED_BASE_LO:
2148 case AMDGPU::SRC_SHARED_BASE:
2149 case AMDGPU::SRC_SHARED_LIMIT_LO:
2150 case AMDGPU::SRC_SHARED_LIMIT:
2151 case AMDGPU::SRC_PRIVATE_BASE_LO:
2152 case AMDGPU::SRC_PRIVATE_BASE:
2153 case AMDGPU::SRC_PRIVATE_LIMIT_LO:
2154 case AMDGPU::SRC_PRIVATE_LIMIT:
2155 case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2157 case AMDGPU::SRC_VCCZ:
2158 case AMDGPU::SRC_EXECZ:
2159 case AMDGPU::SRC_SCC:
2161 case AMDGPU::SGPR_NULL:
2169#undef CASE_VI_GFX9PLUS
2170#undef CASE_GFXPRE11_GFX11PLUS
2171#undef CASE_GFXPRE11_GFX11PLUS_TO
2176 unsigned OpType = Desc.
operands()[OpNo].OperandType;
2183 unsigned OpType = Desc.
operands()[OpNo].OperandType;
2190 unsigned OpType = Desc.
operands()[OpNo].OperandType;
2219 unsigned OpType = Desc.
operands()[OpNo].OperandType;
2228 case AMDGPU::VGPR_LO16RegClassID:
2229 case AMDGPU::VGPR_HI16RegClassID:
2230 case AMDGPU::SGPR_LO16RegClassID:
2231 case AMDGPU::AGPR_LO16RegClassID:
2233 case AMDGPU::SGPR_32RegClassID:
2234 case AMDGPU::VGPR_32RegClassID:
2235 case AMDGPU::VRegOrLds_32RegClassID:
2236 case AMDGPU::AGPR_32RegClassID:
2237 case AMDGPU::VS_32RegClassID:
2238 case AMDGPU::AV_32RegClassID:
2239 case AMDGPU::SReg_32RegClassID:
2240 case AMDGPU::SReg_32_XM0RegClassID:
2241 case AMDGPU::SRegOrLds_32RegClassID:
2243 case AMDGPU::SGPR_64RegClassID:
2244 case AMDGPU::VS_64RegClassID:
2245 case AMDGPU::SReg_64RegClassID:
2246 case AMDGPU::VReg_64RegClassID:
2247 case AMDGPU::AReg_64RegClassID:
2248 case AMDGPU::SReg_64_XEXECRegClassID:
2249 case AMDGPU::VReg_64_Align2RegClassID:
2250 case AMDGPU::AReg_64_Align2RegClassID:
2251 case AMDGPU::AV_64RegClassID:
2252 case AMDGPU::AV_64_Align2RegClassID:
2254 case AMDGPU::SGPR_96RegClassID:
2255 case AMDGPU::SReg_96RegClassID:
2256 case AMDGPU::VReg_96RegClassID:
2257 case AMDGPU::AReg_96RegClassID:
2258 case AMDGPU::VReg_96_Align2RegClassID:
2259 case AMDGPU::AReg_96_Align2RegClassID:
2260 case AMDGPU::AV_96RegClassID:
2261 case AMDGPU::AV_96_Align2RegClassID:
2263 case AMDGPU::SGPR_128RegClassID:
2264 case AMDGPU::SReg_128RegClassID:
2265 case AMDGPU::VReg_128RegClassID:
2266 case AMDGPU::AReg_128RegClassID:
2267 case AMDGPU::VReg_128_Align2RegClassID:
2268 case AMDGPU::AReg_128_Align2RegClassID:
2269 case AMDGPU::AV_128RegClassID:
2270 case AMDGPU::AV_128_Align2RegClassID:
2272 case AMDGPU::SGPR_160RegClassID:
2273 case AMDGPU::SReg_160RegClassID:
2274 case AMDGPU::VReg_160RegClassID:
2275 case AMDGPU::AReg_160RegClassID:
2276 case AMDGPU::VReg_160_Align2RegClassID:
2277 case AMDGPU::AReg_160_Align2RegClassID:
2278 case AMDGPU::AV_160RegClassID:
2279 case AMDGPU::AV_160_Align2RegClassID:
2281 case AMDGPU::SGPR_192RegClassID:
2282 case AMDGPU::SReg_192RegClassID:
2283 case AMDGPU::VReg_192RegClassID:
2284 case AMDGPU::AReg_192RegClassID:
2285 case AMDGPU::VReg_192_Align2RegClassID:
2286 case AMDGPU::AReg_192_Align2RegClassID:
2287 case AMDGPU::AV_192RegClassID:
2288 case AMDGPU::AV_192_Align2RegClassID:
2290 case AMDGPU::SGPR_224RegClassID:
2291 case AMDGPU::SReg_224RegClassID:
2292 case AMDGPU::VReg_224RegClassID:
2293 case AMDGPU::AReg_224RegClassID:
2294 case AMDGPU::VReg_224_Align2RegClassID:
2295 case AMDGPU::AReg_224_Align2RegClassID:
2296 case AMDGPU::AV_224RegClassID:
2297 case AMDGPU::AV_224_Align2RegClassID:
2299 case AMDGPU::SGPR_256RegClassID:
2300 case AMDGPU::SReg_256RegClassID:
2301 case AMDGPU::VReg_256RegClassID:
2302 case AMDGPU::AReg_256RegClassID:
2303 case AMDGPU::VReg_256_Align2RegClassID:
2304 case AMDGPU::AReg_256_Align2RegClassID:
2305 case AMDGPU::AV_256RegClassID:
2306 case AMDGPU::AV_256_Align2RegClassID:
2308 case AMDGPU::SGPR_288RegClassID:
2309 case AMDGPU::SReg_288RegClassID:
2310 case AMDGPU::VReg_288RegClassID:
2311 case AMDGPU::AReg_288RegClassID:
2312 case AMDGPU::VReg_288_Align2RegClassID:
2313 case AMDGPU::AReg_288_Align2RegClassID:
2314 case AMDGPU::AV_288RegClassID:
2315 case AMDGPU::AV_288_Align2RegClassID:
2317 case AMDGPU::SGPR_320RegClassID:
2318 case AMDGPU::SReg_320RegClassID:
2319 case AMDGPU::VReg_320RegClassID:
2320 case AMDGPU::AReg_320RegClassID:
2321 case AMDGPU::VReg_320_Align2RegClassID:
2322 case AMDGPU::AReg_320_Align2RegClassID:
2323 case AMDGPU::AV_320RegClassID:
2324 case AMDGPU::AV_320_Align2RegClassID:
2326 case AMDGPU::SGPR_352RegClassID:
2327 case AMDGPU::SReg_352RegClassID:
2328 case AMDGPU::VReg_352RegClassID:
2329 case AMDGPU::AReg_352RegClassID:
2330 case AMDGPU::VReg_352_Align2RegClassID:
2331 case AMDGPU::AReg_352_Align2RegClassID:
2332 case AMDGPU::AV_352RegClassID:
2333 case AMDGPU::AV_352_Align2RegClassID:
2335 case AMDGPU::SGPR_384RegClassID:
2336 case AMDGPU::SReg_384RegClassID:
2337 case AMDGPU::VReg_384RegClassID:
2338 case AMDGPU::AReg_384RegClassID:
2339 case AMDGPU::VReg_384_Align2RegClassID:
2340 case AMDGPU::AReg_384_Align2RegClassID:
2341 case AMDGPU::AV_384RegClassID:
2342 case AMDGPU::AV_384_Align2RegClassID:
2344 case AMDGPU::SGPR_512RegClassID:
2345 case AMDGPU::SReg_512RegClassID:
2346 case AMDGPU::VReg_512RegClassID:
2347 case AMDGPU::AReg_512RegClassID:
2348 case AMDGPU::VReg_512_Align2RegClassID:
2349 case AMDGPU::AReg_512_Align2RegClassID:
2350 case AMDGPU::AV_512RegClassID:
2351 case AMDGPU::AV_512_Align2RegClassID:
2353 case AMDGPU::SGPR_1024RegClassID:
2354 case AMDGPU::SReg_1024RegClassID:
2355 case AMDGPU::VReg_1024RegClassID:
2356 case AMDGPU::AReg_1024RegClassID:
2357 case AMDGPU::VReg_1024_Align2RegClassID:
2358 case AMDGPU::AReg_1024_Align2RegClassID:
2359 case AMDGPU::AV_1024RegClassID:
2360 case AMDGPU::AV_1024_Align2RegClassID:
2378 unsigned RCID = Desc.
operands()[OpNo].RegClass;
2387 return (Val == llvm::bit_cast<uint64_t>(0.0)) ||
2388 (Val == llvm::bit_cast<uint64_t>(1.0)) ||
2389 (Val == llvm::bit_cast<uint64_t>(-1.0)) ||
2390 (Val == llvm::bit_cast<uint64_t>(0.5)) ||
2391 (Val == llvm::bit_cast<uint64_t>(-0.5)) ||
2392 (Val == llvm::bit_cast<uint64_t>(2.0)) ||
2393 (Val == llvm::bit_cast<uint64_t>(-2.0)) ||
2394 (Val == llvm::bit_cast<uint64_t>(4.0)) ||
2395 (Val == llvm::bit_cast<uint64_t>(-4.0)) ||
2396 (Val == 0x3fc45f306dc9c882 && HasInv2Pi);
2413 return (Val == llvm::bit_cast<uint32_t>(0.0f)) ||
2414 (Val == llvm::bit_cast<uint32_t>(1.0f)) ||
2415 (Val == llvm::bit_cast<uint32_t>(-1.0f)) ||
2416 (Val == llvm::bit_cast<uint32_t>(0.5f)) ||
2417 (Val == llvm::bit_cast<uint32_t>(-0.5f)) ||
2418 (Val == llvm::bit_cast<uint32_t>(2.0f)) ||
2419 (Val == llvm::bit_cast<uint32_t>(-2.0f)) ||
2420 (Val == llvm::bit_cast<uint32_t>(4.0f)) ||
2421 (Val == llvm::bit_cast<uint32_t>(-4.0f)) ||
2422 (Val == 0x3e22f983 && HasInv2Pi);
2433 return Val == 0x3C00 ||
2448 int16_t Trunc =
static_cast<int16_t
>(
Literal);
2454 int16_t Lo16 =
static_cast<int16_t
>(
Literal);
2455 int16_t Hi16 =
static_cast<int16_t
>(
Literal >> 16);
2460 int16_t Lo16 =
static_cast<int16_t
>(
Literal);
2464 int16_t Hi16 =
static_cast<int16_t
>(
Literal >> 16);
2473 int16_t Lo16 =
static_cast<int16_t
>(
Literal);
2477 int16_t Hi16 =
static_cast<int16_t
>(
Literal >> 16);
2480 return Lo16 == Hi16;
2502 return A->hasAttribute(Attribute::InReg) ||
2503 A->hasAttribute(Attribute::ByVal);
2544 int64_t EncodedOffset) {
2546 : isUInt<8>(EncodedOffset);
2550 int64_t EncodedOffset,
2554 isInt<21>(EncodedOffset);
2558 return (ByteOffset & 3) == 0;
2567 return ByteOffset >> 2;
2571 int64_t ByteOffset,
bool IsBuffer) {
2575 return isInt<20>(ByteOffset) ? std::optional<int64_t>(ByteOffset)
2580 return std::nullopt;
2584 ? std::optional<int64_t>(EncodedOffset)
2589 int64_t ByteOffset) {
2591 return std::nullopt;
2594 return isUInt<32>(EncodedOffset) ? std::optional<int64_t>(EncodedOffset)
2608struct SourceOfDivergence {
2611const SourceOfDivergence *lookupSourceOfDivergence(
unsigned Intr);
2618#define GET_SourcesOfDivergence_IMPL
2619#define GET_UniformIntrinsics_IMPL
2620#define GET_Gfx9BufferFormat_IMPL
2621#define GET_Gfx10BufferFormat_IMPL
2622#define GET_Gfx11PlusBufferFormat_IMPL
2623#include "AMDGPUGenSearchableTables.inc"
2628 return lookupSourceOfDivergence(IntrID);
2632 return lookupAlwaysUniform(IntrID);
2636 uint8_t NumComponents,
2640 ? getGfx11PlusBufferFormatInfo(BitsPerComp, NumComponents,
2642 :
isGFX10(STI) ? getGfx10BufferFormatInfo(BitsPerComp,
2643 NumComponents, NumFormat)
2644 : getGfx9BufferFormatInfo(BitsPerComp,
2645 NumComponents, NumFormat);
2652 : getGfx9BufferFormatInfo(
Format);
2661 OS <<
"Unsupported";
unsigned const MachineRegisterInfo * MRI
static llvm::cl::opt< unsigned > AmdhsaCodeObjectVersion("amdhsa-code-object-version", llvm::cl::Hidden, llvm::cl::desc("AMDHSA Code Object Version"), llvm::cl::init(4))
Provides AMDGPU specific target descriptions.
AMDHSA kernel descriptor definitions.
#define AMDHSA_BITS_SET(DST, MSK, VAL)
@ AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32
This file contains the simple types necessary to represent the attributes associated with functions a...
SmallVector< MachineOperand, 4 > Cond
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Analysis containing CSE Info
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the subtype a function will return at a given Idx. This should correspond to the result type of an ExtractValue instruction executed with just that one Idx.
unsigned const TargetRegisterInfo * TRI
#define S_00B848_MEM_ORDERED(x)
#define S_00B848_WGP_MODE(x)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
bool isSramEccSupported() const
void setTargetIDFromFeaturesString(StringRef FS)
bool isSramEccOnOrAny() const
TargetIDSetting getXnackSetting() const
bool isXnackOnOrAny() const
AMDGPUTargetID(const MCSubtargetInfo &STI)
bool isXnackSupported() const
void setTargetIDFromTargetIDStream(StringRef TargetID)
std::string toString() const
TargetIDSetting getSramEccSetting() const
unsigned getIndexInParsedOperands(unsigned CompOprIdx) const
unsigned getIndexOfDstInParsedOperands() const
unsigned getIndexOfSrcInParsedOperands(unsigned CompSrcIdx) const
unsigned getCompParsedSrcOperandsNum() const
std::optional< unsigned > getInvalidCompOperandIndex(std::function< unsigned(unsigned, unsigned)> GetRegIdx) const
std::array< unsigned, Component::MAX_OPR_NUM > RegIndices
This class represents an incoming formal argument to a Function.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
CallingConv::ID getCallingConv() const
bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Determine whether the argument or parameter has the given attribute.
constexpr bool test(unsigned I) const
unsigned getAddressSpace() const
This is an important class for using LLVM in a threaded context.
void emitError(uint64_t LocCookie, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
Describe properties that are true of each instruction in the target description file.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
ArrayRef< MCOperandInfo > operands() const
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
unsigned short NumOperands
int getOperandConstraint(unsigned OpNum, MCOI::OperandConstraint Constraint) const
Returns the value of the specified operand constraint if it is present.
Interface to description of machine instruction set.
MCRegisterClass - Base class of TargetRegisterClass.
unsigned getID() const
getID() - Return the register class ID number.
bool contains(MCRegister Reg) const
contains - Return true if the specified register is included in this register class.
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc objects that represent all of the machine registers that the target has.
Generic base class for all target subtargets.
bool hasFeature(unsigned Feature) const
const Triple & getTargetTriple() const
const FeatureBitset & getFeatureBits() const
A Module instance is used to store all the information related to an LLVM module.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs.
StringRef - Represent a constant reference to a string, i.e.
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
bool getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
std::string str() const
str - Get the contents as an std::string.
constexpr bool empty() const
empty - Check if the string is empty.
constexpr size_t size() const
size - Get the string size.
bool endswith(StringRef Suffix) const
Manages the enabling and disabling of subtarget specific features.
const std::vector< std::string > & getFeatures() const
Returns the vector of individual subtarget features.
unsigned getID() const
Return the register class ID number.
Triple - Helper class for working with autoconf configuration names.
OSType getOS() const
Get the parsed operating system type of this triple.
ArchType getArch() const
Get the parsed architecture type of this triple.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary values as strings.
This class implements an extremely fast bulk output stream that can only output to a stream.
A raw_ostream that writes to an std::string.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ CONSTANT_ADDRESS_32BIT
Address space for 32-bit constant memory.
@ LOCAL_ADDRESS
Address space for local memory.
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val, bool &IsDefault, const MCSubtargetInfo &STI)
int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask, const MCSubtargetInfo &STI)
const CustomOperandVal DepCtrInfo[]
bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal, const MCSubtargetInfo &STI)
int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI)
bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI)
static constexpr ExpTgt ExpTgtInfo[]
bool getTgtName(unsigned Id, StringRef &Name, int &Index)
unsigned getTgtId(const StringRef Name)
@ ET_DUAL_SRC_BLEND_MAX_IDX
constexpr uint32_t VersionMajor
HSA metadata major version.
bool isValidHwreg(int64_t Id)
const CustomOperand< const MCSubtargetInfo & > Opr[]
bool isValidHwregOffset(int64_t Offset)
uint64_t encodeHwreg(uint64_t Id, uint64_t Offset, uint64_t Width)
bool isValidHwregWidth(int64_t Width)
int64_t getHwregId(const StringRef Name, const MCSubtargetInfo &STI)
StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI)
void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width)
@ COMPLETION_ACTION_OFFSET
@ MULTIGRID_SYNC_ARG_OFFSET
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
@ FIXED_NUM_SGPRS_FOR_INIT_BUG
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI)
unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getWavefrontSize(const MCSubtargetInfo *STI)
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI)
unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI)
unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs, std::optional< bool > EnableWavefrontSize32)
unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed, bool FlatScrUsed, bool XNACKUsed)
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
unsigned getLocalMemorySize(const MCSubtargetInfo *STI)
unsigned getAddressableLocalMemorySize(const MCSubtargetInfo *STI)
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
unsigned getEUsPerCU(const MCSubtargetInfo *STI)
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI)
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI)
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
static TargetIDSetting getTargetIDSettingFromFeatureString(StringRef FeatureString)
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI)
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, bool Addressable)
unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs)
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI)
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI)
unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI, unsigned NumVGPRs)
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI)
StringRef getMsgOpName(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI)
uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId)
StringRef getMsgName(int64_t MsgId, const MCSubtargetInfo &STI)
bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI)
int64_t getMsgId(const StringRef Name, const MCSubtargetInfo &STI)
void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId, uint16_t &StreamId, const MCSubtargetInfo &STI)
bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI)
int64_t getMsgOpId(int64_t MsgId, const StringRef Name)
const char *const OpGsSymbolic[OP_GS_LAST_]
bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, const MCSubtargetInfo &STI, bool Strict)
const char *const OpSysSymbolic[OP_SYS_LAST_]
static uint64_t getMsgIdMask(const MCSubtargetInfo &STI)
bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI)
const CustomOperand< const MCSubtargetInfo & > Msg[]
bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI, bool Strict)
constexpr unsigned COMPONENTS_NUM
constexpr unsigned BANKS_NUM[]
bool isGCN3Encoding(const MCSubtargetInfo &STI)
bool isGFX10_BEncoding(const MCSubtargetInfo &STI)
bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi)
unsigned getNSAMaxSize(const MCSubtargetInfo &STI)
int getVOPDFull(unsigned OpX, unsigned OpY)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc, unsigned OpNo)
Get size of register operand.
void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt)
Decodes Vmcnt, Expcnt and Lgkmcnt from given Waitcnt for given isa Version, and writes decoded values...
uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset)
Convert ByteOffset to dwords if the subtarget uses dword SMRD immediate offsets.
static bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST)
static bool hasSMEMByteOffset(const MCSubtargetInfo &ST)
bool isHsaAbiVersion2(const MCSubtargetInfo *STI)
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding, unsigned VDataDwords, unsigned VAddrDwords)
bool isHsaAbiVersion5(const MCSubtargetInfo *STI)
bool getMTBUFHasSrsrc(unsigned Opc)
std::optional< int64_t > getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST, int64_t ByteOffset)
bool isInlinableIntLiteralV216(int32_t Literal)
static bool isSymbolicCustomOperandEncoding(const CustomOperandVal *Opr, int Size, unsigned Code, bool &HasNonDefaultVal, const MCSubtargetInfo &STI)
bool isGFX10Before1030(const MCSubtargetInfo &STI)
bool isHsaAbiVersion4(const MCSubtargetInfo *STI)
bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo)
Does this operand support only inlinable literals?
unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc)
const int OPR_ID_UNSUPPORTED
bool shouldEmitConstantsToTextSection(const Triple &TT)
bool isHsaAbiVersion3(const MCSubtargetInfo *STI)
int getMTBUFElements(unsigned Opc)
static int encodeCustomOperandVal(const CustomOperandVal &Op, int64_t InputVal)
std::pair< int, int > getIntegerPairAttribute(const Function &F, StringRef Name, std::pair< int, int > Default, bool OnlyFirstRequired)
int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR)
bool isGFX10(const MCSubtargetInfo &STI)
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, const MCSubtargetInfo *STI)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For FLAT segment the offset must be positive; MSB is ignored and forced to zero.
unsigned mc2PseudoReg(unsigned Reg)
Convert hardware register Reg to a pseudo register.
bool hasA16(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset, bool IsBuffer)
CanBeVOPD getCanBeVOPD(unsigned Opc)
static int getOprIdx(std::function< bool(const CustomOperand< T > &)> Test, const CustomOperand< T > OpInfo[], int OpInfoSize, T Context)
bool hasPackedD16(const MCSubtargetInfo &STI)
unsigned getCodeObjectVersion(const Module &M)
bool isGFX940(const MCSubtargetInfo &STI)
bool isEntryFunctionCC(CallingConv::ID CC)
bool isGFX11(const MCSubtargetInfo &STI)
const int OPR_VAL_INVALID
bool getSMEMIsBuffer(unsigned Opc)
bool isGroupSegment(const GlobalValue *GV)
IsaVersion getIsaVersion(StringRef GPU)
bool getMTBUFHasSoffset(unsigned Opc)
unsigned getRegBitWidth(unsigned RCID)
Get the size in bits of a register from the register class RC.
bool hasXNACK(const MCSubtargetInfo &STI)
unsigned getVOPDOpcode(unsigned Opc)
unsigned encodeWaitcnt(const IsaVersion &Version, unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt)
Encodes Vmcnt, Expcnt and Lgkmcnt into Waitcnt for given isa Version.
bool isVOPC64DPP(unsigned Opc)
int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements)
bool isCompute(CallingConv::ID cc)
bool getMAIIsGFX940XDL(unsigned Opc)
bool isSI(const MCSubtargetInfo &STI)
bool isReadOnlySegment(const GlobalValue *GV)
bool isArgPassedInSGPR(const Argument *A)
bool isIntrinsicAlwaysUniform(unsigned IntrID)
int getMUBUFBaseOpcode(unsigned Opc)
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt)
unsigned getWaitcntBitMask(const IsaVersion &Version)
bool getVOP3IsSingle(unsigned Opc)
bool isGFX9(const MCSubtargetInfo &STI)
bool getVOP1IsSingle(unsigned Opc)
static bool isDwordAligned(uint64_t ByteOffset)
bool isGFX10_AEncoding(const MCSubtargetInfo &STI)
bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this a KImm operand?
bool getHasColorExport(const Function &F)
int getMTBUFBaseOpcode(unsigned Opc)
bool isGFX90A(const MCSubtargetInfo &STI)
unsigned getDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion)
bool hasSRAMECC(const MCSubtargetInfo &STI)
bool getHasDepthExport(const Function &F)
static bool isValidOpr(int Idx, const CustomOperand< T > OpInfo[], int OpInfoSize, T Context)
bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI)
bool getMUBUFHasVAddr(unsigned Opc)
bool isTrue16Inst(unsigned Opc)
std::pair< unsigned, unsigned > getVOPDComponents(unsigned VOPDOpcode)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
unsigned getInitialPSInputAddr(const Function &F)
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Expcnt)
bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this an AMDGPU specific source operand? These include registers, inline constants, literals and mandatory literals (KImm).
unsigned getAmdhsaCodeObjectVersion()
unsigned getVmcntBitMask(const IsaVersion &Version)
bool isNotGFX10Plus(const MCSubtargetInfo &STI)
bool hasMAIInsts(const MCSubtargetInfo &STI)
bool isIntrinsicSourceOfDivergence(unsigned IntrID)
bool isKernelCC(const Function *Func)
bool isGFX8Plus(const MCSubtargetInfo &STI)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, uint64_t NamedIdx)
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the values intended for floating point values.
unsigned getLgkmcntBitMask(const IsaVersion &Version)
std::optional< int64_t > getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset, bool IsBuffer)
bool isSGPR(unsigned Reg, const MCRegisterInfo *TRI)
Is Reg - scalar register.
bool hasMIMG_R128(const MCSubtargetInfo &STI)
bool hasGFX10_3Insts(const MCSubtargetInfo &STI)
bool hasG16(const MCSubtargetInfo &STI)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements)
unsigned getExpcntBitMask(const IsaVersion &Version)
bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI)
bool getMUBUFHasSoffset(unsigned Opc)
bool isNotGFX11Plus(const MCSubtargetInfo &STI)
bool isGFX11Plus(const MCSubtargetInfo &STI)
bool isInlineValue(unsigned Reg)
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this floating-point operand?
bool isShader(CallingConv::ID cc)
unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion)
static unsigned getDefaultCustomOperandEncoding(const CustomOperandVal *Opr, int Size, const MCSubtargetInfo &STI)
bool isGFX10Plus(const MCSubtargetInfo &STI)
unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI)
If Reg is a pseudo reg, return the correct hardware register given STI otherwise return Reg.
static bool decodeCustomOperand(const CustomOperandVal *Opr, int Size, unsigned Code, int &Idx, StringRef &Name, unsigned &Val, bool &IsDefault, const MCSubtargetInfo &STI)
bool isGlobalSegment(const GlobalValue *GV)
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
@ OPERAND_REG_INLINE_C_LAST
@ OPERAND_REG_INLINE_C_FP64
@ OPERAND_REG_IMM_V2INT16
@ OPERAND_REG_INLINE_AC_V2FP16
@ OPERAND_REG_INLINE_C_V2FP16
@ OPERAND_REG_INLINE_AC_V2INT16
@ OPERAND_REG_INLINE_AC_FP16
@ OPERAND_REG_INLINE_AC_FP32
@ OPERAND_REG_INLINE_C_FIRST
@ OPERAND_REG_INLINE_C_FP32
@ OPERAND_REG_INLINE_C_V2INT16
@ OPERAND_REG_INLINE_AC_FP64
@ OPERAND_REG_INLINE_C_FP16
@ OPERAND_REG_INLINE_C_V2FP32
@ OPERAND_REG_IMM_FP32_DEFERRED
@ OPERAND_REG_IMM_FP16_DEFERRED
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset)
bool isGFX9Plus(const MCSubtargetInfo &STI)
std::optional< uint8_t > getHsaAbiVersion(const MCSubtargetInfo *STI)
const int OPR_ID_DUPLICATE
bool isVOPD(unsigned Opc)
VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Vmcnt)
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt)
int getMCOpcode(uint16_t Opcode, unsigned Gen)
const MIMGBaseOpcodeInfo * getMIMGBaseOpcode(unsigned Opc)
bool isVI(const MCSubtargetInfo &STI)
bool getMUBUFIsBufferInv(unsigned Opc)
static int encodeCustomOperand(const CustomOperandVal *Opr, int Size, const StringRef Name, int64_t InputVal, unsigned &UsedOprMask, const MCSubtargetInfo &STI)
bool isHsaAbiVersion3AndAbove(const MCSubtargetInfo *STI)
bool isCI(const MCSubtargetInfo &STI)
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Lgkmcnt)
amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(const MCSubtargetInfo *STI)
bool getVOP2IsSingle(unsigned Opc)
bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi)
bool getMAIIsDGEMM(unsigned Opc)
Returns true if MAI operation is a double precision GEMM.
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
unsigned getCompletionActionImplicitArgPosition(unsigned CodeObjectVersion)
int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels)
bool isModuleEntryFunctionCC(CallingConv::ID CC)
bool getMTBUFHasVAddr(unsigned Opc)
unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt)
bool hasVOPD(const MCSubtargetInfo &STI)
bool isFoldableLiteralV216(int32_t Literal, bool HasInv2Pi)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion)
bool isGFX9_GFX10(const MCSubtargetInfo &STI)
int getMUBUFElements(unsigned Opc)
const GcnBufferFormatInfo * getGcnBufferFormatInfo(uint8_t BitsPerComp, uint8_t NumComponents, uint8_t NumFormat, const MCSubtargetInfo &STI)
bool isGraphics(CallingConv::ID cc)
unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc)
bool isPermlane16(unsigned Opc)
bool getMUBUFHasSrsrc(unsigned Opc)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
@ AMDGPU_VS
Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tessellation and geometry are not in use, or otherwise copy shader if it is in use).
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ AMDGPU_Gfx
Used for AMD graphics targets.
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
@ SPIR_KERNEL
Used for SPIR kernel functions.
@ AMDGPU_ES
Used for AMDPAL shader stage before geometry shader if geometry is in use.
@ AMDGPU_LS
Used for AMDPAL vertex shader if tessellation is in use.
@ ELFABIVERSION_AMDGPU_HSA_V4
@ ELFABIVERSION_AMDGPU_HSA_V5
@ ELFABIVERSION_AMDGPU_HSA_V3
@ ELFABIVERSION_AMDGPU_HSA_V2
@ FLOAT_DENORM_MODE_FLUSH_NONE
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator)
Returns the integer ceil(Numerator / Denominator).
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the largest uint64_t less than or equal to Value and is Skew mod Align.
@ AlwaysUniform
The result values are always uniform.
@ Default
The result values are uniform if and only if all operands are uniform.
AMD Kernel Code Object (amd_kernel_code_t).
Instruction set architecture version.
Represents the counter values to wait for in an s_waitcnt instruction.
uint32_t compute_pgm_rsrc1
uint32_t compute_pgm_rsrc2
uint16_t kernel_code_properties
uint32_t compute_pgm_rsrc3