Go to the documentation of this file.
19 #include "llvm/IR/IntrinsicsAMDGPU.h"
20 #include "llvm/IR/IntrinsicsR600.h"
27 #define GET_INSTRINFO_NAMED_OPS
28 #define GET_INSTRMAP_INFO
29 #include "AMDGPUGenInstrInfo.inc"
44 unsigned getBitMask(
unsigned Shift,
unsigned Width) {
51 unsigned packBits(
unsigned Src,
unsigned Dst,
unsigned Shift,
unsigned Width) {
59 unsigned unpackBits(
unsigned Src,
unsigned Shift,
unsigned Width) {
/// \returns The bit width of the EXPCNT field in the s_waitcnt encoding.
/// The parameter is unused here: the field occupies 3 bits on every ISA
/// version this helper is called with (kept for signature uniformity with
/// the sibling *BitWidth/*BitShift helpers).
unsigned getExpcntBitWidth(unsigned VersionMajor) {
  return 3;
}
/// \returns The bit position of the high half of the split VMCNT field in
/// the s_waitcnt encoding. The parameter is unused here: the shift is 14
/// for every ISA version this helper is called with (kept for signature
/// uniformity with the sibling *BitWidth/*BitShift helpers).
unsigned getVmcntBitShiftHi(unsigned VersionMajor) {
  return 14;
}
188 #define GET_MIMGBaseOpcodesTable_IMPL
189 #define GET_MIMGDimInfoTable_IMPL
190 #define GET_MIMGInfoTable_IMPL
191 #define GET_MIMGLZMappingTable_IMPL
192 #define GET_MIMGMIPMappingTable_IMPL
193 #define GET_MIMGBiasMappingTable_IMPL
194 #define GET_MIMGOffsetMappingTable_IMPL
195 #define GET_MIMGG16MappingTable_IMPL
196 #define GET_MAIInstInfoTable_IMPL
197 #include "AMDGPUGenSearchableTables.inc"
200 unsigned VDataDwords,
unsigned VAddrDwords) {
201 const MIMGInfo *
Info = getMIMGOpcodeHelper(BaseOpcode, MIMGEncoding,
202 VDataDwords, VAddrDwords);
216 return NewInfo ? NewInfo->
Opcode : -1;
221 bool IsG16Supported) {
223 unsigned AddrComponents = (BaseOpcode->
Coordinates ?
Dim->NumCoords : 0) +
228 AddrWords += AddrComponents;
236 if ((IsA16 && !IsG16Supported) || BaseOpcode->
G16)
240 AddrWords += alignTo<2>(
Dim->NumGradients / 2);
242 AddrWords +=
Dim->NumGradients;
280 #define GET_MTBUFInfoTable_DECL
281 #define GET_MTBUFInfoTable_IMPL
282 #define GET_MUBUFInfoTable_DECL
283 #define GET_MUBUFInfoTable_IMPL
284 #define GET_SMInfoTable_DECL
285 #define GET_SMInfoTable_IMPL
286 #define GET_VOP1InfoTable_DECL
287 #define GET_VOP1InfoTable_IMPL
288 #define GET_VOP2InfoTable_DECL
289 #define GET_VOP2InfoTable_IMPL
290 #define GET_VOP3InfoTable_DECL
291 #define GET_VOP3InfoTable_IMPL
292 #define GET_VOPC64DPPTable_DECL
293 #define GET_VOPC64DPPTable_IMPL
294 #define GET_VOPC64DPP8Table_DECL
295 #define GET_VOPC64DPP8Table_IMPL
296 #define GET_WMMAOpcode2AddrMappingTable_DECL
297 #define GET_WMMAOpcode2AddrMappingTable_IMPL
298 #define GET_WMMAOpcode3AddrMappingTable_DECL
299 #define GET_WMMAOpcode3AddrMappingTable_IMPL
300 #include "AMDGPUGenSearchableTables.inc"
304 return Info ?
Info->BaseOpcode : -1;
308 const MTBUFInfo *
Info = getMTBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
319 return Info ?
Info->has_vaddr :
false;
324 return Info ?
Info->has_srsrc :
false;
329 return Info ?
Info->has_soffset :
false;
334 return Info ?
Info->BaseOpcode : -1;
338 const MUBUFInfo *
Info = getMUBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
349 return Info ?
Info->has_vaddr :
false;
354 return Info ?
Info->has_srsrc :
false;
359 return Info ?
Info->has_soffset :
false;
364 return Info ?
Info->IsBufferInv :
false;
368 const SMInfo *
Info = getSMEMOpcodeHelper(Opc);
369 return Info ?
Info->IsBuffer :
false;
374 return Info ?
Info->IsSingle :
false;
379 return Info ?
Info->IsSingle :
false;
384 return Info ?
Info->IsSingle :
false;
388 return isVOPC64DPPOpcodeHelper(Opc) || isVOPC64DPP8OpcodeHelper(Opc);
393 return Info ?
Info->is_dgemm :
false;
398 return Info ?
Info->is_gfx940_xdl :
false;
403 return Info ?
Info->Opcode3Addr : ~0u;
408 return Info ?
Info->Opcode2Addr : ~0u;
415 return getMCOpcodeGen(Opcode,
static_cast<Subtarget
>(Gen));
437 for (
const std::string &Feature : Features.
getFeatures()) {
438 if (Feature ==
"+xnack")
439 XnackRequested =
true;
440 else if (Feature ==
"-xnack")
441 XnackRequested =
false;
442 else if (Feature ==
"+sramecc")
443 SramEccRequested =
true;
444 else if (Feature ==
"-sramecc")
445 SramEccRequested =
false;
451 if (XnackRequested) {
452 if (XnackSupported) {
458 if (*XnackRequested) {
459 errs() <<
"warning: xnack 'On' was requested for a processor that does "
462 errs() <<
"warning: xnack 'Off' was requested for a processor that "
463 "does not support it!\n";
468 if (SramEccRequested) {
469 if (SramEccSupported) {
476 if (*SramEccRequested) {
477 errs() <<
"warning: sramecc 'On' was requested for a processor that "
478 "does not support it!\n";
480 errs() <<
"warning: sramecc 'Off' was requested for a processor that "
481 "does not support it!\n";
499 TargetID.
split(TargetIDSplit,
':');
501 for (
const auto &FeatureString : TargetIDSplit) {
502 if (FeatureString.startswith(
"xnack"))
504 if (FeatureString.startswith(
"sramecc"))
510 std::string StringRep;
516 StreamRep << TargetTriple.getArchName() <<
'-'
517 << TargetTriple.getVendorName() <<
'-'
518 << TargetTriple.getOSName() <<
'-'
519 << TargetTriple.getEnvironmentName() <<
'-';
521 std::string Processor;
532 std::string Features;
534 switch (*HsaAbiVersion) {
538 if (Processor ==
"gfx600") {
539 }
else if (Processor ==
"gfx601") {
540 }
else if (Processor ==
"gfx602") {
541 }
else if (Processor ==
"gfx700") {
542 }
else if (Processor ==
"gfx701") {
543 }
else if (Processor ==
"gfx702") {
544 }
else if (Processor ==
"gfx703") {
545 }
else if (Processor ==
"gfx704") {
546 }
else if (Processor ==
"gfx705") {
547 }
else if (Processor ==
"gfx801") {
550 "AMD GPU code object V2 does not support processor " +
551 Twine(Processor) +
" without XNACK");
552 }
else if (Processor ==
"gfx802") {
553 }
else if (Processor ==
"gfx803") {
554 }
else if (Processor ==
"gfx805") {
555 }
else if (Processor ==
"gfx810") {
558 "AMD GPU code object V2 does not support processor " +
559 Twine(Processor) +
" without XNACK");
560 }
else if (Processor ==
"gfx900") {
562 Processor =
"gfx901";
563 }
else if (Processor ==
"gfx902") {
565 Processor =
"gfx903";
566 }
else if (Processor ==
"gfx904") {
568 Processor =
"gfx905";
569 }
else if (Processor ==
"gfx906") {
571 Processor =
"gfx907";
572 }
else if (Processor ==
"gfx90c") {
575 "AMD GPU code object V2 does not support processor " +
576 Twine(Processor) +
" with XNACK being ON or ANY");
579 "AMD GPU code object V2 does not support processor " +
586 Features +=
"+xnack";
590 Features +=
"+sram-ecc";
596 Features +=
":sramecc-";
598 Features +=
":sramecc+";
601 Features +=
":xnack-";
603 Features +=
":xnack+";
610 StreamRep << Processor << Features;
646 unsigned FlatWorkGroupSize) {
647 assert(FlatWorkGroupSize != 0);
671 unsigned FlatWorkGroupSize) {
686 unsigned FlatWorkGroupSize) {
746 return Addressable ? AddressableNumSGPRs : 108;
747 if (
Version.Major >= 8 && !Addressable)
748 AddressableNumSGPRs = 112;
753 return std::min(MaxNumSGPRs, AddressableNumSGPRs);
757 bool FlatScrUsed,
bool XNACKUsed) {
758 unsigned ExtraSGPRs = 0;
798 bool IsWave32 = EnableWavefrontSize32 ?
799 *EnableWavefrontSize32 :
803 return IsWave32 ? 16 : 8;
805 return IsWave32 ? 8 : 4;
813 bool IsWave32 = EnableWavefrontSize32 ?
814 *EnableWavefrontSize32 :
817 return IsWave32 ? 8 : 4;
848 unsigned MinNumVGPRs =
860 return std::min(MaxNumVGPRs, AddressableNumVGPRs);
877 memset(&Header, 0,
sizeof(Header));
914 memset(&KD, 0,
sizeof(KD));
917 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64,
920 amdhsa::COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, 1);
922 amdhsa::COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 1);
924 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, 1);
927 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
930 amdhsa::COMPUTE_PGM_RSRC1_WGP_MODE,
933 amdhsa::COMPUTE_PGM_RSRC1_MEM_ORDERED, 1);
937 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
963 int Result = Default;
965 if (A.isStringAttribute()) {
967 if (Str.getAsInteger(0, Result)) {
978 std::pair<int, int> Default,
979 bool OnlyFirstRequired) {
981 if (!A.isStringAttribute())
985 std::pair<int, int> Ints = Default;
986 std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(
',');
987 if (Strs.first.trim().getAsInteger(0, Ints.first)) {
988 Ctx.
emitError(
"can't parse first integer attribute " +
Name);
991 if (Strs.second.trim().getAsInteger(0, Ints.second)) {
992 if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
993 Ctx.
emitError(
"can't parse second integer attribute " +
Name);
1002 return (1 << (getVmcntBitWidthLo(
Version.Major) +
1003 getVmcntBitWidthHi(
Version.Major))) -
1008 return (1 << getExpcntBitWidth(
Version.Major)) - 1;
1012 return (1 << getLgkmcntBitWidth(
Version.Major)) - 1;
1016 unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(
Version.Major),
1017 getVmcntBitWidthLo(
Version.Major));
1018 unsigned Expcnt = getBitMask(getExpcntBitShift(
Version.Major),
1019 getExpcntBitWidth(
Version.Major));
1020 unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(
Version.Major),
1021 getLgkmcntBitWidth(
Version.Major));
1022 unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(
Version.Major),
1023 getVmcntBitWidthHi(
Version.Major));
1024 return VmcntLo | Expcnt | Lgkmcnt | VmcntHi;
1028 unsigned VmcntLo = unpackBits(
Waitcnt, getVmcntBitShiftLo(
Version.Major),
1029 getVmcntBitWidthLo(
Version.Major));
1030 unsigned VmcntHi = unpackBits(
Waitcnt, getVmcntBitShiftHi(
Version.Major),
1031 getVmcntBitWidthHi(
Version.Major));
1032 return VmcntLo | VmcntHi << getVmcntBitWidthLo(
Version.Major);
1037 getExpcntBitWidth(
Version.Major));
1042 getLgkmcntBitWidth(
Version.Major));
1046 unsigned &Vmcnt,
unsigned &Expcnt,
unsigned &Lgkmcnt) {
1063 getVmcntBitWidthLo(
Version.Major));
1064 return packBits(Vmcnt >> getVmcntBitWidthLo(
Version.Major),
Waitcnt,
1065 getVmcntBitShiftHi(
Version.Major),
1066 getVmcntBitWidthHi(
Version.Major));
1071 return packBits(Expcnt,
Waitcnt, getExpcntBitShift(
Version.Major),
1072 getExpcntBitWidth(
Version.Major));
1077 return packBits(Lgkmcnt,
Waitcnt, getLgkmcntBitShift(
Version.Major),
1078 getLgkmcntBitWidth(
Version.Major));
1082 unsigned Vmcnt,
unsigned Expcnt,
unsigned Lgkmcnt) {
1108 return 0 <= Idx && Idx < OpInfoSize && !OpInfo[Idx].
Name.empty() &&
1117 for (
int Idx = 0; Idx < OpInfoSize; ++Idx) {
1118 if (
Test(OpInfo[Idx])) {
1131 return getOprIdx<T>(
Test, OpInfo, OpInfoSize,
Context);
1136 T
Context,
bool QuickCheck =
true) {
1138 return Op.Encoding ==
Id && !
Op.Name.empty();
1143 if (QuickCheck && isValidOpr<T>(
Id, OpInfo, OpInfoSize,
Context) &&
1147 return getOprIdx<T>(
Test, OpInfo, OpInfoSize,
Context);
1158 for (
int Idx = 0; Idx < Size; ++Idx) {
1159 const auto &
Op =
Opr[Idx];
1160 if (
Op.isSupported(STI))
1161 Enc |=
Op.encode(
Op.Default);
1167 int Size,
unsigned Code,
1168 bool &HasNonDefaultVal,
1170 unsigned UsedOprMask = 0;
1171 HasNonDefaultVal =
false;
1172 for (
int Idx = 0; Idx < Size; ++Idx) {
1173 const auto &
Op =
Opr[Idx];
1174 if (!
Op.isSupported(STI))
1176 UsedOprMask |=
Op.getMask();
1177 unsigned Val =
Op.decode(Code);
1178 if (!
Op.isValid(Val))
1180 HasNonDefaultVal |= (Val !=
Op.Default);
1182 return (Code & ~UsedOprMask) == 0;
1187 unsigned &Val,
bool &IsDefault,
1189 while (Idx < Size) {
1190 const auto &
Op =
Opr[Idx++];
1191 if (
Op.isSupported(STI)) {
1193 Val =
Op.decode(Code);
1194 IsDefault = (Val ==
Op.Default);
1204 if (InputVal < 0 || InputVal >
Op.Max)
1206 return Op.encode(InputVal);
1211 unsigned &UsedOprMask,
1214 for (
int Idx = 0; Idx < Size; ++Idx) {
1215 const auto &
Op =
Opr[Idx];
1217 if (!
Op.isSupported(STI)) {
1221 auto OprMask =
Op.getMask();
1222 if (OprMask & UsedOprMask)
1224 UsedOprMask |= OprMask;
1238 static int Default = -1;
1247 HasNonDefaultVal, STI);
1271 int Idx = getOprIdx<const MCSubtargetInfo &>(
Name,
Opr,
OPR_SIZE, STI);
1272 return (Idx < 0) ? Idx :
Opr[Idx].Encoding;
1276 return 0 <=
Id && isUInt<ID_WIDTH_>(
Id);
1284 return 0 <= (
Width - 1) && isUInt<WIDTH_M1_WIDTH_>(
Width - 1);
1294 int Idx = getOprIdx<const MCSubtargetInfo &>(
Id,
Opr,
OPR_SIZE, STI);
1295 return (Idx < 0) ?
"" :
Opr[Idx].Name;
1330 if (Val.Tgt <=
Id &&
Id <= Val.Tgt + Val.MaxIndex) {
1331 Index = (Val.MaxIndex == 0) ? -1 : (
Id - Val.Tgt);
1342 if (Val.MaxIndex == 0 &&
Name == Val.Name)
1345 if (Val.MaxIndex > 0 &&
Name.startswith(Val.Name)) {
1353 if (Suffix.
size() > 1 && Suffix[0] ==
'0')
1356 return Val.Tgt +
Id;
1385 namespace MTBUFFormat {
1411 if (
Name == lookupTable[
Id])
1507 int Idx = getOprIdx<const MCSubtargetInfo &>(
Name,
Msg,
MSG_SIZE, STI);
1508 return (Idx < 0) ? Idx :
Msg[Idx].Encoding;
1516 int Idx = getOprIdx<const MCSubtargetInfo &>(MsgId,
Msg,
MSG_SIZE, STI);
1517 return (Idx < 0) ?
"" :
Msg[Idx].Name;
1524 for (
int i =
F;
i < L; ++
i) {
1537 return 0 <= OpId && isUInt<OP_WIDTH_>(OpId);
1622 F,
"amdgpu-color-export",
1793 return STI.
getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1805 int32_t ArgNumVGPR) {
1806 if (has90AInsts && ArgNumAGPR)
1807 return alignTo(ArgNumVGPR, 4) + ArgNumAGPR;
1808 return std::max(ArgNumVGPR, ArgNumAGPR);
1813 const unsigned FirstSubReg =
TRI->getSubReg(
Reg, AMDGPU::sub0);
1814 return SGPRClass.
contains(FirstSubReg != 0 ? FirstSubReg :
Reg) ||
1818 #define MAP_REG2REG \
1819 using namespace AMDGPU; \
1821 default: return Reg; \
1822 CASE_CI_VI(FLAT_SCR) \
1823 CASE_CI_VI(FLAT_SCR_LO) \
1824 CASE_CI_VI(FLAT_SCR_HI) \
1825 CASE_VI_GFX9PLUS(TTMP0) \
1826 CASE_VI_GFX9PLUS(TTMP1) \
1827 CASE_VI_GFX9PLUS(TTMP2) \
1828 CASE_VI_GFX9PLUS(TTMP3) \
1829 CASE_VI_GFX9PLUS(TTMP4) \
1830 CASE_VI_GFX9PLUS(TTMP5) \
1831 CASE_VI_GFX9PLUS(TTMP6) \
1832 CASE_VI_GFX9PLUS(TTMP7) \
1833 CASE_VI_GFX9PLUS(TTMP8) \
1834 CASE_VI_GFX9PLUS(TTMP9) \
1835 CASE_VI_GFX9PLUS(TTMP10) \
1836 CASE_VI_GFX9PLUS(TTMP11) \
1837 CASE_VI_GFX9PLUS(TTMP12) \
1838 CASE_VI_GFX9PLUS(TTMP13) \
1839 CASE_VI_GFX9PLUS(TTMP14) \
1840 CASE_VI_GFX9PLUS(TTMP15) \
1841 CASE_VI_GFX9PLUS(TTMP0_TTMP1) \
1842 CASE_VI_GFX9PLUS(TTMP2_TTMP3) \
1843 CASE_VI_GFX9PLUS(TTMP4_TTMP5) \
1844 CASE_VI_GFX9PLUS(TTMP6_TTMP7) \
1845 CASE_VI_GFX9PLUS(TTMP8_TTMP9) \
1846 CASE_VI_GFX9PLUS(TTMP10_TTMP11) \
1847 CASE_VI_GFX9PLUS(TTMP12_TTMP13) \
1848 CASE_VI_GFX9PLUS(TTMP14_TTMP15) \
1849 CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3) \
1850 CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7) \
1851 CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11) \
1852 CASE_VI_GFX9PLUS(TTMP12_TTMP13_TTMP14_TTMP15) \
1853 CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \
1854 CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \
1855 CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
1856 CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
1857 CASE_GFXPRE11_GFX11PLUS(M0) \
1858 CASE_GFXPRE11_GFX11PLUS(SGPR_NULL) \
1859 CASE_GFXPRE11_GFX11PLUS_TO(SGPR_NULL64, SGPR_NULL) \
1862 #define CASE_CI_VI(node) \
1863 assert(!isSI(STI)); \
1864 case node: return isCI(STI) ? node##_ci : node##_vi;
1866 #define CASE_VI_GFX9PLUS(node) \
1867 case node: return isGFX9Plus(STI) ? node##_gfx9plus : node##_vi;
1869 #define CASE_GFXPRE11_GFX11PLUS(node) \
1870 case node: return isGFX11Plus(STI) ? node##_gfx11plus : node##_gfxpre11;
1872 #define CASE_GFXPRE11_GFX11PLUS_TO(node, result) \
1873 case node: return isGFX11Plus(STI) ? result##_gfx11plus : result##_gfxpre11;
1882 #undef CASE_VI_GFX9PLUS
1883 #undef CASE_GFXPRE11_GFX11PLUS
1884 #undef CASE_GFXPRE11_GFX11PLUS_TO
1886 #define CASE_CI_VI(node) case node##_ci: case node##_vi: return node;
1887 #define CASE_VI_GFX9PLUS(node) case node##_vi: case node##_gfx9plus: return node;
1888 #define CASE_GFXPRE11_GFX11PLUS(node) case node##_gfx11plus: case node##_gfxpre11: return node;
1889 #define CASE_GFXPRE11_GFX11PLUS_TO(node, result)
1896 #undef CASE_VI_GFX9PLUS
1897 #undef CASE_GFXPRE11_GFX11PLUS
1898 #undef CASE_GFXPRE11_GFX11PLUS_TO
1948 case AMDGPU::VGPR_LO16RegClassID:
1949 case AMDGPU::VGPR_HI16RegClassID:
1950 case AMDGPU::SGPR_LO16RegClassID:
1951 case AMDGPU::AGPR_LO16RegClassID:
1953 case AMDGPU::SGPR_32RegClassID:
1954 case AMDGPU::VGPR_32RegClassID:
1955 case AMDGPU::VRegOrLds_32RegClassID:
1956 case AMDGPU::AGPR_32RegClassID:
1957 case AMDGPU::VS_32RegClassID:
1958 case AMDGPU::AV_32RegClassID:
1959 case AMDGPU::SReg_32RegClassID:
1960 case AMDGPU::SReg_32_XM0RegClassID:
1961 case AMDGPU::SRegOrLds_32RegClassID:
1963 case AMDGPU::SGPR_64RegClassID:
1964 case AMDGPU::VS_64RegClassID:
1965 case AMDGPU::SReg_64RegClassID:
1966 case AMDGPU::VReg_64RegClassID:
1967 case AMDGPU::AReg_64RegClassID:
1968 case AMDGPU::SReg_64_XEXECRegClassID:
1969 case AMDGPU::VReg_64_Align2RegClassID:
1970 case AMDGPU::AReg_64_Align2RegClassID:
1971 case AMDGPU::AV_64RegClassID:
1972 case AMDGPU::AV_64_Align2RegClassID:
1974 case AMDGPU::SGPR_96RegClassID:
1975 case AMDGPU::SReg_96RegClassID:
1976 case AMDGPU::VReg_96RegClassID:
1977 case AMDGPU::AReg_96RegClassID:
1978 case AMDGPU::VReg_96_Align2RegClassID:
1979 case AMDGPU::AReg_96_Align2RegClassID:
1980 case AMDGPU::AV_96RegClassID:
1981 case AMDGPU::AV_96_Align2RegClassID:
1983 case AMDGPU::SGPR_128RegClassID:
1984 case AMDGPU::SReg_128RegClassID:
1985 case AMDGPU::VReg_128RegClassID:
1986 case AMDGPU::AReg_128RegClassID:
1987 case AMDGPU::VReg_128_Align2RegClassID:
1988 case AMDGPU::AReg_128_Align2RegClassID:
1989 case AMDGPU::AV_128RegClassID:
1990 case AMDGPU::AV_128_Align2RegClassID:
1992 case AMDGPU::SGPR_160RegClassID:
1993 case AMDGPU::SReg_160RegClassID:
1994 case AMDGPU::VReg_160RegClassID:
1995 case AMDGPU::AReg_160RegClassID:
1996 case AMDGPU::VReg_160_Align2RegClassID:
1997 case AMDGPU::AReg_160_Align2RegClassID:
1998 case AMDGPU::AV_160RegClassID:
1999 case AMDGPU::AV_160_Align2RegClassID:
2001 case AMDGPU::SGPR_192RegClassID:
2002 case AMDGPU::SReg_192RegClassID:
2003 case AMDGPU::VReg_192RegClassID:
2004 case AMDGPU::AReg_192RegClassID:
2005 case AMDGPU::VReg_192_Align2RegClassID:
2006 case AMDGPU::AReg_192_Align2RegClassID:
2007 case AMDGPU::AV_192RegClassID:
2008 case AMDGPU::AV_192_Align2RegClassID:
2010 case AMDGPU::SGPR_224RegClassID:
2011 case AMDGPU::SReg_224RegClassID:
2012 case AMDGPU::VReg_224RegClassID:
2013 case AMDGPU::AReg_224RegClassID:
2014 case AMDGPU::VReg_224_Align2RegClassID:
2015 case AMDGPU::AReg_224_Align2RegClassID:
2016 case AMDGPU::AV_224RegClassID:
2017 case AMDGPU::AV_224_Align2RegClassID:
2019 case AMDGPU::SGPR_256RegClassID:
2020 case AMDGPU::SReg_256RegClassID:
2021 case AMDGPU::VReg_256RegClassID:
2022 case AMDGPU::AReg_256RegClassID:
2023 case AMDGPU::VReg_256_Align2RegClassID:
2024 case AMDGPU::AReg_256_Align2RegClassID:
2025 case AMDGPU::AV_256RegClassID:
2026 case AMDGPU::AV_256_Align2RegClassID:
2028 case AMDGPU::SGPR_512RegClassID:
2029 case AMDGPU::SReg_512RegClassID:
2030 case AMDGPU::VReg_512RegClassID:
2031 case AMDGPU::AReg_512RegClassID:
2032 case AMDGPU::VReg_512_Align2RegClassID:
2033 case AMDGPU::AReg_512_Align2RegClassID:
2034 case AMDGPU::AV_512RegClassID:
2035 case AMDGPU::AV_512_Align2RegClassID:
2037 case AMDGPU::SGPR_1024RegClassID:
2038 case AMDGPU::SReg_1024RegClassID:
2039 case AMDGPU::VReg_1024RegClassID:
2040 case AMDGPU::AReg_1024RegClassID:
2041 case AMDGPU::VReg_1024_Align2RegClassID:
2042 case AMDGPU::AReg_1024_Align2RegClassID:
2043 case AMDGPU::AV_1024RegClassID:
2044 case AMDGPU::AV_1024_Align2RegClassID:
2076 (Val == 0x3fc45f306dc9c882 && HasInv2Pi);
2102 (Val == 0x3e22f983 && HasInv2Pi);
2113 return Val == 0x3C00 ||
2128 int16_t Trunc =
static_cast<int16_t
>(Literal);
2131 if (!(Literal & 0xffff))
2134 int16_t Lo16 =
static_cast<int16_t
>(Literal);
2135 int16_t Hi16 =
static_cast<int16_t
>(Literal >> 16);
2140 int16_t Lo16 =
static_cast<int16_t
>(Literal);
2144 int16_t Hi16 =
static_cast<int16_t
>(Literal >> 16);
2145 if (!(Literal & 0xffff))
2153 int16_t Lo16 =
static_cast<int16_t
>(Literal);
2157 int16_t Hi16 =
static_cast<int16_t
>(Literal >> 16);
2158 if (!(Literal & 0xffff))
2160 return Lo16 == Hi16;
2182 return F->getAttributes().hasParamAttr(A->getArgNo(), Attribute::InReg) ||
2183 F->getAttributes().hasParamAttr(A->getArgNo(), Attribute::ByVal);
2199 int64_t EncodedOffset) {
2205 int64_t EncodedOffset,
2209 isInt<21>(EncodedOffset);
2213 return (ByteOffset & 3) == 0;
2222 return ByteOffset >> 2;
2226 int64_t ByteOffset,
bool IsBuffer) {
2243 int64_t ByteOffset) {
2272 if (
Imm <= MaxImm + 64) {
2274 Overflow =
Imm - MaxImm;
2289 Overflow =
High - Alignment.value();
2308 StringRef IEEEAttr =
F.getFnAttribute(
"amdgpu-ieee").getValueAsString();
2309 if (!IEEEAttr.
empty())
2310 IEEE = IEEEAttr ==
"true";
2313 =
F.getFnAttribute(
"amdgpu-dx10-clamp").getValueAsString();
2314 if (!DX10ClampAttr.
empty())
2317 StringRef DenormF32Attr =
F.getFnAttribute(
"denormal-fp-math-f32").getValueAsString();
2318 if (!DenormF32Attr.
empty()) {
2324 StringRef DenormAttr =
F.getFnAttribute(
"denormal-fp-math").getValueAsString();
2325 if (!DenormAttr.
empty()) {
2328 if (DenormF32Attr.
empty()) {
2340 struct SourceOfDivergence {
2343 const SourceOfDivergence *lookupSourceOfDivergence(
unsigned Intr);
2345 #define GET_SourcesOfDivergence_IMPL
2346 #define GET_Gfx9BufferFormat_IMPL
2347 #define GET_Gfx10BufferFormat_IMPL
2348 #define GET_Gfx11PlusBufferFormat_IMPL
2349 #include "AMDGPUGenSearchableTables.inc"
2354 return lookupSourceOfDivergence(IntrID);
2358 uint8_t NumComponents,
2362 ? getGfx11PlusBufferFormatInfo(BitsPerComp, NumComponents,
2364 :
isGFX10(STI) ? getGfx10BufferFormatInfo(BitsPerComp,
2366 : getGfx9BufferFormatInfo(BitsPerComp,
2372 return isGFX11Plus(STI) ? getGfx11PlusBufferFormatInfo(Format)
2373 :
isGFX10(STI) ? getGfx10BufferFormatInfo(Format)
2374 : getGfx9BufferFormatInfo(Format);
2383 OS <<
"Unsupported";
@ OPERAND_REG_INLINE_C_FP64
uint64_t encodeHwreg(uint64_t Id, uint64_t Offset, uint64_t Width)
unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc)
const int OPR_ID_UNSUPPORTED
bool getMUBUFIsBufferInv(unsigned Opc)
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI)
If Reg is a pseudo reg, return the correct hardware register given STI otherwise return Reg.
bool isHsaAbiVersion3(const MCSubtargetInfo *STI)
This class represents an incoming formal argument to a Function.
AMDGPUTargetID(const MCSubtargetInfo &STI)
unsigned mc2PseudoReg(unsigned Reg)
Convert hardware register Reg to a pseudo register.
This is an optimization pass for GlobalISel generic memory operations.
bool getMAIIsDGEMM(unsigned Opc)
Returns true if MAI operation is a double precision GEMM.
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI)
bool getMUBUFHasSoffset(unsigned Opc)
void emitError(uint64_t LocCookie, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location information.
@ AMDGPU_HS
Calling convention used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI)
bool hasVOPD(const MCSubtargetInfo &STI)
constexpr char NumSGPRs[]
Key for Kernel::CodeProps::Metadata::mNumSGPRs.
int64_t getHwregId(const StringRef Name, const MCSubtargetInfo &STI)
bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI)
@ AMDGPU_VS
Calling convention used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (ve...
uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId)
bool isGFX11Plus(const MCSubtargetInfo &STI)
IsaVersion getIsaVersion(StringRef GPU)
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt)
LLVM_NODISCARD bool endswith(StringRef Suffix) const
Check if this string ends with the given Suffix.
const char *const OpGsSymbolic[OP_GS_LAST_]
int getMUBUFBaseOpcode(unsigned Opc)
Optional< int64_t > getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset, bool IsBuffer)
bool isSramEccOnOrAny() const
int getMCOpcode(uint16_t Opcode, unsigned Gen)
unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt)
A raw_ostream that writes to an std::string.
bool hasSRAMECC(const MCSubtargetInfo &STI)
bool IEEE
Floating point opcodes that support exception flag gathering quiet and propagate signaling NaN inputs...
bool hasXNACK(const MCSubtargetInfo &STI)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
bool isGFX10_BEncoding(const MCSubtargetInfo &STI)
bool isGFX10_AEncoding(const MCSubtargetInfo &STI)
uint64_t compute_pgm_resource_registers
Shader program settings for CS.
bool getVOP2IsSingle(unsigned Opc)
const int OPR_VAL_INVALID
Reg
All possible values of the reg field in the ModR/M byte.
bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI)
Optional< uint8_t > getHsaAbiVersion(const MCSubtargetInfo *STI)
Triple - Helper class for working with autoconf configuration names.
const char *const OpSysSymbolic[OP_SYS_LAST_]
Optional< int64_t > getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST, int64_t ByteOffset)
unsigned getAmdhsaCodeObjectVersion()
int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask, const MCSubtargetInfo &STI)
bool isGFX11(const MCSubtargetInfo &STI)
uint32_t FloatToBits(float Float)
This function takes a float and returns the bit equivalent 32-bit integer.
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed, bool FlatScrUsed, bool XNACKUsed)
bool isGFX10(const MCSubtargetInfo &STI)
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI)
static llvm::cl::opt< bool > LimitTo128VGPRs("amdgpu-limit-to-128-vgprs", llvm::cl::Hidden, llvm::cl::desc("Never use more than 128 VGPRs"))
void setTargetIDFromTargetIDStream(StringRef TargetID)
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI)
static SIModeRegisterDefaults getDefaultForCallingConv(CallingConv::ID CC)
bool contains(MCRegister Reg) const
contains - Return true if the specified register is included in this register class.
bool FP32InputDenormals
If this is set, neither input or output denormals are flushed for most f32 instructions.
Expected< ExpressionValue > max(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
unsigned getVmcntBitMask(const IsaVersion &Version)
bool isGlobalSegment(const GlobalValue *GV)
bool hasGFX10_3Insts(const MCSubtargetInfo &STI)
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
unsigned getWaitcntBitMask(const IsaVersion &Version)
bool isIntrinsicSourceOfDivergence(unsigned IntrID)
uint32_t compute_pgm_rsrc2
@ AMDGPU_Gfx
Calling convention used for AMD graphics targets.
unsigned const TargetRegisterInfo * TRI
int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements)
static llvm::cl::opt< unsigned > AmdhsaCodeObjectVersion("amdhsa-code-object-version", llvm::cl::Hidden, llvm::cl::desc("AMDHSA Code Object Version"), llvm::cl::init(4))
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val, bool &IsDefault, const MCSubtargetInfo &STI)
constexpr uint32_t VersionMajor
HSA metadata major version.
@ FIXED_NUM_SGPRS_FOR_INIT_BUG
MCRegisterClass - Base class of TargetRegisterClass.
@ CONSTANT_ADDRESS_32BIT
Address space for 32-bit constant memory.
static unsigned getDefaultCustomOperandEncoding(const CustomOperandVal *Opr, int Size, const MCSubtargetInfo &STI)
DenormalMode parseDenormalFPAttribute(StringRef Str)
Returns the denormal mode to use for inputs and outputs.
void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width)
Instruction set architecture version.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
bool isGFX90A(const MCSubtargetInfo &STI)
const int OPR_ID_DUPLICATE
bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI)
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
bool getMTBUFHasSrsrc(unsigned Opc)
DenormalModeKind Input
Denormal treatment kind for floating point instruction inputs in the default floating-point environment.
bool isShader(CallingConv::ID cc)
unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
uint32_t amd_kernel_code_version_major
static bool hasSMEMByteOffset(const MCSubtargetInfo &ST)
Itanium name demangler, i.e. convert the string "_Z1fv" into "f()". You can also use the CRTP base ManglingParser to perform some simple analysis on the mangled name.
unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc, unsigned OpNo)
Get size of register operand.
const MIMGBaseOpcodeInfo * getMIMGBaseOpcode(unsigned Opc)
LLVM_NODISCARD std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
bool isInlinableIntLiteralV216(int32_t Literal)
@ OPERAND_REG_INLINE_AC_FP16
int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements)
const Triple & getTargetTriple() const
bool getSMEMIsBuffer(unsigned Opc)
bool isGFX940(const MCSubtargetInfo &STI)
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, bool Addressable)
bool isSGPR(unsigned Reg, const MCRegisterInfo *TRI)
Is Reg - scalar register.
bool hasMAIInsts(const MCSubtargetInfo &STI)
static TargetIDSetting getTargetIDSettingFromFeatureString(StringRef FeatureString)
const std::vector< std::string > & getFeatures() const
Returns the vector of individual subtarget features.
uint64_t DoubleToBits(double Double)
This function takes a double and returns the bit equivalent 64-bit integer.
Manages the enabling and disabling of subtarget specific features.
bool isXnackOnOrAny() const
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt)
bool getMAIIsGFX940XDL(unsigned Opc)
@ ELFABIVERSION_AMDGPU_HSA_V5
@ AMDGPU_ES
Calling convention used for AMDPAL shader stage before geometry shader if geometry is in use.
@ AMDGPU_GS
Calling convention used for Mesa/AMDPAL geometry shaders.
uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the largest uint64_t less than or equal to Value and is Skew mod Align.
Describe properties that are true of each instruction in the target description file.
#define S_00B848_MEM_ORDERED(x)
bool hasGFX10A16(const MCSubtargetInfo &STI)
const FeatureBitset & getFeatureBits() const
@ LOCAL_ADDRESS
Address space for local memory.
static int encodeCustomOperand(const CustomOperandVal *Opr, int Size, const StringRef Name, int64_t InputVal, unsigned &UsedOprMask, const MCSubtargetInfo &STI)
int getMTBUFBaseOpcode(unsigned Opc)
unsigned getInitialPSInputAddr(const Function &F)
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
This class implements an extremely fast bulk output stream that can only output to a stream.
bool getMTBUFHasSoffset(unsigned Opc)
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
int getNumOccurrences() const
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt)
Decodes Vmcnt, Expcnt and Lgkmcnt from given Waitcnt for given isa Version, and writes decoded values into Vmcnt, Expcnt and Lgkmcnt respectively.
uint8_t wavefront_size
Wavefront size expressed as a power of two.
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI)
TargetIDSetting getSramEccSetting() const
uint16_t kernel_code_properties
bool getMUBUFHasSrsrc(unsigned Opc)
Analysis containing CSE Info
uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset)
Convert ByteOffset to dwords if the subtarget uses dword SMRD immediate offsets.
unsigned getEUsPerCU(const MCSubtargetInfo *STI)
int16_t RegClass
This specifies the register class enumeration of the operand if the operand is a register.
unsigned getLocalMemorySize(const MCSubtargetInfo *STI)
This struct is a compact representation of a valid (non-zero power of two) alignment.
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding, unsigned VDataDwords, unsigned VAddrDwords)
ArchType getArch() const
Get the parsed architecture type of this triple.
LLVM_NODISCARD std::string str() const
str - Get the contents as an std::string.
bool isCI(const MCSubtargetInfo &STI)
std::enable_if_t< std::numeric_limits< T >::is_signed, bool > getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
bool isGFX10Plus(const MCSubtargetInfo &STI)
static bool isSymbolicCustomOperandEncoding(const CustomOperandVal *Opr, int Size, unsigned Code, bool &HasNonDefaultVal, const MCSubtargetInfo &STI)
uint32_t compute_pgm_rsrc3
bool isEntryFunctionCC(CallingConv::ID CC)
uint16_t amd_machine_version_minor
bool isHsaAbiVersion2(const MCSubtargetInfo *STI)
bool hasPackedD16(const MCSubtargetInfo &STI)
bool isValidHwregWidth(int64_t Width)
int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI)
bool shouldEmitConstantsToTextSection(const Triple &TT)
int64_t getMsgId(const StringRef Name, const MCSubtargetInfo &STI)
const CustomOperandVal DepCtrInfo[]
bool isValidHwreg(int64_t Id)
bool isVI(const MCSubtargetInfo &STI)
@ OPERAND_REG_IMM_FP16_DEFERRED
bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi)
unsigned getRegBitWidth(unsigned RCID)
Get the size in bits of a register from the register class RC.
unsigned getExpcntBitMask(const IsaVersion &Version)
unsigned short NumOperands
unsigned getMultigridSyncArgImplicitArgPosition()
uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator)
Returns the integer ceil(Numerator / Denominator).
bool isHsaAbiVersion4(const MCSubtargetInfo *STI)
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the values intended for floating point values.
bool hasG16(const MCSubtargetInfo &STI)
constexpr bool isUInt< 16 >(uint64_t x)
constexpr LLVM_NODISCARD bool empty() const
empty - Check if the string is empty.
OSType getOS() const
Get the parsed operating system type of this triple.
bool isVOPC64DPP(unsigned Opc)
@ OPERAND_REG_INLINE_C_FP32
bool isGFX9(const MCSubtargetInfo &STI)
This is an important class for using LLVM in a threaded context.
uint8_t OperandType
Information about the type of the operand.
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, const MCSubtargetInfo *STI)
int getIntegerAttribute(const Function &F, StringRef Name, int Default)
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
Represent subnormal handling kind for floating point instruction inputs and outputs.
initializer< Ty > init(const Ty &Val)
const GcnBufferFormatInfo * getGcnBufferFormatInfo(uint8_t BitsPerComp, uint8_t NumComponents, uint8_t NumFormat, const MCSubtargetInfo &STI)
unsigned getWavefrontSize(const MCSubtargetInfo *STI)
@ ELFABIVERSION_AMDGPU_HSA_V2
constexpr bool isUInt< 32 >(uint64_t x)
bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo)
Does this operand support only inlinable literals?
bool isValidHwregOffset(int64_t Offset)
bool DX10Clamp
Used by the vector ALU to force DX10-style treatment of NaNs: when set, clamp NaN to zero; otherwise,...
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Vmcnt)
constexpr bool isUInt< 8 >(uint64_t x)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
TargetIDSetting getXnackSetting() const
bool FP64FP16InputDenormals
If this is set, neither input or output denormals are flushed for both f64 and f16/v2f16 instructions...
uint16_t amd_machine_version_stepping
bool isGFX9_GFX10(const MCSubtargetInfo &STI)
uint8_t group_segment_alignment
const MCOperandInfo * OpInfo
print Print MemDeps of function
bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI)
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI)
const LLVM_READONLY MIMGInfo * getMIMGInfo(unsigned Opc)
bool isGFX10Before1030(const MCSubtargetInfo &STI)
@ OPERAND_REG_INLINE_C_V2INT16
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI)
@ OPERAND_REG_IMM_FP32_DEFERRED
@ IEEE
IEEE-754 denormal numbers preserved.
unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs)
uint8_t kernarg_segment_alignment
The maximum byte alignment of variables used by the kernel in the specified memory segment.
uint32_t amd_kernel_code_version_minor
static constexpr ExpTgt ExpTgtInfo[]
@ OPERAND_REG_INLINE_AC_FP32
bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi)
const LLVM_READONLY MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
static bool isDwordAligned(uint64_t ByteOffset)
bool isHsaAbiVersion5(const MCSubtargetInfo *STI)
static bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST)
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
bool isNotGFX10Plus(const MCSubtargetInfo &STI)
bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI)
@ AMDGPU_PS
Calling convention used for Mesa/AMDPAL pixel shaders.
SmallVector< MachineOperand, 4 > Cond
bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal, const MCSubtargetInfo &STI)
StringRef - Represent a constant reference to a string, i.e.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
bool getVOP3IsSingle(unsigned Opc)
bool isModuleEntryFunctionCC(CallingConv::ID CC)
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
bool isCompute(CallingConv::ID cc)
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, Optional< bool > EnableWavefrontSize32)
unsigned getID() const
getID() - Return the register class ID number.
unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs, Optional< bool > EnableWavefrontSize32)
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset)
bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo)
Can this operand also contain immediate values?
bool isGraphics(CallingConv::ID cc)
AMD Kernel Code Object (amd_kernel_code_t).
bool isXnackSupported() const
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI)
bool getTgtName(unsigned Id, StringRef &Name, int &Index)
bool getMTBUFHasVAddr(unsigned Opc)
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
#define AMDHSA_BITS_SET(DST, MSK, VAL)
@ FLOAT_DENORM_MODE_FLUSH_NONE
int64_t kernel_code_entry_byte_offset
Byte offset (possibly negative) from start of amd_kernel_code_t object to kernel's entry point instru...
bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI)
bool isGFX9Plus(const MCSubtargetInfo &STI)
amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(const MCSubtargetInfo *STI)
unsigned const MachineRegisterInfo * MRI
const CustomOperand< const MCSubtargetInfo & > Msg[]
uint16_t amd_machine_kind
bool FP64FP16OutputDenormals
bool isGroupSegment(const GlobalValue *GV)
@ AMDGPU_KERNEL
Calling convention for AMDGPU code object kernels.
unsigned encodeWaitcnt(const IsaVersion &Version, unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt)
Encodes Vmcnt, Expcnt and Lgkmcnt into Waitcnt for given isa Version.
bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI, bool Strict)
constexpr bool isInt< 16 >(int64_t x)
constexpr LLVM_NODISCARD size_t size() const
size - Get the string size.
bool isSI(const MCSubtargetInfo &STI)
@ ELFABIVERSION_AMDGPU_HSA_V4
@ OPERAND_REG_INLINE_AC_FP64
std::string toString() const
uint16_t amd_machine_version_major
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
bool isArgPassedInSGPR(const Argument *A)
@ MULTIGRID_SYNC_ARG_OFFSET
Represents the counter values to wait for in an s_waitcnt instruction.
@ ET_DUAL_SRC_BLEND_MAX_IDX
const CustomOperand< const MCSubtargetInfo & > Opr[]
void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId, uint16_t &StreamId, const MCSubtargetInfo &STI)
uint8_t private_segment_alignment
static bool decodeCustomOperand(const CustomOperandVal *Opr, int Size, unsigned Code, int &Idx, StringRef &Name, unsigned &Val, bool &IsDefault, const MCSubtargetInfo &STI)
bool getMUBUFHasVAddr(unsigned Opc)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
@ OPERAND_REG_IMM_V2INT16
int getMUBUFElements(unsigned Opc)
static int encodeCustomOperandVal(const CustomOperandVal &Op, int64_t InputVal)
int64_t getMsgOpId(int64_t MsgId, const StringRef Name)
uint32_t code_properties
Code properties.
bool isFoldableLiteralV216(int32_t Literal, bool HasInv2Pi)
@ OPERAND_REG_INLINE_C_V2FP16
@ OPERAND_REG_INLINE_C_FIRST
bool getVOP1IsSingle(unsigned Opc)
bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset, const GCNSubtarget *Subtarget, Align Alignment)
unsigned getTgtId(const StringRef Name)
unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc)
int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR)
@ OPERAND_REG_INLINE_C_V2FP32
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI)
bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, const MCSubtargetInfo &STI, bool Strict)
@ AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32
uint32_t compute_pgm_rsrc1
unsigned getLgkmcntBitMask(const IsaVersion &Version)
unsigned getAddressSpace() const
Generation getGeneration() const
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this floating-point operand?
bool isGFX8Plus(const MCSubtargetInfo &STI)
constexpr bool test(unsigned I) const
bool isNotGFX11Plus(const MCSubtargetInfo &STI)
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Expcnt)
bool isGCN3Encoding(const MCSubtargetInfo &STI)
static bool isValidOpr(int Idx, const CustomOperand< T > OpInfo[], int OpInfoSize, T Context)
StringRef getMsgOpName(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST, bool Signed)
For FLAT segment the offset must be positive; MSB is ignored and forced to zero.
@ AMDGPU_LS
Calling convention used for AMDPAL vertex shader if tessellation is in use.
unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI)
bool isHsaAbiVersion3AndAbove(const MCSubtargetInfo *STI)
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
bool hasMIMG_R128(const MCSubtargetInfo &STI)
@ SPIR_KERNEL
SPIR_KERNEL - Calling convention for SPIR kernel functions.
DenormalModeKind Output
Denormal flushing mode for floating point instruction results in the default floating point environme...
@ OPERAND_REG_INLINE_C_FP16
@ AMDGPU_CS
Calling convention used for Mesa/AMDPAL compute shaders.
constexpr char NumVGPRs[]
Key for Kernel::CodeProps::Metadata::mNumVGPRs.
void setTargetIDFromFeaturesString(StringRef FS)
bool isReadOnlySegment(const GlobalValue *GV)
bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset, bool IsBuffer)
#define S_00B848_WGP_MODE(x)
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Lgkmcnt)
@ OPERAND_REG_INLINE_AC_V2INT16
std::pair< int, int > getIntegerPairAttribute(const Function &F, StringRef Name, std::pair< int, int > Default, bool OnlyFirstRequired)
bool isSramEccSupported() const
bool isKernelCC(const Function *Func)
unsigned getHostcallImplicitArgPosition()
static uint64_t getMsgIdMask(const MCSubtargetInfo &STI)
Generic base class for all target subtargets.
bool getHasColorExport(const Function &F)
bool getHasDepthExport(const Function &F)
@ ELFABIVERSION_AMDGPU_HSA_V3
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI, Optional< bool > EnableWavefrontSize32)
StringRef getMsgName(int64_t MsgId, const MCSubtargetInfo &STI)
int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels)
int getMTBUFElements(unsigned Opc)
@ OPERAND_REG_INLINE_C_LAST
@ OPERAND_REG_INLINE_AC_V2FP16
static int getOprIdx(std::function< bool(const CustomOperand< T > &)> Test, const CustomOperand< T > OpInfo[], int OpInfoSize, T Context)