29#define GET_REGINFO_TARGET_DESC
30#include "AMDGPUGenRegisterInfo.inc"
33 "amdgpu-spill-sgpr-to-vgpr",
34 cl::desc(
"Enable spilling SGPRs to VGPRs"),
39 "amdgpu-spill-cfi-saved-regs",
40 cl::desc(
"Enable spilling the registers required for CFI emission"),
// Out-of-class definitions of two lookup tables owned by SIRegisterInfo.
//  - RegSplitParts: 32 vectors of int16_t.
//  - SubRegFromChannelTable: 9 rows of 32 uint16_t entries; elsewhere in this
//    file each row is filled with AMDGPU::NoSubRegister and then populated
//    with sub-register indices.
// Both tables are populated through llvm::call_once elsewhere in this file.
43std::array<std::vector<int16_t>, 32> SIRegisterInfo::RegSplitParts;
44std::array<std::array<uint16_t, 32>, 9> SIRegisterInfo::SubRegFromChannelTable;
51 0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 9};
54 const Twine &ErrMsg) {
127 MI->getOperand(0).isKill(),
Index,
RS) {}
142 MovOpc = AMDGPU::S_MOV_B32;
143 NotOpc = AMDGPU::S_NOT_B32;
146 MovOpc = AMDGPU::S_MOV_B64;
147 NotOpc = AMDGPU::S_NOT_B64;
152 SuperReg != AMDGPU::EXEC &&
"exec should never spill");
183 assert(
RS &&
"Cannot spill SGPR to memory without RegScavenger");
184 TmpVGPR =
RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass,
MI,
false,
212 IsWave32 ? AMDGPU::SGPR_32RegClass : AMDGPU::SGPR_64RegClass;
232 if (
RS->isRegUsed(AMDGPU::SCC))
234 "unhandled SGPR spill to memory");
244 I->getOperand(2).setIsDead();
279 I->getOperand(2).setIsDead();
308 if (
RS->isRegUsed(AMDGPU::SCC))
310 "unhandled SGPR spill to memory");
335 ST.getAMDGPUDwarfFlavour(),
// Sanity-check the generated sub-register lane masks:
//  - sub0 must have lane mask 3 (the two lowest bits),
//  - sub31 must have lane mask 3 << 62 (the two highest bits of 64),
//  - lo16 | hi16 together must equal sub0's mask.
// Per the assertion message, getNumCoveredRegs() depends on this layout.
340 assert(getSubRegIndexLaneMask(AMDGPU::sub0).getAsInteger() == 3 &&
341 getSubRegIndexLaneMask(AMDGPU::sub31).getAsInteger() == (3ULL << 62) &&
342 (getSubRegIndexLaneMask(AMDGPU::lo16) |
343 getSubRegIndexLaneMask(AMDGPU::hi16)).getAsInteger() ==
344 getSubRegIndexLaneMask(AMDGPU::sub0).getAsInteger() &&
345 "getNumCoveredRegs() will not work with generated subreg masks!");
347 RegPressureIgnoredUnits.resize(getNumRegUnits());
348 RegPressureIgnoredUnits.set(
350 for (
auto Reg : AMDGPU::VGPR_16RegClass) {
352 RegPressureIgnoredUnits.set(
353 static_cast<unsigned>(*regunits(Reg).begin()));
359 static auto InitializeRegSplitPartsOnce = [
this]() {
360 for (
unsigned Idx = 1, E = getNumSubRegIndices() - 1; Idx < E; ++Idx) {
361 unsigned Size = getSubRegIdxSize(Idx);
364 std::vector<int16_t> &Vec = RegSplitParts[
Size / 16 - 1];
365 unsigned Pos = getSubRegIdxOffset(Idx);
370 unsigned MaxNumParts = 1024 /
Size;
371 Vec.resize(MaxNumParts);
379 static auto InitializeSubRegFromChannelTableOnce = [
this]() {
380 for (
auto &Row : SubRegFromChannelTable)
381 Row.fill(AMDGPU::NoSubRegister);
382 for (
unsigned Idx = 1; Idx < getNumSubRegIndices(); ++Idx) {
383 unsigned Width = getSubRegIdxSize(Idx) / 32;
384 unsigned Offset = getSubRegIdxOffset(Idx) / 32;
389 unsigned TableIdx = Width - 1;
390 assert(TableIdx < SubRegFromChannelTable.size());
392 SubRegFromChannelTable[TableIdx][
Offset] = Idx;
396 llvm::call_once(InitializeRegSplitPartsFlag, InitializeRegSplitPartsOnce);
398 InitializeSubRegFromChannelTableOnce);
415 return ST.hasGFX90AInsts() ? CSR_AMDGPU_GFX90AInsts_SaveList
416 : CSR_AMDGPU_SaveList;
419 return ST.hasGFX90AInsts() ? CSR_AMDGPU_SI_Gfx_GFX90AInsts_SaveList
420 : CSR_AMDGPU_SI_Gfx_SaveList;
422 return CSR_AMDGPU_CS_ChainPreserve_SaveList;
425 static const MCPhysReg NoCalleeSavedReg = AMDGPU::NoRegister;
426 return &NoCalleeSavedReg;
442 return ST.hasGFX90AInsts() ? CSR_AMDGPU_GFX90AInsts_RegMask
443 : CSR_AMDGPU_RegMask;
446 return ST.hasGFX90AInsts() ? CSR_AMDGPU_SI_Gfx_GFX90AInsts_RegMask
447 : CSR_AMDGPU_SI_Gfx_RegMask;
452 return AMDGPU_AllVGPRs_RegMask;
459 return CSR_AMDGPU_NoRegs_RegMask;
463 return VGPR >= AMDGPU::VGPR0 && VGPR < AMDGPU::VGPR8;
// Map a VGPR (VGPR_32 / VReg_*) or AGPR (AGPR_32 / AReg_*) register class to
// the AV_* class of the same bit width. The chain compares RC by pointer
// identity and covers widths 32, 64, 96, 128, 160, 192, 256, 512 and 1024.
// For every width above 32 the *_Align2 variant is handled separately and
// maps to the matching AV_*_Align2 class.
// NOTE(review): what happens when no comparison matches is outside this view.
474 if (RC == &AMDGPU::VGPR_32RegClass || RC == &AMDGPU::AGPR_32RegClass)
475 return &AMDGPU::AV_32RegClass;
476 if (RC == &AMDGPU::VReg_64RegClass || RC == &AMDGPU::AReg_64RegClass)
477 return &AMDGPU::AV_64RegClass;
478 if (RC == &AMDGPU::VReg_64_Align2RegClass ||
479 RC == &AMDGPU::AReg_64_Align2RegClass)
480 return &AMDGPU::AV_64_Align2RegClass;
481 if (RC == &AMDGPU::VReg_96RegClass || RC == &AMDGPU::AReg_96RegClass)
482 return &AMDGPU::AV_96RegClass;
483 if (RC == &AMDGPU::VReg_96_Align2RegClass ||
484 RC == &AMDGPU::AReg_96_Align2RegClass)
485 return &AMDGPU::AV_96_Align2RegClass;
486 if (RC == &AMDGPU::VReg_128RegClass || RC == &AMDGPU::AReg_128RegClass)
487 return &AMDGPU::AV_128RegClass;
488 if (RC == &AMDGPU::VReg_128_Align2RegClass ||
489 RC == &AMDGPU::AReg_128_Align2RegClass)
490 return &AMDGPU::AV_128_Align2RegClass;
491 if (RC == &AMDGPU::VReg_160RegClass || RC == &AMDGPU::AReg_160RegClass)
492 return &AMDGPU::AV_160RegClass;
493 if (RC == &AMDGPU::VReg_160_Align2RegClass ||
494 RC == &AMDGPU::AReg_160_Align2RegClass)
495 return &AMDGPU::AV_160_Align2RegClass;
496 if (RC == &AMDGPU::VReg_192RegClass || RC == &AMDGPU::AReg_192RegClass)
497 return &AMDGPU::AV_192RegClass;
498 if (RC == &AMDGPU::VReg_192_Align2RegClass ||
499 RC == &AMDGPU::AReg_192_Align2RegClass)
500 return &AMDGPU::AV_192_Align2RegClass;
501 if (RC == &AMDGPU::VReg_256RegClass || RC == &AMDGPU::AReg_256RegClass)
502 return &AMDGPU::AV_256RegClass;
503 if (RC == &AMDGPU::VReg_256_Align2RegClass ||
504 RC == &AMDGPU::AReg_256_Align2RegClass)
505 return &AMDGPU::AV_256_Align2RegClass;
506 if (RC == &AMDGPU::VReg_512RegClass || RC == &AMDGPU::AReg_512RegClass)
507 return &AMDGPU::AV_512RegClass;
508 if (RC == &AMDGPU::VReg_512_Align2RegClass ||
509 RC == &AMDGPU::AReg_512_Align2RegClass)
510 return &AMDGPU::AV_512_Align2RegClass;
511 if (RC == &AMDGPU::VReg_1024RegClass || RC == &AMDGPU::AReg_1024RegClass)
512 return &AMDGPU::AV_1024RegClass;
513 if (RC == &AMDGPU::VReg_1024_Align2RegClass ||
514 RC == &AMDGPU::AReg_1024_Align2RegClass)
515 return &AMDGPU::AV_1024_Align2RegClass;
545 return AMDGPU_AllVGPRs_RegMask;
549 return AMDGPU_AllAGPRs_RegMask;
553 return AMDGPU_AllVectorRegs_RegMask;
557 return AMDGPU_AllAllocatableSRegs_RegMask;
564 assert(NumRegIndex &&
"Not implemented");
565 assert(Channel < SubRegFromChannelTable[NumRegIndex - 1].
size());
566 return SubRegFromChannelTable[NumRegIndex - 1][Channel];
575 const unsigned Align,
578 MCRegister BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));
579 return getMatchingSuperReg(BaseReg, AMDGPU::sub0, RC);
597 reserveRegisterTuples(
Reserved, AMDGPU::EXEC);
598 reserveRegisterTuples(
Reserved, AMDGPU::FLAT_SCR);
601 reserveRegisterTuples(
Reserved, AMDGPU::M0);
604 reserveRegisterTuples(
Reserved, AMDGPU::SRC_VCCZ);
605 reserveRegisterTuples(
Reserved, AMDGPU::SRC_EXECZ);
606 reserveRegisterTuples(
Reserved, AMDGPU::SRC_SCC);
609 reserveRegisterTuples(
Reserved, AMDGPU::SRC_SHARED_BASE);
610 reserveRegisterTuples(
Reserved, AMDGPU::SRC_SHARED_LIMIT);
611 reserveRegisterTuples(
Reserved, AMDGPU::SRC_PRIVATE_BASE);
612 reserveRegisterTuples(
Reserved, AMDGPU::SRC_PRIVATE_LIMIT);
613 reserveRegisterTuples(
Reserved, AMDGPU::SRC_FLAT_SCRATCH_BASE_LO);
614 reserveRegisterTuples(
Reserved, AMDGPU::SRC_FLAT_SCRATCH_BASE_HI);
617 reserveRegisterTuples(
Reserved, AMDGPU::ASYNCcnt);
618 reserveRegisterTuples(
Reserved, AMDGPU::TENSORcnt);
621 reserveRegisterTuples(
Reserved, AMDGPU::SRC_POPS_EXITING_WAVE_ID);
624 reserveRegisterTuples(
Reserved, AMDGPU::XNACK_MASK);
627 reserveRegisterTuples(
Reserved, AMDGPU::LDS_DIRECT);
630 reserveRegisterTuples(
Reserved, AMDGPU::TBA);
631 reserveRegisterTuples(
Reserved, AMDGPU::TMA);
632 reserveRegisterTuples(
Reserved, AMDGPU::TTMP0_TTMP1);
633 reserveRegisterTuples(
Reserved, AMDGPU::TTMP2_TTMP3);
634 reserveRegisterTuples(
Reserved, AMDGPU::TTMP4_TTMP5);
635 reserveRegisterTuples(
Reserved, AMDGPU::TTMP6_TTMP7);
636 reserveRegisterTuples(
Reserved, AMDGPU::TTMP8_TTMP9);
637 reserveRegisterTuples(
Reserved, AMDGPU::TTMP10_TTMP11);
638 reserveRegisterTuples(
Reserved, AMDGPU::TTMP12_TTMP13);
639 reserveRegisterTuples(
Reserved, AMDGPU::TTMP14_TTMP15);
642 reserveRegisterTuples(
Reserved, AMDGPU::SGPR_NULL64);
646 unsigned MaxNumSGPRs = ST.getMaxNumSGPRs(MF);
647 unsigned TotalNumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
650 unsigned NumRegs =
divideCeil(getRegSizeInBits(*RC), 32);
653 if (Index + NumRegs > MaxNumSGPRs && Index < TotalNumSGPRs &&
654 Reg != AMDGPU::VCC_LO && Reg != AMDGPU::VCC_HI &&
662 if (ScratchRSrcReg != AMDGPU::NoRegister) {
666 reserveRegisterTuples(
Reserved, ScratchRSrcReg);
670 if (LongBranchReservedReg)
671 reserveRegisterTuples(
Reserved, LongBranchReservedReg);
678 reserveRegisterTuples(
Reserved, StackPtrReg);
679 assert(!isSubRegister(ScratchRSrcReg, StackPtrReg));
684 reserveRegisterTuples(
Reserved, FrameReg);
685 assert(!isSubRegister(ScratchRSrcReg, FrameReg));
690 reserveRegisterTuples(
Reserved, BasePtrReg);
691 assert(!isSubRegister(ScratchRSrcReg, BasePtrReg));
698 reserveRegisterTuples(
Reserved, ExecCopyReg);
702 auto [MaxNumVGPRs, MaxNumAGPRs] = ST.getMaxNumVectorRegs(MF.
getFunction());
706 unsigned NumRegs =
divideCeil(getRegSizeInBits(*RC), 32);
709 if (Index + NumRegs > MaxNumVGPRs)
716 if (!ST.hasMAIInsts())
720 unsigned NumRegs =
divideCeil(getRegSizeInBits(*RC), 32);
723 if (Index + NumRegs > MaxNumAGPRs)
731 if (ST.hasMAIInsts() && !ST.hasGFX90AInsts()) {
739 if (!NonWWMRegMask.
empty()) {
740 for (
unsigned RegI = AMDGPU::VGPR0, RegE = AMDGPU::VGPR0 + MaxNumVGPRs;
741 RegI < RegE; ++RegI) {
742 if (NonWWMRegMask.
test(RegI))
743 reserveRegisterTuples(
Reserved, RegI);
748 reserveRegisterTuples(
Reserved, Reg);
752 reserveRegisterTuples(
Reserved, Reg);
755 reserveRegisterTuples(
Reserved, Reg);
772 if (Info->isBottomOfStack())
780 if (Info->isEntryFunction()) {
813 int OffIdx = AMDGPU::getNamedOperandIdx(
MI->getOpcode(),
814 AMDGPU::OpName::offset);
815 return MI->getOperand(OffIdx).getImm();
820 switch (
MI->getOpcode()) {
821 case AMDGPU::V_ADD_U32_e32:
822 case AMDGPU::V_ADD_U32_e64:
823 case AMDGPU::V_ADD_CO_U32_e32: {
824 int OtherIdx = Idx == 1 ? 2 : 1;
828 case AMDGPU::V_ADD_CO_U32_e64: {
829 int OtherIdx = Idx == 2 ? 3 : 2;
840 assert((Idx == AMDGPU::getNamedOperandIdx(
MI->getOpcode(),
841 AMDGPU::OpName::vaddr) ||
842 (Idx == AMDGPU::getNamedOperandIdx(
MI->getOpcode(),
843 AMDGPU::OpName::saddr))) &&
844 "Should never see frame index on non-address operand");
856 return Src1.
isImm() || (Src1.
isReg() &&
TRI.isVGPR(
MI.getMF()->getRegInfo(),
861 return Src0.
isImm() || (Src0.
isReg() &&
TRI.isVGPR(
MI.getMF()->getRegInfo(),
870 switch (
MI->getOpcode()) {
871 case AMDGPU::V_ADD_U32_e32: {
874 if (ST.getConstantBusLimit(AMDGPU::V_ADD_U32_e32) < 2 &&
879 case AMDGPU::V_ADD_U32_e64:
889 return !ST.hasFlatScratchEnabled();
890 case AMDGPU::V_ADD_CO_U32_e32:
891 if (ST.getConstantBusLimit(AMDGPU::V_ADD_CO_U32_e32) < 2 &&
896 return MI->getOperand(3).isDead();
897 case AMDGPU::V_ADD_CO_U32_e64:
899 return MI->getOperand(1).isDead();
911 return !
TII->isLegalMUBUFImmOffset(FullOffset);
923 if (Ins !=
MBB->end())
924 DL = Ins->getDebugLoc();
930 ST.hasFlatScratchEnabled() ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
933 ST.hasFlatScratchEnabled() ? &AMDGPU::SReg_32_XEXEC_HIRegClass
934 : &AMDGPU::VGPR_32RegClass);
945 ? &AMDGPU::SReg_32_XM0RegClass
946 : &AMDGPU::VGPR_32RegClass);
953 if (ST.hasFlatScratchEnabled()) {
962 TII->getAddNoCarry(*
MBB, Ins,
DL, BaseReg)
974 switch (
MI.getOpcode()) {
975 case AMDGPU::V_ADD_U32_e32:
976 case AMDGPU::V_ADD_CO_U32_e32: {
982 if (!ImmOp->
isImm()) {
985 TII->legalizeOperandsVOP2(
MI.getMF()->getRegInfo(),
MI);
990 if (TotalOffset == 0) {
991 MI.setDesc(
TII->get(AMDGPU::COPY));
992 for (
unsigned I =
MI.getNumOperands() - 1;
I != 1; --
I)
995 MI.getOperand(1).ChangeToRegister(BaseReg,
false);
999 ImmOp->
setImm(TotalOffset);
1014 MI.getOperand(2).ChangeToRegister(BaseRegVGPR,
false);
1016 MI.getOperand(2).ChangeToRegister(BaseReg,
false);
1020 case AMDGPU::V_ADD_U32_e64:
1021 case AMDGPU::V_ADD_CO_U32_e64: {
1022 int Src0Idx =
MI.getNumExplicitDefs();
1028 if (!ImmOp->
isImm()) {
1030 TII->legalizeOperandsVOP3(
MI.getMF()->getRegInfo(),
MI);
1035 if (TotalOffset == 0) {
1036 MI.setDesc(
TII->get(AMDGPU::COPY));
1038 for (
unsigned I =
MI.getNumOperands() - 1;
I != 1; --
I)
1039 MI.removeOperand(
I);
1041 MI.getOperand(1).ChangeToRegister(BaseReg,
false);
1044 ImmOp->
setImm(TotalOffset);
1053 bool IsFlat =
TII->isFLATScratch(
MI);
1057 bool SeenFI =
false;
1069 TII->getNamedOperand(
MI, IsFlat ? AMDGPU::OpName::saddr
1070 : AMDGPU::OpName::vaddr);
1075 assert(FIOp && FIOp->
isFI() &&
"frame index must be address operand");
1081 "offset should be legal");
1092 assert(
TII->isLegalMUBUFImmOffset(NewOffset) &&
"offset should be legal");
1102 switch (
MI->getOpcode()) {
1103 case AMDGPU::V_ADD_U32_e32:
1104 case AMDGPU::V_ADD_CO_U32_e32:
1106 case AMDGPU::V_ADD_U32_e64:
1107 case AMDGPU::V_ADD_CO_U32_e64:
1120 return TII->isLegalMUBUFImmOffset(NewOffset);
1131 return &AMDGPU::VGPR_32RegClass;
1136 return RC == &AMDGPU::SCC_CLASSRegClass ? &AMDGPU::SReg_32RegClass : RC;
1142 unsigned Op =
MI.getOpcode();
1144 case AMDGPU::SI_BLOCK_SPILL_V1024_SAVE:
1145 case AMDGPU::SI_BLOCK_SPILL_V1024_CFI_SAVE:
1146 case AMDGPU::SI_BLOCK_SPILL_V1024_RESTORE:
1151 (
uint64_t)
TII->getNamedOperand(
MI, AMDGPU::OpName::mask)->getImm());
1152 case AMDGPU::SI_SPILL_S1024_SAVE:
1153 case AMDGPU::SI_SPILL_S1024_CFI_SAVE:
1154 case AMDGPU::SI_SPILL_S1024_RESTORE:
1155 case AMDGPU::SI_SPILL_V1024_SAVE:
1156 case AMDGPU::SI_SPILL_V1024_CFI_SAVE:
1157 case AMDGPU::SI_SPILL_V1024_RESTORE:
1158 case AMDGPU::SI_SPILL_A1024_SAVE:
1159 case AMDGPU::SI_SPILL_A1024_CFI_SAVE:
1160 case AMDGPU::SI_SPILL_A1024_RESTORE:
1161 case AMDGPU::SI_SPILL_AV1024_SAVE:
1162 case AMDGPU::SI_SPILL_AV1024_CFI_SAVE:
1163 case AMDGPU::SI_SPILL_AV1024_RESTORE:
1165 case AMDGPU::SI_SPILL_S512_SAVE:
1166 case AMDGPU::SI_SPILL_S512_CFI_SAVE:
1167 case AMDGPU::SI_SPILL_S512_RESTORE:
1168 case AMDGPU::SI_SPILL_V512_SAVE:
1169 case AMDGPU::SI_SPILL_V512_CFI_SAVE:
1170 case AMDGPU::SI_SPILL_V512_RESTORE:
1171 case AMDGPU::SI_SPILL_A512_SAVE:
1172 case AMDGPU::SI_SPILL_A512_CFI_SAVE:
1173 case AMDGPU::SI_SPILL_A512_RESTORE:
1174 case AMDGPU::SI_SPILL_AV512_SAVE:
1175 case AMDGPU::SI_SPILL_AV512_CFI_SAVE:
1176 case AMDGPU::SI_SPILL_AV512_RESTORE:
1178 case AMDGPU::SI_SPILL_S384_SAVE:
1179 case AMDGPU::SI_SPILL_S384_RESTORE:
1180 case AMDGPU::SI_SPILL_V384_SAVE:
1181 case AMDGPU::SI_SPILL_V384_RESTORE:
1182 case AMDGPU::SI_SPILL_A384_SAVE:
1183 case AMDGPU::SI_SPILL_A384_RESTORE:
1184 case AMDGPU::SI_SPILL_AV384_SAVE:
1185 case AMDGPU::SI_SPILL_AV384_RESTORE:
1187 case AMDGPU::SI_SPILL_S352_SAVE:
1188 case AMDGPU::SI_SPILL_S352_RESTORE:
1189 case AMDGPU::SI_SPILL_V352_SAVE:
1190 case AMDGPU::SI_SPILL_V352_RESTORE:
1191 case AMDGPU::SI_SPILL_A352_SAVE:
1192 case AMDGPU::SI_SPILL_A352_RESTORE:
1193 case AMDGPU::SI_SPILL_AV352_SAVE:
1194 case AMDGPU::SI_SPILL_AV352_RESTORE:
1196 case AMDGPU::SI_SPILL_S320_SAVE:
1197 case AMDGPU::SI_SPILL_S320_RESTORE:
1198 case AMDGPU::SI_SPILL_V320_SAVE:
1199 case AMDGPU::SI_SPILL_V320_RESTORE:
1200 case AMDGPU::SI_SPILL_A320_SAVE:
1201 case AMDGPU::SI_SPILL_A320_RESTORE:
1202 case AMDGPU::SI_SPILL_AV320_SAVE:
1203 case AMDGPU::SI_SPILL_AV320_RESTORE:
1205 case AMDGPU::SI_SPILL_S288_SAVE:
1206 case AMDGPU::SI_SPILL_S288_RESTORE:
1207 case AMDGPU::SI_SPILL_V288_SAVE:
1208 case AMDGPU::SI_SPILL_V288_RESTORE:
1209 case AMDGPU::SI_SPILL_A288_SAVE:
1210 case AMDGPU::SI_SPILL_A288_RESTORE:
1211 case AMDGPU::SI_SPILL_AV288_SAVE:
1212 case AMDGPU::SI_SPILL_AV288_RESTORE:
1214 case AMDGPU::SI_SPILL_S256_SAVE:
1215 case AMDGPU::SI_SPILL_S256_CFI_SAVE:
1216 case AMDGPU::SI_SPILL_S256_RESTORE:
1217 case AMDGPU::SI_SPILL_V256_SAVE:
1218 case AMDGPU::SI_SPILL_V256_CFI_SAVE:
1219 case AMDGPU::SI_SPILL_V256_RESTORE:
1220 case AMDGPU::SI_SPILL_A256_SAVE:
1221 case AMDGPU::SI_SPILL_A256_CFI_SAVE:
1222 case AMDGPU::SI_SPILL_A256_RESTORE:
1223 case AMDGPU::SI_SPILL_AV256_SAVE:
1224 case AMDGPU::SI_SPILL_AV256_CFI_SAVE:
1225 case AMDGPU::SI_SPILL_AV256_RESTORE:
1227 case AMDGPU::SI_SPILL_S224_SAVE:
1228 case AMDGPU::SI_SPILL_S224_CFI_SAVE:
1229 case AMDGPU::SI_SPILL_S224_RESTORE:
1230 case AMDGPU::SI_SPILL_V224_SAVE:
1231 case AMDGPU::SI_SPILL_V224_CFI_SAVE:
1232 case AMDGPU::SI_SPILL_V224_RESTORE:
1233 case AMDGPU::SI_SPILL_A224_SAVE:
1234 case AMDGPU::SI_SPILL_A224_CFI_SAVE:
1235 case AMDGPU::SI_SPILL_A224_RESTORE:
1236 case AMDGPU::SI_SPILL_AV224_SAVE:
1237 case AMDGPU::SI_SPILL_AV224_CFI_SAVE:
1238 case AMDGPU::SI_SPILL_AV224_RESTORE:
1240 case AMDGPU::SI_SPILL_S192_SAVE:
1241 case AMDGPU::SI_SPILL_S192_CFI_SAVE:
1242 case AMDGPU::SI_SPILL_S192_RESTORE:
1243 case AMDGPU::SI_SPILL_V192_SAVE:
1244 case AMDGPU::SI_SPILL_V192_CFI_SAVE:
1245 case AMDGPU::SI_SPILL_V192_RESTORE:
1246 case AMDGPU::SI_SPILL_A192_SAVE:
1247 case AMDGPU::SI_SPILL_A192_CFI_SAVE:
1248 case AMDGPU::SI_SPILL_A192_RESTORE:
1249 case AMDGPU::SI_SPILL_AV192_SAVE:
1250 case AMDGPU::SI_SPILL_AV192_CFI_SAVE:
1251 case AMDGPU::SI_SPILL_AV192_RESTORE:
1253 case AMDGPU::SI_SPILL_S160_SAVE:
1254 case AMDGPU::SI_SPILL_S160_CFI_SAVE:
1255 case AMDGPU::SI_SPILL_S160_RESTORE:
1256 case AMDGPU::SI_SPILL_V160_SAVE:
1257 case AMDGPU::SI_SPILL_V160_CFI_SAVE:
1258 case AMDGPU::SI_SPILL_V160_RESTORE:
1259 case AMDGPU::SI_SPILL_A160_SAVE:
1260 case AMDGPU::SI_SPILL_A160_CFI_SAVE:
1261 case AMDGPU::SI_SPILL_A160_RESTORE:
1262 case AMDGPU::SI_SPILL_AV160_SAVE:
1263 case AMDGPU::SI_SPILL_AV160_CFI_SAVE:
1264 case AMDGPU::SI_SPILL_AV160_RESTORE:
1266 case AMDGPU::SI_SPILL_S128_SAVE:
1267 case AMDGPU::SI_SPILL_S128_CFI_SAVE:
1268 case AMDGPU::SI_SPILL_S128_RESTORE:
1269 case AMDGPU::SI_SPILL_V128_SAVE:
1270 case AMDGPU::SI_SPILL_V128_CFI_SAVE:
1271 case AMDGPU::SI_SPILL_V128_RESTORE:
1272 case AMDGPU::SI_SPILL_A128_SAVE:
1273 case AMDGPU::SI_SPILL_A128_CFI_SAVE:
1274 case AMDGPU::SI_SPILL_A128_RESTORE:
1275 case AMDGPU::SI_SPILL_AV128_SAVE:
1276 case AMDGPU::SI_SPILL_AV128_CFI_SAVE:
1277 case AMDGPU::SI_SPILL_AV128_RESTORE:
1279 case AMDGPU::SI_SPILL_S96_SAVE:
1280 case AMDGPU::SI_SPILL_S96_CFI_SAVE:
1281 case AMDGPU::SI_SPILL_S96_RESTORE:
1282 case AMDGPU::SI_SPILL_V96_SAVE:
1283 case AMDGPU::SI_SPILL_V96_CFI_SAVE:
1284 case AMDGPU::SI_SPILL_V96_RESTORE:
1285 case AMDGPU::SI_SPILL_A96_SAVE:
1286 case AMDGPU::SI_SPILL_A96_CFI_SAVE:
1287 case AMDGPU::SI_SPILL_A96_RESTORE:
1288 case AMDGPU::SI_SPILL_AV96_SAVE:
1289 case AMDGPU::SI_SPILL_AV96_CFI_SAVE:
1290 case AMDGPU::SI_SPILL_AV96_RESTORE:
1292 case AMDGPU::SI_SPILL_S64_SAVE:
1293 case AMDGPU::SI_SPILL_S64_CFI_SAVE:
1294 case AMDGPU::SI_SPILL_S64_RESTORE:
1295 case AMDGPU::SI_SPILL_V64_SAVE:
1296 case AMDGPU::SI_SPILL_V64_CFI_SAVE:
1297 case AMDGPU::SI_SPILL_V64_RESTORE:
1298 case AMDGPU::SI_SPILL_A64_SAVE:
1299 case AMDGPU::SI_SPILL_A64_CFI_SAVE:
1300 case AMDGPU::SI_SPILL_A64_RESTORE:
1301 case AMDGPU::SI_SPILL_AV64_SAVE:
1302 case AMDGPU::SI_SPILL_AV64_CFI_SAVE:
1303 case AMDGPU::SI_SPILL_AV64_RESTORE:
1305 case AMDGPU::SI_SPILL_S32_SAVE:
1306 case AMDGPU::SI_SPILL_S32_CFI_SAVE:
1307 case AMDGPU::SI_SPILL_S32_RESTORE:
1308 case AMDGPU::SI_SPILL_V32_SAVE:
1309 case AMDGPU::SI_SPILL_V32_CFI_SAVE:
1310 case AMDGPU::SI_SPILL_V32_RESTORE:
1311 case AMDGPU::SI_SPILL_A32_SAVE:
1312 case AMDGPU::SI_SPILL_A32_CFI_SAVE:
1313 case AMDGPU::SI_SPILL_A32_RESTORE:
1314 case AMDGPU::SI_SPILL_AV32_SAVE:
1315 case AMDGPU::SI_SPILL_AV32_CFI_SAVE:
1316 case AMDGPU::SI_SPILL_AV32_RESTORE:
1317 case AMDGPU::SI_SPILL_WWM_V32_SAVE:
1318 case AMDGPU::SI_SPILL_WWM_V32_RESTORE:
1319 case AMDGPU::SI_SPILL_WWM_AV32_SAVE:
1320 case AMDGPU::SI_SPILL_WWM_AV32_RESTORE:
1321 case AMDGPU::SI_SPILL_V16_SAVE:
1322 case AMDGPU::SI_SPILL_V16_RESTORE:
// Buffer store opcodes: translate each *_OFFEN variant to the *_OFFSET
// variant of the same instruction. Covers DWORD, BYTE, SHORT, DWORDX2/X3/X4
// and the SHORT/BYTE D16_HI stores.
// NOTE(review): the switch header and default case are outside this view.
1330 case AMDGPU::BUFFER_STORE_DWORD_OFFEN:
1331 return AMDGPU::BUFFER_STORE_DWORD_OFFSET;
1332 case AMDGPU::BUFFER_STORE_BYTE_OFFEN:
1333 return AMDGPU::BUFFER_STORE_BYTE_OFFSET;
1334 case AMDGPU::BUFFER_STORE_SHORT_OFFEN:
1335 return AMDGPU::BUFFER_STORE_SHORT_OFFSET;
1336 case AMDGPU::BUFFER_STORE_DWORDX2_OFFEN:
1337 return AMDGPU::BUFFER_STORE_DWORDX2_OFFSET;
1338 case AMDGPU::BUFFER_STORE_DWORDX3_OFFEN:
1339 return AMDGPU::BUFFER_STORE_DWORDX3_OFFSET;
1340 case AMDGPU::BUFFER_STORE_DWORDX4_OFFEN:
1341 return AMDGPU::BUFFER_STORE_DWORDX4_OFFSET;
1342 case AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFEN:
1343 return AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFSET;
1344 case AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFEN:
1345 return AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFSET;
// Buffer load opcodes: translate each *_OFFEN variant to the *_OFFSET
// variant of the same instruction. Covers DWORD, UBYTE/SBYTE, USHORT/SSHORT,
// DWORDX2/X3/X4 and the D16 / D16_HI byte and short loads.
// NOTE(review): the switch header and default case are outside this view.
1353 case AMDGPU::BUFFER_LOAD_DWORD_OFFEN:
1354 return AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
1355 case AMDGPU::BUFFER_LOAD_UBYTE_OFFEN:
1356 return AMDGPU::BUFFER_LOAD_UBYTE_OFFSET;
1357 case AMDGPU::BUFFER_LOAD_SBYTE_OFFEN:
1358 return AMDGPU::BUFFER_LOAD_SBYTE_OFFSET;
1359 case AMDGPU::BUFFER_LOAD_USHORT_OFFEN:
1360 return AMDGPU::BUFFER_LOAD_USHORT_OFFSET;
1361 case AMDGPU::BUFFER_LOAD_SSHORT_OFFEN:
1362 return AMDGPU::BUFFER_LOAD_SSHORT_OFFSET;
1363 case AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN:
1364 return AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET;
1365 case AMDGPU::BUFFER_LOAD_DWORDX3_OFFEN:
1366 return AMDGPU::BUFFER_LOAD_DWORDX3_OFFSET;
1367 case AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN:
1368 return AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET;
1369 case AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFEN:
1370 return AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFSET;
1371 case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN:
1372 return AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET;
1373 case AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFEN:
1374 return AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFSET;
1375 case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN:
1376 return AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET;
1377 case AMDGPU::BUFFER_LOAD_SHORT_D16_OFFEN:
1378 return AMDGPU::BUFFER_LOAD_SHORT_D16_OFFSET;
1379 case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN:
1380 return AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET;
// Buffer store opcodes, inverse direction: translate each *_OFFSET variant to
// the *_OFFEN variant of the same instruction. The set of stores handled here
// mirrors the OFFEN -> OFFSET store cases earlier in this file.
// NOTE(review): the switch header and default case are outside this view.
1388 case AMDGPU::BUFFER_STORE_DWORD_OFFSET:
1389 return AMDGPU::BUFFER_STORE_DWORD_OFFEN;
1390 case AMDGPU::BUFFER_STORE_BYTE_OFFSET:
1391 return AMDGPU::BUFFER_STORE_BYTE_OFFEN;
1392 case AMDGPU::BUFFER_STORE_SHORT_OFFSET:
1393 return AMDGPU::BUFFER_STORE_SHORT_OFFEN;
1394 case AMDGPU::BUFFER_STORE_DWORDX2_OFFSET:
1395 return AMDGPU::BUFFER_STORE_DWORDX2_OFFEN;
1396 case AMDGPU::BUFFER_STORE_DWORDX3_OFFSET:
1397 return AMDGPU::BUFFER_STORE_DWORDX3_OFFEN;
1398 case AMDGPU::BUFFER_STORE_DWORDX4_OFFSET:
1399 return AMDGPU::BUFFER_STORE_DWORDX4_OFFEN;
1400 case AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFSET:
1401 return AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFEN;
1402 case AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFSET:
1403 return AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFEN;
// Buffer load opcodes, inverse direction: translate each *_OFFSET variant to
// the *_OFFEN variant of the same instruction. The set of loads handled here
// mirrors the OFFEN -> OFFSET load cases earlier in this file.
// NOTE(review): the switch header and default case are outside this view.
1411 case AMDGPU::BUFFER_LOAD_DWORD_OFFSET:
1412 return AMDGPU::BUFFER_LOAD_DWORD_OFFEN;
1413 case AMDGPU::BUFFER_LOAD_UBYTE_OFFSET:
1414 return AMDGPU::BUFFER_LOAD_UBYTE_OFFEN;
1415 case AMDGPU::BUFFER_LOAD_SBYTE_OFFSET:
1416 return AMDGPU::BUFFER_LOAD_SBYTE_OFFEN;
1417 case AMDGPU::BUFFER_LOAD_USHORT_OFFSET:
1418 return AMDGPU::BUFFER_LOAD_USHORT_OFFEN;
1419 case AMDGPU::BUFFER_LOAD_SSHORT_OFFSET:
1420 return AMDGPU::BUFFER_LOAD_SSHORT_OFFEN;
1421 case AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET:
1422 return AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN;
1423 case AMDGPU::BUFFER_LOAD_DWORDX3_OFFSET:
1424 return AMDGPU::BUFFER_LOAD_DWORDX3_OFFEN;
1425 case AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET:
1426 return AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN;
1427 case AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFSET:
1428 return AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFEN;
1429 case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET:
1430 return AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN;
1431 case AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFSET:
1432 return AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFEN;
1433 case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET:
1434 return AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN;
1435 case AMDGPU::BUFFER_LOAD_SHORT_D16_OFFSET:
1436 return AMDGPU::BUFFER_LOAD_SHORT_D16_OFFEN;
1437 case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET:
1438 return AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN;
1447 unsigned ValueReg,
bool IsKill,
bool NeedsCFI) {
1455 if (
Reg == AMDGPU::NoRegister)
1458 bool IsStore =
MI->mayStore();
1462 unsigned Dst = IsStore ?
Reg : ValueReg;
1463 unsigned Src = IsStore ? ValueReg :
Reg;
1464 bool IsVGPR =
TRI->isVGPR(MRI,
Reg);
1466 if (IsVGPR ==
TRI->isVGPR(MRI, ValueReg)) {
1478 unsigned Opc = (IsStore ^ IsVGPR) ? AMDGPU::V_ACCVGPR_WRITE_B32_e64
1479 : AMDGPU::V_ACCVGPR_READ_B32_e64;
1499 bool IsStore =
MI->mayStore();
1501 unsigned Opc =
MI->getOpcode();
1502 int LoadStoreOp = IsStore ?
1504 if (LoadStoreOp == -1)
1515 .
add(*
TII->getNamedOperand(*
MI, AMDGPU::OpName::srsrc))
1516 .
add(*
TII->getNamedOperand(*
MI, AMDGPU::OpName::soffset))
1523 AMDGPU::OpName::vdata_in);
1525 NewMI.
add(*VDataIn);
1530 unsigned LoadStoreOp,
1532 bool IsStore =
TII->get(LoadStoreOp).mayStore();
1538 if (
TII->isBlockLoadStore(LoadStoreOp))
1543 LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
1544 : AMDGPU::SCRATCH_LOAD_DWORD_SADDR;
1547 LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORDX2_SADDR
1548 : AMDGPU::SCRATCH_LOAD_DWORDX2_SADDR;
1551 LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORDX3_SADDR
1552 : AMDGPU::SCRATCH_LOAD_DWORDX3_SADDR;
1555 LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORDX4_SADDR
1556 : AMDGPU::SCRATCH_LOAD_DWORDX4_SADDR;
1572 unsigned LoadStoreOp,
int Index,
Register ValueReg,
bool IsKill,
1575 assert((!RS || !LiveUnits) &&
"Only RS or LiveUnits can be set but not both");
1584 bool IsStore =
Desc->mayStore();
1585 bool IsFlat =
TII->isFLATScratch(LoadStoreOp);
1586 bool IsBlock =
TII->isBlockLoadStore(LoadStoreOp);
1588 bool CanClobberSCC =
false;
1589 bool Scavenged =
false;
1594 const bool IsAGPR = !ST.hasGFX90AInsts() &&
isAGPRClass(RC);
1605 bool IsRegMisaligned =
false;
1606 if (!IsBlock && !IsAGPR && RegWidth > 4) {
1607 unsigned SpillOpcode =
1610 IsStore ? AMDGPU::getNamedOperandIdx(SpillOpcode, AMDGPU::OpName::vdata)
1613 TII->getRegClass(
TII->get(SpillOpcode), VDataIdx);
1614 if (!ExpectedRC->
contains(ValueReg)) {
1618 getMatchingSuperRegClass(RC, ExpectedRC, SubIdx);
1619 if (!MatchRC || !MatchRC->
contains(ValueReg))
1620 IsRegMisaligned =
true;
1624 if (IsRegMisaligned)
1629 unsigned EltSize = IsBlock ? RegWidth
1630 : (IsFlat && !IsAGPR) ? std::min(RegWidth, 16u)
1632 unsigned NumSubRegs = RegWidth / EltSize;
1633 unsigned Size = NumSubRegs * EltSize;
1634 unsigned RemSize = RegWidth -
Size;
1635 unsigned NumRemSubRegs = RemSize ? 1 : 0;
1637 if (IsRegMisaligned)
1640 int64_t MaterializedOffset =
Offset;
1645 int64_t MaxOffset =
Offset +
Size - (RemSize ? 0 : EltSize);
1646 int64_t ScratchOffsetRegDelta = 0;
1647 int64_t AdditionalCFIOffset = 0;
1649 if (IsFlat && EltSize > 4) {
1651 Desc = &
TII->get(LoadStoreOp);
1658 "unexpected VGPR spill offset");
1665 bool UseVGPROffset =
false;
1672 if (IsFlat && SGPRBase) {
1677 if (ST.getConstantBusLimit(AMDGPU::V_ADD_U32_e64) >= 2) {
1696 bool IsOffsetLegal =
1699 :
TII->isLegalMUBUFImmOffset(MaxOffset);
1700 if (!IsOffsetLegal || (IsFlat && !SOffset && !ST.hasFlatScratchSTMode())) {
1708 SOffset = RS->scavengeRegisterBackwards(AMDGPU::SGPR_32RegClass,
MI,
false, 0,
false);
1711 CanClobberSCC = !RS->isRegUsed(AMDGPU::SCC);
1712 }
else if (LiveUnits) {
1713 CanClobberSCC = LiveUnits->
available(AMDGPU::SCC);
1714 for (
MCRegister Reg : AMDGPU::SGPR_32RegClass) {
1722 if (ScratchOffsetReg != AMDGPU::NoRegister && !CanClobberSCC)
1726 UseVGPROffset =
true;
1729 TmpOffsetVGPR = RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass,
MI,
false, 0);
1732 for (
MCRegister Reg : AMDGPU::VGPR_32RegClass) {
1734 TmpOffsetVGPR = Reg;
1741 }
else if (!SOffset && CanClobberSCC) {
1752 if (!ScratchOffsetReg)
1754 SOffset = ScratchOffsetReg;
1755 ScratchOffsetRegDelta =
Offset;
1760 AdditionalCFIOffset =
Offset;
1764 if (!IsFlat && !UseVGPROffset)
1765 Offset *= ST.getWavefrontSize();
1767 if (!UseVGPROffset && !SOffset)
1770 if (UseVGPROffset) {
1772 MaterializeVOffset(ScratchOffsetReg, TmpOffsetVGPR,
Offset);
1773 }
else if (ScratchOffsetReg == AMDGPU::NoRegister) {
1778 .
addReg(ScratchOffsetReg)
1780 Add->getOperand(3).setIsDead();
1786 if (IsFlat && SOffset == AMDGPU::NoRegister) {
1787 assert(AMDGPU::getNamedOperandIdx(LoadStoreOp, AMDGPU::OpName::vaddr) < 0
1788 &&
"Unexpected vaddr for flat scratch with a FI operand");
1790 if (UseVGPROffset) {
1793 assert(ST.hasFlatScratchSTMode());
1794 assert(!
TII->isBlockLoadStore(LoadStoreOp) &&
"Block ops don't have ST");
1798 Desc = &
TII->get(LoadStoreOp);
1803 unsigned OrigEltSize = EltSize;
1804 for (
unsigned i = 0, e = NumSubRegs + NumRemSubRegs, RegOffset = 0; i != e;
1805 ++i, RegOffset += EltSize) {
1806 if (IsRegMisaligned) {
1814 IsRegMisaligned =
false;
1815 EltSize = OrigEltSize;
1819 if (i == NumSubRegs) {
1823 Desc = &
TII->get(LoadStoreOp);
1825 if (!IsFlat && UseVGPROffset) {
1828 Desc = &
TII->get(NewLoadStoreOp);
1831 if (UseVGPROffset && TmpOffsetVGPR == TmpIntermediateVGPR) {
1838 MaterializeVOffset(ScratchOffsetReg, TmpOffsetVGPR, MaterializedOffset);
1841 unsigned NumRegs = EltSize / 4;
1849 const bool IsLastSubReg = i + 1 == e;
1850 const bool IsFirstSubReg = i == 0;
1859 bool NeedSuperRegDef = e > 1 && IsStore && IsFirstSubReg;
1860 bool NeedSuperRegImpOperand = e > 1;
1864 unsigned RemEltSize = EltSize;
1872 for (
int LaneS = (RegOffset + EltSize) / 4 - 1, Lane = LaneS,
1873 LaneE = RegOffset / 4;
1874 Lane >= LaneE; --Lane) {
1875 bool IsSubReg = e > 1 || EltSize > 4;
1881 if (!MIB.getInstr())
1883 if (NeedSuperRegDef || (IsSubReg && IsStore && Lane == LaneS && IsFirstSubReg)) {
1885 NeedSuperRegDef =
false;
1887 if ((IsSubReg || NeedSuperRegImpOperand) && (IsFirstSubReg || IsLastSubReg)) {
1888 NeedSuperRegImpOperand =
true;
1890 if (!IsLastSubReg || (Lane != LaneE))
1892 if (!IsFirstSubReg || (Lane != LaneS))
1902 if (RemEltSize != EltSize) {
1903 assert(IsFlat && EltSize > 4);
1905 unsigned NumRegs = RemEltSize / 4;
1906 SubReg =
Register(getSubReg(ValueReg,
1912 unsigned FinalReg = SubReg;
1917 if (!TmpIntermediateVGPR) {
1923 TII->get(AMDGPU::V_ACCVGPR_READ_B32_e64),
1924 TmpIntermediateVGPR)
1926 if (NeedSuperRegDef)
1928 if (NeedSuperRegImpOperand && (IsFirstSubReg || IsLastSubReg))
1932 SubReg = TmpIntermediateVGPR;
1933 }
else if (UseVGPROffset) {
1934 if (!TmpOffsetVGPR) {
1935 TmpOffsetVGPR = RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass,
1937 RS->setRegUsed(TmpOffsetVGPR);
1942 if (LoadStoreOp == AMDGPU::SCRATCH_LOAD_USHORT_SADDR) {
1946 RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass,
MI,
false, 0);
1962 if (UseVGPROffset) {
1971 if (SOffset == AMDGPU::NoRegister) {
1973 if (UseVGPROffset && ScratchOffsetReg) {
1974 MIB.addReg(ScratchOffsetReg);
1981 MIB.addReg(SOffset, SOffsetRegState);
1991 MIB.addMemOperand(NewMMO);
1993 if (FinalValueReg != ValueReg) {
1995 ValueReg = getSubReg(ValueReg, AMDGPU::lo16);
2001 ValueReg = FinalValueReg;
2004 if (IsStore && NeedsCFI) {
2005 if (
TII->isBlockLoadStore(LoadStoreOp)) {
2007 "expected whole register block to be treated as single element");
2012 (
Offset + RegOffset) * ST.getWavefrontSize() + AdditionalCFIOffset);
2016 if (!IsAGPR && NeedSuperRegDef)
2019 if (!IsStore && IsAGPR && TmpIntermediateVGPR != AMDGPU::NoRegister) {
2027 bool PartialReloadCopy = (RemEltSize != EltSize) && !IsStore;
2028 if (NeedSuperRegImpOperand &&
2029 (IsFirstSubReg || (IsLastSubReg && !IsSrcDstDef))) {
2031 if (PartialReloadCopy)
2056 if (!IsStore &&
MI !=
MBB.end() &&
MI->isReturn() &&
2057 MI->readsRegister(SubReg,
this)) {
2059 MIB->tieOperands(0, MIB->getNumOperands() - 1);
2067 if (!IsStore &&
TII->isBlockLoadStore(LoadStoreOp))
2071 if (ScratchOffsetRegDelta != 0) {
2075 .
addImm(-ScratchOffsetRegDelta);
2084 Register BaseVGPR = getSubReg(BlockReg, AMDGPU::sub0);
2085 for (
unsigned RegOffset = 1; RegOffset < 32; ++RegOffset)
2086 if (!(Mask & (1 << RegOffset)) &&
2087 isCalleeSavedPhysReg(BaseVGPR + RegOffset, *MF))
2098 Register BaseVGPR = getSubReg(BlockReg, AMDGPU::sub0);
2099 for (
unsigned RegOffset = 0; RegOffset < 32; ++RegOffset) {
2100 Register VGPR = BaseVGPR + RegOffset;
2101 if (Mask & (1 << RegOffset)) {
2102 assert(isCalleeSavedPhysReg(VGPR, *MF));
2103 ST.getFrameLowering()->buildCFIForVGPRToVMEMSpill(
2105 (
Offset + RegOffset) * ST.getWavefrontSize());
2106 }
else if (isCalleeSavedPhysReg(VGPR, *MF)) {
2111 BaseVGPR + RegOffset);
2118 bool IsKill)
const {
2128 Align Alignment = FrameInfo.getObjectAlign(Index);
2135 unsigned Opc = ST.hasFlatScratchEnabled()
2136 ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
2137 : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
2141 unsigned Opc = ST.hasFlatScratchEnabled()
2142 ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
2143 : AMDGPU::BUFFER_STORE_DWORD_OFFSET;
2154 bool SpillToPhysVGPRLane,
bool NeedsCFI)
const {
2155 assert(!
MI->getOperand(0).isUndef() &&
2156 "undef spill should have been deleted earlier");
2163 bool SpillToVGPR = !VGPRSpills.
empty();
2164 if (OnlyToVGPR && !SpillToVGPR)
2179 "Num of SGPRs spilled should be less than or equal to num of "
2182 for (
unsigned i = 0, e = SB.
NumSubRegs; i < e; ++i) {
2189 bool IsFirstSubreg = i == 0;
2191 bool UseKill = SB.
IsKill && IsLastSubreg;
2197 SB.
TII.get(AMDGPU::SI_SPILL_S32_TO_VGPR), Spill.VGPR)
2207 AMDGPU::PC_REG, VGPRSpills);
2210 Spill.VGPR, Spill.Lane);
2230 if (SB.
NumSubRegs > 1 && (IsFirstSubreg || IsLastSubreg))
2250 for (
unsigned i =
Offset * PVD.PerVGPR,
2260 SB.
TII.get(AMDGPU::SI_SPILL_S32_TO_VGPR), SB.
TmpVGPR)
2261 .
addReg(SubReg, SubKillState)
2292 ST.getWavefrontSize();
2294 AMDGPU::PC_REG, CFIOffset);
2303 MI->eraseFromParent();
2315 bool SpillToPhysVGPRLane)
const {
2321 bool SpillToVGPR = !VGPRSpills.
empty();
2322 if (OnlyToVGPR && !SpillToVGPR)
2326 for (
unsigned i = 0, e = SB.
NumSubRegs; i < e; ++i) {
2334 SB.
TII.get(AMDGPU::SI_RESTORE_S32_FROM_VGPR), SubReg)
2357 for (
unsigned i =
Offset * PVD.PerVGPR,
2365 bool LastSubReg = (i + 1 == e);
2367 SB.
TII.get(AMDGPU::SI_RESTORE_S32_FROM_VGPR), SubReg)
2384 MI->eraseFromParent();
2404 for (
unsigned i =
Offset * PVD.PerVGPR,
2415 .
addReg(SubReg, SubKillState)
2433 MI = RestoreMBB.
end();
2439 for (
unsigned i =
Offset * PVD.PerVGPR,
2448 bool LastSubReg = (i + 1 == e);
2469 bool NeedsCFI =
false;
2470 switch (
MI->getOpcode()) {
2471 case AMDGPU::SI_SPILL_S1024_CFI_SAVE:
2472 case AMDGPU::SI_SPILL_S512_CFI_SAVE:
2473 case AMDGPU::SI_SPILL_S256_CFI_SAVE:
2474 case AMDGPU::SI_SPILL_S224_CFI_SAVE:
2475 case AMDGPU::SI_SPILL_S192_CFI_SAVE:
2476 case AMDGPU::SI_SPILL_S160_CFI_SAVE:
2477 case AMDGPU::SI_SPILL_S128_CFI_SAVE:
2478 case AMDGPU::SI_SPILL_S96_CFI_SAVE:
2479 case AMDGPU::SI_SPILL_S64_CFI_SAVE:
2480 case AMDGPU::SI_SPILL_S32_CFI_SAVE:
2483 case AMDGPU::SI_SPILL_S1024_SAVE:
2484 case AMDGPU::SI_SPILL_S512_SAVE:
2485 case AMDGPU::SI_SPILL_S384_SAVE:
2486 case AMDGPU::SI_SPILL_S352_SAVE:
2487 case AMDGPU::SI_SPILL_S320_SAVE:
2488 case AMDGPU::SI_SPILL_S288_SAVE:
2489 case AMDGPU::SI_SPILL_S256_SAVE:
2490 case AMDGPU::SI_SPILL_S224_SAVE:
2491 case AMDGPU::SI_SPILL_S192_SAVE:
2492 case AMDGPU::SI_SPILL_S160_SAVE:
2493 case AMDGPU::SI_SPILL_S128_SAVE:
2494 case AMDGPU::SI_SPILL_S96_SAVE:
2495 case AMDGPU::SI_SPILL_S64_SAVE:
2496 case AMDGPU::SI_SPILL_S32_SAVE:
2497 return spillSGPR(
MI, FI, RS, Indexes, LIS,
true, SpillToPhysVGPRLane,
2499 case AMDGPU::SI_SPILL_S1024_RESTORE:
2500 case AMDGPU::SI_SPILL_S512_RESTORE:
2501 case AMDGPU::SI_SPILL_S384_RESTORE:
2502 case AMDGPU::SI_SPILL_S352_RESTORE:
2503 case AMDGPU::SI_SPILL_S320_RESTORE:
2504 case AMDGPU::SI_SPILL_S288_RESTORE:
2505 case AMDGPU::SI_SPILL_S256_RESTORE:
2506 case AMDGPU::SI_SPILL_S224_RESTORE:
2507 case AMDGPU::SI_SPILL_S192_RESTORE:
2508 case AMDGPU::SI_SPILL_S160_RESTORE:
2509 case AMDGPU::SI_SPILL_S128_RESTORE:
2510 case AMDGPU::SI_SPILL_S96_RESTORE:
2511 case AMDGPU::SI_SPILL_S64_RESTORE:
2512 case AMDGPU::SI_SPILL_S32_RESTORE:
2513 return restoreSGPR(
MI, FI, RS, Indexes, LIS,
true, SpillToPhysVGPRLane);
2520 int SPAdj,
unsigned FIOperandNum,
2529 assert(SPAdj == 0 &&
"unhandled SP adjustment in call sequence?");
2532 "unreserved scratch RSRC register");
2535 int Index =
MI->getOperand(FIOperandNum).getIndex();
2541 bool NeedsCFI =
false;
2543 switch (
MI->getOpcode()) {
2545 case AMDGPU::SI_SPILL_S1024_CFI_SAVE:
2546 case AMDGPU::SI_SPILL_S512_CFI_SAVE:
2547 case AMDGPU::SI_SPILL_S256_CFI_SAVE:
2548 case AMDGPU::SI_SPILL_S224_CFI_SAVE:
2549 case AMDGPU::SI_SPILL_S192_CFI_SAVE:
2550 case AMDGPU::SI_SPILL_S160_CFI_SAVE:
2551 case AMDGPU::SI_SPILL_S128_CFI_SAVE:
2552 case AMDGPU::SI_SPILL_S96_CFI_SAVE:
2553 case AMDGPU::SI_SPILL_S64_CFI_SAVE:
2554 case AMDGPU::SI_SPILL_S32_CFI_SAVE: {
2558 case AMDGPU::SI_SPILL_S1024_SAVE:
2559 case AMDGPU::SI_SPILL_S512_SAVE:
2560 case AMDGPU::SI_SPILL_S384_SAVE:
2561 case AMDGPU::SI_SPILL_S352_SAVE:
2562 case AMDGPU::SI_SPILL_S320_SAVE:
2563 case AMDGPU::SI_SPILL_S288_SAVE:
2564 case AMDGPU::SI_SPILL_S256_SAVE:
2565 case AMDGPU::SI_SPILL_S224_SAVE:
2566 case AMDGPU::SI_SPILL_S192_SAVE:
2567 case AMDGPU::SI_SPILL_S160_SAVE:
2568 case AMDGPU::SI_SPILL_S128_SAVE:
2569 case AMDGPU::SI_SPILL_S96_SAVE:
2570 case AMDGPU::SI_SPILL_S64_SAVE:
2571 case AMDGPU::SI_SPILL_S32_SAVE: {
2572 return spillSGPR(
MI, Index, RS,
nullptr,
nullptr,
false,
false, NeedsCFI);
2576 case AMDGPU::SI_SPILL_S1024_RESTORE:
2577 case AMDGPU::SI_SPILL_S512_RESTORE:
2578 case AMDGPU::SI_SPILL_S384_RESTORE:
2579 case AMDGPU::SI_SPILL_S352_RESTORE:
2580 case AMDGPU::SI_SPILL_S320_RESTORE:
2581 case AMDGPU::SI_SPILL_S288_RESTORE:
2582 case AMDGPU::SI_SPILL_S256_RESTORE:
2583 case AMDGPU::SI_SPILL_S224_RESTORE:
2584 case AMDGPU::SI_SPILL_S192_RESTORE:
2585 case AMDGPU::SI_SPILL_S160_RESTORE:
2586 case AMDGPU::SI_SPILL_S128_RESTORE:
2587 case AMDGPU::SI_SPILL_S96_RESTORE:
2588 case AMDGPU::SI_SPILL_S64_RESTORE:
2589 case AMDGPU::SI_SPILL_S32_RESTORE: {
2594 case AMDGPU::SI_BLOCK_SPILL_V1024_CFI_SAVE:
2595 case AMDGPU::SI_SPILL_V1024_CFI_SAVE:
2596 case AMDGPU::SI_SPILL_V512_CFI_SAVE:
2597 case AMDGPU::SI_SPILL_V256_CFI_SAVE:
2598 case AMDGPU::SI_SPILL_V224_CFI_SAVE:
2599 case AMDGPU::SI_SPILL_V192_CFI_SAVE:
2600 case AMDGPU::SI_SPILL_V160_CFI_SAVE:
2601 case AMDGPU::SI_SPILL_V128_CFI_SAVE:
2602 case AMDGPU::SI_SPILL_V96_CFI_SAVE:
2603 case AMDGPU::SI_SPILL_V64_CFI_SAVE:
2604 case AMDGPU::SI_SPILL_V32_CFI_SAVE:
2605 case AMDGPU::SI_SPILL_A1024_CFI_SAVE:
2606 case AMDGPU::SI_SPILL_A512_CFI_SAVE:
2607 case AMDGPU::SI_SPILL_A256_CFI_SAVE:
2608 case AMDGPU::SI_SPILL_A224_CFI_SAVE:
2609 case AMDGPU::SI_SPILL_A192_CFI_SAVE:
2610 case AMDGPU::SI_SPILL_A160_CFI_SAVE:
2611 case AMDGPU::SI_SPILL_A128_CFI_SAVE:
2612 case AMDGPU::SI_SPILL_A96_CFI_SAVE:
2613 case AMDGPU::SI_SPILL_A64_CFI_SAVE:
2614 case AMDGPU::SI_SPILL_A32_CFI_SAVE:
2615 case AMDGPU::SI_SPILL_AV1024_CFI_SAVE:
2616 case AMDGPU::SI_SPILL_AV512_CFI_SAVE:
2617 case AMDGPU::SI_SPILL_AV256_CFI_SAVE:
2618 case AMDGPU::SI_SPILL_AV224_CFI_SAVE:
2619 case AMDGPU::SI_SPILL_AV192_CFI_SAVE:
2620 case AMDGPU::SI_SPILL_AV160_CFI_SAVE:
2621 case AMDGPU::SI_SPILL_AV128_CFI_SAVE:
2622 case AMDGPU::SI_SPILL_AV96_CFI_SAVE:
2623 case AMDGPU::SI_SPILL_AV64_CFI_SAVE:
2624 case AMDGPU::SI_SPILL_AV32_CFI_SAVE:
2627 case AMDGPU::SI_BLOCK_SPILL_V1024_SAVE:
2628 case AMDGPU::SI_SPILL_V1024_SAVE:
2629 case AMDGPU::SI_SPILL_V512_SAVE:
2630 case AMDGPU::SI_SPILL_V384_SAVE:
2631 case AMDGPU::SI_SPILL_V352_SAVE:
2632 case AMDGPU::SI_SPILL_V320_SAVE:
2633 case AMDGPU::SI_SPILL_V288_SAVE:
2634 case AMDGPU::SI_SPILL_V256_SAVE:
2635 case AMDGPU::SI_SPILL_V224_SAVE:
2636 case AMDGPU::SI_SPILL_V192_SAVE:
2637 case AMDGPU::SI_SPILL_V160_SAVE:
2638 case AMDGPU::SI_SPILL_V128_SAVE:
2639 case AMDGPU::SI_SPILL_V96_SAVE:
2640 case AMDGPU::SI_SPILL_V64_SAVE:
2641 case AMDGPU::SI_SPILL_V32_SAVE:
2642 case AMDGPU::SI_SPILL_V16_SAVE:
2643 case AMDGPU::SI_SPILL_A1024_SAVE:
2644 case AMDGPU::SI_SPILL_A512_SAVE:
2645 case AMDGPU::SI_SPILL_A384_SAVE:
2646 case AMDGPU::SI_SPILL_A352_SAVE:
2647 case AMDGPU::SI_SPILL_A320_SAVE:
2648 case AMDGPU::SI_SPILL_A288_SAVE:
2649 case AMDGPU::SI_SPILL_A256_SAVE:
2650 case AMDGPU::SI_SPILL_A224_SAVE:
2651 case AMDGPU::SI_SPILL_A192_SAVE:
2652 case AMDGPU::SI_SPILL_A160_SAVE:
2653 case AMDGPU::SI_SPILL_A128_SAVE:
2654 case AMDGPU::SI_SPILL_A96_SAVE:
2655 case AMDGPU::SI_SPILL_A64_SAVE:
2656 case AMDGPU::SI_SPILL_A32_SAVE:
2657 case AMDGPU::SI_SPILL_AV1024_SAVE:
2658 case AMDGPU::SI_SPILL_AV512_SAVE:
2659 case AMDGPU::SI_SPILL_AV384_SAVE:
2660 case AMDGPU::SI_SPILL_AV352_SAVE:
2661 case AMDGPU::SI_SPILL_AV320_SAVE:
2662 case AMDGPU::SI_SPILL_AV288_SAVE:
2663 case AMDGPU::SI_SPILL_AV256_SAVE:
2664 case AMDGPU::SI_SPILL_AV224_SAVE:
2665 case AMDGPU::SI_SPILL_AV192_SAVE:
2666 case AMDGPU::SI_SPILL_AV160_SAVE:
2667 case AMDGPU::SI_SPILL_AV128_SAVE:
2668 case AMDGPU::SI_SPILL_AV96_SAVE:
2669 case AMDGPU::SI_SPILL_AV64_SAVE:
2670 case AMDGPU::SI_SPILL_AV32_SAVE:
2671 case AMDGPU::SI_SPILL_WWM_V32_SAVE:
2672 case AMDGPU::SI_SPILL_WWM_AV32_SAVE: {
2674 MI->getOpcode() != AMDGPU::SI_BLOCK_SPILL_V1024_SAVE &&
2675 "block spill does not currenty support spilling non-CSR registers");
2677 if (
MI->getOpcode() == AMDGPU::SI_BLOCK_SPILL_V1024_CFI_SAVE)
2681 .
add(*
TII->getNamedOperand(*
MI, AMDGPU::OpName::mask));
2684 AMDGPU::OpName::vdata);
2686 MI->eraseFromParent();
2690 assert(
TII->getNamedOperand(*
MI, AMDGPU::OpName::soffset)->getReg() ==
2694 if (
MI->getOpcode() == AMDGPU::SI_SPILL_V16_SAVE) {
2695 assert(ST.hasFlatScratchEnabled() &&
"Flat Scratch is not enabled!");
2696 Opc = AMDGPU::SCRATCH_STORE_SHORT_SADDR_t16;
2698 Opc =
MI->getOpcode() == AMDGPU::SI_BLOCK_SPILL_V1024_CFI_SAVE
2699 ? AMDGPU::SCRATCH_STORE_BLOCK_SADDR
2700 : ST.hasFlatScratchEnabled() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
2701 : AMDGPU::BUFFER_STORE_DWORD_OFFSET;
2704 auto *
MBB =
MI->getParent();
2705 bool IsWWMRegSpill =
TII->isWWMRegSpillOpcode(
MI->getOpcode());
2706 if (IsWWMRegSpill) {
2708 RS->isRegUsed(AMDGPU::SCC));
2712 TII->getNamedOperand(*
MI, AMDGPU::OpName::offset)->getImm(),
2713 *
MI->memoperands_begin(), RS,
nullptr, NeedsCFI);
2718 MI->eraseFromParent();
2721 case AMDGPU::SI_BLOCK_SPILL_V1024_RESTORE: {
2725 .
add(*
TII->getNamedOperand(*
MI, AMDGPU::OpName::mask));
2728 case AMDGPU::SI_SPILL_V16_RESTORE:
2729 case AMDGPU::SI_SPILL_V32_RESTORE:
2730 case AMDGPU::SI_SPILL_V64_RESTORE:
2731 case AMDGPU::SI_SPILL_V96_RESTORE:
2732 case AMDGPU::SI_SPILL_V128_RESTORE:
2733 case AMDGPU::SI_SPILL_V160_RESTORE:
2734 case AMDGPU::SI_SPILL_V192_RESTORE:
2735 case AMDGPU::SI_SPILL_V224_RESTORE:
2736 case AMDGPU::SI_SPILL_V256_RESTORE:
2737 case AMDGPU::SI_SPILL_V288_RESTORE:
2738 case AMDGPU::SI_SPILL_V320_RESTORE:
2739 case AMDGPU::SI_SPILL_V352_RESTORE:
2740 case AMDGPU::SI_SPILL_V384_RESTORE:
2741 case AMDGPU::SI_SPILL_V512_RESTORE:
2742 case AMDGPU::SI_SPILL_V1024_RESTORE:
2743 case AMDGPU::SI_SPILL_A32_RESTORE:
2744 case AMDGPU::SI_SPILL_A64_RESTORE:
2745 case AMDGPU::SI_SPILL_A96_RESTORE:
2746 case AMDGPU::SI_SPILL_A128_RESTORE:
2747 case AMDGPU::SI_SPILL_A160_RESTORE:
2748 case AMDGPU::SI_SPILL_A192_RESTORE:
2749 case AMDGPU::SI_SPILL_A224_RESTORE:
2750 case AMDGPU::SI_SPILL_A256_RESTORE:
2751 case AMDGPU::SI_SPILL_A288_RESTORE:
2752 case AMDGPU::SI_SPILL_A320_RESTORE:
2753 case AMDGPU::SI_SPILL_A352_RESTORE:
2754 case AMDGPU::SI_SPILL_A384_RESTORE:
2755 case AMDGPU::SI_SPILL_A512_RESTORE:
2756 case AMDGPU::SI_SPILL_A1024_RESTORE:
2757 case AMDGPU::SI_SPILL_AV32_RESTORE:
2758 case AMDGPU::SI_SPILL_AV64_RESTORE:
2759 case AMDGPU::SI_SPILL_AV96_RESTORE:
2760 case AMDGPU::SI_SPILL_AV128_RESTORE:
2761 case AMDGPU::SI_SPILL_AV160_RESTORE:
2762 case AMDGPU::SI_SPILL_AV192_RESTORE:
2763 case AMDGPU::SI_SPILL_AV224_RESTORE:
2764 case AMDGPU::SI_SPILL_AV256_RESTORE:
2765 case AMDGPU::SI_SPILL_AV288_RESTORE:
2766 case AMDGPU::SI_SPILL_AV320_RESTORE:
2767 case AMDGPU::SI_SPILL_AV352_RESTORE:
2768 case AMDGPU::SI_SPILL_AV384_RESTORE:
2769 case AMDGPU::SI_SPILL_AV512_RESTORE:
2770 case AMDGPU::SI_SPILL_AV1024_RESTORE:
2771 case AMDGPU::SI_SPILL_WWM_V32_RESTORE:
2772 case AMDGPU::SI_SPILL_WWM_AV32_RESTORE: {
2774 AMDGPU::OpName::vdata);
2775 assert(
TII->getNamedOperand(*
MI, AMDGPU::OpName::soffset)->getReg() ==
2779 if (
MI->getOpcode() == AMDGPU::SI_SPILL_V16_RESTORE) {
2780 assert(ST.hasFlatScratchEnabled() &&
"Flat Scratch is not enabled!");
2781 Opc = ST.d16PreservesUnusedBits()
2782 ? AMDGPU::SCRATCH_LOAD_SHORT_D16_SADDR_t16
2783 : AMDGPU::SCRATCH_LOAD_USHORT_SADDR;
2785 Opc =
MI->getOpcode() == AMDGPU::SI_BLOCK_SPILL_V1024_RESTORE
2786 ? AMDGPU::SCRATCH_LOAD_BLOCK_SADDR
2787 : ST.hasFlatScratchEnabled() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
2788 : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
2791 auto *
MBB =
MI->getParent();
2792 bool IsWWMRegSpill =
TII->isWWMRegSpillOpcode(
MI->getOpcode());
2793 if (IsWWMRegSpill) {
2795 RS->isRegUsed(AMDGPU::SCC));
2800 TII->getNamedOperand(*
MI, AMDGPU::OpName::offset)->getImm(),
2801 *
MI->memoperands_begin(), RS);
2806 MI->eraseFromParent();
2809 case AMDGPU::V_ADD_U32_e32:
2810 case AMDGPU::V_ADD_U32_e64:
2811 case AMDGPU::V_ADD_CO_U32_e32:
2812 case AMDGPU::V_ADD_CO_U32_e64: {
2814 unsigned NumDefs =
MI->getNumExplicitDefs();
2815 unsigned Src0Idx = NumDefs;
2817 bool HasClamp =
false;
2820 switch (
MI->getOpcode()) {
2821 case AMDGPU::V_ADD_U32_e32:
2823 case AMDGPU::V_ADD_U32_e64:
2824 HasClamp =
MI->getOperand(3).getImm();
2826 case AMDGPU::V_ADD_CO_U32_e32:
2827 VCCOp = &
MI->getOperand(3);
2829 case AMDGPU::V_ADD_CO_U32_e64:
2830 VCCOp = &
MI->getOperand(1);
2831 HasClamp =
MI->getOperand(4).getImm();
2836 bool DeadVCC = !VCCOp || VCCOp->
isDead();
2840 unsigned OtherOpIdx =
2841 FIOperandNum == Src0Idx ? FIOperandNum + 1 : Src0Idx;
2844 unsigned Src1Idx = Src0Idx + 1;
2845 Register MaterializedReg = FrameReg;
2848 int64_t
Offset = FrameInfo.getObjectOffset(Index);
2852 if (OtherOp->
isImm()) {
2863 OtherOp->
setImm(TotalOffset);
2867 if (FrameReg && !ST.hasFlatScratchEnabled()) {
2875 ScavengedVGPR = RS->scavengeRegisterBackwards(
2876 AMDGPU::VGPR_32RegClass,
MI,
false, 0);
2882 .
addImm(ST.getWavefrontSizeLog2())
2884 MaterializedReg = ScavengedVGPR;
2887 if ((!OtherOp->
isImm() || OtherOp->
getImm() != 0) && MaterializedReg) {
2888 if (ST.hasFlatScratchEnabled() &&
2889 !
TII->isOperandLegal(*
MI, Src1Idx, OtherOp)) {
2896 if (!ScavengedVGPR) {
2897 ScavengedVGPR = RS->scavengeRegisterBackwards(
2898 AMDGPU::VGPR_32RegClass,
MI,
false,
2902 assert(ScavengedVGPR != DstReg);
2907 MaterializedReg = ScavengedVGPR;
2916 AddI32.
add(
MI->getOperand(1));
2921 if (
isVGPRClass(getPhysRegBaseClass(MaterializedReg))) {
2926 .addReg(MaterializedReg, MaterializedRegFlags);
2931 .addReg(MaterializedReg, MaterializedRegFlags)
2935 if (
MI->getOpcode() == AMDGPU::V_ADD_CO_U32_e64 ||
2936 MI->getOpcode() == AMDGPU::V_ADD_U32_e64)
2939 if (
MI->getOpcode() == AMDGPU::V_ADD_CO_U32_e32)
2940 AddI32.setOperandDead(3);
2942 MaterializedReg = DstReg;
2948 }
else if (
Offset != 0) {
2949 assert(!MaterializedReg);
2953 if (DeadVCC && !HasClamp) {
2958 if (OtherOp->
isReg() && OtherOp->
getReg() == DstReg) {
2960 MI->eraseFromParent();
2965 MI->setDesc(
TII->get(AMDGPU::V_MOV_B32_e32));
2966 MI->removeOperand(FIOperandNum);
2968 unsigned NumOps =
MI->getNumOperands();
2969 for (
unsigned I =
NumOps - 2;
I >= NumDefs + 1; --
I)
2970 MI->removeOperand(
I);
2973 MI->removeOperand(1);
2985 if (!
TII->isOperandLegal(*
MI, Src1Idx) &&
TII->commuteInstruction(*
MI)) {
2993 for (
unsigned SrcIdx : {FIOperandNum, OtherOpIdx}) {
2994 if (!
TII->isOperandLegal(*
MI, SrcIdx)) {
2998 if (!ScavengedVGPR) {
2999 ScavengedVGPR = RS->scavengeRegisterBackwards(
3000 AMDGPU::VGPR_32RegClass,
MI,
false,
3004 assert(ScavengedVGPR != DstReg);
3010 Src.ChangeToRegister(ScavengedVGPR,
false);
3011 Src.setIsKill(
true);
3017 if (FIOp->
isImm() && FIOp->
getImm() == 0 && DeadVCC && !HasClamp) {
3018 if (OtherOp->
isReg() && OtherOp->
getReg() != DstReg) {
3022 MI->eraseFromParent();
3027 case AMDGPU::S_ADD_I32:
3028 case AMDGPU::S_ADD_U32: {
3030 unsigned OtherOpIdx = FIOperandNum == 1 ? 2 : 1;
3037 Register MaterializedReg = FrameReg;
3040 bool DeadSCC =
MI->getOperand(3).isDead();
3049 if (FrameReg && !ST.hasFlatScratchEnabled()) {
3054 TmpReg = RS->scavengeRegisterBackwards(AMDGPU::SReg_32_XM0RegClass,
3061 .
addImm(ST.getWavefrontSizeLog2())
3064 MaterializedReg = TmpReg;
3067 int64_t
Offset = FrameInfo.getObjectOffset(Index);
3072 if (OtherOp.
isImm()) {
3076 if (MaterializedReg)
3080 }
else if (MaterializedReg) {
3084 if (!TmpReg && MaterializedReg == FrameReg) {
3085 TmpReg = RS->scavengeRegisterBackwards(AMDGPU::SReg_32_XM0RegClass,
3099 MaterializedReg = DstReg;
3112 if (DeadSCC && OtherOp.
isImm() && OtherOp.
getImm() == 0) {
3114 MI->removeOperand(3);
3115 MI->removeOperand(OtherOpIdx);
3116 MI->setDesc(
TII->get(FIOp->
isReg() ? AMDGPU::COPY : AMDGPU::S_MOV_B32));
3117 }
else if (DeadSCC && FIOp->
isImm() && FIOp->
getImm() == 0) {
3119 MI->removeOperand(3);
3120 MI->removeOperand(FIOperandNum);
3122 TII->get(OtherOp.
isReg() ? AMDGPU::COPY : AMDGPU::S_MOV_B32));
3133 int64_t
Offset = FrameInfo.getObjectOffset(Index);
3134 if (ST.hasFlatScratchEnabled()) {
3135 if (
TII->isFLATScratch(*
MI)) {
3137 (int16_t)FIOperandNum ==
3138 AMDGPU::getNamedOperandIdx(
MI->getOpcode(), AMDGPU::OpName::saddr));
3145 TII->getNamedOperand(*
MI, AMDGPU::OpName::offset);
3156 unsigned Opc =
MI->getOpcode();
3160 }
else if (ST.hasFlatScratchSTMode()) {
3170 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst_in);
3171 bool TiedVDst = VDstIn != -1 &&
MI->getOperand(VDstIn).isReg() &&
3172 MI->getOperand(VDstIn).isTied();
3174 MI->untieRegOperand(VDstIn);
3177 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::saddr));
3181 AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst);
3183 AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst_in);
3184 assert(NewVDst != -1 && NewVDstIn != -1 &&
"Must be tied!");
3185 MI->tieOperands(NewVDst, NewVDstIn);
3187 MI->setDesc(
TII->get(NewOpc));
3195 if (
TII->isImmOperandLegal(*
MI, FIOperandNum, *FIOp))
3202 bool UseSGPR =
TII->isOperandLegal(*
MI, FIOperandNum, FIOp);
3204 if (!
Offset && FrameReg && UseSGPR) {
3210 UseSGPR ? &AMDGPU::SReg_32_XM0RegClass : &AMDGPU::VGPR_32RegClass;
3213 RS->scavengeRegisterBackwards(*RC,
MI,
false, 0, !UseSGPR);
3217 if ((!FrameReg || !
Offset) && TmpReg) {
3218 unsigned Opc = UseSGPR ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
3221 MIB.addReg(FrameReg);
3228 bool NeedSaveSCC = (RS->isRegUsed(AMDGPU::SCC) &&
3229 !
MI->definesRegister(AMDGPU::SCC,
nullptr)) ||
3230 MI->readsRegister(AMDGPU::SCC,
nullptr);
3234 : RS->scavengeRegisterBackwards(AMDGPU::SReg_32_XM0RegClass,
3235 MI,
false, 0, !UseSGPR);
3237 if ((!TmpSReg && !FrameReg) || (!TmpReg && !UseSGPR)) {
3239 if (ST.hasFlatScratchSVSMode() && SVOpcode != -1) {
3240 Register TmpVGPR = RS->scavengeRegisterBackwards(
3241 AMDGPU::VGPR_32RegClass,
MI,
false, 0,
true);
3247 MIB.addReg(FrameReg);
3258 .
add(
MI->getOperand(0))
3261 .
add(*
TII->getNamedOperand(*
MI, AMDGPU::OpName::cpol));
3262 MI->eraseFromParent();
3276 assert(!(
Offset & 0x1) &&
"Flat scratch offset must be aligned!");
3296 if (TmpSReg == FrameReg) {
3299 !
MI->registerDefIsDead(AMDGPU::SCC,
nullptr)) {
3323 bool IsMUBUF =
TII->isMUBUF(*
MI);
3329 bool LiveSCC = RS->isRegUsed(AMDGPU::SCC) &&
3330 !
MI->definesRegister(AMDGPU::SCC,
nullptr);
3332 ? &AMDGPU::SReg_32RegClass
3333 : &AMDGPU::VGPR_32RegClass;
3334 bool IsCopy =
MI->getOpcode() == AMDGPU::V_MOV_B32_e32 ||
3335 MI->getOpcode() == AMDGPU::V_MOV_B32_e64 ||
3336 MI->getOpcode() == AMDGPU::S_MOV_B32;
3338 IsCopy ?
MI->getOperand(0).getReg()
3339 : RS->scavengeRegisterBackwards(*RC,
MI,
false, 0);
3341 int64_t
Offset = FrameInfo.getObjectOffset(Index);
3344 IsSALU && !LiveSCC ? AMDGPU::S_LSHR_B32 : AMDGPU::V_LSHRREV_B32_e64;
3346 if (IsSALU && LiveSCC) {
3347 TmpResultReg = RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass,
3352 if (OpCode == AMDGPU::V_LSHRREV_B32_e64)
3355 Shift.addImm(ST.getWavefrontSizeLog2()).addReg(FrameReg);
3357 Shift.addReg(FrameReg).addImm(ST.getWavefrontSizeLog2());
3358 if (IsSALU && !LiveSCC)
3359 Shift.getInstr()->getOperand(3).setIsDead();
3360 if (IsSALU && LiveSCC) {
3364 NewDest = ResultReg;
3366 NewDest = RS->scavengeRegisterBackwards(AMDGPU::SReg_32_XM0RegClass,
3371 ResultReg = NewDest;
3376 if ((MIB =
TII->getAddNoCarry(*
MBB,
MI,
DL, ResultReg, *RS)) !=
3383 .
addImm(ST.getWavefrontSizeLog2())
3386 const bool IsVOP2 = MIB->
getOpcode() == AMDGPU::V_ADD_U32_e32;
3398 "Need to reuse carry out register");
3403 ConstOffsetReg = getSubReg(MIB.
getReg(1), AMDGPU::sub0);
3405 ConstOffsetReg = MIB.
getReg(1);
3416 if (!MIB || IsSALU) {
3423 Register TmpScaledReg = IsCopy && IsSALU
3425 : RS->scavengeRegisterBackwards(
3426 AMDGPU::SReg_32_XM0RegClass,
MI,
3428 Register ScaledReg = TmpScaledReg.
isValid() ? TmpScaledReg : FrameReg;
3434 .
addImm(ST.getWavefrontSizeLog2());
3439 TmpResultReg = RS->scavengeRegisterBackwards(
3440 AMDGPU::VGPR_32RegClass,
MI,
false, 0,
true);
3443 if ((
Add =
TII->getAddNoCarry(*
MBB,
MI,
DL, TmpResultReg, *RS))) {
3446 .
addImm(ST.getWavefrontSizeLog2())
3448 if (
Add->getOpcode() == AMDGPU::V_ADD_CO_U32_e64) {
3458 "offset is unsafe for v_mad_u32_u24");
3467 bool IsInlinableLiteral =
3469 if (!IsInlinableLiteral) {
3478 if (!IsInlinableLiteral) {
3484 Add.addImm(ST.getWavefrontSize()).addReg(FrameReg).addImm(0);
3487 .
addImm(ST.getWavefrontSizeLog2())
3493 NewDest = ResultReg;
3495 NewDest = RS->scavengeRegisterBackwards(
3496 AMDGPU::SReg_32_XM0RegClass, *
Add,
false, 0,
3503 ResultReg = NewDest;
3509 if (!TmpScaledReg.
isValid()) {
3515 .
addImm(ST.getWavefrontSizeLog2());
3522 MI->eraseFromParent();
3532 static_cast<int>(FIOperandNum) ==
3533 AMDGPU::getNamedOperandIdx(
MI->getOpcode(), AMDGPU::OpName::vaddr));
3535 auto &SOffset = *
TII->getNamedOperand(*
MI, AMDGPU::OpName::soffset);
3536 assert((SOffset.isImm() && SOffset.getImm() == 0));
3538 if (FrameReg != AMDGPU::NoRegister)
3539 SOffset.ChangeToRegister(FrameReg,
false);
3541 int64_t
Offset = FrameInfo.getObjectOffset(Index);
3543 TII->getNamedOperand(*
MI, AMDGPU::OpName::offset)->getImm();
3544 int64_t NewOffset = OldImm +
Offset;
3546 if (
TII->isLegalMUBUFImmOffset(NewOffset) &&
3548 MI->eraseFromParent();
3557 if (!
TII->isImmOperandLegal(*
MI, FIOperandNum, *FIOp)) {
3559 RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass,
MI,
false, 0);
3583 return &AMDGPU::VReg_64RegClass;
3585 return &AMDGPU::VReg_96RegClass;
3587 return &AMDGPU::VReg_128RegClass;
3589 return &AMDGPU::VReg_160RegClass;
3591 return &AMDGPU::VReg_192RegClass;
3593 return &AMDGPU::VReg_224RegClass;
3595 return &AMDGPU::VReg_256RegClass;
3597 return &AMDGPU::VReg_288RegClass;
3599 return &AMDGPU::VReg_320RegClass;
3601 return &AMDGPU::VReg_352RegClass;
3603 return &AMDGPU::VReg_384RegClass;
3605 return &AMDGPU::VReg_512RegClass;
3607 return &AMDGPU::VReg_1024RegClass;
3615 return &AMDGPU::VReg_64_Align2RegClass;
3617 return &AMDGPU::VReg_96_Align2RegClass;
3619 return &AMDGPU::VReg_128_Align2RegClass;
3621 return &AMDGPU::VReg_160_Align2RegClass;
3623 return &AMDGPU::VReg_192_Align2RegClass;
3625 return &AMDGPU::VReg_224_Align2RegClass;
3627 return &AMDGPU::VReg_256_Align2RegClass;
3629 return &AMDGPU::VReg_288_Align2RegClass;
3631 return &AMDGPU::VReg_320_Align2RegClass;
3633 return &AMDGPU::VReg_352_Align2RegClass;
3635 return &AMDGPU::VReg_384_Align2RegClass;
3637 return &AMDGPU::VReg_512_Align2RegClass;
3639 return &AMDGPU::VReg_1024_Align2RegClass;
3647 return &AMDGPU::VReg_1RegClass;
3649 return &AMDGPU::VGPR_16RegClass;
3651 return &AMDGPU::VGPR_32RegClass;
3659 return &AMDGPU::VGPR_32_Lo256RegClass;
3661 return &AMDGPU::VReg_64_Lo256_Align2RegClass;
3663 return &AMDGPU::VReg_96_Lo256_Align2RegClass;
3665 return &AMDGPU::VReg_128_Lo256_Align2RegClass;
3667 return &AMDGPU::VReg_160_Lo256_Align2RegClass;
3669 return &AMDGPU::VReg_192_Lo256_Align2RegClass;
3671 return &AMDGPU::VReg_224_Lo256_Align2RegClass;
3673 return &AMDGPU::VReg_256_Lo256_Align2RegClass;
3675 return &AMDGPU::VReg_288_Lo256_Align2RegClass;
3677 return &AMDGPU::VReg_320_Lo256_Align2RegClass;
3679 return &AMDGPU::VReg_352_Lo256_Align2RegClass;
3681 return &AMDGPU::VReg_384_Lo256_Align2RegClass;
3683 return &AMDGPU::VReg_512_Lo256_Align2RegClass;
3685 return &AMDGPU::VReg_1024_Lo256_Align2RegClass;
3693 return &AMDGPU::AReg_64RegClass;
3695 return &AMDGPU::AReg_96RegClass;
3697 return &AMDGPU::AReg_128RegClass;
3699 return &AMDGPU::AReg_160RegClass;
3701 return &AMDGPU::AReg_192RegClass;
3703 return &AMDGPU::AReg_224RegClass;
3705 return &AMDGPU::AReg_256RegClass;
3707 return &AMDGPU::AReg_288RegClass;
3709 return &AMDGPU::AReg_320RegClass;
3711 return &AMDGPU::AReg_352RegClass;
3713 return &AMDGPU::AReg_384RegClass;
3715 return &AMDGPU::AReg_512RegClass;
3717 return &AMDGPU::AReg_1024RegClass;
3725 return &AMDGPU::AReg_64_Align2RegClass;
3727 return &AMDGPU::AReg_96_Align2RegClass;
3729 return &AMDGPU::AReg_128_Align2RegClass;
3731 return &AMDGPU::AReg_160_Align2RegClass;
3733 return &AMDGPU::AReg_192_Align2RegClass;
3735 return &AMDGPU::AReg_224_Align2RegClass;
3737 return &AMDGPU::AReg_256_Align2RegClass;
3739 return &AMDGPU::AReg_288_Align2RegClass;
3741 return &AMDGPU::AReg_320_Align2RegClass;
3743 return &AMDGPU::AReg_352_Align2RegClass;
3745 return &AMDGPU::AReg_384_Align2RegClass;
3747 return &AMDGPU::AReg_512_Align2RegClass;
3749 return &AMDGPU::AReg_1024_Align2RegClass;
3757 return &AMDGPU::AGPR_LO16RegClass;
3759 return &AMDGPU::AGPR_32RegClass;
3767 return &AMDGPU::AV_64RegClass;
3769 return &AMDGPU::AV_96RegClass;
3771 return &AMDGPU::AV_128RegClass;
3773 return &AMDGPU::AV_160RegClass;
3775 return &AMDGPU::AV_192RegClass;
3777 return &AMDGPU::AV_224RegClass;
3779 return &AMDGPU::AV_256RegClass;
3781 return &AMDGPU::AV_288RegClass;
3783 return &AMDGPU::AV_320RegClass;
3785 return &AMDGPU::AV_352RegClass;
3787 return &AMDGPU::AV_384RegClass;
3789 return &AMDGPU::AV_512RegClass;
3791 return &AMDGPU::AV_1024RegClass;
3799 return &AMDGPU::AV_64_Align2RegClass;
3801 return &AMDGPU::AV_96_Align2RegClass;
3803 return &AMDGPU::AV_128_Align2RegClass;
3805 return &AMDGPU::AV_160_Align2RegClass;
3807 return &AMDGPU::AV_192_Align2RegClass;
3809 return &AMDGPU::AV_224_Align2RegClass;
3811 return &AMDGPU::AV_256_Align2RegClass;
3813 return &AMDGPU::AV_288_Align2RegClass;
3815 return &AMDGPU::AV_320_Align2RegClass;
3817 return &AMDGPU::AV_352_Align2RegClass;
3819 return &AMDGPU::AV_384_Align2RegClass;
3821 return &AMDGPU::AV_512_Align2RegClass;
3823 return &AMDGPU::AV_1024_Align2RegClass;
3831 return &AMDGPU::AV_32RegClass;
3832 return ST.needsAlignedVGPRs()
3851 return &AMDGPU::SReg_32RegClass;
3853 return &AMDGPU::SReg_64RegClass;
3855 return &AMDGPU::SGPR_96RegClass;
3857 return &AMDGPU::SGPR_128RegClass;
3859 return &AMDGPU::SGPR_160RegClass;
3861 return &AMDGPU::SGPR_192RegClass;
3863 return &AMDGPU::SGPR_224RegClass;
3865 return &AMDGPU::SGPR_256RegClass;
3867 return &AMDGPU::SGPR_288RegClass;
3869 return &AMDGPU::SGPR_320RegClass;
3871 return &AMDGPU::SGPR_352RegClass;
3873 return &AMDGPU::SGPR_384RegClass;
3875 return &AMDGPU::SGPR_512RegClass;
3877 return &AMDGPU::SGPR_1024RegClass;
3885 if (Reg.isVirtual())
3888 RC = getPhysRegBaseClass(Reg);
3894 unsigned Size = getRegSizeInBits(*SRC);
3896 switch (SRC->
getID()) {
3899 case AMDGPU::VS_32_Lo256RegClassID:
3900 case AMDGPU::VS_64_Lo256RegClassID:
3906 assert(VRC &&
"Invalid register class size");
3912 unsigned Size = getRegSizeInBits(*SRC);
3914 assert(ARC &&
"Invalid register class size");
3920 unsigned Size = getRegSizeInBits(*SRC);
3922 assert(ARC &&
"Invalid register class size");
3928 unsigned Size = getRegSizeInBits(*VRC);
3930 return &AMDGPU::SGPR_32RegClass;
3932 assert(SRC &&
"Invalid register class size");
3939 unsigned SubIdx)
const {
3942 getMatchingSuperRegClass(SuperRC, SubRC, SubIdx);
3943 return MatchRC && MatchRC->
hasSubClassEq(SuperRC) ? MatchRC :
nullptr;
3949 return !ST.hasMFMAInlineLiteralBug();
3970 return Reg == AMDGPU::VCC || Reg == AMDGPU::VCC_LO || Reg == AMDGPU::VCC_HI;
3973 if (ReserveHighestRegister) {
3996 unsigned EltSize)
const {
3998 assert(RegBitWidth >= 32 && RegBitWidth <= 1024 && EltSize >= 2);
4000 const unsigned RegHalves = RegBitWidth / 16;
4001 const unsigned EltHalves = EltSize / 2;
4002 assert(RegSplitParts.size() + 1 >= EltHalves);
4004 const std::vector<int16_t> &Parts = RegSplitParts[EltHalves - 1];
4005 const unsigned NumParts = RegHalves / EltHalves;
4007 return ArrayRef(Parts.data(), NumParts);
4013 return Reg.isVirtual() ? MRI.
getRegClass(Reg) : getPhysRegBaseClass(Reg);
4020 return getSubRegisterClass(SrcRC, MO.
getSubReg());
4040 unsigned MinOcc = ST.getOccupancyWithWorkGroupSizes(MF).first;
4041 switch (RC->
getID()) {
4043 return AMDGPUGenRegisterInfo::getRegPressureLimit(RC, MF);
4044 case AMDGPU::VGPR_32RegClassID:
4049 ST.getMaxNumVGPRs(MF));
4050 case AMDGPU::SGPR_32RegClassID:
4051 case AMDGPU::SGPR_LO16RegClassID:
4052 return std::min(ST.getMaxNumSGPRs(MinOcc,
true), ST.getMaxNumSGPRs(MF));
4057 unsigned Idx)
const {
4058 switch (
static_cast<AMDGPU::RegisterPressureSets
>(Idx)) {
4059 case AMDGPU::RegisterPressureSets::VGPR_32:
4060 case AMDGPU::RegisterPressureSets::AGPR_32:
4063 case AMDGPU::RegisterPressureSets::SReg_32:
4072 static const int Empty[] = { -1 };
4074 if (RegPressureIgnoredUnits[
static_cast<unsigned>(RegUnit)])
4077 return AMDGPUGenRegisterInfo::getRegUnitPressureSets(RegUnit);
4092 switch (Hint.first) {
4099 getMatchingSuperReg(Paired, AMDGPU::lo16, &AMDGPU::VGPR_32RegClass);
4100 }
else if (VRM && VRM->
hasPhys(Paired)) {
4101 PairedPhys = getMatchingSuperReg(VRM->
getPhys(Paired), AMDGPU::lo16,
4102 &AMDGPU::VGPR_32RegClass);
4117 PairedPhys =
TRI->getSubReg(Paired, AMDGPU::lo16);
4118 }
else if (VRM && VRM->
hasPhys(Paired)) {
4119 PairedPhys =
TRI->getSubReg(VRM->
getPhys(Paired), AMDGPU::lo16);
4134 if (AMDGPU::VGPR_16RegClass.
contains(PhysReg) &&
4149 return AMDGPU::SGPR30_SGPR31;
4155 switch (RB.
getID()) {
4156 case AMDGPU::VGPRRegBankID:
4158 std::max(ST.useRealTrue16Insts() ? 16u : 32u,
Size));
4159 case AMDGPU::VCCRegBankID:
4162 case AMDGPU::SGPRRegBankID:
4164 case AMDGPU::AGPRRegBankID:
4179 return getAllocatableClass(RC);
4185 return isWave32 ? AMDGPU::VCC_LO : AMDGPU::VCC;
4189 return isWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
4194 return ST.needsAlignedVGPRs() ? &AMDGPU::VReg_64_Align2RegClass
4195 : &AMDGPU::VReg_64RegClass;
4207 if (Reg.isVirtual()) {
4211 LaneBitmask SubLanes = SubReg ? getSubRegIndexLaneMask(SubReg)
4216 if ((S.LaneMask & SubLanes) == SubLanes) {
4217 V = S.getVNInfoAt(UseIdx);
4229 for (MCRegUnit Unit : regunits(Reg.asMCReg())) {
4244 if (!Def || !MDT.dominates(Def, &
Use))
4247 assert(Def->modifiesRegister(Reg,
this));
4253 assert(getRegSizeInBits(*getPhysRegBaseClass(Reg)) <= 32);
4256 AMDGPU::SReg_32RegClass,
4257 AMDGPU::AGPR_32RegClass } ) {
4258 if (
MCPhysReg Super = getMatchingSuperReg(Reg, AMDGPU::lo16, &RC))
4261 if (
MCPhysReg Super = getMatchingSuperReg(Reg, AMDGPU::hi16,
4262 &AMDGPU::VGPR_32RegClass)) {
4266 return AMDGPU::NoRegister;
4270 if (!ST.needsAlignedVGPRs())
4281 assert(&RC != &AMDGPU::VS_64RegClass);
4288 return ArrayRef(AMDGPU::SGPR_128RegClass.begin(), ST.getMaxNumSGPRs(MF) / 4);
4293 return ArrayRef(AMDGPU::SGPR_64RegClass.begin(), ST.getMaxNumSGPRs(MF) / 2);
4298 return ArrayRef(AMDGPU::SGPR_32RegClass.begin(), ST.getMaxNumSGPRs(MF));
4303 unsigned SubReg)
const {
4306 return std::min(128u, getSubRegIdxSize(SubReg));
4310 return std::min(32u, getSubRegIdxSize(SubReg));
4319 bool IncludeCalls)
const {
4320 unsigned NumArchVGPRs = ST.getAddressableNumArchVGPRs();
4322 (RC.
getID() == AMDGPU::VGPR_32RegClassID)
4326 if (Reg != AMDGPU::VCC_LO && Reg != AMDGPU::VCC_HI &&
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Provides AMDGPU specific target descriptions.
This file declares the targeting of the RegisterBankInfo class for AMDGPU.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static const Function * getParent(const Value *V)
AMD GCN specific subclass of TargetSubtarget.
const HexagonInstrInfo * TII
std::pair< Instruction::BinaryOps, Value * > OffsetOp
Find all possible pairs (BinOp, RHS) that BinOp V, RHS can be simplified.
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first DebugLoc that has line number information, given a range of instructions.
Register const TargetRegisterInfo * TRI
Promote Memory to Register
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
This file declares the machine register scavenger class.
SI Pre allocate WWM Registers
static MachineInstrBuilder spillVGPRtoAGPR(const GCNSubtarget &ST, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, int Index, unsigned Lane, unsigned ValueReg, bool IsKill, bool NeedsCFI)
static int getOffenMUBUFStore(unsigned Opc)
static const TargetRegisterClass * getAnyAGPRClassForBitWidth(unsigned BitWidth)
static int getOffsetMUBUFLoad(unsigned Opc)
static const std::array< unsigned, 17 > SubRegFromChannelTableWidthMap
static unsigned getNumSubRegsForSpillOp(const MachineInstr &MI, const SIInstrInfo *TII)
static void emitUnsupportedError(const Function &Fn, const MachineInstr &MI, const Twine &ErrMsg)
static const TargetRegisterClass * getAlignedAGPRClassForBitWidth(unsigned BitWidth)
static bool buildMUBUFOffsetLoadStore(const GCNSubtarget &ST, MachineFrameInfo &MFI, MachineBasicBlock::iterator MI, int Index, int64_t Offset)
static cl::opt< bool > EnableSpillCFISavedRegs("amdgpu-spill-cfi-saved-regs", cl::desc("Enable spilling the registers required for CFI emission"), cl::ReallyHidden, cl::init(false), cl::ZeroOrMore)
static unsigned getFlatScratchSpillOpcode(const SIInstrInfo *TII, unsigned LoadStoreOp, unsigned EltSize)
static const TargetRegisterClass * getAlignedVGPRClassForBitWidth(unsigned BitWidth)
static int getOffsetMUBUFStore(unsigned Opc)
static const TargetRegisterClass * getAnyVGPRClassForBitWidth(unsigned BitWidth)
static cl::opt< bool > EnableSpillSGPRToVGPR("amdgpu-spill-sgpr-to-vgpr", cl::desc("Enable spilling SGPRs to VGPRs"), cl::ReallyHidden, cl::init(true))
static const TargetRegisterClass * getAlignedVectorSuperClassForBitWidth(unsigned BitWidth)
static const TargetRegisterClass * getAnyVectorSuperClassForBitWidth(unsigned BitWidth)
static bool isFIPlusImmOrVGPR(const SIRegisterInfo &TRI, const MachineInstr &MI)
static int getOffenMUBUFLoad(unsigned Opc)
Interface definition for SIRegisterInfo.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
LocallyHashedType DenseMapInfo< LocallyHashedType >::Empty
static const char * getRegisterName(MCRegister Reg)
bool isBottomOfStack() const
Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
Get the array size.
bool empty() const
Check if the array is empty.
bool test(unsigned Idx) const
Returns true if bit Idx is set.
bool empty() const
Returns whether there are no bits in this bitvector.
Diagnostic information for unsupported feature in backend.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this function.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
LiveInterval - This class represents the liveness of a register, or stack slot.
bool hasSubRanges() const
Returns true if subregister liveness information is available.
iterator_range< subrange_iterator > subranges()
void removeAllRegUnitsForPhysReg(MCRegister Reg)
Remove associated live ranges for the register units associated with Reg.
bool hasInterval(Register Reg) const
MachineInstr * getInstructionFromIndex(SlotIndex index) const
Returns the instruction associated with the given index.
MachineDominatorTree & getDomTree()
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
LiveInterval & getInterval(Register Reg)
LiveRange & getRegUnit(MCRegUnit Unit)
Return the live range for register unit Unit.
This class represents the liveness of a register, stack slot, etc.
VNInfo * getVNInfoAt(SlotIndex Idx) const
getVNInfoAt - Return the VNInfo that is live at Idx, or NULL.
A set of register units used to track register liveness.
bool available(MCRegister Reg) const
Returns true if no part of physical register Reg is live.
Describe properties that are true of each instruction in the target description file.
MCRegAliasIterator enumerates all registers aliasing Reg.
Wrapper class representing physical registers. Should be passed by value.
static MCRegister from(unsigned Val)
Check the provided unsigned value is a valid MCRegister.
Generic base class for all target subtargets.
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
bool hasCalls() const
Return true if the current function has any function calls.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
bool hasStackObjects() const
Return true if there are any stack objects in this function.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & setOperandDead(unsigned OpIdx) const
const MachineInstrBuilder & addUse(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addDef(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register definition operand.
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
void setAsmPrinterFlag(AsmPrinterFlagTy Flag)
Set a flag for the AsmPrinter.
LLVM_ABI const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
Flags getFlags() const
Return the raw flags of the source value.
MachineOperand class - Representation of each machine instruction operand.
unsigned getSubReg() const
void setImm(int64_t immVal)
LLVM_ABI void setIsRenamable(bool Val=true)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setIsDead(bool Val=true)
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
LLVM_ABI void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
void setIsKill(bool Val=true)
LLVM_ABI void ChangeToRegister(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isDebug=false)
ChangeToRegister - Replace this operand with a new register operand of the specified value.
Register getReg() const
getReg - Returns the register number.
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
const RegClassOrRegBank & getRegClassOrRegBank(Register Reg) const
Return the register bank or register class of Reg.
bool isReserved(MCRegister PhysReg) const
isReserved - Returns true when PhysReg is a reserved register.
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
bool isAllocatable(MCRegister PhysReg) const
isAllocatable - Returns true when PhysReg belongs to an allocatable register class and it hasn't been...
std::pair< unsigned, Register > getRegAllocationHint(Register VReg) const
getRegAllocationHint - Return the register allocation hint for the specified virtual register.
const TargetRegisterInfo * getTargetRegisterInfo() const
LLVM_ABI LaneBitmask getMaxLaneMaskForVReg(Register Reg) const
Returns a mask covering all bits that can appear in lane masks of subregisters of the virtual registe...
LLVM_ABI bool isPhysRegUsed(MCRegister PhysReg, bool SkipRegMaskTest=false) const
Return true if the specified register is modified or read in this function.
Holds all the information related to register banks.
virtual bool isDivergentRegBank(const RegisterBank *RB) const
Returns true if the register bank is considered divergent.
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
This class implements the register bank concept.
unsigned getID() const
Get the identifier of this register bank.
Wrapper class representing virtual and physical registers.
constexpr bool isValid() const
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
MachineInstr * buildCFIForSGPRToVMEMSpill(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, MCRegister SGPR, int64_t Offset) const
Create a CFI index describing a spill of a SGPR to VMEM and build a MachineInstr around it.
MachineInstr * buildCFIForVRegToVRegSpill(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, const MCRegister Reg, const MCRegister RegCopy) const
Create a CFI index describing a spill of the VGPR/AGPR Reg to another VGPR/AGPR RegCopy and build a M...
MachineInstr * buildCFIForVGPRToVMEMSpill(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, MCRegister VGPR, int64_t Offset) const
Create a CFI index describing a spill of a VGPR to VMEM and build a MachineInstr around it.
MachineInstr * buildCFIForSGPRToVGPRSpill(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, const MCRegister SGPR, const MCRegister VGPR, const int Lane) const
Create a CFI index describing a spill of an SGPR to a single lane of a VGPR and build a MachineInstr ...
static bool isFLATScratch(const MachineInstr &MI)
static bool isMUBUF(const MachineInstr &MI)
static bool isVOP3(const MCInstrDesc &Desc)
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
ArrayRef< MCPhysReg > getAGPRSpillVGPRs() const
MCPhysReg getVGPRToAGPRSpill(int FrameIndex, unsigned Lane) const
Register getLongBranchReservedReg() const
unsigned getDynamicVGPRBlockSize() const
Register getStackPtrOffsetReg() const
Register getScratchRSrcReg() const
Returns the physical register reserved for use as the resource descriptor for scratch accesses.
ArrayRef< MCPhysReg > getVGPRSpillAGPRs() const
ArrayRef< SIRegisterInfo::SpilledReg > getSGPRSpillToVirtualVGPRLanes(int FrameIndex) const
uint32_t getMaskForVGPRBlockOps(Register RegisterBlock) const
Register getSGPRForEXECCopy() const
ArrayRef< SIRegisterInfo::SpilledReg > getSGPRSpillToPhysicalVGPRLanes(int FrameIndex) const
Register getVGPRForAGPRCopy() const
Register getFrameOffsetReg() const
BitVector getNonWWMRegMask() const
bool checkFlag(Register Reg, uint8_t Flag) const
void addToSpilledVGPRs(unsigned num)
const ReservedRegSet & getWWMReservedRegs() const
void addToSpilledSGPRs(unsigned num)
Register materializeFrameBaseRegister(MachineBasicBlock *MBB, int FrameIdx, int64_t Offset) const override
int64_t getScratchInstrOffset(const MachineInstr *MI) const
bool isFrameOffsetLegal(const MachineInstr *MI, Register BaseReg, int64_t Offset) const override
const TargetRegisterClass * getCompatibleSubRegClass(const TargetRegisterClass *SuperRC, const TargetRegisterClass *SubRC, unsigned SubIdx) const
Returns a register class which is compatible with SuperRC, such that a subregister exists with class ...
ArrayRef< MCPhysReg > getAllSGPR64(const MachineFunction &MF) const
Return all SGPR64 which satisfy the waves per execution unit requirement of the subtarget.
MCRegister findUnusedRegister(const MachineRegisterInfo &MRI, const TargetRegisterClass *RC, const MachineFunction &MF, bool ReserveHighestVGPR=false) const
Returns the lowest register that is not used at any point in the function.
static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs=1)
MCPhysReg get32BitRegister(MCPhysReg Reg) const
const uint32_t * getCallPreservedMask(const MachineFunction &MF, CallingConv::ID) const override
void buildSpillLoadStore(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, unsigned LoadStoreOp, int Index, Register ValueReg, bool ValueIsKill, MCRegister ScratchOffsetReg, int64_t InstrOffset, MachineMemOperand *MMO, RegScavenger *RS, LiveRegUnits *LiveUnits=nullptr, bool NeedsCFI=false) const
bool requiresFrameIndexReplacementScavenging(const MachineFunction &MF) const override
bool shouldRealignStack(const MachineFunction &MF) const override
bool restoreSGPR(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS, SlotIndexes *Indexes=nullptr, LiveIntervals *LIS=nullptr, bool OnlyToVGPR=false, bool SpillToPhysVGPRLane=false) const
bool isProperlyAlignedRC(const TargetRegisterClass &RC) const
const TargetRegisterClass * getEquivalentVGPRClass(const TargetRegisterClass *SRC) const
Register getFrameRegister(const MachineFunction &MF) const override
LLVM_READONLY const TargetRegisterClass * getVectorSuperClassForBitWidth(unsigned BitWidth) const
bool spillEmergencySGPR(MachineBasicBlock::iterator MI, MachineBasicBlock &RestoreMBB, Register SGPR, RegScavenger *RS) const
SIRegisterInfo(const GCNSubtarget &ST)
const uint32_t * getAllVGPRRegMask() const
MCRegister getReturnAddressReg(const MachineFunction &MF) const
const MCPhysReg * getCalleeSavedRegs(const MachineFunction *MF) const override
bool hasBasePointer(const MachineFunction &MF) const
const TargetRegisterClass * getCrossCopyRegClass(const TargetRegisterClass *RC) const override
Returns a legal register class to copy a register in the specified class to or from.
ArrayRef< int16_t > getRegSplitParts(const TargetRegisterClass *RC, unsigned EltSize) const
ArrayRef< MCPhysReg > getAllSGPR32(const MachineFunction &MF) const
Return all SGPR32 which satisfy the waves per execution unit requirement of the subtarget.
const TargetRegisterClass * getLargestLegalSuperClass(const TargetRegisterClass *RC, const MachineFunction &MF) const override
MCRegister reservedPrivateSegmentBufferReg(const MachineFunction &MF) const
Return the end register initially reserved for the scratch buffer in case spilling is needed.
bool eliminateSGPRToVGPRSpillFrameIndex(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS, SlotIndexes *Indexes=nullptr, LiveIntervals *LIS=nullptr, bool SpillToPhysVGPRLane=false) const
Special case of eliminateFrameIndex.
bool isVGPR(const MachineRegisterInfo &MRI, Register Reg) const
bool isAsmClobberable(const MachineFunction &MF, MCRegister PhysReg) const override
LLVM_READONLY const TargetRegisterClass * getAGPRClassForBitWidth(unsigned BitWidth) const
static bool isChainScratchRegister(Register VGPR)
bool requiresRegisterScavenging(const MachineFunction &Fn) const override
bool opCanUseInlineConstant(unsigned OpType) const
const TargetRegisterClass * getRegClassForSizeOnBank(unsigned Size, const RegisterBank &Bank) const
const TargetRegisterClass * getConstrainedRegClassForOperand(const MachineOperand &MO, const MachineRegisterInfo &MRI) const override
bool isUniformReg(const MachineRegisterInfo &MRI, const RegisterBankInfo &RBI, Register Reg) const override
const uint32_t * getNoPreservedMask() const override
StringRef getRegAsmName(MCRegister Reg) const override
const uint32_t * getAllAllocatableSRegMask() const
MCRegister getAlignedHighSGPRForRC(const MachineFunction &MF, const unsigned Align, const TargetRegisterClass *RC) const
Return the largest available SGPR aligned to Align for the register class RC.
void buildCFIForBlockCSRStore(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register BlockReg, int64_t Offset) const
const TargetRegisterClass * getRegClassForReg(const MachineRegisterInfo &MRI, Register Reg) const
unsigned getHWRegIndex(MCRegister Reg) const
const MCPhysReg * getCalleeSavedRegsViaCopy(const MachineFunction *MF) const
const uint32_t * getAllVectorRegMask() const
const TargetRegisterClass * getEquivalentAGPRClass(const TargetRegisterClass *SRC) const
static LLVM_READONLY const TargetRegisterClass * getSGPRClassForBitWidth(unsigned BitWidth)
const TargetRegisterClass * getPointerRegClass(unsigned Kind=0) const override
const TargetRegisterClass * getRegClassForTypeOnBank(LLT Ty, const RegisterBank &Bank) const
bool opCanUseLiteralConstant(unsigned OpType) const
Register getBaseRegister() const
bool getRegAllocationHints(Register VirtReg, ArrayRef< MCPhysReg > Order, SmallVectorImpl< MCPhysReg > &Hints, const MachineFunction &MF, const VirtRegMap *VRM, const LiveRegMatrix *Matrix) const override
LLVM_READONLY const TargetRegisterClass * getAlignedLo256VGPRClassForBitWidth(unsigned BitWidth) const
LLVM_READONLY const TargetRegisterClass * getVGPRClassForBitWidth(unsigned BitWidth) const
const TargetRegisterClass * getEquivalentAVClass(const TargetRegisterClass *SRC) const
bool requiresFrameIndexScavenging(const MachineFunction &MF) const override
static bool isVGPRClass(const TargetRegisterClass *RC)
MachineInstr * findReachingDef(Register Reg, unsigned SubReg, MachineInstr &Use, MachineRegisterInfo &MRI, LiveIntervals *LIS) const
bool isSGPRReg(const MachineRegisterInfo &MRI, Register Reg) const
const TargetRegisterClass * getEquivalentSGPRClass(const TargetRegisterClass *VRC) const
SmallVector< StringLiteral > getVRegFlagsOfReg(Register Reg, const MachineFunction &MF) const override
LLVM_READONLY const TargetRegisterClass * getDefaultVectorSuperClassForBitWidth(unsigned BitWidth) const
unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const override
ArrayRef< MCPhysReg > getAllSGPR128(const MachineFunction &MF) const
Return all SGPR128 which satisfy the waves per execution unit requirement of the subtarget.
unsigned getRegPressureSetLimit(const MachineFunction &MF, unsigned Idx) const override
BitVector getReservedRegs(const MachineFunction &MF) const override
bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const override
const TargetRegisterClass * getRegClassForOperandReg(const MachineRegisterInfo &MRI, const MachineOperand &MO) const
void addImplicitUsesForBlockCSRLoad(MachineInstrBuilder &MIB, Register BlockReg) const
unsigned getNumUsedPhysRegs(const MachineRegisterInfo &MRI, const TargetRegisterClass &RC, bool IncludeCalls=true) const
const uint32_t * getAllAGPRRegMask() const
const int * getRegUnitPressureSets(MCRegUnit RegUnit) const override
bool isAGPR(const MachineRegisterInfo &MRI, Register Reg) const
bool eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, unsigned FIOperandNum, RegScavenger *RS) const override
bool spillSGPR(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS, SlotIndexes *Indexes=nullptr, LiveIntervals *LIS=nullptr, bool OnlyToVGPR=false, bool SpillToPhysVGPRLane=false, bool NeedsCFI=false) const
If OnlyToVGPR is true, this will only succeed if it manages to find a free VGPR lane to spill to.
MCRegister getExec() const
MCRegister getVCC() const
int64_t getFrameIndexInstrOffset(const MachineInstr *MI, int Idx) const override
bool isVectorSuperClass(const TargetRegisterClass *RC) const
const TargetRegisterClass * getWaveMaskRegClass() const
unsigned getSubRegAlignmentNumBits(const TargetRegisterClass *RC, unsigned SubReg) const
void resolveFrameIndex(MachineInstr &MI, Register BaseReg, int64_t Offset) const override
bool requiresVirtualBaseRegisters(const MachineFunction &Fn) const override
const TargetRegisterClass * getVGPR64Class() const
void buildVGPRSpillLoadStore(SGPRSpillBuilder &SB, int Index, int Offset, bool IsLoad, bool IsKill=true) const
bool isCFISavedRegsSpillEnabled() const
static bool isSGPRClass(const TargetRegisterClass *RC)
static bool isAGPRClass(const TargetRegisterClass *RC)
SlotIndex - An opaque wrapper around machine indexes.
bool isValid() const
Returns true if this is a valid index.
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
SlotIndex replaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI)
ReplaceMachineInstrInMaps - Replacing a machine instr with a new one in maps used by register allocat...
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
bool hasFP(const MachineFunction &MF) const
hasFP - Return true if the specified function should have a dedicated frame pointer register.
const uint8_t TSFlags
Configurable target specific flags.
ArrayRef< MCPhysReg > getRegisters() const
unsigned getID() const
Return the register class ID number.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
bool hasSubClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a sub-class of or equal to this class.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
virtual const TargetRegisterClass * getLargestLegalSuperClass(const TargetRegisterClass *RC, const MachineFunction &) const
Returns the largest super class of RC that is legal to use in the current sub-target and has the same...
virtual bool shouldRealignStack(const MachineFunction &MF) const
True if storage within the function requires the stack pointer to be aligned more than the normal cal...
virtual bool getRegAllocationHints(Register VirtReg, ArrayRef< MCPhysReg > Order, SmallVectorImpl< MCPhysReg > &Hints, const MachineFunction &MF, const VirtRegMap *VRM=nullptr, const LiveRegMatrix *Matrix=nullptr) const
Get a list of 'hint' registers that the register allocator should try first when allocating a physica...
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
A Use represents the edge between a Value definition and its users.
VNInfo - Value Number Information.
MCRegister getPhys(Register virtReg) const
returns the physical register mapped to the specified virtual register
bool hasPhys(Register virtReg) const
returns true if the specified virtual register is mapped to a physical register
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ PRIVATE_ADDRESS
Address space for private memory.
bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the values intended for floating point values.
@ OPERAND_REG_INLINE_AC_FIRST
@ OPERAND_REG_INLINE_AC_LAST
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
LLVM_READONLY int32_t getFlatScratchInstSVfromSVS(uint32_t Opcode)
LLVM_READONLY int32_t getFlatScratchInstSVfromSS(uint32_t Opcode)
LLVM_READONLY int32_t getFlatScratchInstSTfromSS(uint32_t Opcode)
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
@ AMDGPU_Gfx
Used for AMD graphics targets.
@ AMDGPU_CS_ChainPreserve
Used on AMDGPUs to give the middle-end more control over argument placement.
@ AMDGPU_CS_Chain
Used on AMDGPUs to give the middle-end more control over argument placement.
@ Cold
Attempts to make code in the caller as efficient as possible under the assumption that the call is no...
@ Fast
Attempts to make calls as fast as possible (e.g.
@ C
The default llvm calling convention, compatible with C.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
PointerUnion< const TargetRegisterClass *, const RegisterBank * > RegClassOrRegBank
Convenient type to represent either a register class or a register bank.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
RegState
Flags to represent properties of register accesses.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
@ Define
Register definition.
@ Renamable
Register that may be renamed.
constexpr RegState getKillRegState(bool B)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer that is less than or equal to Value and is congruent to Skew mod Align.
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
auto reverse(ContainerTy &&C)
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
constexpr RegState getDefRegState(bool B)
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
constexpr bool hasRegState(RegState Value, RegState Test)
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
FunctionAddr VTableAddr uintptr_t uintptr_t Data
@ Sub
Subtraction of integers.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
void call_once(once_flag &flag, Function &&F, Args &&... ArgList)
Execute the function specified as a parameter once.
constexpr unsigned BitWidth
static const MachineMemOperand::Flags MOLastUse
Mark the MMO of a load as the last use.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
static const MachineMemOperand::Flags MOThreadPrivate
Mark the MMO of accesses to memory locations that are never written to by other threads.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
This struct is a compact representation of a valid (non-zero power of two) alignment.
This class contains a discriminated union of information about pointers in memory operands,...
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
void setMI(MachineBasicBlock *NewMBB, MachineBasicBlock::iterator NewMI)
ArrayRef< int16_t > SplitParts
SIMachineFunctionInfo & MFI
SGPRSpillBuilder(const SIRegisterInfo &TRI, const SIInstrInfo &TII, bool IsWave32, MachineBasicBlock::iterator MI, int Index, RegScavenger *RS)
SGPRSpillBuilder(const SIRegisterInfo &TRI, const SIInstrInfo &TII, bool IsWave32, MachineBasicBlock::iterator MI, Register Reg, bool IsKill, int Index, RegScavenger *RS)
PerVGPRData getPerVGPRData()
MachineBasicBlock::iterator MI
void readWriteTmpVGPR(unsigned Offset, bool IsLoad)
const SIRegisterInfo & TRI
The llvm::once_flag structure.