#define GET_REGINFO_TARGET_DESC
#include "AMDGPUGenRegisterInfo.inc"
static cl::opt<bool> EnableSpillSGPRToVGPR(
    "amdgpu-spill-sgpr-to-vgpr",
    cl::desc("Enable spilling SGPRs to VGPRs"),
    cl::ReallyHidden, cl::init(true));
std::array<std::vector<int16_t>, 32> SIRegisterInfo::RegSplitParts;
std::array<std::array<uint16_t, 32>, 9> SIRegisterInfo::SubRegFromChannelTable;

static const std::array<unsigned, 17> SubRegFromChannelTableWidthMap = {
    0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 9};
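// Note: SubRegFromChannelTableWidthMap maps a sub-register width, measured in
// 32-bit channels, to a row of SubRegFromChannelTable. Widths 1..8 select rows
// 1..8 and a width of 16 channels selects row 9; a stored value of 0 means the
// width is unsupported, and lookups subtract one before indexing the table.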
static void emitUnsupportedError(const Function &Fn, const MachineInstr &MI,
                                 const Twine &ErrMsg) {
                        MI->getOperand(0).isKill(), Index, RS) {}
      MovOpc = AMDGPU::S_MOV_B32;
      NotOpc = AMDGPU::S_NOT_B32;
    } else {
      MovOpc = AMDGPU::S_MOV_B64;
      NotOpc = AMDGPU::S_NOT_B64;
    }

    assert(SuperReg != AMDGPU::EXEC && "exec should never spill");
    assert(RS && "Cannot spill SGPR to memory without RegScavenger");
    TmpVGPR = RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass, MI, false,

        IsWave32 ? AMDGPU::SGPR_32RegClass : AMDGPU::SGPR_64RegClass;

    if (RS->isRegUsed(AMDGPU::SCC))
          "unhandled SGPR spill to memory");
    I->getOperand(2).setIsDead();

    I->getOperand(2).setIsDead();
    if (RS->isRegUsed(AMDGPU::SCC))
          "unhandled SGPR spill to memory");
      ST.getAMDGPUDwarfFlavour(),

  assert(getSubRegIndexLaneMask(AMDGPU::sub0).getAsInteger() == 3 &&
         getSubRegIndexLaneMask(AMDGPU::sub31).getAsInteger() == (3ULL << 62) &&
         (getSubRegIndexLaneMask(AMDGPU::lo16) |
          getSubRegIndexLaneMask(AMDGPU::hi16)).getAsInteger() ==
             getSubRegIndexLaneMask(AMDGPU::sub0).getAsInteger() &&
         "getNumCoveredRegs() will not work with generated subreg masks!");
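  // Note: the asserts above encode the assumption that every 32-bit channel
  // occupies exactly two lane-mask bits (one for its lo16 half and one for its
  // hi16 half), so sub0 covers the low two bits and sub31 the top two bits of
  // the 64-bit mask.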
  RegPressureIgnoredUnits.resize(getNumRegUnits());
  RegPressureIgnoredUnits.set(*regunits(MCRegister::from(AMDGPU::M0)).begin());
  for (auto Reg : AMDGPU::VGPR_16RegClass) {
      RegPressureIgnoredUnits.set(*regunits(Reg).begin());
  static auto InitializeRegSplitPartsOnce = [this]() {
    for (unsigned Idx = 1, E = getNumSubRegIndices() - 1; Idx < E; ++Idx) {
      unsigned Size = getSubRegIdxSize(Idx);

      std::vector<int16_t> &Vec = RegSplitParts[Size / 16 - 1];
      unsigned Pos = getSubRegIdxOffset(Idx);

      unsigned MaxNumParts = 1024 / Size;
      Vec.resize(MaxNumParts);
  static auto InitializeSubRegFromChannelTableOnce = [this]() {
    for (auto &Row : SubRegFromChannelTable)
      Row.fill(AMDGPU::NoSubRegister);
    for (unsigned Idx = 1; Idx < getNumSubRegIndices(); ++Idx) {
      unsigned Width = getSubRegIdxSize(Idx) / 32;
      unsigned Offset = getSubRegIdxOffset(Idx) / 32;

      unsigned TableIdx = Width - 1;
      assert(TableIdx < SubRegFromChannelTable.size());

      SubRegFromChannelTable[TableIdx][Offset] = Idx;

  llvm::call_once(InitializeRegSplitPartsFlag, InitializeRegSplitPartsOnce);
  llvm::call_once(InitializeSubRegFromChannelTableFlag,
                  InitializeSubRegFromChannelTableOnce);
408 return ST.hasGFX90AInsts() ? CSR_AMDGPU_GFX90AInsts_SaveList
409 : CSR_AMDGPU_SaveList;
412 return ST.hasGFX90AInsts() ? CSR_AMDGPU_SI_Gfx_GFX90AInsts_SaveList
413 : CSR_AMDGPU_SI_Gfx_SaveList;
415 return CSR_AMDGPU_CS_ChainPreserve_SaveList;
418 static const MCPhysReg NoCalleeSavedReg = AMDGPU::NoRegister;
419 return &NoCalleeSavedReg;
435 return ST.hasGFX90AInsts() ? CSR_AMDGPU_GFX90AInsts_RegMask
436 : CSR_AMDGPU_RegMask;
439 return ST.hasGFX90AInsts() ? CSR_AMDGPU_SI_Gfx_GFX90AInsts_RegMask
440 : CSR_AMDGPU_SI_Gfx_RegMask;
445 return AMDGPU_AllVGPRs_RegMask;
452 return CSR_AMDGPU_NoRegs_RegMask;
456 return VGPR >= AMDGPU::VGPR0 && VGPR < AMDGPU::VGPR8;
467 if (RC == &AMDGPU::VGPR_32RegClass || RC == &AMDGPU::AGPR_32RegClass)
468 return &AMDGPU::AV_32RegClass;
469 if (RC == &AMDGPU::VReg_64RegClass || RC == &AMDGPU::AReg_64RegClass)
470 return &AMDGPU::AV_64RegClass;
471 if (RC == &AMDGPU::VReg_64_Align2RegClass ||
472 RC == &AMDGPU::AReg_64_Align2RegClass)
473 return &AMDGPU::AV_64_Align2RegClass;
474 if (RC == &AMDGPU::VReg_96RegClass || RC == &AMDGPU::AReg_96RegClass)
475 return &AMDGPU::AV_96RegClass;
476 if (RC == &AMDGPU::VReg_96_Align2RegClass ||
477 RC == &AMDGPU::AReg_96_Align2RegClass)
478 return &AMDGPU::AV_96_Align2RegClass;
479 if (RC == &AMDGPU::VReg_128RegClass || RC == &AMDGPU::AReg_128RegClass)
480 return &AMDGPU::AV_128RegClass;
481 if (RC == &AMDGPU::VReg_128_Align2RegClass ||
482 RC == &AMDGPU::AReg_128_Align2RegClass)
483 return &AMDGPU::AV_128_Align2RegClass;
484 if (RC == &AMDGPU::VReg_160RegClass || RC == &AMDGPU::AReg_160RegClass)
485 return &AMDGPU::AV_160RegClass;
486 if (RC == &AMDGPU::VReg_160_Align2RegClass ||
487 RC == &AMDGPU::AReg_160_Align2RegClass)
488 return &AMDGPU::AV_160_Align2RegClass;
489 if (RC == &AMDGPU::VReg_192RegClass || RC == &AMDGPU::AReg_192RegClass)
490 return &AMDGPU::AV_192RegClass;
491 if (RC == &AMDGPU::VReg_192_Align2RegClass ||
492 RC == &AMDGPU::AReg_192_Align2RegClass)
493 return &AMDGPU::AV_192_Align2RegClass;
494 if (RC == &AMDGPU::VReg_256RegClass || RC == &AMDGPU::AReg_256RegClass)
495 return &AMDGPU::AV_256RegClass;
496 if (RC == &AMDGPU::VReg_256_Align2RegClass ||
497 RC == &AMDGPU::AReg_256_Align2RegClass)
498 return &AMDGPU::AV_256_Align2RegClass;
499 if (RC == &AMDGPU::VReg_512RegClass || RC == &AMDGPU::AReg_512RegClass)
500 return &AMDGPU::AV_512RegClass;
501 if (RC == &AMDGPU::VReg_512_Align2RegClass ||
502 RC == &AMDGPU::AReg_512_Align2RegClass)
503 return &AMDGPU::AV_512_Align2RegClass;
504 if (RC == &AMDGPU::VReg_1024RegClass || RC == &AMDGPU::AReg_1024RegClass)
505 return &AMDGPU::AV_1024RegClass;
506 if (RC == &AMDGPU::VReg_1024_Align2RegClass ||
507 RC == &AMDGPU::AReg_1024_Align2RegClass)
508 return &AMDGPU::AV_1024_Align2RegClass;
538 return AMDGPU_AllVGPRs_RegMask;
542 return AMDGPU_AllAGPRs_RegMask;
546 return AMDGPU_AllVectorRegs_RegMask;
550 return AMDGPU_AllAllocatableSRegs_RegMask;
  assert(NumRegIndex && "Not implemented");
  assert(Channel < SubRegFromChannelTable[NumRegIndex - 1].size());
  return SubRegFromChannelTable[NumRegIndex - 1][Channel];
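// Illustrative example (not from the original source): with the tables built
// above, getSubRegFromChannel(2, 2) selects the row for two-channel (64-bit)
// sub-registers and returns the index starting at channel 2, e.g.
// AMDGPU::sub2_sub3, while getSubRegFromChannel(1) returns AMDGPU::sub1.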
                                             const unsigned Align,

  MCRegister BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));
  return getMatchingSuperReg(BaseReg, AMDGPU::sub0, RC);
  reserveRegisterTuples(Reserved, AMDGPU::EXEC);
  reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR);

  reserveRegisterTuples(Reserved, AMDGPU::M0);

  reserveRegisterTuples(Reserved, AMDGPU::SRC_VCCZ);
  reserveRegisterTuples(Reserved, AMDGPU::SRC_EXECZ);
  reserveRegisterTuples(Reserved, AMDGPU::SRC_SCC);

  reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_BASE);
  reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_LIMIT);
  reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_BASE);
  reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_LIMIT);
  reserveRegisterTuples(Reserved, AMDGPU::SRC_FLAT_SCRATCH_BASE_LO);
  reserveRegisterTuples(Reserved, AMDGPU::SRC_FLAT_SCRATCH_BASE_HI);

  reserveRegisterTuples(Reserved, AMDGPU::ASYNCcnt);
  reserveRegisterTuples(Reserved, AMDGPU::TENSORcnt);

  reserveRegisterTuples(Reserved, AMDGPU::SRC_POPS_EXITING_WAVE_ID);

  reserveRegisterTuples(Reserved, AMDGPU::XNACK_MASK);

  reserveRegisterTuples(Reserved, AMDGPU::LDS_DIRECT);

  reserveRegisterTuples(Reserved, AMDGPU::TBA);
  reserveRegisterTuples(Reserved, AMDGPU::TMA);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP0_TTMP1);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP2_TTMP3);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP4_TTMP5);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP6_TTMP7);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP8_TTMP9);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP10_TTMP11);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP12_TTMP13);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP14_TTMP15);

  reserveRegisterTuples(Reserved, AMDGPU::SGPR_NULL64);
  unsigned MaxNumSGPRs = ST.getMaxNumSGPRs(MF);
  unsigned TotalNumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();

  unsigned NumRegs = divideCeil(getRegSizeInBits(*RC), 32);

  if (Index + NumRegs > MaxNumSGPRs && Index < TotalNumSGPRs)
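    // Note (inference from the surrounding reservation code): SGPRs that exist
    // in the encoding but fall outside this function's SGPR budget are reserved
    // here so the allocator never assigns them.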
  if (ScratchRSrcReg != AMDGPU::NoRegister) {

    reserveRegisterTuples(Reserved, ScratchRSrcReg);

  if (LongBranchReservedReg)
    reserveRegisterTuples(Reserved, LongBranchReservedReg);

    reserveRegisterTuples(Reserved, StackPtrReg);
    assert(!isSubRegister(ScratchRSrcReg, StackPtrReg));

    reserveRegisterTuples(Reserved, FrameReg);
    assert(!isSubRegister(ScratchRSrcReg, FrameReg));

    reserveRegisterTuples(Reserved, BasePtrReg);
    assert(!isSubRegister(ScratchRSrcReg, BasePtrReg));

    reserveRegisterTuples(Reserved, ExecCopyReg);
  auto [MaxNumVGPRs, MaxNumAGPRs] = ST.getMaxNumVectorRegs(MF.getFunction());

  unsigned NumRegs = divideCeil(getRegSizeInBits(*RC), 32);

  if (Index + NumRegs > MaxNumVGPRs)

  if (!ST.hasMAIInsts())

  unsigned NumRegs = divideCeil(getRegSizeInBits(*RC), 32);

  if (Index + NumRegs > MaxNumAGPRs)
  if (ST.hasMAIInsts() && !ST.hasGFX90AInsts()) {

  if (!NonWWMRegMask.empty()) {
    for (unsigned RegI = AMDGPU::VGPR0, RegE = AMDGPU::VGPR0 + MaxNumVGPRs;
         RegI < RegE; ++RegI) {
      if (NonWWMRegMask.test(RegI))
        reserveRegisterTuples(Reserved, RegI);

    reserveRegisterTuples(Reserved, Reg);

    reserveRegisterTuples(Reserved, Reg);

    reserveRegisterTuples(Reserved, Reg);
  if (Info->isBottomOfStack())

  if (Info->isEntryFunction()) {
  int OffIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                          AMDGPU::OpName::offset);
  return MI->getOperand(OffIdx).getImm();
  switch (MI->getOpcode()) {
  case AMDGPU::V_ADD_U32_e32:
  case AMDGPU::V_ADD_U32_e64:
  case AMDGPU::V_ADD_CO_U32_e32: {
    int OtherIdx = Idx == 1 ? 2 : 1;

  case AMDGPU::V_ADD_CO_U32_e64: {
    int OtherIdx = Idx == 2 ? 3 : 2;
  assert((Idx == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                            AMDGPU::OpName::vaddr) ||
          (Idx == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                             AMDGPU::OpName::saddr))) &&
         "Should never see frame index on non-address operand");
    return Src1.isImm() || (Src1.isReg() && TRI.isVGPR(MI.getMF()->getRegInfo(),

    return Src0.isImm() || (Src0.isReg() && TRI.isVGPR(MI.getMF()->getRegInfo(),
  switch (MI->getOpcode()) {
  case AMDGPU::V_ADD_U32_e32: {

    if (ST.getConstantBusLimit(AMDGPU::V_ADD_U32_e32) < 2 &&

  case AMDGPU::V_ADD_U32_e64:

    return !ST.enableFlatScratch();
  case AMDGPU::V_ADD_CO_U32_e32:
    if (ST.getConstantBusLimit(AMDGPU::V_ADD_CO_U32_e32) < 2 &&

    return MI->getOperand(3).isDead();
  case AMDGPU::V_ADD_CO_U32_e64:

    return MI->getOperand(1).isDead();

  return !TII->isLegalMUBUFImmOffset(FullOffset);
  if (Ins != MBB->end())
    DL = Ins->getDebugLoc();
  unsigned MovOpc = ST.enableFlatScratch() ? AMDGPU::S_MOV_B32
                                           : AMDGPU::V_MOV_B32_e32;

      ST.enableFlatScratch() ? &AMDGPU::SReg_32_XEXEC_HIRegClass
                             : &AMDGPU::VGPR_32RegClass);
  Register OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);

      ST.enableFlatScratch() ? &AMDGPU::SReg_32_XM0RegClass
                             : &AMDGPU::VGPR_32RegClass);
  if (ST.enableFlatScratch()) {

  TII->getAddNoCarry(*MBB, Ins, DL, BaseReg)
  switch (MI.getOpcode()) {
  case AMDGPU::V_ADD_U32_e32:
  case AMDGPU::V_ADD_CO_U32_e32: {

    if (!ImmOp->isImm()) {

      TII->legalizeOperandsVOP2(MI.getMF()->getRegInfo(), MI);

    if (TotalOffset == 0) {
      MI.setDesc(TII->get(AMDGPU::COPY));
      for (unsigned I = MI.getNumOperands() - 1; I != 1; --I)

      MI.getOperand(1).ChangeToRegister(BaseReg, false);

    ImmOp->setImm(TotalOffset);

        MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

      MI.getOperand(2).ChangeToRegister(BaseRegVGPR, false);

      MI.getOperand(2).ChangeToRegister(BaseReg, false);
  case AMDGPU::V_ADD_U32_e64:
  case AMDGPU::V_ADD_CO_U32_e64: {
    int Src0Idx = MI.getNumExplicitDefs();

    if (!ImmOp->isImm()) {

      TII->legalizeOperandsVOP3(MI.getMF()->getRegInfo(), MI);

    if (TotalOffset == 0) {
      MI.setDesc(TII->get(AMDGPU::COPY));

      for (unsigned I = MI.getNumOperands() - 1; I != 1; --I)
        MI.removeOperand(I);

      MI.getOperand(1).ChangeToRegister(BaseReg, false);

    ImmOp->setImm(TotalOffset);
  bool IsFlat = TII->isFLATScratch(MI);

  bool SeenFI = false;

      TII->getNamedOperand(MI, IsFlat ? AMDGPU::OpName::saddr
                                      : AMDGPU::OpName::vaddr);

  assert(FIOp && FIOp->isFI() && "frame index must be address operand");

         "offset should be legal");

  assert(TII->isLegalMUBUFImmOffset(NewOffset) && "offset should be legal");
  switch (MI->getOpcode()) {
  case AMDGPU::V_ADD_U32_e32:
  case AMDGPU::V_ADD_CO_U32_e32:

  case AMDGPU::V_ADD_U32_e64:
  case AMDGPU::V_ADD_CO_U32_e64:

  return TII->isLegalMUBUFImmOffset(NewOffset);

    return &AMDGPU::VGPR_32RegClass;

  return RC == &AMDGPU::SCC_CLASSRegClass ? &AMDGPU::SReg_32RegClass : RC;
  unsigned Op = MI.getOpcode();
  case AMDGPU::SI_BLOCK_SPILL_V1024_SAVE:
  case AMDGPU::SI_BLOCK_SPILL_V1024_RESTORE:

        (uint64_t)TII->getNamedOperand(MI, AMDGPU::OpName::mask)->getImm());
1137 case AMDGPU::SI_SPILL_S1024_SAVE:
1138 case AMDGPU::SI_SPILL_S1024_RESTORE:
1139 case AMDGPU::SI_SPILL_V1024_SAVE:
1140 case AMDGPU::SI_SPILL_V1024_RESTORE:
1141 case AMDGPU::SI_SPILL_A1024_SAVE:
1142 case AMDGPU::SI_SPILL_A1024_RESTORE:
1143 case AMDGPU::SI_SPILL_AV1024_SAVE:
1144 case AMDGPU::SI_SPILL_AV1024_RESTORE:
1146 case AMDGPU::SI_SPILL_S512_SAVE:
1147 case AMDGPU::SI_SPILL_S512_RESTORE:
1148 case AMDGPU::SI_SPILL_V512_SAVE:
1149 case AMDGPU::SI_SPILL_V512_RESTORE:
1150 case AMDGPU::SI_SPILL_A512_SAVE:
1151 case AMDGPU::SI_SPILL_A512_RESTORE:
1152 case AMDGPU::SI_SPILL_AV512_SAVE:
1153 case AMDGPU::SI_SPILL_AV512_RESTORE:
1155 case AMDGPU::SI_SPILL_S384_SAVE:
1156 case AMDGPU::SI_SPILL_S384_RESTORE:
1157 case AMDGPU::SI_SPILL_V384_SAVE:
1158 case AMDGPU::SI_SPILL_V384_RESTORE:
1159 case AMDGPU::SI_SPILL_A384_SAVE:
1160 case AMDGPU::SI_SPILL_A384_RESTORE:
1161 case AMDGPU::SI_SPILL_AV384_SAVE:
1162 case AMDGPU::SI_SPILL_AV384_RESTORE:
1164 case AMDGPU::SI_SPILL_S352_SAVE:
1165 case AMDGPU::SI_SPILL_S352_RESTORE:
1166 case AMDGPU::SI_SPILL_V352_SAVE:
1167 case AMDGPU::SI_SPILL_V352_RESTORE:
1168 case AMDGPU::SI_SPILL_A352_SAVE:
1169 case AMDGPU::SI_SPILL_A352_RESTORE:
1170 case AMDGPU::SI_SPILL_AV352_SAVE:
1171 case AMDGPU::SI_SPILL_AV352_RESTORE:
1173 case AMDGPU::SI_SPILL_S320_SAVE:
1174 case AMDGPU::SI_SPILL_S320_RESTORE:
1175 case AMDGPU::SI_SPILL_V320_SAVE:
1176 case AMDGPU::SI_SPILL_V320_RESTORE:
1177 case AMDGPU::SI_SPILL_A320_SAVE:
1178 case AMDGPU::SI_SPILL_A320_RESTORE:
1179 case AMDGPU::SI_SPILL_AV320_SAVE:
1180 case AMDGPU::SI_SPILL_AV320_RESTORE:
1182 case AMDGPU::SI_SPILL_S288_SAVE:
1183 case AMDGPU::SI_SPILL_S288_RESTORE:
1184 case AMDGPU::SI_SPILL_V288_SAVE:
1185 case AMDGPU::SI_SPILL_V288_RESTORE:
1186 case AMDGPU::SI_SPILL_A288_SAVE:
1187 case AMDGPU::SI_SPILL_A288_RESTORE:
1188 case AMDGPU::SI_SPILL_AV288_SAVE:
1189 case AMDGPU::SI_SPILL_AV288_RESTORE:
1191 case AMDGPU::SI_SPILL_S256_SAVE:
1192 case AMDGPU::SI_SPILL_S256_RESTORE:
1193 case AMDGPU::SI_SPILL_V256_SAVE:
1194 case AMDGPU::SI_SPILL_V256_RESTORE:
1195 case AMDGPU::SI_SPILL_A256_SAVE:
1196 case AMDGPU::SI_SPILL_A256_RESTORE:
1197 case AMDGPU::SI_SPILL_AV256_SAVE:
1198 case AMDGPU::SI_SPILL_AV256_RESTORE:
1200 case AMDGPU::SI_SPILL_S224_SAVE:
1201 case AMDGPU::SI_SPILL_S224_RESTORE:
1202 case AMDGPU::SI_SPILL_V224_SAVE:
1203 case AMDGPU::SI_SPILL_V224_RESTORE:
1204 case AMDGPU::SI_SPILL_A224_SAVE:
1205 case AMDGPU::SI_SPILL_A224_RESTORE:
1206 case AMDGPU::SI_SPILL_AV224_SAVE:
1207 case AMDGPU::SI_SPILL_AV224_RESTORE:
1209 case AMDGPU::SI_SPILL_S192_SAVE:
1210 case AMDGPU::SI_SPILL_S192_RESTORE:
1211 case AMDGPU::SI_SPILL_V192_SAVE:
1212 case AMDGPU::SI_SPILL_V192_RESTORE:
1213 case AMDGPU::SI_SPILL_A192_SAVE:
1214 case AMDGPU::SI_SPILL_A192_RESTORE:
1215 case AMDGPU::SI_SPILL_AV192_SAVE:
1216 case AMDGPU::SI_SPILL_AV192_RESTORE:
1218 case AMDGPU::SI_SPILL_S160_SAVE:
1219 case AMDGPU::SI_SPILL_S160_RESTORE:
1220 case AMDGPU::SI_SPILL_V160_SAVE:
1221 case AMDGPU::SI_SPILL_V160_RESTORE:
1222 case AMDGPU::SI_SPILL_A160_SAVE:
1223 case AMDGPU::SI_SPILL_A160_RESTORE:
1224 case AMDGPU::SI_SPILL_AV160_SAVE:
1225 case AMDGPU::SI_SPILL_AV160_RESTORE:
1227 case AMDGPU::SI_SPILL_S128_SAVE:
1228 case AMDGPU::SI_SPILL_S128_RESTORE:
1229 case AMDGPU::SI_SPILL_V128_SAVE:
1230 case AMDGPU::SI_SPILL_V128_RESTORE:
1231 case AMDGPU::SI_SPILL_A128_SAVE:
1232 case AMDGPU::SI_SPILL_A128_RESTORE:
1233 case AMDGPU::SI_SPILL_AV128_SAVE:
1234 case AMDGPU::SI_SPILL_AV128_RESTORE:
1236 case AMDGPU::SI_SPILL_S96_SAVE:
1237 case AMDGPU::SI_SPILL_S96_RESTORE:
1238 case AMDGPU::SI_SPILL_V96_SAVE:
1239 case AMDGPU::SI_SPILL_V96_RESTORE:
1240 case AMDGPU::SI_SPILL_A96_SAVE:
1241 case AMDGPU::SI_SPILL_A96_RESTORE:
1242 case AMDGPU::SI_SPILL_AV96_SAVE:
1243 case AMDGPU::SI_SPILL_AV96_RESTORE:
1245 case AMDGPU::SI_SPILL_S64_SAVE:
1246 case AMDGPU::SI_SPILL_S64_RESTORE:
1247 case AMDGPU::SI_SPILL_V64_SAVE:
1248 case AMDGPU::SI_SPILL_V64_RESTORE:
1249 case AMDGPU::SI_SPILL_A64_SAVE:
1250 case AMDGPU::SI_SPILL_A64_RESTORE:
1251 case AMDGPU::SI_SPILL_AV64_SAVE:
1252 case AMDGPU::SI_SPILL_AV64_RESTORE:
1254 case AMDGPU::SI_SPILL_S32_SAVE:
1255 case AMDGPU::SI_SPILL_S32_RESTORE:
1256 case AMDGPU::SI_SPILL_V32_SAVE:
1257 case AMDGPU::SI_SPILL_V32_RESTORE:
1258 case AMDGPU::SI_SPILL_A32_SAVE:
1259 case AMDGPU::SI_SPILL_A32_RESTORE:
1260 case AMDGPU::SI_SPILL_AV32_SAVE:
1261 case AMDGPU::SI_SPILL_AV32_RESTORE:
1262 case AMDGPU::SI_SPILL_WWM_V32_SAVE:
1263 case AMDGPU::SI_SPILL_WWM_V32_RESTORE:
1264 case AMDGPU::SI_SPILL_WWM_AV32_SAVE:
1265 case AMDGPU::SI_SPILL_WWM_AV32_RESTORE:
1266 case AMDGPU::SI_SPILL_V16_SAVE:
1267 case AMDGPU::SI_SPILL_V16_RESTORE:
1275 case AMDGPU::BUFFER_STORE_DWORD_OFFEN:
1276 return AMDGPU::BUFFER_STORE_DWORD_OFFSET;
1277 case AMDGPU::BUFFER_STORE_BYTE_OFFEN:
1278 return AMDGPU::BUFFER_STORE_BYTE_OFFSET;
1279 case AMDGPU::BUFFER_STORE_SHORT_OFFEN:
1280 return AMDGPU::BUFFER_STORE_SHORT_OFFSET;
1281 case AMDGPU::BUFFER_STORE_DWORDX2_OFFEN:
1282 return AMDGPU::BUFFER_STORE_DWORDX2_OFFSET;
1283 case AMDGPU::BUFFER_STORE_DWORDX3_OFFEN:
1284 return AMDGPU::BUFFER_STORE_DWORDX3_OFFSET;
1285 case AMDGPU::BUFFER_STORE_DWORDX4_OFFEN:
1286 return AMDGPU::BUFFER_STORE_DWORDX4_OFFSET;
1287 case AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFEN:
1288 return AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFSET;
1289 case AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFEN:
1290 return AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFSET;
1298 case AMDGPU::BUFFER_LOAD_DWORD_OFFEN:
1299 return AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
1300 case AMDGPU::BUFFER_LOAD_UBYTE_OFFEN:
1301 return AMDGPU::BUFFER_LOAD_UBYTE_OFFSET;
1302 case AMDGPU::BUFFER_LOAD_SBYTE_OFFEN:
1303 return AMDGPU::BUFFER_LOAD_SBYTE_OFFSET;
1304 case AMDGPU::BUFFER_LOAD_USHORT_OFFEN:
1305 return AMDGPU::BUFFER_LOAD_USHORT_OFFSET;
1306 case AMDGPU::BUFFER_LOAD_SSHORT_OFFEN:
1307 return AMDGPU::BUFFER_LOAD_SSHORT_OFFSET;
1308 case AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN:
1309 return AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET;
1310 case AMDGPU::BUFFER_LOAD_DWORDX3_OFFEN:
1311 return AMDGPU::BUFFER_LOAD_DWORDX3_OFFSET;
1312 case AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN:
1313 return AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET;
1314 case AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFEN:
1315 return AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFSET;
1316 case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN:
1317 return AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET;
1318 case AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFEN:
1319 return AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFSET;
1320 case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN:
1321 return AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET;
1322 case AMDGPU::BUFFER_LOAD_SHORT_D16_OFFEN:
1323 return AMDGPU::BUFFER_LOAD_SHORT_D16_OFFSET;
1324 case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN:
1325 return AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET;
1333 case AMDGPU::BUFFER_STORE_DWORD_OFFSET:
1334 return AMDGPU::BUFFER_STORE_DWORD_OFFEN;
1335 case AMDGPU::BUFFER_STORE_BYTE_OFFSET:
1336 return AMDGPU::BUFFER_STORE_BYTE_OFFEN;
1337 case AMDGPU::BUFFER_STORE_SHORT_OFFSET:
1338 return AMDGPU::BUFFER_STORE_SHORT_OFFEN;
1339 case AMDGPU::BUFFER_STORE_DWORDX2_OFFSET:
1340 return AMDGPU::BUFFER_STORE_DWORDX2_OFFEN;
1341 case AMDGPU::BUFFER_STORE_DWORDX3_OFFSET:
1342 return AMDGPU::BUFFER_STORE_DWORDX3_OFFEN;
1343 case AMDGPU::BUFFER_STORE_DWORDX4_OFFSET:
1344 return AMDGPU::BUFFER_STORE_DWORDX4_OFFEN;
1345 case AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFSET:
1346 return AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFEN;
1347 case AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFSET:
1348 return AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFEN;
1356 case AMDGPU::BUFFER_LOAD_DWORD_OFFSET:
1357 return AMDGPU::BUFFER_LOAD_DWORD_OFFEN;
1358 case AMDGPU::BUFFER_LOAD_UBYTE_OFFSET:
1359 return AMDGPU::BUFFER_LOAD_UBYTE_OFFEN;
1360 case AMDGPU::BUFFER_LOAD_SBYTE_OFFSET:
1361 return AMDGPU::BUFFER_LOAD_SBYTE_OFFEN;
1362 case AMDGPU::BUFFER_LOAD_USHORT_OFFSET:
1363 return AMDGPU::BUFFER_LOAD_USHORT_OFFEN;
1364 case AMDGPU::BUFFER_LOAD_SSHORT_OFFSET:
1365 return AMDGPU::BUFFER_LOAD_SSHORT_OFFEN;
1366 case AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET:
1367 return AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN;
1368 case AMDGPU::BUFFER_LOAD_DWORDX3_OFFSET:
1369 return AMDGPU::BUFFER_LOAD_DWORDX3_OFFEN;
1370 case AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET:
1371 return AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN;
1372 case AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFSET:
1373 return AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFEN;
1374 case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET:
1375 return AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN;
1376 case AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFSET:
1377 return AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFEN;
1378 case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET:
1379 return AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN;
1380 case AMDGPU::BUFFER_LOAD_SHORT_D16_OFFSET:
1381 return AMDGPU::BUFFER_LOAD_SHORT_D16_OFFEN;
1382 case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET:
1383 return AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN;
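// Note: the four helpers above translate MUBUF spill opcodes between their
// OFFSET (pure immediate offset) and OFFEN (VGPR-indexed) address forms;
// callers such as buildMUBUFOffsetLoadStore treat a result of -1 as
// "no equivalent form".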
                                             int Index, unsigned Lane,
                                             unsigned ValueReg, bool IsKill) {

  if (Reg == AMDGPU::NoRegister)

  bool IsStore = MI->mayStore();

  unsigned Dst = IsStore ? Reg : ValueReg;
  unsigned Src = IsStore ? ValueReg : Reg;

  if (IsVGPR == TRI->isVGPR(MRI, ValueReg)) {

  unsigned Opc = (IsStore ^ IsVGPR) ? AMDGPU::V_ACCVGPR_WRITE_B32_e64
                                    : AMDGPU::V_ACCVGPR_READ_B32_e64;
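  // Note: when the spill slot for a VGPR has been mapped onto an AGPR (or the
  // other way around), the spill becomes a single V_ACCVGPR_WRITE/READ copy
  // between the two register files instead of a scratch memory access.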
  bool IsStore = MI->mayStore();

  unsigned Opc = MI->getOpcode();
  int LoadStoreOp = IsStore ?

  if (LoadStoreOp == -1)

          .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc))
          .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset))

                                            AMDGPU::OpName::vdata_in);

    NewMI.add(*VDataIn);
                                          unsigned LoadStoreOp,

  bool IsStore = TII->get(LoadStoreOp).mayStore();

  if (TII->isBlockLoadStore(LoadStoreOp))
1483 LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
1484 : AMDGPU::SCRATCH_LOAD_DWORD_SADDR;
1487 LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORDX2_SADDR
1488 : AMDGPU::SCRATCH_LOAD_DWORDX2_SADDR;
1491 LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORDX3_SADDR
1492 : AMDGPU::SCRATCH_LOAD_DWORDX3_SADDR;
1495 LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORDX4_SADDR
1496 : AMDGPU::SCRATCH_LOAD_DWORDX4_SADDR;
    unsigned LoadStoreOp, int Index, Register ValueReg, bool IsKill,

  assert((!RS || !LiveUnits) && "Only RS or LiveUnits can be set but not both");

  bool IsStore = Desc->mayStore();
  bool IsFlat = TII->isFLATScratch(LoadStoreOp);
  bool IsBlock = TII->isBlockLoadStore(LoadStoreOp);

  bool CanClobberSCC = false;
  bool Scavenged = false;

  const bool IsAGPR = !ST.hasGFX90AInsts() && isAGPRClass(RC);

  unsigned EltSize = IsBlock               ? RegWidth
                     : (IsFlat && !IsAGPR) ? std::min(RegWidth, 16u)

  unsigned NumSubRegs = RegWidth / EltSize;
  unsigned Size = NumSubRegs * EltSize;
  unsigned RemSize = RegWidth - Size;
  unsigned NumRemSubRegs = RemSize ? 1 : 0;

  int64_t MaterializedOffset = Offset;

  int64_t MaxOffset = Offset + Size + RemSize - EltSize;
  int64_t ScratchOffsetRegDelta = 0;
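  // Illustrative example (added, with the elided fallback assumed to be the
  // 4-byte MUBUF element size): RegWidth is in bytes, so spilling a 256-bit
  // tuple gives RegWidth == 32; with flat scratch and no AGPRs, EltSize is
  // std::min(32u, 16u) == 16 and the spill splits into two dwordx4 accesses
  // (NumSubRegs == 2, RemSize == 0), whereas the MUBUF path would emit eight
  // dword accesses.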
  if (IsFlat && EltSize > 4) {

    Desc = &TII->get(LoadStoreOp);

           "unexpected VGPR spill offset");

  bool UseVGPROffset = false;

  if (IsFlat && SGPRBase) {

    if (ST.getConstantBusLimit(AMDGPU::V_ADD_U32_e64) >= 2) {

  bool IsOffsetLegal =

          : TII->isLegalMUBUFImmOffset(MaxOffset);

  if (!IsOffsetLegal || (IsFlat && !SOffset && !ST.hasFlatScratchSTMode())) {

      SOffset = RS->scavengeRegisterBackwards(AMDGPU::SGPR_32RegClass, MI,
                                              false, 0, false);

        CanClobberSCC = !RS->isRegUsed(AMDGPU::SCC);
    } else if (LiveUnits) {
      CanClobberSCC = LiveUnits->available(AMDGPU::SCC);
      for (MCRegister Reg : AMDGPU::SGPR_32RegClass) {

    if (ScratchOffsetReg != AMDGPU::NoRegister && !CanClobberSCC)

      UseVGPROffset = true;

        TmpOffsetVGPR = RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass,
                                                      MI, false, 0);

        for (MCRegister Reg : AMDGPU::VGPR_32RegClass) {

            TmpOffsetVGPR = Reg;

    } else if (!SOffset && CanClobberSCC) {

      if (!ScratchOffsetReg)

      SOffset = ScratchOffsetReg;
      ScratchOffsetRegDelta = Offset;
  if (!IsFlat && !UseVGPROffset)
    Offset *= ST.getWavefrontSize();
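  // Note: MUBUF scratch addressing interleaves the lanes of a wave, so a
  // per-thread byte offset must be scaled by the wavefront size before it is
  // used as a buffer offset (a 4-byte slot becomes a 256-byte stride at
  // wave64); flat-scratch and VGPR-based addressing take unscaled offsets.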
  if (!UseVGPROffset && !SOffset)

  if (UseVGPROffset) {

    MaterializeVOffset(ScratchOffsetReg, TmpOffsetVGPR, Offset);
  } else if (ScratchOffsetReg == AMDGPU::NoRegister) {

            .addReg(ScratchOffsetReg)

    Add->getOperand(3).setIsDead();

  if (IsFlat && SOffset == AMDGPU::NoRegister) {
    assert(AMDGPU::getNamedOperandIdx(LoadStoreOp, AMDGPU::OpName::vaddr) < 0 &&
           "Unexpected vaddr for flat scratch with a FI operand");

    if (UseVGPROffset) {

      assert(ST.hasFlatScratchSTMode());
      assert(!TII->isBlockLoadStore(LoadStoreOp) && "Block ops don't have ST");

    Desc = &TII->get(LoadStoreOp);

  for (unsigned i = 0, e = NumSubRegs + NumRemSubRegs, RegOffset = 0; i != e;
       ++i, RegOffset += EltSize) {
    if (i == NumSubRegs) {

      Desc = &TII->get(LoadStoreOp);

    if (!IsFlat && UseVGPROffset) {

      Desc = &TII->get(NewLoadStoreOp);

    if (UseVGPROffset && TmpOffsetVGPR == TmpIntermediateVGPR) {

      MaterializeVOffset(ScratchOffsetReg, TmpOffsetVGPR, MaterializedOffset);
    unsigned NumRegs = EltSize / 4;

    unsigned SOffsetRegState = 0;

    const bool IsLastSubReg = i + 1 == e;
    const bool IsFirstSubReg = i == 0;

    bool NeedSuperRegDef = e > 1 && IsStore && IsFirstSubReg;
    bool NeedSuperRegImpOperand = e > 1;

    unsigned RemEltSize = EltSize;
      for (int LaneS = (RegOffset + EltSize) / 4 - 1, Lane = LaneS,
               LaneE = RegOffset / 4;
           Lane >= LaneE; --Lane) {
        bool IsSubReg = e > 1 || EltSize > 4;

        if (!MIB.getInstr())

        if (NeedSuperRegDef || (IsSubReg && IsStore && Lane == LaneS && IsFirstSubReg)) {

          NeedSuperRegDef = false;

        if ((IsSubReg || NeedSuperRegImpOperand) && (IsFirstSubReg || IsLastSubReg)) {
          NeedSuperRegImpOperand = true;
          unsigned State = SrcDstRegState;
          if (!IsLastSubReg || (Lane != LaneE))

          if (!IsFirstSubReg || (Lane != LaneS))
    if (RemEltSize != EltSize) {
      assert(IsFlat && EltSize > 4);

      unsigned NumRegs = RemEltSize / 4;

    unsigned FinalReg = SubReg;

      if (!TmpIntermediateVGPR) {

                TII->get(AMDGPU::V_ACCVGPR_READ_B32_e64), TmpIntermediateVGPR)

        if (NeedSuperRegDef)

        if (NeedSuperRegImpOperand && (IsFirstSubReg || IsLastSubReg))

      SubReg = TmpIntermediateVGPR;
    } else if (UseVGPROffset) {
      if (!TmpOffsetVGPR) {
        TmpOffsetVGPR = RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass,

        RS->setRegUsed(TmpOffsetVGPR);
      if (UseVGPROffset) {

      if (SOffset == AMDGPU::NoRegister) {

        if (UseVGPROffset && ScratchOffsetReg) {
          MIB.addReg(ScratchOffsetReg);

        MIB.addReg(SOffset, SOffsetRegState);

      MIB.addMemOperand(NewMMO);

      if (!IsAGPR && NeedSuperRegDef)

      if (!IsStore && IsAGPR && TmpIntermediateVGPR != AMDGPU::NoRegister) {

      if (NeedSuperRegImpOperand &&
          (IsFirstSubReg || (IsLastSubReg && !IsSrcDstDef)))
  if (!IsStore && MI != MBB.end() && MI->isReturn() &&

    MIB->tieOperands(0, MIB->getNumOperands() - 1);

  if (!IsStore && TII->isBlockLoadStore(LoadStoreOp))

  if (ScratchOffsetRegDelta != 0) {

        .addImm(-ScratchOffsetRegDelta);
  Register BaseVGPR = getSubReg(BlockReg, AMDGPU::sub0);
  for (unsigned RegOffset = 1; RegOffset < 32; ++RegOffset)
    if (!(Mask & (1 << RegOffset)) &&
        isCalleeSavedPhysReg(BaseVGPR + RegOffset, *MF))

                                      bool IsKill) const {
  unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
                                        : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;

  unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
                                        : AMDGPU::BUFFER_STORE_DWORD_OFFSET;
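  // Note: when an SGPR value cannot live in a VGPR lane it is staged through a
  // temporary VGPR and moved to or from its scratch slot with the dword
  // opcodes chosen above (SCRATCH_*_SADDR with flat scratch enabled, MUBUF
  // *_OFFSET otherwise).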
                               bool SpillToPhysVGPRLane) const {
  assert(!MI->getOperand(0).isUndef() &&
         "undef spill should have been deleted earlier");

  bool SpillToVGPR = !VGPRSpills.empty();
  if (OnlyToVGPR && !SpillToVGPR)

        "Num of SGPRs spilled should be less than or equal to num of "

    for (unsigned i = 0, e = SB.NumSubRegs; i < e; ++i) {

      bool IsFirstSubreg = i == 0;

      bool UseKill = SB.IsKill && IsLastSubreg;

              SB.TII.get(AMDGPU::SI_SPILL_S32_TO_VGPR), Spill.VGPR)

      if (SB.NumSubRegs > 1 && (IsFirstSubreg || IsLastSubreg))

      for (unsigned i = Offset * PVD.PerVGPR,

                SB.TII.get(AMDGPU::SI_SPILL_S32_TO_VGPR), SB.TmpVGPR)

        unsigned SuperKillState = 0;

    MI->eraseFromParent();
                                 bool SpillToPhysVGPRLane) const {

  bool SpillToVGPR = !VGPRSpills.empty();
  if (OnlyToVGPR && !SpillToVGPR)

    for (unsigned i = 0, e = SB.NumSubRegs; i < e; ++i) {

              SB.TII.get(AMDGPU::SI_RESTORE_S32_FROM_VGPR), SubReg)

      for (unsigned i = Offset * PVD.PerVGPR,

        bool LastSubReg = (i + 1 == e);

                SB.TII.get(AMDGPU::SI_RESTORE_S32_FROM_VGPR), SubReg)

    MI->eraseFromParent();
    for (unsigned i = Offset * PVD.PerVGPR,

      unsigned SuperKillState = 0;

    MI = RestoreMBB.end();

    for (unsigned i = Offset * PVD.PerVGPR,

      bool LastSubReg = (i + 1 == e);

  switch (MI->getOpcode()) {
2259 case AMDGPU::SI_SPILL_S1024_SAVE:
2260 case AMDGPU::SI_SPILL_S512_SAVE:
2261 case AMDGPU::SI_SPILL_S384_SAVE:
2262 case AMDGPU::SI_SPILL_S352_SAVE:
2263 case AMDGPU::SI_SPILL_S320_SAVE:
2264 case AMDGPU::SI_SPILL_S288_SAVE:
2265 case AMDGPU::SI_SPILL_S256_SAVE:
2266 case AMDGPU::SI_SPILL_S224_SAVE:
2267 case AMDGPU::SI_SPILL_S192_SAVE:
2268 case AMDGPU::SI_SPILL_S160_SAVE:
2269 case AMDGPU::SI_SPILL_S128_SAVE:
2270 case AMDGPU::SI_SPILL_S96_SAVE:
2271 case AMDGPU::SI_SPILL_S64_SAVE:
2272 case AMDGPU::SI_SPILL_S32_SAVE:
    return spillSGPR(MI, FI, RS, Indexes, LIS, true, SpillToPhysVGPRLane);
2274 case AMDGPU::SI_SPILL_S1024_RESTORE:
2275 case AMDGPU::SI_SPILL_S512_RESTORE:
2276 case AMDGPU::SI_SPILL_S384_RESTORE:
2277 case AMDGPU::SI_SPILL_S352_RESTORE:
2278 case AMDGPU::SI_SPILL_S320_RESTORE:
2279 case AMDGPU::SI_SPILL_S288_RESTORE:
2280 case AMDGPU::SI_SPILL_S256_RESTORE:
2281 case AMDGPU::SI_SPILL_S224_RESTORE:
2282 case AMDGPU::SI_SPILL_S192_RESTORE:
2283 case AMDGPU::SI_SPILL_S160_RESTORE:
2284 case AMDGPU::SI_SPILL_S128_RESTORE:
2285 case AMDGPU::SI_SPILL_S96_RESTORE:
2286 case AMDGPU::SI_SPILL_S64_RESTORE:
2287 case AMDGPU::SI_SPILL_S32_RESTORE:
    return restoreSGPR(MI, FI, RS, Indexes, LIS, true, SpillToPhysVGPRLane);
                                        int SPAdj, unsigned FIOperandNum,

  assert(SPAdj == 0 && "unhandled SP adjustment in call sequence?");

         "unreserved scratch RSRC register");

  int Index = MI->getOperand(FIOperandNum).getIndex();

  switch (MI->getOpcode()) {
2318 case AMDGPU::SI_SPILL_S1024_SAVE:
2319 case AMDGPU::SI_SPILL_S512_SAVE:
2320 case AMDGPU::SI_SPILL_S384_SAVE:
2321 case AMDGPU::SI_SPILL_S352_SAVE:
2322 case AMDGPU::SI_SPILL_S320_SAVE:
2323 case AMDGPU::SI_SPILL_S288_SAVE:
2324 case AMDGPU::SI_SPILL_S256_SAVE:
2325 case AMDGPU::SI_SPILL_S224_SAVE:
2326 case AMDGPU::SI_SPILL_S192_SAVE:
2327 case AMDGPU::SI_SPILL_S160_SAVE:
2328 case AMDGPU::SI_SPILL_S128_SAVE:
2329 case AMDGPU::SI_SPILL_S96_SAVE:
2330 case AMDGPU::SI_SPILL_S64_SAVE:
2331 case AMDGPU::SI_SPILL_S32_SAVE: {
2336 case AMDGPU::SI_SPILL_S1024_RESTORE:
2337 case AMDGPU::SI_SPILL_S512_RESTORE:
2338 case AMDGPU::SI_SPILL_S384_RESTORE:
2339 case AMDGPU::SI_SPILL_S352_RESTORE:
2340 case AMDGPU::SI_SPILL_S320_RESTORE:
2341 case AMDGPU::SI_SPILL_S288_RESTORE:
2342 case AMDGPU::SI_SPILL_S256_RESTORE:
2343 case AMDGPU::SI_SPILL_S224_RESTORE:
2344 case AMDGPU::SI_SPILL_S192_RESTORE:
2345 case AMDGPU::SI_SPILL_S160_RESTORE:
2346 case AMDGPU::SI_SPILL_S128_RESTORE:
2347 case AMDGPU::SI_SPILL_S96_RESTORE:
2348 case AMDGPU::SI_SPILL_S64_RESTORE:
2349 case AMDGPU::SI_SPILL_S32_RESTORE: {
  case AMDGPU::SI_BLOCK_SPILL_V1024_SAVE: {

        .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::mask));
2361 case AMDGPU::SI_SPILL_V1024_SAVE:
2362 case AMDGPU::SI_SPILL_V512_SAVE:
2363 case AMDGPU::SI_SPILL_V384_SAVE:
2364 case AMDGPU::SI_SPILL_V352_SAVE:
2365 case AMDGPU::SI_SPILL_V320_SAVE:
2366 case AMDGPU::SI_SPILL_V288_SAVE:
2367 case AMDGPU::SI_SPILL_V256_SAVE:
2368 case AMDGPU::SI_SPILL_V224_SAVE:
2369 case AMDGPU::SI_SPILL_V192_SAVE:
2370 case AMDGPU::SI_SPILL_V160_SAVE:
2371 case AMDGPU::SI_SPILL_V128_SAVE:
2372 case AMDGPU::SI_SPILL_V96_SAVE:
2373 case AMDGPU::SI_SPILL_V64_SAVE:
2374 case AMDGPU::SI_SPILL_V32_SAVE:
2375 case AMDGPU::SI_SPILL_V16_SAVE:
2376 case AMDGPU::SI_SPILL_A1024_SAVE:
2377 case AMDGPU::SI_SPILL_A512_SAVE:
2378 case AMDGPU::SI_SPILL_A384_SAVE:
2379 case AMDGPU::SI_SPILL_A352_SAVE:
2380 case AMDGPU::SI_SPILL_A320_SAVE:
2381 case AMDGPU::SI_SPILL_A288_SAVE:
2382 case AMDGPU::SI_SPILL_A256_SAVE:
2383 case AMDGPU::SI_SPILL_A224_SAVE:
2384 case AMDGPU::SI_SPILL_A192_SAVE:
2385 case AMDGPU::SI_SPILL_A160_SAVE:
2386 case AMDGPU::SI_SPILL_A128_SAVE:
2387 case AMDGPU::SI_SPILL_A96_SAVE:
2388 case AMDGPU::SI_SPILL_A64_SAVE:
2389 case AMDGPU::SI_SPILL_A32_SAVE:
2390 case AMDGPU::SI_SPILL_AV1024_SAVE:
2391 case AMDGPU::SI_SPILL_AV512_SAVE:
2392 case AMDGPU::SI_SPILL_AV384_SAVE:
2393 case AMDGPU::SI_SPILL_AV352_SAVE:
2394 case AMDGPU::SI_SPILL_AV320_SAVE:
2395 case AMDGPU::SI_SPILL_AV288_SAVE:
2396 case AMDGPU::SI_SPILL_AV256_SAVE:
2397 case AMDGPU::SI_SPILL_AV224_SAVE:
2398 case AMDGPU::SI_SPILL_AV192_SAVE:
2399 case AMDGPU::SI_SPILL_AV160_SAVE:
2400 case AMDGPU::SI_SPILL_AV128_SAVE:
2401 case AMDGPU::SI_SPILL_AV96_SAVE:
2402 case AMDGPU::SI_SPILL_AV64_SAVE:
2403 case AMDGPU::SI_SPILL_AV32_SAVE:
2404 case AMDGPU::SI_SPILL_WWM_V32_SAVE:
2405 case AMDGPU::SI_SPILL_WWM_AV32_SAVE: {
2407 AMDGPU::OpName::vdata);
2409 MI->eraseFromParent();
2413 assert(
TII->getNamedOperand(*
MI, AMDGPU::OpName::soffset)->getReg() ==
2417 if (
MI->getOpcode() == AMDGPU::SI_SPILL_V16_SAVE) {
2418 assert(ST.enableFlatScratch() &&
"Flat Scratch is not enabled!");
2419 Opc = AMDGPU::SCRATCH_STORE_SHORT_SADDR_t16;
2421 Opc =
MI->getOpcode() == AMDGPU::SI_BLOCK_SPILL_V1024_SAVE
2422 ? AMDGPU::SCRATCH_STORE_BLOCK_SADDR
2423 : ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
2424 : AMDGPU::BUFFER_STORE_DWORD_OFFSET;
2427 auto *
MBB =
MI->getParent();
2428 bool IsWWMRegSpill =
TII->isWWMRegSpillOpcode(
MI->getOpcode());
2429 if (IsWWMRegSpill) {
2431 RS->isRegUsed(AMDGPU::SCC));
2435 TII->getNamedOperand(*
MI, AMDGPU::OpName::offset)->getImm(),
2436 *
MI->memoperands_begin(), RS);
2441 MI->eraseFromParent();
2444 case AMDGPU::SI_BLOCK_SPILL_V1024_RESTORE: {
2448 .
add(*
TII->getNamedOperand(*
MI, AMDGPU::OpName::mask));
2451 case AMDGPU::SI_SPILL_V16_RESTORE:
2452 case AMDGPU::SI_SPILL_V32_RESTORE:
2453 case AMDGPU::SI_SPILL_V64_RESTORE:
2454 case AMDGPU::SI_SPILL_V96_RESTORE:
2455 case AMDGPU::SI_SPILL_V128_RESTORE:
2456 case AMDGPU::SI_SPILL_V160_RESTORE:
2457 case AMDGPU::SI_SPILL_V192_RESTORE:
2458 case AMDGPU::SI_SPILL_V224_RESTORE:
2459 case AMDGPU::SI_SPILL_V256_RESTORE:
2460 case AMDGPU::SI_SPILL_V288_RESTORE:
2461 case AMDGPU::SI_SPILL_V320_RESTORE:
2462 case AMDGPU::SI_SPILL_V352_RESTORE:
2463 case AMDGPU::SI_SPILL_V384_RESTORE:
2464 case AMDGPU::SI_SPILL_V512_RESTORE:
2465 case AMDGPU::SI_SPILL_V1024_RESTORE:
2466 case AMDGPU::SI_SPILL_A32_RESTORE:
2467 case AMDGPU::SI_SPILL_A64_RESTORE:
2468 case AMDGPU::SI_SPILL_A96_RESTORE:
2469 case AMDGPU::SI_SPILL_A128_RESTORE:
2470 case AMDGPU::SI_SPILL_A160_RESTORE:
2471 case AMDGPU::SI_SPILL_A192_RESTORE:
2472 case AMDGPU::SI_SPILL_A224_RESTORE:
2473 case AMDGPU::SI_SPILL_A256_RESTORE:
2474 case AMDGPU::SI_SPILL_A288_RESTORE:
2475 case AMDGPU::SI_SPILL_A320_RESTORE:
2476 case AMDGPU::SI_SPILL_A352_RESTORE:
2477 case AMDGPU::SI_SPILL_A384_RESTORE:
2478 case AMDGPU::SI_SPILL_A512_RESTORE:
2479 case AMDGPU::SI_SPILL_A1024_RESTORE:
2480 case AMDGPU::SI_SPILL_AV32_RESTORE:
2481 case AMDGPU::SI_SPILL_AV64_RESTORE:
2482 case AMDGPU::SI_SPILL_AV96_RESTORE:
2483 case AMDGPU::SI_SPILL_AV128_RESTORE:
2484 case AMDGPU::SI_SPILL_AV160_RESTORE:
2485 case AMDGPU::SI_SPILL_AV192_RESTORE:
2486 case AMDGPU::SI_SPILL_AV224_RESTORE:
2487 case AMDGPU::SI_SPILL_AV256_RESTORE:
2488 case AMDGPU::SI_SPILL_AV288_RESTORE:
2489 case AMDGPU::SI_SPILL_AV320_RESTORE:
2490 case AMDGPU::SI_SPILL_AV352_RESTORE:
2491 case AMDGPU::SI_SPILL_AV384_RESTORE:
2492 case AMDGPU::SI_SPILL_AV512_RESTORE:
2493 case AMDGPU::SI_SPILL_AV1024_RESTORE:
2494 case AMDGPU::SI_SPILL_WWM_V32_RESTORE:
2495 case AMDGPU::SI_SPILL_WWM_AV32_RESTORE: {
2497 AMDGPU::OpName::vdata);
2498 assert(
TII->getNamedOperand(*
MI, AMDGPU::OpName::soffset)->getReg() ==
2502 if (
MI->getOpcode() == AMDGPU::SI_SPILL_V16_RESTORE) {
2503 assert(ST.enableFlatScratch() &&
"Flat Scratch is not enabled!");
2504 Opc = AMDGPU::SCRATCH_LOAD_SHORT_D16_SADDR_t16;
2506 Opc =
MI->getOpcode() == AMDGPU::SI_BLOCK_SPILL_V1024_RESTORE
2507 ? AMDGPU::SCRATCH_LOAD_BLOCK_SADDR
2508 : ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
2509 : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
2512 auto *
MBB =
MI->getParent();
2513 bool IsWWMRegSpill =
TII->isWWMRegSpillOpcode(
MI->getOpcode());
2514 if (IsWWMRegSpill) {
2516 RS->isRegUsed(AMDGPU::SCC));
2521 TII->getNamedOperand(*
MI, AMDGPU::OpName::offset)->getImm(),
2522 *
MI->memoperands_begin(), RS);
2527 MI->eraseFromParent();
2530 case AMDGPU::V_ADD_U32_e32:
2531 case AMDGPU::V_ADD_U32_e64:
2532 case AMDGPU::V_ADD_CO_U32_e32:
2533 case AMDGPU::V_ADD_CO_U32_e64: {
    unsigned NumDefs = MI->getNumExplicitDefs();
    unsigned Src0Idx = NumDefs;

    bool HasClamp = false;

    switch (MI->getOpcode()) {
    case AMDGPU::V_ADD_U32_e32:

    case AMDGPU::V_ADD_U32_e64:
      HasClamp = MI->getOperand(3).getImm();

    case AMDGPU::V_ADD_CO_U32_e32:
      VCCOp = &MI->getOperand(3);

    case AMDGPU::V_ADD_CO_U32_e64:
      VCCOp = &MI->getOperand(1);
      HasClamp = MI->getOperand(4).getImm();

    bool DeadVCC = !VCCOp || VCCOp->isDead();

    unsigned OtherOpIdx =
        FIOperandNum == Src0Idx ? FIOperandNum + 1 : Src0Idx;
2565 unsigned Src1Idx = Src0Idx + 1;
2566 Register MaterializedReg = FrameReg;
2569 int64_t
Offset = FrameInfo.getObjectOffset(Index);
2573 if (OtherOp->
isImm()) {
2584 OtherOp->
setImm(TotalOffset);
2588 if (FrameReg && !ST.enableFlatScratch()) {
2596 ScavengedVGPR = RS->scavengeRegisterBackwards(
2597 AMDGPU::VGPR_32RegClass,
MI,
false, 0);
2603 .
addImm(ST.getWavefrontSizeLog2())
2605 MaterializedReg = ScavengedVGPR;
2608 if ((!OtherOp->
isImm() || OtherOp->
getImm() != 0) && MaterializedReg) {
2609 if (ST.enableFlatScratch() &&
2610 !
TII->isOperandLegal(*
MI, Src1Idx, OtherOp)) {
2617 if (!ScavengedVGPR) {
2618 ScavengedVGPR = RS->scavengeRegisterBackwards(
2619 AMDGPU::VGPR_32RegClass,
MI,
false,
2623 assert(ScavengedVGPR != DstReg);
2628 MaterializedReg = ScavengedVGPR;
2637 AddI32.
add(
MI->getOperand(1));
2639 unsigned MaterializedRegFlags =
2642 if (
isVGPRClass(getPhysRegBaseClass(MaterializedReg))) {
2647 .addReg(MaterializedReg, MaterializedRegFlags);
2652 .addReg(MaterializedReg, MaterializedRegFlags)
2656 if (
MI->getOpcode() == AMDGPU::V_ADD_CO_U32_e64 ||
2657 MI->getOpcode() == AMDGPU::V_ADD_U32_e64)
2660 if (
MI->getOpcode() == AMDGPU::V_ADD_CO_U32_e32)
2661 AddI32.setOperandDead(3);
2663 MaterializedReg = DstReg;
2669 }
else if (
Offset != 0) {
2670 assert(!MaterializedReg);
2674 if (DeadVCC && !HasClamp) {
2679 if (OtherOp->
isReg() && OtherOp->
getReg() == DstReg) {
2681 MI->eraseFromParent();
2686 MI->setDesc(
TII->get(AMDGPU::V_MOV_B32_e32));
2687 MI->removeOperand(FIOperandNum);
2689 unsigned NumOps =
MI->getNumOperands();
2690 for (
unsigned I =
NumOps - 2;
I >= NumDefs + 1; --
I)
2691 MI->removeOperand(
I);
2694 MI->removeOperand(1);
2706 if (!
TII->isOperandLegal(*
MI, Src1Idx) &&
TII->commuteInstruction(*
MI)) {
2714 for (
unsigned SrcIdx : {FIOperandNum, OtherOpIdx}) {
2715 if (!
TII->isOperandLegal(*
MI, SrcIdx)) {
2719 if (!ScavengedVGPR) {
2720 ScavengedVGPR = RS->scavengeRegisterBackwards(
2721 AMDGPU::VGPR_32RegClass,
MI,
false,
2725 assert(ScavengedVGPR != DstReg);
2731 Src.ChangeToRegister(ScavengedVGPR,
false);
2732 Src.setIsKill(
true);
2738 if (FIOp->
isImm() && FIOp->
getImm() == 0 && DeadVCC && !HasClamp) {
2739 if (OtherOp->
isReg() && OtherOp->
getReg() != DstReg) {
2743 MI->eraseFromParent();
2748 case AMDGPU::S_ADD_I32:
2749 case AMDGPU::S_ADD_U32: {
2751 unsigned OtherOpIdx = FIOperandNum == 1 ? 2 : 1;
2758 Register MaterializedReg = FrameReg;
2761 bool DeadSCC =
MI->getOperand(3).isDead();
2770 if (FrameReg && !ST.enableFlatScratch()) {
2775 TmpReg = RS->scavengeRegisterBackwards(AMDGPU::SReg_32_XM0RegClass,
2782 .
addImm(ST.getWavefrontSizeLog2())
2785 MaterializedReg = TmpReg;
2788 int64_t
Offset = FrameInfo.getObjectOffset(Index);
2793 if (OtherOp.
isImm()) {
2797 if (MaterializedReg)
2801 }
else if (MaterializedReg) {
2805 if (!TmpReg && MaterializedReg == FrameReg) {
2806 TmpReg = RS->scavengeRegisterBackwards(AMDGPU::SReg_32_XM0RegClass,
2820 MaterializedReg = DstReg;
2833 if (DeadSCC && OtherOp.
isImm() && OtherOp.
getImm() == 0) {
2835 MI->removeOperand(3);
2836 MI->removeOperand(OtherOpIdx);
2837 MI->setDesc(
TII->get(FIOp->
isReg() ? AMDGPU::COPY : AMDGPU::S_MOV_B32));
2838 }
else if (DeadSCC && FIOp->
isImm() && FIOp->
getImm() == 0) {
2840 MI->removeOperand(3);
2841 MI->removeOperand(FIOperandNum);
2843 TII->get(OtherOp.
isReg() ? AMDGPU::COPY : AMDGPU::S_MOV_B32));
2854 int64_t
Offset = FrameInfo.getObjectOffset(Index);
2855 if (ST.enableFlatScratch()) {
2856 if (
TII->isFLATScratch(*
MI)) {
2858 (int16_t)FIOperandNum ==
2859 AMDGPU::getNamedOperandIdx(
MI->getOpcode(), AMDGPU::OpName::saddr));
2866 TII->getNamedOperand(*
MI, AMDGPU::OpName::offset);
2877 unsigned Opc =
MI->getOpcode();
2881 }
else if (ST.hasFlatScratchSTMode()) {
2891 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst_in);
2892 bool TiedVDst = VDstIn != -1 &&
MI->getOperand(VDstIn).isReg() &&
2893 MI->getOperand(VDstIn).isTied();
2895 MI->untieRegOperand(VDstIn);
2898 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::saddr));
2902 AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst);
2904 AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst_in);
2905 assert(NewVDst != -1 && NewVDstIn != -1 &&
"Must be tied!");
2906 MI->tieOperands(NewVDst, NewVDstIn);
2908 MI->setDesc(
TII->get(NewOpc));
2916 if (
TII->isImmOperandLegal(*
MI, FIOperandNum, *FIOp))
2923 bool UseSGPR =
TII->isOperandLegal(*
MI, FIOperandNum, FIOp);
2925 if (!
Offset && FrameReg && UseSGPR) {
2931 UseSGPR ? &AMDGPU::SReg_32_XM0RegClass : &AMDGPU::VGPR_32RegClass;
2934 RS->scavengeRegisterBackwards(*RC,
MI,
false, 0, !UseSGPR);
2938 if ((!FrameReg || !
Offset) && TmpReg) {
2939 unsigned Opc = UseSGPR ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
2942 MIB.addReg(FrameReg);
2949 bool NeedSaveSCC = RS->isRegUsed(AMDGPU::SCC) &&
2950 !
MI->definesRegister(AMDGPU::SCC,
nullptr);
2954 : RS->scavengeRegisterBackwards(AMDGPU::SReg_32_XM0RegClass,
2955 MI,
false, 0, !UseSGPR);
2959 if ((!TmpSReg && !FrameReg) || (!TmpReg && !UseSGPR))
2970 assert(!(
Offset & 0x1) &&
"Flat scratch offset must be aligned!");
2990 if (TmpSReg == FrameReg) {
2993 !
MI->registerDefIsDead(AMDGPU::SCC,
nullptr)) {
3017 bool IsMUBUF =
TII->isMUBUF(*
MI);
3023 bool LiveSCC = RS->isRegUsed(AMDGPU::SCC) &&
3024 !
MI->definesRegister(AMDGPU::SCC,
nullptr);
3026 ? &AMDGPU::SReg_32RegClass
3027 : &AMDGPU::VGPR_32RegClass;
3028 bool IsCopy =
MI->getOpcode() == AMDGPU::V_MOV_B32_e32 ||
3029 MI->getOpcode() == AMDGPU::V_MOV_B32_e64 ||
3030 MI->getOpcode() == AMDGPU::S_MOV_B32;
3032 IsCopy ?
MI->getOperand(0).getReg()
3033 : RS->scavengeRegisterBackwards(*RC,
MI,
false, 0);
3035 int64_t
Offset = FrameInfo.getObjectOffset(Index);
3038 IsSALU && !LiveSCC ? AMDGPU::S_LSHR_B32 : AMDGPU::V_LSHRREV_B32_e64;
3040 if (IsSALU && LiveSCC) {
3041 TmpResultReg = RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass,
3046 if (OpCode == AMDGPU::V_LSHRREV_B32_e64)
3049 Shift.addImm(ST.getWavefrontSizeLog2()).addReg(FrameReg);
3051 Shift.addReg(FrameReg).addImm(ST.getWavefrontSizeLog2());
3052 if (IsSALU && !LiveSCC)
3053 Shift.getInstr()->getOperand(3).setIsDead();
3054 if (IsSALU && LiveSCC) {
3058 NewDest = ResultReg;
3060 NewDest = RS->scavengeRegisterBackwards(AMDGPU::SReg_32_XM0RegClass,
3065 ResultReg = NewDest;
3070 if ((MIB =
TII->getAddNoCarry(*
MBB,
MI,
DL, ResultReg, *RS)) !=
3077 .
addImm(ST.getWavefrontSizeLog2())
3080 const bool IsVOP2 = MIB->
getOpcode() == AMDGPU::V_ADD_U32_e32;
3092 "Need to reuse carry out register");
3097 ConstOffsetReg = getSubReg(MIB.
getReg(1), AMDGPU::sub0);
3099 ConstOffsetReg = MIB.
getReg(1);
3110 if (!MIB || IsSALU) {
3117 Register TmpScaledReg = IsCopy && IsSALU
3119 : RS->scavengeRegisterBackwards(
3120 AMDGPU::SReg_32_XM0RegClass,
MI,
3122 Register ScaledReg = TmpScaledReg.
isValid() ? TmpScaledReg : FrameReg;
3128 .
addImm(ST.getWavefrontSizeLog2());
3133 TmpResultReg = RS->scavengeRegisterBackwards(
3134 AMDGPU::VGPR_32RegClass,
MI,
false, 0,
true);
3137 if ((
Add =
TII->getAddNoCarry(*
MBB,
MI,
DL, TmpResultReg, *RS))) {
3140 .
addImm(ST.getWavefrontSizeLog2())
3142 if (
Add->getOpcode() == AMDGPU::V_ADD_CO_U32_e64) {
3152 "offset is unsafe for v_mad_u32_u24");
3161 bool IsInlinableLiteral =
3163 if (!IsInlinableLiteral) {
3172 if (!IsInlinableLiteral) {
3178 Add.addImm(ST.getWavefrontSize()).addReg(FrameReg).addImm(0);
3181 .
addImm(ST.getWavefrontSizeLog2())
3187 NewDest = ResultReg;
3189 NewDest = RS->scavengeRegisterBackwards(
3190 AMDGPU::SReg_32_XM0RegClass, *
Add,
false, 0,
3197 ResultReg = NewDest;
3203 ResultReg = TmpResultReg;
3205 if (!TmpScaledReg.
isValid()) {
3211 .
addImm(ST.getWavefrontSizeLog2());
3218 MI->eraseFromParent();
3228 static_cast<int>(FIOperandNum) ==
3229 AMDGPU::getNamedOperandIdx(
MI->getOpcode(), AMDGPU::OpName::vaddr));
3231 auto &SOffset = *
TII->getNamedOperand(*
MI, AMDGPU::OpName::soffset);
3232 assert((SOffset.isImm() && SOffset.getImm() == 0));
3234 if (FrameReg != AMDGPU::NoRegister)
3235 SOffset.ChangeToRegister(FrameReg,
false);
3237 int64_t
Offset = FrameInfo.getObjectOffset(Index);
3239 TII->getNamedOperand(*
MI, AMDGPU::OpName::offset)->getImm();
3240 int64_t NewOffset = OldImm +
Offset;
3242 if (
TII->isLegalMUBUFImmOffset(NewOffset) &&
3244 MI->eraseFromParent();
3253 if (!
TII->isImmOperandLegal(*
MI, FIOperandNum, *FIOp)) {
3255 RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass,
MI,
false, 0);
3279 return &AMDGPU::VReg_64RegClass;
3281 return &AMDGPU::VReg_96RegClass;
3283 return &AMDGPU::VReg_128RegClass;
3285 return &AMDGPU::VReg_160RegClass;
3287 return &AMDGPU::VReg_192RegClass;
3289 return &AMDGPU::VReg_224RegClass;
3291 return &AMDGPU::VReg_256RegClass;
3293 return &AMDGPU::VReg_288RegClass;
3295 return &AMDGPU::VReg_320RegClass;
3297 return &AMDGPU::VReg_352RegClass;
3299 return &AMDGPU::VReg_384RegClass;
3301 return &AMDGPU::VReg_512RegClass;
3303 return &AMDGPU::VReg_1024RegClass;
3311 return &AMDGPU::VReg_64_Align2RegClass;
3313 return &AMDGPU::VReg_96_Align2RegClass;
3315 return &AMDGPU::VReg_128_Align2RegClass;
3317 return &AMDGPU::VReg_160_Align2RegClass;
3319 return &AMDGPU::VReg_192_Align2RegClass;
3321 return &AMDGPU::VReg_224_Align2RegClass;
3323 return &AMDGPU::VReg_256_Align2RegClass;
3325 return &AMDGPU::VReg_288_Align2RegClass;
3327 return &AMDGPU::VReg_320_Align2RegClass;
3329 return &AMDGPU::VReg_352_Align2RegClass;
3331 return &AMDGPU::VReg_384_Align2RegClass;
3333 return &AMDGPU::VReg_512_Align2RegClass;
3335 return &AMDGPU::VReg_1024_Align2RegClass;
3343 return &AMDGPU::VReg_1RegClass;
3345 return &AMDGPU::VGPR_16RegClass;
3347 return &AMDGPU::VGPR_32RegClass;
3355 return &AMDGPU::VGPR_32_Lo256RegClass;
3357 return &AMDGPU::VReg_64_Lo256_Align2RegClass;
3359 return &AMDGPU::VReg_96_Lo256_Align2RegClass;
3361 return &AMDGPU::VReg_128_Lo256_Align2RegClass;
3363 return &AMDGPU::VReg_160_Lo256_Align2RegClass;
3365 return &AMDGPU::VReg_192_Lo256_Align2RegClass;
3367 return &AMDGPU::VReg_224_Lo256_Align2RegClass;
3369 return &AMDGPU::VReg_256_Lo256_Align2RegClass;
3371 return &AMDGPU::VReg_288_Lo256_Align2RegClass;
3373 return &AMDGPU::VReg_320_Lo256_Align2RegClass;
3375 return &AMDGPU::VReg_352_Lo256_Align2RegClass;
3377 return &AMDGPU::VReg_384_Lo256_Align2RegClass;
3379 return &AMDGPU::VReg_512_Lo256_Align2RegClass;
3381 return &AMDGPU::VReg_1024_Lo256_Align2RegClass;
3389 return &AMDGPU::AReg_64RegClass;
3391 return &AMDGPU::AReg_96RegClass;
3393 return &AMDGPU::AReg_128RegClass;
3395 return &AMDGPU::AReg_160RegClass;
3397 return &AMDGPU::AReg_192RegClass;
3399 return &AMDGPU::AReg_224RegClass;
3401 return &AMDGPU::AReg_256RegClass;
3403 return &AMDGPU::AReg_288RegClass;
3405 return &AMDGPU::AReg_320RegClass;
3407 return &AMDGPU::AReg_352RegClass;
3409 return &AMDGPU::AReg_384RegClass;
3411 return &AMDGPU::AReg_512RegClass;
3413 return &AMDGPU::AReg_1024RegClass;
3421 return &AMDGPU::AReg_64_Align2RegClass;
3423 return &AMDGPU::AReg_96_Align2RegClass;
3425 return &AMDGPU::AReg_128_Align2RegClass;
3427 return &AMDGPU::AReg_160_Align2RegClass;
3429 return &AMDGPU::AReg_192_Align2RegClass;
3431 return &AMDGPU::AReg_224_Align2RegClass;
3433 return &AMDGPU::AReg_256_Align2RegClass;
3435 return &AMDGPU::AReg_288_Align2RegClass;
3437 return &AMDGPU::AReg_320_Align2RegClass;
3439 return &AMDGPU::AReg_352_Align2RegClass;
3441 return &AMDGPU::AReg_384_Align2RegClass;
3443 return &AMDGPU::AReg_512_Align2RegClass;
3445 return &AMDGPU::AReg_1024_Align2RegClass;
3453 return &AMDGPU::AGPR_LO16RegClass;
3455 return &AMDGPU::AGPR_32RegClass;
3463 return &AMDGPU::AV_64RegClass;
3465 return &AMDGPU::AV_96RegClass;
3467 return &AMDGPU::AV_128RegClass;
3469 return &AMDGPU::AV_160RegClass;
3471 return &AMDGPU::AV_192RegClass;
3473 return &AMDGPU::AV_224RegClass;
3475 return &AMDGPU::AV_256RegClass;
3477 return &AMDGPU::AV_288RegClass;
3479 return &AMDGPU::AV_320RegClass;
3481 return &AMDGPU::AV_352RegClass;
3483 return &AMDGPU::AV_384RegClass;
3485 return &AMDGPU::AV_512RegClass;
3487 return &AMDGPU::AV_1024RegClass;
3495 return &AMDGPU::AV_64_Align2RegClass;
3497 return &AMDGPU::AV_96_Align2RegClass;
3499 return &AMDGPU::AV_128_Align2RegClass;
3501 return &AMDGPU::AV_160_Align2RegClass;
3503 return &AMDGPU::AV_192_Align2RegClass;
3505 return &AMDGPU::AV_224_Align2RegClass;
3507 return &AMDGPU::AV_256_Align2RegClass;
3509 return &AMDGPU::AV_288_Align2RegClass;
3511 return &AMDGPU::AV_320_Align2RegClass;
3513 return &AMDGPU::AV_352_Align2RegClass;
3515 return &AMDGPU::AV_384_Align2RegClass;
3517 return &AMDGPU::AV_512_Align2RegClass;
3519 return &AMDGPU::AV_1024_Align2RegClass;
3527 return &AMDGPU::AV_32RegClass;
3528 return ST.needsAlignedVGPRs()
3536 return &AMDGPU::SReg_32RegClass;
3538 return &AMDGPU::SReg_64RegClass;
3540 return &AMDGPU::SGPR_96RegClass;
3542 return &AMDGPU::SGPR_128RegClass;
3544 return &AMDGPU::SGPR_160RegClass;
3546 return &AMDGPU::SGPR_192RegClass;
3548 return &AMDGPU::SGPR_224RegClass;
3550 return &AMDGPU::SGPR_256RegClass;
3552 return &AMDGPU::SGPR_288RegClass;
3554 return &AMDGPU::SGPR_320RegClass;
3556 return &AMDGPU::SGPR_352RegClass;
3558 return &AMDGPU::SGPR_384RegClass;
3560 return &AMDGPU::SGPR_512RegClass;
3562 return &AMDGPU::SGPR_1024RegClass;
3570 if (Reg.isVirtual())
3571 RC =
MRI.getRegClass(Reg);
3573 RC = getPhysRegBaseClass(Reg);
3579 unsigned Size = getRegSizeInBits(*SRC);
3581 switch (SRC->
getID()) {
3584 case AMDGPU::VS_32_Lo256RegClassID:
3585 case AMDGPU::VS_64_Lo256RegClassID:
3591 assert(VRC &&
"Invalid register class size");
3597 unsigned Size = getRegSizeInBits(*SRC);
3599 assert(ARC &&
"Invalid register class size");
3605 unsigned Size = getRegSizeInBits(*VRC);
3607 return &AMDGPU::SGPR_32RegClass;
3609 assert(SRC &&
"Invalid register class size");
3616 unsigned SubIdx)
const {
3619 getMatchingSuperRegClass(SuperRC, SubRC, SubIdx);
3620 return MatchRC && MatchRC->
hasSubClassEq(SuperRC) ? MatchRC :
nullptr;
3626 return !ST.hasMFMAInlineLiteralBug();
3645 if (ReserveHighestRegister) {
3647 if (
MRI.isAllocatable(Reg) && !
MRI.isPhysRegUsed(Reg))
3651 if (
MRI.isAllocatable(Reg) && !
MRI.isPhysRegUsed(Reg))
                                                  unsigned EltSize) const {

  assert(RegBitWidth >= 32 && RegBitWidth <= 1024 && EltSize >= 2);

  const unsigned RegHalves = RegBitWidth / 16;
  const unsigned EltHalves = EltSize / 2;
  assert(RegSplitParts.size() + 1 >= EltHalves);

  const std::vector<int16_t> &Parts = RegSplitParts[EltHalves - 1];
  const unsigned NumParts = RegHalves / EltHalves;

  return ArrayRef(Parts.data(), NumParts);
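// Illustrative note (added): EltSize is in bytes while RegBitWidth is in bits,
// so splitting a 128-bit register into 4-byte elements gives RegHalves == 8,
// EltHalves == 2 and NumParts == 4, i.e. the returned ArrayRef holds the four
// dword sub-register indices sub0..sub3.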
  return Reg.isVirtual() ? MRI.getRegClass(Reg) : getPhysRegBaseClass(Reg);
3692 return getSubRegisterClass(SrcRC, MO.
getSubReg());
3717 unsigned SrcSize = getRegSizeInBits(*SrcRC);
3718 unsigned DstSize = getRegSizeInBits(*DstRC);
3719 unsigned NewSize = getRegSizeInBits(*NewRC);
3725 if (SrcSize <= 32 || DstSize <= 32)
3728 return NewSize <= DstSize || NewSize <= SrcSize;
3733 unsigned MinOcc = ST.getOccupancyWithWorkGroupSizes(MF).first;
3734 switch (RC->
getID()) {
3736 return AMDGPUGenRegisterInfo::getRegPressureLimit(RC, MF);
3737 case AMDGPU::VGPR_32RegClassID:
3742 ST.getMaxNumVGPRs(MF));
3743 case AMDGPU::SGPR_32RegClassID:
3744 case AMDGPU::SGPR_LO16RegClassID:
3745 return std::min(ST.getMaxNumSGPRs(MinOcc,
true), ST.getMaxNumSGPRs(MF));
3750 unsigned Idx)
const {
3751 switch (
static_cast<AMDGPU::RegisterPressureSets
>(Idx)) {
3752 case AMDGPU::RegisterPressureSets::VGPR_32:
3753 case AMDGPU::RegisterPressureSets::AGPR_32:
3756 case AMDGPU::RegisterPressureSets::SReg_32:
3765 static const int Empty[] = { -1 };
3767 if (RegPressureIgnoredUnits[RegUnit])
3770 return AMDGPUGenRegisterInfo::getRegUnitPressureSets(RegUnit);
3783 std::pair<unsigned, Register> Hint =
MRI.getRegAllocationHint(VirtReg);
3785 switch (Hint.first) {
3792 getMatchingSuperReg(Paired, AMDGPU::lo16, &AMDGPU::VGPR_32RegClass);
3793 }
else if (VRM && VRM->
hasPhys(Paired)) {
3794 PairedPhys = getMatchingSuperReg(VRM->
getPhys(Paired), AMDGPU::lo16,
3795 &AMDGPU::VGPR_32RegClass);
3810 PairedPhys =
TRI->getSubReg(Paired, AMDGPU::lo16);
3811 }
else if (VRM && VRM->
hasPhys(Paired)) {
3812 PairedPhys =
TRI->getSubReg(VRM->
getPhys(Paired), AMDGPU::lo16);
3827 if (AMDGPU::VGPR_16RegClass.
contains(PhysReg) &&
3828 !
MRI.isReserved(PhysReg))
3842 return AMDGPU::SGPR30_SGPR31;
3848 switch (RB.
getID()) {
3849 case AMDGPU::VGPRRegBankID:
3851 std::max(ST.useRealTrue16Insts() ? 16u : 32u,
Size));
3852 case AMDGPU::VCCRegBankID:
3855 case AMDGPU::SGPRRegBankID:
3857 case AMDGPU::AGPRRegBankID:
3872 return getAllocatableClass(RC);
3878 return isWave32 ? AMDGPU::VCC_LO : AMDGPU::VCC;
3882 return isWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
3887 return ST.needsAlignedVGPRs() ? &AMDGPU::VReg_64_Align2RegClass
3888 : &AMDGPU::VReg_64RegClass;
3893 switch ((
int)RCID) {
3894 case AMDGPU::SReg_1RegClassID:
3896 case AMDGPU::SReg_1_XEXECRegClassID:
3901 return AMDGPUGenRegisterInfo::getRegClass(RCID);
3914 if (Reg.isVirtual()) {
3919 :
MRI.getMaxLaneMaskForVReg(Reg);
3923 if ((S.LaneMask & SubLanes) == SubLanes) {
3924 V = S.getVNInfoAt(UseIdx);
3936 for (
MCRegUnit Unit : regunits(Reg.asMCReg())) {
3951 if (!Def || !MDT.dominates(Def, &
Use))
3954 assert(Def->modifiesRegister(Reg,
this));
3960 assert(getRegSizeInBits(*getPhysRegBaseClass(Reg)) <= 32);
3963 AMDGPU::SReg_32RegClass,
3964 AMDGPU::AGPR_32RegClass } ) {
3965 if (
MCPhysReg Super = getMatchingSuperReg(Reg, AMDGPU::lo16, &RC))
3968 if (
MCPhysReg Super = getMatchingSuperReg(Reg, AMDGPU::hi16,
3969 &AMDGPU::VGPR_32RegClass)) {
3973 return AMDGPU::NoRegister;
3977 if (!ST.needsAlignedVGPRs())
3988 assert(&RC != &AMDGPU::VS_64RegClass);
3995 if (!RC || !ST.needsAlignedVGPRs())
3998 unsigned Size = getRegSizeInBits(*RC);
4002 if (RC == &AMDGPU::VS_64RegClass)
4003 return &AMDGPU::VS_64_Align2RegClass;
4017 return ArrayRef(AMDGPU::SGPR_128RegClass.begin(), ST.getMaxNumSGPRs(MF) / 4);
4022 return ArrayRef(AMDGPU::SGPR_64RegClass.begin(), ST.getMaxNumSGPRs(MF) / 2);
4027 return ArrayRef(AMDGPU::SGPR_32RegClass.begin(), ST.getMaxNumSGPRs(MF));
4035 return std::min(128u, getSubRegIdxSize(
SubReg));
4039 return std::min(32u, getSubRegIdxSize(
SubReg));
unsigned SIRegisterInfo::getNumUsedPhysRegs(const MachineRegisterInfo &MRI,
                                            const TargetRegisterClass &RC,
                                            bool IncludeCalls) const {
  unsigned NumArchVGPRs = ST.has1024AddressableVGPRs() ? 1024 : 256;

      (RC.getID() == AMDGPU::VGPR_32RegClassID)

    if (MRI.isPhysRegUsed(Reg, !IncludeCalls))