29 #define GET_REGINFO_TARGET_DESC
30 #include "AMDGPUGenRegisterInfo.inc"
33 "amdgpu-spill-sgpr-to-vgpr",
34 cl::desc("Enable spilling SGPRs to VGPRs"),
38 std::array<std::vector<int16_t>, 16> SIRegisterInfo::RegSplitParts;
39 std::array<std::array<uint16_t, 32>, 9> SIRegisterInfo::SubRegFromChannelTable;
46 0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 9};
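// The width map above translates a sub-register width, in 32-bit channels,
// into a row index of SubRegFromChannelTable; widths with no single covering
// sub-register index map to 0 and are rejected by the asserts in the
// channel-lookup code further down.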
116 MI->getOperand(0).isKill(), Index, RS) {}
131 MovOpc = AMDGPU::S_MOV_B32;
132 NotOpc = AMDGPU::S_NOT_B32;
135 MovOpc = AMDGPU::S_MOV_B64;
136 NotOpc = AMDGPU::S_NOT_B64;
141 SuperReg != AMDGPU::EXEC && "exec should never spill");
172 assert(RS && "Cannot spill SGPR to memory without RegScavenger");
200 IsWave32 ? AMDGPU::SGPR_32RegClass : AMDGPU::SGPR_64RegClass;
221 MI->emitError("unhandled SGPR spill to memory");
231 I->getOperand(2).setIsDead(true);
266 I->getOperand(2).setIsDead(true);
296 MI->emitError("unhandled SGPR spill to memory");
323 assert(getSubRegIndexLaneMask(AMDGPU::sub0).getAsInteger() == 3 &&
324 getSubRegIndexLaneMask(AMDGPU::sub31).getAsInteger() == (3ULL << 62) &&
325 (getSubRegIndexLaneMask(AMDGPU::lo16) |
326 getSubRegIndexLaneMask(AMDGPU::hi16)).getAsInteger() ==
327 getSubRegIndexLaneMask(AMDGPU::sub0).getAsInteger() &&
328 "getNumCoveredRegs() will not work with generated subreg masks!");
330 RegPressureIgnoredUnits.resize(getNumRegUnits());
331 RegPressureIgnoredUnits.set(
333 for (auto Reg : AMDGPU::VGPR_HI16RegClass)
339 static auto InitializeRegSplitPartsOnce = [this]() {
340 for (unsigned Idx = 1, E = getNumSubRegIndices() - 1; Idx < E; ++Idx) {
341 unsigned Size = getSubRegIdxSize(Idx);
344 std::vector<int16_t> &Vec = RegSplitParts[Size / 32 - 1];
345 unsigned Pos = getSubRegIdxOffset(Idx);
350 unsigned MaxNumParts = 1024 / Size;
351 Vec.resize(MaxNumParts);
359 static auto InitializeSubRegFromChannelTableOnce = [this]() {
360 for (auto &Row : SubRegFromChannelTable)
361 Row.fill(AMDGPU::NoSubRegister);
362 for (unsigned Idx = 1; Idx < getNumSubRegIndices(); ++Idx) {
363 unsigned Width = AMDGPUSubRegIdxRanges[Idx].Size / 32;
364 unsigned Offset = AMDGPUSubRegIdxRanges[Idx].Offset / 32;
369 unsigned TableIdx = Width - 1;
370 assert(TableIdx < SubRegFromChannelTable.size());
371 assert(Offset < SubRegFromChannelTable[TableIdx].size());
372 SubRegFromChannelTable[TableIdx][Offset] = Idx;
376 llvm::call_once(InitializeRegSplitPartsFlag, InitializeRegSplitPartsOnce);
378 InitializeSubRegFromChannelTableOnce);
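// RegSplitParts and SubRegFromChannelTable are static members, so the two
// llvm::call_once guards above build them exactly once and every
// SIRegisterInfo instance shares the result.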
381 void SIRegisterInfo::reserveRegisterTuples(BitVector &Reserved,
385 for (; R.isValid(); ++R)
397 return ST.hasGFX90AInsts() ? CSR_AMDGPU_HighRegs_With_AGPRs_SaveList
398 : CSR_AMDGPU_HighRegs_SaveList;
400 return ST.hasGFX90AInsts() ? CSR_AMDGPU_SI_Gfx_With_AGPRs_SaveList
401 : CSR_AMDGPU_SI_Gfx_SaveList;
404 static const MCPhysReg NoCalleeSavedReg = AMDGPU::NoRegister;
405 return &NoCalleeSavedReg;
421 return ST.hasGFX90AInsts() ? CSR_AMDGPU_HighRegs_With_AGPRs_RegMask
422 : CSR_AMDGPU_HighRegs_RegMask;
425 : CSR_AMDGPU_SI_Gfx_RegMask;
432 return CSR_AMDGPU_NoRegs_RegMask;
443 if (RC == &AMDGPU::VGPR_32RegClass || RC == &AMDGPU::AGPR_32RegClass)
444 return &AMDGPU::AV_32RegClass;
445 if (RC == &AMDGPU::VReg_64RegClass || RC == &AMDGPU::AReg_64RegClass)
446 return &AMDGPU::AV_64RegClass;
447 if (RC == &AMDGPU::VReg_64_Align2RegClass ||
448 RC == &AMDGPU::AReg_64_Align2RegClass)
449 return &AMDGPU::AV_64_Align2RegClass;
450 if (RC == &AMDGPU::VReg_96RegClass || RC == &AMDGPU::AReg_96RegClass)
451 return &AMDGPU::AV_96RegClass;
452 if (RC == &AMDGPU::VReg_96_Align2RegClass ||
453 RC == &AMDGPU::AReg_96_Align2RegClass)
454 return &AMDGPU::AV_96_Align2RegClass;
455 if (RC == &AMDGPU::VReg_128RegClass || RC == &AMDGPU::AReg_128RegClass)
456 return &AMDGPU::AV_128RegClass;
457 if (RC == &AMDGPU::VReg_128_Align2RegClass ||
458 RC == &AMDGPU::AReg_128_Align2RegClass)
459 return &AMDGPU::AV_128_Align2RegClass;
460 if (RC == &AMDGPU::VReg_160RegClass || RC == &AMDGPU::AReg_160RegClass)
461 return &AMDGPU::AV_160RegClass;
462 if (RC == &AMDGPU::VReg_160_Align2RegClass ||
463 RC == &AMDGPU::AReg_160_Align2RegClass)
464 return &AMDGPU::AV_160_Align2RegClass;
465 if (RC == &AMDGPU::VReg_192RegClass || RC == &AMDGPU::AReg_192RegClass)
466 return &AMDGPU::AV_192RegClass;
467 if (RC == &AMDGPU::VReg_192_Align2RegClass ||
468 RC == &AMDGPU::AReg_192_Align2RegClass)
469 return &AMDGPU::AV_192_Align2RegClass;
470 if (RC == &AMDGPU::VReg_256RegClass || RC == &AMDGPU::AReg_256RegClass)
471 return &AMDGPU::AV_256RegClass;
472 if (RC == &AMDGPU::VReg_256_Align2RegClass ||
473 RC == &AMDGPU::AReg_256_Align2RegClass)
474 return &AMDGPU::AV_256_Align2RegClass;
475 if (RC == &AMDGPU::VReg_512RegClass || RC == &AMDGPU::AReg_512RegClass)
476 return &AMDGPU::AV_512RegClass;
477 if (RC == &AMDGPU::VReg_512_Align2RegClass ||
478 RC == &AMDGPU::AReg_512_Align2RegClass)
479 return &AMDGPU::AV_512_Align2RegClass;
480 if (RC == &AMDGPU::VReg_1024RegClass || RC == &AMDGPU::AReg_1024RegClass)
481 return &AMDGPU::AV_1024RegClass;
482 if (RC == &AMDGPU::VReg_1024_Align2RegClass ||
483 RC == &AMDGPU::AReg_1024_Align2RegClass)
484 return &AMDGPU::AV_1024_Align2RegClass;
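// Each VGPR or AGPR class above is widened to the combined AV class of the
// same size and alignment, which allows the allocator to place the virtual
// register in either register file.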
514 return CSR_AMDGPU_AllVGPRs_RegMask;
518 return CSR_AMDGPU_AllAGPRs_RegMask;
522 return CSR_AMDGPU_AllVectorRegs_RegMask;
526 return CSR_AMDGPU_AllAllocatableSRegs_RegMask;
533 assert(NumRegIndex && "Not implemented");
534 assert(Channel < SubRegFromChannelTable[NumRegIndex - 1].size());
535 return SubRegFromChannelTable[NumRegIndex - 1][Channel];
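// Example: a query for channel 2 with a width of 2 channels reads the 64-bit
// row of the table at offset 2 and returns AMDGPU::sub2_sub3.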
541 MCRegister BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));
542 return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SGPR_128RegClass);
547 Reserved.set(AMDGPU::MODE);
555 reserveRegisterTuples(Reserved, AMDGPU::EXEC);
556 reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR);
562 reserveRegisterTuples(Reserved, AMDGPU::SRC_VCCZ);
563 reserveRegisterTuples(Reserved, AMDGPU::SRC_EXECZ);
564 reserveRegisterTuples(Reserved, AMDGPU::SRC_SCC);
567 reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_BASE);
568 reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_LIMIT);
569 reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_BASE);
570 reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_LIMIT);
573 reserveRegisterTuples(Reserved, AMDGPU::SRC_POPS_EXITING_WAVE_ID);
576 reserveRegisterTuples(Reserved, AMDGPU::XNACK_MASK);
579 reserveRegisterTuples(Reserved, AMDGPU::LDS_DIRECT);
582 reserveRegisterTuples(Reserved, AMDGPU::TBA);
583 reserveRegisterTuples(Reserved, AMDGPU::TMA);
584 reserveRegisterTuples(Reserved, AMDGPU::TTMP0_TTMP1);
585 reserveRegisterTuples(Reserved, AMDGPU::TTMP2_TTMP3);
586 reserveRegisterTuples(Reserved, AMDGPU::TTMP4_TTMP5);
587 reserveRegisterTuples(Reserved, AMDGPU::TTMP6_TTMP7);
588 reserveRegisterTuples(Reserved, AMDGPU::TTMP8_TTMP9);
589 reserveRegisterTuples(Reserved, AMDGPU::TTMP10_TTMP11);
590 reserveRegisterTuples(Reserved, AMDGPU::TTMP12_TTMP13);
591 reserveRegisterTuples(Reserved, AMDGPU::TTMP14_TTMP15);
594 reserveRegisterTuples(Reserved, AMDGPU::SGPR_NULL);
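// Everything reserved so far is a non-allocatable hardware or trap-handler
// register; the code below additionally reserves VCC where required and clamps
// the allocatable SGPR/VGPR/AGPR ranges to the subtarget and occupancy limits.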
599 Reserved.set(AMDGPU::VCC);
600 Reserved.set(AMDGPU::VCC_HI);
606 unsigned TotalNumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
607 for (unsigned i = MaxNumSGPRs; i < TotalNumSGPRs; ++i) {
608 unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
609 reserveRegisterTuples(Reserved, Reg);
612 for (auto Reg : AMDGPU::SReg_32RegClass) {
613 Reserved.set(getSubReg(Reg, AMDGPU::hi16));
621 if (ScratchRSrcReg != AMDGPU::NoRegister) {
625 reserveRegisterTuples(Reserved, ScratchRSrcReg);
633 reserveRegisterTuples(Reserved, StackPtrReg);
634 assert(!isSubRegister(ScratchRSrcReg, StackPtrReg));
639 reserveRegisterTuples(Reserved, FrameReg);
640 assert(!isSubRegister(ScratchRSrcReg, FrameReg));
645 reserveRegisterTuples(Reserved, BasePtrReg);
646 assert(!isSubRegister(ScratchRSrcReg, BasePtrReg));
652 unsigned MaxNumAGPRs = MaxNumVGPRs;
653 unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
657 for (unsigned i = 0; i < MaxNumAGPRs; ++i) {
658 unsigned Reg = AMDGPU::AGPR_32RegClass.getRegister(i);
659 reserveRegisterTuples(Reserved, Reg);
663 for (auto Reg : AMDGPU::AGPR_32RegClass) {
664 Reserved.set(getSubReg(Reg, AMDGPU::hi16));
678 MaxNumAGPRs = MaxNumVGPRs;
680 if (MaxNumVGPRs > TotalNumVGPRs) {
681 MaxNumAGPRs = MaxNumVGPRs - TotalNumVGPRs;
682 MaxNumVGPRs = TotalNumVGPRs;
688 for (unsigned i = MaxNumVGPRs; i < TotalNumVGPRs; ++i) {
689 unsigned Reg = AMDGPU::VGPR_32RegClass.getRegister(i);
690 reserveRegisterTuples(Reserved, Reg);
693 for (unsigned i = MaxNumAGPRs; i < TotalNumVGPRs; ++i) {
694 unsigned Reg = AMDGPU::AGPR_32RegClass.getRegister(i);
695 reserveRegisterTuples(Reserved, Reg);
705 reserveRegisterTuples(Reserved, Reg);
709 reserveRegisterTuples(Reserved, Reg);
712 reserveRegisterTuples(Reserved, Reg);
715 reserveRegisterTuples(Reserved, SSpill.VGPR);
732 if (Info->isEntryFunction())
740 if (Info->isEntryFunction()) {
774 AMDGPU::OpName::offset);
775 return MI->getOperand(OffIdx).getImm();
784 AMDGPU::OpName::vaddr) ||
786 AMDGPU::OpName::saddr))) &&
787 "Should never see frame index on non-address operand");
808 int64_t Offset) const {
813 DL = Ins->getDebugLoc();
819 : AMDGPU::V_MOV_B32_e32;
823 : &AMDGPU::VGPR_32RegClass);
835 : &AMDGPU::VGPR_32RegClass);
858 int64_t Offset) const {
860 bool IsFlat = TII->isFLATScratch(MI);
876 TII->getNamedOperand(MI, IsFlat ? AMDGPU::OpName::saddr
877 : AMDGPU::OpName::vaddr);
880 int64_t NewOffset = OffsetOp->getImm() + Offset;
882 assert(FIOp && FIOp->isFI() && "frame index must be address operand");
888 "offset should be legal");
890 OffsetOp->setImm(NewOffset);
900 "offset should be legal");
903 OffsetOp->setImm(NewOffset);
908 int64_t Offset) const {
927 return &AMDGPU::VGPR_32RegClass;
941 case AMDGPU::SI_SPILL_S1024_SAVE:
942 case AMDGPU::SI_SPILL_S1024_RESTORE:
943 case AMDGPU::SI_SPILL_V1024_SAVE:
944 case AMDGPU::SI_SPILL_V1024_RESTORE:
945 case AMDGPU::SI_SPILL_A1024_SAVE:
946 case AMDGPU::SI_SPILL_A1024_RESTORE:
947 case AMDGPU::SI_SPILL_AV1024_SAVE:
948 case AMDGPU::SI_SPILL_AV1024_RESTORE:
950 case AMDGPU::SI_SPILL_S512_SAVE:
951 case AMDGPU::SI_SPILL_S512_RESTORE:
952 case AMDGPU::SI_SPILL_V512_SAVE:
953 case AMDGPU::SI_SPILL_V512_RESTORE:
954 case AMDGPU::SI_SPILL_A512_SAVE:
955 case AMDGPU::SI_SPILL_A512_RESTORE:
956 case AMDGPU::SI_SPILL_AV512_SAVE:
957 case AMDGPU::SI_SPILL_AV512_RESTORE:
959 case AMDGPU::SI_SPILL_S256_SAVE:
960 case AMDGPU::SI_SPILL_S256_RESTORE:
961 case AMDGPU::SI_SPILL_V256_SAVE:
962 case AMDGPU::SI_SPILL_V256_RESTORE:
963 case AMDGPU::SI_SPILL_A256_SAVE:
964 case AMDGPU::SI_SPILL_A256_RESTORE:
965 case AMDGPU::SI_SPILL_AV256_SAVE:
966 case AMDGPU::SI_SPILL_AV256_RESTORE:
968 case AMDGPU::SI_SPILL_S224_SAVE:
969 case AMDGPU::SI_SPILL_S224_RESTORE:
970 case AMDGPU::SI_SPILL_V224_SAVE:
971 case AMDGPU::SI_SPILL_V224_RESTORE:
972 case AMDGPU::SI_SPILL_A224_SAVE:
973 case AMDGPU::SI_SPILL_A224_RESTORE:
974 case AMDGPU::SI_SPILL_AV224_SAVE:
975 case AMDGPU::SI_SPILL_AV224_RESTORE:
977 case AMDGPU::SI_SPILL_S192_SAVE:
978 case AMDGPU::SI_SPILL_S192_RESTORE:
979 case AMDGPU::SI_SPILL_V192_SAVE:
980 case AMDGPU::SI_SPILL_V192_RESTORE:
981 case AMDGPU::SI_SPILL_A192_SAVE:
982 case AMDGPU::SI_SPILL_A192_RESTORE:
983 case AMDGPU::SI_SPILL_AV192_SAVE:
984 case AMDGPU::SI_SPILL_AV192_RESTORE:
986 case AMDGPU::SI_SPILL_S160_SAVE:
987 case AMDGPU::SI_SPILL_S160_RESTORE:
988 case AMDGPU::SI_SPILL_V160_SAVE:
989 case AMDGPU::SI_SPILL_V160_RESTORE:
990 case AMDGPU::SI_SPILL_A160_SAVE:
991 case AMDGPU::SI_SPILL_A160_RESTORE:
992 case AMDGPU::SI_SPILL_AV160_SAVE:
993 case AMDGPU::SI_SPILL_AV160_RESTORE:
995 case AMDGPU::SI_SPILL_S128_SAVE:
996 case AMDGPU::SI_SPILL_S128_RESTORE:
997 case AMDGPU::SI_SPILL_V128_SAVE:
998 case AMDGPU::SI_SPILL_V128_RESTORE:
999 case AMDGPU::SI_SPILL_A128_SAVE:
1000 case AMDGPU::SI_SPILL_A128_RESTORE:
1001 case AMDGPU::SI_SPILL_AV128_SAVE:
1002 case AMDGPU::SI_SPILL_AV128_RESTORE:
1004 case AMDGPU::SI_SPILL_S96_SAVE:
1005 case AMDGPU::SI_SPILL_S96_RESTORE:
1006 case AMDGPU::SI_SPILL_V96_SAVE:
1007 case AMDGPU::SI_SPILL_V96_RESTORE:
1008 case AMDGPU::SI_SPILL_A96_SAVE:
1009 case AMDGPU::SI_SPILL_A96_RESTORE:
1010 case AMDGPU::SI_SPILL_AV96_SAVE:
1011 case AMDGPU::SI_SPILL_AV96_RESTORE:
1013 case AMDGPU::SI_SPILL_S64_SAVE:
1014 case AMDGPU::SI_SPILL_S64_RESTORE:
1015 case AMDGPU::SI_SPILL_V64_SAVE:
1016 case AMDGPU::SI_SPILL_V64_RESTORE:
1017 case AMDGPU::SI_SPILL_A64_SAVE:
1018 case AMDGPU::SI_SPILL_A64_RESTORE:
1019 case AMDGPU::SI_SPILL_AV64_SAVE:
1020 case AMDGPU::SI_SPILL_AV64_RESTORE:
1022 case AMDGPU::SI_SPILL_S32_SAVE:
1023 case AMDGPU::SI_SPILL_S32_RESTORE:
1024 case AMDGPU::SI_SPILL_V32_SAVE:
1025 case AMDGPU::SI_SPILL_V32_RESTORE:
1026 case AMDGPU::SI_SPILL_A32_SAVE:
1027 case AMDGPU::SI_SPILL_A32_RESTORE:
1028 case AMDGPU::SI_SPILL_AV32_SAVE:
1029 case AMDGPU::SI_SPILL_AV32_RESTORE:
1037 case AMDGPU::BUFFER_STORE_DWORD_OFFEN:
1038 return AMDGPU::BUFFER_STORE_DWORD_OFFSET;
1039 case AMDGPU::BUFFER_STORE_BYTE_OFFEN:
1040 return AMDGPU::BUFFER_STORE_BYTE_OFFSET;
1041 case AMDGPU::BUFFER_STORE_SHORT_OFFEN:
1042 return AMDGPU::BUFFER_STORE_SHORT_OFFSET;
1043 case AMDGPU::BUFFER_STORE_DWORDX2_OFFEN:
1044 return AMDGPU::BUFFER_STORE_DWORDX2_OFFSET;
1045 case AMDGPU::BUFFER_STORE_DWORDX3_OFFEN:
1046 return AMDGPU::BUFFER_STORE_DWORDX3_OFFSET;
1047 case AMDGPU::BUFFER_STORE_DWORDX4_OFFEN:
1048 return AMDGPU::BUFFER_STORE_DWORDX4_OFFSET;
1049 case AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFEN:
1050 return AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFSET;
1051 case AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFEN:
1052 return AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFSET;
1060 case AMDGPU::BUFFER_LOAD_DWORD_OFFEN:
1061 return AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
1062 case AMDGPU::BUFFER_LOAD_UBYTE_OFFEN:
1063 return AMDGPU::BUFFER_LOAD_UBYTE_OFFSET;
1064 case AMDGPU::BUFFER_LOAD_SBYTE_OFFEN:
1065 return AMDGPU::BUFFER_LOAD_SBYTE_OFFSET;
1066 case AMDGPU::BUFFER_LOAD_USHORT_OFFEN:
1067 return AMDGPU::BUFFER_LOAD_USHORT_OFFSET;
1068 case AMDGPU::BUFFER_LOAD_SSHORT_OFFEN:
1069 return AMDGPU::BUFFER_LOAD_SSHORT_OFFSET;
1070 case AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN:
1071 return AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET;
1072 case AMDGPU::BUFFER_LOAD_DWORDX3_OFFEN:
1073 return AMDGPU::BUFFER_LOAD_DWORDX3_OFFSET;
1074 case AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN:
1075 return AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET;
1076 case AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFEN:
1077 return AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFSET;
1078 case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN:
1079 return AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET;
1080 case AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFEN:
1081 return AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFSET;
1082 case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN:
1083 return AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET;
1084 case AMDGPU::BUFFER_LOAD_SHORT_D16_OFFEN:
1085 return AMDGPU::BUFFER_LOAD_SHORT_D16_OFFSET;
1086 case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN:
1087 return AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET;
1095 case AMDGPU::BUFFER_STORE_DWORD_OFFSET:
1096 return AMDGPU::BUFFER_STORE_DWORD_OFFEN;
1097 case AMDGPU::BUFFER_STORE_BYTE_OFFSET:
1098 return AMDGPU::BUFFER_STORE_BYTE_OFFEN;
1099 case AMDGPU::BUFFER_STORE_SHORT_OFFSET:
1100 return AMDGPU::BUFFER_STORE_SHORT_OFFEN;
1101 case AMDGPU::BUFFER_STORE_DWORDX2_OFFSET:
1102 return AMDGPU::BUFFER_STORE_DWORDX2_OFFEN;
1103 case AMDGPU::BUFFER_STORE_DWORDX3_OFFSET:
1104 return AMDGPU::BUFFER_STORE_DWORDX3_OFFEN;
1105 case AMDGPU::BUFFER_STORE_DWORDX4_OFFSET:
1106 return AMDGPU::BUFFER_STORE_DWORDX4_OFFEN;
1107 case AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFSET:
1108 return AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFEN;
1109 case AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFSET:
1110 return AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFEN;
1118 case AMDGPU::BUFFER_LOAD_DWORD_OFFSET:
1119 return AMDGPU::BUFFER_LOAD_DWORD_OFFEN;
1120 case AMDGPU::BUFFER_LOAD_UBYTE_OFFSET:
1121 return AMDGPU::BUFFER_LOAD_UBYTE_OFFEN;
1122 case AMDGPU::BUFFER_LOAD_SBYTE_OFFSET:
1123 return AMDGPU::BUFFER_LOAD_SBYTE_OFFEN;
1124 case AMDGPU::BUFFER_LOAD_USHORT_OFFSET:
1125 return AMDGPU::BUFFER_LOAD_USHORT_OFFEN;
1126 case AMDGPU::BUFFER_LOAD_SSHORT_OFFSET:
1127 return AMDGPU::BUFFER_LOAD_SSHORT_OFFEN;
1128 case AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET:
1129 return AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN;
1130 case AMDGPU::BUFFER_LOAD_DWORDX3_OFFSET:
1131 return AMDGPU::BUFFER_LOAD_DWORDX3_OFFEN;
1132 case AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET:
1133 return AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN;
1134 case AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFSET:
1135 return AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFEN;
1136 case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET:
1137 return AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN;
1138 case AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFSET:
1139 return AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFEN;
1140 case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET:
1141 return AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN;
1142 case AMDGPU::BUFFER_LOAD_SHORT_D16_OFFSET:
1143 return AMDGPU::BUFFER_LOAD_SHORT_D16_OFFEN;
1144 case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET:
1145 return AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN;
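// The four helpers above translate MUBUF spill opcodes between the OFFEN form
// (offset supplied in a VGPR) and the OFFSET form (immediate-only offset),
// returning -1 when no direct counterpart exists.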
1154 int Index, unsigned Lane,
1155 unsigned ValueReg, bool IsKill) {
1162 if (Reg == AMDGPU::NoRegister)
1165 bool IsStore = MI->mayStore();
1169 unsigned Dst = IsStore ? Reg : ValueReg;
1170 unsigned Src = IsStore ? ValueReg : Reg;
1173 if (IsVGPR == TRI->isVGPR(MRI, ValueReg)) {
1183 unsigned Opc = (IsStore ^ IsVGPR) ? AMDGPU::V_ACCVGPR_WRITE_B32_e64
1184 : AMDGPU::V_ACCVGPR_READ_B32_e64;
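// Spilling between the VGPR and AGPR files is a lane copy: V_ACCVGPR_WRITE
// moves a value into an AGPR lane and V_ACCVGPR_READ moves it back out.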
1202 bool IsStore = MI->mayStore();
1204 unsigned Opc = MI->getOpcode();
1205 int LoadStoreOp = IsStore ?
1207 if (LoadStoreOp == -1)
1217 .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc))
1218 .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset))
1226 AMDGPU::OpName::vdata_in);
1228 NewMI.add(*VDataIn);
1233 unsigned LoadStoreOp,
1235 bool IsStore = TII->get(LoadStoreOp).mayStore();
1243 LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
1244 : AMDGPU::SCRATCH_LOAD_DWORD_SADDR;
1247 LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORDX2_SADDR
1248 : AMDGPU::SCRATCH_LOAD_DWORDX2_SADDR;
1251 LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORDX3_SADDR
1252 : AMDGPU::SCRATCH_LOAD_DWORDX3_SADDR;
1255 LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORDX4_SADDR
1256 : AMDGPU::SCRATCH_LOAD_DWORDX4_SADDR;
1272 unsigned LoadStoreOp, int Index, Register ValueReg, bool IsKill,
1275 assert((!RS || !LiveRegs) && "Only RS or LiveRegs can be set but not both");
1283 bool IsStore = Desc->mayStore();
1284 bool IsFlat = TII->isFLATScratch(LoadStoreOp);
1286 bool CanClobberSCC = false;
1287 bool Scavenged = false;
1297 unsigned EltSize = (IsFlat && !IsAGPR) ? std::min(RegWidth, 16u) : 4u;
1298 unsigned NumSubRegs = RegWidth / EltSize;
1299 unsigned Size = NumSubRegs * EltSize;
1300 unsigned RemSize = RegWidth - Size;
1301 unsigned NumRemSubRegs = RemSize ? 1 : 0;
1303 int64_t MaterializedOffset = Offset;
1305 int64_t MaxOffset = Offset + Size + RemSize - EltSize;
1306 int64_t ScratchOffsetRegDelta = 0;
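// The spill is emitted as NumSubRegs chunks of EltSize bytes plus at most one
// smaller remainder chunk; flat-scratch accesses of non-AGPR data may use up
// to 16-byte (dwordx4) chunks, everything else falls back to 4-byte pieces.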
1308 if (IsFlat && EltSize > 4) {
1310 Desc = &TII->get(LoadStoreOp);
1316 assert((IsFlat || ((Offset % EltSize) == 0)) &&
1317 "unexpected VGPR spill offset");
1324 bool UseVGPROffset = false;
1331 if (IsFlat && SGPRBase) {
1355 bool IsOffsetLegal =
1371 } else if (LiveRegs) {
1381 if (ScratchOffsetReg != AMDGPU::NoRegister && !CanClobberSCC)
1385 UseVGPROffset = true;
1393 TmpOffsetVGPR = Reg;
1400 } else if (!SOffset && CanClobberSCC) {
1411 if (!ScratchOffsetReg)
1413 SOffset = ScratchOffsetReg;
1414 ScratchOffsetRegDelta = Offset;
1422 if (!IsFlat && !UseVGPROffset)
1425 if (!UseVGPROffset && !SOffset)
1428 if (UseVGPROffset) {
1430 MaterializeVOffset(ScratchOffsetReg, TmpOffsetVGPR, Offset);
1431 } else if (ScratchOffsetReg == AMDGPU::NoRegister) {
1436 .addReg(ScratchOffsetReg)
1438 Add->getOperand(3).setIsDead();
1444 if (IsFlat && SOffset == AMDGPU::NoRegister) {
1446 && "Unexpected vaddr for flat scratch with a FI operand");
1448 if (UseVGPROffset) {
1455 Desc = &TII->get(LoadStoreOp);
1458 for (unsigned i = 0, e = NumSubRegs + NumRemSubRegs, RegOffset = 0; i != e;
1459 ++i, RegOffset += EltSize) {
1460 if (i == NumSubRegs) {
1464 Desc = &TII->get(LoadStoreOp);
1466 if (!IsFlat && UseVGPROffset) {
1469 Desc = &TII->get(NewLoadStoreOp);
1472 if (UseVGPROffset && TmpOffsetVGPR == TmpIntermediateVGPR) {
1479 MaterializeVOffset(ScratchOffsetReg, TmpOffsetVGPR, MaterializedOffset);
1482 unsigned NumRegs = EltSize / 4;
1488 unsigned SOffsetRegState = 0;
1490 const bool IsLastSubReg = i + 1 == e;
1499 bool NeedSuperRegDef = e > 1 && IsStore && i == 0;
1500 bool NeedSuperRegImpOperand = e > 1;
1504 unsigned RemEltSize = EltSize;
1512 for (int LaneS = (RegOffset + EltSize) / 4 - 1, Lane = LaneS,
1513 LaneE = RegOffset / 4;
1514 Lane >= LaneE; --Lane) {
1515 bool IsSubReg = e > 1 || EltSize > 4;
1520 if (!MIB.getInstr())
1522 if (NeedSuperRegDef || (IsSubReg && IsStore && Lane == LaneS && !i)) {
1524 NeedSuperRegDef = false;
1526 if (IsSubReg || NeedSuperRegImpOperand) {
1527 NeedSuperRegImpOperand = true;
1528 unsigned State = SrcDstRegState;
1530 State &= ~RegState::Kill;
1539 if (RemEltSize != EltSize) {
1540 assert(IsFlat && EltSize > 4);
1542 unsigned NumRegs = RemEltSize / 4;
1546 Desc = &TII->get(Opc);
1549 unsigned FinalReg = SubReg;
1554 if (!TmpIntermediateVGPR) {
1560 TII->get(AMDGPU::V_ACCVGPR_READ_B32_e64),
1561 TmpIntermediateVGPR)
1563 if (NeedSuperRegDef)
1567 SubReg = TmpIntermediateVGPR;
1568 } else if (UseVGPROffset) {
1570 if (!TmpOffsetVGPR) {
1585 if (UseVGPROffset) {
1594 if (SOffset == AMDGPU::NoRegister) {
1596 if (UseVGPROffset && ScratchOffsetReg) {
1598 MIB.addReg(ScratchOffsetReg);
1605 MIB.addReg(SOffset, SOffsetRegState);
1607 MIB.addImm(Offset + RegOffset)
1612 MIB.addMemOperand(NewMMO);
1614 if (!IsAGPR && NeedSuperRegDef)
1617 if (!IsStore && IsAGPR && TmpIntermediateVGPR != AMDGPU::NoRegister) {
1624 if (NeedSuperRegImpOperand)
1628 if (ScratchOffsetRegDelta != 0) {
1632 .addImm(-ScratchOffsetRegDelta);
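// If the SGPR scratch offset register was adjusted to reach a far offset, the
// subtraction above restores its original value before the spill code ends.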
1637 int Offset, bool IsLoad,
1638 bool IsKill) const {
1656 : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
1658 FrameReg, Offset * SB.EltSize, MMO, SB.RS);
1661 : AMDGPU::BUFFER_STORE_DWORD_OFFSET;
1663 FrameReg, Offset * SB.EltSize, MMO, SB.RS);
1673 bool OnlyToVGPR) const {
1678 bool SpillToVGPR = !VGPRSpills.empty();
1679 if (OnlyToVGPR && !SpillToVGPR)
1688 "Num of VGPR lanes should be equal to num of SGPRs spilled");
1702 SB.TII.get(AMDGPU::V_WRITELANE_B32), Spill.VGPR)
1735 for (unsigned Offset = 0; Offset < PVD.NumVGPRs; ++Offset) {
1739 for (unsigned i = Offset * PVD.PerVGPR,
1766 unsigned SuperKillState = 0;
1780 MI->eraseFromParent();
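// SGPR spills are lowered here: each 32-bit SGPR is written into a lane of the
// reserved spill VGPR with V_WRITELANE_B32, or staged through a temporary VGPR
// and stored to scratch memory when no lane VGPR is available.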
1793 bool OnlyToVGPR) const {
1798 bool SpillToVGPR = !VGPRSpills.empty();
1799 if (OnlyToVGPR && !SpillToVGPR)
1830 for (unsigned Offset = 0; Offset < PVD.NumVGPRs; ++Offset) {
1835 for (unsigned i = Offset * PVD.PerVGPR,
1843 bool LastSubReg = (i + 1 == e);
1845 SB.TII.get(AMDGPU::V_READLANE_B32), SubReg)
1862 MI->eraseFromParent();
1879 for (unsigned Offset = 0; Offset < PVD.NumVGPRs; ++Offset) {
1882 for (unsigned i = Offset * PVD.PerVGPR,
1901 unsigned SuperKillState = 0;
1911 MI = RestoreMBB.end();
1914 for (unsigned Offset = 0; Offset < PVD.NumVGPRs; ++Offset) {
1917 for (unsigned i = Offset * PVD.PerVGPR,
1924 bool LastSubReg = (i + 1 == e);
1947 switch (MI->getOpcode()) {
1948 case AMDGPU::SI_SPILL_S1024_SAVE:
1949 case AMDGPU::SI_SPILL_S512_SAVE:
1950 case AMDGPU::SI_SPILL_S256_SAVE:
1951 case AMDGPU::SI_SPILL_S224_SAVE:
1952 case AMDGPU::SI_SPILL_S192_SAVE:
1953 case AMDGPU::SI_SPILL_S160_SAVE:
1954 case AMDGPU::SI_SPILL_S128_SAVE:
1955 case AMDGPU::SI_SPILL_S96_SAVE:
1956 case AMDGPU::SI_SPILL_S64_SAVE:
1957 case AMDGPU::SI_SPILL_S32_SAVE:
1959 case AMDGPU::SI_SPILL_S1024_RESTORE:
1960 case AMDGPU::SI_SPILL_S512_RESTORE:
1961 case AMDGPU::SI_SPILL_S256_RESTORE:
1962 case AMDGPU::SI_SPILL_S224_RESTORE:
1963 case AMDGPU::SI_SPILL_S192_RESTORE:
1964 case AMDGPU::SI_SPILL_S160_RESTORE:
1965 case AMDGPU::SI_SPILL_S128_RESTORE:
1966 case AMDGPU::SI_SPILL_S96_RESTORE:
1967 case AMDGPU::SI_SPILL_S64_RESTORE:
1968 case AMDGPU::SI_SPILL_S32_RESTORE:
1976 int SPAdj, unsigned FIOperandNum,
1985 assert(SPAdj == 0 && "unhandled SP adjustment in call sequence?");
1988 int Index = MI->getOperand(FIOperandNum).getIndex();
1994 switch (MI->getOpcode()) {
1996 case AMDGPU::SI_SPILL_S1024_SAVE:
1997 case AMDGPU::SI_SPILL_S512_SAVE:
1998 case AMDGPU::SI_SPILL_S256_SAVE:
1999 case AMDGPU::SI_SPILL_S224_SAVE:
2000 case AMDGPU::SI_SPILL_S192_SAVE:
2001 case AMDGPU::SI_SPILL_S160_SAVE:
2002 case AMDGPU::SI_SPILL_S128_SAVE:
2003 case AMDGPU::SI_SPILL_S96_SAVE:
2004 case AMDGPU::SI_SPILL_S64_SAVE:
2005 case AMDGPU::SI_SPILL_S32_SAVE: {
2011 case AMDGPU::SI_SPILL_S1024_RESTORE:
2012 case AMDGPU::SI_SPILL_S512_RESTORE:
2013 case AMDGPU::SI_SPILL_S256_RESTORE:
2014 case AMDGPU::SI_SPILL_S224_RESTORE:
2015 case AMDGPU::SI_SPILL_S192_RESTORE:
2016 case AMDGPU::SI_SPILL_S160_RESTORE:
2017 case AMDGPU::SI_SPILL_S128_RESTORE:
2018 case AMDGPU::SI_SPILL_S96_RESTORE:
2019 case AMDGPU::SI_SPILL_S64_RESTORE:
2020 case AMDGPU::SI_SPILL_S32_RESTORE: {
2026 case AMDGPU::SI_SPILL_V1024_SAVE:
2027 case AMDGPU::SI_SPILL_V512_SAVE:
2028 case AMDGPU::SI_SPILL_V256_SAVE:
2029 case AMDGPU::SI_SPILL_V224_SAVE:
2030 case AMDGPU::SI_SPILL_V192_SAVE:
2031 case AMDGPU::SI_SPILL_V160_SAVE:
2032 case AMDGPU::SI_SPILL_V128_SAVE:
2033 case AMDGPU::SI_SPILL_V96_SAVE:
2034 case AMDGPU::SI_SPILL_V64_SAVE:
2035 case AMDGPU::SI_SPILL_V32_SAVE:
2036 case AMDGPU::SI_SPILL_A1024_SAVE:
2037 case AMDGPU::SI_SPILL_A512_SAVE:
2038 case AMDGPU::SI_SPILL_A256_SAVE:
2039 case AMDGPU::SI_SPILL_A224_SAVE:
2040 case AMDGPU::SI_SPILL_A192_SAVE:
2041 case AMDGPU::SI_SPILL_A160_SAVE:
2042 case AMDGPU::SI_SPILL_A128_SAVE:
2043 case AMDGPU::SI_SPILL_A96_SAVE:
2044 case AMDGPU::SI_SPILL_A64_SAVE:
2045 case AMDGPU::SI_SPILL_A32_SAVE:
2046 case AMDGPU::SI_SPILL_AV1024_SAVE:
2047 case AMDGPU::SI_SPILL_AV512_SAVE:
2048 case AMDGPU::SI_SPILL_AV256_SAVE:
2049 case AMDGPU::SI_SPILL_AV224_SAVE:
2050 case AMDGPU::SI_SPILL_AV192_SAVE:
2051 case AMDGPU::SI_SPILL_AV160_SAVE:
2052 case AMDGPU::SI_SPILL_AV128_SAVE:
2053 case AMDGPU::SI_SPILL_AV96_SAVE:
2054 case AMDGPU::SI_SPILL_AV64_SAVE:
2055 case AMDGPU::SI_SPILL_AV32_SAVE: {
2057 AMDGPU::OpName::vdata);
2058 assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
2062 : AMDGPU::BUFFER_STORE_DWORD_OFFSET;
2063 auto *MBB = MI->getParent();
2066 TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
2067 *MI->memoperands_begin(), RS);
2069 MI->eraseFromParent();
2072 case AMDGPU::SI_SPILL_V32_RESTORE:
2073 case AMDGPU::SI_SPILL_V64_RESTORE:
2074 case AMDGPU::SI_SPILL_V96_RESTORE:
2075 case AMDGPU::SI_SPILL_V128_RESTORE:
2076 case AMDGPU::SI_SPILL_V160_RESTORE:
2077 case AMDGPU::SI_SPILL_V192_RESTORE:
2078 case AMDGPU::SI_SPILL_V224_RESTORE:
2079 case AMDGPU::SI_SPILL_V256_RESTORE:
2080 case AMDGPU::SI_SPILL_V512_RESTORE:
2081 case AMDGPU::SI_SPILL_V1024_RESTORE:
2082 case AMDGPU::SI_SPILL_A32_RESTORE:
2083 case AMDGPU::SI_SPILL_A64_RESTORE:
2084 case AMDGPU::SI_SPILL_A96_RESTORE:
2085 case AMDGPU::SI_SPILL_A128_RESTORE:
2086 case AMDGPU::SI_SPILL_A160_RESTORE:
2087 case AMDGPU::SI_SPILL_A192_RESTORE:
2088 case AMDGPU::SI_SPILL_A224_RESTORE:
2089 case AMDGPU::SI_SPILL_A256_RESTORE:
2090 case AMDGPU::SI_SPILL_A512_RESTORE:
2091 case AMDGPU::SI_SPILL_A1024_RESTORE:
2092 case AMDGPU::SI_SPILL_AV32_RESTORE:
2093 case AMDGPU::SI_SPILL_AV64_RESTORE:
2094 case AMDGPU::SI_SPILL_AV96_RESTORE:
2095 case AMDGPU::SI_SPILL_AV128_RESTORE:
2096 case AMDGPU::SI_SPILL_AV160_RESTORE:
2097 case AMDGPU::SI_SPILL_AV192_RESTORE:
2098 case AMDGPU::SI_SPILL_AV224_RESTORE:
2099 case AMDGPU::SI_SPILL_AV256_RESTORE:
2100 case AMDGPU::SI_SPILL_AV512_RESTORE:
2101 case AMDGPU::SI_SPILL_AV1024_RESTORE: {
2103 AMDGPU::OpName::vdata);
2104 assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
2108 : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
2109 auto *MBB = MI->getParent();
2112 TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
2113 *MI->memoperands_begin(), RS);
2114 MI->eraseFromParent();
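// All remaining frame-index users fall through to the default path below,
// which folds the object offset into the instruction when legal or otherwise
// materializes the address in a scratch register.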
2122 int64_t Offset = FrameInfo.getObjectOffset(Index);
2124 if (TII->isFLATScratch(*MI)) {
2125 assert((int16_t)FIOperandNum ==
2127 AMDGPU::OpName::saddr));
2137 TII->getNamedOperand(*MI, AMDGPU::OpName::offset);
2138 int64_t NewOffset = Offset + OffsetOp->getImm();
2141 OffsetOp->setImm(NewOffset);
2148 unsigned Opc = MI->getOpcode();
2161 MI->setDesc(TII->get(NewOpc));
2169 if (TII->isImmOperandLegal(*MI, FIOperandNum, FIOp))
2176 bool UseSGPR = TII->isOperandLegal(*MI, FIOperandNum, &FIOp);
2178 if (!Offset && FrameReg && UseSGPR) {
2184 : &AMDGPU::VGPR_32RegClass;
2190 if ((!FrameReg || !Offset) && TmpReg) {
2191 unsigned Opc = UseSGPR ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
2194 MIB.addReg(FrameReg);
2208 if ((!TmpSReg && !FrameReg) || (!TmpReg && !UseSGPR))
2226 if (TmpSReg == FrameReg) {
2237 bool IsMUBUF = TII->isMUBUF(*MI);
2245 ? &AMDGPU::SReg_32RegClass
2246 : &AMDGPU::VGPR_32RegClass;
2247 bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32 ||
2248 MI->getOpcode() == AMDGPU::V_MOV_B32_e64;
2249 Register ResultReg = IsCopy ? MI->getOperand(0).getReg()
2252 int64_t Offset = FrameInfo.getObjectOffset(Index);
2254 unsigned OpCode = IsSALU && !LiveSCC ? AMDGPU::S_LSHR_B32
2255 : AMDGPU::V_LSHRREV_B32_e64;
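// When the object offset is zero, the frame register only needs to be turned
// into a per-lane (swizzled) address, which this right shift by the wave-size
// log2 accomplishes; S_LSHR is used if SCC is free, V_LSHRREV otherwise.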
2260 if (IsSALU && !LiveSCC)
2261 Shift.getInstr()->getOperand(3).setIsDead(
2263 if (IsSALU && LiveSCC) {
2269 ResultReg = NewDest;
2274 if ((MIB = TII->getAddNoCarry(*MBB, MI, DL, ResultReg, *RS)) !=
2284 const bool IsVOP2 = MIB->getOpcode() == AMDGPU::V_ADD_U32_e32;
2295 "Need to reuse carry out register");
2300 ConstOffsetReg = getSubReg(MIB.getReg(1), AMDGPU::sub0);
2302 ConstOffsetReg = MIB.getReg(1);
2312 if (!MIB || IsSALU) {
2321 Register ScaledReg = TmpScaledReg.isValid() ? TmpScaledReg : FrameReg;
2333 ResultReg = ScaledReg;
2336 if (!TmpScaledReg.isValid()) {
2349 MI->eraseFromParent();
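// The non-MUBUF path above materializes the swizzled frame address
// ((FrameReg >> wave-size log2) + offset) into ResultReg and, when the user
// was just a V_MOV copy, erases the now-redundant move.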
2357 assert(static_cast<int>(FIOperandNum) ==
2359 AMDGPU::OpName::vaddr));
2361 auto &SOffset = *TII->getNamedOperand(*MI, AMDGPU::OpName::soffset);
2362 assert((SOffset.isImm() && SOffset.getImm() == 0));
2364 if (FrameReg != AMDGPU::NoRegister)
2365 SOffset.ChangeToRegister(FrameReg, false);
2367 int64_t Offset = FrameInfo.getObjectOffset(Index);
2369 = TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm();
2370 int64_t NewOffset = OldImm + Offset;
2374 MI->eraseFromParent();
2383 if (!TII->isImmOperandLegal(*MI, FIOperandNum, FIOp)) {
2400 return &AMDGPU::VReg_64RegClass;
2402 return &AMDGPU::VReg_96RegClass;
2404 return &AMDGPU::VReg_128RegClass;
2406 return &AMDGPU::VReg_160RegClass;
2408 return &AMDGPU::VReg_192RegClass;
2410 return &AMDGPU::VReg_224RegClass;
2412 return &AMDGPU::VReg_256RegClass;
2414 return &AMDGPU::VReg_512RegClass;
2416 return &AMDGPU::VReg_1024RegClass;
2424 return &AMDGPU::VReg_64_Align2RegClass;
2426 return &AMDGPU::VReg_96_Align2RegClass;
2428 return &AMDGPU::VReg_128_Align2RegClass;
2430 return &AMDGPU::VReg_160_Align2RegClass;
2432 return &AMDGPU::VReg_192_Align2RegClass;
2434 return &AMDGPU::VReg_224_Align2RegClass;
2436 return &AMDGPU::VReg_256_Align2RegClass;
2438 return &AMDGPU::VReg_512_Align2RegClass;
2440 return &AMDGPU::VReg_1024_Align2RegClass;
2448 return &AMDGPU::VReg_1RegClass;
2450 return &AMDGPU::VGPR_LO16RegClass;
2452 return &AMDGPU::VGPR_32RegClass;
2460 return &AMDGPU::AReg_64RegClass;
2462 return &AMDGPU::AReg_96RegClass;
2464 return &AMDGPU::AReg_128RegClass;
2466 return &AMDGPU::AReg_160RegClass;
2468 return &AMDGPU::AReg_192RegClass;
2470 return &AMDGPU::AReg_224RegClass;
2472 return &AMDGPU::AReg_256RegClass;
2474 return &AMDGPU::AReg_512RegClass;
2476 return &AMDGPU::AReg_1024RegClass;
2484 return &AMDGPU::AReg_64_Align2RegClass;
2486 return &AMDGPU::AReg_96_Align2RegClass;
2488 return &AMDGPU::AReg_128_Align2RegClass;
2490 return &AMDGPU::AReg_160_Align2RegClass;
2492 return &AMDGPU::AReg_192_Align2RegClass;
2494 return &AMDGPU::AReg_224_Align2RegClass;
2496 return &AMDGPU::AReg_256_Align2RegClass;
2498 return &AMDGPU::AReg_512_Align2RegClass;
2500 return &AMDGPU::AReg_1024_Align2RegClass;
2508 return &AMDGPU::AGPR_LO16RegClass;
2510 return &AMDGPU::AGPR_32RegClass;
2518 return &AMDGPU::AV_64RegClass;
2520 return &AMDGPU::AV_96RegClass;
2522 return &AMDGPU::AV_128RegClass;
2524 return &AMDGPU::AV_160RegClass;
2526 return &AMDGPU::AV_192RegClass;
2528 return &AMDGPU::AV_224RegClass;
2530 return &AMDGPU::AV_256RegClass;
2532 return &AMDGPU::AV_512RegClass;
2534 return &AMDGPU::AV_1024RegClass;
2542 return &AMDGPU::AV_64_Align2RegClass;
2544 return &AMDGPU::AV_96_Align2RegClass;
2546 return &AMDGPU::AV_128_Align2RegClass;
2548 return &AMDGPU::AV_160_Align2RegClass;
2550 return &AMDGPU::AV_192_Align2RegClass;
2552 return &AMDGPU::AV_224_Align2RegClass;
2554 return &AMDGPU::AV_256_Align2RegClass;
2556 return &AMDGPU::AV_512_Align2RegClass;
2558 return &AMDGPU::AV_1024_Align2RegClass;
2566 return &AMDGPU::VGPR_LO16RegClass;
2568 return &AMDGPU::AV_32RegClass;
2577 return &AMDGPU::SGPR_LO16RegClass;
2579 return &AMDGPU::SReg_32RegClass;
2581 return &AMDGPU::SReg_64RegClass;
2583 return &AMDGPU::SGPR_96RegClass;
2585 return &AMDGPU::SGPR_128RegClass;
2587 return &AMDGPU::SGPR_160RegClass;
2589 return &AMDGPU::SGPR_192RegClass;
2591 return &AMDGPU::SGPR_224RegClass;
2593 return &AMDGPU::SGPR_256RegClass;
2595 return &AMDGPU::SGPR_512RegClass;
2597 return &AMDGPU::SGPR_1024RegClass;
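// The table that follows lists every base register class, ordered roughly by
// size, and is scanned linearly to recover the class of a physical register.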
2607 &AMDGPU::VGPR_LO16RegClass,
2608 &AMDGPU::VGPR_HI16RegClass,
2609 &AMDGPU::SReg_LO16RegClass,
2610 &AMDGPU::AGPR_LO16RegClass,
2611 &AMDGPU::VGPR_32RegClass,
2612 &AMDGPU::SReg_32RegClass,
2613 &AMDGPU::AGPR_32RegClass,
2614 &AMDGPU::AGPR_32RegClass,
2615 &AMDGPU::VReg_64_Align2RegClass,
2616 &AMDGPU::VReg_64RegClass,
2617 &AMDGPU::SReg_64RegClass,
2618 &AMDGPU::AReg_64_Align2RegClass,
2619 &AMDGPU::AReg_64RegClass,
2620 &AMDGPU::VReg_96_Align2RegClass,
2621 &AMDGPU::VReg_96RegClass,
2622 &AMDGPU::SReg_96RegClass,
2623 &AMDGPU::AReg_96_Align2RegClass,
2624 &AMDGPU::AReg_96RegClass,
2625 &AMDGPU::VReg_128_Align2RegClass,
2626 &AMDGPU::VReg_128RegClass,
2627 &AMDGPU::SReg_128RegClass,
2628 &AMDGPU::AReg_128_Align2RegClass,
2629 &AMDGPU::AReg_128RegClass,
2630 &AMDGPU::VReg_160_Align2RegClass,
2631 &AMDGPU::VReg_160RegClass,
2632 &AMDGPU::SReg_160RegClass,
2633 &AMDGPU::AReg_160_Align2RegClass,
2634 &AMDGPU::AReg_160RegClass,
2635 &AMDGPU::VReg_192_Align2RegClass,
2636 &AMDGPU::VReg_192RegClass,
2637 &AMDGPU::SReg_192RegClass,
2638 &AMDGPU::AReg_192_Align2RegClass,
2639 &AMDGPU::AReg_192RegClass,
2640 &AMDGPU::VReg_224_Align2RegClass,
2641 &AMDGPU::VReg_224RegClass,
2642 &AMDGPU::SReg_224RegClass,
2643 &AMDGPU::AReg_224_Align2RegClass,
2644 &AMDGPU::AReg_224RegClass,
2645 &AMDGPU::VReg_256_Align2RegClass,
2646 &AMDGPU::VReg_256RegClass,
2647 &AMDGPU::SReg_256RegClass,
2648 &AMDGPU::AReg_256_Align2RegClass,
2649 &AMDGPU::AReg_256RegClass,
2650 &AMDGPU::VReg_512_Align2RegClass,
2651 &AMDGPU::VReg_512RegClass,
2652 &AMDGPU::SReg_512RegClass,
2653 &AMDGPU::AReg_512_Align2RegClass,
2654 &AMDGPU::AReg_512RegClass,
2655 &AMDGPU::SReg_1024RegClass,
2656 &AMDGPU::VReg_1024_Align2RegClass,
2657 &AMDGPU::VReg_1024RegClass,
2658 &AMDGPU::AReg_1024_Align2RegClass,
2659 &AMDGPU::AReg_1024RegClass,
2660 &AMDGPU::SCC_CLASSRegClass,
2661 &AMDGPU::Pseudo_SReg_32RegClass,
2662 &AMDGPU::Pseudo_SReg_128RegClass,
2666 if (BaseClass->contains(Reg)) {
2676 if (Reg.isVirtual())
2685 unsigned Size = getRegSizeInBits(*SRC);
2687 assert(VRC && "Invalid register class size");
2693 unsigned Size = getRegSizeInBits(*SRC);
2695 assert(ARC && "Invalid register class size");
2701 unsigned Size = getRegSizeInBits(*VRC);
2703 return &AMDGPU::SGPR_32RegClass;
2705 assert(SRC && "Invalid register class size");
2711 if (SubIdx == AMDGPU::NoSubRegister)
2725 assert(RC && "Invalid sub-register class size");
2732 unsigned SubIdx) const {
2735 getMatchingSuperRegClass(SuperRC, SubRC, SubIdx);
2736 return MatchRC && MatchRC->hasSubClassEq(SuperRC) ? MatchRC : nullptr;
2752 unsigned SrcSubReg) const {
2769 return getCommonSubClass(DefRC, SrcRC) != nullptr;
2785 bool ReserveHighestVGPR) const {
2786 if (ReserveHighestVGPR) {
2799 unsigned EltSize) const {
2801 assert(RegBitWidth >= 32 && RegBitWidth <= 1024);
2803 const unsigned RegDWORDs = RegBitWidth / 32;
2804 const unsigned EltDWORDs = EltSize / 4;
2805 assert(RegSplitParts.size() + 1 >= EltDWORDs);
2807 const std::vector<int16_t> &Parts = RegSplitParts[EltDWORDs - 1];
2808 const unsigned NumParts = RegDWORDs / EltDWORDs;
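// RegSplitParts[EltDWORDs - 1] holds the sub-register indices that carve a
// register into EltSize-byte pieces; e.g. a 128-bit register split with
// EltSize = 8 yields the parts sub0_sub1 and sub2_sub3.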
2841 unsigned SrcSize = getRegSizeInBits(*SrcRC);
2842 unsigned DstSize = getRegSizeInBits(*DstRC);
2843 unsigned NewSize = getRegSizeInBits(*NewRC);
2849 if (SrcSize <= 32 || DstSize <= 32)
2852 return NewSize <= DstSize || NewSize <= SrcSize;
2861 switch (RC->getID()) {
2863 return AMDGPUGenRegisterInfo::getRegPressureLimit(RC, MF);
2864 case AMDGPU::VGPR_32RegClassID:
2865 case AMDGPU::VGPR_LO16RegClassID:
2866 case AMDGPU::VGPR_HI16RegClassID:
2868 case AMDGPU::SGPR_32RegClassID:
2869 case AMDGPU::SGPR_LO16RegClassID:
2875 unsigned Idx) const {
2876 if (Idx == AMDGPU::RegisterPressureSets::VGPR_32 ||
2877 Idx == AMDGPU::RegisterPressureSets::AGPR_32)
2881 if (Idx == AMDGPU::RegisterPressureSets::SReg_32)
2889 static const int Empty[] = { -1 };
2891 if (RegPressureIgnoredUnits[RegUnit])
2894 return AMDGPUGenRegisterInfo::getRegUnitPressureSets(RegUnit);
2899 return AMDGPU::SGPR30_SGPR31;
2905 switch (RB.getID()) {
2906 case AMDGPU::VGPRRegBankID:
2908 case AMDGPU::VCCRegBankID:
2910 return isWave32 ? &AMDGPU::SReg_32_XM0_XEXECRegClass
2911 : &AMDGPU::SReg_64_XEXECRegClass;
2912 case AMDGPU::SGPRRegBankID:
2914 case AMDGPU::AGPRRegBankID:
2929 return getAllocatableClass(RC);
2935 return isWave32 ? AMDGPU::VCC_LO : AMDGPU::VCC;
2941 : &AMDGPU::VReg_64RegClass;
2946 switch ((int)RCID) {
2947 case AMDGPU::SReg_1RegClassID:
2949 case AMDGPU::SReg_1_XEXECRegClassID:
2950 return isWave32 ? &AMDGPU::SReg_32_XM0_XEXECRegClass
2951 : &AMDGPU::SReg_64_XEXECRegClass;
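// SReg_1 and SReg_1_XEXEC are pseudo classes for lane masks; they resolve to
// the 32-bit SGPR classes in wave32 mode and the 64-bit ones in wave64 mode.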
2968 if (Reg.isVirtual()) {
2977 if ((S.LaneMask & SubLanes) == SubLanes) {
2978 V = S.getVNInfoAt(UseIdx);
3018 AMDGPU::SReg_32RegClass,
3019 AMDGPU::AGPR_32RegClass } ) {
3023 if (MCPhysReg Super = getMatchingSuperReg(Reg, AMDGPU::hi16,
3024 &AMDGPU::VGPR_32RegClass)) {
3028 return AMDGPU::NoRegister;
3051 unsigned Size = getRegSizeInBits(*RC);
3067 case AMDGPU::SGPR_NULL:
3068 case AMDGPU::SRC_SHARED_BASE:
3069 case AMDGPU::SRC_PRIVATE_BASE:
3070 case AMDGPU::SRC_SHARED_LIMIT:
3071 case AMDGPU::SRC_PRIVATE_LIMIT: