#define GET_REGINFO_TARGET_DESC
#include "AMDGPUGenRegisterInfo.inc"
33 "amdgpu-spill-sgpr-to-vgpr",
34 cl::desc(
"Enable spilling SGPRs to VGPRs"),
std::array<std::vector<int16_t>, 16> SIRegisterInfo::RegSplitParts;
std::array<std::array<uint16_t, 32>, 9> SIRegisterInfo::SubRegFromChannelTable;
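// Maps a sub-register width, in units of 32-bit registers, to 1 + the row
// index of SubRegFromChannelTable; widths with no table row map to 0 and trip
// the "Not implemented" assert in getSubRegFromChannel.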
static const std::array<unsigned, 17> SubRegFromChannelTableWidthMap = {
    0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 9};
                        MI->getOperand(0).isKill(), Index, RS) {}
      MovOpc = AMDGPU::S_MOV_B32;
      NotOpc = AMDGPU::S_NOT_B32;
    } else {
      MovOpc = AMDGPU::S_MOV_B64;
      NotOpc = AMDGPU::S_NOT_B64;
    }
    assert(SuperReg != AMDGPU::EXEC && "exec should never spill");
    assert(RS && "Cannot spill SGPR to memory without RegScavenger");
        IsWave32 ? AMDGPU::SGPR_32RegClass : AMDGPU::SGPR_64RegClass;
      MI->emitError("unhandled SGPR spill to memory");
      I->getOperand(2).setIsDead();

      I->getOperand(2).setIsDead();
      MI->emitError("unhandled SGPR spill to memory");
  assert(getSubRegIndexLaneMask(AMDGPU::sub0).getAsInteger() == 3 &&
         getSubRegIndexLaneMask(AMDGPU::sub31).getAsInteger() == (3ULL << 62) &&
         (getSubRegIndexLaneMask(AMDGPU::lo16) |
          getSubRegIndexLaneMask(AMDGPU::hi16)).getAsInteger() ==
             getSubRegIndexLaneMask(AMDGPU::sub0).getAsInteger() &&
         "getNumCoveredRegs() will not work with generated subreg masks!");
  RegPressureIgnoredUnits.resize(getNumRegUnits());
  for (auto Reg : AMDGPU::VGPR_HI16RegClass)
    RegPressureIgnoredUnits.set(*regunits(Reg).begin());
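  // RegSplitParts is indexed by (element size in 32-bit registers) - 1; each
  // row holds the sub-register indices, ordered by offset, that carve a
  // 1024-bit tuple into equally sized parts. It is computed once per process
  // by the lambda below.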
  static auto InitializeRegSplitPartsOnce = [this]() {
    for (unsigned Idx = 1, E = getNumSubRegIndices() - 1; Idx < E; ++Idx) {
      unsigned Size = getSubRegIdxSize(Idx);
      std::vector<int16_t> &Vec = RegSplitParts[Size / 32 - 1];
      unsigned Pos = getSubRegIdxOffset(Idx);
      unsigned MaxNumParts = 1024 / Size;
      Vec.resize(MaxNumParts);
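  // SubRegFromChannelTable[Width - 1][Channel] yields the sub-register index
  // covering Width consecutive 32-bit channels starting at Channel, or
  // AMDGPU::NoSubRegister when no such index exists.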
  static auto InitializeSubRegFromChannelTableOnce = [this]() {
    for (auto &Row : SubRegFromChannelTable)
      Row.fill(AMDGPU::NoSubRegister);
    for (unsigned Idx = 1; Idx < getNumSubRegIndices(); ++Idx) {
      unsigned Width = AMDGPUSubRegIdxRanges[Idx].Size / 32;
      unsigned Offset = AMDGPUSubRegIdxRanges[Idx].Offset / 32;
      unsigned TableIdx = Width - 1;
      assert(TableIdx < SubRegFromChannelTable.size());
      SubRegFromChannelTable[TableIdx][Offset] = Idx;

  llvm::call_once(InitializeRegSplitPartsFlag, InitializeRegSplitPartsOnce);
  llvm::call_once(InitializeSubRegFromChannelTableFlag,
                  InitializeSubRegFromChannelTableOnce);
                    : CSR_AMDGPU_SaveList;
    return ST.hasGFX90AInsts() ? CSR_AMDGPU_SI_Gfx_GFX90AInsts_SaveList
                               : CSR_AMDGPU_SI_Gfx_SaveList;
    static const MCPhysReg NoCalleeSavedReg = AMDGPU::NoRegister;
    return &NoCalleeSavedReg;

                    : CSR_AMDGPU_RegMask;
    return ST.hasGFX90AInsts() ? CSR_AMDGPU_SI_Gfx_GFX90AInsts_RegMask
                               : CSR_AMDGPU_SI_Gfx_RegMask;

  return CSR_AMDGPU_NoRegs_RegMask;
  if (RC == &AMDGPU::VGPR_32RegClass || RC == &AMDGPU::AGPR_32RegClass)
    return &AMDGPU::AV_32RegClass;
  if (RC == &AMDGPU::VReg_64RegClass || RC == &AMDGPU::AReg_64RegClass)
    return &AMDGPU::AV_64RegClass;
  if (RC == &AMDGPU::VReg_64_Align2RegClass ||
      RC == &AMDGPU::AReg_64_Align2RegClass)
    return &AMDGPU::AV_64_Align2RegClass;
  if (RC == &AMDGPU::VReg_96RegClass || RC == &AMDGPU::AReg_96RegClass)
    return &AMDGPU::AV_96RegClass;
  if (RC == &AMDGPU::VReg_96_Align2RegClass ||
      RC == &AMDGPU::AReg_96_Align2RegClass)
    return &AMDGPU::AV_96_Align2RegClass;
  if (RC == &AMDGPU::VReg_128RegClass || RC == &AMDGPU::AReg_128RegClass)
    return &AMDGPU::AV_128RegClass;
  if (RC == &AMDGPU::VReg_128_Align2RegClass ||
      RC == &AMDGPU::AReg_128_Align2RegClass)
    return &AMDGPU::AV_128_Align2RegClass;
  if (RC == &AMDGPU::VReg_160RegClass || RC == &AMDGPU::AReg_160RegClass)
    return &AMDGPU::AV_160RegClass;
  if (RC == &AMDGPU::VReg_160_Align2RegClass ||
      RC == &AMDGPU::AReg_160_Align2RegClass)
    return &AMDGPU::AV_160_Align2RegClass;
  if (RC == &AMDGPU::VReg_192RegClass || RC == &AMDGPU::AReg_192RegClass)
    return &AMDGPU::AV_192RegClass;
  if (RC == &AMDGPU::VReg_192_Align2RegClass ||
      RC == &AMDGPU::AReg_192_Align2RegClass)
    return &AMDGPU::AV_192_Align2RegClass;
  if (RC == &AMDGPU::VReg_256RegClass || RC == &AMDGPU::AReg_256RegClass)
    return &AMDGPU::AV_256RegClass;
  if (RC == &AMDGPU::VReg_256_Align2RegClass ||
      RC == &AMDGPU::AReg_256_Align2RegClass)
    return &AMDGPU::AV_256_Align2RegClass;
  if (RC == &AMDGPU::VReg_512RegClass || RC == &AMDGPU::AReg_512RegClass)
    return &AMDGPU::AV_512RegClass;
  if (RC == &AMDGPU::VReg_512_Align2RegClass ||
      RC == &AMDGPU::AReg_512_Align2RegClass)
    return &AMDGPU::AV_512_Align2RegClass;
  if (RC == &AMDGPU::VReg_1024RegClass || RC == &AMDGPU::AReg_1024RegClass)
    return &AMDGPU::AV_1024RegClass;
  if (RC == &AMDGPU::VReg_1024_Align2RegClass ||
      RC == &AMDGPU::AReg_1024_Align2RegClass)
    return &AMDGPU::AV_1024_Align2RegClass;
  return AMDGPU_AllVGPRs_RegMask;

  return AMDGPU_AllAGPRs_RegMask;

  return AMDGPU_AllVectorRegs_RegMask;

  return AMDGPU_AllAllocatableSRegs_RegMask;
  assert(NumRegIndex && "Not implemented");
  assert(Channel < SubRegFromChannelTable[NumRegIndex - 1].size());
  return SubRegFromChannelTable[NumRegIndex - 1][Channel];
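// For example, getSubRegFromChannel(0) is expected to yield AMDGPU::sub0 and
// getSubRegFromChannel(2, 2) the index covering channels 2-3 (sub2_sub3);
// widths without a table row hit the "Not implemented" assert above.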
                                                     const unsigned Align,
  MCRegister BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));
  return getMatchingSuperReg(BaseReg, AMDGPU::sub0, RC);
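// The reservations below (from getReservedRegs) hide registers from the
// allocator: special registers (EXEC, FLAT_SCR, M0, ...), trap temporaries
// (TTMPs), SGPRs/VGPRs/AGPRs beyond the subtarget occupancy limits, and any
// registers pinned for stack access (scratch RSRC, SP, FP, base pointer).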
  reserveRegisterTuples(Reserved, AMDGPU::EXEC);
  reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR);

  reserveRegisterTuples(Reserved, AMDGPU::M0);

  reserveRegisterTuples(Reserved, AMDGPU::SRC_VCCZ);
  reserveRegisterTuples(Reserved, AMDGPU::SRC_EXECZ);
  reserveRegisterTuples(Reserved, AMDGPU::SRC_SCC);

  reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_BASE);
  reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_LIMIT);
  reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_BASE);
  reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_LIMIT);

  reserveRegisterTuples(Reserved, AMDGPU::SRC_POPS_EXITING_WAVE_ID);

  reserveRegisterTuples(Reserved, AMDGPU::XNACK_MASK);

  reserveRegisterTuples(Reserved, AMDGPU::LDS_DIRECT);

  reserveRegisterTuples(Reserved, AMDGPU::TBA);
  reserveRegisterTuples(Reserved, AMDGPU::TMA);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP0_TTMP1);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP2_TTMP3);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP4_TTMP5);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP6_TTMP7);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP8_TTMP9);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP10_TTMP11);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP12_TTMP13);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP14_TTMP15);

  reserveRegisterTuples(Reserved, AMDGPU::SGPR_NULL64);
  unsigned TotalNumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
  for (unsigned i = MaxNumSGPRs; i < TotalNumSGPRs; ++i) {
    unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
    reserveRegisterTuples(Reserved, Reg);
  if (ScratchRSrcReg != AMDGPU::NoRegister) {
    reserveRegisterTuples(Reserved, ScratchRSrcReg);

  if (LongBranchReservedReg)
    reserveRegisterTuples(Reserved, LongBranchReservedReg);

    reserveRegisterTuples(Reserved, StackPtrReg);
    assert(!isSubRegister(ScratchRSrcReg, StackPtrReg));

    reserveRegisterTuples(Reserved, FrameReg);
    assert(!isSubRegister(ScratchRSrcReg, FrameReg));

    reserveRegisterTuples(Reserved, BasePtrReg);
    assert(!isSubRegister(ScratchRSrcReg, BasePtrReg));

    reserveRegisterTuples(Reserved, ExecCopyReg);
  unsigned MaxNumAGPRs = MaxNumVGPRs;
  unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();

      MaxNumAGPRs = MaxNumVGPRs;

      if (MaxNumVGPRs > TotalNumVGPRs) {
        MaxNumAGPRs = MaxNumVGPRs - TotalNumVGPRs;
        MaxNumVGPRs = TotalNumVGPRs;

  for (unsigned i = MaxNumVGPRs; i < TotalNumVGPRs; ++i) {
    unsigned Reg = AMDGPU::VGPR_32RegClass.getRegister(i);
    reserveRegisterTuples(Reserved, Reg);

    for (unsigned i = MaxNumAGPRs; i < TotalNumVGPRs; ++i) {
      unsigned Reg = AMDGPU::AGPR_32RegClass.getRegister(i);
      reserveRegisterTuples(Reserved, Reg);

    for (MCRegister Reg : AMDGPU::AGPR_32RegClass)
      reserveRegisterTuples(Reserved, Reg);

    reserveRegisterTuples(Reserved, Reg);

    reserveRegisterTuples(Reserved, Reg);

    reserveRegisterTuples(Reserved, Reg);
  if (Info->isEntryFunction())

  if (Info->isEntryFunction()) {
                                          AMDGPU::OpName::offset);
  return MI->getOperand(OffIdx).getImm();

                                   AMDGPU::OpName::vaddr) ||
                                   AMDGPU::OpName::saddr))) &&
         "Should never see frame index on non-address operand");
    DL = Ins->getDebugLoc();

                       : AMDGPU::V_MOV_B32_e32;
                       : &AMDGPU::VGPR_32RegClass);
  Register OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
                       : &AMDGPU::VGPR_32RegClass);

  TII->getAddNoCarry(*MBB, Ins, DL, BaseReg)
  bool IsFlat = TII->isFLATScratch(MI);

  MachineOperand *FIOp =
      TII->getNamedOperand(MI, IsFlat ? AMDGPU::OpName::saddr
                                      : AMDGPU::OpName::vaddr);

  assert(FIOp && FIOp->isFI() && "frame index must be address operand");
           "offset should be legal");
    OffsetOp->setImm(NewOffset);
         "offset should be legal");
  OffsetOp->setImm(NewOffset);
    return &AMDGPU::VGPR_32RegClass;

  if (RC == &AMDGPU::SCC_CLASSRegClass)
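// getNumSubRegsForSpillOp: map a spill pseudo to the number of 32-bit
// sub-registers it moves, i.e. the bit width in its name divided by 32
// (SI_SPILL_S1024_* -> 32 ... SI_SPILL_*32_* -> 1).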
  case AMDGPU::SI_SPILL_S1024_SAVE:
  case AMDGPU::SI_SPILL_S1024_RESTORE:
  case AMDGPU::SI_SPILL_V1024_SAVE:
  case AMDGPU::SI_SPILL_V1024_RESTORE:
  case AMDGPU::SI_SPILL_A1024_SAVE:
  case AMDGPU::SI_SPILL_A1024_RESTORE:
  case AMDGPU::SI_SPILL_AV1024_SAVE:
  case AMDGPU::SI_SPILL_AV1024_RESTORE:
    return 32;
  case AMDGPU::SI_SPILL_S512_SAVE:
  case AMDGPU::SI_SPILL_S512_RESTORE:
  case AMDGPU::SI_SPILL_V512_SAVE:
  case AMDGPU::SI_SPILL_V512_RESTORE:
  case AMDGPU::SI_SPILL_A512_SAVE:
  case AMDGPU::SI_SPILL_A512_RESTORE:
  case AMDGPU::SI_SPILL_AV512_SAVE:
  case AMDGPU::SI_SPILL_AV512_RESTORE:
    return 16;
  case AMDGPU::SI_SPILL_S384_SAVE:
  case AMDGPU::SI_SPILL_S384_RESTORE:
  case AMDGPU::SI_SPILL_V384_SAVE:
  case AMDGPU::SI_SPILL_V384_RESTORE:
  case AMDGPU::SI_SPILL_A384_SAVE:
  case AMDGPU::SI_SPILL_A384_RESTORE:
  case AMDGPU::SI_SPILL_AV384_SAVE:
  case AMDGPU::SI_SPILL_AV384_RESTORE:
    return 12;
  case AMDGPU::SI_SPILL_S352_SAVE:
  case AMDGPU::SI_SPILL_S352_RESTORE:
  case AMDGPU::SI_SPILL_V352_SAVE:
  case AMDGPU::SI_SPILL_V352_RESTORE:
  case AMDGPU::SI_SPILL_A352_SAVE:
  case AMDGPU::SI_SPILL_A352_RESTORE:
  case AMDGPU::SI_SPILL_AV352_SAVE:
  case AMDGPU::SI_SPILL_AV352_RESTORE:
    return 11;
  case AMDGPU::SI_SPILL_S320_SAVE:
  case AMDGPU::SI_SPILL_S320_RESTORE:
  case AMDGPU::SI_SPILL_V320_SAVE:
  case AMDGPU::SI_SPILL_V320_RESTORE:
  case AMDGPU::SI_SPILL_A320_SAVE:
  case AMDGPU::SI_SPILL_A320_RESTORE:
  case AMDGPU::SI_SPILL_AV320_SAVE:
  case AMDGPU::SI_SPILL_AV320_RESTORE:
    return 10;
  case AMDGPU::SI_SPILL_S288_SAVE:
  case AMDGPU::SI_SPILL_S288_RESTORE:
  case AMDGPU::SI_SPILL_V288_SAVE:
  case AMDGPU::SI_SPILL_V288_RESTORE:
  case AMDGPU::SI_SPILL_A288_SAVE:
  case AMDGPU::SI_SPILL_A288_RESTORE:
  case AMDGPU::SI_SPILL_AV288_SAVE:
  case AMDGPU::SI_SPILL_AV288_RESTORE:
    return 9;
  case AMDGPU::SI_SPILL_S256_SAVE:
  case AMDGPU::SI_SPILL_S256_RESTORE:
  case AMDGPU::SI_SPILL_V256_SAVE:
  case AMDGPU::SI_SPILL_V256_RESTORE:
  case AMDGPU::SI_SPILL_A256_SAVE:
  case AMDGPU::SI_SPILL_A256_RESTORE:
  case AMDGPU::SI_SPILL_AV256_SAVE:
  case AMDGPU::SI_SPILL_AV256_RESTORE:
    return 8;
  case AMDGPU::SI_SPILL_S224_SAVE:
  case AMDGPU::SI_SPILL_S224_RESTORE:
  case AMDGPU::SI_SPILL_V224_SAVE:
  case AMDGPU::SI_SPILL_V224_RESTORE:
  case AMDGPU::SI_SPILL_A224_SAVE:
  case AMDGPU::SI_SPILL_A224_RESTORE:
  case AMDGPU::SI_SPILL_AV224_SAVE:
  case AMDGPU::SI_SPILL_AV224_RESTORE:
    return 7;
  case AMDGPU::SI_SPILL_S192_SAVE:
  case AMDGPU::SI_SPILL_S192_RESTORE:
  case AMDGPU::SI_SPILL_V192_SAVE:
  case AMDGPU::SI_SPILL_V192_RESTORE:
  case AMDGPU::SI_SPILL_A192_SAVE:
  case AMDGPU::SI_SPILL_A192_RESTORE:
  case AMDGPU::SI_SPILL_AV192_SAVE:
  case AMDGPU::SI_SPILL_AV192_RESTORE:
    return 6;
  case AMDGPU::SI_SPILL_S160_SAVE:
  case AMDGPU::SI_SPILL_S160_RESTORE:
  case AMDGPU::SI_SPILL_V160_SAVE:
  case AMDGPU::SI_SPILL_V160_RESTORE:
  case AMDGPU::SI_SPILL_A160_SAVE:
  case AMDGPU::SI_SPILL_A160_RESTORE:
  case AMDGPU::SI_SPILL_AV160_SAVE:
  case AMDGPU::SI_SPILL_AV160_RESTORE:
    return 5;
  case AMDGPU::SI_SPILL_S128_SAVE:
  case AMDGPU::SI_SPILL_S128_RESTORE:
  case AMDGPU::SI_SPILL_V128_SAVE:
  case AMDGPU::SI_SPILL_V128_RESTORE:
  case AMDGPU::SI_SPILL_A128_SAVE:
  case AMDGPU::SI_SPILL_A128_RESTORE:
  case AMDGPU::SI_SPILL_AV128_SAVE:
  case AMDGPU::SI_SPILL_AV128_RESTORE:
    return 4;
  case AMDGPU::SI_SPILL_S96_SAVE:
  case AMDGPU::SI_SPILL_S96_RESTORE:
  case AMDGPU::SI_SPILL_V96_SAVE:
  case AMDGPU::SI_SPILL_V96_RESTORE:
  case AMDGPU::SI_SPILL_A96_SAVE:
  case AMDGPU::SI_SPILL_A96_RESTORE:
  case AMDGPU::SI_SPILL_AV96_SAVE:
  case AMDGPU::SI_SPILL_AV96_RESTORE:
    return 3;
  case AMDGPU::SI_SPILL_S64_SAVE:
  case AMDGPU::SI_SPILL_S64_RESTORE:
  case AMDGPU::SI_SPILL_V64_SAVE:
  case AMDGPU::SI_SPILL_V64_RESTORE:
  case AMDGPU::SI_SPILL_A64_SAVE:
  case AMDGPU::SI_SPILL_A64_RESTORE:
  case AMDGPU::SI_SPILL_AV64_SAVE:
  case AMDGPU::SI_SPILL_AV64_RESTORE:
    return 2;
  case AMDGPU::SI_SPILL_S32_SAVE:
  case AMDGPU::SI_SPILL_S32_RESTORE:
  case AMDGPU::SI_SPILL_V32_SAVE:
  case AMDGPU::SI_SPILL_V32_RESTORE:
  case AMDGPU::SI_SPILL_A32_SAVE:
  case AMDGPU::SI_SPILL_A32_RESTORE:
  case AMDGPU::SI_SPILL_AV32_SAVE:
  case AMDGPU::SI_SPILL_AV32_RESTORE:
  case AMDGPU::SI_SPILL_WWM_V32_SAVE:
  case AMDGPU::SI_SPILL_WWM_V32_RESTORE:
  case AMDGPU::SI_SPILL_WWM_AV32_SAVE:
  case AMDGPU::SI_SPILL_WWM_AV32_RESTORE:
    return 1;
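// The four helpers below (getOffsetMUBUFStore, getOffsetMUBUFLoad,
// getOffenMUBUFStore, getOffenMUBUFLoad) translate MUBUF scratch opcodes
// between their register-addressed (OFFEN) and immediate-addressed (OFFSET)
// forms, returning -1 when no counterpart exists.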
  case AMDGPU::BUFFER_STORE_DWORD_OFFEN:
    return AMDGPU::BUFFER_STORE_DWORD_OFFSET;
  case AMDGPU::BUFFER_STORE_BYTE_OFFEN:
    return AMDGPU::BUFFER_STORE_BYTE_OFFSET;
  case AMDGPU::BUFFER_STORE_SHORT_OFFEN:
    return AMDGPU::BUFFER_STORE_SHORT_OFFSET;
  case AMDGPU::BUFFER_STORE_DWORDX2_OFFEN:
    return AMDGPU::BUFFER_STORE_DWORDX2_OFFSET;
  case AMDGPU::BUFFER_STORE_DWORDX3_OFFEN:
    return AMDGPU::BUFFER_STORE_DWORDX3_OFFSET;
  case AMDGPU::BUFFER_STORE_DWORDX4_OFFEN:
    return AMDGPU::BUFFER_STORE_DWORDX4_OFFSET;
  case AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFEN:
    return AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFSET;
  case AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFEN:
    return AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFSET;

  case AMDGPU::BUFFER_LOAD_DWORD_OFFEN:
    return AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
  case AMDGPU::BUFFER_LOAD_UBYTE_OFFEN:
    return AMDGPU::BUFFER_LOAD_UBYTE_OFFSET;
  case AMDGPU::BUFFER_LOAD_SBYTE_OFFEN:
    return AMDGPU::BUFFER_LOAD_SBYTE_OFFSET;
  case AMDGPU::BUFFER_LOAD_USHORT_OFFEN:
    return AMDGPU::BUFFER_LOAD_USHORT_OFFSET;
  case AMDGPU::BUFFER_LOAD_SSHORT_OFFEN:
    return AMDGPU::BUFFER_LOAD_SSHORT_OFFSET;
  case AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN:
    return AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET;
  case AMDGPU::BUFFER_LOAD_DWORDX3_OFFEN:
    return AMDGPU::BUFFER_LOAD_DWORDX3_OFFSET;
  case AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN:
    return AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET;
  case AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFEN:
    return AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFSET;
  case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN:
    return AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET;
  case AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFEN:
    return AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFSET;
  case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN:
    return AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET;
  case AMDGPU::BUFFER_LOAD_SHORT_D16_OFFEN:
    return AMDGPU::BUFFER_LOAD_SHORT_D16_OFFSET;
  case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN:
    return AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET;

  case AMDGPU::BUFFER_STORE_DWORD_OFFSET:
    return AMDGPU::BUFFER_STORE_DWORD_OFFEN;
  case AMDGPU::BUFFER_STORE_BYTE_OFFSET:
    return AMDGPU::BUFFER_STORE_BYTE_OFFEN;
  case AMDGPU::BUFFER_STORE_SHORT_OFFSET:
    return AMDGPU::BUFFER_STORE_SHORT_OFFEN;
  case AMDGPU::BUFFER_STORE_DWORDX2_OFFSET:
    return AMDGPU::BUFFER_STORE_DWORDX2_OFFEN;
  case AMDGPU::BUFFER_STORE_DWORDX3_OFFSET:
    return AMDGPU::BUFFER_STORE_DWORDX3_OFFEN;
  case AMDGPU::BUFFER_STORE_DWORDX4_OFFSET:
    return AMDGPU::BUFFER_STORE_DWORDX4_OFFEN;
  case AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFSET:
    return AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFEN;
  case AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFSET:
    return AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFEN;

  case AMDGPU::BUFFER_LOAD_DWORD_OFFSET:
    return AMDGPU::BUFFER_LOAD_DWORD_OFFEN;
  case AMDGPU::BUFFER_LOAD_UBYTE_OFFSET:
    return AMDGPU::BUFFER_LOAD_UBYTE_OFFEN;
  case AMDGPU::BUFFER_LOAD_SBYTE_OFFSET:
    return AMDGPU::BUFFER_LOAD_SBYTE_OFFEN;
  case AMDGPU::BUFFER_LOAD_USHORT_OFFSET:
    return AMDGPU::BUFFER_LOAD_USHORT_OFFEN;
  case AMDGPU::BUFFER_LOAD_SSHORT_OFFSET:
    return AMDGPU::BUFFER_LOAD_SSHORT_OFFEN;
  case AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET:
    return AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN;
  case AMDGPU::BUFFER_LOAD_DWORDX3_OFFSET:
    return AMDGPU::BUFFER_LOAD_DWORDX3_OFFEN;
  case AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET:
    return AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN;
  case AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFSET:
    return AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFEN;
  case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET:
    return AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN;
  case AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFSET:
    return AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFEN;
  case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET:
    return AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN;
  case AMDGPU::BUFFER_LOAD_SHORT_D16_OFFSET:
    return AMDGPU::BUFFER_LOAD_SHORT_D16_OFFEN;
  case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET:
    return AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN;
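// spillVGPRtoAGPR: if the frame index was assigned an (A)GPR lane instead of
// memory, replace the load/store with a V_ACCVGPR_READ/WRITE move between the
// value register and that lane; copies within the same register file fall
// through to a plain move.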
                                                 int Index, unsigned Lane,
                                                 unsigned ValueReg,
                                                 bool IsKill) {
  if (Reg == AMDGPU::NoRegister)

  bool IsStore = MI->mayStore();

  unsigned Dst = IsStore ? Reg : ValueReg;
  unsigned Src = IsStore ? ValueReg : Reg;
  bool IsVGPR = TRI->isVGPR(MRI, Reg);
  if (IsVGPR == TRI->isVGPR(MRI, ValueReg)) {

  unsigned Opc = (IsStore ^ IsVGPR) ? AMDGPU::V_ACCVGPR_WRITE_B32_e64
                                    : AMDGPU::V_ACCVGPR_READ_B32_e64;
  bool IsStore = MI->mayStore();
  unsigned Opc = MI->getOpcode();
  int LoadStoreOp = IsStore ?
  if (LoadStoreOp == -1)

          .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc))
          .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset))

                                            AMDGPU::OpName::vdata_in);
    NewMI.add(*VDataIn);
                                          unsigned LoadStoreOp,
  bool IsStore = TII->get(LoadStoreOp).mayStore();

    LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
                          : AMDGPU::SCRATCH_LOAD_DWORD_SADDR;
    LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORDX2_SADDR
                          : AMDGPU::SCRATCH_LOAD_DWORDX2_SADDR;
    LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORDX3_SADDR
                          : AMDGPU::SCRATCH_LOAD_DWORDX3_SADDR;
    LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORDX4_SADDR
                          : AMDGPU::SCRATCH_LOAD_DWORDX4_SADDR;
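// buildSpillLoadStore: the workhorse that lowers a spill or reload of an
// arbitrarily wide register. It splits the value into EltSize chunks (up to
// 16 bytes for flat scratch, 4 bytes for MUBUF), scavenges an SGPR or VGPR
// for the scratch offset when the immediate does not fit, and emits one
// memory instruction per chunk with the right kill/def/implicit super-reg
// operands.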
    unsigned LoadStoreOp, int Index, Register ValueReg, bool IsKill,
  assert((!RS || !LiveUnits) && "Only RS or LiveUnits can be set but not both");
  bool IsStore = Desc->mayStore();
  bool IsFlat = TII->isFLATScratch(LoadStoreOp);

  bool CanClobberSCC = false;
  bool Scavenged = false;

  unsigned EltSize = (IsFlat && !IsAGPR) ? std::min(RegWidth, 16u) : 4u;
  unsigned NumSubRegs = RegWidth / EltSize;
  unsigned Size = NumSubRegs * EltSize;
  unsigned RemSize = RegWidth - Size;
  unsigned NumRemSubRegs = RemSize ? 1 : 0;

  int64_t MaterializedOffset = Offset;

  int64_t MaxOffset = Offset + Size + RemSize - EltSize;
  int64_t ScratchOffsetRegDelta = 0;
  if (IsFlat && EltSize > 4) {
    Desc = &TII->get(LoadStoreOp);
           "unexpected VGPR spill offset");

  bool UseVGPROffset = false;

  if (IsFlat && SGPRBase) {
  bool IsOffsetLegal =

    CanClobberSCC = !RS->isRegUsed(AMDGPU::SCC);
  } else if (LiveUnits) {
    CanClobberSCC = LiveUnits->available(AMDGPU::SCC);
    for (MCRegister Reg : AMDGPU::SGPR_32RegClass) {

  if (ScratchOffsetReg != AMDGPU::NoRegister && !CanClobberSCC)

      UseVGPROffset = true;

      for (MCRegister Reg : AMDGPU::VGPR_32RegClass) {
          TmpOffsetVGPR = Reg;

  } else if (!SOffset && CanClobberSCC) {

    if (!ScratchOffsetReg)
    SOffset = ScratchOffsetReg;
    ScratchOffsetRegDelta = Offset;
  if (!IsFlat && !UseVGPROffset)

  if (!UseVGPROffset && !SOffset)

  if (UseVGPROffset) {
    MaterializeVOffset(ScratchOffsetReg, TmpOffsetVGPR, Offset);
  } else if (ScratchOffsetReg == AMDGPU::NoRegister) {
            .addReg(ScratchOffsetReg)
      Add->getOperand(3).setIsDead();

  if (IsFlat && SOffset == AMDGPU::NoRegister) {
           && "Unexpected vaddr for flat scratch with a FI operand");

    if (UseVGPROffset) {
    Desc = &TII->get(LoadStoreOp);
  for (unsigned i = 0, e = NumSubRegs + NumRemSubRegs, RegOffset = 0; i != e;
       ++i, RegOffset += EltSize) {
    if (i == NumSubRegs) {
      Desc = &TII->get(LoadStoreOp);

    if (!IsFlat && UseVGPROffset) {
      Desc = &TII->get(NewLoadStoreOp);

    if (UseVGPROffset && TmpOffsetVGPR == TmpIntermediateVGPR) {
      MaterializeVOffset(ScratchOffsetReg, TmpOffsetVGPR, MaterializedOffset);

    unsigned NumRegs = EltSize / 4;

    unsigned SOffsetRegState = 0;

    const bool IsLastSubReg = i + 1 == e;
    const bool IsFirstSubReg = i == 0;

    bool NeedSuperRegDef = e > 1 && IsStore && IsFirstSubReg;
    bool NeedSuperRegImpOperand = e > 1;

    unsigned RemEltSize = EltSize;

    for (int LaneS = (RegOffset + EltSize) / 4 - 1, Lane = LaneS,
             LaneE = RegOffset / 4;
         Lane >= LaneE; --Lane) {
      bool IsSubReg = e > 1 || EltSize > 4;
      if (!MIB.getInstr())
      if (NeedSuperRegDef || (IsSubReg && IsStore && Lane == LaneS && IsFirstSubReg)) {
        NeedSuperRegDef = false;
      if ((IsSubReg || NeedSuperRegImpOperand) && (IsFirstSubReg || IsLastSubReg)) {
        NeedSuperRegImpOperand = true;
        unsigned State = SrcDstRegState;
        if (!IsLastSubReg || (Lane != LaneE))
          State &= ~RegState::Kill;
        if (!IsFirstSubReg || (Lane != LaneS))
          State &= ~RegState::Define;
    if (RemEltSize != EltSize) {
      assert(IsFlat && EltSize > 4);

      unsigned NumRegs = RemEltSize / 4;

    unsigned FinalReg = SubReg;

      if (!TmpIntermediateVGPR) {
                TII->get(AMDGPU::V_ACCVGPR_READ_B32_e64), TmpIntermediateVGPR)
        if (NeedSuperRegDef)

      SubReg = TmpIntermediateVGPR;
    } else if (UseVGPROffset) {
      if (!TmpOffsetVGPR) {

    if (UseVGPROffset) {

    if (SOffset == AMDGPU::NoRegister) {
      if (UseVGPROffset && ScratchOffsetReg) {
        MIB.addReg(ScratchOffsetReg);

      MIB.addReg(SOffset, SOffsetRegState);

    MIB.addImm(Offset + RegOffset)

    MIB.addMemOperand(NewMMO);

    if (!IsAGPR && NeedSuperRegDef)

    if (!IsStore && IsAGPR && TmpIntermediateVGPR != AMDGPU::NoRegister) {

    if (NeedSuperRegImpOperand && (IsFirstSubReg || IsLastSubReg))

  if (!IsStore && MI != MBB.end() && MI->isReturn() &&
    MIB->tieOperands(0, MIB->getNumOperands() - 1);

  if (ScratchOffsetRegDelta != 0) {
        .addImm(-ScratchOffsetRegDelta);
                                                   bool IsKill) const {
                : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
                : AMDGPU::BUFFER_STORE_DWORD_OFFSET;
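// spillSGPR: when VGPR lanes have been assigned for the frame index, each
// 32-bit piece of the SGPR is written into a lane with V_WRITELANE_B32 (and
// read back with V_READLANE_B32 in restoreSGPR below); otherwise the value is
// bounced through a scavenged VGPR to scratch memory.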
                               bool SpillToPhysVGPRLane) const {

  bool SpillToVGPR = !VGPRSpills.empty();
  if (OnlyToVGPR && !SpillToVGPR)

           "Num of VGPR lanes should be equal to num of SGPRs spilled");

    for (unsigned i = 0, e = SB.NumSubRegs; i < e; ++i) {

      bool IsFirstSubreg = i == 0;

      bool UseKill = SB.IsKill && IsLastSubreg;

              SB.TII.get(AMDGPU::V_WRITELANE_B32), Spill.VGPR)

      if (SB.NumSubRegs > 1 && (IsFirstSubreg || IsLastSubreg))

      for (unsigned i = Offset * PVD.PerVGPR,

      unsigned SuperKillState = 0;

  MI->eraseFromParent();
                                 bool SpillToPhysVGPRLane) const {

  bool SpillToVGPR = !VGPRSpills.empty();
  if (OnlyToVGPR && !SpillToVGPR)

    for (unsigned i = 0, e = SB.NumSubRegs; i < e; ++i) {

      for (unsigned i = Offset * PVD.PerVGPR,

        bool LastSubReg = (i + 1 == e);
                           SB.TII.get(AMDGPU::V_READLANE_B32), SubReg)

  MI->eraseFromParent();
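// spillEmergencySGPR: per its declaration, writes SGPR into VGPR lanes at MI
// and reloads it at the end of RestoreMBB; it is used when no stack slot or
// lane was reserved up front for the spill.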
    for (unsigned i = Offset * PVD.PerVGPR,

      unsigned SuperKillState = 0;

    MI = RestoreMBB.end();

    for (unsigned i = Offset * PVD.PerVGPR,

      bool LastSubReg = (i + 1 == e);
  switch (MI->getOpcode()) {
  case AMDGPU::SI_SPILL_S1024_SAVE:
  case AMDGPU::SI_SPILL_S512_SAVE:
  case AMDGPU::SI_SPILL_S384_SAVE:
  case AMDGPU::SI_SPILL_S352_SAVE:
  case AMDGPU::SI_SPILL_S320_SAVE:
  case AMDGPU::SI_SPILL_S288_SAVE:
  case AMDGPU::SI_SPILL_S256_SAVE:
  case AMDGPU::SI_SPILL_S224_SAVE:
  case AMDGPU::SI_SPILL_S192_SAVE:
  case AMDGPU::SI_SPILL_S160_SAVE:
  case AMDGPU::SI_SPILL_S128_SAVE:
  case AMDGPU::SI_SPILL_S96_SAVE:
  case AMDGPU::SI_SPILL_S64_SAVE:
  case AMDGPU::SI_SPILL_S32_SAVE:
    return spillSGPR(MI, FI, RS, Indexes, LIS, true, SpillToPhysVGPRLane);
  case AMDGPU::SI_SPILL_S1024_RESTORE:
  case AMDGPU::SI_SPILL_S512_RESTORE:
  case AMDGPU::SI_SPILL_S384_RESTORE:
  case AMDGPU::SI_SPILL_S352_RESTORE:
  case AMDGPU::SI_SPILL_S320_RESTORE:
  case AMDGPU::SI_SPILL_S288_RESTORE:
  case AMDGPU::SI_SPILL_S256_RESTORE:
  case AMDGPU::SI_SPILL_S224_RESTORE:
  case AMDGPU::SI_SPILL_S192_RESTORE:
  case AMDGPU::SI_SPILL_S160_RESTORE:
  case AMDGPU::SI_SPILL_S128_RESTORE:
  case AMDGPU::SI_SPILL_S96_RESTORE:
  case AMDGPU::SI_SPILL_S64_RESTORE:
  case AMDGPU::SI_SPILL_S32_RESTORE:
    return restoreSGPR(MI, FI, RS, Indexes, LIS, true, SpillToPhysVGPRLane);
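// eliminateFrameIndex: the generic frame-index rewriter. SGPR spill pseudos
// are forwarded to spillSGPR/restoreSGPR, vector spill pseudos are expanded
// through buildSpillLoadStore, and for ordinary MUBUF/flat-scratch
// instructions the frame index is folded into the soffset/saddr operand or
// materialized into a scavenged register.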
                                        int SPAdj, unsigned FIOperandNum,

  assert(SPAdj == 0 && "unhandled SP adjustment in call sequence?");

  int Index = MI->getOperand(FIOperandNum).getIndex();

  switch (MI->getOpcode()) {
  case AMDGPU::SI_SPILL_S1024_SAVE:
  case AMDGPU::SI_SPILL_S512_SAVE:
  case AMDGPU::SI_SPILL_S384_SAVE:
  case AMDGPU::SI_SPILL_S352_SAVE:
  case AMDGPU::SI_SPILL_S320_SAVE:
  case AMDGPU::SI_SPILL_S288_SAVE:
  case AMDGPU::SI_SPILL_S256_SAVE:
  case AMDGPU::SI_SPILL_S224_SAVE:
  case AMDGPU::SI_SPILL_S192_SAVE:
  case AMDGPU::SI_SPILL_S160_SAVE:
  case AMDGPU::SI_SPILL_S128_SAVE:
  case AMDGPU::SI_SPILL_S96_SAVE:
  case AMDGPU::SI_SPILL_S64_SAVE:
  case AMDGPU::SI_SPILL_S32_SAVE: {

  case AMDGPU::SI_SPILL_S1024_RESTORE:
  case AMDGPU::SI_SPILL_S512_RESTORE:
  case AMDGPU::SI_SPILL_S384_RESTORE:
  case AMDGPU::SI_SPILL_S352_RESTORE:
  case AMDGPU::SI_SPILL_S320_RESTORE:
  case AMDGPU::SI_SPILL_S288_RESTORE:
  case AMDGPU::SI_SPILL_S256_RESTORE:
  case AMDGPU::SI_SPILL_S224_RESTORE:
  case AMDGPU::SI_SPILL_S192_RESTORE:
  case AMDGPU::SI_SPILL_S160_RESTORE:
  case AMDGPU::SI_SPILL_S128_RESTORE:
  case AMDGPU::SI_SPILL_S96_RESTORE:
  case AMDGPU::SI_SPILL_S64_RESTORE:
  case AMDGPU::SI_SPILL_S32_RESTORE: {
  case AMDGPU::SI_SPILL_V1024_SAVE:
  case AMDGPU::SI_SPILL_V512_SAVE:
  case AMDGPU::SI_SPILL_V384_SAVE:
  case AMDGPU::SI_SPILL_V352_SAVE:
  case AMDGPU::SI_SPILL_V320_SAVE:
  case AMDGPU::SI_SPILL_V288_SAVE:
  case AMDGPU::SI_SPILL_V256_SAVE:
  case AMDGPU::SI_SPILL_V224_SAVE:
  case AMDGPU::SI_SPILL_V192_SAVE:
  case AMDGPU::SI_SPILL_V160_SAVE:
  case AMDGPU::SI_SPILL_V128_SAVE:
  case AMDGPU::SI_SPILL_V96_SAVE:
  case AMDGPU::SI_SPILL_V64_SAVE:
  case AMDGPU::SI_SPILL_V32_SAVE:
  case AMDGPU::SI_SPILL_A1024_SAVE:
  case AMDGPU::SI_SPILL_A512_SAVE:
  case AMDGPU::SI_SPILL_A384_SAVE:
  case AMDGPU::SI_SPILL_A352_SAVE:
  case AMDGPU::SI_SPILL_A320_SAVE:
  case AMDGPU::SI_SPILL_A288_SAVE:
  case AMDGPU::SI_SPILL_A256_SAVE:
  case AMDGPU::SI_SPILL_A224_SAVE:
  case AMDGPU::SI_SPILL_A192_SAVE:
  case AMDGPU::SI_SPILL_A160_SAVE:
  case AMDGPU::SI_SPILL_A128_SAVE:
  case AMDGPU::SI_SPILL_A96_SAVE:
  case AMDGPU::SI_SPILL_A64_SAVE:
  case AMDGPU::SI_SPILL_A32_SAVE:
  case AMDGPU::SI_SPILL_AV1024_SAVE:
  case AMDGPU::SI_SPILL_AV512_SAVE:
  case AMDGPU::SI_SPILL_AV384_SAVE:
  case AMDGPU::SI_SPILL_AV352_SAVE:
  case AMDGPU::SI_SPILL_AV320_SAVE:
  case AMDGPU::SI_SPILL_AV288_SAVE:
  case AMDGPU::SI_SPILL_AV256_SAVE:
  case AMDGPU::SI_SPILL_AV224_SAVE:
  case AMDGPU::SI_SPILL_AV192_SAVE:
  case AMDGPU::SI_SPILL_AV160_SAVE:
  case AMDGPU::SI_SPILL_AV128_SAVE:
  case AMDGPU::SI_SPILL_AV96_SAVE:
  case AMDGPU::SI_SPILL_AV64_SAVE:
  case AMDGPU::SI_SPILL_AV32_SAVE:
  case AMDGPU::SI_SPILL_WWM_V32_SAVE:
  case AMDGPU::SI_SPILL_WWM_AV32_SAVE: {
                                           AMDGPU::OpName::vdata);
    assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
              : AMDGPU::BUFFER_STORE_DWORD_OFFSET;
    auto *MBB = MI->getParent();
    bool IsWWMRegSpill = TII->isWWMRegSpillOpcode(MI->getOpcode());
    if (IsWWMRegSpill) {
        TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
        *MI->memoperands_begin(), RS);

    MI->eraseFromParent();
  case AMDGPU::SI_SPILL_V32_RESTORE:
  case AMDGPU::SI_SPILL_V64_RESTORE:
  case AMDGPU::SI_SPILL_V96_RESTORE:
  case AMDGPU::SI_SPILL_V128_RESTORE:
  case AMDGPU::SI_SPILL_V160_RESTORE:
  case AMDGPU::SI_SPILL_V192_RESTORE:
  case AMDGPU::SI_SPILL_V224_RESTORE:
  case AMDGPU::SI_SPILL_V256_RESTORE:
  case AMDGPU::SI_SPILL_V288_RESTORE:
  case AMDGPU::SI_SPILL_V320_RESTORE:
  case AMDGPU::SI_SPILL_V352_RESTORE:
  case AMDGPU::SI_SPILL_V384_RESTORE:
  case AMDGPU::SI_SPILL_V512_RESTORE:
  case AMDGPU::SI_SPILL_V1024_RESTORE:
  case AMDGPU::SI_SPILL_A32_RESTORE:
  case AMDGPU::SI_SPILL_A64_RESTORE:
  case AMDGPU::SI_SPILL_A96_RESTORE:
  case AMDGPU::SI_SPILL_A128_RESTORE:
  case AMDGPU::SI_SPILL_A160_RESTORE:
  case AMDGPU::SI_SPILL_A192_RESTORE:
  case AMDGPU::SI_SPILL_A224_RESTORE:
  case AMDGPU::SI_SPILL_A256_RESTORE:
  case AMDGPU::SI_SPILL_A288_RESTORE:
  case AMDGPU::SI_SPILL_A320_RESTORE:
  case AMDGPU::SI_SPILL_A352_RESTORE:
  case AMDGPU::SI_SPILL_A384_RESTORE:
  case AMDGPU::SI_SPILL_A512_RESTORE:
  case AMDGPU::SI_SPILL_A1024_RESTORE:
  case AMDGPU::SI_SPILL_AV32_RESTORE:
  case AMDGPU::SI_SPILL_AV64_RESTORE:
  case AMDGPU::SI_SPILL_AV96_RESTORE:
  case AMDGPU::SI_SPILL_AV128_RESTORE:
  case AMDGPU::SI_SPILL_AV160_RESTORE:
  case AMDGPU::SI_SPILL_AV192_RESTORE:
  case AMDGPU::SI_SPILL_AV224_RESTORE:
  case AMDGPU::SI_SPILL_AV256_RESTORE:
  case AMDGPU::SI_SPILL_AV288_RESTORE:
  case AMDGPU::SI_SPILL_AV320_RESTORE:
  case AMDGPU::SI_SPILL_AV352_RESTORE:
  case AMDGPU::SI_SPILL_AV384_RESTORE:
  case AMDGPU::SI_SPILL_AV512_RESTORE:
  case AMDGPU::SI_SPILL_AV1024_RESTORE:
  case AMDGPU::SI_SPILL_WWM_V32_RESTORE:
  case AMDGPU::SI_SPILL_WWM_AV32_RESTORE: {
                                           AMDGPU::OpName::vdata);
    assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
              : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
    auto *MBB = MI->getParent();
    bool IsWWMRegSpill = TII->isWWMRegSpillOpcode(MI->getOpcode());
    if (IsWWMRegSpill) {
        TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
        *MI->memoperands_begin(), RS);

    MI->eraseFromParent();
      int64_t Offset = FrameInfo.getObjectOffset(Index);

      if (TII->isFLATScratch(*MI)) {
        assert((int16_t)FIOperandNum ==
                                       AMDGPU::OpName::saddr));

            TII->getNamedOperand(*MI, AMDGPU::OpName::offset);
          OffsetOp->setImm(NewOffset);

        unsigned Opc = MI->getOpcode();
                                                 AMDGPU::OpName::vdst_in);
          bool TiedVDst = VDstIn != -1 &&
                          MI->getOperand(VDstIn).isReg() &&
                          MI->getOperand(VDstIn).isTied();
            MI->untieRegOperand(VDstIn);

            assert(NewVDst != -1 && NewVDstIn != -1 && "Must be tied!");
            MI->tieOperands(NewVDst, NewVDstIn);
          MI->setDesc(TII->get(NewOpc));
        if (TII->isImmOperandLegal(*MI, FIOperandNum, FIOp))

        bool UseSGPR = TII->isOperandLegal(*MI, FIOperandNum, &FIOp);

        if (!Offset && FrameReg && UseSGPR) {

                       : &AMDGPU::VGPR_32RegClass;

        if ((!FrameReg || !Offset) && TmpReg) {
          unsigned Opc = UseSGPR ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
            MIB.addReg(FrameReg);

            RS->isRegUsed(AMDGPU::SCC) && !MI->definesRegister(AMDGPU::SCC);
                MI, false, 0, !UseSGPR);

        if ((!TmpSReg && !FrameReg) || (!TmpReg && !UseSGPR))

          assert(!(Offset & 0x1) && "Flat scratch offset must be aligned!");

        if (TmpSReg == FrameReg) {
          if (NeedSaveSCC && !MI->registerDefIsDead(AMDGPU::SCC)) {
      bool IsMUBUF = TII->isMUBUF(*MI);

            RS->isRegUsed(AMDGPU::SCC) && !MI->definesRegister(AMDGPU::SCC);
                ? &AMDGPU::SReg_32RegClass
                : &AMDGPU::VGPR_32RegClass;
        bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32 ||
                      MI->getOpcode() == AMDGPU::V_MOV_B32_e64;
            IsCopy ? MI->getOperand(0).getReg()

        int64_t Offset = FrameInfo.getObjectOffset(Index);

        unsigned OpCode = IsSALU && !LiveSCC ? AMDGPU::S_LSHR_B32
                                             : AMDGPU::V_LSHRREV_B32_e64;
          if (IsSALU && !LiveSCC)
          if (IsSALU && LiveSCC) {
                AMDGPU::SReg_32RegClass, Shift, false, 0);
            ResultReg = NewDest;

          if ((MIB = TII->getAddNoCarry(*MBB, MI, DL, ResultReg, *RS)) !=

            const bool IsVOP2 = MIB->getOpcode() == AMDGPU::V_ADD_U32_e32;

                   "Need to reuse carry out register");

              ConstOffsetReg = getSubReg(MIB.getReg(1), AMDGPU::sub0);
              ConstOffsetReg = MIB.getReg(1);

          if (!MIB || IsSALU) {

                AMDGPU::SReg_32_XM0RegClass, MI, false, 0, false);
            Register ScaledReg =
                TmpScaledReg.isValid() ? TmpScaledReg : FrameReg;

            ResultReg = ScaledReg;

            if (!TmpScaledReg.isValid()) {

        MI->eraseFromParent();

      assert(static_cast<int>(FIOperandNum) ==
                                 AMDGPU::OpName::vaddr));

      auto &SOffset = *TII->getNamedOperand(*MI, AMDGPU::OpName::soffset);
      assert((SOffset.isImm() && SOffset.getImm() == 0));

      if (FrameReg != AMDGPU::NoRegister)
        SOffset.ChangeToRegister(FrameReg, false);

      int64_t Offset = FrameInfo.getObjectOffset(Index);
          = TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm();
      int64_t NewOffset = OldImm + Offset;

        MI->eraseFromParent();

      if (!TII->isImmOperandLegal(*MI, FIOperandNum, FIOp)) {
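// The getAny*/getAligned* helpers below pick the smallest VGPR, AGPR, or AV
// register class able to hold a value of the given bit width; the *_Align2
// variants are used on subtargets where tuples must start at an even register
// (GCNSubtarget::needsAlignedVGPRs()).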
    return &AMDGPU::VReg_64RegClass;
    return &AMDGPU::VReg_96RegClass;
    return &AMDGPU::VReg_128RegClass;
    return &AMDGPU::VReg_160RegClass;
    return &AMDGPU::VReg_192RegClass;
    return &AMDGPU::VReg_224RegClass;
    return &AMDGPU::VReg_256RegClass;
    return &AMDGPU::VReg_288RegClass;
    return &AMDGPU::VReg_320RegClass;
    return &AMDGPU::VReg_352RegClass;
    return &AMDGPU::VReg_384RegClass;
    return &AMDGPU::VReg_512RegClass;
    return &AMDGPU::VReg_1024RegClass;

    return &AMDGPU::VReg_64_Align2RegClass;
    return &AMDGPU::VReg_96_Align2RegClass;
    return &AMDGPU::VReg_128_Align2RegClass;
    return &AMDGPU::VReg_160_Align2RegClass;
    return &AMDGPU::VReg_192_Align2RegClass;
    return &AMDGPU::VReg_224_Align2RegClass;
    return &AMDGPU::VReg_256_Align2RegClass;
    return &AMDGPU::VReg_288_Align2RegClass;
    return &AMDGPU::VReg_320_Align2RegClass;
    return &AMDGPU::VReg_352_Align2RegClass;
    return &AMDGPU::VReg_384_Align2RegClass;
    return &AMDGPU::VReg_512_Align2RegClass;
    return &AMDGPU::VReg_1024_Align2RegClass;

    return &AMDGPU::VReg_1RegClass;
    return &AMDGPU::VGPR_LO16RegClass;
    return &AMDGPU::VGPR_32RegClass;

    return &AMDGPU::AReg_64RegClass;
    return &AMDGPU::AReg_96RegClass;
    return &AMDGPU::AReg_128RegClass;
    return &AMDGPU::AReg_160RegClass;
    return &AMDGPU::AReg_192RegClass;
    return &AMDGPU::AReg_224RegClass;
    return &AMDGPU::AReg_256RegClass;
    return &AMDGPU::AReg_288RegClass;
    return &AMDGPU::AReg_320RegClass;
    return &AMDGPU::AReg_352RegClass;
    return &AMDGPU::AReg_384RegClass;
    return &AMDGPU::AReg_512RegClass;
    return &AMDGPU::AReg_1024RegClass;

    return &AMDGPU::AReg_64_Align2RegClass;
    return &AMDGPU::AReg_96_Align2RegClass;
    return &AMDGPU::AReg_128_Align2RegClass;
    return &AMDGPU::AReg_160_Align2RegClass;
    return &AMDGPU::AReg_192_Align2RegClass;
    return &AMDGPU::AReg_224_Align2RegClass;
    return &AMDGPU::AReg_256_Align2RegClass;
    return &AMDGPU::AReg_288_Align2RegClass;
    return &AMDGPU::AReg_320_Align2RegClass;
    return &AMDGPU::AReg_352_Align2RegClass;
    return &AMDGPU::AReg_384_Align2RegClass;
    return &AMDGPU::AReg_512_Align2RegClass;
    return &AMDGPU::AReg_1024_Align2RegClass;

    return &AMDGPU::AGPR_LO16RegClass;
    return &AMDGPU::AGPR_32RegClass;

    return &AMDGPU::AV_64RegClass;
    return &AMDGPU::AV_96RegClass;
    return &AMDGPU::AV_128RegClass;
    return &AMDGPU::AV_160RegClass;
    return &AMDGPU::AV_192RegClass;
    return &AMDGPU::AV_224RegClass;
    return &AMDGPU::AV_256RegClass;
    return &AMDGPU::AV_288RegClass;
    return &AMDGPU::AV_320RegClass;
    return &AMDGPU::AV_352RegClass;
    return &AMDGPU::AV_384RegClass;
    return &AMDGPU::AV_512RegClass;
    return &AMDGPU::AV_1024RegClass;

    return &AMDGPU::AV_64_Align2RegClass;
    return &AMDGPU::AV_96_Align2RegClass;
    return &AMDGPU::AV_128_Align2RegClass;
    return &AMDGPU::AV_160_Align2RegClass;
    return &AMDGPU::AV_192_Align2RegClass;
    return &AMDGPU::AV_224_Align2RegClass;
    return &AMDGPU::AV_256_Align2RegClass;
    return &AMDGPU::AV_288_Align2RegClass;
    return &AMDGPU::AV_320_Align2RegClass;
    return &AMDGPU::AV_352_Align2RegClass;
    return &AMDGPU::AV_384_Align2RegClass;
    return &AMDGPU::AV_512_Align2RegClass;
    return &AMDGPU::AV_1024_Align2RegClass;

    return &AMDGPU::VGPR_LO16RegClass;
    return &AMDGPU::AV_32RegClass;

    return &AMDGPU::SGPR_LO16RegClass;
    return &AMDGPU::SReg_32RegClass;
    return &AMDGPU::SReg_64RegClass;
    return &AMDGPU::SGPR_96RegClass;
    return &AMDGPU::SGPR_128RegClass;
    return &AMDGPU::SGPR_160RegClass;
    return &AMDGPU::SGPR_192RegClass;
    return &AMDGPU::SGPR_224RegClass;
    return &AMDGPU::SGPR_256RegClass;
    return &AMDGPU::SGPR_288RegClass;
    return &AMDGPU::SGPR_320RegClass;
    return &AMDGPU::SGPR_352RegClass;
    return &AMDGPU::SGPR_384RegClass;
    return &AMDGPU::SGPR_512RegClass;
    return &AMDGPU::SGPR_1024RegClass;
  if (Reg.isVirtual())
    RC = MRI.getRegClass(Reg);
    RC = getPhysRegBaseClass(Reg);

  unsigned Size = getRegSizeInBits(*SRC);
  assert(VRC && "Invalid register class size");

  unsigned Size = getRegSizeInBits(*SRC);
  assert(ARC && "Invalid register class size");

  unsigned Size = getRegSizeInBits(*VRC);
    return &AMDGPU::SGPR_32RegClass;
  assert(SRC && "Invalid register class size");

                                 unsigned SubIdx) const {
      getMatchingSuperRegClass(SuperRC, SubRC, SubIdx);
  return MatchRC && MatchRC->hasSubClassEq(SuperRC) ? MatchRC : nullptr;

                               unsigned SrcSubReg) const {

  return getCommonSubClass(DefRC, SrcRC) != nullptr;
  if (ReserveHighestRegister) {
      if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg))
      if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg))
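// getRegSplitParts: given a register class and an element size in bytes,
// return the precomputed slice of RegSplitParts holding the sub-register
// indices that decompose a register of that class; e.g. a 128-bit class split
// into 4-byte elements should yield {sub0, sub1, sub2, sub3}.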
                                                  unsigned EltSize) const {
  assert(RegBitWidth >= 32 && RegBitWidth <= 1024);

  const unsigned RegDWORDs = RegBitWidth / 32;
  const unsigned EltDWORDs = EltSize / 4;
  assert(RegSplitParts.size() + 1 >= EltDWORDs);

  const std::vector<int16_t> &Parts = RegSplitParts[EltDWORDs - 1];
  const unsigned NumParts = RegDWORDs / EltDWORDs;

  return ArrayRef(Parts.data(), NumParts);
  return Reg.isVirtual() ? MRI.getRegClass(Reg) : getPhysRegBaseClass(Reg);

  return getSubRegisterClass(SrcRC, MO.getSubReg());

  unsigned SrcSize = getRegSizeInBits(*SrcRC);
  unsigned DstSize = getRegSizeInBits(*DstRC);
  unsigned NewSize = getRegSizeInBits(*NewRC);

  if (SrcSize <= 32 || DstSize <= 32)

  return NewSize <= DstSize || NewSize <= SrcSize;
  switch (RC->getID()) {
  default:
    return AMDGPUGenRegisterInfo::getRegPressureLimit(RC, MF);
  case AMDGPU::VGPR_32RegClassID:
  case AMDGPU::VGPR_LO16RegClassID:
  case AMDGPU::VGPR_HI16RegClassID:
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::SGPR_LO16RegClassID:
                                                unsigned Idx) const {
  if (Idx == AMDGPU::RegisterPressureSets::VGPR_32 ||
      Idx == AMDGPU::RegisterPressureSets::AGPR_32)

  if (Idx == AMDGPU::RegisterPressureSets::SReg_32)

  static const int Empty[] = { -1 };

  if (RegPressureIgnoredUnits[RegUnit])
  return AMDGPUGenRegisterInfo::getRegUnitPressureSets(RegUnit);
  return AMDGPU::SGPR30_SGPR31;

  switch (RB.getID()) {
  case AMDGPU::VGPRRegBankID:
  case AMDGPU::VCCRegBankID:
    return isWave32 ? &AMDGPU::SReg_32_XM0_XEXECRegClass
                    : &AMDGPU::SReg_64_XEXECRegClass;
  case AMDGPU::SGPRRegBankID:
  case AMDGPU::AGPRRegBankID:

  return getAllocatableClass(RC);

  return isWave32 ? AMDGPU::VCC_LO : AMDGPU::VCC;

  return isWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC;

                  : &AMDGPU::VReg_64RegClass;

  switch ((int)RCID) {
  case AMDGPU::SReg_1RegClassID:
  case AMDGPU::SReg_1_XEXECRegClassID:
    return isWave32 ? &AMDGPU::SReg_32_XM0_XEXECRegClass
                    : &AMDGPU::SReg_64_XEXECRegClass;

  return AMDGPUGenRegisterInfo::getRegClass(RCID);
  if (Reg.isVirtual()) {
                       : MRI.getMaxLaneMaskForVReg(Reg);
      if ((S.LaneMask & SubLanes) == SubLanes) {
        V = S.getVNInfoAt(UseIdx);

    for (MCRegUnit Unit : regunits(Reg.asMCReg())) {

  if (!Def || !MDT.dominates(Def, &Use))

  assert(Def->modifiesRegister(Reg, this));

  assert(getRegSizeInBits(*getPhysRegBaseClass(Reg)) <= 32);

                                   AMDGPU::SReg_32RegClass,
                                   AMDGPU::AGPR_32RegClass } ) {
    if (MCPhysReg Super = getMatchingSuperReg(Reg, AMDGPU::lo16, &RC))

  if (MCPhysReg Super = getMatchingSuperReg(Reg, AMDGPU::hi16,
                                            &AMDGPU::VGPR_32RegClass)) {

  return AMDGPU::NoRegister;

  unsigned Size = getRegSizeInBits(*RC);

  return std::min(128u, getSubRegIdxSize(SubReg));

  return std::min(32u, getSubRegIdxSize(SubReg));
BitVector getReservedRegs(const MachineFunction &MF) const override
bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const override
const TargetRegisterClass * getRegClassForOperandReg(const MachineRegisterInfo &MRI, const MachineOperand &MO) const
const uint32_t * getAllAGPRRegMask() const
bool shouldCoalesce(MachineInstr *MI, const TargetRegisterClass *SrcRC, unsigned SubReg, const TargetRegisterClass *DstRC, unsigned DstSubReg, const TargetRegisterClass *NewRC, LiveIntervals &LIS) const override
const TargetRegisterClass * getBoolRC() const
const TargetRegisterClass * getPointerRegClass(const MachineFunction &MF, unsigned Kind=0) const override
bool isAGPR(const MachineRegisterInfo &MRI, Register Reg) const
bool eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, unsigned FIOperandNum, RegScavenger *RS) const override
bool spillSGPR(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS, SlotIndexes *Indexes=nullptr, LiveIntervals *LIS=nullptr, bool OnlyToVGPR=false, bool SpillToPhysVGPRLane=false) const
If OnlyToVGPR is true, this succeeds only if a free VGPR lane can be found to spill to.
MCRegister getExec() const
MCRegister getVCC() const
int64_t getFrameIndexInstrOffset(const MachineInstr *MI, int Idx) const override
bool isVectorSuperClass(const TargetRegisterClass *RC) const
const TargetRegisterClass * getWaveMaskRegClass() const
unsigned getSubRegAlignmentNumBits(const TargetRegisterClass *RC, unsigned SubReg) const
void resolveFrameIndex(MachineInstr &MI, Register BaseReg, int64_t Offset) const override
bool requiresVirtualBaseRegisters(const MachineFunction &Fn) const override
const TargetRegisterClass * getVGPR64Class() const
void buildVGPRSpillLoadStore(SGPRSpillBuilder &SB, int Index, int Offset, bool IsLoad, bool IsKill=true) const
static bool isSGPRClass(const TargetRegisterClass *RC)
static bool isAGPRClass(const TargetRegisterClass *RC)
const int * getRegUnitPressureSets(unsigned RegUnit) const override
SlotIndex - An opaque wrapper around machine indexes.
bool isValid() const
Returns true if this is a valid index.
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
SlotIndex replaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI)
ReplaceMachineInstrInMaps - Replacing a machine instr with a new one in maps used by register allocator.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
const uint8_t TSFlags
Configurable target specific flags.
unsigned getID() const
Return the register class ID number.
bool hasSubClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a sub-class of or equal to this class.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
virtual const TargetRegisterClass * getLargestLegalSuperClass(const TargetRegisterClass *RC, const MachineFunction &) const
Returns the largest super class of RC that is legal to use in the current sub-target and has the same spill size.
virtual bool shouldRealignStack(const MachineFunction &MF) const
True if storage within the function requires the stack pointer to be aligned more than the normal calling convention calls for.
A Use represents the edge between a Value definition and its users.
VNInfo - Value Number Information.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ PRIVATE_ADDRESS
Address space for private memory.
LLVM_READONLY int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx)
LLVM_READONLY int getFlatScratchInstSVfromSS(uint16_t Opcode)
LLVM_READONLY int getFlatScratchInstSTfromSS(uint16_t Opcode)
LLVM_READONLY int getFlatScratchInstSVfromSVS(uint16_t Opcode)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
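Hedged examples: 32-bit integers in [-16, 64] (and a small set of FP immediates) encode as inline constants, so:

    bool Yes = llvm::AMDGPU::isInlinableLiteral32(64, /*HasInv2Pi=*/true); // true
    bool No  = llvm::AMDGPU::isInlinableLiteral32(65, /*HasInv2Pi=*/true); // false: needs a literal slot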
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, uint64_t NamedIdx)
@ OPERAND_REG_INLINE_AC_FIRST
@ OPERAND_REG_INLINE_AC_LAST
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
@ AMDGPU_Gfx
Used for AMD graphics targets.
@ Cold
Attempts to make code in the caller as efficient as possible under the assumption that the call is not commonly executed.
@ Fast
Attempts to make calls as fast as possible (e.g. by passing things in registers).
@ C
The default llvm calling convention, compatible with C.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
initializer< Ty > init(const Ty &Val)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtual registers.
auto reverse(ContainerTy &&C)
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
unsigned getDefRegState(bool B)
unsigned getKillRegState(bool B)
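A sketch of how these helpers feed operand flags into BuildMI; the opcode and register names are placeholders:

    #include "llvm/CodeGen/MachineInstrBuilder.h"

    void emitVGPRCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                      const DebugLoc &DL, const SIInstrInfo *TII,
                      Register DstReg, Register SrcReg, bool IsKill) {
      // addReg() takes an OR of RegState flags; getKillRegState(IsKill)
      // contributes RegState::Kill only when IsKill is true.
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), DstReg)
          .addReg(SrcReg, getKillRegState(IsKill));
    }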
void call_once(once_flag &flag, Function &&F, Args &&... ArgList)
Execute the function specified as a parameter once.
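A minimal sketch of the once-initialization pattern; initTable() is a hypothetical one-time setup routine:

    #include "llvm/Support/Threading.h"

    static llvm::once_flag InitFlag;

    void ensureTableInitialized() {
      // The callable runs at most once across all threads; concurrent
      // callers block until it completes, and later calls return immediately.
      llvm::call_once(InitFlag, [] { initTable(); });
    }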
constexpr unsigned BitWidth
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the largest uint64_t less than or equal to Value and is Skew mod Align.
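Worked values for the two alignment helpers above:

    llvm::Align Common = llvm::commonAlignment(llvm::Align(16), 8);
    // Align(8): an offset of 8 from a 16-byte-aligned base is only
    // guaranteed 8-byte alignment.
    uint64_t Down = llvm::alignDown(13, 4); // 12, the largest multiple of 4 <= 13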
This struct is a compact representation of a valid (non-zero power of two) alignment.
Description of the encoding of one expression Op.
This class contains a discriminated union of information about pointers in memory operands, determining the semantics of the pointer.
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
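A hedged sketch pairing getFixedStack with a MachineMemOperand for a 4-byte spill store; the size and alignment are illustrative:

    #include "llvm/CodeGen/MachineFunction.h"
    #include "llvm/CodeGen/MachineMemOperand.h"

    llvm::MachineMemOperand *makeSpillMMO(llvm::MachineFunction &MF, int FI) {
      llvm::MachinePointerInfo PtrInfo =
          llvm::MachinePointerInfo::getFixedStack(MF, FI);
      // MOStore marks this as a store; the pointer info ties the access
      // back to the frame index for alias analysis.
      return MF.getMachineMemOperand(PtrInfo, llvm::MachineMemOperand::MOStore,
                                     /*Size=*/4, llvm::Align(4));
    }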
void setMI(MachineBasicBlock *NewMBB, MachineBasicBlock::iterator NewMI)
ArrayRef< int16_t > SplitParts
SIMachineFunctionInfo & MFI
SGPRSpillBuilder(const SIRegisterInfo &TRI, const SIInstrInfo &TII, bool IsWave32, MachineBasicBlock::iterator MI, int Index, RegScavenger *RS)
SGPRSpillBuilder(const SIRegisterInfo &TRI, const SIInstrInfo &TII, bool IsWave32, MachineBasicBlock::iterator MI, Register Reg, bool IsKill, int Index, RegScavenger *RS)
PerVGPRData getPerVGPRData()
MachineBasicBlock::iterator MI
void readWriteTmpVGPR(unsigned Offset, bool IsLoad)
const SIRegisterInfo & TRI
The llvm::once_flag structure.