29#define GET_REGINFO_TARGET_DESC
30#include "AMDGPUGenRegisterInfo.inc"
33 "amdgpu-spill-sgpr-to-vgpr",
34 cl::desc(
"Enable spilling VGPRs to SGPRs"),
38std::array<std::vector<int16_t>, 16> SIRegisterInfo::RegSplitParts;
39std::array<std::array<uint16_t, 32>, 9> SIRegisterInfo::SubRegFromChannelTable;
45 static const std::array<unsigned, 17> SubRegFromChannelTableWidthMap = {
46     0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 9};
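// Inferred from the lookup in getSubRegFromChannel below:
// SubRegFromChannelTableWidthMap appears to map a subregister width in
// 32-bit channels (1..16) to a compact 1-based row of SubRegFromChannelTable,
// with 0 marking widths that have no row.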
116 MI->getOperand(0).isKill(), Index, RS) {}
131 MovOpc = AMDGPU::S_MOV_B32;
132 NotOpc = AMDGPU::S_NOT_B32;
135 MovOpc = AMDGPU::S_MOV_B64;
136 NotOpc = AMDGPU::S_NOT_B64;
141 SuperReg != AMDGPU::EXEC && "exec should never spill");
172 assert(RS && "Cannot spill SGPR to memory without RegScavenger");
200 IsWave32 ? AMDGPU::SGPR_32RegClass : AMDGPU::SGPR_64RegClass;
221 MI->emitError("unhandled SGPR spill to memory");
231 I->getOperand(2).setIsDead();
266 I->getOperand(2).setIsDead();
296 MI->emitError("unhandled SGPR spill to memory");
323 assert(getSubRegIndexLaneMask(AMDGPU::sub0).getAsInteger() == 3 &&
324 getSubRegIndexLaneMask(AMDGPU::sub31).getAsInteger() == (3ULL << 62) &&
325 (getSubRegIndexLaneMask(AMDGPU::lo16) |
326 getSubRegIndexLaneMask(AMDGPU::hi16)).getAsInteger() ==
327 getSubRegIndexLaneMask(AMDGPU::sub0).getAsInteger() &&
328 "getNumCoveredRegs() will not work with generated subreg masks!");
330 RegPressureIgnoredUnits.resize(getNumRegUnits());
331 RegPressureIgnoredUnits.set(
333 for (auto Reg : AMDGPU::VGPR_HI16RegClass)
339 static auto InitializeRegSplitPartsOnce = [this]() {
340 for (unsigned Idx = 1, E = getNumSubRegIndices() - 1; Idx < E; ++Idx) {
341 unsigned Size = getSubRegIdxSize(Idx);
344 std::vector<int16_t> &Vec = RegSplitParts[Size / 32 - 1];
345 unsigned Pos = getSubRegIdxOffset(Idx);
350 unsigned MaxNumParts = 1024 / Size;
351 Vec.resize(MaxNumParts);
359 static auto InitializeSubRegFromChannelTableOnce = [this]() {
360 for (auto &Row : SubRegFromChannelTable)
361 Row.fill(AMDGPU::NoSubRegister);
362 for (unsigned Idx = 1; Idx < getNumSubRegIndices(); ++Idx) {
363 unsigned Width = AMDGPUSubRegIdxRanges[Idx].Size / 32;
364 unsigned Offset = AMDGPUSubRegIdxRanges[Idx].Offset / 32;
369 unsigned TableIdx = Width - 1;
370 assert(TableIdx < SubRegFromChannelTable.size());
372 SubRegFromChannelTable[TableIdx][Offset] = Idx;
376 llvm::call_once(InitializeRegSplitPartsFlag, InitializeRegSplitPartsOnce);
378 InitializeSubRegFromChannelTableOnce);
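// RegSplitParts and SubRegFromChannelTable are static members, so they are
// filled in lazily and exactly once via llvm::call_once when the first
// SIRegisterInfo instance is constructed.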
385 for (; R.isValid(); ++R)
398 : CSR_AMDGPU_SaveList;
400 return ST.hasGFX90AInsts() ? CSR_AMDGPU_SI_Gfx_GFX90AInsts_SaveList
401 : CSR_AMDGPU_SI_Gfx_SaveList;
404 static const MCPhysReg NoCalleeSavedReg = AMDGPU::NoRegister;
405 return &NoCalleeSavedReg;
422 : CSR_AMDGPU_RegMask;
424 return ST.hasGFX90AInsts() ? CSR_AMDGPU_SI_Gfx_GFX90AInsts_RegMask
425 : CSR_AMDGPU_SI_Gfx_RegMask;
432 return CSR_AMDGPU_NoRegs_RegMask;
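// Callee-saved register lists and call-preserved masks are chosen per
// calling convention; separate variants are used when gfx90a instructions
// are available (ST.hasGFX90AInsts()), which changes how AGPRs are treated.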
443 if (RC == &AMDGPU::VGPR_32RegClass || RC == &AMDGPU::AGPR_32RegClass)
444 return &AMDGPU::AV_32RegClass;
445 if (RC == &AMDGPU::VReg_64RegClass || RC == &AMDGPU::AReg_64RegClass)
446 return &AMDGPU::AV_64RegClass;
447 if (RC == &AMDGPU::VReg_64_Align2RegClass ||
448 RC == &AMDGPU::AReg_64_Align2RegClass)
449 return &AMDGPU::AV_64_Align2RegClass;
450 if (RC == &AMDGPU::VReg_96RegClass || RC == &AMDGPU::AReg_96RegClass)
451 return &AMDGPU::AV_96RegClass;
452 if (RC == &AMDGPU::VReg_96_Align2RegClass ||
453 RC == &AMDGPU::AReg_96_Align2RegClass)
454 return &AMDGPU::AV_96_Align2RegClass;
455 if (RC == &AMDGPU::VReg_128RegClass || RC == &AMDGPU::AReg_128RegClass)
456 return &AMDGPU::AV_128RegClass;
457 if (RC == &AMDGPU::VReg_128_Align2RegClass ||
458 RC == &AMDGPU::AReg_128_Align2RegClass)
459 return &AMDGPU::AV_128_Align2RegClass;
460 if (RC == &AMDGPU::VReg_160RegClass || RC == &AMDGPU::AReg_160RegClass)
461 return &AMDGPU::AV_160RegClass;
462 if (RC == &AMDGPU::VReg_160_Align2RegClass ||
463 RC == &AMDGPU::AReg_160_Align2RegClass)
464 return &AMDGPU::AV_160_Align2RegClass;
465 if (RC == &AMDGPU::VReg_192RegClass || RC == &AMDGPU::AReg_192RegClass)
466 return &AMDGPU::AV_192RegClass;
467 if (RC == &AMDGPU::VReg_192_Align2RegClass ||
468 RC == &AMDGPU::AReg_192_Align2RegClass)
469 return &AMDGPU::AV_192_Align2RegClass;
470 if (RC == &AMDGPU::VReg_256RegClass || RC == &AMDGPU::AReg_256RegClass)
471 return &AMDGPU::AV_256RegClass;
472 if (RC == &AMDGPU::VReg_256_Align2RegClass ||
473 RC == &AMDGPU::AReg_256_Align2RegClass)
474 return &AMDGPU::AV_256_Align2RegClass;
475 if (RC == &AMDGPU::VReg_512RegClass || RC == &AMDGPU::AReg_512RegClass)
476 return &AMDGPU::AV_512RegClass;
477 if (RC == &AMDGPU::VReg_512_Align2RegClass ||
478 RC == &AMDGPU::AReg_512_Align2RegClass)
479 return &AMDGPU::AV_512_Align2RegClass;
480 if (RC == &AMDGPU::VReg_1024RegClass || RC == &AMDGPU::AReg_1024RegClass)
481 return &AMDGPU::AV_1024RegClass;
482 if (RC == &AMDGPU::VReg_1024_Align2RegClass ||
483 RC == &AMDGPU::AReg_1024_Align2RegClass)
484 return &AMDGPU::AV_1024_Align2RegClass;
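// For allocation purposes a pure VGPR or pure AGPR class may be widened to
// the combined AV_* superclass of the same width, giving the register
// allocator the freedom to place the value in either register file.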
514 return AMDGPU_AllVGPRs_RegMask;
518 return AMDGPU_AllAGPRs_RegMask;
522 return AMDGPU_AllVectorRegs_RegMask;
526 return AMDGPU_AllAllocatableSRegs_RegMask;
533 assert(NumRegIndex && "Not implemented");
534 assert(Channel < SubRegFromChannelTable[NumRegIndex - 1].size());
535 return SubRegFromChannelTable[NumRegIndex - 1][Channel];
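// Example (inferred from the table construction above): asking for the
// 2-dword group starting at channel 2, getSubRegFromChannel(2, 2), should
// yield AMDGPU::sub2_sub3, i.e. SubRegFromChannelTable[row for width 2][2].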
541 MCRegister BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));
542 return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SGPR_128RegClass);
555 reserveRegisterTuples(Reserved, AMDGPU::EXEC);
556 reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR);
559 reserveRegisterTuples(Reserved, AMDGPU::M0);
562 reserveRegisterTuples(Reserved, AMDGPU::SRC_VCCZ);
563 reserveRegisterTuples(Reserved, AMDGPU::SRC_EXECZ);
564 reserveRegisterTuples(Reserved, AMDGPU::SRC_SCC);
567 reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_BASE);
568 reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_LIMIT);
569 reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_BASE);
570 reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_LIMIT);
573 reserveRegisterTuples(Reserved, AMDGPU::SRC_POPS_EXITING_WAVE_ID);
576 reserveRegisterTuples(Reserved, AMDGPU::XNACK_MASK);
579 reserveRegisterTuples(Reserved, AMDGPU::LDS_DIRECT);
582 reserveRegisterTuples(Reserved, AMDGPU::TBA);
583 reserveRegisterTuples(Reserved, AMDGPU::TMA);
584 reserveRegisterTuples(Reserved, AMDGPU::TTMP0_TTMP1);
585 reserveRegisterTuples(Reserved, AMDGPU::TTMP2_TTMP3);
586 reserveRegisterTuples(Reserved, AMDGPU::TTMP4_TTMP5);
587 reserveRegisterTuples(Reserved, AMDGPU::TTMP6_TTMP7);
588 reserveRegisterTuples(Reserved, AMDGPU::TTMP8_TTMP9);
589 reserveRegisterTuples(Reserved, AMDGPU::TTMP10_TTMP11);
590 reserveRegisterTuples(Reserved, AMDGPU::TTMP12_TTMP13);
591 reserveRegisterTuples(Reserved, AMDGPU::TTMP14_TTMP15);
594 reserveRegisterTuples(Reserved, AMDGPU::SGPR_NULL64);
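// The tuples reserved above are never handed to the allocator: exec and
// flat_scratch, m0, the read-only src_* pseudo registers, xnack_mask,
// lds_direct, the trap handler TBA/TMA/TTMP registers, and the null register.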
606 unsigned TotalNumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
607 for (unsigned i = MaxNumSGPRs; i < TotalNumSGPRs; ++i) {
608 unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
609 reserveRegisterTuples(Reserved, Reg);
612 for (auto Reg : AMDGPU::SReg_32RegClass) {
613 Reserved.set(getSubReg(Reg, AMDGPU::hi16));
621 if (ScratchRSrcReg != AMDGPU::NoRegister) {
625 reserveRegisterTuples(Reserved, ScratchRSrcReg);
633 reserveRegisterTuples(Reserved, StackPtrReg);
634 assert(!isSubRegister(ScratchRSrcReg, StackPtrReg));
639 reserveRegisterTuples(Reserved, FrameReg);
640 assert(!isSubRegister(ScratchRSrcReg, FrameReg));
645 reserveRegisterTuples(Reserved, BasePtrReg);
646 assert(!isSubRegister(ScratchRSrcReg, BasePtrReg));
652 unsigned MaxNumAGPRs = MaxNumVGPRs;
653 unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
657 for (MCRegister Reg : AMDGPU::AGPR_32RegClass) {
658 reserveRegisterTuples(Reserved, Reg);
662 for (auto Reg : AMDGPU::AGPR_32RegClass) {
663 Reserved.set(getSubReg(Reg, AMDGPU::hi16));
677 MaxNumAGPRs = MaxNumVGPRs;
679 if (MaxNumVGPRs > TotalNumVGPRs) {
680 MaxNumAGPRs = MaxNumVGPRs - TotalNumVGPRs;
681 MaxNumVGPRs = TotalNumVGPRs;
687 for (unsigned i = MaxNumVGPRs; i < TotalNumVGPRs; ++i) {
688 unsigned Reg = AMDGPU::VGPR_32RegClass.getRegister(i);
689 reserveRegisterTuples(Reserved, Reg);
692 for (unsigned i = MaxNumAGPRs; i < TotalNumVGPRs; ++i) {
693 unsigned Reg = AMDGPU::AGPR_32RegClass.getRegister(i);
694 reserveRegisterTuples(Reserved, Reg);
704 reserveRegisterTuples(Reserved, Reg);
708 reserveRegisterTuples(Reserved, Reg);
711 reserveRegisterTuples(Reserved, Reg);
714 reserveRegisterTuples(Reserved, Reg);
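// SGPRs, VGPRs and AGPRs beyond the occupancy-derived budget
// (getMaxNumSGPRs / getMaxNumVGPRs) are reserved in the loops above so the
// allocator stays within the requested waves-per-EU limit.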
731 if (Info->isEntryFunction())
739 if (Info->isEntryFunction()) {
773 AMDGPU::OpName::offset);
774 return MI->getOperand(OffIdx).getImm();
783 AMDGPU::OpName::vaddr) ||
785 AMDGPU::OpName::saddr))) &&
786 "Should never see frame index on non-address operand");
812 DL = Ins->getDebugLoc();
818 : AMDGPU::V_MOV_B32_e32;
822 : &AMDGPU::VGPR_32RegClass);
830 Register OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
834 : &AMDGPU::VGPR_32RegClass);
848 TII->getAddNoCarry(*MBB, Ins, DL, BaseReg)
859 bool IsFlat = TII->isFLATScratch(MI);
875 TII->getNamedOperand(MI, IsFlat ? AMDGPU::OpName::saddr
876 : AMDGPU::OpName::vaddr);
881 assert(FIOp && FIOp->isFI() && "frame index must be address operand");
887 "offset should be legal");
889 OffsetOp->setImm(NewOffset);
899 "offset should be legal");
902 OffsetOp->setImm(NewOffset);
926 return &AMDGPU::VGPR_32RegClass;
933 if (RC == &AMDGPU::SCC_CLASSRegClass)
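// The SI_SPILL_<S|V|A|AV><bits>_{SAVE,RESTORE} pseudo-instructions handled
// below encode the spilled value's width in bits; the switch groups them so
// the number of 32-bit subregisters (bits / 32) can be returned per group.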
942 case AMDGPU::SI_SPILL_S1024_SAVE:
943 case AMDGPU::SI_SPILL_S1024_RESTORE:
944 case AMDGPU::SI_SPILL_V1024_SAVE:
945 case AMDGPU::SI_SPILL_V1024_RESTORE:
946 case AMDGPU::SI_SPILL_A1024_SAVE:
947 case AMDGPU::SI_SPILL_A1024_RESTORE:
948 case AMDGPU::SI_SPILL_AV1024_SAVE:
949 case AMDGPU::SI_SPILL_AV1024_RESTORE:
951 case AMDGPU::SI_SPILL_S512_SAVE:
952 case AMDGPU::SI_SPILL_S512_RESTORE:
953 case AMDGPU::SI_SPILL_V512_SAVE:
954 case AMDGPU::SI_SPILL_V512_RESTORE:
955 case AMDGPU::SI_SPILL_A512_SAVE:
956 case AMDGPU::SI_SPILL_A512_RESTORE:
957 case AMDGPU::SI_SPILL_AV512_SAVE:
958 case AMDGPU::SI_SPILL_AV512_RESTORE:
960 case AMDGPU::SI_SPILL_S384_SAVE:
961 case AMDGPU::SI_SPILL_S384_RESTORE:
962 case AMDGPU::SI_SPILL_V384_SAVE:
963 case AMDGPU::SI_SPILL_V384_RESTORE:
964 case AMDGPU::SI_SPILL_A384_SAVE:
965 case AMDGPU::SI_SPILL_A384_RESTORE:
966 case AMDGPU::SI_SPILL_AV384_SAVE:
967 case AMDGPU::SI_SPILL_AV384_RESTORE:
969 case AMDGPU::SI_SPILL_S352_SAVE:
970 case AMDGPU::SI_SPILL_S352_RESTORE:
971 case AMDGPU::SI_SPILL_V352_SAVE:
972 case AMDGPU::SI_SPILL_V352_RESTORE:
973 case AMDGPU::SI_SPILL_A352_SAVE:
974 case AMDGPU::SI_SPILL_A352_RESTORE:
975 case AMDGPU::SI_SPILL_AV352_SAVE:
976 case AMDGPU::SI_SPILL_AV352_RESTORE:
978 case AMDGPU::SI_SPILL_S320_SAVE:
979 case AMDGPU::SI_SPILL_S320_RESTORE:
980 case AMDGPU::SI_SPILL_V320_SAVE:
981 case AMDGPU::SI_SPILL_V320_RESTORE:
982 case AMDGPU::SI_SPILL_A320_SAVE:
983 case AMDGPU::SI_SPILL_A320_RESTORE:
984 case AMDGPU::SI_SPILL_AV320_SAVE:
985 case AMDGPU::SI_SPILL_AV320_RESTORE:
987 case AMDGPU::SI_SPILL_S288_SAVE:
988 case AMDGPU::SI_SPILL_S288_RESTORE:
989 case AMDGPU::SI_SPILL_V288_SAVE:
990 case AMDGPU::SI_SPILL_V288_RESTORE:
991 case AMDGPU::SI_SPILL_A288_SAVE:
992 case AMDGPU::SI_SPILL_A288_RESTORE:
993 case AMDGPU::SI_SPILL_AV288_SAVE:
994 case AMDGPU::SI_SPILL_AV288_RESTORE:
996 case AMDGPU::SI_SPILL_S256_SAVE:
997 case AMDGPU::SI_SPILL_S256_RESTORE:
998 case AMDGPU::SI_SPILL_V256_SAVE:
999 case AMDGPU::SI_SPILL_V256_RESTORE:
1000 case AMDGPU::SI_SPILL_A256_SAVE:
1001 case AMDGPU::SI_SPILL_A256_RESTORE:
1002 case AMDGPU::SI_SPILL_AV256_SAVE:
1003 case AMDGPU::SI_SPILL_AV256_RESTORE:
1005 case AMDGPU::SI_SPILL_S224_SAVE:
1006 case AMDGPU::SI_SPILL_S224_RESTORE:
1007 case AMDGPU::SI_SPILL_V224_SAVE:
1008 case AMDGPU::SI_SPILL_V224_RESTORE:
1009 case AMDGPU::SI_SPILL_A224_SAVE:
1010 case AMDGPU::SI_SPILL_A224_RESTORE:
1011 case AMDGPU::SI_SPILL_AV224_SAVE:
1012 case AMDGPU::SI_SPILL_AV224_RESTORE:
1014 case AMDGPU::SI_SPILL_S192_SAVE:
1015 case AMDGPU::SI_SPILL_S192_RESTORE:
1016 case AMDGPU::SI_SPILL_V192_SAVE:
1017 case AMDGPU::SI_SPILL_V192_RESTORE:
1018 case AMDGPU::SI_SPILL_A192_SAVE:
1019 case AMDGPU::SI_SPILL_A192_RESTORE:
1020 case AMDGPU::SI_SPILL_AV192_SAVE:
1021 case AMDGPU::SI_SPILL_AV192_RESTORE:
1023 case AMDGPU::SI_SPILL_S160_SAVE:
1024 case AMDGPU::SI_SPILL_S160_RESTORE:
1025 case AMDGPU::SI_SPILL_V160_SAVE:
1026 case AMDGPU::SI_SPILL_V160_RESTORE:
1027 case AMDGPU::SI_SPILL_A160_SAVE:
1028 case AMDGPU::SI_SPILL_A160_RESTORE:
1029 case AMDGPU::SI_SPILL_AV160_SAVE:
1030 case AMDGPU::SI_SPILL_AV160_RESTORE:
1032 case AMDGPU::SI_SPILL_S128_SAVE:
1033 case AMDGPU::SI_SPILL_S128_RESTORE:
1034 case AMDGPU::SI_SPILL_V128_SAVE:
1035 case AMDGPU::SI_SPILL_V128_RESTORE:
1036 case AMDGPU::SI_SPILL_A128_SAVE:
1037 case AMDGPU::SI_SPILL_A128_RESTORE:
1038 case AMDGPU::SI_SPILL_AV128_SAVE:
1039 case AMDGPU::SI_SPILL_AV128_RESTORE:
1041 case AMDGPU::SI_SPILL_S96_SAVE:
1042 case AMDGPU::SI_SPILL_S96_RESTORE:
1043 case AMDGPU::SI_SPILL_V96_SAVE:
1044 case AMDGPU::SI_SPILL_V96_RESTORE:
1045 case AMDGPU::SI_SPILL_A96_SAVE:
1046 case AMDGPU::SI_SPILL_A96_RESTORE:
1047 case AMDGPU::SI_SPILL_AV96_SAVE:
1048 case AMDGPU::SI_SPILL_AV96_RESTORE:
1050 case AMDGPU::SI_SPILL_S64_SAVE:
1051 case AMDGPU::SI_SPILL_S64_RESTORE:
1052 case AMDGPU::SI_SPILL_V64_SAVE:
1053 case AMDGPU::SI_SPILL_V64_RESTORE:
1054 case AMDGPU::SI_SPILL_A64_SAVE:
1055 case AMDGPU::SI_SPILL_A64_RESTORE:
1056 case AMDGPU::SI_SPILL_AV64_SAVE:
1057 case AMDGPU::SI_SPILL_AV64_RESTORE:
1059 case AMDGPU::SI_SPILL_S32_SAVE:
1060 case AMDGPU::SI_SPILL_S32_RESTORE:
1061 case AMDGPU::SI_SPILL_V32_SAVE:
1062 case AMDGPU::SI_SPILL_V32_RESTORE:
1063 case AMDGPU::SI_SPILL_A32_SAVE:
1064 case AMDGPU::SI_SPILL_A32_RESTORE:
1065 case AMDGPU::SI_SPILL_AV32_SAVE:
1066 case AMDGPU::SI_SPILL_AV32_RESTORE:
1074 case AMDGPU::BUFFER_STORE_DWORD_OFFEN:
1075 return AMDGPU::BUFFER_STORE_DWORD_OFFSET;
1076 case AMDGPU::BUFFER_STORE_BYTE_OFFEN:
1077 return AMDGPU::BUFFER_STORE_BYTE_OFFSET;
1078 case AMDGPU::BUFFER_STORE_SHORT_OFFEN:
1079 return AMDGPU::BUFFER_STORE_SHORT_OFFSET;
1080 case AMDGPU::BUFFER_STORE_DWORDX2_OFFEN:
1081 return AMDGPU::BUFFER_STORE_DWORDX2_OFFSET;
1082 case AMDGPU::BUFFER_STORE_DWORDX3_OFFEN:
1083 return AMDGPU::BUFFER_STORE_DWORDX3_OFFSET;
1084 case AMDGPU::BUFFER_STORE_DWORDX4_OFFEN:
1085 return AMDGPU::BUFFER_STORE_DWORDX4_OFFSET;
1086 case AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFEN:
1087 return AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFSET;
1088 case AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFEN:
1089 return AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFSET;
1097 case AMDGPU::BUFFER_LOAD_DWORD_OFFEN:
1098 return AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
1099 case AMDGPU::BUFFER_LOAD_UBYTE_OFFEN:
1100 return AMDGPU::BUFFER_LOAD_UBYTE_OFFSET;
1101 case AMDGPU::BUFFER_LOAD_SBYTE_OFFEN:
1102 return AMDGPU::BUFFER_LOAD_SBYTE_OFFSET;
1103 case AMDGPU::BUFFER_LOAD_USHORT_OFFEN:
1104 return AMDGPU::BUFFER_LOAD_USHORT_OFFSET;
1105 case AMDGPU::BUFFER_LOAD_SSHORT_OFFEN:
1106 return AMDGPU::BUFFER_LOAD_SSHORT_OFFSET;
1107 case AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN:
1108 return AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET;
1109 case AMDGPU::BUFFER_LOAD_DWORDX3_OFFEN:
1110 return AMDGPU::BUFFER_LOAD_DWORDX3_OFFSET;
1111 case AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN:
1112 return AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET;
1113 case AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFEN:
1114 return AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFSET;
1115 case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN:
1116 return AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET;
1117 case AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFEN:
1118 return AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFSET;
1119 case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN:
1120 return AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET;
1121 case AMDGPU::BUFFER_LOAD_SHORT_D16_OFFEN:
1122 return AMDGPU::BUFFER_LOAD_SHORT_D16_OFFSET;
1123 case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN:
1124 return AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET;
1132 case AMDGPU::BUFFER_STORE_DWORD_OFFSET:
1133 return AMDGPU::BUFFER_STORE_DWORD_OFFEN;
1134 case AMDGPU::BUFFER_STORE_BYTE_OFFSET:
1135 return AMDGPU::BUFFER_STORE_BYTE_OFFEN;
1136 case AMDGPU::BUFFER_STORE_SHORT_OFFSET:
1137 return AMDGPU::BUFFER_STORE_SHORT_OFFEN;
1138 case AMDGPU::BUFFER_STORE_DWORDX2_OFFSET:
1139 return AMDGPU::BUFFER_STORE_DWORDX2_OFFEN;
1140 case AMDGPU::BUFFER_STORE_DWORDX3_OFFSET:
1141 return AMDGPU::BUFFER_STORE_DWORDX3_OFFEN;
1142 case AMDGPU::BUFFER_STORE_DWORDX4_OFFSET:
1143 return AMDGPU::BUFFER_STORE_DWORDX4_OFFEN;
1144 case AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFSET:
1145 return AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFEN;
1146 case AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFSET:
1147 return AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFEN;
1155 case AMDGPU::BUFFER_LOAD_DWORD_OFFSET:
1156 return AMDGPU::BUFFER_LOAD_DWORD_OFFEN;
1157 case AMDGPU::BUFFER_LOAD_UBYTE_OFFSET:
1158 return AMDGPU::BUFFER_LOAD_UBYTE_OFFEN;
1159 case AMDGPU::BUFFER_LOAD_SBYTE_OFFSET:
1160 return AMDGPU::BUFFER_LOAD_SBYTE_OFFEN;
1161 case AMDGPU::BUFFER_LOAD_USHORT_OFFSET:
1162 return AMDGPU::BUFFER_LOAD_USHORT_OFFEN;
1163 case AMDGPU::BUFFER_LOAD_SSHORT_OFFSET:
1164 return AMDGPU::BUFFER_LOAD_SSHORT_OFFEN;
1165 case AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET:
1166 return AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN;
1167 case AMDGPU::BUFFER_LOAD_DWORDX3_OFFSET:
1168 return AMDGPU::BUFFER_LOAD_DWORDX3_OFFEN;
1169 case AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET:
1170 return AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN;
1171 case AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFSET:
1172 return AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFEN;
1173 case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET:
1174 return AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN;
1175 case AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFSET:
1176 return AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFEN;
1177 case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET:
1178 return AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN;
1179 case AMDGPU::BUFFER_LOAD_SHORT_D16_OFFSET:
1180 return AMDGPU::BUFFER_LOAD_SHORT_D16_OFFEN;
1181 case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET:
1182 return AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN;
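// These four helpers translate MUBUF loads/stores between their _OFFEN form
// (address supplied in a VGPR) and _OFFSET form (address folded into the
// immediate offset), used while materializing or eliminating frame indexes.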
1191 int Index, unsigned Lane,
1192 unsigned ValueReg, bool IsKill) {
1199 if (Reg == AMDGPU::NoRegister)
1202 bool IsStore = MI->mayStore();
1206 unsigned Dst = IsStore ? Reg : ValueReg;
1207 unsigned Src = IsStore ? ValueReg : Reg;
1208 bool IsVGPR = TRI->isVGPR(MRI, Reg);
1210 if (IsVGPR == TRI->isVGPR(MRI, ValueReg)) {
1220 unsigned Opc = (IsStore ^ IsVGPR) ? AMDGPU::V_ACCVGPR_WRITE_B32_e64
1221 : AMDGPU::V_ACCVGPR_READ_B32_e64;
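// When a VGPR spill slot has been assigned an AGPR lane (or vice versa),
// the transfer is done with V_ACCVGPR_WRITE/READ instead of a memory access;
// the opcode chosen above depends on the direction of the copy.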
1239 bool IsStore = MI->mayStore();
1241 unsigned Opc = MI->getOpcode();
1242 int LoadStoreOp = IsStore ?
1244 if (LoadStoreOp == -1)
1254 .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc))
1255 .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset))
1262 AMDGPU::OpName::vdata_in);
1264 NewMI.add(*VDataIn);
1269 unsigned LoadStoreOp,
1271 bool IsStore = TII->get(LoadStoreOp).mayStore();
1278 LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
1279 : AMDGPU::SCRATCH_LOAD_DWORD_SADDR;
1282 LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORDX2_SADDR
1283 : AMDGPU::SCRATCH_LOAD_DWORDX2_SADDR;
1286 LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORDX3_SADDR
1287 : AMDGPU::SCRATCH_LOAD_DWORDX3_SADDR;
1290 LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORDX4_SADDR
1291 : AMDGPU::SCRATCH_LOAD_DWORDX4_SADDR;
1307 unsigned LoadStoreOp, int Index, Register ValueReg, bool IsKill,
1310 assert((!RS || !LiveRegs) && "Only RS or LiveRegs can be set but not both");
1318 bool IsStore = Desc->mayStore();
1319 bool IsFlat = TII->isFLATScratch(LoadStoreOp);
1321 bool CanClobberSCC = false;
1322 bool Scavenged = false;
1332 unsigned EltSize = (IsFlat && !IsAGPR) ? std::min(RegWidth, 16u) : 4u;
1333 unsigned NumSubRegs = RegWidth / EltSize;
1334 unsigned Size = NumSubRegs * EltSize;
1335 unsigned RemSize = RegWidth - Size;
1336 unsigned NumRemSubRegs = RemSize ? 1 : 0;
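// Wide values are split into EltSize chunks: flat-scratch can move up to
// 16 bytes per instruction (unless the value lives in AGPRs), MUBUF moves
// 4 bytes, and any remainder is handled as one extra, narrower piece.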
1338 int64_t MaterializedOffset = Offset;
1340 int64_t MaxOffset = Offset + Size + RemSize - EltSize;
1341 int64_t ScratchOffsetRegDelta = 0;
1343 if (IsFlat && EltSize > 4) {
1345 Desc = &TII->get(LoadStoreOp);
1352 "unexpected VGPR spill offset");
1359 bool UseVGPROffset = false;
1366 if (IsFlat && SGPRBase) {
1390 bool IsOffsetLegal =
1405 CanClobberSCC = !RS->isRegUsed(AMDGPU::SCC);
1406 } else if (LiveRegs) {
1407 CanClobberSCC = !LiveRegs->contains(AMDGPU::SCC);
1408 for (MCRegister Reg : AMDGPU::SGPR_32RegClass) {
1416 if (ScratchOffsetReg != AMDGPU::NoRegister && !CanClobberSCC)
1420 UseVGPROffset = true;
1426 for (MCRegister Reg : AMDGPU::VGPR_32RegClass) {
1428 TmpOffsetVGPR = Reg;
1435 } else if (!SOffset && CanClobberSCC) {
1446 if (!ScratchOffsetReg)
1447 ScratchOffsetReg = FuncInfo->getStackPtrOffsetReg();
1448 SOffset = ScratchOffsetReg;
1449 ScratchOffsetRegDelta = Offset;
1457 if (!IsFlat && !UseVGPROffset)
1460 if (!UseVGPROffset && !SOffset)
1463 if (UseVGPROffset) {
1465 MaterializeVOffset(ScratchOffsetReg, TmpOffsetVGPR, Offset);
1466 } else if (ScratchOffsetReg == AMDGPU::NoRegister) {
1471 .addReg(ScratchOffsetReg)
1473 Add->getOperand(3).setIsDead();
1479 if (IsFlat && SOffset == AMDGPU::NoRegister) {
1481 && "Unexpected vaddr for flat scratch with a FI operand");
1483 if (UseVGPROffset) {
1490 Desc = &TII->get(LoadStoreOp);
1493 for (unsigned i = 0, e = NumSubRegs + NumRemSubRegs, RegOffset = 0; i != e;
1494 ++i, RegOffset += EltSize) {
1495 if (i == NumSubRegs) {
1499 Desc = &TII->get(LoadStoreOp);
1501 if (!IsFlat && UseVGPROffset) {
1504 Desc = &TII->get(NewLoadStoreOp);
1507 if (UseVGPROffset && TmpOffsetVGPR == TmpIntermediateVGPR) {
1514 MaterializeVOffset(ScratchOffsetReg, TmpOffsetVGPR, MaterializedOffset);
1517 unsigned NumRegs = EltSize / 4;
1523 unsigned SOffsetRegState = 0;
1525 const bool IsLastSubReg = i + 1 == e;
1526 const bool IsFirstSubReg = i == 0;
1535 bool NeedSuperRegDef = e > 1 && IsStore && IsFirstSubReg;
1536 bool NeedSuperRegImpOperand = e > 1;
1540 unsigned RemEltSize = EltSize;
1548 for (int LaneS = (RegOffset + EltSize) / 4 - 1, Lane = LaneS,
1549 LaneE = RegOffset / 4;
1550 Lane >= LaneE; --Lane) {
1551 bool IsSubReg = e > 1 || EltSize > 4;
1556 if (!MIB.getInstr())
1558 if (NeedSuperRegDef || (IsSubReg && IsStore && Lane == LaneS && IsFirstSubReg)) {
1560 NeedSuperRegDef = false;
1562 if ((IsSubReg || NeedSuperRegImpOperand) && (IsFirstSubReg || IsLastSubReg)) {
1563 NeedSuperRegImpOperand = true;
1564 unsigned State = SrcDstRegState;
1565 if (!IsLastSubReg || (Lane != LaneE))
1566 State &= ~RegState::Kill;
1567 if (!IsFirstSubReg || (Lane != LaneS))
1568 State &= ~RegState::Define;
1577 if (RemEltSize != EltSize) {
1578 assert(IsFlat && EltSize > 4);
1580 unsigned NumRegs = RemEltSize / 4;
1584 Desc = &TII->get(Opc);
1587 unsigned FinalReg = SubReg;
1592 if (!TmpIntermediateVGPR) {
1593 TmpIntermediateVGPR = FuncInfo->getVGPRForAGPRCopy();
1598 TII->get(AMDGPU::V_ACCVGPR_READ_B32_e64),
1599 TmpIntermediateVGPR)
1601 if (NeedSuperRegDef)
1605 SubReg = TmpIntermediateVGPR;
1606 } else if (UseVGPROffset) {
1608 if (!TmpOffsetVGPR) {
1623 if (UseVGPROffset) {
1632 if (SOffset == AMDGPU::NoRegister) {
1634 if (UseVGPROffset && ScratchOffsetReg) {
1635 MIB.addReg(ScratchOffsetReg);
1642 MIB.addReg(SOffset, SOffsetRegState);
1644 MIB.addImm(Offset + RegOffset)
1648 MIB.addMemOperand(NewMMO);
1650 if (!IsAGPR && NeedSuperRegDef)
1653 if (!IsStore && IsAGPR && TmpIntermediateVGPR != AMDGPU::NoRegister) {
1660 if (NeedSuperRegImpOperand && (IsFirstSubReg || IsLastSubReg))
1664 if (ScratchOffsetRegDelta != 0) {
1668 .addImm(-ScratchOffsetRegDelta);
1674 bool IsKill) const {
1692 : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
1697 : AMDGPU::BUFFER_STORE_DWORD_OFFSET;
1711 bool SpillToVGPR = !VGPRSpills.empty();
1712 if (OnlyToVGPR && !SpillToVGPR)
1721 "Num of VGPR lanes should be equal to num of SGPRs spilled");
1723 for (unsigned i = 0, e = SB.NumSubRegs; i < e; ++i) {
1730 bool IsFirstSubreg = i == 0;
1732 bool UseKill = SB.IsKill && IsLastSubreg;
1738 SB.TII.get(AMDGPU::V_WRITELANE_B32), Spill.VGPR)
1755 if (SB.NumSubRegs > 1 && (IsFirstSubreg || IsLastSubreg))
1775 for (unsigned i = Offset * PVD.PerVGPR,
1802 unsigned SuperKillState = 0;
1816 MI->eraseFromParent();
1831 bool SpillToVGPR = !VGPRSpills.empty();
1832 if (OnlyToVGPR && !SpillToVGPR)
1836 for (unsigned i = 0, e = SB.NumSubRegs; i < e; ++i) {
1867 for (unsigned i = Offset * PVD.PerVGPR,
1875 bool LastSubReg = (i + 1 == e);
1877 SB.TII.get(AMDGPU::V_READLANE_B32), SubReg)
1894 MI->eraseFromParent();
1914 for (unsigned i = Offset * PVD.PerVGPR,
1933 unsigned SuperKillState = 0;
1943 MI = RestoreMBB.end();
1949 for (unsigned i = Offset * PVD.PerVGPR,
1956 bool LastSubReg = (i + 1 == e);
1977 switch (MI->getOpcode()) {
1978 case AMDGPU::SI_SPILL_S1024_SAVE:
1979 case AMDGPU::SI_SPILL_S512_SAVE:
1980 case AMDGPU::SI_SPILL_S384_SAVE:
1981 case AMDGPU::SI_SPILL_S352_SAVE:
1982 case AMDGPU::SI_SPILL_S320_SAVE:
1983 case AMDGPU::SI_SPILL_S288_SAVE:
1984 case AMDGPU::SI_SPILL_S256_SAVE:
1985 case AMDGPU::SI_SPILL_S224_SAVE:
1986 case AMDGPU::SI_SPILL_S192_SAVE:
1987 case AMDGPU::SI_SPILL_S160_SAVE:
1988 case AMDGPU::SI_SPILL_S128_SAVE:
1989 case AMDGPU::SI_SPILL_S96_SAVE:
1990 case AMDGPU::SI_SPILL_S64_SAVE:
1991 case AMDGPU::SI_SPILL_S32_SAVE:
1993 case AMDGPU::SI_SPILL_S1024_RESTORE:
1994 case AMDGPU::SI_SPILL_S512_RESTORE:
1995 case AMDGPU::SI_SPILL_S384_RESTORE:
1996 case AMDGPU::SI_SPILL_S352_RESTORE:
1997 case AMDGPU::SI_SPILL_S320_RESTORE:
1998 case AMDGPU::SI_SPILL_S288_RESTORE:
1999 case AMDGPU::SI_SPILL_S256_RESTORE:
2000 case AMDGPU::SI_SPILL_S224_RESTORE:
2001 case AMDGPU::SI_SPILL_S192_RESTORE:
2002 case AMDGPU::SI_SPILL_S160_RESTORE:
2003 case AMDGPU::SI_SPILL_S128_RESTORE:
2004 case AMDGPU::SI_SPILL_S96_RESTORE:
2005 case AMDGPU::SI_SPILL_S64_RESTORE:
2006 case AMDGPU::SI_SPILL_S32_RESTORE:
2014 int SPAdj, unsigned FIOperandNum,
2023 assert(SPAdj == 0 && "unhandled SP adjustment in call sequence?");
2026 int Index = MI->getOperand(FIOperandNum).getIndex();
2032 switch (MI->getOpcode()) {
2034 case AMDGPU::SI_SPILL_S1024_SAVE:
2035 case AMDGPU::SI_SPILL_S512_SAVE:
2036 case AMDGPU::SI_SPILL_S384_SAVE:
2037 case AMDGPU::SI_SPILL_S352_SAVE:
2038 case AMDGPU::SI_SPILL_S320_SAVE:
2039 case AMDGPU::SI_SPILL_S288_SAVE:
2040 case AMDGPU::SI_SPILL_S256_SAVE:
2041 case AMDGPU::SI_SPILL_S224_SAVE:
2042 case AMDGPU::SI_SPILL_S192_SAVE:
2043 case AMDGPU::SI_SPILL_S160_SAVE:
2044 case AMDGPU::SI_SPILL_S128_SAVE:
2045 case AMDGPU::SI_SPILL_S96_SAVE:
2046 case AMDGPU::SI_SPILL_S64_SAVE:
2047 case AMDGPU::SI_SPILL_S32_SAVE: {
2052 case AMDGPU::SI_SPILL_S1024_RESTORE:
2053 case AMDGPU::SI_SPILL_S512_RESTORE:
2054 case AMDGPU::SI_SPILL_S384_RESTORE:
2055 case AMDGPU::SI_SPILL_S352_RESTORE:
2056 case AMDGPU::SI_SPILL_S320_RESTORE:
2057 case AMDGPU::SI_SPILL_S288_RESTORE:
2058 case AMDGPU::SI_SPILL_S256_RESTORE:
2059 case AMDGPU::SI_SPILL_S224_RESTORE:
2060 case AMDGPU::SI_SPILL_S192_RESTORE:
2061 case AMDGPU::SI_SPILL_S160_RESTORE:
2062 case AMDGPU::SI_SPILL_S128_RESTORE:
2063 case AMDGPU::SI_SPILL_S96_RESTORE:
2064 case AMDGPU::SI_SPILL_S64_RESTORE:
2065 case AMDGPU::SI_SPILL_S32_RESTORE: {
2070 case AMDGPU::SI_SPILL_V1024_SAVE:
2071 case AMDGPU::SI_SPILL_V512_SAVE:
2072 case AMDGPU::SI_SPILL_V384_SAVE:
2073 case AMDGPU::SI_SPILL_V352_SAVE:
2074 case AMDGPU::SI_SPILL_V320_SAVE:
2075 case AMDGPU::SI_SPILL_V288_SAVE:
2076 case AMDGPU::SI_SPILL_V256_SAVE:
2077 case AMDGPU::SI_SPILL_V224_SAVE:
2078 case AMDGPU::SI_SPILL_V192_SAVE:
2079 case AMDGPU::SI_SPILL_V160_SAVE:
2080 case AMDGPU::SI_SPILL_V128_SAVE:
2081 case AMDGPU::SI_SPILL_V96_SAVE:
2082 case AMDGPU::SI_SPILL_V64_SAVE:
2083 case AMDGPU::SI_SPILL_V32_SAVE:
2084 case AMDGPU::SI_SPILL_A1024_SAVE:
2085 case AMDGPU::SI_SPILL_A512_SAVE:
2086 case AMDGPU::SI_SPILL_A384_SAVE:
2087 case AMDGPU::SI_SPILL_A352_SAVE:
2088 case AMDGPU::SI_SPILL_A320_SAVE:
2089 case AMDGPU::SI_SPILL_A288_SAVE:
2090 case AMDGPU::SI_SPILL_A256_SAVE:
2091 case AMDGPU::SI_SPILL_A224_SAVE:
2092 case AMDGPU::SI_SPILL_A192_SAVE:
2093 case AMDGPU::SI_SPILL_A160_SAVE:
2094 case AMDGPU::SI_SPILL_A128_SAVE:
2095 case AMDGPU::SI_SPILL_A96_SAVE:
2096 case AMDGPU::SI_SPILL_A64_SAVE:
2097 case AMDGPU::SI_SPILL_A32_SAVE:
2098 case AMDGPU::SI_SPILL_AV1024_SAVE:
2099 case AMDGPU::SI_SPILL_AV512_SAVE:
2100 case AMDGPU::SI_SPILL_AV384_SAVE:
2101 case AMDGPU::SI_SPILL_AV352_SAVE:
2102 case AMDGPU::SI_SPILL_AV320_SAVE:
2103 case AMDGPU::SI_SPILL_AV288_SAVE:
2104 case AMDGPU::SI_SPILL_AV256_SAVE:
2105 case AMDGPU::SI_SPILL_AV224_SAVE:
2106 case AMDGPU::SI_SPILL_AV192_SAVE:
2107 case AMDGPU::SI_SPILL_AV160_SAVE:
2108 case AMDGPU::SI_SPILL_AV128_SAVE:
2109 case AMDGPU::SI_SPILL_AV96_SAVE:
2110 case AMDGPU::SI_SPILL_AV64_SAVE:
2111 case AMDGPU::SI_SPILL_AV32_SAVE: {
2113 AMDGPU::OpName::vdata);
2114 assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
2118 : AMDGPU::BUFFER_STORE_DWORD_OFFSET;
2119 auto *MBB = MI->getParent();
2122 TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
2123 *MI->memoperands_begin(), RS);
2125 MI->eraseFromParent();
2128 case AMDGPU::SI_SPILL_V32_RESTORE:
2129 case AMDGPU::SI_SPILL_V64_RESTORE:
2130 case AMDGPU::SI_SPILL_V96_RESTORE:
2131 case AMDGPU::SI_SPILL_V128_RESTORE:
2132 case AMDGPU::SI_SPILL_V160_RESTORE:
2133 case AMDGPU::SI_SPILL_V192_RESTORE:
2134 case AMDGPU::SI_SPILL_V224_RESTORE:
2135 case AMDGPU::SI_SPILL_V256_RESTORE:
2136 case AMDGPU::SI_SPILL_V288_RESTORE:
2137 case AMDGPU::SI_SPILL_V320_RESTORE:
2138 case AMDGPU::SI_SPILL_V352_RESTORE:
2139 case AMDGPU::SI_SPILL_V384_RESTORE:
2140 case AMDGPU::SI_SPILL_V512_RESTORE:
2141 case AMDGPU::SI_SPILL_V1024_RESTORE:
2142 case AMDGPU::SI_SPILL_A32_RESTORE:
2143 case AMDGPU::SI_SPILL_A64_RESTORE:
2144 case AMDGPU::SI_SPILL_A96_RESTORE:
2145 case AMDGPU::SI_SPILL_A128_RESTORE:
2146 case AMDGPU::SI_SPILL_A160_RESTORE:
2147 case AMDGPU::SI_SPILL_A192_RESTORE:
2148 case AMDGPU::SI_SPILL_A224_RESTORE:
2149 case AMDGPU::SI_SPILL_A256_RESTORE:
2150 case AMDGPU::SI_SPILL_A288_RESTORE:
2151 case AMDGPU::SI_SPILL_A320_RESTORE:
2152 case AMDGPU::SI_SPILL_A352_RESTORE:
2153 case AMDGPU::SI_SPILL_A384_RESTORE:
2154 case AMDGPU::SI_SPILL_A512_RESTORE:
2155 case AMDGPU::SI_SPILL_A1024_RESTORE:
2156 case AMDGPU::SI_SPILL_AV32_RESTORE:
2157 case AMDGPU::SI_SPILL_AV64_RESTORE:
2158 case AMDGPU::SI_SPILL_AV96_RESTORE:
2159 case AMDGPU::SI_SPILL_AV128_RESTORE:
2160 case AMDGPU::SI_SPILL_AV160_RESTORE:
2161 case AMDGPU::SI_SPILL_AV192_RESTORE:
2162 case AMDGPU::SI_SPILL_AV224_RESTORE:
2163 case AMDGPU::SI_SPILL_AV256_RESTORE:
2164 case AMDGPU::SI_SPILL_AV288_RESTORE:
2165 case AMDGPU::SI_SPILL_AV320_RESTORE:
2166 case AMDGPU::SI_SPILL_AV352_RESTORE:
2167 case AMDGPU::SI_SPILL_AV384_RESTORE:
2168 case AMDGPU::SI_SPILL_AV512_RESTORE:
2169 case AMDGPU::SI_SPILL_AV1024_RESTORE: {
2171 AMDGPU::OpName::vdata);
2172 assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
2176 : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
2177 auto *MBB = MI->getParent();
2180 TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
2181 *MI->memoperands_begin(), RS);
2182 MI->eraseFromParent();
2190 int64_t Offset = FrameInfo.getObjectOffset(Index);
2192 if (TII->isFLATScratch(*MI)) {
2193 assert((int16_t)FIOperandNum ==
2195 AMDGPU::OpName::saddr));
2205 TII->getNamedOperand(*MI, AMDGPU::OpName::offset);
2209 OffsetOp->setImm(NewOffset);
2216 unsigned Opc = MI->getOpcode();
2230 AMDGPU::OpName::vdst_in);
2231 bool TiedVDst = VDstIn != -1 &&
2232 MI->getOperand(VDstIn).isReg() &&
2233 MI->getOperand(VDstIn).isTied();
2235 MI->untieRegOperand(VDstIn);
2245 assert(NewVDst != -1 && NewVDstIn != -1 && "Must be tied!");
2246 MI->tieOperands(NewVDst, NewVDstIn);
2248 MI->setDesc(TII->get(NewOpc));
2256 if (TII->isImmOperandLegal(*MI, FIOperandNum, FIOp))
2263 bool UseSGPR = TII->isOperandLegal(*MI, FIOperandNum, &FIOp);
2265 if (!Offset && FrameReg && UseSGPR) {
2271 : &AMDGPU::VGPR_32RegClass;
2277 if ((!FrameReg || !Offset) && TmpReg) {
2278 unsigned Opc = UseSGPR ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
2281 MIB.addReg(FrameReg);
2289 RS->isRegUsed(AMDGPU::SCC) && !MI->definesRegister(AMDGPU::SCC);
2298 if ((!TmpSReg && !FrameReg) || (!TmpReg && !UseSGPR))
2309 assert(!(Offset & 0x1) && "Flat scratch offset must be aligned!");
2329 if (TmpSReg == FrameReg) {
2331 if (NeedSaveSCC && !MI->registerDefIsDead(AMDGPU::SCC)) {
2355 bool IsMUBUF = TII->isMUBUF(*MI);
2362 RS->isRegUsed(AMDGPU::SCC) && !MI->definesRegister(AMDGPU::SCC);
2364 ? &AMDGPU::SReg_32RegClass
2365 : &AMDGPU::VGPR_32RegClass;
2366 bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32 ||
2367 MI->getOpcode() == AMDGPU::V_MOV_B32_e64;
2368 Register ResultReg = IsCopy ? MI->getOperand(0).getReg()
2371 int64_t Offset = FrameInfo.getObjectOffset(Index);
2373 unsigned OpCode = IsSALU && !LiveSCC ? AMDGPU::S_LSHR_B32
2374 : AMDGPU::V_LSHRREV_B32_e64;
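// The shift converts the wave-relative scratch offset in the frame register
// into a per-lane offset (divide by the wavefront size); the SALU form
// (S_LSHR_B32) is only chosen when the result is needed in an SGPR and SCC
// is not live, otherwise the VALU form is used.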
2379 if (IsSALU && !LiveSCC)
2381 if (IsSALU && LiveSCC) {
2387 ResultReg = NewDest;
2392 if ((MIB = TII->getAddNoCarry(*MBB, MI, DL, ResultReg, *RS)) !=
2402 const bool IsVOP2 = MIB->getOpcode() == AMDGPU::V_ADD_U32_e32;
2413 "Need to reuse carry out register");
2418 ConstOffsetReg = getSubReg(MIB.getReg(1), AMDGPU::sub0);
2420 ConstOffsetReg = MIB.getReg(1);
2430 if (!MIB || IsSALU) {
2439 Register ScaledReg = TmpScaledReg.isValid() ? TmpScaledReg : FrameReg;
2451 ResultReg = ScaledReg;
2454 if (!TmpScaledReg.isValid()) {
2467 MI->eraseFromParent();
2476 assert(static_cast<int>(FIOperandNum) ==
2478 AMDGPU::OpName::vaddr));
2480 auto &SOffset = *TII->getNamedOperand(*MI, AMDGPU::OpName::soffset);
2481 assert((SOffset.isImm() && SOffset.getImm() == 0));
2483 if (FrameReg != AMDGPU::NoRegister)
2484 SOffset.ChangeToRegister(FrameReg, false);
2486 int64_t Offset = FrameInfo.getObjectOffset(Index);
2488 = TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm();
2489 int64_t NewOffset = OldImm + Offset;
2493 MI->eraseFromParent();
2502 if (!TII->isImmOperandLegal(*MI, FIOperandNum, FIOp)) {
2520 return &AMDGPU::VReg_64RegClass;
2522 return &AMDGPU::VReg_96RegClass;
2524 return &AMDGPU::VReg_128RegClass;
2526 return &AMDGPU::VReg_160RegClass;
2528 return &AMDGPU::VReg_192RegClass;
2530 return &AMDGPU::VReg_224RegClass;
2532 return &AMDGPU::VReg_256RegClass;
2534 return &AMDGPU::VReg_288RegClass;
2536 return &AMDGPU::VReg_320RegClass;
2538 return &AMDGPU::VReg_352RegClass;
2540 return &AMDGPU::VReg_384RegClass;
2542 return &AMDGPU::VReg_512RegClass;
2544 return &AMDGPU::VReg_1024RegClass;
2552 return &AMDGPU::VReg_64_Align2RegClass;
2554 return &AMDGPU::VReg_96_Align2RegClass;
2556 return &AMDGPU::VReg_128_Align2RegClass;
2558 return &AMDGPU::VReg_160_Align2RegClass;
2560 return &AMDGPU::VReg_192_Align2RegClass;
2562 return &AMDGPU::VReg_224_Align2RegClass;
2564 return &AMDGPU::VReg_256_Align2RegClass;
2566 return &AMDGPU::VReg_288_Align2RegClass;
2568 return &AMDGPU::VReg_320_Align2RegClass;
2570 return &AMDGPU::VReg_352_Align2RegClass;
2572 return &AMDGPU::VReg_384_Align2RegClass;
2574 return &AMDGPU::VReg_512_Align2RegClass;
2576 return &AMDGPU::VReg_1024_Align2RegClass;
2584 return &AMDGPU::VReg_1RegClass;
2586 return &AMDGPU::VGPR_LO16RegClass;
2588 return &AMDGPU::VGPR_32RegClass;
2596 return &AMDGPU::AReg_64RegClass;
2598 return &AMDGPU::AReg_96RegClass;
2600 return &AMDGPU::AReg_128RegClass;
2602 return &AMDGPU::AReg_160RegClass;
2604 return &AMDGPU::AReg_192RegClass;
2606 return &AMDGPU::AReg_224RegClass;
2608 return &AMDGPU::AReg_256RegClass;
2610 return &AMDGPU::AReg_288RegClass;
2612 return &AMDGPU::AReg_320RegClass;
2614 return &AMDGPU::AReg_352RegClass;
2616 return &AMDGPU::AReg_384RegClass;
2618 return &AMDGPU::AReg_512RegClass;
2620 return &AMDGPU::AReg_1024RegClass;
2628 return &AMDGPU::AReg_64_Align2RegClass;
2630 return &AMDGPU::AReg_96_Align2RegClass;
2632 return &AMDGPU::AReg_128_Align2RegClass;
2634 return &AMDGPU::AReg_160_Align2RegClass;
2636 return &AMDGPU::AReg_192_Align2RegClass;
2638 return &AMDGPU::AReg_224_Align2RegClass;
2640 return &AMDGPU::AReg_256_Align2RegClass;
2642 return &AMDGPU::AReg_288_Align2RegClass;
2644 return &AMDGPU::AReg_320_Align2RegClass;
2646 return &AMDGPU::AReg_352_Align2RegClass;
2648 return &AMDGPU::AReg_384_Align2RegClass;
2650 return &AMDGPU::AReg_512_Align2RegClass;
2652 return &AMDGPU::AReg_1024_Align2RegClass;
2660 return &AMDGPU::AGPR_LO16RegClass;
2662 return &AMDGPU::AGPR_32RegClass;
2670 return &AMDGPU::AV_64RegClass;
2672 return &AMDGPU::AV_96RegClass;
2674 return &AMDGPU::AV_128RegClass;
2676 return &AMDGPU::AV_160RegClass;
2678 return &AMDGPU::AV_192RegClass;
2680 return &AMDGPU::AV_224RegClass;
2682 return &AMDGPU::AV_256RegClass;
2684 return &AMDGPU::AV_288RegClass;
2686 return &AMDGPU::AV_320RegClass;
2688 return &AMDGPU::AV_352RegClass;
2690 return &AMDGPU::AV_384RegClass;
2692 return &AMDGPU::AV_512RegClass;
2694 return &AMDGPU::AV_1024RegClass;
2702 return &AMDGPU::AV_64_Align2RegClass;
2704 return &AMDGPU::AV_96_Align2RegClass;
2706 return &AMDGPU::AV_128_Align2RegClass;
2708 return &AMDGPU::AV_160_Align2RegClass;
2710 return &AMDGPU::AV_192_Align2RegClass;
2712 return &AMDGPU::AV_224_Align2RegClass;
2714 return &AMDGPU::AV_256_Align2RegClass;
2716 return &AMDGPU::AV_288_Align2RegClass;
2718 return &AMDGPU::AV_320_Align2RegClass;
2720 return &AMDGPU::AV_352_Align2RegClass;
2722 return &AMDGPU::AV_384_Align2RegClass;
2724 return &AMDGPU::AV_512_Align2RegClass;
2726 return &AMDGPU::AV_1024_Align2RegClass;
2734 return &AMDGPU::VGPR_LO16RegClass;
2736 return &AMDGPU::AV_32RegClass;
2745 return &AMDGPU::SGPR_LO16RegClass;
2747 return &AMDGPU::SReg_32RegClass;
2749 return &AMDGPU::SReg_64RegClass;
2751 return &AMDGPU::SGPR_96RegClass;
2753 return &AMDGPU::SGPR_128RegClass;
2755 return &AMDGPU::SGPR_160RegClass;
2757 return &AMDGPU::SGPR_192RegClass;
2759 return &AMDGPU::SGPR_224RegClass;
2761 return &AMDGPU::SGPR_256RegClass;
2763 return &AMDGPU::SGPR_288RegClass;
2765 return &AMDGPU::SGPR_320RegClass;
2767 return &AMDGPU::SGPR_352RegClass;
2769 return &AMDGPU::SGPR_384RegClass;
2771 return &AMDGPU::SGPR_512RegClass;
2773 return &AMDGPU::SGPR_1024RegClass;
2781 if (Reg.isVirtual())
2782 RC = MRI.getRegClass(Reg);
2784 RC = getPhysRegBaseClass(Reg);
2790 unsigned Size = getRegSizeInBits(*SRC);
2792 assert(VRC && "Invalid register class size");
2798 unsigned Size = getRegSizeInBits(*SRC);
2800 assert(ARC && "Invalid register class size");
2806 unsigned Size = getRegSizeInBits(*VRC);
2808 return &AMDGPU::SGPR_32RegClass;
2810 assert(SRC && "Invalid register class size");
2817 unsigned SubIdx) const {
2820 getMatchingSuperRegClass(SuperRC, SubRC, SubIdx);
2821 return MatchRC && MatchRC->hasSubClassEq(SuperRC) ? MatchRC : nullptr;
2837 unsigned SrcSubReg) const {
2854 return getCommonSubClass(DefRC, SrcRC) != nullptr;
2870 bool ReserveHighestVGPR) const {
2871 if (ReserveHighestVGPR) {
2873 if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg))
2877 if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg))
2884 unsigned EltSize) const {
2886 assert(RegBitWidth >= 32 && RegBitWidth <= 1024);
2888 const unsigned RegDWORDs = RegBitWidth / 32;
2889 const unsigned EltDWORDs = EltSize / 4;
2890 assert(RegSplitParts.size() + 1 >= EltDWORDs);
2892 const std::vector<int16_t> &Parts = RegSplitParts[EltDWORDs - 1];
2893 const unsigned NumParts = RegDWORDs / EltDWORDs;
2895 return ArrayRef(Parts.data(), NumParts);
2901 return Reg.isVirtual() ? MRI.getRegClass(Reg) : getPhysRegBaseClass(Reg);
2908 return getSubRegisterClass(SrcRC, MO.getSubReg());
2933 unsigned SrcSize = getRegSizeInBits(*SrcRC);
2934 unsigned DstSize = getRegSizeInBits(*DstRC);
2935 unsigned NewSize = getRegSizeInBits(*NewRC);
2941 if (SrcSize <= 32 || DstSize <= 32)
2944 return NewSize <= DstSize || NewSize <= SrcSize;
2953 switch (RC->getID()) {
2955 return AMDGPUGenRegisterInfo::getRegPressureLimit(RC, MF);
2956 case AMDGPU::VGPR_32RegClassID:
2957 case AMDGPU::VGPR_LO16RegClassID:
2958 case AMDGPU::VGPR_HI16RegClassID:
2960 case AMDGPU::SGPR_32RegClassID:
2961 case AMDGPU::SGPR_LO16RegClassID:
2967 unsigned Idx) const {
2968 if (Idx == AMDGPU::RegisterPressureSets::VGPR_32 ||
2969 Idx == AMDGPU::RegisterPressureSets::AGPR_32)
2973 if (Idx == AMDGPU::RegisterPressureSets::SReg_32)
2981 static const int Empty[] = { -1 };
2983 if (RegPressureIgnoredUnits[RegUnit])
2986 return AMDGPUGenRegisterInfo::getRegUnitPressureSets(RegUnit);
2991 return AMDGPU::SGPR30_SGPR31;
2997 switch (RB.getID()) {
2998 case AMDGPU::VGPRRegBankID:
3000 case AMDGPU::VCCRegBankID:
3002 return isWave32 ? &AMDGPU::SReg_32_XM0_XEXECRegClass
3003 : &AMDGPU::SReg_64_XEXECRegClass;
3004 case AMDGPU::SGPRRegBankID:
3006 case AMDGPU::AGPRRegBankID:
3021 return getAllocatableClass(RC);
3027 return isWave32 ? AMDGPU::VCC_LO : AMDGPU::VCC;
3031 return isWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
3037 : &AMDGPU::VReg_64RegClass;
3042 switch ((int)RCID) {
3043 case AMDGPU::SReg_1RegClassID:
3045 case AMDGPU::SReg_1_XEXECRegClassID:
3046 return isWave32 ? &AMDGPU::SReg_32_XM0_XEXECRegClass
3047 : &AMDGPU::SReg_64_XEXECRegClass;
3051 return AMDGPUGenRegisterInfo::getRegClass(RCID);
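// SReg_1 is a virtual "bool" class: it resolves to SReg_32(_XM0_XEXEC) in
// wave32 mode and SReg_64(_XEXEC) in wave64 mode, matching the width of the
// exec mask.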
3064 if (Reg.isVirtual()) {
3069 : MRI.getMaxLaneMaskForVReg(Reg);
3073 if ((S.LaneMask & SubLanes) == SubLanes) {
3074 V = S.getVNInfoAt(UseIdx);
3102 if (!Def || !MDT.dominates(Def, &Use))
3105 assert(Def->modifiesRegister(Reg, this));
3111 assert(getRegSizeInBits(*getPhysRegBaseClass(Reg)) <= 32);
3114 AMDGPU::SReg_32RegClass,
3115 AMDGPU::AGPR_32RegClass } ) {
3116 if (MCPhysReg Super = getMatchingSuperReg(Reg, AMDGPU::lo16, &RC))
3119 if (MCPhysReg Super = getMatchingSuperReg(Reg, AMDGPU::hi16,
3120 &AMDGPU::VGPR_32RegClass)) {
3124 return AMDGPU::NoRegister;
3147 unsigned Size = getRegSizeInBits(*RC);
unsigned const MachineRegisterInfo * MRI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Provides AMDGPU specific target descriptions.
This file declares the targeting of the RegisterBankInfo class for AMDGPU.
static const Function * getParent(const Value *V)
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Analysis containing CSE Info
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
AMD GCN specific subclass of TargetSubtarget.
const HexagonInstrInfo * TII
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first found DebugLoc that has a DILocation, given a range of instructions.
unsigned const TargetRegisterInfo * TRI
typename CallsiteContextGraph< DerivedCCG, FuncTy, CallTy >::FuncInfo FuncInfo
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
This file declares the machine register scavenger class.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static int getOffenMUBUFStore(unsigned Opc)
static const TargetRegisterClass * getAnyAGPRClassForBitWidth(unsigned BitWidth)
static int getOffsetMUBUFLoad(unsigned Opc)
static const std::array< unsigned, 17 > SubRegFromChannelTableWidthMap
static cl::opt< bool > EnableSpillSGPRToVGPR("amdgpu-spill-sgpr-to-vgpr", cl::desc("Enable spilling VGPRs to SGPRs"), cl::ReallyHidden, cl::init(true))
static const TargetRegisterClass * getAlignedAGPRClassForBitWidth(unsigned BitWidth)
static bool buildMUBUFOffsetLoadStore(const GCNSubtarget &ST, MachineFrameInfo &MFI, MachineBasicBlock::iterator MI, int Index, int64_t Offset)
static unsigned getFlatScratchSpillOpcode(const SIInstrInfo *TII, unsigned LoadStoreOp, unsigned EltSize)
static const TargetRegisterClass * getAlignedVGPRClassForBitWidth(unsigned BitWidth)
static int getOffsetMUBUFStore(unsigned Opc)
static const TargetRegisterClass * getAnyVGPRClassForBitWidth(unsigned BitWidth)
static unsigned getNumSubRegsForSpillOp(unsigned Op)
static const TargetRegisterClass * getAlignedVectorSuperClassForBitWidth(unsigned BitWidth)
static const TargetRegisterClass * getAnyVectorSuperClassForBitWidth(unsigned BitWidth)
static MachineInstrBuilder spillVGPRtoAGPR(const GCNSubtarget &ST, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, int Index, unsigned Lane, unsigned ValueReg, bool IsKill)
static int getOffenMUBUFLoad(unsigned Opc)
Interface definition for SIRegisterInfo.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
static const char * getRegisterName(MCRegister Reg)
uint32_t getLDSSize() const
bool isEntryFunction() const
unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const
Inverse of getMaxLocalMemWithWaveCount.
unsigned getWavefrontSizeLog2() const
unsigned getWavefrontSize() const
bool hasInv2PiInlineImm() const
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
void resize(unsigned N, bool t=false)
resize - Grow or shrink the bitvector.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
bool hasGFX90AInsts() const
bool hasMFMAInlineLiteralBug() const
const SIInstrInfo * getInstrInfo() const override
unsigned getConstantBusLimit(unsigned Opcode) const
bool needsAlignedVGPRs() const
Return if operations acting on VGPR tuples require even alignment.
bool enableFlatScratch() const
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const
const SIFrameLowering * getFrameLowering() const override
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const
bool hasFlatScratchSTMode() const
LiveInterval - This class represents the liveness of a register, or stack slot.
bool hasSubRanges() const
Returns true if subregister liveness information is available.
iterator_range< subrange_iterator > subranges()
void removeAllRegUnitsForPhysReg(MCRegister Reg)
Remove associated live ranges for the register units associated with Reg.
bool hasInterval(Register Reg) const
MachineInstr * getInstructionFromIndex(SlotIndex index) const
Returns the instruction associated with the given index.
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
LiveRange & getRegUnit(unsigned Unit)
Return the live range for register unit Unit.
LiveInterval & getInterval(Register Reg)
A set of physical registers with utility functions to track liveness when walking backward/forward th...
bool contains(MCPhysReg Reg) const
Returns true if register Reg is contained in the set.
bool available(const MachineRegisterInfo &MRI, MCPhysReg Reg) const
Returns true if register Reg and no aliasing register is in the set.
This class represents the liveness of a register, stack slot, etc.
VNInfo * getVNInfoAt(SlotIndex Idx) const
getVNInfoAt - Return the VNInfo that is live at Idx, or NULL.
Describe properties that are true of each instruction in the target description file.
MCRegAliasIterator enumerates all registers aliasing Reg.
bool isValid() const
isValid - returns true if this iterator is not yet at the end.
Wrapper class representing physical registers. Should be passed by value.
static MCRegister from(unsigned Val)
Check the provided unsigned value is a valid MCRegister.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
bool hasCalls() const
Return true if the current function has any function calls.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
bool hasStackObjects() const
Return true if there are any stack objects in this function.
uint8_t getStackID(int ObjectIdx) const
unsigned getNumFixedObjects() const
Return the number of fixed objects.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
void setAsmPrinterFlag(uint8_t Flag)
Set a flag for the AsmPrinter.
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
Flags getFlags() const
Return the raw flags of the source value,.
MachineOperand class - Representation of each machine instruction operand.
unsigned getSubReg() const
void setImm(int64_t immVal)
void setIsDead(bool Val=true)
void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
void setIsKill(bool Val=true)
void ChangeToRegister(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isDebug=false)
ChangeToRegister - Replace this operand with a new register operand of the specified value.
Register getReg() const
getReg - Returns the register number.
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool isReserved(MCRegister PhysReg) const
isReserved - Returns true when PhysReg is a reserved register.
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
A discriminated union of two or more pointer types, with the discriminator in the low bit of the poin...
T dyn_cast() const
Returns the current pointer if it is of the specified pointer type, otherwise returns null.
bool isRegUsed(Register Reg, bool includeReserved=true) const
Return if a specific register is currently used.
Register scavengeRegister(const TargetRegisterClass *RC, MachineBasicBlock::iterator I, int SPAdj, bool AllowSpill=true)
Make a register of the specific register class available and do the appropriate bookkeeping.
void setRegUsed(Register Reg, LaneBitmask LaneMask=LaneBitmask::getAll())
Tell the scavenger a register is used.
void assignRegToScavengingIndex(int FI, Register Reg, MachineInstr *Restore=nullptr)
Record that Reg is in use at scavenging index FI.
Register scavengeRegisterBackwards(const TargetRegisterClass &RC, MachineBasicBlock::iterator To, bool RestoreAfter, int SPAdj, bool AllowSpill=true)
Make a register of the specific register class available from the current position backwards to the p...
This class implements the register bank concept.
unsigned getID() const
Get the identifier of this register bank.
Wrapper class representing virtual and physical registers.
bool hasFP(const MachineFunction &MF) const override
hasFP - Return true if the specified function should have a dedicated frame pointer register.
static bool isFLATScratch(const MachineInstr &MI)
static bool isLegalMUBUFImmOffset(unsigned Imm)
static bool isMUBUF(const MachineInstr &MI)
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
bool usesAGPRs(const MachineFunction &MF) const
ArrayRef< MCPhysReg > getAGPRSpillVGPRs() const
MCPhysReg getVGPRToAGPRSpill(int FrameIndex, unsigned Lane) const
Register getStackPtrOffsetReg() const
Register getScratchRSrcReg() const
Returns the physical register reserved for use as the resource descriptor for scratch accesses.
ArrayRef< MCPhysReg > getVGPRSpillAGPRs() const
int getScavengeFI(MachineFrameInfo &MFI, const SIRegisterInfo &TRI)
Register getVGPRForAGPRCopy() const
Register getFrameOffsetReg() const
void addToSpilledVGPRs(unsigned num)
ArrayRef< SIRegisterInfo::SpilledReg > getSGPRSpillToVGPRLanes(int FrameIndex) const
const ReservedRegSet & getWWMReservedRegs() const
ArrayRef< Register > getSGPRSpillVGPRs() const
void addToSpilledSGPRs(unsigned num)
Register materializeFrameBaseRegister(MachineBasicBlock *MBB, int FrameIdx, int64_t Offset) const override
int64_t getScratchInstrOffset(const MachineInstr *MI) const
bool isFrameOffsetLegal(const MachineInstr *MI, Register BaseReg, int64_t Offset) const override
const TargetRegisterClass * getRegClass(unsigned RCID) const
const TargetRegisterClass * getCompatibleSubRegClass(const TargetRegisterClass *SuperRC, const TargetRegisterClass *SubRC, unsigned SubIdx) const
Returns a register class which is compatible with SuperRC, such that a subregister exists with class ...
ArrayRef< MCPhysReg > getAllSGPR64(const MachineFunction &MF) const
Return all SGPR64 which satisfy the waves per execution unit requirement of the subtarget.
MCRegister findUnusedRegister(const MachineRegisterInfo &MRI, const TargetRegisterClass *RC, const MachineFunction &MF, bool ReserveHighestVGPR=false) const
Returns a lowest register that is not used at any point in the function.
static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs=1)
MCPhysReg get32BitRegister(MCPhysReg Reg) const
const uint32_t * getCallPreservedMask(const MachineFunction &MF, CallingConv::ID) const override
bool requiresFrameIndexReplacementScavenging(const MachineFunction &MF) const override
const TargetRegisterClass * getProperlyAlignedRC(const TargetRegisterClass *RC) const
bool shouldRealignStack(const MachineFunction &MF) const override
bool isProperlyAlignedRC(const TargetRegisterClass &RC) const
const TargetRegisterClass * getEquivalentVGPRClass(const TargetRegisterClass *SRC) const
Register getFrameRegister(const MachineFunction &MF) const override
LLVM_READONLY const TargetRegisterClass * getVectorSuperClassForBitWidth(unsigned BitWidth) const
bool spillEmergencySGPR(MachineBasicBlock::iterator MI, MachineBasicBlock &RestoreMBB, Register SGPR, RegScavenger *RS) const
SIRegisterInfo(const GCNSubtarget &ST)
const uint32_t * getAllVGPRRegMask() const
MCRegister getReturnAddressReg(const MachineFunction &MF) const
const MCPhysReg * getCalleeSavedRegs(const MachineFunction *MF) const override
bool hasBasePointer(const MachineFunction &MF) const
const TargetRegisterClass * getCrossCopyRegClass(const TargetRegisterClass *RC) const override
Returns a legal register class to copy a register in the specified class to or from.
ArrayRef< int16_t > getRegSplitParts(const TargetRegisterClass *RC, unsigned EltSize) const
ArrayRef< MCPhysReg > getAllSGPR32(const MachineFunction &MF) const
Return all SGPR32 which satisfy the waves per execution unit requirement of the subtarget.
const TargetRegisterClass * getLargestLegalSuperClass(const TargetRegisterClass *RC, const MachineFunction &MF) const override
MCRegister reservedPrivateSegmentBufferReg(const MachineFunction &MF) const
Return the end register initially reserved for the scratch buffer in case spilling is needed.
bool isVGPR(const MachineRegisterInfo &MRI, Register Reg) const
bool isAsmClobberable(const MachineFunction &MF, MCRegister PhysReg) const override
LLVM_READONLY const TargetRegisterClass * getAGPRClassForBitWidth(unsigned BitWidth) const
bool requiresRegisterScavenging(const MachineFunction &Fn) const override
bool opCanUseInlineConstant(unsigned OpType) const
const TargetRegisterClass * getRegClassForSizeOnBank(unsigned Size, const RegisterBank &Bank) const
const TargetRegisterClass * getConstrainedRegClassForOperand(const MachineOperand &MO, const MachineRegisterInfo &MRI) const override
const uint32_t * getNoPreservedMask() const override
StringRef getRegAsmName(MCRegister Reg) const override
const uint32_t * getAllAllocatableSRegMask() const
const TargetRegisterClass * getRegClassForReg(const MachineRegisterInfo &MRI, Register Reg) const
const MCPhysReg * getCalleeSavedRegsViaCopy(const MachineFunction *MF) const
const uint32_t * getAllVectorRegMask() const
const TargetRegisterClass * getEquivalentAGPRClass(const TargetRegisterClass *SRC) const
static LLVM_READONLY const TargetRegisterClass * getSGPRClassForBitWidth(unsigned BitWidth)
const TargetRegisterClass * getRegClassForTypeOnBank(LLT Ty, const RegisterBank &Bank) const
bool opCanUseLiteralConstant(unsigned OpType) const
Register getBaseRegister() const
LLVM_READONLY const TargetRegisterClass * getVGPRClassForBitWidth(unsigned BitWidth) const
bool requiresFrameIndexScavenging(const MachineFunction &MF) const override
bool shouldRewriteCopySrc(const TargetRegisterClass *DefRC, unsigned DefSubReg, const TargetRegisterClass *SrcRC, unsigned SrcSubReg) const override
static bool isVGPRClass(const TargetRegisterClass *RC)
MachineInstr * findReachingDef(Register Reg, unsigned SubReg, MachineInstr &Use, MachineRegisterInfo &MRI, LiveIntervals *LIS) const
bool isSGPRReg(const MachineRegisterInfo &MRI, Register Reg) const
const TargetRegisterClass * getEquivalentSGPRClass(const TargetRegisterClass *VRC) const
bool spillSGPR(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS, SlotIndexes *Indexes=nullptr, LiveIntervals *LIS=nullptr, bool OnlyToVGPR=false) const
If OnlyToVGPR is true, this will only succeed if the spill can be performed to VGPR lanes; it does not fall back to spilling to memory.
unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const override
ArrayRef< MCPhysReg > getAllSGPR128(const MachineFunction &MF) const
Return all SGPR128 which satisfy the waves per execution unit requirement of the subtarget.
unsigned getRegPressureSetLimit(const MachineFunction &MF, unsigned Idx) const override
BitVector getReservedRegs(const MachineFunction &MF) const override
bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const override
const TargetRegisterClass * getRegClassForOperandReg(const MachineRegisterInfo &MRI, const MachineOperand &MO) const
void buildSpillLoadStore(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, unsigned LoadStoreOp, int Index, Register ValueReg, bool ValueIsKill, MCRegister ScratchOffsetReg, int64_t InstrOffset, MachineMemOperand *MMO, RegScavenger *RS, LivePhysRegs *LiveRegs=nullptr) const
const uint32_t * getAllAGPRRegMask() const
bool shouldCoalesce(MachineInstr *MI, const TargetRegisterClass *SrcRC, unsigned SubReg, const TargetRegisterClass *DstRC, unsigned DstSubReg, const TargetRegisterClass *NewRC, LiveIntervals &LIS) const override
bool eliminateSGPRToVGPRSpillFrameIndex(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS, SlotIndexes *Indexes=nullptr, LiveIntervals *LIS=nullptr) const
Special case of eliminateFrameIndex.
const TargetRegisterClass * getBoolRC() const
const TargetRegisterClass * getPointerRegClass(const MachineFunction &MF, unsigned Kind=0) const override
bool isAGPR(const MachineRegisterInfo &MRI, Register Reg) const
bool eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, unsigned FIOperandNum, RegScavenger *RS) const override
bool restoreSGPR(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS, SlotIndexes *Indexes=nullptr, LiveIntervals *LIS=nullptr, bool OnlyToVGPR=false) const
MCRegister getExec() const
MCRegister getVCC() const
int64_t getFrameIndexInstrOffset(const MachineInstr *MI, int Idx) const override
bool isVectorSuperClass(const TargetRegisterClass *RC) const
const TargetRegisterClass * getWaveMaskRegClass() const
void resolveFrameIndex(MachineInstr &MI, Register BaseReg, int64_t Offset) const override
bool requiresVirtualBaseRegisters(const MachineFunction &Fn) const override
const TargetRegisterClass * getVGPR64Class() const
void buildVGPRSpillLoadStore(SGPRSpillBuilder &SB, int Index, int Offset, bool IsLoad, bool IsKill=true) const
static bool isSGPRClass(const TargetRegisterClass *RC)
static bool isAGPRClass(const TargetRegisterClass *RC)
const int * getRegUnitPressureSets(unsigned RegUnit) const override
SlotIndex - An opaque wrapper around machine indexes.
bool isValid() const
Returns true if this is a valid index.
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
SlotIndex replaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI)
ReplaceMachineInstrInMaps - Replacing a machine instr with a new one in maps used by the register allocator.
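A small sketch of keeping the slot index maps consistent when spill lowering rewrites an instruction; MI, NewMI, and Indexes are assumed to be in scope:
// Swap the old instruction's index to the new one so LiveIntervals
// queries remain valid after the rewrite.
if (Indexes)
  Indexes->replaceMachineInstrInMaps(*MI, *NewMI);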
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
unsigned getID() const
Return the register class ID number.
bool hasSubClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a sub-class of or equal to this class.
const MCRegisterClass * MC
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
virtual const TargetRegisterClass * getLargestLegalSuperClass(const TargetRegisterClass *RC, const MachineFunction &) const
Returns the largest super class of RC that is legal to use in the current sub-target and has the same spill size.
virtual bool shouldRealignStack(const MachineFunction &MF) const
True if storage within the function requires the stack pointer to be aligned more than the normal calling convention calls for.
A Use represents the edge between a Value definition and its users.
VNInfo - Value Number Information.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ PRIVATE_ADDRESS
Address space for private memory.
LLVM_READONLY int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx)
LLVM_READONLY int getFlatScratchInstSVfromSS(uint16_t Opcode)
LLVM_READONLY int getFlatScratchInstSTfromSS(uint16_t Opcode)
unsigned getRegBitWidth(unsigned RCID)
Get the size in bits of a register from the register class RC.
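An assumed illustration of querying a class width by its ID; the class-ID name follows the generated-enum convention and is not verified here:
unsigned Bits = AMDGPU::getRegBitWidth(AMDGPU::VReg_128RegClassID); // expected: 128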
LLVM_READONLY int getFlatScratchInstSVfromSVS(uint16_t Opcode)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, uint64_t NamedIdx)
@ OPERAND_REG_INLINE_AC_FIRST
@ OPERAND_REG_INLINE_AC_LAST
@ AMDGPU_Gfx
Used for AMD graphics targets.
@ Cold
Attempts to make code in the caller as efficient as possible under the assumption that the call is not commonly executed.
@ Fast
Attempts to make calls as fast as possible (e.g. by passing things in registers).
@ C
The default llvm calling convention, compatible with C.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
initializer< Ty > init(const Ty &Val)
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
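A minimal sketch of the builder, with opcode and registers chosen purely for illustration; MBB, MI, DL, and TII are assumed to be in scope:
// Emit an S_MOV_B32 copy and mark the source register as killed.
BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::SGPR0)
    .addReg(AMDGPU::SGPR1, getKillRegState(true));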
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtual registers.
auto reverse(ContainerTy &&C)
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
unsigned getDefRegState(bool B)
unsigned getKillRegState(bool B)
void call_once(once_flag &flag, Function &&F, Args &&... ArgList)
Execute the function specified as a parameter once.
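A sketch of the once-initialization idiom used for the static lookup tables earlier in this file; the flag and lambda names are illustrative:
static llvm::once_flag InitFlag;
static auto InitTables = []() {
  // Fill static lookup tables exactly once, even with concurrent callers.
};
llvm::call_once(InitFlag, InitTables);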
constexpr unsigned BitWidth
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the largest uint64_t that is less than or equal to Value and congruent to Skew modulo Align.
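Concrete values for the two alignment helpers, shown as a sketch:
Align A = commonAlignment(Align(16), /*Offset=*/8); // yields Align(8)
uint64_t V = alignDown(/*Value=*/37, /*Align=*/16); // yields 32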
This struct is a compact representation of a valid (non-zero power of two) alignment.
This class contains a discriminated union of information about pointers in memory operands, relating them back to LLVM IR or to virtual locations (such as frame indices) that are exposed during codegen.
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
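A sketch of building a memory operand for a spill slot, assuming MF, FI, and Size are in scope:
MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
MachineMemOperand *MMO = MF.getMachineMemOperand(
    PtrInfo, MachineMemOperand::MOStore, Size,
    MF.getFrameInfo().getObjectAlign(FI));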
void setMI(MachineBasicBlock *NewMBB, MachineBasicBlock::iterator NewMI)
ArrayRef< int16_t > SplitParts
SIMachineFunctionInfo & MFI
SGPRSpillBuilder(const SIRegisterInfo &TRI, const SIInstrInfo &TII, bool IsWave32, MachineBasicBlock::iterator MI, int Index, RegScavenger *RS)
SGPRSpillBuilder(const SIRegisterInfo &TRI, const SIInstrInfo &TII, bool IsWave32, MachineBasicBlock::iterator MI, Register Reg, bool IsKill, int Index, RegScavenger *RS)
PerVGPRData getPerVGPRData()
MachineBasicBlock::iterator MI
void readWriteTmpVGPR(unsigned Offset, bool IsLoad)
const SIRegisterInfo & TRI
The llvm::once_flag structure.