26#define DEBUG_TYPE "frame-info"
29 "amdgpu-spill-vgpr-to-agpr",
30 cl::desc(
"Enable spilling VGPRs to AGPRs"),
57 OS <<
uint8_t(dwarf::DW_OP_reg0 + DwarfReg);
59 OS <<
uint8_t(dwarf::DW_OP_regx);
67 assert(ST.enableFlatScratch());
75 const unsigned WavefrontSizeLog2 = ST.getWavefrontSizeLog2();
76 assert(WavefrontSizeLog2 < 32);
82 <<
uint8_t(dwarf::DW_OP_lit0 + WavefrontSizeLog2)
85 dwarf::DW_ASPACE_LLVM_AMDGPU_private_wave)
86 <<
uint8_t(dwarf::DW_OP_LLVM_user)
87 <<
uint8_t(dwarf::DW_OP_LLVM_form_aspace_address);
91 OSCFIInst <<
uint8_t(dwarf::DW_CFA_def_cfa_expression);
101 bool AspaceAlreadyDefined,
105 const SIRegisterInfo *
TRI =
ST.getRegisterInfo();
107 MCRegister DwarfStackPtrReg =
TRI->getDwarfRegNum(StackPtrReg,
false);
108 MCCFIInstruction CFIInst =
109 ST.enableFlatScratch()
111 : (AspaceAlreadyDefined
112 ? MCCFIInstruction::createLLVMDefAspaceCfa(
113 nullptr, DwarfStackPtrReg, 0,
114 dwarf::DW_ASPACE_LLVM_AMDGPU_private_wave, SMLoc())
115 : MCCFIInstruction::createDefCfaRegister(nullptr,
129 for (
unsigned i = 0; CSRegs[i]; ++i)
130 LiveUnits.
addReg(CSRegs[i]);
150 bool IncludeScratchCopy =
true) {
156 unsigned Size =
TRI->getSpillSize(RC);
157 Align Alignment =
TRI->getSpillAlign(RC);
165 if (IncludeScratchCopy)
169 int FI = FrameInfo.CreateStackObject(
Size, Alignment,
true,
nullptr,
172 if (
TRI->spillSGPRToVGPR() &&
189 FI = FrameInfo.CreateSpillStackObject(
Size, Alignment);
200 LiveUnits.
addReg(ScratchSGPR);
215 int64_t DwordOff = 0) {
216 unsigned Opc = ST.hasFlatScratchEnabled() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
217 : AMDGPU::BUFFER_STORE_DWORD_OFFSET;
223 FrameInfo.getObjectAlign(FI));
224 LiveUnits.
addReg(SpillReg);
225 bool IsKill = !
MBB.isLiveIn(SpillReg);
226 TRI.buildSpillLoadStore(
MBB,
I,
DL,
Opc, FI, SpillReg, IsKill, FrameReg,
227 DwordOff, MMO,
nullptr, &LiveUnits);
239 Register FrameReg, int64_t DwordOff = 0) {
240 unsigned Opc = ST.hasFlatScratchEnabled() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
241 : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
247 FrameInfo.getObjectAlign(FI));
248 TRI.buildSpillLoadStore(
MBB,
I,
DL,
Opc, FI, SpillReg,
false, FrameReg,
249 DwordOff, MMO,
nullptr, &LiveUnits);
259 Register TargetLo =
TRI->getSubReg(TargetReg, AMDGPU::sub0);
260 Register TargetHi =
TRI->getSubReg(TargetReg, AMDGPU::sub1);
267 const MCInstrDesc &GetPC64 =
TII->get(AMDGPU::S_GETPC_B64_pseudo);
272 MBB.addLiveIn(GitPtrLo);
281 if (LiveUnits.
empty()) {
317 unsigned EltSize = 4;
318 bool IsFramePtrPrologSpill;
319 bool NeedsFrameMoves;
322 return Reg == AMDGPU::EXEC_LO || Reg == AMDGPU::EXEC;
334 if (IsFramePtrPrologSpill)
335 return FuncInfo->getFrameOffsetReg();
338 if (isExec(SuperReg))
343 void saveToMemory(
const int FI)
const {
346 assert(!MFI.isDeadObjectIndex(FI));
351 MRI, LiveUnits, AMDGPU::VGPR_32RegClass);
356 TFI->buildCFI(MBB, MI, DL,
358 nullptr, MCRI->getDwarfRegNum(Reg,
false),
359 MFI.getObjectOffset(FI) * ST.getWavefrontSize()));
362 for (
unsigned I = 0, DwordOff = 0;
I < NumSubRegs; ++
I) {
365 :
Register(TRI.getSubReg(SuperReg, SplitParts[
I]));
366 BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
370 FI, FrameReg, DwordOff);
371 if (NeedsFrameMoves && !CFISuperReg)
375 if (NeedsFrameMoves && CFISuperReg)
376 BuildCFI(CFISuperReg);
379 void saveToVGPRLane(
const int FI)
const {
380 assert(!MFI.isDeadObjectIndex(FI));
384 FuncInfo->getSGPRSpillToPhysicalVGPRLanes(FI);
385 assert(Spill.size() == NumSubRegs);
388 for (
unsigned I = 0;
I < NumSubRegs; ++
I) {
391 :
Register(TRI.getSubReg(SuperReg, SplitParts[
I]));
392 BuildMI(MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_S32_TO_VGPR),
397 if (NeedsFrameMoves && !CFISuperReg)
398 TFI->buildCFIForSGPRToVGPRSpill(MBB, MI, DL, SubReg, Spill[
I].VGPR,
401 if (NeedsFrameMoves && CFISuperReg)
402 TFI->buildCFIForSGPRToVGPRSpill(MBB, MI, DL, CFISuperReg, Spill);
405 void copyToScratchSGPR(
Register DstReg)
const {
406 BuildMI(MBB, MI, DL, TII->get(AMDGPU::COPY), DstReg)
409 if (NeedsFrameMoves) {
412 assert(NumSubRegs == (DstSplitParts.
empty() ? 1 : DstSplitParts.
size()));
414 if (NumSubRegs == 1) {
419 MCRI->getDwarfRegNum(
420 CFISuperReg ? CFISuperReg : SuperReg.asMCReg(),
false),
421 MCRI->getDwarfRegNum(DstReg,
false)));
422 }
else if (isExec(CFISuperReg)) {
423 assert(NumSubRegs == 2 &&
"EXEC larger than 64-bit");
424 TFI->buildCFIForRegToSGPRPairSpill(MBB, MI, DL, CFISuperReg, DstReg);
426 for (
unsigned I = 0;
I < NumSubRegs; ++
I) {
427 MCRegister SrcSubReg = TRI.getSubReg(SuperReg, SplitParts[
I]);
428 MCRegister DstSubReg = TRI.getSubReg(DstReg, DstSplitParts[
I]);
429 TFI->buildCFI(MBB, MI, DL,
431 nullptr, MCRI->getDwarfRegNum(SrcSubReg,
false),
432 MCRI->getDwarfRegNum(DstSubReg,
false)));
438 void restoreFromMemory(
const int FI) {
444 MRI, LiveUnits, AMDGPU::VGPR_32RegClass);
448 for (
unsigned I = 0, DwordOff = 0;
I < NumSubRegs; ++
I) {
451 : TRI.getSubReg(SuperReg, SplitParts[
I]);
454 TmpVGPR, FI, FrameReg, DwordOff);
457 BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
463 void restoreFromVGPRLane(
const int FI) {
466 FuncInfo->getSGPRSpillToPhysicalVGPRLanes(FI);
467 assert(Spill.size() == NumSubRegs);
469 for (
unsigned I = 0;
I < NumSubRegs; ++
I) {
472 : TRI.getSubReg(SuperReg, SplitParts[
I]);
473 BuildMI(MBB, MI, DL, TII->get(AMDGPU::SI_RESTORE_S32_FROM_VGPR), SubReg)
479 void copyFromScratchSGPR(
Register SrcReg)
const {
480 BuildMI(MBB, MI, DL, TII->get(AMDGPU::COPY), SuperReg)
493 bool IsFramePtrPrologSpill =
false)
494 : MI(MI), MBB(MBB), MF(*MBB.
getParent()),
495 ST(MF.getSubtarget<
GCNSubtarget>()), MFI(MF.getFrameInfo()),
497 MCRI(MF.getContext().getRegisterInfo()), TFI(ST.getFrameLowering()),
498 SuperReg(Reg), SI(SI), LiveUnits(LiveUnits), DL(DL), FrameReg(FrameReg),
499 IsFramePtrPrologSpill(IsFramePtrPrologSpill),
500 NeedsFrameMoves(MF.needsFrameMoves()) {
502 SplitParts = TRI.getRegSplitParts(RC, EltSize);
503 NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
505 assert(SuperReg != AMDGPU::M0 &&
"m0 should never spill");
509 switch (SI.getKind()) {
511 return saveToMemory(SI.getIndex());
513 return saveToVGPRLane(SI.getIndex());
515 return copyToScratchSGPR(SI.getReg());
520 switch (SI.getKind()) {
522 return restoreFromMemory(SI.getIndex());
524 return restoreFromVGPRLane(SI.getIndex());
526 return copyFromScratchSGPR(SI.getReg());
534void SIFrameLowering::emitEntryFunctionFlatScratchInit(
538 const SIInstrInfo *
TII =
ST.getInstrInfo();
540 const SIMachineFunctionInfo *MFI = MF.
getInfo<SIMachineFunctionInfo>();
555 if (
ST.isAmdPalOS()) {
557 LiveRegUnits LiveUnits;
563 Register FlatScrInit = AMDGPU::NoRegister;
566 AllSGPR64s = AllSGPR64s.
slice(
567 std::min(
static_cast<unsigned>(AllSGPR64s.
size()), NumPreloaded));
576 assert(FlatScrInit &&
"Failed to find free register for scratch init");
578 FlatScrInitLo =
TRI->getSubReg(FlatScrInit, AMDGPU::sub0);
579 FlatScrInitHi =
TRI->getSubReg(FlatScrInit, AMDGPU::sub1);
586 const MCInstrDesc &LoadDwordX2 =
TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);
594 const GCNSubtarget &Subtarget = MF.
getSubtarget<GCNSubtarget>();
603 const MCInstrDesc &SAndB32 =
TII->get(AMDGPU::S_AND_B32);
611 assert(FlatScratchInitReg);
617 FlatScrInitLo =
TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0);
618 FlatScrInitHi =
TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1);
622 if (
ST.flatScratchIsPointer()) {
626 .
addReg(ScratchWaveOffsetReg);
633 using namespace AMDGPU::Hwreg;
636 .
addImm(int16_t(HwregEncoding::encode(ID_FLAT_SCR_LO, 0, 32)));
639 .
addImm(int16_t(HwregEncoding::encode(ID_FLAT_SCR_HI, 0, 32)));
646 .
addReg(ScratchWaveOffsetReg);
666 .
addReg(ScratchWaveOffsetReg);
689Register SIFrameLowering::getEntryFunctionReservedScratchRsrcReg(
693 const SIInstrInfo *
TII =
ST.getInstrInfo();
696 SIMachineFunctionInfo *MFI = MF.
getInfo<SIMachineFunctionInfo>();
702 if (!ScratchRsrcReg || (!MRI.
isPhysRegUsed(ScratchRsrcReg) &&
706 if (
ST.hasSGPRInitBug() ||
707 ScratchRsrcReg !=
TRI->reservedPrivateSegmentBufferReg(MF))
708 return ScratchRsrcReg;
721 AllSGPR128s = AllSGPR128s.
slice(std::min(
static_cast<unsigned>(AllSGPR128s.
size()), NumPreloaded));
731 (!GITPtrLoReg || !
TRI->isSubRegisterEq(
Reg, GITPtrLoReg))) {
739 return ScratchRsrcReg;
743 return ST.hasFlatScratchEnabled() ? 1 : ST.getWavefrontSize();
748 assert(&MF.
front() == &
MBB &&
"Shrink-wrapping not yet supported");
779 static const char CFAEncodedInstUserOpsArr[] = {
780 dwarf::DW_CFA_def_cfa_expression,
782 static_cast<char>(dwarf::DW_OP_lit0),
783 static_cast<char>(dwarf::DW_OP_lit0 +
784 dwarf::DW_ASPACE_LLVM_AMDGPU_private_wave),
785 static_cast<char>(dwarf::DW_OP_LLVM_user),
786 static_cast<char>(dwarf::DW_OP_LLVM_form_aspace_address)};
788 StringRef(CFAEncodedInstUserOpsArr,
sizeof(CFAEncodedInstUserOpsArr));
792 "CFA is 0 in private_wave aspace"));
796 nullptr,
TRI->getDwarfRegNum(AMDGPU::PC_REG,
false)));
809 if (!ST.hasFlatScratchEnabled())
810 ScratchRsrcReg = getEntryFunctionReservedScratchRsrcReg(MF);
813 if (ScratchRsrcReg) {
815 if (&OtherBB != &
MBB) {
816 OtherBB.addLiveIn(ScratchRsrcReg);
824 if (ST.isAmdHsaOrMesa(
F)) {
825 PreloadedScratchRsrcReg =
827 if (ScratchRsrcReg && PreloadedScratchRsrcReg) {
831 MBB.addLiveIn(PreloadedScratchRsrcReg);
841 if (PreloadedScratchWaveOffsetReg &&
842 TRI->isSubRegisterEq(ScratchRsrcReg, PreloadedScratchWaveOffsetReg)) {
845 AllSGPRs = AllSGPRs.
slice(
846 std::min(
static_cast<unsigned>(AllSGPRs.
size()), NumPreloaded));
850 !
TRI->isSubRegisterEq(ScratchRsrcReg, Reg) && GITPtrLoReg != Reg) {
851 ScratchWaveOffsetReg = Reg;
860 if (!ScratchWaveOffsetReg)
862 "could not find temporary scratch offset register in prolog");
864 ScratchWaveOffsetReg = PreloadedScratchWaveOffsetReg;
866 assert(ScratchWaveOffsetReg || !PreloadedScratchWaveOffsetReg);
896 FrameInfo.getMaxAlign());
908 ST.hasInv2PiInlineImm())) {
920 bool NeedsFlatScratchInit =
922 (MRI.
isPhysRegUsed(AMDGPU::FLAT_SCR) || FrameInfo.hasCalls() ||
925 if ((NeedsFlatScratchInit || ScratchRsrcReg) &&
926 PreloadedScratchWaveOffsetReg && !ST.hasArchitectedFlatScratch()) {
927 MRI.
addLiveIn(PreloadedScratchWaveOffsetReg);
928 MBB.addLiveIn(PreloadedScratchWaveOffsetReg);
931 if (NeedsFlatScratchInit) {
932 emitEntryFunctionFlatScratchInit(MF,
MBB,
I,
DL, ScratchWaveOffsetReg);
935 if (ScratchRsrcReg) {
936 emitEntryFunctionScratchRsrcRegSetup(MF,
MBB,
I,
DL,
937 PreloadedScratchRsrcReg,
938 ScratchRsrcReg, ScratchWaveOffsetReg);
941 if (ST.hasWaitXcnt()) {
945 unsigned RegEncoding =
954void SIFrameLowering::emitEntryFunctionScratchRsrcRegSetup(
965 if (ST.isAmdPalOS()) {
968 Register Rsrc01 =
TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
969 Register Rsrc03 =
TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);
976 const MCInstrDesc &LoadDwordX4 =
TII->get(AMDGPU::S_LOAD_DWORDX4_IMM);
1005 }
else if (
ST.isMesaGfxShader(Fn) || !PreloadedScratchRsrcReg) {
1007 const MCInstrDesc &SMovB32 =
TII->get(AMDGPU::S_MOV_B32);
1009 Register Rsrc2 =
TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
1010 Register Rsrc3 =
TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);
1013 uint64_t Rsrc23 =
TII->getScratchRsrcWords23();
1016 Register Rsrc01 =
TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
1019 const MCInstrDesc &Mov64 =
TII->get(AMDGPU::S_MOV_B64);
1025 const MCInstrDesc &LoadDwordX2 =
TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);
1044 Register Rsrc0 =
TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
1045 Register Rsrc1 =
TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
1063 }
else if (
ST.isAmdHsaOrMesa(Fn)) {
1064 assert(PreloadedScratchRsrcReg);
1066 if (ScratchRsrcReg != PreloadedScratchRsrcReg) {
1081 Register ScratchRsrcSub0 =
TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
1082 Register ScratchRsrcSub1 =
TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
1088 .
addReg(ScratchWaveOffsetReg)
1090 auto Addc =
BuildMI(
MBB,
I,
DL,
TII->get(AMDGPU::S_ADDC_U32), ScratchRsrcSub1)
1122 emitDefCFA(
MBB,
MBBI,
DL, StackPtrReg,
true,
1126 TRI.getReturnAddressReg(MF));
1130 for (
unsigned I = 0; CSRegs[
I]; ++
I) {
1131 IsCalleeSaved.set(CSRegs[
I]);
1135 if (
Reg == AMDGPU::VCC ||
Reg == AMDGPU::VCC_LO ||
Reg == AMDGPU::VCC_HI)
1145 unsigned NumArchVGPRs =
ST.has1024AddressableVGPRs() ? 1024 : 256;
1146 for_each(AMDGPU::VGPR_32RegClass.getRegisters().take_front(NumArchVGPRs),
1150 if (
ST.hasMAIInsts()) {
1151 for_each(AMDGPU::AGPR_32RegClass.getRegisters(), ProcessReg);
1155 for_each(AMDGPU::SGPR_32RegClass.getRegisters(), ProcessReg);
1165 bool EnableInactiveLanes) {
1178 assert(IsProlog &&
"Epilog should look at return, not setup");
1180 TII->getWholeWaveFunctionSetup(MF)->getOperand(0).getReg();
1181 assert(ScratchExecCopy &&
"Couldn't find copy of EXEC");
1184 MRI, LiveUnits, *
TRI.getWaveMaskRegClass());
1187 if (!ScratchExecCopy)
1190 LiveUnits.
addReg(ScratchExecCopy);
1192 const unsigned SaveExecOpc =
1193 ST.isWave32() ? (EnableInactiveLanes ? AMDGPU::S_XOR_SAVEEXEC_B32
1194 : AMDGPU::S_OR_SAVEEXEC_B32)
1195 : (EnableInactiveLanes ? AMDGPU::S_XOR_SAVEEXEC_B64
1196 : AMDGPU::S_OR_SAVEEXEC_B64);
1201 return ScratchExecCopy;
1208 const bool NeedsFrameMoves)
const {
1224 if (!WWMScratchRegs.
empty())
1229 auto StoreWWMRegisters =
1231 for (
const auto &Reg : WWMRegs) {
1233 int FI = Reg.second;
1235 VGPR, FI, FrameReg);
1236 if (NeedsFrameMoves) {
1241 MFI.getObjectOffset(FI) * ST.getWavefrontSize()));
1252 StoreWWMRegisters(WWMScratchRegs);
1254 auto EnableAllLanes = [&]() {
1258 if (!WWMCalleeSavedRegs.
empty()) {
1259 if (ScratchExecCopy) {
1268 StoreWWMRegisters(WWMCalleeSavedRegs);
1272 if (!ScratchExecCopy)
1275 else if (WWMCalleeSavedRegs.
empty())
1277 }
else if (ScratchExecCopy) {
1281 LiveUnits.
addReg(ScratchExecCopy);
1291 bool IsFramePtrPrologSpill = Spill.first == FramePtrReg;
1292 Register Reg = IsFramePtrPrologSpill ? FramePtrRegScratchCopy : Spill.first;
1297 LiveUnits, FrameReg, IsFramePtrPrologSpill);
1305 if (!ScratchSGPRs.
empty()) {
1310 MBB.sortUniqueLiveIns();
1312 if (!LiveUnits.
empty()) {
1320 if (
TRI.isCFISavedRegsSpillEnabled())
1328 Register FramePtrRegScratchCopy)
const {
1343 Spill.first == FramePtrReg ? FramePtrRegScratchCopy : Spill.first;
1348 LiveUnits, FrameReg);
1358 auto RestoreWWMRegisters =
1360 for (
const auto &Reg : WWMRegs) {
1362 int FI = Reg.second;
1364 VGPR, FI, FrameReg);
1371 RestoreWWMRegisters(WWMCalleeSavedRegs);
1375 unsigned Opcode = Return.getOpcode();
1377 case AMDGPU::SI_WHOLE_WAVE_FUNC_RETURN:
1378 Opcode = AMDGPU::SI_RETURN;
1380 case AMDGPU::SI_TCRETURN_GFX_WholeWave:
1381 Opcode = AMDGPU::SI_TCRETURN_GFX;
1386 Register OrigExec = Return.getOperand(0).getReg();
1388 if (!WWMScratchRegs.
empty()) {
1392 RestoreWWMRegisters(WWMScratchRegs);
1399 Return.removeOperand(0);
1400 Return.setDesc(
TII->get(Opcode));
1405 if (!WWMScratchRegs.
empty()) {
1410 RestoreWWMRegisters(WWMScratchRegs);
1411 if (!WWMCalleeSavedRegs.
empty()) {
1412 if (ScratchExecCopy) {
1421 RestoreWWMRegisters(WWMCalleeSavedRegs);
1422 if (ScratchExecCopy) {
1461 bool SavesStackRegs =
1466 if (NeedsFrameMoves)
1469 if (
TRI.hasStackRealignment(MF))
1473 if (!HasFP && !
hasFP(MF)) {
1476 FramePtrRegScratchCopy, NeedsFrameMoves);
1477 }
else if (SavesStackRegs) {
1479 Register SGPRForFPSaveRestoreCopy =
1483 if (SGPRForFPSaveRestoreCopy) {
1490 DL,
TII,
TRI, LiveUnits, FramePtrReg,
1493 LiveUnits.
addReg(SGPRForFPSaveRestoreCopy);
1498 MRI, LiveUnits, AMDGPU::SReg_32_XM0_XEXECRegClass);
1499 if (!FramePtrRegScratchCopy)
1502 LiveUnits.
addReg(FramePtrRegScratchCopy);
1511 RoundedSize += Alignment;
1512 if (LiveUnits.
empty()) {
1527 And->getOperand(3).setIsDead();
1529 }
else if ((HasFP =
hasFP(MF))) {
1538 FramePtrRegScratchCopy, NeedsFrameMoves);
1539 if (FramePtrRegScratchCopy)
1540 LiveUnits.
removeReg(FramePtrRegScratchCopy);
1547 if ((HasBP =
TRI.hasBasePointer(MF))) {
1554 if (NeedsFrameMoves)
1555 emitDefCFA(
MBB,
MBBI,
DL, FramePtrReg,
false,
1559 if (HasFP && RoundedSize != 0) {
1564 Add->getOperand(3).setIsDead();
1569 assert((!HasFP || FPSaved || !SavesStackRegs) &&
1570 "Needed to save FP but didn't save it anywhere");
1575 "Saved FP but didn't need it");
1579 assert((!HasBP || BPSaved || !SavesStackRegs) &&
1580 "Needed to save BP but didn't save it anywhere");
1582 assert((HasBP || !BPSaved) &&
"Saved BP but didn't need it");
1586 TII->getWholeWaveFunctionSetup(MF)->eraseFromParent();
1610 MBBI =
MBB.getLastNonDebugInstr();
1612 DL =
MBBI->getDebugLoc();
1614 MBBI =
MBB.getFirstTerminator();
1625 if (RoundedSize != 0) {
1626 if (
TRI.hasBasePointer(MF)) {
1630 }
else if (
hasFP(MF)) {
1638 Register SGPRForFPSaveRestoreCopy =
1646 if (SGPRForFPSaveRestoreCopy) {
1647 LiveUnits.
addReg(SGPRForFPSaveRestoreCopy);
1650 MRI, LiveUnits, AMDGPU::SReg_32_XM0_XEXECRegClass);
1651 if (!FramePtrRegScratchCopy)
1654 LiveUnits.
addReg(FramePtrRegScratchCopy);
1658 FramePtrRegScratchCopy);
1662 emitDefCFA(
MBB,
MBBI,
DL, StackPtrReg,
false,
1668 Register SrcReg = SGPRForFPSaveRestoreCopy ? SGPRForFPSaveRestoreCopy
1669 : FramePtrRegScratchCopy;
1673 if (SGPRForFPSaveRestoreCopy)
1678 FramePtrRegScratchCopy);
1719 const bool SpillVGPRToAGPR = ST.hasMAIInsts() && FuncInfo->
hasSpilledVGPRs()
1722 if (SpillVGPRToAGPR) {
1727 bool SeenDbgInstr =
false;
1732 if (
MI.isDebugInstr())
1733 SeenDbgInstr =
true;
1735 if (
TII->isVGPRSpill(
MI)) {
1738 unsigned FIOp = AMDGPU::getNamedOperandIdx(
MI.getOpcode(),
1739 AMDGPU::OpName::vaddr);
1740 int FI =
MI.getOperand(FIOp).getIndex();
1742 TII->getNamedOperand(
MI, AMDGPU::OpName::vdata)->getReg();
1744 TRI->isAGPR(MRI, VReg))) {
1746 RS->enterBasicBlockEnd(
MBB);
1747 RS->backward(std::next(
MI.getIterator()));
1748 TRI->eliminateFrameIndex(
MI, 0, FIOp, RS);
1752 }
else if (
TII->isStoreToStackSlot(
MI, FrameIndex) ||
1753 TII->isLoadFromStackSlot(
MI, FrameIndex))
1755 NonVGPRSpillFIs.
set(FrameIndex);
1761 for (
unsigned FI : SpillFIs.
set_bits())
1762 if (!NonVGPRSpillFIs.
test(FI))
1772 MBB.sortUniqueLiveIns();
1774 if (!SpillFIs.
empty() && SeenDbgInstr)
1782 bool HaveSGPRToVMemSpill =
1785 "SGPR spill should have been removed in SILowerSGPRSpills");
1791 assert(RS &&
"RegScavenger required if spilling");
1798 if (HaveSGPRToVMemSpill &&
1812 if (ST.hasMAIInsts() && !ST.hasGFX90AInsts()) {
1819 TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
1820 if (UnusedLowVGPR && (
TRI->getHWRegIndex(UnusedLowVGPR) <
1821 TRI->getHWRegIndex(VGPRForAGPRCopy))) {
1834 TRI->findUnusedRegister(MRI, &AMDGPU::SGPR_64RegClass, MF);
1839 if (LongBranchReservedReg && UnusedLowSGPR) {
1849 bool NeedExecCopyReservedReg)
const {
1860 for (
unsigned I = 0; CSRegs[
I]; ++
I)
1866 if (NeedExecCopyReservedReg ||
1867 (ReservedRegForExecCopy &&
1871 if (UnusedScratchReg) {
1876 LiveUnits.
addReg(UnusedScratchReg);
1880 "Re-reserving spill slot for EXEC copy register");
1884 }
else if (ReservedRegForExecCopy) {
1890 if (
TRI->isCFISavedRegsSpillEnabled()) {
1893 "Re-reserving spill slot for EXEC");
1900 if (
F.hasFnAttribute(Attribute::NoReturn) ||
1912 const bool WillHaveFP =
1913 FrameInfo.hasCalls() &&
1916 if (WillHaveFP ||
hasFP(MF)) {
1919 "Re-reserving spill slot for FP");
1923 if (
TRI->hasBasePointer(MF)) {
1926 "Re-reserving spill slot for BP");
1948 bool NeedExecCopyReservedReg =
false;
1955 if (
TII->isWWMRegSpillOpcode(
MI.getOpcode()))
1956 NeedExecCopyReservedReg =
true;
1957 else if (
MI.getOpcode() == AMDGPU::SI_RETURN ||
1958 MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG ||
1959 MI.getOpcode() == AMDGPU::SI_WHOLE_WAVE_FUNC_RETURN ||
1961 TII->isChainCallOpcode(
MI.getOpcode()))) {
1964 (
count_if(
MI.operands(), [](
auto Op) { return Op.isReg(); }) ==
1977 if (
TRI->getRegSizeInBits(*RC) != 32)
1982 sort(SortedWWMVGPRs, std::greater<Register>());
1991 assert(!NeedExecCopyReservedReg &&
1992 "Whole wave functions can use the reg mapped for their i1 argument");
1994 unsigned NumArchVGPRs = ST.getAddressableNumArchVGPRs();
1996 AMDGPU::VGPR_32RegClass.getRegisters().take_front(NumArchVGPRs))
1999 MF.
begin()->addLiveIn(Reg);
2001 MF.
begin()->sortUniqueLiveIns();
2009 SavedVGPRs.
reset(
Op.getReg());
2017 TRI->getSpillAlign(*RC));
2026 if (!ST.hasGFX90AInsts())
2034 SavedVGPRs.
reset(Reg.first);
2051 const BitVector AllSavedRegs = SavedRegs;
2060 const bool WillHaveFP =
2064 if (WillHaveFP ||
hasFP(MF))
2074 Register RetAddrReg =
TRI->getReturnAddressReg(MF);
2077 SavedRegs.
set(
TRI->getSubReg(RetAddrReg, AMDGPU::sub0));
2078 SavedRegs.
set(
TRI->getSubReg(RetAddrReg, AMDGPU::sub1));
2084 std::vector<CalleeSavedInfo> &CSI) {
2092 return A.getReg() <
B.getReg();
2094 "Callee saved registers not sorted");
2097 return !CSI.isSpilledToReg() &&
2098 TRI->getPhysRegBaseClass(CSI.getReg()) == &AMDGPU::VGPR_32RegClass &&
2102 auto CSEnd = CSI.end();
2103 for (
auto CSIt = CSI.begin(); CSIt != CSEnd; ++CSIt) {
2105 if (!CanUseBlockOps(*CSIt))
2112 CSEnd = std::remove_if(
2114 if (CanUseBlockOps(CSI) && CSI.
getReg() <
Reg + 32) {
2124 TRI->getMatchingSuperReg(
Reg, AMDGPU::sub0, BlockRegClass);
2133 TRI->getMatchingSuperReg(LastBlockStart, AMDGPU::sub0, BlockRegClass);
2134 assert(RegBlock &&
TRI->isSubRegister(RegBlock,
Reg) &&
2135 "Couldn't find super register");
2136 int RegDelta =
Reg - LastBlockStart;
2138 "Bad shift amount");
2149 unsigned BlockSize =
TRI->getSpillSize(*BlockRegClass) - UnusedBits * 4;
2151 MFI.CreateStackObject(
BlockSize,
TRI->getSpillAlign(*BlockRegClass),
2153 MFI.setIsCalleeSavedObjectIndex(FrameIdx,
true);
2155 CSIt->setFrameIdx(FrameIdx);
2156 CSIt->setReg(RegBlock);
2158 CSI.erase(CSEnd, CSI.end());
2163 std::vector<CalleeSavedInfo> &CSI)
const {
2168 bool UseVGPRBlocks = ST.useVGPRBlockOpsForCSR();
2178 std::vector<CalleeSavedInfo> &CSI)
const {
2186 Register BasePtrReg = RI->getBaseRegister();
2187 Register SGPRForFPSaveRestoreCopy =
2189 Register SGPRForBPSaveRestoreCopy =
2191 if (!SGPRForFPSaveRestoreCopy && !SGPRForBPSaveRestoreCopy)
2194 unsigned NumModifiedRegs = 0;
2196 if (SGPRForFPSaveRestoreCopy)
2198 if (SGPRForBPSaveRestoreCopy)
2201 for (
auto &CS : CSI) {
2202 if (CS.getReg() == FramePtrReg.
asMCReg() && SGPRForFPSaveRestoreCopy) {
2203 CS.setDstReg(SGPRForFPSaveRestoreCopy);
2204 if (--NumModifiedRegs)
2206 }
else if (CS.getReg() == BasePtrReg.
asMCReg() &&
2207 SGPRForBPSaveRestoreCopy) {
2208 CS.setDstReg(SGPRForBPSaveRestoreCopy);
2209 if (--NumModifiedRegs)
2223 uint64_t EstStackSize = MFI.estimateStackSize(MF);
2224 uint64_t MaxOffset = EstStackSize - 1;
2233 if (ST.hasFlatScratchEnabled()) {
2238 if (
TII->isLegalMUBUFImmOffset(MaxOffset))
2252 for (
const auto &LI :
MBB.liveins()) {
2254 auto [Unit, UnitLaneMask] = *
MI;
2255 if ((LI.LaneMask & UnitLaneMask).none())
2258 LiveInRoots.
set(*RI);
2270 if (LiveInRoots.
test(*RI))
2277void SIFrameLowering::spillCalleeSavedRegisterWithoutBlockOps(
2291 const TargetRegisterClass *RC =
TRI.getMinimalPhysRegClass(
Reg);
2311 std::optional<SparseBitVector<>> LiveInRoots;
2312 if (
MBB.getParent()->getRegInfo().tracksLiveness())
2315 if (!ST.useVGPRBlockOpsForCSR()) {
2317 spillCalleeSavedRegisterWithoutBlockOps(
MBB,
MI, CS,
TII,
TRI,
2320 MBB.sortUniqueLiveIns();
2330 if (!BlockRegClass->contains(Reg) ||
2332 spillCalleeSavedRegisterWithoutBlockOps(
MBB,
MI, CS,
TII,
TRI,
2344 FrameInfo.getObjectSize(FrameIndex),
2345 FrameInfo.getObjectAlign(FrameIndex));
2348 TII->get(AMDGPU::SI_BLOCK_SPILL_V1024_CFI_SAVE))
2366 MBB.sortUniqueLiveIns();
2378 if (!ST.useVGPRBlockOpsForCSR())
2387 if (!BlockRegClass->
contains(Reg) ||
2400 MFI.getObjectAlign(FrameIndex));
2403 TII->get(AMDGPU::SI_BLOCK_SPILL_V1024_RESTORE), Reg)
2409 TRI.addImplicitUsesForBlockCSRLoad(MIB, Reg);
2418 MBB.sortUniqueLiveIns();
2426 int64_t Amount =
I->getOperand(0).getImm();
2428 return MBB.erase(
I);
2433 unsigned Opc =
I->getOpcode();
2434 bool IsDestroy =
Opc ==
TII->getCallFrameDestroyOpcode();
2435 uint64_t CalleePopAmount = IsDestroy ?
I->getOperand(1).getImm() : 0;
2449 Add->getOperand(3).setIsDead();
2450 }
else if (CalleePopAmount != 0) {
2454 return MBB.erase(
I);
2509 "only expected to call this for entry points functions");
2544 ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC,
false);
2548 ST.getWavefrontSize());
2555 const int Lane)
const {
2561 assert(DwarfSGPR != -1 && DwarfVGPR != -1);
2562 assert(Lane != -1 &&
"Expected a lane to be present");
2577 if (VGPRSpills.
size() == 1u)
2579 VGPRSpills[0].Lane);
2593 assert(Spill.hasLane() &&
"Expected a lane to be present");
2624 ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC,
false);
2641 int DwarfReg =
TRI.getDwarfRegNum(Reg,
false);
2642 int DwarfSGPR0 =
TRI.getDwarfRegNum(SGPR0,
false);
2643 int DwarfSGPR1 =
TRI.getDwarfRegNum(SGPR1,
false);
2644 assert(DwarfReg != -1 && DwarfSGPR0 != -1 && DwarfSGPR1 != -1);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Provides AMDGPU specific target descriptions.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static const Function * getParent(const Value *V)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains constants used for implementing Dwarf debug support.
AMD GCN specific subclass of TargetSubtarget.
const HexagonInstrInfo * TII
Register const TargetRegisterInfo * TRI
Promote Memory to Register
static constexpr MCPhysReg FPReg
static constexpr MCPhysReg SPReg
This file declares the machine register scavenger class.
static void buildEpilogRestore(const GCNSubtarget &ST, const SIRegisterInfo &TRI, const SIMachineFunctionInfo &FuncInfo, LiveRegUnits &LiveUnits, MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SpillReg, int FI, Register FrameReg, int64_t DwordOff=0)
static cl::opt< bool > EnableSpillVGPRToAGPR("amdgpu-spill-vgpr-to-agpr", cl::desc("Enable spilling VGPRs to AGPRs"), cl::ReallyHidden, cl::init(true))
static void getVGPRSpillLaneOrTempRegister(MachineFunction &MF, LiveRegUnits &LiveUnits, Register SGPR, const TargetRegisterClass &RC=AMDGPU::SReg_32_XM0_XEXECRegClass, bool IncludeScratchCopy=true)
Query target location for spilling SGPRs IncludeScratchCopy : Also look for free scratch SGPRs.
static void buildGitPtr(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, const SIInstrInfo *TII, Register TargetReg)
static bool allStackObjectsAreDead(const MachineFrameInfo &MFI)
static void buildPrologSpill(const GCNSubtarget &ST, const SIRegisterInfo &TRI, const SIMachineFunctionInfo &FuncInfo, LiveRegUnits &LiveUnits, MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SpillReg, int FI, Register FrameReg, int64_t DwordOff=0)
static Register buildScratchExecCopy(LiveRegUnits &LiveUnits, MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool IsProlog, bool EnableInactiveLanes)
static void encodeDwarfRegisterLocation(int DwarfReg, raw_ostream &OS)
static constexpr unsigned SGPRBitSize
static bool frameTriviallyRequiresSP(const MachineFrameInfo &MFI)
Returns true if the frame will require a reference to the stack pointer.
static SparseBitVector buildLiveInRoots(const MachineBasicBlock &MBB, const SIRegisterInfo &TRI)
Return the set of all root registers of regunits live-in to MBB.
static void initLiveUnits(LiveRegUnits &LiveUnits, const SIRegisterInfo &TRI, const SIMachineFunctionInfo *FuncInfo, MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, bool IsProlog)
static constexpr unsigned VGPRLaneBitSize
static bool allSGPRSpillsAreDead(const MachineFunction &MF)
static MCRegister findScratchNonCalleeSaveRegister(MachineRegisterInfo &MRI, LiveRegUnits &LiveUnits, const TargetRegisterClass &RC, bool Unused=false)
static MCCFIInstruction createScaledCFAInPrivateWave(const GCNSubtarget &ST, MCRegister DwarfStackPtrReg)
static MCRegister findUnusedRegister(MachineRegisterInfo &MRI, const LiveRegUnits &LiveUnits, const TargetRegisterClass &RC)
static constexpr unsigned SGPRByteSize
static void assignSlotsUsingVGPRBlocks(MachineFunction &MF, const GCNSubtarget &ST, std::vector< CalleeSavedInfo > &CSI)
static bool isAnyRootLiveIn(const SparseBitVector<> &LiveInRoots, const SIRegisterInfo &TRI, MCRegister Reg)
Returns true iff any root of Reg is in LiveInRoots (see buildLiveInRoots).
static unsigned getScratchScaleFactor(const GCNSubtarget &ST)
static const int BlockSize
bool isChainFunction() const
bool isEntryFunction() const
static const LaneMaskConstants & get(const GCNSubtarget &ST)
Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
Get the array size.
bool empty() const
Check if the array is empty.
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
bool test(unsigned Idx) const
Returns true if bit Idx is set.
BitVector & reset()
Reset all bits in the bitvector.
void clearBitsNotInMask(const uint32_t *Mask, unsigned MaskWords=~0u)
Clear a bit in this vector for every '0' bit in Mask.
BitVector & set()
Set all bits in the bitvector.
bool any() const
Returns true if any bit is set.
void clearBitsInMask(const uint32_t *Mask, unsigned MaskWords=~0u)
Clear any bits in this vector that are set in Mask.
iterator_range< const_set_bits_iterator > set_bits() const
bool empty() const
Returns whether there are no bits in this bitvector.
The CalleeSavedInfo class tracks the information need to locate where a callee saved register is in t...
bool isSpilledToReg() const
MCRegister getReg() const
MCRegister getDstReg() const
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
bool hasImplicitBufferPtr() const
bool hasFlatScratchInit() const
const HexagonRegisterInfo & getRegisterInfo() const
A set of register units used to track register liveness.
bool available(MCRegister Reg) const
Returns true if no part of physical register Reg is live.
void init(const TargetRegisterInfo &TRI)
Initialize and clear the set.
void addReg(MCRegister Reg)
Adds register units covered by physical register Reg.
LLVM_ABI void stepBackward(const MachineInstr &MI)
Updates liveness when stepping backwards over the instruction MI.
LLVM_ABI void addLiveOuts(const MachineBasicBlock &MBB)
Adds registers living out of block MBB.
void removeReg(MCRegister Reg)
Removes all register units covered by physical register Reg.
bool empty() const
Returns true if the set is empty.
LLVM_ABI void addLiveIns(const MachineBasicBlock &MBB)
Adds registers living into block MBB.
static MCCFIInstruction createLLVMVectorOffset(MCSymbol *L, unsigned Register, unsigned RegisterSizeInBits, unsigned MaskRegister, unsigned MaskRegisterSizeInBits, int64_t Offset, SMLoc Loc={})
.cfi_llvm_vector_offset Previous value of Register is saved at Offset from CFA.
static MCCFIInstruction createUndefined(MCSymbol *L, unsigned Register, SMLoc Loc={})
.cfi_undefined From now on the previous value of Register can't be restored anymore.
static MCCFIInstruction createLLVMVectorRegisters(MCSymbol *L, unsigned Register, ArrayRef< VectorRegisterWithLane > VectorRegisters, SMLoc Loc={})
.cfi_llvm_vector_registers Previous value of Register is saved in lanes of vector registers.
static MCCFIInstruction createLLVMVectorRegisterMask(MCSymbol *L, unsigned Register, unsigned SpillRegister, unsigned SpillRegisterLaneSizeInBits, unsigned MaskRegister, unsigned MaskRegisterSizeInBits, SMLoc Loc={})
.cfi_llvm_vector_register_mask Previous value of Register is saved in SpillRegister,...
static MCCFIInstruction createRegister(MCSymbol *L, unsigned Register1, unsigned Register2, SMLoc Loc={})
.cfi_register Previous value of Register1 is saved in register Register2.
static MCCFIInstruction createOffset(MCSymbol *L, unsigned Register, int64_t Offset, SMLoc Loc={})
.cfi_offset Previous value of Register is saved at offset Offset from CFA.
static MCCFIInstruction createLLVMRegisterPair(MCSymbol *L, unsigned Register, unsigned R1, unsigned R1SizeInBits, unsigned R2, unsigned R2SizeInBits, SMLoc Loc={})
.cfi_llvm_register_pair Previous value of Register is saved in R1:R2.
static MCCFIInstruction createEscape(MCSymbol *L, StringRef Vals, SMLoc Loc={}, StringRef Comment="")
.cfi_escape Allows the user to add arbitrary bytes to the unwind info.
static MCCFIInstruction createSameValue(MCSymbol *L, unsigned Register, SMLoc Loc={})
.cfi_same_value Current value of Register is the same as in the previous frame.
const MCRegisterInfo * getRegisterInfo() const
Describe properties that are true of each instruction in the target description file.
bool isValid() const
Returns true if this iterator is not yet at the end.
MCRegUnitMaskIterator enumerates a list of register units and their associated lane masks for Reg.
MCRegUnitRootIterator enumerates the root registers of a register unit.
bool isValid() const
Check if the iterator is at the end of the list.
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
virtual int64_t getDwarfRegNum(MCRegister Reg, bool isEH) const
Map a target register to an equivalent dwarf register number.
Wrapper class representing physical registers. Should be passed by value.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
bool hasVarSizedObjects() const
This method may be called any time after instruction selection is complete to determine if the stack ...
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
bool hasCalls() const
Return true if the current function has any function calls.
bool isFrameAddressTaken() const
This method may be called any time after instruction selection is complete to determine if there is a...
Align getMaxAlign() const
Return the alignment in bytes that this function must be aligned to, which is greater than the defaul...
bool hasPatchPoint() const
This method may be called any time after instruction selection is complete to determine if there is a...
LLVM_ABI int CreateSpillStackObject(uint64_t Size, Align Alignment)
Create a new statically sized stack object that represents a spill slot, returning a nonnegative iden...
bool hasTailCall() const
Returns true if the function contains a tail call.
bool hasStackMap() const
This method may be called any time after instruction selection is complete to determine if there is a...
void RemoveStackObject(int ObjectIdx)
Remove or mark dead a statically sized stack object.
int getObjectIndexEnd() const
Return one past the maximum frame object index.
uint8_t getStackID(int ObjectIdx) const
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
int getObjectIndexBegin() const
Return the minimum frame object index.
bool isDeadObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a dead object.
unsigned addFrameInst(const MCCFIInstruction &Inst)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
bool needsFrameMoves() const
True if this function needs frame moves for debug or exceptions.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineBasicBlock & front() const
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with.
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned TargetFlags=0) const
const MachineInstrBuilder & addCFIIndex(unsigned CFIIndex) const
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Representation of each machine instruction.
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
void setIsDead(bool Val=true)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool isReserved(MCRegister PhysReg) const
isReserved - Returns true when PhysReg is a reserved register.
bool isAllocatable(MCRegister PhysReg) const
isAllocatable - Returns true when PhysReg belongs to an allocatable register class and it hasn't been...
LLVM_ABI const MCPhysReg * getCalleeSavedRegs() const
Returns list of callee saved registers.
void reserveReg(MCRegister PhysReg, const TargetRegisterInfo *TRI)
reserveReg – Mark a register as reserved so checks like isAllocatable will not suggest using it.
void addLiveIn(MCRegister Reg, Register vreg=Register())
addLiveIn - Add the specified register as a live-in.
LLVM_ABI void replaceRegWith(Register FromReg, Register ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
LLVM_ABI bool isPhysRegModified(MCRegister PhysReg, bool SkipNoReturnDef=false) const
Return true if the specified register is modified in this function.
LLVM_ABI bool isPhysRegUsed(MCRegister PhysReg, bool SkipRegMaskTest=false) const
Return true if the specified register is modified or read in this function.
Represent a mutable reference to an array (0 or more elements consecutively in memory),...
PrologEpilogSGPRSpillBuilder(Register Reg, const PrologEpilogSGPRSaveRestoreInfo SI, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, const SIInstrInfo *TII, const SIRegisterInfo &TRI, LiveRegUnits &LiveUnits, Register FrameReg, bool IsFramePtrPrologSpill=false)
Wrapper class representing virtual and physical registers.
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
void determinePrologEpilogSGPRSaves(MachineFunction &MF, BitVector &SavedRegs, bool NeedExecCopyReservedReg) const
MachineInstr * buildCFIForSGPRToVMEMSpill(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, MCRegister SGPR, int64_t Offset) const
Create a CFI index describing a spill of a SGPR to VMEM and build a MachineInstr around it.
void emitCSRSpillRestores(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, LiveRegUnits &LiveUnits, Register FrameReg, Register FramePtrRegScratchCopy) const
StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg) const override
getFrameIndexReference - This method should return the base register and offset used to reference a f...
void processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS=nullptr) const override
processFunctionBeforeFrameFinalized - This method is called immediately before the specified function...
bool mayReserveScratchForCWSR(const MachineFunction &MF) const
bool allocateScavengingFrameIndexesNearIncomingSP(const MachineFunction &MF) const override
Control the placement of special register scavenging spill slots when allocating a stack frame.
bool requiresStackPointerReference(const MachineFunction &MF) const
void emitEntryFunctionPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const override
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...
bool hasFPImpl(const MachineFunction &MF) const override
bool assignCalleeSavedSpillSlotsImpl(MachineFunction &MF, const TargetRegisterInfo *TRI, std::vector< CalleeSavedInfo > &CSI) const
MachineInstr * buildCFIForVRegToVRegSpill(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, const MCRegister Reg, const MCRegister RegCopy) const
Create a CFI index describing a spill of the VGPR/AGPR Reg to another VGPR/AGPR RegCopy and build a M...
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, ArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI) const override
spillCalleeSavedRegisters - Issues instruction(s) to spill all callee saved registers and returns tru...
MachineInstr * buildCFIForRegToSGPRPairSpill(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, MCRegister Reg, MCRegister SGPRPair) const
MachineInstr * buildCFIForVGPRToVMEMSpill(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, MCRegister VGPR, int64_t Offset) const
Create a CFI index describing a spill of a VGPR to VMEM and build a MachineInstr around it.
MachineInstr * buildCFIForSGPRToVGPRSpill(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, const MCRegister SGPR, const MCRegister VGPR, const int Lane) const
Create a CFI index describing a spill of an SGPR to a single lane of a VGPR and build a MachineInstr ...
bool assignCalleeSavedSpillSlots(MachineFunction &MF, const TargetRegisterInfo *TRI, std::vector< CalleeSavedInfo > &CSI) const override
assignCalleeSavedSpillSlots - Allows target to override spill slot assignment logic.
void determineCalleeSavesSGPR(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override
MachineInstr * buildCFIForSameValue(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, MCRegister Reg) const
MachineInstr * buildCFI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, const MCCFIInstruction &CFIInst, MachineInstr::MIFlag flag=MachineInstr::FrameSetup) const
Create a CFI index for CFIInst and build a MachineInstr around it.
void emitCSRSpillStores(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, LiveRegUnits &LiveUnits, Register FrameReg, Register FramePtrRegScratchCopy, const bool NeedsFrameMoves) const
void processFunctionBeforeFrameIndicesReplaced(MachineFunction &MF, RegScavenger *RS=nullptr) const override
processFunctionBeforeFrameIndicesReplaced - This method is called immediately before MO_FrameIndex op...
bool isSupportedStackID(TargetStackID::Value ID) const override
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override
emitPrologue/emitEpilogue - These methods insert prolog and epilog code into the function.
MachineBasicBlock::iterator eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
This method is called during prolog/epilog code insertion to eliminate call frame setup and destroy p...
bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, MutableArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI) const override
restoreCalleeSavedRegisters - Issues instruction(s) to restore all callee saved registers and returns...
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
ArrayRef< PrologEpilogSGPRSpill > getPrologEpilogSGPRSpills() const
const WWMSpillsMap & getWWMSpills() const
void getAllScratchSGPRCopyDstRegs(SmallVectorImpl< Register > &Regs) const
ArrayRef< MCPhysReg > getAGPRSpillVGPRs() const
void setSGPRForEXECCopy(Register Reg)
void removePrologEpilogSGPRSpillEntry(Register Reg)
unsigned getNumPreloadedSGPRs() const
void shiftWwmVGPRsToLowestRange(MachineFunction &MF, SmallVectorImpl< Register > &WWMVGPRs, BitVector &SavedVGPRs)
void setMaskForVGPRBlockOps(Register RegisterBlock, uint32_t Mask)
GCNUserSGPRUsageInfo & getUserSGPRInfo()
void allocateWWMSpill(MachineFunction &MF, Register VGPR, uint64_t Size=4, Align Alignment=Align(4))
Register getLongBranchReservedReg() const
unsigned getDynamicVGPRBlockSize() const
bool hasSpilledVGPRs() const
void setVGPRToAGPRSpillDead(int FrameIndex)
bool isWholeWaveFunction() const
Register getStackPtrOffsetReg() const
bool isStackRealigned() const
Register getScratchRSrcReg() const
Returns the physical register reserved for use as the resource descriptor for scratch accesses.
ArrayRef< MCPhysReg > getVGPRSpillAGPRs() const
int getScavengeFI(MachineFrameInfo &MFI, const SIRegisterInfo &TRI)
uint32_t getMaskForVGPRBlockOps(Register RegisterBlock) const
bool hasMaskForVGPRBlockOps(Register RegisterBlock) const
bool hasPrologEpilogSGPRSpillEntry(Register Reg) const
Register getGITPtrLoReg(const MachineFunction &MF) const
void setVGPRForAGPRCopy(Register NewVGPRForAGPRCopy)
bool allocateVGPRSpillToAGPR(MachineFunction &MF, int FI, bool isAGPRtoVGPR)
Reserve AGPRs or VGPRs to support spilling for FrameIndex FI.
void splitWWMSpillRegisters(MachineFunction &MF, SmallVectorImpl< std::pair< Register, int > > &CalleeSavedRegs, SmallVectorImpl< std::pair< Register, int > > &ScratchRegs) const
Register getSGPRForEXECCopy() const
bool isWWMReservedRegister(Register Reg) const
ArrayRef< SIRegisterInfo::SpilledReg > getSGPRSpillToPhysicalVGPRLanes(int FrameIndex) const
Register getVGPRForAGPRCopy() const
bool allocateSGPRSpillToVGPRLane(MachineFunction &MF, int FI, bool SpillToPhysVGPRLane=false, bool IsPrologEpilog=false)
Register getFrameOffsetReg() const
void setLongBranchReservedReg(Register Reg)
void setHasSpilledVGPRs(bool Spill=true)
bool removeDeadFrameIndices(MachineFrameInfo &MFI, bool ResetSGPRSpillStackIDs)
If ResetSGPRSpillStackIDs is true, reset the stack ID from sgpr-spill to the default stack.
void setScratchReservedForDynamicVGPRs(unsigned SizeInBytes)
MCRegister getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const
bool checkIndexInPrologEpilogSGPRSpills(int FI) const
const ReservedRegSet & getWWMReservedRegs() const
Register getImplicitBufferPtrUserSGPR() const
const PrologEpilogSGPRSaveRestoreInfo & getPrologEpilogSGPRSaveRestoreInfo(Register Reg) const
void setIsStackRealigned(bool Realigned=true)
unsigned getGITPtrHigh() const
bool hasSpilledSGPRs() const
void addToPrologEpilogSGPRSpills(Register Reg, PrologEpilogSGPRSaveRestoreInfo SI)
Register getScratchSGPRCopyDstReg(Register Reg) const
void setScratchRSrcReg(Register Reg)
void reserveWWMRegister(Register Reg)
Register getFrameRegister(const MachineFunction &MF) const override
Represents a location in source code.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
bool test(unsigned Idx) const
StackOffset holds a fixed and a scalable offset in bytes.
int64_t getFixed() const
Returns the fixed component of the stack.
Represent a constant reference to a string, i.e.
bool hasFP(const MachineFunction &MF) const
hasFP - Return true if the specified function should have a dedicated frame pointer register.
virtual bool hasReservedCallFrame(const MachineFunction &MF) const
hasReservedCallFrame - Under normal circumstances, when a frame pointer is not required,...
virtual void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...
void restoreCalleeSavedRegister(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const CalleeSavedInfo &CS, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) const
Align getStackAlign() const
getStackAlign - This method returns the number of bytes to which the stack pointer must be aligne...
LLVM_ABI bool DisableFramePointerElim(const MachineFunction &MF) const
DisableFramePointerElim - This returns true if frame pointer elimination optimization should be disab...
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
This class implements an extremely fast bulk output stream that can only output to a stream.
A raw_ostream that writes to a SmallVector or SmallString.
StringRef str() const
Return a StringRef for the vector contents.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
@ PRIVATE_ADDRESS
Address space for private memory.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
unsigned getVGPRAllocGranule(const MCSubtargetInfo &STI, unsigned DynamicVGPRBlockSize, std::optional< bool > EnableWavefrontSize32)
uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset)
Convert ByteOffset to dwords if the subtarget uses dword SMRD immediate offsets.
LLVM_READNONE constexpr bool isEntryFunctionCC(CallingConv::ID CC)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
LLVM_READNONE constexpr bool isCompute(CallingConv::ID CC)
LLVM_READNONE constexpr bool isChainCC(CallingConv::ID CC)
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
@ ScalablePredicateVector
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
UnaryFunction for_each(R &&Range, UnaryFunction F)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
constexpr RegState getKillRegState(bool B)
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer that is less than or equal to Value and is congruent to Skew modulo Align.
void clearDebugInfoForSpillFIs(MachineFrameInfo &MFI, MachineBasicBlock &MBB, const BitVector &SpillFIs)
Replace frame index operands with null registers in debug value instructions for the specified spill ...
int countl_zero(T Val)
Count the number of 0's from the most significant bit to the least significant bit, stopping at the first 1.
auto reverse(ContainerTy &&C)
void sort(IteratorTy Start, IteratorTy End)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
auto make_first_range(ContainerTy &&c)
Given a container of pairs, return a range over the first elements.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
constexpr uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
bool is_sorted(R &&Range, Compare C)
Wrapper function around std::is_sorted to check if elements in a range R are sorted with respect to a...
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
@ And
Bitwise or logical AND of integers.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
unsigned encodeULEB128(uint64_t Value, raw_ostream &OS, unsigned PadTo=0)
Utility function to encode a ULEB128 value to an output stream.
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
@ PRIVATE_SEGMENT_WAVE_BYTE_OFFSET
static constexpr uint64_t encode(Fields... Values)
This struct is a compact representation of a valid (non-zero power of two) alignment.
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.