23#define DEBUG_TYPE "frame-info"
26 "amdgpu-spill-vgpr-to-agpr",
27 cl::desc(
"Enable spilling VGPRs to AGPRs"),
56 for (
unsigned i = 0; CSRegs[i]; ++i)
57 LiveUnits.
addReg(CSRegs[i]);
77 bool IncludeScratchCopy =
true) {
83 unsigned Size =
TRI->getSpillSize(RC);
84 Align Alignment =
TRI->getSpillAlign(RC);
92 if (IncludeScratchCopy)
96 int FI = FrameInfo.CreateStackObject(
Size, Alignment,
true,
nullptr,
99 if (
TRI->spillSGPRToVGPR() &&
116 FI = FrameInfo.CreateSpillStackObject(
Size, Alignment);
127 LiveUnits.
addReg(ScratchSGPR);
142 int64_t DwordOff = 0) {
143 unsigned Opc = ST.hasFlatScratchEnabled() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
144 : AMDGPU::BUFFER_STORE_DWORD_OFFSET;
150 FrameInfo.getObjectAlign(FI));
151 LiveUnits.
addReg(SpillReg);
152 bool IsKill = !
MBB.isLiveIn(SpillReg);
153 TRI.buildSpillLoadStore(
MBB,
I,
DL,
Opc, FI, SpillReg, IsKill, FrameReg,
154 DwordOff, MMO,
nullptr, &LiveUnits);
166 Register FrameReg, int64_t DwordOff = 0) {
167 unsigned Opc = ST.hasFlatScratchEnabled() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
168 : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
174 FrameInfo.getObjectAlign(FI));
175 TRI.buildSpillLoadStore(
MBB,
I,
DL,
Opc, FI, SpillReg,
false, FrameReg,
176 DwordOff, MMO,
nullptr, &LiveUnits);
186 Register TargetLo =
TRI->getSubReg(TargetReg, AMDGPU::sub0);
187 Register TargetHi =
TRI->getSubReg(TargetReg, AMDGPU::sub1);
194 const MCInstrDesc &GetPC64 =
TII->get(AMDGPU::S_GETPC_B64_pseudo);
199 MBB.addLiveIn(GitPtrLo);
208 if (LiveUnits.
empty()) {
242 unsigned EltSize = 4;
244 void saveToMemory(
const int FI)
const {
246 assert(!MFI.isDeadObjectIndex(FI));
251 MRI, LiveUnits, AMDGPU::VGPR_32RegClass);
255 for (
unsigned I = 0, DwordOff = 0;
I < NumSubRegs; ++
I) {
258 :
Register(TRI.getSubReg(SuperReg, SplitParts[
I]));
259 BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
263 FI, FrameReg, DwordOff);
268 void saveToVGPRLane(
const int FI)
const {
269 assert(!MFI.isDeadObjectIndex(FI));
273 FuncInfo->getSGPRSpillToPhysicalVGPRLanes(FI);
274 assert(Spill.size() == NumSubRegs);
276 for (
unsigned I = 0;
I < NumSubRegs; ++
I) {
279 :
Register(TRI.getSubReg(SuperReg, SplitParts[
I]));
280 BuildMI(MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_S32_TO_VGPR),
288 void copyToScratchSGPR(
Register DstReg)
const {
289 BuildMI(MBB, MI, DL, TII->get(AMDGPU::COPY), DstReg)
294 void restoreFromMemory(
const int FI) {
299 MRI, LiveUnits, AMDGPU::VGPR_32RegClass);
303 for (
unsigned I = 0, DwordOff = 0;
I < NumSubRegs; ++
I) {
306 :
Register(TRI.getSubReg(SuperReg, SplitParts[
I]));
309 TmpVGPR, FI, FrameReg, DwordOff);
312 BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
318 void restoreFromVGPRLane(
const int FI) {
321 FuncInfo->getSGPRSpillToPhysicalVGPRLanes(FI);
322 assert(Spill.size() == NumSubRegs);
324 for (
unsigned I = 0;
I < NumSubRegs; ++
I) {
327 :
Register(TRI.getSubReg(SuperReg, SplitParts[
I]));
328 BuildMI(MBB, MI, DL, TII->get(AMDGPU::SI_RESTORE_S32_FROM_VGPR), SubReg)
334 void copyFromScratchSGPR(
Register SrcReg)
const {
335 BuildMI(MBB, MI, DL, TII->get(AMDGPU::COPY), SuperReg)
348 : MI(MI), MBB(MBB), MF(*MBB.
getParent()),
349 ST(MF.getSubtarget<
GCNSubtarget>()), MFI(MF.getFrameInfo()),
351 SuperReg(Reg), SI(SI), LiveUnits(LiveUnits), DL(DL),
354 SplitParts = TRI.getRegSplitParts(RC, EltSize);
355 NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
357 assert(SuperReg != AMDGPU::M0 &&
"m0 should never spill");
361 switch (SI.getKind()) {
363 return saveToMemory(SI.getIndex());
365 return saveToVGPRLane(SI.getIndex());
367 return copyToScratchSGPR(SI.getReg());
372 switch (SI.getKind()) {
374 return restoreFromMemory(SI.getIndex());
376 return restoreFromVGPRLane(SI.getIndex());
378 return copyFromScratchSGPR(SI.getReg());
386void SIFrameLowering::emitEntryFunctionFlatScratchInit(
390 const SIInstrInfo *
TII =
ST.getInstrInfo();
392 const SIMachineFunctionInfo *MFI = MF.
getInfo<SIMachineFunctionInfo>();
407 if (
ST.isAmdPalOS()) {
409 LiveRegUnits LiveUnits;
415 Register FlatScrInit = AMDGPU::NoRegister;
418 AllSGPR64s = AllSGPR64s.
slice(
419 std::min(
static_cast<unsigned>(AllSGPR64s.
size()), NumPreloaded));
428 assert(FlatScrInit &&
"Failed to find free register for scratch init");
430 FlatScrInitLo =
TRI->getSubReg(FlatScrInit, AMDGPU::sub0);
431 FlatScrInitHi =
TRI->getSubReg(FlatScrInit, AMDGPU::sub1);
438 const MCInstrDesc &LoadDwordX2 =
TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);
446 const GCNSubtarget &Subtarget = MF.
getSubtarget<GCNSubtarget>();
455 const MCInstrDesc &SAndB32 =
TII->get(AMDGPU::S_AND_B32);
463 assert(FlatScratchInitReg);
469 FlatScrInitLo =
TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0);
470 FlatScrInitHi =
TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1);
474 if (
ST.flatScratchIsPointer()) {
478 .
addReg(ScratchWaveOffsetReg);
485 using namespace AMDGPU::Hwreg;
488 .
addImm(int16_t(HwregEncoding::encode(ID_FLAT_SCR_LO, 0, 32)));
491 .
addImm(int16_t(HwregEncoding::encode(ID_FLAT_SCR_HI, 0, 32)));
498 .
addReg(ScratchWaveOffsetReg);
518 .
addReg(ScratchWaveOffsetReg);
541Register SIFrameLowering::getEntryFunctionReservedScratchRsrcReg(
545 const SIInstrInfo *
TII =
ST.getInstrInfo();
548 SIMachineFunctionInfo *MFI = MF.
getInfo<SIMachineFunctionInfo>();
554 if (!ScratchRsrcReg || (!MRI.
isPhysRegUsed(ScratchRsrcReg) &&
558 if (
ST.hasSGPRInitBug() ||
559 ScratchRsrcReg !=
TRI->reservedPrivateSegmentBufferReg(MF))
560 return ScratchRsrcReg;
573 AllSGPR128s = AllSGPR128s.
slice(std::min(
static_cast<unsigned>(AllSGPR128s.
size()), NumPreloaded));
583 (!GITPtrLoReg || !
TRI->isSubRegisterEq(
Reg, GITPtrLoReg))) {
591 return ScratchRsrcReg;
595 return ST.hasFlatScratchEnabled() ? 1 : ST.getWavefrontSize();
600 assert(&MF.
front() == &
MBB &&
"Shrink-wrapping not yet supported");
633 if (!ST.hasFlatScratchEnabled())
634 ScratchRsrcReg = getEntryFunctionReservedScratchRsrcReg(MF);
637 if (ScratchRsrcReg) {
639 if (&OtherBB != &
MBB) {
640 OtherBB.addLiveIn(ScratchRsrcReg);
648 if (ST.isAmdHsaOrMesa(
F)) {
649 PreloadedScratchRsrcReg =
651 if (ScratchRsrcReg && PreloadedScratchRsrcReg) {
655 MBB.addLiveIn(PreloadedScratchRsrcReg);
670 if (PreloadedScratchWaveOffsetReg &&
671 TRI->isSubRegisterEq(ScratchRsrcReg, PreloadedScratchWaveOffsetReg)) {
674 AllSGPRs = AllSGPRs.
slice(
675 std::min(
static_cast<unsigned>(AllSGPRs.
size()), NumPreloaded));
679 !
TRI->isSubRegisterEq(ScratchRsrcReg, Reg) && GITPtrLoReg != Reg) {
680 ScratchWaveOffsetReg = Reg;
689 if (!ScratchWaveOffsetReg)
691 "could not find temporary scratch offset register in prolog");
693 ScratchWaveOffsetReg = PreloadedScratchWaveOffsetReg;
695 assert(ScratchWaveOffsetReg || !PreloadedScratchWaveOffsetReg);
725 FrameInfo.getMaxAlign());
737 ST.hasInv2PiInlineImm())) {
749 bool NeedsFlatScratchInit =
751 (MRI.
isPhysRegUsed(AMDGPU::FLAT_SCR) || FrameInfo.hasCalls() ||
754 if ((NeedsFlatScratchInit || ScratchRsrcReg) &&
755 PreloadedScratchWaveOffsetReg && !ST.hasArchitectedFlatScratch()) {
756 MRI.
addLiveIn(PreloadedScratchWaveOffsetReg);
757 MBB.addLiveIn(PreloadedScratchWaveOffsetReg);
760 if (NeedsFlatScratchInit) {
761 emitEntryFunctionFlatScratchInit(MF,
MBB,
I,
DL, ScratchWaveOffsetReg);
764 if (ScratchRsrcReg) {
765 emitEntryFunctionScratchRsrcRegSetup(MF,
MBB,
I,
DL,
766 PreloadedScratchRsrcReg,
767 ScratchRsrcReg, ScratchWaveOffsetReg);
770 if (ST.hasWaitXcnt()) {
774 unsigned RegEncoding =
783void SIFrameLowering::emitEntryFunctionScratchRsrcRegSetup(
794 if (ST.isAmdPalOS()) {
797 Register Rsrc01 =
TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
798 Register Rsrc03 =
TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);
805 const MCInstrDesc &LoadDwordX4 =
TII->get(AMDGPU::S_LOAD_DWORDX4_IMM);
834 }
else if (
ST.isMesaGfxShader(Fn) || !PreloadedScratchRsrcReg) {
836 const MCInstrDesc &SMovB32 =
TII->get(AMDGPU::S_MOV_B32);
838 Register Rsrc2 =
TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
839 Register Rsrc3 =
TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);
842 uint64_t Rsrc23 =
TII->getScratchRsrcWords23();
845 Register Rsrc01 =
TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
848 const MCInstrDesc &Mov64 =
TII->get(AMDGPU::S_MOV_B64);
854 const MCInstrDesc &LoadDwordX2 =
TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);
873 Register Rsrc0 =
TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
874 Register Rsrc1 =
TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
892 }
else if (
ST.isAmdHsaOrMesa(Fn)) {
893 assert(PreloadedScratchRsrcReg);
895 if (ScratchRsrcReg != PreloadedScratchRsrcReg) {
910 Register ScratchRsrcSub0 =
TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
911 Register ScratchRsrcSub1 =
TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
917 .
addReg(ScratchWaveOffsetReg)
919 auto Addc =
BuildMI(
MBB,
I,
DL,
TII->get(AMDGPU::S_ADDC_U32), ScratchRsrcSub1)
947 bool EnableInactiveLanes) {
960 assert(IsProlog &&
"Epilog should look at return, not setup");
962 TII->getWholeWaveFunctionSetup(MF)->getOperand(0).getReg();
963 assert(ScratchExecCopy &&
"Couldn't find copy of EXEC");
966 MRI, LiveUnits, *
TRI.getWaveMaskRegClass());
969 if (!ScratchExecCopy)
972 LiveUnits.
addReg(ScratchExecCopy);
974 const unsigned SaveExecOpc =
975 ST.isWave32() ? (EnableInactiveLanes ? AMDGPU::S_XOR_SAVEEXEC_B32
976 : AMDGPU::S_OR_SAVEEXEC_B32)
977 : (EnableInactiveLanes ? AMDGPU::S_XOR_SAVEEXEC_B64
978 : AMDGPU::S_OR_SAVEEXEC_B64);
983 return ScratchExecCopy;
1003 if (!WWMScratchRegs.
empty())
1008 auto StoreWWMRegisters =
1010 for (
const auto &Reg : WWMRegs) {
1012 int FI = Reg.second;
1014 VGPR, FI, FrameReg);
1024 StoreWWMRegisters(WWMScratchRegs);
1026 auto EnableAllLanes = [&]() {
1030 if (!WWMCalleeSavedRegs.
empty()) {
1031 if (ScratchExecCopy) {
1040 StoreWWMRegisters(WWMCalleeSavedRegs);
1044 if (!ScratchExecCopy)
1047 else if (WWMCalleeSavedRegs.
empty())
1049 }
else if (ScratchExecCopy) {
1053 LiveUnits.
addReg(ScratchExecCopy);
1064 Spill.first == FramePtrReg ? FramePtrRegScratchCopy : Spill.first;
1069 LiveUnits, FrameReg);
1077 if (!ScratchSGPRs.
empty()) {
1082 MBB.sortUniqueLiveIns();
1084 if (!LiveUnits.
empty()) {
1109 Spill.first == FramePtrReg ? FramePtrRegScratchCopy : Spill.first;
1114 LiveUnits, FrameReg);
1124 auto RestoreWWMRegisters =
1126 for (
const auto &Reg : WWMRegs) {
1128 int FI = Reg.second;
1130 VGPR, FI, FrameReg);
1137 RestoreWWMRegisters(WWMCalleeSavedRegs);
1141 unsigned Opcode = Return.getOpcode();
1143 case AMDGPU::SI_WHOLE_WAVE_FUNC_RETURN:
1144 Opcode = AMDGPU::SI_RETURN;
1146 case AMDGPU::SI_TCRETURN_GFX_WholeWave:
1147 Opcode = AMDGPU::SI_TCRETURN_GFX;
1152 Register OrigExec = Return.getOperand(0).getReg();
1154 if (!WWMScratchRegs.
empty()) {
1158 RestoreWWMRegisters(WWMScratchRegs);
1165 Return.removeOperand(0);
1166 Return.setDesc(
TII->get(Opcode));
1171 if (!WWMScratchRegs.
empty()) {
1176 RestoreWWMRegisters(WWMScratchRegs);
1177 if (!WWMCalleeSavedRegs.
empty()) {
1178 if (ScratchExecCopy) {
1187 RestoreWWMRegisters(WWMCalleeSavedRegs);
1188 if (ScratchExecCopy) {
1227 bool SavesStackRegs =
1230 if (
TRI.hasStackRealignment(MF))
1234 if (!HasFP && !
hasFP(MF)) {
1237 FramePtrRegScratchCopy);
1238 }
else if (SavesStackRegs) {
1240 Register SGPRForFPSaveRestoreCopy =
1244 if (SGPRForFPSaveRestoreCopy) {
1251 DL,
TII,
TRI, LiveUnits, FramePtrReg);
1253 LiveUnits.
addReg(SGPRForFPSaveRestoreCopy);
1258 MRI, LiveUnits, AMDGPU::SReg_32_XM0_XEXECRegClass);
1259 if (!FramePtrRegScratchCopy)
1262 LiveUnits.
addReg(FramePtrRegScratchCopy);
1271 RoundedSize += Alignment;
1272 if (LiveUnits.
empty()) {
1287 And->getOperand(3).setIsDead();
1289 }
else if ((HasFP =
hasFP(MF))) {
1298 FramePtrRegScratchCopy);
1299 if (FramePtrRegScratchCopy)
1300 LiveUnits.
removeReg(FramePtrRegScratchCopy);
1307 if ((HasBP =
TRI.hasBasePointer(MF))) {
1313 if (HasFP && RoundedSize != 0) {
1318 Add->getOperand(3).setIsDead();
1323 assert((!HasFP || FPSaved || !SavesStackRegs) &&
1324 "Needed to save FP but didn't save it anywhere");
1329 "Saved FP but didn't need it");
1333 assert((!HasBP || BPSaved || !SavesStackRegs) &&
1334 "Needed to save BP but didn't save it anywhere");
1336 assert((HasBP || !BPSaved) &&
"Saved BP but didn't need it");
1340 TII->getWholeWaveFunctionSetup(MF)->eraseFromParent();
1364 MBBI =
MBB.getLastNonDebugInstr();
1366 DL =
MBBI->getDebugLoc();
1368 MBBI =
MBB.getFirstTerminator();
1379 if (RoundedSize != 0) {
1380 if (
TRI.hasBasePointer(MF)) {
1384 }
else if (
hasFP(MF)) {
1392 Register SGPRForFPSaveRestoreCopy =
1400 if (SGPRForFPSaveRestoreCopy) {
1401 LiveUnits.
addReg(SGPRForFPSaveRestoreCopy);
1404 MRI, LiveUnits, AMDGPU::SReg_32_XM0_XEXECRegClass);
1405 if (!FramePtrRegScratchCopy)
1408 LiveUnits.
addReg(FramePtrRegScratchCopy);
1412 FramePtrRegScratchCopy);
1417 Register SrcReg = SGPRForFPSaveRestoreCopy ? SGPRForFPSaveRestoreCopy
1418 : FramePtrRegScratchCopy;
1422 if (SGPRForFPSaveRestoreCopy)
1427 FramePtrRegScratchCopy);
1468 const bool SpillVGPRToAGPR = ST.hasMAIInsts() && FuncInfo->
hasSpilledVGPRs()
1471 if (SpillVGPRToAGPR) {
1476 bool SeenDbgInstr =
false;
1481 if (
MI.isDebugInstr())
1482 SeenDbgInstr =
true;
1484 if (
TII->isVGPRSpill(
MI)) {
1487 unsigned FIOp = AMDGPU::getNamedOperandIdx(
MI.getOpcode(),
1488 AMDGPU::OpName::vaddr);
1489 int FI =
MI.getOperand(FIOp).getIndex();
1491 TII->getNamedOperand(
MI, AMDGPU::OpName::vdata)->getReg();
1493 TRI->isAGPR(MRI, VReg))) {
1495 RS->enterBasicBlockEnd(
MBB);
1496 RS->backward(std::next(
MI.getIterator()));
1497 TRI->eliminateFrameIndex(
MI, 0, FIOp, RS);
1501 }
else if (
TII->isStoreToStackSlot(
MI, FrameIndex) ||
1502 TII->isLoadFromStackSlot(
MI, FrameIndex))
1504 NonVGPRSpillFIs.
set(FrameIndex);
1510 for (
unsigned FI : SpillFIs.
set_bits())
1511 if (!NonVGPRSpillFIs.
test(FI))
1521 MBB.sortUniqueLiveIns();
1523 if (!SpillFIs.
empty() && SeenDbgInstr)
1531 bool HaveSGPRToVMemSpill =
1534 "SGPR spill should have been removed in SILowerSGPRSpills");
1540 assert(RS &&
"RegScavenger required if spilling");
1547 if (HaveSGPRToVMemSpill &&
1561 if (ST.hasMAIInsts() && !ST.hasGFX90AInsts()) {
1568 TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
1569 if (UnusedLowVGPR && (
TRI->getHWRegIndex(UnusedLowVGPR) <
1570 TRI->getHWRegIndex(VGPRForAGPRCopy))) {
1583 TRI->findUnusedRegister(MRI, &AMDGPU::SGPR_64RegClass, MF);
1588 if (LongBranchReservedReg && UnusedLowSGPR) {
1598 bool NeedExecCopyReservedReg)
const {
1609 for (
unsigned I = 0; CSRegs[
I]; ++
I)
1615 if (NeedExecCopyReservedReg ||
1616 (ReservedRegForExecCopy &&
1620 if (UnusedScratchReg) {
1625 LiveUnits.
addReg(UnusedScratchReg);
1629 "Re-reserving spill slot for EXEC copy register");
1633 }
else if (ReservedRegForExecCopy) {
1642 if (
F.hasFnAttribute(Attribute::NoReturn) ||
1654 const bool WillHaveFP =
1655 FrameInfo.hasCalls() &&
1658 if (WillHaveFP ||
hasFP(MF)) {
1661 "Re-reserving spill slot for FP");
1665 if (
TRI->hasBasePointer(MF)) {
1668 "Re-reserving spill slot for BP");
1690 bool NeedExecCopyReservedReg =
false;
1697 if (
TII->isWWMRegSpillOpcode(
MI.getOpcode()))
1698 NeedExecCopyReservedReg =
true;
1699 else if (
MI.getOpcode() == AMDGPU::SI_RETURN ||
1700 MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG ||
1701 MI.getOpcode() == AMDGPU::SI_WHOLE_WAVE_FUNC_RETURN ||
1703 TII->isChainCallOpcode(
MI.getOpcode()))) {
1706 (
count_if(
MI.operands(), [](
auto Op) { return Op.isReg(); }) ==
1719 if (
TRI->getRegSizeInBits(*RC) != 32)
1724 sort(SortedWWMVGPRs, std::greater<Register>());
1733 assert(!NeedExecCopyReservedReg &&
1734 "Whole wave functions can use the reg mapped for their i1 argument");
1736 unsigned NumArchVGPRs = ST.getAddressableNumArchVGPRs();
1738 AMDGPU::VGPR_32RegClass.getRegisters().take_front(NumArchVGPRs))
1741 MF.
begin()->addLiveIn(Reg);
1743 MF.
begin()->sortUniqueLiveIns();
1751 SavedVGPRs.
reset(
Op.getReg());
1759 TRI->getSpillAlign(*RC));
1768 if (!ST.hasGFX90AInsts())
1776 SavedVGPRs.
reset(Reg.first);
1793 const BitVector AllSavedRegs = SavedRegs;
1802 const bool WillHaveFP =
1806 if (WillHaveFP ||
hasFP(MF))
1816 Register RetAddrReg =
TRI->getReturnAddressReg(MF);
1819 SavedRegs.
set(
TRI->getSubReg(RetAddrReg, AMDGPU::sub0));
1820 SavedRegs.
set(
TRI->getSubReg(RetAddrReg, AMDGPU::sub1));
1826 std::vector<CalleeSavedInfo> &CSI) {
1834 return A.getReg() <
B.getReg();
1836 "Callee saved registers not sorted");
1839 return !CSI.isSpilledToReg() &&
1840 TRI->getPhysRegBaseClass(CSI.getReg()) == &AMDGPU::VGPR_32RegClass &&
1844 auto CSEnd = CSI.end();
1845 for (
auto CSIt = CSI.begin(); CSIt != CSEnd; ++CSIt) {
1847 if (!CanUseBlockOps(*CSIt))
1854 CSEnd = std::remove_if(
1856 if (CanUseBlockOps(CSI) && CSI.
getReg() <
Reg + 32) {
1866 TRI->getMatchingSuperReg(
Reg, AMDGPU::sub0, BlockRegClass);
1875 TRI->getMatchingSuperReg(LastBlockStart, AMDGPU::sub0, BlockRegClass);
1876 assert(RegBlock &&
TRI->isSubRegister(RegBlock,
Reg) &&
1877 "Couldn't find super register");
1878 int RegDelta =
Reg - LastBlockStart;
1880 "Bad shift amount");
1891 unsigned BlockSize =
TRI->getSpillSize(*BlockRegClass) - UnusedBits * 4;
1893 MFI.CreateStackObject(
BlockSize,
TRI->getSpillAlign(*BlockRegClass),
1895 MFI.setIsCalleeSavedObjectIndex(FrameIdx,
true);
1897 CSIt->setFrameIdx(FrameIdx);
1898 CSIt->setReg(RegBlock);
1900 CSI.erase(CSEnd, CSI.end());
1905 std::vector<CalleeSavedInfo> &CSI)
const {
1910 bool UseVGPRBlocks = ST.useVGPRBlockOpsForCSR();
1920 std::vector<CalleeSavedInfo> &CSI)
const {
1928 Register BasePtrReg = RI->getBaseRegister();
1929 Register SGPRForFPSaveRestoreCopy =
1931 Register SGPRForBPSaveRestoreCopy =
1933 if (!SGPRForFPSaveRestoreCopy && !SGPRForBPSaveRestoreCopy)
1936 unsigned NumModifiedRegs = 0;
1938 if (SGPRForFPSaveRestoreCopy)
1940 if (SGPRForBPSaveRestoreCopy)
1943 for (
auto &CS : CSI) {
1944 if (CS.getReg() == FramePtrReg.
asMCReg() && SGPRForFPSaveRestoreCopy) {
1945 CS.setDstReg(SGPRForFPSaveRestoreCopy);
1946 if (--NumModifiedRegs)
1948 }
else if (CS.getReg() == BasePtrReg.
asMCReg() &&
1949 SGPRForBPSaveRestoreCopy) {
1950 CS.setDstReg(SGPRForBPSaveRestoreCopy);
1951 if (--NumModifiedRegs)
1965 uint64_t EstStackSize = MFI.estimateStackSize(MF);
1966 uint64_t MaxOffset = EstStackSize - 1;
1975 if (ST.hasFlatScratchEnabled()) {
1980 if (
TII->isLegalMUBUFImmOffset(MaxOffset))
1992 if (!ST.useVGPRBlockOpsForCSR())
2004 if (!BlockRegClass->contains(Reg) ||
2012 int FrameIndex = CS.getFrameIdx();
2017 FrameInfo.getObjectSize(FrameIndex),
2018 FrameInfo.getObjectAlign(FrameIndex));
2021 TII->get(AMDGPU::SI_BLOCK_SPILL_V1024_SAVE))
2037 MBB.sortUniqueLiveIns();
2047 if (!ST.useVGPRBlockOpsForCSR())
2057 if (!BlockRegClass->
contains(Reg) ||
2065 int FrameIndex = CS.getFrameIdx();
2070 MFI.getObjectAlign(FrameIndex));
2073 TII->get(AMDGPU::SI_BLOCK_SPILL_V1024_RESTORE), Reg)
2088 MBB.sortUniqueLiveIns();
2096 int64_t Amount =
I->getOperand(0).getImm();
2098 return MBB.erase(
I);
2103 unsigned Opc =
I->getOpcode();
2104 bool IsDestroy =
Opc ==
TII->getCallFrameDestroyOpcode();
2105 uint64_t CalleePopAmount = IsDestroy ?
I->getOperand(1).getImm() : 0;
2119 Add->getOperand(3).setIsDead();
2120 }
else if (CalleePopAmount != 0) {
2124 return MBB.erase(
I);
2179 "only expected to call this for entry points functions");
assert(UImm && (UImm != ~static_cast<T>(0)) && "Invalid immediate!")
Provides AMDGPU specific target descriptions.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static const Function * getParent(const Value *V)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
AMD GCN specific subclass of TargetSubtarget.
const HexagonInstrInfo * TII
Register const TargetRegisterInfo * TRI
Promote Memory to Register
static constexpr MCPhysReg FPReg
static constexpr MCPhysReg SPReg
This file declares the machine register scavenger class.
static void buildEpilogRestore(const GCNSubtarget &ST, const SIRegisterInfo &TRI, const SIMachineFunctionInfo &FuncInfo, LiveRegUnits &LiveUnits, MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SpillReg, int FI, Register FrameReg, int64_t DwordOff=0)
static cl::opt< bool > EnableSpillVGPRToAGPR("amdgpu-spill-vgpr-to-agpr", cl::desc("Enable spilling VGPRs to AGPRs"), cl::ReallyHidden, cl::init(true))
static void getVGPRSpillLaneOrTempRegister(MachineFunction &MF, LiveRegUnits &LiveUnits, Register SGPR, const TargetRegisterClass &RC=AMDGPU::SReg_32_XM0_XEXECRegClass, bool IncludeScratchCopy=true)
Query target location for spilling SGPRs IncludeScratchCopy : Also look for free scratch SGPRs.
static void buildGitPtr(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, const SIInstrInfo *TII, Register TargetReg)
static bool allStackObjectsAreDead(const MachineFrameInfo &MFI)
static void buildPrologSpill(const GCNSubtarget &ST, const SIRegisterInfo &TRI, const SIMachineFunctionInfo &FuncInfo, LiveRegUnits &LiveUnits, MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SpillReg, int FI, Register FrameReg, int64_t DwordOff=0)
static Register buildScratchExecCopy(LiveRegUnits &LiveUnits, MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool IsProlog, bool EnableInactiveLanes)
static bool frameTriviallyRequiresSP(const MachineFrameInfo &MFI)
Returns true if the frame will require a reference to the stack pointer.
static void initLiveUnits(LiveRegUnits &LiveUnits, const SIRegisterInfo &TRI, const SIMachineFunctionInfo *FuncInfo, MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, bool IsProlog)
static bool allSGPRSpillsAreDead(const MachineFunction &MF)
static MCRegister findScratchNonCalleeSaveRegister(MachineRegisterInfo &MRI, LiveRegUnits &LiveUnits, const TargetRegisterClass &RC, bool Unused=false)
static MCRegister findUnusedRegister(MachineRegisterInfo &MRI, const LiveRegUnits &LiveUnits, const TargetRegisterClass &RC)
static void assignSlotsUsingVGPRBlocks(MachineFunction &MF, const GCNSubtarget &ST, std::vector< CalleeSavedInfo > &CSI)
static unsigned getScratchScaleFactor(const GCNSubtarget &ST)
static const int BlockSize
bool isChainFunction() const
bool isEntryFunction() const
static const LaneMaskConstants & get(const GCNSubtarget &ST)
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
bool test(unsigned Idx) const
void clearBitsNotInMask(const uint32_t *Mask, unsigned MaskWords=~0u)
clearBitsNotInMask - Clear a bit in this vector for every '0' bit in Mask.
bool any() const
any - Returns true if any bit is set.
void clearBitsInMask(const uint32_t *Mask, unsigned MaskWords=~0u)
clearBitsInMask - Clear any bits in this vector that are set in Mask.
iterator_range< const_set_bits_iterator > set_bits() const
bool empty() const
empty - Tests whether there are no bits in this bitvector.
The CalleeSavedInfo class tracks the information needed to locate where a callee saved register is in the current frame.
MCRegister getReg() const
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
bool hasImplicitBufferPtr() const
bool hasFlatScratchInit() const
const HexagonRegisterInfo & getRegisterInfo() const
A set of register units used to track register liveness.
bool available(MCRegister Reg) const
Returns true if no part of physical register Reg is live.
void init(const TargetRegisterInfo &TRI)
Initialize and clear the set.
void addReg(MCRegister Reg)
Adds register units covered by physical register Reg.
LLVM_ABI void stepBackward(const MachineInstr &MI)
Updates liveness when stepping backwards over the instruction MI.
LLVM_ABI void addLiveOuts(const MachineBasicBlock &MBB)
Adds registers living out of block MBB.
void removeReg(MCRegister Reg)
Removes all register units covered by physical register Reg.
bool empty() const
Returns true if the set is empty.
LLVM_ABI void addLiveIns(const MachineBasicBlock &MBB)
Adds registers living into block MBB.
Describe properties that are true of each instruction in the target description file.
Wrapper class representing physical registers. Should be passed by value.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
bool hasVarSizedObjects() const
This method may be called any time after instruction selection is complete to determine if the stack ...
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
bool hasCalls() const
Return true if the current function has any function calls.
bool isFrameAddressTaken() const
This method may be called any time after instruction selection is complete to determine if there is a...
Align getMaxAlign() const
Return the alignment in bytes that this function must be aligned to, which is greater than the defaul...
bool hasPatchPoint() const
This method may be called any time after instruction selection is complete to determine if there is a...
LLVM_ABI int CreateSpillStackObject(uint64_t Size, Align Alignment)
Create a new statically sized stack object that represents a spill slot, returning a nonnegative iden...
bool hasTailCall() const
Returns true if the function contains a tail call.
bool hasStackMap() const
This method may be called any time after instruction selection is complete to determine if there is a...
void RemoveStackObject(int ObjectIdx)
Remove or mark dead a statically sized stack object.
int getObjectIndexEnd() const
Return one past the maximum frame object index.
uint8_t getStackID(int ObjectIdx) const
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
int getObjectIndexBegin() const
Return the minimum frame object index.
bool isDeadObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a dead object.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineBasicBlock & front() const
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned TargetFlags=0) const
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Representation of each machine instruction.
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
void setIsDead(bool Val=true)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool isReserved(MCRegister PhysReg) const
isReserved - Returns true when PhysReg is a reserved register.
bool isAllocatable(MCRegister PhysReg) const
isAllocatable - Returns true when PhysReg belongs to an allocatable register class and it hasn't been...
LLVM_ABI const MCPhysReg * getCalleeSavedRegs() const
Returns list of callee saved registers.
void reserveReg(MCRegister PhysReg, const TargetRegisterInfo *TRI)
reserveReg – Mark a register as reserved so checks like isAllocatable will not suggest using it.
void addLiveIn(MCRegister Reg, Register vreg=Register())
addLiveIn - Add the specified register as a live-in.
LLVM_ABI void replaceRegWith(Register FromReg, Register ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
LLVM_ABI bool isPhysRegModified(MCRegister PhysReg, bool SkipNoReturnDef=false) const
Return true if the specified register is modified in this function.
LLVM_ABI bool isPhysRegUsed(MCRegister PhysReg, bool SkipRegMaskTest=false) const
Return true if the specified register is modified or read in this function.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memory).
PrologEpilogSGPRSpillBuilder(Register Reg, const PrologEpilogSGPRSaveRestoreInfo SI, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, const SIInstrInfo *TII, const SIRegisterInfo &TRI, LiveRegUnits &LiveUnits, Register FrameReg)
Wrapper class representing virtual and physical registers.
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
void determinePrologEpilogSGPRSaves(MachineFunction &MF, BitVector &SavedRegs, bool NeedExecCopyReservedReg) const
StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg) const override
getFrameIndexReference - This method should return the base register and offset used to reference a f...
void processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS=nullptr) const override
processFunctionBeforeFrameFinalized - This method is called immediately before the specified function...
bool mayReserveScratchForCWSR(const MachineFunction &MF) const
bool allocateScavengingFrameIndexesNearIncomingSP(const MachineFunction &MF) const override
Control the placement of special register scavenging spill slots when allocating a stack frame.
bool requiresStackPointerReference(const MachineFunction &MF) const
void emitEntryFunctionPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const override
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...
void emitCSRSpillStores(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL, LiveRegUnits &LiveUnits, Register FrameReg, Register FramePtrRegScratchCopy) const
bool hasFPImpl(const MachineFunction &MF) const override
bool assignCalleeSavedSpillSlotsImpl(MachineFunction &MF, const TargetRegisterInfo *TRI, std::vector< CalleeSavedInfo > &CSI) const
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, ArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI) const override
spillCalleeSavedRegisters - Issues instruction(s) to spill all callee saved registers and returns true if it isn't possible / profitable to do so by issuing a series of store instructions via storeRegToStackSlot().
bool assignCalleeSavedSpillSlots(MachineFunction &MF, const TargetRegisterInfo *TRI, std::vector< CalleeSavedInfo > &CSI) const override
assignCalleeSavedSpillSlots - Allows target to override spill slot assignment logic.
void determineCalleeSavesSGPR(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override
void emitCSRSpillRestores(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL, LiveRegUnits &LiveUnits, Register FrameReg, Register FramePtrRegScratchCopy) const
void processFunctionBeforeFrameIndicesReplaced(MachineFunction &MF, RegScavenger *RS=nullptr) const override
processFunctionBeforeFrameIndicesReplaced - This method is called immediately before MO_FrameIndex operands are eliminated, but after the frame is finalized.
bool isSupportedStackID(TargetStackID::Value ID) const override
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override
emitProlog/emitEpilog - These methods insert prolog and epilog code into the function.
MachineBasicBlock::iterator eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
This method is called during prolog/epilog code insertion to eliminate call frame setup and destroy pseudo instructions (but only if the Target is using them).
bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, MutableArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI) const override
restoreCalleeSavedRegisters - Issues instruction(s) to restore all callee saved registers and returns true if it isn't possible / profitable to do so by issuing a series of load instructions via loadRegToStackSlot().
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which interpolation parameters to load.
ArrayRef< PrologEpilogSGPRSpill > getPrologEpilogSGPRSpills() const
const WWMSpillsMap & getWWMSpills() const
void getAllScratchSGPRCopyDstRegs(SmallVectorImpl< Register > &Regs) const
ArrayRef< MCPhysReg > getAGPRSpillVGPRs() const
void setSGPRForEXECCopy(Register Reg)
unsigned getNumPreloadedSGPRs() const
void shiftWwmVGPRsToLowestRange(MachineFunction &MF, SmallVectorImpl< Register > &WWMVGPRs, BitVector &SavedVGPRs)
void setMaskForVGPRBlockOps(Register RegisterBlock, uint32_t Mask)
GCNUserSGPRUsageInfo & getUserSGPRInfo()
void allocateWWMSpill(MachineFunction &MF, Register VGPR, uint64_t Size=4, Align Alignment=Align(4))
Register getLongBranchReservedReg() const
unsigned getDynamicVGPRBlockSize() const
bool hasSpilledVGPRs() const
void setVGPRToAGPRSpillDead(int FrameIndex)
bool isWholeWaveFunction() const
Register getStackPtrOffsetReg() const
bool isStackRealigned() const
Register getScratchRSrcReg() const
Returns the physical register reserved for use as the resource descriptor for scratch accesses.
ArrayRef< MCPhysReg > getVGPRSpillAGPRs() const
int getScavengeFI(MachineFrameInfo &MFI, const SIRegisterInfo &TRI)
uint32_t getMaskForVGPRBlockOps(Register RegisterBlock) const
bool hasMaskForVGPRBlockOps(Register RegisterBlock) const
bool hasPrologEpilogSGPRSpillEntry(Register Reg) const
Register getGITPtrLoReg(const MachineFunction &MF) const
void setVGPRForAGPRCopy(Register NewVGPRForAGPRCopy)
bool allocateVGPRSpillToAGPR(MachineFunction &MF, int FI, bool isAGPRtoVGPR)
Reserve AGPRs or VGPRs to support spilling for FrameIndex FI.
void splitWWMSpillRegisters(MachineFunction &MF, SmallVectorImpl< std::pair< Register, int > > &CalleeSavedRegs, SmallVectorImpl< std::pair< Register, int > > &ScratchRegs) const
Register getSGPRForEXECCopy() const
bool isWWMReservedRegister(Register Reg) const
ArrayRef< SIRegisterInfo::SpilledReg > getSGPRSpillToPhysicalVGPRLanes(int FrameIndex) const
Register getVGPRForAGPRCopy() const
bool allocateSGPRSpillToVGPRLane(MachineFunction &MF, int FI, bool SpillToPhysVGPRLane=false, bool IsPrologEpilog=false)
Register getFrameOffsetReg() const
void setLongBranchReservedReg(Register Reg)
void setHasSpilledVGPRs(bool Spill=true)
bool removeDeadFrameIndices(MachineFrameInfo &MFI, bool ResetSGPRSpillStackIDs)
If ResetSGPRSpillStackIDs is true, reset the stack ID from sgpr-spill to the default stack.
void setScratchReservedForDynamicVGPRs(unsigned SizeInBytes)
MCRegister getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const
bool checkIndexInPrologEpilogSGPRSpills(int FI) const
const ReservedRegSet & getWWMReservedRegs() const
Register getImplicitBufferPtrUserSGPR() const
const PrologEpilogSGPRSaveRestoreInfo & getPrologEpilogSGPRSaveRestoreInfo(Register Reg) const
void setIsStackRealigned(bool Realigned=true)
unsigned getGITPtrHigh() const
bool hasSpilledSGPRs() const
void addToPrologEpilogSGPRSpills(Register Reg, PrologEpilogSGPRSaveRestoreInfo SI)
Register getScratchSGPRCopyDstReg(Register Reg) const
void setScratchRSrcReg(Register Reg)
void reserveWWMRegister(Register Reg)
Register getFrameRegister(const MachineFunction &MF) const override
const TargetRegisterClass * getRegClassForBlockOp(const MachineFunction &MF) const
void addImplicitUsesForBlockCSRLoad(MachineInstrBuilder &MIB, Register BlockReg) const
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
int64_t getFixed() const
Returns the fixed component of the stack.
bool hasFP(const MachineFunction &MF) const
hasFP - Return true if the specified function should have a dedicated frame pointer register.
virtual bool hasReservedCallFrame(const MachineFunction &MF) const
hasReservedCallFrame - Under normal circumstances, when a frame pointer is not required, we reserve argument space for call sites in the function immediately on entry to the current function.
virtual void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() should actually get saved.
void restoreCalleeSavedRegister(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const CalleeSavedInfo &CS, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) const
void spillCalleeSavedRegister(MachineBasicBlock &SaveBlock, MachineBasicBlock::iterator MI, const CalleeSavedInfo &CS, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) const
spillCalleeSavedRegister - Default implementation for spilling a single callee saved register.
Align getStackAlign() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligned on entry to a function.
LLVM_ABI bool DisableFramePointerElim(const MachineFunction &MF) const
DisableFramePointerElim - This returns true if frame pointer elimination optimization should be disabled for the given machine function.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDesc objects that represent all of the machine registers that the target has.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
@ PRIVATE_ADDRESS
Address space for private memory.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI, unsigned DynamicVGPRBlockSize, std::optional< bool > EnableWavefrontSize32)
uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset)
Convert ByteOffset to dwords if the subtarget uses dword SMRD immediate offsets.
LLVM_READNONE constexpr bool isEntryFunctionCC(CallingConv::ID CC)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
LLVM_READNONE constexpr bool isCompute(CallingConv::ID CC)
LLVM_READNONE constexpr bool isChainCC(CallingConv::ID CC)
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
@ ScalablePredicateVector
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
constexpr RegState getKillRegState(bool B)
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting iteration.
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value and is Skew mod Align.
void clearDebugInfoForSpillFIs(MachineFrameInfo &MFI, MachineBasicBlock &MBB, const BitVector &SpillFIs)
Replace frame index operands with null registers in debug value instructions for the specified spill ...
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
auto reverse(ContainerTy &&C)
void sort(IteratorTy Start, IteratorTy End)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
auto make_first_range(ContainerTy &&c)
Given a container of pairs, return a range over the first elements.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
constexpr uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
bool is_sorted(R &&Range, Compare C)
Wrapper function around std::is_sorted to check if elements in a range R are sorted with respect to a...
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
@ And
Bitwise or logical AND of integers.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtual registers.
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
@ PRIVATE_SEGMENT_WAVE_BYTE_OFFSET
static constexpr uint64_t encode(Fields... Values)
This struct is a compact representation of a valid (non-zero power of two) alignment.
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
This class contains a discriminated union of information about pointers in memory operands, relating them back to LLVM IR or to virtual locations (such as frame indices) that are exclusive to a particular machine function.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.