LLVM 23.0.0git
SIFrameLowering.cpp
Go to the documentation of this file.
1//===----------------------- SIFrameLowering.cpp --------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//==-----------------------------------------------------------------------===//
8
9#include "SIFrameLowering.h"
10#include "AMDGPU.h"
11#include "AMDGPULaneMaskUtils.h"
12#include "GCNSubtarget.h"
15#include "SISpillUtils.h"
20
21using namespace llvm;
22
23#define DEBUG_TYPE "frame-info"
24
26 "amdgpu-spill-vgpr-to-agpr",
27 cl::desc("Enable spilling VGPRs to AGPRs"),
29 cl::init(true));
30
31// Find a register matching \p RC from \p LiveUnits which is unused and
32// available throughout the function. On failure, returns AMDGPU::NoRegister.
33// TODO: Rewrite the loop here to iterate over MCRegUnits instead of
34// MCRegisters. This should reduce the number of iterations and avoid redundant
35// checking.
// NOTE(review): the page extraction dropped this function's opening line
// (original line 36 — the return type and name, `findUnusedRegister`, and the
// `MachineRegisterInfo &MRI` parameter); the parameter list below is the tail
// of that signature.
37 const LiveRegUnits &LiveUnits,
38 const TargetRegisterClass &RC) {
// Walk RC in allocation order and accept the first register that is never
// used anywhere in the function, has all of its units free in LiveUnits, and
// is not reserved.
39 for (MCRegister Reg : RC) {
40 if (!MRI.isPhysRegUsed(Reg) && LiveUnits.available(Reg) &&
41 !MRI.isReserved(Reg))
42 return Reg;
43 }
// No fully-unused candidate exists; return the invalid (null) MCRegister.
44 return MCRegister();
45}
46
47// Find a scratch register that we can use in the prologue. We avoid using
48// callee-save registers since they may appear to be free when this is called
49// from canUseAsPrologue (during shrink wrapping), but then no longer be free
50// when this is called from emitPrologue.
// NOTE(review): the opening line (original line 51 — return type and name,
// `findScratchNonCalleeSaveRegister`) is missing from this capture.
52 MachineRegisterInfo &MRI, LiveRegUnits &LiveUnits,
53 const TargetRegisterClass &RC, bool Unused = false) {
54 // Mark callee saved registers as used so we will not choose them.
55 const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
56 for (unsigned i = 0; CSRegs[i]; ++i)
57 LiveUnits.addReg(CSRegs[i]);
58
59 // We are looking for a register that can be used throughout the entire
60 // function, so any use is unacceptable.
61 if (Unused)
62 return findUnusedRegister(MRI, LiveUnits, RC);
63
// Otherwise the register only needs to be free at this program point (and
// not reserved); uses elsewhere in the function are acceptable.
64 for (MCRegister Reg : RC) {
65 if (LiveUnits.available(Reg) && !MRI.isReserved(Reg))
66 return Reg;
67 }
68
// Nothing available; return the invalid MCRegister.
69 return MCRegister();
70}
71
72/// Query target location for spilling SGPRs
73/// \p IncludeScratchCopy : Also look for free scratch SGPRs
// Decide where a prolog/epilog SGPR (FP, BP, ...) will be saved: (1) copy to
// an unused scratch SGPR, else (2) spill to a physical-VGPR lane, else (3)
// spill to scratch memory.
// NOTE(review): the extraction dropped original lines 74 (return type and
// function name; upstream this is `getVGPRSpillLaneOrTempRegister`), 78 (the
// `MFI` local used below), and lines 97, 104-106, 114, 117-119 and 124-126
// inside the branches — the jumps in the embedded numbering mark the gaps.
75 MachineFunction &MF, LiveRegUnits &LiveUnits, Register SGPR,
76 const TargetRegisterClass &RC = AMDGPU::SReg_32_XM0_XEXECRegClass,
77 bool IncludeScratchCopy = true) {
79 MachineFrameInfo &FrameInfo = MF.getFrameInfo();
80
81 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
82 const SIRegisterInfo *TRI = ST.getRegisterInfo();
// Spill size/alignment are taken from the register class being saved.
83 unsigned Size = TRI->getSpillSize(RC);
84 Align Alignment = TRI->getSpillAlign(RC);
85
86 // We need to save and restore the given SGPR.
87
88 Register ScratchSGPR;
89 // 1: Try to save the given register into an unused scratch SGPR. The
90 // LiveUnits should have all the callee saved registers marked as used. For
91 // certain cases we skip copy to scratch SGPR.
92 if (IncludeScratchCopy)
93 ScratchSGPR = findUnusedRegister(MF.getRegInfo(), LiveUnits, RC);
94
95 if (!ScratchSGPR) {
// A stack object is created first; it is kept if a VGPR lane is found, and
// replaced by a spill slot otherwise (line 97, the stack-ID argument, is
// missing from this capture).
96 int FI = FrameInfo.CreateStackObject(Size, Alignment, true, nullptr,
98
99 if (TRI->spillSGPRToVGPR() &&
100 MFI->allocateSGPRSpillToVGPRLane(MF, FI, /*SpillToPhysVGPRLane=*/true,
101 /*IsPrologEpilog=*/true)) {
102 // 2: There's no free lane to spill, and no free register to save the
103 // SGPR, so we're forced to take another VGPR to use for the spill.
107
108 LLVM_DEBUG(auto Spill = MFI->getSGPRSpillToPhysicalVGPRLanes(FI).front();
109 dbgs() << printReg(SGPR, TRI) << " requires fallback spill to "
110 << printReg(Spill.VGPR, TRI) << ':' << Spill.Lane
111 << '\n';);
112 } else {
113 // Remove dead <FI> index
115 // 3: If all else fails, spill the register to memory.
116 FI = FrameInfo.CreateSpillStackObject(Size, Alignment);
118 SGPR,
120 LLVM_DEBUG(dbgs() << "Reserved FI " << FI << " for spilling "
121 << printReg(SGPR, TRI) << '\n');
122 }
123 } else {
// Scratch copy found: mark it live so later searches do not reuse it.
127 LiveUnits.addReg(ScratchSGPR);
128 LLVM_DEBUG(dbgs() << "Saving " << printReg(SGPR, TRI) << " with copy to "
129 << printReg(ScratchSGPR, TRI) << '\n');
130 }
131}
132
133// We need to specially emit stack operations here because a different frame
134// register is used than in the rest of the function, as getFrameRegister would
135// use.
// Store SpillReg (one dword) to frame index FI relative to FrameReg, using
// flat-scratch or MUBUF addressing depending on the subtarget.
// NOTE(review): original lines 139-140 (the MBB / insert-iterator / DebugLoc
// parameters) and 147-148 (the PtrInfo and MMO declarations) were dropped by
// the extraction.
136static void buildPrologSpill(const GCNSubtarget &ST, const SIRegisterInfo &TRI,
137 const SIMachineFunctionInfo &FuncInfo,
138 LiveRegUnits &LiveUnits, MachineFunction &MF,
141 Register SpillReg, int FI, Register FrameReg,
142 int64_t DwordOff = 0) {
143 unsigned Opc = ST.hasFlatScratchEnabled() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
144 : AMDGPU::BUFFER_STORE_DWORD_OFFSET;
145
146 MachineFrameInfo &FrameInfo = MF.getFrameInfo();
149 PtrInfo, MachineMemOperand::MOStore, FrameInfo.getObjectSize(FI),
150 FrameInfo.getObjectAlign(FI));
// Temporarily mark the spill register live while building the store; kill it
// at the store unless the block already treats it as a live-in.
151 LiveUnits.addReg(SpillReg);
152 bool IsKill = !MBB.isLiveIn(SpillReg);
153 TRI.buildSpillLoadStore(MBB, I, DL, Opc, FI, SpillReg, IsKill, FrameReg,
154 DwordOff, MMO, nullptr, &LiveUnits);
155 if (IsKill)
156 LiveUnits.removeReg(SpillReg);
157}
158
// Reload SpillReg (one dword) from frame index FI relative to FrameReg —
// the epilog counterpart of buildPrologSpill above.
// NOTE(review): original lines 163-164 (MBB / insert-iterator parameters)
// and 171-172 (PtrInfo and MMO declarations) are missing from this capture.
159static void buildEpilogRestore(const GCNSubtarget &ST,
160 const SIRegisterInfo &TRI,
161 const SIMachineFunctionInfo &FuncInfo,
162 LiveRegUnits &LiveUnits, MachineFunction &MF,
165 const DebugLoc &DL, Register SpillReg, int FI,
166 Register FrameReg, int64_t DwordOff = 0) {
167 unsigned Opc = ST.hasFlatScratchEnabled() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
168 : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
169
170 MachineFrameInfo &FrameInfo = MF.getFrameInfo();
173 PtrInfo, MachineMemOperand::MOLoad, FrameInfo.getObjectSize(FI),
174 FrameInfo.getObjectAlign(FI));
// IsKill is false here: a load defines SpillReg rather than consuming it.
175 TRI.buildSpillLoadStore(MBB, I, DL, Opc, FI, SpillReg, false, FrameReg,
176 DwordOff, MMO, nullptr, &LiveUnits);
177}
178
// Materialize the 64-bit pointer to the GIT (global information table) in
// TargetReg: the high half comes from the amdgpu-git-ptr-high attribute when
// set, otherwise from the PC; the low half is copied from the preloaded SGPR.
// NOTE(review): original line 179 (function head, `buildGitPtr(MBB, I, ...)`)
// and line 183 (the `MFI` local used below) were dropped by the extraction.
180 const DebugLoc &DL, const SIInstrInfo *TII,
181 Register TargetReg) {
182 MachineFunction *MF = MBB.getParent();
184 const SIRegisterInfo *TRI = &TII->getRegisterInfo();
185 const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
186 Register TargetLo = TRI->getSubReg(TargetReg, AMDGPU::sub0);
187 Register TargetHi = TRI->getSubReg(TargetReg, AMDGPU::sub1);
188
// 0xffffffff acts as the "not provided" sentinel for the high-half attribute.
189 if (MFI->getGITPtrHigh() != 0xffffffff) {
190 BuildMI(MBB, I, DL, SMovB32, TargetHi)
191 .addImm(MFI->getGITPtrHigh())
192 .addReg(TargetReg, RegState::ImplicitDefine);
193 } else {
// No attribute: take the high half from the program counter instead.
194 const MCInstrDesc &GetPC64 = TII->get(AMDGPU::S_GETPC_B64_pseudo);
195 BuildMI(MBB, I, DL, GetPC64, TargetReg);
196 }
// The low half lives in a preloaded SGPR; record it as a live-in before use.
197 Register GitPtrLo = MFI->getGITPtrLoReg(*MF);
198 MF->getRegInfo().addLiveIn(GitPtrLo);
199 MBB.addLiveIn(GitPtrLo);
200 BuildMI(MBB, I, DL, SMovB32, TargetLo)
201 .addReg(GitPtrLo);
202}
203
// Lazily initialize LiveUnits for the prolog/epilog block: a no-op if it was
// already populated.
// NOTE(review): original line 206 (the remaining parameters, including the
// MachineBasicBlock) is missing from this capture.
204static void initLiveUnits(LiveRegUnits &LiveUnits, const SIRegisterInfo &TRI,
205 const SIMachineFunctionInfo *FuncInfo,
207 MachineBasicBlock::iterator MBBI, bool IsProlog) {
208 if (LiveUnits.empty()) {
209 LiveUnits.init(TRI);
210 if (IsProlog) {
// Prolog: liveness at the block entry is given by the block's live-ins.
211 LiveUnits.addLiveIns(MBB);
212 } else {
213 // In epilog.
// Epilog: start from the live-outs and step backward across MBBI to get
// the liveness just before the insertion point.
214 LiveUnits.addLiveOuts(MBB);
215 LiveUnits.stepBackward(*MBBI);
216 }
217 }
218}
219
220namespace llvm {
221
222// SpillBuilder to save/restore special SGPR spills like the one needed for FP,
223// BP, etc. These spills are delayed until the current function's frame is
224// finalized. For a given register, the builder uses the
225// PrologEpilogSGPRSaveRestoreInfo to decide the spill method.
// NOTE(review): the extraction dropped the class head (original lines
// 226-228, `class PrologEpilogSGPRSpillBuilder {` plus the MBB/MI members),
// member line 236 (the `SI` save/restore-info member), several single lines
// inside the methods (250, 272, 291, 298, 320, 337), the constructor head
// (341-344), and all the `case` labels in save()/restore() (362-366,
// 373-377). The embedded numbering marks each gap.
229 MachineFunction &MF;
230 const GCNSubtarget &ST;
231 MachineFrameInfo &MFI;
232 SIMachineFunctionInfo *FuncInfo;
233 const SIInstrInfo *TII;
234 const SIRegisterInfo &TRI;
235 Register SuperReg;
237 LiveRegUnits &LiveUnits;
238 const DebugLoc &DL;
239 Register FrameReg;
240 ArrayRef<int16_t> SplitParts;
241 unsigned NumSubRegs;
242 unsigned EltSize = 4;
243
// Spill SuperReg to the scratch-memory slot FI, one 32-bit sub-register at a
// time, bouncing each dword through a scratch VGPR (line 250, the TmpVGPR
// declaration, is missing from this capture).
244 void saveToMemory(const int FI) const {
245 MachineRegisterInfo &MRI = MF.getRegInfo();
246 assert(!MFI.isDeadObjectIndex(FI));
247
248 initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MI, /*IsProlog*/ true);
249
251 MRI, LiveUnits, AMDGPU::VGPR_32RegClass);
252 if (!TmpVGPR)
253 report_fatal_error("failed to find free scratch register");
254
255 for (unsigned I = 0, DwordOff = 0; I < NumSubRegs; ++I) {
256 Register SubReg = NumSubRegs == 1
257 ? SuperReg
258 : Register(TRI.getSubReg(SuperReg, SplitParts[I]));
259 BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
260 .addReg(SubReg);
261
262 buildPrologSpill(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MI, DL, TmpVGPR,
263 FI, FrameReg, DwordOff);
264 DwordOff += 4;
265 }
266 }
267
// Spill SuperReg into pre-allocated physical-VGPR lanes, one sub-register
// per lane (line 272, the `Spill` array declaration, is missing).
268 void saveToVGPRLane(const int FI) const {
269 assert(!MFI.isDeadObjectIndex(FI));
270
271 assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
273 FuncInfo->getSGPRSpillToPhysicalVGPRLanes(FI);
274 assert(Spill.size() == NumSubRegs);
275
276 for (unsigned I = 0; I < NumSubRegs; ++I) {
277 Register SubReg = NumSubRegs == 1
278 ? SuperReg
279 : Register(TRI.getSubReg(SuperReg, SplitParts[I]));
280 BuildMI(MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_S32_TO_VGPR),
281 Spill[I].VGPR)
282 .addReg(SubReg)
283 .addImm(Spill[I].Lane)
284 .addReg(Spill[I].VGPR, RegState::Undef);
285 }
286 }
287
// Save SuperReg with a plain COPY into a free scratch SGPR (line 291, the
// trailing operand of the COPY, is missing from this capture).
288 void copyToScratchSGPR(Register DstReg) const {
289 BuildMI(MBB, MI, DL, TII->get(AMDGPU::COPY), DstReg)
290 .addReg(SuperReg)
292 }
293
// Reload SuperReg from the memory slot FI: load each dword into a scratch
// VGPR, then broadcast it back to the SGPR via V_READFIRSTLANE_B32 (line
// 298, the TmpVGPR declaration, is missing).
294 void restoreFromMemory(const int FI) {
295 MachineRegisterInfo &MRI = MF.getRegInfo();
296
297 initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MI, /*IsProlog*/ false);
299 MRI, LiveUnits, AMDGPU::VGPR_32RegClass);
300 if (!TmpVGPR)
301 report_fatal_error("failed to find free scratch register");
302
303 for (unsigned I = 0, DwordOff = 0; I < NumSubRegs; ++I) {
304 Register SubReg = NumSubRegs == 1
305 ? SuperReg
306 : Register(TRI.getSubReg(SuperReg, SplitParts[I]));
307
308 buildEpilogRestore(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MI, DL,
309 TmpVGPR, FI, FrameReg, DwordOff);
310 assert(SubReg.isPhysical());
311
312 BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
313 .addReg(TmpVGPR, RegState::Kill);
314 DwordOff += 4;
315 }
316 }
317
// Reload SuperReg from the physical-VGPR lanes recorded for FI (line 320,
// the `Spill` array declaration, is missing).
318 void restoreFromVGPRLane(const int FI) {
319 assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
321 FuncInfo->getSGPRSpillToPhysicalVGPRLanes(FI);
322 assert(Spill.size() == NumSubRegs);
323
324 for (unsigned I = 0; I < NumSubRegs; ++I) {
325 Register SubReg = NumSubRegs == 1
326 ? SuperReg
327 : Register(TRI.getSubReg(SuperReg, SplitParts[I]));
328 BuildMI(MBB, MI, DL, TII->get(AMDGPU::SI_RESTORE_S32_FROM_VGPR), SubReg)
329 .addReg(Spill[I].VGPR)
330 .addImm(Spill[I].Lane);
331 }
332 }
333
// Restore SuperReg by copying back from the scratch SGPR used at save time
// (line 337, the trailing operand of the COPY, is missing).
334 void copyFromScratchSGPR(Register SrcReg) const {
335 BuildMI(MBB, MI, DL, TII->get(AMDGPU::COPY), SuperReg)
336 .addReg(SrcReg)
338 }
339
340public:
345 const DebugLoc &DL, const SIInstrInfo *TII,
346 const SIRegisterInfo &TRI,
347 LiveRegUnits &LiveUnits, Register FrameReg)
348 : MI(MI), MBB(MBB), MF(*MBB.getParent()),
349 ST(MF.getSubtarget<GCNSubtarget>()), MFI(MF.getFrameInfo()),
350 FuncInfo(MF.getInfo<SIMachineFunctionInfo>()), TII(TII), TRI(TRI),
351 SuperReg(Reg), SI(SI), LiveUnits(LiveUnits), DL(DL),
352 FrameReg(FrameReg) {
// Decompose SuperReg into 32-bit parts; a single 32-bit reg has no parts.
353 const TargetRegisterClass *RC = TRI.getPhysRegBaseClass(SuperReg);
354 SplitParts = TRI.getRegSplitParts(RC, EltSize);
355 NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
356
357 assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
358 }
359
// Dispatch on the save/restore kind recorded in SI (case labels dropped by
// the extraction; bodies indicate spill-to-memory, VGPR-lane, and
// copy-to-scratch-SGPR kinds in that order).
360 void save() {
361 switch (SI.getKind()) {
363 return saveToMemory(SI.getIndex());
365 return saveToVGPRLane(SI.getIndex());
367 return copyToScratchSGPR(SI.getReg());
368 }
369 }
370
371 void restore() {
372 switch (SI.getKind()) {
374 return restoreFromMemory(SI.getIndex());
376 return restoreFromVGPRLane(SI.getIndex());
378 return copyFromScratchSGPR(SI.getReg());
379 }
380 }
381};
382
383} // namespace llvm
384
385// Emit flat scratch setup code, assuming `MFI->hasFlatScratchInit()`
// NOTE(review): the extraction dropped original line 387 (the MF / MBB /
// insert-iterator parameters), line 442 (the MMO flags argument), line 445
// (the initializer of `Offset`), and line 462 (the getter that yields
// `FlatScratchInitReg` in the non-PAL path).
386void SIFrameLowering::emitEntryFunctionFlatScratchInit(
388 const DebugLoc &DL, Register ScratchWaveOffsetReg) const {
389 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
390 const SIInstrInfo *TII = ST.getInstrInfo();
391 const SIRegisterInfo *TRI = &TII->getRegisterInfo();
392 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
393
394 // We don't need this if we only have spills since there is no user facing
395 // scratch.
396
397 // TODO: If we know we don't have flat instructions earlier, we can omit
398 // this from the input registers.
399 //
400 // TODO: We only need to know if we access scratch space through a flat
401 // pointer. Because we only detect if flat instructions are used at all,
402 // this will be used more often than necessary on VI.
403
404 Register FlatScrInitLo;
405 Register FlatScrInitHi;
406
407 if (ST.isAmdPalOS()) {
408 // Extract the scratch offset from the descriptor in the GIT
409 LiveRegUnits LiveUnits;
410 LiveUnits.init(*TRI);
411 LiveUnits.addLiveIns(MBB);
412
413 // Find unused reg to load flat scratch init into
414 MachineRegisterInfo &MRI = MF.getRegInfo();
415 Register FlatScrInit = AMDGPU::NoRegister;
416 ArrayRef<MCPhysReg> AllSGPR64s = TRI->getAllSGPR64(MF);
// Skip past the preloaded SGPRs (rounded up to SGPR-pair granularity).
417 unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 1) / 2;
418 AllSGPR64s = AllSGPR64s.slice(
419 std::min(static_cast<unsigned>(AllSGPR64s.size()), NumPreloaded));
420 Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
421 for (MCPhysReg Reg : AllSGPR64s) {
// The candidate must also not alias the GIT pointer low register, which
// buildGitPtr below will read.
422 if (LiveUnits.available(Reg) && !MRI.isReserved(Reg) &&
423 MRI.isAllocatable(Reg) && !TRI->isSubRegisterEq(Reg, GITPtrLoReg)) {
424 FlatScrInit = Reg;
425 break;
426 }
427 }
428 assert(FlatScrInit && "Failed to find free register for scratch init");
429
430 FlatScrInitLo = TRI->getSubReg(FlatScrInit, AMDGPU::sub0);
431 FlatScrInitHi = TRI->getSubReg(FlatScrInit, AMDGPU::sub1);
432
433 buildGitPtr(MBB, I, DL, TII, FlatScrInit);
434
435 // We now have the GIT ptr - now get the scratch descriptor from the entry
436 // at offset 0 (or offset 16 for a compute shader).
437 MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
438 const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);
439 auto *MMO = MF.getMachineMemOperand(
440 PtrInfo,
443 8, Align(4));
444 unsigned Offset =
446 const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
447 unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset);
448 BuildMI(MBB, I, DL, LoadDwordX2, FlatScrInit)
449 .addReg(FlatScrInit)
450 .addImm(EncodedOffset) // offset
451 .addImm(0) // cpol
452 .addMemOperand(MMO);
453
454 // Mask the offset in [47:0] of the descriptor
455 const MCInstrDesc &SAndB32 = TII->get(AMDGPU::S_AND_B32);
456 auto And = BuildMI(MBB, I, DL, SAndB32, FlatScrInitHi)
457 .addReg(FlatScrInitHi)
458 .addImm(0xffff);
459 And->getOperand(3).setIsDead(); // Mark SCC as dead.
460 } else {
// Non-PAL: the flat-scratch-init value is preloaded into an SGPR pair.
461 Register FlatScratchInitReg =
463 assert(FlatScratchInitReg);
464
465 MachineRegisterInfo &MRI = MF.getRegInfo();
466 MRI.addLiveIn(FlatScratchInitReg);
467 MBB.addLiveIn(FlatScratchInitReg);
468
469 FlatScrInitLo = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0);
470 FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1);
471 }
472
473 // Do a 64-bit pointer add.
474 if (ST.flatScratchIsPointer()) {
475 if (ST.getGeneration() >= AMDGPUSubtarget::GFX10) {
// GFX10+: FLAT_SCR is not directly addressable; write both halves via
// S_SETREG after the 64-bit add.
476 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo)
477 .addReg(FlatScrInitLo)
478 .addReg(ScratchWaveOffsetReg);
479 auto Addc = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32),
480 FlatScrInitHi)
481 .addReg(FlatScrInitHi)
482 .addImm(0);
483 Addc->getOperand(3).setIsDead(); // Mark SCC as dead.
484
485 using namespace AMDGPU::Hwreg;
486 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32))
487 .addReg(FlatScrInitLo)
488 .addImm(int16_t(HwregEncoding::encode(ID_FLAT_SCR_LO, 0, 32)));
489 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32))
490 .addReg(FlatScrInitHi)
491 .addImm(int16_t(HwregEncoding::encode(ID_FLAT_SCR_HI, 0, 32)));
492 return;
493 }
494
495 // For GFX9.
496 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), AMDGPU::FLAT_SCR_LO)
497 .addReg(FlatScrInitLo)
498 .addReg(ScratchWaveOffsetReg);
499 auto Addc = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32),
500 AMDGPU::FLAT_SCR_HI)
501 .addReg(FlatScrInitHi)
502 .addImm(0);
503 Addc->getOperand(3).setIsDead(); // Mark SCC as dead.
504
505 return;
506 }
507
508 assert(ST.getGeneration() < AMDGPUSubtarget::GFX9);
509
510 // Copy the size in bytes.
511 BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), AMDGPU::FLAT_SCR_LO)
512 .addReg(FlatScrInitHi, RegState::Kill);
513
514 // Add wave offset in bytes to private base offset.
515 // See comment in AMDKernelCodeT.h for enable_sgpr_flat_scratch_init.
516 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), FlatScrInitLo)
517 .addReg(FlatScrInitLo)
518 .addReg(ScratchWaveOffsetReg)
519
520 // Convert offset to 256-byte units.
521 auto LShr = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_LSHR_B32),
522 AMDGPU::FLAT_SCR_HI)
523 .addReg(FlatScrInitLo, RegState::Kill)
524 .addImm(8);
525 LShr->getOperand(3).setIsDead(); // Mark SCC as dead.
526}
527
528// Note SGPRSpill stack IDs should only be used for SGPR spilling to VGPRs, not
529// memory. They should have been removed by now.
// NOTE(review): original line 530 (the function head; upstream this is
// `static bool allStackObjectsAreDead(const MachineFrameInfo &MFI) {`) was
// dropped by the extraction. Returns true iff every frame object is dead.
531 for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
532 I != E; ++I) {
533 if (!MFI.isDeadObjectIndex(I))
534 return false;
535 }
536
537 return true;
538}
539
540// Shift down registers reserved for the scratch RSRC.
// NOTE(review): original line 555 (the second half of the early-return
// condition) and line 585 (a statement between replaceRegWith and
// reserveReg) are missing from this capture.
541Register SIFrameLowering::getEntryFunctionReservedScratchRsrcReg(
542 MachineFunction &MF) const {
543
544 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
545 const SIInstrInfo *TII = ST.getInstrInfo();
546 const SIRegisterInfo *TRI = &TII->getRegisterInfo();
547 MachineRegisterInfo &MRI = MF.getRegInfo();
548 SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
549
550 assert(MFI->isEntryFunction());
551
552 Register ScratchRsrcReg = MFI->getScratchRSrcReg();
553
// No SRSRC, or it is provably unused: nothing to reserve.
554 if (!ScratchRsrcReg || (!MRI.isPhysRegUsed(ScratchRsrcReg) &&
556 return Register();
557
// With the SGPR-init bug, or when the SRSRC is not the default reserved
// private-segment-buffer register, keep it where it is.
558 if (ST.hasSGPRInitBug() ||
559 ScratchRsrcReg != TRI->reservedPrivateSegmentBufferReg(MF))
560 return ScratchRsrcReg;
561
562 // We reserved the last registers for this. Shift it down to the end of those
563 // which were actually used.
564 //
565 // FIXME: It might be safer to use a pseudoregister before replacement.
566
567 // FIXME: We should be able to eliminate unused input registers. We only
568 // cannot do this for the resources required for scratch access. For now we
569 // skip over user SGPRs and may leave unused holes.
570
// Round preloaded SGPR count up to SGPR-quad granularity (the SRSRC needs
// an aligned 128-bit tuple).
571 unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 3) / 4;
572 ArrayRef<MCPhysReg> AllSGPR128s = TRI->getAllSGPR128(MF);
573 AllSGPR128s = AllSGPR128s.slice(std::min(static_cast<unsigned>(AllSGPR128s.size()), NumPreloaded));
574
575 // Skip the last N reserved elements because they should have already been
576 // reserved for VCC etc.
577 Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
578 for (MCPhysReg Reg : AllSGPR128s) {
579 // Pick the first unallocated one. Make sure we don't clobber the other
580 // reserved input we needed. Also for PAL, make sure we don't clobber
581 // the GIT pointer passed in SGPR0 or SGPR8.
582 if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg) &&
583 (!GITPtrLoReg || !TRI->isSubRegisterEq(Reg, GITPtrLoReg))) {
584 MRI.replaceRegWith(ScratchRsrcReg, Reg);
586 MRI.reserveReg(Reg, TRI);
587 return Reg;
588 }
589 }
590
591 return ScratchRsrcReg;
592}
593
594static unsigned getScratchScaleFactor(const GCNSubtarget &ST) {
595 return ST.hasFlatScratchEnabled() ? 1 : ST.getWavefrontSize();
596}
597
// Entry-function prologue: fix up the reserved scratch RSRC, locate the
// scratch wave offset, initialize SP/FP, flat scratch and the SRSRC.
// NOTE(review): the extraction dropped original line 598 (the function head,
// upstream `void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction
// &MF,`), plus lines 613, 617, 624, 650, 662, 690, 700, 705-706, 718, 722,
// 726, 729-730, 736, 750 and 775 — locals such as `MFI`, `MRI`, `I`,
// `FPReg`/`SPReg` getters and parts of several expressions are therefore
// missing below; the embedded numbering marks each gap.
599 MachineBasicBlock &MBB) const {
600 assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
601
602 // FIXME: If we only have SGPR spills, we won't actually be using scratch
603 // memory since these spill to VGPRs. We should be cleaning up these unused
604 // SGPR spill frame indices somewhere.
605
606 // FIXME: We still have implicit uses on SGPR spill instructions in case they
607 // need to spill to vector memory. It's likely that will not happen, but at
608 // this point it appears we need the setup. This part of the prolog should be
609 // emitted after frame indices are eliminated.
610
611 // FIXME: Remove all of the isPhysRegUsed checks
612
614 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
615 const SIInstrInfo *TII = ST.getInstrInfo();
616 const SIRegisterInfo *TRI = &TII->getRegisterInfo();
618 const Function &F = MF.getFunction();
619 MachineFrameInfo &FrameInfo = MF.getFrameInfo();
620
621 assert(MFI->isEntryFunction());
622
623 Register PreloadedScratchWaveOffsetReg = MFI->getPreloadedReg(
625
626 // We need to do the replacement of the private segment buffer register even
627 // if there are no stack objects. There could be stores to undef or a
628 // constant without an associated object.
629 //
630 // This will return `Register()` in cases where there are no actual
631 // uses of the SRSRC.
632 Register ScratchRsrcReg;
633 if (!ST.hasFlatScratchEnabled())
634 ScratchRsrcReg = getEntryFunctionReservedScratchRsrcReg(MF);
635
636 // Make the selected register live throughout the function.
637 if (ScratchRsrcReg) {
638 for (MachineBasicBlock &OtherBB : MF) {
639 if (&OtherBB != &MBB) {
640 OtherBB.addLiveIn(ScratchRsrcReg);
641 }
642 }
643 }
644
645 // Now that we have fixed the reserved SRSRC we need to locate the
646 // (potentially) preloaded SRSRC.
647 Register PreloadedScratchRsrcReg;
648 if (ST.isAmdHsaOrMesa(F)) {
649 PreloadedScratchRsrcReg =
651 if (ScratchRsrcReg && PreloadedScratchRsrcReg) {
652 // We added live-ins during argument lowering, but since they were not
653 // used they were deleted. We're adding the uses now, so add them back.
654 MRI.addLiveIn(PreloadedScratchRsrcReg);
655 MBB.addLiveIn(PreloadedScratchRsrcReg);
656 }
657 }
658
659 // Debug location must be unknown since the first debug location is used to
660 // determine the end of the prologue.
661 DebugLoc DL;
663
664 // We found the SRSRC first because it needs four registers and has an
665 // alignment requirement. If the SRSRC that we found is clobbering with
666 // the scratch wave offset, which may be in a fixed SGPR or a free SGPR
667 // chosen by SITargetLowering::allocateSystemSGPRs, COPY the scratch
668 // wave offset to a free SGPR.
669 Register ScratchWaveOffsetReg;
670 if (PreloadedScratchWaveOffsetReg &&
671 TRI->isSubRegisterEq(ScratchRsrcReg, PreloadedScratchWaveOffsetReg)) {
672 ArrayRef<MCPhysReg> AllSGPRs = TRI->getAllSGPR32(MF);
673 unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();
674 AllSGPRs = AllSGPRs.slice(
675 std::min(static_cast<unsigned>(AllSGPRs.size()), NumPreloaded));
676 Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
677 for (MCPhysReg Reg : AllSGPRs) {
678 if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg) &&
679 !TRI->isSubRegisterEq(ScratchRsrcReg, Reg) && GITPtrLoReg != Reg) {
680 ScratchWaveOffsetReg = Reg;
681 BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchWaveOffsetReg)
682 .addReg(PreloadedScratchWaveOffsetReg, RegState::Kill);
683 break;
684 }
685 }
686
687 // FIXME: We can spill incoming arguments and restore at the end of the
688 // prolog.
689 if (!ScratchWaveOffsetReg)
691 "could not find temporary scratch offset register in prolog");
692 } else {
693 ScratchWaveOffsetReg = PreloadedScratchWaveOffsetReg;
694 }
695 assert(ScratchWaveOffsetReg || !PreloadedScratchWaveOffsetReg);
696
// Initial SP value: frame size scaled for the addressing mode in use.
697 unsigned Offset = FrameInfo.getStackSize() * getScratchScaleFactor(ST);
698 if (!mayReserveScratchForCWSR(MF)) {
699 if (hasFP(MF)) {
701 assert(FPReg != AMDGPU::FP_REG);
702 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), FPReg).addImm(0);
703 }
704
707 assert(SPReg != AMDGPU::SP_REG);
708 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), SPReg).addImm(Offset);
709 }
710 } else {
711 // We need to check if we're on a compute queue - if we are, then the CWSR
712 // trap handler may need to store some VGPRs on the stack. The first VGPR
713 // block is saved separately, so we only need to allocate space for any
714 // additional VGPR blocks used. For now, we will make sure there's enough
715 // room for the theoretical maximum number of VGPRs that can be allocated.
716 // FIXME: Figure out if the shader uses fewer VGPRs in practice.
717 assert(hasFP(MF));
719 assert(FPReg != AMDGPU::FP_REG);
720 unsigned VGPRSize = llvm::alignTo(
721 (ST.getAddressableNumVGPRs(MFI->getDynamicVGPRBlockSize()) -
723 MFI->getDynamicVGPRBlockSize())) *
724 4,
725 FrameInfo.getMaxAlign());
727
728 BuildMI(MBB, I, DL, TII->get(AMDGPU::GET_STACK_BASE), FPReg);
731 assert(SPReg != AMDGPU::SP_REG);
732
733 // If at least one of the constants can be inlined, then we can use
734 // s_cselect. Otherwise, use a mov and cmovk.
735 if (AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm()) ||
737 ST.hasInv2PiInlineImm())) {
738 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_CSELECT_B32), SPReg)
739 .addImm(Offset + VGPRSize)
740 .addImm(Offset);
741 } else {
742 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), SPReg).addImm(Offset);
743 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_CMOVK_I32), SPReg)
744 .addImm(Offset + VGPRSize);
745 }
746 }
747 }
748
749 bool NeedsFlatScratchInit =
751 (MRI.isPhysRegUsed(AMDGPU::FLAT_SCR) || FrameInfo.hasCalls() ||
752 (!allStackObjectsAreDead(FrameInfo) && ST.hasFlatScratchEnabled()));
753
754 if ((NeedsFlatScratchInit || ScratchRsrcReg) &&
755 PreloadedScratchWaveOffsetReg && !ST.hasArchitectedFlatScratch()) {
756 MRI.addLiveIn(PreloadedScratchWaveOffsetReg);
757 MBB.addLiveIn(PreloadedScratchWaveOffsetReg);
758 }
759
760 if (NeedsFlatScratchInit) {
761 emitEntryFunctionFlatScratchInit(MF, MBB, I, DL, ScratchWaveOffsetReg);
762 }
763
764 if (ScratchRsrcReg) {
765 emitEntryFunctionScratchRsrcRegSetup(MF, MBB, I, DL,
766 PreloadedScratchRsrcReg,
767 ScratchRsrcReg, ScratchWaveOffsetReg);
768 }
769
770 if (ST.hasWaitXcnt()) {
771 // Set REPLAY_MODE (bit 25) in MODE register to enable multi-group XNACK
772 // replay. This aligns hardware behavior with the compiler's s_wait_xcnt
773 // insertion logic, which assumes multi-group mode by default.
774 unsigned RegEncoding =
776 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_IMM32_B32))
777 .addImm(1)
778 .addImm(RegEncoding);
779 }
780}
781
782// Emit scratch RSRC setup code, assuming `ScratchRsrcReg != AMDGPU::NoReg`
// NOTE(review): the extraction dropped original lines 784 (MF / MBB /
// insert-iterator parameters), 791, 804, 808-809, 844, 847, 851, 859-860,
// 863 and 869-870 — the embedded numbering marks each gap.
783void SIFrameLowering::emitEntryFunctionScratchRsrcRegSetup(
785 const DebugLoc &DL, Register PreloadedScratchRsrcReg,
786 Register ScratchRsrcReg, Register ScratchWaveOffsetReg) const {
787
788 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
789 const SIInstrInfo *TII = ST.getInstrInfo();
790 const SIRegisterInfo *TRI = &TII->getRegisterInfo();
792 const Function &Fn = MF.getFunction();
793
794 if (ST.isAmdPalOS()) {
795 // The pointer to the GIT is formed from the offset passed in and either
796 // the amdgpu-git-ptr-high function attribute or the top part of the PC
797 Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
798 Register Rsrc03 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);
799
800 buildGitPtr(MBB, I, DL, TII, Rsrc01);
801
802 // We now have the GIT ptr - now get the scratch descriptor from the entry
803 // at offset 0 (or offset 16 for a compute shader).
805 const MCInstrDesc &LoadDwordX4 = TII->get(AMDGPU::S_LOAD_DWORDX4_IMM);
806 auto *MMO = MF.getMachineMemOperand(
807 PtrInfo,
810 16, Align(4));
811 unsigned Offset = Fn.getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0;
812 const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
813 unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset);
814 BuildMI(MBB, I, DL, LoadDwordX4, ScratchRsrcReg)
815 .addReg(Rsrc01)
816 .addImm(EncodedOffset) // offset
817 .addImm(0) // cpol
818 .addReg(ScratchRsrcReg, RegState::ImplicitDefine)
819 .addMemOperand(MMO);
820
821 // The driver will always set the SRD for wave 64 (bits 118:117 of
822 // descriptor / bits 22:21 of third sub-reg will be 0b11)
823 // If the shader is actually wave32 we have to modify the const_index_stride
824 // field of the descriptor 3rd sub-reg (bits 22:21) to 0b10 (stride=32). The
825 // reason the driver does this is that there can be cases where it presents
826 // 2 shaders with different wave size (e.g. VsFs).
827 // TODO: convert to using SCRATCH instructions or multiple SRD buffers
828 if (ST.isWave32()) {
829 const MCInstrDesc &SBitsetB32 = TII->get(AMDGPU::S_BITSET0_B32);
830 BuildMI(MBB, I, DL, SBitsetB32, Rsrc03)
831 .addImm(21)
832 .addReg(Rsrc03);
833 }
834 } else if (ST.isMesaGfxShader(Fn) || !PreloadedScratchRsrcReg) {
835 assert(!ST.isAmdHsaOrMesa(Fn));
836 const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
837
838 Register Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
839 Register Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);
840
841 // Use relocations to get the pointer, and setup the other bits manually.
842 uint64_t Rsrc23 = TII->getScratchRsrcWords23();
843
845 Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
846
848 const MCInstrDesc &Mov64 = TII->get(AMDGPU::S_MOV_B64);
849
// The operand on line 851 (the moved value, presumably the buffer-pointer
// source) is missing from this capture.
850 BuildMI(MBB, I, DL, Mov64, Rsrc01)
852 .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
853 } else {
854 const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);
855
856 MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
857 auto *MMO = MF.getMachineMemOperand(
858 PtrInfo,
861 8, Align(4));
862 BuildMI(MBB, I, DL, LoadDwordX2, Rsrc01)
864 .addImm(0) // offset
865 .addImm(0) // cpol
866 .addMemOperand(MMO)
867 .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
868
871 }
872 } else {
// No relocation target available: fall back to external symbols that the
// driver resolves to the scratch descriptor words.
873 Register Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
874 Register Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
875
876 BuildMI(MBB, I, DL, SMovB32, Rsrc0)
877 .addExternalSymbol("SCRATCH_RSRC_DWORD0")
878 .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
879
880 BuildMI(MBB, I, DL, SMovB32, Rsrc1)
881 .addExternalSymbol("SCRATCH_RSRC_DWORD1")
882 .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
883 }
884
885 BuildMI(MBB, I, DL, SMovB32, Rsrc2)
886 .addImm(Lo_32(Rsrc23))
887 .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
888
889 BuildMI(MBB, I, DL, SMovB32, Rsrc3)
890 .addImm(Hi_32(Rsrc23))
891 .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
892 } else if (ST.isAmdHsaOrMesa(Fn)) {
893 assert(PreloadedScratchRsrcReg);
894
895 if (ScratchRsrcReg != PreloadedScratchRsrcReg) {
896 BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchRsrcReg)
897 .addReg(PreloadedScratchRsrcReg, RegState::Kill);
898 }
899 }
900
901 // Add the scratch wave offset into the scratch RSRC.
902 //
903 // We only want to update the first 48 bits, which is the base address
904 // pointer, without touching the adjacent 16 bits of flags. We know this add
905 // cannot carry-out from bit 47, otherwise the scratch allocation would be
906 // impossible to fit in the 48-bit global address space.
907 //
908 // TODO: Evaluate if it is better to just construct an SRD using the flat
909 // scratch init and some constants rather than update the one we are passed.
910 Register ScratchRsrcSub0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
911 Register ScratchRsrcSub1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
912
913 // We cannot Kill ScratchWaveOffsetReg here because we allow it to be used in
914 // the kernel body via inreg arguments.
915 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), ScratchRsrcSub0)
916 .addReg(ScratchRsrcSub0)
917 .addReg(ScratchWaveOffsetReg)
918 .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
919 auto Addc = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), ScratchRsrcSub1)
920 .addReg(ScratchRsrcSub1)
921 .addImm(0)
922 .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
923 Addc->getOperand(3).setIsDead(); // Mark SCC as dead.
924}
925
// NOTE(review): the extraction dropped the function head (original line 926;
// presumably `bool SIFrameLowering::isSupportedStackID(TargetStackID::Value
// ID) const {` — TODO confirm against upstream) and all `case` labels
// (original lines 928-930 for the true group, 932-934 for the false group).
// Only the return values and the exhaustive-switch guard survive.
927 switch (ID) {
931 return true;
935 return false;
936 }
937 llvm_unreachable("Invalid TargetStackID::Value");
938}
939
940// Activate only the inactive lanes when \p EnableInactiveLanes is true.
941// Otherwise, activate all lanes. It returns the saved exec.
// NOTE(review): the extraction dropped original line 942 (the function head;
// upstream this is `buildScratchExecCopy`), line 949 (the `FuncInfo` local)
// and line 953 (the `MRI` local) used below.
943 MachineFunction &MF,
946 const DebugLoc &DL, bool IsProlog,
947 bool EnableInactiveLanes) {
948 Register ScratchExecCopy;
950 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
951 const SIInstrInfo *TII = ST.getInstrInfo();
952 const SIRegisterInfo &TRI = TII->getRegisterInfo();
954
955 initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MBBI, IsProlog);
956
957 if (FuncInfo->isWholeWaveFunction()) {
958 // Whole wave functions already have a copy of the original EXEC mask that
959 // we can use.
960 assert(IsProlog && "Epilog should look at return, not setup");
961 ScratchExecCopy =
962 TII->getWholeWaveFunctionSetup(MF)->getOperand(0).getReg();
963 assert(ScratchExecCopy && "Couldn't find copy of EXEC");
964 } else {
// Otherwise grab a free non-callee-save register of the wave-mask class
// (32- or 64-bit depending on wave size) to hold the saved exec.
965 ScratchExecCopy = findScratchNonCalleeSaveRegister(
966 MRI, LiveUnits, *TRI.getWaveMaskRegClass());
967 }
968
969 if (!ScratchExecCopy)
970 report_fatal_error("failed to find free scratch register");
971
972 LiveUnits.addReg(ScratchExecCopy);
973
// XOR_SAVEEXEC flips to the inactive lanes; OR_SAVEEXEC activates all lanes.
// Both save the previous exec mask into ScratchExecCopy.
974 const unsigned SaveExecOpc =
975 ST.isWave32() ? (EnableInactiveLanes ? AMDGPU::S_XOR_SAVEEXEC_B32
976 : AMDGPU::S_OR_SAVEEXEC_B32)
977 : (EnableInactiveLanes ? AMDGPU::S_XOR_SAVEEXEC_B64
978 : AMDGPU::S_OR_SAVEEXEC_B64);
979 auto SaveExec =
980 BuildMI(MBB, MBBI, DL, TII->get(SaveExecOpc), ScratchExecCopy).addImm(-1);
981 SaveExec->getOperand(3).setIsDead(); // Mark SCC as dead.
982
983 return ScratchExecCopy;
984}
985
// Emits the prologue-side stores for callee-saved and WWM registers: WWM VGPR
// spills (with EXEC manipulation so inactive lanes are covered) followed by
// the special prolog/epilog SGPR spills (FP/BP/EXEC-copy), using \p FrameReg
// as the base register.
// NOTE(review): extraction dropped the first signature lines (orig. 986-988,
// presumably `void SIFrameLowering::emitCSRSpillStores(...)` — see the call
// sites in emitPrologue below), the FuncInfo/MRI/LMC local declarations
// (orig. 990, 994-995), and the function_ref parameter line of
// StoreWWMRegisters (orig. 1009) — restore from upstream.
 989    Register FrameReg, Register FramePtrRegScratchCopy) const {
 991  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
 992  const SIInstrInfo *TII = ST.getInstrInfo();
 993  const SIRegisterInfo &TRI = TII->getRegisterInfo();
 996
 997  // Spill Whole-Wave Mode VGPRs. Save only the inactive lanes of the scratch
 998  // registers. However, save all lanes of callee-saved VGPRs. Due to this, we
 999  // might end up flipping the EXEC bits twice.
1000  Register ScratchExecCopy;
1001  SmallVector<std::pair<Register, int>, 2> WWMCalleeSavedRegs, WWMScratchRegs;
1002  FuncInfo->splitWWMSpillRegisters(MF, WWMCalleeSavedRegs, WWMScratchRegs);
1003  if (!WWMScratchRegs.empty())
1004    ScratchExecCopy =
1005        buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
1006                             /*IsProlog*/ true, /*EnableInactiveLanes*/ true);
1007
1008  auto StoreWWMRegisters =
1010        for (const auto &Reg : WWMRegs) {
1011          Register VGPR = Reg.first;
1012          int FI = Reg.second;
1013          buildPrologSpill(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MBBI, DL,
1014                           VGPR, FI, FrameReg);
1015        }
1016      };
1017
  // WWM scratch registers must be live-in so the verifier accepts the reads
  // of their (otherwise undefined) inactive lanes.
1018  for (const Register Reg : make_first_range(WWMScratchRegs)) {
1019    if (!MRI.isReserved(Reg)) {
1020      MRI.addLiveIn(Reg);
1021      MBB.addLiveIn(Reg);
1022    }
1023  }
1024  StoreWWMRegisters(WWMScratchRegs);
1025
1026  auto EnableAllLanes = [&]() {
1027    BuildMI(MBB, MBBI, DL, TII->get(LMC.MovOpc), LMC.ExecReg).addImm(-1);
1028  };
1029
1030  if (!WWMCalleeSavedRegs.empty()) {
1031    if (ScratchExecCopy) {
1032      EnableAllLanes();
1033    } else {
1034      ScratchExecCopy = buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
1035                                             /*IsProlog*/ true,
1036                                             /*EnableInactiveLanes*/ false);
1037    }
1038  }
1039
1040  StoreWWMRegisters(WWMCalleeSavedRegs);
1041  if (FuncInfo->isWholeWaveFunction()) {
1042    // If we have already saved some WWM CSR registers, then the EXEC is already
1043    // -1 and we don't need to do anything else. Otherwise, set EXEC to -1 here.
1044    if (!ScratchExecCopy)
1045      buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL, /*IsProlog*/ true,
1046                           /*EnableInactiveLanes*/ true);
1047    else if (WWMCalleeSavedRegs.empty())
1048      EnableAllLanes();
1049  } else if (ScratchExecCopy) {
1050    // FIXME: Split block and make terminator.
1051    BuildMI(MBB, MBBI, DL, TII->get(LMC.MovOpc), LMC.ExecReg)
1052        .addReg(ScratchExecCopy, RegState::Kill);
1053    LiveUnits.addReg(ScratchExecCopy);
1054  }
1055
1056  Register FramePtrReg = FuncInfo->getFrameOffsetReg();
1057
1058  for (const auto &Spill : FuncInfo->getPrologEpilogSGPRSpills()) {
1059    // Special handle FP spill:
1060    // Skip if FP is saved to a scratch SGPR, the save has already been emitted.
1061    // Otherwise, FP has been moved to a temporary register and spill it
1062    // instead.
1063    Register Reg =
1064        Spill.first == FramePtrReg ? FramePtrRegScratchCopy : Spill.first;
1065    if (!Reg)
1066      continue;
1067
1068    PrologEpilogSGPRSpillBuilder SB(Reg, Spill.second, MBB, MBBI, DL, TII, TRI,
1069                                    LiveUnits, FrameReg);
1070    SB.save();
1071  }
1072
1073  // If a copy to scratch SGPR has been chosen for any of the SGPR spills, make
1074  // such scratch registers live throughout the function.
1075  SmallVector<Register, 1> ScratchSGPRs;
1076  FuncInfo->getAllScratchSGPRCopyDstRegs(ScratchSGPRs);
1077  if (!ScratchSGPRs.empty()) {
1078    for (MachineBasicBlock &MBB : MF) {
1079      for (MCPhysReg Reg : ScratchSGPRs)
1080        MBB.addLiveIn(Reg);
1081
1082      MBB.sortUniqueLiveIns();
1083    }
1084    if (!LiveUnits.empty()) {
1085      for (MCPhysReg Reg : ScratchSGPRs)
1086        LiveUnits.addReg(Reg);
1087    }
1088  }
1089}
1090
// Epilogue-side counterpart of emitCSRSpillStores: restores the special
// prolog/epilog SGPR spills first, then the WWM VGPRs (inactive lanes of
// scratch registers, all lanes of callee-saved ones), using \p FrameReg as
// base register. For whole-wave functions it also rewrites the special
// return pseudo into the ordinary one.
// NOTE(review): extraction dropped the first signature lines (orig.
// 1091-1093, presumably `void SIFrameLowering::emitCSRSpillRestores(...)`),
// the LMC declaration (orig. 1099), and the function_ref parameter line of
// RestoreWWMRegisters (orig. 1125) — restore from upstream.
1094    Register FrameReg, Register FramePtrRegScratchCopy) const {
1095  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
1096  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1097  const SIInstrInfo *TII = ST.getInstrInfo();
1098  const SIRegisterInfo &TRI = TII->getRegisterInfo();
1100  Register FramePtrReg = FuncInfo->getFrameOffsetReg();
1101
1102  for (const auto &Spill : FuncInfo->getPrologEpilogSGPRSpills()) {
1103    // Special handle FP restore:
1104    // Skip if FP needs to be restored from the scratch SGPR. Otherwise, restore
1105    // the FP value to a temporary register. The frame pointer should be
1106    // overwritten only at the end when all other spills are restored from
1107    // current frame.
1108    Register Reg =
1109        Spill.first == FramePtrReg ? FramePtrRegScratchCopy : Spill.first;
1110    if (!Reg)
1111      continue;
1112
1113    PrologEpilogSGPRSpillBuilder SB(Reg, Spill.second, MBB, MBBI, DL, TII, TRI,
1114                                    LiveUnits, FrameReg);
1115    SB.restore();
1116  }
1117
1118  // Restore Whole-Wave Mode VGPRs. Restore only the inactive lanes of the
1119  // scratch registers. However, restore all lanes of callee-saved VGPRs. Due to
1120  // this, we might end up flipping the EXEC bits twice.
1121  Register ScratchExecCopy;
1122  SmallVector<std::pair<Register, int>, 2> WWMCalleeSavedRegs, WWMScratchRegs;
1123  FuncInfo->splitWWMSpillRegisters(MF, WWMCalleeSavedRegs, WWMScratchRegs);
1124  auto RestoreWWMRegisters =
1126        for (const auto &Reg : WWMRegs) {
1127          Register VGPR = Reg.first;
1128          int FI = Reg.second;
1129          buildEpilogRestore(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MBBI, DL,
1130                             VGPR, FI, FrameReg);
1131        }
1132      };
1133
1134  if (FuncInfo->isWholeWaveFunction()) {
1135    // For whole wave functions, the EXEC is already -1 at this point.
1136    // Therefore, we can restore the CSR WWM registers right away.
1137    RestoreWWMRegisters(WWMCalleeSavedRegs);
1138
1139    // The original EXEC is the first operand of the return instruction.
1140    MachineInstr &Return = MBB.instr_back();
1141    unsigned Opcode = Return.getOpcode();
1142    switch (Opcode) {
1143    case AMDGPU::SI_WHOLE_WAVE_FUNC_RETURN:
1144      Opcode = AMDGPU::SI_RETURN;
1145      break;
1146    case AMDGPU::SI_TCRETURN_GFX_WholeWave:
1147      Opcode = AMDGPU::SI_TCRETURN_GFX;
1148      break;
1149    default:
1150      llvm_unreachable("Unexpected return inst");
1151    }
1152    Register OrigExec = Return.getOperand(0).getReg();
1153
1154    if (!WWMScratchRegs.empty()) {
      // EXEC = ~OrigExec: run only the lanes that were inactive on entry,
      // which are the ones whose scratch-register contents must be restored.
1155      BuildMI(MBB, MBBI, DL, TII->get(LMC.XorOpc), LMC.ExecReg)
1156          .addReg(OrigExec)
1157          .addImm(-1);
1158      RestoreWWMRegisters(WWMScratchRegs);
1159    }
1160
1161    // Restore original EXEC.
1162    BuildMI(MBB, MBBI, DL, TII->get(LMC.MovOpc), LMC.ExecReg).addReg(OrigExec);
1163
1164    // Drop the first operand and update the opcode.
1165    Return.removeOperand(0);
1166    Return.setDesc(TII->get(Opcode));
1167
1168    return;
1169  }
1170
1171  if (!WWMScratchRegs.empty()) {
1172    ScratchExecCopy =
1173        buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
1174                             /*IsProlog=*/false, /*EnableInactiveLanes=*/true);
1175  }
1176  RestoreWWMRegisters(WWMScratchRegs);
1177  if (!WWMCalleeSavedRegs.empty()) {
1178    if (ScratchExecCopy) {
1179      BuildMI(MBB, MBBI, DL, TII->get(LMC.MovOpc), LMC.ExecReg).addImm(-1);
1180    } else {
1181      ScratchExecCopy = buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
1182                                             /*IsProlog*/ false,
1183                                             /*EnableInactiveLanes*/ false);
1184    }
1185  }
1186
1187  RestoreWWMRegisters(WWMCalleeSavedRegs);
1188  if (ScratchExecCopy) {
1189    // FIXME: Split block and make terminator.
1190    BuildMI(MBB, MBBI, DL, TII->get(LMC.MovOpc), LMC.ExecReg)
1191        .addReg(ScratchExecCopy, RegState::Kill);
1192  }
1193}
1194
// Non-entry-function prologue: saves FP (to scratch SGPR, VGPR lane, or
// memory), emits the CSR/WWM spill stores, realigns the stack if required,
// sets up FP/BP, and bumps SP by the (rounded) frame size.
// NOTE(review): extraction dropped the signature line (orig. 1195, presumably
// `void SIFrameLowering::emitPrologue(MachineFunction &MF,`), the FuncInfo
// declaration (1197), the MBBI declaration (1215), the
// PrologEpilogSGPRSpillBuilder declaration line (1247), the SCC-dead marker
// lines after the ADD/AND/COPY builders (1281, 1285, 1291, 1309, 1316), and
// a few others — restore from upstream before compiling.
1196                                    MachineBasicBlock &MBB) const {
1198  if (FuncInfo->isEntryFunction()) {
1200    return;
1201  }
1202
1203  MachineFrameInfo &MFI = MF.getFrameInfo();
1204  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1205  const SIInstrInfo *TII = ST.getInstrInfo();
1206  const SIRegisterInfo &TRI = TII->getRegisterInfo();
1207  MachineRegisterInfo &MRI = MF.getRegInfo();
1208
1209  Register StackPtrReg = FuncInfo->getStackPtrOffsetReg();
1210  Register FramePtrReg = FuncInfo->getFrameOffsetReg();
1211  Register BasePtrReg =
1212      TRI.hasBasePointer(MF) ? TRI.getBaseRegister() : Register();
1213  LiveRegUnits LiveUnits;
1214
1216  // DebugLoc must be unknown since the first instruction with DebugLoc is used
1217  // to determine the end of the prologue.
1218  DebugLoc DL;
1219
1220  bool HasFP = false;
1221  bool HasBP = false;
1222  uint32_t NumBytes = MFI.getStackSize();
1223  uint32_t RoundedSize = NumBytes;
1224
1225  // Chain functions never return, so there's no need to save and restore the FP
1226  // or BP.
1227  bool SavesStackRegs = !FuncInfo->isChainFunction();
1228
1229  if (TRI.hasStackRealignment(MF))
1230    HasFP = true;
1231
1232  Register FramePtrRegScratchCopy;
1233  if (!HasFP && !hasFP(MF)) {
1234    // Emit the CSR spill stores with SP base register.
1235    emitCSRSpillStores(MF, MBB, MBBI, DL, LiveUnits, StackPtrReg,
1236                       FramePtrRegScratchCopy);
1237  } else if (SavesStackRegs) {
1238    // CSR spill stores will use FP as base register.
1239    Register SGPRForFPSaveRestoreCopy =
1240        FuncInfo->getScratchSGPRCopyDstReg(FramePtrReg);
1241
1242    initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ true);
1243    if (SGPRForFPSaveRestoreCopy) {
1244      // Copy FP to the scratch register now and emit the CFI entry. It avoids
1245      // the extra FP copy needed in the other two cases when FP is spilled to
1246      // memory or to a VGPR lane.
1248          FramePtrReg,
1249          FuncInfo->getPrologEpilogSGPRSaveRestoreInfo(FramePtrReg), MBB, MBBI,
1250          DL, TII, TRI, LiveUnits, FramePtrReg);
1251      SB.save();
1252      LiveUnits.addReg(SGPRForFPSaveRestoreCopy);
1253    } else {
1254      // Copy FP into a new scratch register so that its previous value can be
1255      // spilled after setting up the new frame.
1256      FramePtrRegScratchCopy = findScratchNonCalleeSaveRegister(
1257          MRI, LiveUnits, AMDGPU::SReg_32_XM0_XEXECRegClass);
1258      if (!FramePtrRegScratchCopy)
1259        report_fatal_error("failed to find free scratch register");
1260
1261      LiveUnits.addReg(FramePtrRegScratchCopy);
1262      BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrRegScratchCopy)
1263          .addReg(FramePtrReg);
1264    }
1265  }
1266
1267  if (HasFP) {
1268    const unsigned Alignment = MFI.getMaxAlign().value();
1269
    // Reserve extra bytes so the aligned FP is guaranteed to fit in frame.
1270    RoundedSize += Alignment;
1271    if (LiveUnits.empty()) {
1272      LiveUnits.init(TRI);
1273      LiveUnits.addLiveIns(MBB);
1274    }
1275
1276    // s_add_i32 s33, s32, NumBytes
1277    // s_and_b32 s33, s33, 0b111...0000
1278    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), FramePtrReg)
1279        .addReg(StackPtrReg)
1280        .addImm((Alignment - 1) * getScratchScaleFactor(ST))
1282    auto And = BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_AND_B32), FramePtrReg)
1283                   .addReg(FramePtrReg, RegState::Kill)
1284                   .addImm(-Alignment * getScratchScaleFactor(ST))
1286    And->getOperand(3).setIsDead(); // Mark SCC as dead.
1287    FuncInfo->setIsStackRealigned(true);
1288  } else if ((HasFP = hasFP(MF))) {
1289    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
1290        .addReg(StackPtrReg)
1292  }
1293
1294  // If FP is used, emit the CSR spills with FP base register.
1295  if (HasFP) {
1296    emitCSRSpillStores(MF, MBB, MBBI, DL, LiveUnits, FramePtrReg,
1297                       FramePtrRegScratchCopy);
1298    if (FramePtrRegScratchCopy)
1299      LiveUnits.removeReg(FramePtrRegScratchCopy);
1300  }
1301
1302  // If we need a base pointer, set it up here. It's whatever the value of
1303  // the stack pointer is at this point. Any variable size objects will be
1304  // allocated after this, so we can still use the base pointer to reference
1305  // the incoming arguments.
1306  if ((HasBP = TRI.hasBasePointer(MF))) {
1307    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), BasePtrReg)
1308        .addReg(StackPtrReg)
1310  }
1311
1312  if (HasFP && RoundedSize != 0) {
1313    auto Add = BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), StackPtrReg)
1314                   .addReg(StackPtrReg)
1315                   .addImm(RoundedSize * getScratchScaleFactor(ST))
1317    Add->getOperand(3).setIsDead(); // Mark SCC as dead.
1318  }
1319
1320  bool FPSaved = FuncInfo->hasPrologEpilogSGPRSpillEntry(FramePtrReg);
1321  (void)FPSaved;
1322  assert((!HasFP || FPSaved || !SavesStackRegs) &&
1323         "Needed to save FP but didn't save it anywhere");
1324
1325  // If we allow spilling to AGPRs we may have saved FP but then spill
1326  // everything into AGPRs instead of the stack.
1327  assert((HasFP || !FPSaved || !SavesStackRegs || EnableSpillVGPRToAGPR) &&
1328         "Saved FP but didn't need it");
1329
1330  bool BPSaved = FuncInfo->hasPrologEpilogSGPRSpillEntry(BasePtrReg);
1331  (void)BPSaved;
1332  assert((!HasBP || BPSaved || !SavesStackRegs) &&
1333         "Needed to save BP but didn't save it anywhere");
1334
1335  assert((HasBP || !BPSaved) && "Saved BP but didn't need it");
1336
1337  if (FuncInfo->isWholeWaveFunction()) {
1338    // SI_WHOLE_WAVE_FUNC_SETUP has outlived its purpose.
1339    TII->getWholeWaveFunctionSetup(MF)->eraseFromParent();
1340  }
1341}
1342
// Non-entry-function epilogue: undoes the stack adjustment, restores the CSR
// and WWM spills (FP-based if FP was saved, SP-based otherwise), and finally
// restores the frame pointer itself.
// NOTE(review): extraction dropped the signature line (orig. 1343, presumably
// `void SIFrameLowering::emitEpilogue(MachineFunction &MF,`), the MBBI
// declaration (1360), and the trailing `.setMIFlag(...)` / scratch-copy
// kill lines of several builders (1382, 1386, 1418, 1422) — restore from
// upstream before compiling.
1344                                  MachineBasicBlock &MBB) const {
1345  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
1346  if (FuncInfo->isEntryFunction())
1347    return;
1348
1349  const MachineFrameInfo &MFI = MF.getFrameInfo();
1350  if (FuncInfo->isChainFunction() && !MFI.hasTailCall())
1351    return;
1352
1353  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1354  const SIInstrInfo *TII = ST.getInstrInfo();
1355  const SIRegisterInfo &TRI = TII->getRegisterInfo();
1356  MachineRegisterInfo &MRI = MF.getRegInfo();
1357  LiveRegUnits LiveUnits;
1358  // Get the insert location for the epilogue. If there were no terminators in
1359  // the block, get the last instruction.
1361  DebugLoc DL;
1362  if (!MBB.empty()) {
1363    MBBI = MBB.getLastNonDebugInstr();
1364    if (MBBI != MBB.end())
1365      DL = MBBI->getDebugLoc();
1366
1367    MBBI = MBB.getFirstTerminator();
1368  }
1369
1370  uint32_t NumBytes = MFI.getStackSize();
1371  uint32_t RoundedSize = FuncInfo->isStackRealigned()
1372                             ? NumBytes + MFI.getMaxAlign().value()
1373                             : NumBytes;
1374  const Register StackPtrReg = FuncInfo->getStackPtrOffsetReg();
1375  Register FramePtrReg = FuncInfo->getFrameOffsetReg();
1376  bool FPSaved = FuncInfo->hasPrologEpilogSGPRSpillEntry(FramePtrReg);
1377
  // Rewind SP: copy it back from BP or FP rather than subtracting, since
  // those still hold the pre-allocation stack value.
1378  if (RoundedSize != 0) {
1379    if (TRI.hasBasePointer(MF)) {
1380      BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), StackPtrReg)
1381          .addReg(TRI.getBaseRegister())
1383    } else if (hasFP(MF)) {
1384      BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), StackPtrReg)
1385          .addReg(FramePtrReg)
1387    }
1388  }
1389
1390  Register FramePtrRegScratchCopy;
1391  Register SGPRForFPSaveRestoreCopy =
1392      FuncInfo->getScratchSGPRCopyDstReg(FramePtrReg);
1393  if (FPSaved) {
1394    // CSR spill restores should use FP as base register. If
1395    // SGPRForFPSaveRestoreCopy is not true, restore the previous value of FP
1396    // into a new scratch register and copy to FP later when other registers are
1397    // restored from the current stack frame.
1398    initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ false);
1399    if (SGPRForFPSaveRestoreCopy) {
1400      LiveUnits.addReg(SGPRForFPSaveRestoreCopy);
1401    } else {
1402      FramePtrRegScratchCopy = findScratchNonCalleeSaveRegister(
1403          MRI, LiveUnits, AMDGPU::SReg_32_XM0_XEXECRegClass);
1404      if (!FramePtrRegScratchCopy)
1405        report_fatal_error("failed to find free scratch register");
1406
1407      LiveUnits.addReg(FramePtrRegScratchCopy);
1408    }
1409
1410    emitCSRSpillRestores(MF, MBB, MBBI, DL, LiveUnits, FramePtrReg,
1411                         FramePtrRegScratchCopy);
1412  }
1413
1414  if (FPSaved) {
1415    // Insert the copy to restore FP.
1416    Register SrcReg = SGPRForFPSaveRestoreCopy ? SGPRForFPSaveRestoreCopy
1417                                               : FramePtrRegScratchCopy;
1419    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
1420        .addReg(SrcReg);
1421    if (SGPRForFPSaveRestoreCopy)
1423  } else {
1424    // Insert the CSR spill restores with SP as the base register.
1425    emitCSRSpillRestores(MF, MBB, MBBI, DL, LiveUnits, StackPtrReg,
1426                         FramePtrRegScratchCopy);
1427  }
1428}
1429
1430#ifndef NDEBUG
// Debug-build helper: returns true iff every (relevant) frame object is dead,
// i.e. nothing will actually need stack memory. Used by assertions and by
// processFunctionBeforeFrameFinalized.
// NOTE(review): extraction dropped the signature line (orig. 1431) and the
// second half of the `if` condition (orig. 1437-1438, which filters which
// live objects make the function return false) — restore from upstream.
1432  const MachineFrameInfo &MFI = MF.getFrameInfo();
1433  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
1434  for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
1435       I != E; ++I) {
1436    if (!MFI.isDeadObjectIndex(I) &&
1439      return false;
1440    }
1441  }
1442
1443  return true;
1444}
1445#endif
1446
// Computes the register + offset pair used to address frame index \p FI;
// reports the frame register through the out-parameter.
// NOTE(review): extraction dropped the signature lines (orig. 1446-1447,
// presumably `StackOffset SIFrameLowering::getFrameIndexReference(...)`) and
// the return-statement line (orig. 1453) that computes the offset — restore
// from upstream.
1448                                                 int FI,
1449                                                 Register &FrameReg) const {
1450  const SIRegisterInfo *RI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
1451
1452  FrameReg = RI->getFrameRegister(MF);
1454}
1455
// Runs before frame finalization: folds VGPR spills into AGPRs where
// possible (freeing their stack slots), moves remaining SGPR spills back to
// the default stack, and reserves emergency scavenging slots when real stack
// objects survive.
// NOTE(review): extraction dropped the function-name line (orig. 1456), the
// FuncInfo declaration (1465), the second half of the SpillVGPRToAGPR
// condition (1468), the inner instruction-iteration line (1478, presumably
// `for (MachineInstr &MI : llvm::make_early_inc_range(MBB))`), the assert
// head before line 1533, and the second condition of the large-frame check
// (1547) — restore from upstream before compiling.
1457    MachineFunction &MF,
1458    RegScavenger *RS) const {
1459  MachineFrameInfo &MFI = MF.getFrameInfo();
1460
1461  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1462  const SIInstrInfo *TII = ST.getInstrInfo();
1463  const SIRegisterInfo *TRI = ST.getRegisterInfo();
1464  MachineRegisterInfo &MRI = MF.getRegInfo();
1466
1467  const bool SpillVGPRToAGPR = ST.hasMAIInsts() && FuncInfo->hasSpilledVGPRs()
1469
1470  if (SpillVGPRToAGPR) {
1471    // To track the spill frame indices handled in this pass.
1472    BitVector SpillFIs(MFI.getObjectIndexEnd(), false);
1473    BitVector NonVGPRSpillFIs(MFI.getObjectIndexEnd(), false);
1474
1475    bool SeenDbgInstr = false;
1476
1477    for (MachineBasicBlock &MBB : MF) {
1479        int FrameIndex;
1480        if (MI.isDebugInstr())
1481          SeenDbgInstr = true;
1482
1483        if (TII->isVGPRSpill(MI)) {
1484          // Try to eliminate stack used by VGPR spills before frame
1485          // finalization.
1486          unsigned FIOp = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
1487                                                     AMDGPU::OpName::vaddr);
1488          int FI = MI.getOperand(FIOp).getIndex();
1489          Register VReg =
1490              TII->getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg();
1491          if (FuncInfo->allocateVGPRSpillToAGPR(MF, FI,
1492                                                TRI->isAGPR(MRI, VReg))) {
1493            assert(RS != nullptr);
            // Re-scan liveness up to (and including) MI so the scavenger's
            // state is valid at the point we rewrite the frame index.
1494            RS->enterBasicBlockEnd(MBB);
1495            RS->backward(std::next(MI.getIterator()));
1496            TRI->eliminateFrameIndex(MI, 0, FIOp, RS);
1497            SpillFIs.set(FI);
1498            continue;
1499          }
1500        } else if (TII->isStoreToStackSlot(MI, FrameIndex) ||
1501                   TII->isLoadFromStackSlot(MI, FrameIndex))
1502          if (!MFI.isFixedObjectIndex(FrameIndex))
1503            NonVGPRSpillFIs.set(FrameIndex);
1504      }
1505    }
1506
1507    // Stack slot coloring may assign different objects to the same stack slot.
1508    // If not, then the VGPR to AGPR spill slot is dead.
1509    for (unsigned FI : SpillFIs.set_bits())
1510      if (!NonVGPRSpillFIs.test(FI))
1511        FuncInfo->setVGPRToAGPRSpillDead(FI);
1512
1513    for (MachineBasicBlock &MBB : MF) {
1514      for (MCPhysReg Reg : FuncInfo->getVGPRSpillAGPRs())
1515        MBB.addLiveIn(Reg);
1516
1517      for (MCPhysReg Reg : FuncInfo->getAGPRSpillVGPRs())
1518        MBB.addLiveIn(Reg);
1519
1520      MBB.sortUniqueLiveIns();
1521
1522      if (!SpillFIs.empty() && SeenDbgInstr)
1523        clearDebugInfoForSpillFIs(MFI, MBB, SpillFIs);
1524    }
1525  }
1526
1527  // At this point we've already allocated all spilled SGPRs to VGPRs if we
1528  // can. Any remaining SGPR spills will go to memory, so move them back to the
1529  // default stack.
1530  bool HaveSGPRToVMemSpill =
1531      FuncInfo->removeDeadFrameIndices(MFI, /*ResetSGPRSpillStackIDs*/ true);
1533         "SGPR spill should have been removed in SILowerSGPRSpills");
1534
1535  // FIXME: The other checks should be redundant with allStackObjectsAreDead,
1536  // but currently hasNonSpillStackObjects is set only from source
1537  // allocas. Stack temps produced from legalization are not counted currently.
1538  if (!allStackObjectsAreDead(MFI)) {
1539    assert(RS && "RegScavenger required if spilling");
1540
1541    // Add an emergency spill slot
1542    RS->addScavengingFrameIndex(FuncInfo->getScavengeFI(MFI, *TRI));
1543
1544    // If we are spilling SGPRs to memory with a large frame, we may need a
1545    // second VGPR emergency frame index.
1546    if (HaveSGPRToVMemSpill &&
1548      RS->addScavengingFrameIndex(MFI.CreateSpillStackObject(4, Align(4)));
1549    }
1550  }
1551}
1552
// Post-RA cleanup of pessimistically reserved registers: if a lower-numbered
// unused VGPR (for the gfx908 AGPR-copy workaround) or SGPR pair (for long
// branches) is now free, shift the reservation down to it.
// NOTE(review): extraction dropped the function-name line (orig. 1553) and
// the FuncInfo declaration (orig. 1558) — restore from upstream.
1554    MachineFunction &MF, RegScavenger *RS) const {
1555  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1556  const SIRegisterInfo *TRI = ST.getRegisterInfo();
1557  MachineRegisterInfo &MRI = MF.getRegInfo();
1559
1560  if (ST.hasMAIInsts() && !ST.hasGFX90AInsts()) {
    // On gfx908, we initially reserved the highest available VGPR for the
    // AGPR copy. Now that RA is done, check whether an unused VGPR lower
    // than the earlier reservation exists; if so, use it for the AGPR copy
    // instead.
1565    Register VGPRForAGPRCopy = FuncInfo->getVGPRForAGPRCopy();
1566    Register UnusedLowVGPR =
1567        TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
1568    if (UnusedLowVGPR && (TRI->getHWRegIndex(UnusedLowVGPR) <
1569                          TRI->getHWRegIndex(VGPRForAGPRCopy))) {
      // Reserve this newly identified VGPR for the AGPR copy. Reserved
      // registers should already be frozen at this point, so we can avoid
      // calling MRI.freezeReservedRegs and just use MRI.reserveReg.
1574      FuncInfo->setVGPRForAGPRCopy(UnusedLowVGPR);
1575      MRI.reserveReg(UnusedLowVGPR, TRI);
1576    }
1577  }
  // We initially reserved the highest available SGPR pair for long branches;
  // now, after RA, we shift down to a lower unused one if one exists.
1580  Register LongBranchReservedReg = FuncInfo->getLongBranchReservedReg();
1581  Register UnusedLowSGPR =
1582      TRI->findUnusedRegister(MRI, &AMDGPU::SGPR_64RegClass, MF);
1583  // If LongBranchReservedReg is null then we didn't find a long branch
1584  // and never reserved a register to begin with so there is nothing to
1585  // shift down. Then if UnusedLowSGPR is null, there isn't available lower
1586  // register to use so just keep the original one we set.
1587  if (LongBranchReservedReg && UnusedLowSGPR) {
1588    FuncInfo->setLongBranchReservedReg(UnusedLowSGPR);
1589    MRI.reserveReg(UnusedLowSGPR, TRI);
1590  }
1591}
1592
1593// The special SGPR spills like the one needed for FP, BP or any reserved
1594// registers delayed until frame lowering.
// Decides, for each such SGPR (EXEC-copy register, FP, BP), whether it can
// live in a free scratch SGPR for the whole function or must be spilled to a
// VGPR lane / memory via getVGPRSpillLaneOrTempRegister.
// NOTE(review): extraction dropped the function-name line (orig. 1595,
// presumably `void SIFrameLowering::determinePrologEpilogSGPRSaves(` — see
// the call in determineCalleeSaves below) and the MFI declaration (orig.
// 1600) — restore from upstream.
1596    MachineFunction &MF, BitVector &SavedVGPRs,
1597    bool NeedExecCopyReservedReg) const {
1598  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
1599  MachineRegisterInfo &MRI = MF.getRegInfo();
1601  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1602  const SIRegisterInfo *TRI = ST.getRegisterInfo();
1603  LiveRegUnits LiveUnits;
1604  LiveUnits.init(*TRI);
1605  // Initially mark callee saved registers as used so we will not choose them
1606  // while looking for scratch SGPRs.
1607  const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs();
1608  for (unsigned I = 0; CSRegs[I]; ++I)
1609    LiveUnits.addReg(CSRegs[I]);
1610
1611  const TargetRegisterClass &RC = *TRI->getWaveMaskRegClass();
1612
1613  Register ReservedRegForExecCopy = MFI->getSGPRForEXECCopy();
1614  if (NeedExecCopyReservedReg ||
1615      (ReservedRegForExecCopy &&
1616       MRI.isPhysRegUsed(ReservedRegForExecCopy, /*SkipRegMaskTest=*/true))) {
1617    MRI.reserveReg(ReservedRegForExecCopy, TRI);
1618    Register UnusedScratchReg = findUnusedRegister(MRI, LiveUnits, RC);
1619    if (UnusedScratchReg) {
1620      // If found any unused scratch SGPR, reserve the register itself for Exec
1621      // copy and there is no need for any spills in that case.
1622      MFI->setSGPRForEXECCopy(UnusedScratchReg);
1623      MRI.replaceRegWith(ReservedRegForExecCopy, UnusedScratchReg);
1624      LiveUnits.addReg(UnusedScratchReg);
1625    } else {
1626      // Needs spill.
1627      assert(!MFI->hasPrologEpilogSGPRSpillEntry(ReservedRegForExecCopy) &&
1628             "Re-reserving spill slot for EXEC copy register");
1629      getVGPRSpillLaneOrTempRegister(MF, LiveUnits, ReservedRegForExecCopy, RC,
1630                                     /*IncludeScratchCopy=*/false);
1631    }
1632  } else if (ReservedRegForExecCopy) {
1633    // Reset it at this point. There are no whole-wave copies and spills
1634    // encountered.
1635    MFI->setSGPRForEXECCopy(AMDGPU::NoRegister);
1636  }
1637
1638  // Chain functions don't return to the caller, so they don't need to preserve
1639  // the FP and BP.
1640  if (MFI->isChainFunction())
1641    return;
1642
1643  // hasFP only knows about stack objects that already exist. We're now
1644  // determining the stack slots that will be created, so we have to predict
1645  // them. Stack objects force FP usage with calls.
1646  //
1647  // Note a new VGPR CSR may be introduced if one is used for the spill, but we
1648  // don't want to report it here.
1649  //
1650  // FIXME: Is this really hasReservedCallFrame?
1651  const bool WillHaveFP =
1652      FrameInfo.hasCalls() &&
1653      (SavedVGPRs.any() || !allStackObjectsAreDead(FrameInfo));
1654
1655  if (WillHaveFP || hasFP(MF)) {
1656    Register FramePtrReg = MFI->getFrameOffsetReg();
1657    assert(!MFI->hasPrologEpilogSGPRSpillEntry(FramePtrReg) &&
1658           "Re-reserving spill slot for FP");
1659    getVGPRSpillLaneOrTempRegister(MF, LiveUnits, FramePtrReg);
1660  }
1661
1662  if (TRI->hasBasePointer(MF)) {
1663    Register BasePtrReg = TRI->getBaseRegister();
1664    assert(!MFI->hasPrologEpilogSGPRSpillEntry(BasePtrReg) &&
1665           "Re-reserving spill slot for BP");
1666    getVGPRSpillLaneOrTempRegister(MF, LiveUnits, BasePtrReg);
1667  }
1668}
1669
1670// Only report VGPRs to generic code.
// Computes the callee-saved set for non-entry functions: scans for WWM spill
// opcodes and return pseudos, shifts WWM VGPRs to the lowest range,
// allocates WWM spill slots, and delegates the special SGPR saves to
// determinePrologEpilogSGPRSaves. SavedVGPRs is filtered so generic PEI only
// sees vector registers it should handle.
// NOTE(review): extraction dropped the function-name line (orig. 1671,
// presumably `void SIFrameLowering::determineCalleeSaves(`), the FuncInfo
// declaration (1674), and the TargetFrameLowering::determineCalleeSaves
// super-call (orig. 1682) — restore from upstream.
1672                                           BitVector &SavedVGPRs,
1673                                           RegScavenger *RS) const {
1675
1676  // If this is a function with the amdgpu_cs_chain[_preserve] calling
1677  // convention and it doesn't contain any calls to llvm.amdgcn.cs.chain, then
1678  // we don't need to save and restore anything.
1679  if (MFI->isChainFunction() && !MF.getFrameInfo().hasTailCall())
1680    return;
1681
1683
1684  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1685  const SIRegisterInfo *TRI = ST.getRegisterInfo();
1686  const SIInstrInfo *TII = ST.getInstrInfo();
1687  bool NeedExecCopyReservedReg = false;
1688
1689  MachineInstr *ReturnMI = nullptr;
1690  for (MachineBasicBlock &MBB : MF) {
1691    for (MachineInstr &MI : MBB) {
1692      // TODO: Walking through all MBBs here would be a bad heuristic. Better
1693      // handle them elsewhere.
1694      if (TII->isWWMRegSpillOpcode(MI.getOpcode()))
1695        NeedExecCopyReservedReg = true;
1696      else if (MI.getOpcode() == AMDGPU::SI_RETURN ||
1697               MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG ||
1698               MI.getOpcode() == AMDGPU::SI_WHOLE_WAVE_FUNC_RETURN ||
1699               (MFI->isChainFunction() &&
1700                TII->isChainCallOpcode(MI.getOpcode()))) {
1701        // We expect all return to be the same size.
1702        assert(!ReturnMI ||
1703               (count_if(MI.operands(), [](auto Op) { return Op.isReg(); }) ==
1704                count_if(ReturnMI->operands(), [](auto Op) { return Op.isReg(); })));
1705        ReturnMI = &MI;
1706      }
1707    }
1708  }
1709
1710  SmallVector<Register> SortedWWMVGPRs;
1711  for (Register Reg : MFI->getWWMReservedRegs()) {
1712    // The shift-back is needed only for the VGPRs used for SGPR spills and they
1713    // are of 32-bit size. SIPreAllocateWWMRegs pass can add tuples into WWM
1714    // reserved registers.
1715    const TargetRegisterClass *RC = TRI->getPhysRegBaseClass(Reg);
1716    if (TRI->getRegSizeInBits(*RC) != 32)
1717      continue;
1718    SortedWWMVGPRs.push_back(Reg);
1719  }
1720
1721  sort(SortedWWMVGPRs, std::greater<Register>());
1722  MFI->shiftWwmVGPRsToLowestRange(MF, SortedWWMVGPRs, SavedVGPRs);
1723
1724  if (MFI->isEntryFunction())
1725    return;
1726
1727  if (MFI->isWholeWaveFunction()) {
1728    // In practice, all the VGPRs are WWM registers, and we will need to save at
1729    // least their inactive lanes. Add them to WWMReservedRegs.
1730    assert(!NeedExecCopyReservedReg &&
1731           "Whole wave functions can use the reg mapped for their i1 argument");
1732
1733    unsigned NumArchVGPRs = ST.getAddressableNumArchVGPRs();
1734    for (MCRegister Reg :
1735         AMDGPU::VGPR_32RegClass.getRegisters().take_front(NumArchVGPRs))
1736      if (MF.getRegInfo().isPhysRegModified(Reg)) {
1737        MFI->reserveWWMRegister(Reg);
1738        MF.begin()->addLiveIn(Reg);
1739      }
1740    MF.begin()->sortUniqueLiveIns();
1741  }
1742
1743  // Remove any VGPRs used in the return value because these do not need to be saved.
1744  // This prevents CSR restore from clobbering return VGPRs.
1745  if (ReturnMI) {
1746    for (auto &Op : ReturnMI->operands()) {
1747      if (Op.isReg())
1748        SavedVGPRs.reset(Op.getReg());
1749    }
1750  }
1751
1752  // Create the stack objects for WWM registers now.
1753  for (Register Reg : MFI->getWWMReservedRegs()) {
1754    const TargetRegisterClass *RC = TRI->getPhysRegBaseClass(Reg);
1755    MFI->allocateWWMSpill(MF, Reg, TRI->getSpillSize(*RC),
1756                          TRI->getSpillAlign(*RC));
1757  }
1758
1759  // Ignore the SGPRs the default implementation found.
1760  SavedVGPRs.clearBitsNotInMask(TRI->getAllVectorRegMask());
1761
  // Do not save AGPRs prior to GFX90A: gfx908 has no direct AGPR loads and
  // stores, so spilling an AGPR also requires a temporary VGPR.
1765  if (!ST.hasGFX90AInsts())
1766    SavedVGPRs.clearBitsInMask(TRI->getAllAGPRRegMask());
1767
1768  determinePrologEpilogSGPRSaves(MF, SavedVGPRs, NeedExecCopyReservedReg);
1769
1770  // The Whole-Wave VGPRs need to be specially inserted in the prolog, so don't
1771  // allow the default insertion to handle them.
1772  for (auto &Reg : MFI->getWWMSpills())
1773    SavedVGPRs.reset(Reg.first);
1774}
1775
// SGPR flavor of determineCalleeSaves: strips vector registers and the
// specially-managed SP/FP from the saved set, and forces a save of the
// return-address SGPR pair where IPRA would otherwise miss its clobbering.
// NOTE(review): extraction dropped the function-name line (orig. 1776) and
// the FuncInfo/super-call lines (orig. 1779-1780) — restore from upstream.
1777                                               BitVector &SavedRegs,
1778                                               RegScavenger *RS) const {
1781  if (MFI->isEntryFunction())
1782    return;
1783
1784  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1785  const SIRegisterInfo *TRI = ST.getRegisterInfo();
1786
1787  // The SP is specifically managed and we don't want extra spills of it.
1788  SavedRegs.reset(MFI->getStackPtrOffsetReg());
1789
1790  const BitVector AllSavedRegs = SavedRegs;
1791  SavedRegs.clearBitsInMask(TRI->getAllVectorRegMask());
1792
1793  // We have to anticipate introducing CSR VGPR spills or spill of caller
1794  // save VGPR reserved for SGPR spills as we now always create stack entry
1795  // for it, if we don't have any stack objects already, since we require a FP
1796  // if there is a call and stack. We will allocate a VGPR for SGPR spills if
1797  // there are any SGPR spills. Whether they are CSR spills or otherwise.
1798  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
1799  const bool WillHaveFP =
1800      FrameInfo.hasCalls() && (AllSavedRegs.any() || MFI->hasSpilledSGPRs());
1801
1802  // FP will be specially managed like SP.
1803  if (WillHaveFP || hasFP(MF))
1804    SavedRegs.reset(MFI->getFrameOffsetReg());
1805
1806  // Return address use with return instruction is hidden through the SI_RETURN
1807  // pseudo. Given that and since the IPRA computes actual register usage and
1808  // does not use CSR list, the clobbering of return address by function calls
1809  // (D117243) or otherwise (D120922) is ignored/not seen by the IPRA's register
1810  // usage collection. This will ensure save/restore of return address happens
1811  // in those scenarios.
1812  const MachineRegisterInfo &MRI = MF.getRegInfo();
1813  Register RetAddrReg = TRI->getReturnAddressReg(MF);
1814  if (!MFI->isEntryFunction() &&
1815      (FrameInfo.hasCalls() || MRI.isPhysRegModified(RetAddrReg))) {
1816    SavedRegs.set(TRI->getSubReg(RetAddrReg, AMDGPU::sub0));
1817    SavedRegs.set(TRI->getSubReg(RetAddrReg, AMDGPU::sub1));
1818  }
1819}
1820
// Coalesce eligible VGPR callee-saves into 32-register "blocks" so they can be
// spilled/restored with single block load/store instructions. Each block is
// described by a super-register plus a 32-bit lane mask (bit i set => the
// register at offset i from the block start is included). Entries merged into
// a block are removed from CSI; the surviving entry is rewritten to name the
// block register and its (possibly shrunken) stack slot.
//
// NOTE(review): Doxygen rendering -- the signature's first line (per the file
// index: static void assignSlotsUsingVGPRBlocks(MachineFunction &MF, const
// GCNSubtarget &ST, std::vector<CalleeSavedInfo> &CSI)) and original line
// 1824 (presumably the SIMachineFunctionInfo *FuncInfo declaration) are not
// visible here.
1822 const GCNSubtarget &ST,
1823 std::vector<CalleeSavedInfo> &CSI) {
1825 MachineFrameInfo &MFI = MF.getFrameInfo();
1826 const SIRegisterInfo *TRI = ST.getRegisterInfo();
1827
// The grouping below scans forward from each register, so it relies on CSI
// being sorted by register number.
1828 assert(
1829 llvm::is_sorted(CSI,
1830 [](const CalleeSavedInfo &A, const CalleeSavedInfo &B) {
1831 return A.getReg() < B.getReg();
1832 }) &&
1833 "Callee saved registers not sorted");
1834
// A CSR qualifies for block ops only if it is a plain VGPR_32 spilled to a
// stack slot (not to another register) and not reserved for WWM use.
1835 auto CanUseBlockOps = [&](const CalleeSavedInfo &CSI) {
1836 return !CSI.isSpilledToReg() &&
1837 TRI->getPhysRegBaseClass(CSI.getReg()) == &AMDGPU::VGPR_32RegClass &&
1838 !FuncInfo->isWWMReservedRegister(CSI.getReg());
1839 };
1840
1841 auto CSEnd = CSI.end();
1842 for (auto CSIt = CSI.begin(); CSIt != CSEnd; ++CSIt) {
1843 Register Reg = CSIt->getReg();
1844 if (!CanUseBlockOps(*CSIt))
1845 continue;
1846
1847 // Find all the regs that will fit in a 32-bit mask starting at the current
1848 // reg and build said mask. It should have 1 for every register that's
1849 // included, with the current register as the least significant bit.
1850 uint32_t Mask = 1;
// remove_if compacts the absorbed entries to the tail; CSEnd shrinks so the
// outer loop never revisits them, and they are erased after the loop.
1851 CSEnd = std::remove_if(
1852 CSIt + 1, CSEnd, [&](const CalleeSavedInfo &CSI) -> bool {
1853 if (CanUseBlockOps(CSI) && CSI.getReg() < Reg + 32) {
1854 Mask |= 1 << (CSI.getReg() - Reg);
1855 return true;
1856 } else {
1857 return false;
1858 }
1859 });
1860
1861 const TargetRegisterClass *BlockRegClass = TRI->getRegClassForBlockOp(MF);
1862 Register RegBlock =
1863 TRI->getMatchingSuperReg(Reg, AMDGPU::sub0, BlockRegClass);
1864 if (!RegBlock) {
1865 // We couldn't find a super register for the block. This can happen if
1866 // the register we started with is too high (e.g. v232 if the maximum is
1867 // v255). We therefore try to get the last register block and figure out
1868 // the mask from there.
1869 Register LastBlockStart =
1870 AMDGPU::VGPR0 + alignDown(Reg - AMDGPU::VGPR0, 32);
1871 RegBlock =
1872 TRI->getMatchingSuperReg(LastBlockStart, AMDGPU::sub0, BlockRegClass);
1873 assert(RegBlock && TRI->isSubRegister(RegBlock, Reg) &&
1874 "Couldn't find super register");
1875 int RegDelta = Reg - LastBlockStart;
// Re-base the mask so bit positions are relative to the block start rather
// than to Reg; the assert guards against shifting set bits out the top.
1876 assert(RegDelta > 0 && llvm::countl_zero(Mask) >= RegDelta &&
1877 "Bad shift amount");
1878 Mask <<= RegDelta;
1879 }
1880
1881 FuncInfo->setMaskForVGPRBlockOps(RegBlock, Mask);
1882
1883 // The stack objects can be a bit smaller than the register block if we know
1884 // some of the high bits of Mask are 0. This may happen often with calling
1885 // conventions where the caller and callee-saved VGPRs are interleaved at
1886 // a small boundary (e.g. 8 or 16).
1887 int UnusedBits = llvm::countl_zero(Mask);
// 4 bytes per excluded lane are shaved off the block-sized spill slot.
1888 unsigned BlockSize = TRI->getSpillSize(*BlockRegClass) - UnusedBits * 4;
1889 int FrameIdx =
1890 MFI.CreateStackObject(BlockSize, TRI->getSpillAlign(*BlockRegClass),
1891 /*isSpillSlot=*/true);
1892 MFI.setIsCalleeSavedObjectIndex(FrameIdx, true);
1893
1894 CSIt->setFrameIdx(FrameIdx);
1895 CSIt->setReg(RegBlock);
1896 }
// Drop the entries that were folded into blocks above.
1897 CSI.erase(CSEnd, CSI.end());
1898 }
1899
// Target override for CSR spill-slot assignment: optionally coalesces VGPR
// CSRs into block spills first, then runs the common implementation.
// Returning true tells generic PEI the target has assigned the slots itself.
//
// NOTE(review): Doxygen rendering -- the first signature lines (per the class
// index: bool SIFrameLowering::assignCalleeSavedSpillSlots(MachineFunction
// &MF, const TargetRegisterInfo *TRI, std::vector<CalleeSavedInfo> &CSI)
// const) are not visible here.
1902 std::vector<CalleeSavedInfo> &CSI) const {
1903 if (CSI.empty())
1904 return true; // Early exit if no callee saved registers are modified!
1905
1906 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1907 bool UseVGPRBlocks = ST.useVGPRBlockOpsForCSR();
1908
1909 if (UseVGPRBlocks)
1910 assignSlotsUsingVGPRBlocks(MF, ST, CSI);
1911
// When block ops are used we must claim the assignment even if the impl
// found nothing else to do, hence the `|| UseVGPRBlocks`.
1912 return assignCalleeSavedSpillSlotsImpl(MF, TRI, CSI) || UseVGPRBlocks;
1913 }
1914
// Redirect FP/BP saves to pre-reserved scratch SGPR copies instead of stack
// slots, when such copies were allocated. Returns false so generic code still
// assigns slots for the remaining CSRs.
//
// NOTE(review): Doxygen rendering -- the first signature lines (per the class
// index: bool SIFrameLowering::assignCalleeSavedSpillSlotsImpl(
// MachineFunction &MF, const TargetRegisterInfo *TRI,
// std::vector<CalleeSavedInfo> &CSI) const) are not visible here.
1917 std::vector<CalleeSavedInfo> &CSI) const {
1918 if (CSI.empty())
1919 return true; // Early exit if no callee saved registers are modified!
1920
1921 const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
1922 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1923 const SIRegisterInfo *RI = ST.getRegisterInfo();
1924 Register FramePtrReg = FuncInfo->getFrameOffsetReg();
1925 Register BasePtrReg = RI->getBaseRegister();
// A zero register here means no scratch-SGPR copy was reserved for FP/BP.
1926 Register SGPRForFPSaveRestoreCopy =
1927 FuncInfo->getScratchSGPRCopyDstReg(FramePtrReg);
1928 Register SGPRForBPSaveRestoreCopy =
1929 FuncInfo->getScratchSGPRCopyDstReg(BasePtrReg);
1930 if (!SGPRForFPSaveRestoreCopy && !SGPRForBPSaveRestoreCopy)
1931 return false;
1932
// Count how many of FP/BP need their CSI entry rewritten, so the loop below
// can stop early.
1933 unsigned NumModifiedRegs = 0;
1934
1935 if (SGPRForFPSaveRestoreCopy)
1936 NumModifiedRegs++;
1937 if (SGPRForBPSaveRestoreCopy)
1938 NumModifiedRegs++;
1939
1940 for (auto &CS : CSI) {
1941 if (CS.getReg() == FramePtrReg.asMCReg() && SGPRForFPSaveRestoreCopy) {
1942 CS.setDstReg(SGPRForFPSaveRestoreCopy);
// NOTE(review): `if (--NumModifiedRegs) break;` exits the loop when the
// count is still nonzero after handling the first match, i.e. when both FP
// and BP copies are active the second entry is never rewritten. The
// inverted condition `if (!--NumModifiedRegs)` looks like the intent --
// confirm against upstream history before changing.
1943 if (--NumModifiedRegs)
1944 break;
1945 } else if (CS.getReg() == BasePtrReg.asMCReg() &&
1946 SGPRForBPSaveRestoreCopy) {
1947 CS.setDstReg(SGPRForBPSaveRestoreCopy);
1948 if (--NumModifiedRegs)
1949 break;
1950 }
1951 }
1952
1953 return false;
1954 }
1955
// Decide whether emergency scavenging slots must be placed near the incoming
// SP: required when the estimated frame is too large for the MUBUF / flat
// scratch immediate offset to reach the far end from the base register.
//
// NOTE(review): Doxygen rendering -- the signature's first line (per the
// class index: bool SIFrameLowering::allocateScavengingFrameIndexesNear-
// IncomingSP(const MachineFunction &MF) const) is not visible here.
1957 const MachineFunction &MF) const {
1958
1959 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1960 const MachineFrameInfo &MFI = MF.getFrameInfo();
1961 const SIInstrInfo *TII = ST.getInstrInfo();
// NOTE(review): if estimateStackSize() returns 0, `EstStackSize - 1` wraps
// to UINT64_MAX and the legality checks below fail, conservatively forcing
// near-SP placement; presumably harmless, but worth confirming.
1962 uint64_t EstStackSize = MFI.estimateStackSize(MF);
1963 uint64_t MaxOffset = EstStackSize - 1;
1964
1965 // We need the emergency stack slots to be allocated in range of the
1966 // MUBUF/flat scratch immediate offset from the base register, so assign these
1967 // first at the incoming SP position.
1968 //
1969 // TODO: We could try sorting the objects to find a hole in the first bytes
1970 // rather than allocating as close to possible. This could save a lot of space
1971 // on frames with alignment requirements.
1972 if (ST.hasFlatScratchEnabled()) {
// (The second argument line of isLegalFLATOffset, original line 1974, is
// not visible in this rendering.)
1973 if (TII->isLegalFLATOffset(MaxOffset, AMDGPUAS::PRIVATE_ADDRESS,
1975 return false;
1976 } else {
1977 if (TII->isLegalMUBUFImmOffset(MaxOffset))
1978 return false;
1979 }
1980
1981 return true;
1982 }
1983
// Emit CSR spills. Only takes over from generic spilling when the subtarget
// supports VGPR block ops for CSRs; block registers with a recorded lane mask
// are stored with SI_BLOCK_SPILL_V1024_SAVE, everything else falls back to
// the default per-register path.
//
// NOTE(review): Doxygen rendering -- the signature lines (per the class
// index: bool SIFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock
// &MBB, MachineBasicBlock::iterator MI, ArrayRef<CalleeSavedInfo> CSI, const
// TargetRegisterInfo *TRI) const) and several interior lines (1993: FuncInfo
// declaration, presumably; 2003: the per-register fallback call; 2013: the
// getMachineMemOperand(PtrInfo, MOStore, ...) line; 2021: the stack-pointer
// operand) are not visible here -- confirm against the original file.
1987 MachineFunction *MF = MBB.getParent();
1988 const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
// Without block-op support, let generic code spill every CSR.
1989 if (!ST.useVGPRBlockOpsForCSR())
1990 return false;
1991
1992 MachineFrameInfo &FrameInfo = MF->getFrameInfo();
1994 const SIInstrInfo *TII = ST.getInstrInfo();
1996
1997 const TargetRegisterClass *BlockRegClass =
1998 static_cast<const SIRegisterInfo *>(TRI)->getRegClassForBlockOp(*MF);
1999 for (const CalleeSavedInfo &CS : CSI) {
2000 Register Reg = CS.getReg();
// Non-block registers (or blocks without a mask) take the fallback path on
// the line not shown here (original line 2003).
2001 if (!BlockRegClass->contains(Reg) ||
2002 !FuncInfo->hasMaskForVGPRBlockOps(Reg)) {
2004 continue;
2005 }
2006
2007 // Build a scratch block store.
2008 uint32_t Mask = FuncInfo->getMaskForVGPRBlockOps(Reg);
2009 int FrameIndex = CS.getFrameIdx();
2010 MachinePointerInfo PtrInfo =
2011 MachinePointerInfo::getFixedStack(*MF, FrameIndex);
2012 MachineMemOperand *MMO =
2014 FrameInfo.getObjectSize(FrameIndex),
2015 FrameInfo.getObjectAlign(FrameIndex));
2016
// The mask immediate tells the pseudo which lanes of the 1024-bit block are
// live; the register is not killed since callee code still uses it.
2017 BuildMI(MBB, MI, MI->getDebugLoc(),
2018 TII->get(AMDGPU::SI_BLOCK_SPILL_V1024_SAVE))
2019 .addReg(Reg, getKillRegState(false))
2020 .addFrameIndex(FrameIndex)
2022 .addImm(0)
2023 .addImm(Mask)
2024 .addMemOperand(MMO);
2025
2026 FuncInfo->setHasSpilledVGPRs();
2027
2028 // Add the register to the liveins. This is necessary because if any of the
2029 // VGPRs in the register block is reserved (e.g. if it's a WWM register),
2030 // then the whole block will be marked as reserved and `updateLiveness` will
2031 // skip it.
2032 MBB.addLiveIn(Reg);
2033 }
2034 MBB.sortUniqueLiveIns();
2035
2036 return true;
2037 }
2038
// Emit CSR restores; mirror of spillCalleeSavedRegisters. Block registers
// with a recorded lane mask are reloaded with SI_BLOCK_SPILL_V1024_RESTORE
// (iterating CSI in reverse), everything else falls back to the default
// per-register restore.
//
// NOTE(review): Doxygen rendering -- the signature lines (per the class
// index: bool SIFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock
// &MBB, MachineBasicBlock::iterator MI, MutableArrayRef<CalleeSavedInfo>
// CSI, const TargetRegisterInfo *TRI) const) and several interior lines
// (2047: FuncInfo declaration, presumably; 2056: the per-register fallback
// call; 2065: the MF->getMachineMemOperand( line) are not visible here.
2042 MachineFunction *MF = MBB.getParent();
2043 const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
// Without block-op support, let generic code restore every CSR.
2044 if (!ST.useVGPRBlockOpsForCSR())
2045 return false;
2046
2048 MachineFrameInfo &MFI = MF->getFrameInfo();
2049 const SIInstrInfo *TII = ST.getInstrInfo();
2050 const SIRegisterInfo *SITRI = static_cast<const SIRegisterInfo *>(TRI);
2051 const TargetRegisterClass *BlockRegClass = SITRI->getRegClassForBlockOp(*MF);
2052 for (const CalleeSavedInfo &CS : reverse(CSI)) {
2053 Register Reg = CS.getReg();
// Non-block registers (or blocks without a mask) take the fallback path on
// the line not shown here (original line 2056).
2054 if (!BlockRegClass->contains(Reg) ||
2055 !FuncInfo->hasMaskForVGPRBlockOps(Reg)) {
2057 continue;
2058 }
2059
2060 // Build a scratch block load.
2061 uint32_t Mask = FuncInfo->getMaskForVGPRBlockOps(Reg);
2062 int FrameIndex = CS.getFrameIdx();
2063 MachinePointerInfo PtrInfo =
2064 MachinePointerInfo::getFixedStack(*MF, FrameIndex);
2066 PtrInfo, MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIndex),
2067 MFI.getObjectAlign(FrameIndex));
2068
2069 auto MIB = BuildMI(MBB, MI, MI->getDebugLoc(),
2070 TII->get(AMDGPU::SI_BLOCK_SPILL_V1024_RESTORE), Reg)
2071 .addFrameIndex(FrameIndex)
2072 .addReg(FuncInfo->getStackPtrOffsetReg())
2073 .addImm(0)
2074 .addImm(Mask)
2075 .addMemOperand(MMO);
// Masked-off lanes keep their prior contents, so they must appear as
// implicit uses on the restore.
2076 SITRI->addImplicitUsesForBlockCSRLoad(MIB, Reg);
2077
2078 // Add the register to the liveins. This is necessary because if any of the
2079 // VGPRs in the register block is reserved (e.g. if it's a WWM register),
2080 // then the whole block will be marked as reserved and `updateLiveness` will
2081 // skip it.
2082 MBB.addLiveIn(Reg);
2083 }
2084
2085 MBB.sortUniqueLiveIns();
2086 return true;
2087 }
2088
// Lower ADJCALLSTACKUP/DOWN pseudos. With a reserved call frame the pseudos
// are simply erased; otherwise SP is bumped by the (scaled, aligned) amount,
// negated for the destroy direction.
//
// NOTE(review): Doxygen rendering -- the signature's first line (per the
// class index: MachineBasicBlock::iterator
// SIFrameLowering::eliminateCallFramePseudoInstr(MachineFunction &MF,
// MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const) and original
// lines 2091-2092 plus 2107-2108 (presumably the SPReg lookup) are not
// visible here.
2090 MachineFunction &MF,
2093 int64_t Amount = I->getOperand(0).getImm();
2094 if (Amount == 0)
2095 return MBB.erase(I);
2096
2097 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
2098 const SIInstrInfo *TII = ST.getInstrInfo();
2099 const DebugLoc &DL = I->getDebugLoc();
2100 unsigned Opc = I->getOpcode();
2101 bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
2102 uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;
2103
2104 if (!hasReservedCallFrame(MF)) {
2105 Amount = alignTo(Amount, getStackAlign());
2106 assert(isUInt<32>(Amount) && "exceeded stack address space size");
2109
// SP on this target is in scaled (per-lane byte) units, hence the scale.
2110 Amount *= getScratchScaleFactor(ST);
2111 if (IsDestroy)
2112 Amount = -Amount;
2113 auto Add = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), SPReg)
2114 .addReg(SPReg)
2115 .addImm(Amount);
2116 Add->getOperand(3).setIsDead(); // Mark SCC as dead.
2117 } else if (CalleePopAmount != 0) {
2118 llvm_unreachable("is this used?");
2119 }
2120
// Return the iterator after the erased pseudo, as the hook contract requires.
2121 return MBB.erase(I);
2122 }
2123
2124 /// Returns true if the frame will require a reference to the stack pointer.
2125 ///
2126 /// This is the set of conditions common to setting up the stack pointer in a
2127 /// kernel, and for using a frame pointer in a callable function.
2128 ///
2129 /// FIXME: Should also check hasOpaqueSPAdjustment and if any inline asm
2130 /// references SP.
// NOTE(review): Doxygen rendering -- the signature line (per the file index:
// static bool frameTriviallyRequiresSP(const MachineFrameInfo &MFI), original
// line 2131) is not visible here.
// Variable-sized objects, stackmaps, and patchpoints all need a live SP.
2132 return MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint();
2133 }
2134
2135 // The FP for kernels is always known 0, so we never really need to setup an
2136 // explicit register for it. However, DisableFramePointerElim will force us to
2137 // use a register for it.
// NOTE(review): Doxygen rendering -- the signature line (per the class index:
// bool SIFrameLowering::hasFPImpl(const MachineFunction &MF) const, original
// line 2138) and lines 2144 (the second half of the `if` condition) and
// 2156-2157 (the tail of the final return expression, presumably including
// the DisableFramePointerElim check) are not visible here.
2139 const MachineFrameInfo &MFI = MF.getFrameInfo();
2140
2141 // For entry functions we can use an immediate offset in most cases,
2142 // so the presence of calls doesn't imply we need a distinct frame pointer.
2143 if (MFI.hasCalls() &&
2145 // All offsets are unsigned, so need to be addressed in the same direction
2146 // as stack growth.
2147
2148 // FIXME: This function is pretty broken, since it can be called before the
2149 // frame layout is determined or CSR spills are inserted.
2150 return MFI.getStackSize() != 0;
2151 }
2152
2153 return frameTriviallyRequiresSP(MFI) || MFI.isFrameAddressTaken() ||
2154 MF.getSubtarget<GCNSubtarget>().getRegisterInfo()->hasStackRealignment(
2155 MF) ||
2158 }
2159
// Whether scratch may need to be reserved for CWSR: requires dynamic VGPRs to
// be enabled, plus further conditions on original lines 2163-2164 that are
// not visible in this Doxygen rendering. The signature (per the class index:
// bool SIFrameLowering::mayReserveScratchForCWSR(const MachineFunction &MF)
// const) is likewise only partially shown.
2161 const MachineFunction &MF) const {
2162 return MF.getInfo<SIMachineFunctionInfo>()->isDynamicVGPREnabled() &&
2165 }
2166
2167 // This is essentially a reduced version of hasFP for entry functions. Since the
2168 // stack pointer is known 0 on entry to kernels, we never really need an FP
2169 // register. We may need to initialize the stack pointer depending on the frame
2170 // properties, which logically overlaps many of the cases where an ordinary
2171 // function would require an FP.
// NOTE(review): Doxygen rendering -- the signature's first line (per the
// class index: bool SIFrameLowering::requiresStackPointerReference(const
// MachineFunction &MF) const, original line 2172) and line 2175 (the first
// line of the assert, presumably checking isEntryFunction) are not visible
// here.
2173 const MachineFunction &MF) const {
2174 // Callable functions always require a stack pointer reference.
2176 "only expected to call this for entry points functions");
2177
2178 const MachineFrameInfo &MFI = MF.getFrameInfo();
2179
2180 // Entry points ordinarily don't need to initialize SP. We have to set it up
2181 // for callees if there are any. Also note tail calls are only possible via
2182 // the `llvm.amdgcn.cs.chain` intrinsic.
2183 if (MFI.hasCalls() || MFI.hasTailCall())
2184 return true;
2185
2186 // We still need to initialize the SP if we're doing anything weird that
2187 // references the SP, like variable sized stack objects.
2188 return frameTriviallyRequiresSP(MFI);
2189 }
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Provides AMDGPU specific target descriptions.
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static const Function * getParent(const Value *V)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
AMD GCN specific subclass of TargetSubtarget.
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
A set of register units.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
static constexpr MCPhysReg FPReg
static constexpr MCPhysReg SPReg
This file declares the machine register scavenger class.
static void buildEpilogRestore(const GCNSubtarget &ST, const SIRegisterInfo &TRI, const SIMachineFunctionInfo &FuncInfo, LiveRegUnits &LiveUnits, MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SpillReg, int FI, Register FrameReg, int64_t DwordOff=0)
static cl::opt< bool > EnableSpillVGPRToAGPR("amdgpu-spill-vgpr-to-agpr", cl::desc("Enable spilling VGPRs to AGPRs"), cl::ReallyHidden, cl::init(true))
static void getVGPRSpillLaneOrTempRegister(MachineFunction &MF, LiveRegUnits &LiveUnits, Register SGPR, const TargetRegisterClass &RC=AMDGPU::SReg_32_XM0_XEXECRegClass, bool IncludeScratchCopy=true)
Query target location for spilling SGPRs IncludeScratchCopy : Also look for free scratch SGPRs.
static void buildGitPtr(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, const SIInstrInfo *TII, Register TargetReg)
static bool allStackObjectsAreDead(const MachineFrameInfo &MFI)
static void buildPrologSpill(const GCNSubtarget &ST, const SIRegisterInfo &TRI, const SIMachineFunctionInfo &FuncInfo, LiveRegUnits &LiveUnits, MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SpillReg, int FI, Register FrameReg, int64_t DwordOff=0)
static Register buildScratchExecCopy(LiveRegUnits &LiveUnits, MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool IsProlog, bool EnableInactiveLanes)
static bool frameTriviallyRequiresSP(const MachineFrameInfo &MFI)
Returns true if the frame will require a reference to the stack pointer.
static void initLiveUnits(LiveRegUnits &LiveUnits, const SIRegisterInfo &TRI, const SIMachineFunctionInfo *FuncInfo, MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, bool IsProlog)
static bool allSGPRSpillsAreDead(const MachineFunction &MF)
static MCRegister findScratchNonCalleeSaveRegister(MachineRegisterInfo &MRI, LiveRegUnits &LiveUnits, const TargetRegisterClass &RC, bool Unused=false)
static MCRegister findUnusedRegister(MachineRegisterInfo &MRI, const LiveRegUnits &LiveUnits, const TargetRegisterClass &RC)
static void assignSlotsUsingVGPRBlocks(MachineFunction &MF, const GCNSubtarget &ST, std::vector< CalleeSavedInfo > &CSI)
static unsigned getScratchScaleFactor(const GCNSubtarget &ST)
#define LLVM_DEBUG(...)
Definition Debug.h:114
static const int BlockSize
Definition TarWriter.cpp:33
static const LaneMaskConstants & get(const GCNSubtarget &ST)
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition ArrayRef.h:186
bool test(unsigned Idx) const
Definition BitVector.h:480
BitVector & reset()
Definition BitVector.h:411
void clearBitsNotInMask(const uint32_t *Mask, unsigned MaskWords=~0u)
clearBitsNotInMask - Clear a bit in this vector for every '0' bit in Mask.
Definition BitVector.h:744
BitVector & set()
Definition BitVector.h:370
bool any() const
any - Returns true if any bit is set.
Definition BitVector.h:189
void clearBitsInMask(const uint32_t *Mask, unsigned MaskWords=~0u)
clearBitsInMask - Clear any bits in this vector that are set in Mask.
Definition BitVector.h:732
iterator_range< const_set_bits_iterator > set_bits() const
Definition BitVector.h:159
bool empty() const
empty - Tests whether there are no bits in this bitvector.
Definition BitVector.h:175
The CalleeSavedInfo class tracks the information need to locate where a callee saved register is in t...
MCRegister getReg() const
A debug info location.
Definition DebugLoc.h:123
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition Function.h:272
const HexagonRegisterInfo & getRegisterInfo() const
A set of register units used to track register liveness.
bool available(MCRegister Reg) const
Returns true if no part of physical register Reg is live.
void init(const TargetRegisterInfo &TRI)
Initialize and clear the set.
void addReg(MCRegister Reg)
Adds register units covered by physical register Reg.
LLVM_ABI void stepBackward(const MachineInstr &MI)
Updates liveness when stepping backwards over the instruction MI.
LLVM_ABI void addLiveOuts(const MachineBasicBlock &MBB)
Adds registers living out of block MBB.
void removeReg(MCRegister Reg)
Removes all register units covered by physical register Reg.
bool empty() const
Returns true if the set is empty.
LLVM_ABI void addLiveIns(const MachineBasicBlock &MBB)
Adds registers living into block MBB.
Describe properties that are true of each instruction in the target description file.
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
bool hasVarSizedObjects() const
This method may be called any time after instruction selection is complete to determine if the stack ...
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
bool hasCalls() const
Return true if the current function has any function calls.
bool isFrameAddressTaken() const
This method may be called any time after instruction selection is complete to determine if there is a...
Align getMaxAlign() const
Return the alignment in bytes that this function must be aligned to, which is greater than the defaul...
bool hasPatchPoint() const
This method may be called any time after instruction selection is complete to determine if there is a...
LLVM_ABI int CreateSpillStackObject(uint64_t Size, Align Alignment)
Create a new statically sized stack object that represents a spill slot, returning a nonnegative iden...
bool hasTailCall() const
Returns true if the function contains a tail call.
bool hasStackMap() const
This method may be called any time after instruction selection is complete to determine if there is a...
void RemoveStackObject(int ObjectIdx)
Remove or mark dead a statically sized stack object.
int getObjectIndexEnd() const
Return one past the maximum frame object index.
uint8_t getStackID(int ObjectIdx) const
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
int getObjectIndexBegin() const
Return the minimum frame object index.
bool isDeadObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a dead object.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineBasicBlock & front() const
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned TargetFlags=0) const
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Representation of each machine instruction.
mop_range operands()
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
void setIsDead(bool Val=true)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool isReserved(MCRegister PhysReg) const
isReserved - Returns true when PhysReg is a reserved register.
bool isAllocatable(MCRegister PhysReg) const
isAllocatable - Returns true when PhysReg belongs to an allocatable register class and it hasn't been...
LLVM_ABI const MCPhysReg * getCalleeSavedRegs() const
Returns list of callee saved registers.
void reserveReg(MCRegister PhysReg, const TargetRegisterInfo *TRI)
reserveReg – Mark a register as reserved so checks like isAllocatable will not suggest using it.
void addLiveIn(MCRegister Reg, Register vreg=Register())
addLiveIn - Add the specified register as a live-in.
LLVM_ABI void replaceRegWith(Register FromReg, Register ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
LLVM_ABI bool isPhysRegModified(MCRegister PhysReg, bool SkipNoReturnDef=false) const
Return true if the specified register is modified in this function.
LLVM_ABI bool isPhysRegUsed(MCRegister PhysReg, bool SkipRegMaskTest=false) const
Return true if the specified register is modified or read in this function.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition ArrayRef.h:298
PrologEpilogSGPRSpillBuilder(Register Reg, const PrologEpilogSGPRSaveRestoreInfo SI, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, const SIInstrInfo *TII, const SIRegisterInfo &TRI, LiveRegUnits &LiveUnits, Register FrameReg)
Wrapper class representing virtual and physical registers.
Definition Register.h:20
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
Definition Register.h:107
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition Register.h:83
void determinePrologEpilogSGPRSaves(MachineFunction &MF, BitVector &SavedRegs, bool NeedExecCopyReservedReg) const
StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg) const override
getFrameIndexReference - This method should return the base register and offset used to reference a f...
void processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS=nullptr) const override
processFunctionBeforeFrameFinalized - This method is called immediately before the specified function...
bool mayReserveScratchForCWSR(const MachineFunction &MF) const
bool allocateScavengingFrameIndexesNearIncomingSP(const MachineFunction &MF) const override
Control the placement of special register scavenging spill slots when allocating a stack frame.
bool requiresStackPointerReference(const MachineFunction &MF) const
void emitEntryFunctionPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const override
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...
void emitCSRSpillStores(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL, LiveRegUnits &LiveUnits, Register FrameReg, Register FramePtrRegScratchCopy) const
bool hasFPImpl(const MachineFunction &MF) const override
bool assignCalleeSavedSpillSlotsImpl(MachineFunction &MF, const TargetRegisterInfo *TRI, std::vector< CalleeSavedInfo > &CSI) const
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, ArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI) const override
spillCalleeSavedRegisters - Issues instruction(s) to spill all callee saved registers and returns tru...
bool assignCalleeSavedSpillSlots(MachineFunction &MF, const TargetRegisterInfo *TRI, std::vector< CalleeSavedInfo > &CSI) const override
assignCalleeSavedSpillSlots - Allows target to override spill slot assignment logic.
void determineCalleeSavesSGPR(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override
void emitCSRSpillRestores(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL, LiveRegUnits &LiveUnits, Register FrameReg, Register FramePtrRegScratchCopy) const
void processFunctionBeforeFrameIndicesReplaced(MachineFunction &MF, RegScavenger *RS=nullptr) const override
processFunctionBeforeFrameIndicesReplaced - This method is called immediately before MO_FrameIndex op...
bool isSupportedStackID(TargetStackID::Value ID) const override
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override
emitProlog/emitEpilog - These methods insert prolog and epilog code into the function.
MachineBasicBlock::iterator eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
This method is called during prolog/epilog code insertion to eliminate call frame setup and destroy p...
bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, MutableArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI) const override
restoreCalleeSavedRegisters - Issues instruction(s) to restore all callee saved registers and returns...
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
ArrayRef< PrologEpilogSGPRSpill > getPrologEpilogSGPRSpills() const
const WWMSpillsMap & getWWMSpills() const
void getAllScratchSGPRCopyDstRegs(SmallVectorImpl< Register > &Regs) const
ArrayRef< MCPhysReg > getAGPRSpillVGPRs() const
void shiftWwmVGPRsToLowestRange(MachineFunction &MF, SmallVectorImpl< Register > &WWMVGPRs, BitVector &SavedVGPRs)
void setMaskForVGPRBlockOps(Register RegisterBlock, uint32_t Mask)
GCNUserSGPRUsageInfo & getUserSGPRInfo()
void allocateWWMSpill(MachineFunction &MF, Register VGPR, uint64_t Size=4, Align Alignment=Align(4))
void setVGPRToAGPRSpillDead(int FrameIndex)
Register getScratchRSrcReg() const
Returns the physical register reserved for use as the resource descriptor for scratch accesses.
ArrayRef< MCPhysReg > getVGPRSpillAGPRs() const
int getScavengeFI(MachineFrameInfo &MFI, const SIRegisterInfo &TRI)
uint32_t getMaskForVGPRBlockOps(Register RegisterBlock) const
bool hasMaskForVGPRBlockOps(Register RegisterBlock) const
bool hasPrologEpilogSGPRSpillEntry(Register Reg) const
Register getGITPtrLoReg(const MachineFunction &MF) const
void setVGPRForAGPRCopy(Register NewVGPRForAGPRCopy)
bool allocateVGPRSpillToAGPR(MachineFunction &MF, int FI, bool isAGPRtoVGPR)
Reserve AGPRs or VGPRs to support spilling for FrameIndex FI.
void splitWWMSpillRegisters(MachineFunction &MF, SmallVectorImpl< std::pair< Register, int > > &CalleeSavedRegs, SmallVectorImpl< std::pair< Register, int > > &ScratchRegs) const
bool isWWMReservedRegister(Register Reg) const
ArrayRef< SIRegisterInfo::SpilledReg > getSGPRSpillToPhysicalVGPRLanes(int FrameIndex) const
bool allocateSGPRSpillToVGPRLane(MachineFunction &MF, int FI, bool SpillToPhysVGPRLane=false, bool IsPrologEpilog=false)
void setLongBranchReservedReg(Register Reg)
void setHasSpilledVGPRs(bool Spill=true)
bool removeDeadFrameIndices(MachineFrameInfo &MFI, bool ResetSGPRSpillStackIDs)
If ResetSGPRSpillStackIDs is true, reset the stack ID from sgpr-spill to the default stack.
void setScratchReservedForDynamicVGPRs(unsigned SizeInBytes)
MCRegister getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const
bool checkIndexInPrologEpilogSGPRSpills(int FI) const
const ReservedRegSet & getWWMReservedRegs() const
const PrologEpilogSGPRSaveRestoreInfo & getPrologEpilogSGPRSaveRestoreInfo(Register Reg) const
void setIsStackRealigned(bool Realigned=true)
void addToPrologEpilogSGPRSpills(Register Reg, PrologEpilogSGPRSaveRestoreInfo SI)
Register getScratchSGPRCopyDstReg(Register Reg) const
Register getFrameRegister(const MachineFunction &MF) const override
const TargetRegisterClass * getRegClassForBlockOp(const MachineFunction &MF) const
void addImplicitUsesForBlockCSRLoad(MachineInstrBuilder &MIB, Register BlockReg) const
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
Definition TypeSize.h:30
int64_t getFixed() const
Returns the fixed component of the stack.
Definition TypeSize.h:46
bool hasFP(const MachineFunction &MF) const
hasFP - Return true if the specified function should have a dedicated frame pointer register.
virtual bool hasReservedCallFrame(const MachineFunction &MF) const
hasReservedCallFrame - Under normal circumstances, when a frame pointer is not required,...
virtual void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...
void restoreCalleeSavedRegister(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const CalleeSavedInfo &CS, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) const
void spillCalleeSavedRegister(MachineBasicBlock &SaveBlock, MachineBasicBlock::iterator MI, const CalleeSavedInfo &CS, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) const
spillCalleeSavedRegister - Default implementation for spilling a single callee saved register.
Align getStackAlign() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
TargetOptions Options
LLVM_ABI bool DisableFramePointerElim(const MachineFunction &MF) const
DisableFramePointerElim - This returns true if frame pointer elimination optimization should be disab...
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
@ PRIVATE_ADDRESS
Address space for private memory.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI, unsigned DynamicVGPRBlockSize, std::optional< bool > EnableWavefrontSize32)
uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset)
Convert ByteOffset to dwords if the subtarget uses dword SMRD immediate offsets.
LLVM_READNONE constexpr bool isEntryFunctionCC(CallingConv::ID CC)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
LLVM_READNONE constexpr bool isCompute(CallingConv::ID CC)
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:532
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
constexpr RegState getKillRegState(bool B)
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:634
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value and is Skew mod Align.
Definition MathExtras.h:546
void clearDebugInfoForSpillFIs(MachineFrameInfo &MFI, MachineBasicBlock &MBB, const BitVector &SpillFIs)
Replace frame index operands with null registers in debug value instructions for the specified spill ...
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition bit.h:261
auto reverse(ContainerTy &&C)
Definition STLExtras.h:408
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1636
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
Definition MathExtras.h:150
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
auto make_first_range(ContainerTy &&c)
Given a container of pairs, return a range over the first elements.
Definition STLExtras.h:1399
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
constexpr uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
bool is_sorted(R &&Range, Compare C)
Wrapper function around std::is_sorted to check if elements in a range R are sorted with respect to a...
Definition STLExtras.h:1970
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
Definition MathExtras.h:155
@ And
Bitwise or logical AND of integers.
@ Add
Sum of integers.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:2019
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
static constexpr uint64_t encode(Fields... Values)
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.