LLVM 23.0.0git
SIFrameLowering.cpp
Go to the documentation of this file.
1//===----------------------- SIFrameLowering.cpp --------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//==-----------------------------------------------------------------------===//
8
9#include "SIFrameLowering.h"
10#include "AMDGPU.h"
11#include "AMDGPULaneMaskUtils.h"
12#include "GCNSubtarget.h"
15#include "SISpillUtils.h"
20
21using namespace llvm;
22
23#define DEBUG_TYPE "frame-info"
24
26 "amdgpu-spill-vgpr-to-agpr",
27 cl::desc("Enable spilling VGPRs to AGPRs"),
29 cl::init(true));
30
31// Find a register matching \p RC from \p LiveUnits which is unused and
32// available throughout the function. On failure, returns AMDGPU::NoRegister.
33// TODO: Rewrite the loop here to iterate over MCRegUnits instead of
34// MCRegisters. This should reduce the number of iterations and avoid redundant
35// checking.
37 const LiveRegUnits &LiveUnits,
38 const TargetRegisterClass &RC) {
39 for (MCRegister Reg : RC) {
40 if (!MRI.isPhysRegUsed(Reg) && LiveUnits.available(Reg) &&
41 !MRI.isReserved(Reg))
42 return Reg;
43 }
44 return MCRegister();
45}
46
47// Find a scratch register that we can use in the prologue. We avoid using
48// callee-save registers since they may appear to be free when this is called
49// from canUseAsPrologue (during shrink wrapping), but then no longer be free
50// when this is called from emitPrologue.
52 MachineRegisterInfo &MRI, LiveRegUnits &LiveUnits,
53 const TargetRegisterClass &RC, bool Unused = false) {
54 // Mark callee saved registers as used so we will not choose them.
55 const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
56 for (unsigned i = 0; CSRegs[i]; ++i)
57 LiveUnits.addReg(CSRegs[i]);
58
59 // We are looking for a register that can be used throughout the entire
60 // function, so any use is unacceptable.
61 if (Unused)
62 return findUnusedRegister(MRI, LiveUnits, RC);
63
64 for (MCRegister Reg : RC) {
65 if (LiveUnits.available(Reg) && !MRI.isReserved(Reg))
66 return Reg;
67 }
68
69 return MCRegister();
70}
71
72/// Query target location for spilling SGPRs
73/// \p IncludeScratchCopy : Also look for free scratch SGPRs
// NOTE(review): extraction artifact — the signature line (original 74), the
// SIMachineFunctionInfo lookup that defines MFI (78), the stack-ID argument of
// CreateStackObject (97), and the calls recording the chosen save method
// (104-106, 114, 117-119, 124-126) are missing from this listing. The visible
// strategy is three-tiered: (1) copy into a free scratch SGPR, else (2) spill
// into a lane of a physical VGPR, else (3) spill to a memory stack slot.
75 MachineFunction &MF, LiveRegUnits &LiveUnits, Register SGPR,
76 const TargetRegisterClass &RC = AMDGPU::SReg_32_XM0_XEXECRegClass,
77 bool IncludeScratchCopy = true) {
79 MachineFrameInfo &FrameInfo = MF.getFrameInfo();
80
81 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
82 const SIRegisterInfo *TRI = ST.getRegisterInfo();
// Spill size/alignment come from the register class being saved.
83 unsigned Size = TRI->getSpillSize(RC);
84 Align Alignment = TRI->getSpillAlign(RC);
85
86 // We need to save and restore the given SGPR.
87
88 Register ScratchSGPR;
89 // 1: Try to save the given register into an unused scratch SGPR. The
90 // LiveUnits should have all the callee saved registers marked as used. For
91 // certain cases we skip copy to scratch SGPR.
92 if (IncludeScratchCopy)
93 ScratchSGPR = findUnusedRegister(MF.getRegInfo(), LiveUnits, RC);
94
95 if (!ScratchSGPR) {
96 int FI = FrameInfo.CreateStackObject(Size, Alignment, true, nullptr,
98
99 if (TRI->spillSGPRToVGPR() &&
100 MFI->allocateSGPRSpillToVGPRLane(MF, FI, /*SpillToPhysVGPRLane=*/true,
101 /*IsPrologEpilog=*/true)) {
102 // 2: There's no free lane to spill, and no free register to save the
103 // SGPR, so we're forced to take another VGPR to use for the spill.
107
108 LLVM_DEBUG(auto Spill = MFI->getSGPRSpillToPhysicalVGPRLanes(FI).front();
109 dbgs() << printReg(SGPR, TRI) << " requires fallback spill to "
110 << printReg(Spill.VGPR, TRI) << ':' << Spill.Lane
111 << '\n';);
112 } else {
113 // Remove dead <FI> index
115 // 3: If all else fails, spill the register to memory.
116 FI = FrameInfo.CreateSpillStackObject(Size, Alignment);
118 SGPR,
120 LLVM_DEBUG(dbgs() << "Reserved FI " << FI << " for spilling "
121 << printReg(SGPR, TRI) << '\n');
122 }
123 } else {
// Keep the chosen scratch SGPR marked live so later searches do not hand the
// same register out again. (The missing lines 124-126 presumably record the
// COPY_TO_SCRATCH_SGPR choice — verify against upstream.)
127 LiveUnits.addReg(ScratchSGPR);
128 LLVM_DEBUG(dbgs() << "Saving " << printReg(SGPR, TRI) << " with copy to "
129 << printReg(ScratchSGPR, TRI) << '\n');
130 }
131}
132
133// We need to specially emit stack operations here because a different frame
134// register is used than in the rest of the function, as getFrameRegister would
135// use.
136static void buildPrologSpill(const GCNSubtarget &ST, const SIRegisterInfo &TRI,
137 const SIMachineFunctionInfo &FuncInfo,
138 LiveRegUnits &LiveUnits, MachineFunction &MF,
141 Register SpillReg, int FI, Register FrameReg,
142 int64_t DwordOff = 0) {
143 unsigned Opc = ST.hasFlatScratchEnabled() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
144 : AMDGPU::BUFFER_STORE_DWORD_OFFSET;
145
146 MachineFrameInfo &FrameInfo = MF.getFrameInfo();
149 PtrInfo, MachineMemOperand::MOStore, FrameInfo.getObjectSize(FI),
150 FrameInfo.getObjectAlign(FI));
151 LiveUnits.addReg(SpillReg);
152 bool IsKill = !MBB.isLiveIn(SpillReg);
153 TRI.buildSpillLoadStore(MBB, I, DL, Opc, FI, SpillReg, IsKill, FrameReg,
154 DwordOff, MMO, nullptr, &LiveUnits);
155 if (IsKill)
156 LiveUnits.removeReg(SpillReg);
157}
158
159static void buildEpilogRestore(const GCNSubtarget &ST,
160 const SIRegisterInfo &TRI,
161 const SIMachineFunctionInfo &FuncInfo,
162 LiveRegUnits &LiveUnits, MachineFunction &MF,
165 const DebugLoc &DL, Register SpillReg, int FI,
166 Register FrameReg, int64_t DwordOff = 0) {
167 unsigned Opc = ST.hasFlatScratchEnabled() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
168 : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
169
170 MachineFrameInfo &FrameInfo = MF.getFrameInfo();
173 PtrInfo, MachineMemOperand::MOLoad, FrameInfo.getObjectSize(FI),
174 FrameInfo.getObjectAlign(FI));
175 TRI.buildSpillLoadStore(MBB, I, DL, Opc, FI, SpillReg, false, FrameReg,
176 DwordOff, MMO, nullptr, &LiveUnits);
177}
178
180 const DebugLoc &DL, const SIInstrInfo *TII,
181 Register TargetReg) {
182 MachineFunction *MF = MBB.getParent();
184 const SIRegisterInfo *TRI = &TII->getRegisterInfo();
185 const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
186 Register TargetLo = TRI->getSubReg(TargetReg, AMDGPU::sub0);
187 Register TargetHi = TRI->getSubReg(TargetReg, AMDGPU::sub1);
188
189 if (MFI->getGITPtrHigh() != 0xffffffff) {
190 BuildMI(MBB, I, DL, SMovB32, TargetHi)
191 .addImm(MFI->getGITPtrHigh())
192 .addReg(TargetReg, RegState::ImplicitDefine);
193 } else {
194 const MCInstrDesc &GetPC64 = TII->get(AMDGPU::S_GETPC_B64_pseudo);
195 BuildMI(MBB, I, DL, GetPC64, TargetReg);
196 }
197 Register GitPtrLo = MFI->getGITPtrLoReg(*MF);
198 MF->getRegInfo().addLiveIn(GitPtrLo);
199 MBB.addLiveIn(GitPtrLo);
200 BuildMI(MBB, I, DL, SMovB32, TargetLo)
201 .addReg(GitPtrLo);
202}
203
204static void initLiveUnits(LiveRegUnits &LiveUnits, const SIRegisterInfo &TRI,
205 const SIMachineFunctionInfo *FuncInfo,
207 MachineBasicBlock::iterator MBBI, bool IsProlog) {
208 if (LiveUnits.empty()) {
209 LiveUnits.init(TRI);
210 if (IsProlog) {
211 LiveUnits.addLiveIns(MBB);
212 } else {
213 // In epilog.
214 LiveUnits.addLiveOuts(MBB);
215 LiveUnits.stepBackward(*MBBI);
216 }
217 }
218}
219
220namespace llvm {
221
222// SpillBuilder to save/restore special SGPR spills like the one needed for FP,
223// BP, etc. These spills are delayed until the current function's frame is
224// finalized. For a given register, the builder uses the
225// PrologEpilogSGPRSaveRestoreInfo to decide the spill method.
// NOTE(review): extraction artifact — the class head (originals 226-228), some
// member declarations (236: MBB/MI; 250, 272, 291, 298, 320-321: hyperlinked
// statements), part of the constructor parameter list (341-344), and the case
// labels of the save()/restore() switches (362, 364, 366, 373, 375, 377) are
// missing from this listing.
229 MachineFunction &MF;
230 const GCNSubtarget &ST;
231 MachineFrameInfo &MFI;
232 SIMachineFunctionInfo *FuncInfo;
233 const SIInstrInfo *TII;
234 const SIRegisterInfo &TRI;
235 Register SuperReg;
237 LiveRegUnits &LiveUnits;
238 const DebugLoc &DL;
239 Register FrameReg;
240 ArrayRef<int16_t> SplitParts;
241 unsigned NumSubRegs;
242 unsigned EltSize = 4;
243
// Save path 1: copy each 32-bit sub-register through a temporary VGPR and
// store it to the memory stack slot FI.
244 void saveToMemory(const int FI) const {
245 MachineRegisterInfo &MRI = MF.getRegInfo();
246 assert(!MFI.isDeadObjectIndex(FI));
247
248 initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MI, /*IsProlog*/ true);
249
251 MRI, LiveUnits, AMDGPU::VGPR_32RegClass);
252 if (!TmpVGPR)
253 report_fatal_error("failed to find free scratch register");
254
255 for (unsigned I = 0, DwordOff = 0; I < NumSubRegs; ++I) {
256 Register SubReg = NumSubRegs == 1
257 ? SuperReg
258 : Register(TRI.getSubReg(SuperReg, SplitParts[I]));
259 BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
260 .addReg(SubReg);
261
262 buildPrologSpill(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MI, DL, TmpVGPR,
263 FI, FrameReg, DwordOff);
264 DwordOff += 4;
265 }
266 }
267
// Save path 2: write each sub-register into a pre-allocated lane of a
// physical VGPR (no memory traffic).
268 void saveToVGPRLane(const int FI) const {
269 assert(!MFI.isDeadObjectIndex(FI));
270
271 assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
273 FuncInfo->getSGPRSpillToPhysicalVGPRLanes(FI);
274 assert(Spill.size() == NumSubRegs);
275
276 for (unsigned I = 0; I < NumSubRegs; ++I) {
277 Register SubReg = NumSubRegs == 1
278 ? SuperReg
279 : Register(TRI.getSubReg(SuperReg, SplitParts[I]));
280 BuildMI(MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_S32_TO_VGPR),
281 Spill[I].VGPR)
282 .addReg(SubReg)
283 .addImm(Spill[I].Lane)
284 .addReg(Spill[I].VGPR, RegState::Undef);
285 }
286 }
287
// Save path 3: plain register copy into a free scratch SGPR.
288 void copyToScratchSGPR(Register DstReg) const {
289 BuildMI(MBB, MI, DL, TII->get(AMDGPU::COPY), DstReg)
290 .addReg(SuperReg)
292 }
293
// Restore counterparts of the three save paths above.
294 void restoreFromMemory(const int FI) {
295 MachineRegisterInfo &MRI = MF.getRegInfo();
296
297 initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MI, /*IsProlog*/ false);
299 MRI, LiveUnits, AMDGPU::VGPR_32RegClass);
300 if (!TmpVGPR)
301 report_fatal_error("failed to find free scratch register");
302
303 for (unsigned I = 0, DwordOff = 0; I < NumSubRegs; ++I) {
304 Register SubReg = NumSubRegs == 1
305 ? SuperReg
306 : Register(TRI.getSubReg(SuperReg, SplitParts[I]));
307
308 buildEpilogRestore(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MI, DL,
309 TmpVGPR, FI, FrameReg, DwordOff);
310 assert(SubReg.isPhysical());
311
312 BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
313 .addReg(TmpVGPR, RegState::Kill);
314 DwordOff += 4;
315 }
316 }
317
318 void restoreFromVGPRLane(const int FI) {
319 assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
321 FuncInfo->getSGPRSpillToPhysicalVGPRLanes(FI);
322 assert(Spill.size() == NumSubRegs);
323
324 for (unsigned I = 0; I < NumSubRegs; ++I) {
325 Register SubReg = NumSubRegs == 1
326 ? SuperReg
327 : Register(TRI.getSubReg(SuperReg, SplitParts[I]));
328 BuildMI(MBB, MI, DL, TII->get(AMDGPU::SI_RESTORE_S32_FROM_VGPR), SubReg)
329 .addReg(Spill[I].VGPR)
330 .addImm(Spill[I].Lane);
331 }
332 }
333
334 void copyFromScratchSGPR(Register SrcReg) const {
335 BuildMI(MBB, MI, DL, TII->get(AMDGPU::COPY), SuperReg)
336 .addReg(SrcReg)
338 }
339
340public:
345 const DebugLoc &DL, const SIInstrInfo *TII,
346 const SIRegisterInfo &TRI,
347 LiveRegUnits &LiveUnits, Register FrameReg)
348 : MI(MI), MBB(MBB), MF(*MBB.getParent()),
349 ST(MF.getSubtarget<GCNSubtarget>()), MFI(MF.getFrameInfo()),
350 FuncInfo(MF.getInfo<SIMachineFunctionInfo>()), TII(TII), TRI(TRI),
351 SuperReg(Reg), SI(SI), LiveUnits(LiveUnits), DL(DL),
352 FrameReg(FrameReg) {
// Multi-dword registers are split into 32-bit pieces (EltSize = 4) so each
// piece can be handled by the per-dword spill/restore helpers.
353 const TargetRegisterClass *RC = TRI.getPhysRegBaseClass(SuperReg);
354 SplitParts = TRI.getRegSplitParts(RC, EltSize);
355 NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
356
357 assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
358 }
359
// Dispatch on the save kind recorded in the PrologEpilogSGPRSaveRestoreInfo.
360 void save() {
361 switch (SI.getKind()) {
363 return saveToMemory(SI.getIndex());
365 return saveToVGPRLane(SI.getIndex());
367 return copyToScratchSGPR(SI.getReg());
368 }
369 }
370
371 void restore() {
372 switch (SI.getKind()) {
374 return restoreFromMemory(SI.getIndex());
376 return restoreFromVGPRLane(SI.getIndex());
378 return copyFromScratchSGPR(SI.getReg());
379 }
380 }
381};
382
383} // namespace llvm
384
385// Emit flat scratch setup code, assuming `MFI->hasFlatScratchInit()`
// NOTE(review): extraction artifact — the parameter line of the signature
// (original 387), the MMO-flags line (441-442), the PAL offset computation
// (445), and the preloaded FLAT_SCRATCH_INIT lookup (462) are missing from
// this listing.
386void SIFrameLowering::emitEntryFunctionFlatScratchInit(
388 const DebugLoc &DL, Register ScratchWaveOffsetReg) const {
389 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
390 const SIInstrInfo *TII = ST.getInstrInfo();
391 const SIRegisterInfo *TRI = &TII->getRegisterInfo();
392 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
393
394 // We don't need this if we only have spills since there is no user facing
395 // scratch.
396
397 // TODO: If we know we don't have flat instructions earlier, we can omit
398 // this from the input registers.
399 //
400 // TODO: We only need to know if we access scratch space through a flat
401 // pointer. Because we only detect if flat instructions are used at all,
402 // this will be used more often than necessary on VI.
403
404 Register FlatScrInitLo;
405 Register FlatScrInitHi;
406
407 if (ST.isAmdPalOS()) {
408 // Extract the scratch offset from the descriptor in the GIT
409 LiveRegUnits LiveUnits;
410 LiveUnits.init(*TRI);
411 LiveUnits.addLiveIns(MBB);
412
413 // Find unused reg to load flat scratch init into
414 MachineRegisterInfo &MRI = MF.getRegInfo();
415 Register FlatScrInit = AMDGPU::NoRegister;
416 ArrayRef<MCPhysReg> AllSGPR64s = TRI->getAllSGPR64(MF);
// Skip over the SGPRs holding preloaded arguments (rounded up to pairs).
417 unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 1) / 2;
418 AllSGPR64s = AllSGPR64s.slice(
419 std::min(static_cast<unsigned>(AllSGPR64s.size()), NumPreloaded));
420 Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
421 for (MCPhysReg Reg : AllSGPR64s) {
422 if (LiveUnits.available(Reg) && !MRI.isReserved(Reg) &&
423 MRI.isAllocatable(Reg) && !TRI->isSubRegisterEq(Reg, GITPtrLoReg)) {
424 FlatScrInit = Reg;
425 break;
426 }
427 }
428 assert(FlatScrInit && "Failed to find free register for scratch init");
429
430 FlatScrInitLo = TRI->getSubReg(FlatScrInit, AMDGPU::sub0);
431 FlatScrInitHi = TRI->getSubReg(FlatScrInit, AMDGPU::sub1);
432
433 buildGitPtr(MBB, I, DL, TII, FlatScrInit);
434
435 // We now have the GIT ptr - now get the scratch descriptor from the entry
436 // at offset 0 (or offset 16 for a compute shader).
437 MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
438 const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);
439 auto *MMO = MF.getMachineMemOperand(
440 PtrInfo,
443 8, Align(4));
444 unsigned Offset =
446 const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
447 unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset);
448 BuildMI(MBB, I, DL, LoadDwordX2, FlatScrInit)
449 .addReg(FlatScrInit)
450 .addImm(EncodedOffset) // offset
451 .addImm(0) // cpol
452 .addMemOperand(MMO);
453
454 // Mask the offset in [47:0] of the descriptor
455 const MCInstrDesc &SAndB32 = TII->get(AMDGPU::S_AND_B32);
456 auto And = BuildMI(MBB, I, DL, SAndB32, FlatScrInitHi)
457 .addReg(FlatScrInitHi)
458 .addImm(0xffff);
459 And->getOperand(3).setIsDead(); // Mark SCC as dead.
460 } else {
461 Register FlatScratchInitReg =
463 assert(FlatScratchInitReg);
464
465 MachineRegisterInfo &MRI = MF.getRegInfo();
466 MRI.addLiveIn(FlatScratchInitReg);
467 MBB.addLiveIn(FlatScratchInitReg);
468
469 FlatScrInitLo = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0);
470 FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1);
471 }
472
473 // Do a 64-bit pointer add.
474 if (ST.flatScratchIsPointer()) {
475 if (ST.getGeneration() >= AMDGPUSubtarget::GFX10) {
476 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo)
477 .addReg(FlatScrInitLo)
478 .addReg(ScratchWaveOffsetReg);
479 auto Addc = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32),
480 FlatScrInitHi)
481 .addReg(FlatScrInitHi)
482 .addImm(0);
483 Addc->getOperand(3).setIsDead(); // Mark SCC as dead.
484
// On GFX10+ FLAT_SCR is not directly addressable; write it via S_SETREG.
485 using namespace AMDGPU::Hwreg;
486 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32))
487 .addReg(FlatScrInitLo)
488 .addImm(int16_t(HwregEncoding::encode(ID_FLAT_SCR_LO, 0, 32)));
489 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32))
490 .addReg(FlatScrInitHi)
491 .addImm(int16_t(HwregEncoding::encode(ID_FLAT_SCR_HI, 0, 32)));
492 return;
493 }
494
495 // For GFX9.
496 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), AMDGPU::FLAT_SCR_LO)
497 .addReg(FlatScrInitLo)
498 .addReg(ScratchWaveOffsetReg);
499 auto Addc = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32),
500 AMDGPU::FLAT_SCR_HI)
501 .addReg(FlatScrInitHi)
502 .addImm(0);
503 Addc->getOperand(3).setIsDead(); // Mark SCC as dead.
504
505 return;
506 }
507
508 assert(ST.getGeneration() < AMDGPUSubtarget::GFX9);
509
510 // Copy the size in bytes.
511 BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), AMDGPU::FLAT_SCR_LO)
512 .addReg(FlatScrInitHi, RegState::Kill);
513
514 // Add wave offset in bytes to private base offset.
515 // See comment in AMDKernelCodeT.h for enable_sgpr_flat_scratch_init.
516 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), FlatScrInitLo)
517 .addReg(FlatScrInitLo)
518 .addReg(ScratchWaveOffsetReg);
519
520 // Convert offset to 256-byte units.
521 auto LShr = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_LSHR_B32),
522 AMDGPU::FLAT_SCR_HI)
523 .addReg(FlatScrInitLo, RegState::Kill)
524 .addImm(8);
525 LShr->getOperand(3).setIsDead(); // Mark SCC as dead.
526}
527
528// Note SGPRSpill stack IDs should only be used for SGPR spilling to VGPRs, not
529// memory. They should have been removed by now.
531 for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
532 I != E; ++I) {
533 if (!MFI.isDeadObjectIndex(I))
534 return false;
535 }
536
537 return true;
538}
539
540// Shift down registers reserved for the scratch RSRC.
// NOTE(review): extraction artifact — the second half of the early-exit
// condition (original 555, presumably the all-stack-objects-dead check) and
// the line recording the replacement register in MFI (585) are missing from
// this listing.
541Register SIFrameLowering::getEntryFunctionReservedScratchRsrcReg(
542 MachineFunction &MF) const {
543
544 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
545 const SIInstrInfo *TII = ST.getInstrInfo();
546 const SIRegisterInfo *TRI = &TII->getRegisterInfo();
547 MachineRegisterInfo &MRI = MF.getRegInfo();
548 SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
549
550 assert(MFI->isEntryFunction());
551
552 Register ScratchRsrcReg = MFI->getScratchRSrcReg();
553
554 if (!ScratchRsrcReg || (!MRI.isPhysRegUsed(ScratchRsrcReg) &&
556 return Register();
557
// Do not shift when the SGPR-init hardware bug workaround is active or when
// the register is not the default reserved private-segment-buffer register.
558 if (ST.hasSGPRInitBug() ||
559 ScratchRsrcReg != TRI->reservedPrivateSegmentBufferReg(MF))
560 return ScratchRsrcReg;
561
562 // We reserved the last registers for this. Shift it down to the end of those
563 // which were actually used.
564 //
565 // FIXME: It might be safer to use a pseudoregister before replacement.
566
567 // FIXME: We should be able to eliminate unused input registers. We only
568 // cannot do this for the resources required for scratch access. For now we
569 // skip over user SGPRs and may leave unused holes.
570
// Round preloaded SGPR count up to SGPR128 granularity (4 SGPRs per tuple).
571 unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 3) / 4;
572 ArrayRef<MCPhysReg> AllSGPR128s = TRI->getAllSGPR128(MF);
573 AllSGPR128s = AllSGPR128s.slice(std::min(static_cast<unsigned>(AllSGPR128s.size()), NumPreloaded));
574
575 // Skip the last N reserved elements because they should have already been
576 // reserved for VCC etc.
577 Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
578 for (MCPhysReg Reg : AllSGPR128s) {
579 // Pick the first unallocated one. Make sure we don't clobber the other
580 // reserved input we needed. Also for PAL, make sure we don't clobber
581 // the GIT pointer passed in SGPR0 or SGPR8.
582 if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg) &&
583 (!GITPtrLoReg || !TRI->isSubRegisterEq(Reg, GITPtrLoReg))) {
584 MRI.replaceRegWith(ScratchRsrcReg, Reg);
586 MRI.reserveReg(Reg, TRI);
587 return Reg;
588 }
589 }
590
591 return ScratchRsrcReg;
592}
593
594static unsigned getScratchScaleFactor(const GCNSubtarget &ST) {
595 return ST.hasFlatScratchEnabled() ? 1 : ST.getWavefrontSize();
596}
597
// NOTE(review): extraction artifact — the first signature line (original 598),
// the MFI/MRI local declarations (613, 617), the preloaded-register argument
// (624), the PreloadedScratchRsrcReg initializer (650), the insertion-point
// iterator (662), the report_fatal_error call head (690), the FP/SP register
// lookups (700, 705-706, 718, 729-730), parts of the dynamic-VGPR size
// computation (722, 726), the CWSR cselect operand (736), the
// FLAT_SCRATCH_INIT user-SGPR test (750), and the MODE-register encoding
// (775) are missing from this listing.
599 MachineBasicBlock &MBB) const {
600 assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
601
602 // FIXME: If we only have SGPR spills, we won't actually be using scratch
603 // memory since these spill to VGPRs. We should be cleaning up these unused
604 // SGPR spill frame indices somewhere.
605
606 // FIXME: We still have implicit uses on SGPR spill instructions in case they
607 // need to spill to vector memory. It's likely that will not happen, but at
608 // this point it appears we need the setup. This part of the prolog should be
609 // emitted after frame indices are eliminated.
610
611 // FIXME: Remove all of the isPhysRegUsed checks
612
614 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
615 const SIInstrInfo *TII = ST.getInstrInfo();
616 const SIRegisterInfo *TRI = &TII->getRegisterInfo();
618 const Function &F = MF.getFunction();
619 MachineFrameInfo &FrameInfo = MF.getFrameInfo();
620
621 assert(MFI->isEntryFunction());
622
623 Register PreloadedScratchWaveOffsetReg = MFI->getPreloadedReg(
625
626 // We need to do the replacement of the private segment buffer register even
627 // if there are no stack objects. There could be stores to undef or a
628 // constant without an associated object.
629 //
630 // This will return `Register()` in cases where there are no actual
631 // uses of the SRSRC.
632 Register ScratchRsrcReg;
633 if (!ST.hasFlatScratchEnabled())
634 ScratchRsrcReg = getEntryFunctionReservedScratchRsrcReg(MF);
635
636 // Make the selected register live throughout the function.
637 if (ScratchRsrcReg) {
638 for (MachineBasicBlock &OtherBB : MF) {
639 if (&OtherBB != &MBB) {
640 OtherBB.addLiveIn(ScratchRsrcReg);
641 }
642 }
643 }
644
645 // Now that we have fixed the reserved SRSRC we need to locate the
646 // (potentially) preloaded SRSRC.
647 Register PreloadedScratchRsrcReg;
648 if (ST.isAmdHsaOrMesa(F)) {
649 PreloadedScratchRsrcReg =
651 if (ScratchRsrcReg && PreloadedScratchRsrcReg) {
652 // We added live-ins during argument lowering, but since they were not
653 // used they were deleted. We're adding the uses now, so add them back.
654 MRI.addLiveIn(PreloadedScratchRsrcReg);
655 MBB.addLiveIn(PreloadedScratchRsrcReg);
656 }
657 }
658
659 // Debug location must be unknown since the first debug location is used to
660 // determine the end of the prologue.
661 DebugLoc DL;
663
664 // We found the SRSRC first because it needs four registers and has an
665 // alignment requirement. If the SRSRC that we found is clobbering with
666 // the scratch wave offset, which may be in a fixed SGPR or a free SGPR
667 // chosen by SITargetLowering::allocateSystemSGPRs, COPY the scratch
668 // wave offset to a free SGPR.
669 Register ScratchWaveOffsetReg;
670 if (PreloadedScratchWaveOffsetReg &&
671 TRI->isSubRegisterEq(ScratchRsrcReg, PreloadedScratchWaveOffsetReg)) {
672 ArrayRef<MCPhysReg> AllSGPRs = TRI->getAllSGPR32(MF);
673 unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();
674 AllSGPRs = AllSGPRs.slice(
675 std::min(static_cast<unsigned>(AllSGPRs.size()), NumPreloaded));
676 Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
677 for (MCPhysReg Reg : AllSGPRs) {
678 if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg) &&
679 !TRI->isSubRegisterEq(ScratchRsrcReg, Reg) && GITPtrLoReg != Reg) {
680 ScratchWaveOffsetReg = Reg;
681 BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchWaveOffsetReg)
682 .addReg(PreloadedScratchWaveOffsetReg, RegState::Kill);
683 break;
684 }
685 }
686
687 // FIXME: We can spill incoming arguments and restore at the end of the
688 // prolog.
689 if (!ScratchWaveOffsetReg)
691 "could not find temporary scratch offset register in prolog");
692 } else {
693 ScratchWaveOffsetReg = PreloadedScratchWaveOffsetReg;
694 }
695 assert(ScratchWaveOffsetReg || !PreloadedScratchWaveOffsetReg);
696
// Stack size is tracked per lane; scale it to a per-wave byte offset.
697 unsigned Offset = FrameInfo.getStackSize() * getScratchScaleFactor(ST);
698 if (!mayReserveScratchForCWSR(MF)) {
699 if (hasFP(MF)) {
701 assert(FPReg != AMDGPU::FP_REG);
702 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), FPReg).addImm(0);
703 }
704
707 assert(SPReg != AMDGPU::SP_REG);
708 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), SPReg).addImm(Offset);
709 }
710 } else {
711 // We need to check if we're on a compute queue - if we are, then the CWSR
712 // trap handler may need to store some VGPRs on the stack. The first VGPR
713 // block is saved separately, so we only need to allocate space for any
714 // additional VGPR blocks used. For now, we will make sure there's enough
715 // room for the theoretical maximum number of VGPRs that can be allocated.
716 // FIXME: Figure out if the shader uses fewer VGPRs in practice.
717 assert(hasFP(MF));
719 assert(FPReg != AMDGPU::FP_REG);
720 unsigned VGPRSize = llvm::alignTo(
721 (ST.getAddressableNumVGPRs(MFI->getDynamicVGPRBlockSize()) -
723 MFI->getDynamicVGPRBlockSize())) *
724 4,
725 FrameInfo.getMaxAlign());
727
728 BuildMI(MBB, I, DL, TII->get(AMDGPU::GET_STACK_BASE), FPReg);
731 assert(SPReg != AMDGPU::SP_REG);
732
733 // If at least one of the constants can be inlined, then we can use
734 // s_cselect. Otherwise, use a mov and cmovk.
735 if (AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm()) ||
737 ST.hasInv2PiInlineImm())) {
738 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_CSELECT_B32), SPReg)
739 .addImm(Offset + VGPRSize)
740 .addImm(Offset);
741 } else {
742 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), SPReg).addImm(Offset);
743 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_CMOVK_I32), SPReg)
744 .addImm(Offset + VGPRSize);
745 }
746 }
747 }
748
749 bool NeedsFlatScratchInit =
751 (MRI.isPhysRegUsed(AMDGPU::FLAT_SCR) || FrameInfo.hasCalls() ||
752 (!allStackObjectsAreDead(FrameInfo) && ST.hasFlatScratchEnabled()));
753
754 if ((NeedsFlatScratchInit || ScratchRsrcReg) &&
755 PreloadedScratchWaveOffsetReg && !ST.hasArchitectedFlatScratch()) {
756 MRI.addLiveIn(PreloadedScratchWaveOffsetReg);
757 MBB.addLiveIn(PreloadedScratchWaveOffsetReg);
758 }
759
760 if (NeedsFlatScratchInit) {
761 emitEntryFunctionFlatScratchInit(MF, MBB, I, DL, ScratchWaveOffsetReg);
762 }
763
764 if (ScratchRsrcReg) {
765 emitEntryFunctionScratchRsrcRegSetup(MF, MBB, I, DL,
766 PreloadedScratchRsrcReg,
767 ScratchRsrcReg, ScratchWaveOffsetReg);
768 }
769
770 if (ST.hasWaitXcnt()) {
771 // Set REPLAY_MODE (bit 25) in MODE register to enable multi-group XNACK
772 // replay. This aligns hardware behavior with the compiler's s_wait_xcnt
773 // insertion logic, which assumes multi-group mode by default.
774 unsigned RegEncoding =
776 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_IMM32_B32))
777 .addImm(1)
778 .addImm(RegEncoding);
779 }
780}
781
782// Emit scratch RSRC setup code, assuming `ScratchRsrcReg != AMDGPU::NoReg`
// NOTE(review): extraction artifact — the first parameter line (original
// 784), the MFI declaration (791), the PAL PtrInfo line (804), MMO-flag lines
// (808-809, 859-860), relocation/symbol operands (844, 847, 851, 863), and
// the Mesa high-dword masking lines (869-870) are missing from this listing.
783void SIFrameLowering::emitEntryFunctionScratchRsrcRegSetup(
785 const DebugLoc &DL, Register PreloadedScratchRsrcReg,
786 Register ScratchRsrcReg, Register ScratchWaveOffsetReg) const {
787
788 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
789 const SIInstrInfo *TII = ST.getInstrInfo();
790 const SIRegisterInfo *TRI = &TII->getRegisterInfo();
792 const Function &Fn = MF.getFunction();
793
794 if (ST.isAmdPalOS()) {
795 // The pointer to the GIT is formed from the offset passed in and either
796 // the amdgpu-git-ptr-high function attribute or the top part of the PC
797 Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
798 Register Rsrc03 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);
799
800 buildGitPtr(MBB, I, DL, TII, Rsrc01);
801
802 // We now have the GIT ptr - now get the scratch descriptor from the entry
803 // at offset 0 (or offset 16 for a compute shader).
805 const MCInstrDesc &LoadDwordX4 = TII->get(AMDGPU::S_LOAD_DWORDX4_IMM);
806 auto *MMO = MF.getMachineMemOperand(
807 PtrInfo,
810 16, Align(4));
811 unsigned Offset = Fn.getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0;
812 const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
813 unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset);
814 BuildMI(MBB, I, DL, LoadDwordX4, ScratchRsrcReg)
815 .addReg(Rsrc01)
816 .addImm(EncodedOffset) // offset
817 .addImm(0) // cpol
818 .addReg(ScratchRsrcReg, RegState::ImplicitDefine)
819 .addMemOperand(MMO);
820
821 // The driver will always set the SRD for wave 64 (bits 118:117 of
822 // descriptor / bits 22:21 of third sub-reg will be 0b11)
823 // If the shader is actually wave32 we have to modify the const_index_stride
824 // field of the descriptor 3rd sub-reg (bits 22:21) to 0b10 (stride=32). The
825 // reason the driver does this is that there can be cases where it presents
826 // 2 shaders with different wave size (e.g. VsFs).
827 // TODO: convert to using SCRATCH instructions or multiple SRD buffers
828 if (ST.isWave32()) {
829 const MCInstrDesc &SBitsetB32 = TII->get(AMDGPU::S_BITSET0_B32);
830 BuildMI(MBB, I, DL, SBitsetB32, Rsrc03)
831 .addImm(21)
832 .addReg(Rsrc03);
833 }
834 } else if (ST.isMesaGfxShader(Fn) || !PreloadedScratchRsrcReg) {
835 assert(!ST.isAmdHsaOrMesa(Fn));
836 const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
837
838 Register Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
839 Register Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);
840
841 // Use relocations to get the pointer, and setup the other bits manually.
842 uint64_t Rsrc23 = TII->getScratchRsrcWords23();
843
845 Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
846
848 const MCInstrDesc &Mov64 = TII->get(AMDGPU::S_MOV_B64);
849
850 BuildMI(MBB, I, DL, Mov64, Rsrc01)
852 .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
853 } else {
854 const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);
855
856 MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
857 auto *MMO = MF.getMachineMemOperand(
858 PtrInfo,
861 8, Align(4));
862 BuildMI(MBB, I, DL, LoadDwordX2, Rsrc01)
864 .addImm(0) // offset
865 .addImm(0) // cpol
866 .addMemOperand(MMO)
867 .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
868
871 }
872 } else {
873 Register Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
874 Register Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
875
876 BuildMI(MBB, I, DL, SMovB32, Rsrc0)
877 .addExternalSymbol("SCRATCH_RSRC_DWORD0")
878 .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
879
880 BuildMI(MBB, I, DL, SMovB32, Rsrc1)
881 .addExternalSymbol("SCRATCH_RSRC_DWORD1")
882 .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
883 }
884
885 BuildMI(MBB, I, DL, SMovB32, Rsrc2)
886 .addImm(Lo_32(Rsrc23))
887 .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
888
889 BuildMI(MBB, I, DL, SMovB32, Rsrc3)
890 .addImm(Hi_32(Rsrc23))
891 .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
892 } else if (ST.isAmdHsaOrMesa(Fn)) {
893 assert(PreloadedScratchRsrcReg);
894
895 if (ScratchRsrcReg != PreloadedScratchRsrcReg) {
896 BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchRsrcReg)
897 .addReg(PreloadedScratchRsrcReg, RegState::Kill);
898 }
899 }
900
901 // Add the scratch wave offset into the scratch RSRC.
902 //
903 // We only want to update the first 48 bits, which is the base address
904 // pointer, without touching the adjacent 16 bits of flags. We know this add
905 // cannot carry-out from bit 47, otherwise the scratch allocation would be
906 // impossible to fit in the 48-bit global address space.
907 //
908 // TODO: Evaluate if it is better to just construct an SRD using the flat
909 // scratch init and some constants rather than update the one we are passed.
910 Register ScratchRsrcSub0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
911 Register ScratchRsrcSub1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
912
913 // We cannot Kill ScratchWaveOffsetReg here because we allow it to be used in
914 // the kernel body via inreg arguments.
915 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), ScratchRsrcSub0)
916 .addReg(ScratchRsrcSub0)
917 .addReg(ScratchWaveOffsetReg)
918 .addReg(ScratchRsrcReg, RegState::ImplicitDefine)
919 auto Addc = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), ScratchRsrcSub1)
920 .addReg(ScratchRsrcSub1)
921 .addImm(0)
922 .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
923 Addc->getOperand(3).setIsDead(); // Mark SCC as dead.
924}
925
927 switch (ID) {
931 return true;
935 return false;
936 }
937 llvm_unreachable("Invalid TargetStackID::Value");
938}
939
940// Activate only the inactive lanes when \p EnableInactiveLanes is true.
941// Otherwise, activate all lanes. It returns the saved exec.
// NOTE(review): extraction artifact — the signature line naming this function
// (original 942), two parameter lines (944-945), and the LiveUnits/FuncInfo/
// MRI local declarations (949, 953) are missing from this listing.
943 MachineFunction &MF,
946 const DebugLoc &DL, bool IsProlog,
947 bool EnableInactiveLanes) {
948 Register ScratchExecCopy;
950 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
951 const SIInstrInfo *TII = ST.getInstrInfo();
952 const SIRegisterInfo &TRI = TII->getRegisterInfo();
954
955 initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MBBI, IsProlog);
956
957 if (FuncInfo->isWholeWaveFunction()) {
958 // Whole wave functions already have a copy of the original EXEC mask that
959 // we can use.
960 assert(IsProlog && "Epilog should look at return, not setup");
961 ScratchExecCopy =
962 TII->getWholeWaveFunctionSetup(MF)->getOperand(0).getReg();
963 assert(ScratchExecCopy && "Couldn't find copy of EXEC");
964 } else {
965 ScratchExecCopy = findScratchNonCalleeSaveRegister(
966 MRI, LiveUnits, *TRI.getWaveMaskRegClass());
967 }
968
969 if (!ScratchExecCopy)
970 report_fatal_error("failed to find free scratch register");
971
// Keep the saved-exec register reserved while the prolog/epilog is built.
972 LiveUnits.addReg(ScratchExecCopy);
973
// XOR_SAVEEXEC flips to the inactive lanes; OR_SAVEEXEC enables all lanes.
// The 32/64-bit variants track the wave size.
974 const unsigned SaveExecOpc =
975 ST.isWave32() ? (EnableInactiveLanes ? AMDGPU::S_XOR_SAVEEXEC_B32
976 : AMDGPU::S_OR_SAVEEXEC_B32)
977 : (EnableInactiveLanes ? AMDGPU::S_XOR_SAVEEXEC_B64
978 : AMDGPU::S_OR_SAVEEXEC_B64);
979 auto SaveExec =
980 BuildMI(MBB, MBBI, DL, TII->get(SaveExecOpc), ScratchExecCopy).addImm(-1);
981 SaveExec->getOperand(3).setIsDead(); // Mark SCC as dead.
982
983 return ScratchExecCopy;
984}
985
// Emit prologue stores for callee-saved registers: WWM VGPR spills (with the
// required EXEC manipulation) followed by prolog/epilog SGPR spills, using
// \p FrameReg as the base register for all frame accesses.
// NOTE(review): the leading signature lines (function name, MF/MBB/MBBI/DL
// and LiveUnits parameters) were lost in extraction — confirm against the
// full source.
989 Register FrameReg, Register FramePtrRegScratchCopy) const {
991 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
992 const SIInstrInfo *TII = ST.getInstrInfo();
993 const SIRegisterInfo &TRI = TII->getRegisterInfo();
996
997 // Spill Whole-Wave Mode VGPRs. Save only the inactive lanes of the scratch
998 // registers. However, save all lanes of callee-saved VGPRs. Due to this, we
999 // might end up flipping the EXEC bits twice.
1000 Register ScratchExecCopy;
1001 SmallVector<std::pair<Register, int>, 2> WWMCalleeSavedRegs, WWMScratchRegs;
1002 FuncInfo->splitWWMSpillRegisters(MF, WWMCalleeSavedRegs, WWMScratchRegs);
1003 if (!WWMScratchRegs.empty())
1004 ScratchExecCopy =
1005 buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
1006 /*IsProlog*/ true, /*EnableInactiveLanes*/ true);
1007
// Helper: store each (VGPR, frame index) pair to its stack slot.
// NOTE(review): the lambda's parameter list line was lost in extraction.
1008 auto StoreWWMRegisters =
1010 for (const auto &Reg : WWMRegs) {
1011 Register VGPR = Reg.first;
1012 int FI = Reg.second;
1013 buildPrologSpill(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MBBI, DL,
1014 VGPR, FI, FrameReg);
1015 }
1016 };
1017
// Non-reserved WWM scratch registers must be marked live-in so the verifier
// accepts their use before any def in the entry block.
1018 for (const Register Reg : make_first_range(WWMScratchRegs)) {
1019 if (!MRI.isReserved(Reg)) {
1020 MRI.addLiveIn(Reg);
1021 MBB.addLiveIn(Reg);
1022 }
1023 }
1024 StoreWWMRegisters(WWMScratchRegs);
1025
1026 auto EnableAllLanes = [&]() {
1027 BuildMI(MBB, MBBI, DL, TII->get(LMC.MovOpc), LMC.ExecReg).addImm(-1);
1028 };
1029
// Callee-saved WWM VGPRs need all lanes enabled; either flip EXEC to -1 (if
// the inactive-lane save above already captured the original mask) or save
// EXEC now without restricting to inactive lanes.
1030 if (!WWMCalleeSavedRegs.empty()) {
1031 if (ScratchExecCopy) {
1032 EnableAllLanes();
1033 } else {
1034 ScratchExecCopy = buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
1035 /*IsProlog*/ true,
1036 /*EnableInactiveLanes*/ false);
1037 }
1038 }
1039
1040 StoreWWMRegisters(WWMCalleeSavedRegs);
1041 if (FuncInfo->isWholeWaveFunction()) {
1042 // If we have already saved some WWM CSR registers, then the EXEC is already
1043 // -1 and we don't need to do anything else. Otherwise, set EXEC to -1 here.
1044 if (!ScratchExecCopy)
1045 buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL, /*IsProlog*/ true,
1046 /*EnableInactiveLanes*/ true);
1047 else if (WWMCalleeSavedRegs.empty())
1048 EnableAllLanes();
1049 } else if (ScratchExecCopy) {
1050 // FIXME: Split block and make terminator.
1051 BuildMI(MBB, MBBI, DL, TII->get(LMC.MovOpc), LMC.ExecReg)
1052 .addReg(ScratchExecCopy, RegState::Kill)
1053 LiveUnits.addReg(ScratchExecCopy);
1054 }
1055
1056 Register FramePtrReg = FuncInfo->getFrameOffsetReg();
1057
1058 for (const auto &Spill : FuncInfo->getPrologEpilogSGPRSpills()) {
1059 // Special handle FP spill:
1060 // Skip if FP is saved to a scratch SGPR, the save has already been emitted.
1061 // Otherwise, FP has been moved to a temporary register and spill it
1062 // instead.
1063 Register Reg =
1064 Spill.first == FramePtrReg ? FramePtrRegScratchCopy : Spill.first;
1065 if (!Reg)
1066 continue;
1067
1068 PrologEpilogSGPRSpillBuilder SB(Reg, Spill.second, MBB, MBBI, DL, TII, TRI,
1069 LiveUnits, FrameReg);
1070 SB.save();
1071 }
1072
1073 // If a copy to scratch SGPR has been chosen for any of the SGPR spills, make
1074 // such scratch registers live throughout the function.
1075 SmallVector<Register, 1> ScratchSGPRs;
1076 FuncInfo->getAllScratchSGPRCopyDstRegs(ScratchSGPRs);
1077 if (!ScratchSGPRs.empty()) {
1078 for (MachineBasicBlock &MBB : MF) {
1079 for (MCPhysReg Reg : ScratchSGPRs)
1080 MBB.addLiveIn(Reg);
1081
1082 MBB.sortUniqueLiveIns();
1083 }
1084 if (!LiveUnits.empty()) {
1085 for (MCPhysReg Reg : ScratchSGPRs)
1086 LiveUnits.addReg(Reg);
1087 }
1088 }
1089}
1090
// Emit epilogue restores for callee-saved registers: prolog/epilog SGPR
// reloads first, then WWM VGPR reloads with the matching EXEC manipulation.
// Mirrors emitCSRSpillStores.
// NOTE(review): the leading signature lines (function name and the
// MF/MBB/MBBI/DL/LiveUnits parameters) were lost in extraction — confirm
// against the full source.
1094 Register FrameReg, Register FramePtrRegScratchCopy) const {
1095 const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
1096 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1097 const SIInstrInfo *TII = ST.getInstrInfo();
1098 const SIRegisterInfo &TRI = TII->getRegisterInfo();
1100 Register FramePtrReg = FuncInfo->getFrameOffsetReg();
1101
1102 for (const auto &Spill : FuncInfo->getPrologEpilogSGPRSpills()) {
1103 // Special handle FP restore:
1104 // Skip if FP needs to be restored from the scratch SGPR. Otherwise, restore
1105 // the FP value to a temporary register. The frame pointer should be
1106 // overwritten only at the end when all other spills are restored from
1107 // current frame.
1108 Register Reg =
1109 Spill.first == FramePtrReg ? FramePtrRegScratchCopy : Spill.first;
1110 if (!Reg)
1111 continue;
1112
1113 PrologEpilogSGPRSpillBuilder SB(Reg, Spill.second, MBB, MBBI, DL, TII, TRI,
1114 LiveUnits, FrameReg);
1115 SB.restore();
1116 }
1117
1118 // Restore Whole-Wave Mode VGPRs. Restore only the inactive lanes of the
1119 // scratch registers. However, restore all lanes of callee-saved VGPRs. Due to
1120 // this, we might end up flipping the EXEC bits twice.
1121 Register ScratchExecCopy;
1122 SmallVector<std::pair<Register, int>, 2> WWMCalleeSavedRegs, WWMScratchRegs;
1123 FuncInfo->splitWWMSpillRegisters(MF, WWMCalleeSavedRegs, WWMScratchRegs);
// Helper: reload each (VGPR, frame index) pair from its stack slot.
// NOTE(review): the lambda's parameter list line was lost in extraction.
1124 auto RestoreWWMRegisters =
1126 for (const auto &Reg : WWMRegs) {
1127 Register VGPR = Reg.first;
1128 int FI = Reg.second;
1129 buildEpilogRestore(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MBBI, DL,
1130 VGPR, FI, FrameReg);
1131 }
1132 };
1133
1134 if (FuncInfo->isWholeWaveFunction()) {
1135 // For whole wave functions, the EXEC is already -1 at this point.
1136 // Therefore, we can restore the CSR WWM registers right away.
1137 RestoreWWMRegisters(WWMCalleeSavedRegs);
1138
1139 // The original EXEC is the first operand of the return instruction.
1140 MachineInstr &Return = MBB.instr_back();
1141 unsigned Opcode = Return.getOpcode();
1142 switch (Opcode) {
1143 case AMDGPU::SI_WHOLE_WAVE_FUNC_RETURN:
1144 Opcode = AMDGPU::SI_RETURN;
1145 break;
1146 case AMDGPU::SI_TCRETURN_GFX_WholeWave:
1147 Opcode = AMDGPU::SI_TCRETURN_GFX;
1148 break;
1149 default:
1150 llvm_unreachable("Unexpected return inst");
1151 }
1152 Register OrigExec = Return.getOperand(0).getReg();
1153
1154 if (!WWMScratchRegs.empty()) {
// EXEC = ~OrigExec: activate only the originally-inactive lanes so the
// scratch WWM registers' inactive-lane contents are restored.
1155 BuildMI(MBB, MBBI, DL, TII->get(LMC.XorOpc), LMC.ExecReg)
1156 .addReg(OrigExec)
1157 .addImm(-1);
1158 RestoreWWMRegisters(WWMScratchRegs);
1159 }
1160
1161 // Restore original EXEC.
1162 BuildMI(MBB, MBBI, DL, TII->get(LMC.MovOpc), LMC.ExecReg).addReg(OrigExec);
1163
1164 // Drop the first operand and update the opcode.
1165 Return.removeOperand(0);
1166 Return.setDesc(TII->get(Opcode));
1167
1168 return;
1169 }
1170
// Normal (non-whole-wave) path: save EXEC and enable the inactive lanes to
// restore the scratch WWM registers, then all lanes for callee-saved ones.
1171 if (!WWMScratchRegs.empty()) {
1172 ScratchExecCopy =
1173 buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
1174 /*IsProlog=*/false, /*EnableInactiveLanes=*/true);
1175 }
1176 RestoreWWMRegisters(WWMScratchRegs);
1177 if (!WWMCalleeSavedRegs.empty()) {
1178 if (ScratchExecCopy) {
1179 BuildMI(MBB, MBBI, DL, TII->get(LMC.MovOpc), LMC.ExecReg).addImm(-1);
1180 } else {
1181 ScratchExecCopy = buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
1182 /*IsProlog*/ false,
1183 /*EnableInactiveLanes*/ false);
1184 }
1185 }
1186
1187 RestoreWWMRegisters(WWMCalleeSavedRegs);
1188 if (ScratchExecCopy) {
1189 // FIXME: Split block and make terminator.
1190 BuildMI(MBB, MBBI, DL, TII->get(LMC.MovOpc), LMC.ExecReg)
1191 .addReg(ScratchExecCopy, RegState::Kill);
1192 }
1193}
1194
// Emit the function prologue for non-entry functions: CSR spill stores,
// optional stack realignment, FP/BP setup, and the SP bump for the frame.
// Entry functions are handled elsewhere and return early here.
// NOTE(review): the first signature line (function name and MachineFunction
// parameter) was lost in extraction — confirm against the full source.
1196 MachineBasicBlock &MBB) const {
1198 if (FuncInfo->isEntryFunction()) {
1200 return;
1201 }
1202
1203 MachineFrameInfo &MFI = MF.getFrameInfo();
1204 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1205 const SIInstrInfo *TII = ST.getInstrInfo();
1206 const SIRegisterInfo &TRI = TII->getRegisterInfo();
1207 MachineRegisterInfo &MRI = MF.getRegInfo();
1208
1209 Register StackPtrReg = FuncInfo->getStackPtrOffsetReg();
1210 Register FramePtrReg = FuncInfo->getFrameOffsetReg();
1211 Register BasePtrReg =
1212 TRI.hasBasePointer(MF) ? TRI.getBaseRegister() : Register();
1213 LiveRegUnits LiveUnits;
1214
1216 // DebugLoc must be unknown since the first instruction with DebugLoc is used
1217 // to determine the end of the prologue.
1218 DebugLoc DL;
1219
1220 bool HasFP = false;
1221 bool HasBP = false;
1222 uint32_t NumBytes = MFI.getStackSize();
1223 uint32_t RoundedSize = NumBytes;
1224
1225 // Functions that never return don't need to save and restore the FP or BP.
1226 const Function &F = MF.getFunction();
1227 bool SavesStackRegs =
1228 !F.hasFnAttribute(Attribute::NoReturn) && !FuncInfo->isChainFunction();
1229
// Stack realignment forces an FP, independent of hasFP(MF).
1230 if (TRI.hasStackRealignment(MF))
1231 HasFP = true;
1232
1233 Register FramePtrRegScratchCopy;
1234 if (!HasFP && !hasFP(MF)) {
1235 // Emit the CSR spill stores with SP base register.
1236 emitCSRSpillStores(MF, MBB, MBBI, DL, LiveUnits, StackPtrReg,
1237 FramePtrRegScratchCopy);
1238 } else if (SavesStackRegs) {
1239 // CSR spill stores will use FP as base register.
1240 Register SGPRForFPSaveRestoreCopy =
1241 FuncInfo->getScratchSGPRCopyDstReg(FramePtrReg);
1242
1243 initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ true);
1244 if (SGPRForFPSaveRestoreCopy) {
1245 // Copy FP to the scratch register now and emit the CFI entry. It avoids
1246 // the extra FP copy needed in the other two cases when FP is spilled to
1247 // memory or to a VGPR lane.
// NOTE(review): the spill-builder declaration line was lost in extraction;
// the continuation lines below are its constructor arguments.
1249 FramePtrReg,
1250 FuncInfo->getPrologEpilogSGPRSaveRestoreInfo(FramePtrReg), MBB, MBBI,
1251 DL, TII, TRI, LiveUnits, FramePtrReg);
1252 SB.save();
1253 LiveUnits.addReg(SGPRForFPSaveRestoreCopy);
1254 } else {
1255 // Copy FP into a new scratch register so that its previous value can be
1256 // spilled after setting up the new frame.
1257 FramePtrRegScratchCopy = findScratchNonCalleeSaveRegister(
1258 MRI, LiveUnits, AMDGPU::SReg_32_XM0_XEXECRegClass);
1259 if (!FramePtrRegScratchCopy)
1260 report_fatal_error("failed to find free scratch register");
1261
1262 LiveUnits.addReg(FramePtrRegScratchCopy);
1263 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrRegScratchCopy)
1264 .addReg(FramePtrReg);
1265 }
1266 }
1267
1268 if (HasFP) {
1269 const unsigned Alignment = MFI.getMaxAlign().value();
1270
// Over-allocate by the alignment so the realigned FP stays inside the frame.
1271 RoundedSize += Alignment;
1272 if (LiveUnits.empty()) {
1273 LiveUnits.init(TRI);
1274 LiveUnits.addLiveIns(MBB);
1275 }
1276
1277 // s_add_i32 s33, s32, NumBytes
1278 // s_and_b32 s33, s33, 0b111...0000
1279 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), FramePtrReg)
1280 .addReg(StackPtrReg)
1281 .addImm((Alignment - 1) * getScratchScaleFactor(ST))
1283 auto And = BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_AND_B32), FramePtrReg)
1284 .addReg(FramePtrReg, RegState::Kill)
1285 .addImm(-Alignment * getScratchScaleFactor(ST))
1287 And->getOperand(3).setIsDead(); // Mark SCC as dead.
1288 FuncInfo->setIsStackRealigned(true);
1289 } else if ((HasFP = hasFP(MF))) {
// No realignment needed: FP is simply the incoming SP.
1290 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
1291 .addReg(StackPtrReg)
1293 }
1294
1295 // If FP is used, emit the CSR spills with FP base register.
1296 if (HasFP) {
1297 emitCSRSpillStores(MF, MBB, MBBI, DL, LiveUnits, FramePtrReg,
1298 FramePtrRegScratchCopy);
1299 if (FramePtrRegScratchCopy)
1300 LiveUnits.removeReg(FramePtrRegScratchCopy);
1301 }
1302
1303 // If we need a base pointer, set it up here. It's whatever the value of
1304 // the stack pointer is at this point. Any variable size objects will be
1305 // allocated after this, so we can still use the base pointer to reference
1306 // the incoming arguments.
1307 if ((HasBP = TRI.hasBasePointer(MF))) {
1308 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), BasePtrReg)
1309 .addReg(StackPtrReg)
1311 }
1312
// Bump SP past the (possibly realignment-padded) frame. Scratch offsets are
// scaled, hence getScratchScaleFactor.
1313 if (HasFP && RoundedSize != 0) {
1314 auto Add = BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), StackPtrReg)
1315 .addReg(StackPtrReg)
1316 .addImm(RoundedSize * getScratchScaleFactor(ST))
1318 Add->getOperand(3).setIsDead(); // Mark SCC as dead.
1319 }
1320
1321 bool FPSaved = FuncInfo->hasPrologEpilogSGPRSpillEntry(FramePtrReg);
1322 (void)FPSaved;
1323 assert((!HasFP || FPSaved || !SavesStackRegs) &&
1324 "Needed to save FP but didn't save it anywhere");
1325
1326 // If we allow spilling to AGPRs we may have saved FP but then spill
1327 // everything into AGPRs instead of the stack.
1328 assert((HasFP || !FPSaved || !SavesStackRegs || EnableSpillVGPRToAGPR) &&
1329 "Saved FP but didn't need it");
1330
1331 bool BPSaved = FuncInfo->hasPrologEpilogSGPRSpillEntry(BasePtrReg);
1332 (void)BPSaved;
1333 assert((!HasBP || BPSaved || !SavesStackRegs) &&
1334 "Needed to save BP but didn't save it anywhere");
1335
1336 assert((HasBP || !BPSaved) && "Saved BP but didn't need it");
1337
1338 if (FuncInfo->isWholeWaveFunction()) {
1339 // SI_WHOLE_WAVE_FUNC_SETUP has outlived its purpose.
1340 TII->getWholeWaveFunctionSetup(MF)->eraseFromParent();
1341 }
1342}
1343
// Emit the function epilogue for non-entry functions: restore SP from FP/BP,
// reload CSRs (via emitCSRSpillRestores), and finally restore FP itself.
// NOTE(review): the first signature line (function name and MachineFunction
// parameter) was lost in extraction — confirm against the full source.
1345 MachineBasicBlock &MBB) const {
1346 const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
1347 if (FuncInfo->isEntryFunction())
1348 return;
1349
// Chain functions without a tail call have nothing to restore.
1350 const MachineFrameInfo &MFI = MF.getFrameInfo();
1351 if (FuncInfo->isChainFunction() && !MFI.hasTailCall())
1352 return;
1353
1354 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1355 const SIInstrInfo *TII = ST.getInstrInfo();
1356 const SIRegisterInfo &TRI = TII->getRegisterInfo();
1357 MachineRegisterInfo &MRI = MF.getRegInfo();
1358 LiveRegUnits LiveUnits;
1359 // Get the insert location for the epilogue. If there were no terminators in
1360 // the block, get the last instruction.
1362 DebugLoc DL;
1363 if (!MBB.empty()) {
1364 MBBI = MBB.getLastNonDebugInstr();
1365 if (MBBI != MBB.end())
1366 DL = MBBI->getDebugLoc();
1367
1368 MBBI = MBB.getFirstTerminator();
1369 }
1370
// Undo the prologue's over-allocation when the stack was realigned.
1371 uint32_t NumBytes = MFI.getStackSize();
1372 uint32_t RoundedSize = FuncInfo->isStackRealigned()
1373 ? NumBytes + MFI.getMaxAlign().value()
1374 : NumBytes;
1375 const Register StackPtrReg = FuncInfo->getStackPtrOffsetReg();
1376 Register FramePtrReg = FuncInfo->getFrameOffsetReg();
1377 bool FPSaved = FuncInfo->hasPrologEpilogSGPRSpillEntry(FramePtrReg);
1378
// Restore SP to its incoming value from BP (preferred) or FP.
1379 if (RoundedSize != 0) {
1380 if (TRI.hasBasePointer(MF)) {
1381 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), StackPtrReg)
1382 .addReg(TRI.getBaseRegister())
1384 } else if (hasFP(MF)) {
1385 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), StackPtrReg)
1386 .addReg(FramePtrReg)
1388 }
1389 }
1390
1391 Register FramePtrRegScratchCopy;
1392 Register SGPRForFPSaveRestoreCopy =
1393 FuncInfo->getScratchSGPRCopyDstReg(FramePtrReg);
1394 if (FPSaved) {
1395 // CSR spill restores should use FP as base register. If
1396 // SGPRForFPSaveRestoreCopy is not true, restore the previous value of FP
1397 // into a new scratch register and copy to FP later when other registers are
1398 // restored from the current stack frame.
1399 initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ false);
1400 if (SGPRForFPSaveRestoreCopy) {
1401 LiveUnits.addReg(SGPRForFPSaveRestoreCopy);
1402 } else {
1403 FramePtrRegScratchCopy = findScratchNonCalleeSaveRegister(
1404 MRI, LiveUnits, AMDGPU::SReg_32_XM0_XEXECRegClass);
1405 if (!FramePtrRegScratchCopy)
1406 report_fatal_error("failed to find free scratch register");
1407
1408 LiveUnits.addReg(FramePtrRegScratchCopy);
1409 }
1410
1411 emitCSRSpillRestores(MF, MBB, MBBI, DL, LiveUnits, FramePtrReg,
1412 FramePtrRegScratchCopy);
1413 }
1414
1415 if (FPSaved) {
1416 // Insert the copy to restore FP.
1417 Register SrcReg = SGPRForFPSaveRestoreCopy ? SGPRForFPSaveRestoreCopy
1418 : FramePtrRegScratchCopy;
1420 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
1421 .addReg(SrcReg);
1422 if (SGPRForFPSaveRestoreCopy)
1424 } else {
1425 // Insert the CSR spill restores with SP as the base register.
1426 emitCSRSpillRestores(MF, MBB, MBBI, DL, LiveUnits, StackPtrReg,
1427 FramePtrRegScratchCopy);
1428 }
1429}
1430
1431#ifndef NDEBUG
// Debug-only helper: returns true iff every stack object in the frame is
// dead, i.e. nothing will actually need stack memory.
// NOTE(review): the signature line and part of the if-condition were lost in
// extraction — the surviving check is !isDeadObjectIndex; confirm the
// dropped conjunct against the full source.
1433 const MachineFrameInfo &MFI = MF.getFrameInfo();
1434 const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
1435 for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
1436 I != E; ++I) {
1437 if (!MFI.isDeadObjectIndex(I) &&
1440 return false;
1441 }
1442 }
1443
1444 return true;
1445}
1446#endif
1447
// Resolve a frame index to a base register plus offset. The base register is
// reported through the \p FrameReg out-parameter.
// NOTE(review): the leading signature line and the return statement were
// lost in extraction — confirm the returned offset computation against the
// full source.
1449 int FI,
1450 Register &FrameReg) const {
1451 const SIRegisterInfo *RI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
1452
1453 FrameReg = RI->getFrameRegister(MF);
1455}
1456
// Hook run before frame finalization: tries to eliminate VGPR spill slots by
// redirecting spills to AGPRs, drops dead SGPR-spill frame indices, and
// reserves emergency scavenging slots if stack objects remain.
// NOTE(review): the first signature line (function name) was lost in
// extraction — confirm against the full source.
1458 MachineFunction &MF,
1459 RegScavenger *RS) const {
1460 MachineFrameInfo &MFI = MF.getFrameInfo();
1461
1462 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1463 const SIInstrInfo *TII = ST.getInstrInfo();
1464 const SIRegisterInfo *TRI = ST.getRegisterInfo();
1465 MachineRegisterInfo &MRI = MF.getRegInfo();
1467
// NOTE(review): the tail of this condition (presumably the
// EnableSpillVGPRToAGPR cl::opt check) was lost in extraction.
1468 const bool SpillVGPRToAGPR = ST.hasMAIInsts() && FuncInfo->hasSpilledVGPRs()
1470
1471 if (SpillVGPRToAGPR) {
1472 // To track the spill frame indices handled in this pass.
1473 BitVector SpillFIs(MFI.getObjectIndexEnd(), false);
1474 BitVector NonVGPRSpillFIs(MFI.getObjectIndexEnd(), false);
1475
1476 bool SeenDbgInstr = false;
1477
1478 for (MachineBasicBlock &MBB : MF) {
1480 int FrameIndex;
1481 if (MI.isDebugInstr())
1482 SeenDbgInstr = true;
1483
1484 if (TII->isVGPRSpill(MI)) {
1485 // Try to eliminate stack used by VGPR spills before frame
1486 // finalization.
1487 unsigned FIOp = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
1488 AMDGPU::OpName::vaddr);
1489 int FI = MI.getOperand(FIOp).getIndex();
1490 Register VReg =
1491 TII->getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg();
1492 if (FuncInfo->allocateVGPRSpillToAGPR(MF, FI,
1493 TRI->isAGPR(MRI, VReg))) {
// The spill now targets an AGPR: rewrite the frame index immediately,
// scavenging backwards from just after this instruction.
1494 assert(RS != nullptr);
1495 RS->enterBasicBlockEnd(MBB);
1496 RS->backward(std::next(MI.getIterator()));
1497 TRI->eliminateFrameIndex(MI, 0, FIOp, RS);
1498 SpillFIs.set(FI);
1499 continue;
1500 }
1501 } else if (TII->isStoreToStackSlot(MI, FrameIndex) ||
1502 TII->isLoadFromStackSlot(MI, FrameIndex))
1503 if (!MFI.isFixedObjectIndex(FrameIndex))
1504 NonVGPRSpillFIs.set(FrameIndex);
1505 }
1506 }
1507
1508 // Stack slot coloring may assign different objects to the same stack slot.
1509 // If not, then the VGPR to AGPR spill slot is dead.
1510 for (unsigned FI : SpillFIs.set_bits())
1511 if (!NonVGPRSpillFIs.test(FI))
1512 FuncInfo->setVGPRToAGPRSpillDead(FI);
1513
// The AGPRs (and their shadow VGPRs) used for spilling must be live-in
// everywhere since spill code may appear in any block.
1514 for (MachineBasicBlock &MBB : MF) {
1515 for (MCPhysReg Reg : FuncInfo->getVGPRSpillAGPRs())
1516 MBB.addLiveIn(Reg);
1517
1518 for (MCPhysReg Reg : FuncInfo->getAGPRSpillVGPRs())
1519 MBB.addLiveIn(Reg);
1520
1521 MBB.sortUniqueLiveIns();
1522
1523 if (!SpillFIs.empty() && SeenDbgInstr)
1524 clearDebugInfoForSpillFIs(MFI, MBB, SpillFIs);
1525 }
1526 }
1527
1528 // At this point we've already allocated all spilled SGPRs to VGPRs if we
1529 // can. Any remaining SGPR spills will go to memory, so move them back to the
1530 // default stack.
1531 bool HaveSGPRToVMemSpill =
1532 FuncInfo->removeDeadFrameIndices(MFI, /*ResetSGPRSpillStackIDs*/ true);
1534 "SGPR spill should have been removed in SILowerSGPRSpills");
1535
1536 // FIXME: The other checks should be redundant with allStackObjectsAreDead,
1537 // but currently hasNonSpillStackObjects is set only from source
1538 // allocas. Stack temps produced from legalization are not counted currently.
1539 if (!allStackObjectsAreDead(MFI)) {
1540 assert(RS && "RegScavenger required if spilling");
1541
1542 // Add an emergency spill slot
1543 RS->addScavengingFrameIndex(FuncInfo->getScavengeFI(MFI, *TRI));
1544
1545 // If we are spilling SGPRs to memory with a large frame, we may need a
1546 // second VGPR emergency frame index.
1547 if (HaveSGPRToVMemSpill &&
1549 RS->addScavengingFrameIndex(MFI.CreateSpillStackObject(4, Align(4)));
1550 }
1551 }
1552}
1553
// Post-RA hook: shift the registers that were pessimistically reserved at
// the top of the register file (the gfx908 AGPR-copy VGPR and the
// long-branch SGPR pair) down to lower unused registers when possible.
// NOTE(review): the first signature line (function name) was lost in
// extraction — confirm against the full source.
1555 MachineFunction &MF, RegScavenger *RS) const {
1556 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1557 const SIRegisterInfo *TRI = ST.getRegisterInfo();
1558 MachineRegisterInfo &MRI = MF.getRegInfo();
1560
1561 if (ST.hasMAIInsts() && !ST.hasGFX90AInsts()) {
1562 // On gfx908, we had initially reserved highest available VGPR for AGPR
1563 // copy. Now since we are done with RA, check if there exist an unused VGPR
1564 // which is lower than the eariler reserved VGPR before RA. If one exist,
1565 // use it for AGPR copy instead of one reserved before RA.
1566 Register VGPRForAGPRCopy = FuncInfo->getVGPRForAGPRCopy();
1567 Register UnusedLowVGPR =
1568 TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
1569 if (UnusedLowVGPR && (TRI->getHWRegIndex(UnusedLowVGPR) <
1570 TRI->getHWRegIndex(VGPRForAGPRCopy))) {
1571 // Reserve this newly identified VGPR (for AGPR copy)
1572 // reserved registers should already be frozen at this point
1573 // so we can avoid calling MRI.freezeReservedRegs and just use
1574 // MRI.reserveReg
1575 FuncInfo->setVGPRForAGPRCopy(UnusedLowVGPR);
1576 MRI.reserveReg(UnusedLowVGPR, TRI);
1577 }
1578 }
1579 // We initally reserved the highest available SGPR pair for long branches
1580 // now, after RA, we shift down to a lower unused one if one exists
1581 Register LongBranchReservedReg = FuncInfo->getLongBranchReservedReg();
1582 Register UnusedLowSGPR =
1583 TRI->findUnusedRegister(MRI, &AMDGPU::SGPR_64RegClass, MF);
1584 // If LongBranchReservedReg is null then we didn't find a long branch
1585 // and never reserved a register to begin with so there is nothing to
1586 // shift down. Then if UnusedLowSGPR is null, there isn't available lower
1587 // register to use so just keep the original one we set.
1588 if (LongBranchReservedReg && UnusedLowSGPR) {
1589 FuncInfo->setLongBranchReservedReg(UnusedLowSGPR);
1590 MRI.reserveReg(UnusedLowSGPR, TRI);
1591 }
1592}
1593
1594// The special SGPR spills like the one needed for FP, BP or any reserved
1595// registers delayed until frame lowering.
// Decides where the special SGPRs (EXEC-copy scratch, FP, BP) get saved:
// either into a free scratch SGPR or via a VGPR-lane/memory spill.
// NOTE(review): the first signature line (function name) was lost in
// extraction — confirm against the full source.
1597 MachineFunction &MF, BitVector &SavedVGPRs,
1598 bool NeedExecCopyReservedReg) const {
1599 MachineFrameInfo &FrameInfo = MF.getFrameInfo();
1600 MachineRegisterInfo &MRI = MF.getRegInfo();
1602 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1603 const SIRegisterInfo *TRI = ST.getRegisterInfo();
1604 LiveRegUnits LiveUnits;
1605 LiveUnits.init(*TRI);
1606 // Initially mark callee saved registers as used so we will not choose them
1607 // while looking for scratch SGPRs.
1608 const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs();
1609 for (unsigned I = 0; CSRegs[I]; ++I)
1610 LiveUnits.addReg(CSRegs[I]);
1611
1612 const TargetRegisterClass &RC = *TRI->getWaveMaskRegClass();
1613
1614 Register ReservedRegForExecCopy = MFI->getSGPRForEXECCopy();
1615 if (NeedExecCopyReservedReg ||
1616 (ReservedRegForExecCopy &&
1617 MRI.isPhysRegUsed(ReservedRegForExecCopy, /*SkipRegMaskTest=*/true))) {
1618 MRI.reserveReg(ReservedRegForExecCopy, TRI);
1619 Register UnusedScratchReg = findUnusedRegister(MRI, LiveUnits, RC);
1620 if (UnusedScratchReg) {
1621 // If found any unused scratch SGPR, reserve the register itself for Exec
1622 // copy and there is no need for any spills in that case.
1623 MFI->setSGPRForEXECCopy(UnusedScratchReg);
1624 MRI.replaceRegWith(ReservedRegForExecCopy, UnusedScratchReg);
1625 LiveUnits.addReg(UnusedScratchReg);
1626 } else {
1627 // Needs spill.
1628 assert(!MFI->hasPrologEpilogSGPRSpillEntry(ReservedRegForExecCopy) &&
1629 "Re-reserving spill slot for EXEC copy register");
1630 getVGPRSpillLaneOrTempRegister(MF, LiveUnits, ReservedRegForExecCopy, RC,
1631 /*IncludeScratchCopy=*/false);
1632 }
1633 } else if (ReservedRegForExecCopy) {
1634 // Reset it at this point. There are no whole-wave copies and spills
1635 // encountered.
1636 MFI->setSGPRForEXECCopy(AMDGPU::NoRegister);
1637 }
1638
1639 // Functions that don't return to the caller don't need to preserve
1640 // the FP and BP.
1641 const Function &F = MF.getFunction();
1642 if (F.hasFnAttribute(Attribute::NoReturn) ||
1643 AMDGPU::isChainCC(F.getCallingConv()))
1644 return;
1645
1646 // hasFP only knows about stack objects that already exist. We're now
1647 // determining the stack slots that will be created, so we have to predict
1648 // them. Stack objects force FP usage with calls.
1649 //
1650 // Note a new VGPR CSR may be introduced if one is used for the spill, but we
1651 // don't want to report it here.
1652 //
1653 // FIXME: Is this really hasReservedCallFrame?
1654 const bool WillHaveFP =
1655 FrameInfo.hasCalls() &&
1656 (SavedVGPRs.any() || !allStackObjectsAreDead(FrameInfo));
1657
1658 if (WillHaveFP || hasFP(MF)) {
1659 Register FramePtrReg = MFI->getFrameOffsetReg();
1660 assert(!MFI->hasPrologEpilogSGPRSpillEntry(FramePtrReg) &&
1661 "Re-reserving spill slot for FP");
1662 getVGPRSpillLaneOrTempRegister(MF, LiveUnits, FramePtrReg);
1663 }
1664
1665 if (TRI->hasBasePointer(MF)) {
1666 Register BasePtrReg = TRI->getBaseRegister();
1667 assert(!MFI->hasPrologEpilogSGPRSpillEntry(BasePtrReg) &&
1668 "Re-reserving spill slot for BP");
1669 getVGPRSpillLaneOrTempRegister(MF, LiveUnits, BasePtrReg);
1670 }
1671}
1672
1673// Only report VGPRs to generic code.
// Computes the callee-saved set for generic PEI: scans for WWM spills and
// return instructions, shifts WWM VGPRs to the lowest range, allocates WWM
// spill slots, masks out non-vector registers, and delegates the special
// SGPR decisions to determinePrologEpilogSGPRSaves.
// NOTE(review): the first signature line (function name and MachineFunction
// parameter) was lost in extraction — confirm against the full source.
1675 BitVector &SavedVGPRs,
1676 RegScavenger *RS) const {
1678
1679 // If this is a function with the amdgpu_cs_chain[_preserve] calling
1680 // convention and it doesn't contain any calls to llvm.amdgcn.cs.chain, then
1681 // we don't need to save and restore anything.
1682 if (MFI->isChainFunction() && !MF.getFrameInfo().hasTailCall())
1683 return;
1684
1686
1687 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1688 const SIRegisterInfo *TRI = ST.getRegisterInfo();
1689 const SIInstrInfo *TII = ST.getInstrInfo();
1690 bool NeedExecCopyReservedReg = false;
1691
1692 MachineInstr *ReturnMI = nullptr;
1693 for (MachineBasicBlock &MBB : MF) {
1694 for (MachineInstr &MI : MBB) {
1695 // TODO: Walking through all MBBs here would be a bad heuristic. Better
1696 // handle them elsewhere.
1697 if (TII->isWWMRegSpillOpcode(MI.getOpcode()))
1698 NeedExecCopyReservedReg = true;
1699 else if (MI.getOpcode() == AMDGPU::SI_RETURN ||
1700 MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG ||
1701 MI.getOpcode() == AMDGPU::SI_WHOLE_WAVE_FUNC_RETURN ||
1702 (MFI->isChainFunction() &&
1703 TII->isChainCallOpcode(MI.getOpcode()))) {
1704 // We expect all return to be the same size.
1705 assert(!ReturnMI ||
1706 (count_if(MI.operands(), [](auto Op) { return Op.isReg(); }) ==
1707 count_if(ReturnMI->operands(), [](auto Op) { return Op.isReg(); })));
1708 ReturnMI = &MI;
1709 }
1710 }
1711 }
1712
1713 SmallVector<Register> SortedWWMVGPRs;
1714 for (Register Reg : MFI->getWWMReservedRegs()) {
1715 // The shift-back is needed only for the VGPRs used for SGPR spills and they
1716 // are of 32-bit size. SIPreAllocateWWMRegs pass can add tuples into WWM
1717 // reserved registers.
1718 const TargetRegisterClass *RC = TRI->getPhysRegBaseClass(Reg);
1719 if (TRI->getRegSizeInBits(*RC) != 32)
1720 continue;
1721 SortedWWMVGPRs.push_back(Reg);
1722 }
1723
// Highest-numbered first, so the shift assigns the lowest registers.
1724 sort(SortedWWMVGPRs, std::greater<Register>());
1725 MFI->shiftWwmVGPRsToLowestRange(MF, SortedWWMVGPRs, SavedVGPRs);
1726
1727 if (MFI->isEntryFunction())
1728 return;
1729
1730 if (MFI->isWholeWaveFunction()) {
1731 // In practice, all the VGPRs are WWM registers, and we will need to save at
1732 // least their inactive lanes. Add them to WWMReservedRegs.
1733 assert(!NeedExecCopyReservedReg &&
1734 "Whole wave functions can use the reg mapped for their i1 argument");
1735
// Only ArchVGPRs can be WWM-reserved here.
1736 unsigned NumArchVGPRs = ST.getAddressableNumArchVGPRs();
1737 for (MCRegister Reg :
1738 AMDGPU::VGPR_32RegClass.getRegisters().take_front(NumArchVGPRs))
1739 if (MF.getRegInfo().isPhysRegModified(Reg)) {
1740 MFI->reserveWWMRegister(Reg);
1741 MF.begin()->addLiveIn(Reg);
1742 }
1743 MF.begin()->sortUniqueLiveIns();
1744 }
1745
1746 // Remove any VGPRs used in the return value because these do not need to be saved.
1747 // This prevents CSR restore from clobbering return VGPRs.
1748 if (ReturnMI) {
1749 for (auto &Op : ReturnMI->operands()) {
1750 if (Op.isReg())
1751 SavedVGPRs.reset(Op.getReg());
1752 }
1753 }
1754
1755 // Create the stack objects for WWM registers now.
1756 for (Register Reg : MFI->getWWMReservedRegs()) {
1757 const TargetRegisterClass *RC = TRI->getPhysRegBaseClass(Reg);
1758 MFI->allocateWWMSpill(MF, Reg, TRI->getSpillSize(*RC),
1759 TRI->getSpillAlign(*RC));
1760 }
1761
1762 // Ignore the SGPRs the default implementation found.
1763 SavedVGPRs.clearBitsNotInMask(TRI->getAllVectorRegMask());
1764
1765 // Do not save AGPRs prior to GFX90A because there was no easy way to do so.
1766 // In gfx908 there was do AGPR loads and stores and thus spilling also
1767 // require a temporary VGPR.
1768 if (!ST.hasGFX90AInsts())
1769 SavedVGPRs.clearBitsInMask(TRI->getAllAGPRRegMask());
1770
1771 determinePrologEpilogSGPRSaves(MF, SavedVGPRs, NeedExecCopyReservedReg);
1772
1773 // The Whole-Wave VGPRs need to be specially inserted in the prolog, so don't
1774 // allow the default insertion to handle them.
1775 for (auto &Reg : MFI->getWWMSpills())
1776 SavedVGPRs.reset(Reg.first);
1777}
1778
// SGPR counterpart of determineCalleeSaves: excludes SP/FP (which are
// managed explicitly), drops vector registers from the set, and forces
// saving of the return-address SGPR pair where IPRA would otherwise miss it.
// NOTE(review): the first signature line (function name and MachineFunction
// parameter) was lost in extraction — confirm against the full source.
1780 BitVector &SavedRegs,
1781 RegScavenger *RS) const {
1784 if (MFI->isEntryFunction())
1785 return;
1786
1787 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1788 const SIRegisterInfo *TRI = ST.getRegisterInfo();
1789
1790 // The SP is specifically managed and we don't want extra spills of it.
1791 SavedRegs.reset(MFI->getStackPtrOffsetReg());
1792
1793 const BitVector AllSavedRegs = SavedRegs;
1794 SavedRegs.clearBitsInMask(TRI->getAllVectorRegMask());
1795
1796 // We have to anticipate introducing CSR VGPR spills or spill of caller
1797 // save VGPR reserved for SGPR spills as we now always create stack entry
1798 // for it, if we don't have any stack objects already, since we require a FP
1799 // if there is a call and stack. We will allocate a VGPR for SGPR spills if
1800 // there are any SGPR spills. Whether they are CSR spills or otherwise.
1801 MachineFrameInfo &FrameInfo = MF.getFrameInfo();
1802 const bool WillHaveFP =
1803 FrameInfo.hasCalls() && (AllSavedRegs.any() || MFI->hasSpilledSGPRs());
1804
1805 // FP will be specially managed like SP.
1806 if (WillHaveFP || hasFP(MF))
1807 SavedRegs.reset(MFI->getFrameOffsetReg());
1808
1809 // Return address use with return instruction is hidden through the SI_RETURN
1810 // pseudo. Given that and since the IPRA computes actual register usage and
1811 // does not use CSR list, the clobbering of return address by function calls
1812 // (D117243) or otherwise (D120922) is ignored/not seen by the IPRA's register
1813 // usage collection. This will ensure save/restore of return address happens
1814 // in those scenarios.
1815 const MachineRegisterInfo &MRI = MF.getRegInfo();
1816 Register RetAddrReg = TRI->getReturnAddressReg(MF);
1817 if (!MFI->isEntryFunction() &&
1818 (FrameInfo.hasCalls() || MRI.isPhysRegModified(RetAddrReg))) {
// Save both 32-bit halves of the 64-bit return address pair.
1819 SavedRegs.set(TRI->getSubReg(RetAddrReg, AMDGPU::sub0));
1820 SavedRegs.set(TRI->getSubReg(RetAddrReg, AMDGPU::sub1));
1821 }
1822}
1823
1825 const GCNSubtarget &ST,
1826 std::vector<CalleeSavedInfo> &CSI) {
1828 MachineFrameInfo &MFI = MF.getFrameInfo();
1829 const SIRegisterInfo *TRI = ST.getRegisterInfo();
1830
1831 assert(
1832 llvm::is_sorted(CSI,
1833 [](const CalleeSavedInfo &A, const CalleeSavedInfo &B) {
1834 return A.getReg() < B.getReg();
1835 }) &&
1836 "Callee saved registers not sorted");
1837
1838 auto CanUseBlockOps = [&](const CalleeSavedInfo &CSI) {
1839 return !CSI.isSpilledToReg() &&
1840 TRI->getPhysRegBaseClass(CSI.getReg()) == &AMDGPU::VGPR_32RegClass &&
1841 !FuncInfo->isWWMReservedRegister(CSI.getReg());
1842 };
1843
1844 auto CSEnd = CSI.end();
1845 for (auto CSIt = CSI.begin(); CSIt != CSEnd; ++CSIt) {
1846 Register Reg = CSIt->getReg();
1847 if (!CanUseBlockOps(*CSIt))
1848 continue;
1849
1850 // Find all the regs that will fit in a 32-bit mask starting at the current
1851 // reg and build said mask. It should have 1 for every register that's
1852 // included, with the current register as the least significant bit.
1853 uint32_t Mask = 1;
1854 CSEnd = std::remove_if(
1855 CSIt + 1, CSEnd, [&](const CalleeSavedInfo &CSI) -> bool {
1856 if (CanUseBlockOps(CSI) && CSI.getReg() < Reg + 32) {
1857 Mask |= 1 << (CSI.getReg() - Reg);
1858 return true;
1859 } else {
1860 return false;
1861 }
1862 });
1863
1864 const TargetRegisterClass *BlockRegClass = TRI->getRegClassForBlockOp(MF);
1865 Register RegBlock =
1866 TRI->getMatchingSuperReg(Reg, AMDGPU::sub0, BlockRegClass);
1867 if (!RegBlock) {
1868 // We couldn't find a super register for the block. This can happen if
1869 // the register we started with is too high (e.g. v232 if the maximum is
1870 // v255). We therefore try to get the last register block and figure out
1871 // the mask from there.
1872 Register LastBlockStart =
1873 AMDGPU::VGPR0 + alignDown(Reg - AMDGPU::VGPR0, 32);
1874 RegBlock =
1875 TRI->getMatchingSuperReg(LastBlockStart, AMDGPU::sub0, BlockRegClass);
1876 assert(RegBlock && TRI->isSubRegister(RegBlock, Reg) &&
1877 "Couldn't find super register");
1878 int RegDelta = Reg - LastBlockStart;
1879 assert(RegDelta > 0 && llvm::countl_zero(Mask) >= RegDelta &&
1880 "Bad shift amount");
1881 Mask <<= RegDelta;
1882 }
1883
1884 FuncInfo->setMaskForVGPRBlockOps(RegBlock, Mask);
1885
1886 // The stack objects can be a bit smaller than the register block if we know
1887 // some of the high bits of Mask are 0. This may happen often with calling
1888 // conventions where the caller and callee-saved VGPRs are interleaved at
1889 // a small boundary (e.g. 8 or 16).
1890 int UnusedBits = llvm::countl_zero(Mask);
1891 unsigned BlockSize = TRI->getSpillSize(*BlockRegClass) - UnusedBits * 4;
1892 int FrameIdx =
1893 MFI.CreateStackObject(BlockSize, TRI->getSpillAlign(*BlockRegClass),
1894 /*isSpillSlot=*/true);
1895 MFI.setIsCalleeSavedObjectIndex(FrameIdx, true);
1896
1897 CSIt->setFrameIdx(FrameIdx);
1898 CSIt->setReg(RegBlock);
1899 }
1900 CSI.erase(CSEnd, CSI.end());
1901}
1902
1905 std::vector<CalleeSavedInfo> &CSI) const {
1906 if (CSI.empty())
1907 return true; // Early exit if no callee saved registers are modified!
1908
1909 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1910 bool UseVGPRBlocks = ST.useVGPRBlockOpsForCSR();
1911
1912 if (UseVGPRBlocks)
1913 assignSlotsUsingVGPRBlocks(MF, ST, CSI);
1914
1915 return assignCalleeSavedSpillSlotsImpl(MF, TRI, CSI) || UseVGPRBlocks;
1916}
1917
1920 std::vector<CalleeSavedInfo> &CSI) const {
1921 if (CSI.empty())
1922 return true; // Early exit if no callee saved registers are modified!
1923
1924 const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
1925 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1926 const SIRegisterInfo *RI = ST.getRegisterInfo();
1927 Register FramePtrReg = FuncInfo->getFrameOffsetReg();
1928 Register BasePtrReg = RI->getBaseRegister();
1929 Register SGPRForFPSaveRestoreCopy =
1930 FuncInfo->getScratchSGPRCopyDstReg(FramePtrReg);
1931 Register SGPRForBPSaveRestoreCopy =
1932 FuncInfo->getScratchSGPRCopyDstReg(BasePtrReg);
1933 if (!SGPRForFPSaveRestoreCopy && !SGPRForBPSaveRestoreCopy)
1934 return false;
1935
1936 unsigned NumModifiedRegs = 0;
1937
1938 if (SGPRForFPSaveRestoreCopy)
1939 NumModifiedRegs++;
1940 if (SGPRForBPSaveRestoreCopy)
1941 NumModifiedRegs++;
1942
1943 for (auto &CS : CSI) {
1944 if (CS.getReg() == FramePtrReg.asMCReg() && SGPRForFPSaveRestoreCopy) {
1945 CS.setDstReg(SGPRForFPSaveRestoreCopy);
1946 if (--NumModifiedRegs)
1947 break;
1948 } else if (CS.getReg() == BasePtrReg.asMCReg() &&
1949 SGPRForBPSaveRestoreCopy) {
1950 CS.setDstReg(SGPRForBPSaveRestoreCopy);
1951 if (--NumModifiedRegs)
1952 break;
1953 }
1954 }
1955
1956 return false;
1957}
1958
1960 const MachineFunction &MF) const {
1961
1962 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1963 const MachineFrameInfo &MFI = MF.getFrameInfo();
1964 const SIInstrInfo *TII = ST.getInstrInfo();
1965 uint64_t EstStackSize = MFI.estimateStackSize(MF);
1966 uint64_t MaxOffset = EstStackSize - 1;
1967
1968 // We need the emergency stack slots to be allocated in range of the
1969 // MUBUF/flat scratch immediate offset from the base register, so assign these
1970 // first at the incoming SP position.
1971 //
1972 // TODO: We could try sorting the objects to find a hole in the first bytes
1973 // rather than allocating as close to possible. This could save a lot of space
1974 // on frames with alignment requirements.
1975 if (ST.hasFlatScratchEnabled()) {
1976 if (TII->isLegalFLATOffset(MaxOffset, AMDGPUAS::PRIVATE_ADDRESS,
1978 return false;
1979 } else {
1980 if (TII->isLegalMUBUFImmOffset(MaxOffset))
1981 return false;
1982 }
1983
1984 return true;
1985}
1986
1990 MachineFunction *MF = MBB.getParent();
1991 const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
1992 if (!ST.useVGPRBlockOpsForCSR())
1993 return false;
1994
1995 MachineFrameInfo &FrameInfo = MF->getFrameInfo();
1997 const SIInstrInfo *TII = ST.getInstrInfo();
1999
2000 const TargetRegisterClass *BlockRegClass =
2001 static_cast<const SIRegisterInfo *>(TRI)->getRegClassForBlockOp(*MF);
2002 for (const CalleeSavedInfo &CS : CSI) {
2003 Register Reg = CS.getReg();
2004 if (!BlockRegClass->contains(Reg) ||
2005 !FuncInfo->hasMaskForVGPRBlockOps(Reg)) {
2007 continue;
2008 }
2009
2010 // Build a scratch block store.
2011 uint32_t Mask = FuncInfo->getMaskForVGPRBlockOps(Reg);
2012 int FrameIndex = CS.getFrameIdx();
2013 MachinePointerInfo PtrInfo =
2014 MachinePointerInfo::getFixedStack(*MF, FrameIndex);
2015 MachineMemOperand *MMO =
2017 FrameInfo.getObjectSize(FrameIndex),
2018 FrameInfo.getObjectAlign(FrameIndex));
2019
2020 BuildMI(MBB, MI, MI->getDebugLoc(),
2021 TII->get(AMDGPU::SI_BLOCK_SPILL_V1024_SAVE))
2022 .addReg(Reg, getKillRegState(false))
2023 .addFrameIndex(FrameIndex)
2025 .addImm(0)
2026 .addImm(Mask)
2027 .addMemOperand(MMO);
2028
2029 FuncInfo->setHasSpilledVGPRs();
2030
2031 // Add the register to the liveins. This is necessary because if any of the
2032 // VGPRs in the register block is reserved (e.g. if it's a WWM register),
2033 // then the whole block will be marked as reserved and `updateLiveness` will
2034 // skip it.
2035 MBB.addLiveIn(Reg);
2036 }
2037 MBB.sortUniqueLiveIns();
2038
2039 return true;
2040}
2041
2045 MachineFunction *MF = MBB.getParent();
2046 const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
2047 if (!ST.useVGPRBlockOpsForCSR())
2048 return false;
2049
2051 MachineFrameInfo &MFI = MF->getFrameInfo();
2052 const SIInstrInfo *TII = ST.getInstrInfo();
2053 const SIRegisterInfo *SITRI = static_cast<const SIRegisterInfo *>(TRI);
2054 const TargetRegisterClass *BlockRegClass = SITRI->getRegClassForBlockOp(*MF);
2055 for (const CalleeSavedInfo &CS : reverse(CSI)) {
2056 Register Reg = CS.getReg();
2057 if (!BlockRegClass->contains(Reg) ||
2058 !FuncInfo->hasMaskForVGPRBlockOps(Reg)) {
2060 continue;
2061 }
2062
2063 // Build a scratch block load.
2064 uint32_t Mask = FuncInfo->getMaskForVGPRBlockOps(Reg);
2065 int FrameIndex = CS.getFrameIdx();
2066 MachinePointerInfo PtrInfo =
2067 MachinePointerInfo::getFixedStack(*MF, FrameIndex);
2069 PtrInfo, MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIndex),
2070 MFI.getObjectAlign(FrameIndex));
2071
2072 auto MIB = BuildMI(MBB, MI, MI->getDebugLoc(),
2073 TII->get(AMDGPU::SI_BLOCK_SPILL_V1024_RESTORE), Reg)
2074 .addFrameIndex(FrameIndex)
2075 .addReg(FuncInfo->getStackPtrOffsetReg())
2076 .addImm(0)
2077 .addImm(Mask)
2078 .addMemOperand(MMO);
2079 SITRI->addImplicitUsesForBlockCSRLoad(MIB, Reg);
2080
2081 // Add the register to the liveins. This is necessary because if any of the
2082 // VGPRs in the register block is reserved (e.g. if it's a WWM register),
2083 // then the whole block will be marked as reserved and `updateLiveness` will
2084 // skip it.
2085 MBB.addLiveIn(Reg);
2086 }
2087
2088 MBB.sortUniqueLiveIns();
2089 return true;
2090}
2091
2093 MachineFunction &MF,
2096 int64_t Amount = I->getOperand(0).getImm();
2097 if (Amount == 0)
2098 return MBB.erase(I);
2099
2100 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
2101 const SIInstrInfo *TII = ST.getInstrInfo();
2102 const DebugLoc &DL = I->getDebugLoc();
2103 unsigned Opc = I->getOpcode();
2104 bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
2105 uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;
2106
2107 if (!hasReservedCallFrame(MF)) {
2108 Amount = alignTo(Amount, getStackAlign());
2109 assert(isUInt<32>(Amount) && "exceeded stack address space size");
2112
2113 Amount *= getScratchScaleFactor(ST);
2114 if (IsDestroy)
2115 Amount = -Amount;
2116 auto Add = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), SPReg)
2117 .addReg(SPReg)
2118 .addImm(Amount);
2119 Add->getOperand(3).setIsDead(); // Mark SCC as dead.
2120 } else if (CalleePopAmount != 0) {
2121 llvm_unreachable("is this used?");
2122 }
2123
2124 return MBB.erase(I);
2125}
2126
2127/// Returns true if the frame will require a reference to the stack pointer.
2128///
2129/// This is the set of conditions common to setting up the stack pointer in a
2130/// kernel, and for using a frame pointer in a callable function.
2131///
2132/// FIXME: Should also check hasOpaqueSPAdjustment and if any inline asm
2133/// references SP.
2135 return MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint();
2136}
2137
2138// The FP for kernels is always known 0, so we never really need to setup an
2139// explicit register for it. However, DisableFramePointerElim will force us to
2140// use a register for it.
2142 const MachineFrameInfo &MFI = MF.getFrameInfo();
2143
2144 // For entry functions we can use an immediate offset in most cases,
2145 // so the presence of calls doesn't imply we need a distinct frame pointer.
2146 if (MFI.hasCalls() &&
2148 // All offsets are unsigned, so need to be addressed in the same direction
2149 // as stack growth.
2150
2151 // FIXME: This function is pretty broken, since it can be called before the
2152 // frame layout is determined or CSR spills are inserted.
2153 return MFI.getStackSize() != 0;
2154 }
2155
2156 return frameTriviallyRequiresSP(MFI) || MFI.isFrameAddressTaken() ||
2157 MF.getSubtarget<GCNSubtarget>().getRegisterInfo()->hasStackRealignment(
2158 MF) ||
2161}
2162
2164 const MachineFunction &MF) const {
2165 return MF.getInfo<SIMachineFunctionInfo>()->isDynamicVGPREnabled() &&
2168}
2169
2170// This is essentially a reduced version of hasFP for entry functions. Since the
2171// stack pointer is known 0 on entry to kernels, we never really need an FP
2172// register. We may need to initialize the stack pointer depending on the frame
2173// properties, which logically overlaps many of the cases where an ordinary
2174// function would require an FP.
2176 const MachineFunction &MF) const {
2177 // Callable functions always require a stack pointer reference.
2179 "only expected to call this for entry points functions");
2180
2181 const MachineFrameInfo &MFI = MF.getFrameInfo();
2182
2183 // Entry points ordinarily don't need to initialize SP. We have to set it up
2184 // for callees if there are any. Also note tail calls are only possible via
2185 // the `llvm.amdgcn.cs.chain` intrinsic.
2186 if (MFI.hasCalls() || MFI.hasTailCall())
2187 return true;
2188
2189 // We still need to initialize the SP if we're doing anything weird that
2190 // references the SP, like variable sized stack objects.
2191 return frameTriviallyRequiresSP(MFI);
2192}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Provides AMDGPU specific target descriptions.
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static const Function * getParent(const Value *V)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
AMD GCN specific subclass of TargetSubtarget.
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
A set of register units.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
static constexpr MCPhysReg FPReg
static constexpr MCPhysReg SPReg
This file declares the machine register scavenger class.
static void buildEpilogRestore(const GCNSubtarget &ST, const SIRegisterInfo &TRI, const SIMachineFunctionInfo &FuncInfo, LiveRegUnits &LiveUnits, MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SpillReg, int FI, Register FrameReg, int64_t DwordOff=0)
static cl::opt< bool > EnableSpillVGPRToAGPR("amdgpu-spill-vgpr-to-agpr", cl::desc("Enable spilling VGPRs to AGPRs"), cl::ReallyHidden, cl::init(true))
static void getVGPRSpillLaneOrTempRegister(MachineFunction &MF, LiveRegUnits &LiveUnits, Register SGPR, const TargetRegisterClass &RC=AMDGPU::SReg_32_XM0_XEXECRegClass, bool IncludeScratchCopy=true)
Query target location for spilling SGPRs IncludeScratchCopy : Also look for free scratch SGPRs.
static void buildGitPtr(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, const SIInstrInfo *TII, Register TargetReg)
static bool allStackObjectsAreDead(const MachineFrameInfo &MFI)
static void buildPrologSpill(const GCNSubtarget &ST, const SIRegisterInfo &TRI, const SIMachineFunctionInfo &FuncInfo, LiveRegUnits &LiveUnits, MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SpillReg, int FI, Register FrameReg, int64_t DwordOff=0)
static Register buildScratchExecCopy(LiveRegUnits &LiveUnits, MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool IsProlog, bool EnableInactiveLanes)
static bool frameTriviallyRequiresSP(const MachineFrameInfo &MFI)
Returns true if the frame will require a reference to the stack pointer.
static void initLiveUnits(LiveRegUnits &LiveUnits, const SIRegisterInfo &TRI, const SIMachineFunctionInfo *FuncInfo, MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, bool IsProlog)
static bool allSGPRSpillsAreDead(const MachineFunction &MF)
static MCRegister findScratchNonCalleeSaveRegister(MachineRegisterInfo &MRI, LiveRegUnits &LiveUnits, const TargetRegisterClass &RC, bool Unused=false)
static MCRegister findUnusedRegister(MachineRegisterInfo &MRI, const LiveRegUnits &LiveUnits, const TargetRegisterClass &RC)
static void assignSlotsUsingVGPRBlocks(MachineFunction &MF, const GCNSubtarget &ST, std::vector< CalleeSavedInfo > &CSI)
static unsigned getScratchScaleFactor(const GCNSubtarget &ST)
#define LLVM_DEBUG(...)
Definition Debug.h:114
static const int BlockSize
Definition TarWriter.cpp:33
static const LaneMaskConstants & get(const GCNSubtarget &ST)
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition ArrayRef.h:186
bool test(unsigned Idx) const
Definition BitVector.h:480
BitVector & reset()
Definition BitVector.h:411
void clearBitsNotInMask(const uint32_t *Mask, unsigned MaskWords=~0u)
clearBitsNotInMask - Clear a bit in this vector for every '0' bit in Mask.
Definition BitVector.h:744
BitVector & set()
Definition BitVector.h:370
bool any() const
any - Returns true if any bit is set.
Definition BitVector.h:189
void clearBitsInMask(const uint32_t *Mask, unsigned MaskWords=~0u)
clearBitsInMask - Clear any bits in this vector that are set in Mask.
Definition BitVector.h:732
iterator_range< const_set_bits_iterator > set_bits() const
Definition BitVector.h:159
bool empty() const
empty - Tests whether there are no bits in this bitvector.
Definition BitVector.h:175
The CalleeSavedInfo class tracks the information need to locate where a callee saved register is in t...
MCRegister getReg() const
A debug info location.
Definition DebugLoc.h:123
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition Function.h:272
const HexagonRegisterInfo & getRegisterInfo() const
A set of register units used to track register liveness.
bool available(MCRegister Reg) const
Returns true if no part of physical register Reg is live.
void init(const TargetRegisterInfo &TRI)
Initialize and clear the set.
void addReg(MCRegister Reg)
Adds register units covered by physical register Reg.
LLVM_ABI void stepBackward(const MachineInstr &MI)
Updates liveness when stepping backwards over the instruction MI.
LLVM_ABI void addLiveOuts(const MachineBasicBlock &MBB)
Adds registers living out of block MBB.
void removeReg(MCRegister Reg)
Removes all register units covered by physical register Reg.
bool empty() const
Returns true if the set is empty.
LLVM_ABI void addLiveIns(const MachineBasicBlock &MBB)
Adds registers living into block MBB.
Describe properties that are true of each instruction in the target description file.
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
bool hasVarSizedObjects() const
This method may be called any time after instruction selection is complete to determine if the stack ...
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
bool hasCalls() const
Return true if the current function has any function calls.
bool isFrameAddressTaken() const
This method may be called any time after instruction selection is complete to determine if there is a...
Align getMaxAlign() const
Return the alignment in bytes that this function must be aligned to, which is greater than the defaul...
bool hasPatchPoint() const
This method may be called any time after instruction selection is complete to determine if there is a...
LLVM_ABI int CreateSpillStackObject(uint64_t Size, Align Alignment)
Create a new statically sized stack object that represents a spill slot, returning a nonnegative iden...
bool hasTailCall() const
Returns true if the function contains a tail call.
bool hasStackMap() const
This method may be called any time after instruction selection is complete to determine if there is a...
void RemoveStackObject(int ObjectIdx)
Remove or mark dead a statically sized stack object.
int getObjectIndexEnd() const
Return one past the maximum frame object index.
uint8_t getStackID(int ObjectIdx) const
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
int getObjectIndexBegin() const
Return the minimum frame object index.
bool isDeadObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a dead object.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineBasicBlock & front() const
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned TargetFlags=0) const
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Representation of each machine instruction.
mop_range operands()
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
void setIsDead(bool Val=true)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool isReserved(MCRegister PhysReg) const
isReserved - Returns true when PhysReg is a reserved register.
bool isAllocatable(MCRegister PhysReg) const
isAllocatable - Returns true when PhysReg belongs to an allocatable register class and it hasn't been...
LLVM_ABI const MCPhysReg * getCalleeSavedRegs() const
Returns list of callee saved registers.
void reserveReg(MCRegister PhysReg, const TargetRegisterInfo *TRI)
reserveReg – Mark a register as reserved so checks like isAllocatable will not suggest using it.
void addLiveIn(MCRegister Reg, Register vreg=Register())
addLiveIn - Add the specified register as a live-in.
LLVM_ABI void replaceRegWith(Register FromReg, Register ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
LLVM_ABI bool isPhysRegModified(MCRegister PhysReg, bool SkipNoReturnDef=false) const
Return true if the specified register is modified in this function.
LLVM_ABI bool isPhysRegUsed(MCRegister PhysReg, bool SkipRegMaskTest=false) const
Return true if the specified register is modified or read in this function.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition ArrayRef.h:298
PrologEpilogSGPRSpillBuilder(Register Reg, const PrologEpilogSGPRSaveRestoreInfo SI, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, const SIInstrInfo *TII, const SIRegisterInfo &TRI, LiveRegUnits &LiveUnits, Register FrameReg)
Wrapper class representing virtual and physical registers.
Definition Register.h:20
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
Definition Register.h:107
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition Register.h:83
void determinePrologEpilogSGPRSaves(MachineFunction &MF, BitVector &SavedRegs, bool NeedExecCopyReservedReg) const
StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg) const override
getFrameIndexReference - This method should return the base register and offset used to reference a f...
void processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS=nullptr) const override
processFunctionBeforeFrameFinalized - This method is called immediately before the specified function...
bool mayReserveScratchForCWSR(const MachineFunction &MF) const
bool allocateScavengingFrameIndexesNearIncomingSP(const MachineFunction &MF) const override
Control the placement of special register scavenging spill slots when allocating a stack frame.
bool requiresStackPointerReference(const MachineFunction &MF) const
void emitEntryFunctionPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const override
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...
void emitCSRSpillStores(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL, LiveRegUnits &LiveUnits, Register FrameReg, Register FramePtrRegScratchCopy) const
bool hasFPImpl(const MachineFunction &MF) const override
bool assignCalleeSavedSpillSlotsImpl(MachineFunction &MF, const TargetRegisterInfo *TRI, std::vector< CalleeSavedInfo > &CSI) const
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, ArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI) const override
spillCalleeSavedRegisters - Issues instruction(s) to spill all callee saved registers and returns tru...
bool assignCalleeSavedSpillSlots(MachineFunction &MF, const TargetRegisterInfo *TRI, std::vector< CalleeSavedInfo > &CSI) const override
assignCalleeSavedSpillSlots - Allows target to override spill slot assignment logic.
void determineCalleeSavesSGPR(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override
void emitCSRSpillRestores(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL, LiveRegUnits &LiveUnits, Register FrameReg, Register FramePtrRegScratchCopy) const
void processFunctionBeforeFrameIndicesReplaced(MachineFunction &MF, RegScavenger *RS=nullptr) const override
processFunctionBeforeFrameIndicesReplaced - This method is called immediately before MO_FrameIndex op...
bool isSupportedStackID(TargetStackID::Value ID) const override
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override
emitProlog/emitEpilog - These methods insert prolog and epilog code into the function.
MachineBasicBlock::iterator eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
This method is called during prolog/epilog code insertion to eliminate call frame setup and destroy p...
bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, MutableArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI) const override
restoreCalleeSavedRegisters - Issues instruction(s) to restore all callee saved registers and returns...
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
ArrayRef< PrologEpilogSGPRSpill > getPrologEpilogSGPRSpills() const
const WWMSpillsMap & getWWMSpills() const
void getAllScratchSGPRCopyDstRegs(SmallVectorImpl< Register > &Regs) const
ArrayRef< MCPhysReg > getAGPRSpillVGPRs() const
void shiftWwmVGPRsToLowestRange(MachineFunction &MF, SmallVectorImpl< Register > &WWMVGPRs, BitVector &SavedVGPRs)
void setMaskForVGPRBlockOps(Register RegisterBlock, uint32_t Mask)
GCNUserSGPRUsageInfo & getUserSGPRInfo()
void allocateWWMSpill(MachineFunction &MF, Register VGPR, uint64_t Size=4, Align Alignment=Align(4))
void setVGPRToAGPRSpillDead(int FrameIndex)
Register getScratchRSrcReg() const
Returns the physical register reserved for use as the resource descriptor for scratch accesses.
ArrayRef< MCPhysReg > getVGPRSpillAGPRs() const
int getScavengeFI(MachineFrameInfo &MFI, const SIRegisterInfo &TRI)
uint32_t getMaskForVGPRBlockOps(Register RegisterBlock) const
bool hasMaskForVGPRBlockOps(Register RegisterBlock) const
bool hasPrologEpilogSGPRSpillEntry(Register Reg) const
Register getGITPtrLoReg(const MachineFunction &MF) const
void setVGPRForAGPRCopy(Register NewVGPRForAGPRCopy)
bool allocateVGPRSpillToAGPR(MachineFunction &MF, int FI, bool isAGPRtoVGPR)
Reserve AGPRs or VGPRs to support spilling for FrameIndex FI.
void splitWWMSpillRegisters(MachineFunction &MF, SmallVectorImpl< std::pair< Register, int > > &CalleeSavedRegs, SmallVectorImpl< std::pair< Register, int > > &ScratchRegs) const
bool isWWMReservedRegister(Register Reg) const
ArrayRef< SIRegisterInfo::SpilledReg > getSGPRSpillToPhysicalVGPRLanes(int FrameIndex) const
bool allocateSGPRSpillToVGPRLane(MachineFunction &MF, int FI, bool SpillToPhysVGPRLane=false, bool IsPrologEpilog=false)
void setLongBranchReservedReg(Register Reg)
void setHasSpilledVGPRs(bool Spill=true)
bool removeDeadFrameIndices(MachineFrameInfo &MFI, bool ResetSGPRSpillStackIDs)
If ResetSGPRSpillStackIDs is true, reset the stack ID from sgpr-spill to the default stack.
void setScratchReservedForDynamicVGPRs(unsigned SizeInBytes)
MCRegister getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const
bool checkIndexInPrologEpilogSGPRSpills(int FI) const
const ReservedRegSet & getWWMReservedRegs() const
const PrologEpilogSGPRSaveRestoreInfo & getPrologEpilogSGPRSaveRestoreInfo(Register Reg) const
void setIsStackRealigned(bool Realigned=true)
void addToPrologEpilogSGPRSpills(Register Reg, PrologEpilogSGPRSaveRestoreInfo SI)
Register getScratchSGPRCopyDstReg(Register Reg) const
Register getFrameRegister(const MachineFunction &MF) const override
const TargetRegisterClass * getRegClassForBlockOp(const MachineFunction &MF) const
void addImplicitUsesForBlockCSRLoad(MachineInstrBuilder &MIB, Register BlockReg) const
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
Definition TypeSize.h:30
int64_t getFixed() const
Returns the fixed component of the stack.
Definition TypeSize.h:46
bool hasFP(const MachineFunction &MF) const
hasFP - Return true if the specified function should have a dedicated frame pointer register.
virtual bool hasReservedCallFrame(const MachineFunction &MF) const
hasReservedCallFrame - Under normal circumstances, when a frame pointer is not required,...
virtual void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...
void restoreCalleeSavedRegister(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const CalleeSavedInfo &CS, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) const
void spillCalleeSavedRegister(MachineBasicBlock &SaveBlock, MachineBasicBlock::iterator MI, const CalleeSavedInfo &CS, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) const
spillCalleeSavedRegister - Default implementation for spilling a single callee saved register.
Align getStackAlign() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
TargetOptions Options
LLVM_ABI bool DisableFramePointerElim(const MachineFunction &MF) const
DisableFramePointerElim - This returns true if frame pointer elimination optimization should be disab...
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
@ PRIVATE_ADDRESS
Address space for private memory.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI, unsigned DynamicVGPRBlockSize, std::optional< bool > EnableWavefrontSize32)
uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset)
Convert ByteOffset to dwords if the subtarget uses dword SMRD immediate offsets.
LLVM_READNONE constexpr bool isEntryFunctionCC(CallingConv::ID CC)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
LLVM_READNONE constexpr bool isCompute(CallingConv::ID CC)
LLVM_READNONE constexpr bool isChainCC(CallingConv::ID CC)
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:532
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
constexpr RegState getKillRegState(bool B)
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:634
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value and is Skew mod Align.
Definition MathExtras.h:546
void clearDebugInfoForSpillFIs(MachineFrameInfo &MFI, MachineBasicBlock &MBB, const BitVector &SpillFIs)
Replace frame index operands with null registers in debug value instructions for the specified spill ...
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition bit.h:263
auto reverse(ContainerTy &&C)
Definition STLExtras.h:408
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1636
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
Definition MathExtras.h:150
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
auto make_first_range(ContainerTy &&c)
Given a container of pairs, return a range over the first elements.
Definition STLExtras.h:1399
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
constexpr uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
bool is_sorted(R &&Range, Compare C)
Wrapper function around std::is_sorted to check if elements in a range R are sorted with respect to a...
Definition STLExtras.h:1970
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
Definition MathExtras.h:155
@ And
Bitwise or logical AND of integers.
@ Add
Sum of integers.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:2019
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
static constexpr uint64_t encode(Fields... Values)
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.