//===----------------------- SIFrameLowering.cpp --------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//==-----------------------------------------------------------------------===//

#include "SIFrameLowering.h"
#include "AMDGPU.h"
#include "AMDGPULaneMaskUtils.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/LiveRegUnits.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Target/TargetMachine.h"

using namespace llvm;

#define DEBUG_TYPE "frame-info"

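// Spilling VGPRs to AGPRs keeps spills on-chip on subtargets with MAI
// instructions; processFunctionBeforeFrameFinalized below uses this option to
// decide whether VGPR spill slots may be retargeted to free AGPRs.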
static cl::opt<bool> EnableSpillVGPRToAGPR(
    "amdgpu-spill-vgpr-to-agpr",
    cl::desc("Enable spilling VGPRs to AGPRs"),
    cl::ReallyHidden,
    cl::init(true));

// Find a register matching \p RC from \p LiveUnits which is unused and
// available throughout the function. On failure, returns AMDGPU::NoRegister.
// TODO: Rewrite the loop here to iterate over MCRegUnits instead of
// MCRegisters. This should reduce the number of iterations and avoid redundant
// checking.
static MCRegister findUnusedRegister(const MachineRegisterInfo &MRI,
                                     const LiveRegUnits &LiveUnits,
                                     const TargetRegisterClass &RC) {
  for (MCRegister Reg : RC) {
    if (!MRI.isPhysRegUsed(Reg) && LiveUnits.available(Reg) &&
        !MRI.isReserved(Reg))
      return Reg;
  }
  return MCRegister();
}

// Find a scratch register that we can use in the prologue. We avoid using
// callee-save registers since they may appear to be free when this is called
// from canUseAsPrologue (during shrink wrapping), but then no longer be free
// when this is called from emitPrologue.
static MCRegister findScratchNonCalleeSaveRegister(
    MachineRegisterInfo &MRI, LiveRegUnits &LiveUnits,
    const TargetRegisterClass &RC, bool Unused = false) {
  // Mark callee saved registers as used so we will not choose them.
  const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
  for (unsigned i = 0; CSRegs[i]; ++i)
    LiveUnits.addReg(CSRegs[i]);

  // We are looking for a register that can be used throughout the entire
  // function, so any use is unacceptable.
  if (Unused)
    return findUnusedRegister(MRI, LiveUnits, RC);

  for (MCRegister Reg : RC) {
    if (LiveUnits.available(Reg) && !MRI.isReserved(Reg))
      return Reg;
  }

  return MCRegister();
}

/// Query target location for spilling SGPRs
/// \p IncludeScratchCopy : Also look for free scratch SGPRs
static void getVGPRSpillLaneOrTempRegister(
    MachineFunction &MF, LiveRegUnits &LiveUnits, Register SGPR,
    const TargetRegisterClass &RC = AMDGPU::SReg_32_XM0_XEXECRegClass,
    bool IncludeScratchCopy = true) {
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  unsigned Size = TRI->getSpillSize(RC);
  Align Alignment = TRI->getSpillAlign(RC);

  // We need to save and restore the given SGPR.

  Register ScratchSGPR;
  // 1: Try to save the given register into an unused scratch SGPR. The
  // LiveUnits should have all the callee saved registers marked as used. For
  // certain cases we skip copy to scratch SGPR.
  if (IncludeScratchCopy)
    ScratchSGPR = findUnusedRegister(MF.getRegInfo(), LiveUnits, RC);

  if (!ScratchSGPR) {
    int FI = FrameInfo.CreateStackObject(Size, Alignment, true, nullptr,
                                         TargetStackID::SGPRSpill);

    if (TRI->spillSGPRToVGPR() &&
        MFI->allocateSGPRSpillToVGPRLane(MF, FI, /*SpillToPhysVGPRLane=*/true,
                                         /*IsPrologEpilog=*/true)) {
      // 2: There is no free scratch SGPR to save the register, so spill it to
      // a lane of a physical VGPR instead.
      MFI->addToPrologEpilogSGPRSpills(
          SGPR, PrologEpilogSGPRSaveRestoreInfo(
                    SGPRSaveKind::SPILL_TO_VGPR_LANE, FI));

      LLVM_DEBUG(auto Spill = MFI->getSGPRSpillToPhysicalVGPRLanes(FI).front();
                 dbgs() << printReg(SGPR, TRI) << " requires fallback spill to "
                        << printReg(Spill.VGPR, TRI) << ':' << Spill.Lane
                        << '\n';);
    } else {
      // Remove dead <FI> index
      FrameInfo.RemoveStackObject(FI);
      // 3: If all else fails, spill the register to memory.
      FI = FrameInfo.CreateSpillStackObject(Size, Alignment);
      MFI->addToPrologEpilogSGPRSpills(
          SGPR,
          PrologEpilogSGPRSaveRestoreInfo(SGPRSaveKind::SPILL_TO_MEM, FI));
      LLVM_DEBUG(dbgs() << "Reserved FI " << FI << " for spilling "
                        << printReg(SGPR, TRI) << '\n');
    }
  } else {
    MFI->addToPrologEpilogSGPRSpills(
        SGPR, PrologEpilogSGPRSaveRestoreInfo(
                  SGPRSaveKind::COPY_TO_SCRATCH_SGPR, ScratchSGPR));
    LiveUnits.addReg(ScratchSGPR);
    LLVM_DEBUG(dbgs() << "Saving " << printReg(SGPR, TRI) << " with copy to "
                      << printReg(ScratchSGPR, TRI) << '\n');
  }
}

// We need to emit the stack operations specially here because the prolog and
// epilog may use a different frame register than the rest of the function
// (i.e. not what getFrameRegister would return).
static void buildPrologSpill(const GCNSubtarget &ST, const SIRegisterInfo &TRI,
                             const SIMachineFunctionInfo &FuncInfo,
                             LiveRegUnits &LiveUnits, MachineFunction &MF,
                             MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator I, const DebugLoc &DL,
                             Register SpillReg, int FI, Register FrameReg,
                             int64_t DwordOff = 0) {
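  // With flat scratch enabled, the store addresses the frame slot through a
  // scalar base register (SADDR); otherwise it goes through the buffer
  // resource descriptor (MUBUF).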
  unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
                                        : AMDGPU::BUFFER_STORE_DWORD_OFFSET;

  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      PtrInfo, MachineMemOperand::MOStore, FrameInfo.getObjectSize(FI),
      FrameInfo.getObjectAlign(FI));
  LiveUnits.addReg(SpillReg);
  bool IsKill = !MBB.isLiveIn(SpillReg);
  TRI.buildSpillLoadStore(MBB, I, DL, Opc, FI, SpillReg, IsKill, FrameReg,
                          DwordOff, MMO, nullptr, &LiveUnits);
  if (IsKill)
    LiveUnits.removeReg(SpillReg);
}

static void buildEpilogRestore(const GCNSubtarget &ST,
                               const SIRegisterInfo &TRI,
                               const SIMachineFunctionInfo &FuncInfo,
                               LiveRegUnits &LiveUnits, MachineFunction &MF,
                               MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator I,
                               const DebugLoc &DL, Register SpillReg, int FI,
                               Register FrameReg, int64_t DwordOff = 0) {
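  // Mirror of buildPrologSpill: reload one dword of the frame slot using the
  // epilog frame register.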
  unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
                                        : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;

  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      PtrInfo, MachineMemOperand::MOLoad, FrameInfo.getObjectSize(FI),
      FrameInfo.getObjectAlign(FI));
  TRI.buildSpillLoadStore(MBB, I, DL, Opc, FI, SpillReg, false, FrameReg,
                          DwordOff, MMO, nullptr, &LiveUnits);
}

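// Materialize the 64-bit pointer to the GIT (global information table), used
// on amdpal to locate the scratch resource descriptor. The high half comes
// from the amdgpu-git-ptr-high attribute when set, otherwise from the PC.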
static void buildGitPtr(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                        const DebugLoc &DL, const SIInstrInfo *TII,
                        Register TargetReg) {
  MachineFunction *MF = MBB.getParent();
  const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
  Register TargetLo = TRI->getSubReg(TargetReg, AMDGPU::sub0);
  Register TargetHi = TRI->getSubReg(TargetReg, AMDGPU::sub1);

  if (MFI->getGITPtrHigh() != 0xffffffff) {
    BuildMI(MBB, I, DL, SMovB32, TargetHi)
        .addImm(MFI->getGITPtrHigh())
        .addReg(TargetReg, RegState::ImplicitDefine);
  } else {
    const MCInstrDesc &GetPC64 = TII->get(AMDGPU::S_GETPC_B64_pseudo);
    BuildMI(MBB, I, DL, GetPC64, TargetReg);
  }
  Register GitPtrLo = MFI->getGITPtrLoReg(*MF);
  MF->getRegInfo().addLiveIn(GitPtrLo);
  MBB.addLiveIn(GitPtrLo);
  BuildMI(MBB, I, DL, SMovB32, TargetLo)
      .addReg(GitPtrLo);
}

static void initLiveUnits(LiveRegUnits &LiveUnits, const SIRegisterInfo &TRI,
                          const SIMachineFunctionInfo *FuncInfo,
                          MachineFunction &MF, MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI, bool IsProlog) {
  if (LiveUnits.empty()) {
    LiveUnits.init(TRI);
    if (IsProlog) {
      LiveUnits.addLiveIns(MBB);
    } else {
      // In epilog.
      LiveUnits.addLiveOuts(MBB);
      LiveUnits.stepBackward(*MBBI);
    }
  }
}

namespace llvm {

// SpillBuilder to save/restore special SGPR spills like the one needed for FP,
// BP, etc. These spills are delayed until the current function's frame is
// finalized. For a given register, the builder uses the
// PrologEpilogSGPRSaveRestoreInfo to decide the spill method.
class PrologEpilogSGPRSpillBuilder {
  MachineBasicBlock::iterator MI;
  MachineBasicBlock &MBB;
  MachineFunction &MF;
  const GCNSubtarget &ST;
  MachineFrameInfo &MFI;
  SIMachineFunctionInfo *FuncInfo;
  const SIInstrInfo *TII;
  const SIRegisterInfo &TRI;
  Register SuperReg;
  const PrologEpilogSGPRSaveRestoreInfo SI;
  LiveRegUnits &LiveUnits;
  const DebugLoc &DL;
  Register FrameReg;
  ArrayRef<int16_t> SplitParts;
  unsigned NumSubRegs;
  unsigned EltSize = 4;

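  // Spill the SGPR super-register to scratch memory. SGPRs cannot be stored
  // to memory directly, so each 32-bit piece is staged through a temporary
  // VGPR before the store.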
  void saveToMemory(const int FI) const {
    MachineRegisterInfo &MRI = MF.getRegInfo();
    assert(!MFI.isDeadObjectIndex(FI));

    initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MI, /*IsProlog*/ true);

    Register TmpVGPR = findScratchNonCalleeSaveRegister(
        MRI, LiveUnits, AMDGPU::VGPR_32RegClass);
    if (!TmpVGPR)
      report_fatal_error("failed to find free scratch register");

    for (unsigned I = 0, DwordOff = 0; I < NumSubRegs; ++I) {
      Register SubReg = NumSubRegs == 1
                            ? SuperReg
                            : Register(TRI.getSubReg(SuperReg, SplitParts[I]));
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
          .addReg(SubReg);

      buildPrologSpill(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MI, DL, TmpVGPR,
                       FI, FrameReg, DwordOff);
      DwordOff += 4;
    }
  }

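  // Spill each 32-bit piece of the SGPR into a known lane of a reserved
  // physical VGPR; the lane assignments were made earlier by
  // allocateSGPRSpillToVGPRLane.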
  void saveToVGPRLane(const int FI) const {
    assert(!MFI.isDeadObjectIndex(FI));

    assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
    ArrayRef<SIRegisterInfo::SpilledReg> Spill =
        FuncInfo->getSGPRSpillToPhysicalVGPRLanes(FI);
    assert(Spill.size() == NumSubRegs);

    for (unsigned I = 0; I < NumSubRegs; ++I) {
      Register SubReg = NumSubRegs == 1
                            ? SuperReg
                            : Register(TRI.getSubReg(SuperReg, SplitParts[I]));
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_S32_TO_VGPR),
              Spill[I].VGPR)
          .addReg(SubReg)
          .addImm(Spill[I].Lane)
          .addReg(Spill[I].VGPR, RegState::Undef);
    }
  }

  void copyToScratchSGPR(Register DstReg) const {
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::COPY), DstReg)
        .addReg(SuperReg)
        .setMIFlag(MachineInstr::FrameSetup);
  }

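  // Reload each dword of the SGPR from scratch memory through a temporary
  // VGPR, then move it back to the SGPR with v_readfirstlane_b32 (every
  // active lane holds the same reloaded value).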
  void restoreFromMemory(const int FI) {
    MachineRegisterInfo &MRI = MF.getRegInfo();

    initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MI, /*IsProlog*/ false);
    Register TmpVGPR = findScratchNonCalleeSaveRegister(
        MRI, LiveUnits, AMDGPU::VGPR_32RegClass);
    if (!TmpVGPR)
      report_fatal_error("failed to find free scratch register");

    for (unsigned I = 0, DwordOff = 0; I < NumSubRegs; ++I) {
      Register SubReg = NumSubRegs == 1
                            ? SuperReg
                            : Register(TRI.getSubReg(SuperReg, SplitParts[I]));

      buildEpilogRestore(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MI, DL,
                         TmpVGPR, FI, FrameReg, DwordOff);
      MRI.constrainRegClass(SubReg, &AMDGPU::SReg_32_XM0RegClass);
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
          .addReg(TmpVGPR, RegState::Kill);
      DwordOff += 4;
    }
  }

  void restoreFromVGPRLane(const int FI) {
    assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
    ArrayRef<SIRegisterInfo::SpilledReg> Spill =
        FuncInfo->getSGPRSpillToPhysicalVGPRLanes(FI);
    assert(Spill.size() == NumSubRegs);

    for (unsigned I = 0; I < NumSubRegs; ++I) {
      Register SubReg = NumSubRegs == 1
                            ? SuperReg
                            : Register(TRI.getSubReg(SuperReg, SplitParts[I]));
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::SI_RESTORE_S32_FROM_VGPR), SubReg)
          .addReg(Spill[I].VGPR)
          .addImm(Spill[I].Lane);
    }
  }

  void copyFromScratchSGPR(Register SrcReg) const {
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::COPY), SuperReg)
        .addReg(SrcReg)
        .setMIFlag(MachineInstr::FrameDestroy);
  }

public:
  PrologEpilogSGPRSpillBuilder(Register Reg,
                               const PrologEpilogSGPRSaveRestoreInfo SI,
                               MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator MI,
                               const DebugLoc &DL, const SIInstrInfo *TII,
                               const SIRegisterInfo &TRI,
                               LiveRegUnits &LiveUnits, Register FrameReg)
      : MI(MI), MBB(MBB), MF(*MBB.getParent()),
        ST(MF.getSubtarget<GCNSubtarget>()), MFI(MF.getFrameInfo()),
        FuncInfo(MF.getInfo<SIMachineFunctionInfo>()), TII(TII), TRI(TRI),
        SuperReg(Reg), SI(SI), LiveUnits(LiveUnits), DL(DL),
        FrameReg(FrameReg) {
    const TargetRegisterClass *RC = TRI.getPhysRegBaseClass(SuperReg);
    SplitParts = TRI.getRegSplitParts(RC, EltSize);
    NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();

    assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
  }

  void save() {
    switch (SI.getKind()) {
    case SGPRSaveKind::SPILL_TO_MEM:
      return saveToMemory(SI.getIndex());
    case SGPRSaveKind::SPILL_TO_VGPR_LANE:
      return saveToVGPRLane(SI.getIndex());
    case SGPRSaveKind::COPY_TO_SCRATCH_SGPR:
      return copyToScratchSGPR(SI.getReg());
    }
  }

  void restore() {
    switch (SI.getKind()) {
    case SGPRSaveKind::SPILL_TO_MEM:
      return restoreFromMemory(SI.getIndex());
    case SGPRSaveKind::SPILL_TO_VGPR_LANE:
      return restoreFromVGPRLane(SI.getIndex());
    case SGPRSaveKind::COPY_TO_SCRATCH_SGPR:
      return copyFromScratchSGPR(SI.getReg());
    }
  }
};

} // namespace llvm

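// FLAT_SCR holds the per-wave scratch state. Before GFX9 it is a
// (size, offset-in-256-byte-units) pair; on GFX9 it becomes a 64-bit byte
// address; and on GFX10+ it is written through s_setreg rather than the
// dedicated FLAT_SCR_LO/HI registers. The code below handles all three forms.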
// Emit flat scratch setup code, assuming `MFI->hasFlatScratchInit()`
void SIFrameLowering::emitEntryFunctionFlatScratchInit(
    MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
    const DebugLoc &DL, Register ScratchWaveOffsetReg) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  // We don't need this if we only have spills since there is no user facing
  // scratch.

  // TODO: If we know we don't have flat instructions earlier, we can omit
  // this from the input registers.
  //
  // TODO: We only need to know if we access scratch space through a flat
  // pointer. Because we only detect if flat instructions are used at all,
  // this will be used more often than necessary on VI.

  Register FlatScrInitLo;
  Register FlatScrInitHi;

  if (ST.isAmdPalOS()) {
    // Extract the scratch offset from the descriptor in the GIT
    LiveRegUnits LiveUnits;
    LiveUnits.init(*TRI);
    LiveUnits.addLiveIns(MBB);

    // Find unused reg to load flat scratch init into
    MachineRegisterInfo &MRI = MF.getRegInfo();
    Register FlatScrInit = AMDGPU::NoRegister;
    ArrayRef<MCPhysReg> AllSGPR64s = TRI->getAllSGPR64(MF);
    unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 1) / 2;
    AllSGPR64s = AllSGPR64s.slice(
        std::min(static_cast<unsigned>(AllSGPR64s.size()), NumPreloaded));
    Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
    for (MCPhysReg Reg : AllSGPR64s) {
      if (LiveUnits.available(Reg) && !MRI.isReserved(Reg) &&
          MRI.isAllocatable(Reg) && !TRI->isSubRegisterEq(Reg, GITPtrLoReg)) {
        FlatScrInit = Reg;
        break;
      }
    }
    assert(FlatScrInit && "Failed to find free register for scratch init");

    FlatScrInitLo = TRI->getSubReg(FlatScrInit, AMDGPU::sub0);
    FlatScrInitHi = TRI->getSubReg(FlatScrInit, AMDGPU::sub1);

    buildGitPtr(MBB, I, DL, TII, FlatScrInit);

    // We now have the GIT ptr - now get the scratch descriptor from the entry
    // at offset 0 (or offset 16 for a compute shader).
    MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
    const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);
    auto *MMO = MF.getMachineMemOperand(
        PtrInfo,
        MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
            MachineMemOperand::MODereferenceable,
        8, Align(4));
    unsigned Offset =
        MF.getFunction().getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0;
    const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
    unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset);
    BuildMI(MBB, I, DL, LoadDwordX2, FlatScrInit)
        .addReg(FlatScrInit)
        .addImm(EncodedOffset) // offset
        .addImm(0)             // cpol
        .addMemOperand(MMO);

    // Mask the offset in [47:0] of the descriptor
    const MCInstrDesc &SAndB32 = TII->get(AMDGPU::S_AND_B32);
    auto And = BuildMI(MBB, I, DL, SAndB32, FlatScrInitHi)
                   .addReg(FlatScrInitHi)
                   .addImm(0xffff);
    And->getOperand(3).setIsDead(); // Mark SCC as dead.
  } else {
    Register FlatScratchInitReg =
        MFI->getPreloadedReg(AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT);
    assert(FlatScratchInitReg);

    MachineRegisterInfo &MRI = MF.getRegInfo();
    MRI.addLiveIn(FlatScratchInitReg);
    MBB.addLiveIn(FlatScratchInitReg);

    FlatScrInitLo = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0);
    FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1);
  }

  // Do a 64-bit pointer add.
  if (ST.flatScratchIsPointer()) {
    if (ST.getGeneration() >= AMDGPUSubtarget::GFX10) {
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo)
          .addReg(FlatScrInitLo)
          .addReg(ScratchWaveOffsetReg);
      auto Addc = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32),
                          FlatScrInitHi)
                      .addReg(FlatScrInitHi)
                      .addImm(0);
      Addc->getOperand(3).setIsDead(); // Mark SCC as dead.

      using namespace AMDGPU::Hwreg;
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32))
          .addReg(FlatScrInitLo)
          .addImm(int16_t(HwregEncoding::encode(ID_FLAT_SCR_LO, 0, 32)));
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32))
          .addReg(FlatScrInitHi)
          .addImm(int16_t(HwregEncoding::encode(ID_FLAT_SCR_HI, 0, 32)));
      return;
    }

    // For GFX9.
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), AMDGPU::FLAT_SCR_LO)
        .addReg(FlatScrInitLo)
        .addReg(ScratchWaveOffsetReg);
    auto Addc = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32),
                        AMDGPU::FLAT_SCR_HI)
                    .addReg(FlatScrInitHi)
                    .addImm(0);
    Addc->getOperand(3).setIsDead(); // Mark SCC as dead.

    return;
  }

  assert(ST.getGeneration() < AMDGPUSubtarget::GFX9);

  // Copy the size in bytes.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), AMDGPU::FLAT_SCR_LO)
      .addReg(FlatScrInitHi, RegState::Kill);

  // Add wave offset in bytes to private base offset.
  // See comment in AMDKernelCodeT.h for enable_sgpr_flat_scratch_init.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), FlatScrInitLo)
      .addReg(FlatScrInitLo)
      .addReg(ScratchWaveOffsetReg);

  // Convert offset to 256-byte units.
  auto LShr = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_LSHR_B32),
                      AMDGPU::FLAT_SCR_HI)
                  .addReg(FlatScrInitLo, RegState::Kill)
                  .addImm(8);
  LShr->getOperand(3).setIsDead(); // Mark SCC as dead.
}

// Note SGPRSpill stack IDs should only be used for SGPR spilling to VGPRs, not
// memory. They should have been removed by now.
static bool allStackObjectsAreDead(const MachineFrameInfo &MFI) {
  for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
       I != E; ++I) {
    if (!MFI.isDeadObjectIndex(I))
      return false;
  }

  return true;
}

// Shift down registers reserved for the scratch RSRC.
Register SIFrameLowering::getEntryFunctionReservedScratchRsrcReg(
    MachineFunction &MF) const {

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  assert(MFI->isEntryFunction());

  Register ScratchRsrcReg = MFI->getScratchRSrcReg();

  if (!ScratchRsrcReg || (!MRI.isPhysRegUsed(ScratchRsrcReg) &&
                          allStackObjectsAreDead(MF.getFrameInfo())))
    return Register();

  if (ST.hasSGPRInitBug() ||
      ScratchRsrcReg != TRI->reservedPrivateSegmentBufferReg(MF))
    return ScratchRsrcReg;

  // We reserved the last registers for this. Shift it down to the end of those
  // which were actually used.
  //
  // FIXME: It might be safer to use a pseudoregister before replacement.

  // FIXME: We should be able to eliminate unused input registers. We only
  // cannot do this for the resources required for scratch access. For now we
  // skip over user SGPRs and may leave unused holes.

  unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 3) / 4;
  ArrayRef<MCPhysReg> AllSGPR128s = TRI->getAllSGPR128(MF);
  AllSGPR128s = AllSGPR128s.slice(
      std::min(static_cast<unsigned>(AllSGPR128s.size()), NumPreloaded));

  // Skip the last N reserved elements because they should have already been
  // reserved for VCC etc.
  Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
  for (MCPhysReg Reg : AllSGPR128s) {
    // Pick the first unallocated one. Make sure we don't clobber the other
    // reserved input we needed. Also for PAL, make sure we don't clobber
    // the GIT pointer passed in SGPR0 or SGPR8.
    if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg) &&
        (!GITPtrLoReg || !TRI->isSubRegisterEq(Reg, GITPtrLoReg))) {
      MRI.replaceRegWith(ScratchRsrcReg, Reg);
      MFI->setScratchRSrcReg(Reg);
      MRI.reserveReg(Reg, TRI);
      return Reg;
    }
  }

  return ScratchRsrcReg;
}

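// MUBUF scratch is interleaved per lane, so per-lane frame byte counts must
// be scaled by the wavefront size to form per-wave SP/FP offsets; with flat
// scratch the stack pointer addresses a single lane's bytes directly.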
static unsigned getScratchScaleFactor(const GCNSubtarget &ST) {
  return ST.enableFlatScratch() ? 1 : ST.getWavefrontSize();
}

void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
                                                MachineBasicBlock &MBB) const {
  assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");

  // FIXME: If we only have SGPR spills, we won't actually be using scratch
  // memory since these spill to VGPRs. We should be cleaning up these unused
  // SGPR spill frame indices somewhere.

  // FIXME: We still have implicit uses on SGPR spill instructions in case they
  // need to spill to vector memory. It's likely that will not happen, but at
  // this point it appears we need the setup. This part of the prolog should be
  // emitted after frame indices are eliminated.

  // FIXME: Remove all of the isPhysRegUsed checks

  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const Function &F = MF.getFunction();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();

  assert(MFI->isEntryFunction());

  Register PreloadedScratchWaveOffsetReg = MFI->getPreloadedReg(
      AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);

  // We need to do the replacement of the private segment buffer register even
  // if there are no stack objects. There could be stores to undef or a
  // constant without an associated object.
  //
  // This will return `Register()` in cases where there are no actual
  // uses of the SRSRC.
  Register ScratchRsrcReg;
  if (!ST.enableFlatScratch())
    ScratchRsrcReg = getEntryFunctionReservedScratchRsrcReg(MF);

  // Make the selected register live throughout the function.
  if (ScratchRsrcReg) {
    for (MachineBasicBlock &OtherBB : MF) {
      if (&OtherBB != &MBB) {
        OtherBB.addLiveIn(ScratchRsrcReg);
      }
    }
  }

  // Now that we have fixed the reserved SRSRC we need to locate the
  // (potentially) preloaded SRSRC.
  Register PreloadedScratchRsrcReg;
  if (ST.isAmdHsaOrMesa(F)) {
    PreloadedScratchRsrcReg =
        MFI->getPreloadedReg(AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER);
    if (ScratchRsrcReg && PreloadedScratchRsrcReg) {
      // We added live-ins during argument lowering, but since they were not
      // used they were deleted. We're adding the uses now, so add them back.
      MRI.addLiveIn(PreloadedScratchRsrcReg);
      MBB.addLiveIn(PreloadedScratchRsrcReg);
    }
  }

  // Debug location must be unknown since the first debug location is used to
  // determine the end of the prologue.
  DebugLoc DL;
  MachineBasicBlock::iterator I = MBB.begin();

  // We found the SRSRC first because it needs four registers and has an
  // alignment requirement. If the SRSRC that we found clobbers the scratch
  // wave offset, which may be in a fixed SGPR or a free SGPR chosen by
  // SITargetLowering::allocateSystemSGPRs, COPY the scratch wave offset to a
  // free SGPR.
  Register ScratchWaveOffsetReg;
  if (PreloadedScratchWaveOffsetReg &&
      TRI->isSubRegisterEq(ScratchRsrcReg, PreloadedScratchWaveOffsetReg)) {
    ArrayRef<MCPhysReg> AllSGPRs = TRI->getAllSGPR32(MF);
    unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();
    AllSGPRs = AllSGPRs.slice(
        std::min(static_cast<unsigned>(AllSGPRs.size()), NumPreloaded));
    Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
    for (MCPhysReg Reg : AllSGPRs) {
      if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg) &&
          !TRI->isSubRegisterEq(ScratchRsrcReg, Reg) && GITPtrLoReg != Reg) {
        ScratchWaveOffsetReg = Reg;
        BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchWaveOffsetReg)
            .addReg(PreloadedScratchWaveOffsetReg, RegState::Kill);
        break;
      }
    }

    // FIXME: We can spill incoming arguments and restore at the end of the
    // prolog.
    if (!ScratchWaveOffsetReg)
      report_fatal_error(
          "could not find temporary scratch offset register in prolog");
  } else {
    ScratchWaveOffsetReg = PreloadedScratchWaveOffsetReg;
  }
  assert(ScratchWaveOffsetReg || !PreloadedScratchWaveOffsetReg);

  unsigned Offset = FrameInfo.getStackSize() * getScratchScaleFactor(ST);
  if (!mayReserveScratchForCWSR(MF)) {
    if (hasFP(MF)) {
      Register FPReg = MFI->getFrameOffsetReg();
      assert(FPReg != AMDGPU::FP_REG);
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), FPReg).addImm(0);
    }

    if (requiresStackPointerReference(MF)) {
      Register SPReg = MFI->getStackPtrOffsetReg();
      assert(SPReg != AMDGPU::SP_REG);
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), SPReg).addImm(Offset);
    }
  } else {
    // We need to check if we're on a compute queue - if we are, then the CWSR
    // trap handler may need to store some VGPRs on the stack. The first VGPR
    // block is saved separately, so we only need to allocate space for any
    // additional VGPR blocks used. For now, we will make sure there's enough
    // room for the theoretical maximum number of VGPRs that can be allocated.
    // FIXME: Figure out if the shader uses fewer VGPRs in practice.
    assert(hasFP(MF));
    Register FPReg = MFI->getFrameOffsetReg();
    assert(FPReg != AMDGPU::FP_REG);
    unsigned VGPRSize = llvm::alignTo(
        (ST.getAddressableNumVGPRs(MFI->getDynamicVGPRBlockSize()) -
         AMDGPU::IsaInfo::getVGPRAllocGranule(&ST,
                                              MFI->getDynamicVGPRBlockSize())) *
            4,
        FrameInfo.getMaxAlign());
    MFI->setScratchReservedForDynamicVGPRs(VGPRSize);

    // Read the ME (micro-engine) ID field of HW_ID2 (assumed here to be the
    // 2-bit field at offset 8).
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_GETREG_B32), FPReg)
        .addImm(AMDGPU::Hwreg::HwregEncoding::encode(AMDGPU::Hwreg::ID_HW_ID2,
                                                     8, 2));
    // The MicroEngine ID is 0 for the graphics queue, and 1 or 2 for compute
    // (3 is unused, so we ignore it). Unfortunately, S_GETREG doesn't set
    // SCC, so we need to check for 0 manually.
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_CMP_LG_U32)).addImm(0).addReg(FPReg);
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_CMOVK_I32), FPReg).addImm(VGPRSize);
    if (requiresStackPointerReference(MF)) {
      Register SPReg = MFI->getStackPtrOffsetReg();
      assert(SPReg != AMDGPU::SP_REG);

      // If at least one of the constants can be inlined, then we can use
      // s_cselect. Otherwise, use a mov and cmovk.
      if (AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm()) ||
          AMDGPU::isInlinableLiteral32(Offset + VGPRSize,
                                       ST.hasInv2PiInlineImm())) {
        BuildMI(MBB, I, DL, TII->get(AMDGPU::S_CSELECT_B32), SPReg)
            .addImm(Offset + VGPRSize)
            .addImm(Offset);
      } else {
        BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), SPReg).addImm(Offset);
        BuildMI(MBB, I, DL, TII->get(AMDGPU::S_CMOVK_I32), SPReg)
            .addImm(Offset + VGPRSize);
      }
    }
  }

  bool NeedsFlatScratchInit =
      MFI->getUserSGPRInfo().hasFlatScratchInit() &&
      (MRI.isPhysRegUsed(AMDGPU::FLAT_SCR) || FrameInfo.hasCalls() ||
       (!allStackObjectsAreDead(FrameInfo) && ST.enableFlatScratch()));

  if ((NeedsFlatScratchInit || ScratchRsrcReg) &&
      PreloadedScratchWaveOffsetReg && !ST.flatScratchIsArchitected()) {
    MRI.addLiveIn(PreloadedScratchWaveOffsetReg);
    MBB.addLiveIn(PreloadedScratchWaveOffsetReg);
  }

  if (NeedsFlatScratchInit) {
    emitEntryFunctionFlatScratchInit(MF, MBB, I, DL, ScratchWaveOffsetReg);
  }

  if (ScratchRsrcReg) {
    emitEntryFunctionScratchRsrcRegSetup(MF, MBB, I, DL,
                                         PreloadedScratchRsrcReg,
                                         ScratchRsrcReg, ScratchWaveOffsetReg);
  }
}

// Emit scratch RSRC setup code, assuming `ScratchRsrcReg != AMDGPU::NoReg`
void SIFrameLowering::emitEntryFunctionScratchRsrcRegSetup(
    MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
    const DebugLoc &DL, Register PreloadedScratchRsrcReg,
    Register ScratchRsrcReg, Register ScratchWaveOffsetReg) const {

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  const Function &Fn = MF.getFunction();

  if (ST.isAmdPalOS()) {
    // The pointer to the GIT is formed from the offset passed in and either
    // the amdgpu-git-ptr-high function attribute or the top part of the PC
    Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
    Register Rsrc03 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);

    buildGitPtr(MBB, I, DL, TII, Rsrc01);

    // We now have the GIT ptr - now get the scratch descriptor from the entry
    // at offset 0 (or offset 16 for a compute shader).
    MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
    const MCInstrDesc &LoadDwordX4 = TII->get(AMDGPU::S_LOAD_DWORDX4_IMM);
    auto *MMO = MF.getMachineMemOperand(
        PtrInfo,
        MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
            MachineMemOperand::MODereferenceable,
        16, Align(4));
    unsigned Offset = Fn.getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0;
    const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
    unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset);
    BuildMI(MBB, I, DL, LoadDwordX4, ScratchRsrcReg)
        .addReg(Rsrc01)
        .addImm(EncodedOffset) // offset
        .addImm(0)             // cpol
        .addReg(ScratchRsrcReg, RegState::ImplicitDefine)
        .addMemOperand(MMO);

    // The driver will always set the SRD for wave 64 (bits 118:117 of
    // descriptor / bits 22:21 of third sub-reg will be 0b11)
    // If the shader is actually wave32 we have to modify the const_index_stride
    // field of the descriptor 3rd sub-reg (bits 22:21) to 0b10 (stride=32). The
    // reason the driver does this is that there can be cases where it presents
    // 2 shaders with different wave size (e.g. VsFs).
    // TODO: convert to using SCRATCH instructions or multiple SRD buffers
    if (ST.isWave32()) {
      const MCInstrDesc &SBitsetB32 = TII->get(AMDGPU::S_BITSET0_B32);
      BuildMI(MBB, I, DL, SBitsetB32, Rsrc03)
          .addImm(21)
          .addReg(Rsrc03);
    }
  } else if (ST.isMesaGfxShader(Fn) || !PreloadedScratchRsrcReg) {
    assert(!ST.isAmdHsaOrMesa(Fn));
    const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);

    Register Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
    Register Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);

    // Use relocations to get the pointer, and setup the other bits manually.
    uint64_t Rsrc23 = TII->getScratchRsrcWords23();

    if (MFI->hasImplicitBufferPtr()) {
      Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);

      if (AMDGPU::isCompute(MF.getFunction().getCallingConv())) {
        const MCInstrDesc &Mov64 = TII->get(AMDGPU::S_MOV_B64);

        BuildMI(MBB, I, DL, Mov64, Rsrc01)
            .addReg(MFI->getImplicitBufferPtrUserSGPR())
            .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
      } else {
        const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);

        MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
        auto *MMO = MF.getMachineMemOperand(
            PtrInfo,
            MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
                MachineMemOperand::MODereferenceable,
            8, Align(4));
        BuildMI(MBB, I, DL, LoadDwordX2, Rsrc01)
            .addReg(MFI->getImplicitBufferPtrUserSGPR())
            .addImm(0) // offset
            .addImm(0) // cpol
            .addMemOperand(MMO)
            .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

        MF.getRegInfo().addLiveIn(MFI->getImplicitBufferPtrUserSGPR());
        MBB.addLiveIn(MFI->getImplicitBufferPtrUserSGPR());
      }
    } else {
      Register Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
      Register Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);

      BuildMI(MBB, I, DL, SMovB32, Rsrc0)
          .addExternalSymbol("SCRATCH_RSRC_DWORD0")
          .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

      BuildMI(MBB, I, DL, SMovB32, Rsrc1)
          .addExternalSymbol("SCRATCH_RSRC_DWORD1")
          .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
    }

    BuildMI(MBB, I, DL, SMovB32, Rsrc2)
        .addImm(Lo_32(Rsrc23))
        .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

    BuildMI(MBB, I, DL, SMovB32, Rsrc3)
        .addImm(Hi_32(Rsrc23))
        .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
  } else if (ST.isAmdHsaOrMesa(Fn)) {
    assert(PreloadedScratchRsrcReg);

    if (ScratchRsrcReg != PreloadedScratchRsrcReg) {
      BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchRsrcReg)
          .addReg(PreloadedScratchRsrcReg, RegState::Kill);
    }
  }

  // Add the scratch wave offset into the scratch RSRC.
  //
  // We only want to update the first 48 bits, which is the base address
  // pointer, without touching the adjacent 16 bits of flags. We know this add
  // cannot carry-out from bit 47, otherwise the scratch allocation would be
  // impossible to fit in the 48-bit global address space.
  //
  // TODO: Evaluate if it is better to just construct an SRD using the flat
  // scratch init and some constants rather than update the one we are passed.
  Register ScratchRsrcSub0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
  Register ScratchRsrcSub1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);

  // We cannot Kill ScratchWaveOffsetReg here because we allow it to be used in
  // the kernel body via inreg arguments.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), ScratchRsrcSub0)
      .addReg(ScratchRsrcSub0)
      .addReg(ScratchWaveOffsetReg)
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
  auto Addc = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), ScratchRsrcSub1)
                  .addReg(ScratchRsrcSub1)
                  .addImm(0)
                  .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
  Addc->getOperand(3).setIsDead(); // Mark SCC as dead.
}

bool SIFrameLowering::isSupportedStackID(TargetStackID::Value ID) const {
  switch (ID) {
  case TargetStackID::Default:
  case TargetStackID::NoAlloc:
  case TargetStackID::SGPRSpill:
    return true;
  case TargetStackID::ScalableVector:
  case TargetStackID::ScalablePredicateVector:
  case TargetStackID::WasmLocal:
    return false;
  }
  llvm_unreachable("Invalid TargetStackID::Value");
}

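// WWM spills touch lanes that are inactive at function entry, so the
// prolog/epilog must temporarily widen EXEC around them; the original mask is
// parked in a scratch SGPR pair (or reused from the whole-wave setup
// instruction in whole wave functions).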
// Activate only the inactive lanes when \p EnableInactiveLanes is true.
// Otherwise, activate all lanes. It returns the saved exec.
static Register buildScratchExecCopy(LiveRegUnits &LiveUnits,
                                     MachineFunction &MF,
                                     MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MBBI,
                                     const DebugLoc &DL, bool IsProlog,
                                     bool EnableInactiveLanes) {
  Register ScratchExecCopy;
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();

  initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MBBI, IsProlog);

  if (FuncInfo->isWholeWaveFunction()) {
    // Whole wave functions already have a copy of the original EXEC mask that
    // we can use.
    assert(IsProlog && "Epilog should look at return, not setup");
    ScratchExecCopy =
        TII->getWholeWaveFunctionSetup(MF)->getOperand(0).getReg();
    assert(ScratchExecCopy && "Couldn't find copy of EXEC");
  } else {
    ScratchExecCopy = findScratchNonCalleeSaveRegister(
        MRI, LiveUnits, *TRI.getWaveMaskRegClass());
  }

  if (!ScratchExecCopy)
    report_fatal_error("failed to find free scratch register");

  LiveUnits.addReg(ScratchExecCopy);

  const unsigned SaveExecOpc =
      ST.isWave32() ? (EnableInactiveLanes ? AMDGPU::S_XOR_SAVEEXEC_B32
                                           : AMDGPU::S_OR_SAVEEXEC_B32)
                    : (EnableInactiveLanes ? AMDGPU::S_XOR_SAVEEXEC_B64
                                           : AMDGPU::S_OR_SAVEEXEC_B64);
  auto SaveExec =
      BuildMI(MBB, MBBI, DL, TII->get(SaveExecOpc), ScratchExecCopy).addImm(-1);
  SaveExec->getOperand(3).setIsDead(); // Mark SCC as dead.

  return ScratchExecCopy;
}

void SIFrameLowering::emitCSRSpillStores(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, DebugLoc &DL, LiveRegUnits &LiveUnits,
    Register FrameReg, Register FramePtrRegScratchCopy) const {
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const AMDGPU::LaneMaskConstants &LMC = AMDGPU::LaneMaskConstants::get(ST);

  // Spill Whole-Wave Mode VGPRs. Save only the inactive lanes of the scratch
  // registers. However, save all lanes of callee-saved VGPRs. Due to this, we
  // might end up flipping the EXEC bits twice.
  Register ScratchExecCopy;
  SmallVector<std::pair<Register, int>, 2> WWMCalleeSavedRegs, WWMScratchRegs;
  FuncInfo->splitWWMSpillRegisters(MF, WWMCalleeSavedRegs, WWMScratchRegs);
  if (!WWMScratchRegs.empty())
    ScratchExecCopy =
        buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
                             /*IsProlog*/ true, /*EnableInactiveLanes*/ true);

  auto StoreWWMRegisters =
      [&](SmallVectorImpl<std::pair<Register, int>> &WWMRegs) {
        for (const auto &Reg : WWMRegs) {
          Register VGPR = Reg.first;
          int FI = Reg.second;
          buildPrologSpill(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MBBI, DL,
                           VGPR, FI, FrameReg);
        }
      };

  for (const Register Reg : make_first_range(WWMScratchRegs)) {
    if (!MRI.isReserved(Reg)) {
      MRI.addLiveIn(Reg);
      MBB.addLiveIn(Reg);
    }
  }
  StoreWWMRegisters(WWMScratchRegs);

  auto EnableAllLanes = [&]() {
    BuildMI(MBB, MBBI, DL, TII->get(LMC.MovOpc), LMC.ExecReg).addImm(-1);
  };

  if (!WWMCalleeSavedRegs.empty()) {
    if (ScratchExecCopy) {
      EnableAllLanes();
    } else {
      ScratchExecCopy = buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
                                             /*IsProlog*/ true,
                                             /*EnableInactiveLanes*/ false);
    }
  }

  StoreWWMRegisters(WWMCalleeSavedRegs);
  if (FuncInfo->isWholeWaveFunction()) {
    // SI_WHOLE_WAVE_FUNC_SETUP has outlived its purpose, so we can remove
    // it now. If we have already saved some WWM CSR registers, then the EXEC is
    // already -1 and we don't need to do anything else. Otherwise, set EXEC to
    // -1 here.
    if (!ScratchExecCopy)
      buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL, /*IsProlog*/ true,
                           /*EnableInactiveLanes*/ true);
    else if (WWMCalleeSavedRegs.empty())
      EnableAllLanes();
    TII->getWholeWaveFunctionSetup(MF)->eraseFromParent();
  } else if (ScratchExecCopy) {
    // FIXME: Split block and make terminator.
    BuildMI(MBB, MBBI, DL, TII->get(LMC.MovOpc), LMC.ExecReg)
        .addReg(ScratchExecCopy, RegState::Kill);
    LiveUnits.addReg(ScratchExecCopy);
  }

  Register FramePtrReg = FuncInfo->getFrameOffsetReg();

  for (const auto &Spill : FuncInfo->getPrologEpilogSGPRSpills()) {
    // Special handle FP spill:
    // Skip if FP is saved to a scratch SGPR, the save has already been emitted.
    // Otherwise, FP has been moved to a temporary register and spill it
    // instead.
    Register Reg =
        Spill.first == FramePtrReg ? FramePtrRegScratchCopy : Spill.first;
    if (!Reg)
      continue;

    PrologEpilogSGPRSpillBuilder SB(Reg, Spill.second, MBB, MBBI, DL, TII, TRI,
                                    LiveUnits, FrameReg);
    SB.save();
  }

  // If a copy to scratch SGPR has been chosen for any of the SGPR spills, make
  // such scratch registers live throughout the function.
  SmallVector<Register, 1> ScratchSGPRs;
  FuncInfo->getAllScratchSGPRCopyDstRegs(ScratchSGPRs);
  if (!ScratchSGPRs.empty()) {
    for (MachineBasicBlock &MBB : MF) {
      for (MCPhysReg Reg : ScratchSGPRs)
        MBB.addLiveIn(Reg);

      MBB.sortUniqueLiveIns();
    }
    if (!LiveUnits.empty()) {
      for (MCPhysReg Reg : ScratchSGPRs)
        LiveUnits.addReg(Reg);
    }
  }
}

void SIFrameLowering::emitCSRSpillRestores(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, DebugLoc &DL, LiveRegUnits &LiveUnits,
    Register FrameReg, Register FramePtrRegScratchCopy) const {
  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();
  const AMDGPU::LaneMaskConstants &LMC = AMDGPU::LaneMaskConstants::get(ST);
  Register FramePtrReg = FuncInfo->getFrameOffsetReg();

  for (const auto &Spill : FuncInfo->getPrologEpilogSGPRSpills()) {
    // Special handle FP restore:
    // Skip if FP needs to be restored from the scratch SGPR. Otherwise, restore
    // the FP value to a temporary register. The frame pointer should be
    // overwritten only at the end when all other spills are restored from
    // current frame.
    Register Reg =
        Spill.first == FramePtrReg ? FramePtrRegScratchCopy : Spill.first;
    if (!Reg)
      continue;

    PrologEpilogSGPRSpillBuilder SB(Reg, Spill.second, MBB, MBBI, DL, TII, TRI,
                                    LiveUnits, FrameReg);
    SB.restore();
  }

  // Restore Whole-Wave Mode VGPRs. Restore only the inactive lanes of the
  // scratch registers. However, restore all lanes of callee-saved VGPRs. Due to
  // this, we might end up flipping the EXEC bits twice.
  Register ScratchExecCopy;
  SmallVector<std::pair<Register, int>, 2> WWMCalleeSavedRegs, WWMScratchRegs;
  FuncInfo->splitWWMSpillRegisters(MF, WWMCalleeSavedRegs, WWMScratchRegs);
  auto RestoreWWMRegisters =
      [&](SmallVectorImpl<std::pair<Register, int>> &WWMRegs) {
        for (const auto &Reg : WWMRegs) {
          Register VGPR = Reg.first;
          int FI = Reg.second;
          buildEpilogRestore(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MBBI, DL,
                             VGPR, FI, FrameReg);
        }
      };

  if (FuncInfo->isWholeWaveFunction()) {
    // For whole wave functions, the EXEC is already -1 at this point.
    // Therefore, we can restore the CSR WWM registers right away.
    RestoreWWMRegisters(WWMCalleeSavedRegs);

    // The original EXEC is the first operand of the return instruction.
    MachineInstr &Return = MBB.instr_back();
    unsigned Opcode = Return.getOpcode();
    switch (Opcode) {
    case AMDGPU::SI_WHOLE_WAVE_FUNC_RETURN:
      Opcode = AMDGPU::SI_RETURN;
      break;
    case AMDGPU::SI_TCRETURN_GFX_WholeWave:
      Opcode = AMDGPU::SI_TCRETURN_GFX;
      break;
    default:
      llvm_unreachable("Unexpected return inst");
    }
    Register OrigExec = Return.getOperand(0).getReg();

    if (!WWMScratchRegs.empty()) {
      BuildMI(MBB, MBBI, DL, TII->get(LMC.XorOpc), LMC.ExecReg)
          .addReg(OrigExec)
          .addImm(-1);
      RestoreWWMRegisters(WWMScratchRegs);
    }

    // Restore original EXEC.
    BuildMI(MBB, MBBI, DL, TII->get(LMC.MovOpc), LMC.ExecReg).addReg(OrigExec);

    // Drop the first operand and update the opcode.
    Return.removeOperand(0);
    Return.setDesc(TII->get(Opcode));

    return;
  }

  if (!WWMScratchRegs.empty()) {
    ScratchExecCopy =
        buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
                             /*IsProlog=*/false, /*EnableInactiveLanes=*/true);
  }
  RestoreWWMRegisters(WWMScratchRegs);
  if (!WWMCalleeSavedRegs.empty()) {
    if (ScratchExecCopy) {
      BuildMI(MBB, MBBI, DL, TII->get(LMC.MovOpc), LMC.ExecReg).addImm(-1);
    } else {
      ScratchExecCopy = buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
                                             /*IsProlog*/ false,
                                             /*EnableInactiveLanes*/ false);
    }
  }

  RestoreWWMRegisters(WWMCalleeSavedRegs);
  if (ScratchExecCopy) {
    // FIXME: Split block and make terminator.
    BuildMI(MBB, MBBI, DL, TII->get(LMC.MovOpc), LMC.ExecReg)
        .addReg(ScratchExecCopy, RegState::Kill);
  }
}

void SIFrameLowering::emitPrologue(MachineFunction &MF,
                                   MachineBasicBlock &MBB) const {
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  if (FuncInfo->isEntryFunction()) {
    emitEntryFunctionPrologue(MF, MBB);
    return;
  }

  MachineFrameInfo &MFI = MF.getFrameInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  Register StackPtrReg = FuncInfo->getStackPtrOffsetReg();
  Register FramePtrReg = FuncInfo->getFrameOffsetReg();
  Register BasePtrReg =
      TRI.hasBasePointer(MF) ? TRI.getBaseRegister() : Register();
  LiveRegUnits LiveUnits;

  MachineBasicBlock::iterator MBBI = MBB.begin();
  // DebugLoc must be unknown since the first instruction with DebugLoc is used
  // to determine the end of the prologue.
  DebugLoc DL;

  if (FuncInfo->isChainFunction()) {
    // Functions with the amdgpu_cs_chain[_preserve] CC don't receive a SP, but
    // are free to set one up if they need it.
    bool UseSP = requiresStackPointerReference(MF);
    if (UseSP) {
      assert(StackPtrReg != AMDGPU::SP_REG);

      BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_MOV_B32), StackPtrReg)
          .addImm(MFI.getStackSize() * getScratchScaleFactor(ST));
    }
  }

  bool HasFP = false;
  bool HasBP = false;
  uint32_t NumBytes = MFI.getStackSize();
  uint32_t RoundedSize = NumBytes;

  if (TRI.hasStackRealignment(MF))
    HasFP = true;

  Register FramePtrRegScratchCopy;
  if (!HasFP && !hasFP(MF)) {
    // Emit the CSR spill stores with SP base register.
    emitCSRSpillStores(MF, MBB, MBBI, DL, LiveUnits,
                       FuncInfo->isChainFunction() ? Register() : StackPtrReg,
                       FramePtrRegScratchCopy);
  } else {
    // CSR spill stores will use FP as base register.
    Register SGPRForFPSaveRestoreCopy =
        FuncInfo->getScratchSGPRCopyDstReg(FramePtrReg);

    initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ true);
    if (SGPRForFPSaveRestoreCopy) {
      // Copy FP to the scratch register now and emit the CFI entry. It avoids
      // the extra FP copy needed in the other two cases when FP is spilled to
      // memory or to a VGPR lane.
      PrologEpilogSGPRSpillBuilder SB(
          FramePtrReg,
          FuncInfo->getPrologEpilogSGPRSaveRestoreInfo(FramePtrReg), MBB, MBBI,
          DL, TII, TRI, LiveUnits, FramePtrReg);
      SB.save();
      LiveUnits.addReg(SGPRForFPSaveRestoreCopy);
    } else {
      // Copy FP into a new scratch register so that its previous value can be
      // spilled after setting up the new frame.
      FramePtrRegScratchCopy = findScratchNonCalleeSaveRegister(
          MRI, LiveUnits, AMDGPU::SReg_32_XM0_XEXECRegClass);
      if (!FramePtrRegScratchCopy)
        report_fatal_error("failed to find free scratch register");

      LiveUnits.addReg(FramePtrRegScratchCopy);
      BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrRegScratchCopy)
          .addReg(FramePtrReg);
    }
  }

  if (HasFP) {
    const unsigned Alignment = MFI.getMaxAlign().value();

    RoundedSize += Alignment;
    if (LiveUnits.empty()) {
      LiveUnits.init(TRI);
      LiveUnits.addLiveIns(MBB);
    }

    // s_add_i32 s33, s32, NumBytes
    // s_and_b32 s33, s33, 0b111...0000
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), FramePtrReg)
        .addReg(StackPtrReg)
        .addImm((Alignment - 1) * getScratchScaleFactor(ST))
        .setMIFlag(MachineInstr::FrameSetup);
    auto And = BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_AND_B32), FramePtrReg)
                   .addReg(FramePtrReg, RegState::Kill)
                   .addImm(-Alignment * getScratchScaleFactor(ST))
                   .setMIFlag(MachineInstr::FrameSetup);
    And->getOperand(3).setIsDead(); // Mark SCC as dead.
    FuncInfo->setIsStackRealigned(true);
  } else if ((HasFP = hasFP(MF))) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
        .addReg(StackPtrReg)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  // If FP is used, emit the CSR spills with FP base register.
  if (HasFP) {
    emitCSRSpillStores(MF, MBB, MBBI, DL, LiveUnits, FramePtrReg,
                       FramePtrRegScratchCopy);
    if (FramePtrRegScratchCopy)
      LiveUnits.removeReg(FramePtrRegScratchCopy);
  }

  // If we need a base pointer, set it up here. It's whatever the value of
  // the stack pointer is at this point. Any variable size objects will be
  // allocated after this, so we can still use the base pointer to reference
  // the incoming arguments.
  if ((HasBP = TRI.hasBasePointer(MF))) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), BasePtrReg)
        .addReg(StackPtrReg)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  if (HasFP && RoundedSize != 0) {
    auto Add = BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), StackPtrReg)
                   .addReg(StackPtrReg)
                   .addImm(RoundedSize * getScratchScaleFactor(ST))
                   .setMIFlag(MachineInstr::FrameSetup);
    Add->getOperand(3).setIsDead(); // Mark SCC as dead.
  }

  bool FPSaved = FuncInfo->hasPrologEpilogSGPRSpillEntry(FramePtrReg);
  (void)FPSaved;
  assert((!HasFP || FPSaved) &&
         "Needed to save FP but didn't save it anywhere");

  // If we allow spilling to AGPRs we may have saved FP but then spill
  // everything into AGPRs instead of the stack.
  assert((HasFP || !FPSaved || EnableSpillVGPRToAGPR) &&
         "Saved FP but didn't need it");

  bool BPSaved = FuncInfo->hasPrologEpilogSGPRSpillEntry(BasePtrReg);
  (void)BPSaved;
  assert((!HasBP || BPSaved) &&
         "Needed to save BP but didn't save it anywhere");

  assert((HasBP || !BPSaved) && "Saved BP but didn't need it");
}

void SIFrameLowering::emitEpilogue(MachineFunction &MF,
                                   MachineBasicBlock &MBB) const {
  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  if (FuncInfo->isEntryFunction())
    return;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  LiveRegUnits LiveUnits;
  // Get the insert location for the epilogue. If there were no terminators in
  // the block, get the last instruction.
  MachineBasicBlock::iterator MBBI = MBB.end();
  DebugLoc DL;
  if (!MBB.empty()) {
    MBBI = MBB.getLastNonDebugInstr();
    if (MBBI != MBB.end())
      DL = MBBI->getDebugLoc();

    MBBI = MBB.getFirstTerminator();
  }

  const MachineFrameInfo &MFI = MF.getFrameInfo();
  uint32_t NumBytes = MFI.getStackSize();
  uint32_t RoundedSize = FuncInfo->isStackRealigned()
                             ? NumBytes + MFI.getMaxAlign().value()
                             : NumBytes;
  const Register StackPtrReg = FuncInfo->getStackPtrOffsetReg();
  Register FramePtrReg = FuncInfo->getFrameOffsetReg();
  bool FPSaved = FuncInfo->hasPrologEpilogSGPRSpillEntry(FramePtrReg);

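  // Wind the SP back to its incoming value: prefer the base pointer copy when
  // one exists (it preserves the pre-realignment SP), otherwise recompute it
  // from the FP.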
  if (RoundedSize != 0) {
    if (TRI.hasBasePointer(MF)) {
      BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), StackPtrReg)
          .addReg(TRI.getBaseRegister())
          .setMIFlag(MachineInstr::FrameDestroy);
    } else if (hasFP(MF)) {
      BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), StackPtrReg)
          .addReg(FramePtrReg)
          .setMIFlag(MachineInstr::FrameDestroy);
    }
  }

  Register FramePtrRegScratchCopy;
  Register SGPRForFPSaveRestoreCopy =
      FuncInfo->getScratchSGPRCopyDstReg(FramePtrReg);
  if (FPSaved) {
    // CSR spill restores should use FP as base register. If
    // SGPRForFPSaveRestoreCopy is not true, restore the previous value of FP
    // into a new scratch register and copy to FP later when other registers are
    // restored from the current stack frame.
    initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ false);
    if (SGPRForFPSaveRestoreCopy) {
      LiveUnits.addReg(SGPRForFPSaveRestoreCopy);
    } else {
      FramePtrRegScratchCopy = findScratchNonCalleeSaveRegister(
          MRI, LiveUnits, AMDGPU::SReg_32_XM0_XEXECRegClass);
      if (!FramePtrRegScratchCopy)
        report_fatal_error("failed to find free scratch register");

      LiveUnits.addReg(FramePtrRegScratchCopy);
    }

    emitCSRSpillRestores(MF, MBB, MBBI, DL, LiveUnits, FramePtrReg,
                         FramePtrRegScratchCopy);
  }

  if (FPSaved) {
    // Insert the copy to restore FP.
    Register SrcReg = SGPRForFPSaveRestoreCopy ? SGPRForFPSaveRestoreCopy
                                               : FramePtrRegScratchCopy;
    MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
            .addReg(SrcReg);
    if (SGPRForFPSaveRestoreCopy)
      MIB.setMIFlag(MachineInstr::FrameDestroy);
  } else {
    // Insert the CSR spill restores with SP as the base register.
    emitCSRSpillRestores(MF, MBB, MBBI, DL, LiveUnits,
                         FuncInfo->isChainFunction() ? Register() : StackPtrReg,
                         FramePtrRegScratchCopy);
  }
}

#ifndef NDEBUG
static bool allSGPRSpillsAreDead(const MachineFunction &MF) {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
       I != E; ++I) {
    if (!MFI.isDeadObjectIndex(I) &&
        MFI.getStackID(I) == TargetStackID::SGPRSpill &&
        !FuncInfo->checkIndexInPrologEpilogSGPRSpills(I)) {
      return false;
    }
  }

  return true;
}
#endif

StackOffset SIFrameLowering::getFrameIndexReference(const MachineFunction &MF,
                                                    int FI,
                                                    Register &FrameReg) const {
  const SIRegisterInfo *RI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();

  FrameReg = RI->getFrameRegister(MF);
  return StackOffset::getFixed(MF.getFrameInfo().getObjectOffset(FI));
}

void SIFrameLowering::processFunctionBeforeFrameFinalized(
    MachineFunction &MF,
    RegScavenger *RS) const {
  MachineFrameInfo &MFI = MF.getFrameInfo();

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();

  const bool SpillVGPRToAGPR = ST.hasMAIInsts() && FuncInfo->hasSpilledVGPRs()
                               && EnableSpillVGPRToAGPR;

  if (SpillVGPRToAGPR) {
    // To track the spill frame indices handled in this pass.
    BitVector SpillFIs(MFI.getObjectIndexEnd(), false);
    BitVector NonVGPRSpillFIs(MFI.getObjectIndexEnd(), false);

    bool SeenDbgInstr = false;

    for (MachineBasicBlock &MBB : MF) {
      for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
        int FrameIndex;
        if (MI.isDebugInstr())
          SeenDbgInstr = true;

        if (TII->isVGPRSpill(MI)) {
          // Try to eliminate stack used by VGPR spills before frame
          // finalization.
          unsigned FIOp = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                                     AMDGPU::OpName::vaddr);
          int FI = MI.getOperand(FIOp).getIndex();
          Register VReg =
              TII->getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg();
          if (FuncInfo->allocateVGPRSpillToAGPR(MF, FI,
                                                TRI->isAGPR(MRI, VReg))) {
            assert(RS != nullptr);
            RS->enterBasicBlockEnd(MBB);
            RS->backward(std::next(MI.getIterator()));
            TRI->eliminateFrameIndex(MI, 0, FIOp, RS);
            SpillFIs.set(FI);
            continue;
          }
        } else if (TII->isStoreToStackSlot(MI, FrameIndex) ||
                   TII->isLoadFromStackSlot(MI, FrameIndex))
          if (!MFI.isFixedObjectIndex(FrameIndex))
            NonVGPRSpillFIs.set(FrameIndex);
      }
    }

    // Stack slot coloring may assign different objects to the same stack slot.
    // If not, then the VGPR to AGPR spill slot is dead.
    for (unsigned FI : SpillFIs.set_bits())
      if (!NonVGPRSpillFIs.test(FI))
        FuncInfo->setVGPRToAGPRSpillDead(FI);

    for (MachineBasicBlock &MBB : MF) {
      for (MCPhysReg Reg : FuncInfo->getVGPRSpillAGPRs())
        MBB.addLiveIn(Reg);

      for (MCPhysReg Reg : FuncInfo->getAGPRSpillVGPRs())
        MBB.addLiveIn(Reg);

      MBB.sortUniqueLiveIns();

      if (!SpillFIs.empty() && SeenDbgInstr) {
        // FIXME: The dead frame indices are replaced with a null register from
        // the debug value instructions. We should instead, update it with the
        // correct register value. But not sure the register value alone is
        // enough to lower the DIExpression. It should be worked out later.
        for (MachineInstr &MI : MBB) {
          if (MI.isDebugValue()) {
            uint32_t StackOperandIdx = MI.isDebugValueList() ? 2 : 0;
            if (MI.getOperand(StackOperandIdx).isFI() &&
                !MFI.isFixedObjectIndex(
                    MI.getOperand(StackOperandIdx).getIndex()) &&
                SpillFIs[MI.getOperand(StackOperandIdx).getIndex()]) {
              MI.getOperand(StackOperandIdx)
                  .ChangeToRegister(Register(), false /*isDef*/);
            }
          }
        }
      }
    }
  }

  // At this point we've already allocated all spilled SGPRs to VGPRs if we
  // can. Any remaining SGPR spills will go to memory, so move them back to the
  // default stack.
  bool HaveSGPRToVMemSpill =
      FuncInfo->removeDeadFrameIndices(MFI, /*ResetSGPRSpillStackIDs*/ true);
  assert(allSGPRSpillsAreDead(MF) &&
         "SGPR spill should have been removed in SILowerSGPRSpills");

  // FIXME: The other checks should be redundant with allStackObjectsAreDead,
  // but currently hasNonSpillStackObjects is set only from source
  // allocas. Stack temps produced from legalization are not counted currently.
  if (!allStackObjectsAreDead(MFI)) {
    assert(RS && "RegScavenger required if spilling");

    // Add an emergency spill slot
    RS->addScavengingFrameIndex(FuncInfo->getScavengeFI(MFI, *TRI));

    // If we are spilling SGPRs to memory with a large frame, we may need a
    // second VGPR emergency frame index.
    if (HaveSGPRToVMemSpill &&
        allocateScavengingFrameIndexesNearIncomingSP(MF)) {
      RS->addScavengingFrameIndex(MFI.CreateSpillStackObject(4, Align(4)));
    }
  }
}

void SIFrameLowering::processFunctionBeforeFrameIndicesReplaced(
    MachineFunction &MF, RegScavenger *RS) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();

  if (ST.hasMAIInsts() && !ST.hasGFX90AInsts()) {
    // On gfx908, we initially reserved the highest available VGPR for AGPR
    // copy. Now that RA is done, check if there exists an unused VGPR lower
    // than the one reserved before RA. If one exists, use it for the AGPR
    // copy instead.
    Register VGPRForAGPRCopy = FuncInfo->getVGPRForAGPRCopy();
    Register UnusedLowVGPR =
        TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
    if (UnusedLowVGPR && (TRI->getHWRegIndex(UnusedLowVGPR) <
                          TRI->getHWRegIndex(VGPRForAGPRCopy))) {
      // Reserve this newly identified VGPR for the AGPR copy. Reserved
      // registers should already be frozen at this point, so we can avoid
      // calling MRI.freezeReservedRegs and just use MRI.reserveReg.
      FuncInfo->setVGPRForAGPRCopy(UnusedLowVGPR);
      MRI.reserveReg(UnusedLowVGPR, TRI);
    }
  }
  // We initially reserved the highest available SGPR pair for long branches;
  // now, after RA, we shift down to a lower unused one if one exists.
  Register LongBranchReservedReg = FuncInfo->getLongBranchReservedReg();
  Register UnusedLowSGPR =
      TRI->findUnusedRegister(MRI, &AMDGPU::SGPR_64RegClass, MF);
  // If LongBranchReservedReg is null then we didn't find a long branch
  // and never reserved a register to begin with, so there is nothing to
  // shift down. Then if UnusedLowSGPR is null, there isn't an available lower
  // register to use, so just keep the original one we set.
  if (LongBranchReservedReg && UnusedLowSGPR) {
    FuncInfo->setLongBranchReservedReg(UnusedLowSGPR);
    MRI.reserveReg(UnusedLowSGPR, TRI);
  }
}

// The special SGPR spills like the one needed for FP, BP or any reserved
// registers are delayed until frame lowering.
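// Each such SGPR is saved by one of three methods, chosen in order of
// preference by getVGPRSpillLaneOrTempRegister above: copy to a free scratch
// SGPR, spill to a physical VGPR lane, or spill to scratch memory.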
1610 MachineFunction &MF, BitVector &SavedVGPRs,
1611 bool NeedExecCopyReservedReg) const {
1612 MachineFrameInfo &FrameInfo = MF.getFrameInfo();
1615 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1616 const SIRegisterInfo *TRI = ST.getRegisterInfo();
1617 LiveRegUnits LiveUnits;
1618 LiveUnits.init(*TRI);
1619 // Initially mark callee saved registers as used so we will not choose them
1620 // while looking for scratch SGPRs.
1621 const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs();
1622 for (unsigned I = 0; CSRegs[I]; ++I)
1623 LiveUnits.addReg(CSRegs[I]);
1624
1625 const TargetRegisterClass &RC = *TRI->getWaveMaskRegClass();
1626
1627 Register ReservedRegForExecCopy = MFI->getSGPRForEXECCopy();
1628 if (NeedExecCopyReservedReg ||
1629 (ReservedRegForExecCopy &&
1630 MRI.isPhysRegUsed(ReservedRegForExecCopy, /*SkipRegMaskTest=*/true))) {
1631 MRI.reserveReg(ReservedRegForExecCopy, TRI);
1632 Register UnusedScratchReg = findUnusedRegister(MRI, LiveUnits, RC);
1633 if (UnusedScratchReg) {
1634 // If found any unused scratch SGPR, reserve the register itself for Exec
1635 // copy and there is no need for any spills in that case.
1636 MFI->setSGPRForEXECCopy(UnusedScratchReg);
1637 MRI.replaceRegWith(ReservedRegForExecCopy, UnusedScratchReg);
1638 LiveUnits.addReg(UnusedScratchReg);
1639 } else {
1640 // Needs spill.
1641 assert(!MFI->hasPrologEpilogSGPRSpillEntry(ReservedRegForExecCopy) &&
1642 "Re-reserving spill slot for EXEC copy register");
1643 getVGPRSpillLaneOrTempRegister(MF, LiveUnits, ReservedRegForExecCopy, RC,
1644 /*IncludeScratchCopy=*/false);
1645 }
1646 } else if (ReservedRegForExecCopy) {
1647 // Reset it at this point. No whole-wave copies or spills were
1648 // encountered.
1649 MFI->setSGPRForEXECCopy(AMDGPU::NoRegister);
1650 }
1651
1652 // hasFP only knows about stack objects that already exist. We're now
1653 // determining the stack slots that will be created, so we have to predict
1654 // them. Stack objects force FP usage with calls.
1655 //
1656 // Note a new VGPR CSR may be introduced if one is used for the spill, but we
1657 // don't want to report it here.
1658 //
1659 // FIXME: Is this really hasReservedCallFrame?
1660 const bool WillHaveFP =
1661 FrameInfo.hasCalls() &&
1662 (SavedVGPRs.any() || !allStackObjectsAreDead(FrameInfo));
1663
1664 if (WillHaveFP || hasFP(MF)) {
1665 Register FramePtrReg = MFI->getFrameOffsetReg();
1666 assert(!MFI->hasPrologEpilogSGPRSpillEntry(FramePtrReg) &&
1667 "Re-reserving spill slot for FP");
1668 getVGPRSpillLaneOrTempRegister(MF, LiveUnits, FramePtrReg);
1669 }
1670
1671 if (TRI->hasBasePointer(MF)) {
1672 Register BasePtrReg = TRI->getBaseRegister();
1673 assert(!MFI->hasPrologEpilogSGPRSpillEntry(BasePtrReg) &&
1674 "Re-reserving spill slot for BP");
1675 getVGPRSpillLaneOrTempRegister(MF, LiveUnits, BasePtrReg);
1676 }
1677}
1678
1679 // Only report VGPRs to generic code.
1680 void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
1681 BitVector &SavedVGPRs,
1682 RegScavenger *RS) const {
1683 SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
1684
1685 // If this is a function with the amdgpu_cs_chain[_preserve] calling
1686 // convention and it doesn't contain any calls to llvm.amdgcn.cs.chain, then
1687 // we don't need to save and restore anything.
1688 if (MFI->isChainFunction() && !MF.getFrameInfo().hasTailCall())
1689 return;
1690
1691 TargetFrameLowering::determineCalleeSaves(MF, SavedVGPRs, RS);
1692
1693 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1694 const SIRegisterInfo *TRI = ST.getRegisterInfo();
1695 const SIInstrInfo *TII = ST.getInstrInfo();
1696 bool NeedExecCopyReservedReg = false;
1697
1698 MachineInstr *ReturnMI = nullptr;
1699 for (MachineBasicBlock &MBB : MF) {
1700 for (MachineInstr &MI : MBB) {
1701 // TODO: Walking through all MBBs here would be a bad heuristic. Better
1702 // handle them elsewhere.
1703 if (TII->isWWMRegSpillOpcode(MI.getOpcode()))
1704 NeedExecCopyReservedReg = true;
1705 else if (MI.getOpcode() == AMDGPU::SI_RETURN ||
1706 MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG ||
1707 MI.getOpcode() == AMDGPU::SI_WHOLE_WAVE_FUNC_RETURN ||
1708 (MFI->isChainFunction() &&
1709 TII->isChainCallOpcode(MI.getOpcode()))) {
1710 // We expect all returns to have the same number of register operands.
1711 assert(!ReturnMI ||
1712 (count_if(MI.operands(), [](auto Op) { return Op.isReg(); }) ==
1713 count_if(ReturnMI->operands(), [](auto Op) { return Op.isReg(); })));
1714 ReturnMI = &MI;
1715 }
1716 }
1717 }
1718
1719 SmallVector<Register> SortedWWMVGPRs;
1720 for (Register Reg : MFI->getWWMReservedRegs()) {
1721 // The shift-back is needed only for the VGPRs used for SGPR spills, and
1722 // those are 32 bits wide. The SIPreAllocateWWMRegs pass can add tuples
1723 // into the WWM reserved registers.
1724 const TargetRegisterClass *RC = TRI->getPhysRegBaseClass(Reg);
1725 if (TRI->getRegSizeInBits(*RC) != 32)
1726 continue;
1727 SortedWWMVGPRs.push_back(Reg);
1728 }
1729
1730 sort(SortedWWMVGPRs, std::greater<Register>());
1731 MFI->shiftWwmVGPRsToLowestRange(MF, SortedWWMVGPRs, SavedVGPRs);
1732
1733 if (MFI->isEntryFunction())
1734 return;
1735
1736 if (MFI->isWholeWaveFunction()) {
1737 // In practice, all the VGPRs are WWM registers, and we will need to save at
1738 // least their inactive lanes. Add them to WWMReservedRegs.
1739 assert(!NeedExecCopyReservedReg &&
1740 "Whole wave functions can use the reg mapped for their i1 argument");
1741
1742 // FIXME: Be more efficient!
1743 unsigned NumArchVGPRs = ST.has1024AddressableVGPRs() ? 1024 : 256;
1744 for (MCRegister Reg :
1745 AMDGPU::VGPR_32RegClass.getRegisters().take_front(NumArchVGPRs))
1746 if (MF.getRegInfo().isPhysRegModified(Reg)) {
1747 MFI->reserveWWMRegister(Reg);
1748 MF.begin()->addLiveIn(Reg);
1749 }
1750 MF.begin()->sortUniqueLiveIns();
1751 }
1752
1753 // Remove any VGPRs used in the return value because these do not need to
1754 // be saved. This prevents CSR restore from clobbering return VGPRs.
1755 if (ReturnMI) {
1756 for (auto &Op : ReturnMI->operands()) {
1757 if (Op.isReg())
1758 SavedVGPRs.reset(Op.getReg());
1759 }
1760 }
1761
1762 // Create the stack objects for WWM registers now.
1763 for (Register Reg : MFI->getWWMReservedRegs()) {
1764 const TargetRegisterClass *RC = TRI->getPhysRegBaseClass(Reg);
1765 MFI->allocateWWMSpill(MF, Reg, TRI->getSpillSize(*RC),
1766 TRI->getSpillAlign(*RC));
1767 }
1768
1769 // Ignore the SGPRs the default implementation found.
1770 SavedVGPRs.clearBitsNotInMask(TRI->getAllVectorRegMask());
1771
1772 // Do not save AGPRs prior to GFX90A because there was no easy way to do so.
1773 // In gfx908 there are no direct AGPR loads and stores, and thus spilling
1774 // also requires a temporary VGPR.
1775 if (!ST.hasGFX90AInsts())
1776 SavedVGPRs.clearBitsInMask(TRI->getAllAGPRRegMask());
1777
1778 determinePrologEpilogSGPRSaves(MF, SavedVGPRs, NeedExecCopyReservedReg);
1779
1780 // The Whole-Wave VGPRs need to be specially inserted in the prolog, so don't
1781 // allow the default insertion to handle them.
1782 for (auto &Reg : MFI->getWWMSpills())
1783 SavedVGPRs.reset(Reg.first);
1784}
1785
1786 void SIFrameLowering::determineCalleeSavesSGPR(MachineFunction &MF,
1787 BitVector &SavedRegs,
1788 RegScavenger *RS) const {
1789 TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
1790 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
1791 if (MFI->isEntryFunction())
1792 return;
1793
1794 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1795 const SIRegisterInfo *TRI = ST.getRegisterInfo();
1796
1797 // The SP is specifically managed and we don't want extra spills of it.
1798 SavedRegs.reset(MFI->getStackPtrOffsetReg());
1799
1800 const BitVector AllSavedRegs = SavedRegs;
1801 SavedRegs.clearBitsInMask(TRI->getAllVectorRegMask());
1802
1803 // We have to anticipate introducing CSR VGPR spills, or a spill of the
1804 // caller-save VGPR reserved for SGPR spills, as we now always create a
1805 // stack entry for it even if there are no stack objects yet, since we
1806 // require an FP if there is a call and a stack. We will allocate a VGPR
1807 // for SGPR spills if there are any SGPR spills, CSR or otherwise.
1808 MachineFrameInfo &FrameInfo = MF.getFrameInfo();
1809 const bool WillHaveFP =
1810 FrameInfo.hasCalls() && (AllSavedRegs.any() || MFI->hasSpilledSGPRs());
1811
1812 // FP will be specially managed like SP.
1813 if (WillHaveFP || hasFP(MF))
1814 SavedRegs.reset(MFI->getFrameOffsetReg());
1815
1816 // The use of the return address by the return instruction is hidden by the
1817 // SI_RETURN pseudo. Because of that, and since IPRA computes actual register
1818 // usage rather than using the CSR list, clobbering of the return address by
1819 // function calls (D117243) or otherwise (D120922) is not seen by IPRA's
1820 // register usage collection. Setting these bits ensures the return address
1821 // is saved and restored in those scenarios.
1822 const MachineRegisterInfo &MRI = MF.getRegInfo();
1823 Register RetAddrReg = TRI->getReturnAddressReg(MF);
1824 if (!MFI->isEntryFunction() &&
1825 (FrameInfo.hasCalls() || MRI.isPhysRegModified(RetAddrReg))) {
1826 SavedRegs.set(TRI->getSubReg(RetAddrReg, AMDGPU::sub0));
1827 SavedRegs.set(TRI->getSubReg(RetAddrReg, AMDGPU::sub1));
1828 }
1829}
1830
1831 static void assignSlotsUsingVGPRBlocks(MachineFunction &MF,
1832 const GCNSubtarget &ST,
1833 std::vector<CalleeSavedInfo> &CSI,
1834 unsigned &MinCSFrameIndex,
1835 unsigned &MaxCSFrameIndex) {
1836 SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
1837 MachineFrameInfo &MFI = MF.getFrameInfo();
1838 const SIRegisterInfo *TRI = ST.getRegisterInfo();
1839
1840 assert(
1841 llvm::is_sorted(CSI,
1842 [](const CalleeSavedInfo &A, const CalleeSavedInfo &B) {
1843 return A.getReg() < B.getReg();
1844 }) &&
1845 "Callee saved registers not sorted");
1846
1847 auto CanUseBlockOps = [&](const CalleeSavedInfo &CSI) {
1848 return !CSI.isSpilledToReg() &&
1849 TRI->getPhysRegBaseClass(CSI.getReg()) == &AMDGPU::VGPR_32RegClass &&
1850 !FuncInfo->isWWMReservedRegister(CSI.getReg());
1851 };
1852
1853 auto CSEnd = CSI.end();
1854 for (auto CSIt = CSI.begin(); CSIt != CSEnd; ++CSIt) {
1855 Register Reg = CSIt->getReg();
1856 if (!CanUseBlockOps(*CSIt))
1857 continue;
1858
1859 // Find all the regs that will fit in a 32-bit mask starting at the current
1860 // reg and build said mask. It should have 1 for every register that's
1861 // included, with the current register as the least significant bit.
1862 uint32_t Mask = 1;
1863 CSEnd = std::remove_if(
1864 CSIt + 1, CSEnd, [&](const CalleeSavedInfo &CSI) -> bool {
1865 if (CanUseBlockOps(CSI) && CSI.getReg() < Reg + 32) {
1866 Mask |= 1 << (CSI.getReg() - Reg);
1867 return true;
1868 } else {
1869 return false;
1870 }
1871 });
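// Worked example (hypothetical register numbers): if the eligible CSRs are
// v40, v41, and v43, the scan starting at v40 produces Mask = 0b1011: bit 0
// for v40, bit 1 for v41, bit 3 for v43; v42 is absent, so bit 2 stays clear.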
1872
1873 const TargetRegisterClass *BlockRegClass = TRI->getRegClassForBlockOp(MF);
1874 Register RegBlock =
1875 TRI->getMatchingSuperReg(Reg, AMDGPU::sub0, BlockRegClass);
1876 if (!RegBlock) {
1877 // We couldn't find a super register for the block. This can happen if
1878 // the register we started with is too high (e.g. v232 if the maximum is
1879 // v255). We therefore try to get the last register block and figure out
1880 // the mask from there.
1881 Register LastBlockStart =
1882 AMDGPU::VGPR0 + alignDown(Reg - AMDGPU::VGPR0, 32);
1883 RegBlock =
1884 TRI->getMatchingSuperReg(LastBlockStart, AMDGPU::sub0, BlockRegClass);
1885 assert(RegBlock && TRI->isSubRegister(RegBlock, Reg) &&
1886 "Couldn't find super register");
1887 int RegDelta = Reg - LastBlockStart;
1888 assert(RegDelta > 0 && llvm::countl_zero(Mask) >= RegDelta &&
1889 "Bad shift amount");
1890 Mask <<= RegDelta;
1891 }
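// Continuing the v232 example from the comment above: LastBlockStart is v224
// (232 aligned down to a multiple of 32), RegDelta is 8, and the mask built
// with v232 at bit 0 is shifted left by 8 so that bit 0 lines up with v224.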
1892
1893 FuncInfo->setMaskForVGPRBlockOps(RegBlock, Mask);
1894
1895 // The stack objects can be a bit smaller than the register block if we know
1896 // some of the high bits of Mask are 0. This may happen often with calling
1897 // conventions where the caller and callee-saved VGPRs are interleaved at
1898 // a small boundary (e.g. 8 or 16).
1899 int UnusedBits = llvm::countl_zero(Mask);
1900 unsigned BlockSize = TRI->getSpillSize(*BlockRegClass) - UnusedBits * 4;
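// Worked example, assuming the 1024-bit block class (32 VGPRs, 128-byte
// spill size) used by SI_BLOCK_SPILL_V1024_SAVE below: a mask of 0x00FF has
// 24 leading zeros, so the stack object shrinks to 128 - 24 * 4 = 32 bytes,
// just enough for the 8 live registers.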
1901 int FrameIdx =
1902 MFI.CreateStackObject(BlockSize, TRI->getSpillAlign(*BlockRegClass),
1903 /*isSpillSlot=*/true);
1904 if ((unsigned)FrameIdx < MinCSFrameIndex)
1905 MinCSFrameIndex = FrameIdx;
1906 if ((unsigned)FrameIdx > MaxCSFrameIndex)
1907 MaxCSFrameIndex = FrameIdx;
1908
1909 CSIt->setFrameIdx(FrameIdx);
1910 CSIt->setReg(RegBlock);
1911 }
1912 CSI.erase(CSEnd, CSI.end());
1913}
1914
1915 bool SIFrameLowering::assignCalleeSavedSpillSlots(
1916 MachineFunction &MF, const TargetRegisterInfo *TRI,
1917 std::vector<CalleeSavedInfo> &CSI, unsigned &MinCSFrameIndex,
1918 unsigned &MaxCSFrameIndex) const {
1919 if (CSI.empty())
1920 return true; // Early exit if no callee saved registers are modified!
1921
1922 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1923 bool UseVGPRBlocks = ST.useVGPRBlockOpsForCSR();
1924
1925 if (UseVGPRBlocks)
1926 assignSlotsUsingVGPRBlocks(MF, ST, CSI, MinCSFrameIndex, MaxCSFrameIndex);
1927
1928 return assignCalleeSavedSpillSlots(MF, TRI, CSI) || UseVGPRBlocks;
1929}
1930
1931 bool SIFrameLowering::assignCalleeSavedSpillSlots(
1932 MachineFunction &MF, const TargetRegisterInfo *TRI,
1933 std::vector<CalleeSavedInfo> &CSI) const {
1934 if (CSI.empty())
1935 return true; // Early exit if no callee saved registers are modified!
1936
1937 const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
1938 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1939 const SIRegisterInfo *RI = ST.getRegisterInfo();
1940 Register FramePtrReg = FuncInfo->getFrameOffsetReg();
1941 Register BasePtrReg = RI->getBaseRegister();
1942 Register SGPRForFPSaveRestoreCopy =
1943 FuncInfo->getScratchSGPRCopyDstReg(FramePtrReg);
1944 Register SGPRForBPSaveRestoreCopy =
1945 FuncInfo->getScratchSGPRCopyDstReg(BasePtrReg);
1946 if (!SGPRForFPSaveRestoreCopy && !SGPRForBPSaveRestoreCopy)
1947 return false;
1948
1949 unsigned NumModifiedRegs = 0;
1950
1951 if (SGPRForFPSaveRestoreCopy)
1952 NumModifiedRegs++;
1953 if (SGPRForBPSaveRestoreCopy)
1954 NumModifiedRegs++;
1955
1956 for (auto &CS : CSI) {
1957 if (CS.getReg() == FramePtrReg.asMCReg() && SGPRForFPSaveRestoreCopy) {
1958 CS.setDstReg(SGPRForFPSaveRestoreCopy);
1959 if (--NumModifiedRegs)
1960 break;
1961 } else if (CS.getReg() == BasePtrReg.asMCReg() &&
1962 SGPRForBPSaveRestoreCopy) {
1963 CS.setDstReg(SGPRForBPSaveRestoreCopy);
1964 if (--NumModifiedRegs)
1965 break;
1966 }
1967 }
1968
1969 return false;
1970}
1971
1972 bool SIFrameLowering::allocateScavengingFrameIndexesNearIncomingSP(
1973 const MachineFunction &MF) const {
1974
1975 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1976 const MachineFrameInfo &MFI = MF.getFrameInfo();
1977 const SIInstrInfo *TII = ST.getInstrInfo();
1978 uint64_t EstStackSize = MFI.estimateStackSize(MF);
1979 uint64_t MaxOffset = EstStackSize - 1;
1980
1981 // We need the emergency stack slots to be allocated in range of the
1982 // MUBUF/flat scratch immediate offset from the base register, so assign these
1983 // first at the incoming SP position.
1984 //
1985 // TODO: We could try sorting the objects to find a hole in the first bytes
1986 // rather than allocating as close as possible. This could save a lot of
1987 // space on frames with alignment requirements.
1988 if (ST.enableFlatScratch()) {
1989 if (TII->isLegalFLATOffset(MaxOffset, AMDGPUAS::PRIVATE_ADDRESS,
1990 SIInstrFlags::FlatScratch))
1991 return false;
1992 } else {
1993 if (TII->isLegalMUBUFImmOffset(MaxOffset))
1994 return false;
1995 }
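// For reference: on most subtargets the legal MUBUF immediate is a 12-bit
// unsigned byte offset (0..4095; newer generations extend this), so a frame
// whose estimated size fits below the limit never needs the scavenging slots
// placed near the incoming SP.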
1996
1997 return true;
1998}
1999
2000 bool SIFrameLowering::spillCalleeSavedRegisters(
2001 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2002 ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
2003 MachineFunction *MF = MBB.getParent();
2004 const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
2005 if (!ST.useVGPRBlockOpsForCSR())
2006 return false;
2007
2008 MachineFrameInfo &FrameInfo = MF->getFrameInfo();
2009 SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
2010 const SIInstrInfo *TII = ST.getInstrInfo();
2011 SIMachineFunctionInfo *FuncInfo = MF->getInfo<SIMachineFunctionInfo>();
2012
2013 const TargetRegisterClass *BlockRegClass =
2014 static_cast<const SIRegisterInfo *>(TRI)->getRegClassForBlockOp(*MF);
2015 for (const CalleeSavedInfo &CS : CSI) {
2016 Register Reg = CS.getReg();
2017 if (!BlockRegClass->contains(Reg) ||
2018 !FuncInfo->hasMaskForVGPRBlockOps(Reg)) {
2019 spillCalleeSavedRegister(MBB, MI, CS, TII, TRI);
2020 continue;
2021 }
2022
2023 // Build a scratch block store.
2024 uint32_t Mask = FuncInfo->getMaskForVGPRBlockOps(Reg);
2025 int FrameIndex = CS.getFrameIdx();
2026 MachinePointerInfo PtrInfo =
2027 MachinePointerInfo::getFixedStack(*MF, FrameIndex);
2028 MachineMemOperand *MMO =
2029 MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
2030 FrameInfo.getObjectSize(FrameIndex),
2031 FrameInfo.getObjectAlign(FrameIndex));
2032
2033 BuildMI(MBB, MI, MI->getDebugLoc(),
2034 TII->get(AMDGPU::SI_BLOCK_SPILL_V1024_SAVE))
2035 .addReg(Reg, getKillRegState(false))
2036 .addFrameIndex(FrameIndex)
2037 .addReg(MFI->getStackPtrOffsetReg())
2038 .addImm(0)
2039 .addImm(Mask)
2040 .addMemOperand(MMO);
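// The mask immediate tells the spill pseudo which 32-bit lanes of the block
// are live; registers whose bit is clear are not written, which is what
// permits the shrunken stack object created in assignSlotsUsingVGPRBlocks.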
2041
2042 FuncInfo->setHasSpilledVGPRs();
2043
2044 // Add the register to the liveins. This is necessary because if any of the
2045 // VGPRs in the register block is reserved (e.g. if it's a WWM register),
2046 // then the whole block will be marked as reserved and `updateLiveness` will
2047 // skip it.
2048 MBB.addLiveIn(Reg);
2049 }
2050 MBB.sortUniqueLiveIns();
2051
2052 return true;
2053}
2054
2055 bool SIFrameLowering::restoreCalleeSavedRegisters(
2056 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2057 MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
2058 MachineFunction *MF = MBB.getParent();
2059 const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
2060 if (!ST.useVGPRBlockOpsForCSR())
2061 return false;
2062
2063 SIMachineFunctionInfo *FuncInfo = MF->getInfo<SIMachineFunctionInfo>();
2064 MachineFrameInfo &MFI = MF->getFrameInfo();
2065 const SIInstrInfo *TII = ST.getInstrInfo();
2066 const SIRegisterInfo *SITRI = static_cast<const SIRegisterInfo *>(TRI);
2067 const TargetRegisterClass *BlockRegClass = SITRI->getRegClassForBlockOp(*MF);
2068 for (const CalleeSavedInfo &CS : reverse(CSI)) {
2069 Register Reg = CS.getReg();
2070 if (!BlockRegClass->contains(Reg) ||
2071 !FuncInfo->hasMaskForVGPRBlockOps(Reg)) {
2072 restoreCalleeSavedRegister(MBB, MI, CS, TII, TRI);
2073 continue;
2074 }
2075
2076 // Build a scratch block load.
2077 uint32_t Mask = FuncInfo->getMaskForVGPRBlockOps(Reg);
2078 int FrameIndex = CS.getFrameIdx();
2079 MachinePointerInfo PtrInfo =
2080 MachinePointerInfo::getFixedStack(*MF, FrameIndex);
2081 MachineMemOperand *MMO = MF->getMachineMemOperand(
2082 PtrInfo, MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIndex),
2083 MFI.getObjectAlign(FrameIndex));
2084
2085 auto MIB = BuildMI(MBB, MI, MI->getDebugLoc(),
2086 TII->get(AMDGPU::SI_BLOCK_SPILL_V1024_RESTORE), Reg)
2087 .addFrameIndex(FrameIndex)
2088 .addReg(FuncInfo->getStackPtrOffsetReg())
2089 .addImm(0)
2090 .addImm(Mask)
2091 .addMemOperand(MMO);
2092 SITRI->addImplicitUsesForBlockCSRLoad(MIB, Reg);
2093
2094 // Add the register to the liveins. This is necessary because if any of the
2095 // VGPRs in the register block is reserved (e.g. if it's a WWM register),
2096 // then the whole block will be marked as reserved and `updateLiveness` will
2097 // skip it.
2098 MBB.addLiveIn(Reg);
2099 }
2100
2101 MBB.sortUniqueLiveIns();
2102 return true;
2103}
2104
2105 MachineBasicBlock::iterator SIFrameLowering::eliminateCallFramePseudoInstr(
2106 MachineFunction &MF,
2107 MachineBasicBlock &MBB,
2108 MachineBasicBlock::iterator I) const {
2109 int64_t Amount = I->getOperand(0).getImm();
2110 if (Amount == 0)
2111 return MBB.erase(I);
2112
2113 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
2114 const SIInstrInfo *TII = ST.getInstrInfo();
2115 const DebugLoc &DL = I->getDebugLoc();
2116 unsigned Opc = I->getOpcode();
2117 bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
2118 uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;
2119
2120 if (!hasReservedCallFrame(MF)) {
2121 Amount = alignTo(Amount, getStackAlign());
2122 assert(isUInt<32>(Amount) && "exceeded stack address space size");
2123 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
2124 Register SPReg = MFI->getStackPtrOffsetReg();
2125
2126 Amount *= getScratchScaleFactor(ST);
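// getScratchScaleFactor is 1 with flat scratch enabled and the wavefront
// size otherwise, because MUBUF scratch is swizzled per lane. Illustration:
// growing the frame by 16 bytes per lane on a wave64 target without flat
// scratch adds 16 * 64 = 1024 to SPReg.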
2127 if (IsDestroy)
2128 Amount = -Amount;
2129 auto Add = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), SPReg)
2130 .addReg(SPReg)
2131 .addImm(Amount);
2132 Add->getOperand(3).setIsDead(); // Mark SCC as dead.
2133 } else if (CalleePopAmount != 0) {
2134 llvm_unreachable("is this used?");
2135 }
2136
2137 return MBB.erase(I);
2138}
2139
2140 /// Returns true if the frame will require a reference to the stack pointer.
2141 ///
2142 /// This is the set of conditions common to setting up the stack pointer in a
2143 /// kernel, and for using a frame pointer in a callable function.
2144 ///
2145 /// FIXME: Should also check hasOpaqueSPAdjustment and if any inline asm
2146 /// references SP.
2148 return MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint();
2149}
2150
2151 // The FP for kernels is always known to be 0, so we never really need to
2152 // set up an explicit register for it. However, DisableFramePointerElim will
2153 // force us to use a register for it.
2154 bool SIFrameLowering::hasFPImpl(const MachineFunction &MF) const {
2155 const MachineFrameInfo &MFI = MF.getFrameInfo();
2156
2157 // For entry & chain functions we can use an immediate offset in most cases,
2158 // so the presence of calls doesn't imply we need a distinct frame pointer.
2159 if (MFI.hasCalls() &&
2160 !MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction() &&
2161 !MF.getInfo<SIMachineFunctionInfo>()->isChainFunction()) {
2162 // All offsets are unsigned, so need to be addressed in the same direction
2163 // as stack growth.
2164
2165 // FIXME: This function is pretty broken, since it can be called before the
2166 // frame layout is determined or CSR spills are inserted.
2167 return MFI.getStackSize() != 0;
2168 }
2169
2170 return frameTriviallyRequiresSP(MFI) || MFI.isFrameAddressTaken() ||
2171 MF.getSubtarget<GCNSubtarget>().getRegisterInfo()->hasStackRealignment(
2172 MF) ||
2173 mayReserveScratchForCWSR(MF) ||
2174 MF.getTarget().Options.DisableFramePointerElim(MF);
2175}
2176
2177 bool SIFrameLowering::mayReserveScratchForCWSR(
2178 const MachineFunction &MF) const {
2179 return MF.getInfo<SIMachineFunctionInfo>()->isDynamicVGPREnabled() &&
2180 AMDGPU::isEntryFunctionCC(MF.getFunction().getCallingConv()) &&
2181 AMDGPU::isCompute(MF.getFunction().getCallingConv());
2182}
2183
2184 // This is essentially a reduced version of hasFP for entry functions. Since
2185 // the stack pointer is known 0 on entry to kernels, we never really need an
2186 // FP register. We may need to initialize the stack pointer depending on the
2187 // frame properties, which logically overlaps many of the cases where an
2188 // ordinary function would require an FP.
2189 // Also used for chain functions. While not technically entry functions, chain
2190 // functions may need to set up a stack pointer in some situations.
2191 bool SIFrameLowering::requiresStackPointerReference(
2192 const MachineFunction &MF) const {
2193 // Callable functions always require a stack pointer reference.
2194 assert((MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction() ||
2195 MF.getInfo<SIMachineFunctionInfo>()->isChainFunction()) &&
2196 "only expected to call this for entry points and chain functions");
2197
2198 const MachineFrameInfo &MFI = MF.getFrameInfo();
2199
2200 // Entry points ordinarily don't need to initialize SP. We have to set it up
2201 // for callees if there are any. Also note tail calls are impossible/don't
2202 // make any sense for kernels.
2203 if (MFI.hasCalls())
2204 return true;
2205
2206 // We still need to initialize the SP if we're doing anything weird that
2207 // references the SP, like variable sized stack objects.
2208 return frameTriviallyRequiresSP(MFI);
2209}