LLVM  15.0.0git
SIFrameLowering.cpp
Go to the documentation of this file.
1 //===----------------------- SIFrameLowering.cpp --------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //==-----------------------------------------------------------------------===//
8 
9 #include "SIFrameLowering.h"
10 #include "AMDGPU.h"
11 #include "GCNSubtarget.h"
13 #include "SIMachineFunctionInfo.h"
18 
19 using namespace llvm;
20 
21 #define DEBUG_TYPE "frame-info"
22 
// NOTE(review): the declaration line of this command-line option (presumably
// something like "static cl::opt<bool> EnableSpillVGPRToAGPR(") was lost in
// extraction; only the argument list survives and is reproduced unchanged.
24  "amdgpu-spill-vgpr-to-agpr",
25  cl::desc("Enable spilling VGPRs to AGPRs"),
27  cl::init(true));
28 
29 // Find a scratch register that we can use in the prologue. We avoid using
30 // callee-save registers since they may appear to be free when this is called
31 // from canUseAsPrologue (during shrink wrapping), but then no longer be free
32 // when this is called from emitPrologue.
34  LivePhysRegs &LiveRegs,
35  const TargetRegisterClass &RC,
36  bool Unused = false) {
37  // Mark callee saved registers as used so we will not choose them.
38  const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
39  for (unsigned i = 0; CSRegs[i]; ++i)
40  LiveRegs.addReg(CSRegs[i]);
41 
42  if (Unused) {
43  // We are looking for a register that can be used throughout the entire
44  // function, so any use is unacceptable.
45  for (MCRegister Reg : RC) {
46  if (!MRI.isPhysRegUsed(Reg) && LiveRegs.available(MRI, Reg))
47  return Reg;
48  }
49  } else {
50  for (MCRegister Reg : RC) {
51  if (LiveRegs.available(MRI, Reg))
52  return Reg;
53  }
54  }
55 
56  return MCRegister();
57 }
58 
// Decide where to preserve the FP or BP of a non-entry function, trying in
// order: (1) a free lane in an already-spilled CSR VGPR, (2) a free unused
// SGPR (returned via TempSGPR), (3) a fallback VGPR-lane spill, and finally
// (4) a plain scratch-memory spill slot (returned via FrameIndex).
// NOTE(review): the function signature and several declaration lines were
// lost in extraction; the surviving code is reproduced unchanged below.
60  LivePhysRegs &LiveRegs,
61  Register &TempSGPR,
63  bool IsFP) {
65  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
66 
68  const SIRegisterInfo *TRI = ST.getRegisterInfo();
69 
70  // We need to save and restore the current FP/BP.
71 
72  // 1: If there is already a VGPR with free lanes, use it. We
73  // may already have to pay the penalty for spilling a CSR VGPR.
74  if (MFI->haveFreeLanesForSGPRSpill(MF, 1)) {
75  int NewFI = FrameInfo.CreateStackObject(4, Align(4), true, nullptr,
77 
78  if (!MFI->allocateSGPRSpillToVGPR(MF, NewFI))
79  llvm_unreachable("allocate SGPR spill should have worked");
80 
81  FrameIndex = NewFI;
82 
83  LLVM_DEBUG(auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front();
84  dbgs() << "Spilling " << (IsFP ? "FP" : "BP") << " to "
85  << printReg(Spill.VGPR, TRI) << ':' << Spill.Lane
86  << '\n');
87  return;
88  }
89 
90  // 2: Next, try to save the FP/BP in an unused SGPR.
// NOTE(review): the first line of this call (presumably
// "TempSGPR = findScratchNonCalleeSaveRegister(") was dropped; the argument
// list below matches that helper's (MRI, LiveRegs, RC, Unused) signature.
92  MF.getRegInfo(), LiveRegs, AMDGPU::SReg_32_XM0_XEXECRegClass, true);
93 
94  if (!TempSGPR) {
95  int NewFI = FrameInfo.CreateStackObject(4, Align(4), true, nullptr,
97 
98  if (TRI->spillSGPRToVGPR() && MFI->allocateSGPRSpillToVGPR(MF, NewFI)) {
99  // 3: There's no free lane to spill, and no free register to save FP/BP,
100  // so we're forced to spill another VGPR to use for the spill.
101  FrameIndex = NewFI;
102 
103  LLVM_DEBUG(
104  auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front();
105  dbgs() << (IsFP ? "FP" : "BP") << " requires fallback spill to "
106  << printReg(Spill.VGPR, TRI) << ':' << Spill.Lane << '\n';);
107  } else {
108  // Remove dead <NewFI> index
109  MF.getFrameInfo().RemoveStackObject(NewFI);
110  // 4: If all else fails, spill the FP/BP to memory.
111  FrameIndex = FrameInfo.CreateSpillStackObject(4, Align(4));
112  LLVM_DEBUG(dbgs() << "Reserved FI " << FrameIndex << " for spilling "
113  << (IsFP ? "FP" : "BP") << '\n');
114  }
115  } else {
116  LLVM_DEBUG(dbgs() << "Saving " << (IsFP ? "FP" : "BP") << " with copy to "
117  << printReg(TempSGPR, TRI) << '\n');
118  }
119 }
120 
121 // We need to specially emit stack operations here because a different frame
122 // register is used than in the rest of the function, as getFrameRegister would
123 // use.
124 static void buildPrologSpill(const GCNSubtarget &ST, const SIRegisterInfo &TRI,
125  const SIMachineFunctionInfo &FuncInfo,
126  LivePhysRegs &LiveRegs, MachineFunction &MF,
129  Register SpillReg, int FI) {
130  unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
131  : AMDGPU::BUFFER_STORE_DWORD_OFFSET;
132 
133  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
136  PtrInfo, MachineMemOperand::MOStore, FrameInfo.getObjectSize(FI),
137  FrameInfo.getObjectAlign(FI));
138  LiveRegs.addReg(SpillReg);
139  TRI.buildSpillLoadStore(MBB, I, DL, Opc, FI, SpillReg, true,
140  FuncInfo.getStackPtrOffsetReg(), 0, MMO, nullptr,
141  &LiveRegs);
142  LiveRegs.removeReg(SpillReg);
143 }
144 
145 static void buildEpilogRestore(const GCNSubtarget &ST,
146  const SIRegisterInfo &TRI,
147  const SIMachineFunctionInfo &FuncInfo,
148  LivePhysRegs &LiveRegs, MachineFunction &MF,
151  const DebugLoc &DL, Register SpillReg, int FI) {
152  unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
153  : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
154 
155  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
158  PtrInfo, MachineMemOperand::MOLoad, FrameInfo.getObjectSize(FI),
159  FrameInfo.getObjectAlign(FI));
160  TRI.buildSpillLoadStore(MBB, I, DL, Opc, FI, SpillReg, false,
161  FuncInfo.getStackPtrOffsetReg(), 0, MMO, nullptr,
162  &LiveRegs);
163 }
164 
166  const DebugLoc &DL, const SIInstrInfo *TII,
167  Register TargetReg) {
168  MachineFunction *MF = MBB.getParent();
170  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
171  const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
172  Register TargetLo = TRI->getSubReg(TargetReg, AMDGPU::sub0);
173  Register TargetHi = TRI->getSubReg(TargetReg, AMDGPU::sub1);
174 
175  if (MFI->getGITPtrHigh() != 0xffffffff) {
176  BuildMI(MBB, I, DL, SMovB32, TargetHi)
177  .addImm(MFI->getGITPtrHigh())
178  .addReg(TargetReg, RegState::ImplicitDefine);
179  } else {
180  const MCInstrDesc &GetPC64 = TII->get(AMDGPU::S_GETPC_B64);
181  BuildMI(MBB, I, DL, GetPC64, TargetReg);
182  }
183  Register GitPtrLo = MFI->getGITPtrLoReg(*MF);
184  MF->getRegInfo().addLiveIn(GitPtrLo);
185  MBB.addLiveIn(GitPtrLo);
186  BuildMI(MBB, I, DL, SMovB32, TargetLo)
187  .addReg(GitPtrLo);
188 }
189 
// Initializes the FLAT_SCR register pair for an entry function: locates the
// flat scratch init value (loaded from the GIT descriptor on PAL, otherwise
// from a preloaded SGPR pair), then adds the scratch wave offset — via
// S_SETREG on GFX10+, directly into FLAT_SCR_LO/HI on GFX9, or with the
// pre-GFX9 copy/add/shift sequence.
// NOTE(review): several interior lines (parts of the signature, pointer-info
// and offset declarations) were lost in extraction; the surviving code is
// reproduced unchanged below.
190 // Emit flat scratch setup code, assuming `MFI->hasFlatScratchInit()`
191 void SIFrameLowering::emitEntryFunctionFlatScratchInit(
193  const DebugLoc &DL, Register ScratchWaveOffsetReg) const {
194  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
195  const SIInstrInfo *TII = ST.getInstrInfo();
196  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
198 
199  // We don't need this if we only have spills since there is no user facing
200  // scratch.
201 
202  // TODO: If we know we don't have flat instructions earlier, we can omit
203  // this from the input registers.
204  //
205  // TODO: We only need to know if we access scratch space through a flat
206  // pointer. Because we only detect if flat instructions are used at all,
207  // this will be used more often than necessary on VI.
208 
209  Register FlatScrInitLo;
210  Register FlatScrInitHi;
211 
212  if (ST.isAmdPalOS()) {
213  // Extract the scratch offset from the descriptor in the GIT
214  LivePhysRegs LiveRegs;
215  LiveRegs.init(*TRI);
216  LiveRegs.addLiveIns(MBB);
217 
218  // Find unused reg to load flat scratch init into
220  Register FlatScrInit = AMDGPU::NoRegister;
221  ArrayRef<MCPhysReg> AllSGPR64s = TRI->getAllSGPR64(MF);
222  unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 1) / 2;
223  AllSGPR64s = AllSGPR64s.slice(
224  std::min(static_cast<unsigned>(AllSGPR64s.size()), NumPreloaded));
225  Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
226  for (MCPhysReg Reg : AllSGPR64s) {
227  if (LiveRegs.available(MRI, Reg) && MRI.isAllocatable(Reg) &&
228  !TRI->isSubRegisterEq(Reg, GITPtrLoReg)) {
229  FlatScrInit = Reg;
230  break;
231  }
232  }
233  assert(FlatScrInit && "Failed to find free register for scratch init");
234 
235  FlatScrInitLo = TRI->getSubReg(FlatScrInit, AMDGPU::sub0);
236  FlatScrInitHi = TRI->getSubReg(FlatScrInit, AMDGPU::sub1);
237 
238  buildGitPtr(MBB, I, DL, TII, FlatScrInit);
239 
240  // We now have the GIT ptr - now get the scratch descriptor from the entry
241  // at offset 0 (or offset 16 for a compute shader).
243  const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);
244  auto *MMO = MF.getMachineMemOperand(
245  PtrInfo,
248  8, Align(4));
249  unsigned Offset =
251  const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
252  unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset);
253  BuildMI(MBB, I, DL, LoadDwordX2, FlatScrInit)
254  .addReg(FlatScrInit)
255  .addImm(EncodedOffset) // offset
256  .addImm(0) // cpol
257  .addMemOperand(MMO);
258 
259  // Mask the offset in [47:0] of the descriptor
260  const MCInstrDesc &SAndB32 = TII->get(AMDGPU::S_AND_B32);
261  auto And = BuildMI(MBB, I, DL, SAndB32, FlatScrInitHi)
262  .addReg(FlatScrInitHi)
263  .addImm(0xffff);
264  And->getOperand(3).setIsDead(); // Mark SCC as dead.
265  } else {
266  Register FlatScratchInitReg =
268  assert(FlatScratchInitReg);
269 
271  MRI.addLiveIn(FlatScratchInitReg);
272  MBB.addLiveIn(FlatScratchInitReg);
273 
274  FlatScrInitLo = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0);
275  FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1);
276  }
277 
278  // Do a 64-bit pointer add.
279  if (ST.flatScratchIsPointer()) {
280  if (ST.getGeneration() >= AMDGPUSubtarget::GFX10) {
281  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo)
282  .addReg(FlatScrInitLo)
283  .addReg(ScratchWaveOffsetReg);
284  auto Addc = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32),
285  FlatScrInitHi)
286  .addReg(FlatScrInitHi)
287  .addImm(0);
288  Addc->getOperand(3).setIsDead(); // Mark SCC as dead.
289 
290  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)).
291  addReg(FlatScrInitLo).
292  addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_LO |
294  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)).
295  addReg(FlatScrInitHi).
296  addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_HI |
298  return;
299  }
300 
301  // For GFX9.
302  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), AMDGPU::FLAT_SCR_LO)
303  .addReg(FlatScrInitLo)
304  .addReg(ScratchWaveOffsetReg);
305  auto Addc = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32),
306  AMDGPU::FLAT_SCR_HI)
307  .addReg(FlatScrInitHi)
308  .addImm(0);
309  Addc->getOperand(3).setIsDead(); // Mark SCC as dead.
310 
311  return;
312  }
313 
314  assert(ST.getGeneration() < AMDGPUSubtarget::GFX9);
315 
316  // Copy the size in bytes.
317  BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), AMDGPU::FLAT_SCR_LO)
318  .addReg(FlatScrInitHi, RegState::Kill);
319 
320  // Add wave offset in bytes to private base offset.
321  // See comment in AMDKernelCodeT.h for enable_sgpr_flat_scratch_init.
322  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), FlatScrInitLo)
323  .addReg(FlatScrInitLo)
324  .addReg(ScratchWaveOffsetReg);
325 
326  // Convert offset to 256-byte units.
327  auto LShr = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_LSHR_B32),
328  AMDGPU::FLAT_SCR_HI)
329  .addReg(FlatScrInitLo, RegState::Kill)
330  .addImm(8);
331  LShr->getOperand(3).setIsDead(true); // Mark SCC as dead.
332 }
333 
334 // Note SGPRSpill stack IDs should only be used for SGPR spilling to VGPRs, not
335 // memory. They should have been removed by now.
336 static bool allStackObjectsAreDead(const MachineFrameInfo &MFI) {
337  for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
338  I != E; ++I) {
339  if (!MFI.isDeadObjectIndex(I))
340  return false;
341  }
342 
343  return true;
344 }
345 
// Returns the scratch RSRC register for an entry function, shifting the
// reserved SGPR quad down next to the actually-used preloaded SGPRs when
// possible, or Register() when the SRSRC is unused.
// NOTE(review): a few declaration/condition lines (MFI/MRI declarations and
// part of two conditions) were lost in extraction; the surviving code is
// reproduced unchanged below.
346 // Shift down registers reserved for the scratch RSRC.
347 Register SIFrameLowering::getEntryFunctionReservedScratchRsrcReg(
348  MachineFunction &MF) const {
349 
350  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
351  const SIInstrInfo *TII = ST.getInstrInfo();
352  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
355 
356  assert(MFI->isEntryFunction());
357 
358  Register ScratchRsrcReg = MFI->getScratchRSrcReg();
359 
360  if (!ScratchRsrcReg || (!MRI.isPhysRegUsed(ScratchRsrcReg) &&
362  return Register();
363 
364  if (ST.hasSGPRInitBug() ||
365  ScratchRsrcReg != TRI->reservedPrivateSegmentBufferReg(MF))
366  return ScratchRsrcReg;
367 
368  // We reserved the last registers for this. Shift it down to the end of those
369  // which were actually used.
370  //
371  // FIXME: It might be safer to use a pseudoregister before replacement.
372 
373  // FIXME: We should be able to eliminate unused input registers. We only
374  // cannot do this for the resources required for scratch access. For now we
375  // skip over user SGPRs and may leave unused holes.
376 
377  unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 3) / 4;
378  ArrayRef<MCPhysReg> AllSGPR128s = TRI->getAllSGPR128(MF);
379  AllSGPR128s = AllSGPR128s.slice(std::min(static_cast<unsigned>(AllSGPR128s.size()), NumPreloaded));
380 
381  // Skip the last N reserved elements because they should have already been
382  // reserved for VCC etc.
383  Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
384  for (MCPhysReg Reg : AllSGPR128s) {
385  // Pick the first unallocated one. Make sure we don't clobber the other
386  // reserved input we needed. Also for PAL, make sure we don't clobber
387  // the GIT pointer passed in SGPR0 or SGPR8.
389  !TRI->isSubRegisterEq(Reg, GITPtrLoReg)) {
390  MRI.replaceRegWith(ScratchRsrcReg, Reg);
391  MFI->setScratchRSrcReg(Reg);
392  return Reg;
393  }
394  }
395 
396  return ScratchRsrcReg;
397 }
398 
399 static unsigned getScratchScaleFactor(const GCNSubtarget &ST) {
400  return ST.enableFlatScratch() ? 1 : ST.getWavefrontSize();
401 }
402 
// NOTE(review): the signature line was lost in extraction; given the
// isEntryFunction assert and the calls to emitEntryFunctionFlatScratchInit /
// emitEntryFunctionScratchRsrcRegSetup below, this is presumably
// SIFrameLowering::emitEntryFunctionPrologue — confirm against upstream.
// Sets up the entry-function frame: fixes the reserved scratch RSRC, picks a
// scratch wave offset register, initializes SP/FP, and emits flat scratch
// and/or scratch RSRC setup as needed.
404  MachineBasicBlock &MBB) const {
405  assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
406 
407  // FIXME: If we only have SGPR spills, we won't actually be using scratch
408  // memory since these spill to VGPRs. We should be cleaning up these unused
409  // SGPR spill frame indices somewhere.
410 
411  // FIXME: We still have implicit uses on SGPR spill instructions in case they
412  // need to spill to vector memory. It's likely that will not happen, but at
413  // this point it appears we need the setup. This part of the prolog should be
414  // emitted after frame indices are eliminated.
415 
416  // FIXME: Remove all of the isPhysRegUsed checks
417 
419  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
420  const SIInstrInfo *TII = ST.getInstrInfo();
421  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
423  const Function &F = MF.getFunction();
424  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
425 
426  assert(MFI->isEntryFunction());
427 
428  Register PreloadedScratchWaveOffsetReg = MFI->getPreloadedReg(
430 
431  // We need to do the replacement of the private segment buffer register even
432  // if there are no stack objects. There could be stores to undef or a
433  // constant without an associated object.
434  //
435  // This will return `Register()` in cases where there are no actual
436  // uses of the SRSRC.
437  Register ScratchRsrcReg;
438  if (!ST.enableFlatScratch())
439  ScratchRsrcReg = getEntryFunctionReservedScratchRsrcReg(MF);
440 
441  // Make the selected register live throughout the function.
442  if (ScratchRsrcReg) {
443  for (MachineBasicBlock &OtherBB : MF) {
444  if (&OtherBB != &MBB) {
445  OtherBB.addLiveIn(ScratchRsrcReg);
446  }
447  }
448  }
449 
450  // Now that we have fixed the reserved SRSRC we need to locate the
451  // (potentially) preloaded SRSRC.
452  Register PreloadedScratchRsrcReg;
453  if (ST.isAmdHsaOrMesa(F)) {
454  PreloadedScratchRsrcReg =
456  if (ScratchRsrcReg && PreloadedScratchRsrcReg) {
457  // We added live-ins during argument lowering, but since they were not
458  // used they were deleted. We're adding the uses now, so add them back.
459  MRI.addLiveIn(PreloadedScratchRsrcReg);
460  MBB.addLiveIn(PreloadedScratchRsrcReg);
461  }
462  }
463 
464  // Debug location must be unknown since the first debug location is used to
465  // determine the end of the prologue.
466  DebugLoc DL;
468 
469  // We found the SRSRC first because it needs four registers and has an
470  // alignment requirement. If the SRSRC that we found is clobbering with
471  // the scratch wave offset, which may be in a fixed SGPR or a free SGPR
472  // chosen by SITargetLowering::allocateSystemSGPRs, COPY the scratch
473  // wave offset to a free SGPR.
474  Register ScratchWaveOffsetReg;
475  if (PreloadedScratchWaveOffsetReg &&
476  TRI->isSubRegisterEq(ScratchRsrcReg, PreloadedScratchWaveOffsetReg)) {
477  ArrayRef<MCPhysReg> AllSGPRs = TRI->getAllSGPR32(MF);
478  unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();
479  AllSGPRs = AllSGPRs.slice(
480  std::min(static_cast<unsigned>(AllSGPRs.size()), NumPreloaded));
481  Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
482  for (MCPhysReg Reg : AllSGPRs) {
484  !TRI->isSubRegisterEq(ScratchRsrcReg, Reg) && GITPtrLoReg != Reg) {
485  ScratchWaveOffsetReg = Reg;
486  BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchWaveOffsetReg)
487  .addReg(PreloadedScratchWaveOffsetReg, RegState::Kill);
488  break;
489  }
490  }
491  } else {
492  ScratchWaveOffsetReg = PreloadedScratchWaveOffsetReg;
493  }
494  assert(ScratchWaveOffsetReg || !PreloadedScratchWaveOffsetReg);
495 
// NOTE(review): the guarding condition of this block (orig line 496) was
// lost in extraction — it decides when SP is initialized to the stack size.
497  Register SPReg = MFI->getStackPtrOffsetReg();
498  assert(SPReg != AMDGPU::SP_REG);
499  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), SPReg)
500  .addImm(FrameInfo.getStackSize() * getScratchScaleFactor(ST));
501  }
502 
503  if (hasFP(MF)) {
504  Register FPReg = MFI->getFrameOffsetReg();
505  assert(FPReg != AMDGPU::FP_REG);
506  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), FPReg).addImm(0);
507  }
508 
509  bool NeedsFlatScratchInit =
510  MFI->hasFlatScratchInit() &&
511  (MRI.isPhysRegUsed(AMDGPU::FLAT_SCR) || FrameInfo.hasCalls() ||
512  (!allStackObjectsAreDead(FrameInfo) && ST.enableFlatScratch()));
513 
514  if ((NeedsFlatScratchInit || ScratchRsrcReg) &&
515  PreloadedScratchWaveOffsetReg && !ST.flatScratchIsArchitected()) {
516  MRI.addLiveIn(PreloadedScratchWaveOffsetReg);
517  MBB.addLiveIn(PreloadedScratchWaveOffsetReg);
518  }
519 
520  if (NeedsFlatScratchInit) {
521  emitEntryFunctionFlatScratchInit(MF, MBB, I, DL, ScratchWaveOffsetReg);
522  }
523 
524  if (ScratchRsrcReg) {
525  emitEntryFunctionScratchRsrcRegSetup(MF, MBB, I, DL,
526  PreloadedScratchRsrcReg,
527  ScratchRsrcReg, ScratchWaveOffsetReg);
528  }
529 }
530 
// Builds the 128-bit scratch resource descriptor (SRD): loaded from the GIT
// on PAL, assembled from relocations/implicit buffer pointer for Mesa GFX
// shaders or when no SRSRC was preloaded, otherwise copied from the preloaded
// SRSRC; finally folds the scratch wave offset into the descriptor base.
// NOTE(review): several interior lines (parts of the signature, pointer-info
// declarations, and some operands) were lost in extraction; the surviving
// code is reproduced unchanged below.
531 // Emit scratch RSRC setup code, assuming `ScratchRsrcReg != AMDGPU::NoReg`
532 void SIFrameLowering::emitEntryFunctionScratchRsrcRegSetup(
534  const DebugLoc &DL, Register PreloadedScratchRsrcReg,
535  Register ScratchRsrcReg, Register ScratchWaveOffsetReg) const {
536 
537  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
538  const SIInstrInfo *TII = ST.getInstrInfo();
539  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
541  const Function &Fn = MF.getFunction();
542 
543  if (ST.isAmdPalOS()) {
544  // The pointer to the GIT is formed from the offset passed in and either
545  // the amdgpu-git-ptr-high function attribute or the top part of the PC
546  Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
547  Register Rsrc03 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);
548 
549  buildGitPtr(MBB, I, DL, TII, Rsrc01);
550 
551  // We now have the GIT ptr - now get the scratch descriptor from the entry
552  // at offset 0 (or offset 16 for a compute shader).
554  const MCInstrDesc &LoadDwordX4 = TII->get(AMDGPU::S_LOAD_DWORDX4_IMM);
555  auto MMO = MF.getMachineMemOperand(PtrInfo,
559  16, Align(4));
560  unsigned Offset = Fn.getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0;
561  const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
562  unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset);
563  BuildMI(MBB, I, DL, LoadDwordX4, ScratchRsrcReg)
564  .addReg(Rsrc01)
565  .addImm(EncodedOffset) // offset
566  .addImm(0) // cpol
567  .addReg(ScratchRsrcReg, RegState::ImplicitDefine)
568  .addMemOperand(MMO);
569 
570  // The driver will always set the SRD for wave 64 (bits 118:117 of
571  // descriptor / bits 22:21 of third sub-reg will be 0b11)
572  // If the shader is actually wave32 we have to modify the const_index_stride
573  // field of the descriptor 3rd sub-reg (bits 22:21) to 0b10 (stride=32). The
574  // reason the driver does this is that there can be cases where it presents
575  // 2 shaders with different wave size (e.g. VsFs).
576  // TODO: convert to using SCRATCH instructions or multiple SRD buffers
577  if (ST.isWave32()) {
578  const MCInstrDesc &SBitsetB32 = TII->get(AMDGPU::S_BITSET0_B32);
579  BuildMI(MBB, I, DL, SBitsetB32, Rsrc03)
580  .addImm(21)
581  .addReg(Rsrc03);
582  }
583  } else if (ST.isMesaGfxShader(Fn) || !PreloadedScratchRsrcReg) {
584  assert(!ST.isAmdHsaOrMesa(Fn));
585  const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
586 
587  Register Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
588  Register Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);
589 
590  // Use relocations to get the pointer, and setup the other bits manually.
591  uint64_t Rsrc23 = TII->getScratchRsrcWords23();
592 
593  if (MFI->hasImplicitBufferPtr()) {
594  Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
595 
597  const MCInstrDesc &Mov64 = TII->get(AMDGPU::S_MOV_B64);
598 
599  BuildMI(MBB, I, DL, Mov64, Rsrc01)
601  .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
602  } else {
603  const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);
604 
606  auto MMO = MF.getMachineMemOperand(
607  PtrInfo,
610  8, Align(4));
611  BuildMI(MBB, I, DL, LoadDwordX2, Rsrc01)
613  .addImm(0) // offset
614  .addImm(0) // cpol
615  .addMemOperand(MMO)
616  .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
617 
620  }
621  } else {
622  Register Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
623  Register Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
624 
625  BuildMI(MBB, I, DL, SMovB32, Rsrc0)
626  .addExternalSymbol("SCRATCH_RSRC_DWORD0")
627  .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
628 
629  BuildMI(MBB, I, DL, SMovB32, Rsrc1)
630  .addExternalSymbol("SCRATCH_RSRC_DWORD1")
631  .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
632 
633  }
634 
635  BuildMI(MBB, I, DL, SMovB32, Rsrc2)
636  .addImm(Rsrc23 & 0xffffffff)
637  .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
638 
639  BuildMI(MBB, I, DL, SMovB32, Rsrc3)
640  .addImm(Rsrc23 >> 32)
641  .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
642  } else if (ST.isAmdHsaOrMesa(Fn)) {
643  assert(PreloadedScratchRsrcReg);
644 
645  if (ScratchRsrcReg != PreloadedScratchRsrcReg) {
646  BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchRsrcReg)
647  .addReg(PreloadedScratchRsrcReg, RegState::Kill);
648  }
649  }
650 
651  // Add the scratch wave offset into the scratch RSRC.
652  //
653  // We only want to update the first 48 bits, which is the base address
654  // pointer, without touching the adjacent 16 bits of flags. We know this add
655  // cannot carry-out from bit 47, otherwise the scratch allocation would be
656  // impossible to fit in the 48-bit global address space.
657  //
658  // TODO: Evaluate if it is better to just construct an SRD using the flat
659  // scratch init and some constants rather than update the one we are passed.
660  Register ScratchRsrcSub0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
661  Register ScratchRsrcSub1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
662 
663  // We cannot Kill ScratchWaveOffsetReg here because we allow it to be used in
664  // the kernel body via inreg arguments.
665  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), ScratchRsrcSub0)
666  .addReg(ScratchRsrcSub0)
667  .addReg(ScratchWaveOffsetReg)
668  .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
669  auto Addc = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), ScratchRsrcSub1)
670  .addReg(ScratchRsrcSub1)
671  .addImm(0)
672  .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
673  Addc->getOperand(3).setIsDead(); // Mark SCC as dead.
674 }
675 
// NOTE(review): extraction dropped this function's signature and the switch's
// case labels; only the skeleton of a switch over a value `ID` survives and
// is reproduced unchanged. Restore the full function from the original file
// before compiling — presumably it classifies TargetStackID::Value entries
// (the llvm_unreachable message below names that type); confirm upstream.
677  switch (ID) {
681  return true;
684  return false;
685  }
686  llvm_unreachable("Invalid TargetStackID::Value");
687 }
688 
689 static void initLiveRegs(LivePhysRegs &LiveRegs, const SIRegisterInfo &TRI,
690  const SIMachineFunctionInfo *FuncInfo,
692  MachineBasicBlock::iterator MBBI, bool IsProlog) {
693  if (LiveRegs.empty()) {
694  LiveRegs.init(TRI);
695  if (IsProlog) {
696  LiveRegs.addLiveIns(MBB);
697  } else {
698  // In epilog.
699  LiveRegs.addLiveOuts(MBB);
700  LiveRegs.stepBackward(*MBBI);
701  }
702  }
703 }
704 
705 // Activate all lanes, returns saved exec.
707  MachineFunction &MF,
710  bool IsProlog) {
711  Register ScratchExecCopy;
713  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
714  const SIInstrInfo *TII = ST.getInstrInfo();
715  const SIRegisterInfo &TRI = TII->getRegisterInfo();
717  DebugLoc DL;
718 
719  initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, IsProlog);
720 
721  ScratchExecCopy = findScratchNonCalleeSaveRegister(
722  MRI, LiveRegs, *TRI.getWaveMaskRegClass());
723  if (!ScratchExecCopy)
724  report_fatal_error("failed to find free scratch register");
725 
726  LiveRegs.addReg(ScratchExecCopy);
727 
728  const unsigned OrSaveExec =
729  ST.isWave32() ? AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64;
730  auto SaveExec = BuildMI(MBB, MBBI, DL, TII->get(OrSaveExec), ScratchExecCopy)
731  .addImm(-1);
732  SaveExec->getOperand(3).setIsDead(); // Mark SCC as dead.
733 
734  return ScratchExecCopy;
735 }
736 
737 // A StackID of SGPRSpill implies that this is a spill from SGPR to VGPR.
738 // Otherwise we are spilling to memory.
739 static bool spilledToMemory(const MachineFunction &MF, int SaveIndex) {
740  const MachineFrameInfo &MFI = MF.getFrameInfo();
741  return MFI.getStackID(SaveIndex) != TargetStackID::SGPRSpill;
742 }
743 
// NOTE(review): the signature line was lost in extraction; from the early
// return for entry functions and the FP/BP save logic below this is
// presumably SIFrameLowering::emitPrologue for non-entry (callable)
// functions — confirm against upstream.
// Spills SGPR-spill VGPRs and WWM registers with all lanes forced on, saves
// FP/BP (to memory, a VGPR lane, or a scratch SGPR copy), performs stack
// realignment, and sets up the frame/base pointers and stack pointer bump.
745  MachineBasicBlock &MBB) const {
747  if (FuncInfo->isEntryFunction()) {
749  return;
750  }
751 
752  MachineFrameInfo &MFI = MF.getFrameInfo();
754  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
755  const SIInstrInfo *TII = ST.getInstrInfo();
756  const SIRegisterInfo &TRI = TII->getRegisterInfo();
757 
758  Register StackPtrReg = FuncInfo->getStackPtrOffsetReg();
759  Register FramePtrReg = FuncInfo->getFrameOffsetReg();
760  Register BasePtrReg =
761  TRI.hasBasePointer(MF) ? TRI.getBaseRegister() : Register();
762  LivePhysRegs LiveRegs;
763 
765  DebugLoc DL;
766 
767  bool HasFP = false;
768  bool HasBP = false;
769  uint32_t NumBytes = MFI.getStackSize();
770  uint32_t RoundedSize = NumBytes;
771  // To avoid clobbering VGPRs in lanes that weren't active on function entry,
772  // turn on all lanes before doing the spill to memory.
773  Register ScratchExecCopy;
774 
775  Optional<int> FPSaveIndex = FuncInfo->FramePointerSaveIndex;
776  Optional<int> BPSaveIndex = FuncInfo->BasePointerSaveIndex;
777 
778  // VGPRs used for SGPR->VGPR spills
780  FuncInfo->getSGPRSpillVGPRs()) {
781  if (!Reg.FI)
782  continue;
783 
784  if (!ScratchExecCopy)
785  ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI,
786  /*IsProlog*/ true);
787 
788  buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, Reg.VGPR,
789  *Reg.FI);
790  }
791 
792  for (auto ReservedWWM : FuncInfo->wwmAllocation()) {
793  if (!ScratchExecCopy)
794  ScratchExecCopy =
795  buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, /*IsProlog*/ true);
796 
797  buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL,
798  std::get<0>(ReservedWWM), std::get<1>(ReservedWWM));
799  }
800 
801  if (ScratchExecCopy) {
802  // FIXME: Split block and make terminator.
803  unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
804  MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
805  BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec)
806  .addReg(ScratchExecCopy, RegState::Kill);
807  LiveRegs.addReg(ScratchExecCopy);
808  }
809 
// Helper: copy an SGPR into a fresh scratch VGPR and store it to scratch
// memory at frame index FI.
810  auto SaveSGPRToMemory = [&](Register Reg, const int FI) {
811  assert(!MFI.isDeadObjectIndex(FI));
812 
813  initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ true);
814 
816  MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
817  if (!TmpVGPR)
818  report_fatal_error("failed to find free scratch register");
819 
820  BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
821  .addReg(Reg);
822 
823  buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, TmpVGPR,
824  FI);
825  };
826 
// Helper: write an SGPR into its single pre-allocated VGPR lane for FI.
827  auto SaveSGPRToVGPRLane = [&](Register Reg, const int FI) {
828  assert(!MFI.isDeadObjectIndex(FI));
829 
832  FuncInfo->getSGPRToVGPRSpills(FI);
833  assert(Spill.size() == 1);
834 
835  BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_WRITELANE_B32), Spill[0].VGPR)
836  .addReg(Reg)
837  .addImm(Spill[0].Lane)
838  .addReg(Spill[0].VGPR, RegState::Undef);
839  };
840 
841  if (FPSaveIndex) {
842  if (spilledToMemory(MF, *FPSaveIndex))
843  SaveSGPRToMemory(FramePtrReg, *FPSaveIndex);
844  else
845  SaveSGPRToVGPRLane(FramePtrReg, *FPSaveIndex);
846  }
847 
848  // Emit the copy if we need an FP, and are using a free SGPR to save it.
849  if (FuncInfo->SGPRForFPSaveRestoreCopy) {
850  BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY),
851  FuncInfo->SGPRForFPSaveRestoreCopy)
852  .addReg(FramePtrReg)
854  }
855 
856  if (BPSaveIndex) {
857  if (spilledToMemory(MF, *BPSaveIndex))
858  SaveSGPRToMemory(BasePtrReg, *BPSaveIndex);
859  else
860  SaveSGPRToVGPRLane(BasePtrReg, *BPSaveIndex);
861  }
862 
863  // Emit the copy if we need a BP, and are using a free SGPR to save it.
864  if (FuncInfo->SGPRForBPSaveRestoreCopy) {
865  BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY),
866  FuncInfo->SGPRForBPSaveRestoreCopy)
867  .addReg(BasePtrReg)
869  }
870 
871  // If a copy has been emitted for FP and/or BP, Make the SGPRs
872  // used in the copy instructions live throughout the function.
873  SmallVector<MCPhysReg, 2> TempSGPRs;
874  if (FuncInfo->SGPRForFPSaveRestoreCopy)
875  TempSGPRs.push_back(FuncInfo->SGPRForFPSaveRestoreCopy);
876 
877  if (FuncInfo->SGPRForBPSaveRestoreCopy)
878  TempSGPRs.push_back(FuncInfo->SGPRForBPSaveRestoreCopy);
879 
880  if (!TempSGPRs.empty()) {
881  for (MachineBasicBlock &MBB : MF) {
882  for (MCPhysReg Reg : TempSGPRs)
883  MBB.addLiveIn(Reg);
884 
886  }
887  if (!LiveRegs.empty()) {
888  LiveRegs.addReg(FuncInfo->SGPRForFPSaveRestoreCopy);
889  LiveRegs.addReg(FuncInfo->SGPRForBPSaveRestoreCopy);
890  }
891  }
892 
893  if (TRI.hasStackRealignment(MF)) {
894  HasFP = true;
895  const unsigned Alignment = MFI.getMaxAlign().value();
896 
897  RoundedSize += Alignment;
898  if (LiveRegs.empty()) {
899  LiveRegs.init(TRI);
900  LiveRegs.addLiveIns(MBB);
901  }
902 
903  // s_add_i32 s33, s32, NumBytes
904  // s_and_b32 s33, s33, 0b111...0000
905  BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), FramePtrReg)
906  .addReg(StackPtrReg)
907  .addImm((Alignment - 1) * getScratchScaleFactor(ST))
909  auto And = BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_AND_B32), FramePtrReg)
910  .addReg(FramePtrReg, RegState::Kill)
911  .addImm(-Alignment * getScratchScaleFactor(ST))
913  And->getOperand(3).setIsDead(); // Mark SCC as dead.
914  FuncInfo->setIsStackRealigned(true);
915  } else if ((HasFP = hasFP(MF))) {
916  BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
917  .addReg(StackPtrReg)
919  }
920 
921  // If we need a base pointer, set it up here. It's whatever the value of
922  // the stack pointer is at this point. Any variable size objects will be
923  // allocated after this, so we can still use the base pointer to reference
924  // the incoming arguments.
925  if ((HasBP = TRI.hasBasePointer(MF))) {
926  BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), BasePtrReg)
927  .addReg(StackPtrReg)
929  }
930 
931  if (HasFP && RoundedSize != 0) {
932  auto Add = BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), StackPtrReg)
933  .addReg(StackPtrReg)
934  .addImm(RoundedSize * getScratchScaleFactor(ST))
936  Add->getOperand(3).setIsDead(); // Mark SCC as dead.
937  }
938 
939  assert((!HasFP || (FuncInfo->SGPRForFPSaveRestoreCopy ||
940  FuncInfo->FramePointerSaveIndex)) &&
941  "Needed to save FP but didn't save it anywhere");
942 
943  // If we allow spilling to AGPRs we may have saved FP but then spill
944  // everything into AGPRs instead of the stack.
945  assert((HasFP || (!FuncInfo->SGPRForFPSaveRestoreCopy &&
946  !FuncInfo->FramePointerSaveIndex) ||
948  "Saved FP but didn't need it");
949 
950  assert((!HasBP || (FuncInfo->SGPRForBPSaveRestoreCopy ||
951  FuncInfo->BasePointerSaveIndex)) &&
952  "Needed to save BP but didn't save it anywhere");
953 
954  assert((HasBP || (!FuncInfo->SGPRForBPSaveRestoreCopy &&
955  !FuncInfo->BasePointerSaveIndex)) &&
956  "Saved BP but didn't need it");
957 }
958 
960  MachineBasicBlock &MBB) const {
961  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
962  if (FuncInfo->isEntryFunction())
963  return;
964 
965  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
966  const SIInstrInfo *TII = ST.getInstrInfo();
968  const SIRegisterInfo &TRI = TII->getRegisterInfo();
970  LivePhysRegs LiveRegs;
971  DebugLoc DL;
972 
973  const MachineFrameInfo &MFI = MF.getFrameInfo();
974  uint32_t NumBytes = MFI.getStackSize();
975  uint32_t RoundedSize = FuncInfo->isStackRealigned()
976  ? NumBytes + MFI.getMaxAlign().value()
977  : NumBytes;
978  const Register StackPtrReg = FuncInfo->getStackPtrOffsetReg();
979  const Register FramePtrReg = FuncInfo->getFrameOffsetReg();
980  const Register BasePtrReg =
981  TRI.hasBasePointer(MF) ? TRI.getBaseRegister() : Register();
982 
983  Optional<int> FPSaveIndex = FuncInfo->FramePointerSaveIndex;
984  Optional<int> BPSaveIndex = FuncInfo->BasePointerSaveIndex;
985 
986  if (RoundedSize != 0 && hasFP(MF)) {
987  auto Add = BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), StackPtrReg)
988  .addReg(StackPtrReg)
989  .addImm(-static_cast<int64_t>(RoundedSize * getScratchScaleFactor(ST)))
991  Add->getOperand(3).setIsDead(); // Mark SCC as dead.
992  }
993 
994  if (FuncInfo->SGPRForFPSaveRestoreCopy) {
995  BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
996  .addReg(FuncInfo->SGPRForFPSaveRestoreCopy)
998  }
999 
1000  if (FuncInfo->SGPRForBPSaveRestoreCopy) {
1001  BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), BasePtrReg)
1002  .addReg(FuncInfo->SGPRForBPSaveRestoreCopy)
1004  }
1005 
1006  auto RestoreSGPRFromMemory = [&](Register Reg, const int FI) {
1007  initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ false);
1009  MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
1010  if (!TmpVGPR)
1011  report_fatal_error("failed to find free scratch register");
1012  buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, TmpVGPR,
1013  FI);
1014  BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), Reg)
1015  .addReg(TmpVGPR, RegState::Kill);
1016  };
1017 
1018  auto RestoreSGPRFromVGPRLane = [&](Register Reg, const int FI) {
1021  FuncInfo->getSGPRToVGPRSpills(FI);
1022  assert(Spill.size() == 1);
1023  BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READLANE_B32), Reg)
1024  .addReg(Spill[0].VGPR)
1025  .addImm(Spill[0].Lane);
1026  };
1027 
1028  if (FPSaveIndex) {
1029  const int FramePtrFI = *FPSaveIndex;
1030  assert(!MFI.isDeadObjectIndex(FramePtrFI));
1031  if (spilledToMemory(MF, FramePtrFI))
1032  RestoreSGPRFromMemory(FramePtrReg, FramePtrFI);
1033  else
1034  RestoreSGPRFromVGPRLane(FramePtrReg, FramePtrFI);
1035  }
1036 
1037  if (BPSaveIndex) {
1038  const int BasePtrFI = *BPSaveIndex;
1039  assert(!MFI.isDeadObjectIndex(BasePtrFI));
1040  if (spilledToMemory(MF, BasePtrFI))
1041  RestoreSGPRFromMemory(BasePtrReg, BasePtrFI);
1042  else
1043  RestoreSGPRFromVGPRLane(BasePtrReg, BasePtrFI);
1044  }
1045 
1046  Register ScratchExecCopy;
1048  FuncInfo->getSGPRSpillVGPRs()) {
1049  if (!Reg.FI)
1050  continue;
1051 
1052  if (!ScratchExecCopy)
1053  ScratchExecCopy =
1054  buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, /*IsProlog*/ false);
1055 
1056  buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL,
1057  Reg.VGPR, *Reg.FI);
1058  }
1059 
1060  for (auto ReservedWWM : FuncInfo->wwmAllocation()) {
1061  if (!ScratchExecCopy)
1062  ScratchExecCopy =
1063  buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, /*IsProlog*/ false);
1064 
1065  buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL,
1066  std::get<0>(ReservedWWM), std::get<1>(ReservedWWM));
1067  }
1068 
1069  if (ScratchExecCopy) {
1070  // FIXME: Split block and make terminator.
1071  unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
1072  MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
1073  BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec)
1074  .addReg(ScratchExecCopy, RegState::Kill);
1075  }
1076 }
1077 
1078 #ifndef NDEBUG
1079 static bool allSGPRSpillsAreDead(const MachineFunction &MF) {
1080  const MachineFrameInfo &MFI = MF.getFrameInfo();
1081  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
1082  for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
1083  I != E; ++I) {
1084  if (!MFI.isDeadObjectIndex(I) &&
1086  (I != FuncInfo->FramePointerSaveIndex &&
1087  I != FuncInfo->BasePointerSaveIndex)) {
1088  return false;
1089  }
1090  }
1091 
1092  return true;
1093 }
1094 #endif
1095 
1097  int FI,
1098  Register &FrameReg) const {
1099  const SIRegisterInfo *RI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
1100 
1101  FrameReg = RI->getFrameRegister(MF);
1103 }
1104 
1106  MachineFunction &MF,
1107  RegScavenger *RS) const {
1108  MachineFrameInfo &MFI = MF.getFrameInfo();
1109 
1110  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1111  const SIInstrInfo *TII = ST.getInstrInfo();
1112  const SIRegisterInfo *TRI = ST.getRegisterInfo();
1115 
1116  if (!FuncInfo->isEntryFunction()) {
1117  // Spill VGPRs used for Whole Wave Mode
1118  FuncInfo->allocateWWMReservedSpillSlots(MFI, *TRI);
1119  }
1120 
1121  const bool SpillVGPRToAGPR = ST.hasMAIInsts() && FuncInfo->hasSpilledVGPRs()
1123 
1124  if (SpillVGPRToAGPR) {
1125  // To track the spill frame indices handled in this pass.
1126  BitVector SpillFIs(MFI.getObjectIndexEnd(), false);
1127  BitVector NonVGPRSpillFIs(MFI.getObjectIndexEnd(), false);
1128 
1129  bool SeenDbgInstr = false;
1130 
1131  for (MachineBasicBlock &MBB : MF) {
1133  int FrameIndex;
1134  if (MI.isDebugInstr())
1135  SeenDbgInstr = true;
1136 
1137  if (TII->isVGPRSpill(MI)) {
1138  // Try to eliminate stack used by VGPR spills before frame
1139  // finalization.
1140  unsigned FIOp = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
1141  AMDGPU::OpName::vaddr);
1142  int FI = MI.getOperand(FIOp).getIndex();
1143  Register VReg =
1144  TII->getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg();
1145  if (FuncInfo->allocateVGPRSpillToAGPR(MF, FI,
1146  TRI->isAGPR(MRI, VReg))) {
1147  // FIXME: change to enterBasicBlockEnd()
1148  RS->enterBasicBlock(MBB);
1149  TRI->eliminateFrameIndex(MI, 0, FIOp, RS);
1150  SpillFIs.set(FI);
1151  continue;
1152  }
1153  } else if (TII->isStoreToStackSlot(MI, FrameIndex) ||
1155  if (!MFI.isFixedObjectIndex(FrameIndex))
1156  NonVGPRSpillFIs.set(FrameIndex);
1157  }
1158  }
1159 
1160  // Stack slot coloring may assign different objects to the same stack slot.
1161  // If not, then the VGPR to AGPR spill slot is dead.
1162  for (unsigned FI : SpillFIs.set_bits())
1163  if (!NonVGPRSpillFIs.test(FI))
1164  FuncInfo->setVGPRToAGPRSpillDead(FI);
1165 
1166  for (MachineBasicBlock &MBB : MF) {
1167  for (MCPhysReg Reg : FuncInfo->getVGPRSpillAGPRs())
1168  MBB.addLiveIn(Reg);
1169 
1170  for (MCPhysReg Reg : FuncInfo->getAGPRSpillVGPRs())
1171  MBB.addLiveIn(Reg);
1172 
1174 
1175  if (!SpillFIs.empty() && SeenDbgInstr) {
1176  // FIXME: The dead frame indices are replaced with a null register from
1177  // the debug value instructions. We should instead, update it with the
1178  // correct register value. But not sure the register value alone is
1179  for (MachineInstr &MI : MBB) {
1180  if (MI.isDebugValue() && MI.getOperand(0).isFI() &&
1181  SpillFIs[MI.getOperand(0).getIndex()]) {
1182  MI.getOperand(0).ChangeToRegister(Register(), false /*isDef*/);
1183  }
1184  }
1185  }
1186  }
1187  }
1188 
1189  // At this point we've already allocated all spilled SGPRs to VGPRs if we
1190  // can. Any remaining SGPR spills will go to memory, so move them back to the
1191  // default stack.
1192  bool HaveSGPRToVMemSpill =
1193  FuncInfo->removeDeadFrameIndices(MFI, /*ResetSGPRSpillStackIDs*/ true);
1195  "SGPR spill should have been removed in SILowerSGPRSpills");
1196 
1197  // FIXME: The other checks should be redundant with allStackObjectsAreDead,
1198  // but currently hasNonSpillStackObjects is set only from source
1199  // allocas. Stack temps produced from legalization are not counted currently.
1200  if (!allStackObjectsAreDead(MFI)) {
1201  assert(RS && "RegScavenger required if spilling");
1202 
1203  // Add an emergency spill slot
1204  RS->addScavengingFrameIndex(FuncInfo->getScavengeFI(MFI, *TRI));
1205 
1206  // If we are spilling SGPRs to memory with a large frame, we may need a
1207  // second VGPR emergency frame index.
1208  if (HaveSGPRToVMemSpill &&
1210  RS->addScavengingFrameIndex(MFI.CreateStackObject(4, Align(4), false));
1211  }
1212  }
1213 }
1214 
1216  MachineFunction &MF, RegScavenger *RS) const {
1217  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1218  const SIRegisterInfo *TRI = ST.getRegisterInfo();
1221 
1222  if (ST.hasMAIInsts() && !ST.hasGFX90AInsts()) {
1223  // On gfx908, we had initially reserved highest available VGPR for AGPR
1224  // copy. Now since we are done with RA, check if there exist an unused VGPR
1225  // which is lower than the eariler reserved VGPR before RA. If one exist,
1226  // use it for AGPR copy instead of one reserved before RA.
1227  Register VGPRForAGPRCopy = FuncInfo->getVGPRForAGPRCopy();
1228  Register UnusedLowVGPR =
1229  TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
1230  if (UnusedLowVGPR && (TRI->getHWRegIndex(UnusedLowVGPR) <
1231  TRI->getHWRegIndex(VGPRForAGPRCopy))) {
1232  // Call to setVGPRForAGPRCopy() should happen first before calling
1233  // freezeReservedRegs() so that getReservedRegs() can reserve this newly
1234  // identified VGPR (for AGPR copy).
1235  FuncInfo->setVGPRForAGPRCopy(UnusedLowVGPR);
1236  MRI.freezeReservedRegs(MF);
1237  }
1238  }
1239 }
1240 
1241 // Only report VGPRs to generic code.
1243  BitVector &SavedVGPRs,
1244  RegScavenger *RS) const {
1245  TargetFrameLowering::determineCalleeSaves(MF, SavedVGPRs, RS);
1247  if (MFI->isEntryFunction())
1248  return;
1249 
1250  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
1251  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1252  const SIRegisterInfo *TRI = ST.getRegisterInfo();
1253 
1254  // Ignore the SGPRs the default implementation found.
1255  SavedVGPRs.clearBitsNotInMask(TRI->getAllVectorRegMask());
1256 
1257  // Do not save AGPRs prior to GFX90A because there was no easy way to do so.
1258  // In gfx908 there was do AGPR loads and stores and thus spilling also
1259  // require a temporary VGPR.
1260  if (!ST.hasGFX90AInsts())
1261  SavedVGPRs.clearBitsInMask(TRI->getAllAGPRRegMask());
1262 
1263  // hasFP only knows about stack objects that already exist. We're now
1264  // determining the stack slots that will be created, so we have to predict
1265  // them. Stack objects force FP usage with calls.
1266  //
1267  // Note a new VGPR CSR may be introduced if one is used for the spill, but we
1268  // don't want to report it here.
1269  //
1270  // FIXME: Is this really hasReservedCallFrame?
1271  const bool WillHaveFP =
1272  FrameInfo.hasCalls() &&
1273  (SavedVGPRs.any() || !allStackObjectsAreDead(FrameInfo));
1274 
1275  // VGPRs used for SGPR spilling need to be specially inserted in the prolog,
1276  // so don't allow the default insertion to handle them.
1277  for (auto SSpill : MFI->getSGPRSpillVGPRs())
1278  SavedVGPRs.reset(SSpill.VGPR);
1279 
1280  LivePhysRegs LiveRegs;
1281  LiveRegs.init(*TRI);
1282 
1283  if (WillHaveFP || hasFP(MF)) {
1285  "Re-reserving spill slot for FP");
1287  MFI->FramePointerSaveIndex, true);
1288  }
1289 
1290  if (TRI->hasBasePointer(MF)) {
1291  if (MFI->SGPRForFPSaveRestoreCopy)
1292  LiveRegs.addReg(MFI->SGPRForFPSaveRestoreCopy);
1293 
1295  !MFI->BasePointerSaveIndex && "Re-reserving spill slot for BP");
1297  MFI->BasePointerSaveIndex, false);
1298  }
1299 }
1300 
1302  BitVector &SavedRegs,
1303  RegScavenger *RS) const {
1304  TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
1306  if (MFI->isEntryFunction())
1307  return;
1308 
1309  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1310  const SIRegisterInfo *TRI = ST.getRegisterInfo();
1311 
1312  // The SP is specifically managed and we don't want extra spills of it.
1313  SavedRegs.reset(MFI->getStackPtrOffsetReg());
1314 
1315  const BitVector AllSavedRegs = SavedRegs;
1316  SavedRegs.clearBitsInMask(TRI->getAllVectorRegMask());
1317 
1318  // We have to anticipate introducing CSR VGPR spills or spill of caller
1319  // save VGPR reserved for SGPR spills as we now always create stack entry
1320  // for it, if we don't have any stack objects already, since we require a FP
1321  // if there is a call and stack. We will allocate a VGPR for SGPR spills if
1322  // there are any SGPR spills. Whether they are CSR spills or otherwise.
1323  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
1324  const bool WillHaveFP =
1325  FrameInfo.hasCalls() && (AllSavedRegs.any() || MFI->hasSpilledSGPRs());
1326 
1327  // FP will be specially managed like SP.
1328  if (WillHaveFP || hasFP(MF))
1329  SavedRegs.reset(MFI->getFrameOffsetReg());
1330 
1331  // Return address use with return instruction is hidden through the SI_RETURN
1332  // pseudo. Given that and since the IPRA computes actual register usage and
1333  // does not use CSR list, the clobbering of return address by function calls
1334  // (D117243) or otherwise (D120922) is ignored/not seen by the IPRA's register
1335  // usage collection. This will ensure save/restore of return address happens
1336  // in those scenarios.
1337  const MachineRegisterInfo &MRI = MF.getRegInfo();
1338  Register RetAddrReg = TRI->getReturnAddressReg(MF);
1339  if (!MFI->isEntryFunction() &&
1340  (FrameInfo.hasCalls() || MRI.isPhysRegModified(RetAddrReg))) {
1341  SavedRegs.set(TRI->getSubReg(RetAddrReg, AMDGPU::sub0));
1342  SavedRegs.set(TRI->getSubReg(RetAddrReg, AMDGPU::sub1));
1343  }
1344 }
1345 
1348  std::vector<CalleeSavedInfo> &CSI) const {
1349  if (CSI.empty())
1350  return true; // Early exit if no callee saved registers are modified!
1351 
1352  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
1353  if (!FuncInfo->SGPRForFPSaveRestoreCopy &&
1354  !FuncInfo->SGPRForBPSaveRestoreCopy)
1355  return false;
1356 
1357  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1358  const SIRegisterInfo *RI = ST.getRegisterInfo();
1359  Register FramePtrReg = FuncInfo->getFrameOffsetReg();
1360  Register BasePtrReg = RI->getBaseRegister();
1361  unsigned NumModifiedRegs = 0;
1362 
1363  if (FuncInfo->SGPRForFPSaveRestoreCopy)
1364  NumModifiedRegs++;
1365  if (FuncInfo->SGPRForBPSaveRestoreCopy)
1366  NumModifiedRegs++;
1367 
1368  for (auto &CS : CSI) {
1369  if (CS.getReg() == FramePtrReg && FuncInfo->SGPRForFPSaveRestoreCopy) {
1370  CS.setDstReg(FuncInfo->SGPRForFPSaveRestoreCopy);
1371  if (--NumModifiedRegs)
1372  break;
1373  } else if (CS.getReg() == BasePtrReg &&
1374  FuncInfo->SGPRForBPSaveRestoreCopy) {
1375  CS.setDstReg(FuncInfo->SGPRForBPSaveRestoreCopy);
1376  if (--NumModifiedRegs)
1377  break;
1378  }
1379  }
1380 
1381  return false;
1382 }
1383 
1385  const MachineFunction &MF) const {
1386 
1387  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1388  const MachineFrameInfo &MFI = MF.getFrameInfo();
1389  uint64_t EstStackSize = MFI.estimateStackSize(MF);
1390  uint64_t MaxOffset = EstStackSize - 1;
1391 
1392  // We need the emergency stack slots to be allocated in range of the
1393  // MUBUF/flat scratch immediate offset from the base register, so assign these
1394  // first at the incoming SP position.
1395  //
1396  // TODO: We could try sorting the objects to find a hole in the first bytes
1397  // rather than allocating as close to possible. This could save a lot of space
1398  // on frames with alignment requirements.
1399  if (ST.enableFlatScratch()) {
1400  const SIInstrInfo *TII = ST.getInstrInfo();
1401  if (TII->isLegalFLATOffset(MaxOffset, AMDGPUAS::PRIVATE_ADDRESS,
1403  return false;
1404  } else {
1405  if (SIInstrInfo::isLegalMUBUFImmOffset(MaxOffset))
1406  return false;
1407  }
1408 
1409  return true;
1410 }
1411 
1413  MachineFunction &MF,
1416  int64_t Amount = I->getOperand(0).getImm();
1417  if (Amount == 0)
1418  return MBB.erase(I);
1419 
1420  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1421  const SIInstrInfo *TII = ST.getInstrInfo();
1422  const DebugLoc &DL = I->getDebugLoc();
1423  unsigned Opc = I->getOpcode();
1424  bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
1425  uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;
1426 
1427  if (!hasReservedCallFrame(MF)) {
1428  Amount = alignTo(Amount, getStackAlign());
1429  assert(isUInt<32>(Amount) && "exceeded stack address space size");
1431  Register SPReg = MFI->getStackPtrOffsetReg();
1432 
1433  Amount *= getScratchScaleFactor(ST);
1434  if (IsDestroy)
1435  Amount = -Amount;
1436  auto Add = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), SPReg)
1437  .addReg(SPReg)
1438  .addImm(Amount);
1439  Add->getOperand(3).setIsDead(); // Mark SCC as dead.
1440  } else if (CalleePopAmount != 0) {
1441  llvm_unreachable("is this used?");
1442  }
1443 
1444  return MBB.erase(I);
1445 }
1446 
1447 /// Returns true if the frame will require a reference to the stack pointer.
1448 ///
1449 /// This is the set of conditions common to setting up the stack pointer in a
1450 /// kernel, and for using a frame pointer in a callable function.
1451 ///
1452 /// FIXME: Should also check hasOpaqueSPAdjustment and if any inline asm
1453 /// references SP.
1455  return MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint();
1456 }
1457 
1458 // The FP for kernels is always known 0, so we never really need to setup an
1459 // explicit register for it. However, DisableFramePointerElim will force us to
1460 // use a register for it.
1462  const MachineFrameInfo &MFI = MF.getFrameInfo();
1463 
1464  // For entry functions we can use an immediate offset in most cases, so the
1465  // presence of calls doesn't imply we need a distinct frame pointer.
1466  if (MFI.hasCalls() &&
1468  // All offsets are unsigned, so need to be addressed in the same direction
1469  // as stack growth.
1470 
1471  // FIXME: This function is pretty broken, since it can be called before the
1472  // frame layout is determined or CSR spills are inserted.
1473  return MFI.getStackSize() != 0;
1474  }
1475 
1476  return frameTriviallyRequiresSP(MFI) || MFI.isFrameAddressTaken() ||
1477  MF.getSubtarget<GCNSubtarget>().getRegisterInfo()->hasStackRealignment(
1478  MF) ||
1480 }
1481 
1482 // This is essentially a reduced version of hasFP for entry functions. Since the
1483 // stack pointer is known 0 on entry to kernels, we never really need an FP
1484 // register. We may need to initialize the stack pointer depending on the frame
1485 // properties, which logically overlaps many of the cases where an ordinary
1486 // function would require an FP.
1488  const MachineFunction &MF) const {
1489  // Callable functions always require a stack pointer reference.
1491  "only expected to call this for entry points");
1492 
1493  const MachineFrameInfo &MFI = MF.getFrameInfo();
1494 
1495  // Entry points ordinarily don't need to initialize SP. We have to set it up
1496  // for callees if there are any. Also note tail calls are impossible/don't
1497  // make any sense for kernels.
1498  if (MFI.hasCalls())
1499  return true;
1500 
1501  // We still need to initialize the SP if we're doing anything weird that
1502  // references the SP, like variable sized stack objects.
1503  return frameTriviallyRequiresSP(MFI);
1504 }
llvm::MachineRegisterInfo::addLiveIn
void addLiveIn(MCRegister Reg, Register vreg=Register())
addLiveIn - Add the specified register as a live-in.
Definition: MachineRegisterInfo.h:954
i
i
Definition: README.txt:29
llvm::alignTo
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:148
llvm::MachineFrameInfo::hasVarSizedObjects
bool hasVarSizedObjects() const
This method may be called any time after instruction selection is complete to determine if the stack ...
Definition: MachineFrameInfo.h:354
llvm::SIMachineFunctionInfo::setIsStackRealigned
void setIsStackRealigned(bool Realigned=true)
Definition: SIMachineFunctionInfo.h:836
llvm::SIMachineFunctionInfo::getSGPRSpillVGPRs
ArrayRef< SGPRSpillVGPR > getSGPRSpillVGPRs() const
Definition: SIMachineFunctionInfo.h:562
llvm::AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT
@ FLAT_SCRATCH_INIT
Definition: AMDGPUArgumentUsageInfo.h:105
llvm::SIMachineFunctionInfo::wwmAllocation
auto wwmAllocation() const
Definition: SIMachineFunctionInfo.h:495
llvm::SIMachineFunctionInfo::hasSpilledVGPRs
bool hasSpilledVGPRs() const
Definition: SIMachineFunctionInfo.h:816
llvm::HexagonInstrInfo::isStoreToStackSlot
unsigned isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
If the specified machine instruction is a direct store to a stack slot, return the virtual or physica...
Definition: HexagonInstrInfo.cpp:334
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:104
llvm::MachineInstrBuilder::addImm
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Definition: MachineInstrBuilder.h:131
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:17
llvm::TargetStackID::WasmLocal
@ WasmLocal
Definition: TargetFrameLowering.h:31
llvm::LivePhysRegs::removeReg
void removeReg(MCPhysReg Reg)
Removes a physical register, all its sub-registers, and all its super-registers from the set.
Definition: LivePhysRegs.h:91
llvm::MachineRegisterInfo::isPhysRegUsed
bool isPhysRegUsed(MCRegister PhysReg, bool SkipRegMaskTest=false) const
Return true if the specified register is modified or read in this function.
Definition: MachineRegisterInfo.cpp:581
llvm::SIMachineFunctionInfo::getVGPRForAGPRCopy
Register getVGPRForAGPRCopy() const
Definition: SIMachineFunctionInfo.h:523
llvm::LivePhysRegs::addReg
void addReg(MCPhysReg Reg)
Adds a physical register and all its sub-registers to the set.
Definition: LivePhysRegs.h:81
llvm::SIMachineFunctionInfo::getPreloadedReg
MCRegister getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const
Definition: SIMachineFunctionInfo.h:740
llvm::SIFrameLowering::emitEpilogue
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override
Definition: SIFrameLowering.cpp:959
buildEpilogRestore
static void buildEpilogRestore(const GCNSubtarget &ST, const SIRegisterInfo &TRI, const SIMachineFunctionInfo &FuncInfo, LivePhysRegs &LiveRegs, MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SpillReg, int FI)
Definition: SIFrameLowering.cpp:145
SIMachineFunctionInfo.h
llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition: MachineRegisterInfo.h:50
llvm::SIMachineFunctionInfo::setVGPRForAGPRCopy
void setVGPRForAGPRCopy(Register NewVGPRForAGPRCopy)
Definition: SIMachineFunctionInfo.h:527
llvm::Function
Definition: Function.h:60
llvm::BitVector::set
BitVector & set()
Definition: BitVector.h:344
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1185
llvm::AMDGPUSubtarget::GFX9
@ GFX9
Definition: AMDGPUSubtarget.h:40
llvm::SIInstrFlags::FlatScratch
@ FlatScratch
Definition: SIDefines.h:123
llvm::MachineFunction::getMachineMemOperand
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
Definition: MachineFunction.cpp:456
llvm::X86Disassembler::Reg
Reg
All possible values of the reg field in the ModR/M byte.
Definition: X86DisassemblerDecoder.h:462
llvm::MachineMemOperand::MOInvariant
@ MOInvariant
The memory access always returns the same value (or traps).
Definition: MachineMemOperand.h:143
llvm::BitVector::set_bits
iterator_range< const_set_bits_iterator > set_bits() const
Definition: BitVector.h:133
llvm::SIMachineFunctionInfo::allocateSGPRSpillToVGPR
bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI)
Reserve a slice of a VGPR to support spilling for FrameIndex FI.
Definition: SIMachineFunctionInfo.cpp:276
llvm::LivePhysRegs
A set of physical registers with utility functions to track liveness when walking backward/forward th...
Definition: LivePhysRegs.h:50
llvm::TargetRegisterInfo
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Definition: TargetRegisterInfo.h:234
llvm::SIMachineFunctionInfo::getGITPtrHigh
unsigned getGITPtrHigh() const
Definition: SIMachineFunctionInfo.h:745
llvm::StackOffset::getFixed
ScalarTy getFixed() const
Definition: TypeSize.h:149
llvm::MachineMemOperand
A description of a memory reference used in the backend.
Definition: MachineMemOperand.h:126
llvm::MachineMemOperand::MODereferenceable
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
Definition: MachineMemOperand.h:141
llvm::Optional< int >
llvm::GCNSubtarget
Definition: GCNSubtarget.h:31
llvm::MachineFrameInfo::RemoveStackObject
void RemoveStackObject(int ObjectIdx)
Remove or mark dead a statically sized stack object.
Definition: MachineFrameInfo.h:768
llvm::LivePhysRegs::empty
bool empty() const
Returns true if the set is empty.
Definition: LivePhysRegs.h:78
llvm::RegState::ImplicitDefine
@ ImplicitDefine
Definition: MachineInstrBuilder.h:63
llvm::MachineFrameInfo::getObjectIndexEnd
int getObjectIndexEnd() const
Return one past the maximum frame object index.
Definition: MachineFrameInfo.h:409
llvm::SIMachineFunctionInfo::SGPRForFPSaveRestoreCopy
Register SGPRForFPSaveRestoreCopy
If this is set, an SGPR used for save/restore of the register used for the frame pointer.
Definition: SIMachineFunctionInfo.h:534
llvm::AMDGPU::getNamedOperandIdx
LLVM_READONLY int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx)
TRI
unsigned const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:1618
llvm::SIMachineFunctionInfo::getNumPreloadedSGPRs
unsigned getNumPreloadedSGPRs() const
Definition: SIMachineFunctionInfo.h:759
llvm::cl::ReallyHidden
@ ReallyHidden
Definition: CommandLine.h:140
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::MachineInstr::FrameDestroy
@ FrameDestroy
Definition: MachineInstr.h:86
buildGitPtr
static void buildGitPtr(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, const SIInstrInfo *TII, Register TargetReg)
Definition: SIFrameLowering.cpp:165
llvm::MachineBasicBlock::erase
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
Definition: MachineBasicBlock.cpp:1295
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::SIFrameLowering::hasFP
bool hasFP(const MachineFunction &MF) const override
hasFP - Return true if the specified function should have a dedicated frame pointer register.
Definition: SIFrameLowering.cpp:1461
llvm::SIRegisterInfo::getFrameRegister
Register getFrameRegister(const MachineFunction &MF) const override
Definition: SIRegisterInfo.cpp:490
frameTriviallyRequiresSP
static bool frameTriviallyRequiresSP(const MachineFrameInfo &MFI)
Returns true if the frame will require a reference to the stack pointer.
Definition: SIFrameLowering.cpp:1454
llvm::MachineFunction::front
const MachineBasicBlock & front() const
Definition: MachineFunction.h:834
llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition: MachineFunction.h:650
llvm::RecurKind::And
@ And
Bitwise or logical AND of integers.
TargetMachine.h
GCNSubtarget.h
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::MachineFunction::getInfo
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Definition: MachineFunction.h:738
llvm::LivePhysRegs::addLiveIns
void addLiveIns(const MachineBasicBlock &MBB)
Adds all live-in registers of basic block MBB.
Definition: LivePhysRegs.cpp:238
llvm::TargetFrameLowering::getStackAlign
Align getStackAlign() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
Definition: TargetFrameLowering.h:100
llvm::MachineInstr::getOperand
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:501
llvm::TargetFrameLowering::hasReservedCallFrame
virtual bool hasReservedCallFrame(const MachineFunction &MF) const
hasReservedCallFrame - Under normal circumstances, when a frame pointer is not required,...
Definition: TargetFrameLowering.h:292
llvm::TargetRegisterClass
Definition: TargetRegisterInfo.h:45
llvm::RegState::Kill
@ Kill
The last use of a register.
Definition: MachineInstrBuilder.h:48
llvm::LivePhysRegs::addLiveOuts
void addLiveOuts(const MachineBasicBlock &MBB)
Adds all live-out registers of basic block MBB.
Definition: LivePhysRegs.cpp:232
TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:127
llvm::MCInstrDesc
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:197
llvm::SIFrameLowering::requiresStackPointerReference
bool requiresStackPointerReference(const MachineFunction &MF) const
Definition: SIFrameLowering.cpp:1487
initLiveRegs
static void initLiveRegs(LivePhysRegs &LiveRegs, const SIRegisterInfo &TRI, const SIMachineFunctionInfo *FuncInfo, MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, bool IsProlog)
Definition: SIFrameLowering.cpp:689
llvm::MachineInstr::FrameSetup
@ FrameSetup
Definition: MachineInstr.h:84
buildScratchExecCopy
static Register buildScratchExecCopy(LivePhysRegs &LiveRegs, MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, bool IsProlog)
Definition: SIFrameLowering.cpp:706
llvm::SIMachineFunctionInfo::getStackPtrOffsetReg
Register getStackPtrOffsetReg() const
Definition: SIMachineFunctionInfo.h:796
llvm::report_fatal_error
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:143
llvm::MachineFrameInfo::getStackID
uint8_t getStackID(int ObjectIdx) const
Definition: MachineFrameInfo.h:723
llvm::MachineFrameInfo::getStackSize
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
Definition: MachineFrameInfo.h:577
llvm::MachineRegisterInfo::freezeReservedRegs
void freezeReservedRegs(const MachineFunction &)
freezeReservedRegs - Called by the register allocator to freeze the set of reserved registers before ...
Definition: MachineRegisterInfo.cpp:503
getVGPRSpillLaneOrTempRegister
static void getVGPRSpillLaneOrTempRegister(MachineFunction &MF, LivePhysRegs &LiveRegs, Register &TempSGPR, Optional< int > &FrameIndex, bool IsFP)
Definition: SIFrameLowering.cpp:59
llvm::MachineFrameInfo::getObjectOffset
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
Definition: MachineFrameInfo.h:518
llvm::MCRegisterInfo::isSubRegisterEq
bool isSubRegisterEq(MCRegister RegA, MCRegister RegB) const
Returns true if RegB is a sub-register of RegA or if RegB == RegA.
Definition: MCRegisterInfo.h:568
llvm::AMDGPU::convertSMRDOffsetUnits
uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset)
Convert ByteOffset to dwords if the subtarget uses dword SMRD immediate offsets.
Definition: AMDGPUBaseInfo.cpp:2165
llvm::SIRegisterInfo
Definition: SIRegisterInfo.h:30
llvm::SIMachineFunctionInfo::SGPRForBPSaveRestoreCopy
Register SGPRForBPSaveRestoreCopy
If this is set, an SGPR used for save/restore of the register used for the base pointer.
Definition: SIMachineFunctionInfo.h:539
llvm::BitVector::clearBitsNotInMask
void clearBitsNotInMask(const uint32_t *Mask, unsigned MaskWords=~0u)
clearBitsNotInMask - Clear a bit in this vector for every '0' bit in Mask.
Definition: BitVector.h:718
llvm::BitVector
Definition: BitVector.h:75
Align
uint64_t Align
Definition: ELFObjHandler.cpp:81
llvm::SIFrameLowering::allocateScavengingFrameIndexesNearIncomingSP
bool allocateScavengingFrameIndexesNearIncomingSP(const MachineFunction &MF) const override
Control the placement of special register scavenging spill slots when allocating a stack frame.
Definition: SIFrameLowering.cpp:1384
llvm::MachineFrameInfo::isFixedObjectIndex
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
Definition: MachineFrameInfo.h:680
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::MachineFrameInfo::getObjectIndexBegin
int getObjectIndexBegin() const
Return the minimum frame object index.
Definition: MachineFrameInfo.h:406
llvm::MachineInstrBuilder::addExternalSymbol
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned TargetFlags=0) const
Definition: MachineInstrBuilder.h:184
llvm::ArrayRef::slice
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition: ArrayRef.h:194
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:94
llvm::MachineFrameInfo::isDeadObjectIndex
bool isDeadObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a dead object.
Definition: MachineFrameInfo.h:737
llvm::SIFrameLowering::assignCalleeSavedSpillSlots
bool assignCalleeSavedSpillSlots(MachineFunction &MF, const TargetRegisterInfo *TRI, std::vector< CalleeSavedInfo > &CSI) const override
Definition: SIFrameLowering.cpp:1346
llvm::SIFrameLowering::emitPrologue
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override
emitProlog/emitEpilog - These methods insert prolog and epilog code into the function.
Definition: SIFrameLowering.cpp:744
llvm::BitVector::empty
bool empty() const
empty - Tests whether there are no bits in this bitvector.
Definition: BitVector.h:149
llvm::AMDGPU::Hwreg::WIDTH_M1_SHIFT_
@ WIDTH_M1_SHIFT_
Definition: SIDefines.h:426
llvm::SIMachineFunctionInfo::hasSpilledSGPRs
bool hasSpilledSGPRs() const
Definition: SIMachineFunctionInfo.h:808
llvm::AMDGPUSubtarget::GFX10
@ GFX10
Definition: AMDGPUSubtarget.h:41
llvm::SIMachineFunctionInfo::haveFreeLanesForSGPRSpill
bool haveFreeLanesForSGPRSpill(const MachineFunction &MF, unsigned NumLane) const
returns true if NumLanes slots are available in VGPRs already used for SGPR spilling.
Definition: SIMachineFunctionInfo.cpp:268
llvm::TargetOptions::DisableFramePointerElim
bool DisableFramePointerElim(const MachineFunction &MF) const
DisableFramePointerElim - This returns true if frame pointer elimination optimization should be disab...
Definition: TargetOptionsImpl.cpp:23
llvm::SIFrameLowering::determineCalleeSaves
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const override
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...
Definition: SIFrameLowering.cpp:1242
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:640
llvm::MachineInstrBuilder::setMIFlag
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
Definition: MachineInstrBuilder.h:278
llvm::cl::opt< bool >
llvm::LivePhysRegs::stepBackward
void stepBackward(const MachineInstr &MI)
Simulates liveness when stepping backwards over an instruction(bundle).
Definition: LivePhysRegs.cpp:68
llvm::SIMachineFunctionInfo::getVGPRSpillAGPRs
ArrayRef< MCPhysReg > getVGPRSpillAGPRs() const
Definition: SIMachineFunctionInfo.h:568
llvm::AMDGPU::Hwreg::ID_FLAT_SCR_HI
@ ID_FLAT_SCR_HI
Definition: SIDefines.h:400
AMDGPUMCTargetDesc.h
llvm::MachineOperand::setIsDead
void setIsDead(bool Val=true)
Definition: MachineOperand.h:515
llvm::AMDGPU::Hwreg::Offset
Offset
Definition: SIDefines.h:412
llvm::SIMachineFunctionInfo::isStackRealigned
bool isStackRealigned() const
Definition: SIMachineFunctionInfo.h:832
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:66
uint64_t
llvm::MachineFrameInfo::getObjectSize
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
Definition: MachineFrameInfo.h:469
llvm::Function::getCallingConv
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:238
llvm::BitVector::any
bool any() const
any - Returns true if any bit is set.
Definition: BitVector.h:163
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
llvm::AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET
@ PRIVATE_SEGMENT_WAVE_BYTE_OFFSET
Definition: AMDGPUArgumentUsageInfo.h:109
llvm::LivePhysRegs::available
bool available(const MachineRegisterInfo &MRI, MCPhysReg Reg) const
Returns true if register Reg and no aliasing register is in the set.
Definition: LivePhysRegs.cpp:141
llvm::MachinePointerInfo
This class contains a discriminated union of information about pointers in memory operands,...
Definition: MachineMemOperand.h:38
llvm::MachineRegisterInfo::getCalleeSavedRegs
const MCPhysReg * getCalleeSavedRegs() const
Returns list of callee saved registers.
Definition: MachineRegisterInfo.cpp:617
llvm::SIMachineFunctionInfo::FramePointerSaveIndex
Optional< int > FramePointerSaveIndex
Definition: SIMachineFunctionInfo.h:535
allStackObjectsAreDead
static bool allStackObjectsAreDead(const MachineFrameInfo &MFI)
Definition: SIFrameLowering.cpp:336
llvm::AMDGPUAS::CONSTANT_ADDRESS
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
Definition: AMDGPU.h:362
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::MCPhysReg
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition: MCRegister.h:21
llvm::RegScavenger
Definition: RegisterScavenging.h:34
llvm::MachineFrameInfo::getObjectAlign
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
Definition: MachineFrameInfo.h:483
llvm::AMDGPUMachineFunction::isEntryFunction
bool isEntryFunction() const
Definition: AMDGPUMachineFunction.h:86
llvm::TargetRegisterInfo::eliminateFrameIndex
virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, unsigned FIOperandNum, RegScavenger *RS=nullptr) const =0
This method must be overriden to eliminate abstract frame indices from instructions which may use the...
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
llvm::make_early_inc_range
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:608
llvm::TargetStackID::ScalableVector
@ ScalableVector
Definition: TargetFrameLowering.h:30
llvm::SIFrameLowering::processFunctionBeforeFrameIndicesReplaced
void processFunctionBeforeFrameIndicesReplaced(MachineFunction &MF, RegScavenger *RS=nullptr) const override
processFunctionBeforeFrameIndicesReplaced - This method is called immediately before MO_FrameIndex op...
Definition: SIFrameLowering.cpp:1215
llvm::isUInt< 32 >
constexpr bool isUInt< 32 >(uint64_t x)
Definition: MathExtras.h:411
llvm::MachineRegisterInfo::isAllocatable
bool isAllocatable(MCRegister PhysReg) const
isAllocatable - Returns true when PhysReg belongs to an allocatable register class and it hasn't been...
Definition: MachineRegisterInfo.h:943
llvm::TargetMachine::Options
TargetOptions Options
Definition: TargetMachine.h:118
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::MachineFunction::getFrameInfo
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
Definition: MachineFunction.h:656
llvm::SIMachineFunctionInfo::getScavengeFI
int getScavengeFI(MachineFrameInfo &MFI, const SIRegisterInfo &TRI)
Definition: SIMachineFunctionInfo.cpp:467
llvm::MachineBasicBlock::getParent
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
Definition: MachineBasicBlock.h:234
llvm::RegScavenger::addScavengingFrameIndex
void addScavengingFrameIndex(int FI)
Add a scavenging frame index.
Definition: RegisterScavenging.h:143
llvm::MachineInstrBuilder::addMemOperand
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Definition: MachineInstrBuilder.h:202
llvm::SIMachineFunctionInfo::allocateVGPRSpillToAGPR
bool allocateVGPRSpillToAGPR(MachineFunction &MF, int FI, bool isAGPRtoVGPR)
Reserve AGPRs or VGPRs to support spilling for FrameIndex FI.
Definition: SIMachineFunctionInfo.cpp:350
llvm::MachineInstrBuilder::addReg
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Definition: MachineInstrBuilder.h:97
llvm::SIMachineFunctionInfo::getGITPtrLoReg
Register getGITPtrLoReg(const MachineFunction &MF) const
Definition: SIMachineFunctionInfo.cpp:492
llvm::AMDGPUAS::PRIVATE_ADDRESS
@ PRIVATE_ADDRESS
Address space for private memory.
Definition: AMDGPU.h:364
llvm::MachineFunction
Definition: MachineFunction.h:241
llvm::MachineBasicBlock::getFirstTerminator
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
Definition: MachineBasicBlock.cpp:238
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::MachineFrameInfo::hasPatchPoint
bool hasPatchPoint() const
This method may be called any time after instruction selection is complete to determine if there is a...
Definition: MachineFrameInfo.h:388
llvm::min
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:357
llvm::SIFrameLowering::isSupportedStackID
bool isSupportedStackID(TargetStackID::Value ID) const override
Definition: SIFrameLowering.cpp:676
llvm::MachineFrameInfo::CreateStackObject
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
Definition: MachineFrameInfo.cpp:51
AMDGPU.h
llvm::LivePhysRegs::init
void init(const TargetRegisterInfo &TRI)
(re-)initializes and clears the set.
Definition: LivePhysRegs.h:68
MBBI
MachineBasicBlock MachineBasicBlock::iterator MBBI
Definition: AArch64SLSHardening.cpp:75
llvm::BitVector::clearBitsInMask
void clearBitsInMask(const uint32_t *Mask, unsigned MaskWords=~0u)
clearBitsInMask - Clear any bits in this vector that are set in Mask.
Definition: BitVector.h:706
llvm::TargetStackID::NoAlloc
@ NoAlloc
Definition: TargetFrameLowering.h:32
getScratchScaleFactor
static unsigned getScratchScaleFactor(const GCNSubtarget &ST)
Definition: SIFrameLowering.cpp:399
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143
llvm::AMDGPU::isCompute
bool isCompute(CallingConv::ID cc)
Definition: AMDGPUBaseInfo.cpp:1609
uint32_t
llvm::StackOffset
StackOffset is a class to represent an offset with 2 dimensions, named fixed and scalable,...
Definition: TypeSize.h:134
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
SIFrameLowering.h
llvm::HexagonInstrInfo::isLoadFromStackSlot
unsigned isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
TargetInstrInfo overrides.
Definition: HexagonInstrInfo.cpp:286
llvm::SIMachineFunctionInfo::getScratchRSrcReg
Register getScratchRSrcReg() const
Returns the physical register reserved for use as the resource descriptor for scratch accesses.
Definition: SIMachineFunctionInfo.h:769
llvm::SIMachineFunctionInfo::getImplicitBufferPtrUserSGPR
Register getImplicitBufferPtrUserSGPR() const
Definition: SIMachineFunctionInfo.h:804
EnableSpillVGPRToAGPR
static cl::opt< bool > EnableSpillVGPRToAGPR("amdgpu-spill-vgpr-to-agpr", cl::desc("Enable spilling VGPRs to AGPRs"), cl::ReallyHidden, cl::init(true))
llvm::SIFrameLowering::processFunctionBeforeFrameFinalized
void processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS=nullptr) const override
processFunctionBeforeFrameFinalized - This method is called immediately before the specified function...
Definition: SIFrameLowering.cpp:1105
llvm::MachineMemOperand::MOLoad
@ MOLoad
The memory access reads data.
Definition: MachineMemOperand.h:133
MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105
llvm::MachineFrameInfo::getMaxAlign
Align getMaxAlign() const
Return the alignment in bytes that this function must be aligned to, which is greater than the defaul...
Definition: MachineFrameInfo.h:593
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
llvm::MachineBasicBlock::addLiveIn
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
Definition: MachineBasicBlock.h:376
llvm::MachineFrameInfo::isFrameAddressTaken
bool isFrameAddressTaken() const
This method may be called any time after instruction selection is complete to determine if there is a...
Definition: MachineFrameInfo.h:370
llvm::RegScavenger::enterBasicBlock
void enterBasicBlock(MachineBasicBlock &MBB)
Start tracking liveness from the begin of basic block MBB.
Definition: RegisterScavenging.cpp:82
llvm::MachineFrameInfo::hasCalls
bool hasCalls() const
Return true if the current function has any function calls.
Definition: MachineFrameInfo.h:605
llvm::SIMachineFunctionInfo::setVGPRToAGPRSpillDead
void setVGPRToAGPRSpillDead(int FrameIndex)
Definition: SIMachineFunctionInfo.h:578
llvm::SIInstrInfo::isLegalMUBUFImmOffset
static bool isLegalMUBUFImmOffset(unsigned Imm)
Definition: SIInstrInfo.h:1109
llvm::ISD::FrameIndex
@ FrameIndex
Definition: ISDOpcodes.h:80
llvm::MachineRegisterInfo::replaceRegWith
void replaceRegWith(Register FromReg, Register ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
Definition: MachineRegisterInfo.cpp:378
llvm::SIMachineFunctionInfo::removeDeadFrameIndices
bool removeDeadFrameIndices(MachineFrameInfo &MFI, bool ResetSGPRSpillStackIDs)
If ResetSGPRSpillStackIDs is true, reset the stack ID from sgpr-spill to the default stack.
Definition: SIMachineFunctionInfo.cpp:415
MBB
MachineBasicBlock & MBB
Definition: AArch64SLSHardening.cpp:74
spilledToMemory
static bool spilledToMemory(const MachineFunction &MF, int SaveIndex)
Definition: SIFrameLowering.cpp:739
llvm::BitVector::test
bool test(unsigned Idx) const
Definition: BitVector.h:454
llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:606
allSGPRSpillsAreDead
static bool allSGPRSpillsAreDead(const MachineFunction &MF)
Definition: SIFrameLowering.cpp:1079
llvm::MachineBasicBlock::sortUniqueLiveIns
void sortUniqueLiveIns()
Sorts and uniques the LiveIns vector.
Definition: MachineBasicBlock.cpp:582
uint16_t
llvm::MachineFunction::getTarget
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Definition: MachineFunction.h:636
llvm::SIFrameLowering::determineCalleeSavesSGPR
void determineCalleeSavesSGPR(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const
Definition: SIFrameLowering.cpp:1301
MachineFrameInfo.h
llvm::Align::value
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
llvm::SIMachineFunctionInfo::BasePointerSaveIndex
Optional< int > BasePointerSaveIndex
Definition: SIMachineFunctionInfo.h:540
llvm::SIFrameLowering::eliminateCallFramePseudoInstr
MachineBasicBlock::iterator eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
This method is called during prolog/epilog code insertion to eliminate call frame setup and destroy p...
Definition: SIFrameLowering.cpp:1412
llvm::TargetStackID::Value
Value
Definition: TargetFrameLowering.h:27
llvm::TargetStackID::Default
@ Default
Definition: TargetFrameLowering.h:28
llvm::SIMachineFunctionInfo::getAGPRSpillVGPRs
ArrayRef< MCPhysReg > getAGPRSpillVGPRs() const
Definition: SIMachineFunctionInfo.h:564
llvm::SIMachineFunctionInfo::setScratchRSrcReg
void setScratchRSrcReg(Register Reg)
Definition: SIMachineFunctionInfo.h:773
llvm::TargetStackID::SGPRSpill
@ SGPRSpill
Definition: TargetFrameLowering.h:29
llvm::MachineRegisterInfo::isPhysRegModified
bool isPhysRegModified(MCRegister PhysReg, bool SkipNoReturnDef=false) const
Return true if the specified register is modified in this function.
Definition: MachineRegisterInfo.cpp:566
llvm::MachineMemOperand::MOStore
@ MOStore
The memory access writes data.
Definition: MachineMemOperand.h:135
llvm::BitVector::reset
BitVector & reset()
Definition: BitVector.h:385
llvm::SIInstrInfo
Definition: SIInstrInfo.h:43
llvm::TargetRegisterInfo::hasStackRealignment
bool hasStackRealignment(const MachineFunction &MF) const
True if stack realignment is required and still possible.
Definition: TargetRegisterInfo.h:947
llvm::MachineFrameInfo
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
Definition: MachineFrameInfo.h:105
llvm::MachinePointerInfo::getFixedStack
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
Definition: MachineOperand.cpp:1006
llvm::MachineBasicBlock::begin
iterator begin()
Definition: MachineBasicBlock.h:277
buildPrologSpill
static void buildPrologSpill(const GCNSubtarget &ST, const SIRegisterInfo &TRI, const SIMachineFunctionInfo &FuncInfo, LivePhysRegs &LiveRegs, MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SpillReg, int FI)
Definition: SIFrameLowering.cpp:124
llvm::BuildMI
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
Definition: MachineInstrBuilder.h:328
llvm::SIMachineFunctionInfo
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Definition: SIMachineFunctionInfo.h:348
llvm::ArrayRef::size
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:164
llvm::SIFrameLowering::emitEntryFunctionPrologue
void emitEntryFunctionPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const
Definition: SIFrameLowering.cpp:403
llvm::AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER
@ PRIVATE_SEGMENT_BUFFER
Definition: AMDGPUArgumentUsageInfo.h:100
llvm::TargetRegisterInfo::getSubReg
MCRegister getSubReg(MCRegister Reg, unsigned Idx) const
Returns the physical register number of sub-register "Index" for physical register RegNo.
Definition: TargetRegisterInfo.h:1113
llvm::CallingConv::AMDGPU_CS
@ AMDGPU_CS
Calling convention used for Mesa/AMDPAL compute shaders.
Definition: CallingConv.h:213
llvm::AMDGPU::Hwreg::ID_FLAT_SCR_LO
@ ID_FLAT_SCR_LO
Definition: SIDefines.h:399
llvm::TargetFrameLowering::determineCalleeSaves
virtual void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...
Definition: TargetFrameLoweringImpl.cpp:83
llvm::MachineFrameInfo::hasStackMap
bool hasStackMap() const
This method may be called any time after instruction selection is complete to determine if there is a...
Definition: MachineFrameInfo.h:382
llvm::DebugLoc
A debug info location.
Definition: DebugLoc.h:33
llvm::cl::desc
Definition: CommandLine.h:405
RegisterScavenging.h
findScratchNonCalleeSaveRegister
static MCRegister findScratchNonCalleeSaveRegister(MachineRegisterInfo &MRI, LivePhysRegs &LiveRegs, const TargetRegisterClass &RC, bool Unused=false)
Definition: SIFrameLowering.cpp:33
llvm::printReg
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
Definition: TargetRegisterInfo.cpp:111
llvm::SIMachineFunctionInfo::SGPRSpillVGPR
Definition: SIMachineFunctionInfo.h:465
llvm::MachineInstrBundleIterator< MachineInstr >
llvm::SIMachineFunctionInfo::getSGPRToVGPRSpills
ArrayRef< SpilledReg > getSGPRToVGPRSpills(int FrameIndex) const
Definition: SIMachineFunctionInfo.h:556
llvm::SIFrameLowering::getFrameIndexReference
StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg) const override
getFrameIndexReference - This method should return the base register and offset used to reference a f...
Definition: SIFrameLowering.cpp:1096
llvm::SIMachineFunctionInfo::hasImplicitBufferPtr
bool hasImplicitBufferPtr() const
Definition: SIMachineFunctionInfo.h:723
llvm::SIMachineFunctionInfo::hasFlatScratchInit
bool hasFlatScratchInit() const
Definition: SIMachineFunctionInfo.h:683
llvm::SIMachineFunctionInfo::getFrameOffsetReg
Register getFrameOffsetReg() const
Definition: SIMachineFunctionInfo.h:778
llvm::SIMachineFunctionInfo::allocateWWMReservedSpillSlots
void allocateWWMReservedSpillSlots(MachineFrameInfo &MFI, const SIRegisterInfo &TRI)
Definition: SIMachineFunctionInfo.cpp:453
llvm::MCRegister
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:24
LivePhysRegs.h
llvm::RegState::Undef
@ Undef
Value of the register doesn't matter.
Definition: MachineInstrBuilder.h:52