//===----------------------- SIFrameLowering.cpp --------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//==-----------------------------------------------------------------------===//

#include "SIFrameLowering.h"
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Target/TargetMachine.h"

using namespace llvm;

#define DEBUG_TYPE "frame-info"

static cl::opt<bool> EnableSpillVGPRToAGPR(
    "amdgpu-spill-vgpr-to-agpr",
    cl::desc("Enable spilling VGPRs to AGPRs"),
    cl::ReallyHidden,
    cl::init(true));

// Find a scratch register that we can use in the prologue. We avoid using
// callee-save registers since they may appear to be free when this is called
// from canUseAsPrologue (during shrink wrapping), but then no longer be free
// when this is called from emitPrologue.
static MCRegister findScratchNonCalleeSaveRegister(MachineRegisterInfo &MRI,
                                                   LivePhysRegs &LiveRegs,
                                                   const TargetRegisterClass &RC,
                                                   bool Unused = false) {
  // Mark callee saved registers as used so we will not choose them.
  const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
  for (unsigned i = 0; CSRegs[i]; ++i)
    LiveRegs.addReg(CSRegs[i]);

  if (Unused) {
    // We are looking for a register that can be used throughout the entire
    // function, so any use is unacceptable.
    for (MCRegister Reg : RC) {
      if (!MRI.isPhysRegUsed(Reg) && LiveRegs.available(MRI, Reg))
        return Reg;
    }
  } else {
    for (MCRegister Reg : RC) {
      if (LiveRegs.available(MRI, Reg))
        return Reg;
    }
  }

  return MCRegister();
}

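// Decide how the FP or BP will be preserved across the function body: prefer a
// free lane in an already-spilled CSR VGPR, then an unused SGPR, then a lane in
// a newly spilled VGPR, and finally a plain memory spill slot (cases 1-4 below).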
static void getVGPRSpillLaneOrTempRegister(MachineFunction &MF,
                                           LivePhysRegs &LiveRegs,
                                           Register &TempSGPR,
                                           Optional<int> &FrameIndex,
                                           bool IsFP) {
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();

  // We need to save and restore the current FP/BP.

  // 1: If there is already a VGPR with free lanes, use it. We
  // may already have to pay the penalty for spilling a CSR VGPR.
  if (MFI->haveFreeLanesForSGPRSpill(MF, 1)) {
    int NewFI = FrameInfo.CreateStackObject(4, Align(4), true, nullptr,
                                            TargetStackID::SGPRSpill);

    if (!MFI->allocateSGPRSpillToVGPR(MF, NewFI))
      llvm_unreachable("allocate SGPR spill should have worked");

    FrameIndex = NewFI;

    LLVM_DEBUG(auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front();
               dbgs() << "Spilling " << (IsFP ? "FP" : "BP") << " to "
                      << printReg(Spill.VGPR, TRI) << ':' << Spill.Lane
                      << '\n');
    return;
  }

  // 2: Next, try to save the FP/BP in an unused SGPR.
  TempSGPR = findScratchNonCalleeSaveRegister(
      MF.getRegInfo(), LiveRegs, AMDGPU::SReg_32_XM0_XEXECRegClass, true);

  if (!TempSGPR) {
    int NewFI = FrameInfo.CreateStackObject(4, Align(4), true, nullptr,
                                            TargetStackID::SGPRSpill);

    if (TRI->spillSGPRToVGPR() && MFI->allocateSGPRSpillToVGPR(MF, NewFI)) {
      // 3: There's no free lane to spill, and no free register to save FP/BP,
      // so we're forced to spill another VGPR to use for the spill.
      FrameIndex = NewFI;

      LLVM_DEBUG(
          auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front();
          dbgs() << (IsFP ? "FP" : "BP") << " requires fallback spill to "
                 << printReg(Spill.VGPR, TRI) << ':' << Spill.Lane << '\n';);
    } else {
      // Remove dead <NewFI> index
      MF.getFrameInfo().RemoveStackObject(NewFI);
      // 4: If all else fails, spill the FP/BP to memory.
      FrameIndex = FrameInfo.CreateSpillStackObject(4, Align(4));
      LLVM_DEBUG(dbgs() << "Reserved FI " << FrameIndex << " for spilling "
                        << (IsFP ? "FP" : "BP") << '\n');
    }
  } else {
    LLVM_DEBUG(dbgs() << "Saving " << (IsFP ? "FP" : "BP") << " with copy to "
                      << printReg(TempSGPR, TRI) << '\n');
  }
}

// We need to emit these stack operations specially because they use a
// different frame register than the rest of the function (which would use
// getFrameRegister).
static void buildPrologSpill(const GCNSubtarget &ST, const SIRegisterInfo &TRI,
                             const SIMachineFunctionInfo &FuncInfo,
                             LivePhysRegs &LiveRegs, MachineFunction &MF,
                             MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator I, const DebugLoc &DL,
                             Register SpillReg, int FI) {
  unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
                                        : AMDGPU::BUFFER_STORE_DWORD_OFFSET;

  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      PtrInfo, MachineMemOperand::MOStore, FrameInfo.getObjectSize(FI),
      FrameInfo.getObjectAlign(FI));
  LiveRegs.addReg(SpillReg);
  TRI.buildSpillLoadStore(MBB, I, DL, Opc, FI, SpillReg, true,
                          FuncInfo.getStackPtrOffsetReg(), 0, MMO, nullptr,
                          &LiveRegs);
  LiveRegs.removeReg(SpillReg);
}

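// Counterpart of buildPrologSpill: reload SpillReg from its frame index,
// again addressing the slot relative to the stack pointer rather than the
// register returned by getFrameRegister.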
static void buildEpilogRestore(const GCNSubtarget &ST,
                               const SIRegisterInfo &TRI,
                               const SIMachineFunctionInfo &FuncInfo,
                               LivePhysRegs &LiveRegs, MachineFunction &MF,
                               MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator I,
                               const DebugLoc &DL, Register SpillReg, int FI) {
  unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
                                        : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;

  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      PtrInfo, MachineMemOperand::MOLoad, FrameInfo.getObjectSize(FI),
      FrameInfo.getObjectAlign(FI));
  TRI.buildSpillLoadStore(MBB, I, DL, Opc, FI, SpillReg, false,
                          FuncInfo.getStackPtrOffsetReg(), 0, MMO, nullptr,
                          &LiveRegs);
}

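// Load the GIT (global information table) pointer into TargetReg. The high
// half comes from the amdgpu-git-ptr-high function attribute when present,
// otherwise from S_GETPC_B64; the low half is the preloaded GIT pointer SGPR.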
static void buildGitPtr(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                        const DebugLoc &DL, const SIInstrInfo *TII,
                        Register TargetReg) {
  MachineFunction *MF = MBB.getParent();
  const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
  Register TargetLo = TRI->getSubReg(TargetReg, AMDGPU::sub0);
  Register TargetHi = TRI->getSubReg(TargetReg, AMDGPU::sub1);

  if (MFI->getGITPtrHigh() != 0xffffffff) {
    BuildMI(MBB, I, DL, SMovB32, TargetHi)
        .addImm(MFI->getGITPtrHigh())
        .addReg(TargetReg, RegState::ImplicitDefine);
  } else {
    const MCInstrDesc &GetPC64 = TII->get(AMDGPU::S_GETPC_B64);
    BuildMI(MBB, I, DL, GetPC64, TargetReg);
  }
  Register GitPtrLo = MFI->getGITPtrLoReg(*MF);
  MF->getRegInfo().addLiveIn(GitPtrLo);
  MBB.addLiveIn(GitPtrLo);
  BuildMI(MBB, I, DL, SMovB32, TargetLo)
      .addReg(GitPtrLo);
}

// Emit flat scratch setup code, assuming `MFI->hasFlatScratchInit()`
void SIFrameLowering::emitEntryFunctionFlatScratchInit(
    MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
    const DebugLoc &DL, Register ScratchWaveOffsetReg) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  // We don't need this if we only have spills since there is no user facing
  // scratch.

  // TODO: If we know we don't have flat instructions earlier, we can omit
  // this from the input registers.
  //
  // TODO: We only need to know if we access scratch space through a flat
  // pointer. Because we only detect if flat instructions are used at all,
  // this will be used more often than necessary on VI.

  Register FlatScrInitLo;
  Register FlatScrInitHi;

  if (ST.isAmdPalOS()) {
    // Extract the scratch offset from the descriptor in the GIT
    LivePhysRegs LiveRegs;
    LiveRegs.init(*TRI);
    LiveRegs.addLiveIns(MBB);

    // Find unused reg to load flat scratch init into
    MachineRegisterInfo &MRI = MF.getRegInfo();
    Register FlatScrInit = AMDGPU::NoRegister;
    ArrayRef<MCPhysReg> AllSGPR64s = TRI->getAllSGPR64(MF);
    unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 1) / 2;
    AllSGPR64s = AllSGPR64s.slice(
        std::min(static_cast<unsigned>(AllSGPR64s.size()), NumPreloaded));
    Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
    for (MCPhysReg Reg : AllSGPR64s) {
      if (LiveRegs.available(MRI, Reg) && MRI.isAllocatable(Reg) &&
          !TRI->isSubRegisterEq(Reg, GITPtrLoReg)) {
        FlatScrInit = Reg;
        break;
      }
    }
    assert(FlatScrInit && "Failed to find free register for scratch init");

    FlatScrInitLo = TRI->getSubReg(FlatScrInit, AMDGPU::sub0);
    FlatScrInitHi = TRI->getSubReg(FlatScrInit, AMDGPU::sub1);

    buildGitPtr(MBB, I, DL, TII, FlatScrInit);

    // We now have the GIT ptr - now get the scratch descriptor from the entry
    // at offset 0 (or offset 16 for a compute shader).
    MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
    const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);
    auto *MMO = MF.getMachineMemOperand(
        PtrInfo,
        MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
            MachineMemOperand::MODereferenceable,
        8, Align(4));
    unsigned Offset =
        MF.getFunction().getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0;
    const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
    unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset);
    BuildMI(MBB, I, DL, LoadDwordX2, FlatScrInit)
        .addReg(FlatScrInit)
        .addImm(EncodedOffset) // offset
        .addImm(0) // cpol
        .addMemOperand(MMO);

    // Mask the offset in [47:0] of the descriptor
    const MCInstrDesc &SAndB32 = TII->get(AMDGPU::S_AND_B32);
    auto And = BuildMI(MBB, I, DL, SAndB32, FlatScrInitHi)
        .addReg(FlatScrInitHi)
        .addImm(0xffff);
    And->getOperand(3).setIsDead(); // Mark SCC as dead.
  } else {
    Register FlatScratchInitReg =
        MFI->getPreloadedReg(AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT);
    assert(FlatScratchInitReg);

    MachineRegisterInfo &MRI = MF.getRegInfo();
    MRI.addLiveIn(FlatScratchInitReg);
    MBB.addLiveIn(FlatScratchInitReg);

    FlatScrInitLo = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0);
    FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1);
  }

  // Do a 64-bit pointer add.
  if (ST.flatScratchIsPointer()) {
    if (ST.getGeneration() >= AMDGPUSubtarget::GFX10) {
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo)
          .addReg(FlatScrInitLo)
          .addReg(ScratchWaveOffsetReg);
      auto Addc = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32),
                          FlatScrInitHi)
          .addReg(FlatScrInitHi)
          .addImm(0);
      Addc->getOperand(3).setIsDead(); // Mark SCC as dead.

      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)).
          addReg(FlatScrInitLo).
          addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_LO |
                         (31 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_)));
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)).
          addReg(FlatScrInitHi).
          addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_HI |
                         (31 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_)));
      return;
    }

    // For GFX9.
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), AMDGPU::FLAT_SCR_LO)
        .addReg(FlatScrInitLo)
        .addReg(ScratchWaveOffsetReg);
    auto Addc = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32),
                        AMDGPU::FLAT_SCR_HI)
        .addReg(FlatScrInitHi)
        .addImm(0);
    Addc->getOperand(3).setIsDead(); // Mark SCC as dead.

    return;
  }

  assert(ST.getGeneration() < AMDGPUSubtarget::GFX9);

  // Copy the size in bytes.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), AMDGPU::FLAT_SCR_LO)
      .addReg(FlatScrInitHi, RegState::Kill);

  // Add wave offset in bytes to private base offset.
  // See comment in AMDKernelCodeT.h for enable_sgpr_flat_scratch_init.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), FlatScrInitLo)
      .addReg(FlatScrInitLo)
      .addReg(ScratchWaveOffsetReg);

  // Convert offset to 256-byte units.
  auto LShr = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_LSHR_B32),
                      AMDGPU::FLAT_SCR_HI)
      .addReg(FlatScrInitLo, RegState::Kill)
      .addImm(8);
  LShr->getOperand(3).setIsDead(true); // Mark SCC as dead.
}

// Note SGPRSpill stack IDs should only be used for SGPR spilling to VGPRs, not
// memory. They should have been removed by now.
static bool allStackObjectsAreDead(const MachineFrameInfo &MFI) {
  for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
       I != E; ++I) {
    if (!MFI.isDeadObjectIndex(I))
      return false;
  }

  return true;
}

// Shift down registers reserved for the scratch RSRC.
Register SIFrameLowering::getEntryFunctionReservedScratchRsrcReg(
    MachineFunction &MF) const {

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  assert(MFI->isEntryFunction());

  Register ScratchRsrcReg = MFI->getScratchRSrcReg();

  if (!ScratchRsrcReg || (!MRI.isPhysRegUsed(ScratchRsrcReg) &&
                          allStackObjectsAreDead(MF.getFrameInfo())))
    return Register();

  if (ST.hasSGPRInitBug() ||
      ScratchRsrcReg != TRI->reservedPrivateSegmentBufferReg(MF))
    return ScratchRsrcReg;

  // We reserved the last registers for this. Shift it down to the end of those
  // which were actually used.
  //
  // FIXME: It might be safer to use a pseudoregister before replacement.

  // FIXME: We should be able to eliminate unused input registers; we just
  // cannot do this for the resources required for scratch access. For now we
  // skip over user SGPRs and may leave unused holes.

  unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 3) / 4;
  ArrayRef<MCPhysReg> AllSGPR128s = TRI->getAllSGPR128(MF);
  AllSGPR128s = AllSGPR128s.slice(
      std::min(static_cast<unsigned>(AllSGPR128s.size()), NumPreloaded));

  // Skip the last N reserved elements because they should have already been
  // reserved for VCC etc.
  Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
  for (MCPhysReg Reg : AllSGPR128s) {
    // Pick the first unallocated one. Make sure we don't clobber the other
    // reserved input we needed. Also for PAL, make sure we don't clobber
    // the GIT pointer passed in SGPR0 or SGPR8.
    if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg) &&
        !TRI->isSubRegisterEq(Reg, GITPtrLoReg)) {
      MRI.replaceRegWith(ScratchRsrcReg, Reg);
      MFI->setScratchRSrcReg(Reg);
      return Reg;
    }
  }

  return ScratchRsrcReg;
}

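// With MUBUF scratch the SP/FP hold per-wave byte offsets into swizzled
// scratch memory, so per-lane frame sizes are scaled by the wavefront size;
// flat scratch is addressed per lane and needs no scaling.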
static unsigned getScratchScaleFactor(const GCNSubtarget &ST) {
  return ST.enableFlatScratch() ? 1 : ST.getWavefrontSize();
}

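// Entry function (kernel) prologue: pick and initialize the scratch RSRC,
// stack pointer, frame pointer, and flat scratch registers as needed.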
void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
                                                MachineBasicBlock &MBB) const {
  assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");

  // FIXME: If we only have SGPR spills, we won't actually be using scratch
  // memory since these spill to VGPRs. We should be cleaning up these unused
  // SGPR spill frame indices somewhere.

  // FIXME: We still have implicit uses on SGPR spill instructions in case they
  // need to spill to vector memory. It's likely that will not happen, but at
  // this point it appears we need the setup. This part of the prolog should be
  // emitted after frame indices are eliminated.

  // FIXME: Remove all of the isPhysRegUsed checks

  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const Function &F = MF.getFunction();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();

  assert(MFI->isEntryFunction());

  Register PreloadedScratchWaveOffsetReg = MFI->getPreloadedReg(
      AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);

  // We need to do the replacement of the private segment buffer register even
  // if there are no stack objects. There could be stores to undef or a
  // constant without an associated object.
  //
  // This will return `Register()` in cases where there are no actual
  // uses of the SRSRC.
  Register ScratchRsrcReg;
  if (!ST.enableFlatScratch())
    ScratchRsrcReg = getEntryFunctionReservedScratchRsrcReg(MF);

  // Make the selected register live throughout the function.
  if (ScratchRsrcReg) {
    for (MachineBasicBlock &OtherBB : MF) {
      if (&OtherBB != &MBB) {
        OtherBB.addLiveIn(ScratchRsrcReg);
      }
    }
  }

  // Now that we have fixed the reserved SRSRC we need to locate the
  // (potentially) preloaded SRSRC.
  Register PreloadedScratchRsrcReg;
  if (ST.isAmdHsaOrMesa(F)) {
    PreloadedScratchRsrcReg =
        MFI->getPreloadedReg(AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER);
    if (ScratchRsrcReg && PreloadedScratchRsrcReg) {
      // We added live-ins during argument lowering, but since they were not
      // used they were deleted. We're adding the uses now, so add them back.
      MRI.addLiveIn(PreloadedScratchRsrcReg);
      MBB.addLiveIn(PreloadedScratchRsrcReg);
    }
  }

  // Debug location must be unknown since the first debug location is used to
  // determine the end of the prologue.
  DebugLoc DL;
  MachineBasicBlock::iterator I = MBB.begin();

  // We found the SRSRC first because it needs four registers and has an
  // alignment requirement. If the SRSRC that we found clobbers the scratch
  // wave offset, which may be in a fixed SGPR or a free SGPR chosen by
  // SITargetLowering::allocateSystemSGPRs, COPY the scratch wave offset to a
  // free SGPR.
  Register ScratchWaveOffsetReg;
  if (PreloadedScratchWaveOffsetReg &&
      TRI->isSubRegisterEq(ScratchRsrcReg, PreloadedScratchWaveOffsetReg)) {
    ArrayRef<MCPhysReg> AllSGPRs = TRI->getAllSGPR32(MF);
    unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();
    AllSGPRs = AllSGPRs.slice(
        std::min(static_cast<unsigned>(AllSGPRs.size()), NumPreloaded));
    Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
    for (MCPhysReg Reg : AllSGPRs) {
      if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg) &&
          !TRI->isSubRegisterEq(ScratchRsrcReg, Reg) && GITPtrLoReg != Reg) {
        ScratchWaveOffsetReg = Reg;
        BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchWaveOffsetReg)
            .addReg(PreloadedScratchWaveOffsetReg, RegState::Kill);
        break;
      }
    }
  } else {
    ScratchWaveOffsetReg = PreloadedScratchWaveOffsetReg;
  }
  assert(ScratchWaveOffsetReg || !PreloadedScratchWaveOffsetReg);

  if (requiresStackPointerReference(MF)) {
    Register SPReg = MFI->getStackPtrOffsetReg();
    assert(SPReg != AMDGPU::SP_REG);
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), SPReg)
        .addImm(FrameInfo.getStackSize() * getScratchScaleFactor(ST));
  }

  if (hasFP(MF)) {
    Register FPReg = MFI->getFrameOffsetReg();
    assert(FPReg != AMDGPU::FP_REG);
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), FPReg).addImm(0);
  }

  bool NeedsFlatScratchInit =
      MFI->hasFlatScratchInit() &&
      (MRI.isPhysRegUsed(AMDGPU::FLAT_SCR) || FrameInfo.hasCalls() ||
       (!allStackObjectsAreDead(FrameInfo) && ST.enableFlatScratch()));

  if ((NeedsFlatScratchInit || ScratchRsrcReg) &&
      PreloadedScratchWaveOffsetReg && !ST.flatScratchIsArchitected()) {
    MRI.addLiveIn(PreloadedScratchWaveOffsetReg);
    MBB.addLiveIn(PreloadedScratchWaveOffsetReg);
  }

  if (NeedsFlatScratchInit) {
    emitEntryFunctionFlatScratchInit(MF, MBB, I, DL, ScratchWaveOffsetReg);
  }

  if (ScratchRsrcReg) {
    emitEntryFunctionScratchRsrcRegSetup(MF, MBB, I, DL,
                                         PreloadedScratchRsrcReg,
                                         ScratchRsrcReg, ScratchWaveOffsetReg);
  }
}

// Emit scratch RSRC setup code, assuming `ScratchRsrcReg != AMDGPU::NoReg`
void SIFrameLowering::emitEntryFunctionScratchRsrcRegSetup(
    MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
    const DebugLoc &DL, Register PreloadedScratchRsrcReg,
    Register ScratchRsrcReg, Register ScratchWaveOffsetReg) const {

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  const Function &Fn = MF.getFunction();

  if (ST.isAmdPalOS()) {
    // The pointer to the GIT is formed from the offset passed in and either
    // the amdgpu-git-ptr-high function attribute or the top part of the PC.
    Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
    Register Rsrc03 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);

    buildGitPtr(MBB, I, DL, TII, Rsrc01);

    // We now have the GIT ptr - now get the scratch descriptor from the entry
    // at offset 0 (or offset 16 for a compute shader).
    MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
    const MCInstrDesc &LoadDwordX4 = TII->get(AMDGPU::S_LOAD_DWORDX4_IMM);
    auto MMO = MF.getMachineMemOperand(PtrInfo,
                                       MachineMemOperand::MOLoad |
                                           MachineMemOperand::MOInvariant |
                                           MachineMemOperand::MODereferenceable,
                                       16, Align(4));
    unsigned Offset = Fn.getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0;
    const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
    unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset);
    BuildMI(MBB, I, DL, LoadDwordX4, ScratchRsrcReg)
        .addReg(Rsrc01)
        .addImm(EncodedOffset) // offset
        .addImm(0) // cpol
        .addReg(ScratchRsrcReg, RegState::ImplicitDefine)
        .addMemOperand(MMO);

    // The driver will always set the SRD for wave 64 (bits 118:117 of the
    // descriptor / bits 22:21 of the third sub-reg will be 0b11).
    // If the shader is actually wave32 we have to modify the const_index_stride
    // field of the descriptor 3rd sub-reg (bits 22:21) to 0b10 (stride=32). The
    // reason the driver does this is that there can be cases where it presents
    // 2 shaders with different wave size (e.g. VsFs).
    // TODO: convert to using SCRATCH instructions or multiple SRD buffers
    if (ST.isWave32()) {
      const MCInstrDesc &SBitsetB32 = TII->get(AMDGPU::S_BITSET0_B32);
      BuildMI(MBB, I, DL, SBitsetB32, Rsrc03)
          .addImm(21)
          .addReg(Rsrc03);
    }
  } else if (ST.isMesaGfxShader(Fn) || !PreloadedScratchRsrcReg) {
    assert(!ST.isAmdHsaOrMesa(Fn));
    const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);

    Register Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
    Register Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);

    // Use relocations to get the pointer, and setup the other bits manually.
    uint64_t Rsrc23 = TII->getScratchRsrcWords23();

    if (MFI->hasImplicitBufferPtr()) {
      Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);

      if (AMDGPU::isCompute(MF.getFunction().getCallingConv())) {
        const MCInstrDesc &Mov64 = TII->get(AMDGPU::S_MOV_B64);

        BuildMI(MBB, I, DL, Mov64, Rsrc01)
            .addReg(MFI->getImplicitBufferPtrUserSGPR())
            .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
      } else {
        const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);

        MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
        auto MMO = MF.getMachineMemOperand(
            PtrInfo,
            MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
                MachineMemOperand::MODereferenceable,
            8, Align(4));
        BuildMI(MBB, I, DL, LoadDwordX2, Rsrc01)
            .addReg(MFI->getImplicitBufferPtrUserSGPR())
            .addImm(0) // offset
            .addImm(0) // cpol
            .addMemOperand(MMO)
            .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

        MF.getRegInfo().addLiveIn(MFI->getImplicitBufferPtrUserSGPR());
        MBB.addLiveIn(MFI->getImplicitBufferPtrUserSGPR());
      }
    } else {
      Register Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
      Register Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);

      BuildMI(MBB, I, DL, SMovB32, Rsrc0)
          .addExternalSymbol("SCRATCH_RSRC_DWORD0")
          .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

      BuildMI(MBB, I, DL, SMovB32, Rsrc1)
          .addExternalSymbol("SCRATCH_RSRC_DWORD1")
          .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
    }

    BuildMI(MBB, I, DL, SMovB32, Rsrc2)
        .addImm(Rsrc23 & 0xffffffff)
        .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

    BuildMI(MBB, I, DL, SMovB32, Rsrc3)
        .addImm(Rsrc23 >> 32)
        .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
  } else if (ST.isAmdHsaOrMesa(Fn)) {
    assert(PreloadedScratchRsrcReg);

    if (ScratchRsrcReg != PreloadedScratchRsrcReg) {
      BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchRsrcReg)
          .addReg(PreloadedScratchRsrcReg, RegState::Kill);
    }
  }

  // Add the scratch wave offset into the scratch RSRC.
  //
  // We only want to update the first 48 bits, which is the base address
  // pointer, without touching the adjacent 16 bits of flags. We know this add
  // cannot carry-out from bit 47, otherwise the scratch allocation would be
  // impossible to fit in the 48-bit global address space.
  //
  // TODO: Evaluate if it is better to just construct an SRD using the flat
  // scratch init and some constants rather than update the one we are passed.
  Register ScratchRsrcSub0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
  Register ScratchRsrcSub1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);

  // We cannot Kill ScratchWaveOffsetReg here because we allow it to be used in
  // the kernel body via inreg arguments.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), ScratchRsrcSub0)
      .addReg(ScratchRsrcSub0)
      .addReg(ScratchWaveOffsetReg)
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
  auto Addc = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), ScratchRsrcSub1)
      .addReg(ScratchRsrcSub1)
      .addImm(0)
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
  Addc->getOperand(3).setIsDead(); // Mark SCC as dead.
}

bool SIFrameLowering::isSupportedStackID(TargetStackID::Value ID) const {
  switch (ID) {
  case TargetStackID::Default:
  case TargetStackID::NoAlloc:
  case TargetStackID::SGPRSpill:
    return true;
  case TargetStackID::ScalableVector:
  case TargetStackID::WasmLocal:
    return false;
  }
  llvm_unreachable("Invalid TargetStackID::Value");
}

static void initLiveRegs(LivePhysRegs &LiveRegs, const SIRegisterInfo &TRI,
                         const SIMachineFunctionInfo *FuncInfo,
                         MachineFunction &MF, MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator MBBI, bool IsProlog) {
  if (LiveRegs.empty()) {
    LiveRegs.init(TRI);
    if (IsProlog) {
      LiveRegs.addLiveIns(MBB);
    } else {
      // In epilog.
      LiveRegs.addLiveOuts(MBB);
      LiveRegs.stepBackward(*MBBI);
    }
  }
}

// Activate all lanes, returns saved exec.
static Register buildScratchExecCopy(LivePhysRegs &LiveRegs,
                                     MachineFunction &MF,
                                     MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MBBI,
                                     bool IsProlog) {
  Register ScratchExecCopy;
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  DebugLoc DL;

  initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, IsProlog);

  ScratchExecCopy = findScratchNonCalleeSaveRegister(
      MRI, LiveRegs, *TRI.getWaveMaskRegClass());
  if (!ScratchExecCopy)
    report_fatal_error("failed to find free scratch register");

  LiveRegs.addReg(ScratchExecCopy);

  const unsigned OrSaveExec =
      ST.isWave32() ? AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64;
  auto SaveExec = BuildMI(MBB, MBBI, DL, TII->get(OrSaveExec), ScratchExecCopy)
      .addImm(-1);
  SaveExec->getOperand(3).setIsDead(); // Mark SCC as dead.

  return ScratchExecCopy;
}

// A StackID of SGPRSpill implies that this is a spill from SGPR to VGPR.
// Otherwise we are spilling to memory.
static bool spilledToMemory(const MachineFunction &MF, int SaveIndex) {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  return MFI.getStackID(SaveIndex) != TargetStackID::SGPRSpill;
}

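// Prologue for non-entry (callable) functions: with all lanes forced on, spill
// the VGPRs used for SGPR spilling and whole wave mode, save the FP/BP to an
// SGPR, a VGPR lane, or memory, realign the stack if required, and finally
// bump the stack pointer by the frame size.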
void SIFrameLowering::emitPrologue(MachineFunction &MF,
                                   MachineBasicBlock &MBB) const {
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  if (FuncInfo->isEntryFunction()) {
    emitEntryFunctionPrologue(MF, MBB);
    return;
  }

  const MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();

  Register StackPtrReg = FuncInfo->getStackPtrOffsetReg();
  Register FramePtrReg = FuncInfo->getFrameOffsetReg();
  Register BasePtrReg =
      TRI.hasBasePointer(MF) ? TRI.getBaseRegister() : Register();
  LivePhysRegs LiveRegs;

  MachineBasicBlock::iterator MBBI = MBB.begin();
  DebugLoc DL;

  bool HasFP = false;
  bool HasBP = false;
  uint32_t NumBytes = MFI.getStackSize();
  uint32_t RoundedSize = NumBytes;
  // To avoid clobbering VGPRs in lanes that weren't active on function entry,
  // turn on all lanes before doing the spill to memory.
  Register ScratchExecCopy;

  Optional<int> FPSaveIndex = FuncInfo->FramePointerSaveIndex;
  Optional<int> BPSaveIndex = FuncInfo->BasePointerSaveIndex;

  // VGPRs used for SGPR->VGPR spills
  for (const SIMachineFunctionInfo::SGPRSpillVGPR &Reg :
       FuncInfo->getSGPRSpillVGPRs()) {
    if (!Reg.FI)
      continue;

    if (!ScratchExecCopy)
      ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI,
                                             /*IsProlog*/ true);

    buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, Reg.VGPR,
                     *Reg.FI);
  }

  // VGPRs used for Whole Wave Mode
  for (const auto &Reg : FuncInfo->WWMReservedRegs) {
    auto VGPR = Reg.first;
    auto FI = Reg.second;
    if (!FI)
      continue;

    if (!ScratchExecCopy)
      ScratchExecCopy =
          buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, /*IsProlog*/ true);

    buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, VGPR,
                     *FI);
  }

  if (ScratchExecCopy) {
    // FIXME: Split block and make terminator.
    unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
    MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
    BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec)
        .addReg(ScratchExecCopy, RegState::Kill);
    LiveRegs.addReg(ScratchExecCopy);
  }

  if (FPSaveIndex && spilledToMemory(MF, *FPSaveIndex)) {
    const int FramePtrFI = *FPSaveIndex;
    assert(!MFI.isDeadObjectIndex(FramePtrFI));

    initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ true);

    Register TmpVGPR = findScratchNonCalleeSaveRegister(
        MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
    if (!TmpVGPR)
      report_fatal_error("failed to find free scratch register");

    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
        .addReg(FramePtrReg);

    buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, TmpVGPR,
                     FramePtrFI);
  }

  if (BPSaveIndex && spilledToMemory(MF, *BPSaveIndex)) {
    const int BasePtrFI = *BPSaveIndex;
    assert(!MFI.isDeadObjectIndex(BasePtrFI));

    initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ true);

    Register TmpVGPR = findScratchNonCalleeSaveRegister(
        MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
    if (!TmpVGPR)
      report_fatal_error("failed to find free scratch register");

    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
        .addReg(BasePtrReg);

    buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, TmpVGPR,
                     BasePtrFI);
  }

  // In this case, spill the FP to a reserved VGPR.
  if (FPSaveIndex && !spilledToMemory(MF, *FPSaveIndex)) {
    const int FramePtrFI = *FPSaveIndex;
    assert(!MFI.isDeadObjectIndex(FramePtrFI));

    assert(MFI.getStackID(FramePtrFI) == TargetStackID::SGPRSpill);
    ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill =
        FuncInfo->getSGPRToVGPRSpills(FramePtrFI);
    assert(Spill.size() == 1);

    // Save FP before setting it up.
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_WRITELANE_B32), Spill[0].VGPR)
        .addReg(FramePtrReg)
        .addImm(Spill[0].Lane)
        .addReg(Spill[0].VGPR, RegState::Undef);
  }

  // In this case, spill the BP to a reserved VGPR.
  if (BPSaveIndex && !spilledToMemory(MF, *BPSaveIndex)) {
    const int BasePtrFI = *BPSaveIndex;
    assert(!MFI.isDeadObjectIndex(BasePtrFI));

    assert(MFI.getStackID(BasePtrFI) == TargetStackID::SGPRSpill);
    ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill =
        FuncInfo->getSGPRToVGPRSpills(BasePtrFI);
    assert(Spill.size() == 1);

    // Save BP before setting it up.
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_WRITELANE_B32), Spill[0].VGPR)
        .addReg(BasePtrReg)
        .addImm(Spill[0].Lane)
        .addReg(Spill[0].VGPR, RegState::Undef);
  }

  // Emit the copy if we need an FP, and are using a free SGPR to save it.
  if (FuncInfo->SGPRForFPSaveRestoreCopy) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY),
            FuncInfo->SGPRForFPSaveRestoreCopy)
        .addReg(FramePtrReg)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  // Emit the copy if we need a BP, and are using a free SGPR to save it.
  if (FuncInfo->SGPRForBPSaveRestoreCopy) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY),
            FuncInfo->SGPRForBPSaveRestoreCopy)
        .addReg(BasePtrReg)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  // If a copy has been emitted for FP and/or BP, make the SGPRs
  // used in the copy instructions live throughout the function.
  SmallVector<MCPhysReg, 2> TempSGPRs;
  if (FuncInfo->SGPRForFPSaveRestoreCopy)
    TempSGPRs.push_back(FuncInfo->SGPRForFPSaveRestoreCopy);

  if (FuncInfo->SGPRForBPSaveRestoreCopy)
    TempSGPRs.push_back(FuncInfo->SGPRForBPSaveRestoreCopy);

  if (!TempSGPRs.empty()) {
    for (MachineBasicBlock &MBB : MF) {
      for (MCPhysReg Reg : TempSGPRs)
        MBB.addLiveIn(Reg);

      MBB.sortUniqueLiveIns();
    }
    if (!LiveRegs.empty()) {
      LiveRegs.addReg(FuncInfo->SGPRForFPSaveRestoreCopy);
      LiveRegs.addReg(FuncInfo->SGPRForBPSaveRestoreCopy);
    }
  }

  if (TRI.hasStackRealignment(MF)) {
    HasFP = true;
    const unsigned Alignment = MFI.getMaxAlign().value();

    RoundedSize += Alignment;
    if (LiveRegs.empty()) {
      LiveRegs.init(TRI);
      LiveRegs.addLiveIns(MBB);
    }

    // s_add_i32 s33, s32, NumBytes
    // s_and_b32 s33, s33, 0b111...0000
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), FramePtrReg)
        .addReg(StackPtrReg)
        .addImm((Alignment - 1) * getScratchScaleFactor(ST))
        .setMIFlag(MachineInstr::FrameSetup);
    auto And = BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_AND_B32), FramePtrReg)
        .addReg(FramePtrReg, RegState::Kill)
        .addImm(-Alignment * getScratchScaleFactor(ST))
        .setMIFlag(MachineInstr::FrameSetup);
    And->getOperand(3).setIsDead(); // Mark SCC as dead.
    FuncInfo->setIsStackRealigned(true);
  } else if ((HasFP = hasFP(MF))) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
        .addReg(StackPtrReg)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  // If we need a base pointer, set it up here. It's whatever the value of
  // the stack pointer is at this point. Any variable size objects will be
  // allocated after this, so we can still use the base pointer to reference
  // the incoming arguments.
  if ((HasBP = TRI.hasBasePointer(MF))) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), BasePtrReg)
        .addReg(StackPtrReg)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  if (HasFP && RoundedSize != 0) {
    auto Add = BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), StackPtrReg)
        .addReg(StackPtrReg)
        .addImm(RoundedSize * getScratchScaleFactor(ST))
        .setMIFlag(MachineInstr::FrameSetup);
    Add->getOperand(3).setIsDead(); // Mark SCC as dead.
  }

  assert((!HasFP || (FuncInfo->SGPRForFPSaveRestoreCopy ||
                     FuncInfo->FramePointerSaveIndex)) &&
         "Needed to save FP but didn't save it anywhere");

  // If we allow spilling to AGPRs we may have saved FP but then spill
  // everything into AGPRs instead of the stack.
  assert((HasFP || (!FuncInfo->SGPRForFPSaveRestoreCopy &&
                    !FuncInfo->FramePointerSaveIndex) ||
          EnableSpillVGPRToAGPR) &&
         "Saved FP but didn't need it");

  assert((!HasBP || (FuncInfo->SGPRForBPSaveRestoreCopy ||
                     FuncInfo->BasePointerSaveIndex)) &&
         "Needed to save BP but didn't save it anywhere");

  assert((HasBP || (!FuncInfo->SGPRForBPSaveRestoreCopy &&
                    !FuncInfo->BasePointerSaveIndex)) &&
         "Saved BP but didn't need it");
}

void SIFrameLowering::emitEpilogue(MachineFunction &MF,
                                   MachineBasicBlock &MBB) const {
  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  if (FuncInfo->isEntryFunction())
    return;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();
  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
  LivePhysRegs LiveRegs;
  DebugLoc DL;

  const MachineFrameInfo &MFI = MF.getFrameInfo();
  uint32_t NumBytes = MFI.getStackSize();
  uint32_t RoundedSize = FuncInfo->isStackRealigned()
                             ? NumBytes + MFI.getMaxAlign().value()
                             : NumBytes;
  const Register StackPtrReg = FuncInfo->getStackPtrOffsetReg();
  const Register FramePtrReg = FuncInfo->getFrameOffsetReg();
  const Register BasePtrReg =
      TRI.hasBasePointer(MF) ? TRI.getBaseRegister() : Register();

  Optional<int> FPSaveIndex = FuncInfo->FramePointerSaveIndex;
  Optional<int> BPSaveIndex = FuncInfo->BasePointerSaveIndex;

  if (RoundedSize != 0 && hasFP(MF)) {
    auto Add = BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), StackPtrReg)
        .addReg(StackPtrReg)
        .addImm(-static_cast<int64_t>(RoundedSize * getScratchScaleFactor(ST)))
        .setMIFlag(MachineInstr::FrameDestroy);
    Add->getOperand(3).setIsDead(); // Mark SCC as dead.
  }

  if (FuncInfo->SGPRForFPSaveRestoreCopy) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
        .addReg(FuncInfo->SGPRForFPSaveRestoreCopy)
        .setMIFlag(MachineInstr::FrameDestroy);
  }

  if (FuncInfo->SGPRForBPSaveRestoreCopy) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), BasePtrReg)
        .addReg(FuncInfo->SGPRForBPSaveRestoreCopy)
        .setMIFlag(MachineInstr::FrameDestroy);
  }

  if (FPSaveIndex) {
    const int FramePtrFI = *FPSaveIndex;
    assert(!MFI.isDeadObjectIndex(FramePtrFI));
    if (spilledToMemory(MF, FramePtrFI)) {
      initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ false);

      Register TmpVGPR = findScratchNonCalleeSaveRegister(
          MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
      if (!TmpVGPR)
        report_fatal_error("failed to find free scratch register");
      buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL,
                         TmpVGPR, FramePtrFI);
      BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), FramePtrReg)
          .addReg(TmpVGPR, RegState::Kill);
    } else {
      // Reload from VGPR spill.
      assert(MFI.getStackID(FramePtrFI) == TargetStackID::SGPRSpill);
      ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill =
          FuncInfo->getSGPRToVGPRSpills(FramePtrFI);
      assert(Spill.size() == 1);
      BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READLANE_B32), FramePtrReg)
          .addReg(Spill[0].VGPR)
          .addImm(Spill[0].Lane);
    }
  }

  if (BPSaveIndex) {
    const int BasePtrFI = *BPSaveIndex;
    assert(!MFI.isDeadObjectIndex(BasePtrFI));
    if (spilledToMemory(MF, BasePtrFI)) {
      initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ false);

      Register TmpVGPR = findScratchNonCalleeSaveRegister(
          MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
      if (!TmpVGPR)
        report_fatal_error("failed to find free scratch register");
      buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL,
                         TmpVGPR, BasePtrFI);
      BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), BasePtrReg)
          .addReg(TmpVGPR, RegState::Kill);
    } else {
      // Reload from VGPR spill.
      assert(MFI.getStackID(BasePtrFI) == TargetStackID::SGPRSpill);
      ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill =
          FuncInfo->getSGPRToVGPRSpills(BasePtrFI);
      assert(Spill.size() == 1);
      BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READLANE_B32), BasePtrReg)
          .addReg(Spill[0].VGPR)
          .addImm(Spill[0].Lane);
    }
  }

  Register ScratchExecCopy;
  for (const SIMachineFunctionInfo::SGPRSpillVGPR &Reg :
       FuncInfo->getSGPRSpillVGPRs()) {
    if (!Reg.FI)
      continue;

    if (!ScratchExecCopy)
      ScratchExecCopy =
          buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, /*IsProlog*/ false);

    buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL,
                       Reg.VGPR, *Reg.FI);
  }

  for (const auto &Reg : FuncInfo->WWMReservedRegs) {
    auto VGPR = Reg.first;
    auto FI = Reg.second;
    if (!FI)
      continue;

    if (!ScratchExecCopy)
      ScratchExecCopy =
          buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, /*IsProlog*/ false);

    buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, VGPR,
                       *FI);
  }

  if (ScratchExecCopy) {
    // FIXME: Split block and make terminator.
    unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
    MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
    BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec)
        .addReg(ScratchExecCopy, RegState::Kill);
  }
}

#ifndef NDEBUG
static bool allSGPRSpillsAreDead(const MachineFunction &MF) {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
       I != E; ++I) {
    if (!MFI.isDeadObjectIndex(I) &&
        MFI.getStackID(I) == TargetStackID::SGPRSpill &&
        (I != FuncInfo->FramePointerSaveIndex &&
         I != FuncInfo->BasePointerSaveIndex)) {
      return false;
    }
  }

  return true;
}
#endif

StackOffset SIFrameLowering::getFrameIndexReference(const MachineFunction &MF,
                                                    int FI,
                                                    Register &FrameReg) const {
  const SIRegisterInfo *RI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();

  FrameReg = RI->getFrameRegister(MF);
  return StackOffset::getFixed(MF.getFrameInfo().getObjectOffset(FI));
}

void SIFrameLowering::processFunctionBeforeFrameFinalized(
    MachineFunction &MF,
    RegScavenger *RS) const {
  MachineFrameInfo &MFI = MF.getFrameInfo();

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();

  const bool SpillVGPRToAGPR = ST.hasMAIInsts() && FuncInfo->hasSpilledVGPRs()
                               && EnableSpillVGPRToAGPR;

  if (SpillVGPRToAGPR) {
    // To track the spill frame indices handled in this pass.
    BitVector SpillFIs(MFI.getObjectIndexEnd(), false);

    bool SeenDbgInstr = false;

    for (MachineBasicBlock &MBB : MF) {
      for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
        if (MI.isDebugInstr())
          SeenDbgInstr = true;

        if (TII->isVGPRSpill(MI)) {
          // Try to eliminate stack used by VGPR spills before frame
          // finalization.
          unsigned FIOp = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                                     AMDGPU::OpName::vaddr);
          int FI = MI.getOperand(FIOp).getIndex();
          Register VReg =
              TII->getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg();
          if (FuncInfo->allocateVGPRSpillToAGPR(MF, FI,
                                                TRI->isAGPR(MRI, VReg))) {
            // FIXME: change to enterBasicBlockEnd()
            RS->enterBasicBlock(MBB);
            TRI->eliminateFrameIndex(MI, 0, FIOp, RS);
            SpillFIs.set(FI);
            continue;
          }
        }
      }
    }

    for (MachineBasicBlock &MBB : MF) {
      for (MCPhysReg Reg : FuncInfo->getVGPRSpillAGPRs())
        MBB.addLiveIn(Reg);

      for (MCPhysReg Reg : FuncInfo->getAGPRSpillVGPRs())
        MBB.addLiveIn(Reg);

      MBB.sortUniqueLiveIns();

      if (!SpillFIs.empty() && SeenDbgInstr) {
        // FIXME: The dead frame indices are replaced with a null register from
        // the debug value instructions. We should instead, update it with the
        // correct register value. But not sure the register value alone is
        // adequate to lower the DIExpression. It should be worked out later.
        for (MachineInstr &MI : MBB) {
          if (MI.isDebugValue() && MI.getOperand(0).isFI() &&
              SpillFIs[MI.getOperand(0).getIndex()]) {
            MI.getOperand(0).ChangeToRegister(Register(), false /*isDef*/);
          }
        }
      }
    }
  }

  FuncInfo->removeDeadFrameIndices(MFI);
  assert(allSGPRSpillsAreDead(MF) &&
         "SGPR spill should have been removed in SILowerSGPRSpills");

  // FIXME: The other checks should be redundant with allStackObjectsAreDead,
  // but currently hasNonSpillStackObjects is set only from source
  // allocas. Stack temps produced from legalization are not counted currently.
  if (!allStackObjectsAreDead(MFI)) {
    assert(RS && "RegScavenger required if spilling");

    // Add an emergency spill slot
    RS->addScavengingFrameIndex(FuncInfo->getScavengeFI(MFI, *TRI));
  }
}

// Only report VGPRs to generic code.
void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
                                           BitVector &SavedVGPRs,
                                           RegScavenger *RS) const {
  TargetFrameLowering::determineCalleeSaves(MF, SavedVGPRs, RS);
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  if (MFI->isEntryFunction())
    return;

  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();

  // Ignore the SGPRs the default implementation found.
  SavedVGPRs.clearBitsNotInMask(TRI->getAllVectorRegMask());

  // Do not save AGPRs prior to GFX90A because there was no easy way to do so.
  // gfx908 has no AGPR loads and stores, so spilling them also requires a
  // temporary VGPR.
  if (!ST.hasGFX90AInsts())
    SavedVGPRs.clearBitsInMask(TRI->getAllAGPRRegMask());

  // hasFP only knows about stack objects that already exist. We're now
  // determining the stack slots that will be created, so we have to predict
  // them. Stack objects force FP usage with calls.
  //
  // Note a new VGPR CSR may be introduced if one is used for the spill, but we
  // don't want to report it here.
  //
  // FIXME: Is this really hasReservedCallFrame?
  const bool WillHaveFP =
      FrameInfo.hasCalls() &&
      (SavedVGPRs.any() || !allStackObjectsAreDead(FrameInfo));

  // VGPRs used for SGPR spilling need to be specially inserted in the prolog,
  // so don't allow the default insertion to handle them.
  for (auto SSpill : MFI->getSGPRSpillVGPRs())
    SavedVGPRs.reset(SSpill.VGPR);

  LivePhysRegs LiveRegs;
  LiveRegs.init(*TRI);

  if (WillHaveFP || hasFP(MF)) {
    assert(!MFI->SGPRForFPSaveRestoreCopy && !MFI->FramePointerSaveIndex &&
           "Re-reserving spill slot for FP");
    getVGPRSpillLaneOrTempRegister(MF, LiveRegs, MFI->SGPRForFPSaveRestoreCopy,
                                   MFI->FramePointerSaveIndex, true);
  }

  if (TRI->hasBasePointer(MF)) {
    if (MFI->SGPRForFPSaveRestoreCopy)
      LiveRegs.addReg(MFI->SGPRForFPSaveRestoreCopy);

    assert(!MFI->SGPRForBPSaveRestoreCopy &&
           !MFI->BasePointerSaveIndex && "Re-reserving spill slot for BP");
    getVGPRSpillLaneOrTempRegister(MF, LiveRegs, MFI->SGPRForBPSaveRestoreCopy,
                                   MFI->BasePointerSaveIndex, false);
  }
}

void SIFrameLowering::determineCalleeSavesSGPR(MachineFunction &MF,
                                               BitVector &SavedRegs,
                                               RegScavenger *RS) const {
  TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  if (MFI->isEntryFunction())
    return;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();

  // The SP is specifically managed and we don't want extra spills of it.
  SavedRegs.reset(MFI->getStackPtrOffsetReg());

  const BitVector AllSavedRegs = SavedRegs;
  SavedRegs.clearBitsInMask(TRI->getAllVectorRegMask());

  // If clearing VGPRs changed the mask, we will have some CSR VGPR spills.
  const bool HaveAnyCSRVGPR = SavedRegs != AllSavedRegs;

  // We have to anticipate introducing CSR VGPR spills or a spill of the
  // caller-save VGPR reserved for SGPR spills, as we now always create a stack
  // entry for it even if we don't have any other stack objects, since we
  // require an FP if there is a call and a stack.
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  const bool WillHaveFP =
      FrameInfo.hasCalls() && (HaveAnyCSRVGPR || MFI->VGPRReservedForSGPRSpill);

  // FP will be specially managed like SP.
  if (WillHaveFP || hasFP(MF))
    SavedRegs.reset(MFI->getFrameOffsetReg());
}

bool SIFrameLowering::assignCalleeSavedSpillSlots(
    MachineFunction &MF, const TargetRegisterInfo *TRI,
    std::vector<CalleeSavedInfo> &CSI) const {
  if (CSI.empty())
    return true; // Early exit if no callee saved registers are modified!

  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  if (!FuncInfo->SGPRForFPSaveRestoreCopy &&
      !FuncInfo->SGPRForBPSaveRestoreCopy)
    return false;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *RI = ST.getRegisterInfo();
  Register FramePtrReg = FuncInfo->getFrameOffsetReg();
  Register BasePtrReg = RI->getBaseRegister();
  unsigned NumModifiedRegs = 0;

  if (FuncInfo->SGPRForFPSaveRestoreCopy)
    NumModifiedRegs++;
  if (FuncInfo->SGPRForBPSaveRestoreCopy)
    NumModifiedRegs++;

  for (auto &CS : CSI) {
    if (CS.getReg() == FramePtrReg && FuncInfo->SGPRForFPSaveRestoreCopy) {
      CS.setDstReg(FuncInfo->SGPRForFPSaveRestoreCopy);
      if (--NumModifiedRegs)
        break;
    } else if (CS.getReg() == BasePtrReg &&
               FuncInfo->SGPRForBPSaveRestoreCopy) {
      CS.setDstReg(FuncInfo->SGPRForBPSaveRestoreCopy);
      if (--NumModifiedRegs)
        break;
    }
  }

  return false;
}

bool SIFrameLowering::allocateScavengingFrameIndexesNearIncomingSP(
    const MachineFunction &MF) const {

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  uint64_t EstStackSize = MFI.estimateStackSize(MF);
  uint64_t MaxOffset = EstStackSize - 1;

  // We need the emergency stack slots to be allocated in range of the
  // MUBUF/flat scratch immediate offset from the base register, so assign these
  // first at the incoming SP position.
  //
  // TODO: We could try sorting the objects to find a hole in the first bytes
  // rather than allocating as close as possible. This could save a lot of space
  // on frames with alignment requirements.
  if (ST.enableFlatScratch()) {
    const SIInstrInfo *TII = ST.getInstrInfo();
    if (TII->isLegalFLATOffset(MaxOffset, AMDGPUAS::PRIVATE_ADDRESS,
                               SIInstrFlags::FlatScratch))
      return false;
  } else {
    if (SIInstrInfo::isLegalMUBUFImmOffset(MaxOffset))
      return false;
  }

  return true;
}

MachineBasicBlock::iterator SIFrameLowering::eliminateCallFramePseudoInstr(
    MachineFunction &MF,
    MachineBasicBlock &MBB,
    MachineBasicBlock::iterator I) const {
  int64_t Amount = I->getOperand(0).getImm();
  if (Amount == 0)
    return MBB.erase(I);

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const DebugLoc &DL = I->getDebugLoc();
  unsigned Opc = I->getOpcode();
  bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
  uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;

  if (!hasReservedCallFrame(MF)) {
    Amount = alignTo(Amount, getStackAlign());
    assert(isUInt<32>(Amount) && "exceeded stack address space size");
    const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
    Register SPReg = MFI->getStackPtrOffsetReg();

    Amount *= getScratchScaleFactor(ST);
    if (IsDestroy)
      Amount = -Amount;
    auto Add = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), SPReg)
        .addReg(SPReg)
        .addImm(Amount);
    Add->getOperand(3).setIsDead(); // Mark SCC as dead.
  } else if (CalleePopAmount != 0) {
    llvm_unreachable("is this used?");
  }

  return MBB.erase(I);
}

/// Returns true if the frame will require a reference to the stack pointer.
///
/// This is the set of conditions common to setting up the stack pointer in a
/// kernel, and for using a frame pointer in a callable function.
///
/// FIXME: Should also check hasOpaqueSPAdjustment and if any inline asm
/// references SP.
static bool frameTriviallyRequiresSP(const MachineFrameInfo &MFI) {
  return MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint();
}

// The FP for kernels is always known 0, so we never really need to set up an
// explicit register for it. However, DisableFramePointerElim will force us to
// use a register for it.
bool SIFrameLowering::hasFP(const MachineFunction &MF) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();

  // For entry functions we can use an immediate offset in most cases, so the
  // presence of calls doesn't imply we need a distinct frame pointer.
  if (MFI.hasCalls() &&
      !MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction()) {
    // All offsets are unsigned, so need to be addressed in the same direction
    // as stack growth.

    // FIXME: This function is pretty broken, since it can be called before the
    // frame layout is determined or CSR spills are inserted.
    return MFI.getStackSize() != 0;
  }

  return frameTriviallyRequiresSP(MFI) || MFI.isFrameAddressTaken() ||
         MF.getSubtarget<GCNSubtarget>().getRegisterInfo()->hasStackRealignment(
             MF) ||
         MF.getTarget().Options.DisableFramePointerElim(MF);
}

// This is essentially a reduced version of hasFP for entry functions. Since the
// stack pointer is known 0 on entry to kernels, we never really need an FP
// register. We may need to initialize the stack pointer depending on the frame
// properties, which logically overlaps many of the cases where an ordinary
// function would require an FP.
bool SIFrameLowering::requiresStackPointerReference(
    const MachineFunction &MF) const {
  // Callable functions always require a stack pointer reference.
  assert(MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction() &&
         "only expected to call this for entry points");

  const MachineFrameInfo &MFI = MF.getFrameInfo();

  // Entry points ordinarily don't need to initialize SP. We have to set it up
  // for callees if there are any. Also note tail calls are impossible/don't
  // make any sense for kernels.
  if (MFI.hasCalls())
    return true;

  // We still need to initialize the SP if we're doing anything weird that
  // references the SP, like variable sized stack objects.
  return frameTriviallyRequiresSP(MFI);
}
llvm::MachineRegisterInfo::addLiveIn
void addLiveIn(MCRegister Reg, Register vreg=Register())
addLiveIn - Add the specified register as a live-in.
Definition: MachineRegisterInfo.h:957
i
i
Definition: README.txt:29
llvm::alignTo
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:148
llvm::MachineFrameInfo::hasVarSizedObjects
bool hasVarSizedObjects() const
This method may be called any time after instruction selection is complete to determine if the stack ...
Definition: MachineFrameInfo.h:353
llvm::SIMachineFunctionInfo::setIsStackRealigned
void setIsStackRealigned(bool Realigned=true)
Definition: SIMachineFunctionInfo.h:806
llvm::SIMachineFunctionInfo::getSGPRSpillVGPRs
ArrayRef< SGPRSpillVGPR > getSGPRSpillVGPRs() const
Definition: SIMachineFunctionInfo.h:525
llvm::AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT
@ FLAT_SCRATCH_INIT
Definition: AMDGPUArgumentUsageInfo.h:105
llvm::SIMachineFunctionInfo::hasSpilledVGPRs
bool hasSpilledVGPRs() const
Definition: SIMachineFunctionInfo.h:786
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:105
llvm::MachineInstrBuilder::addImm
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Definition: MachineInstrBuilder.h:131
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AllocatorList.h:23
llvm::TargetStackID::WasmLocal
@ WasmLocal
Definition: TargetFrameLowering.h:31
llvm::LivePhysRegs::removeReg
void removeReg(MCPhysReg Reg)
Removes a physical register, all its sub-registers, and all its super-registers from the set.
Definition: LivePhysRegs.h:89
Reg
unsigned Reg
Definition: MachineSink.cpp:1558
llvm::MachineRegisterInfo::isPhysRegUsed
bool isPhysRegUsed(MCRegister PhysReg, bool SkipRegMaskTest=false) const
Return true if the specified register is modified or read in this function.
Definition: MachineRegisterInfo.cpp:583
llvm::LivePhysRegs::addReg
void addReg(MCPhysReg Reg)
Adds a physical register and all its sub-registers to the set.
Definition: LivePhysRegs.h:79
llvm::SIMachineFunctionInfo::getPreloadedReg
MCRegister getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const
Definition: SIMachineFunctionInfo.h:706
llvm::SIFrameLowering::emitEpilogue
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override
Definition: SIFrameLowering.cpp:990
buildEpilogRestore
static void buildEpilogRestore(const GCNSubtarget &ST, const SIRegisterInfo &TRI, const SIMachineFunctionInfo &FuncInfo, LivePhysRegs &LiveRegs, MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SpillReg, int FI)
Definition: SIFrameLowering.cpp:145
SIMachineFunctionInfo.h
llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition: MachineRegisterInfo.h:52
llvm::Function
Definition: Function.h:62
llvm::BitVector::set
BitVector & set()
Definition: BitVector.h:343
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
llvm::AMDGPUSubtarget::GFX9
@ GFX9
Definition: AMDGPUSubtarget.h:40
llvm::MachineFunction::getMachineMemOperand
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
Definition: MachineFunction.cpp:434
llvm::MachineMemOperand::MOInvariant
@ MOInvariant
The memory access always returns the same value (or traps).
Definition: MachineMemOperand.h:145
llvm::SIMachineFunctionInfo::allocateSGPRSpillToVGPR
bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI)
Reserve a slice of a VGPR to support spilling for FrameIndex FI.
Definition: SIMachineFunctionInfo.cpp:269
llvm::LivePhysRegs
A set of physical registers with utility functions to track liveness when walking backward/forward th...
Definition: LivePhysRegs.h:48
llvm::TargetRegisterInfo
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Definition: TargetRegisterInfo.h:233
llvm::SIMachineFunctionInfo::getGITPtrHigh
unsigned getGITPtrHigh() const
Definition: SIMachineFunctionInfo.h:711
llvm::StackOffset::getFixed
ScalarTy getFixed() const
Definition: TypeSize.h:149
llvm::MachineMemOperand
A description of a memory reference used in the backend.
Definition: MachineMemOperand.h:128
llvm::MachineMemOperand::MODereferenceable
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
Definition: MachineMemOperand.h:143
llvm::Optional< int >
Offset
uint64_t Offset
Definition: ELFObjHandler.cpp:81
llvm::RegState::Kill
@ Kill
The last use of a register.
Definition: MachineInstrBuilder.h:48
llvm::GCNSubtarget
Definition: GCNSubtarget.h:31
llvm::RegState::Undef
@ Undef
Value of the register doesn't matter.
Definition: MachineInstrBuilder.h:52
llvm::AMDGPUAS::CONSTANT_ADDRESS
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
Definition: AMDGPU.h:362
llvm::MachineFrameInfo::RemoveStackObject
void RemoveStackObject(int ObjectIdx)
Remove or mark dead a statically sized stack object.
Definition: MachineFrameInfo.h:744
llvm::LivePhysRegs::empty
bool empty() const
Returns true if the set is empty.
Definition: LivePhysRegs.h:76
llvm::MachineFrameInfo::getObjectIndexEnd
int getObjectIndexEnd() const
Return one past the maximum frame object index.
Definition: MachineFrameInfo.h:393
llvm::SIMachineFunctionInfo::SGPRForFPSaveRestoreCopy
Register SGPRForFPSaveRestoreCopy
If set, the SGPR used to save/restore the register used as the frame pointer.
Definition: SIMachineFunctionInfo.h:496
llvm::AMDGPU::getNamedOperandIdx
LLVM_READONLY int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx)
llvm::SIMachineFunctionInfo::getNumPreloadedSGPRs
unsigned getNumPreloadedSGPRs() const
Definition: SIMachineFunctionInfo.h:729
llvm::cl::ReallyHidden
@ ReallyHidden
Definition: CommandLine.h:144
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
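LLVM_DEBUG statements are compiled out of builds without assertions and are filtered at run time by -debug / -debug-only using the translation unit's DEBUG_TYPE. A small illustrative use, where ScratchReg and TRI are assumed to be in scope:
LLVM_DEBUG(dbgs() << "Using " << printReg(ScratchReg, TRI)
                  << " as a prologue scratch register\n");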
llvm::SIMachineFunctionInfo::VGPRReservedForSGPRSpill
Register VGPRReservedForSGPRSpill
Definition: SIMachineFunctionInfo.h:504
llvm::MachineInstr::FrameDestroy
@ FrameDestroy
Definition: MachineInstr.h:84
buildGitPtr
static void buildGitPtr(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, const SIInstrInfo *TII, Register TargetReg)
Definition: SIFrameLowering.cpp:165
llvm::MachineBasicBlock::erase
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
Definition: MachineBasicBlock.cpp:1298
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::SIInstrFlags::FlatScratch
@ FlatScratch
Definition: SIDefines.h:117
llvm::SIFrameLowering::hasFP
bool hasFP(const MachineFunction &MF) const override
hasFP - Return true if the specified function should have a dedicated frame pointer register.
Definition: SIFrameLowering.cpp:1444
llvm::SIRegisterInfo::getFrameRegister
Register getFrameRegister(const MachineFunction &MF) const override
Definition: SIRegisterInfo.cpp:461
frameTriviallyRequiresSP
static bool frameTriviallyRequiresSP(const MachineFrameInfo &MFI)
Returns true if the frame will require a reference to the stack pointer.
Definition: SIFrameLowering.cpp:1437
llvm::MachineFunction::front
const MachineBasicBlock & front() const
Definition: MachineFunction.h:835
llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition: MachineFunction.h:651
TargetMachine.h
GCNSubtarget.h
llvm::MachineFunction::getInfo
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Definition: MachineFunction.h:739
llvm::LivePhysRegs::addLiveIns
void addLiveIns(const MachineBasicBlock &MBB)
Adds all live-in registers of basic block MBB.
Definition: LivePhysRegs.cpp:238
llvm::TargetFrameLowering::getStackAlign
Align getStackAlign() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
Definition: TargetFrameLowering.h:100
llvm::MachineInstr::getOperand
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:499
llvm::TargetFrameLowering::hasReservedCallFrame
virtual bool hasReservedCallFrame(const MachineFunction &MF) const
hasReservedCallFrame - Under normal circumstances, when a frame pointer is not required,...
Definition: TargetFrameLowering.h:280
llvm::TargetRegisterClass
Definition: TargetRegisterInfo.h:46
llvm::LivePhysRegs::addLiveOuts
void addLiveOuts(const MachineBasicBlock &MBB)
Adds all live-out registers of basic block MBB.
Definition: LivePhysRegs.cpp:232
llvm::MCInstrDesc
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:195
llvm::SIFrameLowering::requiresStackPointerReference
bool requiresStackPointerReference(const MachineFunction &MF) const
Definition: SIFrameLowering.cpp:1470
initLiveRegs
static void initLiveRegs(LivePhysRegs &LiveRegs, const SIRegisterInfo &TRI, const SIMachineFunctionInfo *FuncInfo, MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, bool IsProlog)
Definition: SIFrameLowering.cpp:689
llvm::MachineInstr::FrameSetup
@ FrameSetup
Definition: MachineInstr.h:82
buildScratchExecCopy
static Register buildScratchExecCopy(LivePhysRegs &LiveRegs, MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, bool IsProlog)
Definition: SIFrameLowering.cpp:706
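Whole-wave spills need all lanes enabled, so callers of this helper typically save EXEC into the returned SGPR pair, emit the spill or reload, and then restore EXEC. A hedged sketch assuming a wave64 target and that LiveRegs, DL, and TII are already set up:
Register ScratchExecCopy =
    buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, /*IsProlog=*/true);
// ... whole-wave VGPR spills/reloads would be emitted here ...
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
    .addReg(ScratchExecCopy, RegState::Kill);   // restore the saved exec mask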
llvm::SIMachineFunctionInfo::getStackPtrOffsetReg
Register getStackPtrOffsetReg() const
Definition: SIMachineFunctionInfo.h:766
llvm::report_fatal_error
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:143
llvm::MachineFrameInfo::getStackID
uint8_t getStackID(int ObjectIdx) const
Definition: MachineFrameInfo.h:699
llvm::RegState::ImplicitDefine
@ ImplicitDefine
Definition: MachineInstrBuilder.h:63
llvm::MachineFrameInfo::getStackSize
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
Definition: MachineFrameInfo.h:553
getVGPRSpillLaneOrTempRegister
static void getVGPRSpillLaneOrTempRegister(MachineFunction &MF, LivePhysRegs &LiveRegs, Register &TempSGPR, Optional< int > &FrameIndex, bool IsFP)
Definition: SIFrameLowering.cpp:59
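A hedged sketch of how a caller asks this helper to place the FP and BP saves (a spare VGPR lane, an unused SGPR, or a memory spill slot); LiveRegs is assumed to be initialized already:
SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
getVGPRSpillLaneOrTempRegister(MF, LiveRegs, FuncInfo->SGPRForFPSaveRestoreCopy,
                               FuncInfo->FramePointerSaveIndex, /*IsFP=*/true);
getVGPRSpillLaneOrTempRegister(MF, LiveRegs, FuncInfo->SGPRForBPSaveRestoreCopy,
                               FuncInfo->BasePointerSaveIndex, /*IsFP=*/false);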
llvm::MachineFrameInfo::getObjectOffset
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
Definition: MachineFrameInfo.h:494
llvm::MCRegisterInfo::isSubRegisterEq
bool isSubRegisterEq(MCRegister RegA, MCRegister RegB) const
Returns true if RegB is a sub-register of RegA or if RegB == RegA.
Definition: MCRegisterInfo.h:568
llvm::AMDGPU::convertSMRDOffsetUnits
uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset)
Convert ByteOffset to dwords if the subtarget uses dword SMRD immediate offsets.
Definition: AMDGPUBaseInfo.cpp:1880
llvm::SIRegisterInfo
Definition: SIRegisterInfo.h:30
llvm::SIMachineFunctionInfo::SGPRForBPSaveRestoreCopy
Register SGPRForBPSaveRestoreCopy
If set, the SGPR used to save/restore the register used as the base pointer.
Definition: SIMachineFunctionInfo.h:501
llvm::BitVector::clearBitsNotInMask
void clearBitsNotInMask(const uint32_t *Mask, unsigned MaskWords=~0u)
clearBitsNotInMask - Clear a bit in this vector for every '0' bit in Mask.
Definition: BitVector.h:705
llvm::BitVector
Definition: BitVector.h:74
llvm::SIFrameLowering::allocateScavengingFrameIndexesNearIncomingSP
bool allocateScavengingFrameIndexesNearIncomingSP(const MachineFunction &MF) const override
Control the placement of special register scavenging spill slots when allocating a stack frame.
Definition: SIFrameLowering.cpp:1367
llvm::MachineFrameInfo::getObjectIndexBegin
int getObjectIndexBegin() const
Return the minimum frame object index.
Definition: MachineFrameInfo.h:390
llvm::MachineInstrBuilder::addExternalSymbol
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned TargetFlags=0) const
Definition: MachineInstrBuilder.h:184
llvm::ArrayRef::slice
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition: ArrayRef.h:193
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:95
llvm::MachineFrameInfo::isDeadObjectIndex
bool isDeadObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a dead object.
Definition: MachineFrameInfo.h:713
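getObjectIndexBegin, getObjectIndexEnd, and isDeadObjectIndex are usually combined into a simple scan over all frame objects. A simplified sketch in the spirit of allStackObjectsAreDead, not the exact implementation:
static bool allObjectsDead(const MachineFrameInfo &MFI) {
  for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd(); I != E; ++I)
    if (!MFI.isDeadObjectIndex(I))
      return false;   // a live statically sized object keeps the frame alive
  return true;
}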
llvm::SIFrameLowering::assignCalleeSavedSpillSlots
bool assignCalleeSavedSpillSlots(MachineFunction &MF, const TargetRegisterInfo *TRI, std::vector< CalleeSavedInfo > &CSI) const override
Definition: SIFrameLowering.cpp:1329
llvm::SIFrameLowering::emitPrologue
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override
emitProlog/emitEpilog - These methods insert prolog and epilog code into the function.
Definition: SIFrameLowering.cpp:744
llvm::BitVector::empty
bool empty() const
empty - Tests whether there are no bits in this bitvector.
Definition: BitVector.h:148
llvm::AMDGPU::Hwreg::WIDTH_M1_SHIFT_
@ WIDTH_M1_SHIFT_
Definition: SIDefines.h:405
llvm::AMDGPUSubtarget::GFX10
@ GFX10
Definition: AMDGPUSubtarget.h:41
llvm::SIMachineFunctionInfo::haveFreeLanesForSGPRSpill
bool haveFreeLanesForSGPRSpill(const MachineFunction &MF, unsigned NumLane) const
Returns true if NumLane slots are available in VGPRs already used for SGPR spilling.
Definition: SIMachineFunctionInfo.cpp:261
llvm::TargetOptions::DisableFramePointerElim
bool DisableFramePointerElim(const MachineFunction &MF) const
DisableFramePointerElim - This returns true if frame pointer elimination optimization should be disab...
Definition: TargetOptionsImpl.cpp:24
llvm::SIFrameLowering::determineCalleeSaves
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const override
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...
Definition: SIFrameLowering.cpp:1237
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:641
llvm::MachineInstrBuilder::setMIFlag
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
Definition: MachineInstrBuilder.h:278
llvm::cl::opt< bool >
llvm::SIMachineFunctionInfo::WWMReservedRegs
MapVector< Register, Optional< int > > WWMReservedRegs
Definition: SIMachineFunctionInfo.h:472
llvm::LivePhysRegs::stepBackward
void stepBackward(const MachineInstr &MI)
Simulates liveness when stepping backwards over an instruction (bundle).
Definition: LivePhysRegs.cpp:68
llvm::SIMachineFunctionInfo::getVGPRSpillAGPRs
ArrayRef< MCPhysReg > getVGPRSpillAGPRs() const
Definition: SIMachineFunctionInfo.h:539
llvm::CallingConv::AMDGPU_CS
@ AMDGPU_CS
Calling convention used for Mesa/AMDPAL compute shaders.
Definition: CallingConv.h:213
llvm::AMDGPU::Hwreg::ID_FLAT_SCR_HI
@ ID_FLAT_SCR_HI
Definition: SIDefines.h:380
AMDGPUMCTargetDesc.h
llvm::MachineOperand::setIsDead
void setIsDead(bool Val=true)
Definition: MachineOperand.h:506
llvm::AMDGPUAS::PRIVATE_ADDRESS
@ PRIVATE_ADDRESS
Address space for private memory.
Definition: AMDGPU.h:364
llvm::SIMachineFunctionInfo::isStackRealigned
bool isStackRealigned() const
Definition: SIMachineFunctionInfo.h:802
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:64
uint64_t
llvm::MachineFrameInfo::getObjectSize
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
Definition: MachineFrameInfo.h:453
llvm::Function::getCallingConv
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this function.
Definition: Function.h:240
llvm::BitVector::any
bool any() const
any - Returns true if any bit is set.
Definition: BitVector.h:162
llvm::AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET
@ PRIVATE_SEGMENT_WAVE_BYTE_OFFSET
Definition: AMDGPUArgumentUsageInfo.h:109
llvm::LivePhysRegs::available
bool available(const MachineRegisterInfo &MRI, MCPhysReg Reg) const
Returns true if register Reg and no aliasing register is in the set.
Definition: LivePhysRegs.cpp:141
llvm::MachinePointerInfo
This class contains a discriminated union of information about pointers in memory operands,...
Definition: MachineMemOperand.h:38
llvm::MachineRegisterInfo::getCalleeSavedRegs
const MCPhysReg * getCalleeSavedRegs() const
Returns list of callee saved registers.
Definition: MachineRegisterInfo.cpp:619
llvm::SIMachineFunctionInfo::FramePointerSaveIndex
Optional< int > FramePointerSaveIndex
Definition: SIMachineFunctionInfo.h:497
allStackObjectsAreDead
static bool allStackObjectsAreDead(const MachineFrameInfo &MFI)
Definition: SIFrameLowering.cpp:336
llvm::MCPhysReg
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition: MCRegister.h:21
llvm::RegScavenger
Definition: RegisterScavenging.h:34
llvm::MachineFrameInfo::getObjectAlign
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
Definition: MachineFrameInfo.h:467
llvm::AMDGPUMachineFunction::isEntryFunction
bool isEntryFunction() const
Definition: AMDGPUMachineFunction.h:78
llvm::TargetRegisterInfo::eliminateFrameIndex
virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, unsigned FIOperandNum, RegScavenger *RS=nullptr) const =0
This method must be overridden to eliminate abstract frame indices from instructions which may use the...
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:441
llvm::make_early_inc_range
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:593
llvm::TargetStackID::ScalableVector
@ ScalableVector
Definition: TargetFrameLowering.h:30
llvm::isUInt< 32 >
constexpr bool isUInt< 32 >(uint64_t x)
Definition: MathExtras.h:411
llvm::MachineRegisterInfo::isAllocatable
bool isAllocatable(MCRegister PhysReg) const
isAllocatable - Returns true when PhysReg belongs to an allocatable register class and it hasn't been...
Definition: MachineRegisterInfo.h:946
llvm::TargetMachine::Options
TargetOptions Options
Definition: TargetMachine.h:121
llvm::MachineFunction::getFrameInfo
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
Definition: MachineFunction.h:657
llvm::SIMachineFunctionInfo::getScavengeFI
int getScavengeFI(MachineFrameInfo &MFI, const SIRegisterInfo &TRI)
Definition: SIMachineFunctionInfo.cpp:457
llvm::MachineBasicBlock::getParent
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
Definition: MachineBasicBlock.h:225
llvm::RegScavenger::addScavengingFrameIndex
void addScavengingFrameIndex(int FI)
Add a scavenging frame index.
Definition: RegisterScavenging.h:123
llvm::MachineInstrBuilder::addMemOperand
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Definition: MachineInstrBuilder.h:202
llvm::SIMachineFunctionInfo::allocateVGPRSpillToAGPR
bool allocateVGPRSpillToAGPR(MachineFunction &MF, int FI, bool isAGPRtoVGPR)
Reserve AGPRs or VGPRs to support spilling for FrameIndex FI.
Definition: SIMachineFunctionInfo.cpp:366
llvm::MachineInstrBuilder::addReg
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Definition: MachineInstrBuilder.h:97
llvm::SIMachineFunctionInfo::getGITPtrLoReg
Register getGITPtrLoReg(const MachineFunction &MF) const
Definition: SIMachineFunctionInfo.cpp:482
llvm::MachineFunction
Definition: MachineFunction.h:241
llvm::MachineBasicBlock::getFirstTerminator
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
Definition: MachineBasicBlock.cpp:241
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::MachineFrameInfo::hasPatchPoint
bool hasPatchPoint() const
This method may be called any time after instruction selection is complete to determine if there is a...
Definition: MachineFrameInfo.h:386
llvm::SIFrameLowering::isSupportedStackID
bool isSupportedStackID(TargetStackID::Value ID) const override
Definition: SIFrameLowering.cpp:676
AMDGPU.h
llvm::LivePhysRegs::init
void init(const TargetRegisterInfo &TRI)
(re-)initializes and clears the set.
Definition: LivePhysRegs.h:66
llvm::BitVector::clearBitsInMask
void clearBitsInMask(const uint32_t *Mask, unsigned MaskWords=~0u)
clearBitsInMask - Clear any bits in this vector that are set in Mask.
Definition: BitVector.h:693
llvm::TargetStackID::NoAlloc
@ NoAlloc
Definition: TargetFrameLowering.h:32
getScratchScaleFactor
static unsigned getScratchScaleFactor(const GCNSubtarget &ST)
Definition: SIFrameLowering.cpp:399
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:134
llvm::AMDGPU::isCompute
bool isCompute(CallingConv::ID cc)
Definition: AMDGPUBaseInfo.cpp:1379
uint32_t
llvm::StackOffset
StackOffset is a class to represent an offset with 2 dimensions, named fixed and scalable,...
Definition: TypeSize.h:134
SIFrameLowering.h
llvm::SIMachineFunctionInfo::getScratchRSrcReg
Register getScratchRSrcReg() const
Returns the physical register reserved for use as the resource descriptor for scratch accesses.
Definition: SIMachineFunctionInfo.h:739
llvm::SIMachineFunctionInfo::getImplicitBufferPtrUserSGPR
Register getImplicitBufferPtrUserSGPR() const
Definition: SIMachineFunctionInfo.h:774
EnableSpillVGPRToAGPR
static cl::opt< bool > EnableSpillVGPRToAGPR("amdgpu-spill-vgpr-to-agpr", cl::desc("Enable spilling VGPRs to AGPRs"), cl::ReallyHidden, cl::init(true))
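A cl::opt flag like this reads as an ordinary bool. A hedged sketch of how such a flag might gate the VGPR-to-AGPR spilling path (the exact condition used by this file may differ):
// Only consider AGPR spill slots when the flag is on and the subtarget has
// AGPRs at all; hasMAIInsts() is the usual query for that.
if (EnableSpillVGPRToAGPR && ST.hasMAIInsts())
  FuncInfo->allocateVGPRSpillToAGPR(MF, FI, /*isAGPRtoVGPR=*/false);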
llvm::SIFrameLowering::processFunctionBeforeFrameFinalized
void processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS=nullptr) const override
processFunctionBeforeFrameFinalized - This method is called immediately before the specified function...
Definition: SIFrameLowering.cpp:1153
llvm::MachineMemOperand::MOLoad
@ MOLoad
The memory access reads data.
Definition: MachineMemOperand.h:135
llvm::MachineFrameInfo::getMaxAlign
Align getMaxAlign() const
Return the alignment in bytes that this function must be aligned to, which is greater than the defaul...
Definition: MachineFrameInfo.h:569
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
llvm::MachineBasicBlock::addLiveIn
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
Definition: MachineBasicBlock.h:367
llvm::MachineFrameInfo::isFrameAddressTaken
bool isFrameAddressTaken() const
This method may be called any time after instruction selection is complete to determine if there is a...
Definition: MachineFrameInfo.h:368
llvm::RegScavenger::enterBasicBlock
void enterBasicBlock(MachineBasicBlock &MBB)
Start tracking liveness from the begin of basic block MBB.
Definition: RegisterScavenging.cpp:84
llvm::MachineFrameInfo::hasCalls
bool hasCalls() const
Return true if the current function has any function calls.
Definition: MachineFrameInfo.h:581
llvm::SIInstrInfo::isLegalMUBUFImmOffset
static bool isLegalMUBUFImmOffset(unsigned Imm)
Definition: SIInstrInfo.h:1086
llvm::ISD::FrameIndex
@ FrameIndex
Definition: ISDOpcodes.h:80
llvm::MachineRegisterInfo::replaceRegWith
void replaceRegWith(Register FromReg, Register ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
Definition: MachineRegisterInfo.cpp:380
spilledToMemory
static bool spilledToMemory(const MachineFunction &MF, int SaveIndex)
Definition: SIFrameLowering.cpp:739
llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:607
allSGPRSpillsAreDead
static bool allSGPRSpillsAreDead(const MachineFunction &MF)
Definition: SIFrameLowering.cpp:1127
llvm::MachineBasicBlock::sortUniqueLiveIns
void sortUniqueLiveIns()
Sorts and uniques the LiveIns vector.
Definition: MachineBasicBlock.cpp:585
uint16_t
llvm::MachineFunction::getTarget
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with.
Definition: MachineFunction.h:637
llvm::SIFrameLowering::determineCalleeSavesSGPR
void determineCalleeSavesSGPR(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const
Definition: SIFrameLowering.cpp:1296
MachineFrameInfo.h
llvm::Align::value
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
llvm::SIMachineFunctionInfo::BasePointerSaveIndex
Optional< int > BasePointerSaveIndex
Definition: SIMachineFunctionInfo.h:502
llvm::SIFrameLowering::eliminateCallFramePseudoInstr
MachineBasicBlock::iterator eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
This method is called during prolog/epilog code insertion to eliminate call frame setup and destroy p...
Definition: SIFrameLowering.cpp:1395
llvm::TargetStackID::Value
Value
Definition: TargetFrameLowering.h:27
llvm::TargetStackID::Default
@ Default
Definition: TargetFrameLowering.h:28
llvm::SIMachineFunctionInfo::getAGPRSpillVGPRs
ArrayRef< MCPhysReg > getAGPRSpillVGPRs() const
Definition: SIMachineFunctionInfo.h:535
llvm::SIMachineFunctionInfo::setScratchRSrcReg
void setScratchRSrcReg(Register Reg)
Definition: SIMachineFunctionInfo.h:743
llvm::TargetStackID::SGPRSpill
@ SGPRSpill
Definition: TargetFrameLowering.h:29
llvm::MachineMemOperand::MOStore
@ MOStore
The memory access writes data.
Definition: MachineMemOperand.h:137
llvm::BitVector::reset
BitVector & reset()
Definition: BitVector.h:384
llvm::SIInstrInfo
Definition: SIInstrInfo.h:38
llvm::TargetRegisterInfo::hasStackRealignment
bool hasStackRealignment(const MachineFunction &MF) const
True if stack realignment is required and still possible.
Definition: TargetRegisterInfo.h:936
llvm::MachineFrameInfo
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
Definition: MachineFrameInfo.h:107
llvm::MachinePointerInfo::getFixedStack
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
Definition: MachineOperand.cpp:1008
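getFixedStack and getMachineMemOperand are typically paired when a spill or reload instruction needs a memory operand describing its frame slot. A hedged sketch, with MF and FI assumed to be in scope:
MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
MachineMemOperand *MMO = MF.getMachineMemOperand(
    PtrInfo, MachineMemOperand::MOLoad, 4,
    MF.getFrameInfo().getObjectAlign(FI));   // 4-byte reload from slot FI
// The MMO is then attached to the instruction with addMemOperand(MMO).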
llvm::SIMachineFunctionInfo::removeDeadFrameIndices
void removeDeadFrameIndices(MachineFrameInfo &MFI)
Definition: SIMachineFunctionInfo.cpp:431
llvm::MachineBasicBlock::begin
iterator begin()
Definition: MachineBasicBlock.h:268
buildPrologSpill
static void buildPrologSpill(const GCNSubtarget &ST, const SIRegisterInfo &TRI, const SIMachineFunctionInfo &FuncInfo, LivePhysRegs &LiveRegs, MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SpillReg, int FI)
Definition: SIFrameLowering.cpp:124
llvm::BuildMI
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
Definition: MachineInstrBuilder.h:328
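BuildMI, addReg, and setMIFlag are the building blocks for the prologue/epilogue instructions this file emits. A hedged example (ScratchSGPR and FramePtrReg are placeholders) that copies the frame pointer into a scratch SGPR and tags the copy as frame-setup code:
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), ScratchSGPR)
    .addReg(FramePtrReg)
    .setMIFlag(MachineInstr::FrameSetup);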
llvm::SIMachineFunctionInfo
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Definition: SIMachineFunctionInfo.h:335
llvm::ArrayRef::size
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:163
llvm::SIFrameLowering::emitEntryFunctionPrologue
void emitEntryFunctionPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const
Definition: SIFrameLowering.cpp:403
llvm::AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER
@ PRIVATE_SEGMENT_BUFFER
Definition: AMDGPUArgumentUsageInfo.h:100
llvm::TargetRegisterInfo::getSubReg
MCRegister getSubReg(MCRegister Reg, unsigned Idx) const
Returns the physical register number of sub-register Idx for physical register Reg.
Definition: TargetRegisterInfo.h:1094
llvm::AMDGPU::Hwreg::ID_FLAT_SCR_LO
@ ID_FLAT_SCR_LO
Definition: SIDefines.h:379
llvm::TargetFrameLowering::determineCalleeSaves
virtual void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...
Definition: TargetFrameLoweringImpl.cpp:78
llvm::MachineFrameInfo::hasStackMap
bool hasStackMap() const
This method may be called any time after instruction selection is complete to determine if there is a...
Definition: MachineFrameInfo.h:380
llvm::DebugLoc
A debug info location.
Definition: DebugLoc.h:33
llvm::cl::desc
Definition: CommandLine.h:412
RegisterScavenging.h
findScratchNonCalleeSaveRegister
static MCRegister findScratchNonCalleeSaveRegister(MachineRegisterInfo &MRI, LivePhysRegs &LiveRegs, const TargetRegisterClass &RC, bool Unused=false)
Definition: SIFrameLowering.cpp:33
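A hedged example of invoking this helper; MF and LiveRegs are assumed to be in scope, and the register class shown is one plausible choice:
// Pick a VGPR that is free here and is not a callee-saved register; an
// invalid MCRegister() is returned when nothing is available.
Register ScratchVGPR = findScratchNonCalleeSaveRegister(
    MF.getRegInfo(), LiveRegs, AMDGPU::VGPR_32RegClass);
if (!ScratchVGPR)
  report_fatal_error("no free VGPR available for frame lowering");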
llvm::printReg
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
Definition: TargetRegisterInfo.cpp:110
llvm::SIMachineFunctionInfo::SGPRSpillVGPR
Definition: SIMachineFunctionInfo.h:454
llvm::MachineInstrBundleIterator
MachineBasicBlock iterator that automatically skips over MIs that are inside bundles (i....
Definition: MachineInstrBundleIterator.h:108
llvm::SIMachineFunctionInfo::getSGPRToVGPRSpills
ArrayRef< SpilledReg > getSGPRToVGPRSpills(int FrameIndex) const
Definition: SIMachineFunctionInfo.h:519
llvm::SIFrameLowering::getFrameIndexReference
StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg) const override
getFrameIndexReference - This method should return the base register and offset used to reference a f...
Definition: SIFrameLowering.cpp:1144
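A hedged sketch of resolving a frame index to a base register plus byte offset through this override; MF and FI are assumed to be in scope:
Register FrameReg;
StackOffset Offset = MF.getSubtarget().getFrameLowering()
                         ->getFrameIndexReference(MF, FI, FrameReg);
int64_t Bytes = Offset.getFixed();   // the scalable component is unused on AMDGPU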
llvm::SIMachineFunctionInfo::hasImplicitBufferPtr
bool hasImplicitBufferPtr() const
Definition: SIMachineFunctionInfo.h:689
llvm::SIMachineFunctionInfo::hasFlatScratchInit
bool hasFlatScratchInit() const
Definition: SIMachineFunctionInfo.h:649
llvm::SIMachineFunctionInfo::getFrameOffsetReg
Register getFrameOffsetReg() const
Definition: SIMachineFunctionInfo.h:748
llvm::MCRegister
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:24
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38
LivePhysRegs.h