1 //===----------------------- SIFrameLowering.cpp --------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //==-----------------------------------------------------------------------===//
8 
9 #include "SIFrameLowering.h"
10 #include "AMDGPU.h"
11 #include "GCNSubtarget.h"
12 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
13 #include "SIMachineFunctionInfo.h"
14 #include "llvm/CodeGen/LivePhysRegs.h"
15 #include "llvm/CodeGen/MachineFrameInfo.h"
16 #include "llvm/CodeGen/RegisterScavenging.h"
17 #include "llvm/Target/TargetMachine.h"
18 
19 using namespace llvm;
20 
21 #define DEBUG_TYPE "frame-info"
22 
24  "amdgpu-spill-vgpr-to-agpr",
25  cl::desc("Enable spilling VGPRs to AGPRs"),
27  cl::init(true));
28 
29 // Find a scratch register that we can use in the prologue. We avoid using
30 // callee-save registers since they may appear to be free when this is called
31 // from canUseAsPrologue (during shrink wrapping), but then no longer be free
32 // when this is called from emitPrologue.
33 static MCRegister findScratchNonCalleeSaveRegister(MachineRegisterInfo &MRI,
34  LivePhysRegs &LiveRegs,
35  const TargetRegisterClass &RC,
36  bool Unused = false) {
37  // Mark callee saved registers as used so we will not choose them.
38  const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
39  for (unsigned i = 0; CSRegs[i]; ++i)
40  LiveRegs.addReg(CSRegs[i]);
41 
42  if (Unused) {
43  // We are looking for a register that can be used throughout the entire
44  // function, so any use is unacceptable.
45  for (MCRegister Reg : RC) {
46  if (!MRI.isPhysRegUsed(Reg) && LiveRegs.available(MRI, Reg))
47  return Reg;
48  }
49  } else {
50  for (MCRegister Reg : RC) {
51  if (LiveRegs.available(MRI, Reg))
52  return Reg;
53  }
54  }
55 
56  return MCRegister();
57 }
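// Illustrative use (a sketch based on the callers later in this file, not new
// API): the prologue asks for a temporary VGPR roughly as
//
//   MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
//       MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
//   if (!TmpVGPR)
//     report_fatal_error("failed to find free scratch register");
//
// Passing Unused = true additionally requires that the register has no uses
// anywhere in the function, so it can carry a value (such as a saved FP)
// across the whole function body.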
58 
59 static void getVGPRSpillLaneOrTempRegister(MachineFunction &MF,
60  LivePhysRegs &LiveRegs,
61  Register &TempSGPR,
62  Optional<int> &FrameIndex,
63  bool IsFP) {
64  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
65  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
66 
67  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
68  const SIRegisterInfo *TRI = ST.getRegisterInfo();
69 
70  // We need to save and restore the current FP/BP.
71 
72  // 1: If there is already a VGPR with free lanes, use it. We
73  // may already have to pay the penalty for spilling a CSR VGPR.
74  if (MFI->haveFreeLanesForSGPRSpill(MF, 1)) {
75  int NewFI = FrameInfo.CreateStackObject(4, Align(4), true, nullptr,
76  TargetStackID::SGPRSpill);
77 
78  if (!MFI->allocateSGPRSpillToVGPR(MF, NewFI))
79  llvm_unreachable("allocate SGPR spill should have worked");
80 
81  FrameIndex = NewFI;
82 
83  LLVM_DEBUG(auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front();
84  dbgs() << "Spilling " << (IsFP ? "FP" : "BP") << " to "
85  << printReg(Spill.VGPR, TRI) << ':' << Spill.Lane
86  << '\n');
87  return;
88  }
89 
90  // 2: Next, try to save the FP/BP in an unused SGPR.
91  TempSGPR = findScratchNonCalleeSaveRegister(
92  MF.getRegInfo(), LiveRegs, AMDGPU::SReg_32_XM0_XEXECRegClass, true);
93 
94  if (!TempSGPR) {
95  int NewFI = FrameInfo.CreateStackObject(4, Align(4), true, nullptr,
96  TargetStackID::SGPRSpill);
97 
98  if (TRI->spillSGPRToVGPR() && MFI->allocateSGPRSpillToVGPR(MF, NewFI)) {
99  // 3: There's no free lane to spill, and no free register to save FP/BP,
100  // so we're forced to spill another VGPR to use for the spill.
101  FrameIndex = NewFI;
102 
103  LLVM_DEBUG(
104  auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front();
105  dbgs() << (IsFP ? "FP" : "BP") << " requires fallback spill to "
106  << printReg(Spill.VGPR, TRI) << ':' << Spill.Lane << '\n';);
107  } else {
108  // Remove dead <NewFI> index
109  MF.getFrameInfo().RemoveStackObject(NewFI);
110  // 4: If all else fails, spill the FP/BP to memory.
111  FrameIndex = FrameInfo.CreateSpillStackObject(4, Align(4));
112  LLVM_DEBUG(dbgs() << "Reserved FI " << FrameIndex << " for spilling "
113  << (IsFP ? "FP" : "BP") << '\n');
114  }
115  } else {
116  LLVM_DEBUG(dbgs() << "Saving " << (IsFP ? "FP" : "BP") << " with copy to "
117  << printReg(TempSGPR, TRI) << '\n');
118  }
119 }
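// To summarize the fallback order chosen above: (1) reuse a free lane in a
// VGPR that is already being spilled, (2) copy the FP/BP into an entirely
// unused SGPR, (3) spill a fresh VGPR and use one of its lanes, (4) spill the
// FP/BP to a scratch memory slot. Exactly one of TempSGPR or FrameIndex is
// set for the caller, depending on which strategy succeeded.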
120 
121 // We need to specially emit stack operations here because a different frame
122 // register is used here than the one getFrameRegister would return for the
123 // rest of the function.
124 static void buildPrologSpill(const GCNSubtarget &ST, const SIRegisterInfo &TRI,
125  const SIMachineFunctionInfo &FuncInfo,
126  LivePhysRegs &LiveRegs, MachineFunction &MF,
127  MachineBasicBlock &MBB,
128  MachineBasicBlock::iterator I, const DebugLoc &DL,
129  Register SpillReg, int FI) {
130  unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
131  : AMDGPU::BUFFER_STORE_DWORD_OFFSET;
132 
133  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
134  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
135  MachineMemOperand *MMO = MF.getMachineMemOperand(
136  PtrInfo, MachineMemOperand::MOStore, FrameInfo.getObjectSize(FI),
137  FrameInfo.getObjectAlign(FI));
138  LiveRegs.addReg(SpillReg);
139  TRI.buildSpillLoadStore(MBB, I, DL, Opc, FI, SpillReg, true,
140  FuncInfo.getStackPtrOffsetReg(), 0, MMO, nullptr,
141  &LiveRegs);
142  LiveRegs.removeReg(SpillReg);
143 }
144 
145 static void buildEpilogRestore(const GCNSubtarget &ST,
146  const SIRegisterInfo &TRI,
147  const SIMachineFunctionInfo &FuncInfo,
148  LivePhysRegs &LiveRegs, MachineFunction &MF,
149  MachineBasicBlock &MBB,
150  MachineBasicBlock::iterator I,
151  const DebugLoc &DL, Register SpillReg, int FI) {
152  unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
153  : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
154 
155  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
156  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
157  MachineMemOperand *MMO = MF.getMachineMemOperand(
158  PtrInfo, MachineMemOperand::MOLoad, FrameInfo.getObjectSize(FI),
159  FrameInfo.getObjectAlign(FI));
160  TRI.buildSpillLoadStore(MBB, I, DL, Opc, FI, SpillReg, false,
161  FuncInfo.getStackPtrOffsetReg(), 0, MMO, nullptr,
162  &LiveRegs);
163 }
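// Illustrative output (an assumed sketch, not taken from a test): for a
// 4-byte slot at offset 16 from the SP, the two helpers above emit roughly
//
//   buffer_store_dword v0, off, s[0:3], s32 offset:16   ; MUBUF scratch
//   scratch_store_dword off, v0, s32 offset:16           ; flat scratch
//
// in the prologue, and the matching buffer_load_dword / scratch_load_dword in
// the epilogue, addressed relative to getStackPtrOffsetReg().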
164 
165 static void buildGitPtr(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
166  const DebugLoc &DL, const SIInstrInfo *TII,
167  Register TargetReg) {
168  MachineFunction *MF = MBB.getParent();
169  const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
170  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
171  const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
172  Register TargetLo = TRI->getSubReg(TargetReg, AMDGPU::sub0);
173  Register TargetHi = TRI->getSubReg(TargetReg, AMDGPU::sub1);
174 
175  if (MFI->getGITPtrHigh() != 0xffffffff) {
176  BuildMI(MBB, I, DL, SMovB32, TargetHi)
177  .addImm(MFI->getGITPtrHigh())
178  .addReg(TargetReg, RegState::ImplicitDefine);
179  } else {
180  const MCInstrDesc &GetPC64 = TII->get(AMDGPU::S_GETPC_B64);
181  BuildMI(MBB, I, DL, GetPC64, TargetReg);
182  }
183  Register GitPtrLo = MFI->getGITPtrLoReg(*MF);
184  MF->getRegInfo().addLiveIn(GitPtrLo);
185  MBB.addLiveIn(GitPtrLo);
186  BuildMI(MBB, I, DL, SMovB32, TargetLo)
187  .addReg(GitPtrLo);
188 }
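// Illustrative expansion (sketch): if the function carries
// "amdgpu-git-ptr-high"="0x1234", the GIT pointer is materialized as
//
//   s_mov_b32 s_hi, 0x1234
//   s_mov_b32 s_lo, <GIT pointer low SGPR preloaded by the driver>
//
// otherwise s_getpc_b64 supplies the high half and only the low half is
// overwritten with the preloaded register.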
189 
190 // Emit flat scratch setup code, assuming `MFI->hasFlatScratchInit()`
191 void SIFrameLowering::emitEntryFunctionFlatScratchInit(
192  MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
193  const DebugLoc &DL, Register ScratchWaveOffsetReg) const {
194  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
195  const SIInstrInfo *TII = ST.getInstrInfo();
196  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
197  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
198 
199  // We don't need this if we only have spills since there is no user facing
200  // scratch.
201 
202  // TODO: If we know we don't have flat instructions earlier, we can omit
203  // this from the input registers.
204  //
205  // TODO: We only need to know if we access scratch space through a flat
206  // pointer. Because we only detect if flat instructions are used at all,
207  // this will be used more often than necessary on VI.
208 
209  Register FlatScrInitLo;
210  Register FlatScrInitHi;
211 
212  if (ST.isAmdPalOS()) {
213  // Extract the scratch offset from the descriptor in the GIT
214  LivePhysRegs LiveRegs;
215  LiveRegs.init(*TRI);
216  LiveRegs.addLiveIns(MBB);
217 
218  // Find unused reg to load flat scratch init into
219  MachineRegisterInfo &MRI = MF.getRegInfo();
220  Register FlatScrInit = AMDGPU::NoRegister;
221  ArrayRef<MCPhysReg> AllSGPR64s = TRI->getAllSGPR64(MF);
222  unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 1) / 2;
223  AllSGPR64s = AllSGPR64s.slice(
224  std::min(static_cast<unsigned>(AllSGPR64s.size()), NumPreloaded));
225  Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
226  for (MCPhysReg Reg : AllSGPR64s) {
227  if (LiveRegs.available(MRI, Reg) && MRI.isAllocatable(Reg) &&
228  !TRI->isSubRegisterEq(Reg, GITPtrLoReg)) {
229  FlatScrInit = Reg;
230  break;
231  }
232  }
233  assert(FlatScrInit && "Failed to find free register for scratch init");
234 
235  FlatScrInitLo = TRI->getSubReg(FlatScrInit, AMDGPU::sub0);
236  FlatScrInitHi = TRI->getSubReg(FlatScrInit, AMDGPU::sub1);
237 
238  buildGitPtr(MBB, I, DL, TII, FlatScrInit);
239 
240  // We now have the GIT ptr - now get the scratch descriptor from the entry
241  // at offset 0 (or offset 16 for a compute shader).
242  MachinePointerInfo PtrInfo = MachinePointerInfo::getGOT(MF);
243  const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);
244  auto *MMO = MF.getMachineMemOperand(
245  PtrInfo,
246  MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
247  MachineMemOperand::MODereferenceable,
248  8, Align(4));
249  unsigned Offset =
250  MF.getFunction().getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0;
251  const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
252  unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset);
253  BuildMI(MBB, I, DL, LoadDwordX2, FlatScrInit)
254  .addReg(FlatScrInit)
255  .addImm(EncodedOffset) // offset
256  .addImm(0) // cpol
257  .addMemOperand(MMO);
258 
259  // Mask the offset in [47:0] of the descriptor
260  const MCInstrDesc &SAndB32 = TII->get(AMDGPU::S_AND_B32);
261  auto And = BuildMI(MBB, I, DL, SAndB32, FlatScrInitHi)
262  .addReg(FlatScrInitHi)
263  .addImm(0xffff);
264  And->getOperand(3).setIsDead(); // Mark SCC as dead.
265  } else {
266  Register FlatScratchInitReg =
267  MFI->getPreloadedReg(AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT);
268  assert(FlatScratchInitReg);
269 
270  MachineRegisterInfo &MRI = MF.getRegInfo();
271  MRI.addLiveIn(FlatScratchInitReg);
272  MBB.addLiveIn(FlatScratchInitReg);
273 
274  FlatScrInitLo = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0);
275  FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1);
276  }
277 
278  // Do a 64-bit pointer add.
279  if (ST.flatScratchIsPointer()) {
280  if (ST.getGeneration() >= AMDGPUSubtarget::GFX10) {
281  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo)
282  .addReg(FlatScrInitLo)
283  .addReg(ScratchWaveOffsetReg);
284  auto Addc = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32),
285  FlatScrInitHi)
286  .addReg(FlatScrInitHi)
287  .addImm(0);
288  Addc->getOperand(3).setIsDead(); // Mark SCC as dead.
289 
290  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)).
291  addReg(FlatScrInitLo).
292  addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_LO |
293  (31 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_)));
294  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)).
295  addReg(FlatScrInitHi).
296  addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_HI |
297  (31 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_)));
298  return;
299  }
300 
301  // For GFX9.
302  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), AMDGPU::FLAT_SCR_LO)
303  .addReg(FlatScrInitLo)
304  .addReg(ScratchWaveOffsetReg);
305  auto Addc = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32),
306  AMDGPU::FLAT_SCR_HI)
307  .addReg(FlatScrInitHi)
308  .addImm(0);
309  Addc->getOperand(3).setIsDead(); // Mark SCC as dead.
310 
311  return;
312  }
313 
314  assert(ST.getGeneration() < AMDGPUSubtarget::GFX9);
315 
316  // Copy the size in bytes.
317  BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), AMDGPU::FLAT_SCR_LO)
318  .addReg(FlatScrInitHi, RegState::Kill);
319 
320  // Add wave offset in bytes to private base offset.
321  // See comment in AMDKernelCodeT.h for enable_sgpr_flat_scratch_init.
322  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), FlatScrInitLo)
323  .addReg(FlatScrInitLo)
324  .addReg(ScratchWaveOffsetReg);
325 
326  // Convert offset to 256-byte units.
327  auto LShr = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_LSHR_B32),
328  AMDGPU::FLAT_SCR_HI)
329  .addReg(FlatScrInitLo, RegState::Kill)
330  .addImm(8);
331  LShr->getOperand(3).setIsDead(); // Mark SCC as dead.
332 }
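// Illustrative result (a sketch, registers chosen arbitrarily): on GFX9 with
// a pointer-style flat scratch this collapses to
//
//   s_add_u32  flat_scratch_lo, s_init_lo, s_wave_offset
//   s_addc_u32 flat_scratch_hi, s_init_hi, 0
//
// while GFX10+ performs the same add on ordinary SGPRs and then writes the
// result with s_setreg_b32, since FLAT_SCRATCH is no longer an addressable
// SGPR pair there. Pre-GFX9 targets instead pack the size and a 256-byte-unit
// offset into FLAT_SCR_LO/HI, as done at the end of this function.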
333 
334 // Note SGPRSpill stack IDs should only be used for SGPR spilling to VGPRs, not
335 // memory. They should have been removed by now.
336 static bool allStackObjectsAreDead(const MachineFrameInfo &MFI) {
337  for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
338  I != E; ++I) {
339  if (!MFI.isDeadObjectIndex(I))
340  return false;
341  }
342 
343  return true;
344 }
345 
346 // Shift down registers reserved for the scratch RSRC.
347 Register SIFrameLowering::getEntryFunctionReservedScratchRsrcReg(
348  MachineFunction &MF) const {
349 
350  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
351  const SIInstrInfo *TII = ST.getInstrInfo();
352  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
353  MachineRegisterInfo &MRI = MF.getRegInfo();
354  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
355 
356  assert(MFI->isEntryFunction());
357 
358  Register ScratchRsrcReg = MFI->getScratchRSrcReg();
359 
360  if (!ScratchRsrcReg || (!MRI.isPhysRegUsed(ScratchRsrcReg) &&
361  allStackObjectsAreDead(MF.getFrameInfo())))
362  return Register();
363 
364  if (ST.hasSGPRInitBug() ||
365  ScratchRsrcReg != TRI->reservedPrivateSegmentBufferReg(MF))
366  return ScratchRsrcReg;
367 
368  // We reserved the last registers for this. Shift it down to the end of those
369  // which were actually used.
370  //
371  // FIXME: It might be safer to use a pseudoregister before replacement.
372 
373  // FIXME: We should be able to eliminate unused input registers. We only
374  // cannot do this for the resources required for scratch access. For now we
375  // skip over user SGPRs and may leave unused holes.
376 
377  unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 3) / 4;
378  ArrayRef<MCPhysReg> AllSGPR128s = TRI->getAllSGPR128(MF);
379  AllSGPR128s = AllSGPR128s.slice(std::min(static_cast<unsigned>(AllSGPR128s.size()), NumPreloaded));
380 
381  // Skip the last N reserved elements because they should have already been
382  // reserved for VCC etc.
383  Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
384  for (MCPhysReg Reg : AllSGPR128s) {
385  // Pick the first unallocated one. Make sure we don't clobber the other
386  // reserved input we needed. Also for PAL, make sure we don't clobber
387  // the GIT pointer passed in SGPR0 or SGPR8.
388  if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg) &&
389  !TRI->isSubRegisterEq(Reg, GITPtrLoReg)) {
390  MRI.replaceRegWith(ScratchRsrcReg, Reg);
391  MFI->setScratchRSrcReg(Reg);
392  return Reg;
393  }
394  }
395 
396  return ScratchRsrcReg;
397 }
398 
399 static unsigned getScratchScaleFactor(const GCNSubtarget &ST) {
400  return ST.enableFlatScratch() ? 1 : ST.getWavefrontSize();
401 }
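// Worked example (illustrative): on a wave64 subtarget using MUBUF scratch, a
// per-lane frame of 16 bytes scales to a 16 * 64 = 1024 byte SP adjustment,
// because the MUBUF stack pointer is expressed in bytes for the whole wave.
// With flat scratch enabled the SP is already per-lane, so the factor is 1.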
402 
403 void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
404  MachineBasicBlock &MBB) const {
405  assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
406 
407  // FIXME: If we only have SGPR spills, we won't actually be using scratch
408  // memory since these spill to VGPRs. We should be cleaning up these unused
409  // SGPR spill frame indices somewhere.
410 
411  // FIXME: We still have implicit uses on SGPR spill instructions in case they
412  // need to spill to vector memory. It's likely that will not happen, but at
413  // this point it appears we need the setup. This part of the prolog should be
414  // emitted after frame indices are eliminated.
415 
416  // FIXME: Remove all of the isPhysRegUsed checks
417 
417 
418  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
419  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
420  const SIInstrInfo *TII = ST.getInstrInfo();
421  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
422  MachineRegisterInfo &MRI = MF.getRegInfo();
423  const Function &F = MF.getFunction();
424  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
425 
426  assert(MFI->isEntryFunction());
427 
428  Register PreloadedScratchWaveOffsetReg = MFI->getPreloadedReg(
429  AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);
430 
431  // We need to do the replacement of the private segment buffer register even
432  // if there are no stack objects. There could be stores to undef or a
433  // constant without an associated object.
434  //
435  // This will return `Register()` in cases where there are no actual
436  // uses of the SRSRC.
437  Register ScratchRsrcReg;
438  if (!ST.enableFlatScratch())
439  ScratchRsrcReg = getEntryFunctionReservedScratchRsrcReg(MF);
440 
441  // Make the selected register live throughout the function.
442  if (ScratchRsrcReg) {
443  for (MachineBasicBlock &OtherBB : MF) {
444  if (&OtherBB != &MBB) {
445  OtherBB.addLiveIn(ScratchRsrcReg);
446  }
447  }
448  }
449 
450  // Now that we have fixed the reserved SRSRC we need to locate the
451  // (potentially) preloaded SRSRC.
452  Register PreloadedScratchRsrcReg;
453  if (ST.isAmdHsaOrMesa(F)) {
454  PreloadedScratchRsrcReg =
455  MFI->getPreloadedReg(AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER);
456  if (ScratchRsrcReg && PreloadedScratchRsrcReg) {
457  // We added live-ins during argument lowering, but since they were not
458  // used they were deleted. We're adding the uses now, so add them back.
459  MRI.addLiveIn(PreloadedScratchRsrcReg);
460  MBB.addLiveIn(PreloadedScratchRsrcReg);
461  }
462  }
463 
464  // Debug location must be unknown since the first debug location is used to
465  // determine the end of the prologue.
466  DebugLoc DL;
467  MachineBasicBlock::iterator I = MBB.begin();
468 
469  // We found the SRSRC first because it needs four registers and has an
470  // alignment requirement. If the SRSRC that we found clobbers
471  // the scratch wave offset, which may be in a fixed SGPR or a free SGPR
472  // chosen by SITargetLowering::allocateSystemSGPRs, COPY the scratch
473  // wave offset to a free SGPR.
474  Register ScratchWaveOffsetReg;
475  if (PreloadedScratchWaveOffsetReg &&
476  TRI->isSubRegisterEq(ScratchRsrcReg, PreloadedScratchWaveOffsetReg)) {
477  ArrayRef<MCPhysReg> AllSGPRs = TRI->getAllSGPR32(MF);
478  unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();
479  AllSGPRs = AllSGPRs.slice(
480  std::min(static_cast<unsigned>(AllSGPRs.size()), NumPreloaded));
481  Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
482  for (MCPhysReg Reg : AllSGPRs) {
483  if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg) &&
484  !TRI->isSubRegisterEq(ScratchRsrcReg, Reg) && GITPtrLoReg != Reg) {
485  ScratchWaveOffsetReg = Reg;
486  BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchWaveOffsetReg)
487  .addReg(PreloadedScratchWaveOffsetReg, RegState::Kill);
488  break;
489  }
490  }
491  } else {
492  ScratchWaveOffsetReg = PreloadedScratchWaveOffsetReg;
493  }
494  assert(ScratchWaveOffsetReg || !PreloadedScratchWaveOffsetReg);
495 
496  if (requiresStackPointerReference(MF)) {
497  Register SPReg = MFI->getStackPtrOffsetReg();
498  assert(SPReg != AMDGPU::SP_REG);
499  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), SPReg)
500  .addImm(FrameInfo.getStackSize() * getScratchScaleFactor(ST));
501  }
502 
503  if (hasFP(MF)) {
504  Register FPReg = MFI->getFrameOffsetReg();
505  assert(FPReg != AMDGPU::FP_REG);
506  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), FPReg).addImm(0);
507  }
508 
509  bool NeedsFlatScratchInit =
510  MFI->hasFlatScratchInit() &&
511  (MRI.isPhysRegUsed(AMDGPU::FLAT_SCR) || FrameInfo.hasCalls() ||
512  (!allStackObjectsAreDead(FrameInfo) && ST.enableFlatScratch()));
513 
514  if ((NeedsFlatScratchInit || ScratchRsrcReg) &&
515  PreloadedScratchWaveOffsetReg && !ST.flatScratchIsArchitected()) {
516  MRI.addLiveIn(PreloadedScratchWaveOffsetReg);
517  MBB.addLiveIn(PreloadedScratchWaveOffsetReg);
518  }
519 
520  if (NeedsFlatScratchInit) {
521  emitEntryFunctionFlatScratchInit(MF, MBB, I, DL, ScratchWaveOffsetReg);
522  }
523 
524  if (ScratchRsrcReg) {
525  emitEntryFunctionScratchRsrcRegSetup(MF, MBB, I, DL,
526  PreloadedScratchRsrcReg,
527  ScratchRsrcReg, ScratchWaveOffsetReg);
528  }
529 }
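// Illustrative kernel prologue (a sketch under assumed register assignments:
// wave64, MUBUF scratch, 32-byte per-lane frame, no calls):
//
//   s_add_u32  s0, s0, s11     ; fold the wave offset into the SRD base (lo)
//   s_addc_u32 s1, s1, 0       ; carry into the SRD base (hi)
//   s_mov_b32  s32, 0x800      ; SP = 32 bytes * 64 lanes
//
// The actual registers depend on which user/system SGPRs were preloaded and
// on whether flat scratch is used instead of a buffer descriptor.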
530 
531 // Emit scratch RSRC setup code, assuming `ScratchRsrcReg != AMDGPU::NoReg`
532 void SIFrameLowering::emitEntryFunctionScratchRsrcRegSetup(
533  MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
534  const DebugLoc &DL, Register PreloadedScratchRsrcReg,
535  Register ScratchRsrcReg, Register ScratchWaveOffsetReg) const {
536 
537  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
538  const SIInstrInfo *TII = ST.getInstrInfo();
539  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
540  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
541  const Function &Fn = MF.getFunction();
542 
543  if (ST.isAmdPalOS()) {
544  // The pointer to the GIT is formed from the offset passed in and either
545  // the amdgpu-git-ptr-high function attribute or the top part of the PC
546  Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
547  Register Rsrc03 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);
548 
549  buildGitPtr(MBB, I, DL, TII, Rsrc01);
550 
551  // We now have the GIT ptr - now get the scratch descriptor from the entry
552  // at offset 0 (or offset 16 for a compute shader).
553  MachinePointerInfo PtrInfo = MachinePointerInfo::getGOT(MF);
554  const MCInstrDesc &LoadDwordX4 = TII->get(AMDGPU::S_LOAD_DWORDX4_IMM);
555  auto MMO = MF.getMachineMemOperand(PtrInfo,
556  MachineMemOperand::MOLoad |
557  MachineMemOperand::MOInvariant |
558  MachineMemOperand::MODereferenceable,
559  16, Align(4));
560  unsigned Offset = Fn.getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0;
561  const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
562  unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset);
563  BuildMI(MBB, I, DL, LoadDwordX4, ScratchRsrcReg)
564  .addReg(Rsrc01)
565  .addImm(EncodedOffset) // offset
566  .addImm(0) // cpol
567  .addReg(ScratchRsrcReg, RegState::ImplicitDefine)
568  .addMemOperand(MMO);
569 
570  // The driver will always set the SRD for wave 64 (bits 118:117 of
571  // descriptor / bits 22:21 of third sub-reg will be 0b11)
572  // If the shader is actually wave32 we have to modify the const_index_stride
573  // field of the descriptor 3rd sub-reg (bits 22:21) to 0b10 (stride=32). The
574  // reason the driver does this is that there can be cases where it presents
575  // 2 shaders with different wave size (e.g. VsFs).
576  // TODO: convert to using SCRATCH instructions or multiple SRD buffers
577  if (ST.isWave32()) {
578  const MCInstrDesc &SBitsetB32 = TII->get(AMDGPU::S_BITSET0_B32);
579  BuildMI(MBB, I, DL, SBitsetB32, Rsrc03)
580  .addImm(21)
581  .addReg(Rsrc03);
582  }
583  } else if (ST.isMesaGfxShader(Fn) || !PreloadedScratchRsrcReg) {
584  assert(!ST.isAmdHsaOrMesa(Fn));
585  const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
586 
587  Register Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
588  Register Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);
589 
590  // Use relocations to get the pointer, and setup the other bits manually.
591  uint64_t Rsrc23 = TII->getScratchRsrcWords23();
592 
593  if (MFI->hasImplicitBufferPtr()) {
594  Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
595 
597  const MCInstrDesc &Mov64 = TII->get(AMDGPU::S_MOV_B64);
598 
599  BuildMI(MBB, I, DL, Mov64, Rsrc01)
600  .addReg(MFI->getImplicitBufferPtrUserSGPR())
601  .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
602  } else {
603  const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);
604 
605  MachinePointerInfo PtrInfo = MachinePointerInfo::getGOT(MF);
606  auto MMO = MF.getMachineMemOperand(
607  PtrInfo,
608  MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
609  MachineMemOperand::MODereferenceable,
610  8, Align(4));
611  BuildMI(MBB, I, DL, LoadDwordX2, Rsrc01)
612  .addReg(MFI->getImplicitBufferPtrUserSGPR())
613  .addImm(0) // offset
614  .addImm(0) // cpol
615  .addMemOperand(MMO)
616  .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
617 
618  MF.getRegInfo().addLiveIn(MFI->getImplicitBufferPtrUserSGPR());
619  MBB.addLiveIn(MFI->getImplicitBufferPtrUserSGPR());
620  }
621  } else {
622  Register Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
623  Register Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
624 
625  BuildMI(MBB, I, DL, SMovB32, Rsrc0)
626  .addExternalSymbol("SCRATCH_RSRC_DWORD0")
627  .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
628 
629  BuildMI(MBB, I, DL, SMovB32, Rsrc1)
630  .addExternalSymbol("SCRATCH_RSRC_DWORD1")
631  .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
632 
633  }
634 
635  BuildMI(MBB, I, DL, SMovB32, Rsrc2)
636  .addImm(Rsrc23 & 0xffffffff)
637  .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
638 
639  BuildMI(MBB, I, DL, SMovB32, Rsrc3)
640  .addImm(Rsrc23 >> 32)
641  .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
642  } else if (ST.isAmdHsaOrMesa(Fn)) {
643  assert(PreloadedScratchRsrcReg);
644 
645  if (ScratchRsrcReg != PreloadedScratchRsrcReg) {
646  BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchRsrcReg)
647  .addReg(PreloadedScratchRsrcReg, RegState::Kill);
648  }
649  }
650 
651  // Add the scratch wave offset into the scratch RSRC.
652  //
653  // We only want to update the first 48 bits, which is the base address
654  // pointer, without touching the adjacent 16 bits of flags. We know this add
655  // cannot carry-out from bit 47, otherwise the scratch allocation would be
656  // impossible to fit in the 48-bit global address space.
657  //
658  // TODO: Evaluate if it is better to just construct an SRD using the flat
659  // scratch init and some constants rather than update the one we are passed.
660  Register ScratchRsrcSub0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
661  Register ScratchRsrcSub1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
662 
663  // We cannot Kill ScratchWaveOffsetReg here because we allow it to be used in
664  // the kernel body via inreg arguments.
665  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), ScratchRsrcSub0)
666  .addReg(ScratchRsrcSub0)
667  .addReg(ScratchWaveOffsetReg)
668  .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
669  auto Addc = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), ScratchRsrcSub1)
670  .addReg(ScratchRsrcSub1)
671  .addImm(0)
672  .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
673  Addc->getOperand(3).setIsDead(); // Mark SCC as dead.
674 }
675 
676 bool SIFrameLowering::isSupportedStackID(TargetStackID::Value ID) const {
677  switch (ID) {
678  case TargetStackID::Default:
679  case TargetStackID::NoAlloc:
680  case TargetStackID::SGPRSpill:
681  return true;
682  case TargetStackID::ScalableVector:
683  case TargetStackID::WasmLocal:
684  return false;
685  }
686  llvm_unreachable("Invalid TargetStackID::Value");
687 }
688 
689 static void initLiveRegs(LivePhysRegs &LiveRegs, const SIRegisterInfo &TRI,
690  const SIMachineFunctionInfo *FuncInfo,
691  MachineFunction &MF, MachineBasicBlock &MBB,
692  MachineBasicBlock::iterator MBBI, bool IsProlog) {
693  if (LiveRegs.empty()) {
694  LiveRegs.init(TRI);
695  if (IsProlog) {
696  LiveRegs.addLiveIns(MBB);
697  } else {
698  // In epilog.
699  LiveRegs.addLiveOuts(MBB);
700  LiveRegs.stepBackward(*MBBI);
701  }
702  }
703 }
704 
705 // Activate all lanes, returns saved exec.
706 static Register buildScratchExecCopy(LivePhysRegs &LiveRegs,
707  MachineFunction &MF,
708  MachineBasicBlock &MBB,
709  MachineBasicBlock::iterator MBBI,
710  const DebugLoc &DL, bool IsProlog) {
711  Register ScratchExecCopy;
712  MachineRegisterInfo &MRI = MF.getRegInfo();
713  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
714  const SIInstrInfo *TII = ST.getInstrInfo();
715  const SIRegisterInfo &TRI = TII->getRegisterInfo();
716  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
717 
718  initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, IsProlog);
719 
720  ScratchExecCopy = findScratchNonCalleeSaveRegister(
721  MRI, LiveRegs, *TRI.getWaveMaskRegClass());
722  if (!ScratchExecCopy)
723  report_fatal_error("failed to find free scratch register");
724 
725  LiveRegs.addReg(ScratchExecCopy);
726 
727  const unsigned OrSaveExec =
728  ST.isWave32() ? AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64;
729  auto SaveExec = BuildMI(MBB, MBBI, DL, TII->get(OrSaveExec), ScratchExecCopy)
730  .addImm(-1);
731  SaveExec->getOperand(3).setIsDead(); // Mark SCC as dead.
732 
733  return ScratchExecCopy;
734 }
735 
736 // A StackID of SGPRSpill implies that this is a spill from SGPR to VGPR.
737 // Otherwise we are spilling to memory.
738 static bool spilledToMemory(const MachineFunction &MF, int SaveIndex) {
739  const MachineFrameInfo &MFI = MF.getFrameInfo();
740  return MFI.getStackID(SaveIndex) != TargetStackID::SGPRSpill;
741 }
742 
743 void SIFrameLowering::emitPrologue(MachineFunction &MF,
744  MachineBasicBlock &MBB) const {
745  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
746  if (FuncInfo->isEntryFunction()) {
747  emitEntryFunctionPrologue(MF, MBB);
748  return;
749  }
750 
751  MachineFrameInfo &MFI = MF.getFrameInfo();
752  MachineRegisterInfo &MRI = MF.getRegInfo();
753  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
754  const SIInstrInfo *TII = ST.getInstrInfo();
755  const SIRegisterInfo &TRI = TII->getRegisterInfo();
756 
757  Register StackPtrReg = FuncInfo->getStackPtrOffsetReg();
758  Register FramePtrReg = FuncInfo->getFrameOffsetReg();
759  Register BasePtrReg =
760  TRI.hasBasePointer(MF) ? TRI.getBaseRegister() : Register();
761  LivePhysRegs LiveRegs;
762 
763  MachineBasicBlock::iterator MBBI = MBB.begin();
764  // DebugLoc must be unknown since the first instruction with DebugLoc is used
765  // to determine the end of the prologue.
766  DebugLoc DL;
767 
768  bool HasFP = false;
769  bool HasBP = false;
770  uint32_t NumBytes = MFI.getStackSize();
771  uint32_t RoundedSize = NumBytes;
772  // To avoid clobbering VGPRs in lanes that weren't active on function entry,
773  // turn on all lanes before doing the spill to memory.
774  Register ScratchExecCopy;
775 
776  Optional<int> FPSaveIndex = FuncInfo->FramePointerSaveIndex;
777  Optional<int> BPSaveIndex = FuncInfo->BasePointerSaveIndex;
778 
779  // VGPRs used for SGPR->VGPR spills
780  for (const SIMachineFunctionInfo::SGPRSpillVGPR &Reg :
781  FuncInfo->getSGPRSpillVGPRs()) {
782  if (!Reg.FI)
783  continue;
784 
785  if (!ScratchExecCopy)
786  ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, DL,
787  /*IsProlog*/ true);
788 
789  buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, Reg.VGPR,
790  *Reg.FI);
791  }
792 
793  for (auto ReservedWWM : FuncInfo->wwmAllocation()) {
794  if (!ScratchExecCopy)
795  ScratchExecCopy =
796  buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, DL, /*IsProlog*/ true);
797 
798  buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL,
799  std::get<0>(ReservedWWM), std::get<1>(ReservedWWM));
800  }
801 
802  if (ScratchExecCopy) {
803  // FIXME: Split block and make terminator.
804  unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
805  MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
806  BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec)
807  .addReg(ScratchExecCopy, RegState::Kill);
808  LiveRegs.addReg(ScratchExecCopy);
809  }
810 
811  auto SaveSGPRToMemory = [&](Register Reg, const int FI) {
812  assert(!MFI.isDeadObjectIndex(FI));
813 
814  initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ true);
815 
816  MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
817  MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
818  if (!TmpVGPR)
819  report_fatal_error("failed to find free scratch register");
820 
821  BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
822  .addReg(Reg);
823 
824  buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, TmpVGPR,
825  FI);
826  };
827 
828  auto SaveSGPRToVGPRLane = [&](Register Reg, const int FI) {
829  assert(!MFI.isDeadObjectIndex(FI));
830 
833  FuncInfo->getSGPRToVGPRSpills(FI);
834  assert(Spill.size() == 1);
835 
836  BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_WRITELANE_B32), Spill[0].VGPR)
837  .addReg(Reg)
838  .addImm(Spill[0].Lane)
839  .addReg(Spill[0].VGPR, RegState::Undef);
840  };
841 
842  if (FPSaveIndex) {
843  if (spilledToMemory(MF, *FPSaveIndex))
844  SaveSGPRToMemory(FramePtrReg, *FPSaveIndex);
845  else
846  SaveSGPRToVGPRLane(FramePtrReg, *FPSaveIndex);
847  }
848 
849  // Emit the copy if we need an FP, and are using a free SGPR to save it.
850  if (FuncInfo->SGPRForFPSaveRestoreCopy) {
851  BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY),
852  FuncInfo->SGPRForFPSaveRestoreCopy)
853  .addReg(FramePtrReg)
854  .setMIFlag(MachineInstr::FrameSetup);
855  }
856 
857  if (BPSaveIndex) {
858  if (spilledToMemory(MF, *BPSaveIndex))
859  SaveSGPRToMemory(BasePtrReg, *BPSaveIndex);
860  else
861  SaveSGPRToVGPRLane(BasePtrReg, *BPSaveIndex);
862  }
863 
864  // Emit the copy if we need a BP, and are using a free SGPR to save it.
865  if (FuncInfo->SGPRForBPSaveRestoreCopy) {
866  BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY),
867  FuncInfo->SGPRForBPSaveRestoreCopy)
868  .addReg(BasePtrReg)
869  .setMIFlag(MachineInstr::FrameSetup);
870  }
871 
872  // If a copy has been emitted for FP and/or BP, make the SGPRs
873  // used in the copy instructions live throughout the function.
874  SmallVector<MCPhysReg, 2> TempSGPRs;
875  if (FuncInfo->SGPRForFPSaveRestoreCopy)
876  TempSGPRs.push_back(FuncInfo->SGPRForFPSaveRestoreCopy);
877 
878  if (FuncInfo->SGPRForBPSaveRestoreCopy)
879  TempSGPRs.push_back(FuncInfo->SGPRForBPSaveRestoreCopy);
880 
881  if (!TempSGPRs.empty()) {
882  for (MachineBasicBlock &MBB : MF) {
883  for (MCPhysReg Reg : TempSGPRs)
884  MBB.addLiveIn(Reg);
885 
886  MBB.sortUniqueLiveIns();
887  }
888  if (!LiveRegs.empty()) {
889  LiveRegs.addReg(FuncInfo->SGPRForFPSaveRestoreCopy);
890  LiveRegs.addReg(FuncInfo->SGPRForBPSaveRestoreCopy);
891  }
892  }
893 
894  if (TRI.hasStackRealignment(MF)) {
895  HasFP = true;
896  const unsigned Alignment = MFI.getMaxAlign().value();
897 
898  RoundedSize += Alignment;
899  if (LiveRegs.empty()) {
900  LiveRegs.init(TRI);
901  LiveRegs.addLiveIns(MBB);
902  }
903 
904  // s_add_i32 s33, s32, NumBytes
905  // s_and_b32 s33, s33, 0b111...0000
906  BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), FramePtrReg)
907  .addReg(StackPtrReg)
908  .addImm((Alignment - 1) * getScratchScaleFactor(ST))
909  .setMIFlag(MachineInstr::FrameSetup);
910  auto And = BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_AND_B32), FramePtrReg)
911  .addReg(FramePtrReg, RegState::Kill)
912  .addImm(-Alignment * getScratchScaleFactor(ST))
913  .setMIFlag(MachineInstr::FrameSetup);
914  And->getOperand(3).setIsDead(); // Mark SCC as dead.
915  FuncInfo->setIsStackRealigned(true);
916  } else if ((HasFP = hasFP(MF))) {
917  BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
918  .addReg(StackPtrReg)
919  .setMIFlag(MachineInstr::FrameSetup);
920  }
921 
922  // If we need a base pointer, set it up here. It's whatever the value of
923  // the stack pointer is at this point. Any variable size objects will be
924  // allocated after this, so we can still use the base pointer to reference
925  // the incoming arguments.
926  if ((HasBP = TRI.hasBasePointer(MF))) {
927  BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), BasePtrReg)
928  .addReg(StackPtrReg)
929  .setMIFlag(MachineInstr::FrameSetup);
930  }
931 
932  if (HasFP && RoundedSize != 0) {
933  auto Add = BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), StackPtrReg)
934  .addReg(StackPtrReg)
935  .addImm(RoundedSize * getScratchScaleFactor(ST))
936  .setMIFlag(MachineInstr::FrameSetup);
937  Add->getOperand(3).setIsDead(); // Mark SCC as dead.
938  }
939 
940  assert((!HasFP || (FuncInfo->SGPRForFPSaveRestoreCopy ||
941  FuncInfo->FramePointerSaveIndex)) &&
942  "Needed to save FP but didn't save it anywhere");
943 
944  // If we allow spilling to AGPRs we may have saved FP but then spill
945  // everything into AGPRs instead of the stack.
946  assert((HasFP || (!FuncInfo->SGPRForFPSaveRestoreCopy &&
947  !FuncInfo->FramePointerSaveIndex) ||
949  "Saved FP but didn't need it");
950 
951  assert((!HasBP || (FuncInfo->SGPRForBPSaveRestoreCopy ||
952  FuncInfo->BasePointerSaveIndex)) &&
953  "Needed to save BP but didn't save it anywhere");
954 
955  assert((HasBP || (!FuncInfo->SGPRForBPSaveRestoreCopy &&
956  !FuncInfo->BasePointerSaveIndex)) &&
957  "Saved BP but didn't need it");
958 }
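// Illustrative prologue for a callable function that needs an FP (a sketch
// with assumed SGPR numbers):
//
//   s_mov_b32 s4, s33          ; save the old FP in a free SGPR (or spill it)
//   s_mov_b32 s33, s32         ; FP = incoming SP
//   s_add_i32 s32, s32, 0x400  ; bump SP by the scaled frame size
//
// emitEpilogue below undoes these steps in the reverse order.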
959 
960 void SIFrameLowering::emitEpilogue(MachineFunction &MF,
961  MachineBasicBlock &MBB) const {
962  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
963  if (FuncInfo->isEntryFunction())
964  return;
965 
966  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
967  const SIInstrInfo *TII = ST.getInstrInfo();
968  MachineRegisterInfo &MRI = MF.getRegInfo();
969  const SIRegisterInfo &TRI = TII->getRegisterInfo();
970  LivePhysRegs LiveRegs;
971  // Get the insert location for the epilogue. If there were no terminators in
972  // the block, get the last instruction.
973  MachineBasicBlock::iterator MBBI = MBB.end();
974  DebugLoc DL;
975  if (!MBB.empty()) {
976  MBBI = MBB.getLastNonDebugInstr();
977  if (MBBI != MBB.end())
978  DL = MBBI->getDebugLoc();
979 
980  MBBI = MBB.getFirstTerminator();
981  }
982 
983  const MachineFrameInfo &MFI = MF.getFrameInfo();
984  uint32_t NumBytes = MFI.getStackSize();
985  uint32_t RoundedSize = FuncInfo->isStackRealigned()
986  ? NumBytes + MFI.getMaxAlign().value()
987  : NumBytes;
988  const Register StackPtrReg = FuncInfo->getStackPtrOffsetReg();
989  const Register FramePtrReg = FuncInfo->getFrameOffsetReg();
990  const Register BasePtrReg =
991  TRI.hasBasePointer(MF) ? TRI.getBaseRegister() : Register();
992 
993  Optional<int> FPSaveIndex = FuncInfo->FramePointerSaveIndex;
994  Optional<int> BPSaveIndex = FuncInfo->BasePointerSaveIndex;
995 
996  if (RoundedSize != 0 && hasFP(MF)) {
997  auto Add = BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), StackPtrReg)
998  .addReg(StackPtrReg)
999  .addImm(-static_cast<int64_t>(RoundedSize * getScratchScaleFactor(ST)))
1000  .setMIFlag(MachineInstr::FrameDestroy);
1001  Add->getOperand(3).setIsDead(); // Mark SCC as dead.
1002  }
1003 
1004  if (FuncInfo->SGPRForFPSaveRestoreCopy) {
1005  BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
1006  .addReg(FuncInfo->SGPRForFPSaveRestoreCopy)
1007  .setMIFlag(MachineInstr::FrameDestroy);
1008  }
1009 
1010  if (FuncInfo->SGPRForBPSaveRestoreCopy) {
1011  BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), BasePtrReg)
1012  .addReg(FuncInfo->SGPRForBPSaveRestoreCopy)
1013  .setMIFlag(MachineInstr::FrameDestroy);
1014  }
1015 
1016  auto RestoreSGPRFromMemory = [&](Register Reg, const int FI) {
1017  initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ false);
1018  MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
1019  MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
1020  if (!TmpVGPR)
1021  report_fatal_error("failed to find free scratch register");
1022  buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, TmpVGPR,
1023  FI);
1024  BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), Reg)
1025  .addReg(TmpVGPR, RegState::Kill);
1026  };
1027 
1028  auto RestoreSGPRFromVGPRLane = [&](Register Reg, const int FI) {
1029  assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
1030  ArrayRef<SIRegisterInfo::SpilledReg> Spill =
1031  FuncInfo->getSGPRToVGPRSpills(FI);
1032  assert(Spill.size() == 1);
1033  BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READLANE_B32), Reg)
1034  .addReg(Spill[0].VGPR)
1035  .addImm(Spill[0].Lane);
1036  };
1037 
1038  if (FPSaveIndex) {
1039  const int FramePtrFI = *FPSaveIndex;
1040  assert(!MFI.isDeadObjectIndex(FramePtrFI));
1041  if (spilledToMemory(MF, FramePtrFI))
1042  RestoreSGPRFromMemory(FramePtrReg, FramePtrFI);
1043  else
1044  RestoreSGPRFromVGPRLane(FramePtrReg, FramePtrFI);
1045  }
1046 
1047  if (BPSaveIndex) {
1048  const int BasePtrFI = *BPSaveIndex;
1049  assert(!MFI.isDeadObjectIndex(BasePtrFI));
1050  if (spilledToMemory(MF, BasePtrFI))
1051  RestoreSGPRFromMemory(BasePtrReg, BasePtrFI);
1052  else
1053  RestoreSGPRFromVGPRLane(BasePtrReg, BasePtrFI);
1054  }
1055 
1056  Register ScratchExecCopy;
1057  for (const SIMachineFunctionInfo::SGPRSpillVGPR &Reg :
1058  FuncInfo->getSGPRSpillVGPRs()) {
1059  if (!Reg.FI)
1060  continue;
1061 
1062  if (!ScratchExecCopy)
1063  ScratchExecCopy =
1064  buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, DL, /*IsProlog*/ false);
1065 
1066  buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL,
1067  Reg.VGPR, *Reg.FI);
1068  }
1069 
1070  for (auto ReservedWWM : FuncInfo->wwmAllocation()) {
1071  if (!ScratchExecCopy)
1072  ScratchExecCopy =
1073  buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, DL, /*IsProlog*/ false);
1074 
1075  buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL,
1076  std::get<0>(ReservedWWM), std::get<1>(ReservedWWM));
1077  }
1078 
1079  if (ScratchExecCopy) {
1080  // FIXME: Split block and make terminator.
1081  unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
1082  MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
1083  BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec)
1084  .addReg(ScratchExecCopy, RegState::Kill);
1085  }
1086 }
1087 
1088 #ifndef NDEBUG
1089 static bool allSGPRSpillsAreDead(const MachineFunction &MF) {
1090  const MachineFrameInfo &MFI = MF.getFrameInfo();
1091  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
1092  for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
1093  I != E; ++I) {
1094  if (!MFI.isDeadObjectIndex(I) &&
1095  MFI.getStackID(I) == TargetStackID::SGPRSpill &&
1096  (I != FuncInfo->FramePointerSaveIndex &&
1097  I != FuncInfo->BasePointerSaveIndex)) {
1098  return false;
1099  }
1100  }
1101 
1102  return true;
1103 }
1104 #endif
1105 
1106 StackOffset SIFrameLowering::getFrameIndexReference(const MachineFunction &MF,
1107  int FI,
1108  Register &FrameReg) const {
1109  const SIRegisterInfo *RI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
1110 
1111  FrameReg = RI->getFrameRegister(MF);
1112  return StackOffset::getFixed(MF.getFrameInfo().getObjectOffset(FI));
1113 }
1114 
1115 void SIFrameLowering::processFunctionBeforeFrameFinalized(
1116  MachineFunction &MF,
1117  RegScavenger *RS) const {
1118  MachineFrameInfo &MFI = MF.getFrameInfo();
1119 
1120  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1121  const SIInstrInfo *TII = ST.getInstrInfo();
1122  const SIRegisterInfo *TRI = ST.getRegisterInfo();
1123  MachineRegisterInfo &MRI = MF.getRegInfo();
1124  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
1125 
1126  if (!FuncInfo->isEntryFunction()) {
1127  // Spill VGPRs used for Whole Wave Mode
1128  FuncInfo->allocateWWMReservedSpillSlots(MFI, *TRI);
1129  }
1130 
1131  const bool SpillVGPRToAGPR = ST.hasMAIInsts() && FuncInfo->hasSpilledVGPRs()
1132  && EnableSpillVGPRToAGPR;
1133 
1134  if (SpillVGPRToAGPR) {
1135  // To track the spill frame indices handled in this pass.
1136  BitVector SpillFIs(MFI.getObjectIndexEnd(), false);
1137  BitVector NonVGPRSpillFIs(MFI.getObjectIndexEnd(), false);
1138 
1139  bool SeenDbgInstr = false;
1140 
1141  for (MachineBasicBlock &MBB : MF) {
1142  for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
1143  int FrameIndex;
1144  if (MI.isDebugInstr())
1145  SeenDbgInstr = true;
1146 
1147  if (TII->isVGPRSpill(MI)) {
1148  // Try to eliminate stack used by VGPR spills before frame
1149  // finalization.
1150  unsigned FIOp = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
1151  AMDGPU::OpName::vaddr);
1152  int FI = MI.getOperand(FIOp).getIndex();
1153  Register VReg =
1154  TII->getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg();
1155  if (FuncInfo->allocateVGPRSpillToAGPR(MF, FI,
1156  TRI->isAGPR(MRI, VReg))) {
1157  // FIXME: change to enterBasicBlockEnd()
1158  RS->enterBasicBlock(MBB);
1159  TRI->eliminateFrameIndex(MI, 0, FIOp, RS);
1160  SpillFIs.set(FI);
1161  continue;
1162  }
1163  } else if (TII->isStoreToStackSlot(MI, FrameIndex) ||
1164  TII->isLoadFromStackSlot(MI, FrameIndex))
1165  if (!MFI.isFixedObjectIndex(FrameIndex))
1166  NonVGPRSpillFIs.set(FrameIndex);
1167  }
1168  }
1169 
1170  // Stack slot coloring may assign different objects to the same stack slot.
1171  // If not, then the VGPR to AGPR spill slot is dead.
1172  for (unsigned FI : SpillFIs.set_bits())
1173  if (!NonVGPRSpillFIs.test(FI))
1174  FuncInfo->setVGPRToAGPRSpillDead(FI);
1175 
1176  for (MachineBasicBlock &MBB : MF) {
1177  for (MCPhysReg Reg : FuncInfo->getVGPRSpillAGPRs())
1178  MBB.addLiveIn(Reg);
1179 
1180  for (MCPhysReg Reg : FuncInfo->getAGPRSpillVGPRs())
1181  MBB.addLiveIn(Reg);
1182 
1183  MBB.sortUniqueLiveIns();
1184 
1185  if (!SpillFIs.empty() && SeenDbgInstr) {
1186  // FIXME: The dead frame indices are replaced with a null register from
1187  // the debug value instructions. We should instead, update it with the
1188  // correct register value. But not sure the register value alone is
1189  for (MachineInstr &MI : MBB) {
1190  if (MI.isDebugValue() && MI.getOperand(0).isFI() &&
1191  !MFI.isFixedObjectIndex(MI.getOperand(0).getIndex()) &&
1192  SpillFIs[MI.getOperand(0).getIndex()]) {
1193  MI.getOperand(0).ChangeToRegister(Register(), false /*isDef*/);
1194  }
1195  }
1196  }
1197  }
1198  }
1199 
1200  // At this point we've already allocated all spilled SGPRs to VGPRs if we
1201  // can. Any remaining SGPR spills will go to memory, so move them back to the
1202  // default stack.
1203  bool HaveSGPRToVMemSpill =
1204  FuncInfo->removeDeadFrameIndices(MFI, /*ResetSGPRSpillStackIDs*/ true);
1206  "SGPR spill should have been removed in SILowerSGPRSpills");
1207 
1208  // FIXME: The other checks should be redundant with allStackObjectsAreDead,
1209  // but currently hasNonSpillStackObjects is set only from source
1210  // allocas. Stack temps produced from legalization are not counted currently.
1211  if (!allStackObjectsAreDead(MFI)) {
1212  assert(RS && "RegScavenger required if spilling");
1213 
1214  // Add an emergency spill slot
1215  RS->addScavengingFrameIndex(FuncInfo->getScavengeFI(MFI, *TRI));
1216 
1217  // If we are spilling SGPRs to memory with a large frame, we may need a
1218  // second VGPR emergency frame index.
1219  if (HaveSGPRToVMemSpill &&
1220  allocateScavengingFrameIndexesNearIncomingSP(MF)) {
1221  RS->addScavengingFrameIndex(MFI.CreateStackObject(4, Align(4), false));
1222  }
1223  }
1224 }
1225 
1226 void SIFrameLowering::processFunctionBeforeFrameIndicesReplaced(
1227  MachineFunction &MF, RegScavenger *RS) const {
1228  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1229  const SIRegisterInfo *TRI = ST.getRegisterInfo();
1230  MachineRegisterInfo &MRI = MF.getRegInfo();
1231  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
1232 
1233  if (ST.hasMAIInsts() && !ST.hasGFX90AInsts()) {
1234  // On gfx908, we had initially reserved highest available VGPR for AGPR
1235  // copy. Now since we are done with RA, check if there exists an unused VGPR
1236  // lower than the one reserved earlier. If one exists, use it for the AGPR
1237  // copy instead of the register reserved before RA.
1238  Register VGPRForAGPRCopy = FuncInfo->getVGPRForAGPRCopy();
1239  Register UnusedLowVGPR =
1240  TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
1241  if (UnusedLowVGPR && (TRI->getHWRegIndex(UnusedLowVGPR) <
1242  TRI->getHWRegIndex(VGPRForAGPRCopy))) {
1243  // Call to setVGPRForAGPRCopy() should happen first before calling
1244  // freezeReservedRegs() so that getReservedRegs() can reserve this newly
1245  // identified VGPR (for AGPR copy).
1246  FuncInfo->setVGPRForAGPRCopy(UnusedLowVGPR);
1247  MRI.freezeReservedRegs(MF);
1248  }
1249  }
1250 }
1251 
1252 // Only report VGPRs to generic code.
1253 void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
1254  BitVector &SavedVGPRs,
1255  RegScavenger *RS) const {
1256  TargetFrameLowering::determineCalleeSaves(MF, SavedVGPRs, RS);
1257  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
1258  if (MFI->isEntryFunction())
1259  return;
1260 
1261  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
1262  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1263  const SIRegisterInfo *TRI = ST.getRegisterInfo();
1264 
1265  // Ignore the SGPRs the default implementation found.
1266  SavedVGPRs.clearBitsNotInMask(TRI->getAllVectorRegMask());
1267 
1268  // Do not save AGPRs prior to GFX90A because there was no easy way to do so.
1269  // In gfx908 there are no AGPR loads and stores, so spilling an AGPR also
1270  // requires a temporary VGPR.
1271  if (!ST.hasGFX90AInsts())
1272  SavedVGPRs.clearBitsInMask(TRI->getAllAGPRRegMask());
1273 
1274  // hasFP only knows about stack objects that already exist. We're now
1275  // determining the stack slots that will be created, so we have to predict
1276  // them. Stack objects force FP usage with calls.
1277  //
1278  // Note a new VGPR CSR may be introduced if one is used for the spill, but we
1279  // don't want to report it here.
1280  //
1281  // FIXME: Is this really hasReservedCallFrame?
1282  const bool WillHaveFP =
1283  FrameInfo.hasCalls() &&
1284  (SavedVGPRs.any() || !allStackObjectsAreDead(FrameInfo));
1285 
1286  // VGPRs used for SGPR spilling need to be specially inserted in the prolog,
1287  // so don't allow the default insertion to handle them.
1288  for (auto SSpill : MFI->getSGPRSpillVGPRs())
1289  SavedVGPRs.reset(SSpill.VGPR);
1290 
1291  LivePhysRegs LiveRegs;
1292  LiveRegs.init(*TRI);
1293 
1294  if (WillHaveFP || hasFP(MF)) {
1296  "Re-reserving spill slot for FP");
1298  MFI->FramePointerSaveIndex, true);
1299  }
1300 
1301  if (TRI->hasBasePointer(MF)) {
1302  if (MFI->SGPRForFPSaveRestoreCopy)
1303  LiveRegs.addReg(MFI->SGPRForFPSaveRestoreCopy);
1304 
1305  assert(!MFI->SGPRForBPSaveRestoreCopy &&
1306  !MFI->BasePointerSaveIndex && "Re-reserving spill slot for BP");
1307  getVGPRSpillLaneOrTempRegister(MF, LiveRegs, MFI->SGPRForBPSaveRestoreCopy,
1308  MFI->BasePointerSaveIndex, false);
1309  }
1310 }
1311 
1312 void SIFrameLowering::determineCalleeSavesSGPR(MachineFunction &MF,
1313  BitVector &SavedRegs,
1314  RegScavenger *RS) const {
1315  TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
1316  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
1317  if (MFI->isEntryFunction())
1318  return;
1319 
1320  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1321  const SIRegisterInfo *TRI = ST.getRegisterInfo();
1322 
1323  // The SP is specifically managed and we don't want extra spills of it.
1324  SavedRegs.reset(MFI->getStackPtrOffsetReg());
1325 
1326  const BitVector AllSavedRegs = SavedRegs;
1327  SavedRegs.clearBitsInMask(TRI->getAllVectorRegMask());
1328 
1329  // We have to anticipate introducing CSR VGPR spills, or a spill of the
1330  // caller-save VGPR reserved for SGPR spills, as we now always create a
1331  // stack entry for it even if there are no other stack objects, since we
1332  // require an FP if there is both a call and a stack. We will allocate a
1333  // VGPR for SGPR spills if there are any SGPR spills, whether CSR or otherwise.
1334  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
1335  const bool WillHaveFP =
1336  FrameInfo.hasCalls() && (AllSavedRegs.any() || MFI->hasSpilledSGPRs());
1337 
1338  // FP will be specially managed like SP.
1339  if (WillHaveFP || hasFP(MF))
1340  SavedRegs.reset(MFI->getFrameOffsetReg());
1341 
1342  // Return address use with return instruction is hidden through the SI_RETURN
1343  // pseudo. Given that and since the IPRA computes actual register usage and
1344  // does not use CSR list, the clobbering of return address by function calls
1345  // (D117243) or otherwise (D120922) is ignored/not seen by the IPRA's register
1346  // usage collection. This will ensure save/restore of return address happens
1347  // in those scenarios.
1348  const MachineRegisterInfo &MRI = MF.getRegInfo();
1349  Register RetAddrReg = TRI->getReturnAddressReg(MF);
1350  if (!MFI->isEntryFunction() &&
1351  (FrameInfo.hasCalls() || MRI.isPhysRegModified(RetAddrReg))) {
1352  SavedRegs.set(TRI->getSubReg(RetAddrReg, AMDGPU::sub0));
1353  SavedRegs.set(TRI->getSubReg(RetAddrReg, AMDGPU::sub1));
1354  }
1355 }
1356 
1357 bool SIFrameLowering::assignCalleeSavedSpillSlots(
1358  MachineFunction &MF, const TargetRegisterInfo *TRI,
1359  std::vector<CalleeSavedInfo> &CSI) const {
1360  if (CSI.empty())
1361  return true; // Early exit if no callee saved registers are modified!
1362 
1363  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
1364  if (!FuncInfo->SGPRForFPSaveRestoreCopy &&
1365  !FuncInfo->SGPRForBPSaveRestoreCopy)
1366  return false;
1367 
1368  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1369  const SIRegisterInfo *RI = ST.getRegisterInfo();
1370  Register FramePtrReg = FuncInfo->getFrameOffsetReg();
1371  Register BasePtrReg = RI->getBaseRegister();
1372  unsigned NumModifiedRegs = 0;
1373 
1374  if (FuncInfo->SGPRForFPSaveRestoreCopy)
1375  NumModifiedRegs++;
1376  if (FuncInfo->SGPRForBPSaveRestoreCopy)
1377  NumModifiedRegs++;
1378 
1379  for (auto &CS : CSI) {
1380  if (CS.getReg() == FramePtrReg && FuncInfo->SGPRForFPSaveRestoreCopy) {
1381  CS.setDstReg(FuncInfo->SGPRForFPSaveRestoreCopy);
1382  if (--NumModifiedRegs)
1383  break;
1384  } else if (CS.getReg() == BasePtrReg &&
1385  FuncInfo->SGPRForBPSaveRestoreCopy) {
1386  CS.setDstReg(FuncInfo->SGPRForBPSaveRestoreCopy);
1387  if (--NumModifiedRegs)
1388  break;
1389  }
1390  }
1391 
1392  return false;
1393 }
1394 
1395 bool SIFrameLowering::allocateScavengingFrameIndexesNearIncomingSP(
1396  const MachineFunction &MF) const {
1397 
1398  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1399  const MachineFrameInfo &MFI = MF.getFrameInfo();
1400  uint64_t EstStackSize = MFI.estimateStackSize(MF);
1401  uint64_t MaxOffset = EstStackSize - 1;
1402 
1403  // We need the emergency stack slots to be allocated in range of the
1404  // MUBUF/flat scratch immediate offset from the base register, so assign these
1405  // first at the incoming SP position.
1406  //
1407  // TODO: We could try sorting the objects to find a hole in the first bytes
1408  // rather than allocating as close to possible. This could save a lot of space
1409  // on frames with alignment requirements.
1410  if (ST.enableFlatScratch()) {
1411  const SIInstrInfo *TII = ST.getInstrInfo();
1412  if (TII->isLegalFLATOffset(MaxOffset, AMDGPUAS::PRIVATE_ADDRESS,
1413  SIInstrFlags::FlatScratch))
1414  return false;
1415  } else {
1416  if (SIInstrInfo::isLegalMUBUFImmOffset(MaxOffset))
1417  return false;
1418  }
1419 
1420  return true;
1421 }
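// Example (illustrative): the MUBUF immediate offset field is only 12 bits
// (0..4095 bytes), so once the estimated stack exceeds that range the
// emergency scavenging slots are placed near the incoming SP, where they stay
// reachable with an immediate offset from the base register.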
1422 
1423 MachineBasicBlock::iterator SIFrameLowering::eliminateCallFramePseudoInstr(
1424  MachineFunction &MF,
1425  MachineBasicBlock &MBB,
1426  MachineBasicBlock::iterator I) const {
1427  int64_t Amount = I->getOperand(0).getImm();
1428  if (Amount == 0)
1429  return MBB.erase(I);
1430 
1431  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1432  const SIInstrInfo *TII = ST.getInstrInfo();
1433  const DebugLoc &DL = I->getDebugLoc();
1434  unsigned Opc = I->getOpcode();
1435  bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
1436  uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;
1437 
1438  if (!hasReservedCallFrame(MF)) {
1439  Amount = alignTo(Amount, getStackAlign());
1440  assert(isUInt<32>(Amount) && "exceeded stack address space size");
1441  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
1442  Register SPReg = MFI->getStackPtrOffsetReg();
1443 
1444  Amount *= getScratchScaleFactor(ST);
1445  if (IsDestroy)
1446  Amount = -Amount;
1447  auto Add = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), SPReg)
1448  .addReg(SPReg)
1449  .addImm(Amount);
1450  Add->getOperand(3).setIsDead(); // Mark SCC as dead.
1451  } else if (CalleePopAmount != 0) {
1452  llvm_unreachable("is this used?");
1453  }
1454 
1455  return MBB.erase(I);
1456 }
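// Example (illustrative): without a reserved call frame, a call-frame setup
// pseudo for 64 bytes on a wave64 MUBUF target becomes
//
//   s_add_i32 s32, s32, 0x1000   ; 64 bytes * 64 lanes
//
// and the matching destroy pseudo emits the same add with the amount negated.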
1457 
1458 /// Returns true if the frame will require a reference to the stack pointer.
1459 ///
1460 /// This is the set of conditions common to setting up the stack pointer in a
1461 /// kernel, and for using a frame pointer in a callable function.
1462 ///
1463 /// FIXME: Should also check hasOpaqueSPAdjustment and if any inline asm
1464 /// references SP.
1465 static bool frameTriviallyRequiresSP(const MachineFrameInfo &MFI) {
1466  return MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint();
1467 }
1468 
1469 // The FP for kernels is always known 0, so we never really need to setup an
1470 // explicit register for it. However, DisableFramePointerElim will force us to
1471 // use a register for it.
1472 bool SIFrameLowering::hasFP(const MachineFunction &MF) const {
1473  const MachineFrameInfo &MFI = MF.getFrameInfo();
1474 
1475  // For entry functions we can use an immediate offset in most cases, so the
1476  // presence of calls doesn't imply we need a distinct frame pointer.
1477  if (MFI.hasCalls() &&
1478  !MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction()) {
1479  // All offsets are unsigned, so need to be addressed in the same direction
1480  // as stack growth.
1481 
1482  // FIXME: This function is pretty broken, since it can be called before the
1483  // frame layout is determined or CSR spills are inserted.
1484  return MFI.getStackSize() != 0;
1485  }
1486 
1487  return frameTriviallyRequiresSP(MFI) || MFI.isFrameAddressTaken() ||
1488  MF.getSubtarget<GCNSubtarget>().getRegisterInfo()->hasStackRealignment(
1489  MF) ||
1490  MF.getTarget().Options.DisableFramePointerElim(MF);
1491 }
1492 
1493 // This is essentially a reduced version of hasFP for entry functions. Since the
1494 // stack pointer is known 0 on entry to kernels, we never really need an FP
1495 // register. We may need to initialize the stack pointer depending on the frame
1496 // properties, which logically overlaps many of the cases where an ordinary
1497 // function would require an FP.
1498 bool SIFrameLowering::requiresStackPointerReference(
1499  const MachineFunction &MF) const {
1500  // Callable functions always require a stack pointer reference.
1502  "only expected to call this for entry points");
1503 
1504  const MachineFrameInfo &MFI = MF.getFrameInfo();
1505 
1506  // Entry points ordinarily don't need to initialize SP. We have to set it up
1507  // for callees if there are any. Also note tail calls are impossible/don't
1508  // make any sense for kernels.
1509  if (MFI.hasCalls())
1510  return true;
1511 
1512  // We still need to initialize the SP if we're doing anything weird that
1513  // references the SP, like variable sized stack objects.
1514  return frameTriviallyRequiresSP(MFI);
1515 }
llvm::MachineRegisterInfo::addLiveIn
void addLiveIn(MCRegister Reg, Register vreg=Register())
addLiveIn - Add the specified register as a live-in.
Definition: MachineRegisterInfo.h:959
i
i
Definition: README.txt:29
llvm::alignTo
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:156
llvm::MachineFrameInfo::hasVarSizedObjects
bool hasVarSizedObjects() const
This method may be called any time after instruction selection is complete to determine if the stack ...
Definition: MachineFrameInfo.h:355
llvm::SIMachineFunctionInfo::setIsStackRealigned
void setIsStackRealigned(bool Realigned=true)
Definition: SIMachineFunctionInfo.h:826
llvm::SIMachineFunctionInfo::getSGPRSpillVGPRs
ArrayRef< SGPRSpillVGPR > getSGPRSpillVGPRs() const
Definition: SIMachineFunctionInfo.h:542
llvm::AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT
@ FLAT_SCRATCH_INIT
Definition: AMDGPUArgumentUsageInfo.h:105
llvm::SIMachineFunctionInfo::wwmAllocation
auto wwmAllocation() const
Definition: SIMachineFunctionInfo.h:463
llvm::SIMachineFunctionInfo::hasSpilledVGPRs
bool hasSpilledVGPRs() const
Definition: SIMachineFunctionInfo.h:806
llvm::HexagonInstrInfo::isStoreToStackSlot
unsigned isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
If the specified machine instruction is a direct store to a stack slot, return the virtual or physica...
Definition: HexagonInstrInfo.cpp:335
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:108
llvm::MachineInstrBuilder::addImm
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Definition: MachineInstrBuilder.h:131
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
llvm::TargetStackID::WasmLocal
@ WasmLocal
Definition: TargetFrameLowering.h:31
llvm::LivePhysRegs::removeReg
void removeReg(MCPhysReg Reg)
Removes a physical register, all its sub-registers, and all its super-registers from the set.
Definition: LivePhysRegs.h:91
llvm::MachineRegisterInfo::isPhysRegUsed
bool isPhysRegUsed(MCRegister PhysReg, bool SkipRegMaskTest=false) const
Return true if the specified register is modified or read in this function.
Definition: MachineRegisterInfo.cpp:587
llvm::SIMachineFunctionInfo::getVGPRForAGPRCopy
Register getVGPRForAGPRCopy() const
Definition: SIMachineFunctionInfo.h:491
llvm::LivePhysRegs::addReg
void addReg(MCPhysReg Reg)
Adds a physical register and all its sub-registers to the set.
Definition: LivePhysRegs.h:81
llvm::SIMachineFunctionInfo::getPreloadedReg
MCRegister getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const
Definition: SIMachineFunctionInfo.h:730
llvm::SIFrameLowering::emitEpilogue
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override
Definition: SIFrameLowering.cpp:960
buildEpilogRestore
static void buildEpilogRestore(const GCNSubtarget &ST, const SIRegisterInfo &TRI, const SIMachineFunctionInfo &FuncInfo, LivePhysRegs &LiveRegs, MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SpillReg, int FI)
Definition: SIFrameLowering.cpp:145
SIMachineFunctionInfo.h
llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition: MachineRegisterInfo.h:50
llvm::SIMachineFunctionInfo::setVGPRForAGPRCopy
void setVGPRForAGPRCopy(Register NewVGPRForAGPRCopy)
Definition: SIMachineFunctionInfo.h:495
llvm::Function
Definition: Function.h:60
llvm::BitVector::set
BitVector & set()
Definition: BitVector.h:344
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1199
llvm::AMDGPUSubtarget::GFX9
@ GFX9
Definition: AMDGPUSubtarget.h:40
llvm::MachineFunction::getMachineMemOperand
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
Definition: MachineFunction.cpp:454
llvm::CallingConv::AMDGPU_CS
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
Definition: CallingConv.h:198
llvm::X86Disassembler::Reg
Reg
All possible values of the reg field in the ModR/M byte.
Definition: X86DisassemblerDecoder.h:462
llvm::MachineMemOperand::MOInvariant
@ MOInvariant
The memory access always returns the same value (or traps).
Definition: MachineMemOperand.h:144
llvm::BitVector::set_bits
iterator_range< const_set_bits_iterator > set_bits() const
Definition: BitVector.h:133
llvm::SIMachineFunctionInfo::allocateSGPRSpillToVGPR
bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI)
Reserve a slice of a VGPR to support spilling for FrameIndex FI.
Definition: SIMachineFunctionInfo.cpp:295
llvm::LivePhysRegs
A set of physical registers with utility functions to track liveness when walking backward/forward th...
Definition: LivePhysRegs.h:50
llvm::TargetRegisterInfo
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Definition: TargetRegisterInfo.h:237
llvm::SIMachineFunctionInfo::getGITPtrHigh
unsigned getGITPtrHigh() const
Definition: SIMachineFunctionInfo.h:735
llvm::StackOffset::getFixed
ScalarTy getFixed() const
Definition: TypeSize.h:149
llvm::MachineMemOperand
A description of a memory reference used in the backend.
Definition: MachineMemOperand.h:127
llvm::MachineMemOperand::MODereferenceable
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
Definition: MachineMemOperand.h:142
llvm::Optional< int >
llvm::GCNSubtarget
Definition: GCNSubtarget.h:31
llvm::MachineFrameInfo::RemoveStackObject
void RemoveStackObject(int ObjectIdx)
Remove or mark dead a statically sized stack object.
Definition: MachineFrameInfo.h:776
llvm::LivePhysRegs::empty
bool empty() const
Returns true if the set is empty.
Definition: LivePhysRegs.h:78
llvm::MachineFrameInfo::getObjectIndexEnd
int getObjectIndexEnd() const
Return one past the maximum frame object index.
Definition: MachineFrameInfo.h:410
llvm::SIMachineFunctionInfo::SGPRForFPSaveRestoreCopy
Register SGPRForFPSaveRestoreCopy
If this is set, an SGPR used for save/restore of the register used for the frame pointer.
Definition: SIMachineFunctionInfo.h:502
llvm::AMDGPU::getNamedOperandIdx
LLVM_READONLY int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx)
TRI
unsigned const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:1628
llvm::SIMachineFunctionInfo::getNumPreloadedSGPRs
unsigned getNumPreloadedSGPRs() const
Definition: SIMachineFunctionInfo.h:749
llvm::cl::ReallyHidden
@ ReallyHidden
Definition: CommandLine.h:141
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::MachineInstr::FrameDestroy
@ FrameDestroy
Definition: MachineInstr.h:86
buildGitPtr
static void buildGitPtr(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, const SIInstrInfo *TII, Register TargetReg)
Definition: SIFrameLowering.cpp:165
llvm::MachineBasicBlock::erase
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
Definition: MachineBasicBlock.cpp:1314
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::SIFrameLowering::hasFP
bool hasFP(const MachineFunction &MF) const override
hasFP - Return true if the specified function should have a dedicated frame pointer register.
Definition: SIFrameLowering.cpp:1472
llvm::SIRegisterInfo::getFrameRegister
Register getFrameRegister(const MachineFunction &MF) const override
Definition: SIRegisterInfo.cpp:490
frameTriviallyRequiresSP
static bool frameTriviallyRequiresSP(const MachineFrameInfo &MFI)
Returns true if the frame will require a reference to the stack pointer.
Definition: SIFrameLowering.cpp:1465
llvm::MachineFunction::front
const MachineBasicBlock & front() const
Definition: MachineFunction.h:866
llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition: MachineFunction.h:667
llvm::RecurKind::And
@ And
Bitwise or logical AND of integers.
TargetMachine.h
GCNSubtarget.h
llvm::MachineFunction::getInfo
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Definition: MachineFunction.h:755
llvm::LivePhysRegs::addLiveIns
void addLiveIns(const MachineBasicBlock &MBB)
Adds all live-in registers of basic block MBB.
Definition: LivePhysRegs.cpp:238
llvm::TargetFrameLowering::getStackAlign
Align getStackAlign() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
Definition: TargetFrameLowering.h:100
llvm::MachineInstr::getOperand
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:526
llvm::TargetFrameLowering::hasReservedCallFrame
virtual bool hasReservedCallFrame(const MachineFunction &MF) const
hasReservedCallFrame - Under normal circumstances, when a frame pointer is not required,...
Definition: TargetFrameLowering.h:292
llvm::TargetRegisterClass
Definition: TargetRegisterInfo.h:46
llvm::LivePhysRegs::addLiveOuts
void addLiveOuts(const MachineBasicBlock &MBB)
Adds all live-out registers of basic block MBB.
Definition: LivePhysRegs.cpp:232
TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:125
llvm::MCInstrDesc
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:197
llvm::SIFrameLowering::requiresStackPointerReference
bool requiresStackPointerReference(const MachineFunction &MF) const
Definition: SIFrameLowering.cpp:1498
initLiveRegs
static void initLiveRegs(LivePhysRegs &LiveRegs, const SIRegisterInfo &TRI, const SIMachineFunctionInfo *FuncInfo, MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, bool IsProlog)
Definition: SIFrameLowering.cpp:689
llvm::MachineInstr::FrameSetup
@ FrameSetup
Definition: MachineInstr.h:84
llvm::SIMachineFunctionInfo::getStackPtrOffsetReg
Register getStackPtrOffsetReg() const
Definition: SIMachineFunctionInfo.h:786
llvm::report_fatal_error
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:145
llvm::MachineFrameInfo::getStackID
uint8_t getStackID(int ObjectIdx) const
Definition: MachineFrameInfo.h:731
llvm::MachineFrameInfo::getStackSize
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
Definition: MachineFrameInfo.h:585
llvm::MachineRegisterInfo::freezeReservedRegs
void freezeReservedRegs(const MachineFunction &)
freezeReservedRegs - Called by the register allocator to freeze the set of reserved registers before ...
Definition: MachineRegisterInfo.cpp:509
getVGPRSpillLaneOrTempRegister
static void getVGPRSpillLaneOrTempRegister(MachineFunction &MF, LivePhysRegs &LiveRegs, Register &TempSGPR, Optional< int > &FrameIndex, bool IsFP)
Definition: SIFrameLowering.cpp:59
llvm::MachineFrameInfo::getObjectOffset
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
Definition: MachineFrameInfo.h:526
llvm::MCRegisterInfo::isSubRegisterEq
bool isSubRegisterEq(MCRegister RegA, MCRegister RegB) const
Returns true if RegB is a sub-register of RegA or if RegB == RegA.
Definition: MCRegisterInfo.h:568
llvm::AMDGPU::convertSMRDOffsetUnits
uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset)
Convert ByteOffset to dwords if the subtarget uses dword SMRD immediate offsets.
Definition: AMDGPUBaseInfo.cpp:2430
llvm::SIRegisterInfo
Definition: SIRegisterInfo.h:30
llvm::SIMachineFunctionInfo::SGPRForBPSaveRestoreCopy
Register SGPRForBPSaveRestoreCopy
If this is set, an SGPR used for save/restore of the register used for the base pointer.
Definition: SIMachineFunctionInfo.h:507
llvm::BitVector::clearBitsNotInMask
void clearBitsNotInMask(const uint32_t *Mask, unsigned MaskWords=~0u)
clearBitsNotInMask - Clear a bit in this vector for every '0' bit in Mask.
Definition: BitVector.h:718
llvm::BitVector
Definition: BitVector.h:75
Align
uint64_t Align
Definition: ELFObjHandler.cpp:82
llvm::SIFrameLowering::allocateScavengingFrameIndexesNearIncomingSP
bool allocateScavengingFrameIndexesNearIncomingSP(const MachineFunction &MF) const override
Control the placement of special register scavenging spill slots when allocating a stack frame.
Definition: SIFrameLowering.cpp:1395
llvm::MachineFrameInfo::isFixedObjectIndex
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
Definition: MachineFrameInfo.h:688
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::MachineFrameInfo::getObjectIndexBegin
int getObjectIndexBegin() const
Return the minimum frame object index.
Definition: MachineFrameInfo.h:407
llvm::MachineInstrBuilder::addExternalSymbol
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned TargetFlags=0) const
Definition: MachineInstrBuilder.h:184
llvm::SIMachineFunctionInfo::getSGPRToVGPRSpills
ArrayRef< SIRegisterInfo::SpilledReg > getSGPRToVGPRSpills(int FrameIndex) const
Definition: SIMachineFunctionInfo.h:535
llvm::ArrayRef::slice
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition: ArrayRef.h:194
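A small, self-contained sketch of slice (hypothetical data, assuming llvm/ADT/ArrayRef.h as listed above):

  #include "llvm/ADT/ArrayRef.h"

  void sliceDemo() {
    int Data[] = {10, 20, 30, 40, 50};
    llvm::ArrayRef<int> Whole(Data);
    // Drop the first element, keep the next three: {20, 30, 40}.
    llvm::ArrayRef<int> Middle = Whole.slice(1, 3);
    (void)Middle;
  }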
llvm::CallingConv::ID
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:94
llvm::MachineFrameInfo::isDeadObjectIndex
bool isDeadObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a dead object.
Definition: MachineFrameInfo.h:745
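getObjectIndexBegin, getObjectIndexEnd and isDeadObjectIndex are typically combined to walk every frame object; a hedged sketch (the helper name is hypothetical, not the exact helper this file defines):

  #include "llvm/CodeGen/MachineFrameInfo.h"

  // Visit fixed and non-fixed stack objects, skipping the dead ones.
  static bool hasLiveStackObject(const llvm::MachineFrameInfo &MFI) {
    for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd(); I != E; ++I)
      if (!MFI.isDeadObjectIndex(I))
        return true;
    return false;
  }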
llvm::SIFrameLowering::assignCalleeSavedSpillSlots
bool assignCalleeSavedSpillSlots(MachineFunction &MF, const TargetRegisterInfo *TRI, std::vector< CalleeSavedInfo > &CSI) const override
Definition: SIFrameLowering.cpp:1357
llvm::SIFrameLowering::emitPrologue
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override
emitProlog/emitEpilog - These methods insert prolog and epilog code into the function.
Definition: SIFrameLowering.cpp:743
llvm::TargetRegisterInfo::eliminateFrameIndex
virtual bool eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, unsigned FIOperandNum, RegScavenger *RS=nullptr) const =0
This method must be overriden to eliminate abstract frame indices from instructions which may use the...
llvm::BitVector::empty
bool empty() const
empty - Tests whether there are no bits in this bitvector.
Definition: BitVector.h:149
llvm::AMDGPU::Hwreg::WIDTH_M1_SHIFT_
@ WIDTH_M1_SHIFT_
Definition: SIDefines.h:427
llvm::SIMachineFunctionInfo::hasSpilledSGPRs
bool hasSpilledSGPRs() const
Definition: SIMachineFunctionInfo.h:798
llvm::AMDGPUSubtarget::GFX10
@ GFX10
Definition: AMDGPUSubtarget.h:41
llvm::SIMachineFunctionInfo::haveFreeLanesForSGPRSpill
bool haveFreeLanesForSGPRSpill(const MachineFunction &MF, unsigned NumLane) const
Returns true if NumLane slots are available in VGPRs already used for SGPR spilling.
Definition: SIMachineFunctionInfo.cpp:287
llvm::TargetOptions::DisableFramePointerElim
bool DisableFramePointerElim(const MachineFunction &MF) const
DisableFramePointerElim - This returns true if frame pointer elimination optimization should be disab...
Definition: TargetOptionsImpl.cpp:23
llvm::SIFrameLowering::determineCalleeSaves
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const override
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...
Definition: SIFrameLowering.cpp:1253
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:657
llvm::MachineInstrBuilder::setMIFlag
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
Definition: MachineInstrBuilder.h:278
llvm::cl::opt< bool >
llvm::AMDGPUAS::CONSTANT_ADDRESS
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
Definition: AMDGPU.h:375
llvm::LivePhysRegs::stepBackward
void stepBackward(const MachineInstr &MI)
Simulates liveness when stepping backwards over an instruction(bundle).
Definition: LivePhysRegs.cpp:68
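The LivePhysRegs entries in this listing (init, addLiveOuts, stepBackward, available) combine into the usual backward liveness scan; a hedged sketch with a hypothetical helper name:

  #include "llvm/ADT/STLExtras.h"
  #include "llvm/CodeGen/LivePhysRegs.h"
  #include "llvm/CodeGen/MachineBasicBlock.h"

  // Approximate the registers live on entry to MBB by walking it bottom-up.
  static void scanLiveness(llvm::MachineBasicBlock &MBB,
                           const llvm::TargetRegisterInfo &TRI) {
    llvm::LivePhysRegs LiveRegs;
    LiveRegs.init(TRI);        // (re-)initialize and clear the set
    LiveRegs.addLiveOuts(MBB); // seed with the block's live-outs
    for (llvm::MachineInstr &MI : llvm::reverse(MBB))
      LiveRegs.stepBackward(MI); // remove defs, add uses
    // LiveRegs now holds the registers live on entry to MBB.
  }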
llvm::SIMachineFunctionInfo::getVGPRSpillAGPRs
ArrayRef< MCPhysReg > getVGPRSpillAGPRs() const
Definition: SIMachineFunctionInfo.h:548
llvm::AMDGPU::Hwreg::ID_FLAT_SCR_HI
@ ID_FLAT_SCR_HI
Definition: SIDefines.h:404
AMDGPUMCTargetDesc.h
llvm::MachineOperand::setIsDead
void setIsDead(bool Val=true)
Definition: MachineOperand.h:515
llvm::RegState::Undef
@ Undef
Value of the register doesn't matter.
Definition: MachineInstrBuilder.h:52
llvm::AMDGPU::Hwreg::Offset
Offset
Definition: SIDefines.h:416
llvm::SIMachineFunctionInfo::isStackRealigned
bool isStackRealigned() const
Definition: SIMachineFunctionInfo.h:822
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:66
uint64_t
llvm::MachineFrameInfo::getObjectSize
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
Definition: MachineFrameInfo.h:470
llvm::Function::getCallingConv
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this function...
Definition: Function.h:238
llvm::BitVector::any
bool any() const
any - Returns true if any bit is set.
Definition: BitVector.h:163
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
llvm::AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET
@ PRIVATE_SEGMENT_WAVE_BYTE_OFFSET
Definition: AMDGPUArgumentUsageInfo.h:110
llvm::LivePhysRegs::available
bool available(const MachineRegisterInfo &MRI, MCPhysReg Reg) const
Returns true if register Reg and no aliasing register is in the set.
Definition: LivePhysRegs.cpp:141
llvm::MachinePointerInfo
This class contains a discriminated union of information about pointers in memory operands,...
Definition: MachineMemOperand.h:39
llvm::MachineRegisterInfo::getCalleeSavedRegs
const MCPhysReg * getCalleeSavedRegs() const
Returns list of callee saved registers.
Definition: MachineRegisterInfo.cpp:623
llvm::SIMachineFunctionInfo::FramePointerSaveIndex
Optional< int > FramePointerSaveIndex
Definition: SIMachineFunctionInfo.h:503
allStackObjectsAreDead
static bool allStackObjectsAreDead(const MachineFrameInfo &MFI)
Definition: SIFrameLowering.cpp:336
llvm::MCPhysReg
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition: MCRegister.h:21
llvm::RegScavenger
Definition: RegisterScavenging.h:34
llvm::MachineFrameInfo::getObjectAlign
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
Definition: MachineFrameInfo.h:484
llvm::AMDGPUMachineFunction::isEntryFunction
bool isEntryFunction() const
Definition: AMDGPUMachineFunction.h:80
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:447
llvm::make_early_inc_range
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:716
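A hedged sketch of the early-increment idiom (the debug-instruction filter and helper name are only illustrative):

  #include "llvm/ADT/STLExtras.h"
  #include "llvm/CodeGen/MachineBasicBlock.h"
  #include "llvm/CodeGen/MachineInstr.h"

  // make_early_inc_range advances the iterator before the loop body runs,
  // so erasing the current instruction does not invalidate the traversal.
  static void erasePlaceholders(llvm::MachineBasicBlock &MBB) {
    for (llvm::MachineInstr &MI : llvm::make_early_inc_range(MBB))
      if (MI.isDebugInstr())
        MI.eraseFromParent();
  }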
llvm::TargetStackID::ScalableVector
@ ScalableVector
Definition: TargetFrameLowering.h:30
llvm::SIFrameLowering::processFunctionBeforeFrameIndicesReplaced
void processFunctionBeforeFrameIndicesReplaced(MachineFunction &MF, RegScavenger *RS=nullptr) const override
processFunctionBeforeFrameIndicesReplaced - This method is called immediately before MO_FrameIndex op...
Definition: SIFrameLowering.cpp:1226
llvm::MachineBasicBlock::getLastNonDebugInstr
iterator getLastNonDebugInstr(bool SkipPseudoOp=true)
Returns an iterator to the last non-debug instruction in the basic block, or end().
Definition: MachineBasicBlock.cpp:264
llvm::MachineRegisterInfo::isAllocatable
bool isAllocatable(MCRegister PhysReg) const
isAllocatable - Returns true when PhysReg belongs to an allocatable register class and it hasn't been...
Definition: MachineRegisterInfo.h:948
llvm::TargetMachine::Options
TargetOptions Options
Definition: TargetMachine.h:117
llvm::MachineFunction::getFrameInfo
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
Definition: MachineFunction.h:673
llvm::SIMachineFunctionInfo::getScavengeFI
int getScavengeFI(MachineFrameInfo &MFI, const SIRegisterInfo &TRI)
Definition: SIMachineFunctionInfo.cpp:486
llvm::MachineBasicBlock::getParent
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
Definition: MachineBasicBlock.h:261
llvm::RegScavenger::addScavengingFrameIndex
void addScavengingFrameIndex(int FI)
Add a scavenging frame index.
Definition: RegisterScavenging.h:143
llvm::MachineInstrBuilder::addMemOperand
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Definition: MachineInstrBuilder.h:202
llvm::SIMachineFunctionInfo::allocateVGPRSpillToAGPR
bool allocateVGPRSpillToAGPR(MachineFunction &MF, int FI, bool isAGPRtoVGPR)
Reserve AGPRs or VGPRs to support spilling for FrameIndex FI.
Definition: SIMachineFunctionInfo.cpp:369
llvm::MachineInstrBuilder::addReg
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Definition: MachineInstrBuilder.h:97
llvm::SIMachineFunctionInfo::getGITPtrLoReg
Register getGITPtrLoReg(const MachineFunction &MF) const
Definition: SIMachineFunctionInfo.cpp:511
llvm::MachineFunction
Definition: MachineFunction.h:257
llvm::MachineBasicBlock::getFirstTerminator
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
Definition: MachineBasicBlock.cpp:239
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::MachineFrameInfo::hasPatchPoint
bool hasPatchPoint() const
This method may be called any time after instruction selection is complete to determine if there is a...
Definition: MachineFrameInfo.h:389
llvm::SIFrameLowering::isSupportedStackID
bool isSupportedStackID(TargetStackID::Value ID) const override
Definition: SIFrameLowering.cpp:676
llvm::MachineFrameInfo::CreateStackObject
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
Definition: MachineFrameInfo.cpp:51
AMDGPU.h
llvm::LivePhysRegs::init
void init(const TargetRegisterInfo &TRI)
(re-)initializes and clears the set.
Definition: LivePhysRegs.h:68
MBBI
MachineBasicBlock MachineBasicBlock::iterator MBBI
Definition: AArch64SLSHardening.cpp:75
llvm::BitVector::clearBitsInMask
void clearBitsInMask(const uint32_t *Mask, unsigned MaskWords=~0u)
clearBitsInMask - Clear any bits in this vector that are set in Mask.
Definition: BitVector.h:706
llvm::TargetStackID::NoAlloc
@ NoAlloc
Definition: TargetFrameLowering.h:32
getScratchScaleFactor
static unsigned getScratchScaleFactor(const GCNSubtarget &ST)
Definition: SIFrameLowering.cpp:399
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143
llvm::AMDGPU::isCompute
bool isCompute(CallingConv::ID cc)
Definition: AMDGPUBaseInfo.cpp:1827
uint32_t
llvm::StackOffset
StackOffset is a class to represent an offset with 2 dimensions, named fixed and scalable,...
Definition: TypeSize.h:134
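A tiny sketch of the two-dimensional offset type described above (the value is illustrative; only the fixed component is exercised):

  #include "llvm/Support/TypeSize.h"
  #include <cstdint>

  // Build an offset of 32 fixed bytes; the scalable component stays zero.
  llvm::StackOffset Offset = llvm::StackOffset::getFixed(32);
  int64_t Bytes = Offset.getFixed();   // Bytes == 32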
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
SIFrameLowering.h
llvm::HexagonInstrInfo::isLoadFromStackSlot
unsigned isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
TargetInstrInfo overrides.
Definition: HexagonInstrInfo.cpp:287
llvm::SIMachineFunctionInfo::getScratchRSrcReg
Register getScratchRSrcReg() const
Returns the physical register reserved for use as the resource descriptor for scratch accesses.
Definition: SIMachineFunctionInfo.h:759
llvm::SIMachineFunctionInfo::getImplicitBufferPtrUserSGPR
Register getImplicitBufferPtrUserSGPR() const
Definition: SIMachineFunctionInfo.h:794
EnableSpillVGPRToAGPR
static cl::opt< bool > EnableSpillVGPRToAGPR("amdgpu-spill-vgpr-to-agpr", cl::desc("Enable spilling VGPRs to AGPRs"), cl::ReallyHidden, cl::init(true))
llvm::SIFrameLowering::processFunctionBeforeFrameFinalized
void processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS=nullptr) const override
processFunctionBeforeFrameFinalized - This method is called immediately before the specified function...
Definition: SIFrameLowering.cpp:1115
llvm::MachineMemOperand::MOLoad
@ MOLoad
The memory access reads data.
Definition: MachineMemOperand.h:134
MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105
llvm::MachineFrameInfo::getMaxAlign
Align getMaxAlign() const
Return the alignment in bytes that this function must be aligned to, which is greater than the defaul...
Definition: MachineFrameInfo.h:601
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
llvm::MachineBasicBlock::addLiveIn
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
Definition: MachineBasicBlock.h:404
llvm::MachineFrameInfo::isFrameAddressTaken
bool isFrameAddressTaken() const
This method may be called any time after instruction selection is complete to determine if there is a...
Definition: MachineFrameInfo.h:371
llvm::RegScavenger::enterBasicBlock
void enterBasicBlock(MachineBasicBlock &MBB)
Start tracking liveness from the begin of basic block MBB.
Definition: RegisterScavenging.cpp:82
llvm::MachineFrameInfo::hasCalls
bool hasCalls() const
Return true if the current function has any function calls.
Definition: MachineFrameInfo.h:613
llvm::SIMachineFunctionInfo::setVGPRToAGPRSpillDead
void setVGPRToAGPRSpillDead(int FrameIndex)
Definition: SIMachineFunctionInfo.h:558
llvm::SIInstrInfo::isLegalMUBUFImmOffset
static bool isLegalMUBUFImmOffset(unsigned Imm)
Definition: SIInstrInfo.h:1119
llvm::ISD::FrameIndex
@ FrameIndex
Definition: ISDOpcodes.h:80
llvm::MachineRegisterInfo::replaceRegWith
void replaceRegWith(Register FromReg, Register ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
Definition: MachineRegisterInfo.cpp:378
llvm::SIMachineFunctionInfo::removeDeadFrameIndices
bool removeDeadFrameIndices(MachineFrameInfo &MFI, bool ResetSGPRSpillStackIDs)
If ResetSGPRSpillStackIDs is true, reset the stack ID from sgpr-spill to the default stack.
Definition: SIMachineFunctionInfo.cpp:434
MBB
MachineBasicBlock & MBB
Definition: AArch64SLSHardening.cpp:74
spilledToMemory
static bool spilledToMemory(const MachineFunction &MF, int SaveIndex)
Definition: SIFrameLowering.cpp:738
llvm::BitVector::test
bool test(unsigned Idx) const
Definition: BitVector.h:454
llvm::RegState::ImplicitDefine
@ ImplicitDefine
Definition: MachineInstrBuilder.h:63
llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:623
allSGPRSpillsAreDead
static bool allSGPRSpillsAreDead(const MachineFunction &MF)
Definition: SIFrameLowering.cpp:1089
llvm::MachineBasicBlock::sortUniqueLiveIns
void sortUniqueLiveIns()
Sorts and uniques the LiveIns vector.
Definition: MachineBasicBlock.cpp:597
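addLiveIn and sortUniqueLiveIns (both listed here) are commonly paired when a prologue reserves a physical register; a minimal sketch with a hypothetical helper name:

  #include "llvm/CodeGen/MachineBasicBlock.h"

  // Record Reg as live on entry to MBB and keep the live-in list canonical.
  static void recordLiveIn(llvm::MachineBasicBlock &MBB, llvm::MCRegister Reg) {
    MBB.addLiveIn(Reg);
    MBB.sortUniqueLiveIns();
  }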
uint16_t
llvm::MachineFunction::getTarget
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Definition: MachineFunction.h:653
llvm::SIFrameLowering::determineCalleeSavesSGPR
void determineCalleeSavesSGPR(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const
Definition: SIFrameLowering.cpp:1312
buildScratchExecCopy
static Register buildScratchExecCopy(LivePhysRegs &LiveRegs, MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool IsProlog)
Definition: SIFrameLowering.cpp:706
MachineFrameInfo.h
llvm::Align::value
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
llvm::SIMachineFunctionInfo::BasePointerSaveIndex
Optional< int > BasePointerSaveIndex
Definition: SIMachineFunctionInfo.h:508
llvm::SIFrameLowering::eliminateCallFramePseudoInstr
MachineBasicBlock::iterator eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
This method is called during prolog/epilog code insertion to eliminate call frame setup and destroy p...
Definition: SIFrameLowering.cpp:1423
llvm::RegState::Kill
@ Kill
The last use of a register.
Definition: MachineInstrBuilder.h:48
llvm::TargetStackID::Value
Value
Definition: TargetFrameLowering.h:27
llvm::TargetStackID::Default
@ Default
Definition: TargetFrameLowering.h:28
llvm::SIMachineFunctionInfo::getAGPRSpillVGPRs
ArrayRef< MCPhysReg > getAGPRSpillVGPRs() const
Definition: SIMachineFunctionInfo.h:544
llvm::SIMachineFunctionInfo::setScratchRSrcReg
void setScratchRSrcReg(Register Reg)
Definition: SIMachineFunctionInfo.h:763
llvm::TargetStackID::SGPRSpill
@ SGPRSpill
Definition: TargetFrameLowering.h:29
llvm::MachineRegisterInfo::isPhysRegModified
bool isPhysRegModified(MCRegister PhysReg, bool SkipNoReturnDef=false) const
Return true if the specified register is modified in this function.
Definition: MachineRegisterInfo.cpp:572
llvm::MachineMemOperand::MOStore
@ MOStore
The memory access writes data.
Definition: MachineMemOperand.h:136
llvm::BitVector::reset
BitVector & reset()
Definition: BitVector.h:385
llvm::SIInstrInfo
Definition: SIInstrInfo.h:44
llvm::TargetRegisterInfo::hasStackRealignment
bool hasStackRealignment(const MachineFunction &MF) const
True if stack realignment is required and still possible.
Definition: TargetRegisterInfo.h:968
llvm::MachineFrameInfo
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
Definition: MachineFrameInfo.h:106
llvm::BuildMI
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
Definition: MachineInstrBuilder.h:357
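A hedged sketch of the builder interface named above, combined with the addReg/setMIFlag helpers from this listing (the opcode, registers and helper name are placeholders, not the exact sequence this file emits):

  #include "SIInstrInfo.h"
  #include "llvm/CodeGen/MachineInstrBuilder.h"

  // Insert "DstReg = S_MOV_B32 SrcReg" before MBBI and tag it as prologue code.
  static void emitSGPRCopy(llvm::MachineBasicBlock &MBB,
                           llvm::MachineBasicBlock::iterator MBBI,
                           const llvm::DebugLoc &DL, const llvm::SIInstrInfo *TII,
                           llvm::Register DstReg, llvm::Register SrcReg) {
    llvm::BuildMI(MBB, MBBI, DL, TII->get(llvm::AMDGPU::S_MOV_B32), DstReg)
        .addReg(SrcReg, llvm::RegState::Kill)
        .setMIFlag(llvm::MachineInstr::FrameSetup);
  }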
llvm::MachinePointerInfo::getFixedStack
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
Definition: MachineOperand.cpp:1019
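getFixedStack pairs naturally with getMachineMemOperand (listed earlier) when describing a spill-slot access; a hedged sketch (the 4-byte size and helper name are illustrative):

  #include "llvm/CodeGen/MachineFrameInfo.h"
  #include "llvm/CodeGen/MachineFunction.h"
  #include "llvm/CodeGen/MachineMemOperand.h"

  // Describe a 4-byte store to frame index FI for attachment via addMemOperand.
  static llvm::MachineMemOperand *makeSpillMMO(llvm::MachineFunction &MF, int FI) {
    llvm::MachinePointerInfo PtrInfo =
        llvm::MachinePointerInfo::getFixedStack(MF, FI);
    return MF.getMachineMemOperand(PtrInfo, llvm::MachineMemOperand::MOStore, 4,
                                   MF.getFrameInfo().getObjectAlign(FI));
  }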
llvm::MachineBasicBlock::begin
iterator begin()
Definition: MachineBasicBlock.h:305
buildPrologSpill
static void buildPrologSpill(const GCNSubtarget &ST, const SIRegisterInfo &TRI, const SIMachineFunctionInfo &FuncInfo, LivePhysRegs &LiveRegs, MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SpillReg, int FI)
Definition: SIFrameLowering.cpp:124
llvm::SIMachineFunctionInfo
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Definition: SIMachineFunctionInfo.h:325
llvm::ArrayRef::size
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:164
llvm::SIInstrFlags::FlatScratch
@ FlatScratch
Definition: SIDefines.h:123
llvm::MachineBasicBlock::empty
bool empty() const
Definition: MachineBasicBlock.h:277
llvm::SIFrameLowering::emitEntryFunctionPrologue
void emitEntryFunctionPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const
Definition: SIFrameLowering.cpp:403
llvm::AMDGPUAS::PRIVATE_ADDRESS
@ PRIVATE_ADDRESS
Address space for private memory.
Definition: AMDGPU.h:377
llvm::AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER
@ PRIVATE_SEGMENT_BUFFER
Definition: AMDGPUArgumentUsageInfo.h:100
llvm::TargetRegisterInfo::getSubReg
MCRegister getSubReg(MCRegister Reg, unsigned Idx) const
Returns the physical register number of sub-register "Index" for physical register RegNo.
Definition: TargetRegisterInfo.h:1142
llvm::AMDGPU::Hwreg::ID_FLAT_SCR_LO
@ ID_FLAT_SCR_LO
Definition: SIDefines.h:403
llvm::TargetFrameLowering::determineCalleeSaves
virtual void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...
Definition: TargetFrameLoweringImpl.cpp:83
llvm::MachineFrameInfo::hasStackMap
bool hasStackMap() const
This method may be called any time after instruction selection is complete to determine if there is a...
Definition: MachineFrameInfo.h:383
llvm::DebugLoc
A debug info location.
Definition: DebugLoc.h:33
llvm::cl::desc
Definition: CommandLine.h:413
RegisterScavenging.h
findScratchNonCalleeSaveRegister
static MCRegister findScratchNonCalleeSaveRegister(MachineRegisterInfo &MRI, LivePhysRegs &LiveRegs, const TargetRegisterClass &RC, bool Unused=false)
Definition: SIFrameLowering.cpp:33
llvm::printReg
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
Definition: TargetRegisterInfo.cpp:111
llvm::SIMachineFunctionInfo::SGPRSpillVGPR
Definition: SIMachineFunctionInfo.h:433
llvm::MachineInstrBundleIterator
MachineBasicBlock iterator that automatically skips over MIs that are inside bundles (i....
Definition: MachineInstrBundleIterator.h:108
llvm::SIFrameLowering::getFrameIndexReference
StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg) const override
getFrameIndexReference - This method should return the base register and offset used to reference a f...
Definition: SIFrameLowering.cpp:1106
llvm::SIMachineFunctionInfo::hasImplicitBufferPtr
bool hasImplicitBufferPtr() const
Definition: SIMachineFunctionInfo.h:713
llvm::SIMachineFunctionInfo::hasFlatScratchInit
bool hasFlatScratchInit() const
Definition: SIMachineFunctionInfo.h:671
llvm::MachineBasicBlock::end
iterator end()
Definition: MachineBasicBlock.h:307
llvm::SIMachineFunctionInfo::getFrameOffsetReg
Register getFrameOffsetReg() const
Definition: SIMachineFunctionInfo.h:768
llvm::SIMachineFunctionInfo::allocateWWMReservedSpillSlots
void allocateWWMReservedSpillSlots(MachineFrameInfo &MFI, const SIRegisterInfo &TRI)
Definition: SIMachineFunctionInfo.cpp:472
llvm::MCRegister
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:24
LivePhysRegs.h