LLVM  13.0.0git
SIFrameLowering.cpp
Go to the documentation of this file.
1 //===----------------------- SIFrameLowering.cpp --------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //==-----------------------------------------------------------------------===//
8 
9 #include "SIFrameLowering.h"
10 #include "AMDGPU.h"
11 #include "GCNSubtarget.h"
13 #include "SIMachineFunctionInfo.h"
18 
19 using namespace llvm;
20 
21 #define DEBUG_TYPE "frame-info"
22 
23 // Find a scratch register that we can use in the prologue. We avoid using
24 // callee-save registers since they may appear to be free when this is called
25 // from canUseAsPrologue (during shrink wrapping), but then no longer be free
26 // when this is called from emitPrologue.
//
// Returns the first register of RC that LiveRegs reports as available after
// every callee-saved register has been added to LiveRegs, or a default
// MCRegister() when no register qualifies. When Unused is true the candidate
// must additionally not be reported as used by MRI.isPhysRegUsed().
//
// Side effect: LiveRegs is taken by reference and the callee-saved registers
// added below are not removed again before returning.
//
// NOTE(review): the line that opens this definition (return type, function
// name and the parameter named MRI) is absent from this listing -- confirm
// against the upstream file.
28  LivePhysRegs &LiveRegs,
29  const TargetRegisterClass &RC,
30  bool Unused = false) {
31  // Mark callee saved registers as used so we will not choose them.
32  const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
  // The loop stops at the first zero entry of the CSRegs array.
33  for (unsigned i = 0; CSRegs[i]; ++i)
34  LiveRegs.addReg(CSRegs[i]);
35 
36  if (Unused) {
37  // We are looking for a register that can be used throughout the entire
38  // function, so any use is unacceptable.
39  for (MCRegister Reg : RC) {
40  if (!MRI.isPhysRegUsed(Reg) && LiveRegs.available(MRI, Reg))
41  return Reg;
42  }
43  } else {
  // Only availability in LiveRegs matters here; uses elsewhere in the
  // function are not checked.
44  for (MCRegister Reg : RC) {
45  if (LiveRegs.available(MRI, Reg))
46  return Reg;
47  }
48  }
49 
  // No candidate in RC satisfied the conditions above.
50  return MCRegister();
51 }
52 
// Decides where the current FP or BP value is saved and records the decision
// for the caller. IsFP only selects the "FP"/"BP" wording in the debug output
// visible here. The options are tried in the order numbered in the comments
// below:
//   1: a lane of a VGPR that already has free lanes (result in FrameIndex),
//   2: an unused SGPR (result in TempSGPR),
//   3: a lane obtained through allocateSGPRSpillToVGPR (result in FrameIndex),
//   4: a spill stack object in memory (result in FrameIndex).
//
// NOTE(review): this listing is missing the line that opens the definition,
// the declarations of FrameIndex, MFI and ST, the tail of both
// CreateStackObject calls, and the start of the statement that assigns
// TempSGPR -- confirm those details against the upstream file.
54  LivePhysRegs &LiveRegs,
55  Register &TempSGPR,
57  bool IsFP) {
59  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
60 
62  const SIRegisterInfo *TRI = ST.getRegisterInfo();
63 
64  // We need to save and restore the current FP/BP.
65 
66  // 1: If there is already a VGPR with free lanes, use it. We
67  // may already have to pay the penalty for spilling a CSR VGPR.
68  if (MFI->haveFreeLanesForSGPRSpill(MF, 1)) {
69  int NewFI = FrameInfo.CreateStackObject(4, Align(4), true, nullptr,
71 
  // A free lane was just reported, so a failed allocation is treated as a
  // programming error.
72  if (!MFI->allocateSGPRSpillToVGPR(MF, NewFI))
73  llvm_unreachable("allocate SGPR spill should have worked");
74 
75  FrameIndex = NewFI;
76 
77  LLVM_DEBUG(auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front();
78  dbgs() << "Spilling " << (IsFP ? "FP" : "BP") << " to "
79  << printReg(Spill.VGPR, TRI) << ':' << Spill.Lane
80  << '\n');
81  return;
82  }
83 
84  // 2: Next, try to save the FP/BP in an unused SGPR.
86  MF.getRegInfo(), LiveRegs, AMDGPU::SReg_32_XM0_XEXECRegClass, true);
87 
  // TempSGPR is still unset here when no unused SGPR was found.
88  if (!TempSGPR) {
89  int NewFI = FrameInfo.CreateStackObject(4, Align(4), true, nullptr,
91 
92  if (TRI->spillSGPRToVGPR() && MFI->allocateSGPRSpillToVGPR(MF, NewFI)) {
93  // 3: There's no free lane to spill, and no free register to save FP/BP,
94  // so we're forced to spill another VGPR to use for the spill.
95  FrameIndex = NewFI;
96 
97  LLVM_DEBUG(
98  auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front();
99  dbgs() << (IsFP ? "FP" : "BP") << " requires fallback spill to "
100  << printReg(Spill.VGPR, TRI) << ':' << Spill.Lane << '\n';);
101  } else {
102  // Remove dead <NewFI> index
103  MF.getFrameInfo().RemoveStackObject(NewFI);
104  // 4: If all else fails, spill the FP/BP to memory.
105  FrameIndex = FrameInfo.CreateSpillStackObject(4, Align(4));
106  LLVM_DEBUG(dbgs() << "Reserved FI " << FrameIndex << " for spilling "
107  << (IsFP ? "FP" : "BP") << '\n');
108  }
109  } else {
  // Option 2 succeeded: no frame index is created, the value will be kept
  // in TempSGPR.
110  LLVM_DEBUG(dbgs() << "Saving " << (IsFP ? "FP" : "BP") << " with copy to "
111  << printReg(TempSGPR, TRI) << '\n');
112  }
113 }
114 
115 // We need to specially emit stack operations here because a different frame
116 // register is used than in the rest of the function, as getFrameRegister would
117 // use.
//
// Emits a store of SpillReg to frame index FI, addressed through the register
// returned by FuncInfo.getStackPtrOffsetReg(). The memory operand takes its
// size and alignment from the frame object FI.
//
// NOTE(review): the parameter line declaring I and SpillReg, and the lines
// declaring PtrInfo and MMO, are missing from this listing -- confirm against
// the upstream file.
118 static void buildPrologSpill(const GCNSubtarget &ST, const SIRegisterInfo &TRI,
119  const SIMachineFunctionInfo &FuncInfo,
120  LivePhysRegs &LiveRegs, MachineFunction &MF,
122  int FI) {
  // Scratch (flat) store when flat scratch is enabled, buffer store otherwise.
123  unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
124  : AMDGPU::BUFFER_STORE_DWORD_OFFSET;
125 
126  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
129  PtrInfo, MachineMemOperand::MOStore, FrameInfo.getObjectSize(FI),
130  FrameInfo.getObjectAlign(FI));
  // SpillReg is marked live only for the duration of the call below, which
  // also receives LiveRegs; presumably so it is not picked as a temporary --
  // TODO confirm.
131  LiveRegs.addReg(SpillReg);
  // NOTE(review): the literal `true` is presumably the "value is killed"
  // flag of buildSpillLoadStore -- confirm against SIRegisterInfo.h.
132  TRI.buildSpillLoadStore(I, Opc, FI, SpillReg, true,
133  FuncInfo.getStackPtrOffsetReg(), 0, MMO, nullptr,
134  &LiveRegs);
135  LiveRegs.removeReg(SpillReg);
136 }
137 
// Emits a load of frame index FI into SpillReg, addressed through the register
// returned by FuncInfo.getStackPtrOffsetReg(). The memory operand takes its
// size and alignment from the frame object FI. Unlike buildPrologSpill, the
// visible code does not add SpillReg to LiveRegs around the call.
//
// NOTE(review): the parameter line declaring I and SpillReg, and the lines
// declaring PtrInfo and MMO, are missing from this listing -- confirm against
// the upstream file.
138 static void buildEpilogRestore(const GCNSubtarget &ST,
139  const SIRegisterInfo &TRI,
140  const SIMachineFunctionInfo &FuncInfo,
141  LivePhysRegs &LiveRegs, MachineFunction &MF,
143  int FI) {
  // Scratch (flat) load when flat scratch is enabled, buffer load otherwise.
144  unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
145  : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
146 
147  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
150  PtrInfo, MachineMemOperand::MOLoad, FrameInfo.getObjectSize(FI),
151  FrameInfo.getObjectAlign(FI));
  // NOTE(review): the literal `false` is presumably the "value is killed"
  // flag of buildSpillLoadStore -- confirm against SIRegisterInfo.h.
152  TRI.buildSpillLoadStore(I, Opc, FI, SpillReg, false,
153  FuncInfo.getStackPtrOffsetReg(), 0, MMO, nullptr,
154  &LiveRegs);
155 }
156 
// Builds a 64-bit pointer in TargetReg before iterator I in MBB:
//  - the high half (sub1) is the immediate from MFI->getGITPtrHigh(), unless
//    that value is 0xffffffff, in which case S_GETPC_B64 writes the whole
//    TargetReg instead;
//  - the low half (sub0) is then overwritten with the register returned by
//    MFI->getGITPtrLoReg(), which is also added as a live-in of the function
//    and of MBB.
//
// NOTE(review): the line that opens this definition (name plus the MBB and I
// parameters) and the declaration of MFI are missing from this listing --
// confirm against the upstream file.
158  const DebugLoc &DL, const SIInstrInfo *TII,
159  Register TargetReg) {
160  MachineFunction *MF = MBB.getParent();
162  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
163  const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
164  Register TargetLo = TRI->getSubReg(TargetReg, AMDGPU::sub0);
165  Register TargetHi = TRI->getSubReg(TargetReg, AMDGPU::sub1);
166 
  // 0xffffffff is treated as "no explicit high half was provided".
167  if (MFI->getGITPtrHigh() != 0xffffffff) {
168  BuildMI(MBB, I, DL, SMovB32, TargetHi)
169  .addImm(MFI->getGITPtrHigh())
170  .addReg(TargetReg, RegState::ImplicitDefine);
171  } else {
172  const MCInstrDesc &GetPC64 = TII->get(AMDGPU::S_GETPC_B64);
173  BuildMI(MBB, I, DL, GetPC64, TargetReg);
174  }
175  Register GitPtrLo = MFI->getGITPtrLoReg(*MF);
176  MF->getRegInfo().addLiveIn(GitPtrLo);
177  MBB.addLiveIn(GitPtrLo);
178  BuildMI(MBB, I, DL, SMovB32, TargetLo)
179  .addReg(GitPtrLo);
180 }
181 
182 // Emit flat scratch setup code, assuming `MFI->hasFlatScratchInit()`
//
// Overview of the visible code:
//  - On amdpal the initial value is loaded from memory through a pointer
//    built by buildGitPtr into a free 64-bit SGPR pair; otherwise it is taken
//    from a FlatScratchInitReg that is added as a live-in.
//  - When ST.flatScratchIsPointer(), ScratchWaveOffsetReg is added as a 64-bit
//    add; the result goes to FLAT_SCR_LO/HI directly (the "For GFX9" path) or
//    through S_SETREG_B32 on GFX10 and later.
//  - Otherwise (generation asserted to be below GFX9) FLAT_SCR_LO receives the
//    size and FLAT_SCR_HI the wave-adjusted offset shifted right by 8.
//
// NOTE(review): this listing is missing several lines (the MF/MBB/I
// parameters, the MFI and MRI declarations, the PtrInfo declaration, parts of
// the memory-operand flags, the Offset initializer, the FlatScratchInitReg
// initializer and the tail of both S_SETREG_B32 operands) -- confirm against
// the upstream file.
183 void SIFrameLowering::emitEntryFunctionFlatScratchInit(
185  const DebugLoc &DL, Register ScratchWaveOffsetReg) const {
186  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
187  const SIInstrInfo *TII = ST.getInstrInfo();
188  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
190 
191  // We don't need this if we only have spills since there is no user facing
192  // scratch.
193 
194  // TODO: If we know we don't have flat instructions earlier, we can omit
195  // this from the input registers.
196  //
197  // TODO: We only need to know if we access scratch space through a flat
198  // pointer. Because we only detect if flat instructions are used at all,
199  // this will be used more often than necessary on VI.
200 
201  Register FlatScrInitLo;
202  Register FlatScrInitHi;
203 
204  if (ST.isAmdPalOS()) {
205  // Extract the scratch offset from the descriptor in the GIT
206  LivePhysRegs LiveRegs;
207  LiveRegs.init(*TRI);
208  LiveRegs.addLiveIns(MBB);
209 
210  // Find unused reg to load flat scratch init into
212  Register FlatScrInit = AMDGPU::NoRegister;
213  ArrayRef<MCPhysReg> AllSGPR64s = TRI->getAllSGPR64(MF);
  // Number of preloaded SGPRs rounded up to whole 64-bit pairs; that many
  // leading candidates are dropped from the list below.
214  unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 1) / 2;
215  AllSGPR64s = AllSGPR64s.slice(
216  std::min(static_cast<unsigned>(AllSGPR64s.size()), NumPreloaded));
217  Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
  // Take the first pair that is available, allocatable and does not contain
  // the GIT pointer low register.
218  for (MCPhysReg Reg : AllSGPR64s) {
219  if (LiveRegs.available(MRI, Reg) && MRI.isAllocatable(Reg) &&
220  !TRI->isSubRegisterEq(Reg, GITPtrLoReg)) {
221  FlatScrInit = Reg;
222  break;
223  }
224  }
225  assert(FlatScrInit && "Failed to find free register for scratch init");
226 
227  FlatScrInitLo = TRI->getSubReg(FlatScrInit, AMDGPU::sub0);
228  FlatScrInitHi = TRI->getSubReg(FlatScrInit, AMDGPU::sub1);
229 
230  buildGitPtr(MBB, I, DL, TII, FlatScrInit);
231 
232  // We now have the GIT ptr - now get the scratch descriptor from the entry
233  // at offset 0 (or offset 16 for a compute shader).
235  const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);
236  auto *MMO = MF.getMachineMemOperand(
237  PtrInfo,
240  8, Align(4));
241  unsigned Offset =
243  const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
244  unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset);
  // The load overwrites FlatScrInit, which at this point holds the pointer.
245  BuildMI(MBB, I, DL, LoadDwordX2, FlatScrInit)
246  .addReg(FlatScrInit)
247  .addImm(EncodedOffset) // offset
248  .addImm(0) // cpol
249  .addMemOperand(MMO);
250 
251  // Mask the offset in [47:0] of the descriptor
252  const MCInstrDesc &SAndB32 = TII->get(AMDGPU::S_AND_B32);
253  BuildMI(MBB, I, DL, SAndB32, FlatScrInitHi)
254  .addReg(FlatScrInitHi)
255  .addImm(0xffff);
256  } else {
257  Register FlatScratchInitReg =
259  assert(FlatScratchInitReg);
260 
262  MRI.addLiveIn(FlatScratchInitReg);
263  MBB.addLiveIn(FlatScratchInitReg);
264 
265  FlatScrInitLo = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0);
266  FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1);
267  }
268 
269  // Do a 64-bit pointer add.
270  if (ST.flatScratchIsPointer()) {
271  if (ST.getGeneration() >= AMDGPUSubtarget::GFX10) {
  // The sum is computed in place in the init registers and then written to
  // the FLAT_SCR hardware registers with two S_SETREG_B32 instructions.
272  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo)
273  .addReg(FlatScrInitLo)
274  .addReg(ScratchWaveOffsetReg);
275  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), FlatScrInitHi)
276  .addReg(FlatScrInitHi)
277  .addImm(0);
278  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)).
279  addReg(FlatScrInitLo).
280  addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_LO |
282  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)).
283  addReg(FlatScrInitHi).
284  addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_HI |
286  return;
287  }
288 
289  // For GFX9.
290  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), AMDGPU::FLAT_SCR_LO)
291  .addReg(FlatScrInitLo)
292  .addReg(ScratchWaveOffsetReg);
293  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), AMDGPU::FLAT_SCR_HI)
294  .addReg(FlatScrInitHi)
295  .addImm(0);
296 
297  return;
298  }
299 
300  assert(ST.getGeneration() < AMDGPUSubtarget::GFX9);
301 
302  // Copy the size in bytes.
303  BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), AMDGPU::FLAT_SCR_LO)
304  .addReg(FlatScrInitHi, RegState::Kill);
305 
306  // Add wave offset in bytes to private base offset.
307  // See comment in AMDKernelCodeT.h for enable_sgpr_flat_scratch_init.
308  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo)
309  .addReg(FlatScrInitLo)
310  .addReg(ScratchWaveOffsetReg);
311 
312  // Convert offset to 256-byte units.
313  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_LSHR_B32), AMDGPU::FLAT_SCR_HI)
314  .addReg(FlatScrInitLo, RegState::Kill)
315  .addImm(8);
316 }
317 
318 // Note SGPRSpill stack IDs should only be used for SGPR spilling to VGPRs, not
319 // memory. They should have been removed by now.
320 static bool allStackObjectsAreDead(const MachineFrameInfo &MFI) {
321  for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
322  I != E; ++I) {
323  if (!MFI.isDeadObjectIndex(I))
324  return false;
325  }
326 
327  return true;
328 }
329 
330 // Shift down registers reserved for the scratch RSRC.
//
// Returns the register to use for the scratch resource descriptor of an entry
// function (asserted). A default Register() is returned when no scratch RSRC
// register is set, or when it is not used (the remainder of that condition is
// on a line missing from this listing). The current register is returned
// unchanged when the subtarget has the SGPR init bug or when it is not the
// reserved private segment buffer register. Otherwise the first suitable
// 128-bit SGPR tuple past the preloaded SGPRs replaces it: all uses are
// rewritten via MRI.replaceRegWith() and MFI is updated.
//
// NOTE(review): the MFI and MRI declarations, part of the early-return
// condition and the first half of the loop condition are missing from this
// listing -- confirm against the upstream file.
331 Register SIFrameLowering::getEntryFunctionReservedScratchRsrcReg(
332  MachineFunction &MF) const {
333 
334  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
335  const SIInstrInfo *TII = ST.getInstrInfo();
336  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
339 
340  assert(MFI->isEntryFunction());
341 
342  Register ScratchRsrcReg = MFI->getScratchRSrcReg();
343 
344  if (!ScratchRsrcReg || (!MRI.isPhysRegUsed(ScratchRsrcReg) &&
346  return Register();
347 
348  if (ST.hasSGPRInitBug() ||
349  ScratchRsrcReg != TRI->reservedPrivateSegmentBufferReg(MF))
350  return ScratchRsrcReg;
351 
352  // We reserved the last registers for this. Shift it down to the end of those
353  // which were actually used.
354  //
355  // FIXME: It might be safer to use a pseudoregister before replacement.
356 
357  // FIXME: We should be able to eliminate unused input registers. We only
358  // cannot do this for the resources required for scratch access. For now we
359  // skip over user SGPRs and may leave unused holes.
360 
  // Number of preloaded SGPRs rounded up to whole 128-bit tuples; that many
  // leading candidates are dropped from the list below.
361  unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 3) / 4;
362  ArrayRef<MCPhysReg> AllSGPR128s = TRI->getAllSGPR128(MF);
363  AllSGPR128s = AllSGPR128s.slice(std::min(static_cast<unsigned>(AllSGPR128s.size()), NumPreloaded));
364 
365  // Skip the last N reserved elements because they should have already been
366  // reserved for VCC etc.
367  Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
368  for (MCPhysReg Reg : AllSGPR128s) {
369  // Pick the first unallocated one. Make sure we don't clobber the other
370  // reserved input we needed. Also for PAL, make sure we don't clobber
371  // the GIT pointer passed in SGPR0 or SGPR8.
373  !TRI->isSubRegisterEq(Reg, GITPtrLoReg)) {
374  MRI.replaceRegWith(ScratchRsrcReg, Reg);
375  MFI->setScratchRSrcReg(Reg);
376  return Reg;
377  }
378  }
379 
  // No replacement tuple was found; keep the originally reserved register.
380  return ScratchRsrcReg;
381 }
382 
383 static unsigned getScratchScaleFactor(const GCNSubtarget &ST) {
384  return ST.enableFlatScratch() ? 1 : ST.getWavefrontSize();
385 }
386 
// Emits the prologue of an entry function (asserted) into MBB, which must be
// the first block of MF (asserted). Steps visible in this listing:
//  - bail out when there is no preloaded scratch wave offset register;
//  - unless flat scratch is enabled, pick the scratch RSRC register and make
//    it live-in to every other block;
//  - on HSA/Mesa, re-add the preloaded scratch RSRC register as a live-in;
//  - if the chosen RSRC register overlaps the preloaded scratch wave offset
//    register, copy the wave offset to a free SGPR first;
//  - initialise the stack pointer and, if hasFP(MF), the frame pointer (to 0);
//  - emit flat scratch init and/or scratch RSRC setup as required.
//
// NOTE(review): this listing is missing the line that opens the definition,
// the MFI, MRI and I declarations, the argument of getPreloadedReg, the
// initializer of PreloadedScratchRsrcReg, the first half of the SGPR search
// condition, and the condition and immediate operand of the stack pointer
// setup -- confirm against the upstream file.
388  MachineBasicBlock &MBB) const {
389  assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
390 
391  // FIXME: If we only have SGPR spills, we won't actually be using scratch
392  // memory since these spill to VGPRs. We should be cleaning up these unused
393  // SGPR spill frame indices somewhere.
394 
395  // FIXME: We still have implicit uses on SGPR spill instructions in case they
396  // need to spill to vector memory. It's likely that will not happen, but at
397  // this point it appears we need the setup. This part of the prolog should be
398  // emitted after frame indices are eliminated.
399 
400  // FIXME: Remove all of the isPhysRegUsed checks
401 
403  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
404  const SIInstrInfo *TII = ST.getInstrInfo();
405  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
407  const Function &F = MF.getFunction();
408 
409  assert(MFI->isEntryFunction());
410 
411  Register PreloadedScratchWaveOffsetReg = MFI->getPreloadedReg(
413  // FIXME: Hack to not crash in situations which emitted an error.
414  if (!PreloadedScratchWaveOffsetReg)
415  return;
416 
417  // We need to do the replacement of the private segment buffer register even
418  // if there are no stack objects. There could be stores to undef or a
419  // constant without an associated object.
420  //
421  // This will return `Register()` in cases where there are no actual
422  // uses of the SRSRC.
423  Register ScratchRsrcReg;
424  if (!ST.enableFlatScratch())
425  ScratchRsrcReg = getEntryFunctionReservedScratchRsrcReg(MF);
426 
427  // Make the selected register live throughout the function.
428  if (ScratchRsrcReg) {
429  for (MachineBasicBlock &OtherBB : MF) {
430  if (&OtherBB != &MBB) {
431  OtherBB.addLiveIn(ScratchRsrcReg);
432  }
433  }
434  }
435 
436  // Now that we have fixed the reserved SRSRC we need to locate the
437  // (potentially) preloaded SRSRC.
438  Register PreloadedScratchRsrcReg;
439  if (ST.isAmdHsaOrMesa(F)) {
440  PreloadedScratchRsrcReg =
442  if (ScratchRsrcReg && PreloadedScratchRsrcReg) {
443  // We added live-ins during argument lowering, but since they were not
444  // used they were deleted. We're adding the uses now, so add them back.
445  MRI.addLiveIn(PreloadedScratchRsrcReg);
446  MBB.addLiveIn(PreloadedScratchRsrcReg);
447  }
448  }
449 
450  // Debug location must be unknown since the first debug location is used to
451  // determine the end of the prologue.
452  DebugLoc DL;
454 
455  // We found the SRSRC first because it needs four registers and has an
456  // alignment requirement. If the SRSRC that we found is clobbering with
457  // the scratch wave offset, which may be in a fixed SGPR or a free SGPR
458  // chosen by SITargetLowering::allocateSystemSGPRs, COPY the scratch
459  // wave offset to a free SGPR.
460  Register ScratchWaveOffsetReg;
461  if (TRI->isSubRegisterEq(ScratchRsrcReg, PreloadedScratchWaveOffsetReg)) {
462  ArrayRef<MCPhysReg> AllSGPRs = TRI->getAllSGPR32(MF);
  // Drop as many leading candidates as there are preloaded SGPRs.
463  unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();
464  AllSGPRs = AllSGPRs.slice(
465  std::min(static_cast<unsigned>(AllSGPRs.size()), NumPreloaded));
466  Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
467  for (MCPhysReg Reg : AllSGPRs) {
469  !TRI->isSubRegisterEq(ScratchRsrcReg, Reg) && GITPtrLoReg != Reg) {
470  ScratchWaveOffsetReg = Reg;
471  BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchWaveOffsetReg)
472  .addReg(PreloadedScratchWaveOffsetReg, RegState::Kill);
473  break;
474  }
475  }
476  } else {
477  ScratchWaveOffsetReg = PreloadedScratchWaveOffsetReg;
478  }
  // If the search above found no suitable SGPR this assertion fires.
479  assert(ScratchWaveOffsetReg);
480 
482  Register SPReg = MFI->getStackPtrOffsetReg();
483  assert(SPReg != AMDGPU::SP_REG);
484  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), SPReg)
486  }
487 
488  if (hasFP(MF)) {
489  Register FPReg = MFI->getFrameOffsetReg();
490  assert(FPReg != AMDGPU::FP_REG);
491  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), FPReg).addImm(0);
492  }
493 
  // The preloaded wave offset is read by the setup code emitted below, so it
  // must be a live-in of the function and of MBB.
494  if (MFI->hasFlatScratchInit() || ScratchRsrcReg) {
495  MRI.addLiveIn(PreloadedScratchWaveOffsetReg);
496  MBB.addLiveIn(PreloadedScratchWaveOffsetReg);
497  }
498 
499  if (MFI->hasFlatScratchInit()) {
500  emitEntryFunctionFlatScratchInit(MF, MBB, I, DL, ScratchWaveOffsetReg);
501  }
502 
503  if (ScratchRsrcReg) {
504  emitEntryFunctionScratchRsrcRegSetup(MF, MBB, I, DL,
505  PreloadedScratchRsrcReg,
506  ScratchRsrcReg, ScratchWaveOffsetReg);
507  }
508 }
509 
510 // Emit scratch RSRC setup code, assuming `ScratchRsrcReg != AMDGPU::NoReg`
//
// Three ways of initialising ScratchRsrcReg are visible:
//  - amdpal: load four dwords through the GIT pointer built by buildGitPtr;
//  - Mesa graphics shader, or no preloaded RSRC register: build words 0-1
//    from the implicit buffer pointer (copied or loaded through it) or from
//    the SCRATCH_RSRC_DWORD0/1 external symbols, and words 2-3 from
//    TII->getScratchRsrcWords23();
//  - HSA/Mesa with a preloaded register: copy it if it differs.
// Afterwards ScratchWaveOffsetReg is added into the low two dwords.
//
// NOTE(review): this listing is missing several lines (the MF/MBB/I
// parameters, the MFI declaration, the PtrInfo declarations, parts of the
// memory-operand flags, the source operands that read the implicit buffer
// pointer, and what appear to be live-in updates near the end of that
// branch) -- confirm against the upstream file.
511 void SIFrameLowering::emitEntryFunctionScratchRsrcRegSetup(
513  const DebugLoc &DL, Register PreloadedScratchRsrcReg,
514  Register ScratchRsrcReg, Register ScratchWaveOffsetReg) const {
515 
516  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
517  const SIInstrInfo *TII = ST.getInstrInfo();
518  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
520  const Function &Fn = MF.getFunction();
521 
522  if (ST.isAmdPalOS()) {
523  // The pointer to the GIT is formed from the offset passed in and either
524  // the amdgpu-git-ptr-high function attribute or the top part of the PC
525  Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
526 
527  buildGitPtr(MBB, I, DL, TII, Rsrc01);
528 
529  // We now have the GIT ptr - now get the scratch descriptor from the entry
530  // at offset 0 (or offset 16 for a compute shader).
532  const MCInstrDesc &LoadDwordX4 = TII->get(AMDGPU::S_LOAD_DWORDX4_IMM);
533  auto MMO = MF.getMachineMemOperand(PtrInfo,
537  16, Align(4));
538  unsigned Offset = Fn.getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0;
539  const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
540  unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset);
  // The pointer lives in the low half of the destination, so the load
  // overwrites its own address register.
541  BuildMI(MBB, I, DL, LoadDwordX4, ScratchRsrcReg)
542  .addReg(Rsrc01)
543  .addImm(EncodedOffset) // offset
544  .addImm(0) // cpol
545  .addReg(ScratchRsrcReg, RegState::ImplicitDefine)
546  .addMemOperand(MMO);
547  } else if (ST.isMesaGfxShader(Fn) || !PreloadedScratchRsrcReg) {
548  assert(!ST.isAmdHsaOrMesa(Fn));
549  const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
550 
551  Register Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
552  Register Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);
553 
554  // Use relocations to get the pointer, and setup the other bits manually.
555  uint64_t Rsrc23 = TII->getScratchRsrcWords23();
556 
557  if (MFI->hasImplicitBufferPtr()) {
558  Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
559 
561  const MCInstrDesc &Mov64 = TII->get(AMDGPU::S_MOV_B64);
562 
563  BuildMI(MBB, I, DL, Mov64, Rsrc01)
565  .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
566  } else {
567  const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);
568 
570  auto MMO = MF.getMachineMemOperand(
571  PtrInfo,
574  8, Align(4));
575  BuildMI(MBB, I, DL, LoadDwordX2, Rsrc01)
577  .addImm(0) // offset
578  .addImm(0) // cpol
579  .addMemOperand(MMO)
580  .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
581 
584  }
585  } else {
586  Register Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
587  Register Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
588 
589  BuildMI(MBB, I, DL, SMovB32, Rsrc0)
590  .addExternalSymbol("SCRATCH_RSRC_DWORD0")
591  .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
592 
593  BuildMI(MBB, I, DL, SMovB32, Rsrc1)
594  .addExternalSymbol("SCRATCH_RSRC_DWORD1")
595  .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
596 
597  }
598 
  // Words 2 and 3 are the low and high 32 bits of Rsrc23 respectively.
599  BuildMI(MBB, I, DL, SMovB32, Rsrc2)
600  .addImm(Rsrc23 & 0xffffffff)
601  .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
602 
603  BuildMI(MBB, I, DL, SMovB32, Rsrc3)
604  .addImm(Rsrc23 >> 32)
605  .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
606  } else if (ST.isAmdHsaOrMesa(Fn)) {
607  assert(PreloadedScratchRsrcReg);
608 
609  if (ScratchRsrcReg != PreloadedScratchRsrcReg) {
610  BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchRsrcReg)
611  .addReg(PreloadedScratchRsrcReg, RegState::Kill);
612  }
613  }
614 
615  // Add the scratch wave offset into the scratch RSRC.
616  //
617  // We only want to update the first 48 bits, which is the base address
618  // pointer, without touching the adjacent 16 bits of flags. We know this add
619  // cannot carry-out from bit 47, otherwise the scratch allocation would be
620  // impossible to fit in the 48-bit global address space.
621  //
622  // TODO: Evaluate if it is better to just construct an SRD using the flat
623  // scratch init and some constants rather than update the one we are passed.
624  Register ScratchRsrcSub0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
625  Register ScratchRsrcSub1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
626 
627  // We cannot Kill ScratchWaveOffsetReg here because we allow it to be used in
628  // the kernel body via inreg arguments.
629  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), ScratchRsrcSub0)
630  .addReg(ScratchRsrcSub0)
631  .addReg(ScratchWaveOffsetReg)
632  .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
633  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), ScratchRsrcSub1)
634  .addReg(ScratchRsrcSub1)
635  .addImm(0)
636  .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
637 }
638 
// Classifies a stack ID value: some IDs yield true, others false, and any
// value not covered by the switch is reported as invalid.
//
// NOTE(review): the function signature and every `case` label are missing
// from this listing, so which IDs map to true and which to false cannot be
// read off here -- confirm against the upstream file.
640  switch (ID) {
644  return true;
646  return false;
647  }
  // Reached only when ID matches none of the cases above.
648  llvm_unreachable("Invalid TargetStackID::Value");
649 }
650 
// Lazily initialises LiveRegs: nothing happens when LiveRegs is already
// non-empty. Otherwise it is initialised for TRI and seeded with the live-ins
// of MBB (prolog) or with the live-outs of MBB stepped backward over the
// instruction at MBBI (epilog).
//
// NOTE(review): the parameter line that declares MBB (and presumably MF) is
// missing from this listing; FuncInfo is not used in the visible body.
651 static void initLiveRegs(LivePhysRegs &LiveRegs, const SIRegisterInfo &TRI,
652  const SIMachineFunctionInfo *FuncInfo,
654  MachineBasicBlock::iterator MBBI, bool IsProlog) {
655  if (LiveRegs.empty()) {
656  LiveRegs.init(TRI);
657  if (IsProlog) {
658  LiveRegs.addLiveIns(MBB);
659  } else {
660  // In epilog.
661  LiveRegs.addLiveOuts(MBB);
662  LiveRegs.stepBackward(*MBBI);
663  }
664  }
665 }
666 
667 // Activate all lanes, returns saved exec.
//
// Picks a free wave-mask register via findScratchNonCalleeSaveRegister, marks
// it live in LiveRegs and emits S_OR_SAVEEXEC (B32 for wave32, B64 otherwise)
// with an immediate of -1 before MBBI, writing the result to that register.
// Aborts with report_fatal_error when no register is free.
//
// NOTE(review): the line that opens this definition, the MBB/MBBI parameter
// line, and the FuncInfo and MRI declarations are missing from this listing --
// confirm against the upstream file.
669  MachineFunction &MF,
672  bool IsProlog) {
673  Register ScratchExecCopy;
675  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
676  const SIInstrInfo *TII = ST.getInstrInfo();
677  const SIRegisterInfo &TRI = TII->getRegisterInfo();
679  DebugLoc DL;
680 
  // No-op if the caller has already populated LiveRegs.
681  initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, IsProlog);
682 
683  ScratchExecCopy = findScratchNonCalleeSaveRegister(
684  MRI, LiveRegs, *TRI.getWaveMaskRegClass());
685  if (!ScratchExecCopy)
686  report_fatal_error("failed to find free scratch register");
687 
  // Keep the saved-exec register from being handed out again by later
  // scratch register searches that use the same LiveRegs.
688  LiveRegs.addReg(ScratchExecCopy);
689 
690  const unsigned OrSaveExec =
691  ST.isWave32() ? AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64;
692  BuildMI(MBB, MBBI, DL, TII->get(OrSaveExec), ScratchExecCopy).addImm(-1);
693 
694  return ScratchExecCopy;
695 }
696 
697 // A StackID of SGPRSpill implies that this is a spill from SGPR to VGPR.
698 // Otherwise we are spilling to memory.
699 static bool spilledToMemory(const MachineFunction &MF, int SaveIndex) {
700  const MachineFrameInfo &MFI = MF.getFrameInfo();
701  return MFI.getStackID(SaveIndex) != TargetStackID::SGPRSpill;
702 }
703 
// Emits the prologue of a non-entry function before MBBI in MBB (entry
// functions take the early return below). Order of the visible steps:
//  1. spill the VGPRs listed by getSGPRSpillVGPRs() that have a frame index,
//     with all lanes enabled, then restore exec;
//  2. save FP and/or BP: to memory through a temporary VGPR, to a lane of a
//     reserved VGPR with V_WRITELANE_B32, or by COPY into a free SGPR;
//  3. make the SGPRs used for those copies live-in to every block;
//  4. set up FP (realigned if required), then BP, then bump SP by the
//     (rounded) frame size scaled by getScratchScaleFactor();
//  5. assert that FP/BP were saved exactly when they are needed.
//
// NOTE(review): this listing is missing the line that opens the definition,
// the FuncInfo, MRI and MBBI declarations, the statement executed for entry
// functions before the early return, the range-for header over the spill
// VGPRs, the TmpVGPR and Spill declarations, and the trailing operand/flag
// line of most BuildMI chains -- confirm against the upstream file.
705  MachineBasicBlock &MBB) const {
707  if (FuncInfo->isEntryFunction()) {
709  return;
710  }
711 
712  const MachineFrameInfo &MFI = MF.getFrameInfo();
714  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
715  const SIInstrInfo *TII = ST.getInstrInfo();
716  const SIRegisterInfo &TRI = TII->getRegisterInfo();
717 
718  Register StackPtrReg = FuncInfo->getStackPtrOffsetReg();
719  Register FramePtrReg = FuncInfo->getFrameOffsetReg();
720  Register BasePtrReg =
721  TRI.hasBasePointer(MF) ? TRI.getBaseRegister() : Register();
  // Left empty here; populated lazily by initLiveRegs or the realignment
  // path below.
722  LivePhysRegs LiveRegs;
723 
725  DebugLoc DL;
726 
727  bool HasFP = false;
728  bool HasBP = false;
729  uint32_t NumBytes = MFI.getStackSize();
730  uint32_t RoundedSize = NumBytes;
731  // To avoid clobbering VGPRs in lanes that weren't active on function entry,
732  // turn on all lanes before doing the spill to memory.
733  Register ScratchExecCopy;
734 
735  Optional<int> FPSaveIndex = FuncInfo->FramePointerSaveIndex;
736  Optional<int> BPSaveIndex = FuncInfo->BasePointerSaveIndex;
737 
739  FuncInfo->getSGPRSpillVGPRs()) {
  // Entries without a frame index need no store.
740  if (!Reg.FI.hasValue())
741  continue;
742 
  // Exec is saved and all lanes are enabled only once, before the first
  // store.
743  if (!ScratchExecCopy)
744  ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI,
745  /*IsProlog*/ true);
746 
747  buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBBI, Reg.VGPR, *Reg.FI);
748  }
749 
750  if (ScratchExecCopy) {
751  // FIXME: Split block and make terminator.
752  unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
753  MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
754  BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec)
755  .addReg(ScratchExecCopy, RegState::Kill);
756  LiveRegs.addReg(ScratchExecCopy);
757  }
758 
  // FP saved to memory: move it into a temporary VGPR and store that.
759  if (FPSaveIndex && spilledToMemory(MF, *FPSaveIndex)) {
760  const int FramePtrFI = *FPSaveIndex;
761  assert(!MFI.isDeadObjectIndex(FramePtrFI));
762 
763  initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ true);
764 
766  MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
767  if (!TmpVGPR)
768  report_fatal_error("failed to find free scratch register");
769 
770  BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
771  .addReg(FramePtrReg);
772 
773  buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBBI, TmpVGPR,
774  FramePtrFI);
775  }
776 
  // BP saved to memory: same scheme as for FP above.
777  if (BPSaveIndex && spilledToMemory(MF, *BPSaveIndex)) {
778  const int BasePtrFI = *BPSaveIndex;
779  assert(!MFI.isDeadObjectIndex(BasePtrFI));
780 
781  initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ true);
782 
784  MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
785  if (!TmpVGPR)
786  report_fatal_error("failed to find free scratch register");
787 
788  BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
789  .addReg(BasePtrReg);
790 
791  buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBBI, TmpVGPR,
792  BasePtrFI);
793  }
794 
795  // In this case, spill the FP to a reserved VGPR.
796  if (FPSaveIndex && !spilledToMemory(MF, *FPSaveIndex)) {
797  const int FramePtrFI = *FPSaveIndex;
798  assert(!MFI.isDeadObjectIndex(FramePtrFI));
799 
800  assert(MFI.getStackID(FramePtrFI) == TargetStackID::SGPRSpill);
802  FuncInfo->getSGPRToVGPRSpills(FramePtrFI);
803  assert(Spill.size() == 1);
804 
805  // Save FP before setting it up.
806  BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_WRITELANE_B32), Spill[0].VGPR)
807  .addReg(FramePtrReg)
808  .addImm(Spill[0].Lane)
809  .addReg(Spill[0].VGPR, RegState::Undef);
810  }
811 
812  // In this case, spill the BP to a reserved VGPR.
813  if (BPSaveIndex && !spilledToMemory(MF, *BPSaveIndex)) {
814  const int BasePtrFI = *BPSaveIndex;
815  assert(!MFI.isDeadObjectIndex(BasePtrFI));
816 
817  assert(MFI.getStackID(BasePtrFI) == TargetStackID::SGPRSpill);
819  FuncInfo->getSGPRToVGPRSpills(BasePtrFI);
820  assert(Spill.size() == 1);
821 
822  // Save BP before setting it up.
823  BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_WRITELANE_B32), Spill[0].VGPR)
824  .addReg(BasePtrReg)
825  .addImm(Spill[0].Lane)
826  .addReg(Spill[0].VGPR, RegState::Undef);
827  }
828 
829  // Emit the copy if we need an FP, and are using a free SGPR to save it.
830  if (FuncInfo->SGPRForFPSaveRestoreCopy) {
831  BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY),
832  FuncInfo->SGPRForFPSaveRestoreCopy)
833  .addReg(FramePtrReg)
835  }
836 
837  // Emit the copy if we need a BP, and are using a free SGPR to save it.
838  if (FuncInfo->SGPRForBPSaveRestoreCopy) {
839  BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY),
840  FuncInfo->SGPRForBPSaveRestoreCopy)
841  .addReg(BasePtrReg)
843  }
844 
845  // If a copy has been emitted for FP and/or BP, Make the SGPRs
846  // used in the copy instructions live throughout the function.
847  SmallVector<MCPhysReg, 2> TempSGPRs;
848  if (FuncInfo->SGPRForFPSaveRestoreCopy)
849  TempSGPRs.push_back(FuncInfo->SGPRForFPSaveRestoreCopy);
850 
851  if (FuncInfo->SGPRForBPSaveRestoreCopy)
852  TempSGPRs.push_back(FuncInfo->SGPRForBPSaveRestoreCopy);
853 
854  if (!TempSGPRs.empty()) {
  // Note: this loop variable shadows the MBB parameter.
855  for (MachineBasicBlock &MBB : MF) {
856  for (MCPhysReg Reg : TempSGPRs)
857  MBB.addLiveIn(Reg);
858 
860  }
  // Only update LiveRegs if some earlier step already initialised it.
861  if (!LiveRegs.empty()) {
862  LiveRegs.addReg(FuncInfo->SGPRForFPSaveRestoreCopy);
863  LiveRegs.addReg(FuncInfo->SGPRForBPSaveRestoreCopy);
864  }
865  }
866 
867  if (TRI.hasStackRealignment(MF)) {
868  HasFP = true;
869  const unsigned Alignment = MFI.getMaxAlign().value();
870 
  // Reserve one extra Alignment worth of bytes for the realignment.
871  RoundedSize += Alignment;
872  if (LiveRegs.empty()) {
873  LiveRegs.init(TRI);
874  LiveRegs.addLiveIns(MBB);
875  }
876 
877  // s_add_u32 s33, s32, NumBytes
878  // s_and_b32 s33, s33, 0b111...0000
  // i.e. FP = (SP + (Alignment - 1) * scale) & -(Alignment * scale).
879  BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_U32), FramePtrReg)
880  .addReg(StackPtrReg)
881  .addImm((Alignment - 1) * getScratchScaleFactor(ST))
883  BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_AND_B32), FramePtrReg)
884  .addReg(FramePtrReg, RegState::Kill)
885  .addImm(-Alignment * getScratchScaleFactor(ST))
887  FuncInfo->setIsStackRealigned(true);
888  } else if ((HasFP = hasFP(MF))) {
  // No realignment: FP is simply the incoming SP.
889  BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
890  .addReg(StackPtrReg)
892  }
893 
894  // If we need a base pointer, set it up here. It's whatever the value of
895  // the stack pointer is at this point. Any variable size objects will be
896  // allocated after this, so we can still use the base pointer to reference
897  // the incoming arguments.
898  if ((HasBP = TRI.hasBasePointer(MF))) {
899  BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), BasePtrReg)
900  .addReg(StackPtrReg)
902  }
903 
  // SP is only advanced when a frame pointer is in use and the frame is
  // non-empty.
904  if (HasFP && RoundedSize != 0) {
905  BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_U32), StackPtrReg)
906  .addReg(StackPtrReg)
907  .addImm(RoundedSize * getScratchScaleFactor(ST))
909  }
910 
911  assert((!HasFP || (FuncInfo->SGPRForFPSaveRestoreCopy ||
912  FuncInfo->FramePointerSaveIndex)) &&
913  "Needed to save FP but didn't save it anywhere");
914 
915  assert((HasFP || (!FuncInfo->SGPRForFPSaveRestoreCopy &&
916  !FuncInfo->FramePointerSaveIndex)) &&
917  "Saved FP but didn't need it");
918 
919  assert((!HasBP || (FuncInfo->SGPRForBPSaveRestoreCopy ||
920  FuncInfo->BasePointerSaveIndex)) &&
921  "Needed to save BP but didn't save it anywhere");
922 
923  assert((HasBP || (!FuncInfo->SGPRForBPSaveRestoreCopy &&
924  !FuncInfo->BasePointerSaveIndex)) &&
925  "Saved BP but didn't need it");
926 }
927 
// SIFrameLowering::emitEpilogue (continuation of the signature; the listing
// line that holds its first half, 928, is not part of this extract).
//
// Emits the epilogue of a callable function into MBB; entry functions return
// immediately. In order, the code below:
//   1. subtracts the scaled frame size from the stack pointer (only when the
//      function has an FP and the size is non-zero),
//   2. restores FP/BP from their dedicated SGPR copies, if any were set,
//   3. restores FP/BP from their save frame indexes, if any (either through
//      memory and a temporary VGPR, or by reading a VGPR lane),
//   4. reloads the SGPR-spill VGPRs that have a frame index, then restores
//      exec from the scratch copy taken for those reloads.
//
// NOTE(review): listing lines 936, 938, 959, 965, 971, 980, 991, 1006, 1017
// and 1027 are also missing here. MBBI, MRI, TmpVGPR and Spill are used below
// without a visible declaration, so they are presumably declared on those
// lines -- confirm against the full source.
929  MachineBasicBlock &MBB) const {
930  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
931  if (FuncInfo->isEntryFunction())
932  return;
933 
934  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
935  const SIInstrInfo *TII = ST.getInstrInfo();
937  const SIRegisterInfo &TRI = TII->getRegisterInfo();
939  LivePhysRegs LiveRegs;
940  DebugLoc DL;
941 
942  const MachineFrameInfo &MFI = MF.getFrameInfo();
943  uint32_t NumBytes = MFI.getStackSize();
// If the stack was flagged as realigned, the amount to release is the stack
// size plus the maximum alignment; otherwise just the stack size.
944  uint32_t RoundedSize = FuncInfo->isStackRealigned()
945  ? NumBytes + MFI.getMaxAlign().value()
946  : NumBytes;
947  const Register StackPtrReg = FuncInfo->getStackPtrOffsetReg();
948  const Register FramePtrReg = FuncInfo->getFrameOffsetReg();
// BasePtrReg is a default-constructed Register when there is no base pointer.
949  const Register BasePtrReg =
950  TRI.hasBasePointer(MF) ? TRI.getBaseRegister() : Register();
951 
952  Optional<int> FPSaveIndex = FuncInfo->FramePointerSaveIndex;
953  Optional<int> BPSaveIndex = FuncInfo->BasePointerSaveIndex;
954 
// Step 1: release the frame by moving SP back down by the scaled size.
955  if (RoundedSize != 0 && hasFP(MF)) {
956  BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_SUB_U32), StackPtrReg)
957  .addReg(StackPtrReg)
958  .addImm(RoundedSize * getScratchScaleFactor(ST))
960  }
961 
// Step 2: FP/BP were kept in a scratch SGPR; copy them straight back.
962  if (FuncInfo->SGPRForFPSaveRestoreCopy) {
963  BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
964  .addReg(FuncInfo->SGPRForFPSaveRestoreCopy)
966  }
967 
968  if (FuncInfo->SGPRForBPSaveRestoreCopy) {
969  BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), BasePtrReg)
970  .addReg(FuncInfo->SGPRForBPSaveRestoreCopy)
972  }
973 
// Step 3a: FP was saved to a frame index.
974  if (FPSaveIndex) {
975  const int FramePtrFI = *FPSaveIndex;
976  assert(!MFI.isDeadObjectIndex(FramePtrFI));
977  if (spilledToMemory(MF, FramePtrFI)) {
// Memory spill: reload into a temporary VGPR, then move the value into the
// FP register with V_READFIRSTLANE_B32. Failing to find a VGPR is fatal.
978  initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ false);
979 
981  MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
982  if (!TmpVGPR)
983  report_fatal_error("failed to find free scratch register");
984  buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBBI, TmpVGPR,
985  FramePtrFI);
986  BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), FramePtrReg)
987  .addReg(TmpVGPR, RegState::Kill);
988  } else {
989  // Reload from VGPR spill.
990  assert(MFI.getStackID(FramePtrFI) == TargetStackID::SGPRSpill);
992  FuncInfo->getSGPRToVGPRSpills(FramePtrFI);
993  assert(Spill.size() == 1);
994  BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READLANE_B32), FramePtrReg)
995  .addReg(Spill[0].VGPR)
996  .addImm(Spill[0].Lane);
997  }
998  }
999 
// Step 3b: same as above, for the base pointer.
1000  if (BPSaveIndex) {
1001  const int BasePtrFI = *BPSaveIndex;
1002  assert(!MFI.isDeadObjectIndex(BasePtrFI));
1003  if (spilledToMemory(MF, BasePtrFI)) {
1004  initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ false);
1005 
1007  MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
1008  if (!TmpVGPR)
1009  report_fatal_error("failed to find free scratch register");
1010  buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBBI, TmpVGPR,
1011  BasePtrFI);
1012  BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), BasePtrReg)
1013  .addReg(TmpVGPR, RegState::Kill);
1014  } else {
1015  // Reload from VGPR spill.
1016  assert(MFI.getStackID(BasePtrFI) == TargetStackID::SGPRSpill);
1018  FuncInfo->getSGPRToVGPRSpills(BasePtrFI);
1019  assert(Spill.size() == 1);
1020  BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READLANE_B32), BasePtrReg)
1021  .addReg(Spill[0].VGPR)
1022  .addImm(Spill[0].Lane);
1023  }
1024  }
1025 
// Step 4: reload every SGPR-spill VGPR that has a frame index. The scratch
// exec copy is built lazily, on the first VGPR that actually needs a reload.
1026  Register ScratchExecCopy;
1028  FuncInfo->getSGPRSpillVGPRs()) {
1029  if (!Reg.FI.hasValue())
1030  continue;
1031 
1032  if (!ScratchExecCopy)
1033  ScratchExecCopy =
1034  buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, /*IsProlog*/ false);
1035 
1036  buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBBI, Reg.VGPR,
1037  *Reg.FI);
1038  }
1039 
// Put the saved exec value back; the move width follows the wave size.
1040  if (ScratchExecCopy) {
1041  // FIXME: Split block and make terminator.
1042  unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
1043  MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
1044  BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec)
1045  .addReg(ScratchExecCopy, RegState::Kill);
1046  }
1047 }
1048 
1049 #ifndef NDEBUG
// Helper compiled only when NDEBUG is not defined. Walks every frame object
// index from getObjectIndexBegin() to getObjectIndexEnd() and returns false as
// soon as one live (non-dead) object satisfies the condition below; returns
// true if none does. The FP and BP save indexes are exempt from the check.
//
// NOTE(review): listing line 1056 is absent from this extract, so one operand
// of the condition is not visible -- presumably a test that the object is an
// SGPR spill slot; confirm against the full source.
1050 static bool allSGPRSpillsAreDead(const MachineFunction &MF) {
1051  const MachineFrameInfo &MFI = MF.getFrameInfo();
1052  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
1053  for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
1054  I != E; ++I) {
1055  if (!MFI.isDeadObjectIndex(I) &&
1057  (I != FuncInfo->FramePointerSaveIndex &&
1058  I != FuncInfo->BasePointerSaveIndex)) {
1059  return false;
1060  }
1061  }
1062 
1063  return true;
1064 }
1065 #endif
1066 
// Tail of a const member function taking (..., int FI, Register &FrameReg).
// NOTE(review): listing lines 1067 (start of the signature, including the
// return type and function name) and 1073 (presumably the return statement)
// are absent from this extract; this is presumably
// SIFrameLowering::getFrameIndexReference -- confirm against the full source.
//
// The visible part sets the out-parameter FrameReg to the frame register that
// SIRegisterInfo reports for this function.
1068  int FI,
1069  Register &FrameReg) const {
1070  const SIRegisterInfo *RI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
1071 
1072  FrameReg = RI->getFrameRegister(MF);
1074 }
1075 
// SIFrameLowering::processFunctionBeforeFrameFinalized (continuation of the
// signature; listing line 1076 with its first half is not in this extract).
//
// Removes dead frame indexes from the function info, asserts that SGPR spills
// are gone, and registers an emergency scavenging frame index with the
// register scavenger when any stack object is still live.
//
// NOTE(review): listing lines 1083 and 1086 are also missing. FuncInfo is used
// without a visible declaration, and only the message of the assert that
// starts on line 1086 is visible -- confirm both against the full source.
1077  MachineFunction &MF,
1078  RegScavenger *RS) const {
1079  MachineFrameInfo &MFI = MF.getFrameInfo();
1080 
1081  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1082  const SIRegisterInfo *TRI = ST.getRegisterInfo();
1084 
1085  FuncInfo->removeDeadFrameIndices(MFI);
1087  "SGPR spill should have been removed in SILowerSGPRSpills");
1088 
1089  // FIXME: The other checks should be redundant with allStackObjectsAreDead,
1090  // but currently hasNonSpillStackObjects is set only from source
1091  // allocas. Stack temps produced from legalization are not counted currently.
1092  if (!allStackObjectsAreDead(MFI)) {
// RS is dereferenced below, so a null scavenger is a caller error here.
1093  assert(RS && "RegScavenger required if spilling");
1094 
1095  // Add an emergency spill slot
1096  RS->addScavengingFrameIndex(FuncInfo->getScavengeFI(MFI, *TRI));
1097  }
1098 }
1099 
1100 // Only report VGPRs to generic code.
// SIFrameLowering::determineCalleeSaves (continuation of the signature; the
// listing line with its first half, 1101, is not in this extract).
//
// Starts from the generic TargetFrameLowering result, then for non-entry
// functions: keeps only vector registers in SavedVGPRs, drops AGPRs on
// subtargets without GFX90A instructions, removes the VGPRs already used for
// SGPR spilling, and reserves save locations for FP and BP when needed.
//
// NOTE(review): listing lines 1105, 1143, 1145, 1153 and 1155 are missing.
// MFI is used without a visible declaration (from its uses it is the
// SIMachineFunctionInfo, not the MachineFrameInfo, which is FrameInfo here).
// The calls whose trailing arguments appear on lines 1146 and 1156 are not
// visible either -- confirm against the full source.
1102  BitVector &SavedVGPRs,
1103  RegScavenger *RS) const {
1104  TargetFrameLowering::determineCalleeSaves(MF, SavedVGPRs, RS);
1106  if (MFI->isEntryFunction())
1107  return;
1108 
1109  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
1110  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1111  const SIRegisterInfo *TRI = ST.getRegisterInfo();
1112 
1113  // Ignore the SGPRs the default implementation found.
1114  SavedVGPRs.clearBitsNotInMask(TRI->getAllVectorRegMask());
1115 
1116  // Do not save AGPRs prior to GFX90A because there was no easy way to do so.
1117  // In gfx908 there were no AGPR loads and stores and thus spilling also
1118  // requires a temporary VGPR.
1119  if (!ST.hasGFX90AInsts())
1120  SavedVGPRs.clearBitsInMask(TRI->getAllAGPRRegMask());
1121 
1122  // hasFP only knows about stack objects that already exist. We're now
1123  // determining the stack slots that will be created, so we have to predict
1124  // them. Stack objects force FP usage with calls.
1125  //
1126  // Note a new VGPR CSR may be introduced if one is used for the spill, but we
1127  // don't want to report it here.
1128  //
1129  // FIXME: Is this really hasReservedCallFrame?
1130  const bool WillHaveFP =
1131  FrameInfo.hasCalls() &&
1132  (SavedVGPRs.any() || !allStackObjectsAreDead(FrameInfo));
1133 
1134  // VGPRs used for SGPR spilling need to be specially inserted in the prolog,
1135  // so don't allow the default insertion to handle them.
1136  for (auto SSpill : MFI->getSGPRSpillVGPRs())
1137  SavedVGPRs.reset(SSpill.VGPR);
1138 
1139  LivePhysRegs LiveRegs;
1140  LiveRegs.init(*TRI);
1141 
// Reserve a save location for the frame pointer (call on missing line 1145).
1142  if (WillHaveFP || hasFP(MF)) {
1144  "Re-reserving spill slot for FP");
1146  MFI->FramePointerSaveIndex, true);
1147  }
1148 
// Same for the base pointer. The SGPR already chosen for the FP copy is added
// to LiveRegs first so that it is not treated as available again.
1149  if (TRI->hasBasePointer(MF)) {
1150  if (MFI->SGPRForFPSaveRestoreCopy)
1151  LiveRegs.addReg(MFI->SGPRForFPSaveRestoreCopy);
1152 
1154  !MFI->BasePointerSaveIndex && "Re-reserving spill slot for BP");
1156  MFI->BasePointerSaveIndex, false);
1157  }
1158 }
1159 
// Tail of a const member function taking (..., BitVector &SavedRegs,
// RegScavenger *RS).
// NOTE(review): listing lines 1160 (start of the signature, including the
// function name) and 1164 are absent from this extract; this is presumably
// SIFrameLowering::determineCalleeSavesSGPR, and MFI (used below without a
// visible declaration) is presumably the SIMachineFunctionInfo -- confirm
// against the full source.
//
// Starts from the generic TargetFrameLowering result and, for non-entry
// functions, removes SP, all vector registers, and (when an FP is or will be
// needed) the FP register from SavedRegs.
1161  BitVector &SavedRegs,
1162  RegScavenger *RS) const {
1163  TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
1165  if (MFI->isEntryFunction())
1166  return;
1167 
1168  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1169  const SIRegisterInfo *TRI = ST.getRegisterInfo();
1170 
1171  // The SP is specifically managed and we don't want extra spills of it.
1172  SavedRegs.reset(MFI->getStackPtrOffsetReg());
1173 
// Snapshot taken before the vector registers are cleared, for the comparison
// below.
1174  const BitVector AllSavedRegs = SavedRegs;
1175  SavedRegs.clearBitsInMask(TRI->getAllVectorRegMask());
1176 
1177  // If clearing VGPRs changed the mask, we will have some CSR VGPR spills.
1178  const bool HaveAnyCSRVGPR = SavedRegs != AllSavedRegs;
1179 
1180  // We have to anticipate introducing CSR VGPR spills if we don't have any
1181  // stack objects already, since we require an FP if there is a call and stack.
1182  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
1183  const bool WillHaveFP = FrameInfo.hasCalls() && HaveAnyCSRVGPR;
1184 
1185  // FP will be specially managed like SP.
1186  if (WillHaveFP || hasFP(MF))
1187  SavedRegs.reset(MFI->getFrameOffsetReg());
1188 }
1189 
1192  std::vector<CalleeSavedInfo> &CSI) const {
1193  if (CSI.empty())
1194  return true; // Early exit if no callee saved registers are modified!
1195 
1196  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
1197  if (!FuncInfo->SGPRForFPSaveRestoreCopy &&
1198  !FuncInfo->SGPRForBPSaveRestoreCopy)
1199  return false;
1200 
1201  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1202  const SIRegisterInfo *RI = ST.getRegisterInfo();
1203  Register FramePtrReg = FuncInfo->getFrameOffsetReg();
1204  Register BasePtrReg = RI->getBaseRegister();
1205  unsigned NumModifiedRegs = 0;
1206 
1207  if (FuncInfo->SGPRForFPSaveRestoreCopy)
1208  NumModifiedRegs++;
1209  if (FuncInfo->SGPRForBPSaveRestoreCopy)
1210  NumModifiedRegs++;
1211 
1212  for (auto &CS : CSI) {
1213  if (CS.getReg() == FramePtrReg && FuncInfo->SGPRForFPSaveRestoreCopy) {
1214  CS.setDstReg(FuncInfo->SGPRForFPSaveRestoreCopy);
1215  if (--NumModifiedRegs)
1216  break;
1217  } else if (CS.getReg() == BasePtrReg &&
1218  FuncInfo->SGPRForBPSaveRestoreCopy) {
1219  CS.setDstReg(FuncInfo->SGPRForBPSaveRestoreCopy);
1220  if (--NumModifiedRegs)
1221  break;
1222  }
1223  }
1224 
1225  return false;
1226 }
1227 
// Middle of a member function signature.
// NOTE(review): listing lines 1228, 1230 and 1231 (the rest of the signature,
// including the function name and the declarations of MBB and I) and line 1246
// (presumably the declaration of MFI) are absent from this extract; this is
// presumably SIFrameLowering::eliminateCallFramePseudoInstr -- confirm against
// the full source.
//
// Erases the call-frame pseudo instruction I from MBB and returns the result
// of MBB.erase(I). When the call frame is not reserved and the amount is
// non-zero, it first emits an SP adjustment: S_ADD_U32 for frame setup,
// S_SUB_U32 for frame destroy, by the stack-aligned and scaled amount.
1229  MachineFunction &MF,
1232  int64_t Amount = I->getOperand(0).getImm();
1233  if (Amount == 0)
1234  return MBB.erase(I);
1235 
1236  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1237  const SIInstrInfo *TII = ST.getInstrInfo();
1238  const DebugLoc &DL = I->getDebugLoc();
1239  unsigned Opc = I->getOpcode();
1240  bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
// Operand 1 is only read for the destroy opcode.
1241  uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;
1242 
1243  if (!hasReservedCallFrame(MF)) {
1244  Amount = alignTo(Amount, getStackAlign());
1245  assert(isUInt<32>(Amount) && "exceeded stack address space size");
1247  Register SPReg = MFI->getStackPtrOffsetReg();
1248 
1249  unsigned Op = IsDestroy ? AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32;
1250  BuildMI(MBB, I, DL, TII->get(Op), SPReg)
1251  .addReg(SPReg)
1252  .addImm(Amount * getScratchScaleFactor(ST));
1253  } else if (CalleePopAmount != 0) {
// A non-zero callee-pop amount with a reserved call frame is not expected.
1254  llvm_unreachable("is this used?");
1255  }
1256 
1257  return MBB.erase(I);
1258 }
1259 
1260 /// Returns true if the frame will require a reference to the stack pointer.
1261 ///
1262 /// This is the set of conditions common to setting up the stack pointer in a
1263 /// kernel, and for using a frame pointer in a callable function.
1264 ///
1265 /// FIXME: Should also check hasOpaqueSPAdjustment and if any inline asm
1266 /// references SP.
1268  return MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint();
1269 }
1270 
1271 // The FP for kernels is always known 0, so we never really need to setup an
1272 // explicit register for it. However, DisableFramePointerElim will force us to
1273 // use a register for it.
// SIFrameLowering::hasFP -- the signature line (listing line 1274) is not in
// this extract.
//
// Two paths are visible: when the function has calls (and the condition on the
// missing line 1280 holds), an FP is needed exactly when the stack size is
// non-zero; otherwise an FP is needed if the frame trivially requires SP, the
// frame address is taken, the stack needs realignment, or the operand on the
// missing line 1292 is true.
//
// NOTE(review): listing lines 1280 and 1292 are absent. From the comments here
// they presumably test "not an entry function" and DisableFramePointerElim
// respectively -- confirm against the full source.
1275  const MachineFrameInfo &MFI = MF.getFrameInfo();
1276 
1277  // For entry functions we can use an immediate offset in most cases, so the
1278  // presence of calls doesn't imply we need a distinct frame pointer.
1279  if (MFI.hasCalls() &&
1281  // All offsets are unsigned, so need to be addressed in the same direction
1282  // as stack growth.
1283 
1284  // FIXME: This function is pretty broken, since it can be called before the
1285  // frame layout is determined or CSR spills are inserted.
1286  return MFI.getStackSize() != 0;
1287  }
1288 
1289  return frameTriviallyRequiresSP(MFI) || MFI.isFrameAddressTaken() ||
1290  MF.getSubtarget<GCNSubtarget>().getRegisterInfo()->hasStackRealignment(
1291  MF) ||
1293 }
1294 
1295 // This is essentially a reduced version of hasFP for entry functions. Since the
1296 // stack pointer is known 0 on entry to kernels, we never really need an FP
1297 // register. We may need to initialize the stack pointer depending on the frame
1298 // properties, which logically overlaps many of the cases where an ordinary
1299 // function would require an FP.
// SIFrameLowering::requiresStackPointerReference (continuation of the
// signature; listing line 1300 with its first half is not in this extract).
//
// Returns true when the function has calls, and otherwise whatever
// frameTriviallyRequiresSP reports for its frame.
//
// NOTE(review): listing line 1303 is also absent, so only the message of the
// assert below is visible; its condition presumably checks that this is an
// entry function -- confirm against the full source.
1301  const MachineFunction &MF) const {
1302  // Callable functions always require a stack pointer reference.
1304  "only expected to call this for entry points");
1305 
1306  const MachineFrameInfo &MFI = MF.getFrameInfo();
1307 
1308  // Entry points ordinarily don't need to initialize SP. We have to set it up
1309  // for callees if there are any. Also note tail calls are impossible/don't
1310  // make any sense for kernels.
1311  if (MFI.hasCalls())
1312  return true;
1313 
1314  // We still need to initialize the SP if we're doing anything weird that
1315  // references the SP, like variable sized stack objects.
1316  return frameTriviallyRequiresSP(MFI);
1317 }
llvm::MachineRegisterInfo::addLiveIn
void addLiveIn(MCRegister Reg, Register vreg=Register())
addLiveIn - Add the specified register as a live-in.
Definition: MachineRegisterInfo.h:929
i
i
Definition: README.txt:29
llvm::alignTo
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:158
llvm::MachineFrameInfo::hasVarSizedObjects
bool hasVarSizedObjects() const
This method may be called any time after instruction selection is complete to determine if the stack ...
Definition: MachineFrameInfo.h:351
llvm::SIMachineFunctionInfo::setIsStackRealigned
void setIsStackRealigned(bool Realigned=true)
Definition: SIMachineFunctionInfo.h:792
llvm::SIMachineFunctionInfo::getSGPRSpillVGPRs
ArrayRef< SGPRSpillVGPR > getSGPRSpillVGPRs() const
Definition: SIMachineFunctionInfo.h:512
llvm::AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT
@ FLAT_SCRATCH_INIT
Definition: AMDGPUArgumentUsageInfo.h:105
llvm::MachineInstrBuilder::addImm
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Definition: MachineInstrBuilder.h:132
llvm
Definition: AllocatorList.h:23
llvm::LivePhysRegs::removeReg
void removeReg(MCPhysReg Reg)
Removes a physical register, all its sub-registers, and all its super-registers from the set.
Definition: LivePhysRegs.h:89
Reg
unsigned Reg
Definition: MachineSink.cpp:1566
llvm::LivePhysRegs::addReg
void addReg(MCPhysReg Reg)
Adds a physical register and all its sub-registers to the set.
Definition: LivePhysRegs.h:79
llvm::SIMachineFunctionInfo::getPreloadedReg
MCRegister getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const
Definition: SIMachineFunctionInfo.h:692
llvm::SIFrameLowering::emitEpilogue
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override
Definition: SIFrameLowering.cpp:928
SIMachineFunctionInfo.h
llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition: MachineRegisterInfo.h:52
llvm::Function
Definition: Function.h:61
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
llvm::AMDGPUSubtarget::GFX9
@ GFX9
Definition: AMDGPUSubtarget.h:40
llvm::MachineFunction::getMachineMemOperand
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
Definition: MachineFunction.cpp:430
llvm::MachineMemOperand::MOInvariant
@ MOInvariant
The memory access always returns the same value (or traps).
Definition: MachineMemOperand.h:144
llvm::SIMachineFunctionInfo::allocateSGPRSpillToVGPR
bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI)
Reserve a slice of a VGPR to support spilling for FrameIndex FI.
Definition: SIMachineFunctionInfo.cpp:268
llvm::LivePhysRegs
A set of physical registers with utility functions to track liveness when walking backward/forward th...
Definition: LivePhysRegs.h:48
llvm::TargetRegisterInfo
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Definition: TargetRegisterInfo.h:231
llvm::SIMachineFunctionInfo::getGITPtrHigh
unsigned getGITPtrHigh() const
Definition: SIMachineFunctionInfo.h:697
llvm::StackOffset::getFixed
ScalarTy getFixed() const
Definition: TypeSize.h:149
llvm::MachineMemOperand
A description of a memory reference used in the backend.
Definition: MachineMemOperand.h:127
llvm::MachineMemOperand::MODereferenceable
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
Definition: MachineMemOperand.h:142
llvm::Optional< int >
Offset
uint64_t Offset
Definition: ELFObjHandler.cpp:81
llvm::GCNSubtarget
Definition: GCNSubtarget.h:38
llvm::MachineFrameInfo::RemoveStackObject
void RemoveStackObject(int ObjectIdx)
Remove or mark dead a statically sized stack object.
Definition: MachineFrameInfo.h:779
llvm::LivePhysRegs::empty
bool empty() const
Returns true if the set is empty.
Definition: LivePhysRegs.h:76
llvm::MachineFrameInfo::getObjectIndexEnd
int getObjectIndexEnd() const
Return one past the maximum frame object index.
Definition: MachineFrameInfo.h:391
llvm::SIMachineFunctionInfo::SGPRForFPSaveRestoreCopy
Register SGPRForFPSaveRestoreCopy
If this is set, an SGPR used for save/restore of the register used for the frame pointer.
Definition: SIMachineFunctionInfo.h:490
TRI
unsigned const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:1567
llvm::SIMachineFunctionInfo::getNumPreloadedSGPRs
unsigned getNumPreloadedSGPRs() const
Definition: SIMachineFunctionInfo.h:715
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:122
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::MachineInstr::FrameDestroy
@ FrameDestroy
Definition: MachineInstr.h:84
buildGitPtr
static void buildGitPtr(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, const SIInstrInfo *TII, Register TargetReg)
Definition: SIFrameLowering.cpp:157
llvm::MachineBasicBlock::erase
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
Definition: MachineBasicBlock.cpp:1322
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
llvm::SIFrameLowering::hasFP
bool hasFP(const MachineFunction &MF) const override
hasFP - Return true if the specified function should have a dedicated frame pointer register.
Definition: SIFrameLowering.cpp:1274
llvm::SIRegisterInfo::getFrameRegister
Register getFrameRegister(const MachineFunction &MF) const override
Definition: SIRegisterInfo.cpp:386
frameTriviallyRequiresSP
static bool frameTriviallyRequiresSP(const MachineFrameInfo &MFI)
Returns true if the frame will require a reference to the stack pointer.
Definition: SIFrameLowering.cpp:1267
llvm::MachineFunction::front
const MachineBasicBlock & front() const
Definition: MachineFunction.h:749
llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition: MachineFunction.h:565
TargetMachine.h
GCNSubtarget.h
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::MachineFunction::getInfo
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Definition: MachineFunction.h:653
llvm::LivePhysRegs::addLiveIns
void addLiveIns(const MachineBasicBlock &MBB)
Adds all live-in registers of basic block MBB.
Definition: LivePhysRegs.cpp:236
llvm::TargetFrameLowering::getStackAlign
Align getStackAlign() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
Definition: TargetFrameLowering.h:99
llvm::TargetFrameLowering::hasReservedCallFrame
virtual bool hasReservedCallFrame(const MachineFunction &MF) const
hasReservedCallFrame - Under normal circumstances, when a frame pointer is not required,...
Definition: TargetFrameLowering.h:278
llvm::TargetRegisterClass
Definition: TargetRegisterInfo.h:46
llvm::LivePhysRegs::addLiveOuts
void addLiveOuts(const MachineBasicBlock &MBB)
Adds all live-out registers of basic block MBB.
Definition: LivePhysRegs.cpp:230
TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:129
llvm::MCInstrDesc
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:196
llvm::SIFrameLowering::requiresStackPointerReference
bool requiresStackPointerReference(const MachineFunction &MF) const
Definition: SIFrameLowering.cpp:1300
initLiveRegs
static void initLiveRegs(LivePhysRegs &LiveRegs, const SIRegisterInfo &TRI, const SIMachineFunctionInfo *FuncInfo, MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, bool IsProlog)
Definition: SIFrameLowering.cpp:651
llvm::MachineInstr::FrameSetup
@ FrameSetup
Definition: MachineInstr.h:82
buildScratchExecCopy
static Register buildScratchExecCopy(LivePhysRegs &LiveRegs, MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, bool IsProlog)
Definition: SIFrameLowering.cpp:668
llvm::SIMachineFunctionInfo::getStackPtrOffsetReg
Register getStackPtrOffsetReg() const
Definition: SIMachineFunctionInfo.h:752
llvm::report_fatal_error
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:140
llvm::MachineFrameInfo::getStackID
uint8_t getStackID(int ObjectIdx) const
Definition: MachineFrameInfo.h:720
llvm::MachineFrameInfo::getStackSize
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
Definition: MachineFrameInfo.h:563
getVGPRSpillLaneOrTempRegister
static void getVGPRSpillLaneOrTempRegister(MachineFunction &MF, LivePhysRegs &LiveRegs, Register &TempSGPR, Optional< int > &FrameIndex, bool IsFP)
Definition: SIFrameLowering.cpp:53
llvm::MachineFrameInfo::getObjectOffset
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
Definition: MachineFrameInfo.h:504
llvm::MCRegisterInfo::isSubRegisterEq
bool isSubRegisterEq(MCRegister RegA, MCRegister RegB) const
Returns true if RegB is a sub-register of RegA or if RegB == RegA.
Definition: MCRegisterInfo.h:568
llvm::AMDGPU::convertSMRDOffsetUnits
uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset)
Convert ByteOffset to dwords if the subtarget uses dword SMRD immediate offsets.
Definition: AMDGPUBaseInfo.cpp:1802
llvm::SIRegisterInfo
Definition: SIRegisterInfo.h:29
llvm::SIMachineFunctionInfo::SGPRForBPSaveRestoreCopy
Register SGPRForBPSaveRestoreCopy
If this is set, an SGPR used for save/restore of the register used for the base pointer.
Definition: SIMachineFunctionInfo.h:495
llvm::BitVector::clearBitsNotInMask
void clearBitsNotInMask(const uint32_t *Mask, unsigned MaskWords=~0u)
clearBitsNotInMask - Clear a bit in this vector for every '0' bit in Mask.
Definition: BitVector.h:794
llvm::BitVector
Definition: BitVector.h:74
Align
uint64_t Align
Definition: ELFObjHandler.cpp:83
llvm::MachineFrameInfo::getObjectIndexBegin
int getObjectIndexBegin() const
Return the minimum frame object index.
Definition: MachineFrameInfo.h:388
llvm::MachineInstrBuilder::addExternalSymbol
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned TargetFlags=0) const
Definition: MachineInstrBuilder.h:185
llvm::ArrayRef::slice
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition: ArrayRef.h:193
llvm::RegState::Kill
@ Kill
The last use of a register.
Definition: MachineInstrBuilder.h:49
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:95
llvm::MachineFrameInfo::isDeadObjectIndex
bool isDeadObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a dead object.
Definition: MachineFrameInfo.h:734
llvm::SIFrameLowering::assignCalleeSavedSpillSlots
bool assignCalleeSavedSpillSlots(MachineFunction &MF, const TargetRegisterInfo *TRI, std::vector< CalleeSavedInfo > &CSI) const override
assignCalleeSavedSpillSlots - Allows target to override spill slot assignment logic.
Definition: SIFrameLowering.cpp:1190
llvm::SIFrameLowering::emitPrologue
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override
emitProlog/emitEpilog - These methods insert prolog and epilog code into the function.
Definition: SIFrameLowering.cpp:704
llvm::AMDGPU::Hwreg::WIDTH_M1_SHIFT_
@ WIDTH_M1_SHIFT_
Definition: SIDefines.h:394
llvm::AMDGPUSubtarget::GFX10
@ GFX10
Definition: AMDGPUSubtarget.h:41
llvm::SIMachineFunctionInfo::haveFreeLanesForSGPRSpill
bool haveFreeLanesForSGPRSpill(const MachineFunction &MF, unsigned NumLane) const
returns true if NumLanes slots are available in VGPRs already used for SGPR spilling.
Definition: SIMachineFunctionInfo.cpp:260
llvm::TargetOptions::DisableFramePointerElim
bool DisableFramePointerElim(const MachineFunction &MF) const
DisableFramePointerElim - This returns true if frame pointer elimination optimization should be disab...
Definition: TargetOptionsImpl.cpp:24
llvm::SIFrameLowering::determineCalleeSaves
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const override
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...
Definition: SIFrameLowering.cpp:1101
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:555
llvm::MachineInstrBuilder::setMIFlag
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
Definition: MachineInstrBuilder.h:279
buildEpilogRestore
static void buildEpilogRestore(const GCNSubtarget &ST, const SIRegisterInfo &TRI, const SIMachineFunctionInfo &FuncInfo, LivePhysRegs &LiveRegs, MachineFunction &MF, MachineBasicBlock::iterator I, Register SpillReg, int FI)
Definition: SIFrameLowering.cpp:138
llvm::LivePhysRegs::stepBackward
void stepBackward(const MachineInstr &MI)
Simulates liveness when stepping backwards over an instruction(bundle).
Definition: LivePhysRegs.cpp:68
llvm::AMDGPU::Hwreg::ID_FLAT_SCR_HI
@ ID_FLAT_SCR_HI
Definition: SIDefines.h:369
AMDGPUMCTargetDesc.h
llvm::SIMachineFunctionInfo::isStackRealigned
bool isStackRealigned() const
Definition: SIMachineFunctionInfo.h:788
llvm::MachineFrameInfo::getObjectSize
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
Definition: MachineFrameInfo.h:451
llvm::Function::getCallingConv
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:228
llvm::BitVector::any
bool any() const
any - Returns true if any bit is set.
Definition: BitVector.h:181
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
llvm::AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET
@ PRIVATE_SEGMENT_WAVE_BYTE_OFFSET
Definition: AMDGPUArgumentUsageInfo.h:109
llvm::LivePhysRegs::available
bool available(const MachineRegisterInfo &MRI, MCPhysReg Reg) const
Returns true if register Reg and no aliasing register is in the set.
Definition: LivePhysRegs.cpp:139
llvm::MachinePointerInfo
This class contains a discriminated union of information about pointers in memory operands,...
Definition: MachineMemOperand.h:37
llvm::MachineRegisterInfo::getCalleeSavedRegs
const MCPhysReg * getCalleeSavedRegs() const
Returns list of callee saved registers.
Definition: MachineRegisterInfo.cpp:620
llvm::SIMachineFunctionInfo::FramePointerSaveIndex
Optional< int > FramePointerSaveIndex
Definition: SIMachineFunctionInfo.h:491
allStackObjectsAreDead
static bool allStackObjectsAreDead(const MachineFrameInfo &MFI)
Definition: SIFrameLowering.cpp:320
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::MCPhysReg
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition: MCRegister.h:19
llvm::RegScavenger
Definition: RegisterScavenging.h:34
llvm::MachineFrameInfo::getObjectAlign
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
Definition: MachineFrameInfo.h:471
llvm::AMDGPUMachineFunction::isEntryFunction
bool isEntryFunction() const
Definition: AMDGPUMachineFunction.h:78
buildPrologSpill
static void buildPrologSpill(const GCNSubtarget &ST, const SIRegisterInfo &TRI, const SIMachineFunctionInfo &FuncInfo, LivePhysRegs &LiveRegs, MachineFunction &MF, MachineBasicBlock::iterator I, Register SpillReg, int FI)
Definition: SIFrameLowering.cpp:118
llvm::TargetStackID::ScalableVector
@ ScalableVector
Definition: TargetFrameLowering.h:30
llvm::isUInt< 32 >
constexpr bool isUInt< 32 >(uint64_t x)
Definition: MathExtras.h:412
llvm::MachineRegisterInfo::isAllocatable
bool isAllocatable(MCRegister PhysReg) const
isAllocatable - Returns true when PhysReg belongs to an allocatable register class and it hasn't been...
Definition: MachineRegisterInfo.h:918
llvm::TargetMachine::Options
TargetOptions Options
Definition: TargetMachine.h:115
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::MachineFunction::getFrameInfo
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
Definition: MachineFunction.h:571
llvm::SIMachineFunctionInfo::getScavengeFI
int getScavengeFI(MachineFrameInfo &MFI, const SIRegisterInfo &TRI)
Definition: SIMachineFunctionInfo.cpp:443
llvm::MachineBasicBlock::getParent
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
Definition: MachineBasicBlock.h:225
llvm::RegScavenger::addScavengingFrameIndex
void addScavengingFrameIndex(int FI)
Add a scavenging frame index.
Definition: RegisterScavenging.h:123
llvm::MachineInstrBuilder::addMemOperand
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Definition: MachineInstrBuilder.h:203
llvm::MachineInstrBuilder::addReg
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Definition: MachineInstrBuilder.h:98
llvm::SIMachineFunctionInfo::getGITPtrLoReg
Register getGITPtrLoReg(const MachineFunction &MF) const
Definition: SIMachineFunctionInfo.cpp:468
llvm::MachineFunction
Definition: MachineFunction.h:227
llvm::MachineBasicBlock::getFirstTerminator
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
Definition: MachineBasicBlock.cpp:239
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::MachineFrameInfo::hasPatchPoint
bool hasPatchPoint() const
This method may be called any time after instruction selection is complete to determine if there is a...
Definition: MachineFrameInfo.h:384
llvm::min
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:357
llvm::SIFrameLowering::isSupportedStackID
bool isSupportedStackID(TargetStackID::Value ID) const override
Definition: SIFrameLowering.cpp:639
AMDGPU.h
llvm::LivePhysRegs::init
void init(const TargetRegisterInfo &TRI)
(re-)initializes and clears the set.
Definition: LivePhysRegs.h:66
MBBI
MachineBasicBlock MachineBasicBlock::iterator MBBI
Definition: AArch64SLSHardening.cpp:75
llvm::BitVector::clearBitsInMask
void clearBitsInMask(const uint32_t *Mask, unsigned MaskWords=~0u)
clearBitsInMask - Clear any bits in this vector that are set in Mask.
Definition: BitVector.h:782
llvm::TargetStackID::NoAlloc
@ NoAlloc
Definition: TargetFrameLowering.h:31
getScratchScaleFactor
static unsigned getScratchScaleFactor(const GCNSubtarget &ST)
Definition: SIFrameLowering.cpp:383
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:136
llvm::AMDGPU::isCompute
bool isCompute(CallingConv::ID cc)
Definition: AMDGPUBaseInfo.cpp:1332
uint32_t
llvm::StackOffset
StackOffset is a class to represent an offset with 2 dimensions, named fixed and scalable, respectively.
Definition: TypeSize.h:134
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
SIFrameLowering.h
llvm::SIMachineFunctionInfo::getScratchRSrcReg
Register getScratchRSrcReg() const
Returns the physical register reserved for use as the resource descriptor for scratch accesses.
Definition: SIMachineFunctionInfo.h:725
llvm::SIMachineFunctionInfo::getImplicitBufferPtrUserSGPR
Register getImplicitBufferPtrUserSGPR() const
Definition: SIMachineFunctionInfo.h:760
llvm::SIFrameLowering::processFunctionBeforeFrameFinalized
void processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS=nullptr) const override
processFunctionBeforeFrameFinalized - This method is called immediately before the specified function's frame layout (MF.getFrameInfo()) is finalized.
Definition: SIFrameLowering.cpp:1076
llvm::MachineMemOperand::MOLoad
@ MOLoad
The memory access reads data.
Definition: MachineMemOperand.h:134
MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105
llvm::MachineFrameInfo::getMaxAlign
Align getMaxAlign() const
Return the alignment in bytes that this function must be aligned to, which is greater than the default stack alignment provided by the target.
Definition: MachineFrameInfo.h:585
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
llvm::MachineBasicBlock::addLiveIn
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
Definition: MachineBasicBlock.h:367
llvm::MachineFrameInfo::isFrameAddressTaken
bool isFrameAddressTaken() const
This method may be called any time after instruction selection is complete to determine if there is a call to @llvm.frameaddress in this function.
Definition: MachineFrameInfo.h:366
llvm::MachineFrameInfo::hasCalls
bool hasCalls() const
Return true if the current function has any function calls.
Definition: MachineFrameInfo.h:602
llvm::ISD::FrameIndex
@ FrameIndex
Definition: ISDOpcodes.h:73
llvm::MachineRegisterInfo::replaceRegWith
void replaceRegWith(Register FromReg, Register ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
Definition: MachineRegisterInfo.cpp:380
MBB
MachineBasicBlock & MBB
Definition: AArch64SLSHardening.cpp:74
spilledToMemory
static bool spilledToMemory(const MachineFunction &MF, int SaveIndex)
Definition: SIFrameLowering.cpp:699
llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:521
allSGPRSpillsAreDead
static bool allSGPRSpillsAreDead(const MachineFunction &MF)
Definition: SIFrameLowering.cpp:1050
llvm::MachineBasicBlock::sortUniqueLiveIns
void sortUniqueLiveIns()
Sorts and uniques the LiveIns vector.
Definition: MachineBasicBlock.cpp:578
uint16_t
llvm::MachineFunction::getTarget
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Definition: MachineFunction.h:551
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:314
llvm::SIFrameLowering::determineCalleeSavesSGPR
void determineCalleeSavesSGPR(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const
Definition: SIFrameLowering.cpp:1160
MachineFrameInfo.h
llvm::Align::value
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
llvm::SIMachineFunctionInfo::BasePointerSaveIndex
Optional< int > BasePointerSaveIndex
Definition: SIMachineFunctionInfo.h:496
llvm::SIFrameLowering::eliminateCallFramePseudoInstr
MachineBasicBlock::iterator eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
This method is called during prolog/epilog code insertion to eliminate call frame setup and destroy pseudo instructions (but only if the Target is using them).
Definition: SIFrameLowering.cpp:1228
llvm::RegState::Undef
@ Undef
Value of the register doesn't matter.
Definition: MachineInstrBuilder.h:53
llvm::TargetStackID::Value
Value
Definition: TargetFrameLowering.h:27
llvm::TargetStackID::Default
@ Default
Definition: TargetFrameLowering.h:28
llvm::MachineRegisterInfo::isPhysRegUsed
bool isPhysRegUsed(MCRegister PhysReg) const
Return true if the specified register is modified or read in this function.
Definition: MachineRegisterInfo.cpp:585
llvm::SIMachineFunctionInfo::setScratchRSrcReg
void setScratchRSrcReg(Register Reg)
Definition: SIMachineFunctionInfo.h:729
llvm::TargetStackID::SGPRSpill
@ SGPRSpill
Definition: TargetFrameLowering.h:29
llvm::MachineMemOperand::MOStore
@ MOStore
The memory access writes data.
Definition: MachineMemOperand.h:136
llvm::BitVector::reset
BitVector & reset()
Definition: BitVector.h:421
llvm::SIInstrInfo
Definition: SIInstrInfo.h:38
llvm::TargetRegisterInfo::hasStackRealignment
bool hasStackRealignment(const MachineFunction &MF) const
True if stack realignment is required and still possible.
Definition: TargetRegisterInfo.h:920
llvm::MachineFrameInfo
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
Definition: MachineFrameInfo.h:107
llvm::MachinePointerInfo::getFixedStack
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
Definition: MachineOperand.cpp:995
llvm::SIMachineFunctionInfo::removeDeadFrameIndices
void removeDeadFrameIndices(MachineFrameInfo &MFI)
Definition: SIMachineFunctionInfo.cpp:423
llvm::MachineBasicBlock::begin
iterator begin()
Definition: MachineBasicBlock.h:268
llvm::BuildMI
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
Definition: MachineInstrBuilder.h:329
llvm::SIMachineFunctionInfo
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which interpolation parameters to load.
Definition: SIMachineFunctionInfo.h:331
llvm::ArrayRef::size
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:163
llvm::SIFrameLowering::emitEntryFunctionPrologue
void emitEntryFunctionPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const
Definition: SIFrameLowering.cpp:387
llvm::AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER
@ PRIVATE_SEGMENT_BUFFER
Definition: AMDGPUArgumentUsageInfo.h:100
llvm::TargetRegisterInfo::getSubReg
MCRegister getSubReg(MCRegister Reg, unsigned Idx) const
Returns the physical register number of sub-register "Index" for physical register RegNo.
Definition: TargetRegisterInfo.h:1078
llvm::AMDGPU::Hwreg::ID_FLAT_SCR_LO
@ ID_FLAT_SCR_LO
Definition: SIDefines.h:368
llvm::TargetFrameLowering::determineCalleeSaves
virtual void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() should actually get saved.
Definition: TargetFrameLoweringImpl.cpp:78
llvm::MachineFrameInfo::hasStackMap
bool hasStackMap() const
This method may be called any time after instruction selection is complete to determine if there is a call to builtin @llvm.experimental.stackmap.
Definition: MachineFrameInfo.h:378
llvm::AMDGPUAS::CONSTANT_ADDRESS
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
Definition: AMDGPU.h:376
llvm::DebugLoc
A debug info location.
Definition: DebugLoc.h:33
RegisterScavenging.h
findScratchNonCalleeSaveRegister
static MCRegister findScratchNonCalleeSaveRegister(MachineRegisterInfo &MRI, LivePhysRegs &LiveRegs, const TargetRegisterClass &RC, bool Unused=false)
Definition: SIFrameLowering.cpp:27
llvm::printReg
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
Definition: TargetRegisterInfo.cpp:110
llvm::SIMachineFunctionInfo::SGPRSpillVGPR
Definition: SIMachineFunctionInfo.h:448
llvm::MachineInstrBundleIterator
MachineBasicBlock iterator that automatically skips over MIs that are inside bundles (i.e. walk top level MIs only).
Definition: MachineInstrBundleIterator.h:108
llvm::RegState::ImplicitDefine
@ ImplicitDefine
Definition: MachineInstrBuilder.h:64
llvm::SIMachineFunctionInfo::getSGPRToVGPRSpills
ArrayRef< SpilledReg > getSGPRToVGPRSpills(int FrameIndex) const
Definition: SIMachineFunctionInfo.h:506
llvm::CallingConv::AMDGPU_CS
@ AMDGPU_CS
Calling convention used for Mesa/AMDPAL compute shaders.
Definition: CallingConv.h:208
llvm::SIFrameLowering::getFrameIndexReference
StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg) const override
getFrameIndexReference - This method should return the base register and offset used to reference a frame index location.
Definition: SIFrameLowering.cpp:1067
llvm::SIMachineFunctionInfo::hasImplicitBufferPtr
bool hasImplicitBufferPtr() const
Definition: SIMachineFunctionInfo.h:675
llvm::SIMachineFunctionInfo::hasFlatScratchInit
bool hasFlatScratchInit() const
Definition: SIMachineFunctionInfo.h:635
llvm::SIMachineFunctionInfo::getFrameOffsetReg
Register getFrameOffsetReg() const
Definition: SIMachineFunctionInfo.h:734
llvm::MCRegister
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:22
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38
LivePhysRegs.h