LLVM  9.0.0svn
SIFrameLowering.cpp
Go to the documentation of this file.
1 //===----------------------- SIFrameLowering.cpp --------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //==-----------------------------------------------------------------------===//
8 
9 #include "SIFrameLowering.h"
10 #include "AMDGPUSubtarget.h"
11 #include "SIInstrInfo.h"
12 #include "SIMachineFunctionInfo.h"
13 #include "SIRegisterInfo.h"
15 
21 
22 using namespace llvm;
23 
24 #define DEBUG_TYPE "frame-info"
25 
26 
// Returns an ArrayRef of the first getMaxNumSGPRs(MF)/4 entries of
// SGPR_128RegClass, i.e. all 128-bit SGPR tuples usable by \p MF.
// NOTE(review): doxygen scrape — original source line 27 (the function's
// return type/name, presumably `static ArrayRef<MCPhysReg> getAllSGPR128(...)`)
// is missing from this view; confirm against upstream before editing.
28  const MachineFunction &MF) {
29  return makeArrayRef(AMDGPU::SGPR_128RegClass.begin(),
30  ST.getMaxNumSGPRs(MF) / 4);
31 }
32 
// Returns an ArrayRef of the first getMaxNumSGPRs(MF) entries of
// SGPR_32RegClass, i.e. all 32-bit SGPRs usable by \p MF.
// NOTE(review): doxygen scrape — original source line 33 (the function's
// return type/name) is missing from this view; confirm against upstream.
34  const MachineFunction &MF) {
35  return makeArrayRef(AMDGPU::SGPR_32RegClass.begin(),
36  ST.getMaxNumSGPRs(MF));
37 }
38 
39 // Find a scratch register that we can use at the start of the prologue to
40 // re-align the stack pointer. We avoid using callee-save registers since they
41 // may appear to be free when this is called from canUseAsPrologue (during
42 // shrink wrapping), but then no longer be free when this is called from
43 // emitPrologue.
44 //
45 // FIXME: This is a bit conservative, since in the above case we could use one
46 // of the callee-save registers as a scratch temp to re-align the stack pointer,
47 // but we would then have to make sure that we were in fact saving at least one
48 // callee-save register in the prologue, which is additional complexity that
49 // doesn't seem worth the benefit.
// NOTE(review): doxygen scrape — original source line 50 (the signature start,
// presumably `static unsigned findScratchNonCalleeSaveRegister(MachineRegisterInfo &MRI,`)
// is missing from this view.
51  LivePhysRegs &LiveRegs,
52  const TargetRegisterClass &RC,
53  bool Unused = false) {
54  // Mark callee saved registers as used so we will not choose them.
55  const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
56  for (unsigned i = 0; CSRegs[i]; ++i)
57  LiveRegs.addReg(CSRegs[i]);
58 
59  if (Unused) {
60  // We are looking for a register that can be used throughout the entire
61  // function, so any use is unacceptable.
62  for (unsigned Reg : RC) {
63  if (!MRI.isPhysRegUsed(Reg) && LiveRegs.available(MRI, Reg))
64  return Reg;
65  }
66  } else {
67  for (unsigned Reg : RC) {
68  if (LiveRegs.available(MRI, Reg))
69  return Reg;
70  }
71  }
72 
73  // If we require an unused register, this is used in contexts where failure is
74  // an option and has an alternative plan. In other contexts, this must
75  // succeed.
76  if (!Unused)
77  report_fatal_error("failed to find free scratch register");
78 
79  return AMDGPU::NoRegister;
80 }
81 
// Finds an SGPR from SReg_32_XM0_XEXECRegClass that is unused throughout the
// whole function (the `true` argument requests the strict "Unused" mode of the
// finder above), using a freshly-initialized LivePhysRegs set.
// NOTE(review): doxygen scrape — original source lines 82 (signature, name
// unknown from this view) and 85 (presumably
// `return findScratchNonCalleeSaveRegister(`) are missing.
83  LivePhysRegs LiveRegs;
84  LiveRegs.init(*MRI.getTargetRegisterInfo());
86  MRI, LiveRegs, AMDGPU::SReg_32_XM0_XEXECRegClass, true);
87 }
88 
89 // We need to specially emit stack operations here because a different frame
90 // register is used than in the rest of the function, as getFrameRegister would
91 // use.
//
// Stores \p SpillReg into the stack slot for frame index \p FI, addressing
// through \p ScratchRsrcReg + \p SPReg. If the byte offset fits in the 12-bit
// immediate field, a single BUFFER_STORE_DWORD_OFFSET is emitted; otherwise the
// offset is materialized into a scratch VGPR and BUFFER_STORE_DWORD_OFFEN is
// used.
// NOTE(review): doxygen scrape — original source lines 93 (the
// MachineBasicBlock::iterator `I` parameter), 101-102 (creation of `MMO`), and
// 119 (declaration of `OffsetReg`) are missing from this view.
92 static void buildPrologSpill(LivePhysRegs &LiveRegs, MachineBasicBlock &MBB,
94  const SIInstrInfo *TII, unsigned SpillReg,
95  unsigned ScratchRsrcReg, unsigned SPReg, int FI) {
96  MachineFunction *MF = MBB.getParent();
97  MachineFrameInfo &MFI = MF->getFrameInfo();
98 
99  int64_t Offset = MFI.getObjectOffset(FI);
100 
103  MFI.getObjectAlignment(FI));
104 
105  if (isUInt<12>(Offset)) {
106  BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::BUFFER_STORE_DWORD_OFFSET))
107  .addReg(SpillReg, RegState::Kill)
108  .addReg(ScratchRsrcReg)
109  .addReg(SPReg)
110  .addImm(Offset)
111  .addImm(0) // glc
112  .addImm(0) // slc
113  .addImm(0) // tfe
114  .addImm(0) // dlc
115  .addMemOperand(MMO);
116  return;
117  }
118 
120  MF->getRegInfo(), LiveRegs, AMDGPU::VGPR_32RegClass);
121 
122  BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::V_MOV_B32_e32), OffsetReg)
123  .addImm(Offset);
124 
125  BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::BUFFER_STORE_DWORD_OFFEN))
126  .addReg(SpillReg, RegState::Kill)
127  .addReg(OffsetReg, RegState::Kill)
128  .addReg(ScratchRsrcReg)
129  .addReg(SPReg)
130  .addImm(0)
131  .addImm(0) // glc
132  .addImm(0) // slc
133  .addImm(0) // tfe
134  .addImm(0) // dlc
135  .addMemOperand(MMO);
136 }
137 
// Epilogue counterpart of buildPrologSpill: reloads \p SpillReg from the stack
// slot for frame index \p FI through \p ScratchRsrcReg + \p SPReg, using
// BUFFER_LOAD_DWORD_OFFSET when the offset fits in 12 bits, otherwise
// materializing the offset into a scratch VGPR and using
// BUFFER_LOAD_DWORD_OFFEN.
// NOTE(review): doxygen scrape — original source lines 138-139 (the signature
// start, presumably `static void buildEpilogReload(LivePhysRegs &LiveRegs,
// MachineBasicBlock &MBB, MachineBasicBlock::iterator I,` — confirm against
// upstream), 146-147 (creation of `MMO`), and 164 (declaration of `OffsetReg`)
// are missing from this view.
140  const SIInstrInfo *TII, unsigned SpillReg,
141  unsigned ScratchRsrcReg, unsigned SPReg, int FI) {
142  MachineFunction *MF = MBB.getParent();
143  MachineFrameInfo &MFI = MF->getFrameInfo();
144  int64_t Offset = MFI.getObjectOffset(FI);
145 
148  MFI.getObjectAlignment(FI));
149 
150  if (isUInt<12>(Offset)) {
151  BuildMI(MBB, I, DebugLoc(),
152  TII->get(AMDGPU::BUFFER_LOAD_DWORD_OFFSET), SpillReg)
153  .addReg(ScratchRsrcReg)
154  .addReg(SPReg)
155  .addImm(Offset)
156  .addImm(0) // glc
157  .addImm(0) // slc
158  .addImm(0) // tfe
159  .addImm(0) // dlc
160  .addMemOperand(MMO);
161  return;
162  }
163 
165  MF->getRegInfo(), LiveRegs, AMDGPU::VGPR_32RegClass);
166 
167  BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::V_MOV_B32_e32), OffsetReg)
168  .addImm(Offset);
169 
170  BuildMI(MBB, I, DebugLoc(),
171  TII->get(AMDGPU::BUFFER_LOAD_DWORD_OFFEN), SpillReg)
172  .addReg(OffsetReg, RegState::Kill)
173  .addReg(ScratchRsrcReg)
174  .addReg(SPReg)
175  .addImm(0)
176  .addImm(0) // glc
177  .addImm(0) // slc
178  .addImm(0) // tfe
179  .addImm(0) // dlc
180  .addMemOperand(MMO);
181 }
182 
// Initializes the flat-scratch base for an entry function: adds the per-wave
// scratch offset to the preloaded FlatScratchInit input register pair and
// writes the result either to the FLAT_SCR_LO/HI hardware registers via
// S_SETREG (when flat scratch is a pointer) or to FLAT_SCR_LO/HI directly.
// NOTE(review): doxygen scrape — several original source lines are missing
// from this view (188: MFI, 203: insertion iterator `I`, 206-208: the
// getPreloadedReg(FLAT_SCRATCH_INIT) call and MRI, 219, 229, 233: S_SETREG
// width operands, 247). Confirm against upstream before editing.
183 void SIFrameLowering::emitFlatScratchInit(const GCNSubtarget &ST,
184  MachineFunction &MF,
185  MachineBasicBlock &MBB) const {
186  const SIInstrInfo *TII = ST.getInstrInfo();
187  const SIRegisterInfo* TRI = &TII->getRegisterInfo();
189 
190  // We don't need this if we only have spills since there is no user facing
191  // scratch.
192 
193  // TODO: If we know we don't have flat instructions earlier, we can omit
194  // this from the input registers.
195  //
196  // TODO: We only need to know if we access scratch space through a flat
197  // pointer. Because we only detect if flat instructions are used at all,
198  // this will be used more often than necessary on VI.
199 
200  // Debug location must be unknown since the first debug location is used to
201  // determine the end of the prologue.
202  DebugLoc DL;
204 
205  unsigned FlatScratchInitReg
207 
209  MRI.addLiveIn(FlatScratchInitReg);
210  MBB.addLiveIn(FlatScratchInitReg);
211 
212  unsigned FlatScrInitLo = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0);
213  unsigned FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1);
214 
215  unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();
216 
217  // Do a 64-bit pointer add.
218  if (ST.flatScratchIsPointer()) {
220  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo)
221  .addReg(FlatScrInitLo)
222  .addReg(ScratchWaveOffsetReg);
223  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), FlatScrInitHi)
224  .addReg(FlatScrInitHi)
225  .addImm(0);
226  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)).
227  addReg(FlatScrInitLo).
228  addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_LO |
230  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)).
231  addReg(FlatScrInitHi).
232  addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_HI |
234  return;
235  }
236 
237  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), AMDGPU::FLAT_SCR_LO)
238  .addReg(FlatScrInitLo)
239  .addReg(ScratchWaveOffsetReg);
240  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), AMDGPU::FLAT_SCR_HI)
241  .addReg(FlatScrInitHi)
242  .addImm(0);
243 
244  return;
245  }
246 
248 
249  // Copy the size in bytes.
250  BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), AMDGPU::FLAT_SCR_LO)
251  .addReg(FlatScrInitHi, RegState::Kill);
252 
253  // Add wave offset in bytes to private base offset.
254  // See comment in AMDKernelCodeT.h for enable_sgpr_flat_scratch_init.
255  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo)
256  .addReg(FlatScrInitLo)
257  .addReg(ScratchWaveOffsetReg);
258 
259  // Convert offset to 256-byte units.
260  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_LSHR_B32), AMDGPU::FLAT_SCR_HI)
261  .addReg(FlatScrInitLo, RegState::Kill)
262  .addImm(8);
263 }
264 
// Picks the SGPR_128 register that will hold the scratch resource descriptor.
// Returns NoRegister when the descriptor is unset/unused, keeps the existing
// register on SGPRInitBug targets or when it is not the default reserved one,
// and otherwise shifts it down to the first allocatable, unused SGPR_128 tuple
// past the preloaded SGPRs (updating MRI and MFI accordingly).
// NOTE(review): doxygen scrape — original source lines 269 (the
// `SIMachineFunctionInfo *MFI` parameter) and 271 (MRI) are missing from this
// view.
265 unsigned SIFrameLowering::getReservedPrivateSegmentBufferReg(
266  const GCNSubtarget &ST,
267  const SIInstrInfo *TII,
268  const SIRegisterInfo *TRI,
270  MachineFunction &MF) const {
272 
273  // We need to insert initialization of the scratch resource descriptor.
274  unsigned ScratchRsrcReg = MFI->getScratchRSrcReg();
275  if (ScratchRsrcReg == AMDGPU::NoRegister ||
276  !MRI.isPhysRegUsed(ScratchRsrcReg))
277  return AMDGPU::NoRegister;
278 
279  if (ST.hasSGPRInitBug() ||
280  ScratchRsrcReg != TRI->reservedPrivateSegmentBufferReg(MF))
281  return ScratchRsrcReg;
282 
283  // We reserved the last registers for this. Shift it down to the end of those
284  // which were actually used.
285  //
286  // FIXME: It might be safer to use a pseudoregister before replacement.
287 
288  // FIXME: We should be able to eliminate unused input registers. We only
289  // cannot do this for the resources required for scratch access. For now we
290  // skip over user SGPRs and may leave unused holes.
291 
292  // We find the resource first because it has an alignment requirement.
293 
294  unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 3) / 4;
295  ArrayRef<MCPhysReg> AllSGPR128s = getAllSGPR128(ST, MF);
296  AllSGPR128s = AllSGPR128s.slice(std::min(static_cast<unsigned>(AllSGPR128s.size()), NumPreloaded));
297 
298  // Skip the last N reserved elements because they should have already been
299  // reserved for VCC etc.
300  for (MCPhysReg Reg : AllSGPR128s) {
301  // Pick the first unallocated one. Make sure we don't clobber the other
302  // reserved input we needed.
303  if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg)) {
304  MRI.replaceRegWith(ScratchRsrcReg, Reg);
305  MFI->setScratchRSrcReg(Reg);
306  return Reg;
307  }
308  }
309 
310  return ScratchRsrcReg;
311 }
312 
313 // Shift down registers reserved for the scratch wave offset.
//
// Returns the (possibly re-assigned) scratch wave offset register together
// with a flag indicating whether the frame-offset register was adjusted as a
// side effect. Entry functions only (asserted below).
// NOTE(review): doxygen scrape — original source lines 318 (MRI), 373, and
// 376 are missing from this view; the body of the
// `getScratchWaveOffsetReg() == getStackPtrOffsetReg()` branch is therefore
// incomplete here.
314 std::pair<unsigned, bool>
315 SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg(
316  const GCNSubtarget &ST, const SIInstrInfo *TII, const SIRegisterInfo *TRI,
317  SIMachineFunctionInfo *MFI, MachineFunction &MF) const {
319  unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();
320 
321  assert(MFI->isEntryFunction());
322 
323  // No replacement necessary.
324  if (ScratchWaveOffsetReg == AMDGPU::NoRegister ||
325  (!hasFP(MF) && !MRI.isPhysRegUsed(ScratchWaveOffsetReg))) {
326  return std::make_pair(AMDGPU::NoRegister, false);
327  }
328 
329  if (ST.hasSGPRInitBug())
330  return std::make_pair(ScratchWaveOffsetReg, false);
331 
332  unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();
333 
334  ArrayRef<MCPhysReg> AllSGPRs = getAllSGPRs(ST, MF);
335  if (NumPreloaded > AllSGPRs.size())
336  return std::make_pair(ScratchWaveOffsetReg, false);
337 
338  AllSGPRs = AllSGPRs.slice(NumPreloaded);
339 
340  // We need to drop register from the end of the list that we cannot use
341  // for the scratch wave offset.
342  // + 2 s102 and s103 do not exist on VI.
343  // + 2 for vcc
344  // + 2 for xnack_mask
345  // + 2 for flat_scratch
346  // + 4 for registers reserved for scratch resource register
347  // + 1 for register reserved for scratch wave offset. (By excluding this
348  // register from the list to consider, it means that when this
349  // register is being used for the scratch wave offset and there
350  // are no other free SGPRs, then the value will stay in this register.
351  // + 1 if stack pointer is used.
352  // ----
353  // 13 (+1)
354  unsigned ReservedRegCount = 13;
355 
356  if (AllSGPRs.size() < ReservedRegCount)
357  return std::make_pair(ScratchWaveOffsetReg, false);
358 
359  bool HandledScratchWaveOffsetReg =
360  ScratchWaveOffsetReg != TRI->reservedPrivateSegmentWaveByteOffsetReg(MF);
361  bool FPAdjusted = false;
362 
363  for (MCPhysReg Reg : AllSGPRs.drop_back(ReservedRegCount)) {
364  // Pick the first unallocated SGPR. Be careful not to pick an alias of the
365  // scratch descriptor, since we haven't added its uses yet.
366  if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg)) {
367  if (!HandledScratchWaveOffsetReg) {
368  HandledScratchWaveOffsetReg = true;
369 
370  MRI.replaceRegWith(ScratchWaveOffsetReg, Reg);
371  if (MFI->getScratchWaveOffsetReg() == MFI->getStackPtrOffsetReg()) {
372  assert(!hasFP(MF));
374  }
375 
377  MFI->setFrameOffsetReg(Reg);
378  ScratchWaveOffsetReg = Reg;
379  FPAdjusted = true;
380  break;
381  }
382  }
383  }
384 
385  return std::make_pair(ScratchWaveOffsetReg, FPAdjusted);
386 }
387 
// Entry-function prologue: initializes flat scratch (if used), assigns the
// scratch resource descriptor and scratch wave offset registers, marks the
// preloaded inputs live-in, copies them to their final registers in a safe
// order, delegates descriptor setup to emitEntryFunctionScratchSetup, and
// finally establishes SP (= wave offset, plus StackSize*WavefrontSize when the
// frame is non-empty).
// NOTE(review): doxygen scrape — original source lines 388 (the
// `emitEntryFunctionPrologue(MachineFunction &MF,` signature start — inferred
// from the call at line 680 below), 392-393 (MFI), 403 (MRI), 428 (the
// PRIVATE_SEGMENT_WAVE_BYTE_OFFSET argument), 433 (PRIVATE_SEGMENT_BUFFER
// argument), and 469 (insertion iterator `I`) are missing from this view.
389  MachineBasicBlock &MBB) const {
390  assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
391 
393 
394  // If we only have SGPR spills, we won't actually be using scratch memory
395  // since these spill to VGPRs.
396  //
397  // FIXME: We should be cleaning up these unused SGPR spill frame indices
398  // somewhere.
399 
400  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
401  const SIInstrInfo *TII = ST.getInstrInfo();
402  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
404  const Function &F = MF.getFunction();
405 
406  // We need to do the replacement of the private segment buffer and wave offset
407  // register even if there are no stack objects. There could be stores to undef
408  // or a constant without an associated object.
409 
410  // FIXME: We still have implicit uses on SGPR spill instructions in case they
411  // need to spill to vector memory. It's likely that will not happen, but at
412  // this point it appears we need the setup. This part of the prolog should be
413  // emitted after frame indices are eliminated.
414 
415  if (MFI->hasFlatScratchInit())
416  emitFlatScratchInit(ST, MF, MBB);
417 
418  unsigned ScratchRsrcReg
419  = getReservedPrivateSegmentBufferReg(ST, TII, TRI, MFI, MF);
420 
421  unsigned ScratchWaveOffsetReg;
422  bool FPAdjusted;
423  std::tie(ScratchWaveOffsetReg, FPAdjusted) =
424  getReservedPrivateSegmentWaveByteOffsetReg(ST, TII, TRI, MFI, MF);
425 
426  // We need to insert initialization of the scratch resource descriptor.
427  unsigned PreloadedScratchWaveOffsetReg = MFI->getPreloadedReg(
429 
430  unsigned PreloadedPrivateBufferReg = AMDGPU::NoRegister;
431  if (ST.isAmdHsaOrMesa(F)) {
432  PreloadedPrivateBufferReg = MFI->getPreloadedReg(
434  }
435 
436  bool OffsetRegUsed = ScratchWaveOffsetReg != AMDGPU::NoRegister &&
437  MRI.isPhysRegUsed(ScratchWaveOffsetReg);
438  bool ResourceRegUsed = ScratchRsrcReg != AMDGPU::NoRegister &&
439  MRI.isPhysRegUsed(ScratchRsrcReg);
440 
441  // FIXME: Hack to not crash in situations which emitted an error.
442  if (PreloadedScratchWaveOffsetReg == AMDGPU::NoRegister)
443  return;
444 
445  // We added live-ins during argument lowering, but since they were not used
446  // they were deleted. We're adding the uses now, so add them back.
447  MRI.addLiveIn(PreloadedScratchWaveOffsetReg);
448  MBB.addLiveIn(PreloadedScratchWaveOffsetReg);
449 
450  if (ResourceRegUsed && PreloadedPrivateBufferReg != AMDGPU::NoRegister) {
451  assert(ST.isAmdHsaOrMesa(F) || ST.isMesaGfxShader(F));
452  MRI.addLiveIn(PreloadedPrivateBufferReg);
453  MBB.addLiveIn(PreloadedPrivateBufferReg);
454  }
455 
456  // Make the register selected live throughout the function.
457  for (MachineBasicBlock &OtherBB : MF) {
458  if (&OtherBB == &MBB)
459  continue;
460 
461  if (OffsetRegUsed || FPAdjusted)
462  OtherBB.addLiveIn(ScratchWaveOffsetReg);
463 
464  if (ResourceRegUsed)
465  OtherBB.addLiveIn(ScratchRsrcReg);
466  }
467 
468  DebugLoc DL;
470 
471  // If we reserved the original input registers, we don't need to copy to the
472  // reserved registers.
473 
474  bool CopyBuffer = ResourceRegUsed &&
475  PreloadedPrivateBufferReg != AMDGPU::NoRegister &&
476  ST.isAmdHsaOrMesa(F) &&
477  ScratchRsrcReg != PreloadedPrivateBufferReg;
478 
479  // This needs to be careful of the copying order to avoid overwriting one of
480  // the input registers before it's been copied to its final
481  // destination. Usually the offset should be copied first.
482  bool CopyBufferFirst = TRI->isSubRegisterEq(PreloadedPrivateBufferReg,
483  ScratchWaveOffsetReg);
484  if (CopyBuffer && CopyBufferFirst) {
485  BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchRsrcReg)
486  .addReg(PreloadedPrivateBufferReg, RegState::Kill);
487  }
488 
489  unsigned SPReg = MFI->getStackPtrOffsetReg();
490  assert(SPReg != AMDGPU::SP_REG);
491 
492  // FIXME: Remove the isPhysRegUsed checks
493  const bool HasFP = hasFP(MF);
494 
495  if (HasFP || OffsetRegUsed) {
496  assert(ScratchWaveOffsetReg);
497  BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchWaveOffsetReg)
498  .addReg(PreloadedScratchWaveOffsetReg, HasFP ? RegState::Kill : 0);
499  }
500 
501  if (CopyBuffer && !CopyBufferFirst) {
502  BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchRsrcReg)
503  .addReg(PreloadedPrivateBufferReg, RegState::Kill);
504  }
505 
506  if (ResourceRegUsed) {
507  emitEntryFunctionScratchSetup(ST, MF, MBB, MFI, I,
508  PreloadedPrivateBufferReg, ScratchRsrcReg);
509  }
510 
511  if (HasFP) {
512  DebugLoc DL;
513  const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
514  int64_t StackSize = FrameInfo.getStackSize();
515 
516  // On kernel entry, the private scratch wave offset is the SP value.
517  if (StackSize == 0) {
518  BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), SPReg)
519  .addReg(MFI->getScratchWaveOffsetReg());
520  } else {
521  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), SPReg)
522  .addReg(MFI->getScratchWaveOffsetReg())
523  .addImm(StackSize * ST.getWavefrontSize());
524  }
525  }
526 }
527 
528 // Emit scratch setup code for AMDPAL or Mesa, assuming ResourceRegUsed is set.
//
// Three paths: (1) AMDPAL — build a pointer to the GIT (from the passed-in low
// GIT address and either the amdgpu-git-ptr-high attribute or S_GETPC), then
// S_LOAD_DWORDX4 the scratch descriptor from it; (2) Mesa GFX shaders / no
// preloaded buffer — fill the descriptor from relocations or the implicit
// buffer pointer plus getScratchRsrcWords23(); (3) the enclosing else at line
// 642 handles the remaining case via SCRATCH_RSRC_DWORD0/1 external symbols.
// NOTE(review): doxygen scrape — original source lines 530 (MF/MBB/MFI
// parameters), 559-560 (merged-shader case labels), 578-579 and 623-624
// (PointerType::get operands), 583-585 and 627-629 (MachineMemOperand flags),
// 613, 617, 632, and 639-640 are missing from this view.
529 void SIFrameLowering::emitEntryFunctionScratchSetup(const GCNSubtarget &ST,
531  MachineBasicBlock::iterator I, unsigned PreloadedPrivateBufferReg,
532  unsigned ScratchRsrcReg) const {
533 
534  const SIInstrInfo *TII = ST.getInstrInfo();
535  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
536  const Function &Fn = MF.getFunction();
537  DebugLoc DL;
538 
539  if (ST.isAmdPalOS()) {
540  // The pointer to the GIT is formed from the offset passed in and either
541  // the amdgpu-git-ptr-high function attribute or the top part of the PC
542  unsigned RsrcLo = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
543  unsigned RsrcHi = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
544  unsigned Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
545 
546  const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
547 
548  if (MFI->getGITPtrHigh() != 0xffffffff) {
549  BuildMI(MBB, I, DL, SMovB32, RsrcHi)
550  .addImm(MFI->getGITPtrHigh())
551  .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
552  } else {
553  const MCInstrDesc &GetPC64 = TII->get(AMDGPU::S_GETPC_B64);
554  BuildMI(MBB, I, DL, GetPC64, Rsrc01);
555  }
556  auto GitPtrLo = AMDGPU::SGPR0; // Low GIT address passed in
557  if (ST.hasMergedShaders()) {
558  switch (MF.getFunction().getCallingConv()) {
561  // Low GIT address is passed in s8 rather than s0 for an LS+HS or
562  // ES+GS merged shader on gfx9+.
563  GitPtrLo = AMDGPU::SGPR8;
564  break;
565  default:
566  break;
567  }
568  }
569  MF.getRegInfo().addLiveIn(GitPtrLo);
570  MBB.addLiveIn(GitPtrLo);
571  BuildMI(MBB, I, DL, SMovB32, RsrcLo)
572  .addReg(GitPtrLo)
573  .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
574 
575  // We now have the GIT ptr - now get the scratch descriptor from the entry
576  // at offset 0 (or offset 16 for a compute shader).
577  PointerType *PtrTy =
580  MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
581  const MCInstrDesc &LoadDwordX4 = TII->get(AMDGPU::S_LOAD_DWORDX4_IMM);
582  auto MMO = MF.getMachineMemOperand(PtrInfo,
586  16, 4);
587  unsigned Offset = Fn.getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0;
588  const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
589  unsigned EncodedOffset = AMDGPU::getSMRDEncodedOffset(Subtarget, Offset);
590  BuildMI(MBB, I, DL, LoadDwordX4, ScratchRsrcReg)
591  .addReg(Rsrc01)
592  .addImm(EncodedOffset) // offset
593  .addImm(0) // glc
594  .addImm(0) // dlc
595  .addReg(ScratchRsrcReg, RegState::ImplicitDefine)
596  .addMemOperand(MMO);
597  return;
598  }
599  if (ST.isMesaGfxShader(Fn)
600  || (PreloadedPrivateBufferReg == AMDGPU::NoRegister)) {
601  assert(!ST.isAmdHsaOrMesa(Fn));
602  const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
603 
604  unsigned Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
605  unsigned Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);
606 
607  // Use relocations to get the pointer, and setup the other bits manually.
608  uint64_t Rsrc23 = TII->getScratchRsrcWords23();
609 
610  if (MFI->hasImplicitBufferPtr()) {
611  unsigned Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
612 
614  const MCInstrDesc &Mov64 = TII->get(AMDGPU::S_MOV_B64);
615 
616  BuildMI(MBB, I, DL, Mov64, Rsrc01)
618  .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
619  } else {
620  const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);
621 
622  PointerType *PtrTy =
625  MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
626  auto MMO = MF.getMachineMemOperand(PtrInfo,
630  8, 4);
631  BuildMI(MBB, I, DL, LoadDwordX2, Rsrc01)
633  .addImm(0) // offset
634  .addImm(0) // glc
635  .addImm(0) // dlc
636  .addMemOperand(MMO)
637  .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
638 
641  }
642  } else {
643  unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
644  unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
645 
646  BuildMI(MBB, I, DL, SMovB32, Rsrc0)
647  .addExternalSymbol("SCRATCH_RSRC_DWORD0")
648  .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
649 
650  BuildMI(MBB, I, DL, SMovB32, Rsrc1)
651  .addExternalSymbol("SCRATCH_RSRC_DWORD1")
652  .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
653 
654  }
655 
656  BuildMI(MBB, I, DL, SMovB32, Rsrc2)
657  .addImm(Rsrc23 & 0xffffffff)
658  .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
659 
660  BuildMI(MBB, I, DL, SMovB32, Rsrc3)
661  .addImm(Rsrc23 >> 32)
662  .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
663  }
664 }
665 
// Returns true for the stack IDs this target's frame lowering supports;
// unknown IDs hit llvm_unreachable.
// NOTE(review): doxygen scrape — original source lines 666 (the signature,
// presumably `bool SIFrameLowering::isSupportedStackID(TargetStackID::Value ID) const {`)
// and 668-670 (the case labels that return true) are missing from this view,
// so the exact set of supported IDs cannot be determined here.
667  switch (ID) {
671  return true;
672  }
673  llvm_unreachable("Invalid TargetStackID::Value");
674 }
675 
// Non-entry-function prologue (entry functions are delegated to
// emitEntryFunctionPrologue): spills SGPR-spill VGPRs to memory with all exec
// lanes forced on, saves the frame pointer (to a free SGPR copy and/or a
// VGPR lane via V_WRITELANE), performs stack realignment or plain FP setup,
// and bumps SP by the (wave-scaled) frame size.
// NOTE(review): doxygen scrape — original source lines 676 (the
// `emitPrologue(MachineFunction &MF,` signature start), 678 (FuncInfo), 685
// (MRI), 708 (FrameSetup flag on the FP copy), 711 (the range-for header over
// getSGPRSpillVGPRs()), 755-756 (the FI-dead assert message and the Spill
// ArrayRef declaration), and 803 (FrameSetup flag) are missing from this view.
677  MachineBasicBlock &MBB) const {
679  if (FuncInfo->isEntryFunction()) {
680  emitEntryFunctionPrologue(MF, MBB);
681  return;
682  }
683 
684  const MachineFrameInfo &MFI = MF.getFrameInfo();
686  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
687  const SIInstrInfo *TII = ST.getInstrInfo();
688  const SIRegisterInfo &TRI = TII->getRegisterInfo();
689 
690  unsigned StackPtrReg = FuncInfo->getStackPtrOffsetReg();
691  unsigned FramePtrReg = FuncInfo->getFrameOffsetReg();
692  LivePhysRegs LiveRegs;
693 
694  MachineBasicBlock::iterator MBBI = MBB.begin();
695  DebugLoc DL;
696 
697  bool HasFP = false;
698  uint32_t NumBytes = MFI.getStackSize();
699  uint32_t RoundedSize = NumBytes;
700  // To avoid clobbering VGPRs in lanes that weren't active on function entry,
701  // turn on all lanes before doing the spill to memory.
702  unsigned ScratchExecCopy = AMDGPU::NoRegister;
703 
704  // Emit the copy if we need an FP, and are using a free SGPR to save it.
705  if (FuncInfo->SGPRForFPSaveRestoreCopy != AMDGPU::NoRegister) {
706  BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FuncInfo->SGPRForFPSaveRestoreCopy)
707  .addReg(FramePtrReg)
709  }
710 
712  : FuncInfo->getSGPRSpillVGPRs()) {
713  if (!Reg.FI.hasValue())
714  continue;
715 
716  if (ScratchExecCopy == AMDGPU::NoRegister) {
717  if (LiveRegs.empty()) {
718  LiveRegs.init(TRI);
719  LiveRegs.addLiveIns(MBB);
720  if (FuncInfo->SGPRForFPSaveRestoreCopy)
721  LiveRegs.removeReg(FuncInfo->SGPRForFPSaveRestoreCopy);
722  }
723 
724  ScratchExecCopy
725  = findScratchNonCalleeSaveRegister(MRI, LiveRegs,
726  *TRI.getWaveMaskRegClass())
727  assert(FuncInfo->SGPRForFPSaveRestoreCopy != ScratchExecCopy);
728 
729  const unsigned OrSaveExec = ST.isWave32() ?
730  AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64;
731  BuildMI(MBB, MBBI, DL, TII->get(OrSaveExec),
732  ScratchExecCopy)
733  .addImm(-1);
734  }
735 
736  buildPrologSpill(LiveRegs, MBB, MBBI, TII, Reg.VGPR,
737  FuncInfo->getScratchRSrcReg(),
738  StackPtrReg,
739  Reg.FI.getValue());
740  }
741 
742  if (ScratchExecCopy != AMDGPU::NoRegister) {
743  // FIXME: Split block and make terminator.
744  unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
745  unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
746  BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec)
747  .addReg(ScratchExecCopy, RegState::Kill);
748  LiveRegs.addReg(ScratchExecCopy);
749  }
750 
751 
752  if (FuncInfo->FramePointerSaveIndex) {
753  const int FI = FuncInfo->FramePointerSaveIndex.getValue();
754  assert(!MFI.isDeadObjectIndex(FI) &&
757  = FuncInfo->getSGPRToVGPRSpills(FI);
758  assert(Spill.size() == 1);
759 
760  // Save FP before setting it up.
761  // FIXME: This should respect spillSGPRToVGPR;
762  BuildMI(MBB, MBBI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
763  Spill[0].VGPR)
764  .addReg(FramePtrReg)
765  .addImm(Spill[0].Lane)
766  .addReg(Spill[0].VGPR, RegState::Undef);
767  }
768 
769  if (TRI.needsStackRealignment(MF)) {
770  HasFP = true;
771  const unsigned Alignment = MFI.getMaxAlignment();
772 
773  RoundedSize += Alignment;
774  if (LiveRegs.empty()) {
775  LiveRegs.init(TRI);
776  LiveRegs.addLiveIns(MBB);
777  LiveRegs.addReg(FuncInfo->SGPRForFPSaveRestoreCopy);
778  }
779 
780  unsigned ScratchSPReg = findScratchNonCalleeSaveRegister(
781  MRI, LiveRegs, AMDGPU::SReg_32_XM0RegClass);
782  assert(ScratchSPReg != AMDGPU::NoRegister &&
783  ScratchSPReg != FuncInfo->SGPRForFPSaveRestoreCopy);
784 
785  // s_add_u32 tmp_reg, s32, NumBytes
786  // s_and_b32 s32, tmp_reg, 0b111...0000
787  BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_U32), ScratchSPReg)
788  .addReg(StackPtrReg)
789  .addImm((Alignment - 1) * ST.getWavefrontSize())
790  .setMIFlag(MachineInstr::FrameSetup);
791  BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_AND_B32), FramePtrReg)
792  .addReg(ScratchSPReg, RegState::Kill)
793  .addImm(-Alignment * ST.getWavefrontSize())
794  .setMIFlag(MachineInstr::FrameSetup);
795  FuncInfo->setIsStackRealigned(true);
796  } else if ((HasFP = hasFP(MF))) {
797  // If we need a base pointer, set it up here. It's whatever the value of
798  // the stack pointer is at this point. Any variable size objects will be
799  // allocated after this, so we can still use the base pointer to reference
800  // locals.
801  BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
802  .addReg(StackPtrReg)
804  }
805 
806  if (HasFP && RoundedSize != 0) {
807  BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_U32), StackPtrReg)
808  .addReg(StackPtrReg)
809  .addImm(RoundedSize * ST.getWavefrontSize())
810  .setMIFlag(MachineInstr::FrameSetup);
811  }
812 
813  assert((!HasFP || (FuncInfo->SGPRForFPSaveRestoreCopy != AMDGPU::NoRegister ||
814  FuncInfo->FramePointerSaveIndex)) &&
815  "Needed to save FP but didn't save it anywhere");
816 
817  assert((HasFP || (FuncInfo->SGPRForFPSaveRestoreCopy == AMDGPU::NoRegister &&
818  !FuncInfo->FramePointerSaveIndex)) &&
819  "Saved FP but didn't need it");
820 }
821 
// Non-entry-function epilogue, mirroring emitPrologue: restores SP by
// subtracting the (wave-scaled, possibly realignment-rounded) frame size,
// restores the frame pointer from its SGPR copy and/or V_READLANE, then
// reloads the SGPR-spill VGPRs from memory with all exec lanes forced on.
// NOTE(review): doxygen scrape — original source lines 822 (the
// `emitEpilogue(MachineFunction &MF,` signature start), 830-831 (MRI and MBBI
// initialization — MBBI is used below), 851 (FrameDestroy flag), 857-860 (the
// dead-object assert and the Spill ArrayRef declaration start), and 870 (the
// range-for header over getSGPRSpillVGPRs()) are missing from this view.
823  MachineBasicBlock &MBB) const {
824  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
825  if (FuncInfo->isEntryFunction())
826  return;
827 
828  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
829  const SIInstrInfo *TII = ST.getInstrInfo();
832  LivePhysRegs LiveRegs;
833  DebugLoc DL;
834 
835  const MachineFrameInfo &MFI = MF.getFrameInfo();
836  uint32_t NumBytes = MFI.getStackSize();
837  uint32_t RoundedSize = FuncInfo->isStackRealigned() ?
838  NumBytes + MFI.getMaxAlignment() : NumBytes;
839 
840  if (RoundedSize != 0 && hasFP(MF)) {
841  const unsigned StackPtrReg = FuncInfo->getStackPtrOffsetReg();
842  BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_SUB_U32), StackPtrReg)
843  .addReg(StackPtrReg)
844  .addImm(RoundedSize * ST.getWavefrontSize())
845  .setMIFlag(MachineInstr::FrameDestroy);
846  }
847 
848  if (FuncInfo->SGPRForFPSaveRestoreCopy != AMDGPU::NoRegister) {
849  BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FuncInfo->getFrameOffsetReg())
850  .addReg(FuncInfo->SGPRForFPSaveRestoreCopy)
852  }
853 
854  if (FuncInfo->FramePointerSaveIndex) {
855  const int FI = FuncInfo->FramePointerSaveIndex.getValue();
856 
859 
861  = FuncInfo->getSGPRToVGPRSpills(FI);
862  assert(Spill.size() == 1);
863  BuildMI(MBB, MBBI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
864  FuncInfo->getFrameOffsetReg())
865  .addReg(Spill[0].VGPR)
866  .addImm(Spill[0].Lane);
867  }
868 
869  unsigned ScratchExecCopy = AMDGPU::NoRegister;
871  : FuncInfo->getSGPRSpillVGPRs()) {
872  if (!Reg.FI.hasValue())
873  continue;
874 
875  const SIRegisterInfo &TRI = TII->getRegisterInfo();
876  if (ScratchExecCopy == AMDGPU::NoRegister) {
877  // See emitPrologue
878  if (LiveRegs.empty()) {
879  LiveRegs.init(*ST.getRegisterInfo());
880  LiveRegs.addLiveOuts(MBB);
881  LiveRegs.stepBackward(*MBBI);
882  }
883 
884  ScratchExecCopy = findScratchNonCalleeSaveRegister(
885  MRI, LiveRegs, *TRI.getWaveMaskRegClass());
886  LiveRegs.removeReg(ScratchExecCopy);
887 
888  const unsigned OrSaveExec =
889  ST.isWave32() ? AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64;
890 
891  BuildMI(MBB, MBBI, DL, TII->get(OrSaveExec), ScratchExecCopy)
892  .addImm(-1);
893  }
894 
895  buildEpilogReload(LiveRegs, MBB, MBBI, TII, Reg.VGPR,
896  FuncInfo->getScratchRSrcReg(),
897  FuncInfo->getStackPtrOffsetReg(), Reg.FI.getValue());
898  }
899 
900  if (ScratchExecCopy != AMDGPU::NoRegister) {
901  // FIXME: Split block and make terminator.
902  unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
903  unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
904  BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec)
905  .addReg(ScratchExecCopy, RegState::Kill);
906  }
907 }
908 
909 // Note SGPRSpill stack IDs should only be used for SGPR spilling to VGPRs, not
910 // memory. They should have been removed by now.
911 static bool allStackObjectsAreDead(const MachineFrameInfo &MFI) {
912  for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
913  I != E; ++I) {
914  if (!MFI.isDeadObjectIndex(I))
915  return false;
916  }
917 
918  return true;
919 }
920 
921 #ifndef NDEBUG
// Debug-build-only check used in asserts: verifies that every live frame
// object is accounted for (the only permitted live index is the frame-pointer
// save slot, per the condition visible at line 928).
// NOTE(review): doxygen scrape — original source line 927 (part of the `if`
// condition, presumably the SGPRSpill stack-ID check) is missing from this
// view, so the full condition cannot be read here.
922 static bool allSGPRSpillsAreDead(const MachineFrameInfo &MFI,
923  Optional<int> FramePointerSaveIndex) {
924  for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
925  I != E; ++I) {
926  if (!MFI.isDeadObjectIndex(I) &&
928  FramePointerSaveIndex && I != FramePointerSaveIndex) {
929  return false;
930  }
931  }
932 
933  return true;
934 }
935 #endif
936 
// Resolves frame index \p FI to an offset, reporting the register to address
// it through in \p FrameReg (the target's frame register per
// SIRegisterInfo::getFrameRegister).
// NOTE(review): doxygen scrape — original source line 937 (the signature
// start, presumably `int SIFrameLowering::getFrameIndexReference(const
// MachineFunction &MF, int FI,`) is missing from this view.
938  unsigned &FrameReg) const {
939  const SIRegisterInfo *RI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
940 
941  FrameReg = RI->getFrameRegister(MF);
942  return MF.getFrameInfo().getObjectOffset(FI);
943 }
944 
// Late frame finalization: removes dead frame indices, and if any stack
// objects remain live, reserves an emergency scavenging slot for the register
// scavenger (a fixed object at offset 0 for entry functions, an ordinary
// stack object otherwise).
// NOTE(review): doxygen scrape — original source lines 945 (the
// `processFunctionBeforeFrameFinalized(` signature start), 952 (FuncInfo),
// and 955 (the assert condition whose message appears at line 956) are
// missing from this view.
946  MachineFunction &MF,
947  RegScavenger *RS) const {
948  MachineFrameInfo &MFI = MF.getFrameInfo();
949 
950  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
951  const SIRegisterInfo *TRI = ST.getRegisterInfo();
953 
954  FuncInfo->removeDeadFrameIndices(MFI);
956  "SGPR spill should have been removed in SILowerSGPRSpills");
957 
958  // FIXME: The other checks should be redundant with allStackObjectsAreDead,
959  // but currently hasNonSpillStackObjects is set only from source
960  // allocas. Stack temps produced from legalization are not counted currently.
961  if (!allStackObjectsAreDead(MFI)) {
962  assert(RS && "RegScavenger required if spilling");
963 
964  if (FuncInfo->isEntryFunction()) {
965  int ScavengeFI = MFI.CreateFixedObject(
966  TRI->getSpillSize(AMDGPU::SGPR_32RegClass), 0, false);
967  RS->addScavengingFrameIndex(ScavengeFI);
968  } else {
969  int ScavengeFI = MFI.CreateStackObject(
970  TRI->getSpillSize(AMDGPU::SGPR_32RegClass),
971  TRI->getSpillAlignment(AMDGPU::SGPR_32RegClass),
972  false);
973  RS->addScavengingFrameIndex(ScavengeFI);
974  }
975  }
976 }
977 
978 // Only report VGPRs to generic code.
980  BitVector &SavedVGPRs,
981  RegScavenger *RS) const {
982  TargetFrameLowering::determineCalleeSaves(MF, SavedVGPRs, RS);
984  if (MFI->isEntryFunction())
985  return;
986 
987  const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
988  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
989  const SIRegisterInfo *TRI = ST.getRegisterInfo();
990 
991  // Ignore the SGPRs the default implementation found.
992  SavedVGPRs.clearBitsNotInMask(TRI->getAllVGPRRegMask());
993 
994  // hasFP only knows about stack objects that already exist. We're now
995  // determining the stack slots that will be created, so we have to predict
996  // them. Stack objects force FP usage with calls.
997  //
998  // Note a new VGPR CSR may be introduced if one is used for the spill, but we
999  // don't want to report it here.
1000  //
1001  // FIXME: Is this really hasReservedCallFrame?
1002  const bool WillHaveFP =
1003  FrameInfo.hasCalls() &&
1004  (SavedVGPRs.any() || !allStackObjectsAreDead(FrameInfo));
1005 
1006  // VGPRs used for SGPR spilling need to be specially inserted in the prolog,
1007  // so don't allow the default insertion to handle them.
1008  for (auto SSpill : MFI->getSGPRSpillVGPRs())
1009  SavedVGPRs.reset(SSpill.VGPR);
1010 
1011  const bool HasFP = WillHaveFP || hasFP(MF);
1012  if (!HasFP)
1013  return;
1014 
1015  if (MFI->haveFreeLanesForSGPRSpill(MF, 1)) {
1016  int NewFI = MF.getFrameInfo().CreateStackObject(4, 4, true, nullptr,
1018 
1019  // If there is already a VGPR with free lanes, use it. We may already have
1020  // to pay the penalty for spilling a CSR VGPR.
1021  if (!MFI->allocateSGPRSpillToVGPR(MF, NewFI))
1022  llvm_unreachable("allocate SGPR spill should have worked");
1023 
1024  MFI->FramePointerSaveIndex = NewFI;
1025 
1026  LLVM_DEBUG(
1027  auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front();
1028  dbgs() << "Spilling FP to " << printReg(Spill.VGPR, TRI)
1029  << ':' << Spill.Lane << '\n');
1030  return;
1031  }
1032 
1034 
1035  if (!MFI->SGPRForFPSaveRestoreCopy) {
1036  // There's no free lane to spill, and no free register to save FP, so we're
1037  // forced to spill another VGPR to use for the spill.
1038  int NewFI = MF.getFrameInfo().CreateStackObject(4, 4, true, nullptr,
1040  if (!MFI->allocateSGPRSpillToVGPR(MF, NewFI))
1041  llvm_unreachable("allocate SGPR spill should have worked");
1042  MFI->FramePointerSaveIndex = NewFI;
1043 
1044  LLVM_DEBUG(
1045  auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front();
1046  dbgs() << "FP requires fallback spill to " << printReg(Spill.VGPR, TRI)
1047  << ':' << Spill.Lane << '\n';);
1048  } else {
1049  LLVM_DEBUG(dbgs() << "Saving FP with copy to " <<
1050  printReg(MFI->SGPRForFPSaveRestoreCopy, TRI) << '\n');
1051  }
1052 }
1053 
1055  BitVector &SavedRegs,
1056  RegScavenger *RS) const {
1057  TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
1059  if (MFI->isEntryFunction())
1060  return;
1061 
1062  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1063  const SIRegisterInfo *TRI = ST.getRegisterInfo();
1064 
1065  // The SP is specifically managed and we don't want extra spills of it.
1066  SavedRegs.reset(MFI->getStackPtrOffsetReg());
1067  SavedRegs.clearBitsInMask(TRI->getAllVGPRRegMask());
1068 }
1069 
1071  MachineFunction &MF, const TargetRegisterInfo *TRI,
1072  std::vector<CalleeSavedInfo> &CSI) const {
1073  if (CSI.empty())
1074  return true; // Early exit if no callee saved registers are modified!
1075 
1076  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
1077  if (!FuncInfo->SGPRForFPSaveRestoreCopy)
1078  return false;
1079 
1080  for (auto &CS : CSI) {
1081  if (CS.getReg() == FuncInfo->getFrameOffsetReg()) {
1082  if (FuncInfo->SGPRForFPSaveRestoreCopy != AMDGPU::NoRegister)
1083  CS.setDstReg(FuncInfo->SGPRForFPSaveRestoreCopy);
1084  break;
1085  }
1086  }
1087 
1088  return false;
1089 }
1090 
1092  MachineFunction &MF,
1093  MachineBasicBlock &MBB,
1094  MachineBasicBlock::iterator I) const {
1095  int64_t Amount = I->getOperand(0).getImm();
1096  if (Amount == 0)
1097  return MBB.erase(I);
1098 
1099  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1100  const SIInstrInfo *TII = ST.getInstrInfo();
1101  const DebugLoc &DL = I->getDebugLoc();
1102  unsigned Opc = I->getOpcode();
1103  bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
1104  uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;
1105 
1106  if (!hasReservedCallFrame(MF)) {
1107  unsigned Align = getStackAlignment();
1108 
1109  Amount = alignTo(Amount, Align);
1110  assert(isUInt<32>(Amount) && "exceeded stack address space size");
1112  unsigned SPReg = MFI->getStackPtrOffsetReg();
1113 
1114  unsigned Op = IsDestroy ? AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32;
1115  BuildMI(MBB, I, DL, TII->get(Op), SPReg)
1116  .addReg(SPReg)
1117  .addImm(Amount * ST.getWavefrontSize());
1118  } else if (CalleePopAmount != 0) {
1119  llvm_unreachable("is this used?");
1120  }
1121 
1122  return MBB.erase(I);
1123 }
1124 
1126  const MachineFrameInfo &MFI = MF.getFrameInfo();
1127  if (MFI.hasCalls()) {
1128  // All offsets are unsigned, so need to be addressed in the same direction
1129  // as stack growth.
1130 
1131  // FIXME: This function is pretty broken, since it can be called before the
1132  // frame layout is determined or CSR spills are inserted.
1133  if (MFI.getStackSize() != 0)
1134  return true;
1135 
1136  // For the entry point, the input wave scratch offset must be copied to the
1137  // API SP if there are calls.
1139  return true;
1140  }
1141 
1142  return MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken() ||
1143  MFI.hasStackMap() || MFI.hasPatchPoint() ||
1144  MF.getSubtarget<GCNSubtarget>().getRegisterInfo()->needsStackRealignment(MF) ||
1146 }
constexpr bool isUInt< 32 >(uint64_t x)
Definition: MathExtras.h:348
void clearBitsInMask(const uint32_t *Mask, unsigned MaskWords=~0u)
clearBitsInMask - Clear any bits in this vector that are set in Mask.
Definition: BitVector.h:781
int getFrameIndexReference(const MachineFunction &MF, int FI, unsigned &FrameReg) const override
getFrameIndexReference - This method should return the base register and offset used to reference a f...
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
Interface definition for SIRegisterInfo.
bool hasStackMap() const
This method may be called any time after instruction selection is complete to determine if there is a...
unsigned reservedPrivateSegmentBufferReg(const MachineFunction &MF) const
Return the end register initially reserved for the scratch buffer in case spilling is needed...
AMDGPU specific subclass of TargetSubtarget.
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:224
bool isAllocatable(unsigned PhysReg) const
isAllocatable - Returns true when PhysReg belongs to an allocatable register class and it hasn't been...
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:139
This class represents lattice values for constants.
Definition: AllocatorList.h:23
void addLiveIn(unsigned Reg, unsigned vreg=0)
addLiveIn - Add the specified register as a live-in.
bool isDeadObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a dead object.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:164
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const override
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...
void processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS=nullptr) const override
processFunctionBeforeFrameFinalized - This method is called immediately before the specified function...
unsigned Reg
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space...
Definition: Type.cpp:632
void emitEntryFunctionPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const
const SIInstrInfo * getInstrInfo() const override
bool hasMergedShaders() const
Calling convention used for Mesa/AMDPAL compute shaders.
Definition: CallingConv.h:197
unsigned const TargetRegisterInfo * TRI
A debug info location.
Definition: DebugLoc.h:33
bool isSupportedStackID(TargetStackID::Value ID) const override
F(f)
uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the next integer (mod 2**64) that is greater than or equal to Value and is a multiple of Alig...
Definition: MathExtras.h:684
void setIsStackRealigned(bool Realigned=true)
static IntegerType * getInt64Ty(LLVMContext &C)
Definition: Type.cpp:176
void removeDeadFrameIndices(MachineFrameInfo &MFI)
int CreateStackObject(uint64_t Size, unsigned Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it...
const SIRegisterInfo & getRegisterInfo() const
Definition: SIInstrInfo.h:171
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
virtual void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...
static void buildPrologSpill(LivePhysRegs &LiveRegs, MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const SIInstrInfo *TII, unsigned SpillReg, unsigned ScratchRsrcReg, unsigned SPReg, int FI)
bool empty() const
Returns true if the set is empty.
Definition: LivePhysRegs.h:76
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override
bool isFrameAddressTaken() const
This method may be called any time after instruction selection is complete to determine if there is a...
A description of a memory reference used in the backend.
bool isMesaGfxShader(const Function &F) const
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
const HexagonInstrInfo * TII
uint64_t getScratchRsrcWords23() const
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:450
Printable printReg(unsigned Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
static bool allStackObjectsAreDead(const MachineFrameInfo &MFI)
ArrayRef< SpilledReg > getSGPRToVGPRSpills(int FrameIndex) const
The memory access is dereferenceable (i.e., doesn't trap).
static MCPhysReg findUnusedSGPRNonCalleeSaved(MachineRegisterInfo &MRI)
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted...
bool hasVarSizedObjects() const
This method may be called any time after instruction selection is complete to determine if the stack ...
static bool allSGPRSpillsAreDead(const MachineFrameInfo &MFI, Optional< int > FramePointerSaveIndex)
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, unsigned base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
bool haveFreeLanesForSGPRSpill(const MachineFunction &MF, unsigned NumLane) const
returns true if NumLanes slots are available in VGPRs already used for SGPR spilling.
void setStackPtrOffsetReg(unsigned Reg)
bool DisableFramePointerElim(const MachineFunction &MF) const
DisableFramePointerElim - This returns true if frame pointer elimination optimization should be disab...
const T & getValue() const LLVM_LVALUE_FUNCTION
Definition: Optional.h:255
int getObjectIndexBegin() const
Return the minimum frame object index.
int getObjectIndexEnd() const
Return one past the maximum frame object index.
bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI)
Reserve a slice of a VGPR to support spilling for FrameIndex FI.
Class to represent pointers.
Definition: DerivedTypes.h:544
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
MachineBasicBlock::iterator eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
This method is called during prolog/epilog code insertion to eliminate call frame setup and destroy p...
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const
unsigned getObjectAlignment(int ObjectIdx) const
Return the alignment of the specified stack object.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata *> MDs)
Definition: Metadata.h:1165
int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset)
bool isCompute(CallingConv::ID cc)
void clearBitsNotInMask(const uint32_t *Mask, unsigned MaskWords=~0u)
clearBitsNotInMask - Clear a bit in this vector for every '0' bit in Mask.
Definition: BitVector.h:793
void addLiveIn(MCPhysReg PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
This file declares the machine register scavenger class.
const TargetRegisterInfo * getTargetRegisterInfo() const
unsigned const MachineRegisterInfo * MRI
unsigned reservedPrivateSegmentWaveByteOffsetReg(const MachineFunction &MF) const
Return the end register initially reserved for the scratch wave offset in case spilling is needed...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:148
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
void addLiveOuts(const MachineBasicBlock &MBB)
Adds all live-out registers of basic block MBB.
void addLiveIns(const MachineBasicBlock &MBB)
Adds all live-in registers of basic block MBB.
static ArrayRef< MCPhysReg > getAllSGPR128(const GCNSubtarget &ST, const MachineFunction &MF)
bool any() const
any - Returns true if any bit is set.
Definition: BitVector.h:180
void init(const TargetRegisterInfo &TRI)
(re-)initializes and clears the set.
Definition: LivePhysRegs.h:66
void stepBackward(const MachineInstr &MI)
Simulates liveness when stepping backwards over an instruction(bundle).
static ArrayRef< MCPhysReg > getAllSGPRs(const GCNSubtarget &ST, const MachineFunction &MF)
BitVector & reset()
Definition: BitVector.h:438
unsigned getMaxAlignment() const
Return the alignment in bytes that this function must be aligned to, which is greater than the defaul...
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Generation getGeneration() const
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
Definition: Function.cpp:205
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1424
ArrayRef< SGPRSpillVGPRCSR > getSGPRSpillVGPRs() const
void setScratchWaveOffsetReg(unsigned Reg)
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
const MachineBasicBlock & front() const
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
This class contains a discriminated union of information about pointers in memory operands...
unsigned getStackAlignment() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
virtual bool hasReservedCallFrame(const MachineFunction &MF) const
hasReservedCallFrame - Under normal circumstances, when a frame pointer is not required, we reserve argument space for call sites in the function immediately on entry to the current function.
The memory access writes data.
unsigned getWavefrontSize() const
Calling convention used for Mesa/AMDPAL geometry shaders.
Definition: CallingConv.h:191
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:212
bool available(const MachineRegisterInfo &MRI, MCPhysReg Reg) const
Returns true if register Reg and no aliasing register is in the set.
void addScavengingFrameIndex(int FI)
Add a scavenging frame index.
unsigned getScratchRSrcReg() const
Returns the physical register reserved for use as the resource descriptor for scratch accesses...
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override
emitProlog/emitEpilog - These methods insert prolog and epilog code into the function.
bool hasSGPRInitBug() const
ArrayRef< T > drop_back(size_t N=1) const
Drop the last N elements of the array.
Definition: ArrayRef.h:193
const Function & getFunction() const
Return the LLVM function that this machine code represents.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
void determineCalleeSavesSGPR(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const
bool isPhysRegUsed(unsigned PhysReg) const
Return true if the specified register is modified or read in this function.
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array...
Definition: ArrayRef.h:178
void replaceRegWith(unsigned FromReg, unsigned ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
The memory access reads data.
bool flatScratchIsPointer() const
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
Provides AMDGPU specific target descriptions.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Interface definition for SIInstrInfo.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
A set of physical registers with utility functions to track liveness when walking backward/forward th...
Definition: LivePhysRegs.h:48
static void buildEpilogReload(LivePhysRegs &LiveRegs, MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const SIInstrInfo *TII, unsigned SpillReg, unsigned ScratchRsrcReg, unsigned SPReg, int FI)
TargetOptions Options
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned char TargetFlags=0) const
unsigned SGPRForFPSaveRestoreCopy
If this is set, an SGPR used for save/restore of the register used for the frame pointer.
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned getImplicitBufferPtrUserSGPR() const
The memory access always returns the same value (or traps).
bool isAmdHsaOrMesa(const Function &F) const
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Address space for constant memory (VTX2).
Definition: AMDGPU.h:269
const TargetRegisterClass * getWaveMaskRegClass() const
uint8_t getStackID(int ObjectIdx) const
bool assignCalleeSavedSpillSlots(MachineFunction &MF, const TargetRegisterInfo *TRI, std::vector< CalleeSavedInfo > &CSI) const override
assignCalleeSavedSpillSlots - Allows target to override spill slot assignment logic.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Register getFrameRegister(const MachineFunction &MF) const override
const MCPhysReg * getCalleeSavedRegs() const
Returns list of callee saved registers.
void addReg(MCPhysReg Reg)
Adds a physical register and all its sub-registers to the set.
Definition: LivePhysRegs.h:79
Calling convention used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
Definition: CallingConv.h:207
static unsigned findScratchNonCalleeSaveRegister(MachineRegisterInfo &MRI, LivePhysRegs &LiveRegs, const TargetRegisterClass &RC, bool Unused=false)
void removeReg(MCPhysReg Reg)
Removes a physical register, all its sub-registers, and all its super-registers from the set...
Definition: LivePhysRegs.h:89
bool hasPatchPoint() const
This method may be called any time after instruction selection is complete to determine if there is a...
#define LLVM_DEBUG(X)
Definition: Debug.h:122
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects...
bool hasFP(const MachineFunction &MF) const override
hasFP - Return true if the specified function should have a dedicated frame pointer register...
bool hasCalls() const
Return true if the current function has any function calls.
Register getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const
const SIRegisterInfo * getRegisterInfo() const override