SIRegisterInfo.cpp
1 //===-- SIRegisterInfo.cpp - SI Register Information ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// SI implementation of the TargetRegisterInfo class.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "SIRegisterInfo.h"
15 #include "AMDGPURegisterBankInfo.h"
16 #include "AMDGPUSubtarget.h"
17 #include "SIInstrInfo.h"
18 #include "SIMachineFunctionInfo.h"
19 #include "MCTargetDesc/AMDGPUInstPrinter.h"
20 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
21 #include "llvm/CodeGen/LiveIntervals.h"
22 #include "llvm/CodeGen/MachineDominators.h"
23 #include "llvm/CodeGen/MachineFrameInfo.h"
24 #include "llvm/CodeGen/MachineInstrBuilder.h"
25 #include "llvm/CodeGen/RegisterScavenging.h"
26 #include "llvm/CodeGen/SlotIndexes.h"
27 #include "llvm/IR/Function.h"
28 #include "llvm/IR/LLVMContext.h"
29 
30 using namespace llvm;
31 
32 static bool hasPressureSet(const int *PSets, unsigned PSetID) {
33  for (unsigned i = 0; PSets[i] != -1; ++i) {
34  if (PSets[i] == (int)PSetID)
35  return true;
36  }
37  return false;
38 }
39 
40 void SIRegisterInfo::classifyPressureSet(unsigned PSetID, unsigned Reg,
41  BitVector &PressureSets) const {
42  for (MCRegUnitIterator U(Reg, this); U.isValid(); ++U) {
43  const int *PSets = getRegUnitPressureSets(*U);
44  if (hasPressureSet(PSets, PSetID)) {
45  PressureSets.set(PSetID);
46  break;
47  }
48  }
49 }
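// For example, classifying AMDGPU::SGPR0 against every pressure set marks
// exactly those sets that contain an SGPR register unit; the constructor
// below uses this to tell SGPR, VGPR and AGPR pressure sets apart.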
50 
51 static cl::opt<bool> EnableSpillSGPRToSMEM(
52  "amdgpu-spill-sgpr-to-smem",
53  cl::desc("Use scalar stores to spill SGPRs if supported by subtarget"),
54  cl::init(false));
55 
56 static cl::opt<bool> EnableSpillSGPRToVGPR(
57  "amdgpu-spill-sgpr-to-vgpr",
58  cl::desc("Enable spilling SGPRs to VGPRs"),
59  cl::ReallyHidden,
60  cl::init(true));
61 
62 SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST) :
63  AMDGPURegisterInfo(),
64  ST(ST),
65  SGPRPressureSets(getNumRegPressureSets()),
66  VGPRPressureSets(getNumRegPressureSets()),
67  AGPRPressureSets(getNumRegPressureSets()),
68  SpillSGPRToVGPR(false),
69  SpillSGPRToSMEM(false),
70  isWave32(ST.isWave32()) {
71  if (EnableSpillSGPRToSMEM && ST.hasScalarStores())
72  SpillSGPRToSMEM = true;
73  else if (EnableSpillSGPRToVGPR)
74  SpillSGPRToVGPR = true;
75 
76  unsigned NumRegPressureSets = getNumRegPressureSets();
77 
78  SGPRSetID = NumRegPressureSets;
79  VGPRSetID = NumRegPressureSets;
80  AGPRSetID = NumRegPressureSets;
81 
82  for (unsigned i = 0; i < NumRegPressureSets; ++i) {
83  classifyPressureSet(i, AMDGPU::SGPR0, SGPRPressureSets);
84  classifyPressureSet(i, AMDGPU::VGPR0, VGPRPressureSets);
85  classifyPressureSet(i, AMDGPU::AGPR0, AGPRPressureSets);
86  }
87 
88  // Determine the number of reg units for each pressure set.
89  std::vector<unsigned> PressureSetRegUnits(NumRegPressureSets, 0);
90  for (unsigned i = 0, e = getNumRegUnits(); i != e; ++i) {
91  const int *PSets = getRegUnitPressureSets(i);
92  for (unsigned j = 0; PSets[j] != -1; ++j) {
93  ++PressureSetRegUnits[PSets[j]];
94  }
95  }
96 
97  unsigned VGPRMax = 0, SGPRMax = 0, AGPRMax = 0;
98  for (unsigned i = 0; i < NumRegPressureSets; ++i) {
99  if (isVGPRPressureSet(i) && PressureSetRegUnits[i] > VGPRMax) {
100  VGPRSetID = i;
101  VGPRMax = PressureSetRegUnits[i];
102  continue;
103  }
104  if (isSGPRPressureSet(i) && PressureSetRegUnits[i] > SGPRMax) {
105  SGPRSetID = i;
106  SGPRMax = PressureSetRegUnits[i];
107  }
108  if (isAGPRPressureSet(i) && PressureSetRegUnits[i] > AGPRMax) {
109  AGPRSetID = i;
110  AGPRMax = PressureSetRegUnits[i];
111  continue;
112  }
113  }
114 
115  assert(SGPRSetID < NumRegPressureSets &&
116  VGPRSetID < NumRegPressureSets &&
117  AGPRSetID < NumRegPressureSets);
118 }
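// Net effect of the loops above: SGPRSetID, VGPRSetID and AGPRSetID each end
// up naming the pressure set with the most register units of that kind,
// typically the set that tracks the whole register file rather than a
// narrower subset.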
119 
120 unsigned SIRegisterInfo::reservedPrivateSegmentBufferReg(
121  const MachineFunction &MF) const {
122 
123  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
124  unsigned BaseIdx = alignDown(ST.getMaxNumSGPRs(MF), 4) - 4;
125  unsigned BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));
126  return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SGPR_128RegClass);
127 }
128 
129 static unsigned findPrivateSegmentWaveByteOffsetRegIndex(unsigned RegCount) {
130  unsigned Reg;
131 
132  // Try to place it in a hole after PrivateSegmentBufferReg.
133  if (RegCount & 3) {
134  // We cannot put the segment buffer in (Idx - 4) ... (Idx - 1) due to
135  // alignment constraints, so we have a hole where we can put the wave offset.
136  Reg = RegCount - 1;
137  } else {
138  // We can put the segment buffer in (Idx - 4) ... (Idx - 1) and put the
139  // wave offset before it.
140  Reg = RegCount - 5;
141  }
142 
143  return Reg;
144 }
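// Worked example, using the layout computed above: with 102 usable SGPRs the
// count is not a multiple of 4, so the resource descriptor lands in
// SGPR96-SGPR99 and the wave offset uses the hole at SGPR101; with 104 SGPRs
// the descriptor takes SGPR100-SGPR103 and the offset goes to SGPR99.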
145 
146 unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg(
147  const MachineFunction &MF) const {
148  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
149  unsigned Reg = findPrivateSegmentWaveByteOffsetRegIndex(ST.getMaxNumSGPRs(MF));
150  return AMDGPU::SGPR_32RegClass.getRegister(Reg);
151 }
152 
153 BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
154  BitVector Reserved(getNumRegs());
155 
156  // EXEC_LO and EXEC_HI could be allocated and used as regular registers, but
157  // this seems likely to result in bugs, so I'm marking them as reserved.
158  reserveRegisterTuples(Reserved, AMDGPU::EXEC);
159  reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR);
160 
161  // M0 has to be reserved so that llvm accepts it as a live-in into a block.
162  reserveRegisterTuples(Reserved, AMDGPU::M0);
163 
164  // Reserve src_vccz, src_execz, src_scc.
165  reserveRegisterTuples(Reserved, AMDGPU::SRC_VCCZ);
166  reserveRegisterTuples(Reserved, AMDGPU::SRC_EXECZ);
167  reserveRegisterTuples(Reserved, AMDGPU::SRC_SCC);
168 
169  // Reserve the memory aperture registers.
170  reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_BASE);
171  reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_LIMIT);
172  reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_BASE);
173  reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_LIMIT);
174 
175  // Reserve src_pops_exiting_wave_id - support is not implemented in Codegen.
176  reserveRegisterTuples(Reserved, AMDGPU::SRC_POPS_EXITING_WAVE_ID);
177 
178  // Reserve xnack_mask registers - support is not implemented in Codegen.
179  reserveRegisterTuples(Reserved, AMDGPU::XNACK_MASK);
180 
181  // Reserve lds_direct register - support is not implemented in Codegen.
182  reserveRegisterTuples(Reserved, AMDGPU::LDS_DIRECT);
183 
184  // Reserve Trap Handler registers - support is not implemented in Codegen.
185  reserveRegisterTuples(Reserved, AMDGPU::TBA);
186  reserveRegisterTuples(Reserved, AMDGPU::TMA);
187  reserveRegisterTuples(Reserved, AMDGPU::TTMP0_TTMP1);
188  reserveRegisterTuples(Reserved, AMDGPU::TTMP2_TTMP3);
189  reserveRegisterTuples(Reserved, AMDGPU::TTMP4_TTMP5);
190  reserveRegisterTuples(Reserved, AMDGPU::TTMP6_TTMP7);
191  reserveRegisterTuples(Reserved, AMDGPU::TTMP8_TTMP9);
192  reserveRegisterTuples(Reserved, AMDGPU::TTMP10_TTMP11);
193  reserveRegisterTuples(Reserved, AMDGPU::TTMP12_TTMP13);
194  reserveRegisterTuples(Reserved, AMDGPU::TTMP14_TTMP15);
195 
196  // Reserve null register - it shall never be allocated
197  reserveRegisterTuples(Reserved, AMDGPU::SGPR_NULL);
198 
199  // Disallow vcc_hi allocation in wave32. It may be allocated but most likely
200  // will result in bugs.
201  if (isWave32) {
202  Reserved.set(AMDGPU::VCC);
203  Reserved.set(AMDGPU::VCC_HI);
204  }
205 
206  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
207 
208  unsigned MaxNumSGPRs = ST.getMaxNumSGPRs(MF);
209  unsigned TotalNumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
210  for (unsigned i = MaxNumSGPRs; i < TotalNumSGPRs; ++i) {
211  unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
212  reserveRegisterTuples(Reserved, Reg);
213  }
214 
215  unsigned MaxNumVGPRs = ST.getMaxNumVGPRs(MF);
216  unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
217  for (unsigned i = MaxNumVGPRs; i < TotalNumVGPRs; ++i) {
218  unsigned Reg = AMDGPU::VGPR_32RegClass.getRegister(i);
219  reserveRegisterTuples(Reserved, Reg);
220  Reg = AMDGPU::AGPR_32RegClass.getRegister(i);
221  reserveRegisterTuples(Reserved, Reg);
222  }
223 
224  // Reserve all remaining AGPRs if there are no instructions that can use them.
225  if (!ST.hasMAIInsts()) {
226  for (unsigned i = 0; i < MaxNumVGPRs; ++i) {
227  unsigned Reg = AMDGPU::AGPR_32RegClass.getRegister(i);
228  reserveRegisterTuples(Reserved, Reg);
229  }
230  }
231 
232  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
233 
234  unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();
235  if (ScratchWaveOffsetReg != AMDGPU::NoRegister) {
236  // Reserve 1 SGPR for scratch wave offset in case we need to spill.
237  reserveRegisterTuples(Reserved, ScratchWaveOffsetReg);
238  }
239 
240  unsigned ScratchRSrcReg = MFI->getScratchRSrcReg();
241  if (ScratchRSrcReg != AMDGPU::NoRegister) {
242  // Reserve 4 SGPRs for the scratch buffer resource descriptor in case we need
243  // to spill.
244  // TODO: May need to reserve a VGPR if doing LDS spilling.
245  reserveRegisterTuples(Reserved, ScratchRSrcReg);
246  assert(!isSubRegister(ScratchRSrcReg, ScratchWaveOffsetReg));
247  }
248 
249  // We have to assume the SP is needed in case there are calls in the function,
250  // which is detected after the function is lowered. If we aren't really going
251  // to need SP, don't bother reserving it.
252  unsigned StackPtrReg = MFI->getStackPtrOffsetReg();
253 
254  if (StackPtrReg != AMDGPU::NoRegister) {
255  reserveRegisterTuples(Reserved, StackPtrReg);
256  assert(!isSubRegister(ScratchRSrcReg, StackPtrReg));
257  }
258 
259  unsigned FrameReg = MFI->getFrameOffsetReg();
260  if (FrameReg != AMDGPU::NoRegister) {
261  reserveRegisterTuples(Reserved, FrameReg);
262  assert(!isSubRegister(ScratchRSrcReg, FrameReg));
263  }
264 
265  for (unsigned Reg : MFI->WWMReservedRegs) {
266  reserveRegisterTuples(Reserved, Reg);
267  }
268 
269  // FIXME: Stop using reserved registers for this.
270  for (MCPhysReg Reg : MFI->getAGPRSpillVGPRs())
271  reserveRegisterTuples(Reserved, Reg);
272 
273  for (MCPhysReg Reg : MFI->getVGPRSpillAGPRs())
274  reserveRegisterTuples(Reserved, Reg);
275 
276  return Reserved;
277 }
278 
279 bool SIRegisterInfo::canRealignStack(const MachineFunction &MF) const {
280  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
281  // On entry, the base address is 0, so it can't possibly need any more
282  // alignment.
283 
284  // FIXME: Should be able to specify the entry frame alignment per calling
285  // convention instead.
286  if (Info->isEntryFunction())
287  return false;
288 
289  return TargetRegisterInfo::canRealignStack(MF);
290 }
291 
292 bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const {
293  const SIMachineFunctionInfo *Info = Fn.getInfo<SIMachineFunctionInfo>();
294  if (Info->isEntryFunction()) {
295  const MachineFrameInfo &MFI = Fn.getFrameInfo();
296  return MFI.hasStackObjects() || MFI.hasCalls();
297  }
298 
299  // May need scavenger for dealing with callee saved registers.
300  return true;
301 }
302 
303 bool SIRegisterInfo::requiresFrameIndexScavenging(
304  const MachineFunction &MF) const {
305  // Do not use frame virtual registers. They used to be used for SGPRs, but
306  // once we reach PrologEpilogInserter, we can no longer spill SGPRs. If the
307  // scavenger fails, we can increment/decrement the necessary SGPRs to avoid a
308  // spill.
309  return false;
310 }
311 
312 bool SIRegisterInfo::requiresFrameIndexReplacementScavenging(
313  const MachineFunction &MF) const {
314  const MachineFrameInfo &MFI = MF.getFrameInfo();
315  return MFI.hasStackObjects();
316 }
317 
318 bool SIRegisterInfo::requiresVirtualBaseRegisters(
319  const MachineFunction &) const {
320  // There are no special dedicated stack or frame pointers.
321  return true;
322 }
323 
324 bool SIRegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
325  // This helps catch bugs as verifier errors.
326  return true;
327 }
328 
329 int64_t SIRegisterInfo::getMUBUFInstrOffset(const MachineInstr *MI) const {
330  assert(SIInstrInfo::isMUBUF(*MI));
331 
332  int OffIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
333  AMDGPU::OpName::offset);
334  return MI->getOperand(OffIdx).getImm();
335 }
336 
337 int64_t SIRegisterInfo::getFrameIndexInstrOffset(const MachineInstr *MI,
338  int Idx) const {
339  if (!SIInstrInfo::isMUBUF(*MI))
340  return 0;
341 
342  assert(Idx == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
343  AMDGPU::OpName::vaddr) &&
344  "Should never see frame index on non-address operand");
345 
346  return getMUBUFInstrOffset(MI);
347 }
348 
349 bool SIRegisterInfo::needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
350  if (!MI->mayLoadOrStore())
351  return false;
352 
353  int64_t FullOffset = Offset + getMUBUFInstrOffset(MI);
354 
355  return !isUInt<12>(FullOffset);
356 }
357 
358 void SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
359  unsigned BaseReg,
360  int FrameIdx,
361  int64_t Offset) const {
362  MachineBasicBlock::iterator Ins = MBB->begin();
363  DebugLoc DL; // Defaults to "unknown"
364 
365  if (Ins != MBB->end())
366  DL = Ins->getDebugLoc();
367 
368  MachineFunction *MF = MBB->getParent();
369  const GCNSubtarget &Subtarget = MF->getSubtarget<GCNSubtarget>();
370  const SIInstrInfo *TII = Subtarget.getInstrInfo();
371 
372  if (Offset == 0) {
373  BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), BaseReg)
374  .addFrameIndex(FrameIdx);
375  return;
376  }
377 
378  MachineRegisterInfo &MRI = MF->getRegInfo();
379  Register OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
380 
381  Register FIReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
382 
383  BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
384  .addImm(Offset);
385  BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), FIReg)
386  .addFrameIndex(FrameIdx);
387 
388  TII->getAddNoCarry(*MBB, Ins, DL, BaseReg)
389  .addReg(OffsetReg, RegState::Kill)
390  .addReg(FIReg)
391  .addImm(0); // clamp bit
392 }
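// The sequence built above is, in effect:
//   s_mov_b32  <offset sgpr>, Offset
//   v_mov_b32  <fi vgpr>, FrameIdx
//   v_add_u32  BaseReg, <offset sgpr>, <fi vgpr>   // via getAddNoCarry()
// with the clamp bit on the add cleared.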
393 
394 void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
395  int64_t Offset) const {
396 
397  MachineBasicBlock *MBB = MI.getParent();
398  MachineFunction *MF = MBB->getParent();
399  const GCNSubtarget &Subtarget = MF->getSubtarget<GCNSubtarget>();
400  const SIInstrInfo *TII = Subtarget.getInstrInfo();
401 
402 #ifndef NDEBUG
403  // FIXME: Is it possible to be storing a frame index to itself?
404  bool SeenFI = false;
405  for (const MachineOperand &MO: MI.operands()) {
406  if (MO.isFI()) {
407  if (SeenFI)
408  llvm_unreachable("should not see multiple frame indices");
409 
410  SeenFI = true;
411  }
412  }
413 #endif
414 
415  MachineOperand *FIOp = TII->getNamedOperand(MI, AMDGPU::OpName::vaddr);
416  assert(FIOp && FIOp->isFI() && "frame index must be address operand");
417  assert(TII->isMUBUF(MI));
418  assert(TII->getNamedOperand(MI, AMDGPU::OpName::soffset)->getReg() ==
419  MF->getInfo<SIMachineFunctionInfo>()->getStackPtrOffsetReg() &&
420  "should only be seeing stack pointer offset relative FrameIndex");
421 
422  MachineOperand *OffsetOp = TII->getNamedOperand(MI, AMDGPU::OpName::offset);
423  int64_t NewOffset = OffsetOp->getImm() + Offset;
424  assert(isUInt<12>(NewOffset) && "offset should be legal");
425 
426  FIOp->ChangeToRegister(BaseReg, false);
427  OffsetOp->setImm(NewOffset);
428 }
429 
430 bool SIRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
431  unsigned BaseReg,
432  int64_t Offset) const {
433  if (!SIInstrInfo::isMUBUF(*MI))
434  return false;
435 
436  int64_t NewOffset = Offset + getMUBUFInstrOffset(MI);
437 
438  return isUInt<12>(NewOffset);
439 }
440 
441 const TargetRegisterClass *SIRegisterInfo::getPointerRegClass(
442  const MachineFunction &MF, unsigned Kind) const {
443  // This is inaccurate. It depends on the instruction and address space. The
444  // only place where we should hit this is for dealing with frame indexes /
445  // private accesses, so this is correct in that case.
446  return &AMDGPU::VGPR_32RegClass;
447 }
448 
449 static unsigned getNumSubRegsForSpillOp(unsigned Op) {
450 
451  switch (Op) {
452  case AMDGPU::SI_SPILL_S1024_SAVE:
453  case AMDGPU::SI_SPILL_S1024_RESTORE:
454  case AMDGPU::SI_SPILL_V1024_SAVE:
455  case AMDGPU::SI_SPILL_V1024_RESTORE:
456  case AMDGPU::SI_SPILL_A1024_SAVE:
457  case AMDGPU::SI_SPILL_A1024_RESTORE:
458  return 32;
459  case AMDGPU::SI_SPILL_S512_SAVE:
460  case AMDGPU::SI_SPILL_S512_RESTORE:
461  case AMDGPU::SI_SPILL_V512_SAVE:
462  case AMDGPU::SI_SPILL_V512_RESTORE:
463  case AMDGPU::SI_SPILL_A512_SAVE:
464  case AMDGPU::SI_SPILL_A512_RESTORE:
465  return 16;
466  case AMDGPU::SI_SPILL_S256_SAVE:
467  case AMDGPU::SI_SPILL_S256_RESTORE:
468  case AMDGPU::SI_SPILL_V256_SAVE:
469  case AMDGPU::SI_SPILL_V256_RESTORE:
470  return 8;
471  case AMDGPU::SI_SPILL_S160_SAVE:
472  case AMDGPU::SI_SPILL_S160_RESTORE:
473  case AMDGPU::SI_SPILL_V160_SAVE:
474  case AMDGPU::SI_SPILL_V160_RESTORE:
475  return 5;
476  case AMDGPU::SI_SPILL_S128_SAVE:
477  case AMDGPU::SI_SPILL_S128_RESTORE:
478  case AMDGPU::SI_SPILL_V128_SAVE:
479  case AMDGPU::SI_SPILL_V128_RESTORE:
480  case AMDGPU::SI_SPILL_A128_SAVE:
481  case AMDGPU::SI_SPILL_A128_RESTORE:
482  return 4;
483  case AMDGPU::SI_SPILL_S96_SAVE:
484  case AMDGPU::SI_SPILL_S96_RESTORE:
485  case AMDGPU::SI_SPILL_V96_SAVE:
486  case AMDGPU::SI_SPILL_V96_RESTORE:
487  return 3;
488  case AMDGPU::SI_SPILL_S64_SAVE:
489  case AMDGPU::SI_SPILL_S64_RESTORE:
490  case AMDGPU::SI_SPILL_V64_SAVE:
491  case AMDGPU::SI_SPILL_V64_RESTORE:
492  case AMDGPU::SI_SPILL_A64_SAVE:
493  case AMDGPU::SI_SPILL_A64_RESTORE:
494  return 2;
495  case AMDGPU::SI_SPILL_S32_SAVE:
496  case AMDGPU::SI_SPILL_S32_RESTORE:
497  case AMDGPU::SI_SPILL_V32_SAVE:
498  case AMDGPU::SI_SPILL_V32_RESTORE:
499  case AMDGPU::SI_SPILL_A32_SAVE:
500  case AMDGPU::SI_SPILL_A32_RESTORE:
501  return 1;
502  default: llvm_unreachable("Invalid spill opcode");
503  }
504 }
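// For example, SI_SPILL_V128_SAVE covers four 32-bit subregisters, so the
// spill lowering below ends up emitting four dword-sized buffer operations.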
505 
506 static int getOffsetMUBUFStore(unsigned Opc) {
507  switch (Opc) {
508  case AMDGPU::BUFFER_STORE_DWORD_OFFEN:
509  return AMDGPU::BUFFER_STORE_DWORD_OFFSET;
510  case AMDGPU::BUFFER_STORE_BYTE_OFFEN:
511  return AMDGPU::BUFFER_STORE_BYTE_OFFSET;
512  case AMDGPU::BUFFER_STORE_SHORT_OFFEN:
513  return AMDGPU::BUFFER_STORE_SHORT_OFFSET;
514  case AMDGPU::BUFFER_STORE_DWORDX2_OFFEN:
515  return AMDGPU::BUFFER_STORE_DWORDX2_OFFSET;
516  case AMDGPU::BUFFER_STORE_DWORDX4_OFFEN:
517  return AMDGPU::BUFFER_STORE_DWORDX4_OFFSET;
518  case AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFEN:
519  return AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFSET;
520  case AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFEN:
521  return AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFSET;
522  default:
523  return -1;
524  }
525 }
526 
527 static int getOffsetMUBUFLoad(unsigned Opc) {
528  switch (Opc) {
529  case AMDGPU::BUFFER_LOAD_DWORD_OFFEN:
530  return AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
531  case AMDGPU::BUFFER_LOAD_UBYTE_OFFEN:
532  return AMDGPU::BUFFER_LOAD_UBYTE_OFFSET;
533  case AMDGPU::BUFFER_LOAD_SBYTE_OFFEN:
534  return AMDGPU::BUFFER_LOAD_SBYTE_OFFSET;
535  case AMDGPU::BUFFER_LOAD_USHORT_OFFEN:
536  return AMDGPU::BUFFER_LOAD_USHORT_OFFSET;
537  case AMDGPU::BUFFER_LOAD_SSHORT_OFFEN:
538  return AMDGPU::BUFFER_LOAD_SSHORT_OFFSET;
539  case AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN:
540  return AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET;
541  case AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN:
542  return AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET;
543  case AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFEN:
544  return AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFSET;
545  case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN:
546  return AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET;
547  case AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFEN:
548  return AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFSET;
549  case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN:
550  return AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET;
551  case AMDGPU::BUFFER_LOAD_SHORT_D16_OFFEN:
552  return AMDGPU::BUFFER_LOAD_SHORT_D16_OFFSET;
553  case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN:
554  return AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET;
555  default:
556  return -1;
557  }
558 }
559 
560 static MachineInstrBuilder spillVGPRtoAGPR(MachineBasicBlock::iterator MI,
561  int Index,
562  unsigned Lane,
563  unsigned ValueReg,
564  bool IsKill) {
565  MachineBasicBlock *MBB = MI->getParent();
566  MachineFunction *MF = MI->getParent()->getParent();
567  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
568  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
569  const SIInstrInfo *TII = ST.getInstrInfo();
570 
571  MCPhysReg Reg = MFI->getVGPRToAGPRSpill(Index, Lane);
572 
573  if (Reg == AMDGPU::NoRegister)
574  return MachineInstrBuilder();
575 
576  bool IsStore = MI->mayStore();
577  MachineRegisterInfo &MRI = MF->getRegInfo();
578  auto *TRI = static_cast<const SIRegisterInfo*>(MRI.getTargetRegisterInfo());
579 
580  unsigned Dst = IsStore ? Reg : ValueReg;
581  unsigned Src = IsStore ? ValueReg : Reg;
582  unsigned Opc = (IsStore ^ TRI->isVGPR(MRI, Reg)) ? AMDGPU::V_ACCVGPR_WRITE_B32
583  : AMDGPU::V_ACCVGPR_READ_B32;
584 
585  return BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(Opc), Dst)
586  .addReg(Src, getKillRegState(IsKill));
587 }
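// For instance, when an AGPR lane has been set aside for this frame index, a
// VGPR spill store becomes a single v_accvgpr_write_b32 (and a reload becomes
// v_accvgpr_read_b32) instead of a scratch buffer access.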
588 
589 // This differs from buildSpillLoadStore by only scavenging a VGPR. It does not
590 // need to handle the case where an SGPR may need to be spilled while spilling.
591 static bool buildMUBUFOffsetLoadStore(const SIInstrInfo *TII,
592  MachineFrameInfo &MFI,
593  MachineBasicBlock::iterator MI,
594  int Index,
595  int64_t Offset) {
596  MachineBasicBlock *MBB = MI->getParent();
597  const DebugLoc &DL = MI->getDebugLoc();
598  bool IsStore = MI->mayStore();
599 
600  unsigned Opc = MI->getOpcode();
601  int LoadStoreOp = IsStore ?
602  getOffsetMUBUFStore(Opc) : getOffsetMUBUFLoad(Opc);
603  if (LoadStoreOp == -1)
604  return false;
605 
606  const MachineOperand *Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata);
607  if (spillVGPRtoAGPR(MI, Index, 0, Reg->getReg(), false).getInstr())
608  return true;
609 
610  MachineInstrBuilder NewMI =
611  BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
612  .add(*Reg)
613  .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc))
614  .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset))
615  .addImm(Offset)
616  .addImm(0) // glc
617  .addImm(0) // slc
618  .addImm(0) // tfe
619  .addImm(0) // dlc
620  .addImm(0) // swz
621  .cloneMemRefs(*MI);
622 
623  const MachineOperand *VDataIn = TII->getNamedOperand(*MI,
624  AMDGPU::OpName::vdata_in);
625  if (VDataIn)
626  NewMI.add(*VDataIn);
627  return true;
628 }
629 
630 void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
631  unsigned LoadStoreOp,
632  int Index,
633  unsigned ValueReg,
634  bool IsKill,
635  unsigned ScratchRsrcReg,
636  unsigned ScratchOffsetReg,
637  int64_t InstOffset,
638  MachineMemOperand *MMO,
639  RegScavenger *RS) const {
640  MachineBasicBlock *MBB = MI->getParent();
641  MachineFunction *MF = MI->getParent()->getParent();
642  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
643  const SIInstrInfo *TII = ST.getInstrInfo();
644  const MachineFrameInfo &MFI = MF->getFrameInfo();
645 
646  const MCInstrDesc &Desc = TII->get(LoadStoreOp);
647  const DebugLoc &DL = MI->getDebugLoc();
648  bool IsStore = Desc.mayStore();
649 
650  bool Scavenged = false;
651  unsigned SOffset = ScratchOffsetReg;
652 
653  const unsigned EltSize = 4;
654  const TargetRegisterClass *RC = getRegClassForReg(MF->getRegInfo(), ValueReg);
655  unsigned NumSubRegs = AMDGPU::getRegBitWidth(RC->getID()) / (EltSize * CHAR_BIT);
656  unsigned Size = NumSubRegs * EltSize;
657  int64_t Offset = InstOffset + MFI.getObjectOffset(Index);
658  int64_t ScratchOffsetRegDelta = 0;
659 
660  unsigned Align = MFI.getObjectAlignment(Index);
661  const MachinePointerInfo &BasePtrInfo = MMO->getPointerInfo();
662 
663  Register TmpReg =
664  hasAGPRs(RC) ? TII->getNamedOperand(*MI, AMDGPU::OpName::tmp)->getReg()
665  : Register();
666 
667  assert((Offset % EltSize) == 0 && "unexpected VGPR spill offset");
668 
669  if (!isUInt<12>(Offset + Size - EltSize)) {
670  SOffset = AMDGPU::NoRegister;
671 
672  // We currently only support spilling VGPRs to EltSize boundaries, meaning
673  // we can simplify the adjustment of Offset here to just scale with
674  // WavefrontSize.
675  Offset *= ST.getWavefrontSize();
676 
677  // We don't have access to the register scavenger if this function is called
678  // during PEI::scavengeFrameVirtualRegs().
679  if (RS)
680  SOffset = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, MI, 0, false);
681 
682  if (SOffset == AMDGPU::NoRegister) {
683  // There are no free SGPRs, and we are already in the process of spilling
684  // VGPRs too. Since we need a VGPR in order to spill SGPRs (this is true
685  // on SI/CI, and on VI it is true until we implement spilling using scalar
686  // stores), we have no way to free up an SGPR. Our solution here is to
687  // add the offset directly to the ScratchOffset register, and then
688  // subtract the offset after the spill to return ScratchOffset to its
689  // original value.
690  SOffset = ScratchOffsetReg;
691  ScratchOffsetRegDelta = Offset;
692  } else {
693  Scavenged = true;
694  }
695 
696  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), SOffset)
697  .addReg(ScratchOffsetReg)
698  .addImm(Offset);
699 
700  Offset = 0;
701  }
702 
703  for (unsigned i = 0, e = NumSubRegs; i != e; ++i, Offset += EltSize) {
704  Register SubReg = NumSubRegs == 1
705  ? Register(ValueReg)
706  : getSubReg(ValueReg, getSubRegFromChannel(i));
707 
708  unsigned SOffsetRegState = 0;
709  unsigned SrcDstRegState = getDefRegState(!IsStore);
710  if (i + 1 == e) {
711  SOffsetRegState |= getKillRegState(Scavenged);
712  // The last implicit use carries the "Kill" flag.
713  SrcDstRegState |= getKillRegState(IsKill);
714  }
715 
716  auto MIB = spillVGPRtoAGPR(MI, Index, i, SubReg, IsKill);
717 
718  if (!MIB.getInstr()) {
719  unsigned FinalReg = SubReg;
720  if (TmpReg != AMDGPU::NoRegister) {
721  if (IsStore)
722  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ACCVGPR_READ_B32), TmpReg)
723  .addReg(SubReg, getKillRegState(IsKill));
724  SubReg = TmpReg;
725  }
726 
727  MachinePointerInfo PInfo = BasePtrInfo.getWithOffset(EltSize * i);
728  MachineMemOperand *NewMMO
729  = MF->getMachineMemOperand(PInfo, MMO->getFlags(),
730  EltSize, MinAlign(Align, EltSize * i));
731 
732  MIB = BuildMI(*MBB, MI, DL, Desc)
733  .addReg(SubReg, getDefRegState(!IsStore) | getKillRegState(IsKill))
734  .addReg(ScratchRsrcReg)
735  .addReg(SOffset, SOffsetRegState)
736  .addImm(Offset)
737  .addImm(0) // glc
738  .addImm(0) // slc
739  .addImm(0) // tfe
740  .addImm(0) // dlc
741  .addImm(0) // swz
742  .addMemOperand(NewMMO);
743 
744  if (!IsStore && TmpReg != AMDGPU::NoRegister)
745  MIB = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ACCVGPR_WRITE_B32),
746  FinalReg)
747  .addReg(TmpReg, RegState::Kill);
748  }
749 
750  if (NumSubRegs > 1)
751  MIB.addReg(ValueReg, RegState::Implicit | SrcDstRegState);
752  }
753 
754  if (ScratchOffsetRegDelta != 0) {
755  // Subtract the offset we added to the ScratchOffset register.
756  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), ScratchOffsetReg)
757  .addReg(ScratchOffsetReg)
758  .addImm(ScratchOffsetRegDelta);
759  }
760 }
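// Offset handling sketch for the function above: a 1024-bit spill at object
// offset 4064 would need immediates 4064..4188, which no longer fit the
// 12-bit MUBUF offset field, so the byte offset is scaled by the wavefront
// size, folded into a (scavenged or borrowed) SOffset SGPR, and the
// per-subregister immediates restart at 0, 4, 8, ...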
761 
762 static std::pair<unsigned, unsigned> getSpillEltSize(unsigned SuperRegSize,
763  bool Store) {
764  if (SuperRegSize % 16 == 0) {
765  return { 16, Store ? AMDGPU::S_BUFFER_STORE_DWORDX4_SGPR :
766  AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR };
767  }
768 
769  if (SuperRegSize % 8 == 0) {
770  return { 8, Store ? AMDGPU::S_BUFFER_STORE_DWORDX2_SGPR :
771  AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR };
772  }
773 
774  return { 4, Store ? AMDGPU::S_BUFFER_STORE_DWORD_SGPR :
775  AMDGPU::S_BUFFER_LOAD_DWORD_SGPR};
776 }
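// For example, a 64-byte (512-bit) SGPR tuple spills with EltSize 16 using
// S_BUFFER_STORE_DWORDX4_SGPR, i.e. four 16-byte pieces, while an 8-byte SGPR
// pair falls back to the DWORDX2 variants.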
777 
778 bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
779  int Index,
780  RegScavenger *RS,
781  bool OnlyToVGPR) const {
782  MachineBasicBlock *MBB = MI->getParent();
783  MachineFunction *MF = MBB->getParent();
784  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
785  DenseSet<unsigned> SGPRSpillVGPRDefinedSet;
786 
787  ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills
788  = MFI->getSGPRToVGPRSpills(Index);
789  bool SpillToVGPR = !VGPRSpills.empty();
790  if (OnlyToVGPR && !SpillToVGPR)
791  return false;
792 
793  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
794  const SIInstrInfo *TII = ST.getInstrInfo();
795 
796  Register SuperReg = MI->getOperand(0).getReg();
797  bool IsKill = MI->getOperand(0).isKill();
798  const DebugLoc &DL = MI->getDebugLoc();
799 
800  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
801 
802  bool SpillToSMEM = spillSGPRToSMEM();
803  if (SpillToSMEM && OnlyToVGPR)
804  return false;
805 
806  Register FrameReg = getFrameRegister(*MF);
807 
808  assert(SpillToVGPR || (SuperReg != MFI->getStackPtrOffsetReg() &&
809  SuperReg != MFI->getFrameOffsetReg() &&
810  SuperReg != MFI->getScratchWaveOffsetReg()));
811 
812  assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
813 
814  unsigned OffsetReg = AMDGPU::M0;
815  unsigned M0CopyReg = AMDGPU::NoRegister;
816 
817  if (SpillToSMEM) {
818  if (RS->isRegUsed(AMDGPU::M0)) {
819  M0CopyReg = RS->scavengeRegister(&AMDGPU::SReg_32_XM0RegClass, MI, 0, false);
820  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
821  .addReg(AMDGPU::M0);
822  }
823  }
824 
825  unsigned ScalarStoreOp;
826  unsigned EltSize = 4;
827  const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
828  if (SpillToSMEM && isSGPRClass(RC)) {
829  // XXX - if private_element_size is larger than 4 it might be useful to be
830  // able to spill wider vmem spills.
831  std::tie(EltSize, ScalarStoreOp) =
832  getSpillEltSize(getRegSizeInBits(*RC) / 8, true);
833  }
834 
835  ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
836  unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
837 
838  // Scavenged temporary VGPR to use. It must be scavenged once for any number
839  // of spilled subregs.
840  Register TmpVGPR;
841 
842  // SubReg carries the "Kill" flag when SubReg == SuperReg.
843  unsigned SubKillState = getKillRegState((NumSubRegs == 1) && IsKill);
844  for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
845  Register SubReg =
846  NumSubRegs == 1 ? SuperReg : getSubReg(SuperReg, SplitParts[i]);
847 
848  if (SpillToSMEM) {
849  int64_t FrOffset = FrameInfo.getObjectOffset(Index);
850 
851  // The allocated memory size is really the wavefront size * the frame
852  // index size. The widest register class is 64 bytes, so a 4-byte scratch
853  // allocation is enough to spill this in a single stack object.
854  //
855  // FIXME: Frame size/offsets are computed earlier than this, so the extra
856  // space is still unnecessarily allocated.
857 
858  unsigned Align = FrameInfo.getObjectAlignment(Index);
859  MachinePointerInfo PtrInfo
860  = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
861  MachineMemOperand *MMO
862  = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
863  EltSize, MinAlign(Align, EltSize * i));
864 
865  // SMEM instructions only support a single offset, so increment the wave
866  // offset.
867 
868  int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
869  if (Offset != 0) {
870  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
871  .addReg(FrameReg)
872  .addImm(Offset);
873  } else {
874  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
875  .addReg(FrameReg);
876  }
877 
878  BuildMI(*MBB, MI, DL, TII->get(ScalarStoreOp))
879  .addReg(SubReg, getKillRegState(IsKill)) // sdata
880  .addReg(MFI->getScratchRSrcReg()) // sbase
881  .addReg(OffsetReg, RegState::Kill) // soff
882  .addImm(0) // glc
883  .addImm(0) // dlc
884  .addMemOperand(MMO);
885 
886  continue;
887  }
888 
889  if (SpillToVGPR) {
890  SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
891 
892  // During SGPR spilling to VGPR, determine if the VGPR is defined. The
893  // only circumstance in which we say it is undefined is when it is the
894  // first spill to this VGPR in the first basic block.
895  bool VGPRDefined = true;
896  if (MBB == &MF->front())
897  VGPRDefined = !SGPRSpillVGPRDefinedSet.insert(Spill.VGPR).second;
898 
899  // Mark the "old value of vgpr" input undef only if this is the first sgpr
900  // spill to this specific vgpr in the first basic block.
901  BuildMI(*MBB, MI, DL,
902  TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
903  Spill.VGPR)
904  .addReg(SubReg, getKillRegState(IsKill))
905  .addImm(Spill.Lane)
906  .addReg(Spill.VGPR, VGPRDefined ? 0 : RegState::Undef);
907 
908  // FIXME: Since this spills to another register instead of an actual
909  // frame index, we should delete the frame index when all references to
910  // it are fixed.
911  } else {
912  // XXX - Can the spill to a VGPR fail for some subregisters but not others?
913  if (OnlyToVGPR)
914  return false;
915 
916  // Spill SGPR to a frame index.
917  // TODO: Should VI try to spill to VGPR and then spill to SMEM?
918  if (!TmpVGPR.isValid())
919  TmpVGPR = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
921 
922  MachineInstrBuilder Mov
923  = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
924  .addReg(SubReg, SubKillState);
925 
926  // There could be undef components of a spilled super register.
927  // TODO: Can we detect this and skip the spill?
928  if (NumSubRegs > 1) {
929  // The last implicit use of the SuperReg carries the "Kill" flag.
930  unsigned SuperKillState = 0;
931  if (i + 1 == e)
932  SuperKillState |= getKillRegState(IsKill);
933  Mov.addReg(SuperReg, RegState::Implicit | SuperKillState);
934  }
935 
936  unsigned Align = FrameInfo.getObjectAlignment(Index);
937  MachinePointerInfo PtrInfo
938  = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
939  MachineMemOperand *MMO
940  = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
941  EltSize, MinAlign(Align, EltSize * i));
942  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_SAVE))
943  .addReg(TmpVGPR, RegState::Kill) // src
944  .addFrameIndex(Index) // vaddr
945  .addReg(MFI->getScratchRSrcReg()) // srsrc
946  .addReg(MFI->getStackPtrOffsetReg()) // soffset
947  .addImm(i * 4) // offset
948  .addMemOperand(MMO);
949  }
950  }
951 
952  if (M0CopyReg != AMDGPU::NoRegister) {
953  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
954  .addReg(M0CopyReg, RegState::Kill);
955  }
956 
957  MI->eraseFromParent();
958  MFI->addToSpilledSGPRs(NumSubRegs);
959  return true;
960 }
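// A concrete case for the SpillToVGPR path above: spilling a 128-bit tuple
// such as s[4:7] with four lanes reserved in one VGPR becomes four
// v_writelane_b32 instructions, one per 32-bit subregister, after which the
// SI_SPILL_S128_SAVE pseudo is erased.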
961 
962 bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
963  int Index,
964  RegScavenger *RS,
965  bool OnlyToVGPR) const {
966  MachineFunction *MF = MI->getParent()->getParent();
967  MachineBasicBlock *MBB = MI->getParent();
968  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
969 
970  ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills
971  = MFI->getSGPRToVGPRSpills(Index);
972  bool SpillToVGPR = !VGPRSpills.empty();
973  if (OnlyToVGPR && !SpillToVGPR)
974  return false;
975 
976  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
977  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
978  const SIInstrInfo *TII = ST.getInstrInfo();
979  const DebugLoc &DL = MI->getDebugLoc();
980 
981  Register SuperReg = MI->getOperand(0).getReg();
982  bool SpillToSMEM = spillSGPRToSMEM();
983  if (SpillToSMEM && OnlyToVGPR)
984  return false;
985 
986  assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
987 
988  unsigned OffsetReg = AMDGPU::M0;
989  unsigned M0CopyReg = AMDGPU::NoRegister;
990 
991  if (SpillToSMEM) {
992  if (RS->isRegUsed(AMDGPU::M0)) {
993  M0CopyReg = RS->scavengeRegister(&AMDGPU::SReg_32_XM0RegClass, MI, 0, false);
994  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
995  .addReg(AMDGPU::M0);
996  }
997  }
998 
999  unsigned EltSize = 4;
1000  unsigned ScalarLoadOp;
1001 
1002  Register FrameReg = getFrameRegister(*MF);
1003 
1004  const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
1005  if (SpillToSMEM && isSGPRClass(RC)) {
1006  // XXX - if private_element_size is larger than 4 it might be useful to be
1007  // able to spill wider vmem spills.
1008  std::tie(EltSize, ScalarLoadOp) =
1009  getSpillEltSize(getRegSizeInBits(*RC) / 8, false);
1010  }
1011 
1012  ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
1013  unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
1014 
1015  // SubReg carries the "Kill" flag when SubReg == SuperReg.
1016  int64_t FrOffset = FrameInfo.getObjectOffset(Index);
1017 
1018  Register TmpVGPR;
1019 
1020  for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
1021  Register SubReg =
1022  NumSubRegs == 1 ? SuperReg : getSubReg(SuperReg, SplitParts[i]);
1023 
1024  if (SpillToSMEM) {
1025  // FIXME: Size may be > 4 but extra bytes wasted.
1026  unsigned Align = FrameInfo.getObjectAlignment(Index);
1027  MachinePointerInfo PtrInfo
1028  = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
1029  MachineMemOperand *MMO
1030  = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
1031  EltSize, MinAlign(Align, EltSize * i));
1032 
1033  // Add i * 4 offset
1034  int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
1035  if (Offset != 0) {
1036  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
1037  .addReg(FrameReg)
1038  .addImm(Offset);
1039  } else {
1040  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
1041  .addReg(FrameReg);
1042  }
1043 
1044  auto MIB =
1045  BuildMI(*MBB, MI, DL, TII->get(ScalarLoadOp), SubReg)
1046  .addReg(MFI->getScratchRSrcReg()) // sbase
1047  .addReg(OffsetReg, RegState::Kill) // soff
1048  .addImm(0) // glc
1049  .addImm(0) // dlc
1050  .addMemOperand(MMO);
1051 
1052  if (NumSubRegs > 1 && i == 0)
1053  MIB.addReg(SuperReg, RegState::ImplicitDefine);
1054 
1055  continue;
1056  }
1057 
1058  if (SpillToVGPR) {
1059  SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
1060  auto MIB =
1061  BuildMI(*MBB, MI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
1062  SubReg)
1063  .addReg(Spill.VGPR)
1064  .addImm(Spill.Lane);
1065 
1066  if (NumSubRegs > 1 && i == 0)
1067  MIB.addReg(SuperReg, RegState::ImplicitDefine);
1068  } else {
1069  if (OnlyToVGPR)
1070  return false;
1071 
1072  // Restore SGPR from a stack slot.
1073  // FIXME: We should use S_LOAD_DWORD here for VI.
1074  if (!TmpVGPR.isValid())
1075  TmpVGPR = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
1076  unsigned Align = FrameInfo.getObjectAlignment(Index);
1077 
1078  MachinePointerInfo PtrInfo
1079  = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
1080 
1081  MachineMemOperand *MMO = MF->getMachineMemOperand(PtrInfo,
1082  MachineMemOperand::MOLoad, EltSize,
1083  MinAlign(Align, EltSize * i));
1084 
1085  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_RESTORE), TmpVGPR)
1086  .addFrameIndex(Index) // vaddr
1087  .addReg(MFI->getScratchRSrcReg()) // srsrc
1088  .addReg(MFI->getStackPtrOffsetReg()) // soffset
1089  .addImm(i * 4) // offset
1090  .addMemOperand(MMO);
1091 
1092  auto MIB =
1093  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
1094  .addReg(TmpVGPR, RegState::Kill);
1095 
1096  if (NumSubRegs > 1)
1097  MIB.addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
1098  }
1099  }
1100 
1101  if (M0CopyReg != AMDGPU::NoRegister) {
1102  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
1103  .addReg(M0CopyReg, RegState::Kill);
1104  }
1105 
1106  MI->eraseFromParent();
1107  return true;
1108 }
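// The restore mirrors the spill: each subregister is either read back with
// v_readlane_b32 from its reserved VGPR lane, or reloaded from the stack slot
// into a scavenged VGPR and copied out with v_readfirstlane_b32.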
1109 
1110 /// Special case of eliminateFrameIndex. Returns true if the SGPR was spilled to
1111 /// a VGPR and the stack slot can be safely eliminated when all other users are
1112 /// handled.
1113 bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex(
1114  MachineBasicBlock::iterator MI,
1115  int FI,
1116  RegScavenger *RS) const {
1117  switch (MI->getOpcode()) {
1118  case AMDGPU::SI_SPILL_S1024_SAVE:
1119  case AMDGPU::SI_SPILL_S512_SAVE:
1120  case AMDGPU::SI_SPILL_S256_SAVE:
1121  case AMDGPU::SI_SPILL_S160_SAVE:
1122  case AMDGPU::SI_SPILL_S128_SAVE:
1123  case AMDGPU::SI_SPILL_S96_SAVE:
1124  case AMDGPU::SI_SPILL_S64_SAVE:
1125  case AMDGPU::SI_SPILL_S32_SAVE:
1126  return spillSGPR(MI, FI, RS, true);
1127  case AMDGPU::SI_SPILL_S1024_RESTORE:
1128  case AMDGPU::SI_SPILL_S512_RESTORE:
1129  case AMDGPU::SI_SPILL_S256_RESTORE:
1130  case AMDGPU::SI_SPILL_S160_RESTORE:
1131  case AMDGPU::SI_SPILL_S128_RESTORE:
1132  case AMDGPU::SI_SPILL_S96_RESTORE:
1133  case AMDGPU::SI_SPILL_S64_RESTORE:
1134  case AMDGPU::SI_SPILL_S32_RESTORE:
1135  return restoreSGPR(MI, FI, RS, true);
1136  default:
1137  llvm_unreachable("not an SGPR spill instruction");
1138  }
1139 }
1140 
1141 void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
1142  int SPAdj, unsigned FIOperandNum,
1143  RegScavenger *RS) const {
1144  MachineFunction *MF = MI->getParent()->getParent();
1145  MachineBasicBlock *MBB = MI->getParent();
1146  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
1147  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
1148  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
1149  const SIInstrInfo *TII = ST.getInstrInfo();
1150  DebugLoc DL = MI->getDebugLoc();
1151 
1152  assert(SPAdj == 0 && "unhandled SP adjustment in call sequence?");
1153 
1154  MachineOperand &FIOp = MI->getOperand(FIOperandNum);
1155  int Index = MI->getOperand(FIOperandNum).getIndex();
1156 
1157  Register FrameReg = getFrameRegister(*MF);
1158 
1159  switch (MI->getOpcode()) {
1160  // SGPR register spill
1161  case AMDGPU::SI_SPILL_S1024_SAVE:
1162  case AMDGPU::SI_SPILL_S512_SAVE:
1163  case AMDGPU::SI_SPILL_S256_SAVE:
1164  case AMDGPU::SI_SPILL_S160_SAVE:
1165  case AMDGPU::SI_SPILL_S128_SAVE:
1166  case AMDGPU::SI_SPILL_S96_SAVE:
1167  case AMDGPU::SI_SPILL_S64_SAVE:
1168  case AMDGPU::SI_SPILL_S32_SAVE: {
1169  spillSGPR(MI, Index, RS);
1170  break;
1171  }
1172 
1173  // SGPR register restore
1174  case AMDGPU::SI_SPILL_S1024_RESTORE:
1175  case AMDGPU::SI_SPILL_S512_RESTORE:
1176  case AMDGPU::SI_SPILL_S256_RESTORE:
1177  case AMDGPU::SI_SPILL_S160_RESTORE:
1178  case AMDGPU::SI_SPILL_S128_RESTORE:
1179  case AMDGPU::SI_SPILL_S96_RESTORE:
1180  case AMDGPU::SI_SPILL_S64_RESTORE:
1181  case AMDGPU::SI_SPILL_S32_RESTORE: {
1182  restoreSGPR(MI, Index, RS);
1183  break;
1184  }
1185 
1186  // VGPR register spill
1187  case AMDGPU::SI_SPILL_V1024_SAVE:
1188  case AMDGPU::SI_SPILL_V512_SAVE:
1189  case AMDGPU::SI_SPILL_V256_SAVE:
1190  case AMDGPU::SI_SPILL_V160_SAVE:
1191  case AMDGPU::SI_SPILL_V128_SAVE:
1192  case AMDGPU::SI_SPILL_V96_SAVE:
1193  case AMDGPU::SI_SPILL_V64_SAVE:
1194  case AMDGPU::SI_SPILL_V32_SAVE:
1195  case AMDGPU::SI_SPILL_A1024_SAVE:
1196  case AMDGPU::SI_SPILL_A512_SAVE:
1197  case AMDGPU::SI_SPILL_A128_SAVE:
1198  case AMDGPU::SI_SPILL_A64_SAVE:
1199  case AMDGPU::SI_SPILL_A32_SAVE: {
1200  const MachineOperand *VData = TII->getNamedOperand(*MI,
1201  AMDGPU::OpName::vdata);
1202  assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
1203  MFI->getStackPtrOffsetReg());
1204 
1205  buildSpillLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,
1206  Index,
1207  VData->getReg(), VData->isKill(),
1208  TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
1209  FrameReg,
1210  TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
1211  *MI->memoperands_begin(),
1212  RS);
1213  MFI->addToSpilledVGPRs(getNumSubRegsForSpillOp(MI->getOpcode()));
1214  MI->eraseFromParent();
1215  break;
1216  }
1217  case AMDGPU::SI_SPILL_V32_RESTORE:
1218  case AMDGPU::SI_SPILL_V64_RESTORE:
1219  case AMDGPU::SI_SPILL_V96_RESTORE:
1220  case AMDGPU::SI_SPILL_V128_RESTORE:
1221  case AMDGPU::SI_SPILL_V160_RESTORE:
1222  case AMDGPU::SI_SPILL_V256_RESTORE:
1223  case AMDGPU::SI_SPILL_V512_RESTORE:
1224  case AMDGPU::SI_SPILL_V1024_RESTORE:
1225  case AMDGPU::SI_SPILL_A32_RESTORE:
1226  case AMDGPU::SI_SPILL_A64_RESTORE:
1227  case AMDGPU::SI_SPILL_A128_RESTORE:
1228  case AMDGPU::SI_SPILL_A512_RESTORE:
1229  case AMDGPU::SI_SPILL_A1024_RESTORE: {
1230  const MachineOperand *VData = TII->getNamedOperand(*MI,
1231  AMDGPU::OpName::vdata);
1232  assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
1233  MFI->getStackPtrOffsetReg());
1234 
1235  buildSpillLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
1236  Index,
1237  VData->getReg(), VData->isKill(),
1238  TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
1239  FrameReg,
1240  TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
1241  *MI->memoperands_begin(),
1242  RS);
1243  MI->eraseFromParent();
1244  break;
1245  }
1246 
1247  default: {
1248  const DebugLoc &DL = MI->getDebugLoc();
1249  bool IsMUBUF = TII->isMUBUF(*MI);
1250 
1251  if (!IsMUBUF && !MFI->isEntryFunction()) {
1252  // Convert to an absolute stack address by finding the offset from the
1253  // scratch wave base and scaling by the wave size.
1254  //
1255  // In an entry function/kernel the offset is already the absolute
1256  // address relative to the frame register.
1257 
1258  Register TmpDiffReg =
1259  RS->scavengeRegister(&AMDGPU::SReg_32_XM0RegClass, MI, 0, false);
1260 
1261  // If there's no free SGPR, in-place modify the FP
1262  Register DiffReg = TmpDiffReg.isValid() ? TmpDiffReg : FrameReg;
1263 
1264  bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32;
1265  Register ResultReg = IsCopy ?
1266  MI->getOperand(0).getReg() :
1267  RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
1268 
1269  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), DiffReg)
1270  .addReg(FrameReg)
1271  .addReg(MFI->getScratchWaveOffsetReg());
1272 
1273  int64_t Offset = FrameInfo.getObjectOffset(Index);
1274  if (Offset == 0) {
1275  // XXX - This never happens because of emergency scavenging slot at 0?
1276  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ResultReg)
1277  .addImm(ST.getWavefrontSizeLog2())
1278  .addReg(DiffReg);
1279  } else {
1280  if (auto MIB = TII->getAddNoCarry(*MBB, MI, DL, ResultReg, *RS)) {
1281  Register ScaledReg =
1282  RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MIB, 0);
1283 
1284  BuildMI(*MBB, *MIB, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64),
1285  ScaledReg)
1286  .addImm(ST.getWavefrontSizeLog2())
1287  .addReg(DiffReg, RegState::Kill);
1288 
1289  const bool IsVOP2 = MIB->getOpcode() == AMDGPU::V_ADD_U32_e32;
1290 
1291  // TODO: Fold if use instruction is another add of a constant.
1292  if (IsVOP2 || AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm())) {
1293  // FIXME: This can fail
1294  MIB.addImm(Offset);
1295  MIB.addReg(ScaledReg, RegState::Kill);
1296  if (!IsVOP2)
1297  MIB.addImm(0); // clamp bit
1298  } else {
1299  Register ConstOffsetReg =
1300  RS->scavengeRegister(&AMDGPU::SReg_32_XM0RegClass, MIB, 0, false);
1301 
1302  // This should always be able to use the unused carry out.
1303  assert(ConstOffsetReg && "this scavenge should not be able to fail");
1304 
1305  BuildMI(*MBB, *MIB, DL, TII->get(AMDGPU::S_MOV_B32), ConstOffsetReg)
1306  .addImm(Offset);
1307  MIB.addReg(ConstOffsetReg, RegState::Kill);
1308  MIB.addReg(ScaledReg, RegState::Kill);
1309  MIB.addImm(0); // clamp bit
1310  }
1311  } else {
1312  // We have to produce a carry out, and there isn't a free SGPR
1313  // pair for it. We can keep the whole computation on the SALU to
1314  // avoid clobbering an additional register at the cost of an extra
1315  // mov.
1316 
1317  // We may have 1 free scratch SGPR even though a carry out is
1318  // unavailable. Only one additional mov is needed.
1319  Register TmpScaledReg =
1320  RS->scavengeRegister(&AMDGPU::SReg_32_XM0RegClass, MI, 0, false);
1321  Register ScaledReg = TmpScaledReg.isValid() ? TmpScaledReg : DiffReg;
1322 
1323  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHR_B32), ScaledReg)
1324  .addReg(DiffReg, RegState::Kill)
1325  .addImm(ST.getWavefrontSizeLog2());
1326  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), ScaledReg)
1327  .addReg(ScaledReg, RegState::Kill)
1328  .addImm(Offset);
1329  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), ResultReg)
1330  .addReg(ScaledReg, RegState::Kill);
1331 
1332  // If there were truly no free SGPRs, we need to undo everything.
1333  if (!TmpScaledReg.isValid()) {
1334  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), ScaledReg)
1335  .addReg(ScaledReg, RegState::Kill)
1336  .addImm(Offset);
1337  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHL_B32), ScaledReg)
1338  .addReg(DiffReg, RegState::Kill)
1339  .addImm(ST.getWavefrontSizeLog2());
1340  }
1341  }
1342  }
1343 
1344  if (!TmpDiffReg.isValid()) {
1345  // Restore the FP.
1346  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), FrameReg)
1347  .addReg(FrameReg)
1348  .addReg(MFI->getScratchWaveOffsetReg());
1349  }
1350 
1351  // Don't introduce an extra copy if we're just materializing in a mov.
1352  if (IsCopy)
1353  MI->eraseFromParent();
1354  else
1355  FIOp.ChangeToRegister(ResultReg, false, false, true);
1356  return;
1357  }
1358 
1359  if (IsMUBUF) {
1360  // Disable offen so we don't need a 0 vgpr base.
1361  assert(static_cast<int>(FIOperandNum) ==
1362  AMDGPU::getNamedOperandIdx(MI->getOpcode(),
1363  AMDGPU::OpName::vaddr));
1364 
1365  assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
1366  MFI->getStackPtrOffsetReg());
1367 
1368  TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->setReg(FrameReg);
1369 
1370  int64_t Offset = FrameInfo.getObjectOffset(Index);
1371  int64_t OldImm
1372  = TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm();
1373  int64_t NewOffset = OldImm + Offset;
1374 
1375  if (isUInt<12>(NewOffset) &&
1376  buildMUBUFOffsetLoadStore(TII, FrameInfo, MI, Index, NewOffset)) {
1377  MI->eraseFromParent();
1378  return;
1379  }
1380  }
1381 
1382  // If the offset is simply too big, don't convert to a scratch wave offset
1383  // relative index.
1384 
1385  int64_t Offset = FrameInfo.getObjectOffset(Index);
1386  FIOp.ChangeToImmediate(Offset);
1387  if (!TII->isImmOperandLegal(*MI, FIOperandNum, FIOp)) {
1388  Register TmpReg = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
1389  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
1390  .addImm(Offset);
1391  FIOp.ChangeToRegister(TmpReg, false, false, true);
1392  }
1393  }
1394  }
1395 }
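// Sketch of the non-MUBUF path above: the per-wave difference
// (FrameReg - ScratchWaveOffsetReg) is shifted right by log2(wavefront size)
// and the object offset is added, yielding the absolute stack address that
// replaces the frame index operand.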
1396 
1397 StringRef SIRegisterInfo::getRegAsmName(unsigned Reg) const {
1398  return AMDGPUInstPrinter::getRegisterName(Reg);
1399 }
1400 
1401 // FIXME: This is very slow. It might be worth creating a map from physreg to
1402 // register class.
1403 const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
1404  assert(Register::isPhysicalRegister(Reg));
1405 
1406  static const TargetRegisterClass *const BaseClasses[] = {
1407  &AMDGPU::VGPR_32RegClass,
1408  &AMDGPU::SReg_32RegClass,
1409  &AMDGPU::AGPR_32RegClass,
1410  &AMDGPU::VReg_64RegClass,
1411  &AMDGPU::SReg_64RegClass,
1412  &AMDGPU::AReg_64RegClass,
1413  &AMDGPU::VReg_96RegClass,
1414  &AMDGPU::SReg_96RegClass,
1415  &AMDGPU::VReg_128RegClass,
1416  &AMDGPU::SReg_128RegClass,
1417  &AMDGPU::AReg_128RegClass,
1418  &AMDGPU::VReg_160RegClass,
1419  &AMDGPU::SReg_160RegClass,
1420  &AMDGPU::VReg_256RegClass,
1421  &AMDGPU::SReg_256RegClass,
1422  &AMDGPU::VReg_512RegClass,
1423  &AMDGPU::SReg_512RegClass,
1424  &AMDGPU::AReg_512RegClass,
1425  &AMDGPU::SReg_1024RegClass,
1426  &AMDGPU::VReg_1024RegClass,
1427  &AMDGPU::AReg_1024RegClass,
1428  &AMDGPU::SCC_CLASSRegClass,
1429  &AMDGPU::Pseudo_SReg_32RegClass,
1430  &AMDGPU::Pseudo_SReg_128RegClass,
1431  };
1432 
1433  for (const TargetRegisterClass *BaseClass : BaseClasses) {
1434  if (BaseClass->contains(Reg)) {
1435  return BaseClass;
1436  }
1437  }
1438  return nullptr;
1439 }
1440 
1441 // TODO: It might be helpful to have some target specific flags in
1442 // TargetRegisterClass to mark which classes are VGPRs to make this trivial.
1443 bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const {
1444  unsigned Size = getRegSizeInBits(*RC);
1445  switch (Size) {
1446  case 32:
1447  return getCommonSubClass(&AMDGPU::VGPR_32RegClass, RC) != nullptr;
1448  case 64:
1449  return getCommonSubClass(&AMDGPU::VReg_64RegClass, RC) != nullptr;
1450  case 96:
1451  return getCommonSubClass(&AMDGPU::VReg_96RegClass, RC) != nullptr;
1452  case 128:
1453  return getCommonSubClass(&AMDGPU::VReg_128RegClass, RC) != nullptr;
1454  case 160:
1455  return getCommonSubClass(&AMDGPU::VReg_160RegClass, RC) != nullptr;
1456  case 256:
1457  return getCommonSubClass(&AMDGPU::VReg_256RegClass, RC) != nullptr;
1458  case 512:
1459  return getCommonSubClass(&AMDGPU::VReg_512RegClass, RC) != nullptr;
1460  case 1024:
1461  return getCommonSubClass(&AMDGPU::VReg_1024RegClass, RC) != nullptr;
1462  case 1:
1463  return getCommonSubClass(&AMDGPU::VReg_1RegClass, RC) != nullptr;
1464  default:
1465  assert(Size < 32 && "Invalid register class size");
1466  return false;
1467  }
1468 }
1469 
1470 bool SIRegisterInfo::hasAGPRs(const TargetRegisterClass *RC) const {
1471  unsigned Size = getRegSizeInBits(*RC);
1472  if (Size < 32)
1473  return false;
1474  switch (Size) {
1475  case 32:
1476  return getCommonSubClass(&AMDGPU::AGPR_32RegClass, RC) != nullptr;
1477  case 64:
1478  return getCommonSubClass(&AMDGPU::AReg_64RegClass, RC) != nullptr;
1479  case 96:
1480  return false;
1481  case 128:
1482  return getCommonSubClass(&AMDGPU::AReg_128RegClass, RC) != nullptr;
1483  case 160:
1484  case 256:
1485  return false;
1486  case 512:
1487  return getCommonSubClass(&AMDGPU::AReg_512RegClass, RC) != nullptr;
1488  case 1024:
1489  return getCommonSubClass(&AMDGPU::AReg_1024RegClass, RC) != nullptr;
1490  default:
1491  llvm_unreachable("Invalid register class size");
1492  }
1493 }
1494 
1495 const TargetRegisterClass *SIRegisterInfo::getEquivalentVGPRClass(
1496  const TargetRegisterClass *SRC) const {
1497  switch (getRegSizeInBits(*SRC)) {
1498  case 32:
1499  return &AMDGPU::VGPR_32RegClass;
1500  case 64:
1501  return &AMDGPU::VReg_64RegClass;
1502  case 96:
1503  return &AMDGPU::VReg_96RegClass;
1504  case 128:
1505  return &AMDGPU::VReg_128RegClass;
1506  case 160:
1507  return &AMDGPU::VReg_160RegClass;
1508  case 256:
1509  return &AMDGPU::VReg_256RegClass;
1510  case 512:
1511  return &AMDGPU::VReg_512RegClass;
1512  case 1024:
1513  return &AMDGPU::VReg_1024RegClass;
1514  case 1:
1515  return &AMDGPU::VReg_1RegClass;
1516  default:
1517  llvm_unreachable("Invalid register class size");
1518  }
1519 }
1520 
1521 const TargetRegisterClass *SIRegisterInfo::getEquivalentAGPRClass(
1522  const TargetRegisterClass *SRC) const {
1523  switch (getRegSizeInBits(*SRC)) {
1524  case 32:
1525  return &AMDGPU::AGPR_32RegClass;
1526  case 64:
1527  return &AMDGPU::AReg_64RegClass;
1528  case 128:
1529  return &AMDGPU::AReg_128RegClass;
1530  case 512:
1531  return &AMDGPU::AReg_512RegClass;
1532  case 1024:
1533  return &AMDGPU::AReg_1024RegClass;
1534  default:
1535  llvm_unreachable("Invalid register class size");
1536  }
1537 }
1538 
1539 const TargetRegisterClass *SIRegisterInfo::getEquivalentSGPRClass(
1540  const TargetRegisterClass *VRC) const {
1541  switch (getRegSizeInBits(*VRC)) {
1542  case 32:
1543  return &AMDGPU::SGPR_32RegClass;
1544  case 64:
1545  return &AMDGPU::SReg_64RegClass;
1546  case 96:
1547  return &AMDGPU::SReg_96RegClass;
1548  case 128:
1549  return &AMDGPU::SGPR_128RegClass;
1550  case 160:
1551  return &AMDGPU::SReg_160RegClass;
1552  case 256:
1553  return &AMDGPU::SReg_256RegClass;
1554  case 512:
1555  return &AMDGPU::SReg_512RegClass;
1556  case 1024:
1557  return &AMDGPU::SReg_1024RegClass;
1558  default:
1559  llvm_unreachable("Invalid register class size");
1560  }
1561 }
1562 
1563 const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
1564  const TargetRegisterClass *RC, unsigned SubIdx) const {
1565  if (SubIdx == AMDGPU::NoSubRegister)
1566  return RC;
1567 
1568  // We can assume that each lane corresponds to one 32-bit register.
1569  unsigned Count = getSubRegIndexLaneMask(SubIdx).getNumLanes();
1570  if (isSGPRClass(RC)) {
1571  switch (Count) {
1572  case 1:
1573  return &AMDGPU::SGPR_32RegClass;
1574  case 2:
1575  return &AMDGPU::SReg_64RegClass;
1576  case 3:
1577  return &AMDGPU::SReg_96RegClass;
1578  case 4:
1579  return &AMDGPU::SGPR_128RegClass;
1580  case 5:
1581  return &AMDGPU::SReg_160RegClass;
1582  case 8:
1583  return &AMDGPU::SReg_256RegClass;
1584  case 16:
1585  return &AMDGPU::SReg_512RegClass;
1586  case 32: /* fall-through */
1587  default:
1588  llvm_unreachable("Invalid sub-register class size");
1589  }
1590  } else if (hasAGPRs(RC)) {
1591  switch (Count) {
1592  case 1:
1593  return &AMDGPU::AGPR_32RegClass;
1594  case 2:
1595  return &AMDGPU::AReg_64RegClass;
1596  case 4:
1597  return &AMDGPU::AReg_128RegClass;
1598  case 16:
1599  return &AMDGPU::AReg_512RegClass;
1600  case 32: /* fall-through */
1601  default:
1602  llvm_unreachable("Invalid sub-register class size");
1603  }
1604  } else {
1605  switch (Count) {
1606  case 1:
1607  return &AMDGPU::VGPR_32RegClass;
1608  case 2:
1609  return &AMDGPU::VReg_64RegClass;
1610  case 3:
1611  return &AMDGPU::VReg_96RegClass;
1612  case 4:
1613  return &AMDGPU::VReg_128RegClass;
1614  case 5:
1615  return &AMDGPU::VReg_160RegClass;
1616  case 8:
1617  return &AMDGPU::VReg_256RegClass;
1618  case 16:
1619  return &AMDGPU::VReg_512RegClass;
1620  case 32: /* fall-through */
1621  default:
1622  llvm_unreachable("Invalid sub-register class size");
1623  }
1624  }
1625 }
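// Example: sub0_sub1 covers two 32-bit lanes, so asking for its class on a
// VGPR tuple returns VReg_64, while the same index on an SGPR tuple returns
// SReg_64.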
1626 
1627 bool SIRegisterInfo::opCanUseInlineConstant(unsigned OpType) const {
1628  if (OpType >= AMDGPU::OPERAND_REG_INLINE_AC_FIRST &&
1629  OpType <= AMDGPU::OPERAND_REG_INLINE_AC_LAST)
1630  return !ST.hasMFMAInlineLiteralBug();
1631 
1632  return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
1633  OpType <= AMDGPU::OPERAND_SRC_LAST;
1634 }
1635 
1636 bool SIRegisterInfo::shouldRewriteCopySrc(
1637  const TargetRegisterClass *DefRC,
1638  unsigned DefSubReg,
1639  const TargetRegisterClass *SrcRC,
1640  unsigned SrcSubReg) const {
1641  // We want to prefer the smallest register class possible, so we don't want to
1642  // stop and rewrite on anything that looks like a subregister
1643  // extract. Operations mostly don't care about the super register class, so we
1644  // only want to stop on the most basic of copies between the same register
1645  // class.
1646  //
1647  // e.g. if we have something like
1648  // %0 = ...
1649  // %1 = ...
1650  // %2 = REG_SEQUENCE %0, sub0, %1, sub1, %2, sub2
1651  // %3 = COPY %2, sub0
1652  //
1653  // We want to look through the COPY to find:
1654  // => %3 = COPY %0
1655 
1656  // Plain copy.
1657  return getCommonSubClass(DefRC, SrcRC) != nullptr;
1658 }
1659 
1660 /// Returns a register that is not used at any point in the function.
1661 /// If all registers are used, then this function will return
1662 /// AMDGPU::NoRegister.
1663 unsigned
1664 SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI,
1665  const TargetRegisterClass *RC,
1666  const MachineFunction &MF) const {
1667 
1668  for (unsigned Reg : *RC)
1669  if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg))
1670  return Reg;
1671  return AMDGPU::NoRegister;
1672 }
1673 
1674 ArrayRef<int16_t> SIRegisterInfo::getRegSplitParts(const TargetRegisterClass *RC,
1675  unsigned EltSize) const {
1676  if (EltSize == 4) {
1677  static const int16_t Sub0_31[] = {
1678  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
1679  AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
1680  AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
1681  AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
1682  AMDGPU::sub16, AMDGPU::sub17, AMDGPU::sub18, AMDGPU::sub19,
1683  AMDGPU::sub20, AMDGPU::sub21, AMDGPU::sub22, AMDGPU::sub23,
1684  AMDGPU::sub24, AMDGPU::sub25, AMDGPU::sub26, AMDGPU::sub27,
1685  AMDGPU::sub28, AMDGPU::sub29, AMDGPU::sub30, AMDGPU::sub31,
1686  };
1687 
1688  static const int16_t Sub0_15[] = {
1689  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
1690  AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
1691  AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
1692  AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
1693  };
1694 
1695  static const int16_t Sub0_7[] = {
1696  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
1697  AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
1698  };
1699 
1700  static const int16_t Sub0_4[] = {
1701  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, AMDGPU::sub4,
1702  };
1703 
1704  static const int16_t Sub0_3[] = {
1705  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
1706  };
1707 
1708  static const int16_t Sub0_2[] = {
1709  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2,
1710  };
1711 
1712  static const int16_t Sub0_1[] = {
1713  AMDGPU::sub0, AMDGPU::sub1,
1714  };
1715 
1716  switch (AMDGPU::getRegBitWidth(*RC->MC)) {
1717  case 32:
1718  return {};
1719  case 64:
1720  return makeArrayRef(Sub0_1);
1721  case 96:
1722  return makeArrayRef(Sub0_2);
1723  case 128:
1724  return makeArrayRef(Sub0_3);
1725  case 160:
1726  return makeArrayRef(Sub0_4);
1727  case 256:
1728  return makeArrayRef(Sub0_7);
1729  case 512:
1730  return makeArrayRef(Sub0_15);
1731  case 1024:
1732  return makeArrayRef(Sub0_31);
1733  default:
1734  llvm_unreachable("unhandled register size");
1735  }
1736  }
1737 
1738  if (EltSize == 8) {
1739  static const int16_t Sub0_31_64[] = {
1740  AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
1741  AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
1742  AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
1743  AMDGPU::sub12_sub13, AMDGPU::sub14_sub15,
1744  AMDGPU::sub16_sub17, AMDGPU::sub18_sub19,
1745  AMDGPU::sub20_sub21, AMDGPU::sub22_sub23,
1746  AMDGPU::sub24_sub25, AMDGPU::sub26_sub27,
1747  AMDGPU::sub28_sub29, AMDGPU::sub30_sub31
1748  };
1749 
1750  static const int16_t Sub0_15_64[] = {
1751  AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
1752  AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
1753  AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
1754  AMDGPU::sub12_sub13, AMDGPU::sub14_sub15
1755  };
1756 
1757  static const int16_t Sub0_7_64[] = {
1758  AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
1759  AMDGPU::sub4_sub5, AMDGPU::sub6_sub7
1760  };
1761 
1762 
1763  static const int16_t Sub0_3_64[] = {
1764  AMDGPU::sub0_sub1, AMDGPU::sub2_sub3
1765  };
1766 
1767  switch (AMDGPU::getRegBitWidth(*RC->MC)) {
1768  case 64:
1769  return {};
1770  case 128:
1771  return makeArrayRef(Sub0_3_64);
1772  case 256:
1773  return makeArrayRef(Sub0_7_64);
1774  case 512:
1775  return makeArrayRef(Sub0_15_64);
1776  case 1024:
1777  return makeArrayRef(Sub0_31_64);
1778  default:
1779  llvm_unreachable("unhandled register size");
1780  }
1781  }
1782 
1783  if (EltSize == 16) {
1784 
1785  static const int16_t Sub0_31_128[] = {
1786  AMDGPU::sub0_sub1_sub2_sub3,
1787  AMDGPU::sub4_sub5_sub6_sub7,
1788  AMDGPU::sub8_sub9_sub10_sub11,
1789  AMDGPU::sub12_sub13_sub14_sub15,
1790  AMDGPU::sub16_sub17_sub18_sub19,
1791  AMDGPU::sub20_sub21_sub22_sub23,
1792  AMDGPU::sub24_sub25_sub26_sub27,
1793  AMDGPU::sub28_sub29_sub30_sub31
1794  };
1795 
1796  static const int16_t Sub0_15_128[] = {
1797  AMDGPU::sub0_sub1_sub2_sub3,
1798  AMDGPU::sub4_sub5_sub6_sub7,
1799  AMDGPU::sub8_sub9_sub10_sub11,
1800  AMDGPU::sub12_sub13_sub14_sub15
1801  };
1802 
1803  static const int16_t Sub0_7_128[] = {
1804  AMDGPU::sub0_sub1_sub2_sub3,
1805  AMDGPU::sub4_sub5_sub6_sub7
1806  };
1807 
1808  switch (AMDGPU::getRegBitWidth(*RC->MC)) {
1809  case 128:
1810  return {};
1811  case 256:
1812  return makeArrayRef(Sub0_7_128);
1813  case 512:
1814  return makeArrayRef(Sub0_15_128);
1815  case 1024:
1816  return makeArrayRef(Sub0_31_128);
1817  default:
1818  llvm_unreachable("unhandled register size");
1819  }
1820  }
1821 
1822  assert(EltSize == 32 && "unhandled elt size");
1823 
1824  static const int16_t Sub0_31_256[] = {
1825  AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7,
1826  AMDGPU::sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15,
1827  AMDGPU::sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23,
1828  AMDGPU::sub24_sub25_sub26_sub27_sub28_sub29_sub30_sub31
1829  };
1830 
1831  static const int16_t Sub0_15_256[] = {
1832  AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7,
1833  AMDGPU::sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15
1834  };
1835 
1836  switch (AMDGPU::getRegBitWidth(*RC->MC)) {
1837  case 256:
1838  return {};
1839  case 512:
1840  return makeArrayRef(Sub0_15_256);
1841  case 1024:
1842  return makeArrayRef(Sub0_31_256);
1843  default:
1844  llvm_unreachable("unhandled register size");
1845  }
1846 }
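
// Editorial sketch (not part of the original source): what getRegSplitParts()
// returns for a couple of representative queries, assuming 'TRI' is a
// const SIRegisterInfo *.
//
//   // 128-bit class split into 32-bit (EltSize == 4) pieces:
//   //   {sub0, sub1, sub2, sub3}
//   ArrayRef<int16_t> Quads =
//       TRI->getRegSplitParts(&AMDGPU::VReg_128RegClass, /*EltSize=*/4);
//
//   // 256-bit class split into 64-bit (EltSize == 8) pieces:
//   //   {sub0_sub1, sub2_sub3, sub4_sub5, sub6_sub7}
//   ArrayRef<int16_t> Pairs =
//       TRI->getRegSplitParts(&AMDGPU::SReg_256RegClass, /*EltSize=*/8);
//
//   // An empty ArrayRef means the class is a single element of that size.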
1847 
1848 const TargetRegisterClass*
1849 SIRegisterInfo::getRegClassForReg(const MachineRegisterInfo &MRI,
1850  unsigned Reg) const {
1851  if (Register::isVirtualRegister(Reg))
1852  return MRI.getRegClass(Reg);
1853 
1854  return getPhysRegClass(Reg);
1855 }
1856 
1857 bool SIRegisterInfo::isVGPR(const MachineRegisterInfo &MRI,
1858  unsigned Reg) const {
1859  const TargetRegisterClass * RC = getRegClassForReg(MRI, Reg);
1860  assert(RC && "Register class for the reg not found");
1861  return hasVGPRs(RC);
1862 }
1863 
1864 bool SIRegisterInfo::isAGPR(const MachineRegisterInfo &MRI,
1865  unsigned Reg) const {
1866  const TargetRegisterClass * RC = getRegClassForReg(MRI, Reg);
1867  assert(RC && "Register class for the reg not found");
1868  return hasAGPRs(RC);
1869 }
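
// Editorial sketch (not part of the original source): isVGPR()/isAGPR() simply
// ask whether the register's class (virtual or physical) contains vector /
// accumulator registers. 'TRI', 'MRI' and 'Reg' are assumed to be in scope.
//
//   if (TRI->isVGPR(MRI, Reg) || TRI->isAGPR(MRI, Reg)) {
//     // Reg lives in the vector register file (VGPRs or AGPRs).
//   }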
1870 
1871 bool SIRegisterInfo::shouldCoalesce(MachineInstr *MI,
1872  const TargetRegisterClass *SrcRC,
1873  unsigned SubReg,
1874  const TargetRegisterClass *DstRC,
1875  unsigned DstSubReg,
1876  const TargetRegisterClass *NewRC,
1877  LiveIntervals &LIS) const {
1878  unsigned SrcSize = getRegSizeInBits(*SrcRC);
1879  unsigned DstSize = getRegSizeInBits(*DstRC);
1880  unsigned NewSize = getRegSizeInBits(*NewRC);
1881 
1882  // Do not increase the size of registers beyond a dword: we would need to
1883  // allocate adjacent registers and constrain regalloc more than needed.
1884 
1885  // Always allow dword coalescing.
1886  if (SrcSize <= 32 || DstSize <= 32)
1887  return true;
1888 
1889  return NewSize <= DstSize || NewSize <= SrcSize;
1890 }
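
// Editorial note (not part of the original source): the effect of the rule
// above on a few coalescing candidates.
//
//   * A copy to or from a 32-bit register is always coalescable (dword rule).
//   * Coalescing that keeps the merged register no wider than either original
//     class (NewSize <= SrcSize or NewSize <= DstSize) is allowed.
//   * Coalescing two 64-bit registers through mismatched sub-registers, which
//     would force both into a wider tuple class, is rejected: it would only
//     constrain register allocation by requiring adjacent registers.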
1891 
1892 unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
1893  MachineFunction &MF) const {
1894 
1895  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1896  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
1897 
1898  unsigned Occupancy = ST.getOccupancyWithLocalMemSize(MFI->getLDSSize(),
1899  MF.getFunction());
1900  switch (RC->getID()) {
1901  default:
1902  return AMDGPURegisterInfo::getRegPressureLimit(RC, MF);
1903  case AMDGPU::VGPR_32RegClassID:
1904  return std::min(ST.getMaxNumVGPRs(Occupancy), ST.getMaxNumVGPRs(MF));
1905  case AMDGPU::SGPR_32RegClassID:
1906  return std::min(ST.getMaxNumSGPRs(Occupancy, true), ST.getMaxNumSGPRs(MF));
1907  }
1908 }
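
// Editorial note (not part of the original source): for the VGPR_32 and
// SGPR_32 classes the pressure limit is the tighter of two bounds: the
// register count allowed by the occupancy implied by the function's LDS
// usage, and the per-function maximum from the subtarget. All other classes
// use the generic AMDGPURegisterInfo limit.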
1909 
1910 unsigned SIRegisterInfo::getRegPressureSetLimit(const MachineFunction &MF,
1911  unsigned Idx) const {
1912  if (Idx == getVGPRPressureSet() || Idx == getAGPRPressureSet())
1913  return getRegPressureLimit(&AMDGPU::VGPR_32RegClass,
1914  const_cast<MachineFunction &>(MF));
1915 
1916  if (Idx == getSGPRPressureSet())
1917  return getRegPressureLimit(&AMDGPU::SGPR_32RegClass,
1918  const_cast<MachineFunction &>(MF));
1919 
1920  return AMDGPURegisterInfo::getRegPressureSetLimit(MF, Idx);
1921 }
1922 
1923 const int *SIRegisterInfo::getRegUnitPressureSets(unsigned RegUnit) const {
1924  static const int Empty[] = { -1 };
1925 
1926  if (hasRegUnit(AMDGPU::M0, RegUnit))
1927  return Empty;
1928  return AMDGPURegisterInfo::getRegUnitPressureSets(RegUnit);
1929 }
1930 
1931 unsigned SIRegisterInfo::getReturnAddressReg(const MachineFunction &MF) const {
1932  // Not a callee-saved register.
1933  return AMDGPU::SGPR30_SGPR31;
1934 }
1935 
1936 const TargetRegisterClass *
1937 SIRegisterInfo::getRegClassForSizeOnBank(unsigned Size,
1938  const RegisterBank &RB,
1939  const MachineRegisterInfo &MRI) const {
1940  switch (Size) {
1941  case 1: {
1942  switch (RB.getID()) {
1943  case AMDGPU::VGPRRegBankID:
1944  return &AMDGPU::VGPR_32RegClass;
1945  case AMDGPU::VCCRegBankID:
1946  return isWave32 ?
1947  &AMDGPU::SReg_32_XM0_XEXECRegClass : &AMDGPU::SReg_64_XEXECRegClass;
1948  case AMDGPU::SGPRRegBankID:
1949  return &AMDGPU::SReg_32_XM0RegClass;
1950  case AMDGPU::SCCRegBankID:
1951  // This needs to return an allocatable class, so don't bother returning
1952  // the dummy SCC class.
1953  return &AMDGPU::SReg_32_XM0RegClass;
1954  default:
1955  llvm_unreachable("unknown register bank");
1956  }
1957  }
1958  case 32:
1959  return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VGPR_32RegClass :
1960  &AMDGPU::SReg_32_XM0RegClass;
1961  case 64:
1962  return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_64RegClass :
1963  &AMDGPU::SReg_64_XEXECRegClass;
1964  case 96:
1965  return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_96RegClass :
1966  &AMDGPU::SReg_96RegClass;
1967  case 128:
1968  return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_128RegClass :
1969  &AMDGPU::SGPR_128RegClass;
1970  case 160:
1971  return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_160RegClass :
1972  &AMDGPU::SReg_160RegClass;
1973  case 256:
1974  return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_256RegClass :
1975  &AMDGPU::SReg_256RegClass;
1976  case 512:
1977  return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_512RegClass :
1978  &AMDGPU::SReg_512RegClass;
1979  case 1024:
1980  return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_1024RegClass :
1981  &AMDGPU::SReg_1024RegClass;
1982  default:
1983  if (Size < 32)
1984  return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VGPR_32RegClass :
1985  &AMDGPU::SReg_32_XM0RegClass;
1986  return nullptr;
1987  }
1988 }
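
// Editorial sketch (not part of the original source): representative results
// of getRegClassForSizeOnBank(), assuming 'VGPRBank', 'SGPRBank' and 'VCCBank'
// are the corresponding RegisterBank objects and 'MRI' is in scope.
//
//   TRI->getRegClassForSizeOnBank(64, VGPRBank, MRI); // &AMDGPU::VReg_64RegClass
//   TRI->getRegClassForSizeOnBank(64, SGPRBank, MRI); // &AMDGPU::SReg_64_XEXECRegClass
//   TRI->getRegClassForSizeOnBank(1, VCCBank, MRI);   // wave-size dependent:
//                                                     // SReg_32_XM0_XEXEC (wave32)
//                                                     // or SReg_64_XEXEC (wave64)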
1989 
1990 const TargetRegisterClass *
1991 SIRegisterInfo::getConstrainedRegClassForOperand(const MachineOperand &MO,
1992  const MachineRegisterInfo &MRI) const {
1993  const RegClassOrRegBank &RCOrRB = MRI.getRegClassOrRegBank(MO.getReg());
1994  if (const RegisterBank *RB = RCOrRB.dyn_cast<const RegisterBank*>())
1995  return getRegClassForTypeOnBank(MRI.getType(MO.getReg()), *RB, MRI);
1996 
1997  const TargetRegisterClass *RC = RCOrRB.get<const TargetRegisterClass*>();
1998  return getAllocatableClass(RC);
1999 }
2000 
2001 unsigned SIRegisterInfo::getVCC() const {
2002  return isWave32 ? AMDGPU::VCC_LO : AMDGPU::VCC;
2003 }
2004 
2005 const TargetRegisterClass *
2006 SIRegisterInfo::getRegClass(unsigned RCID) const {
2007  switch ((int)RCID) {
2008  case AMDGPU::SReg_1RegClassID:
2009  return getBoolRC();
2010  case AMDGPU::SReg_1_XEXECRegClassID:
2011  return isWave32 ? &AMDGPU::SReg_32_XM0_XEXECRegClass
2012  : &AMDGPU::SReg_64_XEXECRegClass;
2013  case -1:
2014  return nullptr;
2015  default:
2016  return AMDGPURegisterInfo::getRegClass(RCID);
2017  }
2018 }
2019 
2020 // Find reaching register definition
2021 MachineInstr *SIRegisterInfo::findReachingDef(unsigned Reg, unsigned SubReg,
2022  MachineInstr &Use,
2023  MachineRegisterInfo &MRI,
2024  LiveIntervals *LIS) const {
2025  auto &MDT = LIS->getAnalysis<MachineDominatorTree>();
2026  SlotIndex UseIdx = LIS->getInstructionIndex(Use);
2027  SlotIndex DefIdx;
2028 
2029  if (Register::isVirtualRegister(Reg)) {
2030  if (!LIS->hasInterval(Reg))
2031  return nullptr;
2032  LiveInterval &LI = LIS->getInterval(Reg);
2033  LaneBitmask SubLanes = SubReg ? getSubRegIndexLaneMask(SubReg)
2034  : MRI.getMaxLaneMaskForVReg(Reg);
2035  VNInfo *V = nullptr;
2036  if (LI.hasSubRanges()) {
2037  for (auto &S : LI.subranges()) {
2038  if ((S.LaneMask & SubLanes) == SubLanes) {
2039  V = S.getVNInfoAt(UseIdx);
2040  break;
2041  }
2042  }
2043  } else {
2044  V = LI.getVNInfoAt(UseIdx);
2045  }
2046  if (!V)
2047  return nullptr;
2048  DefIdx = V->def;
2049  } else {
2050  // Find last def.
2051  for (MCRegUnitIterator Units(Reg, this); Units.isValid(); ++Units) {
2052  LiveRange &LR = LIS->getRegUnit(*Units);
2053  if (VNInfo *V = LR.getVNInfoAt(UseIdx)) {
2054  if (!DefIdx.isValid() ||
2055  MDT.dominates(LIS->getInstructionFromIndex(DefIdx),
2056  LIS->getInstructionFromIndex(V->def)))
2057  DefIdx = V->def;
2058  } else {
2059  return nullptr;
2060  }
2061  }
2062  }
2063 
2064  MachineInstr *Def = LIS->getInstructionFromIndex(DefIdx);
2065 
2066  if (!Def || !MDT.dominates(Def, &Use))
2067  return nullptr;
2068 
2069  assert(Def->modifiesRegister(Reg, this));
2070 
2071  return Def;
2072 }
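
// Editorial sketch (not part of the original source): a typical use of
// findReachingDef() from a pass that has LiveIntervals available. 'TRI',
// 'MRI' and 'LIS' are assumed to be in scope, and 'UseMI' is a MachineInstr
// that reads 'Reg'; SubReg == 0 means the full register.
//
//   if (MachineInstr *DefMI =
//           TRI->findReachingDef(Reg, /*SubReg=*/0, UseMI, MRI, LIS)) {
//     // DefMI is the definition of Reg that dominates and reaches UseMI.
//   }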