1 //===-- SIRegisterInfo.cpp - SI Register Information ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// SI implementation of the TargetRegisterInfo class.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "SIRegisterInfo.h"
15 #include "AMDGPURegisterBankInfo.h"
16 #include "AMDGPUSubtarget.h"
17 #include "SIInstrInfo.h"
18 #include "SIMachineFunctionInfo.h"
19 #include "MCTargetDesc/AMDGPUInstPrinter.h"
20 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
21 #include "llvm/CodeGen/LiveIntervals.h"
22 #include "llvm/CodeGen/MachineDominators.h"
23 #include "llvm/CodeGen/MachineFrameInfo.h"
24 #include "llvm/CodeGen/MachineInstrBuilder.h"
25 #include "llvm/CodeGen/RegisterScavenging.h"
26 #include "llvm/CodeGen/SlotIndexes.h"
27 #include "llvm/IR/Function.h"
28 #include "llvm/IR/LLVMContext.h"
29 
30 using namespace llvm;
31 
32 static bool hasPressureSet(const int *PSets, unsigned PSetID) {
33  for (unsigned i = 0; PSets[i] != -1; ++i) {
34  if (PSets[i] == (int)PSetID)
35  return true;
36  }
37  return false;
38 }
39 
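// Mark PSetID in PressureSets if any register unit of Reg belongs to that
// pressure set; used by the constructor to classify pressure sets as
// SGPR/VGPR/AGPR sets.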
40 void SIRegisterInfo::classifyPressureSet(unsigned PSetID, unsigned Reg,
41  BitVector &PressureSets) const {
42  for (MCRegUnitIterator U(Reg, this); U.isValid(); ++U) {
43  const int *PSets = getRegUnitPressureSets(*U);
44  if (hasPressureSet(PSets, PSetID)) {
45  PressureSets.set(PSetID);
46  break;
47  }
48  }
49 }
50 
51 static cl::opt<bool> EnableSpillSGPRToSMEM(
52  "amdgpu-spill-sgpr-to-smem",
53  cl::desc("Use scalar stores to spill SGPRs if supported by subtarget"),
54  cl::init(false));
55 
56 static cl::opt<bool> EnableSpillSGPRToVGPR(
57  "amdgpu-spill-sgpr-to-vgpr",
58  cl::desc("Enable spilling VGPRs to SGPRs"),
59  cl::ReallyHidden,
60  cl::init(true));
61 
62 SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST) :
63  AMDGPURegisterInfo(),
64  SGPRPressureSets(getNumRegPressureSets()),
65  VGPRPressureSets(getNumRegPressureSets()),
66  AGPRPressureSets(getNumRegPressureSets()),
67  SpillSGPRToVGPR(false),
68  SpillSGPRToSMEM(false),
69  isWave32(ST.isWave32()) {
70  if (EnableSpillSGPRToSMEM && ST.hasScalarStores())
71  SpillSGPRToSMEM = true;
72  else if (EnableSpillSGPRToVGPR)
73  SpillSGPRToVGPR = true;
74 
75  unsigned NumRegPressureSets = getNumRegPressureSets();
76 
77  SGPRSetID = NumRegPressureSets;
78  VGPRSetID = NumRegPressureSets;
79  AGPRSetID = NumRegPressureSets;
80 
81  for (unsigned i = 0; i < NumRegPressureSets; ++i) {
82  classifyPressureSet(i, AMDGPU::SGPR0, SGPRPressureSets);
83  classifyPressureSet(i, AMDGPU::VGPR0, VGPRPressureSets);
84  classifyPressureSet(i, AMDGPU::AGPR0, AGPRPressureSets);
85  }
86 
87  // Determine the number of reg units for each pressure set.
88  std::vector<unsigned> PressureSetRegUnits(NumRegPressureSets, 0);
89  for (unsigned i = 0, e = getNumRegUnits(); i != e; ++i) {
90  const int *PSets = getRegUnitPressureSets(i);
91  for (unsigned j = 0; PSets[j] != -1; ++j) {
92  ++PressureSetRegUnits[PSets[j]];
93  }
94  }
95 
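// For each register file, remember the pressure set that covers the most
// register units; that set becomes the canonical SGPR/VGPR/AGPR set ID.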
96  unsigned VGPRMax = 0, SGPRMax = 0, AGPRMax = 0;
97  for (unsigned i = 0; i < NumRegPressureSets; ++i) {
98  if (isVGPRPressureSet(i) && PressureSetRegUnits[i] > VGPRMax) {
99  VGPRSetID = i;
100  VGPRMax = PressureSetRegUnits[i];
101  continue;
102  }
103  if (isSGPRPressureSet(i) && PressureSetRegUnits[i] > SGPRMax) {
104  SGPRSetID = i;
105  SGPRMax = PressureSetRegUnits[i];
106  }
107  if (isAGPRPressureSet(i) && PressureSetRegUnits[i] > AGPRMax) {
108  AGPRSetID = i;
109  AGPRMax = PressureSetRegUnits[i];
110  continue;
111  }
112  }
113 
114  assert(SGPRSetID < NumRegPressureSets &&
115  VGPRSetID < NumRegPressureSets &&
116  AGPRSetID < NumRegPressureSets);
117 }
118 
119 unsigned SIRegisterInfo::reservedPrivateSegmentBufferReg(
120  const MachineFunction &MF) const {
121 
122  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
123  unsigned BaseIdx = alignDown(ST.getMaxNumSGPRs(MF), 4) - 4;
124  unsigned BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));
125  return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
126 }
127 
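// Pick the SGPR index used for the private segment wave byte offset: either
// the alignment hole left below the reserved segment buffer, or the register
// immediately preceding it.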
128 static unsigned findPrivateSegmentWaveByteOffsetRegIndex(unsigned RegCount) {
129  unsigned Reg;
130 
131  // Try to place it in a hole after PrivateSegmentBufferReg.
132  if (RegCount & 3) {
133  // We cannot put the segment buffer in (Idx - 4) ... (Idx - 1) due to
134  // alignment constraints, so we have a hole where we can put the wave offset.
135  Reg = RegCount - 1;
136  } else {
137  // We can put the segment buffer in (Idx - 4) ... (Idx - 1) and put the
138  // wave offset before it.
139  Reg = RegCount - 5;
140  }
141 
142  return Reg;
143 }
144 
145 unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg(
146  const MachineFunction &MF) const {
147  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
148  unsigned Reg = findPrivateSegmentWaveByteOffsetRegIndex(ST.getMaxNumSGPRs(MF));
149  return AMDGPU::SGPR_32RegClass.getRegister(Reg);
150 }
151 
152 BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
153  BitVector Reserved(getNumRegs());
154 
155  // EXEC_LO and EXEC_HI could be allocated and used as regular registers, but
156  // this seems likely to result in bugs, so I'm marking them as reserved.
157  reserveRegisterTuples(Reserved, AMDGPU::EXEC);
158  reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR);
159 
160  // M0 has to be reserved so that llvm accepts it as a live-in into a block.
161  reserveRegisterTuples(Reserved, AMDGPU::M0);
162 
163  // Reserve src_vccz, src_execz, src_scc.
164  reserveRegisterTuples(Reserved, AMDGPU::SRC_VCCZ);
165  reserveRegisterTuples(Reserved, AMDGPU::SRC_EXECZ);
166  reserveRegisterTuples(Reserved, AMDGPU::SRC_SCC);
167 
168  // Reserve the memory aperture registers.
169  reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_BASE);
170  reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_LIMIT);
171  reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_BASE);
172  reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_LIMIT);
173 
174  // Reserve src_pops_exiting_wave_id - support is not implemented in Codegen.
175  reserveRegisterTuples(Reserved, AMDGPU::SRC_POPS_EXITING_WAVE_ID);
176 
177  // Reserve xnack_mask registers - support is not implemented in Codegen.
178  reserveRegisterTuples(Reserved, AMDGPU::XNACK_MASK);
179 
180  // Reserve lds_direct register - support is not implemented in Codegen.
181  reserveRegisterTuples(Reserved, AMDGPU::LDS_DIRECT);
182 
183  // Reserve Trap Handler registers - support is not implemented in Codegen.
184  reserveRegisterTuples(Reserved, AMDGPU::TBA);
185  reserveRegisterTuples(Reserved, AMDGPU::TMA);
186  reserveRegisterTuples(Reserved, AMDGPU::TTMP0_TTMP1);
187  reserveRegisterTuples(Reserved, AMDGPU::TTMP2_TTMP3);
188  reserveRegisterTuples(Reserved, AMDGPU::TTMP4_TTMP5);
189  reserveRegisterTuples(Reserved, AMDGPU::TTMP6_TTMP7);
190  reserveRegisterTuples(Reserved, AMDGPU::TTMP8_TTMP9);
191  reserveRegisterTuples(Reserved, AMDGPU::TTMP10_TTMP11);
192  reserveRegisterTuples(Reserved, AMDGPU::TTMP12_TTMP13);
193  reserveRegisterTuples(Reserved, AMDGPU::TTMP14_TTMP15);
194 
195  // Reserve null register - it shall never be allocated
196  reserveRegisterTuples(Reserved, AMDGPU::SGPR_NULL);
197 
198  // Disallow vcc_hi allocation in wave32. It may be allocated but most likely
199  // will result in bugs.
200  if (isWave32) {
201  Reserved.set(AMDGPU::VCC);
202  Reserved.set(AMDGPU::VCC_HI);
203  }
204 
205  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
206 
207  unsigned MaxNumSGPRs = ST.getMaxNumSGPRs(MF);
208  unsigned TotalNumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
209  for (unsigned i = MaxNumSGPRs; i < TotalNumSGPRs; ++i) {
210  unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
211  reserveRegisterTuples(Reserved, Reg);
212  }
213 
214  unsigned MaxNumVGPRs = ST.getMaxNumVGPRs(MF);
215  unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
216  for (unsigned i = MaxNumVGPRs; i < TotalNumVGPRs; ++i) {
217  unsigned Reg = AMDGPU::VGPR_32RegClass.getRegister(i);
218  reserveRegisterTuples(Reserved, Reg);
219  Reg = AMDGPU::AGPR_32RegClass.getRegister(i);
220  reserveRegisterTuples(Reserved, Reg);
221  }
222 
223  // Reserve all remaining AGPRs if there are no instructions to use them.
224  if (!ST.hasMAIInsts()) {
225  for (unsigned i = 0; i < MaxNumVGPRs; ++i) {
226  unsigned Reg = AMDGPU::AGPR_32RegClass.getRegister(i);
227  reserveRegisterTuples(Reserved, Reg);
228  }
229  }
230 
231  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
232 
233  unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();
234  if (ScratchWaveOffsetReg != AMDGPU::NoRegister) {
235  // Reserve 1 SGPR for scratch wave offset in case we need to spill.
236  reserveRegisterTuples(Reserved, ScratchWaveOffsetReg);
237  }
238 
239  unsigned ScratchRSrcReg = MFI->getScratchRSrcReg();
240  if (ScratchRSrcReg != AMDGPU::NoRegister) {
241  // Reserve 4 SGPRs for the scratch buffer resource descriptor in case we need
242  // to spill.
243  // TODO: May need to reserve a VGPR if doing LDS spilling.
244  reserveRegisterTuples(Reserved, ScratchRSrcReg);
245  assert(!isSubRegister(ScratchRSrcReg, ScratchWaveOffsetReg));
246  }
247 
248  // We have to assume the SP is needed in case there are calls in the function,
249  // which is detected after the function is lowered. If we aren't really going
250  // to need SP, don't bother reserving it.
251  unsigned StackPtrReg = MFI->getStackPtrOffsetReg();
252 
253  if (StackPtrReg != AMDGPU::NoRegister) {
254  reserveRegisterTuples(Reserved, StackPtrReg);
255  assert(!isSubRegister(ScratchRSrcReg, StackPtrReg));
256  }
257 
258  unsigned FrameReg = MFI->getFrameOffsetReg();
259  if (FrameReg != AMDGPU::NoRegister) {
260  reserveRegisterTuples(Reserved, FrameReg);
261  assert(!isSubRegister(ScratchRSrcReg, FrameReg));
262  }
263 
264  for (unsigned Reg : MFI->WWMReservedRegs) {
265  reserveRegisterTuples(Reserved, Reg);
266  }
267 
268  // FIXME: Stop using reserved registers for this.
269  for (MCPhysReg Reg : MFI->getAGPRSpillVGPRs())
270  reserveRegisterTuples(Reserved, Reg);
271 
272  for (MCPhysReg Reg : MFI->getVGPRSpillAGPRs())
273  reserveRegisterTuples(Reserved, Reg);
274 
275  return Reserved;
276 }
277 
278 bool SIRegisterInfo::canRealignStack(const MachineFunction &MF) const {
279  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
280  // On entry, the base address is 0, so it can't possibly need any more
281  // alignment.
282 
283  // FIXME: Should be able to specify the entry frame alignment per calling
284  // convention instead.
285  if (Info->isEntryFunction())
286  return false;
287 
288  return TargetRegisterInfo::canRealignStack(MF);
289 }
290 
291 bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const {
292  const SIMachineFunctionInfo *Info = Fn.getInfo<SIMachineFunctionInfo>();
293  if (Info->isEntryFunction()) {
294  const MachineFrameInfo &MFI = Fn.getFrameInfo();
295  return MFI.hasStackObjects() || MFI.hasCalls();
296  }
297 
298  // May need scavenger for dealing with callee saved registers.
299  return true;
300 }
301 
302 bool SIRegisterInfo::requiresFrameIndexScavenging(
303  const MachineFunction &MF) const {
304  const MachineFrameInfo &MFI = MF.getFrameInfo();
305  if (MFI.hasStackObjects())
306  return true;
307 
308  // May need to deal with callee saved registers.
309  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
310  return !Info->isEntryFunction();
311 }
312 
313 bool SIRegisterInfo::requiresFrameIndexReplacementScavenging(
314  const MachineFunction &MF) const {
315  const MachineFrameInfo &MFI = MF.getFrameInfo();
316  if (!MFI.hasStackObjects())
317  return false;
318 
319  // The scavenger is used for large frames which may require finding a free
320  // register for large offsets.
321  if (!isUInt<12>(MFI.getStackSize()))
322  return true;
323 
324  // If using scalar stores, for spills, m0 is needed for the scalar store
325  // offset (pre-GFX9). m0 is unallocatable, so we can't create a virtual
326  // register for it during frame index elimination, so the scavenger is
327  // directly needed.
328  return MF.getSubtarget<GCNSubtarget>().hasScalarStores() &&
329  MF.getInfo<SIMachineFunctionInfo>()->hasSpilledSGPRs();
330 }
331 
332 bool SIRegisterInfo::requiresVirtualBaseRegisters(
333  const MachineFunction &) const {
334  // There are no special dedicated stack or frame pointers.
335  return true;
336 }
337 
338 bool SIRegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
339  // This helps catch bugs as verifier errors.
340  return true;
341 }
342 
343 int64_t SIRegisterInfo::getMUBUFInstrOffset(const MachineInstr *MI) const {
344  assert(SIInstrInfo::isMUBUF(*MI));
345 
346  int OffIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
347  AMDGPU::OpName::offset);
348  return MI->getOperand(OffIdx).getImm();
349 }
350 
351 int64_t SIRegisterInfo::getFrameIndexInstrOffset(const MachineInstr *MI,
352  int Idx) const {
353  if (!SIInstrInfo::isMUBUF(*MI))
354  return 0;
355 
356  assert(Idx == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
357  AMDGPU::OpName::vaddr) &&
358  "Should never see frame index on non-address operand");
359 
360  return getMUBUFInstrOffset(MI);
361 }
362 
363 bool SIRegisterInfo::needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
364  if (!MI->mayLoadOrStore())
365  return false;
366 
367  int64_t FullOffset = Offset + getMUBUFInstrOffset(MI);
368 
369  return !isUInt<12>(FullOffset);
370 }
371 
372 void SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
373  unsigned BaseReg,
374  int FrameIdx,
375  int64_t Offset) const {
376  MachineBasicBlock::iterator Ins = MBB->begin();
377  DebugLoc DL; // Defaults to "unknown"
378 
379  if (Ins != MBB->end())
380  DL = Ins->getDebugLoc();
381 
382  MachineFunction *MF = MBB->getParent();
383  const GCNSubtarget &Subtarget = MF->getSubtarget<GCNSubtarget>();
384  const SIInstrInfo *TII = Subtarget.getInstrInfo();
385 
386  if (Offset == 0) {
387  BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), BaseReg)
388  .addFrameIndex(FrameIdx);
389  return;
390  }
391 
392  MachineRegisterInfo &MRI = MF->getRegInfo();
393  Register OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
394 
395  Register FIReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
396 
397  BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
398  .addImm(Offset);
399  BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), FIReg)
400  .addFrameIndex(FrameIdx);
401 
402  TII->getAddNoCarry(*MBB, Ins, DL, BaseReg)
403  .addReg(OffsetReg, RegState::Kill)
404  .addReg(FIReg)
405  .addImm(0); // clamp bit
406 }
407 
408 void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
409  int64_t Offset) const {
410 
411  MachineBasicBlock *MBB = MI.getParent();
412  MachineFunction *MF = MBB->getParent();
413  const GCNSubtarget &Subtarget = MF->getSubtarget<GCNSubtarget>();
414  const SIInstrInfo *TII = Subtarget.getInstrInfo();
415 
416 #ifndef NDEBUG
417  // FIXME: Is it possible to be storing a frame index to itself?
418  bool SeenFI = false;
419  for (const MachineOperand &MO: MI.operands()) {
420  if (MO.isFI()) {
421  if (SeenFI)
422  llvm_unreachable("should not see multiple frame indices");
423 
424  SeenFI = true;
425  }
426  }
427 #endif
428 
429  MachineOperand *FIOp = TII->getNamedOperand(MI, AMDGPU::OpName::vaddr);
430  assert(FIOp && FIOp->isFI() && "frame index must be address operand");
431  assert(TII->isMUBUF(MI));
432  assert(TII->getNamedOperand(MI, AMDGPU::OpName::soffset)->getReg() ==
433  MF->getInfo<SIMachineFunctionInfo>()->getFrameOffsetReg() &&
434  "should only be seeing frame offset relative FrameIndex");
435 
436 
437  MachineOperand *OffsetOp = TII->getNamedOperand(MI, AMDGPU::OpName::offset);
438  int64_t NewOffset = OffsetOp->getImm() + Offset;
439  assert(isUInt<12>(NewOffset) && "offset should be legal");
440 
441  FIOp->ChangeToRegister(BaseReg, false);
442  OffsetOp->setImm(NewOffset);
443 }
444 
445 bool SIRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
446  unsigned BaseReg,
447  int64_t Offset) const {
448  if (!SIInstrInfo::isMUBUF(*MI))
449  return false;
450 
451  int64_t NewOffset = Offset + getMUBUFInstrOffset(MI);
452 
453  return isUInt<12>(NewOffset);
454 }
455 
456 const TargetRegisterClass *SIRegisterInfo::getPointerRegClass(
457  const MachineFunction &MF, unsigned Kind) const {
458  // This is inaccurate. It depends on the instruction and address space. The
459  // only place where we should hit this is for dealing with frame indexes /
460  // private accesses, so this is correct in that case.
461  return &AMDGPU::VGPR_32RegClass;
462 }
463 
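// Number of 32-bit subregisters covered by a spill save/restore pseudo of the
// given opcode.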
464 static unsigned getNumSubRegsForSpillOp(unsigned Op) {
465 
466  switch (Op) {
467  case AMDGPU::SI_SPILL_S1024_SAVE:
468  case AMDGPU::SI_SPILL_S1024_RESTORE:
469  case AMDGPU::SI_SPILL_V1024_SAVE:
470  case AMDGPU::SI_SPILL_V1024_RESTORE:
471  case AMDGPU::SI_SPILL_A1024_SAVE:
472  case AMDGPU::SI_SPILL_A1024_RESTORE:
473  return 32;
474  case AMDGPU::SI_SPILL_S512_SAVE:
475  case AMDGPU::SI_SPILL_S512_RESTORE:
476  case AMDGPU::SI_SPILL_V512_SAVE:
477  case AMDGPU::SI_SPILL_V512_RESTORE:
478  case AMDGPU::SI_SPILL_A512_SAVE:
479  case AMDGPU::SI_SPILL_A512_RESTORE:
480  return 16;
481  case AMDGPU::SI_SPILL_S256_SAVE:
482  case AMDGPU::SI_SPILL_S256_RESTORE:
483  case AMDGPU::SI_SPILL_V256_SAVE:
484  case AMDGPU::SI_SPILL_V256_RESTORE:
485  return 8;
486  case AMDGPU::SI_SPILL_S160_SAVE:
487  case AMDGPU::SI_SPILL_S160_RESTORE:
488  case AMDGPU::SI_SPILL_V160_SAVE:
489  case AMDGPU::SI_SPILL_V160_RESTORE:
490  return 5;
491  case AMDGPU::SI_SPILL_S128_SAVE:
492  case AMDGPU::SI_SPILL_S128_RESTORE:
493  case AMDGPU::SI_SPILL_V128_SAVE:
494  case AMDGPU::SI_SPILL_V128_RESTORE:
495  case AMDGPU::SI_SPILL_A128_SAVE:
496  case AMDGPU::SI_SPILL_A128_RESTORE:
497  return 4;
498  case AMDGPU::SI_SPILL_S96_SAVE:
499  case AMDGPU::SI_SPILL_S96_RESTORE:
500  case AMDGPU::SI_SPILL_V96_SAVE:
501  case AMDGPU::SI_SPILL_V96_RESTORE:
502  return 3;
503  case AMDGPU::SI_SPILL_S64_SAVE:
504  case AMDGPU::SI_SPILL_S64_RESTORE:
505  case AMDGPU::SI_SPILL_V64_SAVE:
506  case AMDGPU::SI_SPILL_V64_RESTORE:
507  case AMDGPU::SI_SPILL_A64_SAVE:
508  case AMDGPU::SI_SPILL_A64_RESTORE:
509  return 2;
510  case AMDGPU::SI_SPILL_S32_SAVE:
511  case AMDGPU::SI_SPILL_S32_RESTORE:
512  case AMDGPU::SI_SPILL_V32_SAVE:
513  case AMDGPU::SI_SPILL_V32_RESTORE:
514  case AMDGPU::SI_SPILL_A32_SAVE:
515  case AMDGPU::SI_SPILL_A32_RESTORE:
516  return 1;
517  default: llvm_unreachable("Invalid spill opcode");
518  }
519 }
520 
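// Map an OFFEN MUBUF store opcode to its OFFSET form, or return -1 if no such
// form exists.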
521 static int getOffsetMUBUFStore(unsigned Opc) {
522  switch (Opc) {
523  case AMDGPU::BUFFER_STORE_DWORD_OFFEN:
524  return AMDGPU::BUFFER_STORE_DWORD_OFFSET;
525  case AMDGPU::BUFFER_STORE_BYTE_OFFEN:
526  return AMDGPU::BUFFER_STORE_BYTE_OFFSET;
527  case AMDGPU::BUFFER_STORE_SHORT_OFFEN:
528  return AMDGPU::BUFFER_STORE_SHORT_OFFSET;
529  case AMDGPU::BUFFER_STORE_DWORDX2_OFFEN:
530  return AMDGPU::BUFFER_STORE_DWORDX2_OFFSET;
531  case AMDGPU::BUFFER_STORE_DWORDX4_OFFEN:
532  return AMDGPU::BUFFER_STORE_DWORDX4_OFFSET;
533  case AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFEN:
534  return AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFSET;
535  case AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFEN:
536  return AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFSET;
537  default:
538  return -1;
539  }
540 }
541 
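// Map an OFFEN MUBUF load opcode to its OFFSET form, or return -1 if no such
// form exists.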
542 static int getOffsetMUBUFLoad(unsigned Opc) {
543  switch (Opc) {
544  case AMDGPU::BUFFER_LOAD_DWORD_OFFEN:
545  return AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
546  case AMDGPU::BUFFER_LOAD_UBYTE_OFFEN:
547  return AMDGPU::BUFFER_LOAD_UBYTE_OFFSET;
548  case AMDGPU::BUFFER_LOAD_SBYTE_OFFEN:
549  return AMDGPU::BUFFER_LOAD_SBYTE_OFFSET;
550  case AMDGPU::BUFFER_LOAD_USHORT_OFFEN:
551  return AMDGPU::BUFFER_LOAD_USHORT_OFFSET;
552  case AMDGPU::BUFFER_LOAD_SSHORT_OFFEN:
553  return AMDGPU::BUFFER_LOAD_SSHORT_OFFSET;
554  case AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN:
555  return AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET;
556  case AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN:
557  return AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET;
558  case AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFEN:
559  return AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFSET;
560  case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN:
561  return AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET;
562  case AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFEN:
563  return AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFSET;
564  case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN:
565  return AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET;
566  case AMDGPU::BUFFER_LOAD_SHORT_D16_OFFEN:
567  return AMDGPU::BUFFER_LOAD_SHORT_D16_OFFSET;
568  case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN:
569  return AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET;
570  default:
571  return -1;
572  }
573 }
574 
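// If an AGPR (or VGPR) has been assigned for this frame index and lane, emit
// the V_ACCVGPR_READ/WRITE copy for the spill or reload and return it;
// otherwise return an empty builder so the caller falls back to memory.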
575 static MachineInstrBuilder spillVGPRtoAGPR(MachineBasicBlock::iterator MI,
576  int Index,
577  unsigned Lane,
578  unsigned ValueReg,
579  bool IsKill) {
580  MachineBasicBlock *MBB = MI->getParent();
581  MachineFunction *MF = MI->getParent()->getParent();
582  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
583  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
584  const SIInstrInfo *TII = ST.getInstrInfo();
585 
586  MCPhysReg Reg = MFI->getVGPRToAGPRSpill(Index, Lane);
587 
588  if (Reg == AMDGPU::NoRegister)
589  return MachineInstrBuilder();
590 
591  bool IsStore = MI->mayStore();
592  MachineRegisterInfo &MRI = MF->getRegInfo();
593  auto *TRI = static_cast<const SIRegisterInfo*>(MRI.getTargetRegisterInfo());
594 
595  unsigned Dst = IsStore ? Reg : ValueReg;
596  unsigned Src = IsStore ? ValueReg : Reg;
597  unsigned Opc = (IsStore ^ TRI->isVGPR(MRI, Reg)) ? AMDGPU::V_ACCVGPR_WRITE_B32
598  : AMDGPU::V_ACCVGPR_READ_B32;
599 
600  return BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(Opc), Dst)
601  .addReg(Src, getKillRegState(IsKill));
602 }
603 
604 // This differs from buildSpillLoadStore by only scavenging a VGPR. It does not
605 // need to handle the case where an SGPR may need to be spilled while spilling.
606 static bool buildMUBUFOffsetLoadStore(const SIInstrInfo *TII,
607  MachineFrameInfo &MFI,
608  MachineBasicBlock::iterator MI,
609  int Index,
610  int64_t Offset) {
611  MachineBasicBlock *MBB = MI->getParent();
612  const DebugLoc &DL = MI->getDebugLoc();
613  bool IsStore = MI->mayStore();
614 
615  unsigned Opc = MI->getOpcode();
616  int LoadStoreOp = IsStore ?
617  getOffsetMUBUFStore(Opc) : getOffsetMUBUFLoad(Opc);
618  if (LoadStoreOp == -1)
619  return false;
620 
621  const MachineOperand *Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata);
622  if (spillVGPRtoAGPR(MI, Index, 0, Reg->getReg(), false).getInstr())
623  return true;
624 
625  MachineInstrBuilder NewMI =
626  BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
627  .add(*Reg)
628  .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc))
629  .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset))
630  .addImm(Offset)
631  .addImm(0) // glc
632  .addImm(0) // slc
633  .addImm(0) // tfe
634  .addImm(0) // dlc
635  .cloneMemRefs(*MI);
636 
637  const MachineOperand *VDataIn = TII->getNamedOperand(*MI,
638  AMDGPU::OpName::vdata_in);
639  if (VDataIn)
640  NewMI.add(*VDataIn);
641  return true;
642 }
643 
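// Expand a VGPR spill/reload pseudo into per-dword MUBUF operations against
// the scratch resource descriptor, adjusting the scratch offset register when
// the offset does not fit in the 12-bit immediate.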
644 void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
645  unsigned LoadStoreOp,
646  int Index,
647  unsigned ValueReg,
648  bool IsKill,
649  unsigned ScratchRsrcReg,
650  unsigned ScratchOffsetReg,
651  int64_t InstOffset,
652  MachineMemOperand *MMO,
653  RegScavenger *RS) const {
654  MachineBasicBlock *MBB = MI->getParent();
655  MachineFunction *MF = MI->getParent()->getParent();
656  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
657  const SIInstrInfo *TII = ST.getInstrInfo();
658  const MachineFrameInfo &MFI = MF->getFrameInfo();
659 
660  const MCInstrDesc &Desc = TII->get(LoadStoreOp);
661  const DebugLoc &DL = MI->getDebugLoc();
662  bool IsStore = Desc.mayStore();
663 
664  bool Scavenged = false;
665  unsigned SOffset = ScratchOffsetReg;
666 
667  const unsigned EltSize = 4;
668  const TargetRegisterClass *RC = getRegClassForReg(MF->getRegInfo(), ValueReg);
669  unsigned NumSubRegs = AMDGPU::getRegBitWidth(RC->getID()) / (EltSize * CHAR_BIT);
670  unsigned Size = NumSubRegs * EltSize;
671  int64_t Offset = InstOffset + MFI.getObjectOffset(Index);
672  int64_t ScratchOffsetRegDelta = 0;
673 
674  unsigned Align = MFI.getObjectAlignment(Index);
675  const MachinePointerInfo &BasePtrInfo = MMO->getPointerInfo();
676 
677  Register TmpReg =
678  hasAGPRs(RC) ? TII->getNamedOperand(*MI, AMDGPU::OpName::tmp)->getReg()
679  : Register();
680 
681  assert((Offset % EltSize) == 0 && "unexpected VGPR spill offset");
682 
683  if (!isUInt<12>(Offset + Size - EltSize)) {
684  SOffset = AMDGPU::NoRegister;
685 
686  // We currently only support spilling VGPRs to EltSize boundaries, meaning
687  // we can simplify the adjustment of Offset here to just scale with
688  // WavefrontSize.
689  Offset *= ST.getWavefrontSize();
690 
691  // We don't have access to the register scavenger if this function is called
692  // during PEI::scavengeFrameVirtualRegs().
693  if (RS)
694  SOffset = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, MI, 0, false);
695 
696  if (SOffset == AMDGPU::NoRegister) {
697  // There are no free SGPRs, and we are already in the process of
698  // spilling VGPRs. Since we need a VGPR in order to spill SGPRs (this is
699  // true on SI/CI, and on VI until spilling using scalar stores is
700  // implemented), we have no way to free up an SGPR. Our solution here is
701  // to add the offset directly to the ScratchOffset register, and then
702  // subtract the offset after the spill to return ScratchOffset to its
703  // original value.
704  SOffset = ScratchOffsetReg;
705  ScratchOffsetRegDelta = Offset;
706  } else {
707  Scavenged = true;
708  }
709 
710  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), SOffset)
711  .addReg(ScratchOffsetReg)
712  .addImm(Offset);
713 
714  Offset = 0;
715  }
716 
717  for (unsigned i = 0, e = NumSubRegs; i != e; ++i, Offset += EltSize) {
718  Register SubReg = NumSubRegs == 1
719  ? Register(ValueReg)
720  : getSubReg(ValueReg, getSubRegFromChannel(i));
721 
722  unsigned SOffsetRegState = 0;
723  unsigned SrcDstRegState = getDefRegState(!IsStore);
724  if (i + 1 == e) {
725  SOffsetRegState |= getKillRegState(Scavenged);
726  // The last implicit use carries the "Kill" flag.
727  SrcDstRegState |= getKillRegState(IsKill);
728  }
729 
730  auto MIB = spillVGPRtoAGPR(MI, Index, i, SubReg, IsKill);
731 
732  if (!MIB.getInstr()) {
733  unsigned FinalReg = SubReg;
734  if (TmpReg != AMDGPU::NoRegister) {
735  if (IsStore)
736  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ACCVGPR_READ_B32), TmpReg)
737  .addReg(SubReg, getKillRegState(IsKill));
738  SubReg = TmpReg;
739  }
740 
741  MachinePointerInfo PInfo = BasePtrInfo.getWithOffset(EltSize * i);
742  MachineMemOperand *NewMMO
743  = MF->getMachineMemOperand(PInfo, MMO->getFlags(),
744  EltSize, MinAlign(Align, EltSize * i));
745 
746  MIB = BuildMI(*MBB, MI, DL, Desc)
747  .addReg(SubReg, getDefRegState(!IsStore) | getKillRegState(IsKill))
748  .addReg(ScratchRsrcReg)
749  .addReg(SOffset, SOffsetRegState)
750  .addImm(Offset)
751  .addImm(0) // glc
752  .addImm(0) // slc
753  .addImm(0) // tfe
754  .addImm(0) // dlc
755  .addMemOperand(NewMMO);
756 
757  if (!IsStore && TmpReg != AMDGPU::NoRegister)
758  MIB = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ACCVGPR_WRITE_B32),
759  FinalReg)
760  .addReg(TmpReg, RegState::Kill);
761  }
762 
763  if (NumSubRegs > 1)
764  MIB.addReg(ValueReg, RegState::Implicit | SrcDstRegState);
765  }
766 
767  if (ScratchOffsetRegDelta != 0) {
768  // Subtract the offset we added to the ScratchOffset register.
769  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), ScratchOffsetReg)
770  .addReg(ScratchOffsetReg)
771  .addImm(ScratchOffsetRegDelta);
772  }
773 }
774 
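// Choose the widest scalar buffer load/store that evenly divides the
// super-register size; returns {element size in bytes, opcode}.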
775 static std::pair<unsigned, unsigned> getSpillEltSize(unsigned SuperRegSize,
776  bool Store) {
777  if (SuperRegSize % 16 == 0) {
778  return { 16, Store ? AMDGPU::S_BUFFER_STORE_DWORDX4_SGPR :
779  AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR };
780  }
781 
782  if (SuperRegSize % 8 == 0) {
783  return { 8, Store ? AMDGPU::S_BUFFER_STORE_DWORDX2_SGPR :
784  AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR };
785  }
786 
787  return { 4, Store ? AMDGPU::S_BUFFER_STORE_DWORD_SGPR :
788  AMDGPU::S_BUFFER_LOAD_DWORD_SGPR};
789 }
790 
792  int Index,
793  RegScavenger *RS,
794  bool OnlyToVGPR) const {
795  MachineBasicBlock *MBB = MI->getParent();
796  MachineFunction *MF = MBB->getParent();
797  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
798  DenseSet<unsigned> SGPRSpillVGPRDefinedSet;
799 
800  ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills
801  = MFI->getSGPRToVGPRSpills(Index);
802  bool SpillToVGPR = !VGPRSpills.empty();
803  if (OnlyToVGPR && !SpillToVGPR)
804  return false;
805 
806  MachineRegisterInfo &MRI = MF->getRegInfo();
807  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
808  const SIInstrInfo *TII = ST.getInstrInfo();
809 
810  Register SuperReg = MI->getOperand(0).getReg();
811  bool IsKill = MI->getOperand(0).isKill();
812  const DebugLoc &DL = MI->getDebugLoc();
813 
814  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
815 
816  bool SpillToSMEM = spillSGPRToSMEM();
817  if (SpillToSMEM && OnlyToVGPR)
818  return false;
819 
820  Register FrameReg = getFrameRegister(*MF);
821 
822  assert(SpillToVGPR || (SuperReg != MFI->getStackPtrOffsetReg() &&
823  SuperReg != MFI->getFrameOffsetReg() &&
824  SuperReg != MFI->getScratchWaveOffsetReg()));
825 
826  assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
827 
828  unsigned OffsetReg = AMDGPU::M0;
829  unsigned M0CopyReg = AMDGPU::NoRegister;
830 
831  if (SpillToSMEM) {
832  if (RS->isRegUsed(AMDGPU::M0)) {
833  M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
834  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
835  .addReg(AMDGPU::M0);
836  }
837  }
838 
839  unsigned ScalarStoreOp;
840  unsigned EltSize = 4;
841  const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
842  if (SpillToSMEM && isSGPRClass(RC)) {
843  // XXX - if private_element_size is larger than 4 it might be useful to be
844  // able to spill wider vmem spills.
845  std::tie(EltSize, ScalarStoreOp) =
846  getSpillEltSize(getRegSizeInBits(*RC) / 8, true);
847  }
848 
849  ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
850  unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
851 
852  // SubReg carries the "Kill" flag when SubReg == SuperReg.
853  unsigned SubKillState = getKillRegState((NumSubRegs == 1) && IsKill);
854  for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
855  Register SubReg =
856  NumSubRegs == 1 ? SuperReg : getSubReg(SuperReg, SplitParts[i]);
857 
858  if (SpillToSMEM) {
859  int64_t FrOffset = FrameInfo.getObjectOffset(Index);
860 
861  // The allocated memory size is really the wavefront size * the frame
862  // index size. The widest register class is 64 bytes, so a 4-byte scratch
863  // allocation is enough to spill this in a single stack object.
864  //
865  // FIXME: Frame size/offsets are computed earlier than this, so the extra
866  // space is still unnecessarily allocated.
867 
868  unsigned Align = FrameInfo.getObjectAlignment(Index);
869  MachinePointerInfo PtrInfo
870  = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
871  MachineMemOperand *MMO
872  = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
873  EltSize, MinAlign(Align, EltSize * i));
874 
875  // SMEM instructions only support a single offset, so increment the wave
876  // offset.
877 
878  int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
879  if (Offset != 0) {
880  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
881  .addReg(FrameReg)
882  .addImm(Offset);
883  } else {
884  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
885  .addReg(FrameReg);
886  }
887 
888  BuildMI(*MBB, MI, DL, TII->get(ScalarStoreOp))
889  .addReg(SubReg, getKillRegState(IsKill)) // sdata
890  .addReg(MFI->getScratchRSrcReg()) // sbase
891  .addReg(OffsetReg, RegState::Kill) // soff
892  .addImm(0) // glc
893  .addImm(0) // dlc
894  .addMemOperand(MMO);
895 
896  continue;
897  }
898 
899  if (SpillToVGPR) {
900  SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
901 
902  // During SGPR spilling to VGPR, determine if the VGPR is defined. The
903  // only circumstance in which we say it is undefined is when it is the
904  // first spill to this VGPR in the first basic block.
905  bool VGPRDefined = true;
906  if (MBB == &MF->front())
907  VGPRDefined = !SGPRSpillVGPRDefinedSet.insert(Spill.VGPR).second;
908 
909  // Mark the "old value of vgpr" input undef only if this is the first sgpr
910  // spill to this specific vgpr in the first basic block.
911  BuildMI(*MBB, MI, DL,
912  TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
913  Spill.VGPR)
914  .addReg(SubReg, getKillRegState(IsKill))
915  .addImm(Spill.Lane)
916  .addReg(Spill.VGPR, VGPRDefined ? 0 : RegState::Undef);
917 
918  // FIXME: Since this spills to another register instead of an actual
919  // frame index, we should delete the frame index when all references to
920  // it are fixed.
921  } else {
922  // XXX - Can a spill to VGPR fail for some subregisters but not others?
923  if (OnlyToVGPR)
924  return false;
925 
926  // Spill SGPR to a frame index.
927  // TODO: Should VI try to spill to VGPR and then spill to SMEM?
928  Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
930 
931  MachineInstrBuilder Mov
932  = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
933  .addReg(SubReg, SubKillState);
934 
935 
936  // There could be undef components of a spilled super register.
937  // TODO: Can we detect this and skip the spill?
938  if (NumSubRegs > 1) {
939  // The last implicit use of the SuperReg carries the "Kill" flag.
940  unsigned SuperKillState = 0;
941  if (i + 1 == e)
942  SuperKillState |= getKillRegState(IsKill);
943  Mov.addReg(SuperReg, RegState::Implicit | SuperKillState);
944  }
945 
946  unsigned Align = FrameInfo.getObjectAlignment(Index);
947  MachinePointerInfo PtrInfo
948  = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
949  MachineMemOperand *MMO
950  = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
951  EltSize, MinAlign(Align, EltSize * i));
952  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_SAVE))
953  .addReg(TmpReg, RegState::Kill) // src
954  .addFrameIndex(Index) // vaddr
955  .addReg(MFI->getScratchRSrcReg()) // srsrc
956  .addReg(MFI->getStackPtrOffsetReg()) // soffset
957  .addImm(i * 4) // offset
958  .addMemOperand(MMO);
959  }
960  }
961 
962  if (M0CopyReg != AMDGPU::NoRegister) {
963  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
964  .addReg(M0CopyReg, RegState::Kill);
965  }
966 
967  MI->eraseFromParent();
968  MFI->addToSpilledSGPRs(NumSubRegs);
969  return true;
970 }
971 
972 bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
973  int Index,
974  RegScavenger *RS,
975  bool OnlyToVGPR) const {
976  MachineFunction *MF = MI->getParent()->getParent();
977  MachineRegisterInfo &MRI = MF->getRegInfo();
978  MachineBasicBlock *MBB = MI->getParent();
979  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
980 
981  ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills
982  = MFI->getSGPRToVGPRSpills(Index);
983  bool SpillToVGPR = !VGPRSpills.empty();
984  if (OnlyToVGPR && !SpillToVGPR)
985  return false;
986 
987  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
988  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
989  const SIInstrInfo *TII = ST.getInstrInfo();
990  const DebugLoc &DL = MI->getDebugLoc();
991 
992  Register SuperReg = MI->getOperand(0).getReg();
993  bool SpillToSMEM = spillSGPRToSMEM();
994  if (SpillToSMEM && OnlyToVGPR)
995  return false;
996 
997  assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
998 
999  unsigned OffsetReg = AMDGPU::M0;
1000  unsigned M0CopyReg = AMDGPU::NoRegister;
1001 
1002  if (SpillToSMEM) {
1003  if (RS->isRegUsed(AMDGPU::M0)) {
1004  M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
1005  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
1006  .addReg(AMDGPU::M0);
1007  }
1008  }
1009 
1010  unsigned EltSize = 4;
1011  unsigned ScalarLoadOp;
1012 
1013  Register FrameReg = getFrameRegister(*MF);
1014 
1015  const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
1016  if (SpillToSMEM && isSGPRClass(RC)) {
1017  // XXX - if private_element_size is larger than 4 it might be useful to be
1018  // able to spill wider vmem spills.
1019  std::tie(EltSize, ScalarLoadOp) =
1020  getSpillEltSize(getRegSizeInBits(*RC) / 8, false);
1021  }
1022 
1023  ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
1024  unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
1025 
1026  // SubReg carries the "Kill" flag when SubReg == SuperReg.
1027  int64_t FrOffset = FrameInfo.getObjectOffset(Index);
1028 
1029  for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
1030  Register SubReg =
1031  NumSubRegs == 1 ? SuperReg : getSubReg(SuperReg, SplitParts[i]);
1032 
1033  if (SpillToSMEM) {
1034  // FIXME: Size may be > 4, but the extra bytes are wasted.
1035  unsigned Align = FrameInfo.getObjectAlignment(Index);
1036  MachinePointerInfo PtrInfo
1037  = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
1038  MachineMemOperand *MMO
1039  = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
1040  EltSize, MinAlign(Align, EltSize * i));
1041 
1042  // Add i * 4 offset
1043  int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
1044  if (Offset != 0) {
1045  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
1046  .addReg(FrameReg)
1047  .addImm(Offset);
1048  } else {
1049  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
1050  .addReg(FrameReg);
1051  }
1052 
1053  auto MIB =
1054  BuildMI(*MBB, MI, DL, TII->get(ScalarLoadOp), SubReg)
1055  .addReg(MFI->getScratchRSrcReg()) // sbase
1056  .addReg(OffsetReg, RegState::Kill) // soff
1057  .addImm(0) // glc
1058  .addImm(0) // dlc
1059  .addMemOperand(MMO);
1060 
1061  if (NumSubRegs > 1 && i == 0)
1062  MIB.addReg(SuperReg, RegState::ImplicitDefine);
1063 
1064  continue;
1065  }
1066 
1067  if (SpillToVGPR) {
1068  SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
1069  auto MIB =
1070  BuildMI(*MBB, MI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
1071  SubReg)
1072  .addReg(Spill.VGPR)
1073  .addImm(Spill.Lane);
1074 
1075  if (NumSubRegs > 1 && i == 0)
1076  MIB.addReg(SuperReg, RegState::ImplicitDefine);
1077  } else {
1078  if (OnlyToVGPR)
1079  return false;
1080 
1081  // Restore SGPR from a stack slot.
1082  // FIXME: We should use S_LOAD_DWORD here for VI.
1083  Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1084  unsigned Align = FrameInfo.getObjectAlignment(Index);
1085 
1086  MachinePointerInfo PtrInfo
1087  = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
1088 
1089  MachineMemOperand *MMO = MF->getMachineMemOperand(PtrInfo,
1090  MachineMemOperand::MOLoad, EltSize,
1091  MinAlign(Align, EltSize * i));
1092 
1093  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_RESTORE), TmpReg)
1094  .addFrameIndex(Index) // vaddr
1095  .addReg(MFI->getScratchRSrcReg()) // srsrc
1096  .addReg(MFI->getStackPtrOffsetReg()) // soffset
1097  .addImm(i * 4) // offset
1098  .addMemOperand(MMO);
1099 
1100  auto MIB =
1101  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
1102  .addReg(TmpReg, RegState::Kill);
1103 
1104  if (NumSubRegs > 1)
1105  MIB.addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
1106  }
1107  }
1108 
1109  if (M0CopyReg != AMDGPU::NoRegister) {
1110  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
1111  .addReg(M0CopyReg, RegState::Kill);
1112  }
1113 
1114  MI->eraseFromParent();
1115  return true;
1116 }
1117 
1118 /// Special case of eliminateFrameIndex. Returns true if the SGPR was spilled to
1119 /// a VGPR and the stack slot can be safely eliminated when all other users are
1120 /// handled.
1121 bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex(
1122  MachineBasicBlock::iterator MI,
1123  int FI,
1124  RegScavenger *RS) const {
1125  switch (MI->getOpcode()) {
1126  case AMDGPU::SI_SPILL_S1024_SAVE:
1127  case AMDGPU::SI_SPILL_S512_SAVE:
1128  case AMDGPU::SI_SPILL_S256_SAVE:
1129  case AMDGPU::SI_SPILL_S160_SAVE:
1130  case AMDGPU::SI_SPILL_S128_SAVE:
1131  case AMDGPU::SI_SPILL_S96_SAVE:
1132  case AMDGPU::SI_SPILL_S64_SAVE:
1133  case AMDGPU::SI_SPILL_S32_SAVE:
1134  return spillSGPR(MI, FI, RS, true);
1135  case AMDGPU::SI_SPILL_S1024_RESTORE:
1136  case AMDGPU::SI_SPILL_S512_RESTORE:
1137  case AMDGPU::SI_SPILL_S256_RESTORE:
1138  case AMDGPU::SI_SPILL_S160_RESTORE:
1139  case AMDGPU::SI_SPILL_S128_RESTORE:
1140  case AMDGPU::SI_SPILL_S96_RESTORE:
1141  case AMDGPU::SI_SPILL_S64_RESTORE:
1142  case AMDGPU::SI_SPILL_S32_RESTORE:
1143  return restoreSGPR(MI, FI, RS, true);
1144  default:
1145  llvm_unreachable("not an SGPR spill instruction");
1146  }
1147 }
1148 
1149 void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
1150  int SPAdj, unsigned FIOperandNum,
1151  RegScavenger *RS) const {
1152  MachineFunction *MF = MI->getParent()->getParent();
1153  MachineRegisterInfo &MRI = MF->getRegInfo();
1154  MachineBasicBlock *MBB = MI->getParent();
1155  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
1156  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
1157  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
1158  const SIInstrInfo *TII = ST.getInstrInfo();
1159  DebugLoc DL = MI->getDebugLoc();
1160 
1161  assert(SPAdj == 0 && "unhandled SP adjustment in call sequence?");
1162 
1163  MachineOperand &FIOp = MI->getOperand(FIOperandNum);
1164  int Index = MI->getOperand(FIOperandNum).getIndex();
1165 
1166  Register FrameReg = getFrameRegister(*MF);
1167 
1168  switch (MI->getOpcode()) {
1169  // SGPR register spill
1170  case AMDGPU::SI_SPILL_S1024_SAVE:
1171  case AMDGPU::SI_SPILL_S512_SAVE:
1172  case AMDGPU::SI_SPILL_S256_SAVE:
1173  case AMDGPU::SI_SPILL_S160_SAVE:
1174  case AMDGPU::SI_SPILL_S128_SAVE:
1175  case AMDGPU::SI_SPILL_S96_SAVE:
1176  case AMDGPU::SI_SPILL_S64_SAVE:
1177  case AMDGPU::SI_SPILL_S32_SAVE: {
1178  spillSGPR(MI, Index, RS);
1179  break;
1180  }
1181 
1182  // SGPR register restore
1183  case AMDGPU::SI_SPILL_S1024_RESTORE:
1184  case AMDGPU::SI_SPILL_S512_RESTORE:
1185  case AMDGPU::SI_SPILL_S256_RESTORE:
1186  case AMDGPU::SI_SPILL_S160_RESTORE:
1187  case AMDGPU::SI_SPILL_S128_RESTORE:
1188  case AMDGPU::SI_SPILL_S96_RESTORE:
1189  case AMDGPU::SI_SPILL_S64_RESTORE:
1190  case AMDGPU::SI_SPILL_S32_RESTORE: {
1191  restoreSGPR(MI, Index, RS);
1192  break;
1193  }
1194 
1195  // VGPR register spill
1196  case AMDGPU::SI_SPILL_V1024_SAVE:
1197  case AMDGPU::SI_SPILL_V512_SAVE:
1198  case AMDGPU::SI_SPILL_V256_SAVE:
1199  case AMDGPU::SI_SPILL_V160_SAVE:
1200  case AMDGPU::SI_SPILL_V128_SAVE:
1201  case AMDGPU::SI_SPILL_V96_SAVE:
1202  case AMDGPU::SI_SPILL_V64_SAVE:
1203  case AMDGPU::SI_SPILL_V32_SAVE:
1204  case AMDGPU::SI_SPILL_A1024_SAVE:
1205  case AMDGPU::SI_SPILL_A512_SAVE:
1206  case AMDGPU::SI_SPILL_A128_SAVE:
1207  case AMDGPU::SI_SPILL_A64_SAVE:
1208  case AMDGPU::SI_SPILL_A32_SAVE: {
1209  const MachineOperand *VData = TII->getNamedOperand(*MI,
1210  AMDGPU::OpName::vdata);
1211  assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
1212  MFI->getStackPtrOffsetReg());
1213 
1214  buildSpillLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,
1215  Index,
1216  VData->getReg(), VData->isKill(),
1217  TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
1218  FrameReg,
1219  TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
1220  *MI->memoperands_begin(),
1221  RS);
1222  MFI->addToSpilledVGPRs(getNumSubRegsForSpillOp(MI->getOpcode()));
1223  MI->eraseFromParent();
1224  break;
1225  }
1226  case AMDGPU::SI_SPILL_V32_RESTORE:
1227  case AMDGPU::SI_SPILL_V64_RESTORE:
1228  case AMDGPU::SI_SPILL_V96_RESTORE:
1229  case AMDGPU::SI_SPILL_V128_RESTORE:
1230  case AMDGPU::SI_SPILL_V160_RESTORE:
1231  case AMDGPU::SI_SPILL_V256_RESTORE:
1232  case AMDGPU::SI_SPILL_V512_RESTORE:
1233  case AMDGPU::SI_SPILL_V1024_RESTORE:
1234  case AMDGPU::SI_SPILL_A32_RESTORE:
1235  case AMDGPU::SI_SPILL_A64_RESTORE:
1236  case AMDGPU::SI_SPILL_A128_RESTORE:
1237  case AMDGPU::SI_SPILL_A512_RESTORE:
1238  case AMDGPU::SI_SPILL_A1024_RESTORE: {
1239  const MachineOperand *VData = TII->getNamedOperand(*MI,
1240  AMDGPU::OpName::vdata);
1241  assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
1242  MFI->getStackPtrOffsetReg());
1243 
1244  buildSpillLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
1245  Index,
1246  VData->getReg(), VData->isKill(),
1247  TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
1248  FrameReg,
1249  TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
1250  *MI->memoperands_begin(),
1251  RS);
1252  MI->eraseFromParent();
1253  break;
1254  }
1255 
1256  default: {
1257  const DebugLoc &DL = MI->getDebugLoc();
1258  bool IsMUBUF = TII->isMUBUF(*MI);
1259 
1260  if (!IsMUBUF && !MFI->isEntryFunction()) {
1261  // Convert to an absolute stack address by finding the offset from the
1262  // scratch wave base and scaling by the wave size.
1263  //
1264  // In an entry function/kernel the offset is already the absolute
1265  // address relative to the frame register.
1266 
1267  Register DiffReg =
1268  MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
1269 
1270  bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32;
1271  Register ResultReg = IsCopy ?
1272  MI->getOperand(0).getReg() :
1273  MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1274 
1275  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), DiffReg)
1276  .addReg(FrameReg)
1277  .addReg(MFI->getScratchWaveOffsetReg());
1278 
1279  int64_t Offset = FrameInfo.getObjectOffset(Index);
1280  if (Offset == 0) {
1281  // XXX - This never happens because of emergency scavenging slot at 0?
1282  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ResultReg)
1283  .addImm(Log2_32(ST.getWavefrontSize()))
1284  .addReg(DiffReg);
1285  } else {
1286  Register ScaledReg =
1287  MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1288 
1289  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ScaledReg)
1290  .addImm(Log2_32(ST.getWavefrontSize()))
1291  .addReg(DiffReg, RegState::Kill);
1292 
1293  // TODO: Fold if use instruction is another add of a constant.
1294  if (AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm())) {
1295  TII->getAddNoCarry(*MBB, MI, DL, ResultReg)
1296  .addImm(Offset)
1297  .addReg(ScaledReg, RegState::Kill)
1298  .addImm(0); // clamp bit
1299  } else {
1300  Register ConstOffsetReg =
1301  MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
1302 
1303  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), ConstOffsetReg)
1304  .addImm(Offset);
1305  TII->getAddNoCarry(*MBB, MI, DL, ResultReg)
1306  .addReg(ConstOffsetReg, RegState::Kill)
1307  .addReg(ScaledReg, RegState::Kill)
1308  .addImm(0); // clamp bit
1309  }
1310  }
1311 
1312  // Don't introduce an extra copy if we're just materializing in a mov.
1313  if (IsCopy)
1314  MI->eraseFromParent();
1315  else
1316  FIOp.ChangeToRegister(ResultReg, false, false, true);
1317  return;
1318  }
1319 
1320  if (IsMUBUF) {
1321  // Disable offen so we don't need a 0 vgpr base.
1322  assert(static_cast<int>(FIOperandNum) ==
1323  AMDGPU::getNamedOperandIdx(MI->getOpcode(),
1324  AMDGPU::OpName::vaddr));
1325 
1326  assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
1327  MFI->getStackPtrOffsetReg());
1328 
1329  TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->setReg(FrameReg);
1330 
1331  int64_t Offset = FrameInfo.getObjectOffset(Index);
1332  int64_t OldImm
1333  = TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm();
1334  int64_t NewOffset = OldImm + Offset;
1335 
1336  if (isUInt<12>(NewOffset) &&
1337  buildMUBUFOffsetLoadStore(TII, FrameInfo, MI, Index, NewOffset)) {
1338  MI->eraseFromParent();
1339  return;
1340  }
1341  }
1342 
1343  // If the offset is simply too big, don't convert to a scratch wave offset
1344  // relative index.
1345 
1346  int64_t Offset = FrameInfo.getObjectOffset(Index);
1347  FIOp.ChangeToImmediate(Offset);
1348  if (!TII->isImmOperandLegal(*MI, FIOperandNum, FIOp)) {
1349  Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1350  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
1351  .addImm(Offset);
1352  FIOp.ChangeToRegister(TmpReg, false, false, true);
1353  }
1354  }
1355  }
1356 }
1357 
1358 StringRef SIRegisterInfo::getRegAsmName(unsigned Reg) const {
1359  return AMDGPUInstPrinter::getRegisterName(Reg);
1360 }
1361 
1362 // FIXME: This is very slow. It might be worth creating a map from physreg to
1363 // register class.
1364 const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
1365  assert(Register::isPhysicalRegister(Reg));
1366 
1367  static const TargetRegisterClass *const BaseClasses[] = {
1368  &AMDGPU::VGPR_32RegClass,
1369  &AMDGPU::SReg_32RegClass,
1370  &AMDGPU::AGPR_32RegClass,
1371  &AMDGPU::VReg_64RegClass,
1372  &AMDGPU::SReg_64RegClass,
1373  &AMDGPU::AReg_64RegClass,
1374  &AMDGPU::VReg_96RegClass,
1375  &AMDGPU::SReg_96RegClass,
1376  &AMDGPU::VReg_128RegClass,
1377  &AMDGPU::SReg_128RegClass,
1378  &AMDGPU::AReg_128RegClass,
1379  &AMDGPU::VReg_160RegClass,
1380  &AMDGPU::SReg_160RegClass,
1381  &AMDGPU::VReg_256RegClass,
1382  &AMDGPU::SReg_256RegClass,
1383  &AMDGPU::VReg_512RegClass,
1384  &AMDGPU::SReg_512RegClass,
1385  &AMDGPU::AReg_512RegClass,
1386  &AMDGPU::SReg_1024RegClass,
1387  &AMDGPU::VReg_1024RegClass,
1388  &AMDGPU::AReg_1024RegClass,
1389  &AMDGPU::SCC_CLASSRegClass,
1390  &AMDGPU::Pseudo_SReg_32RegClass,
1391  &AMDGPU::Pseudo_SReg_128RegClass,
1392  };
1393 
1394  for (const TargetRegisterClass *BaseClass : BaseClasses) {
1395  if (BaseClass->contains(Reg)) {
1396  return BaseClass;
1397  }
1398  }
1399  return nullptr;
1400 }
1401 
1402 // TODO: It might be helpful to have some target specific flags in
1403 // TargetRegisterClass to mark which classes are VGPRs to make this trivial.
1404 bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const {
1405  unsigned Size = getRegSizeInBits(*RC);
1406  if (Size < 32)
1407  return false;
1408  switch (Size) {
1409  case 32:
1410  return getCommonSubClass(&AMDGPU::VGPR_32RegClass, RC) != nullptr;
1411  case 64:
1412  return getCommonSubClass(&AMDGPU::VReg_64RegClass, RC) != nullptr;
1413  case 96:
1414  return getCommonSubClass(&AMDGPU::VReg_96RegClass, RC) != nullptr;
1415  case 128:
1416  return getCommonSubClass(&AMDGPU::VReg_128RegClass, RC) != nullptr;
1417  case 160:
1418  return getCommonSubClass(&AMDGPU::VReg_160RegClass, RC) != nullptr;
1419  case 256:
1420  return getCommonSubClass(&AMDGPU::VReg_256RegClass, RC) != nullptr;
1421  case 512:
1422  return getCommonSubClass(&AMDGPU::VReg_512RegClass, RC) != nullptr;
1423  case 1024:
1424  return getCommonSubClass(&AMDGPU::VReg_1024RegClass, RC) != nullptr;
1425  default:
1426  llvm_unreachable("Invalid register class size");
1427  }
1428 }
1429 
1430 bool SIRegisterInfo::hasAGPRs(const TargetRegisterClass *RC) const {
1431  unsigned Size = getRegSizeInBits(*RC);
1432  if (Size < 32)
1433  return false;
1434  switch (Size) {
1435  case 32:
1436  return getCommonSubClass(&AMDGPU::AGPR_32RegClass, RC) != nullptr;
1437  case 64:
1438  return getCommonSubClass(&AMDGPU::AReg_64RegClass, RC) != nullptr;
1439  case 96:
1440  return false;
1441  case 128:
1442  return getCommonSubClass(&AMDGPU::AReg_128RegClass, RC) != nullptr;
1443  case 160:
1444  case 256:
1445  return false;
1446  case 512:
1447  return getCommonSubClass(&AMDGPU::AReg_512RegClass, RC) != nullptr;
1448  case 1024:
1449  return getCommonSubClass(&AMDGPU::AReg_1024RegClass, RC) != nullptr;
1450  default:
1451  llvm_unreachable("Invalid register class size");
1452  }
1453 }
1454 
1455 const TargetRegisterClass *SIRegisterInfo::getEquivalentVGPRClass(
1456  const TargetRegisterClass *SRC) const {
1457  switch (getRegSizeInBits(*SRC)) {
1458  case 32:
1459  return &AMDGPU::VGPR_32RegClass;
1460  case 64:
1461  return &AMDGPU::VReg_64RegClass;
1462  case 96:
1463  return &AMDGPU::VReg_96RegClass;
1464  case 128:
1465  return &AMDGPU::VReg_128RegClass;
1466  case 160:
1467  return &AMDGPU::VReg_160RegClass;
1468  case 256:
1469  return &AMDGPU::VReg_256RegClass;
1470  case 512:
1471  return &AMDGPU::VReg_512RegClass;
1472  case 1024:
1473  return &AMDGPU::VReg_1024RegClass;
1474  default:
1475  llvm_unreachable("Invalid register class size");
1476  }
1477 }
1478 
1479 const TargetRegisterClass *SIRegisterInfo::getEquivalentAGPRClass(
1480  const TargetRegisterClass *SRC) const {
1481  switch (getRegSizeInBits(*SRC)) {
1482  case 32:
1483  return &AMDGPU::AGPR_32RegClass;
1484  case 64:
1485  return &AMDGPU::AReg_64RegClass;
1486  case 128:
1487  return &AMDGPU::AReg_128RegClass;
1488  case 512:
1489  return &AMDGPU::AReg_512RegClass;
1490  case 1024:
1491  return &AMDGPU::AReg_1024RegClass;
1492  default:
1493  llvm_unreachable("Invalid register class size");
1494  }
1495 }
1496 
1497 const TargetRegisterClass *SIRegisterInfo::getEquivalentSGPRClass(
1498  const TargetRegisterClass *VRC) const {
1499  switch (getRegSizeInBits(*VRC)) {
1500  case 32:
1501  return &AMDGPU::SGPR_32RegClass;
1502  case 64:
1503  return &AMDGPU::SReg_64RegClass;
1504  case 96:
1505  return &AMDGPU::SReg_96RegClass;
1506  case 128:
1507  return &AMDGPU::SReg_128RegClass;
1508  case 160:
1509  return &AMDGPU::SReg_160RegClass;
1510  case 256:
1511  return &AMDGPU::SReg_256RegClass;
1512  case 512:
1513  return &AMDGPU::SReg_512RegClass;
1514  case 1024:
1515  return &AMDGPU::SReg_1024RegClass;
1516  default:
1517  llvm_unreachable("Invalid register class size");
1518  }
1519 }
1520 
1521 const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
1522  const TargetRegisterClass *RC, unsigned SubIdx) const {
1523  if (SubIdx == AMDGPU::NoSubRegister)
1524  return RC;
1525 
1526  // We can assume that each lane corresponds to one 32-bit register.
1527  unsigned Count = getSubRegIndexLaneMask(SubIdx).getNumLanes();
1528  if (isSGPRClass(RC)) {
1529  switch (Count) {
1530  case 1:
1531  return &AMDGPU::SGPR_32RegClass;
1532  case 2:
1533  return &AMDGPU::SReg_64RegClass;
1534  case 3:
1535  return &AMDGPU::SReg_96RegClass;
1536  case 4:
1537  return &AMDGPU::SReg_128RegClass;
1538  case 5:
1539  return &AMDGPU::SReg_160RegClass;
1540  case 8:
1541  return &AMDGPU::SReg_256RegClass;
1542  case 16:
1543  return &AMDGPU::SReg_512RegClass;
1544  case 32: /* fall-through */
1545  default:
1546  llvm_unreachable("Invalid sub-register class size");
1547  }
1548  } else if (hasAGPRs(RC)) {
1549  switch (Count) {
1550  case 1:
1551  return &AMDGPU::AGPR_32RegClass;
1552  case 2:
1553  return &AMDGPU::AReg_64RegClass;
1554  case 4:
1555  return &AMDGPU::AReg_128RegClass;
1556  case 16:
1557  return &AMDGPU::AReg_512RegClass;
1558  case 32: /* fall-through */
1559  default:
1560  llvm_unreachable("Invalid sub-register class size");
1561  }
1562  } else {
1563  switch (Count) {
1564  case 1:
1565  return &AMDGPU::VGPR_32RegClass;
1566  case 2:
1567  return &AMDGPU::VReg_64RegClass;
1568  case 3:
1569  return &AMDGPU::VReg_96RegClass;
1570  case 4:
1571  return &AMDGPU::VReg_128RegClass;
1572  case 5:
1573  return &AMDGPU::VReg_160RegClass;
1574  case 8:
1575  return &AMDGPU::VReg_256RegClass;
1576  case 16:
1577  return &AMDGPU::VReg_512RegClass;
1578  case 32: /* fall-through */
1579  default:
1580  llvm_unreachable("Invalid sub-register class size");
1581  }
1582  }
1583 }
1584 
1585 bool SIRegisterInfo::shouldRewriteCopySrc(
1586  const TargetRegisterClass *DefRC,
1587  unsigned DefSubReg,
1588  const TargetRegisterClass *SrcRC,
1589  unsigned SrcSubReg) const {
1590  // We want to prefer the smallest register class possible, so we don't want to
1591  // stop and rewrite on anything that looks like a subregister
1592  // extract. Operations mostly don't care about the super register class, so we
1593  // only want to stop on the most basic of copies between the same register
1594  // class.
1595  //
1596  // e.g. if we have something like
1597  // %0 = ...
1598  // %1 = ...
1599  // %2 = REG_SEQUENCE %0, sub0, %1, sub1, %2, sub2
1600  // %3 = COPY %2, sub0
1601  //
1602  // We want to look through the COPY to find:
1603  // => %3 = COPY %0
1604 
1605  // Plain copy.
1606  return getCommonSubClass(DefRC, SrcRC) != nullptr;
1607 }
1608 
1609 /// Returns a register that is not used at any point in the function.
1610 /// If all registers are used, then this function will return
1611 /// AMDGPU::NoRegister.
1612 unsigned
1613 SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI,
1614  const TargetRegisterClass *RC,
1615  const MachineFunction &MF) const {
1616 
1617  for (unsigned Reg : *RC)
1618  if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg))
1619  return Reg;
1620  return AMDGPU::NoRegister;
1621 }
1622 
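// Return the subregister indices that split RC into EltSize-byte pieces
// (EltSize is 4, 8, 16, or 32); an empty list means RC is a single element.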
1623 ArrayRef<int16_t> SIRegisterInfo::getRegSplitParts(const TargetRegisterClass *RC,
1624  unsigned EltSize) const {
1625  if (EltSize == 4) {
1626  static const int16_t Sub0_31[] = {
1627  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
1628  AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
1629  AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
1630  AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
1631  AMDGPU::sub16, AMDGPU::sub17, AMDGPU::sub18, AMDGPU::sub19,
1632  AMDGPU::sub20, AMDGPU::sub21, AMDGPU::sub22, AMDGPU::sub23,
1633  AMDGPU::sub24, AMDGPU::sub25, AMDGPU::sub26, AMDGPU::sub27,
1634  AMDGPU::sub28, AMDGPU::sub29, AMDGPU::sub30, AMDGPU::sub31,
1635  };
1636 
1637  static const int16_t Sub0_15[] = {
1638  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
1639  AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
1640  AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
1641  AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
1642  };
1643 
1644  static const int16_t Sub0_7[] = {
1645  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
1646  AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
1647  };
1648 
1649  static const int16_t Sub0_4[] = {
1650  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, AMDGPU::sub4,
1651  };
1652 
1653  static const int16_t Sub0_3[] = {
1654  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
1655  };
1656 
1657  static const int16_t Sub0_2[] = {
1658  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2,
1659  };
1660 
1661  static const int16_t Sub0_1[] = {
1662  AMDGPU::sub0, AMDGPU::sub1,
1663  };
1664 
1665  switch (AMDGPU::getRegBitWidth(*RC->MC)) {
1666  case 32:
1667  return {};
1668  case 64:
1669  return makeArrayRef(Sub0_1);
1670  case 96:
1671  return makeArrayRef(Sub0_2);
1672  case 128:
1673  return makeArrayRef(Sub0_3);
1674  case 160:
1675  return makeArrayRef(Sub0_4);
1676  case 256:
1677  return makeArrayRef(Sub0_7);
1678  case 512:
1679  return makeArrayRef(Sub0_15);
1680  case 1024:
1681  return makeArrayRef(Sub0_31);
1682  default:
1683  llvm_unreachable("unhandled register size");
1684  }
1685  }
1686 
1687  if (EltSize == 8) {
1688  static const int16_t Sub0_31_64[] = {
1689  AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
1690  AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
1691  AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
1692  AMDGPU::sub12_sub13, AMDGPU::sub14_sub15,
1693  AMDGPU::sub16_sub17, AMDGPU::sub18_sub19,
1694  AMDGPU::sub20_sub21, AMDGPU::sub22_sub23,
1695  AMDGPU::sub24_sub25, AMDGPU::sub26_sub27,
1696  AMDGPU::sub28_sub29, AMDGPU::sub30_sub31
1697  };
1698 
1699  static const int16_t Sub0_15_64[] = {
1700  AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
1701  AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
1702  AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
1703  AMDGPU::sub12_sub13, AMDGPU::sub14_sub15
1704  };
1705 
1706  static const int16_t Sub0_7_64[] = {
1707  AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
1708  AMDGPU::sub4_sub5, AMDGPU::sub6_sub7
1709  };
1710 
1711 
1712  static const int16_t Sub0_3_64[] = {
1713  AMDGPU::sub0_sub1, AMDGPU::sub2_sub3
1714  };
1715 
1716  switch (AMDGPU::getRegBitWidth(*RC->MC)) {
1717  case 64:
1718  return {};
1719  case 128:
1720  return makeArrayRef(Sub0_3_64);
1721  case 256:
1722  return makeArrayRef(Sub0_7_64);
1723  case 512:
1724  return makeArrayRef(Sub0_15_64);
1725  case 1024:
1726  return makeArrayRef(Sub0_31_64);
1727  default:
1728  llvm_unreachable("unhandled register size");
1729  }
1730  }
1731 
1732  if (EltSize == 16) {
1733 
1734  static const int16_t Sub0_31_128[] = {
1735  AMDGPU::sub0_sub1_sub2_sub3,
1736  AMDGPU::sub4_sub5_sub6_sub7,
1737  AMDGPU::sub8_sub9_sub10_sub11,
1738  AMDGPU::sub12_sub13_sub14_sub15,
1739  AMDGPU::sub16_sub17_sub18_sub19,
1740  AMDGPU::sub20_sub21_sub22_sub23,
1741  AMDGPU::sub24_sub25_sub26_sub27,
1742  AMDGPU::sub28_sub29_sub30_sub31
1743  };
1744 
1745  static const int16_t Sub0_15_128[] = {
1746  AMDGPU::sub0_sub1_sub2_sub3,
1747  AMDGPU::sub4_sub5_sub6_sub7,
1748  AMDGPU::sub8_sub9_sub10_sub11,
1749  AMDGPU::sub12_sub13_sub14_sub15
1750  };
1751 
1752  static const int16_t Sub0_7_128[] = {
1753  AMDGPU::sub0_sub1_sub2_sub3,
1754  AMDGPU::sub4_sub5_sub6_sub7
1755  };
1756 
1757  switch (AMDGPU::getRegBitWidth(*RC->MC)) {
1758  case 128:
1759  return {};
1760  case 256:
1761  return makeArrayRef(Sub0_7_128);
1762  case 512:
1763  return makeArrayRef(Sub0_15_128);
1764  case 1024:
1765  return makeArrayRef(Sub0_31_128);
1766  default:
1767  llvm_unreachable("unhandled register size");
1768  }
1769  }
1770 
1771  assert(EltSize == 32 && "unhandled elt size");
1772 
1773  static const int16_t Sub0_31_256[] = {
1774  AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7,
1775  AMDGPU::sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15,
1776  AMDGPU::sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23,
1777  AMDGPU::sub24_sub25_sub26_sub27_sub28_sub29_sub30_sub31
1778  };
1779 
1780  static const int16_t Sub0_15_256[] = {
1781  AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7,
1782  AMDGPU::sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15
1783  };
1784 
1785  switch (AMDGPU::getRegBitWidth(*RC->MC)) {
1786  case 256:
1787  return {};
1788  case 512:
1789  return makeArrayRef(Sub0_15_256);
1790  case 1024:
1791  return makeArrayRef(Sub0_31_256);
1792  default:
1793  llvm_unreachable("unhandled register size");
1794  }
1795 }
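The tables above all encode the same relationship: a register of a given bit width splits into bitWidth / (EltSize * 8) consecutive sub-registers, and an empty list is returned when the register is already a single element. A small standalone sketch of that arithmetic (assuming exact divisibility, as the tables do):

#include <cassert>

// Number of EltSize-byte pieces a RegBitWidth-bit register splits into;
// 0 mirrors the empty ArrayRef returned when no split is needed.
unsigned numSplitPartsSketch(unsigned RegBitWidth, unsigned EltSizeBytes) {
  unsigned EltBits = EltSizeBytes * 8;
  assert(EltBits != 0 && RegBitWidth % EltBits == 0 && "width must divide evenly");
  unsigned Parts = RegBitWidth / EltBits;
  return Parts == 1 ? 0 : Parts;
}
// e.g. numSplitPartsSketch(512, 4) == 16 (the Sub0_15 table),
//      numSplitPartsSketch(256, 8) == 4  (the Sub0_7_64 table).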
1796 
1797 const TargetRegisterClass*
1798 SIRegisterInfo::getRegClassForReg(const MachineRegisterInfo &MRI,
1799  unsigned Reg) const {
1800  if (Register::isVirtualRegister(Reg))
1801  return MRI.getRegClass(Reg);
1802 
1803  return getPhysRegClass(Reg);
1804 }
1805 
1806 bool SIRegisterInfo::isVGPR(const MachineRegisterInfo &MRI,
1807  unsigned Reg) const {
1808  const TargetRegisterClass * RC = getRegClassForReg(MRI, Reg);
1809  assert(RC && "Register class for the reg not found");
1810  return hasVGPRs(RC);
1811 }
1812 
1813 bool SIRegisterInfo::isAGPR(const MachineRegisterInfo &MRI,
1814  unsigned Reg) const {
1815  const TargetRegisterClass * RC = getRegClassForReg(MRI, Reg);
1816  assert(RC && "Register class for the reg not found");
1817  return hasAGPRs(RC);
1818 }
1819 
1820 bool SIRegisterInfo::shouldCoalesce(MachineInstr *MI,
1821  const TargetRegisterClass *SrcRC,
1822  unsigned SubReg,
1823  const TargetRegisterClass *DstRC,
1824  unsigned DstSubReg,
1825  const TargetRegisterClass *NewRC,
1826  LiveIntervals &LIS) const {
1827  unsigned SrcSize = getRegSizeInBits(*SrcRC);
1828  unsigned DstSize = getRegSizeInBits(*DstRC);
1829  unsigned NewSize = getRegSizeInBits(*NewRC);
1830 
1831  // Do not increase size of registers beyond dword; we would need to allocate
1832  // adjacent registers and constrain regalloc more than needed.
1833 
1834  // Always allow dword coalescing.
1835  if (SrcSize <= 32 || DstSize <= 32)
1836  return true;
1837 
1838  return NewSize <= DstSize || NewSize <= SrcSize;
1839 }
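A minimal standalone restatement of the policy above, with the register-class size queries replaced by sizes in bits (illustrative only):

// Dword coalescing is always allowed; otherwise the coalesced register must
// not be wider than both of the originals.
bool shouldCoalesceSketch(unsigned SrcSizeBits, unsigned DstSizeBits,
                          unsigned NewSizeBits) {
  if (SrcSizeBits <= 32 || DstSizeBits <= 32)
    return true;
  return NewSizeBits <= DstSizeBits || NewSizeBits <= SrcSizeBits;
}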
1840 
1841 unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
1842  MachineFunction &MF) const {
1843 
1844  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1845  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
1846 
1847  unsigned Occupancy = ST.getOccupancyWithLocalMemSize(MFI->getLDSSize(),
1848  MF.getFunction());
1849  switch (RC->getID()) {
1850  default:
1851  return AMDGPURegisterInfo::getRegPressureLimit(RC, MF);
1852  case AMDGPU::VGPR_32RegClassID:
1853  return std::min(ST.getMaxNumVGPRs(Occupancy), ST.getMaxNumVGPRs(MF));
1854  case AMDGPU::SGPR_32RegClassID:
1855  return std::min(ST.getMaxNumSGPRs(Occupancy, true), ST.getMaxNumSGPRs(MF));
1856  }
1857 }
1858 
1859 unsigned SIRegisterInfo::getRegPressureSetLimit(const MachineFunction &MF,
1860  unsigned Idx) const {
1861  if (Idx == getVGPRPressureSet() || Idx == getAGPRPressureSet())
1862  return getRegPressureLimit(&AMDGPU::VGPR_32RegClass,
1863  const_cast<MachineFunction &>(MF));
1864 
1865  if (Idx == getSGPRPressureSet())
1866  return getRegPressureLimit(&AMDGPU::SGPR_32RegClass,
1867  const_cast<MachineFunction &>(MF));
1868 
1869  return AMDGPURegisterInfo::getRegPressureSetLimit(MF, Idx);
1870 }
1871 
1872 const int *SIRegisterInfo::getRegUnitPressureSets(unsigned RegUnit) const {
1873  static const int Empty[] = { -1 };
1874 
1875  if (hasRegUnit(AMDGPU::M0, RegUnit))
1876  return Empty;
1877  return AMDGPURegisterInfo::getRegUnitPressureSets(RegUnit);
1878 }
1879 
1880 unsigned SIRegisterInfo::getReturnAddressReg(const MachineFunction &MF) const {
1881  // Not a callee saved register.
1882  return AMDGPU::SGPR30_SGPR31;
1883 }
1884 
1885 const TargetRegisterClass *
1886 SIRegisterInfo::getRegClassForSizeOnBank(unsigned Size,
1887  const RegisterBank &RB,
1888  const MachineRegisterInfo &MRI) const {
1889  switch (Size) {
1890  case 1: {
1891  switch (RB.getID()) {
1892  case AMDGPU::VGPRRegBankID:
1893  return &AMDGPU::VGPR_32RegClass;
1894  case AMDGPU::VCCRegBankID:
1895  return isWave32 ?
1896  &AMDGPU::SReg_32_XM0_XEXECRegClass : &AMDGPU::SReg_64_XEXECRegClass;
1897  case AMDGPU::SGPRRegBankID:
1898  return &AMDGPU::SReg_32_XM0RegClass;
1899  case AMDGPU::SCCRegBankID:
1900  // This needs to return an allocatable class, so don't bother returning
1901  // the dummy SCC class.
1902  return &AMDGPU::SReg_32_XM0RegClass;
1903  default:
1904  llvm_unreachable("unknown register bank");
1905  }
1906  }
1907  case 32:
1908  return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VGPR_32RegClass :
1909  &AMDGPU::SReg_32_XM0RegClass;
1910  case 64:
1911  return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_64RegClass :
1912  &AMDGPU::SReg_64_XEXECRegClass;
1913  case 96:
1914  return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_96RegClass :
1915  &AMDGPU::SReg_96RegClass;
1916  case 128:
1917  return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_128RegClass :
1918  &AMDGPU::SReg_128RegClass;
1919  case 160:
1920  return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_160RegClass :
1921  &AMDGPU::SReg_160RegClass;
1922  case 256:
1923  return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_256RegClass :
1924  &AMDGPU::SReg_256RegClass;
1925  case 512:
1926  return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_512RegClass :
1927  &AMDGPU::SReg_512RegClass;
1928  default:
1929  if (Size < 32)
1930  return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VGPR_32RegClass :
1931  &AMDGPU::SReg_32_XM0RegClass;
1932  return nullptr;
1933  }
1934 }
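A simplified standalone sketch of the selection rule above, with class names as plain strings; it only captures the bank split and the sub-dword rounding, while the real code also picks the _XM0/_XEXEC scalar variants and handles the per-bank 1-bit cases:

#include <string>

// Illustrative only: pick a vector class on the VGPR bank, a scalar class
// otherwise, rounding sub-dword values up to 32 bits.
std::string regClassNameSketch(unsigned SizeInBits, bool OnVGPRBank) {
  if (SizeInBits < 32)
    SizeInBits = 32;
  if (SizeInBits == 32)
    return OnVGPRBank ? "VGPR_32" : "SReg_32";
  return (OnVGPRBank ? std::string("VReg_") : std::string("SReg_")) +
         std::to_string(SizeInBits);
}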
1935 
1936 const TargetRegisterClass *
1937 SIRegisterInfo::getConstrainedRegClassForOperand(const MachineOperand &MO,
1938  const MachineRegisterInfo &MRI) const {
1939  if (const RegisterBank *RB = MRI.getRegBankOrNull(MO.getReg()))
1940  return getRegClassForTypeOnBank(MRI.getType(MO.getReg()), *RB, MRI);
1941  return nullptr;
1942 }
1943 
1944 unsigned SIRegisterInfo::getVCC() const {
1945  return isWave32 ? AMDGPU::VCC_LO : AMDGPU::VCC;
1946 }
1947 
1948 const TargetRegisterClass *
1949 SIRegisterInfo::getRegClass(unsigned RCID) const {
1950  switch ((int)RCID) {
1951  case AMDGPU::SReg_1RegClassID:
1952  return getBoolRC();
1953  case AMDGPU::SReg_1_XEXECRegClassID:
1954  return isWave32 ? &AMDGPU::SReg_32_XM0_XEXECRegClass
1955  : &AMDGPU::SReg_64_XEXECRegClass;
1956  case -1:
1957  return nullptr;
1958  default:
1959  return AMDGPURegisterInfo::getRegClass(RCID);
1960  }
1961 }
1962 
1963 // Find reaching register definition
1964 MachineInstr *SIRegisterInfo::findReachingDef(unsigned Reg, unsigned SubReg,
1965  MachineInstr &Use,
1966  MachineRegisterInfo &MRI,
1967  LiveIntervals *LIS) const {
1968  auto &MDT = LIS->getAnalysis<MachineDominatorTree>();
1969  SlotIndex UseIdx = LIS->getInstructionIndex(Use);
1970  SlotIndex DefIdx;
1971 
1972  if (Register::isVirtualRegister(Reg)) {
1973  if (!LIS->hasInterval(Reg))
1974  return nullptr;
1975  LiveInterval &LI = LIS->getInterval(Reg);
1976  LaneBitmask SubLanes = SubReg ? getSubRegIndexLaneMask(SubReg)
1977  : MRI.getMaxLaneMaskForVReg(Reg);
1978  VNInfo *V = nullptr;
1979  if (LI.hasSubRanges()) {
1980  for (auto &S : LI.subranges()) {
1981  if ((S.LaneMask & SubLanes) == SubLanes) {
1982  V = S.getVNInfoAt(UseIdx);
1983  break;
1984  }
1985  }
1986  } else {
1987  V = LI.getVNInfoAt(UseIdx);
1988  }
1989  if (!V)
1990  return nullptr;
1991  DefIdx = V->def;
1992  } else {
1993  // Find last def.
1994  for (MCRegUnitIterator Units(Reg, this); Units.isValid(); ++Units) {
1995  LiveRange &LR = LIS->getRegUnit(*Units);
1996  if (VNInfo *V = LR.getVNInfoAt(UseIdx)) {
1997  if (!DefIdx.isValid() ||
1998  MDT.dominates(LIS->getInstructionFromIndex(DefIdx),
1999  LIS->getInstructionFromIndex(V->def)))
2000  DefIdx = V->def;
2001  } else {
2002  return nullptr;
2003  }
2004  }
2005  }
2006 
2007  MachineInstr *Def = LIS->getInstructionFromIndex(DefIdx);
2008 
2009  if (!Def || !MDT.dominates(Def, &Use))
2010  return nullptr;
2011 
2012  assert(Def->modifiesRegister(Reg, this));
2013 
2014  return Def;
2015 }
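For the physical-register path just above, the selection among per-register-unit values amounts to keeping the latest def that still reaches the use, with the dominator tree providing the ordering. A simplified analogue assuming straight-line code, where a plain instruction index stands in for SlotIndex and dominance (names hypothetical):

#include <cstddef>
#include <optional>
#include <vector>

// Positions of def instructions in straight-line program order; returns the
// closest def before the use, or nullopt if nothing reaches it.
std::optional<std::size_t> latestReachingDefSketch(
    const std::vector<std::size_t> &DefPositions, std::size_t UsePosition) {
  std::optional<std::size_t> Best;
  for (std::size_t Pos : DefPositions) {
    if (Pos >= UsePosition)
      continue;                 // defs at or after the use cannot reach it
    if (!Best || Pos > *Best)
      Best = Pos;               // keep the latest qualifying def
  }
  return Best;
}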