LLVM 9.0.0svn
SIRegisterInfo.cpp
1 //===-- SIRegisterInfo.cpp - SI Register Information ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// SI implementation of the TargetRegisterInfo class.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "SIRegisterInfo.h"
15 #include "AMDGPURegisterBankInfo.h"
16 #include "AMDGPUSubtarget.h"
17 #include "SIInstrInfo.h"
18 #include "SIMachineFunctionInfo.h"
19 #include "MCTargetDesc/AMDGPUInstPrinter.h"
20 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
21 #include "llvm/CodeGen/LiveIntervals.h"
22 #include "llvm/CodeGen/MachineDominators.h"
23 #include "llvm/CodeGen/MachineFrameInfo.h"
24 #include "llvm/CodeGen/MachineInstrBuilder.h"
25 #include "llvm/CodeGen/RegisterScavenging.h"
26 #include "llvm/IR/Function.h"
27 #include "llvm/IR/LLVMContext.h"
28 
29 using namespace llvm;
30 
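// Return true if the -1-terminated list of pressure sets PSets contains PSetID.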
31 static bool hasPressureSet(const int *PSets, unsigned PSetID) {
32  for (unsigned i = 0; PSets[i] != -1; ++i) {
33  if (PSets[i] == (int)PSetID)
34  return true;
35  }
36  return false;
37 }
38 
39 void SIRegisterInfo::classifyPressureSet(unsigned PSetID, unsigned Reg,
40  BitVector &PressureSets) const {
41  for (MCRegUnitIterator U(Reg, this); U.isValid(); ++U) {
42  const int *PSets = getRegUnitPressureSets(*U);
43  if (hasPressureSet(PSets, PSetID)) {
44  PressureSets.set(PSetID);
45  break;
46  }
47  }
48 }
49 
50 static cl::opt<bool> EnableSpillSGPRToSMEM(
51  "amdgpu-spill-sgpr-to-smem",
52  cl::desc("Use scalar stores to spill SGPRs if supported by subtarget"),
53  cl::init(false));
54 
55 static cl::opt<bool> EnableSpillSGPRToVGPR(
56  "amdgpu-spill-sgpr-to-vgpr",
57  cl::desc("Enable spilling VGPRs to SGPRs"),
58  cl::ReallyHidden,
59  cl::init(true));
60 
61 SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST) :
62  AMDGPURegisterInfo(),
63  SGPRPressureSets(getNumRegPressureSets()),
64  VGPRPressureSets(getNumRegPressureSets()),
65  SpillSGPRToVGPR(false),
66  SpillSGPRToSMEM(false),
67  isWave32(ST.isWave32()) {
68  if (EnableSpillSGPRToSMEM && ST.hasScalarStores())
69  SpillSGPRToSMEM = true;
70  else if (EnableSpillSGPRToVGPR)
71  SpillSGPRToVGPR = true;
72 
73  unsigned NumRegPressureSets = getNumRegPressureSets();
74 
75  SGPRSetID = NumRegPressureSets;
76  VGPRSetID = NumRegPressureSets;
77 
78  for (unsigned i = 0; i < NumRegPressureSets; ++i) {
79  classifyPressureSet(i, AMDGPU::SGPR0, SGPRPressureSets);
80  classifyPressureSet(i, AMDGPU::VGPR0, VGPRPressureSets);
81  }
82 
83  // Determine the number of reg units for each pressure set.
84  std::vector<unsigned> PressureSetRegUnits(NumRegPressureSets, 0);
85  for (unsigned i = 0, e = getNumRegUnits(); i != e; ++i) {
86  const int *PSets = getRegUnitPressureSets(i);
87  for (unsigned j = 0; PSets[j] != -1; ++j) {
88  ++PressureSetRegUnits[PSets[j]];
89  }
90  }
91 
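// Among the pressure sets that include SGPR0/VGPR0, remember the one covering
// the most register units as the representative SGPRSetID/VGPRSetID.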
92  unsigned VGPRMax = 0, SGPRMax = 0;
93  for (unsigned i = 0; i < NumRegPressureSets; ++i) {
94  if (isVGPRPressureSet(i) && PressureSetRegUnits[i] > VGPRMax) {
95  VGPRSetID = i;
96  VGPRMax = PressureSetRegUnits[i];
97  continue;
98  }
99  if (isSGPRPressureSet(i) && PressureSetRegUnits[i] > SGPRMax) {
100  SGPRSetID = i;
101  SGPRMax = PressureSetRegUnits[i];
102  }
103  }
104 
105  assert(SGPRSetID < NumRegPressureSets &&
106  VGPRSetID < NumRegPressureSets);
107 }
108 
109 unsigned SIRegisterInfo::reservedPrivateSegmentBufferReg(
110  const MachineFunction &MF) const {
111 
112  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
113  unsigned BaseIdx = alignDown(ST.getMaxNumSGPRs(MF), 4) - 4;
114  unsigned BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));
115  return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
116 }
117 
118 static unsigned findPrivateSegmentWaveByteOffsetRegIndex(unsigned RegCount) {
119  unsigned Reg;
120 
121  // Try to place it in a hole after PrivateSegmentBufferReg.
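 // For example, if RegCount is 102 the 4-aligned segment buffer occupies
 // SGPR96-99, leaving SGPR100-101 free, and index 101 is returned; if
 // RegCount is 104 the buffer occupies SGPR100-103 and index 99, just below
 // it, is returned.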
122  if (RegCount & 3) {
123  // We cannot put the segment buffer in (Idx - 4) ... (Idx - 1) due to
124  // alignment constraints, so we have a hole where we can put the wave offset.
125  Reg = RegCount - 1;
126  } else {
127  // We can put the segment buffer in (Idx - 4) ... (Idx - 1) and put the
128  // wave offset before it.
129  Reg = RegCount - 5;
130  }
131 
132  return Reg;
133 }
134 
135 unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg(
136  const MachineFunction &MF) const {
137  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
138  unsigned Reg = findPrivateSegmentWaveByteOffsetRegIndex(ST.getMaxNumSGPRs(MF));
139  return AMDGPU::SGPR_32RegClass.getRegister(Reg);
140 }
141 
142 BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
143  BitVector Reserved(getNumRegs());
144 
145  // EXEC_LO and EXEC_HI could be allocated and used as regular registers, but
146  // this seems likely to result in bugs, so I'm marking them as reserved.
147  reserveRegisterTuples(Reserved, AMDGPU::EXEC);
148  reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR);
149 
150  // M0 has to be reserved so that llvm accepts it as a live-in into a block.
151  reserveRegisterTuples(Reserved, AMDGPU::M0);
152 
153  // Reserve src_vccz, src_execz, src_scc.
154  reserveRegisterTuples(Reserved, AMDGPU::SRC_VCCZ);
155  reserveRegisterTuples(Reserved, AMDGPU::SRC_EXECZ);
156  reserveRegisterTuples(Reserved, AMDGPU::SRC_SCC);
157 
158  // Reserve the memory aperture registers.
159  reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_BASE);
160  reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_LIMIT);
161  reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_BASE);
162  reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_LIMIT);
163 
164  // Reserve src_pops_exiting_wave_id - support is not implemented in Codegen.
165  reserveRegisterTuples(Reserved, AMDGPU::SRC_POPS_EXITING_WAVE_ID);
166 
167  // Reserve xnack_mask registers - support is not implemented in Codegen.
168  reserveRegisterTuples(Reserved, AMDGPU::XNACK_MASK);
169 
170  // Reserve lds_direct register - support is not implemented in Codegen.
171  reserveRegisterTuples(Reserved, AMDGPU::LDS_DIRECT);
172 
173  // Reserve Trap Handler registers - support is not implemented in Codegen.
174  reserveRegisterTuples(Reserved, AMDGPU::TBA);
175  reserveRegisterTuples(Reserved, AMDGPU::TMA);
176  reserveRegisterTuples(Reserved, AMDGPU::TTMP0_TTMP1);
177  reserveRegisterTuples(Reserved, AMDGPU::TTMP2_TTMP3);
178  reserveRegisterTuples(Reserved, AMDGPU::TTMP4_TTMP5);
179  reserveRegisterTuples(Reserved, AMDGPU::TTMP6_TTMP7);
180  reserveRegisterTuples(Reserved, AMDGPU::TTMP8_TTMP9);
181  reserveRegisterTuples(Reserved, AMDGPU::TTMP10_TTMP11);
182  reserveRegisterTuples(Reserved, AMDGPU::TTMP12_TTMP13);
183  reserveRegisterTuples(Reserved, AMDGPU::TTMP14_TTMP15);
184 
185  // Reserve null register - it shall never be allocated
186  reserveRegisterTuples(Reserved, AMDGPU::SGPR_NULL);
187 
188  // Disallow vcc_hi allocation in wave32. It may be allocated but most likely
189  // will result in bugs.
190  if (isWave32) {
191  Reserved.set(AMDGPU::VCC);
192  Reserved.set(AMDGPU::VCC_HI);
193  }
194 
195  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
196 
197  unsigned MaxNumSGPRs = ST.getMaxNumSGPRs(MF);
198  unsigned TotalNumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
199  for (unsigned i = MaxNumSGPRs; i < TotalNumSGPRs; ++i) {
200  unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
201  reserveRegisterTuples(Reserved, Reg);
202  }
203 
204  unsigned MaxNumVGPRs = ST.getMaxNumVGPRs(MF);
205  unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
206  for (unsigned i = MaxNumVGPRs; i < TotalNumVGPRs; ++i) {
207  unsigned Reg = AMDGPU::VGPR_32RegClass.getRegister(i);
208  reserveRegisterTuples(Reserved, Reg);
209  }
210 
211  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
212
213  unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();
214  if (ScratchWaveOffsetReg != AMDGPU::NoRegister) {
215  // Reserve 1 SGPR for scratch wave offset in case we need to spill.
216  reserveRegisterTuples(Reserved, ScratchWaveOffsetReg);
217  }
218 
219  unsigned ScratchRSrcReg = MFI->getScratchRSrcReg();
220  if (ScratchRSrcReg != AMDGPU::NoRegister) {
221  // Reserve 4 SGPRs for the scratch buffer resource descriptor in case we need
222  // to spill.
223  // TODO: May need to reserve a VGPR if doing LDS spilling.
224  reserveRegisterTuples(Reserved, ScratchRSrcReg);
225  assert(!isSubRegister(ScratchRSrcReg, ScratchWaveOffsetReg));
226  }
227 
228  // We have to assume the SP is needed in case there are calls in the function,
229  // which is detected after the function is lowered. If we aren't really going
230  // to need SP, don't bother reserving it.
231  unsigned StackPtrReg = MFI->getStackPtrOffsetReg();
232 
233  if (StackPtrReg != AMDGPU::NoRegister) {
234  reserveRegisterTuples(Reserved, StackPtrReg);
235  assert(!isSubRegister(ScratchRSrcReg, StackPtrReg));
236  }
237 
238  unsigned FrameReg = MFI->getFrameOffsetReg();
239  if (FrameReg != AMDGPU::NoRegister) {
240  reserveRegisterTuples(Reserved, FrameReg);
241  assert(!isSubRegister(ScratchRSrcReg, FrameReg));
242  }
243 
244  for (unsigned Reg : MFI->WWMReservedRegs) {
245  reserveRegisterTuples(Reserved, Reg);
246  }
247 
248  return Reserved;
249 }
250 
251 bool SIRegisterInfo::canRealignStack(const MachineFunction &MF) const {
252  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
253  // On entry, the base address is 0, so it can't possibly need any more
254  // alignment.
255 
256  // FIXME: Should be able to specify the entry frame alignment per calling
257  // convention instead.
258  if (Info->isEntryFunction())
259  return false;
260 
261  return TargetRegisterInfo::canRealignStack(MF);
262 }
263 
264 bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const {
265  const SIMachineFunctionInfo *Info = Fn.getInfo<SIMachineFunctionInfo>();
266  if (Info->isEntryFunction()) {
267  const MachineFrameInfo &MFI = Fn.getFrameInfo();
268  return MFI.hasStackObjects() || MFI.hasCalls();
269  }
270 
271  // May need scavenger for dealing with callee saved registers.
272  return true;
273 }
274 
275 bool SIRegisterInfo::requiresFrameIndexScavenging(
276  const MachineFunction &MF) const {
277  const MachineFrameInfo &MFI = MF.getFrameInfo();
278  if (MFI.hasStackObjects())
279  return true;
280 
281  // May need to deal with callee saved registers.
282  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
283  return !Info->isEntryFunction();
284 }
285 
286 bool SIRegisterInfo::requiresFrameIndexReplacementScavenging(
287  const MachineFunction &MF) const {
288  const MachineFrameInfo &MFI = MF.getFrameInfo();
289  if (!MFI.hasStackObjects())
290  return false;
291 
292  // The scavenger is used for large frames which may require finding a free
293  // register for large offsets.
294  if (!isUInt<12>(MFI.getStackSize()))
295  return true;
296 
297  // If using scalar stores, for spills, m0 is needed for the scalar store
298  // offset (pre-GFX9). m0 is unallocatable, so we can't create a virtual
299  // register for it during frame index elimination, so the scavenger is
300  // directly needed.
301  return MF.getSubtarget<GCNSubtarget>().hasScalarStores() &&
302  MF.getInfo<SIMachineFunctionInfo>()->hasSpilledSGPRs();
303 }
304 
305 bool SIRegisterInfo::requiresVirtualBaseRegisters(
306  const MachineFunction &) const {
307  // There are no special dedicated stack or frame pointers.
308  return true;
309 }
310 
311 bool SIRegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
312  // This helps catch bugs as verifier errors.
313  return true;
314 }
315 
316 int64_t SIRegisterInfo::getMUBUFInstrOffset(const MachineInstr *MI) const {
317  assert(SIInstrInfo::isMUBUF(*MI));
318
319  int OffIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
320  AMDGPU::OpName::offset);
321  return MI->getOperand(OffIdx).getImm();
322 }
323 
324 int64_t SIRegisterInfo::getFrameIndexInstrOffset(const MachineInstr *MI,
325  int Idx) const {
326  if (!SIInstrInfo::isMUBUF(*MI))
327  return 0;
328 
329  assert(Idx == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
330  AMDGPU::OpName::vaddr) &&
331  "Should never see frame index on non-address operand");
332 
333  return getMUBUFInstrOffset(MI);
334 }
335 
336 bool SIRegisterInfo::needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
337  if (!MI->mayLoadOrStore())
338  return false;
339 
340  int64_t FullOffset = Offset + getMUBUFInstrOffset(MI);
341 
342  return !isUInt<12>(FullOffset);
343 }
344 
345 void SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
346  unsigned BaseReg,
347  int FrameIdx,
348  int64_t Offset) const {
349  MachineBasicBlock::iterator Ins = MBB->begin();
350  DebugLoc DL; // Defaults to "unknown"
351 
352  if (Ins != MBB->end())
353  DL = Ins->getDebugLoc();
354 
355  MachineFunction *MF = MBB->getParent();
356  const GCNSubtarget &Subtarget = MF->getSubtarget<GCNSubtarget>();
357  const SIInstrInfo *TII = Subtarget.getInstrInfo();
358 
359  if (Offset == 0) {
360  BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), BaseReg)
361  .addFrameIndex(FrameIdx);
362  return;
363  }
364 
365  MachineRegisterInfo &MRI = MF->getRegInfo();
366  unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
367 
368  unsigned FIReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
369 
370  BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
371  .addImm(Offset);
372  BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), FIReg)
373  .addFrameIndex(FrameIdx);
374 
375  TII->getAddNoCarry(*MBB, Ins, DL, BaseReg)
376  .addReg(OffsetReg, RegState::Kill)
377  .addReg(FIReg)
378  .addImm(0); // clamp bit
379 }
380 
381 void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
382  int64_t Offset) const {
383 
384  MachineBasicBlock *MBB = MI.getParent();
385  MachineFunction *MF = MBB->getParent();
386  const GCNSubtarget &Subtarget = MF->getSubtarget<GCNSubtarget>();
387  const SIInstrInfo *TII = Subtarget.getInstrInfo();
388 
389 #ifndef NDEBUG
390  // FIXME: Is it possible to be storing a frame index to itself?
391  bool SeenFI = false;
392  for (const MachineOperand &MO: MI.operands()) {
393  if (MO.isFI()) {
394  if (SeenFI)
395  llvm_unreachable("should not see multiple frame indices");
396 
397  SeenFI = true;
398  }
399  }
400 #endif
401 
402  MachineOperand *FIOp = TII->getNamedOperand(MI, AMDGPU::OpName::vaddr);
403  assert(FIOp && FIOp->isFI() && "frame index must be address operand");
404  assert(TII->isMUBUF(MI));
405  assert(TII->getNamedOperand(MI, AMDGPU::OpName::soffset)->getReg() ==
406  MF->getInfo<SIMachineFunctionInfo>()->getFrameOffsetReg() &&
407  "should only be seeing frame offset relative FrameIndex");
408 
409 
410  MachineOperand *OffsetOp = TII->getNamedOperand(MI, AMDGPU::OpName::offset);
411  int64_t NewOffset = OffsetOp->getImm() + Offset;
412  assert(isUInt<12>(NewOffset) && "offset should be legal");
413 
414  FIOp->ChangeToRegister(BaseReg, false);
415  OffsetOp->setImm(NewOffset);
416 }
417 
418 bool SIRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
419  unsigned BaseReg,
420  int64_t Offset) const {
421  if (!SIInstrInfo::isMUBUF(*MI))
422  return false;
423 
424  int64_t NewOffset = Offset + getMUBUFInstrOffset(MI);
425 
426  return isUInt<12>(NewOffset);
427 }
428 
429 const TargetRegisterClass *SIRegisterInfo::getPointerRegClass(
430  const MachineFunction &MF, unsigned Kind) const {
431  // This is inaccurate. It depends on the instruction and address space. The
432  // only place where we should hit this is for dealing with frame indexes /
433  // private accesses, so this is correct in that case.
434  return &AMDGPU::VGPR_32RegClass;
435 }
436 
437 static unsigned getNumSubRegsForSpillOp(unsigned Op) {
438 
439  switch (Op) {
440  case AMDGPU::SI_SPILL_S512_SAVE:
441  case AMDGPU::SI_SPILL_S512_RESTORE:
442  case AMDGPU::SI_SPILL_V512_SAVE:
443  case AMDGPU::SI_SPILL_V512_RESTORE:
444  return 16;
445  case AMDGPU::SI_SPILL_S256_SAVE:
446  case AMDGPU::SI_SPILL_S256_RESTORE:
447  case AMDGPU::SI_SPILL_V256_SAVE:
448  case AMDGPU::SI_SPILL_V256_RESTORE:
449  return 8;
450  case AMDGPU::SI_SPILL_S160_SAVE:
451  case AMDGPU::SI_SPILL_S160_RESTORE:
452  case AMDGPU::SI_SPILL_V160_SAVE:
453  case AMDGPU::SI_SPILL_V160_RESTORE:
454  return 5;
455  case AMDGPU::SI_SPILL_S128_SAVE:
456  case AMDGPU::SI_SPILL_S128_RESTORE:
457  case AMDGPU::SI_SPILL_V128_SAVE:
458  case AMDGPU::SI_SPILL_V128_RESTORE:
459  return 4;
460  case AMDGPU::SI_SPILL_S96_SAVE:
461  case AMDGPU::SI_SPILL_S96_RESTORE:
462  case AMDGPU::SI_SPILL_V96_SAVE:
463  case AMDGPU::SI_SPILL_V96_RESTORE:
464  return 3;
465  case AMDGPU::SI_SPILL_S64_SAVE:
466  case AMDGPU::SI_SPILL_S64_RESTORE:
467  case AMDGPU::SI_SPILL_V64_SAVE:
468  case AMDGPU::SI_SPILL_V64_RESTORE:
469  return 2;
470  case AMDGPU::SI_SPILL_S32_SAVE:
471  case AMDGPU::SI_SPILL_S32_RESTORE:
472  case AMDGPU::SI_SPILL_V32_SAVE:
473  case AMDGPU::SI_SPILL_V32_RESTORE:
474  return 1;
475  default: llvm_unreachable("Invalid spill opcode");
476  }
477 }
478 
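// Map an OFFEN (VGPR-addressed) MUBUF store opcode to its OFFSET
// (immediate-only) form, or return -1 if there is none; getOffsetMUBUFLoad
// below does the same for loads.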
479 static int getOffsetMUBUFStore(unsigned Opc) {
480  switch (Opc) {
481  case AMDGPU::BUFFER_STORE_DWORD_OFFEN:
482  return AMDGPU::BUFFER_STORE_DWORD_OFFSET;
483  case AMDGPU::BUFFER_STORE_BYTE_OFFEN:
484  return AMDGPU::BUFFER_STORE_BYTE_OFFSET;
485  case AMDGPU::BUFFER_STORE_SHORT_OFFEN:
486  return AMDGPU::BUFFER_STORE_SHORT_OFFSET;
487  case AMDGPU::BUFFER_STORE_DWORDX2_OFFEN:
488  return AMDGPU::BUFFER_STORE_DWORDX2_OFFSET;
489  case AMDGPU::BUFFER_STORE_DWORDX4_OFFEN:
490  return AMDGPU::BUFFER_STORE_DWORDX4_OFFSET;
491  case AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFEN:
492  return AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFSET;
493  case AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFEN:
494  return AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFSET;
495  default:
496  return -1;
497  }
498 }
499 
500 static int getOffsetMUBUFLoad(unsigned Opc) {
501  switch (Opc) {
502  case AMDGPU::BUFFER_LOAD_DWORD_OFFEN:
503  return AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
504  case AMDGPU::BUFFER_LOAD_UBYTE_OFFEN:
505  return AMDGPU::BUFFER_LOAD_UBYTE_OFFSET;
506  case AMDGPU::BUFFER_LOAD_SBYTE_OFFEN:
507  return AMDGPU::BUFFER_LOAD_SBYTE_OFFSET;
508  case AMDGPU::BUFFER_LOAD_USHORT_OFFEN:
509  return AMDGPU::BUFFER_LOAD_USHORT_OFFSET;
510  case AMDGPU::BUFFER_LOAD_SSHORT_OFFEN:
511  return AMDGPU::BUFFER_LOAD_SSHORT_OFFSET;
512  case AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN:
513  return AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET;
514  case AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN:
515  return AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET;
516  case AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFEN:
517  return AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFSET;
518  case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN:
519  return AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET;
520  case AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFEN:
521  return AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFSET;
522  case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN:
523  return AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET;
524  case AMDGPU::BUFFER_LOAD_SHORT_D16_OFFEN:
525  return AMDGPU::BUFFER_LOAD_SHORT_D16_OFFSET;
526  case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN:
527  return AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET;
528  default:
529  return -1;
530  }
531 }
532 
533 // This differs from buildSpillLoadStore by only scavenging a VGPR. It does not
534 // need to handle the case where an SGPR may need to be spilled while spilling.
535 static bool buildMUBUFOffsetLoadStore(const SIInstrInfo *TII,
536  MachineFrameInfo &MFI,
537  MachineBasicBlock::iterator MI,
538  int Index,
539  int64_t Offset) {
540  MachineBasicBlock *MBB = MI->getParent();
541  const DebugLoc &DL = MI->getDebugLoc();
542  bool IsStore = MI->mayStore();
543 
544  unsigned Opc = MI->getOpcode();
545  int LoadStoreOp = IsStore ?
546  getOffsetMUBUFStore(Opc) : getOffsetMUBUFLoad(Opc);
547  if (LoadStoreOp == -1)
548  return false;
549 
550  const MachineOperand *Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata);
551  MachineInstrBuilder NewMI =
552  BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
553  .add(*Reg)
554  .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc))
555  .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset))
556  .addImm(Offset)
557  .addImm(0) // glc
558  .addImm(0) // slc
559  .addImm(0) // tfe
560  .addImm(0) // dlc
561  .cloneMemRefs(*MI);
562 
563  const MachineOperand *VDataIn = TII->getNamedOperand(*MI,
564  AMDGPU::OpName::vdata_in);
565  if (VDataIn)
566  NewMI.add(*VDataIn);
567  return true;
568 }
569 
570 void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
571  unsigned LoadStoreOp,
572  int Index,
573  unsigned ValueReg,
574  bool IsKill,
575  unsigned ScratchRsrcReg,
576  unsigned ScratchOffsetReg,
577  int64_t InstOffset,
578  MachineMemOperand *MMO,
579  RegScavenger *RS) const {
580  MachineBasicBlock *MBB = MI->getParent();
581  MachineFunction *MF = MI->getParent()->getParent();
582  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
583  const SIInstrInfo *TII = ST.getInstrInfo();
584  const MachineFrameInfo &MFI = MF->getFrameInfo();
585 
586  const MCInstrDesc &Desc = TII->get(LoadStoreOp);
587  const DebugLoc &DL = MI->getDebugLoc();
588  bool IsStore = Desc.mayStore();
589 
590  bool Scavenged = false;
591  unsigned SOffset = ScratchOffsetReg;
592 
593  const unsigned EltSize = 4;
594  const TargetRegisterClass *RC = getRegClassForReg(MF->getRegInfo(), ValueReg);
595  unsigned NumSubRegs = AMDGPU::getRegBitWidth(RC->getID()) / (EltSize * CHAR_BIT);
596  unsigned Size = NumSubRegs * EltSize;
597  int64_t Offset = InstOffset + MFI.getObjectOffset(Index);
598  int64_t ScratchOffsetRegDelta = 0;
599 
600  unsigned Align = MFI.getObjectAlignment(Index);
601  const MachinePointerInfo &BasePtrInfo = MMO->getPointerInfo();
602 
603  assert((Offset % EltSize) == 0 && "unexpected VGPR spill offset");
604 
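 // The MUBUF immediate offset is a 12-bit unsigned field; if the offset of
 // the last sub-register access does not fit, materialize the offset in
 // SOffset (a scavenged SGPR, or the scratch offset register itself) instead.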
605  if (!isUInt<12>(Offset + Size - EltSize)) {
606  SOffset = AMDGPU::NoRegister;
607 
608  // We currently only support spilling VGPRs to EltSize boundaries, meaning
609  // we can simplify the adjustment of Offset here to just scale with
610  // WavefrontSize.
611  Offset *= ST.getWavefrontSize();
612 
613  // We don't have access to the register scavenger if this function is called
614  // during PEI::scavengeFrameVirtualRegs().
615  if (RS)
616  SOffset = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, MI, 0, false);
617 
618  if (SOffset == AMDGPU::NoRegister) {
619  // There are no free SGPRs, and we are in the process of spilling
620  // VGPRs too. Since we need a VGPR in order to spill SGPRs (this is true
621  // on SI/CI, and on VI it is true until we implement spilling using scalar
622  // stores), we have no way to free up an SGPR. Our solution here is to
623  // add the offset directly to the ScratchOffset register, and then
624  // subtract the offset after the spill to return ScratchOffset to its
625  // original value.
626  SOffset = ScratchOffsetReg;
627  ScratchOffsetRegDelta = Offset;
628  } else {
629  Scavenged = true;
630  }
631 
632  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), SOffset)
633  .addReg(ScratchOffsetReg)
634  .addImm(Offset);
635 
636  Offset = 0;
637  }
638 
639  for (unsigned i = 0, e = NumSubRegs; i != e; ++i, Offset += EltSize) {
640  unsigned SubReg = NumSubRegs == 1 ?
641  ValueReg : getSubReg(ValueReg, getSubRegFromChannel(i));
642 
643  unsigned SOffsetRegState = 0;
644  unsigned SrcDstRegState = getDefRegState(!IsStore);
645  if (i + 1 == e) {
646  SOffsetRegState |= getKillRegState(Scavenged);
647  // The last implicit use carries the "Kill" flag.
648  SrcDstRegState |= getKillRegState(IsKill);
649  }
650 
651  MachinePointerInfo PInfo = BasePtrInfo.getWithOffset(EltSize * i);
652  MachineMemOperand *NewMMO
653  = MF->getMachineMemOperand(PInfo, MMO->getFlags(),
654  EltSize, MinAlign(Align, EltSize * i));
655 
656  auto MIB = BuildMI(*MBB, MI, DL, Desc)
657  .addReg(SubReg, getDefRegState(!IsStore) | getKillRegState(IsKill))
658  .addReg(ScratchRsrcReg)
659  .addReg(SOffset, SOffsetRegState)
660  .addImm(Offset)
661  .addImm(0) // glc
662  .addImm(0) // slc
663  .addImm(0) // tfe
664  .addImm(0) // dlc
665  .addMemOperand(NewMMO);
666 
667  if (NumSubRegs > 1)
668  MIB.addReg(ValueReg, RegState::Implicit | SrcDstRegState);
669  }
670 
671  if (ScratchOffsetRegDelta != 0) {
672  // Subtract the offset we added to the ScratchOffset register.
673  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), ScratchOffsetReg)
674  .addReg(ScratchOffsetReg)
675  .addImm(ScratchOffsetRegDelta);
676  }
677 }
678 
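// Pick the widest scalar buffer access that evenly divides the spilled
// super-register: 16 bytes (DWORDX4), 8 bytes (DWORDX2), or 4 bytes (DWORD).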
679 static std::pair<unsigned, unsigned> getSpillEltSize(unsigned SuperRegSize,
680  bool Store) {
681  if (SuperRegSize % 16 == 0) {
682  return { 16, Store ? AMDGPU::S_BUFFER_STORE_DWORDX4_SGPR :
683  AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR };
684  }
685 
686  if (SuperRegSize % 8 == 0) {
687  return { 8, Store ? AMDGPU::S_BUFFER_STORE_DWORDX2_SGPR :
688  AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR };
689  }
690 
691  return { 4, Store ? AMDGPU::S_BUFFER_STORE_DWORD_SGPR :
692  AMDGPU::S_BUFFER_LOAD_DWORD_SGPR};
693 }
694 
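// Spill an SGPR super-register either to VGPR lanes via V_WRITELANE_B32, to
// scratch memory via scalar (SMEM) stores, or, as a last resort, to a stack
// slot through a temporary VGPR.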
695 bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
696  int Index,
697  RegScavenger *RS,
698  bool OnlyToVGPR) const {
699  MachineBasicBlock *MBB = MI->getParent();
700  MachineFunction *MF = MBB->getParent();
701  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
702  DenseSet<unsigned> SGPRSpillVGPRDefinedSet;
703 
704  ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills
705  = MFI->getSGPRToVGPRSpills(Index);
706  bool SpillToVGPR = !VGPRSpills.empty();
707  if (OnlyToVGPR && !SpillToVGPR)
708  return false;
709 
710  MachineRegisterInfo &MRI = MF->getRegInfo();
711  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
712  const SIInstrInfo *TII = ST.getInstrInfo();
713 
714  unsigned SuperReg = MI->getOperand(0).getReg();
715  bool IsKill = MI->getOperand(0).isKill();
716  const DebugLoc &DL = MI->getDebugLoc();
717 
718  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
719 
720  bool SpillToSMEM = spillSGPRToSMEM();
721  if (SpillToSMEM && OnlyToVGPR)
722  return false;
723 
724  unsigned FrameReg = getFrameRegister(*MF);
725 
726  assert(SpillToVGPR || (SuperReg != MFI->getStackPtrOffsetReg() &&
727  SuperReg != MFI->getFrameOffsetReg() &&
728  SuperReg != MFI->getScratchWaveOffsetReg()));
729 
730  assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
731 
732  unsigned OffsetReg = AMDGPU::M0;
733  unsigned M0CopyReg = AMDGPU::NoRegister;
734 
735  if (SpillToSMEM) {
736  if (RS->isRegUsed(AMDGPU::M0)) {
737  M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
738  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
739  .addReg(AMDGPU::M0);
740  }
741  }
742 
743  unsigned ScalarStoreOp;
744  unsigned EltSize = 4;
745  const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
746  if (SpillToSMEM && isSGPRClass(RC)) {
747  // XXX - if private_element_size is larger than 4 it might be useful to be
748  // able to spill wider vmem spills.
749  std::tie(EltSize, ScalarStoreOp) =
750  getSpillEltSize(getRegSizeInBits(*RC) / 8, true);
751  }
752 
753  ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
754  unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
755 
756  // SubReg carries the "Kill" flag when SubReg == SuperReg.
757  unsigned SubKillState = getKillRegState((NumSubRegs == 1) && IsKill);
758  for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
759  unsigned SubReg = NumSubRegs == 1 ?
760  SuperReg : getSubReg(SuperReg, SplitParts[i]);
761 
762  if (SpillToSMEM) {
763  int64_t FrOffset = FrameInfo.getObjectOffset(Index);
764 
765  // The allocated memory size is really the wavefront size * the frame
766  // index size. The widest register class is 64 bytes, so a 4-byte scratch
767  // allocation is enough to spill this in a single stack object.
768  //
769  // FIXME: Frame size/offsets are computed earlier than this, so the extra
770  // space is still unnecessarily allocated.
771 
772  unsigned Align = FrameInfo.getObjectAlignment(Index);
773  MachinePointerInfo PtrInfo
774  = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
775  MachineMemOperand *MMO
776  = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
777  EltSize, MinAlign(Align, EltSize * i));
778 
779  // SMEM instructions only support a single offset, so increment the wave
780  // offset.
781 
782  int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
783  if (Offset != 0) {
784  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
785  .addReg(FrameReg)
786  .addImm(Offset);
787  } else {
788  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
789  .addReg(FrameReg);
790  }
791 
792  BuildMI(*MBB, MI, DL, TII->get(ScalarStoreOp))
793  .addReg(SubReg, getKillRegState(IsKill)) // sdata
794  .addReg(MFI->getScratchRSrcReg()) // sbase
795  .addReg(OffsetReg, RegState::Kill) // soff
796  .addImm(0) // glc
797  .addImm(0) // dlc
798  .addMemOperand(MMO);
799 
800  continue;
801  }
802 
803  if (SpillToVGPR) {
804  SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
805 
806  // During SGPR spilling to VGPR, determine if the VGPR is defined. The
807  // only circumstance in which we say it is undefined is when it is the
808  // first spill to this VGPR in the first basic block.
809  bool VGPRDefined = true;
810  if (MBB == &MF->front())
811  VGPRDefined = !SGPRSpillVGPRDefinedSet.insert(Spill.VGPR).second;
812 
813  // Mark the "old value of vgpr" input undef only if this is the first sgpr
814  // spill to this specific vgpr in the first basic block.
815  BuildMI(*MBB, MI, DL,
816  TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
817  Spill.VGPR)
818  .addReg(SubReg, getKillRegState(IsKill))
819  .addImm(Spill.Lane)
820  .addReg(Spill.VGPR, VGPRDefined ? 0 : RegState::Undef);
821 
822  // FIXME: Since this spills to another register instead of an actual
823  // frame index, we should delete the frame index when all references to
824  // it are fixed.
825  } else {
826  // XXX - Can the spill to VGPR fail for some subregisters but not others?
827  if (OnlyToVGPR)
828  return false;
829 
830  // Spill SGPR to a frame index.
831  // TODO: Should VI try to spill to VGPR and then spill to SMEM?
832  unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
833  // TODO: Should VI try to spill to VGPR and then spill to SMEM?
834 
835  MachineInstrBuilder Mov
836  = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
837  .addReg(SubReg, SubKillState);
838 
839 
840  // There could be undef components of a spilled super register.
841  // TODO: Can we detect this and skip the spill?
842  if (NumSubRegs > 1) {
843  // The last implicit use of the SuperReg carries the "Kill" flag.
844  unsigned SuperKillState = 0;
845  if (i + 1 == e)
846  SuperKillState |= getKillRegState(IsKill);
847  Mov.addReg(SuperReg, RegState::Implicit | SuperKillState);
848  }
849 
850  unsigned Align = FrameInfo.getObjectAlignment(Index);
851  MachinePointerInfo PtrInfo
852  = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
853  MachineMemOperand *MMO
854  = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
855  EltSize, MinAlign(Align, EltSize * i));
856  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_SAVE))
857  .addReg(TmpReg, RegState::Kill) // src
858  .addFrameIndex(Index) // vaddr
859  .addReg(MFI->getScratchRSrcReg()) // srsrc
860  .addReg(MFI->getStackPtrOffsetReg()) // soffset
861  .addImm(i * 4) // offset
862  .addMemOperand(MMO);
863  }
864  }
865 
866  if (M0CopyReg != AMDGPU::NoRegister) {
867  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
868  .addReg(M0CopyReg, RegState::Kill);
869  }
870 
871  MI->eraseFromParent();
872  MFI->addToSpilledSGPRs(NumSubRegs);
873  return true;
874 }
875 
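// Reload an SGPR super-register from VGPR lanes via V_READLANE_B32, from
// scratch memory via scalar (SMEM) loads, or from a stack slot through a
// temporary VGPR and V_READFIRSTLANE_B32.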
876 bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
877  int Index,
878  RegScavenger *RS,
879  bool OnlyToVGPR) const {
880  MachineFunction *MF = MI->getParent()->getParent();
881  MachineRegisterInfo &MRI = MF->getRegInfo();
882  MachineBasicBlock *MBB = MI->getParent();
883  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
884
885  ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills
886  = MFI->getSGPRToVGPRSpills(Index);
887  bool SpillToVGPR = !VGPRSpills.empty();
888  if (OnlyToVGPR && !SpillToVGPR)
889  return false;
890 
891  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
892  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
893  const SIInstrInfo *TII = ST.getInstrInfo();
894  const DebugLoc &DL = MI->getDebugLoc();
895 
896  unsigned SuperReg = MI->getOperand(0).getReg();
897  bool SpillToSMEM = spillSGPRToSMEM();
898  if (SpillToSMEM && OnlyToVGPR)
899  return false;
900 
901  assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
902 
903  unsigned OffsetReg = AMDGPU::M0;
904  unsigned M0CopyReg = AMDGPU::NoRegister;
905 
906  if (SpillToSMEM) {
907  if (RS->isRegUsed(AMDGPU::M0)) {
908  M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
909  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
910  .addReg(AMDGPU::M0);
911  }
912  }
913 
914  unsigned EltSize = 4;
915  unsigned ScalarLoadOp;
916 
917  unsigned FrameReg = getFrameRegister(*MF);
918 
919  const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
920  if (SpillToSMEM && isSGPRClass(RC)) {
921  // XXX - if private_element_size is larger than 4 it might be useful to be
922  // able to spill wider vmem spills.
923  std::tie(EltSize, ScalarLoadOp) =
924  getSpillEltSize(getRegSizeInBits(*RC) / 8, false);
925  }
926 
927  ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
928  unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
929 
930  // SubReg carries the "Kill" flag when SubReg == SuperReg.
931  int64_t FrOffset = FrameInfo.getObjectOffset(Index);
932 
933  for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
934  unsigned SubReg = NumSubRegs == 1 ?
935  SuperReg : getSubReg(SuperReg, SplitParts[i]);
936 
937  if (SpillToSMEM) {
938  // FIXME: Size may be > 4 but extra bytes wasted.
939  unsigned Align = FrameInfo.getObjectAlignment(Index);
940  MachinePointerInfo PtrInfo
941  = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
942  MachineMemOperand *MMO
943  = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
944  EltSize, MinAlign(Align, EltSize * i));
945 
946  // Add i * 4 offset
947  int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
948  if (Offset != 0) {
949  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
950  .addReg(FrameReg)
951  .addImm(Offset);
952  } else {
953  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
954  .addReg(FrameReg);
955  }
956 
957  auto MIB =
958  BuildMI(*MBB, MI, DL, TII->get(ScalarLoadOp), SubReg)
959  .addReg(MFI->getScratchRSrcReg()) // sbase
960  .addReg(OffsetReg, RegState::Kill) // soff
961  .addImm(0) // glc
962  .addImm(0) // dlc
963  .addMemOperand(MMO);
964 
965  if (NumSubRegs > 1 && i == 0)
966  MIB.addReg(SuperReg, RegState::ImplicitDefine);
967 
968  continue;
969  }
970 
971  if (SpillToVGPR) {
972  SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
973  auto MIB =
974  BuildMI(*MBB, MI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
975  SubReg)
976  .addReg(Spill.VGPR)
977  .addImm(Spill.Lane);
978 
979  if (NumSubRegs > 1 && i == 0)
980  MIB.addReg(SuperReg, RegState::ImplicitDefine);
981  } else {
982  if (OnlyToVGPR)
983  return false;
984 
985  // Restore SGPR from a stack slot.
986  // FIXME: We should use S_LOAD_DWORD here for VI.
987  unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
988  unsigned Align = FrameInfo.getObjectAlignment(Index);
989 
990  MachinePointerInfo PtrInfo
991  = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
992 
993  MachineMemOperand *MMO = MF->getMachineMemOperand(PtrInfo,
994  MachineMemOperand::MOLoad, EltSize,
995  MinAlign(Align, EltSize * i));
996 
997  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_RESTORE), TmpReg)
998  .addFrameIndex(Index) // vaddr
999  .addReg(MFI->getScratchRSrcReg()) // srsrc
1000  .addReg(MFI->getStackPtrOffsetReg()) // soffset
1001  .addImm(i * 4) // offset
1002  .addMemOperand(MMO);
1003 
1004  auto MIB =
1005  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
1006  .addReg(TmpReg, RegState::Kill);
1007 
1008  if (NumSubRegs > 1)
1009  MIB.addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
1010  }
1011  }
1012 
1013  if (M0CopyReg != AMDGPU::NoRegister) {
1014  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
1015  .addReg(M0CopyReg, RegState::Kill);
1016  }
1017 
1018  MI->eraseFromParent();
1019  return true;
1020 }
1021 
1022 /// Special case of eliminateFrameIndex. Returns true if the SGPR was spilled to
1023 /// a VGPR and the stack slot can be safely eliminated when all other users are
1024 /// handled.
1025 bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex(
1026  MachineBasicBlock::iterator MI,
1027  int FI,
1028  RegScavenger *RS) const {
1029  switch (MI->getOpcode()) {
1030  case AMDGPU::SI_SPILL_S512_SAVE:
1031  case AMDGPU::SI_SPILL_S256_SAVE:
1032  case AMDGPU::SI_SPILL_S160_SAVE:
1033  case AMDGPU::SI_SPILL_S128_SAVE:
1034  case AMDGPU::SI_SPILL_S96_SAVE:
1035  case AMDGPU::SI_SPILL_S64_SAVE:
1036  case AMDGPU::SI_SPILL_S32_SAVE:
1037  return spillSGPR(MI, FI, RS, true);
1038  case AMDGPU::SI_SPILL_S512_RESTORE:
1039  case AMDGPU::SI_SPILL_S256_RESTORE:
1040  case AMDGPU::SI_SPILL_S160_RESTORE:
1041  case AMDGPU::SI_SPILL_S128_RESTORE:
1042  case AMDGPU::SI_SPILL_S96_RESTORE:
1043  case AMDGPU::SI_SPILL_S64_RESTORE:
1044  case AMDGPU::SI_SPILL_S32_RESTORE:
1045  return restoreSGPR(MI, FI, RS, true);
1046  default:
1047  llvm_unreachable("not an SGPR spill instruction");
1048  }
1049 }
1050 
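// Rewrite a frame-index operand: SGPR and VGPR spill pseudos are expanded
// here; for MUBUF accesses the frame index is folded into the 12-bit
// immediate offset when it fits, and otherwise the offset is converted to an
// immediate or materialized in a register.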
1051 void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
1052  int SPAdj, unsigned FIOperandNum,
1053  RegScavenger *RS) const {
1054  MachineFunction *MF = MI->getParent()->getParent();
1055  MachineRegisterInfo &MRI = MF->getRegInfo();
1056  MachineBasicBlock *MBB = MI->getParent();
1057  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
1058  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
1059  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
1060  const SIInstrInfo *TII = ST.getInstrInfo();
1061  DebugLoc DL = MI->getDebugLoc();
1062 
1063  MachineOperand &FIOp = MI->getOperand(FIOperandNum);
1064  int Index = MI->getOperand(FIOperandNum).getIndex();
1065 
1066  unsigned FrameReg = getFrameRegister(*MF);
1067 
1068  switch (MI->getOpcode()) {
1069  // SGPR register spill
1070  case AMDGPU::SI_SPILL_S512_SAVE:
1071  case AMDGPU::SI_SPILL_S256_SAVE:
1072  case AMDGPU::SI_SPILL_S160_SAVE:
1073  case AMDGPU::SI_SPILL_S128_SAVE:
1074  case AMDGPU::SI_SPILL_S96_SAVE:
1075  case AMDGPU::SI_SPILL_S64_SAVE:
1076  case AMDGPU::SI_SPILL_S32_SAVE: {
1077  spillSGPR(MI, Index, RS);
1078  break;
1079  }
1080 
1081  // SGPR register restore
1082  case AMDGPU::SI_SPILL_S512_RESTORE:
1083  case AMDGPU::SI_SPILL_S256_RESTORE:
1084  case AMDGPU::SI_SPILL_S160_RESTORE:
1085  case AMDGPU::SI_SPILL_S128_RESTORE:
1086  case AMDGPU::SI_SPILL_S96_RESTORE:
1087  case AMDGPU::SI_SPILL_S64_RESTORE:
1088  case AMDGPU::SI_SPILL_S32_RESTORE: {
1089  restoreSGPR(MI, Index, RS);
1090  break;
1091  }
1092 
1093  // VGPR register spill
1094  case AMDGPU::SI_SPILL_V512_SAVE:
1095  case AMDGPU::SI_SPILL_V256_SAVE:
1096  case AMDGPU::SI_SPILL_V160_SAVE:
1097  case AMDGPU::SI_SPILL_V128_SAVE:
1098  case AMDGPU::SI_SPILL_V96_SAVE:
1099  case AMDGPU::SI_SPILL_V64_SAVE:
1100  case AMDGPU::SI_SPILL_V32_SAVE: {
1101  const MachineOperand *VData = TII->getNamedOperand(*MI,
1102  AMDGPU::OpName::vdata);
1103  assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
1104  MFI->getStackPtrOffsetReg());
1105 
1106  buildSpillLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,
1107  Index,
1108  VData->getReg(), VData->isKill(),
1109  TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
1110  FrameReg,
1111  TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
1112  *MI->memoperands_begin(),
1113  RS);
1114  MFI->addToSpilledVGPRs(getNumSubRegsForSpillOp(MI->getOpcode()));
1115  MI->eraseFromParent();
1116  break;
1117  }
1118  case AMDGPU::SI_SPILL_V32_RESTORE:
1119  case AMDGPU::SI_SPILL_V64_RESTORE:
1120  case AMDGPU::SI_SPILL_V96_RESTORE:
1121  case AMDGPU::SI_SPILL_V128_RESTORE:
1122  case AMDGPU::SI_SPILL_V160_RESTORE:
1123  case AMDGPU::SI_SPILL_V256_RESTORE:
1124  case AMDGPU::SI_SPILL_V512_RESTORE: {
1125  const MachineOperand *VData = TII->getNamedOperand(*MI,
1126  AMDGPU::OpName::vdata);
1127  assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
1128  MFI->getStackPtrOffsetReg());
1129 
1130  buildSpillLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
1131  Index,
1132  VData->getReg(), VData->isKill(),
1133  TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
1134  FrameReg,
1135  TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
1136  *MI->memoperands_begin(),
1137  RS);
1138  MI->eraseFromParent();
1139  break;
1140  }
1141 
1142  default: {
1143  const DebugLoc &DL = MI->getDebugLoc();
1144  bool IsMUBUF = TII->isMUBUF(*MI);
1145 
1146  if (!IsMUBUF && !MFI->isEntryFunction()) {
1147  // Convert to an absolute stack address by finding the offset from the
1148  // scratch wave base and scaling by the wave size.
1149  //
1150  // In an entry function/kernel the offset is already the absolute
1151  // address relative to the frame register.
1152 
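 // i.e. ResultReg = ((FrameReg - ScratchWaveOffset) >> log2(WavefrontSize))
 // + frame object offset.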
1153  unsigned DiffReg
1154  = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
1155 
1156  bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32;
1157  unsigned ResultReg = IsCopy ?
1158  MI->getOperand(0).getReg() :
1159  MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1160 
1161  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), DiffReg)
1162  .addReg(FrameReg)
1163  .addReg(MFI->getScratchWaveOffsetReg());
1164
1165  int64_t Offset = FrameInfo.getObjectOffset(Index);
1166  if (Offset == 0) {
1167  // XXX - This never happens because of emergency scavenging slot at 0?
1168  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ResultReg)
1169  .addImm(Log2_32(ST.getWavefrontSize()))
1170  .addReg(DiffReg);
1171  } else {
1172  unsigned ScaledReg
1173  = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1174 
1175  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ScaledReg)
1176  .addImm(Log2_32(ST.getWavefrontSize()))
1177  .addReg(DiffReg, RegState::Kill);
1178 
1179  // TODO: Fold if use instruction is another add of a constant.
1180  if (AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm())) {
1181  TII->getAddNoCarry(*MBB, MI, DL, ResultReg)
1182  .addImm(Offset)
1183  .addReg(ScaledReg, RegState::Kill)
1184  .addImm(0); // clamp bit
1185  } else {
1186  unsigned ConstOffsetReg
1187  = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
1188 
1189  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), ConstOffsetReg)
1190  .addImm(Offset);
1191  TII->getAddNoCarry(*MBB, MI, DL, ResultReg)
1192  .addReg(ConstOffsetReg, RegState::Kill)
1193  .addReg(ScaledReg, RegState::Kill)
1194  .addImm(0); // clamp bit
1195  }
1196  }
1197 
1198  // Don't introduce an extra copy if we're just materializing in a mov.
1199  if (IsCopy)
1200  MI->eraseFromParent();
1201  else
1202  FIOp.ChangeToRegister(ResultReg, false, false, true);
1203  return;
1204  }
1205 
1206  if (IsMUBUF) {
1207  // Disable offen so we don't need a 0 vgpr base.
1208  assert(static_cast<int>(FIOperandNum) ==
1209  AMDGPU::getNamedOperandIdx(MI->getOpcode(),
1210  AMDGPU::OpName::vaddr));
1211 
1212  assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
1213  MFI->getStackPtrOffsetReg());
1214 
1215  TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->setReg(FrameReg);
1216 
1217  int64_t Offset = FrameInfo.getObjectOffset(Index);
1218  int64_t OldImm
1219  = TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm();
1220  int64_t NewOffset = OldImm + Offset;
1221 
1222  if (isUInt<12>(NewOffset) &&
1223  buildMUBUFOffsetLoadStore(TII, FrameInfo, MI, Index, NewOffset)) {
1224  MI->eraseFromParent();
1225  return;
1226  }
1227  }
1228 
1229  // If the offset is simply too big, don't convert to a scratch wave offset
1230  // relative index.
1231 
1232  int64_t Offset = FrameInfo.getObjectOffset(Index);
1233  FIOp.ChangeToImmediate(Offset);
1234  if (!TII->isImmOperandLegal(*MI, FIOperandNum, FIOp)) {
1235  unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1236  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
1237  .addImm(Offset);
1238  FIOp.ChangeToRegister(TmpReg, false, false, true);
1239  }
1240  }
1241  }
1242 }
1243 
1244 StringRef SIRegisterInfo::getRegAsmName(unsigned Reg) const {
1245  #define AMDGPU_REG_ASM_NAMES
1246  #include "AMDGPURegAsmNames.inc.cpp"
1247 
1248  #define REG_RANGE(BeginReg, EndReg, RegTable) \
1249  if (Reg >= BeginReg && Reg <= EndReg) { \
1250  unsigned Index = Reg - BeginReg; \
1251  assert(Index < array_lengthof(RegTable)); \
1252  return RegTable[Index]; \
1253  }
1254 
1255  REG_RANGE(AMDGPU::VGPR0, AMDGPU::VGPR255, VGPR32RegNames);
1256  REG_RANGE(AMDGPU::SGPR0, AMDGPU::SGPR105, SGPR32RegNames);
1257  REG_RANGE(AMDGPU::VGPR0_VGPR1, AMDGPU::VGPR254_VGPR255, VGPR64RegNames);
1258  REG_RANGE(AMDGPU::SGPR0_SGPR1, AMDGPU::SGPR104_SGPR105, SGPR64RegNames);
1259  REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2, AMDGPU::VGPR253_VGPR254_VGPR255,
1260  VGPR96RegNames);
1261 
1262  REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3,
1263  AMDGPU::VGPR252_VGPR253_VGPR254_VGPR255,
1264  VGPR128RegNames);
1265  REG_RANGE(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3,
1266  AMDGPU::SGPR100_SGPR101_SGPR102_SGPR103,
1267  SGPR128RegNames);
1268 
1269  REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7,
1270  AMDGPU::VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255,
1271  VGPR256RegNames);
1272 
1273  REG_RANGE(
1274  AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7_VGPR8_VGPR9_VGPR10_VGPR11_VGPR12_VGPR13_VGPR14_VGPR15,
1275  AMDGPU::VGPR240_VGPR241_VGPR242_VGPR243_VGPR244_VGPR245_VGPR246_VGPR247_VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255,
1276  VGPR512RegNames);
1277 
1278  REG_RANGE(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7,
1279  AMDGPU::SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103,
1280  SGPR256RegNames);
1281 
1282  REG_RANGE(
1283  AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7_SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15,
1284  AMDGPU::SGPR88_SGPR89_SGPR90_SGPR91_SGPR92_SGPR93_SGPR94_SGPR95_SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103,
1285  SGPR512RegNames
1286  );
1287 
1288 #undef REG_RANGE
1289 
1290  // FIXME: Rename flat_scr so we don't need to special case this.
1291  switch (Reg) {
1292  case AMDGPU::FLAT_SCR:
1293  return "flat_scratch";
1294  case AMDGPU::FLAT_SCR_LO:
1295  return "flat_scratch_lo";
1296  case AMDGPU::FLAT_SCR_HI:
1297  return "flat_scratch_hi";
1298  default:
1299  // For the special named registers the default is fine.
1300  return AMDGPURegisterInfo::getRegAsmName(Reg);
1301  }
1302 }
1303 
1304 // FIXME: This is very slow. It might be worth creating a map from physreg to
1305 // register class.
1306 const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
1307  assert(TargetRegisterInfo::isPhysicalRegister(Reg));
1308
1309  static const TargetRegisterClass *const BaseClasses[] = {
1310  &AMDGPU::VGPR_32RegClass,
1311  &AMDGPU::SReg_32RegClass,
1312  &AMDGPU::VReg_64RegClass,
1313  &AMDGPU::SReg_64RegClass,
1314  &AMDGPU::VReg_96RegClass,
1315  &AMDGPU::SReg_96RegClass,
1316  &AMDGPU::VReg_128RegClass,
1317  &AMDGPU::SReg_128RegClass,
1318  &AMDGPU::VReg_160RegClass,
1319  &AMDGPU::SReg_160RegClass,
1320  &AMDGPU::VReg_256RegClass,
1321  &AMDGPU::SReg_256RegClass,
1322  &AMDGPU::VReg_512RegClass,
1323  &AMDGPU::SReg_512RegClass,
1324  &AMDGPU::SCC_CLASSRegClass,
1325  &AMDGPU::Pseudo_SReg_32RegClass,
1326  &AMDGPU::Pseudo_SReg_128RegClass,
1327  };
1328 
1329  for (const TargetRegisterClass *BaseClass : BaseClasses) {
1330  if (BaseClass->contains(Reg)) {
1331  return BaseClass;
1332  }
1333  }
1334  return nullptr;
1335 }
1336 
1337 // TODO: It might be helpful to have some target specific flags in
1338 // TargetRegisterClass to mark which classes are VGPRs to make this trivial.
1339 bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const {
1340  unsigned Size = getRegSizeInBits(*RC);
1341  if (Size < 32)
1342  return false;
1343  switch (Size) {
1344  case 32:
1345  return getCommonSubClass(&AMDGPU::VGPR_32RegClass, RC) != nullptr;
1346  case 64:
1347  return getCommonSubClass(&AMDGPU::VReg_64RegClass, RC) != nullptr;
1348  case 96:
1349  return getCommonSubClass(&AMDGPU::VReg_96RegClass, RC) != nullptr;
1350  case 128:
1351  return getCommonSubClass(&AMDGPU::VReg_128RegClass, RC) != nullptr;
1352  case 160:
1353  return getCommonSubClass(&AMDGPU::VReg_160RegClass, RC) != nullptr;
1354  case 256:
1355  return getCommonSubClass(&AMDGPU::VReg_256RegClass, RC) != nullptr;
1356  case 512:
1357  return getCommonSubClass(&AMDGPU::VReg_512RegClass, RC) != nullptr;
1358  default:
1359  llvm_unreachable("Invalid register class size");
1360  }
1361 }
1362 
1363 const TargetRegisterClass *SIRegisterInfo::getEquivalentVGPRClass(
1364  const TargetRegisterClass *SRC) const {
1365  switch (getRegSizeInBits(*SRC)) {
1366  case 32:
1367  return &AMDGPU::VGPR_32RegClass;
1368  case 64:
1369  return &AMDGPU::VReg_64RegClass;
1370  case 96:
1371  return &AMDGPU::VReg_96RegClass;
1372  case 128:
1373  return &AMDGPU::VReg_128RegClass;
1374  case 160:
1375  return &AMDGPU::VReg_160RegClass;
1376  case 256:
1377  return &AMDGPU::VReg_256RegClass;
1378  case 512:
1379  return &AMDGPU::VReg_512RegClass;
1380  default:
1381  llvm_unreachable("Invalid register class size");
1382  }
1383 }
1384 
1385 const TargetRegisterClass *SIRegisterInfo::getEquivalentSGPRClass(
1386  const TargetRegisterClass *VRC) const {
1387  switch (getRegSizeInBits(*VRC)) {
1388  case 32:
1389  return &AMDGPU::SGPR_32RegClass;
1390  case 64:
1391  return &AMDGPU::SReg_64RegClass;
1392  case 96:
1393  return &AMDGPU::SReg_96RegClass;
1394  case 128:
1395  return &AMDGPU::SReg_128RegClass;
1396  case 160:
1397  return &AMDGPU::SReg_160RegClass;
1398  case 256:
1399  return &AMDGPU::SReg_256RegClass;
1400  case 512:
1401  return &AMDGPU::SReg_512RegClass;
1402  default:
1403  llvm_unreachable("Invalid register class size");
1404  }
1405 }
1406 
1407 const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
1408  const TargetRegisterClass *RC, unsigned SubIdx) const {
1409  if (SubIdx == AMDGPU::NoSubRegister)
1410  return RC;
1411 
1412  // We can assume that each lane corresponds to one 32-bit register.
1413  unsigned Count = getSubRegIndexLaneMask(SubIdx).getNumLanes();
1414  if (isSGPRClass(RC)) {
1415  switch (Count) {
1416  case 1:
1417  return &AMDGPU::SGPR_32RegClass;
1418  case 2:
1419  return &AMDGPU::SReg_64RegClass;
1420  case 3:
1421  return &AMDGPU::SReg_96RegClass;
1422  case 4:
1423  return &AMDGPU::SReg_128RegClass;
1424  case 5:
1425  return &AMDGPU::SReg_160RegClass;
1426  case 8:
1427  return &AMDGPU::SReg_256RegClass;
1428  case 16: /* fall-through */
1429  default:
1430  llvm_unreachable("Invalid sub-register class size");
1431  }
1432  } else {
1433  switch (Count) {
1434  case 1:
1435  return &AMDGPU::VGPR_32RegClass;
1436  case 2:
1437  return &AMDGPU::VReg_64RegClass;
1438  case 3:
1439  return &AMDGPU::VReg_96RegClass;
1440  case 4:
1441  return &AMDGPU::VReg_128RegClass;
1442  case 5:
1443  return &AMDGPU::VReg_160RegClass;
1444  case 8:
1445  return &AMDGPU::VReg_256RegClass;
1446  case 16: /* fall-through */
1447  default:
1448  llvm_unreachable("Invalid sub-register class size");
1449  }
1450  }
1451 }
1452 
1453 bool SIRegisterInfo::shouldRewriteCopySrc(
1454  const TargetRegisterClass *DefRC,
1455  unsigned DefSubReg,
1456  const TargetRegisterClass *SrcRC,
1457  unsigned SrcSubReg) const {
1458  // We want to prefer the smallest register class possible, so we don't want to
1459  // stop and rewrite on anything that looks like a subregister
1460  // extract. Operations mostly don't care about the super register class, so we
1461  // only want to stop on the most basic of copies between the same register
1462  // class.
1463  //
1464  // e.g. if we have something like
1465  // %0 = ...
1466  // %1 = ...
1467  // %2 = REG_SEQUENCE %0, sub0, %1, sub1, %2, sub2
1468  // %3 = COPY %2, sub0
1469  //
1470  // We want to look through the COPY to find:
1471  // => %3 = COPY %0
1472 
1473  // Plain copy.
1474  return getCommonSubClass(DefRC, SrcRC) != nullptr;
1475 }
1476 
1477 /// Returns a register that is not used at any point in the function.
1478 /// If all registers are used, then this function will return
1479 /// AMDGPU::NoRegister.
1480 unsigned
1481 SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI,
1482  const TargetRegisterClass *RC,
1483  const MachineFunction &MF) const {
1484 
1485  for (unsigned Reg : *RC)
1486  if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg))
1487  return Reg;
1488  return AMDGPU::NoRegister;
1489 }
1490 
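// Return the sub-register indices needed to split a register of class RC into
// EltSize-byte (4, 8, or 16) pieces for spilling; an empty list means the
// register is spilled as a single element.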
1491 ArrayRef<int16_t> SIRegisterInfo::getRegSplitParts(const TargetRegisterClass *RC,
1492  unsigned EltSize) const {
1493  if (EltSize == 4) {
1494  static const int16_t Sub0_15[] = {
1495  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
1496  AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
1497  AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
1498  AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
1499  };
1500 
1501  static const int16_t Sub0_7[] = {
1502  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
1503  AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
1504  };
1505 
1506  static const int16_t Sub0_4[] = {
1507  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, AMDGPU::sub4,
1508  };
1509 
1510  static const int16_t Sub0_3[] = {
1511  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
1512  };
1513 
1514  static const int16_t Sub0_2[] = {
1515  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2,
1516  };
1517 
1518  static const int16_t Sub0_1[] = {
1519  AMDGPU::sub0, AMDGPU::sub1,
1520  };
1521 
1522  switch (AMDGPU::getRegBitWidth(*RC->MC)) {
1523  case 32:
1524  return {};
1525  case 64:
1526  return makeArrayRef(Sub0_1);
1527  case 96:
1528  return makeArrayRef(Sub0_2);
1529  case 128:
1530  return makeArrayRef(Sub0_3);
1531  case 160:
1532  return makeArrayRef(Sub0_4);
1533  case 256:
1534  return makeArrayRef(Sub0_7);
1535  case 512:
1536  return makeArrayRef(Sub0_15);
1537  default:
1538  llvm_unreachable("unhandled register size");
1539  }
1540  }
1541 
1542  if (EltSize == 8) {
1543  static const int16_t Sub0_15_64[] = {
1544  AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
1545  AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
1546  AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
1547  AMDGPU::sub12_sub13, AMDGPU::sub14_sub15
1548  };
1549 
1550  static const int16_t Sub0_7_64[] = {
1551  AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
1552  AMDGPU::sub4_sub5, AMDGPU::sub6_sub7
1553  };
1554 
1555 
1556  static const int16_t Sub0_3_64[] = {
1557  AMDGPU::sub0_sub1, AMDGPU::sub2_sub3
1558  };
1559 
1560  switch (AMDGPU::getRegBitWidth(*RC->MC)) {
1561  case 64:
1562  return {};
1563  case 128:
1564  return makeArrayRef(Sub0_3_64);
1565  case 256:
1566  return makeArrayRef(Sub0_7_64);
1567  case 512:
1568  return makeArrayRef(Sub0_15_64);
1569  default:
1570  llvm_unreachable("unhandled register size");
1571  }
1572  }
1573 
1574  assert(EltSize == 16 && "unhandled register spill split size");
1575 
1576  static const int16_t Sub0_15_128[] = {
1577  AMDGPU::sub0_sub1_sub2_sub3,
1578  AMDGPU::sub4_sub5_sub6_sub7,
1579  AMDGPU::sub8_sub9_sub10_sub11,
1580  AMDGPU::sub12_sub13_sub14_sub15
1581  };
1582 
1583  static const int16_t Sub0_7_128[] = {
1584  AMDGPU::sub0_sub1_sub2_sub3,
1585  AMDGPU::sub4_sub5_sub6_sub7
1586  };
1587 
1588  switch (AMDGPU::getRegBitWidth(*RC->MC)) {
1589  case 128:
1590  return {};
1591  case 256:
1592  return makeArrayRef(Sub0_7_128);
1593  case 512:
1594  return makeArrayRef(Sub0_15_128);
1595  default:
1596  llvm_unreachable("unhandled register size");
1597  }
1598 }
1599 
1600 const TargetRegisterClass*
1601 SIRegisterInfo::getRegClassForReg(const MachineRegisterInfo &MRI,
1602  unsigned Reg) const {
1603  if (TargetRegisterInfo::isVirtualRegister(Reg))
1604  return MRI.getRegClass(Reg);
1605 
1606  return getPhysRegClass(Reg);
1607 }
1608 
1609 bool SIRegisterInfo::isVGPR(const MachineRegisterInfo &MRI,
1610  unsigned Reg) const {
1611  const TargetRegisterClass * RC = getRegClassForReg(MRI, Reg);
1612  assert(RC && "Register class for the reg not found");
1613  return hasVGPRs(RC);
1614 }
1615 
1616 bool SIRegisterInfo::shouldCoalesce(MachineInstr *MI,
1617  const TargetRegisterClass *SrcRC,
1618  unsigned SubReg,
1619  const TargetRegisterClass *DstRC,
1620  unsigned DstSubReg,
1621  const TargetRegisterClass *NewRC,
1622  LiveIntervals &LIS) const {
1623  unsigned SrcSize = getRegSizeInBits(*SrcRC);
1624  unsigned DstSize = getRegSizeInBits(*DstRC);
1625  unsigned NewSize = getRegSizeInBits(*NewRC);
1626 
1627  // Do not increase size of registers beyond dword; we would need to allocate
1628  // adjacent registers and constrain regalloc more than needed.
1629 
1630  // Always allow dword coalescing.
1631  if (SrcSize <= 32 || DstSize <= 32)
1632  return true;
1633 
1634  return NewSize <= DstSize || NewSize <= SrcSize;
1635 }
1636 
1637 unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
1638  MachineFunction &MF) const {
1639 
1640  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1641  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
1642
1643  unsigned Occupancy = ST.getOccupancyWithLocalMemSize(MFI->getLDSSize(),
1644  MF.getFunction());
1645  switch (RC->getID()) {
1646  default:
1647  return AMDGPURegisterInfo::getRegPressureLimit(RC, MF);
1648  case AMDGPU::VGPR_32RegClassID:
1649  return std::min(ST.getMaxNumVGPRs(Occupancy), ST.getMaxNumVGPRs(MF));
1650  case AMDGPU::SGPR_32RegClassID:
1651  return std::min(ST.getMaxNumSGPRs(Occupancy, true), ST.getMaxNumSGPRs(MF));
1652  }
1653 }
1654 
1655 unsigned SIRegisterInfo::getRegPressureSetLimit(const MachineFunction &MF,
1656  unsigned Idx) const {
1657  if (Idx == getVGPRPressureSet())
1658  return getRegPressureLimit(&AMDGPU::VGPR_32RegClass,
1659  const_cast<MachineFunction &>(MF));
1660 
1661  if (Idx == getSGPRPressureSet())
1662  return getRegPressureLimit(&AMDGPU::SGPR_32RegClass,
1663  const_cast<MachineFunction &>(MF));
1664 
1665  return AMDGPURegisterInfo::getRegPressureSetLimit(MF, Idx);
1666 }
1667 
1668 const int *SIRegisterInfo::getRegUnitPressureSets(unsigned RegUnit) const {
1669  static const int Empty[] = { -1 };
1670 
1671  if (hasRegUnit(AMDGPU::M0, RegUnit))
1672  return Empty;
1673  return AMDGPURegisterInfo::getRegUnitPressureSets(RegUnit);
1674 }
1675 
1676 unsigned SIRegisterInfo::getReturnAddressReg(const MachineFunction &MF) const {
1677  // Not a callee saved register.
1678  return AMDGPU::SGPR30_SGPR31;
1679 }
1680 
1681 const TargetRegisterClass *
1682 SIRegisterInfo::getConstrainedRegClassForOperand(const MachineOperand &MO,
1683  const MachineRegisterInfo &MRI) const {
1684  unsigned Size = getRegSizeInBits(MO.getReg(), MRI);
1685  const RegisterBank *RB = MRI.getRegBankOrNull(MO.getReg());
1686  if (!RB)
1687  return nullptr;
1688 
1689  Size = PowerOf2Ceil(Size);
1690  switch (Size) {
1691  case 1:
1692  if (RB->getID() == AMDGPU::SCCRegBankID)
1693  return &AMDGPU::SReg_32_XM0RegClass;
1694  break;
1695  case 32:
1696  return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VGPR_32RegClass :
1697  &AMDGPU::SReg_32_XM0RegClass;
1698  case 64:
1699  return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_64RegClass :
1700  &AMDGPU::SReg_64_XEXECRegClass;
1701  case 96:
1702  return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_96RegClass :
1703  &AMDGPU::SReg_96RegClass;
1704  case 128:
1705  return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_128RegClass :
1706  &AMDGPU::SReg_128RegClass;
1707  case 160:
1708  return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_160RegClass :
1709  &AMDGPU::SReg_160RegClass;
1710  case 256:
1711  return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_256RegClass :
1712  &AMDGPU::SReg_256RegClass;
1713  case 512:
1714  return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_512RegClass :
1715  &AMDGPU::SReg_512RegClass;
1716  default:
1717  break;
1718  }
1719  llvm_unreachable("not implemented");
1720 }
1721 
1722 unsigned SIRegisterInfo::getVCC() const {
1723  return isWave32 ? AMDGPU::VCC_LO : AMDGPU::VCC;
1724 }
1725 
1726 const TargetRegisterClass *
1727 SIRegisterInfo::getRegClass(unsigned RCID) const {
1728  switch ((int)RCID) {
1729  case AMDGPU::SReg_1RegClassID:
1730  return getBoolRC();
1731  case AMDGPU::SReg_1_XEXECRegClassID:
1732  return isWave32 ? &AMDGPU::SReg_32_XM0_XEXECRegClass
1733  : &AMDGPU::SReg_64_XEXECRegClass;
1734  case -1:
1735  return nullptr;
1736  default:
1737  return AMDGPURegisterInfo::getRegClass(RCID);
1738  }
1739 }
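
SReg_1 is a virtual "one bit per lane" class; the concrete class it resolves to is as wide as the wavefront's execution mask. A minimal standalone sketch of that wave-size dependence:

// Standalone illustration: a per-lane boolean occupies one scalar register
// whose width matches the wavefront size (32 bits in wave32, 64 in wave64).
#include <cstdio>

static unsigned boolRegWidthInBits(bool IsWave32) {
  return IsWave32 ? 32u : 64u;
}

int main() {
  std::printf("wave32 bool width: %u\n", boolRegWidthInBits(true));  // 32
  std::printf("wave64 bool width: %u\n", boolRegWidthInBits(false)); // 64
  return 0;
}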
1740 
1741 // Find reaching register definition
1742 MachineInstr *SIRegisterInfo::findReachingDef(unsigned Reg, unsigned SubReg,
1743  MachineInstr &Use,
1744  MachineRegisterInfo &MRI,
1745  LiveIntervals *LIS) const {
1746  auto &MDT = LIS->getAnalysis<MachineDominatorTree>();
1747  SlotIndex UseIdx = LIS->getInstructionIndex(Use);
1748  SlotIndex DefIdx;
1749 
1750  if (TargetRegisterInfo::isVirtualRegister(Reg)) {
1751  if (!LIS->hasInterval(Reg))
1752  return nullptr;
1753  LiveInterval &LI = LIS->getInterval(Reg);
1754  LaneBitmask SubLanes = SubReg ? getSubRegIndexLaneMask(SubReg)
1755  : MRI.getMaxLaneMaskForVReg(Reg);
1756  VNInfo *V = nullptr;
1757  if (LI.hasSubRanges()) {
1758  for (auto &S : LI.subranges()) {
1759  if ((S.LaneMask & SubLanes) == SubLanes) {
1760  V = S.getVNInfoAt(UseIdx);
1761  break;
1762  }
1763  }
1764  } else {
1765  V = LI.getVNInfoAt(UseIdx);
1766  }
1767  if (!V)
1768  return nullptr;
1769  DefIdx = V->def;
1770  } else {
1771  // Find last def.
1772  for (MCRegUnitIterator Units(Reg, this); Units.isValid(); ++Units) {
1773  LiveRange &LR = LIS->getRegUnit(*Units);
1774  if (VNInfo *V = LR.getVNInfoAt(UseIdx)) {
1775  if (!DefIdx.isValid() ||
1776  MDT.dominates(LIS->getInstructionFromIndex(DefIdx),
1777  LIS->getInstructionFromIndex(V->def)))
1778  DefIdx = V->def;
1779  } else {
1780  return nullptr;
1781  }
1782  }
1783  }
1784 
1785  MachineInstr *Def = LIS->getInstructionFromIndex(DefIdx);
1786 
1787  if (!Def || !MDT.dominates(Def, &Use))
1788  return nullptr;
1789 
1790  assert(Def->modifiesRegister(Reg, this));
1791 
1792  return Def;
1793 }
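
At its core the routine asks: which value of Reg is live at the use's slot index, which instruction defined it, and does that definition dominate the use? A standalone sketch of the straight-line special case, where dominance degenerates to "defined at or before the use" (hypothetical types, a std::map in place of LiveIntervals):

// Standalone illustration: nearest prior definition in straight-line code.
#include <cassert>
#include <iterator>
#include <map>
#include <optional>

using SlotIdx = int; // stand-in for a slot index

// DefsByIdx maps a def slot to an instruction id; return the id of the
// nearest def at or before UseIdx, if any.
static std::optional<int> findReachingDefLinear(
    const std::map<SlotIdx, int> &DefsByIdx, SlotIdx UseIdx) {
  auto It = DefsByIdx.upper_bound(UseIdx);
  if (It == DefsByIdx.begin())
    return std::nullopt;        // no def dominates the use
  return std::prev(It)->second; // the latest def not after the use
}

int main() {
  std::map<SlotIdx, int> Defs{{4, /*instr id*/ 100}, {12, 101}};
  assert(findReachingDefLinear(Defs, 10) == 100);
  assert(findReachingDefLinear(Defs, 20) == 101);
  assert(!findReachingDefLinear(Defs, 2).has_value());
  return 0;
}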