LLVM  14.0.0git
GCNNSAReassign.cpp
Go to the documentation of this file.
//===-- GCNNSAReassign.cpp - Reassign registers in NSA instructions -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Try to reassign registers on GFX10+ from non-sequential to sequential
/// in NSA image instructions. Later SIShrinkInstructions pass will replace NSA
/// with sequential versions where possible.
///
//===----------------------------------------------------------------------===//
15 
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveRegMatrix.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/InitializePasses.h"
24 
25 using namespace llvm;
26 
27 #define DEBUG_TYPE "amdgpu-nsa-reassign"
28 
29 STATISTIC(NumNSAInstructions,
30  "Number of NSA instructions with non-sequential address found");
31 STATISTIC(NumNSAConverted,
32  "Number of NSA instructions changed to sequential");
33 
34 namespace {
35 
36 class GCNNSAReassign : public MachineFunctionPass {
37 public:
38  static char ID;
39 
40  GCNNSAReassign() : MachineFunctionPass(ID) {
42  }
43 
44  bool runOnMachineFunction(MachineFunction &MF) override;
45 
46  StringRef getPassName() const override { return "GCN NSA Reassign"; }
47 
48  void getAnalysisUsage(AnalysisUsage &AU) const override {
50  AU.addRequired<VirtRegMap>();
52  AU.setPreservesAll();
54  }
55 
56 private:
57  typedef enum {
58  NOT_NSA, // Not an NSA instruction
59  FIXED, // NSA which we cannot modify
60  NON_CONTIGUOUS, // NSA with non-sequential address which we can try
61  // to optimize.
62  CONTIGUOUS // NSA with all sequential address registers
63  } NSA_Status;
64 
65  const GCNSubtarget *ST;
66 
67  const MachineRegisterInfo *MRI;
68 
69  const SIRegisterInfo *TRI;
70 
71  VirtRegMap *VRM;
72 
73  LiveRegMatrix *LRM;
74 
75  LiveIntervals *LIS;
76 
77  unsigned MaxNumVGPRs;
78 
79  const MCPhysReg *CSRegs;
80 
81  NSA_Status CheckNSA(const MachineInstr &MI, bool Fast = false) const;
82 
83  bool tryAssignRegisters(SmallVectorImpl<LiveInterval *> &Intervals,
84  unsigned StartReg) const;
85 
86  bool canAssign(unsigned StartReg, unsigned NumRegs) const;
87 
88  bool scavengeRegs(SmallVectorImpl<LiveInterval *> &Intervals) const;
89 };
90 
91 } // End anonymous namespace.
92 
93 INITIALIZE_PASS_BEGIN(GCNNSAReassign, DEBUG_TYPE, "GCN NSA Reassign",
94  false, false)
98 INITIALIZE_PASS_END(GCNNSAReassign, DEBUG_TYPE, "GCN NSA Reassign",
100 
101 
102 char GCNNSAReassign::ID = 0;
103 
104 char &llvm::GCNNSAReassignID = GCNNSAReassign::ID;
105 
106 bool
107 GCNNSAReassign::tryAssignRegisters(SmallVectorImpl<LiveInterval *> &Intervals,
108  unsigned StartReg) const {
109  unsigned NumRegs = Intervals.size();
110 
111  for (unsigned N = 0; N < NumRegs; ++N)
112  if (VRM->hasPhys(Intervals[N]->reg()))
113  LRM->unassign(*Intervals[N]);
114 
115  for (unsigned N = 0; N < NumRegs; ++N)
116  if (LRM->checkInterference(*Intervals[N], MCRegister::from(StartReg + N)))
117  return false;
118 
119  for (unsigned N = 0; N < NumRegs; ++N)
120  LRM->assign(*Intervals[N], MCRegister::from(StartReg + N));
121 
122  return true;
123 }
124 
125 bool GCNNSAReassign::canAssign(unsigned StartReg, unsigned NumRegs) const {
126  for (unsigned N = 0; N < NumRegs; ++N) {
127  unsigned Reg = StartReg + N;
128  if (!MRI->isAllocatable(Reg))
129  return false;
130 
131  for (unsigned I = 0; CSRegs[I]; ++I)
132  if (TRI->isSubRegisterEq(Reg, CSRegs[I]) &&
133  !LRM->isPhysRegUsed(CSRegs[I]))
134  return false;
135  }
136 
137  return true;
138 }
139 
140 bool
141 GCNNSAReassign::scavengeRegs(SmallVectorImpl<LiveInterval *> &Intervals) const {
142  unsigned NumRegs = Intervals.size();
143 
144  if (NumRegs > MaxNumVGPRs)
145  return false;
146  unsigned MaxReg = MaxNumVGPRs - NumRegs + AMDGPU::VGPR0;
147 
148  for (unsigned Reg = AMDGPU::VGPR0; Reg <= MaxReg; ++Reg) {
149  if (!canAssign(Reg, NumRegs))
150  continue;
151 
152  if (tryAssignRegisters(Intervals, Reg))
153  return true;
154  }
155 
156  return false;
157 }
158 
159 GCNNSAReassign::NSA_Status
160 GCNNSAReassign::CheckNSA(const MachineInstr &MI, bool Fast) const {
161  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
162  if (!Info || Info->MIMGEncoding != AMDGPU::MIMGEncGfx10NSA)
163  return NSA_Status::NOT_NSA;
164 
165  int VAddr0Idx =
166  AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
167 
168  unsigned VgprBase = 0;
169  bool NSA = false;
170  for (unsigned I = 0; I < Info->VAddrDwords; ++I) {
171  const MachineOperand &Op = MI.getOperand(VAddr0Idx + I);
172  Register Reg = Op.getReg();
173  if (Reg.isPhysical() || !VRM->isAssignedReg(Reg))
174  return NSA_Status::FIXED;
175 
176  Register PhysReg = VRM->getPhys(Reg);
177 
178  if (!Fast) {
179  if (!PhysReg)
180  return NSA_Status::FIXED;
181 
182  // Bail if address is not a VGPR32. That should be possible to extend the
183  // optimization to work with subregs of a wider register tuples, but the
184  // logic to find free registers will be much more complicated with much
185  // less chances for success. That seems reasonable to assume that in most
186  // cases a tuple is used because a vector variable contains different
187  // parts of an address and it is either already consequitive or cannot
188  // be reassigned if not. If needed it is better to rely on register
189  // coalescer to process such address tuples.
190  if (MRI->getRegClass(Reg) != &AMDGPU::VGPR_32RegClass || Op.getSubReg())
191  return NSA_Status::FIXED;
192 
193  // InlineSpiller does not call LRM::assign() after an LI split leaving
194  // it in an inconsistent state, so we cannot call LRM::unassign().
195  // See llvm bug #48911.
196  // Skip reassign if a register has originated from such split.
197  // FIXME: Remove the workaround when bug #48911 is fixed.
198  if (VRM->getPreSplitReg(Reg))
199  return NSA_Status::FIXED;
200 
202 
203  if (Def && Def->isCopy() && Def->getOperand(1).getReg() == PhysReg)
204  return NSA_Status::FIXED;
205 
206  for (auto U : MRI->use_nodbg_operands(Reg)) {
207  if (U.isImplicit())
208  return NSA_Status::FIXED;
209  const MachineInstr *UseInst = U.getParent();
210  if (UseInst->isCopy() && UseInst->getOperand(0).getReg() == PhysReg)
211  return NSA_Status::FIXED;
212  }
213 
214  if (!LIS->hasInterval(Reg))
215  return NSA_Status::FIXED;
216  }
217 
218  if (I == 0)
219  VgprBase = PhysReg;
220  else if (VgprBase + I != PhysReg)
221  NSA = true;
222  }
223 
224  return NSA ? NSA_Status::NON_CONTIGUOUS : NSA_Status::CONTIGUOUS;
225 }
226 
227 bool GCNNSAReassign::runOnMachineFunction(MachineFunction &MF) {
228  ST = &MF.getSubtarget<GCNSubtarget>();
229  if (ST->getGeneration() < GCNSubtarget::GFX10)
230  return false;
231 
232  MRI = &MF.getRegInfo();
233  TRI = ST->getRegisterInfo();
234  VRM = &getAnalysis<VirtRegMap>();
235  LRM = &getAnalysis<LiveRegMatrix>();
236  LIS = &getAnalysis<LiveIntervals>();
237 
239  MaxNumVGPRs = ST->getMaxNumVGPRs(MF);
240  MaxNumVGPRs = std::min(ST->getMaxNumVGPRs(MFI->getOccupancy()), MaxNumVGPRs);
241  CSRegs = MRI->getCalleeSavedRegs();
242 
243  using Candidate = std::pair<const MachineInstr*, bool>;
244  SmallVector<Candidate, 32> Candidates;
245  for (const MachineBasicBlock &MBB : MF) {
246  for (const MachineInstr &MI : MBB) {
247  switch (CheckNSA(MI)) {
248  default:
249  continue;
250  case NSA_Status::CONTIGUOUS:
251  Candidates.push_back(std::make_pair(&MI, true));
252  break;
253  case NSA_Status::NON_CONTIGUOUS:
254  Candidates.push_back(std::make_pair(&MI, false));
255  ++NumNSAInstructions;
256  break;
257  }
258  }
259  }
260 
261  bool Changed = false;
262  for (auto &C : Candidates) {
263  if (C.second)
264  continue;
265 
266  const MachineInstr *MI = C.first;
267  if (CheckNSA(*MI, true) == NSA_Status::CONTIGUOUS) {
268  // Already happen to be fixed.
269  C.second = true;
270  ++NumNSAConverted;
271  continue;
272  }
273 
274  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI->getOpcode());
275  int VAddr0Idx =
276  AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::vaddr0);
277 
280  SlotIndex MinInd, MaxInd;
281  for (unsigned I = 0; I < Info->VAddrDwords; ++I) {
282  const MachineOperand &Op = MI->getOperand(VAddr0Idx + I);
283  Register Reg = Op.getReg();
284  LiveInterval *LI = &LIS->getInterval(Reg);
285  if (llvm::is_contained(Intervals, LI)) {
286  // Same register used, unable to make sequential
287  Intervals.clear();
288  break;
289  }
290  Intervals.push_back(LI);
291  OrigRegs.push_back(VRM->getPhys(Reg));
292  if (LI->empty()) {
293  // The address input is undef, so it doesn't contribute to the relevant
294  // range. Seed a reasonable index range if required.
295  if (I == 0)
296  MinInd = MaxInd = LIS->getInstructionIndex(*MI);
297  continue;
298  }
299  MinInd = I != 0 ? std::min(MinInd, LI->beginIndex()) : LI->beginIndex();
300  MaxInd = I != 0 ? std::max(MaxInd, LI->endIndex()) : LI->endIndex();
301  }
302 
303  if (Intervals.empty())
304  continue;
305 
306  LLVM_DEBUG(dbgs() << "Attempting to reassign NSA: " << *MI
307  << "\tOriginal allocation:\t";
308  for (auto *LI
309  : Intervals) dbgs()
310  << " " << llvm::printReg((VRM->getPhys(LI->reg())), TRI);
311  dbgs() << '\n');
312 
313  bool Success = scavengeRegs(Intervals);
314  if (!Success) {
315  LLVM_DEBUG(dbgs() << "\tCannot reallocate.\n");
316  if (VRM->hasPhys(Intervals.back()->reg())) // Did not change allocation.
317  continue;
318  } else {
319  // Check we did not make it worse for other instructions.
320  auto I = std::lower_bound(Candidates.begin(), &C, MinInd,
321  [this](const Candidate &C, SlotIndex I) {
322  return LIS->getInstructionIndex(*C.first) < I;
323  });
324  for (auto E = Candidates.end(); Success && I != E &&
325  LIS->getInstructionIndex(*I->first) < MaxInd; ++I) {
326  if (I->second && CheckNSA(*I->first, true) < NSA_Status::CONTIGUOUS) {
327  Success = false;
328  LLVM_DEBUG(dbgs() << "\tNSA conversion conflict with " << *I->first);
329  }
330  }
331  }
332 
333  if (!Success) {
334  for (unsigned I = 0; I < Info->VAddrDwords; ++I)
335  if (VRM->hasPhys(Intervals[I]->reg()))
336  LRM->unassign(*Intervals[I]);
337 
338  for (unsigned I = 0; I < Info->VAddrDwords; ++I)
339  LRM->assign(*Intervals[I], OrigRegs[I]);
340 
341  continue;
342  }
343 
344  C.second = true;
345  ++NumNSAConverted;
346  LLVM_DEBUG(
347  dbgs() << "\tNew allocation:\t\t ["
348  << llvm::printReg((VRM->getPhys(Intervals.front()->reg())), TRI)
349  << " : "
350  << llvm::printReg((VRM->getPhys(Intervals.back()->reg())), TRI)
351  << "]\n");
352  Changed = true;
353  }
354 
355  return Changed;
356 }
LiveRegMatrix.h
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:105
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AllocatorList.h:23
llvm::tgtok::Def
@ Def
Definition: TGLexer.h:50
Reg
unsigned Reg
Definition: MachineSink.cpp:1558
llvm::LiveRange::empty
bool empty() const
Definition: LiveInterval.h:374
SIMachineFunctionInfo.h
llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition: MachineRegisterInfo.h:52
llvm::MCRegister::from
static MCRegister from(unsigned Val)
Check the provided unsigned value is a valid MCRegister.
Definition: MCRegister.h:67
llvm::lower_bound
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1710
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(GCNNSAReassign, DEBUG_TYPE, "GCN NSA Reassign", false, false) INITIALIZE_PASS_END(GCNNSAReassign
Statistic.h
llvm::VirtRegMap
Definition: VirtRegMap.h:33
llvm::MachineRegisterInfo::getUniqueVRegDef
MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
Definition: MachineRegisterInfo.cpp:409
llvm::MachineFunctionPass
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Definition: MachineFunctionPass.h:30
llvm::MachineInstr::isCopy
bool isCopy() const
Definition: MachineInstr.h:1285
llvm::GCNSubtarget
Definition: GCNSubtarget.h:31
llvm::AMDGPU::getNamedOperandIdx
LLVM_READONLY int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx)
TRI
unsigned const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:1559
llvm::MachineFunctionPass::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition: MachineFunctionPass.cpp:102
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::LiveRange::beginIndex
SlotIndex beginIndex() const
beginIndex - Return the lowest numbered slot covered.
Definition: LiveInterval.h:377
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition: MachineFunction.h:651
llvm::MachineRegisterInfo::use_nodbg_operands
iterator_range< use_nodbg_iterator > use_nodbg_operands(Register Reg) const
Definition: MachineRegisterInfo.h:526
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
GCNSubtarget.h
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::MachineFunction::getInfo
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Definition: MachineFunction.h:739
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::MachineInstr::getOperand
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:499
DEBUG_TYPE
#define DEBUG_TYPE
Definition: GCNNSAReassign.cpp:27
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
false
Definition: StackSlotColoring.cpp:142
llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition: MachineOperand.h:49
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::MCRegisterInfo::isSubRegisterEq
bool isSubRegisterEq(MCRegister RegA, MCRegister RegB) const
Returns true if RegB is a sub-register of RegA or if RegB == RegA.
Definition: MCRegisterInfo.h:568
Info
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
llvm::SIRegisterInfo
Definition: SIRegisterInfo.h:30
llvm::LiveInterval
LiveInterval - This class represents the liveness of a register, or stack slot.
Definition: LiveInterval.h:680
llvm::SlotIndex
SlotIndex - An opaque wrapper around machine indexes.
Definition: SlotIndexes.h:83
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:95
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
llvm::MachineRegisterInfo::getRegClass
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
Definition: MachineRegisterInfo.h:634
llvm::AMDGPUSubtarget::GFX10
@ GFX10
Definition: AMDGPUSubtarget.h:41
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:641
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:64
LiveIntervals.h
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::MachineRegisterInfo::getCalleeSavedRegs
const MCPhysReg * getCalleeSavedRegs() const
Returns list of callee saved registers.
Definition: MachineRegisterInfo.cpp:619
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::MCPhysReg
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition: MCRegister.h:21
llvm::is_contained
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
Definition: STLExtras.h:1665
MachineFunctionPass.h
llvm::MachineRegisterInfo::isAllocatable
bool isAllocatable(MCRegister PhysReg) const
isAllocatable - Returns true when PhysReg belongs to an allocatable register class and it hasn't been...
Definition: MachineRegisterInfo.h:946
llvm::AMDGPU::getMIMGInfo
const LLVM_READONLY MIMGInfo * getMIMGInfo(unsigned Opc)
llvm::MachineOperand::getReg
Register getReg() const
getReg - Returns the register number.
Definition: MachineOperand.h:360
llvm::initializeGCNNSAReassignPass
void initializeGCNNSAReassignPass(PassRegistry &)
llvm::SIMachineFunctionInfo::getOccupancy
unsigned getOccupancy() const
Definition: SIMachineFunctionInfo.h:929
llvm::MachineFunction
Definition: MachineFunction.h:241
llvm::min
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:357
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:57
AMDGPU.h
llvm::MachineInstr::getParent
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:286
MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
llvm::LiveRange::endIndex
SlotIndex endIndex() const
endNumber - return the maximum point of the range of the whole, exclusive.
Definition: LiveInterval.h:384
MBB
MachineBasicBlock & MBB
Definition: AArch64SLSHardening.cpp:74
Reassign
GCN NSA Reassign
Definition: GCNNSAReassign.cpp:98
llvm::AnalysisUsage::setPreservesAll
void setPreservesAll()
Set by analyses that do not transform their input at all.
Definition: PassAnalysisSupport.h:130
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:325
Success
#define Success
Definition: AArch64Disassembler.cpp:260
llvm::AMDGPU::MIMGInfo
Definition: AMDGPUBaseInfo.h:359
llvm::LiveIntervals
Definition: LiveIntervals.h:54
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:585
llvm::GCNNSAReassignID
char & GCNNSAReassignID
Definition: GCNNSAReassign.cpp:104
N
#define N
llvm::SIMachineFunctionInfo
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Definition: SIMachineFunctionInfo.h:335
llvm::LiveInterval::reg
Register reg() const
Definition: LiveInterval.h:711
llvm::max
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:340
llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:43
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::printReg
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
Definition: TargetRegisterInfo.cpp:110
InitializePasses.h
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38
llvm::LiveRegMatrix
Definition: LiveRegMatrix.h:40