LLVM  16.0.0git
GCNNSAReassign.cpp
Go to the documentation of this file.
1 //===-- GCNNSAReassign.cpp - Reassign registers in NSA instructions -------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// \brief Try to reassign registers on GFX10+ from non-sequential to sequential
11 /// in NSA image instructions. Later SIShrinkInstructions pass will replace NSA
12 /// with sequential versions where possible.
13 ///
14 //===----------------------------------------------------------------------===//
15 
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveRegMatrix.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/InitializePasses.h"
26 
27 using namespace llvm;
28 
29 #define DEBUG_TYPE "amdgpu-nsa-reassign"
30 
// Incremented in runOnMachineFunction for every candidate instruction that
// CheckNSA classifies as NON_CONTIGUOUS during the initial scan.
31 STATISTIC(NumNSAInstructions,
32  "Number of NSA instructions with non-sequential address found");
// Incremented in runOnMachineFunction whenever a previously non-contiguous
// candidate ends up with sequential address registers.
33 STATISTIC(NumNSAConverted,
34  "Number of NSA instructions changed to sequential");
35 
36 namespace {
37 
38 class GCNNSAReassign : public MachineFunctionPass {
39 public:
40  static char ID;
41 
42  GCNNSAReassign() : MachineFunctionPass(ID) {
44  }
45 
46  bool runOnMachineFunction(MachineFunction &MF) override;
47 
48  StringRef getPassName() const override { return "GCN NSA Reassign"; }
49 
50  void getAnalysisUsage(AnalysisUsage &AU) const override {
52  AU.addRequired<VirtRegMap>();
54  AU.setPreservesAll();
56  }
57 
58 private:
59  typedef enum {
60  NOT_NSA, // Not an NSA instruction
61  FIXED, // NSA which we cannot modify
62  NON_CONTIGUOUS, // NSA with non-sequential address which we can try
63  // to optimize.
64  CONTIGUOUS // NSA with all sequential address registers
65  } NSA_Status;
66 
67  const GCNSubtarget *ST;
68 
69  const MachineRegisterInfo *MRI;
70 
71  const SIRegisterInfo *TRI;
72 
73  VirtRegMap *VRM;
74 
75  LiveRegMatrix *LRM;
76 
77  LiveIntervals *LIS;
78 
79  unsigned MaxNumVGPRs;
80 
81  const MCPhysReg *CSRegs;
82 
83  NSA_Status CheckNSA(const MachineInstr &MI, bool Fast = false) const;
84 
85  bool tryAssignRegisters(SmallVectorImpl<LiveInterval *> &Intervals,
86  unsigned StartReg) const;
87 
88  bool canAssign(unsigned StartReg, unsigned NumRegs) const;
89 
90  bool scavengeRegs(SmallVectorImpl<LiveInterval *> &Intervals) const;
91 };
92 
93 } // End anonymous namespace.
94 
95 INITIALIZE_PASS_BEGIN(GCNNSAReassign, DEBUG_TYPE, "GCN NSA Reassign",
96  false, false)
100 INITIALIZE_PASS_END(GCNNSAReassign, DEBUG_TYPE, "GCN NSA Reassign",
102 
103 
104 char GCNNSAReassign::ID = 0;
105 
106 char &llvm::GCNNSAReassignID = GCNNSAReassign::ID;
107 
108 bool
109 GCNNSAReassign::tryAssignRegisters(SmallVectorImpl<LiveInterval *> &Intervals,
110  unsigned StartReg) const {
111  unsigned NumRegs = Intervals.size();
112 
113  for (unsigned N = 0; N < NumRegs; ++N)
114  if (VRM->hasPhys(Intervals[N]->reg()))
115  LRM->unassign(*Intervals[N]);
116 
117  for (unsigned N = 0; N < NumRegs; ++N)
118  if (LRM->checkInterference(*Intervals[N], MCRegister::from(StartReg + N)))
119  return false;
120 
121  for (unsigned N = 0; N < NumRegs; ++N)
122  LRM->assign(*Intervals[N], MCRegister::from(StartReg + N));
123 
124  return true;
125 }
126 
127 bool GCNNSAReassign::canAssign(unsigned StartReg, unsigned NumRegs) const {
128  for (unsigned N = 0; N < NumRegs; ++N) {
129  unsigned Reg = StartReg + N;
130  if (!MRI->isAllocatable(Reg))
131  return false;
132 
133  for (unsigned I = 0; CSRegs[I]; ++I)
134  if (TRI->isSubRegisterEq(Reg, CSRegs[I]) &&
135  !LRM->isPhysRegUsed(CSRegs[I]))
136  return false;
137  }
138 
139  return true;
140 }
141 
142 bool
143 GCNNSAReassign::scavengeRegs(SmallVectorImpl<LiveInterval *> &Intervals) const {
144  unsigned NumRegs = Intervals.size();
145 
146  if (NumRegs > MaxNumVGPRs)
147  return false;
148  unsigned MaxReg = MaxNumVGPRs - NumRegs + AMDGPU::VGPR0;
149 
150  for (unsigned Reg = AMDGPU::VGPR0; Reg <= MaxReg; ++Reg) {
151  if (!canAssign(Reg, NumRegs))
152  continue;
153 
154  if (tryAssignRegisters(Intervals, Reg))
155  return true;
156  }
157 
158  return false;
159 }
160 
161 GCNNSAReassign::NSA_Status
162 GCNNSAReassign::CheckNSA(const MachineInstr &MI, bool Fast) const {
163  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
164  if (!Info)
165  return NSA_Status::NOT_NSA;
166 
167  switch (Info->MIMGEncoding) {
168  case AMDGPU::MIMGEncGfx10NSA:
169  case AMDGPU::MIMGEncGfx11NSA:
170  break;
171  default:
172  return NSA_Status::NOT_NSA;
173  }
174 
175  int VAddr0Idx =
176  AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
177 
178  unsigned VgprBase = 0;
179  bool NSA = false;
180  for (unsigned I = 0; I < Info->VAddrOperands; ++I) {
181  const MachineOperand &Op = MI.getOperand(VAddr0Idx + I);
182  Register Reg = Op.getReg();
183  if (Reg.isPhysical() || !VRM->isAssignedReg(Reg))
184  return NSA_Status::FIXED;
185 
186  Register PhysReg = VRM->getPhys(Reg);
187 
188  if (!Fast) {
189  if (!PhysReg)
190  return NSA_Status::FIXED;
191 
192  // TODO: address the below limitation to handle GFX11 BVH instructions
193  // Bail if address is not a VGPR32. That should be possible to extend the
194  // optimization to work with subregs of a wider register tuples, but the
195  // logic to find free registers will be much more complicated with much
196  // less chances for success. That seems reasonable to assume that in most
197  // cases a tuple is used because a vector variable contains different
198  // parts of an address and it is either already consecutive or cannot
199  // be reassigned if not. If needed it is better to rely on register
200  // coalescer to process such address tuples.
201  if (TRI->getRegSizeInBits(*MRI->getRegClass(Reg)) != 32 || Op.getSubReg())
202  return NSA_Status::FIXED;
203 
204  // InlineSpiller does not call LRM::assign() after an LI split leaving
205  // it in an inconsistent state, so we cannot call LRM::unassign().
206  // See llvm bug #48911.
207  // Skip reassign if a register has originated from such split.
208  // FIXME: Remove the workaround when bug #48911 is fixed.
209  if (VRM->getPreSplitReg(Reg))
210  return NSA_Status::FIXED;
211 
213 
214  if (Def && Def->isCopy() && Def->getOperand(1).getReg() == PhysReg)
215  return NSA_Status::FIXED;
216 
217  for (auto U : MRI->use_nodbg_operands(Reg)) {
218  if (U.isImplicit())
219  return NSA_Status::FIXED;
220  const MachineInstr *UseInst = U.getParent();
221  if (UseInst->isCopy() && UseInst->getOperand(0).getReg() == PhysReg)
222  return NSA_Status::FIXED;
223  }
224 
225  if (!LIS->hasInterval(Reg))
226  return NSA_Status::FIXED;
227  }
228 
229  if (I == 0)
230  VgprBase = PhysReg;
231  else if (VgprBase + I != PhysReg)
232  NSA = true;
233  }
234 
235  return NSA ? NSA_Status::NON_CONTIGUOUS : NSA_Status::CONTIGUOUS;
236 }
237 
238 bool GCNNSAReassign::runOnMachineFunction(MachineFunction &MF) {
239  ST = &MF.getSubtarget<GCNSubtarget>();
240  if (ST->getGeneration() < GCNSubtarget::GFX10)
241  return false;
242 
243  MRI = &MF.getRegInfo();
244  TRI = ST->getRegisterInfo();
245  VRM = &getAnalysis<VirtRegMap>();
246  LRM = &getAnalysis<LiveRegMatrix>();
247  LIS = &getAnalysis<LiveIntervals>();
248 
250  MaxNumVGPRs = ST->getMaxNumVGPRs(MF);
251  MaxNumVGPRs = std::min(ST->getMaxNumVGPRs(MFI->getOccupancy()), MaxNumVGPRs);
252  CSRegs = MRI->getCalleeSavedRegs();
253 
254  using Candidate = std::pair<const MachineInstr*, bool>;
255  SmallVector<Candidate, 32> Candidates;
256  for (const MachineBasicBlock &MBB : MF) {
257  for (const MachineInstr &MI : MBB) {
258  switch (CheckNSA(MI)) {
259  default:
260  continue;
261  case NSA_Status::CONTIGUOUS:
262  Candidates.push_back(std::make_pair(&MI, true));
263  break;
264  case NSA_Status::NON_CONTIGUOUS:
265  Candidates.push_back(std::make_pair(&MI, false));
266  ++NumNSAInstructions;
267  break;
268  }
269  }
270  }
271 
272  bool Changed = false;
273  for (auto &C : Candidates) {
274  if (C.second)
275  continue;
276 
277  const MachineInstr *MI = C.first;
278  if (CheckNSA(*MI, true) == NSA_Status::CONTIGUOUS) {
279  // Already happen to be fixed.
280  C.second = true;
281  ++NumNSAConverted;
282  continue;
283  }
284 
285  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI->getOpcode());
286  int VAddr0Idx =
287  AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::vaddr0);
288 
291  SlotIndex MinInd, MaxInd;
292  for (unsigned I = 0; I < Info->VAddrOperands; ++I) {
293  const MachineOperand &Op = MI->getOperand(VAddr0Idx + I);
294  Register Reg = Op.getReg();
295  LiveInterval *LI = &LIS->getInterval(Reg);
296  if (llvm::is_contained(Intervals, LI)) {
297  // Same register used, unable to make sequential
298  Intervals.clear();
299  break;
300  }
301  Intervals.push_back(LI);
302  OrigRegs.push_back(VRM->getPhys(Reg));
303  if (LI->empty()) {
304  // The address input is undef, so it doesn't contribute to the relevant
305  // range. Seed a reasonable index range if required.
306  if (I == 0)
307  MinInd = MaxInd = LIS->getInstructionIndex(*MI);
308  continue;
309  }
310  MinInd = I != 0 ? std::min(MinInd, LI->beginIndex()) : LI->beginIndex();
311  MaxInd = I != 0 ? std::max(MaxInd, LI->endIndex()) : LI->endIndex();
312  }
313 
314  if (Intervals.empty())
315  continue;
316 
317  LLVM_DEBUG(dbgs() << "Attempting to reassign NSA: " << *MI
318  << "\tOriginal allocation:\t";
319  for (auto *LI
320  : Intervals) dbgs()
321  << " " << llvm::printReg((VRM->getPhys(LI->reg())), TRI);
322  dbgs() << '\n');
323 
324  bool Success = scavengeRegs(Intervals);
325  if (!Success) {
326  LLVM_DEBUG(dbgs() << "\tCannot reallocate.\n");
327  if (VRM->hasPhys(Intervals.back()->reg())) // Did not change allocation.
328  continue;
329  } else {
330  // Check we did not make it worse for other instructions.
331  auto I = std::lower_bound(Candidates.begin(), &C, MinInd,
332  [this](const Candidate &C, SlotIndex I) {
333  return LIS->getInstructionIndex(*C.first) < I;
334  });
335  for (auto E = Candidates.end(); Success && I != E &&
336  LIS->getInstructionIndex(*I->first) < MaxInd; ++I) {
337  if (I->second && CheckNSA(*I->first, true) < NSA_Status::CONTIGUOUS) {
338  Success = false;
339  LLVM_DEBUG(dbgs() << "\tNSA conversion conflict with " << *I->first);
340  }
341  }
342  }
343 
344  if (!Success) {
345  for (unsigned I = 0; I < Info->VAddrOperands; ++I)
346  if (VRM->hasPhys(Intervals[I]->reg()))
347  LRM->unassign(*Intervals[I]);
348 
349  for (unsigned I = 0; I < Info->VAddrOperands; ++I)
350  LRM->assign(*Intervals[I], OrigRegs[I]);
351 
352  continue;
353  }
354 
355  C.second = true;
356  ++NumNSAConverted;
357  LLVM_DEBUG(
358  dbgs() << "\tNew allocation:\t\t ["
359  << llvm::printReg((VRM->getPhys(Intervals.front()->reg())), TRI)
360  << " : "
361  << llvm::printReg((VRM->getPhys(Intervals.back()->reg())), TRI)
362  << "]\n");
363  Changed = true;
364  }
365 
366  return Changed;
367 }
LiveRegMatrix.h
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:109
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
llvm::tgtok::Def
@ Def
Definition: TGLexer.h:50
llvm::LiveRange::empty
bool empty() const
Definition: LiveInterval.h:382
SIMachineFunctionInfo.h
llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition: MachineRegisterInfo.h:50
llvm::MCRegister::from
static MCRegister from(unsigned Val)
Check the provided unsigned value is a valid MCRegister.
Definition: MCRegister.h:67
llvm::lower_bound
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1922
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1199
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(GCNNSAReassign, DEBUG_TYPE, "GCN NSA Reassign", false, false) INITIALIZE_PASS_END(GCNNSAReassign
Statistic.h
llvm::VirtRegMap
Definition: VirtRegMap.h:33
llvm::X86Disassembler::Reg
Reg
All possible values of the reg field in the ModR/M byte.
Definition: X86DisassemblerDecoder.h:462
llvm::MachineRegisterInfo::getUniqueVRegDef
MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
Definition: MachineRegisterInfo.cpp:407
llvm::MachineFunctionPass
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Definition: MachineFunctionPass.h:30
llvm::MachineInstr::isCopy
bool isCopy() const
Definition: MachineInstr.h:1336
llvm::GCNSubtarget
Definition: GCNSubtarget.h:31
llvm::max
Expected< ExpressionValue > max(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:337
llvm::AMDGPU::getNamedOperandIdx
LLVM_READONLY int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx)
TRI
unsigned const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:1628
llvm::MachineFunctionPass::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition: MachineFunctionPass.cpp:167
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::LiveRange::beginIndex
SlotIndex beginIndex() const
beginIndex - Return the lowest numbered slot covered.
Definition: LiveInterval.h:385
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition: MachineFunction.h:670
llvm::MachineRegisterInfo::use_nodbg_operands
iterator_range< use_nodbg_iterator > use_nodbg_operands(Register Reg) const
Definition: MachineRegisterInfo.h:534
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:24
GCNSubtarget.h
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::MachineFunction::getInfo
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Definition: MachineFunction.h:758
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::MachineInstr::getOperand
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:526
DEBUG_TYPE
#define DEBUG_TYPE
Definition: GCNNSAReassign.cpp:29
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
false
Definition: StackSlotColoring.cpp:141
llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition: MachineOperand.h:48
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::MCRegisterInfo::isSubRegisterEq
bool isSubRegisterEq(MCRegister RegA, MCRegister RegB) const
Returns true if RegB is a sub-register of RegA or if RegB == RegA.
Definition: MCRegisterInfo.h:568
Info
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
llvm::SIRegisterInfo
Definition: SIRegisterInfo.h:30
llvm::LiveInterval
LiveInterval - This class represents the liveness of a register, or stack slot.
Definition: LiveInterval.h:686
llvm::SlotIndex
SlotIndex - An opaque wrapper around machine indexes.
Definition: SlotIndexes.h:82
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:94
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
llvm::MachineRegisterInfo::getRegClass
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
Definition: MachineRegisterInfo.h:647
llvm::AMDGPUSubtarget::GFX10
@ GFX10
Definition: AMDGPUSubtarget.h:41
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:660
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:66
LiveIntervals.h
VirtRegMap.h
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::MachineRegisterInfo::getCalleeSavedRegs
const MCPhysReg * getCalleeSavedRegs() const
Returns list of callee saved registers.
Definition: MachineRegisterInfo.cpp:623
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::MCPhysReg
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition: MCRegister.h:21
llvm::is_contained
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
Definition: STLExtras.h:1868
MachineFunctionPass.h
llvm::MachineRegisterInfo::isAllocatable
bool isAllocatable(MCRegister PhysReg) const
isAllocatable - Returns true when PhysReg belongs to an allocatable register class and it hasn't been...
Definition: MachineRegisterInfo.h:948
llvm::AMDGPU::getMIMGInfo
const LLVM_READONLY MIMGInfo * getMIMGInfo(unsigned Opc)
llvm::MachineOperand::getReg
Register getReg() const
getReg - Returns the register number.
Definition: MachineOperand.h:359
llvm::initializeGCNNSAReassignPass
void initializeGCNNSAReassignPass(PassRegistry &)
llvm::SIMachineFunctionInfo::getOccupancy
unsigned getOccupancy() const
Definition: SIMachineFunctionInfo.h:928
llvm::MachineFunction
Definition: MachineFunction.h:257
llvm::min
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:357
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
AMDGPU.h
llvm::MachineInstr::getParent
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:313
MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
llvm::LiveRange::endIndex
SlotIndex endIndex() const
endNumber - return the maximum point of the range of the whole, exclusive.
Definition: LiveInterval.h:392
MBB
MachineBasicBlock & MBB
Definition: AArch64SLSHardening.cpp:74
Reassign
GCN NSA Reassign
Definition: GCNNSAReassign.cpp:100
llvm::AnalysisUsage::setPreservesAll
void setPreservesAll()
Set by analyses that do not transform their input at all.
Definition: PassAnalysisSupport.h:130
llvm::TargetRegisterInfo::getRegSizeInBits
unsigned getRegSizeInBits(const TargetRegisterClass &RC) const
Return the size in bits of a register from class RC.
Definition: TargetRegisterInfo.h:279
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:351
Success
#define Success
Definition: AArch64Disassembler.cpp:295
llvm::LiveIntervals
Definition: LiveIntervals.h:53
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:614
llvm::GCNNSAReassignID
char & GCNNSAReassignID
Definition: GCNNSAReassign.cpp:106
N
#define N
llvm::SIMachineFunctionInfo
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Definition: SIMachineFunctionInfo.h:326
llvm::LiveInterval::reg
Register reg() const
Definition: LiveInterval.h:717
llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::printReg
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
Definition: TargetRegisterInfo.cpp:111
InitializePasses.h
SIRegisterInfo.h
llvm::LiveRegMatrix
Definition: LiveRegMatrix.h:40