LLVM  16.0.0git
AMDGPUSetWavePriority.cpp
Go to the documentation of this file.
1 //===- AMDGPUSetWavePriority.cpp - Set wave priority ----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// Pass to temporarily raise the wave priority beginning at the start of
11 /// the shader function until its last VMEM instructions to allow younger
12 /// waves to issue their VMEM instructions as well.
13 //
14 //===----------------------------------------------------------------------===//
15 
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIInstrInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/Allocator.h"
24 
25 using namespace llvm;
26 
27 #define DEBUG_TYPE "amdgpu-set-wave-priority"
28 
30  "amdgpu-set-wave-priority-valu-insts-threshold",
31  cl::desc("VALU instruction count threshold for adjusting wave priority"),
32  cl::init(100), cl::Hidden);
33 
34 namespace {
35 
36 struct MBBInfo {
37  MBBInfo() = default;
38  unsigned NumVALUInstsAtStart = 0;
39  bool MayReachVMEMLoad = false;
40  MachineInstr *LastVMEMLoad = nullptr;
41 };
42 
44 
45 class AMDGPUSetWavePriority : public MachineFunctionPass {
46 public:
47  static char ID;
48 
49  AMDGPUSetWavePriority() : MachineFunctionPass(ID) {}
50 
51  StringRef getPassName() const override { return "Set wave priority"; }
52 
53  bool runOnMachineFunction(MachineFunction &MF) override;
54 
55 private:
56  MachineInstr *BuildSetprioMI(MachineBasicBlock &MBB,
58  unsigned priority) const;
59 
60  const SIInstrInfo *TII;
61 };
62 
63 } // End anonymous namespace.
64 
65 INITIALIZE_PASS(AMDGPUSetWavePriority, DEBUG_TYPE, "Set wave priority", false,
66  false)
67 
68 char AMDGPUSetWavePriority::ID = 0;
69 
71  return new AMDGPUSetWavePriority();
72 }
73 
75 AMDGPUSetWavePriority::BuildSetprioMI(MachineBasicBlock &MBB,
77  unsigned priority) const {
78  return BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_SETPRIO))
79  .addImm(priority);
80 }
81 
82 // Checks that for every predecessor Pred that can reach a VMEM load,
83 // none of Pred's successors can reach a VMEM load.
85  MBBInfoSet &MBBInfos) {
86  for (const MachineBasicBlock *Pred : MBB.predecessors()) {
87  if (!MBBInfos[Pred].MayReachVMEMLoad)
88  continue;
89  for (const MachineBasicBlock *Succ : Pred->successors()) {
90  if (MBBInfos[Succ].MayReachVMEMLoad)
91  return false;
92  }
93  }
94  return true;
95 }
96 
97 static bool isVMEMLoad(const MachineInstr &MI) {
98  return SIInstrInfo::isVMEM(MI) && MI.mayLoad();
99 }
100 
101 bool AMDGPUSetWavePriority::runOnMachineFunction(MachineFunction &MF) {
102  const unsigned HighPriority = 3;
103  const unsigned LowPriority = 0;
104 
105  Function &F = MF.getFunction();
106  if (skipFunction(F) || !AMDGPU::isEntryFunctionCC(F.getCallingConv()))
107  return false;
108 
109  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
110  TII = ST.getInstrInfo();
111 
112  unsigned VALUInstsThreshold = DefaultVALUInstsThreshold;
113  Attribute A = F.getFnAttribute("amdgpu-wave-priority-threshold");
114  if (A.isValid())
115  A.getValueAsString().getAsInteger(0, VALUInstsThreshold);
116 
117  // Find VMEM loads that may be executed before long-enough sequences of
118  // VALU instructions. We currently assume that backedges/loops, branch
119  // probabilities and other details can be ignored, so we essentially
120  // determine the largest number of VALU instructions along every
121  // possible path from the start of the function that may potentially be
122  // executed provided no backedge is ever taken.
123  MBBInfoSet MBBInfos;
124  for (MachineBasicBlock *MBB : post_order(&MF)) {
125  bool AtStart = true;
126  unsigned MaxNumVALUInstsInMiddle = 0;
127  unsigned NumVALUInstsAtEnd = 0;
128  for (MachineInstr &MI : *MBB) {
129  if (isVMEMLoad(MI)) {
130  AtStart = false;
131  MBBInfo &Info = MBBInfos[MBB];
132  Info.NumVALUInstsAtStart = 0;
133  MaxNumVALUInstsInMiddle = 0;
134  NumVALUInstsAtEnd = 0;
135  Info.LastVMEMLoad = &MI;
136  } else if (SIInstrInfo::isDS(MI)) {
137  AtStart = false;
138  MaxNumVALUInstsInMiddle =
139  std::max(MaxNumVALUInstsInMiddle, NumVALUInstsAtEnd);
140  NumVALUInstsAtEnd = 0;
141  } else if (SIInstrInfo::isVALU(MI)) {
142  if (AtStart)
143  ++MBBInfos[MBB].NumVALUInstsAtStart;
144  ++NumVALUInstsAtEnd;
145  }
146  }
147 
148  bool SuccsMayReachVMEMLoad = false;
149  unsigned NumFollowingVALUInsts = 0;
150  for (const MachineBasicBlock *Succ : MBB->successors()) {
151  SuccsMayReachVMEMLoad |= MBBInfos[Succ].MayReachVMEMLoad;
152  NumFollowingVALUInsts =
153  std::max(NumFollowingVALUInsts, MBBInfos[Succ].NumVALUInstsAtStart);
154  }
155  MBBInfo &Info = MBBInfos[MBB];
156  if (AtStart)
157  Info.NumVALUInstsAtStart += NumFollowingVALUInsts;
158  NumVALUInstsAtEnd += NumFollowingVALUInsts;
159 
160  unsigned MaxNumVALUInsts =
161  std::max(MaxNumVALUInstsInMiddle, NumVALUInstsAtEnd);
162  Info.MayReachVMEMLoad =
163  SuccsMayReachVMEMLoad ||
164  (Info.LastVMEMLoad && MaxNumVALUInsts >= VALUInstsThreshold);
165  }
166 
167  MachineBasicBlock &Entry = MF.front();
168  if (!MBBInfos[&Entry].MayReachVMEMLoad)
169  return false;
170 
171  // Raise the priority at the beginning of the shader.
172  MachineBasicBlock::iterator I = Entry.begin(), E = Entry.end();
173  while (I != E && !SIInstrInfo::isVALU(*I) && !I->isTerminator())
174  ++I;
175  BuildSetprioMI(Entry, I, HighPriority);
176 
177  // Lower the priority on edges where control leaves blocks from which
178  // the VMEM loads are reachable.
179  SmallSet<MachineBasicBlock *, 16> PriorityLoweringBlocks;
180  for (MachineBasicBlock &MBB : MF) {
181  if (MBBInfos[&MBB].MayReachVMEMLoad) {
182  if (MBB.succ_empty())
183  PriorityLoweringBlocks.insert(&MBB);
184  continue;
185  }
186 
188  for (MachineBasicBlock *Pred : MBB.predecessors()) {
189  if (MBBInfos[Pred].MayReachVMEMLoad)
190  PriorityLoweringBlocks.insert(Pred);
191  }
192  continue;
193  }
194 
195  // Where lowering the priority in predecessors is not possible, the
196  // block receiving control either was not part of a loop in the first
197  // place or the loop simplification/canonicalization pass should have
198  // already tried to split the edge and insert a preheader, and if for
199  // whatever reason it failed to do so, then this leaves us with the
200  // only option of lowering the priority within the loop.
201  PriorityLoweringBlocks.insert(&MBB);
202  }
203 
204  for (MachineBasicBlock *MBB : PriorityLoweringBlocks) {
205  BuildSetprioMI(
206  *MBB,
207  MBBInfos[MBB].LastVMEMLoad
208  ? std::next(MachineBasicBlock::iterator(MBBInfos[MBB].LastVMEMLoad))
209  : MBB->begin(),
210  LowPriority);
211  }
212 
213  return true;
214 }
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:109
llvm::MachineInstrBuilder::addImm
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Definition: MachineInstrBuilder.h:131
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
llvm::createAMDGPUSetWavePriorityPass
FunctionPass * createAMDGPUSetWavePriorityPass()
llvm::Function
Definition: Function.h:60
llvm::Attribute
Definition: Attributes.h:67
isVMEMLoad
static bool isVMEMLoad(const MachineInstr &MI)
Definition: AMDGPUSetWavePriority.cpp:97
Allocator.h
llvm::MachineFunctionPass
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Definition: MachineFunctionPass.h:30
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:139
CanLowerPriorityDirectlyInPredecessors
static bool CanLowerPriorityDirectlyInPredecessors(const MachineBasicBlock &MBB, MBBInfoSet &MBBInfos)
Definition: AMDGPUSetWavePriority.cpp:84
llvm::SmallSet
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:136
llvm::GCNSubtarget
Definition: GCNSubtarget.h:31
llvm::max
Expected< ExpressionValue > max(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:337
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::MachineFunction::front
const MachineBasicBlock & front() const
Definition: MachineFunction.h:866
GCNSubtarget.h
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::ARM_PROC::A
@ A
Definition: ARMBaseInfo.h:34
INITIALIZE_PASS
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:37
TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:125
Info
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:94
llvm::AMDGPU::isEntryFunctionCC
bool isEntryFunctionCC(CallingConv::ID CC)
Definition: AMDGPUBaseInfo.cpp:1854
DefaultVALUInstsThreshold
static cl::opt< unsigned > DefaultVALUInstsThreshold("amdgpu-set-wave-priority-valu-insts-threshold", cl::desc("VALU instruction count threshold for adjusting wave priority"), cl::init(100), cl::Hidden)
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:657
llvm::cl::opt
Definition: CommandLine.h:1411
AMDGPUMCTargetDesc.h
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:66
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
llvm::DenseMap
Definition: DenseMap.h:714
llvm::SIInstrInfo::isDS
static bool isDS(const MachineInstr &MI)
Definition: SIInstrInfo.h:491
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:446
MachineFunctionPass.h
llvm::MachineBasicBlock::predecessors
iterator_range< pred_iterator > predecessors()
Definition: MachineBasicBlock.h:386
SIInstrInfo.h
llvm::MachineFunction
Definition: MachineFunction.h:257
llvm::MachineBasicBlock::succ_empty
bool succ_empty() const
Definition: MachineBasicBlock.h:384
llvm::MachineBasicBlock::successors
iterator_range< succ_iterator > successors()
Definition: MachineBasicBlock.h:392
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
AMDGPU.h
MBB
MachineBasicBlock & MBB
Definition: AArch64SLSHardening.cpp:74
llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:623
DEBUG_TYPE
#define DEBUG_TYPE
Definition: AMDGPUSetWavePriority.cpp:27
llvm::SIInstrInfo::isVMEM
static bool isVMEM(const MachineInstr &MI)
Definition: SIInstrInfo.h:369
llvm::post_order
iterator_range< po_iterator< T > > post_order(const T &G)
Definition: PostOrderIterator.h:189
llvm::SmallSet::insert
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:178
llvm::SIInstrInfo
Definition: SIInstrInfo.h:44
PostOrderIterator.h
llvm::BuildMI
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
Definition: MachineInstrBuilder.h:357
llvm::MachineBasicBlock::begin
iterator begin()
Definition: MachineBasicBlock.h:305
llvm::SIInstrInfo::isVALU
static bool isVALU(const MachineInstr &MI)
Definition: SIInstrInfo.h:361
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:308
llvm::DebugLoc
A debug info location.
Definition: DebugLoc.h:33
llvm::cl::desc
Definition: CommandLine.h:412
llvm::MachineInstrBundleIterator< MachineInstr >
InitializePasses.h