LLVM  16.0.0git
AMDGPUUnifyDivergentExitNodes.cpp
Go to the documentation of this file.
1 //===- AMDGPUUnifyDivergentExitNodes.cpp ----------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This is a variant of the UnifyFunctionExitNodes pass. Rather than ensuring
10 // there is at most one ret and one unreachable instruction, it ensures there is
11 // at most one divergent exiting block.
12 //
13 // StructurizeCFG can't deal with multi-exit regions formed by branches to
14 // multiple return nodes. It is not desirable to structurize regions with
15 // uniform branches, so unifying those to the same return block as divergent
16 // branches inhibits use of scalar branching. It still can't deal with the case
17 // where one branch goes to return, and one unreachable. Replace unreachable in
18 // this case with a return.
19 //
20 //===----------------------------------------------------------------------===//
21 
22 #include "AMDGPU.h"
23 #include "SIDefines.h"
24 #include "llvm/ADT/ArrayRef.h"
25 #include "llvm/ADT/SmallPtrSet.h"
26 #include "llvm/ADT/SmallVector.h"
27 #include "llvm/ADT/StringRef.h"
32 #include "llvm/IR/BasicBlock.h"
33 #include "llvm/IR/CFG.h"
34 #include "llvm/IR/Constants.h"
35 #include "llvm/IR/Dominators.h"
36 #include "llvm/IR/Function.h"
37 #include "llvm/IR/IRBuilder.h"
38 #include "llvm/IR/InstrTypes.h"
39 #include "llvm/IR/Instructions.h"
40 #include "llvm/IR/Intrinsics.h"
41 #include "llvm/IR/IntrinsicsAMDGPU.h"
42 #include "llvm/IR/Type.h"
43 #include "llvm/InitializePasses.h"
44 #include "llvm/Pass.h"
45 #include "llvm/Support/Casting.h"
46 #include "llvm/Transforms/Scalar.h"
47 #include "llvm/Transforms/Utils.h"
49 
50 using namespace llvm;
51 
52 #define DEBUG_TYPE "amdgpu-unify-divergent-exit-nodes"
53 
54 namespace {
55 
56 class AMDGPUUnifyDivergentExitNodes : public FunctionPass {
57 private:
58  const TargetTransformInfo *TTI = nullptr;
59 
60 public:
61  static char ID; // Pass identification, replacement for typeid
62 
63  AMDGPUUnifyDivergentExitNodes() : FunctionPass(ID) {
65  }
66 
67  // We can preserve non-critical-edgeness when we unify function exit nodes
68  void getAnalysisUsage(AnalysisUsage &AU) const override;
69  BasicBlock *unifyReturnBlockSet(Function &F, DomTreeUpdater &DTU,
70  ArrayRef<BasicBlock *> ReturningBlocks,
71  StringRef Name);
72  bool runOnFunction(Function &F) override;
73 };
74 
75 } // end anonymous namespace
76 
78 
80 
81 INITIALIZE_PASS_BEGIN(AMDGPUUnifyDivergentExitNodes, DEBUG_TYPE,
82  "Unify divergent function exit nodes", false, false)
86 INITIALIZE_PASS_END(AMDGPUUnifyDivergentExitNodes, DEBUG_TYPE,
87  "Unify divergent function exit nodes", false, false)
88 
89 void AMDGPUUnifyDivergentExitNodes::getAnalysisUsage(AnalysisUsage &AU) const{
91  AU.addRequired<DominatorTreeWrapperPass>();
92 
93  AU.addRequired<PostDominatorTreeWrapperPass>();
94 
95  AU.addRequired<LegacyDivergenceAnalysis>();
96 
98  AU.addPreserved<DominatorTreeWrapperPass>();
99  // FIXME: preserve PostDominatorTreeWrapperPass
100  }
101 
102  // No divergent values are changed, only blocks and branch edges.
103  AU.addPreserved<LegacyDivergenceAnalysis>();
104 
105  // We preserve the non-critical-edgeness property
106  AU.addPreservedID(BreakCriticalEdgesID);
107 
108  // This is a cluster of orthogonal Transforms
109  AU.addPreservedID(LowerSwitchID);
111 
112  AU.addRequired<TargetTransformInfoWrapperPass>();
113 }
114 
115 /// \returns true if \p BB is reachable through only uniform branches.
116 /// XXX - Is there a more efficient way to find this?
118  BasicBlock &BB) {
121 
122  while (!Stack.empty()) {
123  BasicBlock *Top = Stack.pop_back_val();
124  if (!DA.isUniform(Top->getTerminator()))
125  return false;
126 
127  for (BasicBlock *Pred : predecessors(Top)) {
128  if (Visited.insert(Pred).second)
129  Stack.push_back(Pred);
130  }
131  }
132 
133  return true;
134 }
135 
136 BasicBlock *AMDGPUUnifyDivergentExitNodes::unifyReturnBlockSet(
137  Function &F, DomTreeUpdater &DTU, ArrayRef<BasicBlock *> ReturningBlocks,
138  StringRef Name) {
139  // Otherwise, we need to insert a new basic block into the function, add a PHI
140  // nodes (if the function returns values), and convert all of the return
141  // instructions into unconditional branches.
142  BasicBlock *NewRetBlock = BasicBlock::Create(F.getContext(), Name, &F);
143  IRBuilder<> B(NewRetBlock);
144 
145  PHINode *PN = nullptr;
146  if (F.getReturnType()->isVoidTy()) {
147  B.CreateRetVoid();
148  } else {
149  // If the function doesn't return void... add a PHI node to the block...
150  PN = B.CreatePHI(F.getReturnType(), ReturningBlocks.size(),
151  "UnifiedRetVal");
152  B.CreateRet(PN);
153  }
154 
155  // Loop over all of the blocks, replacing the return instruction with an
156  // unconditional branch.
157  std::vector<DominatorTree::UpdateType> Updates;
158  Updates.reserve(ReturningBlocks.size());
159  for (BasicBlock *BB : ReturningBlocks) {
160  // Add an incoming element to the PHI node for every return instruction that
161  // is merging into this new block...
162  if (PN)
163  PN->addIncoming(BB->getTerminator()->getOperand(0), BB);
164 
165  // Remove and delete the return inst.
166  BB->getTerminator()->eraseFromParent();
167  BranchInst::Create(NewRetBlock, BB);
168  Updates.push_back({DominatorTree::Insert, BB, NewRetBlock});
169  }
170 
172  DTU.applyUpdates(Updates);
173  Updates.clear();
174 
175  for (BasicBlock *BB : ReturningBlocks) {
176  // Cleanup possible branch to unconditional branch to the return.
177  simplifyCFG(BB, *TTI, RequireAndPreserveDomTree ? &DTU : nullptr,
178  SimplifyCFGOptions().bonusInstThreshold(2));
179  }
180 
181  return NewRetBlock;
182 }
183 
185  DominatorTree *DT = nullptr;
187  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
188 
189  auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
190 
191  // If there's only one exit, we don't need to do anything.
192  if (PDT.root_size() <= 1)
193  return false;
194 
195  LegacyDivergenceAnalysis &DA = getAnalysis<LegacyDivergenceAnalysis>();
196  TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
197 
198  // Loop over all of the blocks in a function, tracking all of the blocks that
199  // return.
200  SmallVector<BasicBlock *, 4> ReturningBlocks;
201  SmallVector<BasicBlock *, 4> UnreachableBlocks;
202 
203  // Dummy return block for infinite loop.
204  BasicBlock *DummyReturnBB = nullptr;
205 
206  bool Changed = false;
207  std::vector<DominatorTree::UpdateType> Updates;
208 
209  // TODO: For now we unify all exit blocks, even though they are uniformly
210  // reachable, if there are any exits not uniformly reached. This is to
211  // workaround the limitation of structurizer, which can not handle multiple
212  // function exits. After structurizer is able to handle multiple function
213  // exits, we should only unify UnreachableBlocks that are not uniformly
214  // reachable.
215  bool HasDivergentExitBlock = llvm::any_of(
216  PDT.roots(), [&](auto BB) { return !isUniformlyReached(DA, *BB); });
217 
218  for (BasicBlock *BB : PDT.roots()) {
219  if (isa<ReturnInst>(BB->getTerminator())) {
220  if (HasDivergentExitBlock)
221  ReturningBlocks.push_back(BB);
222  } else if (isa<UnreachableInst>(BB->getTerminator())) {
223  if (HasDivergentExitBlock)
224  UnreachableBlocks.push_back(BB);
225  } else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
226 
227  ConstantInt *BoolTrue = ConstantInt::getTrue(F.getContext());
228  if (DummyReturnBB == nullptr) {
229  DummyReturnBB = BasicBlock::Create(F.getContext(),
230  "DummyReturnBlock", &F);
231  Type *RetTy = F.getReturnType();
232  Value *RetVal = RetTy->isVoidTy() ? nullptr : PoisonValue::get(RetTy);
233  ReturnInst::Create(F.getContext(), RetVal, DummyReturnBB);
234  ReturningBlocks.push_back(DummyReturnBB);
235  }
236 
237  if (BI->isUnconditional()) {
238  BasicBlock *LoopHeaderBB = BI->getSuccessor(0);
239  BI->eraseFromParent(); // Delete the unconditional branch.
240  // Add a new conditional branch with a dummy edge to the return block.
241  BranchInst::Create(LoopHeaderBB, DummyReturnBB, BoolTrue, BB);
242  Updates.push_back({DominatorTree::Insert, BB, DummyReturnBB});
243  } else { // Conditional branch.
245 
246  // Create a new transition block to hold the conditional branch.
247  BasicBlock *TransitionBB = BB->splitBasicBlock(BI, "TransitionBlock");
248 
249  Updates.reserve(Updates.size() + 2 * Successors.size() + 2);
250 
251  // 'Successors' become successors of TransitionBB instead of BB,
252  // and TransitionBB becomes a single successor of BB.
253  Updates.push_back({DominatorTree::Insert, BB, TransitionBB});
254  for (BasicBlock *Successor : Successors) {
255  Updates.push_back({DominatorTree::Insert, TransitionBB, Successor});
256  Updates.push_back({DominatorTree::Delete, BB, Successor});
257  }
258 
259  // Create a branch that will always branch to the transition block and
260  // references DummyReturnBB.
261  BB->getTerminator()->eraseFromParent();
262  BranchInst::Create(TransitionBB, DummyReturnBB, BoolTrue, BB);
263  Updates.push_back({DominatorTree::Insert, BB, DummyReturnBB});
264  }
265  Changed = true;
266  }
267  }
268 
269  if (!UnreachableBlocks.empty()) {
270  BasicBlock *UnreachableBlock = nullptr;
271 
272  if (UnreachableBlocks.size() == 1) {
273  UnreachableBlock = UnreachableBlocks.front();
274  } else {
275  UnreachableBlock = BasicBlock::Create(F.getContext(),
276  "UnifiedUnreachableBlock", &F);
277  new UnreachableInst(F.getContext(), UnreachableBlock);
278 
279  Updates.reserve(Updates.size() + UnreachableBlocks.size());
280  for (BasicBlock *BB : UnreachableBlocks) {
281  // Remove and delete the unreachable inst.
282  BB->getTerminator()->eraseFromParent();
283  BranchInst::Create(UnreachableBlock, BB);
284  Updates.push_back({DominatorTree::Insert, BB, UnreachableBlock});
285  }
286  Changed = true;
287  }
288 
289  if (!ReturningBlocks.empty()) {
290  // Don't create a new unreachable inst if we have a return. The
291  // structurizer/annotator can't handle the multiple exits
292 
293  Type *RetTy = F.getReturnType();
294  Value *RetVal = RetTy->isVoidTy() ? nullptr : PoisonValue::get(RetTy);
295  // Remove and delete the unreachable inst.
296  UnreachableBlock->getTerminator()->eraseFromParent();
297 
298  Function *UnreachableIntrin =
299  Intrinsic::getDeclaration(F.getParent(), Intrinsic::amdgcn_unreachable);
300 
301  // Insert a call to an intrinsic tracking that this is an unreachable
302  // point, in case we want to kill the active lanes or something later.
303  CallInst::Create(UnreachableIntrin, {}, "", UnreachableBlock);
304 
305  // Don't create a scalar trap. We would only want to trap if this code was
306  // really reached, but a scalar trap would happen even if no lanes
307  // actually reached here.
308  ReturnInst::Create(F.getContext(), RetVal, UnreachableBlock);
309  ReturningBlocks.push_back(UnreachableBlock);
310  Changed = true;
311  }
312  }
313 
314  // FIXME: add PDT here once simplifycfg is ready.
317  DTU.applyUpdates(Updates);
318  Updates.clear();
319 
320  // Now handle return blocks.
321  if (ReturningBlocks.empty())
322  return Changed; // No blocks return
323 
324  if (ReturningBlocks.size() == 1)
325  return Changed; // Already has a single return block
326 
327  unifyReturnBlockSet(F, DTU, ReturningBlocks, "UnifiedReturnBlock");
328  return true;
329 }
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
llvm::AArch64PACKey::ID
ID
Definition: AArch64BaseInfo.h:818
Scalar.h
llvm::DomTreeUpdater::applyUpdates
void applyUpdates(ArrayRef< DominatorTree::UpdateType > Updates)
Submit updates to all available trees.
Definition: DomTreeUpdater.cpp:230
llvm::Function
Definition: Function.h:60
StringRef.h
Pass.h
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1199
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:173
llvm::IRBuilder<>
DomTreeUpdater.h
Local.h
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::BasicBlock::eraseFromParent
SymbolTableList< BasicBlock >::iterator eraseFromParent()
Unlink 'this' from the containing function and delete it.
Definition: BasicBlock.cpp:132
llvm::DominatorTreeBase< BasicBlock, false >::Insert
static constexpr UpdateKind Insert
Definition: GenericDomTree.h:242
isUniformlyReached
static bool isUniformlyReached(const LegacyDivergenceAnalysis &DA, BasicBlock &BB)
Definition: AMDGPUUnifyDivergentExitNodes.cpp:117
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
llvm::successors
auto successors(MachineBasicBlock *BB)
Definition: MachineSSAContext.h:29
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:24
Constants.h
PostDominators.h
Intrinsics.h
InstrTypes.h
llvm::PostDominatorTreeWrapperPass
Definition: PostDominators.h:73
llvm::CallInst::Create
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Definition: Instructions.h:1517
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
false
Definition: StackSlotColoring.cpp:141
llvm::initializeAMDGPUUnifyDivergentExitNodesPass
void initializeAMDGPUUnifyDivergentExitNodesPass(PassRegistry &)
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::DominatorTreeWrapperPass
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:306
llvm::predecessors
auto predecessors(MachineBasicBlock *BB)
Definition: MachineSSAContext.h:30
llvm::DomTreeUpdater
Definition: DomTreeUpdater.h:28
llvm::LegacyDivergenceAnalysis
Definition: LegacyDivergenceAnalysis.h:31
SmallPtrSet.h
Utils.h
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
Type.h
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
CFG.h
BasicBlock.h
llvm::AArch64PACKey::DA
@ DA
Definition: AArch64BaseInfo.h:821
llvm::Instruction::eraseFromParent
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:81
llvm::AMDGPUUnifyDivergentExitNodesID
char & AMDGPUUnifyDivergentExitNodesID
Definition: AMDGPUUnifyDivergentExitNodes.cpp:79
llvm::TargetTransformInfoWrapperPass
Wrapper pass for TargetTransformInfo.
Definition: TargetTransformInfo.h:2648
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
DEBUG_TYPE
#define DEBUG_TYPE
Definition: AMDGPUUnifyDivergentExitNodes.cpp:52
llvm::PHINode::addIncoming
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Definition: Instructions.h:2847
llvm::BranchInst::Create
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
Definition: Instructions.h:3188
llvm::nodes
iterator_range< typename GraphTraits< GraphType >::nodes_iterator > nodes(const GraphType &G)
Definition: GraphTraits.h:110
ArrayRef.h
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(AMDGPUUnifyDivergentExitNodes, DEBUG_TYPE, "Unify divergent function exit nodes", false, false) INITIALIZE_PASS_END(AMDGPUUnifyDivergentExitNodes
IRBuilder.h
function
print Print MemDeps of function
Definition: MemDepPrinter.cpp:82
llvm::Type::isVoidTy
bool isVoidTy() const
Return true if this is 'void'.
Definition: Type.h:139
llvm::simplifyCFG
bool simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, DomTreeUpdater *DTU=nullptr, const SimplifyCFGOptions &Options={}, ArrayRef< WeakVH > LoopHeaders={})
Definition: SimplifyCFG.cpp:7328
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1741
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
AMDGPU.h
llvm::BasicBlock::Create
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:97
llvm::DomTreeUpdater::UpdateStrategy::Eager
@ Eager
llvm::RequireAndPreserveDomTree
cl::opt< bool > RequireAndPreserveDomTree
This function is used to do simplification of a CFG.
llvm::LowerSwitchID
char & LowerSwitchID
Definition: LowerSwitch.cpp:577
llvm::Intrinsic::getDeclaration
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1481
runOnFunction
static bool runOnFunction(Function &F, bool PostInlining)
Definition: EntryExitInstrumenter.cpp:85
llvm::ConstantInt::getTrue
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:827
llvm::Successor
@ Successor
Definition: SIMachineScheduler.h:35
SIDefines.h
Casting.h
Function.h
llvm::ReturnInst::Create
static ReturnInst * Create(LLVMContext &C, Value *retVal=nullptr, Instruction *InsertBefore=nullptr)
Definition: Instructions.h:3077
llvm::BreakCriticalEdgesID
char & BreakCriticalEdgesID
llvm::SimplifyCFGOptions
Definition: SimplifyCFGOptions.h:23
Instructions.h
LegacyDivergenceAnalysis.h
SmallVector.h
exit
declare void exit(i32) noreturn nounwind This compiles into
Definition: README.txt:1072
Dominators.h
llvm::ArrayRef::size
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:164
TargetTransformInfo.h
llvm::PHINode
Definition: Instructions.h:2697
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:119
llvm::Pass::getAnalysisUsage
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
Definition: Pass.cpp:98
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:308
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::UnreachableInst
This function has undefined behavior.
Definition: Instructions.h:4768
llvm::BranchInst
Conditional or Unconditional Branch instruction.
Definition: Instructions.h:3132
InitializePasses.h
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::DominatorTreeBase< BasicBlock, false >::Delete
static constexpr UpdateKind Delete
Definition: GenericDomTree.h:243
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365
llvm::PoisonValue::get
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1732