LLVM  13.0.0git
AMDGPUUnifyDivergentExitNodes.cpp
Go to the documentation of this file.
1 //===- AMDGPUUnifyDivergentExitNodes.cpp ----------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This is a variant of the UnifyDivergentExitNodes pass. Rather than ensuring
10 // there is at most one ret and one unreachable instruction, it ensures there is
11 // at most one divergent exiting block.
12 //
13 // StructurizeCFG can't deal with multi-exit regions formed by branches to
14 // multiple return nodes. It is not desirable to structurize regions with
15 // uniform branches, so unifying those to the same return block as divergent
16 // branches inhibits use of scalar branching. It still can't deal with the case
17 // where one branch goes to return, and one unreachable. Replace unreachable in
18 // this case with a return.
19 //
20 //===----------------------------------------------------------------------===//
21 
#include "AMDGPU.h"
#include "SIDefines.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/Type.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/Local.h"
49 
50 using namespace llvm;
51 
52 #define DEBUG_TYPE "amdgpu-unify-divergent-exit-nodes"
53 
54 namespace {
55 
56 class AMDGPUUnifyDivergentExitNodes : public FunctionPass {
57 private:
58  const TargetTransformInfo *TTI = nullptr;
59 
60 public:
61  static char ID; // Pass identification, replacement for typeid
62 
63  AMDGPUUnifyDivergentExitNodes() : FunctionPass(ID) {
65  }
66 
67  // We can preserve non-critical-edgeness when we unify function exit nodes
68  void getAnalysisUsage(AnalysisUsage &AU) const override;
69  BasicBlock *unifyReturnBlockSet(Function &F, DomTreeUpdater &DTU,
70  ArrayRef<BasicBlock *> ReturningBlocks,
71  bool InsertExport, StringRef Name);
72  bool runOnFunction(Function &F) override;
73 };
74 
75 } // end anonymous namespace
76 
78 
80 
81 INITIALIZE_PASS_BEGIN(AMDGPUUnifyDivergentExitNodes, DEBUG_TYPE,
82  "Unify divergent function exit nodes", false, false)
86 INITIALIZE_PASS_END(AMDGPUUnifyDivergentExitNodes, DEBUG_TYPE,
87  "Unify divergent function exit nodes", false, false)
88 
89 void AMDGPUUnifyDivergentExitNodes::getAnalysisUsage(AnalysisUsage &AU) const{
91  AU.addRequired<DominatorTreeWrapperPass>();
92 
93  AU.addRequired<PostDominatorTreeWrapperPass>();
94 
95  AU.addRequired<LegacyDivergenceAnalysis>();
96 
98  AU.addPreserved<DominatorTreeWrapperPass>();
99  // FIXME: preserve PostDominatorTreeWrapperPass
100  }
101 
102  // No divergent values are changed, only blocks and branch edges.
103  AU.addPreserved<LegacyDivergenceAnalysis>();
104 
105  // We preserve the non-critical-edgeness property
106  AU.addPreservedID(BreakCriticalEdgesID);
107 
108  // This is a cluster of orthogonal Transforms
109  AU.addPreservedID(LowerSwitchID);
111 
112  AU.addRequired<TargetTransformInfoWrapperPass>();
113 }
114 
115 /// \returns true if \p BB is reachable through only uniform branches.
116 /// XXX - Is there a more efficient way to find this?
118  BasicBlock &BB) {
121 
122  while (!Stack.empty()) {
123  BasicBlock *Top = Stack.pop_back_val();
124  if (!DA.isUniform(Top->getTerminator()))
125  return false;
126 
127  for (BasicBlock *Pred : predecessors(Top)) {
128  if (Visited.insert(Pred).second)
129  Stack.push_back(Pred);
130  }
131  }
132 
133  return true;
134 }
135 
136 static void removeDoneExport(Function &F) {
137  ConstantInt *BoolFalse = ConstantInt::getFalse(F.getContext());
138  for (BasicBlock &BB : F) {
139  for (Instruction &I : BB) {
140  if (IntrinsicInst *Intrin = llvm::dyn_cast<IntrinsicInst>(&I)) {
141  if (Intrin->getIntrinsicID() == Intrinsic::amdgcn_exp) {
142  Intrin->setArgOperand(6, BoolFalse); // done
143  } else if (Intrin->getIntrinsicID() == Intrinsic::amdgcn_exp_compr) {
144  Intrin->setArgOperand(4, BoolFalse); // done
145  }
146  }
147  }
148  }
149 }
150 
151 BasicBlock *AMDGPUUnifyDivergentExitNodes::unifyReturnBlockSet(
152  Function &F, DomTreeUpdater &DTU, ArrayRef<BasicBlock *> ReturningBlocks,
153  bool InsertExport, StringRef Name) {
154  // Otherwise, we need to insert a new basic block into the function, add a PHI
155  // nodes (if the function returns values), and convert all of the return
156  // instructions into unconditional branches.
157  BasicBlock *NewRetBlock = BasicBlock::Create(F.getContext(), Name, &F);
158  IRBuilder<> B(NewRetBlock);
159 
160  if (InsertExport) {
161  // Ensure that there's only one "done" export in the shader by removing the
162  // "done" bit set on the original final export. More than one "done" export
163  // can lead to undefined behavior.
165 
166  Value *Undef = UndefValue::get(B.getFloatTy());
167  B.CreateIntrinsic(Intrinsic::amdgcn_exp, { B.getFloatTy() },
168  {
169  B.getInt32(AMDGPU::Exp::ET_NULL),
170  B.getInt32(0), // enabled channels
171  Undef, Undef, Undef, Undef, // values
172  B.getTrue(), // done
173  B.getTrue(), // valid mask
174  });
175  }
176 
177  PHINode *PN = nullptr;
178  if (F.getReturnType()->isVoidTy()) {
179  B.CreateRetVoid();
180  } else {
181  // If the function doesn't return void... add a PHI node to the block...
182  PN = B.CreatePHI(F.getReturnType(), ReturningBlocks.size(),
183  "UnifiedRetVal");
184  assert(!InsertExport);
185  B.CreateRet(PN);
186  }
187 
188  // Loop over all of the blocks, replacing the return instruction with an
189  // unconditional branch.
190  std::vector<DominatorTree::UpdateType> Updates;
191  Updates.reserve(ReturningBlocks.size());
192  for (BasicBlock *BB : ReturningBlocks) {
193  // Add an incoming element to the PHI node for every return instruction that
194  // is merging into this new block...
195  if (PN)
196  PN->addIncoming(BB->getTerminator()->getOperand(0), BB);
197 
198  // Remove and delete the return inst.
199  BB->getTerminator()->eraseFromParent();
200  BranchInst::Create(NewRetBlock, BB);
201  Updates.push_back({DominatorTree::Insert, BB, NewRetBlock});
202  }
203 
205  DTU.applyUpdates(Updates);
206  Updates.clear();
207 
208  for (BasicBlock *BB : ReturningBlocks) {
209  // Cleanup possible branch to unconditional branch to the return.
210  simplifyCFG(BB, *TTI, RequireAndPreserveDomTree ? &DTU : nullptr,
211  SimplifyCFGOptions().bonusInstThreshold(2));
212  }
213 
214  return NewRetBlock;
215 }
216 
218  DominatorTree *DT = nullptr;
220  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
221 
222  auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
223 
224  // If there's only one exit, we don't need to do anything, unless this is a
225  // pixel shader and that exit is an infinite loop, since we still have to
226  // insert an export in that case.
227  if (PDT.root_size() <= 1 && F.getCallingConv() != CallingConv::AMDGPU_PS)
228  return false;
229 
230  LegacyDivergenceAnalysis &DA = getAnalysis<LegacyDivergenceAnalysis>();
231  TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
232 
233  // Loop over all of the blocks in a function, tracking all of the blocks that
234  // return.
235  SmallVector<BasicBlock *, 4> ReturningBlocks;
236  SmallVector<BasicBlock *, 4> UniformlyReachedRetBlocks;
237  SmallVector<BasicBlock *, 4> UnreachableBlocks;
238 
239  // Dummy return block for infinite loop.
240  BasicBlock *DummyReturnBB = nullptr;
241 
242  bool InsertExport = false;
243 
244  bool Changed = false;
245  std::vector<DominatorTree::UpdateType> Updates;
246 
247  for (BasicBlock *BB : PDT.roots()) {
248  if (isa<ReturnInst>(BB->getTerminator())) {
249  if (!isUniformlyReached(DA, *BB))
250  ReturningBlocks.push_back(BB);
251  else
252  UniformlyReachedRetBlocks.push_back(BB);
253  } else if (isa<UnreachableInst>(BB->getTerminator())) {
254  if (!isUniformlyReached(DA, *BB))
255  UnreachableBlocks.push_back(BB);
256  } else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
257 
258  ConstantInt *BoolTrue = ConstantInt::getTrue(F.getContext());
259  if (DummyReturnBB == nullptr) {
260  DummyReturnBB = BasicBlock::Create(F.getContext(),
261  "DummyReturnBlock", &F);
262  Type *RetTy = F.getReturnType();
263  Value *RetVal = RetTy->isVoidTy() ? nullptr : UndefValue::get(RetTy);
264 
265  // For pixel shaders, the producer guarantees that an export is
266  // executed before each return instruction. However, if there is an
267  // infinite loop and we insert a return ourselves, we need to uphold
268  // that guarantee by inserting a null export. This can happen e.g. in
269  // an infinite loop with kill instructions, which is supposed to
270  // terminate. However, we don't need to do this if there is a non-void
271  // return value, since then there is an epilog afterwards which will
272  // still export.
273  //
274  // Note: In the case where only some threads enter the infinite loop,
275  // this can result in the null export happening redundantly after the
276  // original exports. However, The last "real" export happens after all
277  // the threads that didn't enter an infinite loop converged, which
278  // means that the only extra threads to execute the null export are
279  // threads that entered the infinite loop, and they only could've
280  // exited through being killed which sets their exec bit to 0.
281  // Therefore, unless there's an actual infinite loop, which can have
282  // invalid results, or there's a kill after the last export, which we
283  // assume the frontend won't do, this export will have the same exec
284  // mask as the last "real" export, and therefore the valid mask will be
285  // overwritten with the same value and will still be correct. Also,
286  // even though this forces an extra unnecessary export wait, we assume
287  // that this happens rare enough in practice to that we don't have to
288  // worry about performance.
289  if (F.getCallingConv() == CallingConv::AMDGPU_PS &&
290  RetTy->isVoidTy()) {
291  InsertExport = true;
292  }
293 
294  ReturnInst::Create(F.getContext(), RetVal, DummyReturnBB);
295  ReturningBlocks.push_back(DummyReturnBB);
296  }
297 
298  if (BI->isUnconditional()) {
299  BasicBlock *LoopHeaderBB = BI->getSuccessor(0);
300  BI->eraseFromParent(); // Delete the unconditional branch.
301  // Add a new conditional branch with a dummy edge to the return block.
302  BranchInst::Create(LoopHeaderBB, DummyReturnBB, BoolTrue, BB);
303  Updates.push_back({DominatorTree::Insert, BB, DummyReturnBB});
304  } else { // Conditional branch.
306 
307  // Create a new transition block to hold the conditional branch.
308  BasicBlock *TransitionBB = BB->splitBasicBlock(BI, "TransitionBlock");
309 
310  Updates.reserve(Updates.size() + 2 * Successors.size() + 2);
311 
312  // 'Successors' become successors of TransitionBB instead of BB,
313  // and TransitionBB becomes a single successor of BB.
314  Updates.push_back({DominatorTree::Insert, BB, TransitionBB});
315  for (BasicBlock *Successor : Successors) {
316  Updates.push_back({DominatorTree::Insert, TransitionBB, Successor});
317  Updates.push_back({DominatorTree::Delete, BB, Successor});
318  }
319 
320  // Create a branch that will always branch to the transition block and
321  // references DummyReturnBB.
322  BB->getTerminator()->eraseFromParent();
323  BranchInst::Create(TransitionBB, DummyReturnBB, BoolTrue, BB);
324  Updates.push_back({DominatorTree::Insert, BB, DummyReturnBB});
325  }
326  Changed = true;
327  }
328  }
329 
330  if (!UnreachableBlocks.empty()) {
331  BasicBlock *UnreachableBlock = nullptr;
332 
333  if (UnreachableBlocks.size() == 1) {
334  UnreachableBlock = UnreachableBlocks.front();
335  } else {
336  UnreachableBlock = BasicBlock::Create(F.getContext(),
337  "UnifiedUnreachableBlock", &F);
338  new UnreachableInst(F.getContext(), UnreachableBlock);
339 
340  Updates.reserve(Updates.size() + UnreachableBlocks.size());
341  for (BasicBlock *BB : UnreachableBlocks) {
342  // Remove and delete the unreachable inst.
343  BB->getTerminator()->eraseFromParent();
344  BranchInst::Create(UnreachableBlock, BB);
345  Updates.push_back({DominatorTree::Insert, BB, UnreachableBlock});
346  }
347  Changed = true;
348  }
349 
350  if (!ReturningBlocks.empty()) {
351  // Don't create a new unreachable inst if we have a return. The
352  // structurizer/annotator can't handle the multiple exits
353 
354  Type *RetTy = F.getReturnType();
355  Value *RetVal = RetTy->isVoidTy() ? nullptr : UndefValue::get(RetTy);
356  // Remove and delete the unreachable inst.
357  UnreachableBlock->getTerminator()->eraseFromParent();
358 
359  Function *UnreachableIntrin =
360  Intrinsic::getDeclaration(F.getParent(), Intrinsic::amdgcn_unreachable);
361 
362  // Insert a call to an intrinsic tracking that this is an unreachable
363  // point, in case we want to kill the active lanes or something later.
364  CallInst::Create(UnreachableIntrin, {}, "", UnreachableBlock);
365 
366  // Don't create a scalar trap. We would only want to trap if this code was
367  // really reached, but a scalar trap would happen even if no lanes
368  // actually reached here.
369  ReturnInst::Create(F.getContext(), RetVal, UnreachableBlock);
370  ReturningBlocks.push_back(UnreachableBlock);
371  Changed = true;
372  }
373  }
374 
375  // FIXME: add PDT here once simplifycfg is ready.
378  DTU.applyUpdates(Updates);
379  Updates.clear();
380 
381  // Now handle return blocks.
382  if (ReturningBlocks.empty())
383  return Changed; // No blocks return
384 
385  if (ReturningBlocks.size() == 1 && !InsertExport)
386  return Changed; // Already has a single return block
387 
388  // Unify returning blocks. If we are going to insert the export it is also
389  // necessary to include blocks that are uniformly reached, because in addition
390  // to inserting the export the "done" bits on existing exports will be cleared
391  // and we do not want to end up with the normal export in a non-unified,
392  // uniformly reached block with the "done" bit cleared.
393  auto BlocksToUnify = std::move(ReturningBlocks);
394  if (InsertExport) {
395  llvm::append_range(BlocksToUnify, UniformlyReachedRetBlocks);
396  }
397 
398  unifyReturnBlockSet(F, DTU, BlocksToUnify, InsertExport,
399  "UnifiedReturnBlock");
400  return true;
401 }
llvm::predecessors
pred_range predecessors(BasicBlock *BB)
Definition: CFG.h:127
llvm
Definition: AllocatorList.h:23
llvm::Intrinsic::getDeclaration
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1318
Scalar.h
llvm::DomTreeUpdater::applyUpdates
void applyUpdates(ArrayRef< DominatorTree::UpdateType > Updates)
Submit updates to all available trees.
Definition: DomTreeUpdater.cpp:230
llvm::succ_end
Interval::succ_iterator succ_end(Interval *I)
Definition: Interval.h:102
llvm::Function
Definition: Function.h:61
StringRef.h
Pass.h
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:167
llvm::IRBuilder<>
DomTreeUpdater.h
Local.h
llvm::AMDGPU::Exp::ET_NULL
@ ET_NULL
Definition: SIDefines.h:736
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:151
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:46
llvm::BasicBlock::eraseFromParent
SymbolTableList< BasicBlock >::iterator eraseFromParent()
Unlink 'this' from the containing function and delete it.
Definition: BasicBlock.cpp:129
llvm::DominatorTreeBase< BasicBlock, false >::Insert
static constexpr UpdateKind Insert
Definition: GenericDomTree.h:242
isUniformlyReached
static bool isUniformlyReached(const LegacyDivergenceAnalysis &DA, BasicBlock &BB)
Definition: AMDGPUUnifyDivergentExitNodes.cpp:117
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:449
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:77
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
Constants.h
PostDominators.h
Intrinsics.h
InstrTypes.h
llvm::PostDominatorTreeWrapperPass
Definition: PostDominators.h:73
llvm::CallInst::Create
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Definition: Instructions.h:1493
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
false
Definition: StackSlotColoring.cpp:142
llvm::initializeAMDGPUUnifyDivergentExitNodesPass
void initializeAMDGPUUnifyDivergentExitNodesPass(PassRegistry &)
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::CallingConv::AMDGPU_PS
@ AMDGPU_PS
Calling convention used for Mesa/AMDPAL pixel shaders.
Definition: CallingConv.h:205
llvm::Instruction
Definition: Instruction.h:45
llvm::DominatorTreeWrapperPass
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:281
llvm::UndefValue::get
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1770
llvm::DomTreeUpdater
Definition: DomTreeUpdater.h:28
llvm::LegacyDivergenceAnalysis
Definition: LegacyDivergenceAnalysis.h:31
SmallPtrSet.h
Utils.h
Type.h
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
CFG.h
BasicBlock.h
llvm::Instruction::eraseFromParent
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:78
llvm::AMDGPUUnifyDivergentExitNodesID
char & AMDGPUUnifyDivergentExitNodesID
Definition: AMDGPUUnifyDivergentExitNodes.cpp:79
llvm::TargetTransformInfoWrapperPass
Wrapper pass for TargetTransformInfo.
Definition: TargetTransformInfo.h:2375
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
DEBUG_TYPE
#define DEBUG_TYPE
Definition: AMDGPUUnifyDivergentExitNodes.cpp:52
llvm::PHINode::addIncoming
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Definition: Instructions.h:2722
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::BranchInst::Create
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
Definition: Instructions.h:3063
llvm::nodes
iterator_range< typename GraphTraits< GraphType >::nodes_iterator > nodes(const GraphType &G)
Definition: GraphTraits.h:108
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::succ_begin
Interval::succ_iterator succ_begin(Interval *I)
succ_begin/succ_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:99
ArrayRef.h
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(AMDGPUUnifyDivergentExitNodes, DEBUG_TYPE, "Unify divergent function exit nodes", false, false) INITIALIZE_PASS_END(AMDGPUUnifyDivergentExitNodes
IRBuilder.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::RegState::Undef
@ Undef
Value of the register doesn't matter.
Definition: MachineInstrBuilder.h:52
function
print Print MemDeps of function
Definition: MemDepPrinter.cpp:83
llvm::Type::isVoidTy
bool isVoidTy() const
Return true if this is 'void'.
Definition: Type.h:139
llvm::M68kBeads::DA
@ DA
Definition: M68kBaseInfo.h:59
llvm::simplifyCFG
bool simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, DomTreeUpdater *DTU=nullptr, const SimplifyCFGOptions &Options={}, ArrayRef< WeakVH > LoopHeaders={})
Definition: SimplifyCFG.cpp:6824
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:57
AMDGPU.h
llvm::BasicBlock::Create
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:100
llvm::append_range
void append_range(Container &C, Range &&R)
Wrapper function to append a range to a container.
Definition: STLExtras.h:1667
llvm::DomTreeUpdater::UpdateStrategy::Eager
@ Eager
llvm::RequireAndPreserveDomTree
cl::opt< bool > RequireAndPreserveDomTree
This function is used to do simplification of a CFG.
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:148
llvm::LowerSwitchID
char & LowerSwitchID
Definition: LowerSwitch.cpp:572
llvm::ConstantInt::getFalse
static ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:840
runOnFunction
static bool runOnFunction(Function &F, bool PostInlining)
Definition: EntryExitInstrumenter.cpp:69
llvm::ConstantInt::getTrue
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:833
llvm::GraphProgram::Name
Name
Definition: GraphWriter.h:52
llvm::Successor
@ Successor
Definition: SIMachineScheduler.h:33
SIDefines.h
Casting.h
Function.h
llvm::ReturnInst::Create
static ReturnInst * Create(LLVMContext &C, Value *retVal=nullptr, Instruction *InsertBefore=nullptr)
Definition: Instructions.h:2952
llvm::BreakCriticalEdgesID
char & BreakCriticalEdgesID
llvm::IntrinsicInst
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:45
llvm::SimplifyCFGOptions
Definition: SimplifyCFGOptions.h:23
Instructions.h
LegacyDivergenceAnalysis.h
SmallVector.h
exit
declare void exit(i32) noreturn nounwind This compiles into
Definition: README.txt:1072
Dominators.h
llvm::ArrayRef::size
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:163
TargetTransformInfo.h
llvm::PHINode
Definition: Instructions.h:2572
llvm::Pass::getAnalysisUsage
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
Definition: Pass.cpp:93
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::UnreachableInst
This function has undefined behavior.
Definition: Instructions.h:4652
llvm::BranchInst
Conditional or Unconditional Branch instruction.
Definition: Instructions.h:3007
InitializePasses.h
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
removeDoneExport
static void removeDoneExport(Function &F)
Definition: AMDGPUUnifyDivergentExitNodes.cpp:136
llvm::DominatorTreeBase< BasicBlock, false >::Delete
static constexpr UpdateKind Delete
Definition: GenericDomTree.h:243
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:364
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38