LLVM  16.0.0git
LegacyDivergenceAnalysis.cpp
Go to the documentation of this file.
1 //===- LegacyDivergenceAnalysis.cpp --------- Legacy Divergence Analysis
2 //Implementation -==//
3 //
4 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5 // See https://llvm.org/LICENSE.txt for license information.
6 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements divergence analysis which determines whether a branch
11 // in a GPU program is divergent. It can help branch optimizations such as jump
12 // threading and loop unswitching to make better decisions.
13 //
14 // GPU programs typically use the SIMD execution model, where multiple threads
15 // in the same execution group have to execute in lock-step. Therefore, if the
16 // code contains divergent branches (i.e., threads in a group do not agree on
17 // which path of the branch to take), the group of threads has to execute all
18 // the paths from that branch with different subsets of threads enabled until
19 // they converge at the immediately post-dominating BB of the paths.
20 //
21 // Due to this execution model, some optimizations such as jump
22 // threading and loop unswitching can be unfortunately harmful when performed on
23 // divergent branches. Therefore, an analysis that computes which branches in a
24 // GPU program are divergent can help the compiler to selectively run these
25 // optimizations.
26 //
27 // This file defines divergence analysis which computes a conservative but
28 // non-trivial approximation of all divergent branches in a GPU program. It
29 // partially implements the approach described in
30 //
31 // Divergence Analysis
32 // Sampaio, Souza, Collange, Pereira
33 // TOPLAS '13
34 //
35 // The divergence analysis identifies the sources of divergence (e.g., special
36 // variables that hold the thread ID), and recursively marks variables that are
37 // data or sync dependent on a source of divergence as divergent.
38 //
39 // While data dependency is a well-known concept, the notion of sync dependency
40 // is worth more explanation. Sync dependence characterizes the control flow
41 // aspect of the propagation of branch divergence. For example,
42 //
43 // %cond = icmp slt i32 %tid, 10
44 // br i1 %cond, label %then, label %else
45 // then:
46 // br label %merge
47 // else:
48 // br label %merge
49 // merge:
50 // %a = phi i32 [ 0, %then ], [ 1, %else ]
51 //
52 // Suppose %tid holds the thread ID. Although %a is not data dependent on %tid
53 // because %tid is not on its use-def chains, %a is sync dependent on %tid
54 // because the branch "br i1 %cond" depends on %tid and affects which value %a
55 // is assigned to.
56 //
57 // The current implementation has the following limitations:
58 // 1. intra-procedural. It conservatively considers the arguments of a
59 // non-kernel-entry function and the return value of a function call as
60 // divergent.
61 // 2. memory as black box. It conservatively considers values loaded from
62 // generic or local address as divergent. This can be improved by leveraging
63 // pointer analysis.
64 //
65 //===----------------------------------------------------------------------===//
66 
69 #include "llvm/Analysis/CFG.h"
71 #include "llvm/Analysis/LoopInfo.h"
72 #include "llvm/Analysis/Passes.h"
75 #include "llvm/IR/Dominators.h"
76 #include "llvm/IR/InstIterator.h"
77 #include "llvm/IR/Instructions.h"
78 #include "llvm/IR/Value.h"
79 #include "llvm/InitializePasses.h"
81 #include "llvm/Support/Debug.h"
83 #include <vector>
84 using namespace llvm;
85 
86 #define DEBUG_TYPE "divergence"
87 
88 // Hidden command-line flag, off by default (cl::init(false)). Per its
// description, enabling it turns LegacyDivergenceAnalysis into a wrapper for
// GPUDivergenceAnalysis.
89 static cl::opt<bool> UseGPUDA("use-gpu-divergence-analysis", cl::init(false),
90  cl::Hidden,
91  cl::desc("turn the LegacyDivergenceAnalysis into "
92  "a wrapper for GPUDivergenceAnalysis"));
93 
94 namespace {
95 
97 public:
101  : F(F), TTI(TTI), DT(DT), PDT(PDT), DV(DV), DU(DU) {}
  // Clears Worklist, DV and DU, then seeds Worklist and DV with every
  // instruction and argument of F that TTI reports as a source of divergence.
102  void populateWithSourcesOfDivergence();
  // Drains Worklist, exploring the sync and data dependents of each value
  // popped from it.
103  void propagate();
104 
105 private:
106  // A helper function that explores data dependents of V: every user of V
  // that TTI does not report as always uniform is added to DV and, if it was
  // not already there, pushed onto Worklist.
107  void exploreDataDependency(Value *V);
108  // A helper function that explores sync dependents of the terminator TI.
109  void exploreSyncDependency(Instruction *TI);
110  // Computes the influence region from Start to End. This region includes all
111  // basic blocks on any simple path from Start to End.
112  void computeInfluenceRegion(BasicBlock *Start, BasicBlock *End,
113  DenseSet<BasicBlock *> &InfluenceRegion);
114  // Finds all users of I that are outside the influence region. Each such use
115  // is recorded in DU; users not yet in DV are added to DV and to Worklist.
116  void findUsersOutsideInfluenceRegion(
117  Instruction &I, const DenseSet<BasicBlock *> &InfluenceRegion);
118 
119  Function &F;
121  DominatorTree &DT;
123  std::vector<Value *> Worklist; // Stack for DFS.
124  DenseSet<const Value *> &DV; // Stores all divergent values.
125  DenseSet<const Use *> &DU; // Stores divergent uses of possibly uniform
126  // values.
127 };
128 
129 void DivergencePropagator::populateWithSourcesOfDivergence() {
130  Worklist.clear();
131  DV.clear();
132  DU.clear();
133  for (auto &I : instructions(F)) {
134  if (TTI.isSourceOfDivergence(&I)) {
135  Worklist.push_back(&I);
136  DV.insert(&I);
137  }
138  }
139  for (auto &Arg : F.args()) {
140  if (TTI.isSourceOfDivergence(&Arg)) {
141  Worklist.push_back(&Arg);
142  DV.insert(&Arg);
143  }
144  }
145 }
146 
147 void DivergencePropagator::exploreSyncDependency(Instruction *TI) {
148  // Propagation rule 1: if branch TI is divergent, all PHINodes in TI's
149  // immediate post dominator are divergent. This rule handles if-then-else
150  // patterns. For example,
151  //
152  // if (tid < 5)
153  // a1 = 1;
154  // else
155  // a2 = 2;
156  // a = phi(a1, a2); // sync dependent on (tid < 5)
157  BasicBlock *ThisBB = TI->getParent();
158 
159  // Unreachable blocks may not be in the dominator tree.
160  if (!DT.isReachableFromEntry(ThisBB))
161  return;
162 
163  // If the function has no exit blocks or doesn't reach any exit blocks, the
164  // post dominator may be null.
165  DomTreeNode *ThisNode = PDT.getNode(ThisBB);
166  if (!ThisNode)
167  return;
168 
169  BasicBlock *IPostDom = ThisNode->getIDom()->getBlock();
170  if (IPostDom == nullptr)
171  return;
172 
173  for (auto I = IPostDom->begin(); isa<PHINode>(I); ++I) {
174  // A PHINode is uniform if it returns the same value no matter which path is
175  // taken.
176  if (!cast<PHINode>(I)->hasConstantOrUndefValue() && DV.insert(&*I).second)
177  Worklist.push_back(&*I);
178  }
179 
180  // Propagation rule 2: if a value defined in a loop is used outside, the user
181  // is sync dependent on the condition of the loop exits that dominate the
182  // user. For example,
183  //
184  // int i = 0;
185  // do {
186  // i++;
187  // if (foo(i)) ... // uniform
188  // } while (i < tid);
189  // if (bar(i)) ... // divergent
190  //
191  // A program may contain unstructured loops. Therefore, we cannot leverage
192  // LoopInfo, which only recognizes natural loops.
193  //
194  // The algorithm used here handles both natural and unstructured loops. Given
195  // a branch TI, we first compute its influence region, the union of all simple
196  // paths from TI to its immediate post dominator (IPostDom). Then, we search
197  // for all the values defined in the influence region but used outside. All
198  // these users are sync dependent on TI.
199  DenseSet<BasicBlock *> InfluenceRegion;
200  computeInfluenceRegion(ThisBB, IPostDom, InfluenceRegion);
201  // An insight that can speed up the search process is that all the in-region
202  // values that are used outside must dominate TI. Therefore, instead of
203  // searching every basic blocks in the influence region, we search all the
204  // dominators of TI until it is outside the influence region.
205  BasicBlock *InfluencedBB = ThisBB;
206  while (InfluenceRegion.count(InfluencedBB)) {
207  for (auto &I : *InfluencedBB) {
208  if (!DV.count(&I))
209  findUsersOutsideInfluenceRegion(I, InfluenceRegion);
210  }
211  DomTreeNode *IDomNode = DT.getNode(InfluencedBB)->getIDom();
212  if (IDomNode == nullptr)
213  break;
214  InfluencedBB = IDomNode->getBlock();
215  }
216 }
217 
218 void DivergencePropagator::findUsersOutsideInfluenceRegion(
219  Instruction &I, const DenseSet<BasicBlock *> &InfluenceRegion) {
220  for (Use &Use : I.uses()) {
221  Instruction *UserInst = cast<Instruction>(Use.getUser());
222  if (!InfluenceRegion.count(UserInst->getParent())) {
223  DU.insert(&Use);
224  if (DV.insert(UserInst).second)
225  Worklist.push_back(UserInst);
226  }
227  }
228 }
229 
230 // A helper function for computeInfluenceRegion that adds successors of "ThisBB"
231 // to the influence region.
232 static void
233 addSuccessorsToInfluenceRegion(BasicBlock *ThisBB, BasicBlock *End,
234  DenseSet<BasicBlock *> &InfluenceRegion,
235  std::vector<BasicBlock *> &InfluenceStack) {
236  for (BasicBlock *Succ : successors(ThisBB)) {
237  if (Succ != End && InfluenceRegion.insert(Succ).second)
238  InfluenceStack.push_back(Succ);
239  }
240 }
241 
242 void DivergencePropagator::computeInfluenceRegion(
243  BasicBlock *Start, BasicBlock *End,
244  DenseSet<BasicBlock *> &InfluenceRegion) {
245  assert(PDT.properlyDominates(End, Start) &&
246  "End does not properly dominate Start");
247 
248  // The influence region starts from the end of "Start" to the beginning of
249  // "End". Therefore, "Start" should not be in the region unless "Start" is in
250  // a loop that doesn't contain "End".
251  std::vector<BasicBlock *> InfluenceStack;
252  addSuccessorsToInfluenceRegion(Start, End, InfluenceRegion, InfluenceStack);
253  while (!InfluenceStack.empty()) {
254  BasicBlock *BB = InfluenceStack.back();
255  InfluenceStack.pop_back();
256  addSuccessorsToInfluenceRegion(BB, End, InfluenceRegion, InfluenceStack);
257  }
258 }
259 
260 void DivergencePropagator::exploreDataDependency(Value *V) {
261  // Follow def-use chains of V.
262  for (User *U : V->users()) {
263  if (!TTI.isAlwaysUniform(U) && DV.insert(U).second)
264  Worklist.push_back(U);
265  }
266 }
267 
269  // Traverse the dependency graph using DFS. Worklist is used as a stack:
  // the most recently pushed value is processed first, and the helpers below
  // push any newly discovered divergent values back onto it.
270  while (!Worklist.empty()) {
271  Value *V = Worklist.back();
272  Worklist.pop_back();
273  if (Instruction *I = dyn_cast<Instruction>(V)) {
274  // Terminators with less than two successors won't introduce sync
275  // dependency. Ignore them.
276  if (I->isTerminator() && I->getNumSuccessors() > 1)
277  exploreSyncDependency(I);
278  }
  // Data dependents are explored for every value, terminator or not.
279  exploreDataDependency(V);
280  }
281 }
282 
283 } // namespace
284 
285 // Register this pass.
289 }
291  "Legacy Divergence Analysis", false, true)
296  "Legacy Divergence Analysis", false, true)
297 
  // Returns a newly heap-allocated pass instance as a raw pointer.
299  return new LegacyDivergenceAnalysis();
300 }
301 
  // Declare that running this pass keeps all other analyses valid.
306  AU.setPreservesAll();
307 }
308 
// Decides whether runOnFunction should build the GPU divergence analysis for F
// instead of running DivergencePropagator. Unless the early exit below is
// taken, the result is true exactly when F's CFG contains no irreducible
// control flow.
309 bool LegacyDivergenceAnalysis::shouldUseGPUDivergenceAnalysis(
310  const Function &F, const TargetTransformInfo &TTI) const {
312  return false;
313 
314  // GPUDivergenceAnalysis requires a reducible CFG. Reducibility is checked
  // with containsIrreducibleCFG over a reverse post-order traversal of F and
  // the LoopInfo provided by LoopInfoWrapperPass.
315  auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
316  using RPOTraversal = ReversePostOrderTraversal<const Function *>;
317  RPOTraversal FuncRPOT(&F);
318  return !containsIrreducibleCFG<const BasicBlock *, const RPOTraversal,
319  const LoopInfo>(FuncRPOT, LI);
320 }
321 
  // Target information is looked up optionally; without it no divergence
  // information is computed for F.
323  auto *TTIWP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
324  if (TTIWP == nullptr)
325  return false;
326 
327  TargetTransformInfo &TTI = TTIWP->getTTI(F);
328  // Fast path: if the target does not have branch divergence, we do not mark
329  // any branch as divergent.
330  if (!TTI.hasBranchDivergence())
331  return false;
332 
  // Discard whatever an earlier run left behind before analyzing F.
333  DivergentValues.clear();
334  DivergentUses.clear();
335  gpuDA = nullptr;
336 
337  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
338  auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
339 
340  if (shouldUseGPUDivergenceAnalysis(F, TTI)) {
341  // Run the new GPU divergence analysis. shouldUseGPUDivergenceAnalysis only
  // returns true after finding no irreducible control flow, hence
  // KnownReducible is passed as true.
342  auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
343  gpuDA = std::make_unique<DivergenceInfo>(F, DT, PDT, LI, TTI,
344  /* KnownReducible = */ true);
345 
346  } else {
347  // Run LLVM's existing DivergenceAnalysis. The propagator writes its
  // results directly into DivergentValues and DivergentUses.
348  DivergencePropagator DP(F, TTI, DT, PDT, DivergentValues, DivergentUses);
349  DP.populateWithSourcesOfDivergence();
350  DP.propagate();
351  }
352 
  // Dump the result for F to the debug stream.
353  LLVM_DEBUG(dbgs() << "\nAfter divergence analysis on " << F.getName()
354  << ":\n";
355  print(dbgs(), F.getParent()));
356 
357  return false;
358 }
359 
  // When the GPU divergence analysis was built for the current function, the
  // query is forwarded to it.
361  if (gpuDA) {
362  return gpuDA->isDivergent(*V);
363  }
  // Otherwise V is divergent iff it was recorded in DivergentValues.
364  return DivergentValues.count(V);
365 }
366 
  // When the GPU divergence analysis was built for the current function, the
  // query is forwarded to it.
368  if (gpuDA) {
369  return gpuDA->isDivergentUse(*U);
370  }
  // Otherwise a use is divergent if the value it refers to is divergent, or if
  // the use itself was recorded in DivergentUses.
371  return DivergentValues.count(U->get()) || DivergentUses.count(U);
372 }
373 
  // Nothing to print when neither gpuDA nor DivergentValues holds any
  // divergence.
375  if ((!gpuDA || !gpuDA->hasDivergence()) && DivergentValues.empty())
376  return;
377 
  // Recover the analyzed function: from the first recorded divergent value if
  // there is one, otherwise from gpuDA.
378  const Function *F = nullptr;
379  if (!DivergentValues.empty()) {
380  const Value *FirstDivergentValue = *DivergentValues.begin();
381  if (const Argument *Arg = dyn_cast<Argument>(FirstDivergentValue)) {
382  F = Arg->getParent();
383  } else if (const Instruction *I =
384  dyn_cast<Instruction>(FirstDivergentValue)) {
385  F = I->getParent()->getParent();
386  } else {
387  llvm_unreachable("Only arguments and instructions can be divergent");
388  }
389  } else if (gpuDA) {
390  F = &gpuDA->getFunction();
391  }
392  if (!F)
393  return;
394 
395  // Dumps all divergent values in F, arguments and then instructions. Every
  // argument and instruction is printed; divergent ones get the "DIVERGENT: "
  // prefix.
396  for (const auto &Arg : F->args()) {
397  OS << (isDivergent(&Arg) ? "DIVERGENT: " : " ");
398  OS << Arg << "\n";
399  }
400  // Walk the basic blocks of F and their instructions in order (rather than
  // iterating DivergentValues) to ensure a deterministic output order.
401  for (const BasicBlock &BB : *F) {
402  OS << "\n " << BB.getName() << ":\n";
403  for (const auto &I : BB.instructionsWithoutDebug()) {
404  OS << (isDivergent(&I) ? "DIVERGENT: " : " ");
405  OS << I << "\n";
406  }
407  }
408  OS << "\n";
409 }
llvm::Argument
This class represents an incoming formal argument to a Function.
Definition: Argument.h:28
llvm::LegacyDivergenceAnalysis::isDivergentUse
bool isDivergentUse(const Use *U) const
Definition: LegacyDivergenceAnalysis.cpp:367
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
InstIterator.h
llvm::Function
Definition: Function.h:60
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:173
llvm::DivergencePropagator::PDT
const PostDominatorTree & PDT
Definition: SyncDependenceAnalysis.cpp:264
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:140
true
basic Basic Alias true
Definition: BasicAliasAnalysis.cpp:1793
llvm::LegacyDivergenceAnalysis::LegacyDivergenceAnalysis
LegacyDivergenceAnalysis()
Definition: LegacyDivergenceAnalysis.cpp:287
llvm::LoopInfoWrapperPass
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:1293
llvm::initializeLegacyDivergenceAnalysisPass
void initializeLegacyDivergenceAnalysisPass(PassRegistry &)
llvm::DominatorTreeBase::getNode
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
Definition: GenericDomTree.h:351
DivergenceAnalysis.h
llvm::successors
auto successors(MachineBasicBlock *BB)
Definition: MachineSSAContext.h:29
llvm::DomTreeNodeBase::getIDom
DomTreeNodeBase * getIDom() const
Definition: GenericDomTree.h:89
llvm::detail::DenseSetImpl< ValueT, DenseMap< ValueT, detail::DenseSetEmpty, DenseMapInfo< ValueT >, detail::DenseSetPair< ValueT > >, DenseMapInfo< ValueT > >::insert
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:206
llvm::detail::DenseSetImpl< ValueT, DenseMap< ValueT, detail::DenseSetEmpty, DenseMapInfo< ValueT >, detail::DenseSetPair< ValueT > >, DenseMapInfo< ValueT > >::count
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition: DenseSet.h:97
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
Arg
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
Definition: AMDGPULibCalls.cpp:187
CommandLine.h
llvm::LegacyDivergenceAnalysis::ID
static char ID
Definition: LegacyDivergenceAnalysis.h:33
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:24
PostDominators.h
llvm::User
Definition: User.h:44
llvm::TargetTransformInfo::useGPUDivergenceAnalysis
bool useGPUDivergenceAnalysis() const
Return true if the target prefers to use GPU divergence analysis to replace the legacy version.
Definition: TargetTransformInfo.cpp:242
llvm::DivergencePropagator
Definition: SyncDependenceAnalysis.cpp:261
llvm::BasicBlock::begin
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:306
llvm::TargetTransformInfo::hasBranchDivergence
bool hasBranchDivergence() const
Return true if branch divergence exists.
Definition: TargetTransformInfo.cpp:238
llvm::PostDominatorTreeWrapperPass
Definition: PostDominators.h:73
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
false
Definition: StackSlotColoring.cpp:141
divergence
divergence
Definition: LegacyDivergenceAnalysis.cpp:295
llvm::Instruction
Definition: Instruction.h:42
llvm::DominatorTreeWrapperPass
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:306
llvm::TargetTransformInfo::isSourceOfDivergence
bool isSourceOfDivergence(const Value *V) const
Returns whether V is a source of divergence.
Definition: TargetTransformInfo.cpp:246
llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:53
llvm::Use::getUser
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:72
llvm::LegacyDivergenceAnalysis
Definition: LegacyDivergenceAnalysis.h:31
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
LoopInfo.h
llvm::LegacyDivergenceAnalysis::print
void print(raw_ostream &OS, const Module *) const override
print - Print out the internal state of the pass.
Definition: LegacyDivergenceAnalysis.cpp:374
llvm::LegacyDivergenceAnalysis::runOnFunction
bool runOnFunction(Function &F) override
runOnFunction - Virtual method overriden by subclasses to do the per-function processing of the pass.
Definition: LegacyDivergenceAnalysis.cpp:322
llvm::DenseSet< const Value * >
llvm::cl::opt< bool >
llvm::instructions
inst_range instructions(Function *F)
Definition: InstIterator.h:133
llvm::DomTreeNodeBase::getBlock
NodeT * getBlock() const
Definition: GenericDomTree.h:88
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::LegacyDivergenceAnalysis::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
Definition: LegacyDivergenceAnalysis.cpp:302
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::detail::DenseSetImpl::empty
bool empty() const
Definition: DenseSet.h:80
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:447
propagate
static void propagate(InstantiatedValue From, InstantiatedValue To, MatchState State, ReachabilitySet &ReachSet, std::vector< WorkListItem > &WorkList)
Definition: CFLAndersAliasAnalysis.cpp:597
UseGPUDA
static cl::opt< bool > UseGPUDA("use-gpu-divergence-analysis", cl::init(false), cl::Hidden, cl::desc("turn the LegacyDivergenceAnalysis into " "a wrapper for GPUDivergenceAnalysis"))
llvm::detail::DenseSetImpl::begin
iterator begin()
Definition: DenseSet.h:173
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::DominatorTree::isReachableFromEntry
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition: Dominators.cpp:335
llvm::TTI
TargetTransformInfo TTI
Definition: TargetTransformInfo.h:168
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:66
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(LegacyDivergenceAnalysis, "divergence", "Legacy Divergence Analysis", false, true) INITIALIZE_PASS_END(LegacyDivergenceAnalysis
llvm::detail::DenseSetImpl::clear
void clear()
Definition: DenseSet.h:92
CFG.h
llvm::LoopInfo
Definition: LoopInfo.h:1108
DP
So we should use XX3Form_Rcr to implement intrinsic Convert DP outs ins xscvdpsp No builtin are required Round &Convert QP DP(dword[1] is set to zero) No builtin are required Round to Quad Precision because you need to assign rounding mode in instruction Provide builtin(set f128:$vT,(int_ppc_vsx_xsrqpi f128:$vB))(set f128 yields< n x< ty > >< result > yields< ty >< result > No builtin are required Load Store load store see def memrix16 in PPCInstrInfo td Load Store Vector load store outs ins lxsdx set load store with conversion from to DP
Definition: README_P9.txt:520
llvm::PostDominatorTree
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
Definition: PostDominators.h:28
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143
llvm::LegacyDivergenceAnalysis::isDivergent
bool isDivergent(const Value *V) const
Definition: LegacyDivergenceAnalysis.cpp:360
llvm::DominatorTreeBase::properlyDominates
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
Definition: GenericDomTree.h:392
llvm::DomTreeNodeBase< BasicBlock >
llvm::AnalysisUsage::setPreservesAll
void setPreservesAll()
Set by analyses that do not transform their input at all.
Definition: PassAnalysisSupport.h:130
llvm::ReversePostOrderTraversal
Definition: PostOrderIterator.h:291
Instructions.h
PostOrderIterator.h
LegacyDivergenceAnalysis.h
Dominators.h
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:91
TargetTransformInfo.h
llvm::DivergencePropagator::DT
const DominatorTree & DT
Definition: SyncDependenceAnalysis.cpp:263
llvm::createLegacyDivergenceAnalysisPass
FunctionPass * createLegacyDivergenceAnalysisPass()
Definition: LegacyDivergenceAnalysis.cpp:298
llvm::AnalysisUsage::addRequiredTransitive
AnalysisUsage & addRequiredTransitive()
Definition: PassAnalysisSupport.h:81
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:308
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::cl::desc
Definition: CommandLine.h:413
raw_ostream.h
llvm::DivergencePropagator::DivergencePropagator
DivergencePropagator(const ModifiedPO &LoopPOT, const DominatorTree &DT, const PostDominatorTree &PDT, const LoopInfo &LI, const BasicBlock &DivTermBlock)
Definition: SyncDependenceAnalysis.cpp:278
Value.h
InitializePasses.h
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
Debug.h
llvm::Value::users
iterator_range< user_iterator > users()
Definition: Value.h:421
llvm::TargetTransformInfo::isAlwaysUniform
bool isAlwaysUniform(const Value *V) const
Definition: TargetTransformInfo.cpp:250
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
llvm::containsIrreducibleCFG
bool containsIrreducibleCFG(RPOTraversalT &RPOTraversal, const LoopInfoT &LI)
Return true if the control flow in RPOTraversal is irreducible.
Definition: CFG.h:136
Passes.h