LLVM  14.0.0git
LegacyDivergenceAnalysis.cpp
Go to the documentation of this file.
1 //===- LegacyDivergenceAnalysis.cpp --------- Legacy Divergence Analysis
2 //Implementation -==//
3 //
4 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5 // See https://llvm.org/LICENSE.txt for license information.
6 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements divergence analysis which determines whether a branch
11 // in a GPU program is divergent. It can help branch optimizations such as jump
12 // threading and loop unswitching to make better decisions.
13 //
14 // GPU programs typically use the SIMD execution model, where multiple threads
15 // in the same execution group have to execute in lock-step. Therefore, if the
16 // code contains divergent branches (i.e., threads in a group do not agree on
17 // which path of the branch to take), the group of threads has to execute all
18 // the paths from that branch with different subsets of threads enabled until
19 // they converge at the immediately post-dominating BB of the paths.
20 //
21 // Due to this execution model, some optimizations such as jump
22 // threading and loop unswitching can be unfortunately harmful when performed on
23 // divergent branches. Therefore, an analysis that computes which branches in a
24 // GPU program are divergent can help the compiler to selectively run these
25 // optimizations.
26 //
27 // This file defines divergence analysis which computes a conservative but
28 // non-trivial approximation of all divergent branches in a GPU program. It
29 // partially implements the approach described in
30 //
31 // Divergence Analysis
32 // Sampaio, Souza, Collange, Pereira
33 // TOPLAS '13
34 //
35 // The divergence analysis identifies the sources of divergence (e.g., special
36 // variables that hold the thread ID), and recursively marks variables that are
37 // data or sync dependent on a source of divergence as divergent.
38 //
39 // While data dependency is a well-known concept, the notion of sync dependency
40 // is worth more explanation. Sync dependence characterizes the control flow
41 // aspect of the propagation of branch divergence. For example,
42 //
43 // %cond = icmp slt i32 %tid, 10
44 // br i1 %cond, label %then, label %else
45 // then:
46 // br label %merge
47 // else:
48 // br label %merge
49 // merge:
50 // %a = phi i32 [ 0, %then ], [ 1, %else ]
51 //
52 // Suppose %tid holds the thread ID. Although %a is not data dependent on %tid
53 // because %tid is not on its use-def chains, %a is sync dependent on %tid
54 // because the branch "br i1 %cond" depends on %tid and affects which value %a
55 // is assigned to.
56 //
57 // The current implementation has the following limitations:
58 // 1. intra-procedural. It conservatively considers the arguments of a
59 // non-kernel-entry function and the return value of a function call as
60 // divergent.
61 // 2. memory as black box. It conservatively considers values loaded from
62 // generic or local address as divergent. This can be improved by leveraging
63 // pointer analysis.
64 //
65 //===----------------------------------------------------------------------===//
66 
69 #include "llvm/Analysis/CFG.h"
71 #include "llvm/Analysis/Passes.h"
74 #include "llvm/IR/Dominators.h"
75 #include "llvm/IR/InstIterator.h"
76 #include "llvm/IR/Instructions.h"
77 #include "llvm/IR/Value.h"
78 #include "llvm/InitializePasses.h"
80 #include "llvm/Support/Debug.h"
82 #include <vector>
83 using namespace llvm;
84 
85 #define DEBUG_TYPE "divergence"
86 
87 // transparently use the GPUDivergenceAnalysis
88 static cl::opt<bool> UseGPUDA("use-gpu-divergence-analysis", cl::init(false),
89  cl::Hidden,
90  cl::desc("turn the LegacyDivergenceAnalysis into "
91  "a wrapper for GPUDivergenceAnalysis"));
92 
93 namespace {
94 
96 public:
100  : F(F), TTI(TTI), DT(DT), PDT(PDT), DV(DV), DU(DU) {}
101  void populateWithSourcesOfDivergence();
102  void propagate();
103 
104 private:
105  // A helper function that explores data dependents of V.
106  void exploreDataDependency(Value *V);
107  // A helper function that explores sync dependents of TI.
108  void exploreSyncDependency(Instruction *TI);
109  // Computes the influence region from Start to End. This region includes all
110  // basic blocks on any simple path from Start to End.
111  void computeInfluenceRegion(BasicBlock *Start, BasicBlock *End,
112  DenseSet<BasicBlock *> &InfluenceRegion);
113  // Finds all users of I that are outside the influence region, and add these
114  // users to Worklist.
115  void findUsersOutsideInfluenceRegion(
116  Instruction &I, const DenseSet<BasicBlock *> &InfluenceRegion);
117 
118  Function &F;
120  DominatorTree &DT;
122  std::vector<Value *> Worklist; // Stack for DFS.
123  DenseSet<const Value *> &DV; // Stores all divergent values.
124  DenseSet<const Use *> &DU; // Stores divergent uses of possibly uniform
125  // values.
126 };
127 
128 void DivergencePropagator::populateWithSourcesOfDivergence() {
129  Worklist.clear();
130  DV.clear();
131  DU.clear();
132  for (auto &I : instructions(F)) {
133  if (TTI.isSourceOfDivergence(&I)) {
134  Worklist.push_back(&I);
135  DV.insert(&I);
136  }
137  }
138  for (auto &Arg : F.args()) {
139  if (TTI.isSourceOfDivergence(&Arg)) {
140  Worklist.push_back(&Arg);
141  DV.insert(&Arg);
142  }
143  }
144 }
145 
146 void DivergencePropagator::exploreSyncDependency(Instruction *TI) {
147  // Propagation rule 1: if branch TI is divergent, all PHINodes in TI's
148  // immediate post dominator are divergent. This rule handles if-then-else
149  // patterns. For example,
150  //
151  // if (tid < 5)
152  // a1 = 1;
153  // else
154  // a2 = 2;
155  // a = phi(a1, a2); // sync dependent on (tid < 5)
156  BasicBlock *ThisBB = TI->getParent();
157 
158  // Unreachable blocks may not be in the dominator tree.
159  if (!DT.isReachableFromEntry(ThisBB))
160  return;
161 
162  // If the function has no exit blocks or doesn't reach any exit blocks, the
163  // post dominator may be null.
164  DomTreeNode *ThisNode = PDT.getNode(ThisBB);
165  if (!ThisNode)
166  return;
167 
168  BasicBlock *IPostDom = ThisNode->getIDom()->getBlock();
169  if (IPostDom == nullptr)
170  return;
171 
172  for (auto I = IPostDom->begin(); isa<PHINode>(I); ++I) {
173  // A PHINode is uniform if it returns the same value no matter which path is
174  // taken.
175  if (!cast<PHINode>(I)->hasConstantOrUndefValue() && DV.insert(&*I).second)
176  Worklist.push_back(&*I);
177  }
178 
179  // Propagation rule 2: if a value defined in a loop is used outside, the user
180  // is sync dependent on the condition of the loop exits that dominate the
181  // user. For example,
182  //
183  // int i = 0;
184  // do {
185  // i++;
186  // if (foo(i)) ... // uniform
187  // } while (i < tid);
188  // if (bar(i)) ... // divergent
189  //
190  // A program may contain unstructured loops. Therefore, we cannot leverage
191  // LoopInfo, which only recognizes natural loops.
192  //
193  // The algorithm used here handles both natural and unstructured loops. Given
194  // a branch TI, we first compute its influence region, the union of all simple
195  // paths from TI to its immediate post dominator (IPostDom). Then, we search
196  // for all the values defined in the influence region but used outside. All
197  // these users are sync dependent on TI.
198  DenseSet<BasicBlock *> InfluenceRegion;
199  computeInfluenceRegion(ThisBB, IPostDom, InfluenceRegion);
200  // An insight that can speed up the search process is that all the in-region
201  // values that are used outside must dominate TI. Therefore, instead of
202  // searching every basic blocks in the influence region, we search all the
203  // dominators of TI until it is outside the influence region.
204  BasicBlock *InfluencedBB = ThisBB;
205  while (InfluenceRegion.count(InfluencedBB)) {
206  for (auto &I : *InfluencedBB) {
207  if (!DV.count(&I))
208  findUsersOutsideInfluenceRegion(I, InfluenceRegion);
209  }
210  DomTreeNode *IDomNode = DT.getNode(InfluencedBB)->getIDom();
211  if (IDomNode == nullptr)
212  break;
213  InfluencedBB = IDomNode->getBlock();
214  }
215 }
216 
217 void DivergencePropagator::findUsersOutsideInfluenceRegion(
218  Instruction &I, const DenseSet<BasicBlock *> &InfluenceRegion) {
219  for (Use &Use : I.uses()) {
220  Instruction *UserInst = cast<Instruction>(Use.getUser());
221  if (!InfluenceRegion.count(UserInst->getParent())) {
222  DU.insert(&Use);
223  if (DV.insert(UserInst).second)
224  Worklist.push_back(UserInst);
225  }
226  }
227 }
228 
229 // A helper function for computeInfluenceRegion that adds successors of "ThisBB"
230 // to the influence region.
231 static void
232 addSuccessorsToInfluenceRegion(BasicBlock *ThisBB, BasicBlock *End,
233  DenseSet<BasicBlock *> &InfluenceRegion,
234  std::vector<BasicBlock *> &InfluenceStack) {
235  for (BasicBlock *Succ : successors(ThisBB)) {
236  if (Succ != End && InfluenceRegion.insert(Succ).second)
237  InfluenceStack.push_back(Succ);
238  }
239 }
240 
241 void DivergencePropagator::computeInfluenceRegion(
242  BasicBlock *Start, BasicBlock *End,
243  DenseSet<BasicBlock *> &InfluenceRegion) {
244  assert(PDT.properlyDominates(End, Start) &&
245  "End does not properly dominate Start");
246 
247  // The influence region starts from the end of "Start" to the beginning of
248  // "End". Therefore, "Start" should not be in the region unless "Start" is in
249  // a loop that doesn't contain "End".
250  std::vector<BasicBlock *> InfluenceStack;
251  addSuccessorsToInfluenceRegion(Start, End, InfluenceRegion, InfluenceStack);
252  while (!InfluenceStack.empty()) {
253  BasicBlock *BB = InfluenceStack.back();
254  InfluenceStack.pop_back();
255  addSuccessorsToInfluenceRegion(BB, End, InfluenceRegion, InfluenceStack);
256  }
257 }
258 
259 void DivergencePropagator::exploreDataDependency(Value *V) {
260  // Follow def-use chains of V.
261  for (User *U : V->users()) {
262  if (!TTI.isAlwaysUniform(U) && DV.insert(U).second)
263  Worklist.push_back(U);
264  }
265 }
266 
268  // Traverse the dependency graph using DFS.
269  while (!Worklist.empty()) {
270  Value *V = Worklist.back();
271  Worklist.pop_back();
272  if (Instruction *I = dyn_cast<Instruction>(V)) {
273  // Terminators with less than two successors won't introduce sync
274  // dependency. Ignore them.
275  if (I->isTerminator() && I->getNumSuccessors() > 1)
276  exploreSyncDependency(I);
277  }
278  exploreDataDependency(V);
279  }
280 }
281 
282 } // namespace
283 
284 // Register this pass.
288 }
290  "Legacy Divergence Analysis", false, true)
295  "Legacy Divergence Analysis", false, true)
296 
298  return new LegacyDivergenceAnalysis();
299 }
300 
305  AU.setPreservesAll();
306 }
307 
308 bool LegacyDivergenceAnalysis::shouldUseGPUDivergenceAnalysis(
309  const Function &F, const TargetTransformInfo &TTI) const {
311  return false;
312 
313  // GPUDivergenceAnalysis requires a reducible CFG.
314  auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
315  using RPOTraversal = ReversePostOrderTraversal<const Function *>;
316  RPOTraversal FuncRPOT(&F);
317  return !containsIrreducibleCFG<const BasicBlock *, const RPOTraversal,
318  const LoopInfo>(FuncRPOT, LI);
319 }
320 
322  auto *TTIWP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
323  if (TTIWP == nullptr)
324  return false;
325 
326  TargetTransformInfo &TTI = TTIWP->getTTI(F);
327  // Fast path: if the target does not have branch divergence, we do not mark
328  // any branch as divergent.
329  if (!TTI.hasBranchDivergence())
330  return false;
331 
332  DivergentValues.clear();
333  DivergentUses.clear();
334  gpuDA = nullptr;
335 
336  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
337  auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
338 
339  if (shouldUseGPUDivergenceAnalysis(F, TTI)) {
340  // run the new GPU divergence analysis
341  auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
342  gpuDA = std::make_unique<DivergenceInfo>(F, DT, PDT, LI, TTI,
343  /* KnownReducible = */ true);
344 
345  } else {
346  // run LLVM's existing DivergenceAnalysis
347  DivergencePropagator DP(F, TTI, DT, PDT, DivergentValues, DivergentUses);
348  DP.populateWithSourcesOfDivergence();
349  DP.propagate();
350  }
351 
352  LLVM_DEBUG(dbgs() << "\nAfter divergence analysis on " << F.getName()
353  << ":\n";
354  print(dbgs(), F.getParent()));
355 
356  return false;
357 }
358 
360  if (gpuDA) {
361  return gpuDA->isDivergent(*V);
362  }
363  return DivergentValues.count(V);
364 }
365 
367  if (gpuDA) {
368  return gpuDA->isDivergentUse(*U);
369  }
370  return DivergentValues.count(U->get()) || DivergentUses.count(U);
371 }
372 
374  if ((!gpuDA || !gpuDA->hasDivergence()) && DivergentValues.empty())
375  return;
376 
377  const Function *F = nullptr;
378  if (!DivergentValues.empty()) {
379  const Value *FirstDivergentValue = *DivergentValues.begin();
380  if (const Argument *Arg = dyn_cast<Argument>(FirstDivergentValue)) {
381  F = Arg->getParent();
382  } else if (const Instruction *I =
383  dyn_cast<Instruction>(FirstDivergentValue)) {
384  F = I->getParent()->getParent();
385  } else {
386  llvm_unreachable("Only arguments and instructions can be divergent");
387  }
388  } else if (gpuDA) {
389  F = &gpuDA->getFunction();
390  }
391  if (!F)
392  return;
393 
394  // Dumps all divergent values in F, arguments and then instructions.
395  for (auto &Arg : F->args()) {
396  OS << (isDivergent(&Arg) ? "DIVERGENT: " : " ");
397  OS << Arg << "\n";
398  }
399  // Iterate the basic blocks of F in order, and the non-debug instructions of each block, to ensure a deterministic order.
400  for (const BasicBlock &BB : *F) {
401  OS << "\n " << BB.getName() << ":\n";
402  for (auto &I : BB.instructionsWithoutDebug()) {
403  OS << (isDivergent(&I) ? "DIVERGENT: " : " ");
404  OS << I << "\n";
405  }
406  }
407  OS << "\n";
408 }
llvm::Argument
This class represents an incoming formal argument to a Function.
Definition: Argument.h:29
llvm::LegacyDivergenceAnalysis::isDivergentUse
bool isDivergentUse(const Use *U) const
Definition: LegacyDivergenceAnalysis.cpp:366
llvm
---------------------— PointerInfo ------------------------------------—
Definition: AllocatorList.h:23
InstIterator.h
llvm::Function
Definition: Function.h:61
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:168
llvm::DivergencePropagator::PDT
const PostDominatorTree & PDT
Definition: SyncDependenceAnalysis.cpp:247
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:151
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
llvm::LegacyDivergenceAnalysis::LegacyDivergenceAnalysis
LegacyDivergenceAnalysis()
Definition: LegacyDivergenceAnalysis.cpp:286
llvm::LoopInfoWrapperPass
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:1268
llvm::initializeLegacyDivergenceAnalysisPass
void initializeLegacyDivergenceAnalysisPass(PassRegistry &)
llvm::DominatorTreeBase::getNode
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
Definition: GenericDomTree.h:351
DivergenceAnalysis.h
llvm::DomTreeNodeBase::getIDom
DomTreeNodeBase * getIDom() const
Definition: GenericDomTree.h:89
llvm::successors
succ_range successors(Instruction *I)
Definition: CFG.h:262
llvm::detail::DenseSetImpl< ValueT, DenseMap< ValueT, detail::DenseSetEmpty, DenseMapInfo< ValueT >, detail::DenseSetPair< ValueT > >, DenseMapInfo< ValueT > >::insert
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:206
llvm::detail::DenseSetImpl< ValueT, DenseMap< ValueT, detail::DenseSetEmpty, DenseMapInfo< ValueT >, detail::DenseSetPair< ValueT > >, DenseMapInfo< ValueT > >::count
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition: DenseSet.h:97
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
Arg
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
Definition: AMDGPULibCalls.cpp:206
CommandLine.h
llvm::LegacyDivergenceAnalysis::ID
static char ID
Definition: LegacyDivergenceAnalysis.h:33
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
PostDominators.h
llvm::User
Definition: User.h:44
llvm::TargetTransformInfo::useGPUDivergenceAnalysis
bool useGPUDivergenceAnalysis() const
Return true if the target prefers to use GPU divergence analysis to replace the legacy version.
Definition: TargetTransformInfo.cpp:236
llvm::DivergencePropagator
Definition: SyncDependenceAnalysis.cpp:244
llvm::BasicBlock::begin
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:296
llvm::TargetTransformInfo::hasBranchDivergence
bool hasBranchDivergence() const
Return true if branch divergence exists.
Definition: TargetTransformInfo.cpp:232
llvm::PostDominatorTreeWrapperPass
Definition: PostDominators.h:73
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
false
Definition: StackSlotColoring.cpp:142
divergence
divergence
Definition: LegacyDivergenceAnalysis.cpp:294
llvm::Instruction
Definition: Instruction.h:45
llvm::DominatorTreeWrapperPass
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:287
llvm::TargetTransformInfo::isSourceOfDivergence
bool isSourceOfDivergence(const Value *V) const
Returns whether V is a source of divergence.
Definition: TargetTransformInfo.cpp:240
llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:53
llvm::Use::getUser
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:73
llvm::LegacyDivergenceAnalysis
Definition: LegacyDivergenceAnalysis.h:31
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
llvm::LegacyDivergenceAnalysis::print
void print(raw_ostream &OS, const Module *) const override
print - Print out the internal state of the pass.
Definition: LegacyDivergenceAnalysis.cpp:373
llvm::LegacyDivergenceAnalysis::runOnFunction
bool runOnFunction(Function &F) override
runOnFunction - Virtual method overriden by subclasses to do the per-function processing of the pass.
Definition: LegacyDivergenceAnalysis.cpp:321
llvm::DenseSet< const Value * >
llvm::cl::opt< bool >
llvm::instructions
inst_range instructions(Function *F)
Definition: InstIterator.h:133
llvm::DomTreeNodeBase::getBlock
NodeT * getBlock() const
Definition: GenericDomTree.h:88
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::LegacyDivergenceAnalysis::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
Definition: LegacyDivergenceAnalysis.cpp:301
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::detail::DenseSetImpl::empty
bool empty() const
Definition: DenseSet.h:80
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
propagate
static void propagate(InstantiatedValue From, InstantiatedValue To, MatchState State, ReachabilitySet &ReachSet, std::vector< WorkListItem > &WorkList)
Definition: CFLAndersAliasAnalysis.cpp:596
UseGPUDA
static cl::opt< bool > UseGPUDA("use-gpu-divergence-analysis", cl::init(false), cl::Hidden, cl::desc("turn the LegacyDivergenceAnalysis into " "a wrapper for GPUDivergenceAnalysis"))
llvm::detail::DenseSetImpl::begin
iterator begin()
Definition: DenseSet.h:173
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::DominatorTree::isReachableFromEntry
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition: Dominators.cpp:328
llvm::TTI
TargetTransformInfo TTI
Definition: TargetTransformInfo.h:163
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(LegacyDivergenceAnalysis, "divergence", "Legacy Divergence Analysis", false, true) INITIALIZE_PASS_END(LegacyDivergenceAnalysis
llvm::detail::DenseSetImpl::clear
void clear()
Definition: DenseSet.h:92
CFG.h
llvm::LoopInfo
Definition: LoopInfo.h:1083
llvm::PostDominatorTree
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
Definition: PostDominators.h:28
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:136
llvm::LegacyDivergenceAnalysis::isDivergent
bool isDivergent(const Value *V) const
Definition: LegacyDivergenceAnalysis.cpp:359
llvm::DominatorTreeBase::properlyDominates
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
Definition: GenericDomTree.h:392
llvm::DomTreeNodeBase< BasicBlock >
llvm::AnalysisUsage::setPreservesAll
void setPreservesAll()
Set by analyses that do not transform their input at all.
Definition: PassAnalysisSupport.h:130
DP
So we should use XX3Form_Rcr to implement instrinsic Convert DP outs ins xscvdpsp No builtin are required Round &Convert QP DP(dword[1] is set to zero) No builtin are required Round to Quad Precision because you need to assign rounding mode in instruction Provide builtin(set f128:$vT,(int_ppc_vsx_xsrqpi f128:$vB))(set f128 yields< n x< ty > >< result > yields< ty >< result > No builtin are required Load Store load store see def memrix16 in PPCInstrInfo td Load Store Vector load store outs ins lxsdx set load store with conversion from to DP
Definition: README_P9.txt:520
llvm::ReversePostOrderTraversal
Definition: PostOrderIterator.h:290
Instructions.h
PostOrderIterator.h
LegacyDivergenceAnalysis.h
Dominators.h
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:94
TargetTransformInfo.h
llvm::DivergencePropagator::DT
const DominatorTree & DT
Definition: SyncDependenceAnalysis.cpp:246
llvm::createLegacyDivergenceAnalysisPass
FunctionPass * createLegacyDivergenceAnalysisPass()
Definition: LegacyDivergenceAnalysis.cpp:297
llvm::AnalysisUsage::addRequiredTransitive
AnalysisUsage & addRequiredTransitive()
Definition: PassAnalysisSupport.h:81
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::cl::desc
Definition: CommandLine.h:414
raw_ostream.h
true
basic Basic Alias true
Definition: BasicAliasAnalysis.cpp:1815
llvm::DivergencePropagator::DivergencePropagator
DivergencePropagator(const ModifiedPO &LoopPOT, const DominatorTree &DT, const PostDominatorTree &PDT, const LoopInfo &LI, const BasicBlock &DivTermBlock)
Definition: SyncDependenceAnalysis.cpp:261
Value.h
InitializePasses.h
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
Debug.h
llvm::Value::users
iterator_range< user_iterator > users()
Definition: Value.h:422
llvm::TargetTransformInfo::isAlwaysUniform
bool isAlwaysUniform(const Value *V) const
Definition: TargetTransformInfo.cpp:244
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:44
llvm::containsIrreducibleCFG
bool containsIrreducibleCFG(RPOTraversalT &RPOTraversal, const LoopInfoT &LI)
Return true if the control flow in RPOTraversal is irreducible.
Definition: CFG.h:136
Passes.h
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:37