Go to the documentation of this file.
86 #define DEBUG_TYPE "divergence"
91 cl::desc(
"turn the LegacyDivergenceAnalysis into "
92 "a wrapper for GPUDivergenceAnalysis"));
102 void populateWithSourcesOfDivergence();
107 void exploreDataDependency(
Value *V);
116 void findUsersOutsideInfluenceRegion(
123 std::vector<Value *> Worklist;
129 void DivergencePropagator::populateWithSourcesOfDivergence() {
135 Worklist.push_back(&
I);
139 for (
auto &
Arg :
F.args()) {
141 Worklist.push_back(&
Arg);
147 void DivergencePropagator::exploreSyncDependency(
Instruction *TI) {
170 if (IPostDom ==
nullptr)
173 for (
auto I = IPostDom->
begin(); isa<PHINode>(
I); ++
I) {
176 if (!cast<PHINode>(
I)->hasConstantOrUndefValue() && DV.insert(&*
I).second)
177 Worklist.push_back(&*
I);
200 computeInfluenceRegion(ThisBB, IPostDom, InfluenceRegion);
206 while (InfluenceRegion.
count(InfluencedBB)) {
207 for (
auto &
I : *InfluencedBB) {
209 findUsersOutsideInfluenceRegion(
I, InfluenceRegion);
212 if (IDomNode ==
nullptr)
214 InfluencedBB = IDomNode->
getBlock();
218 void DivergencePropagator::findUsersOutsideInfluenceRegion(
224 if (DV.insert(UserInst).second)
225 Worklist.push_back(UserInst);
235 std::vector<BasicBlock *> &InfluenceStack) {
237 if (Succ != End && InfluenceRegion.
insert(Succ).second)
238 InfluenceStack.push_back(Succ);
242 void DivergencePropagator::computeInfluenceRegion(
246 "End does not properly dominate Start");
251 std::vector<BasicBlock *> InfluenceStack;
252 addSuccessorsToInfluenceRegion(Start, End, InfluenceRegion, InfluenceStack);
253 while (!InfluenceStack.empty()) {
255 InfluenceStack.pop_back();
256 addSuccessorsToInfluenceRegion(
BB, End, InfluenceRegion, InfluenceStack);
260 void DivergencePropagator::exploreDataDependency(
Value *V) {
264 Worklist.push_back(U);
270 while (!Worklist.empty()) {
271 Value *V = Worklist.back();
276 if (
I->isTerminator() &&
I->getNumSuccessors() > 1)
277 exploreSyncDependency(
I);
279 exploreDataDependency(V);
291 "Legacy Divergence Analysis",
false,
true)
309 bool LegacyDivergenceAnalysis::shouldUseGPUDivergenceAnalysis(
315 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
317 RPOTraversal FuncRPOT(&
F);
323 auto *TTIWP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
324 if (TTIWP ==
nullptr)
333 DivergentValues.
clear();
334 DivergentUses.clear();
337 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
338 auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
340 if (shouldUseGPUDivergenceAnalysis(
F,
TTI)) {
342 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
343 gpuDA = std::make_unique<DivergenceInfo>(
F, DT, PDT, LI,
TTI,
349 DP.populateWithSourcesOfDivergence();
362 return gpuDA->isDivergent(*V);
364 return DivergentValues.
count(V);
369 return gpuDA->isDivergentUse(*U);
371 return DivergentValues.
count(U->get()) || DivergentUses.count(U);
375 if ((!gpuDA || !gpuDA->hasDivergence()) && DivergentValues.
empty())
379 if (!DivergentValues.
empty()) {
380 const Value *FirstDivergentValue = *DivergentValues.
begin();
381 if (
const Argument *
Arg = dyn_cast<Argument>(FirstDivergentValue)) {
382 F =
Arg->getParent();
384 dyn_cast<Instruction>(FirstDivergentValue)) {
385 F =
I->getParent()->getParent();
390 F = &gpuDA->getFunction();
396 for (
auto &
Arg :
F->args()) {
402 OS <<
"\n " <<
BB.getName() <<
":\n";
403 for (
auto &
I :
BB.instructionsWithoutDebug()) {
This class represents an incoming formal argument to a Function.
bool isDivergentUse(const Use *U) const
This is an optimization pass for GlobalISel generic memory operations.
const PostDominatorTree & PDT
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
LegacyDivergenceAnalysis()
The legacy pass manager's analysis pass to compute loop information.
void initializeLegacyDivergenceAnalysisPass(PassRegistry &)
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
auto successors(MachineBasicBlock *BB)
DomTreeNodeBase * getIDom() const
std::pair< iterator, bool > insert(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
LLVM Basic Block Representation.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
iterator begin()
Instruction iterator methods.
Represent the analysis usage information of a pass.
Legacy analysis pass which computes a DominatorTree.
This class implements an extremely fast bulk output stream that can only output to a stream.
User * getUser() const
Returns the User that contains this Use.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
void print(raw_ostream &OS, const Module *) const override
print - Print out the internal state of the pass.
bool runOnFunction(Function &F) override
runOnFunction - Virtual method overridden by subclasses to do the per-function processing of the pass.
inst_range instructions(Function *F)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
initializer< Ty > init(const Ty &Val)
static void propagate(InstantiatedValue From, InstantiatedValue To, MatchState State, ReachabilitySet &ReachSet, std::vector< WorkListItem > &WorkList)
static cl::opt< bool > UseGPUDA("use-gpu-divergence-analysis", cl::init(false), cl::Hidden, cl::desc("turn the LegacyDivergenceAnalysis into " "a wrapper for GPUDivergenceAnalysis"))
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
A Module instance is used to store all the information related to an LLVM module.
INITIALIZE_PASS_BEGIN(LegacyDivergenceAnalysis, "divergence", "Legacy Divergence Analysis", false, true) INITIALIZE_PASS_END(LegacyDivergenceAnalysis
So we should use XX3Form_Rcr to implement intrinsic Convert DP outs ins xscvdpsp No builtin are required Round &Convert QP DP(dword[1] is set to zero) No builtin are required Round to Quad Precision because you need to assign rounding mode in instruction Provide builtin(set f128:$vT,(int_ppc_vsx_xsrqpi f128:$vB))(set f128 yields< n x< ty > >< result > yields< ty >< result > No builtin are required Load Store load store see def memrix16 in PPCInstrInfo td Load Store Vector load store outs ins lxsdx set load store with conversion from to DP
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
bool isDivergent(const Value *V) const
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
void setPreservesAll()
Set by analyses that do not transform their input at all.
const BasicBlock * getParent() const
FunctionPass * createLegacyDivergenceAnalysisPass()
AnalysisUsage & addRequiredTransitive()
FunctionPass class - This class is used to implement most global optimizations.
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
DivergencePropagator(const ModifiedPO &LoopPOT, const DominatorTree &DT, const PostDominatorTree &PDT, const LoopInfo &LI, const BasicBlock &DivTermBlock)
LLVM Value Representation.
iterator_range< user_iterator > users()
A Use represents the edge between a Value definition and its users.
bool containsIrreducibleCFG(RPOTraversalT &RPOTraversal, const LoopInfoT &LI)
Return true if the control flow in RPOTraversal is irreducible.