86#define DEBUG_TYPE "divergence"
91 cl::desc(
"turn the LegacyDivergenceAnalysis into "
92 "a wrapper for GPUDivergenceAnalysis"));
102 void populateWithSourcesOfDivergence();
107 void exploreDataDependency(
Value *V);
116 void findUsersOutsideInfluenceRegion(
123 std::vector<Value *> Worklist;
129void DivergencePropagator::populateWithSourcesOfDivergence() {
135 Worklist.push_back(&
I);
139 for (
auto &
Arg :
F.args()) {
141 Worklist.push_back(&
Arg);
147void DivergencePropagator::exploreSyncDependency(
Instruction *TI) {
160 if (!
DT.isReachableFromEntry(ThisBB))
170 if (IPostDom ==
nullptr)
173 for (
auto I = IPostDom->
begin(); isa<PHINode>(
I); ++
I) {
176 if (!cast<PHINode>(
I)->hasConstantOrUndefValue() && DV.insert(&*
I).second)
177 Worklist.push_back(&*
I);
200 computeInfluenceRegion(ThisBB, IPostDom, InfluenceRegion);
206 while (InfluenceRegion.
count(InfluencedBB)) {
207 for (
auto &
I : *InfluencedBB) {
209 findUsersOutsideInfluenceRegion(
I, InfluenceRegion);
212 if (IDomNode ==
nullptr)
214 InfluencedBB = IDomNode->
getBlock();
218void DivergencePropagator::findUsersOutsideInfluenceRegion(
224 if (DV.insert(UserInst).second)
225 Worklist.push_back(UserInst);
235 std::vector<BasicBlock *> &InfluenceStack) {
237 if (Succ != End && InfluenceRegion.
insert(Succ).second)
238 InfluenceStack.push_back(Succ);
242void DivergencePropagator::computeInfluenceRegion(
245 assert(PDT.properlyDominates(End, Start) &&
246 "End does not properly dominate Start");
251 std::vector<BasicBlock *> InfluenceStack;
252 addSuccessorsToInfluenceRegion(Start, End, InfluenceRegion, InfluenceStack);
253 while (!InfluenceStack.empty()) {
255 InfluenceStack.pop_back();
256 addSuccessorsToInfluenceRegion(BB, End, InfluenceRegion, InfluenceStack);
260void DivergencePropagator::exploreDataDependency(
Value *V) {
262 for (
User *U :
V->users()) {
264 Worklist.push_back(U);
268void DivergencePropagator::propagate() {
270 while (!Worklist.empty()) {
271 Value *
V = Worklist.back();
276 if (
I->isTerminator() &&
I->getNumSuccessors() > 1)
277 exploreSyncDependency(
I);
279 exploreDataDependency(V);
291 "Legacy Divergence Analysis",
false,
true)
309 RPOTraversal FuncRPOT(&
F);
321 gpuDA = std::make_unique<DivergenceInfo>(
F, DT, PDT, LI,
TTI,
327 DP.populateWithSourcesOfDivergence();
334 return gpuDA->isDivergent(*V);
341 return gpuDA->isDivergentUse(*U);
354 if (
const Argument *
Arg = dyn_cast<Argument>(FirstDivergentValue)) {
355 F =
Arg->getParent();
357 dyn_cast<Instruction>(FirstDivergentValue)) {
358 F =
I->getParent()->getParent();
363 F = &
gpuDA->getFunction();
369 for (
const auto &
Arg :
F->args()) {
392 auto *TTIWP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
393 if (TTIWP ==
nullptr)
406 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
407 auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
408 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
423 DivergentValues.
clear();
424 DivergentUses.clear();
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
block Block Frequency Analysis
static cl::opt< bool > UseGPUDA("use-gpu-divergence-analysis", cl::init(false), cl::Hidden, cl::desc("turn the LegacyDivergenceAnalysis into " "a wrapper for GPUDivergenceAnalysis"))
print must be executed print the must be executed context for all instructions
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
void setPreservesAll()
Set by analyses that do not transform their input at all.
AnalysisUsage & addRequiredTransitive()
This class represents an incoming formal argument to a Function.
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
const Instruction & back() const
Implements a dense probed hash-table based set.
Compute divergence starting with a divergent branch.
const DominatorTreeT & DT
DomTreeNodeBase * getIDom() const
Analysis pass which computes a DominatorTree.
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
FunctionPass class - This class is used to implement most global optimizations.
const BasicBlock * getParent() const
DenseSet< const Use * > DivergentUses
DenseSet< const Value * > DivergentValues
std::unique_ptr< DivergenceInfo > gpuDA
bool isDivergentUse(const Use *U) const
bool isDivergent(const Value *V) const
void run(Function &F, TargetTransformInfo &TTI, DominatorTree &DT, PostDominatorTree &PDT, const LoopInfo &LI)
bool shouldUseGPUDivergenceAnalysis(const Function &F, const TargetTransformInfo &TTI, const LoopInfo &LI)
void print(raw_ostream &OS, const Module *) const
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
bool runOnFunction(Function &F) override
runOnFunction - Virtual method overridden by subclasses to do the per-function processing of the pass.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
LegacyDivergenceAnalysis()
Analysis pass that exposes the LoopInfo for a function.
The legacy pass manager's analysis pass to compute loop information.
A Module instance is used to store all the information related to an LLVM module.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Analysis pass which computes a PostDominatorTree.
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Analysis pass providing the TargetTransformInfo.
A Use represents the edge between a Value definition and its users.
User * getUser() const
Returns the User that contains this Use.
LLVM Value Representation.
StringRef getName() const
Return a constant reference to the value's name.
std::pair< iterator, bool > insert(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
This class implements an extremely fast bulk output stream that can only output to a stream.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
auto successors(const MachineBasicBlock *BB)
bool containsIrreducibleCFG(RPOTraversalT &RPOTraversal, const LoopInfoT &LI)
Return true if the control flow in RPOTraversal is irreducible.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
FunctionPass * createLegacyDivergenceAnalysisPass()
void initializeLegacyDivergenceAnalysisPass(PassRegistry &)