42#include "llvm/IR/IntrinsicsAMDGPU.h"
53#define DEBUG_TYPE "amdgpu-unify-divergent-exit-nodes"
57class AMDGPUUnifyDivergentExitNodesImpl {
62 AMDGPUUnifyDivergentExitNodesImpl() =
delete;
74class AMDGPUUnifyDivergentExitNodes :
public FunctionPass {
86char AMDGPUUnifyDivergentExitNodes::ID = 0;
91 "Unify divergent function exit nodes",
false,
false)
98void AMDGPUUnifyDivergentExitNodes::getAnalysisUsage(
AnalysisUsage &AU)
const {
119 FunctionPass::getAnalysisUsage(AU);
130 while (!Stack.empty()) {
136 if (Visited.
insert(Pred).second)
137 Stack.push_back(Pred);
144BasicBlock *AMDGPUUnifyDivergentExitNodesImpl::unifyReturnBlockSet(
154 if (
F.getReturnType()->isVoidTy()) {
158 PN =
B.CreatePHI(
F.getReturnType(), ReturningBlocks.
size(),
165 std::vector<DominatorTree::UpdateType> Updates;
166 Updates.reserve(ReturningBlocks.
size());
171 PN->
addIncoming(BB->getTerminator()->getOperand(0), BB);
174 BB->getTerminator()->eraseFromParent();
176 Updates.push_back({DominatorTree::Insert, BB, NewRetBlock});
197 !isa<BranchInst>(PDT.
getRoot()->getTerminator())))
208 bool Changed =
false;
209 std::vector<DominatorTree::UpdateType> Updates;
218 PDT.
roots(), [&](
auto BB) { return !isUniformlyReached(UA, *BB); });
221 if (isa<ReturnInst>(BB->getTerminator())) {
222 if (HasDivergentExitBlock)
224 }
else if (isa<UnreachableInst>(BB->getTerminator())) {
225 if (HasDivergentExitBlock)
227 }
else if (
BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
230 if (DummyReturnBB ==
nullptr) {
232 "DummyReturnBlock", &
F);
236 ReturningBlocks.
push_back(DummyReturnBB);
239 if (BI->isUnconditional()) {
240 BasicBlock *LoopHeaderBB = BI->getSuccessor(0);
244 Updates.push_back({DominatorTree::Insert, BB, DummyReturnBB});
251 Updates.reserve(Updates.size() + 2 * Successors.size() + 2);
255 Updates.push_back({DominatorTree::Insert, BB, TransitionBB});
257 Updates.push_back({DominatorTree::Insert, TransitionBB,
Successor});
258 Updates.push_back({DominatorTree::Delete, BB,
Successor});
265 Updates.push_back({DominatorTree::Insert, BB, DummyReturnBB});
271 if (!UnreachableBlocks.
empty()) {
274 if (UnreachableBlocks.
size() == 1) {
275 UnreachableBlock = UnreachableBlocks.
front();
278 "UnifiedUnreachableBlock", &
F);
281 Updates.reserve(Updates.size() + UnreachableBlocks.
size());
284 BB->getTerminator()->eraseFromParent();
286 Updates.push_back({DominatorTree::Insert, BB, UnreachableBlock});
291 if (!ReturningBlocks.
empty()) {
311 ReturningBlocks.
push_back(UnreachableBlock);
323 if (ReturningBlocks.
empty())
326 if (ReturningBlocks.
size() == 1)
329 unifyReturnBlockSet(
F, DTU, ReturningBlocks,
"UnifiedReturnBlock");
333bool AMDGPUUnifyDivergentExitNodes::runOnFunction(
Function &
F) {
336 DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
338 getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
339 const auto &UA = getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();
340 const auto *TranformInfo =
341 &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
F);
342 return AMDGPUUnifyDivergentExitNodesImpl(TranformInfo).run(
F, DT, PDT, UA);
355 return AMDGPUUnifyDivergentExitNodesImpl(TransformInfo).
run(
F, DT, PDT, UA)
static bool isUniformlyReached(const UniformityInfo &UA, BasicBlock &BB)
Unify divergent function exit nodes
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
print Print MemDeps of function
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
LLVM Basic Block Representation.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)
Split the basic block into two basic blocks at the specified instruction.
SymbolTableList< BasicBlock >::iterator eraseFromParent()
Unlink 'this' from the containing function and delete it.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Conditional or Unconditional Branch instruction.
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
This is the shared class of boolean and integer constants.
static ConstantInt * getTrue(LLVMContext &Context)
void applyUpdates(ArrayRef< DominatorTree::UpdateType > Updates)
Submit updates to all available trees.
Analysis pass which computes a DominatorTree.
iterator_range< root_iterator > roots()
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
FunctionPass class - This class is used to implement most global optimizations.
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overridden by subclasses to do the per-function processing of the pass.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Analysis pass which computes a PostDominatorTree.
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
static ReturnInst * Create(LLVMContext &C, Value *retVal=nullptr, Instruction *InsertBefore=nullptr)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Analysis pass providing the TargetTransformInfo.
Result run(const Function &F, FunctionAnalysisManager &)
The instances of the Type class are immutable: once they are created, they are never changed.
This function has undefined behavior.
LLVM Value Representation.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
This is an optimization pass for GlobalISel generic memory operations.
auto successors(const MachineBasicBlock *BB)
char & AMDGPUUnifyDivergentExitNodesID
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
cl::opt< bool > RequireAndPreserveDomTree
This function is used to do simplification of a CFG.
bool simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, DomTreeUpdater *DTU=nullptr, const SimplifyCFGOptions &Options={}, ArrayRef< WeakVH > LoopHeaders={})
char & BreakCriticalEdgesID
auto predecessors(const MachineBasicBlock *BB)
void initializeAMDGPUUnifyDivergentExitNodesPass(PassRegistry &)