42#include "llvm/IR/IntrinsicsAMDGPU.h"
54#define DEBUG_TYPE "amdgpu-unify-divergent-exit-nodes"
58class AMDGPUUnifyDivergentExitNodesImpl {
63 AMDGPUUnifyDivergentExitNodesImpl() =
delete;
75class AMDGPUUnifyDivergentExitNodes :
public FunctionPass {
87char AMDGPUUnifyDivergentExitNodes::ID = 0;
92 "Unify divergent function exit nodes",
false,
false)
99void AMDGPUUnifyDivergentExitNodes::getAnalysisUsage(
AnalysisUsage &AU)
const {
118 FunctionPass::getAnalysisUsage(AU);
129 while (!Stack.empty()) {
135 if (Visited.
insert(Pred).second)
136 Stack.push_back(Pred);
143BasicBlock *AMDGPUUnifyDivergentExitNodesImpl::unifyReturnBlockSet(
153 if (
F.getReturnType()->isVoidTy()) {
157 PN =
B.CreatePHI(
F.getReturnType(), ReturningBlocks.
size(),
164 std::vector<DominatorTree::UpdateType> Updates;
165 Updates.reserve(ReturningBlocks.
size());
170 PN->
addIncoming(BB->getTerminator()->getOperand(0), BB);
173 BB->getTerminator()->eraseFromParent();
175 Updates.push_back({DominatorTree::Insert, BB, NewRetBlock});
198 !isa<BranchInst>(PDT.
getRoot()->getTerminator())))
209 bool Changed =
false;
210 std::vector<DominatorTree::UpdateType> Updates;
219 PDT.
roots(), [&](
auto BB) { return !isUniformlyReached(UA, *BB); });
222 if (isa<ReturnInst>(BB->getTerminator())) {
223 if (HasDivergentExitBlock)
225 }
else if (isa<UnreachableInst>(BB->getTerminator())) {
226 if (HasDivergentExitBlock)
228 }
else if (
BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
231 if (DummyReturnBB ==
nullptr) {
233 "DummyReturnBlock", &
F);
237 ReturningBlocks.
push_back(DummyReturnBB);
240 if (BI->isUnconditional()) {
241 BasicBlock *LoopHeaderBB = BI->getSuccessor(0);
245 Updates.push_back({DominatorTree::Insert, BB, DummyReturnBB});
252 Updates.reserve(Updates.size() + 2 * Successors.size() + 2);
256 Updates.push_back({DominatorTree::Insert, BB, TransitionBB});
258 Updates.push_back({DominatorTree::Insert, TransitionBB,
Successor});
259 Updates.push_back({DominatorTree::Delete, BB,
Successor});
266 Updates.push_back({DominatorTree::Insert, BB, DummyReturnBB});
272 if (!UnreachableBlocks.
empty()) {
275 if (UnreachableBlocks.
size() == 1) {
276 UnreachableBlock = UnreachableBlocks.
front();
279 "UnifiedUnreachableBlock", &
F);
282 Updates.reserve(Updates.size() + UnreachableBlocks.
size());
285 BB->getTerminator()->eraseFromParent();
287 Updates.push_back({DominatorTree::Insert, BB, UnreachableBlock});
292 if (!ReturningBlocks.
empty()) {
312 ReturningBlocks.
push_back(UnreachableBlock);
324 if (ReturningBlocks.
empty())
327 if (ReturningBlocks.
size() == 1)
330 unifyReturnBlockSet(
F, DTU, ReturningBlocks,
"UnifiedReturnBlock");
334bool AMDGPUUnifyDivergentExitNodes::runOnFunction(
Function &
F) {
337 DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
339 getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
340 const auto &UA = getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();
341 const auto *TranformInfo =
342 &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
F);
343 return AMDGPUUnifyDivergentExitNodesImpl(TranformInfo).run(
F, DT, PDT, UA);
356 return AMDGPUUnifyDivergentExitNodesImpl(TransformInfo).
run(
F, DT, PDT, UA)
static bool isUniformlyReached(const UniformityInfo &UA, BasicBlock &BB)
Unify divergent function exit nodes
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Performs the initial survey of the specified function
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
LLVM Basic Block Representation.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)
Split the basic block into two basic blocks at the specified instruction.
SymbolTableList< BasicBlock >::iterator eraseFromParent()
Unlink 'this' from the containing function and delete it.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Conditional or Unconditional Branch instruction.
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
This is the shared class of boolean and integer constants.
static ConstantInt * getTrue(LLVMContext &Context)
void applyUpdates(ArrayRef< DominatorTree::UpdateType > Updates)
Submit updates to all available trees.
Analysis pass which computes a DominatorTree.
iterator_range< root_iterator > roots()
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
FunctionPass class - This class is used to implement most global optimizations.
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overridden by subclasses to do the per-function processing of the pass.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Analysis pass which computes a PostDominatorTree.
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
static ReturnInst * Create(LLVMContext &C, Value *retVal=nullptr, Instruction *InsertBefore=nullptr)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Analysis pass providing the TargetTransformInfo.
Result run(const Function &F, FunctionAnalysisManager &)
The instances of the Type class are immutable: once they are created, they are never changed.
This function has undefined behavior.
LLVM Value Representation.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
This is an optimization pass for GlobalISel generic memory operations.
bool hasOnlySimpleTerminator(const Function &F)
auto successors(const MachineBasicBlock *BB)
char & AMDGPUUnifyDivergentExitNodesID
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
cl::opt< bool > RequireAndPreserveDomTree
This function is used to do simplification of a CFG.
bool simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, DomTreeUpdater *DTU=nullptr, const SimplifyCFGOptions &Options={}, ArrayRef< WeakVH > LoopHeaders={})
char & BreakCriticalEdgesID
auto predecessors(const MachineBasicBlock *BB)
void initializeAMDGPUUnifyDivergentExitNodesPass(PassRegistry &)