Go to the documentation of this file.
41 #include "llvm/IR/IntrinsicsAMDGPU.h"
52 #define DEBUG_TYPE "amdgpu-unify-divergent-exit-nodes"
56 class AMDGPUUnifyDivergentExitNodes :
public FunctionPass {
82 "Unify divergent function exit nodes",
false,
false)
89 void AMDGPUUnifyDivergentExitNodes::getAnalysisUsage(
AnalysisUsage &AU)
const{
122 while (!Stack.empty()) {
128 if (Visited.
insert(Pred).second)
129 Stack.push_back(Pred);
136 BasicBlock *AMDGPUUnifyDivergentExitNodes::unifyReturnBlockSet(
146 if (
F.getReturnType()->isVoidTy()) {
150 PN =
B.CreatePHI(
F.getReturnType(), ReturningBlocks.
size(),
157 std::vector<DominatorTree::UpdateType> Updates;
158 Updates.reserve(ReturningBlocks.
size());
166 BB->getTerminator()->eraseFromParent();
187 DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
189 auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
192 if (PDT.root_size() <= 1)
196 TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
F);
206 bool Changed =
false;
207 std::vector<DominatorTree::UpdateType> Updates;
210 if (isa<ReturnInst>(
BB->getTerminator())) {
212 ReturningBlocks.push_back(
BB);
213 }
else if (isa<UnreachableInst>(
BB->getTerminator())) {
215 UnreachableBlocks.push_back(
BB);
216 }
else if (
BranchInst *BI = dyn_cast<BranchInst>(
BB->getTerminator())) {
219 if (DummyReturnBB ==
nullptr) {
221 "DummyReturnBlock", &
F);
222 Type *RetTy =
F.getReturnType();
225 ReturningBlocks.push_back(DummyReturnBB);
228 if (BI->isUnconditional()) {
229 BasicBlock *LoopHeaderBB = BI->getSuccessor(0);
238 BasicBlock *TransitionBB =
BB->splitBasicBlock(BI,
"TransitionBlock");
240 Updates.reserve(Updates.size() + 2 * Successors.size() + 2);
252 BB->getTerminator()->eraseFromParent();
260 if (!UnreachableBlocks.empty()) {
263 if (UnreachableBlocks.size() == 1) {
264 UnreachableBlock = UnreachableBlocks.front();
267 "UnifiedUnreachableBlock", &
F);
270 Updates.reserve(Updates.size() + UnreachableBlocks.size());
273 BB->getTerminator()->eraseFromParent();
280 if (!ReturningBlocks.empty()) {
284 Type *RetTy =
F.getReturnType();
300 ReturningBlocks.push_back(UnreachableBlock);
312 if (ReturningBlocks.empty())
315 if (ReturningBlocks.size() == 1)
318 unifyReturnBlockSet(
F, DTU, ReturningBlocks,
"UnifiedReturnBlock");
This is an optimization pass for GlobalISel generic memory operations.
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
void applyUpdates(ArrayRef< DominatorTree::UpdateType > Updates)
Submit updates to all available trees.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
The instances of the Type class are immutable: once they are created, they are never changed.
SymbolTableList< BasicBlock >::iterator eraseFromParent()
Unlink 'this' from the containing function and delete it.
static constexpr UpdateKind Insert
static bool isUniformlyReached(const LegacyDivergenceAnalysis &DA, BasicBlock &BB)
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
auto successors(MachineBasicBlock *BB)
LLVM Basic Block Representation.
This is the shared class of boolean and integer constants.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at application launch and destroyed by llvm_shutdown.
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Represent the analysis usage information of a pass.
void initializeAMDGPUUnifyDivergentExitNodesPass(PassRegistry &)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Legacy analysis pass which computes a DominatorTree.
auto predecessors(MachineBasicBlock *BB)
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
char & AMDGPUUnifyDivergentExitNodesID
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
iterator_range< typename GraphTraits< GraphType >::nodes_iterator > nodes(const GraphType &G)
INITIALIZE_PASS_BEGIN(AMDGPUUnifyDivergentExitNodes, DEBUG_TYPE, "Unify divergent function exit nodes", false, false) INITIALIZE_PASS_END(AMDGPUUnifyDivergentExitNodes
print Print MemDeps of function
bool isVoidTy() const
Return true if this is 'void'.
bool simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, DomTreeUpdater *DTU=nullptr, const SimplifyCFGOptions &Options={}, ArrayRef< WeakVH > LoopHeaders={})
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory), i.e. a start pointer and a length.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
cl::opt< bool > RequireAndPreserveDomTree
This function is used to do simplification of a CFG.
static bool runOnFunction(Function &F, bool PostInlining)
static ConstantInt * getTrue(LLVMContext &Context)
static ReturnInst * Create(LLVMContext &C, Value *retVal=nullptr, Instruction *InsertBefore=nullptr)
char & BreakCriticalEdgesID
declare void exit(i32) noreturn nounwind This compiles into
size_t size() const
size - Get the array size.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well formed.
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overridden by passes that need analysis information to do their job.
FunctionPass class - This class is used to implement most global optimizations.
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
This function has undefined behavior.
Conditional or Unconditional Branch instruction.
LLVM Value Representation.
static constexpr UpdateKind Delete
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.