Go to the documentation of this file.
84 #define DEBUG_TYPE "tailcallelim"
86 STATISTIC(NumEliminated,
"Number of tail calls removed");
87 STATISTIC(NumRetDuped,
"Number of return duplicated");
88 STATISTIC(NumAccumAdded,
"Number of accumulators introduced");
98 auto *AI = dyn_cast<AllocaInst>(&
I);
99 return !AI || AI->isStaticAlloca();
104 struct AllocaDerivedValueTracker {
108 void walk(
Value *Root) {
112 auto AddUsesToWorklist = [&](
Value *V) {
113 for (
auto &U : V->uses()) {
114 if (!Visited.
insert(&U).second)
116 Worklist.push_back(&U);
120 AddUsesToWorklist(Root);
122 while (!Worklist.empty()) {
126 switch (
I->getOpcode()) {
128 case Instruction::Invoke: {
129 auto &CB = cast<CallBase>(*
I);
134 if (CB.isArgOperand(U) && CB.isByValArgument(CB.getArgOperandNo(U)))
137 CB.isDataOperand(U) && CB.doesNotCapture(CB.getDataOperandNo(U));
138 callUsesLocalStack(CB, IsNocapture);
152 if (U->getOperandNo() == 0)
153 EscapePoints.insert(
I);
156 case Instruction::BitCast:
157 case Instruction::GetElementPtr:
158 case Instruction::PHI:
160 case Instruction::AddrSpaceCast:
163 EscapePoints.insert(
I);
167 AddUsesToWorklist(
I);
171 void callUsesLocalStack(
CallBase &CB,
bool IsNocapture) {
173 AllocaUsers.insert(&CB);
181 EscapePoints.insert(&CB);
190 if (
F.callsFunctionThatReturnsTwice())
194 AllocaDerivedValueTracker Tracker;
196 if (
Arg.hasByValAttr())
232 VisitType Escaped = UNESCAPED;
234 for (
auto &
I : *
BB) {
235 if (Tracker.EscapePoints.count(&
I))
242 if (!CI || CI->
isTailCall() || isa<DbgInfoIntrinsic>(&
I) ||
243 isa<PseudoProbeInst>(&
I))
259 bool SafeToTail =
true;
261 if (isa<Constant>(
Arg.getUser()))
263 if (
Argument *A = dyn_cast<Argument>(
Arg.getUser()))
264 if (!A->hasByValAttr())
273 <<
"marked as tail call candidate (readnone)";
281 if (!IsNoTail && Escaped == UNESCAPED && !Tracker.AllocaUsers.count(CI))
282 DeferredTails.push_back(CI);
286 auto &State = Visited[SuccBB];
287 if (State < Escaped) {
289 if (State == ESCAPED)
290 WorklistEscaped.push_back(SuccBB);
292 WorklistUnescaped.push_back(SuccBB);
296 if (!WorklistEscaped.empty()) {
301 while (!WorklistUnescaped.empty()) {
303 if (Visited[NextBB] == UNESCAPED) {
312 for (
CallInst *CI : DeferredTails) {
313 if (Visited[CI->getParent()] != ESCAPED) {
316 LLVM_DEBUG(
dbgs() <<
"Marked as tail call candidate: " << *CI <<
"\n");
330 if (isa<DbgInfoIntrinsic>(
I))
334 if (II->getIntrinsicID() == Intrinsic::lifetime_end &&
340 if (
I->mayHaveSideEffects())
343 if (
LoadInst *L = dyn_cast<LoadInst>(
I)) {
353 L->getAlign(),
DL, L))
367 if (!
I->isAssociative() || !
I->isCommutative())
370 assert(
I->getNumOperands() == 2 &&
371 "Associative/commutative operations should have 2 args!");
374 if ((
I->getOperand(0) == CI &&
I->getOperand(1) == CI) ||
375 (
I->getOperand(0) != CI &&
I->getOperand(1) != CI))
379 if (!
I->hasOneUse() || !isa<ReturnInst>(
I->user_back()))
386 while (isa<DbgInfoIntrinsic>(
I))
392 class TailRecursionEliminator {
432 void createTailRecurseLoopHeader(
CallInst *CI);
438 void cleanupAndFinalize();
442 void copyByValueOperandIntoLocalTemp(
CallInst *CI,
int OpndIdx);
444 void copyLocalTempOfByValueOperandIntoArguments(
CallInst *CI,
int OpndIdx);
456 if (&
BB->front() == TI)
464 CI = dyn_cast<CallInst>(BBI);
468 if (BBI ==
BB->begin())
474 "Incompatible call site attributes(Tail,NoTail)");
482 if (
BB == &
F.getEntryBlock() &&
490 for (;
I !=
E && FI != FE; ++
I, ++FI)
491 if (*
I != &*FI)
break;
492 if (
I ==
E && FI == FE)
499 void TailRecursionEliminator::createTailRecurseLoopHeader(
CallInst *CI) {
500 HeaderBB = &
F.getEntryBlock();
503 HeaderBB->setName(
"tailrecurse");
509 NEBI = NewEntry->
begin();
511 if (
AllocaInst *AI = dyn_cast<AllocaInst>(OEBI++))
512 if (isa<ConstantInt>(AI->getArraySize()))
513 AI->moveBefore(&*NEBI);
523 I->replaceAllUsesWith(PN);
525 ArgumentPHIs.push_back(PN);
532 Type *RetType =
F.getReturnType();
548 void TailRecursionEliminator::insertAccumulator(
Instruction *AccRecInstr) {
549 assert(!AccPN &&
"Trying to insert multiple accumulators");
551 AccumulatorRecursionInstr = AccRecInstr;
556 "accumulator.tr", &HeaderBB->front());
566 if (
P == &
F.getEntryBlock()) {
569 AccPN->addIncoming(Identity,
P);
571 AccPN->addIncoming(AccPN,
P);
580 void TailRecursionEliminator::copyByValueOperandIntoLocalTemp(
CallInst *CI,
592 AggTy,
DL.getAllocaAddrSpace(),
nullptr, Alignment,
599 Builder.CreateMemCpy(NewAlloca, Alignment,
607 void TailRecursionEliminator::copyLocalTempOfByValueOperandIntoArguments(
621 Builder.CreateMemCpy(
F.getArg(OpndIdx), Alignment,
626 bool TailRecursionEliminator::eliminateCall(
CallInst *CI) {
635 for (++BBI; &*BBI !=
Ret; ++BBI) {
656 <<
"transforming tail recursion into loop";
662 createTailRecurseLoopHeader(CI);
667 copyByValueOperandIntoLocalTemp(CI,
I);
675 copyLocalTempOfByValueOperandIntoArguments(CI,
I);
676 ArgumentPHIs[
I]->addIncoming(
F.getArg(
I),
BB);
682 insertAccumulator(AccRecInstr);
692 if (
Ret->getReturnValue() == CI || AccRecInstr) {
694 RetPN->addIncoming(RetPN,
BB);
695 RetKnownPN->addIncoming(RetKnownPN,
BB);
701 RetKnownPN, RetPN,
Ret->getReturnValue(),
"current.ret.tr",
Ret);
702 RetSelects.push_back(
SI);
704 RetPN->addIncoming(
SI,
BB);
709 AccPN->addIncoming(AccRecInstr ? AccRecInstr : AccPN,
BB);
717 BB->getInstList().erase(
Ret);
718 BB->getInstList().erase(CI);
724 void TailRecursionEliminator::cleanupAndFinalize() {
730 for (
PHINode *PN : ArgumentPHIs) {
739 if (RetSelects.empty()) {
742 RetPN->dropAllReferences();
743 RetPN->eraseFromParent();
745 RetKnownPN->dropAllReferences();
746 RetKnownPN->eraseFromParent();
751 Instruction *AccRecInstr = AccumulatorRecursionInstr;
753 ReturnInst *RI = dyn_cast<ReturnInst>(
BB.getTerminator());
758 AccRecInstrNew->
setName(
"accumulator.ret.tr");
769 ReturnInst *RI = dyn_cast<ReturnInst>(
BB.getTerminator());
774 RetKnownPN, RetPN, RI->
getOperand(0),
"current.ret.tr", RI);
775 RetSelects.push_back(
SI);
782 Instruction *AccRecInstr = AccumulatorRecursionInstr;
785 AccRecInstrNew->
setName(
"accumulator.ret.tr");
787 SI->getFalseValue());
789 SI->setFalseValue(AccRecInstrNew);
796 bool TailRecursionEliminator::processBlock(
BasicBlock &
BB) {
799 if (
BranchInst *BI = dyn_cast<BranchInst>(TI)) {
815 <<
"INTO UNCOND BRANCH PRED: " <<
BB);
829 }
else if (isa<ReturnInst>(TI)) {
833 return eliminateCall(CI);
839 bool TailRecursionEliminator::eliminate(
Function &
F,
844 if (
F.getFnAttribute(
"disable-tail-calls").getValueAsBool())
847 bool MadeChange =
false;
852 if (
F.getFunctionType()->isVarArg())
859 TailRecursionEliminator TRE(
F,
TTI,
AA, ORE, DTU);
862 MadeChange |= TRE.processBlock(
BB);
864 TRE.cleanupAndFinalize();
889 auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
890 auto *DT = DTWP ? &DTWP->getDomTree() :
nullptr;
891 auto *PDTWP = getAnalysisIfAvailable<PostDominatorTreeWrapperPass>();
892 auto *PDT = PDTWP ? &PDTWP->getPostDomTree() :
nullptr;
898 return TailRecursionEliminator::eliminate(
899 F, &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
F),
900 &getAnalysis<AAResultsWrapperPass>().getAAResults(),
901 &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(), DTU);
916 return new TailCallElim();
931 bool Changed = TailRecursionEliminator::eliminate(
F, &
TTI, &
AA, &ORE, DTU);
A set of analyses that are preserved following a run of a transformation pass.
This class represents an incoming formal argument to a Function.
@ OB_clang_arc_attachedcall
A manager for alias analyses.
Analysis pass providing the TargetTransformInfo.
static MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
This is an optimization pass for GlobalISel generic memory operations.
FunctionPass * createTailCallEliminationPass()
static IntegerType * getInt1Ty(LLVMContext &C)
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
void setTailCall(bool IsTc=true)
Return a value (possibly void), from a function.
A parsed version of the target data layout string in and methods for querying it.
InstListType::iterator iterator
Instruction iterators...
const Function * getParent() const
Return the enclosing method, or null if none.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
The instances of the Type class are immutable: once they are created, they are never changed.
@ Tail
Tail - This calling convention attemps to make calls as fast as possible while guaranteeing that tail...
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
static constexpr UpdateKind Insert
ReturnInst * FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB, BasicBlock *Pred, DomTreeUpdater *DTU=nullptr)
This method duplicates the specified return instruction into a predecessor which ends in an unconditi...
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
auto successors(MachineBasicBlock *BB)
bool isByValArgument(unsigned ArgNo) const
Determine whether this argument is passed by value.
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
LLVM_NODISCARD T pop_back_val()
bool mayHaveSideEffects() const
Return true if the instruction may have side effects.
LLVM Basic Block Representation.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
void initializeTailCallElimPass(PassRegistry &)
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr="", Instruction *InsertBefore=nullptr, Instruction *MDFrom=nullptr)
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
iterator begin()
Instruction iterator methods.
Represent the analysis usage information of a pass.
Value * SimplifyInstruction(Instruction *I, const SimplifyQuery &Q, OptimizationRemarkEmitter *ORE=nullptr)
See if we can compute a simplified version of this instruction.
Legacy analysis pass which computes a DominatorTree.
STATISTIC(NumFunctions, "Total number of functions")
void setName(const Twine &Name)
Change the name of the value.
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Type * getParamByValType(unsigned ArgNo) const
Extract the byval type for a call or parameter.
This struct is a compact representation of a valid (non-zero power of two) alignment.
LLVM_NODISCARD bool isModSet(const ModRefInfo MRI)
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
bool onlyReadsMemory(unsigned OpNo) const
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
PassBuilder PB(Machine, PassOpts->PTO, None, &PIC)
static Instruction * firstNonDbg(BasicBlock::iterator I)
inst_range instructions(Function *F)
This is an important base class in LLVM.
INITIALIZE_PASS_BEGIN(TailCallElim, "tailcallelim", "Tail Call Elimination", false, false) INITIALIZE_PASS_END(TailCallElim
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
bool doesNotAccessMemory(unsigned OpNo) const
static Constant * getBinOpIdentity(unsigned Opcode, Type *Ty, bool AllowRHSConstant=false)
Return the identity constant for a binary opcode.
void preserve()
Mark an analysis as preserved.
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
StandardInstrumentations SI(Debug, VerifyEach)
This class represents the LLVM 'select' instruction.
bool isVoidTy() const
Return true if this is 'void'.
static bool canTransformAccumulatorRecursion(Instruction *I, CallInst *CI)
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
void setOperand(unsigned i, Value *Val)
Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
MaybeAlign getParamAlign(unsigned ArgNo) const
Extract the alignment for a call or parameter (0=unknown).
Type * getType() const
All values are typed, get the type of this value.
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
bool isNoTailCall() const
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
bool pred_empty(const BasicBlock *BB)
StringRef getName() const
Return a constant reference to the value's name.
An instruction for reading from memory.
void setArgOperand(unsigned i, Value *v)
static ConstantInt * getFalse(LLVMContext &Context)
bool hasOperandBundlesOtherThan(ArrayRef< uint32_t > IDs) const
Return true if this operand bundle user contains operand bundles with tags other than those specified...
static bool runOnFunction(Function &F, bool PostInlining)
static bool markTails(Function &F, OptimizationRemarkEmitter *ORE)
static ConstantInt * getTrue(LLVMContext &Context)
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
unsigned arg_size() const
Interval::pred_iterator pred_end(Interval *I)
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
A wrapper class for inspecting calls to intrinsic functions.
Interval::pred_iterator pred_begin(Interval *I)
pred_begin/pred_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Analysis pass which computes a DominatorTree.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Value * getArgOperand(unsigned i) const
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
const BasicBlock * getParent() const
Legacy wrapper pass to provide the GlobalsAAResult object.
const Instruction * getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
bool isSafeToLoadUnconditionally(Value *V, Align Alignment, APInt &Size, const DataLayout &DL, Instruction *ScanFrom=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if we know that executing a load from this value cannot trap.
A container for analyses that lazily runs them and caches their results.
static bool canTRE(Function &F)
Scan the specified function for alloca instructions.
FunctionPass class - This class is used to implement most global optimizations.
This class represents a function call, abstracting a target machine's calling convention.
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Analysis pass which computes a PostDominatorTree.
AnalysisUsage & addRequired()
void takeName(Value *V)
Transfer the name from V to this value.
an instruction to allocate memory on the stack
Value * getOperand(unsigned i) const
Conditional or Unconditional Branch instruction.
LLVM Value Representation.
bool isConditional() const
BasicBlock * getSuccessor(unsigned i) const
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
AllocaInst * findAllocaForValue(Value *V, bool OffsetZero=false)
Returns unique alloca where the value comes from, or nullptr.
A Use represents the edge between a Value definition and its users.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
static bool canMoveAboveCall(Instruction *I, CallInst *CI, AliasAnalysis *AA)
Return true if it is safe to move the specified instruction from after the call to before the call,...