24#define DEBUG_TYPE "coro-elide"
26STATISTIC(NumOfCoroElided,
"The # of coroutine get elided.");
44 Lowerer(
Module &M) : LowererBase(M) {}
49 void collectPostSplitCoroIds(
Function *
F);
69 if (ValueTy != IntrTy) {
99 if (
auto *Call = dyn_cast<CallInst>(&
I))
101 !Call->isMustTailCall())
102 Call->setTailCall(
false);
107static std::optional<std::pair<uint64_t, Align>>
119 if (!isa<AllocaInst>(&
I))
127 "coro-elide-info-output-file shouldn't be empty");
133 llvm::errs() <<
"Error opening coro-elide-info-output-file '"
135 return std::make_unique<raw_fd_ostream>(2,
false);
154 for (
auto *CA : CoroAllocs) {
155 CA->replaceAllUsesWith(False);
156 CA->eraseFromParent();
165 auto *Frame =
new AllocaInst(FrameTy,
DL.getAllocaAddrSpace(),
"", InsertPt);
166 Frame->setAlignment(FrameAlign);
170 for (
auto *CB : CoroBegins) {
171 CB->replaceAllUsesWith(FrameVoidPtr);
172 CB->eraseFromParent();
182 const auto &It = DestroyAddr.find(CB);
183 assert(It != DestroyAddr.end());
186 unsigned Limit = 32 * (1 + It->second.size());
194 for (
auto *DA : It->second)
199 if (!Visited.
insert(BB).second)
208 auto TI = BB->getTerminator();
212 if (isa<SwitchInst>(TI) &&
213 CoroSuspendSwitches.count(cast<SwitchInst>(TI))) {
214 Worklist.
push_back(cast<SwitchInst>(TI)->getSuccessor(1));
215 Worklist.
push_back(cast<SwitchInst>(TI)->getSuccessor(2));
219 }
while (!Worklist.
empty());
229 if (CoroAllocs.empty())
243 auto *TI =
B.getTerminator();
244 if (TI->getNumSuccessors() == 0 && !TI->isExceptionalTerminator() &&
245 !isa<UnreachableInst>(TI))
251 for (
const auto &It : DestroyAddr) {
256 return DT.
dominates(DA, TI->getTerminator());
258 ReferencedCoroBegins.
insert(It.first);
268 if (!ReferencedCoroBegins.
count(It.first) &&
269 !hasEscapePath(It.first, Terminators))
270 ReferencedCoroBegins.
insert(It.first);
276 return ReferencedCoroBegins.
size() == CoroBegins.size();
279void Lowerer::collectPostSplitCoroIds(
Function *
F) {
281 CoroSuspendSwitches.clear();
283 if (
auto *CII = dyn_cast<CoroIdInst>(&
I))
284 if (CII->getInfo().isPostSplit())
286 if (CII->getCoroutine() != CII->getFunction())
287 CoroIds.push_back(CII);
294 if (
auto *CSI = dyn_cast<CoroSuspendInst>(&
I))
295 if (CSI->hasOneUse() && isa<SwitchInst>(CSI->use_begin()->getUser())) {
296 SwitchInst *SWI = cast<SwitchInst>(CSI->use_begin()->getUser());
298 CoroSuspendSwitches.insert(SWI);
312 if (
auto *CB = dyn_cast<CoroBeginInst>(U))
313 CoroBegins.push_back(CB);
314 else if (
auto *CA = dyn_cast<CoroAllocInst>(U))
315 CoroAllocs.push_back(CA);
324 if (
auto *II = dyn_cast<CoroSubFnInst>(U))
325 switch (II->getIndex()) {
327 ResumeAddr.push_back(II);
330 DestroyAddr[CB].push_back(II);
340 assert(Resumers &&
"PostSplit coro.id Info argument must refer to an array"
341 "of coroutine subfunctions");
342 auto *ResumeAddrConstant =
347 bool ShouldElide = shouldElide(CoroId->
getFunction(), DT);
350 if (
auto FrameSizeAndAlign =
354 <<
"' not elided in '"
357 <<
ore::NV(
"frame_size", FrameSizeAndAlign->first) <<
", align="
358 <<
ore::NV(
"align", FrameSizeAndAlign->second.value()) <<
")";
362 <<
"' not elided in '"
364 <<
"' (frame_size=unknown, align=unknown)";
370 for (
auto &It : DestroyAddr)
374 if (
auto FrameSizeAndAlign =
376 elideHeapAllocations(CoroId->
getFunction(), FrameSizeAndAlign->first,
377 FrameSizeAndAlign->second, AA);
392 <<
ore::NV(
"frame_size", FrameSizeAndAlign->first) <<
", align="
393 <<
ore::NV(
"align", FrameSizeAndAlign->second.value()) <<
")";
399 <<
"' not elided in '"
401 <<
"' (frame_size=unknown, align=unknown)";
414 auto &M = *
F.getParent();
420 L.collectPostSplitCoroIds(&
F);
422 if (L.CoroIds.empty())
429 bool Changed =
false;
430 for (
auto *CII : L.CoroIds)
431 Changed |= L.processCoroId(CII, AA, DT, ORE);
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static void replaceWithConstant(Constant *Value, SmallVectorImpl< CoroSubFnInst * > &Users)
static Instruction * getFirstNonAllocaInTheEntryBlock(Function *F)
static cl::opt< std::string > CoroElideInfoOutputFilename("coro-elide-info-output-file", cl::value_desc("filename"), cl::desc("File to record the coroutines got elided"), cl::Hidden)
static void removeTailCallAttribute(AllocaInst *Frame, AAResults &AA)
static std::optional< std::pair< uint64_t, Align > > getFrameLayout(Function *Resume)
static bool declaresCoroElideIntrinsics(Module &M)
static std::unique_ptr< raw_fd_ostream > getOrCreateLogFile()
static bool operandReferences(CallInst *CI, AllocaInst *Frame, AAResults &AA)
This file defines the DenseMap class.
iv Induction Variable Users
print must be executed print the must be executed context for all instructions
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metrics from passes.
#define STATISTIC(VARNAME, DESC)
A manager for alias analyses.
bool isNoAlias(const MemoryLocation &LocA, const MemoryLocation &LocB)
A trivial helper function to check to see if the specified pointers are no-alias.
an instruction to allocate memory on the stack
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
LLVM Basic Block Representation.
This class represents a no-op cast from one type to another.
This class represents a function call, abstracting a target machine's calling convention.
ConstantArray - Constant Array Declarations.
static Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
static ConstantInt * getFalse(LLVMContext &Context)
This is an important base class in LLVM.
Constant * getAggregateElement(unsigned Elt) const
For aggregates (struct/array/vector) return the constant that corresponds to the specified element if possible, or null if not.
This class represents the llvm.coro.begin instruction.
This represents the llvm.coro.id instruction.
Function * getCoroutine() const
This class represents the llvm.coro.subfn.addr instruction.
A parsed version of the target data layout string, and methods for querying it.
Analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
uint64_t getParamDereferenceableBytes(unsigned ArgNo) const
Extract the number of dereferenceable bytes for a parameter.
MaybeAlign getParamAlign(unsigned ArgNo) const
const BasicBlock * getParent() const
const Function * getFunction() const
Return the function this instruction belongs to.
This is an important class for using LLVM in a threaded context.
A Module instance is used to store all the information related to an LLVM module.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across all SmallPtrSet instantiations.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
unsigned getNumCases() const
Return the number of 'cases' in this switch instruction, excluding the default case.
The instances of the Type class are immutable: once they are created, they are never changed.
bool isPointerTy() const
True if this is an instance of PointerType.
static IntegerType * getInt8Ty(LLVMContext &C)
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
iterator_range< value_op_iterator > operand_values()
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
iterator_range< user_iterator > users()
StringRef getName() const
Return a constant reference to the value's name.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
bool declaresIntrinsics(const Module &M, const std::initializer_list< StringRef >)
void replaceCoroFree(CoroIdInst *CoroId, bool Elide)
DiagnosticInfoOptimizationBase::Argument NV
@ OF_Append
The file should be opened in append mode.
This is an optimization pass for GlobalISel generic memory operations.
Interval::succ_iterator succ_end(Interval *I)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Interval::succ_iterator succ_begin(Interval *I)
succ_begin/succ_end - define methods so that Intervals may be used just like BasicBlocks can with the succ_* functions, and *::succ_iterator.
bool replaceAndRecursivelySimplify(Instruction *I, Value *SimpleV, const TargetLibraryInfo *TLI=nullptr, const DominatorTree *DT=nullptr, AssumptionCache *AC=nullptr, SmallSetVector< Instruction *, 8 > *UnsimplifiedUsers=nullptr)
Replace all uses of 'I' with 'SimpleV' and simplify the uses recursively.
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
This struct is a compact representation of a valid (non-zero power of two) alignment.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Used in the streaming interface as the general argument type.
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.