61#define DEBUG_TYPE "hotcoldsplit"
63STATISTIC(NumColdRegionsFound,
"Number of cold regions found.");
64STATISTIC(NumColdRegionsOutlined,
"Number of cold regions outlined.");
73 cl::desc(
"Base penalty for splitting cold code (as a "
74 "multiple of TCC_Basic)"));
78 cl::desc(
"Enable placement of extracted cold functions"
79 " into a separate section after hot-cold splitting."));
84 cl::desc(
"Name for the section containing cold functions "
85 "extracted by hot-cold splitting."));
89 cl::desc(
"Maximum number of parameters for a split function"));
104 return !(isa<ReturnInst>(
I) || isa<IndirectBrInst>(
I));
115 if (
auto *CB = dyn_cast<CallBase>(&
I))
116 if (CB->hasFnAttr(Attribute::Cold) &&
117 !CB->getMetadata(LLVMContext::MD_nosanitize))
124 dyn_cast_or_null<CallInst>(BB.getTerminator()->getPrevNode()))
125 if (CI->hasFnAttr(Attribute::NoReturn))
134static bool mayExtractBlock(
const BasicBlock &BB) {
144 return !isa<InvokeInst>(Term) && !isa<ResumeInst>(Term);
151static bool markFunctionCold(
Function &
F,
bool UpdateEntryCount =
false) {
152 assert(!
F.hasOptNone() &&
"Can't mark this cold");
153 bool Changed =
false;
154 if (!
F.hasFnAttribute(Attribute::Cold)) {
155 F.addFnAttr(Attribute::Cold);
158 if (!
F.hasFnAttribute(Attribute::MinSize)) {
159 F.addFnAttr(Attribute::MinSize);
162 if (UpdateEntryCount) {
175bool HotColdSplitting::isFunctionCold(
const Function &
F)
const {
176 if (
F.hasFnAttribute(Attribute::Cold))
190bool HotColdSplitting::shouldOutlineFrom(
const Function &
F)
const {
191 if (
F.hasFnAttribute(Attribute::AlwaysInline))
194 if (
F.hasFnAttribute(Attribute::NoInline))
199 if (
F.hasFnAttribute(Attribute::NoReturn))
202 if (
F.hasFnAttribute(Attribute::SanitizeAddress) ||
203 F.hasFnAttribute(Attribute::SanitizeHWAddress) ||
204 F.hasFnAttribute(Attribute::SanitizeThread) ||
205 F.hasFnAttribute(Attribute::SanitizeMemory))
228 unsigned NumInputs,
unsigned NumOutputs) {
230 LLVM_DEBUG(
dbgs() <<
"Applying penalty for splitting: " << Penalty <<
"\n");
239 bool NoBlocksReturn =
true;
251 NoBlocksReturn =
false;
252 SuccsOutsideRegion.
insert(SuccBB);
262 unsigned NumSplitExitPhis = 0;
263 for (
BasicBlock *ExitBB : SuccsOutsideRegion) {
264 for (
PHINode &PN : ExitBB->phis()) {
266 int NumIncomingVals = 0;
267 for (
unsigned i = 0; i < PN.getNumIncomingValues(); ++i)
270 if (NumIncomingVals > 1) {
280 int NumOutputsAndSplitPhis = NumOutputs + NumSplitExitPhis;
281 int NumParams = NumInputs + NumOutputsAndSplitPhis;
283 LLVM_DEBUG(
dbgs() << NumInputs <<
" inputs and " << NumOutputsAndSplitPhis
284 <<
" outputs exceeds parameter limit ("
286 return std::numeric_limits<int>::max();
289 LLVM_DEBUG(
dbgs() <<
"Applying penalty for: " << NumParams <<
" params\n");
290 Penalty += CostForArgMaterialization * NumParams;
294 LLVM_DEBUG(
dbgs() <<
"Applying penalty for: " << NumOutputsAndSplitPhis
295 <<
" outputs/split phis\n");
297 Penalty += CostForRegionOutput * NumOutputsAndSplitPhis;
300 if (NoBlocksReturn) {
302 <<
" non-returning terminators\n");
308 if (SuccsOutsideRegion.
size() > 1) {
310 <<
" non-region successors\n");
317Function *HotColdSplitting::extractColdRegion(
327 "cold." + std::to_string(Count));
332 CE.findInputsOutputs(Inputs, Outputs, Sinks);
334 int OutliningPenalty =
336 LLVM_DEBUG(
dbgs() <<
"Split profitability: benefit = " << OutliningBenefit
337 <<
", penalty = " << OutliningPenalty <<
"\n");
338 if (!OutliningBenefit.
isValid() || OutliningBenefit <= OutliningPenalty)
342 if (
Function *OutF =
CE.extractCodeRegion(CEAC)) {
343 User *
U = *OutF->user_begin();
345 NumColdRegionsOutlined++;
359 markFunctionCold(*OutF, BFI !=
nullptr);
365 <<
ore::NV(
"Original", OrigF) <<
" split cold code into "
374 <<
"Failed to extract region at block "
381using BlockTy = std::pair<BasicBlock *, unsigned>;
388class OutliningRegion {
400 bool EntireFunctionCold =
false;
403 static unsigned getEntryPointScore(
BasicBlock &BB,
unsigned Score) {
404 return mayExtractBlock(BB) ? Score : 0;
409 static constexpr unsigned ScoreForSuccBlock = 1;
410 static constexpr unsigned ScoreForSinkBlock = 1;
412 OutliningRegion(
const OutliningRegion &) =
delete;
413 OutliningRegion &operator=(
const OutliningRegion &) =
delete;
416 OutliningRegion() =
default;
417 OutliningRegion(OutliningRegion &&) =
default;
418 OutliningRegion &operator=(OutliningRegion &&) =
default;
420 static std::vector<OutliningRegion> create(
BasicBlock &SinkBB,
423 std::vector<OutliningRegion> Regions;
426 Regions.emplace_back();
427 OutliningRegion *ColdRegion = &Regions.back();
429 auto addBlockToRegion = [&](
BasicBlock *BB,
unsigned Score) {
431 ColdRegion->Blocks.emplace_back(BB, Score);
435 unsigned SinkScore = getEntryPointScore(SinkBB, ScoreForSinkBlock);
436 ColdRegion->SuggestedEntryPoint = (SinkScore > 0) ? &SinkBB :
nullptr;
437 unsigned BestScore = SinkScore;
441 auto PredEnd =
idf_end(&SinkBB);
442 while (PredIt != PredEnd) {
444 bool SinkPostDom = PDT.
dominates(&SinkBB, &PredBB);
449 ColdRegion->EntireFunctionCold =
true;
455 if (!SinkPostDom || !mayExtractBlock(PredBB)) {
456 PredIt.skipChildren();
463 unsigned PredScore = getEntryPointScore(PredBB, PredIt.getPathLength());
464 if (PredScore > BestScore) {
465 ColdRegion->SuggestedEntryPoint = &PredBB;
466 BestScore = PredScore;
469 addBlockToRegion(&PredBB, PredScore);
479 if (mayExtractBlock(SinkBB)) {
480 addBlockToRegion(&SinkBB, SinkScore);
482 ColdRegion->EntireFunctionCold =
true;
486 Regions.emplace_back();
487 ColdRegion = &Regions.back();
493 auto SuccEnd =
df_end(&SinkBB);
494 while (SuccIt != SuccEnd) {
496 bool SinkDom = DT.
dominates(&SinkBB, &SuccBB);
499 bool DuplicateBlock = RegionBlocks.
count(&SuccBB);
503 if (DuplicateBlock || !SinkDom || !mayExtractBlock(SuccBB)) {
504 SuccIt.skipChildren();
508 unsigned SuccScore = getEntryPointScore(SuccBB, ScoreForSuccBlock);
509 if (SuccScore > BestScore) {
510 ColdRegion->SuggestedEntryPoint = &SuccBB;
511 BestScore = SuccScore;
514 addBlockToRegion(&SuccBB, SuccScore);
522 bool empty()
const {
return !SuggestedEntryPoint; }
528 bool isEntireFunctionCold()
const {
return EntireFunctionCold; }
532 assert(!empty() && !isEntireFunctionCold() &&
"Nothing to extract");
539 unsigned NextScore = 0;
540 auto RegionEndIt = Blocks.
end();
543 unsigned Score = Block.second;
545 BB == SuggestedEntryPoint || DT.
dominates(SuggestedEntryPoint, BB);
546 if (!InSubRegion && Score > NextScore) {
550 if (InSubRegion && BB != SuggestedEntryPoint)
554 Blocks.
erase(RegionStartIt, RegionEndIt);
557 SuggestedEntryPoint = NextEntryPoint;
564bool HotColdSplitting::outlineColdRegions(
Function &
F,
bool HasProfileSummary) {
565 bool Changed =
false;
579 std::unique_ptr<DominatorTree> DT;
580 std::unique_ptr<PostDominatorTree> PDT;
586 if (HasProfileSummary)
596 if (ColdBlocks.
count(BB))
605 dbgs() <<
"Found a cold block:\n";
610 DT = std::make_unique<DominatorTree>(
F);
612 PDT = std::make_unique<PostDominatorTree>(
F);
614 auto Regions = OutliningRegion::create(*BB, *DT, *PDT);
615 for (OutliningRegion &
Region : Regions) {
619 if (
Region.isEntireFunctionCold()) {
621 return markFunctionCold(
F);
630 return !ColdBlocks.insert(Block.first).second;
636 ++NumColdRegionsFound;
640 if (OutliningWorklist.
empty())
644 unsigned OutlinedFunctionID = 1;
649 assert(!
Region.empty() &&
"Empty outlining region in worklist");
653 dbgs() <<
"Hot/cold splitting attempting to outline these blocks:\n";
658 Function *Outlined = extractColdRegion(SubRegion, CEAC, *DT, BFI,
TTI,
659 ORE, AC, OutlinedFunctionID);
661 ++OutlinedFunctionID;
664 }
while (!
Region.empty());
665 }
while (!OutliningWorklist.
empty());
671 bool Changed =
false;
672 bool HasProfileSummary = (M.getProfileSummary(
false) !=
nullptr);
675 if (
F.isDeclaration())
683 if (isFunctionCold(
F)) {
684 Changed |= markFunctionCold(
F);
688 if (!shouldOutlineFrom(
F)) {
694 Changed |= outlineColdRegions(
F, HasProfileSummary);
716 std::unique_ptr<OptimizationRemarkEmitter> ORE;
static bool blockEndsInUnreachable(const MachineBasicBlock *MBB)
A no successor, non-return block probably ends in unreachable and is cold.
std::pair< BasicBlock *, unsigned > BlockTy
A pair of (basic block, score).
static cl::opt< int > SplittingThreshold("hotcoldsplit-threshold", cl::init(2), cl::Hidden, cl::desc("Base penalty for splitting cold code (as a " "multiple of TCC_Basic)"))
static cl::opt< std::string > ColdSectionName("hotcoldsplit-cold-section-name", cl::init("__llvm_cold"), cl::Hidden, cl::desc("Name for the section containing cold functions " "extracted by hot-cold splitting."))
static cl::opt< int > MaxParametersForSplit("hotcoldsplit-max-params", cl::init(4), cl::Hidden, cl::desc("Maximum number of parameters for a split function"))
static InstructionCost getOutliningBenefit(ArrayRef< BasicBlock * > Region, TargetTransformInfo &TTI)
Get the benefit score of outlining Region.
static cl::opt< bool > EnableColdSection("enable-cold-section", cl::init(false), cl::Hidden, cl::desc("Enable placement of extracted cold functions" " into a separate section after hot-cold splitting."))
static cl::opt< bool > EnableStaticAnalysis("hot-cold-static-analysis", cl::init(true), cl::Hidden)
static int getOutliningPenalty(ArrayRef< BasicBlock * > Region, unsigned NumInputs, unsigned NumOutputs)
Get the penalty score for outlining Region.
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
Module.h This file contains the declarations for the Module class.
FunctionAnalysisManager FAM
This header defines various interfaces for pass management in LLVM.
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
A container for analyses that lazily runs them and caches their results.
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches,...
bool isEHPad() const
Return true if this basic block is an exception handling block.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
void setCallingConv(CallingConv::ID CC)
This class represents a function call, abstracting a target machine's calling convention.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
StringRef getSection() const
Get the custom section of this global if it has one.
bool hasSection() const
Check if this global has a custom object file section.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
A Module instance is used to store all the information related to an LLVM module.
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
bool dominates(const Instruction *I1, const Instruction *I2) const
Return true if I1 dominates I2.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
bool isColdBlock(const BasicBlock *BB, BlockFrequencyInfo *BFI) const
Returns true if BasicBlock BB is considered cold.
bool isFunctionEntryCold(const Function *F) const
Returns true if F has cold function entry.
block_range blocks()
Returns a range view of the basic blocks in the region.
RegionT * getParent() const
Get the parent of the Region.
A vector that has set insertion semantics.
size_type size() const
Determine the number of elements in the SetVector.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
reference emplace_back(ArgTypes &&... Args)
iterator erase(const_iterator CI)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Analysis pass providing the TargetTransformInfo.
void dump() const
Support for debugging, callable in GDB: V->dump()
@ Cold
Attempts to make code in the caller as efficient as possible under the assumption that the call is no...
@ CE
Windows NT (Windows on ARM)
initializer< Ty > init(const Ty &Val)
DiagnosticInfoOptimizationBase::Argument NV
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
This is an optimization pass for GlobalISel generic memory operations.
bool succ_empty(const Instruction *I)
auto successors(const MachineBasicBlock *BB)
df_iterator< T > df_begin(const T &G)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
idf_iterator< T > idf_end(const T &G)
auto remove_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::remove_if which take ranges instead of having to pass begin/end explicitly.
idf_iterator< T > idf_begin(const T &G)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
bool pred_empty(const BasicBlock *BB)
df_iterator< T > df_end(const T &G)