60#define DEBUG_TYPE "hotcoldsplit"
62STATISTIC(NumColdRegionsFound,
"Number of cold regions found.");
63STATISTIC(NumColdRegionsOutlined,
"Number of cold regions outlined.");
72 cl::desc(
"Base penalty for splitting cold code (as a "
73 "multiple of TCC_Basic)"));
77 cl::desc(
"Enable placement of extracted cold functions"
78 " into a separate section after hot-cold splitting."));
83 cl::desc(
"Name for the section containing cold functions "
84 "extracted by hot-cold splitting."));
88 cl::desc(
"Maximum number of parameters for a split function"));
92 cl::desc(
"Divisor of cold branch probability."
93 "BranchProbability = 1/ColdBranchProbDenom"));
108 return !(isa<ReturnInst>(
I) || isa<IndirectBrInst>(
I));
123 auto SumWt = TrueWt + FalseWt;
130 if (TrueProb <= ColdProbThresh)
133 if (FalseProb <= ColdProbThresh)
145 if (
auto *CB = dyn_cast<CallBase>(&
I))
146 if (CB->hasFnAttr(Attribute::Cold) &&
147 !CB->getMetadata(LLVMContext::MD_nosanitize))
154 dyn_cast_or_null<CallInst>(BB.getTerminator()->getPrevNode()))
155 if (CI->hasFnAttr(Attribute::NoReturn))
164static bool mayExtractBlock(
const BasicBlock &BB) {
174 return !isa<InvokeInst>(Term) && !isa<ResumeInst>(Term);
181static bool markFunctionCold(
Function &
F,
bool UpdateEntryCount =
false) {
182 assert(!
F.hasOptNone() &&
"Can't mark this cold");
183 bool Changed =
false;
184 if (!
F.hasFnAttribute(Attribute::Cold)) {
185 F.addFnAttr(Attribute::Cold);
188 if (!
F.hasFnAttribute(Attribute::MinSize)) {
189 F.addFnAttr(Attribute::MinSize);
192 if (UpdateEntryCount) {
205bool HotColdSplitting::isFunctionCold(
const Function &
F)
const {
206 if (
F.hasFnAttribute(Attribute::Cold))
218bool HotColdSplitting::isBasicBlockCold(
227 analyzeProfMetadata(BB, ColdProbThresh, AnnotatedColdBlocks);
231 if (AnnotatedColdBlocks.
count(BB))
243bool HotColdSplitting::shouldOutlineFrom(
const Function &
F)
const {
244 if (
F.hasFnAttribute(Attribute::AlwaysInline))
247 if (
F.hasFnAttribute(Attribute::NoInline))
252 if (
F.hasFnAttribute(Attribute::NoReturn))
255 if (
F.hasFnAttribute(Attribute::SanitizeAddress) ||
256 F.hasFnAttribute(Attribute::SanitizeHWAddress) ||
257 F.hasFnAttribute(Attribute::SanitizeThread) ||
258 F.hasFnAttribute(Attribute::SanitizeMemory))
281 unsigned NumInputs,
unsigned NumOutputs) {
283 LLVM_DEBUG(
dbgs() <<
"Applying penalty for splitting: " << Penalty <<
"\n");
292 bool NoBlocksReturn =
true;
304 NoBlocksReturn =
false;
305 SuccsOutsideRegion.
insert(SuccBB);
315 unsigned NumSplitExitPhis = 0;
316 for (
BasicBlock *ExitBB : SuccsOutsideRegion) {
317 for (
PHINode &PN : ExitBB->phis()) {
319 int NumIncomingVals = 0;
320 for (
unsigned i = 0; i < PN.getNumIncomingValues(); ++i)
323 if (NumIncomingVals > 1) {
333 int NumOutputsAndSplitPhis = NumOutputs + NumSplitExitPhis;
334 int NumParams = NumInputs + NumOutputsAndSplitPhis;
336 LLVM_DEBUG(
dbgs() << NumInputs <<
" inputs and " << NumOutputsAndSplitPhis
337 <<
" outputs exceeds parameter limit ("
339 return std::numeric_limits<int>::max();
342 LLVM_DEBUG(
dbgs() <<
"Applying penalty for: " << NumParams <<
" params\n");
343 Penalty += CostForArgMaterialization * NumParams;
347 LLVM_DEBUG(
dbgs() <<
"Applying penalty for: " << NumOutputsAndSplitPhis
348 <<
" outputs/split phis\n");
350 Penalty += CostForRegionOutput * NumOutputsAndSplitPhis;
353 if (NoBlocksReturn) {
355 <<
" non-returning terminators\n");
361 if (SuccsOutsideRegion.
size() > 1) {
363 <<
" non-region successors\n");
371bool HotColdSplitting::isSplittingBeneficial(
CodeExtractor &CE,
379 CE.findInputsOutputs(Inputs, Outputs, Sinks);
381 int OutliningPenalty =
383 LLVM_DEBUG(
dbgs() <<
"Split profitability: benefit = " << OutliningBenefit
384 <<
", penalty = " << OutliningPenalty <<
"\n");
385 if (!OutliningBenefit.
isValid() || OutliningBenefit <= OutliningPenalty)
393Function *HotColdSplitting::extractColdRegion(
398 if (
Function *OutF =
CE.extractCodeRegion(CEAC)) {
399 User *
U = *OutF->user_begin();
401 NumColdRegionsOutlined++;
415 markFunctionCold(*OutF, BFI !=
nullptr);
420 &*EntryPoint.
begin())
421 <<
ore::NV(
"Original", OrigF) <<
" split cold code into "
429 &*EntryPoint.
begin())
430 <<
"Failed to extract region at block "
431 <<
ore::NV(
"Block", &EntryPoint);
437using BlockTy = std::pair<BasicBlock *, unsigned>;
444class OutliningRegion {
456 bool EntireFunctionCold =
false;
459 static unsigned getEntryPointScore(
BasicBlock &BB,
unsigned Score) {
460 return mayExtractBlock(BB) ? Score : 0;
465 static constexpr unsigned ScoreForSuccBlock = 1;
466 static constexpr unsigned ScoreForSinkBlock = 1;
468 OutliningRegion(
const OutliningRegion &) =
delete;
469 OutliningRegion &operator=(
const OutliningRegion &) =
delete;
472 OutliningRegion() =
default;
473 OutliningRegion(OutliningRegion &&) =
default;
474 OutliningRegion &operator=(OutliningRegion &&) =
default;
476 static std::vector<OutliningRegion> create(
BasicBlock &SinkBB,
479 std::vector<OutliningRegion> Regions;
482 Regions.emplace_back();
483 OutliningRegion *ColdRegion = &Regions.back();
485 auto addBlockToRegion = [&](
BasicBlock *BB,
unsigned Score) {
487 ColdRegion->Blocks.emplace_back(BB, Score);
491 unsigned SinkScore = getEntryPointScore(SinkBB, ScoreForSinkBlock);
492 ColdRegion->SuggestedEntryPoint = (SinkScore > 0) ? &SinkBB :
nullptr;
493 unsigned BestScore = SinkScore;
497 auto PredEnd =
idf_end(&SinkBB);
498 while (PredIt != PredEnd) {
500 bool SinkPostDom = PDT.
dominates(&SinkBB, &PredBB);
505 ColdRegion->EntireFunctionCold =
true;
511 if (!SinkPostDom || !mayExtractBlock(PredBB)) {
512 PredIt.skipChildren();
519 unsigned PredScore = getEntryPointScore(PredBB, PredIt.getPathLength());
520 if (PredScore > BestScore) {
521 ColdRegion->SuggestedEntryPoint = &PredBB;
522 BestScore = PredScore;
525 addBlockToRegion(&PredBB, PredScore);
535 if (mayExtractBlock(SinkBB)) {
536 addBlockToRegion(&SinkBB, SinkScore);
538 ColdRegion->EntireFunctionCold =
true;
542 Regions.emplace_back();
543 ColdRegion = &Regions.back();
549 auto SuccEnd =
df_end(&SinkBB);
550 while (SuccIt != SuccEnd) {
552 bool SinkDom = DT.
dominates(&SinkBB, &SuccBB);
555 bool DuplicateBlock = RegionBlocks.
count(&SuccBB);
559 if (DuplicateBlock || !SinkDom || !mayExtractBlock(SuccBB)) {
560 SuccIt.skipChildren();
564 unsigned SuccScore = getEntryPointScore(SuccBB, ScoreForSuccBlock);
565 if (SuccScore > BestScore) {
566 ColdRegion->SuggestedEntryPoint = &SuccBB;
567 BestScore = SuccScore;
570 addBlockToRegion(&SuccBB, SuccScore);
578 bool empty()
const {
return !SuggestedEntryPoint; }
584 bool isEntireFunctionCold()
const {
return EntireFunctionCold; }
588 assert(!empty() && !isEntireFunctionCold() &&
"Nothing to extract");
595 unsigned NextScore = 0;
596 auto RegionEndIt =
Blocks.end();
599 unsigned Score =
Block.second;
601 BB == SuggestedEntryPoint || DT.
dominates(SuggestedEntryPoint, BB);
602 if (!InSubRegion && Score > NextScore) {
606 if (InSubRegion && BB != SuggestedEntryPoint)
610 Blocks.erase(RegionStartIt, RegionEndIt);
613 SuggestedEntryPoint = NextEntryPoint;
620bool HotColdSplitting::outlineColdRegions(
Function &
F,
bool HasProfileSummary) {
640 std::unique_ptr<DominatorTree> DT;
641 std::unique_ptr<PostDominatorTree> PDT;
647 if (HasProfileSummary)
658 unsigned OutlinedFunctionID = 1;
662 if (ColdBlocks.
count(BB))
666 if (CannotBeOutlinedColdBlocks.
count(BB))
669 if (!isBasicBlockCold(BB, ColdProbThresh, AnnotatedColdBlocks, BFI))
673 dbgs() <<
"Found a cold block:\n";
678 DT = std::make_unique<DominatorTree>(
F);
680 PDT = std::make_unique<PostDominatorTree>(
F);
682 auto Regions = OutliningRegion::create(*BB, *DT, *PDT);
683 for (OutliningRegion &
Region : Regions) {
687 if (
Region.isEntireFunctionCold()) {
689 return markFunctionCold(
F);
695 dbgs() <<
"Hot/cold splitting attempting to outline these blocks:\n";
702 SubRegion, &*DT,
false,
nullptr,
705 "cold." + std::to_string(OutlinedFunctionID));
707 if (
CE.isEligible() && isSplittingBeneficial(CE, SubRegion,
TTI) &&
717 ColdBlocks.
insert(SubRegion.begin(), SubRegion.end());
720 for (
auto *
Block : SubRegion)
721 dbgs() <<
" contains cold block:" <<
Block->getName() <<
"\n";
725 std::make_pair(SubRegion[0], std::move(CE)));
726 ++OutlinedFunctionID;
729 for (
auto *
Block : SubRegion)
730 if ((DT->dominates(BB,
Block) && PDT->dominates(
Block, BB)) ||
731 (PDT->dominates(BB,
Block) && DT->dominates(
Block, BB)))
735 }
while (!
Region.empty());
737 ++NumColdRegionsFound;
741 if (OutliningWorklist.
empty())
747 for (
auto &BCE : OutliningWorklist) {
749 extractColdRegion(*BCE.first, BCE.second, CEAC, BFI,
TTI, ORE);
750 assert(Outlined &&
"Should be outlined");
758 bool Changed =
false;
759 bool HasProfileSummary = (M.getProfileSummary(
false) !=
nullptr);
762 if (
F.isDeclaration())
770 if (isFunctionCold(
F)) {
771 Changed |= markFunctionCold(
F);
775 if (!shouldOutlineFrom(
F)) {
781 Changed |= outlineColdRegions(
F, HasProfileSummary);
803 std::unique_ptr<OptimizationRemarkEmitter> ORE;
bbsections Prepares for basic block by splitting functions into clusters of basic blocks
static bool blockEndsInUnreachable(const MachineBasicBlock *MBB)
A no successor, non-return block probably ends in unreachable and is cold.
DenseMap< Block *, BlockRelaxAux > Blocks
std::pair< BasicBlock *, unsigned > BlockTy
A pair of (basic block, score).
static cl::opt< int > SplittingThreshold("hotcoldsplit-threshold", cl::init(2), cl::Hidden, cl::desc("Base penalty for splitting cold code (as a " "multiple of TCC_Basic)"))
static cl::opt< std::string > ColdSectionName("hotcoldsplit-cold-section-name", cl::init("__llvm_cold"), cl::Hidden, cl::desc("Name for the section containing cold functions " "extracted by hot-cold splitting."))
static cl::opt< int > ColdBranchProbDenom("hotcoldsplit-cold-probability-denom", cl::init(100), cl::Hidden, cl::desc("Divisor of cold branch probability." "BranchProbability = 1/ColdBranchProbDenom"))
static cl::opt< int > MaxParametersForSplit("hotcoldsplit-max-params", cl::init(4), cl::Hidden, cl::desc("Maximum number of parameters for a split function"))
static InstructionCost getOutliningBenefit(ArrayRef< BasicBlock * > Region, TargetTransformInfo &TTI)
Get the benefit score of outlining Region.
static cl::opt< bool > EnableColdSection("enable-cold-section", cl::init(false), cl::Hidden, cl::desc("Enable placement of extracted cold functions" " into a separate section after hot-cold splitting."))
static cl::opt< bool > EnableStaticAnalysis("hot-cold-static-analysis", cl::init(true), cl::Hidden)
static int getOutliningPenalty(ArrayRef< BasicBlock * > Region, unsigned NumInputs, unsigned NumOutputs)
Get the penalty score for outlining Region.
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
Module.h This file contains the declarations for the Module class.
FunctionAnalysisManager FAM
This header defines various interfaces for pass management in LLVM.
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
This file contains the declarations for profiling metadata utility functions.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
A container for analyses that lazily runs them and caches their results.
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches,...
const Function * getParent() const
Return the enclosing method, or null if none.
bool isEHPad() const
Return true if this basic block is an exception handling block.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Conditional or Unconditional Branch instruction.
BasicBlock * getSuccessor(unsigned i) const
static BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
BranchProbability getCompl() const
void setCallingConv(CallingConv::ID CC)
This class represents a function call, abstracting a target machine's calling convention.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
StringRef getSection() const
Get the custom section of this global if it has one.
bool hasSection() const
Check if this global has a custom object file section.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
A Module instance is used to store all the information related to an LLVM module.
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
bool dominates(const Instruction *I1, const Instruction *I2) const
Return true if I1 dominates I2.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
bool isColdBlock(const BBType *BB, BFIT *BFI) const
Returns true if BasicBlock BB is considered cold.
bool isFunctionEntryCold(const Function *F) const
Returns true if F has cold function entry.
A vector that has set insertion semantics.
size_type size() const
Determine the number of elements in the SetVector.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Analysis pass providing the TargetTransformInfo.
void dump() const
Support for debugging, callable in GDB: V->dump()
@ Cold
Attempts to make code in the caller as efficient as possible under the assumption that the call is no...
@ CE
Windows NT (Windows on ARM)
initializer< Ty > init(const Ty &Val)
DiagnosticInfoOptimizationBase::Argument NV
This is an optimization pass for GlobalISel generic memory operations.
bool succ_empty(const Instruction *I)
auto successors(const MachineBasicBlock *BB)
df_iterator< T > df_begin(const T &G)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
idf_iterator< T > idf_end(const T &G)
auto remove_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::remove_if which take ranges instead of having to pass begin/end explicitly.
idf_iterator< T > idf_begin(const T &G)
bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
bool pred_empty(const BasicBlock *BB)
df_iterator< T > df_end(const T &G)