63#define DEBUG_TYPE "partial-inlining"
66 "Number of callsites functions partially inlined into.");
67STATISTIC(NumColdOutlinePartialInlined,
"Number of times functions with "
68 "cold outlined regions were partially "
69 "inlined into its caller(s).");
71 "Number of cold single entry/exit regions found.");
73 "Number of cold single entry/exit regions outlined.");
83 cl::desc(
"Disable multi-region partial inlining"));
89 cl::desc(
"Force outline regions with live exits"));
95 cl::desc(
"Mark outline function calls with ColdCC"));
108 cl::desc(
"Minimum ratio comparing relative sizes of each "
109 "outline candidate and original function"));
114 cl::desc(
"Minimum block executions to consider "
115 "its BranchProbabilityInfo valid"));
120 cl::desc(
"Minimum BranchProbability to consider a region cold."));
124 cl::desc(
"Max number of blocks to be partially inlined"));
130 cl::desc(
"Max number of partial inlining. The default is unlimited"));
138 cl::desc(
"Relative frequency of outline region to "
143 cl::desc(
"A debug option to add additional penalty to the computed one."));
147struct FunctionOutliningInfo {
148 FunctionOutliningInfo() =
default;
152 unsigned getNumInlinedBlocks()
const {
return Entries.size() + 1; }
168struct FunctionOutliningMultiRegionInfo {
169 FunctionOutliningMultiRegionInfo() =
default;
172 struct OutlineRegionInfo {
177 ExitBlock(ExitBlock), ReturnBlock(ReturnBlock) {}
187struct PartialInlinerImpl {
196 : GetAssumptionCache(GetAC), LookupAssumptionCache(LookupAC),
197 GetTTI(GTTI), GetBFI(GBFI), GetTLI(GTLI), PSI(ProfSI) {}
207 std::pair<bool, Function *> unswitchFunction(
Function &
F);
213 struct FunctionCloner {
216 FunctionCloner(
Function *
F, FunctionOutliningInfo *OI,
220 FunctionCloner(
Function *
F, FunctionOutliningMultiRegionInfo *OMRI,
230 void normalizeReturnBlock()
const;
233 bool doMultiRegionFunctionOutlining();
240 Function *doSingleRegionFunctionOutlining();
245 typedef std::pair<Function *, BasicBlock *> FuncBodyCallerPair;
251 bool IsFunctionInlined =
false;
255 std::unique_ptr<FunctionOutliningInfo> ClonedOI =
nullptr;
257 std::unique_ptr<FunctionOutliningMultiRegionInfo> ClonedOMRI =
nullptr;
258 std::unique_ptr<BlockFrequencyInfo> ClonedFuncBFI =
nullptr;
265 int NumPartialInlining = 0;
278 getOutliningCallBBRelativeFreq(FunctionCloner &Cloner)
const;
282 bool shouldPartialInline(
CallBase &CB, FunctionCloner &Cloner,
289 bool tryPartialInline(FunctionCloner &Cloner);
294 computeCallsiteToProfCountMap(
Function *DuplicateFunction,
297 bool isLimitReached()
const {
303 if (isa<CallInst>(U) || isa<InvokeInst>(U))
304 return cast<CallBase>(U);
311 return getSupportedCallBase(
User);
314 std::tuple<DebugLoc, BasicBlock *> getOneDebugLoc(
Function &
F)
const {
318 return std::make_tuple(DLoc, Block);
327 std::tuple<InstructionCost, InstructionCost>
328 computeOutliningCosts(FunctionCloner &Cloner)
const;
336 std::unique_ptr<FunctionOutliningInfo>
339 std::unique_ptr<FunctionOutliningMultiRegionInfo>
340 computeOutliningColdRegionsInfo(
Function &
F,
346std::unique_ptr<FunctionOutliningMultiRegionInfo>
347PartialInlinerImpl::computeOutliningColdRegionsInfo(
354 std::unique_ptr<BlockFrequencyInfo> ScopedBFI;
358 BFI = ScopedBFI.get();
363 if (!PSI.hasInstrumentationProfile())
364 return std::unique_ptr<FunctionOutliningMultiRegionInfo>();
366 std::unique_ptr<FunctionOutliningMultiRegionInfo> OutliningInfo =
367 std::make_unique<FunctionOutliningMultiRegionInfo>();
372 for (
auto *Block : BlockList) {
379 <<
"Region dominated by "
380 <<
ore::NV(
"Block", BlockList.front()->getName())
381 <<
" has more than one region exit edge.";
394 return BFI->getBlockProfileCount(BB).value_or(0);
402 OverallFunctionCost += computeBBInlineCost(&BB, FTTI);
404 LLVM_DEBUG(
dbgs() <<
"OverallFunctionCost = " << OverallFunctionCost
413 bool ColdCandidateFound =
false;
415 std::vector<BasicBlock *> DFS;
417 DFS.push_back(CurrEntry);
418 VisitedMap[CurrEntry] =
true;
426 while (!DFS.empty()) {
427 auto *ThisBB = DFS.back();
432 if (PSI.isColdBlock(ThisBB, BFI) ||
438 VisitedMap[*
SI] =
true;
442 if (SuccProb > MinBranchProbability)
445 LLVM_DEBUG(
dbgs() <<
"Found cold edge: " << ThisBB->getName() <<
"->"
447 <<
"\nBranch Probability = " << SuccProb <<
"\n";);
450 DT.getDescendants(*SI, DominateVector);
452 "SI should be reachable and have at least itself as descendant");
455 if (!DominateVector.
front()->hasNPredecessors(1)) {
457 <<
" doesn't have a single predecessor in the "
458 "dominator tree\n";);
464 if (!(ExitBlock = IsSingleExit(DominateVector))) {
466 <<
" doesn't have a unique successor\n";);
471 for (
auto *BB : DominateVector)
472 OutlineRegionCost += computeBBInlineCost(BB, &GetTTI(*BB->getParent()));
482 <<
" inline cost-savings smaller than "
483 <<
ore::NV(
"Cost", MinOutlineRegionCost);
486 LLVM_DEBUG(
dbgs() <<
"ABORT: Outline region cost is smaller than "
487 << MinOutlineRegionCost <<
"\n";);
495 for (
auto *BB : DominateVector)
496 VisitedMap[BB] =
true;
500 FunctionOutliningMultiRegionInfo::OutlineRegionInfo
RegInfo(
501 DominateVector, DominateVector.front(), ExitBlock, ReturnBlock);
502 OutliningInfo->ORI.push_back(
RegInfo);
504 << DominateVector.front()->getName() <<
"\n";);
505 ColdCandidateFound =
true;
506 NumColdRegionsFound++;
510 if (ColdCandidateFound)
511 return OutliningInfo;
513 return std::unique_ptr<FunctionOutliningMultiRegionInfo>();
516std::unique_ptr<FunctionOutliningInfo>
517PartialInlinerImpl::computeOutliningInfo(
Function &
F)
const {
520 if (!BR ||
BR->isUnconditional())
521 return std::unique_ptr<FunctionOutliningInfo>();
530 return isa<ReturnInst>(TI);
534 if (IsReturnBlock(Succ1))
535 return std::make_tuple(Succ1, Succ2);
536 if (IsReturnBlock(Succ2))
537 return std::make_tuple(Succ2, Succ1);
539 return std::make_tuple<BasicBlock *, BasicBlock *>(
nullptr,
nullptr);
544 if (IsSuccessor(Succ1, Succ2))
545 return std::make_tuple(Succ1, Succ2);
546 if (IsSuccessor(Succ2, Succ1))
547 return std::make_tuple(Succ2, Succ1);
549 return std::make_tuple<BasicBlock *, BasicBlock *>(
nullptr,
nullptr);
552 std::unique_ptr<FunctionOutliningInfo> OutliningInfo =
553 std::make_unique<FunctionOutliningInfo>();
556 bool CandidateFound =
false;
571 std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);
574 OutliningInfo->Entries.push_back(CurrEntry);
575 OutliningInfo->ReturnBlock = ReturnBlock;
576 OutliningInfo->NonReturnBlock = NonReturnBlock;
577 CandidateFound =
true;
582 std::tie(CommSucc,
OtherSucc) = GetCommonSucc(Succ1, Succ2);
587 OutliningInfo->Entries.push_back(CurrEntry);
592 return std::unique_ptr<FunctionOutliningInfo>();
596 assert(OutliningInfo->Entries[0] == &
F.front() &&
597 "Function Entry must be the first in Entries vector");
604 auto HasNonEntryPred = [Entries](
BasicBlock *BB) {
606 if (!Entries.count(Pred))
611 auto CheckAndNormalizeCandidate =
612 [Entries, HasNonEntryPred](FunctionOutliningInfo *OutliningInfo) {
615 if (Entries.count(Succ))
617 if (Succ == OutliningInfo->ReturnBlock)
618 OutliningInfo->ReturnBlockPreds.push_back(
E);
619 else if (Succ != OutliningInfo->NonReturnBlock)
623 if (HasNonEntryPred(
E))
629 if (!CheckAndNormalizeCandidate(OutliningInfo.get()))
630 return std::unique_ptr<FunctionOutliningInfo>();
635 BasicBlock *Cand = OutliningInfo->NonReturnBlock;
639 if (HasNonEntryPred(Cand))
646 std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);
647 if (!ReturnBlock || ReturnBlock != OutliningInfo->ReturnBlock)
654 OutliningInfo->Entries.push_back(Cand);
655 OutliningInfo->NonReturnBlock = NonReturnBlock;
656 OutliningInfo->ReturnBlockPreds.push_back(Cand);
657 Entries.insert(Cand);
660 return OutliningInfo;
665 if (
F.hasProfileData())
668 for (
auto *
E : OI.Entries) {
669 BranchInst *BR = dyn_cast<BranchInst>(
E->getTerminator());
670 if (!BR || BR->isUnconditional())
679 FunctionCloner &Cloner)
const {
680 BasicBlock *OutliningCallBB = Cloner.OutlinedFunctions.
back().second;
682 Cloner.ClonedFuncBFI->getBlockFreq(&Cloner.ClonedFunc->getEntryBlock());
683 auto OutliningCallFreq =
684 Cloner.ClonedFuncBFI->getBlockFreq(OutliningCallBB);
688 if (OutliningCallFreq.getFrequency() > EntryFreq.getFrequency())
689 OutliningCallFreq = EntryFreq;
692 OutliningCallFreq.getFrequency(), EntryFreq.getFrequency());
695 return OutlineRegionRelFreq;
710 return OutlineRegionRelFreq;
712 OutlineRegionRelFreq = std::max(
715 return OutlineRegionRelFreq;
718bool PartialInlinerImpl::shouldPartialInline(
730 auto &CalleeTTI = GetTTI(*
Callee);
731 bool RemarksEnabled =
732 Callee->getContext().getDiagHandlerPtr()->isMissedOptRemarkEnabled(
736 GetTLI, GetBFI, &PSI, RemarksEnabled ? &ORE :
nullptr);
741 <<
NV(
"Callee", Cloner.OrigFunc)
742 <<
" should always be fully inlined, not partially";
750 <<
NV(
"Callee", Cloner.OrigFunc) <<
" not partially inlined into "
751 <<
NV(
"Caller", Caller)
752 <<
" because it should never be inlined (cost=never)";
760 <<
NV(
"Callee", Cloner.OrigFunc) <<
" not partially inlined into "
761 <<
NV(
"Caller", Caller) <<
" because too costly to inline (cost="
762 <<
NV(
"Cost", IC.
getCost()) <<
", threshold="
774 if (NormWeightedSavings < WeightedOutliningRcost) {
778 <<
NV(
"Callee", Cloner.OrigFunc) <<
" not partially inlined into "
779 <<
NV(
"Caller", Caller) <<
" runtime overhead (overhead="
780 <<
NV(
"Overhead", (
unsigned)WeightedOutliningRcost.
getFrequency())
782 <<
NV(
"Savings", (
unsigned)NormWeightedSavings.getFrequency())
784 <<
" of making the outlined call is too high";
792 <<
NV(
"Callee", Cloner.OrigFunc) <<
" can be partially inlined into "
793 <<
NV(
"Caller", Caller) <<
" with cost=" <<
NV(
"Cost", IC.
getCost())
804PartialInlinerImpl::computeBBInlineCost(
BasicBlock *BB,
811 switch (
I.getOpcode()) {
812 case Instruction::BitCast:
813 case Instruction::PtrToInt:
814 case Instruction::IntToPtr:
815 case Instruction::Alloca:
816 case Instruction::PHI:
818 case Instruction::GetElementPtr:
819 if (cast<GetElementPtrInst>(&
I)->hasAllZeroIndices())
826 if (
I.isLifetimeStartOrEnd())
829 if (
auto *II = dyn_cast<IntrinsicInst>(&
I)) {
833 for (
Value *Val : II->args())
836 if (
auto *FPMO = dyn_cast<FPMathOperator>(II))
837 FMF = FPMO->getFastMathFlags();
844 if (
CallInst *CI = dyn_cast<CallInst>(&
I)) {
864std::tuple<InstructionCost, InstructionCost>
865PartialInlinerImpl::computeOutliningCosts(FunctionCloner &Cloner)
const {
867 for (
auto FuncBBPair : Cloner.OutlinedFunctions) {
868 Function *OutlinedFunc = FuncBBPair.first;
869 BasicBlock* OutliningCallBB = FuncBBPair.second;
872 auto *OutlinedFuncTTI = &GetTTI(*OutlinedFunc);
873 OutliningFuncCallCost +=
874 computeBBInlineCost(OutliningCallBB, OutlinedFuncTTI);
878 OutlinedFunctionCost += computeBBInlineCost(&BB, OutlinedFuncTTI);
880 assert(OutlinedFunctionCost >= Cloner.OutlinedRegionCost &&
881 "Outlined function cost should be no less than the outlined region");
886 OutlinedFunctionCost -=
890 OutliningFuncCallCost +
891 (OutlinedFunctionCost - Cloner.OutlinedRegionCost) +
894 return std::make_tuple(OutliningFuncCallCost, OutliningRuntimeOverhead);
900void PartialInlinerImpl::computeCallsiteToProfCountMap(
906 std::unique_ptr<BlockFrequencyInfo> TempBFI;
916 CurrentCallerBFI = TempBFI.get();
919 CurrentCallerBFI = &(GetBFI(*Caller));
925 if (isa<BlockAddress>(
User))
929 if (CurrentCaller != Caller) {
931 ComputeCurrBFI(Caller);
933 assert(CurrentCallerBFI &&
"CallerBFI is not set");
938 CallSiteToProfCountMap[
User] = *Count;
940 CallSiteToProfCountMap[
User] = 0;
944PartialInlinerImpl::FunctionCloner::FunctionCloner(
948 : OrigFunc(
F), ORE(ORE), LookupAC(LookupAC), GetTTI(GetTTI) {
949 ClonedOI = std::make_unique<FunctionOutliningInfo>();
955 ClonedOI->ReturnBlock = cast<BasicBlock>(VMap[OI->ReturnBlock]);
956 ClonedOI->NonReturnBlock = cast<BasicBlock>(VMap[OI->NonReturnBlock]);
958 ClonedOI->Entries.push_back(cast<BasicBlock>(VMap[BB]));
962 ClonedOI->ReturnBlockPreds.push_back(NewE);
966 F->replaceAllUsesWith(ClonedFunc);
969PartialInlinerImpl::FunctionCloner::FunctionCloner(
970 Function *
F, FunctionOutliningMultiRegionInfo *OI,
974 : OrigFunc(
F), ORE(ORE), LookupAC(LookupAC), GetTTI(GetTTI) {
975 ClonedOMRI = std::make_unique<FunctionOutliningMultiRegionInfo>();
983 for (
const FunctionOutliningMultiRegionInfo::OutlineRegionInfo &
RegionInfo :
987 Region.push_back(cast<BasicBlock>(VMap[BB]));
993 NewReturnBlock = cast<BasicBlock>(VMap[
RegionInfo.ReturnBlock]);
994 FunctionOutliningMultiRegionInfo::OutlineRegionInfo MappedRegionInfo(
995 Region, NewEntryBlock, NewExitBlock, NewReturnBlock);
996 ClonedOMRI->ORI.push_back(MappedRegionInfo);
1000 F->replaceAllUsesWith(ClonedFunc);
1003void PartialInlinerImpl::FunctionCloner::normalizeReturnBlock()
const {
1007 while (
I != BB->end()) {
1008 PHINode *Phi = dyn_cast<PHINode>(
I);
1028 BasicBlock *PreReturn = ClonedOI->ReturnBlock;
1030 PHINode *FirstPhi = GetFirstPHI(PreReturn);
1031 unsigned NumPredsFromEntries = ClonedOI->ReturnBlockPreds.size();
1038 return PN->getIncomingValue(0);
1042 ClonedOI->ReturnBlock = ClonedOI->ReturnBlock->splitBasicBlock(
1043 ClonedOI->ReturnBlock->getFirstNonPHI()->getIterator());
1047 while (
I != PreReturn->
end()) {
1048 PHINode *OldPhi = dyn_cast<PHINode>(
I);
1055 Ins = ClonedOI->ReturnBlock->getFirstNonPHI();
1058 for (
BasicBlock *
E : ClonedOI->ReturnBlockPreds) {
1067 if (
auto *OldPhiVal = IsTrivialPhi(OldPhi)) {
1073 for (
auto *DP : DeadPhis)
1074 DP->eraseFromParent();
1076 for (
auto *
E : ClonedOI->ReturnBlockPreds)
1077 E->getTerminator()->replaceUsesOfWith(PreReturn, ClonedOI->ReturnBlock);
1080bool PartialInlinerImpl::FunctionCloner::doMultiRegionFunctionOutlining() {
1082 auto ComputeRegionCost =
1086 Cost += computeBBInlineCost(BB, &GetTTI(*BB->getParent()));
1090 assert(ClonedOMRI &&
"Expecting OutlineInfo for multi region outline");
1092 if (ClonedOMRI->ORI.empty())
1108 for (FunctionOutliningMultiRegionInfo::OutlineRegionInfo
RegionInfo :
1114 ClonedFuncBFI.get(), &BPI,
1115 LookupAC(*
RegionInfo.EntryBlock->getParent()),
1118 CE.findInputsOutputs(Inputs, Outputs, Sinks);
1121 dbgs() <<
"inputs: " << Inputs.
size() <<
"\n";
1122 dbgs() <<
"outputs: " << Outputs.
size() <<
"\n";
1124 dbgs() <<
"value used in func: " << *
value <<
"\n";
1125 for (
Value *output : Outputs)
1126 dbgs() <<
"instr used in func: " << *output <<
"\n";
1133 if (
Function *OutlinedFunc =
CE.extractCodeRegion(CEAC)) {
1134 CallBase *OCS = PartialInlinerImpl::getOneCallSiteTo(*OutlinedFunc);
1137 OutlinedFunctions.push_back(std::make_pair(OutlinedFunc,OutliningCallBB));
1138 NumColdRegionsOutlined++;
1139 OutlinedRegionCost += CurrentOutlinedRegionCost;
1149 <<
"Failed to extract region at block "
1154 return !OutlinedFunctions.empty();
1158PartialInlinerImpl::FunctionCloner::doSingleRegionFunctionOutlining() {
1161 auto ToBeInlined = [&,
this](
BasicBlock *BB) {
1162 return BB == ClonedOI->ReturnBlock ||
1166 assert(ClonedOI &&
"Expecting OutlineInfo for single region outline");
1177 std::vector<BasicBlock *> ToExtract;
1178 auto *ClonedFuncTTI = &GetTTI(*ClonedFunc);
1179 ToExtract.push_back(ClonedOI->NonReturnBlock);
1180 OutlinedRegionCost += PartialInlinerImpl::computeBBInlineCost(
1181 ClonedOI->NonReturnBlock, ClonedFuncTTI);
1183 if (!ToBeInlined(BB) && BB != ClonedOI->NonReturnBlock) {
1184 ToExtract.push_back(BB);
1189 OutlinedRegionCost += computeBBInlineCost(BB, ClonedFuncTTI);
1196 ClonedFuncBFI.get(), &BPI, LookupAC(*ClonedFunc),
1202 PartialInlinerImpl::getOneCallSiteTo(*OutlinedFunc)->
getParent();
1204 OutlinedFunctions.push_back(std::make_pair(OutlinedFunc, OutliningCallBB));
1208 &ToExtract.front()->front())
1209 <<
"Failed to extract region at block "
1210 <<
ore::NV(
"Block", ToExtract.front());
1213 return OutlinedFunc;
1216PartialInlinerImpl::FunctionCloner::~FunctionCloner() {
1220 ClonedFunc->eraseFromParent();
1221 if (!IsFunctionInlined) {
1224 for (
auto FuncBBPair : OutlinedFunctions) {
1226 Func->eraseFromParent();
1231std::pair<bool, Function *> PartialInlinerImpl::unswitchFunction(
Function &
F) {
1232 if (
F.hasAddressTaken())
1233 return {
false,
nullptr};
1236 if (
F.hasFnAttribute(Attribute::AlwaysInline))
1237 return {
false,
nullptr};
1239 if (
F.hasFnAttribute(Attribute::NoInline))
1240 return {
false,
nullptr};
1242 if (PSI.isFunctionEntryCold(&
F))
1243 return {
false,
nullptr};
1245 if (
F.users().empty())
1246 return {
false,
nullptr};
1252 if (PSI.hasProfileSummary() &&
F.hasProfileData() &&
1254 std::unique_ptr<FunctionOutliningMultiRegionInfo> OMRI =
1255 computeOutliningColdRegionsInfo(
F, ORE);
1257 FunctionCloner Cloner(&
F, OMRI.get(), ORE, LookupAssumptionCache, GetTTI);
1260 dbgs() <<
"HotCountThreshold = " << PSI.getHotCountThreshold() <<
"\n";
1261 dbgs() <<
"ColdCountThreshold = " << PSI.getColdCountThreshold()
1265 bool DidOutline = Cloner.doMultiRegionFunctionOutlining();
1269 dbgs() <<
">>>>>> Outlined (Cloned) Function >>>>>>\n";
1270 Cloner.ClonedFunc->print(
dbgs());
1271 dbgs() <<
"<<<<<< Outlined (Cloned) Function <<<<<<\n";
1274 if (tryPartialInline(Cloner))
1275 return {
true,
nullptr};
1283 std::unique_ptr<FunctionOutliningInfo> OI = computeOutliningInfo(
F);
1285 return {
false,
nullptr};
1287 FunctionCloner Cloner(&
F, OI.get(), ORE, LookupAssumptionCache, GetTTI);
1288 Cloner.normalizeReturnBlock();
1290 Function *OutlinedFunction = Cloner.doSingleRegionFunctionOutlining();
1292 if (!OutlinedFunction)
1293 return {
false,
nullptr};
1295 if (tryPartialInline(Cloner))
1296 return {
true, OutlinedFunction};
1298 return {
false,
nullptr};
1301bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) {
1302 if (Cloner.OutlinedFunctions.empty())
1305 auto OutliningCosts = computeOutliningCosts(Cloner);
1311 "Expected valid costs");
1316 if (Cloner.ClonedOI)
1317 RelativeToEntryFreq = getOutliningCallBBRelativeFreq(Cloner);
1337 std::tie(DLoc, Block) = getOneDebugLoc(*Cloner.ClonedFunc);
1338 OrigFuncORE.emit([&]() {
1341 <<
ore::NV(
"Function", Cloner.OrigFunc)
1342 <<
" not partially inlined into callers (Original Size = "
1343 <<
ore::NV(
"OutlinedRegionOriginalSize", Cloner.OutlinedRegionCost)
1344 <<
", Size of call sequence to outlined function = "
1345 <<
ore::NV(
"NewSize", SizeCost) <<
")";
1350 assert(Cloner.OrigFunc->users().empty() &&
1351 "F's users should all be replaced!");
1353 std::vector<User *>
Users(Cloner.ClonedFunc->user_begin(),
1354 Cloner.ClonedFunc->user_end());
1357 auto CalleeEntryCount = Cloner.OrigFunc->getEntryCount();
1358 if (CalleeEntryCount)
1359 computeCallsiteToProfCountMap(Cloner.ClonedFunc, CallSiteToProfCountMap);
1362 (CalleeEntryCount ? CalleeEntryCount->getCount() : 0);
1364 bool AnyInline =
false;
1367 if (isa<BlockAddress>(
User))
1372 if (isLimitReached())
1376 if (!shouldPartialInline(*CB, Cloner, WeightedRcost, CallerORE))
1382 OR <<
ore::NV(
"Callee", Cloner.OrigFunc) <<
" partially inlined into "
1389 (Cloner.ClonedOI ? Cloner.OutlinedFunctions.back().first
1397 if (CalleeEntryCountV && CallSiteToProfCountMap.
count(
User)) {
1399 CalleeEntryCountV -= std::min(CalleeEntryCountV, CallSiteCount);
1403 NumPartialInlining++;
1405 if (Cloner.ClonedOI)
1406 NumPartialInlined++;
1408 NumColdOutlinePartialInlined++;
1412 Cloner.IsFunctionInlined =
true;
1413 if (CalleeEntryCount)
1415 CalleeEntryCountV, CalleeEntryCount->getType()));
1417 OrigFuncORE.emit([&]() {
1419 <<
"Partially inlined into at least one caller";
1426bool PartialInlinerImpl::run(
Module &M) {
1430 std::vector<Function *> Worklist;
1431 Worklist.reserve(
M.size());
1433 if (!
F.use_empty() && !
F.isDeclaration())
1434 Worklist.push_back(&
F);
1436 bool Changed =
false;
1437 while (!Worklist.empty()) {
1439 Worklist.pop_back();
1444 std::pair<bool, Function *>
Result = unswitchFunction(*CurrFunc);
1446 Worklist.push_back(
Result.second);
1479 if (PartialInlinerImpl(GetAssumptionCache, LookupAssumptionCache, GetTTI,
1480 GetTLI, PSI, GetBFI)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
amdgpu Simplify well known AMD library false FunctionCallee Callee
static MachineBasicBlock * OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ)
This file contains the simple types necessary to represent the attributes associated with functions a...
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Given that RA is a live value
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
This file builds on the ADT/GraphTraits.h file to build generic depth first graph iterator.
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
iv Induction Variable Users
static cl::opt< int > InstrCost("inline-instr-cost", cl::Hidden, cl::init(5), cl::desc("Cost of a single instruction when inlining"))
Module.h This file contains the declarations for the Module class.
static cl::opt< unsigned > MaxNumInlineBlocks("max-num-inline-blocks", cl::init(5), cl::Hidden, cl::desc("Max number of blocks to be partially inlined"))
static cl::opt< int > OutlineRegionFreqPercent("outline-region-freq-percent", cl::init(75), cl::Hidden, cl::desc("Relative frequency of outline region to " "the entry block"))
static cl::opt< bool > MarkOutlinedColdCC("pi-mark-coldcc", cl::init(false), cl::Hidden, cl::desc("Mark outline function calls with ColdCC"))
static cl::opt< float > MinRegionSizeRatio("min-region-size-ratio", cl::init(0.1), cl::Hidden, cl::desc("Minimum ratio comparing relative sizes of each " "outline candidate and original function"))
static cl::opt< bool > DisableMultiRegionPartialInline("disable-mr-partial-inlining", cl::init(false), cl::Hidden, cl::desc("Disable multi-region partial inlining"))
static cl::opt< unsigned > MinBlockCounterExecution("min-block-execution", cl::init(100), cl::Hidden, cl::desc("Minimum block executions to consider " "its BranchProbabilityInfo valid"))
static cl::opt< int > MaxNumPartialInlining("max-partial-inlining", cl::init(-1), cl::Hidden, cl::desc("Max number of partial inlining. The default is unlimited"))
static cl::opt< bool > DisablePartialInlining("disable-partial-inlining", cl::init(false), cl::Hidden, cl::desc("Disable partial inlining"))
static bool hasProfileData(const Function &F, const FunctionOutliningInfo &OI)
static cl::opt< float > ColdBranchRatio("cold-branch-ratio", cl::init(0.1), cl::Hidden, cl::desc("Minimum BranchProbability to consider a region cold."))
static cl::opt< bool > ForceLiveExit("pi-force-live-exit-outline", cl::init(false), cl::Hidden, cl::desc("Force outline regions with live exits"))
static cl::opt< unsigned > ExtraOutliningPenalty("partial-inlining-extra-penalty", cl::init(0), cl::Hidden, cl::desc("A debug option to add additional penalty to the computed one."))
static cl::opt< bool > SkipCostAnalysis("skip-partial-inlining-cost-analysis", cl::ReallyHidden, cl::desc("Skip Cost Analysis"))
FunctionAnalysisManager FAM
This file contains the declarations for profiling metadata utility functions.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
A container for analyses that lazily runs them and caches their results.
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
const Function * getParent() const
Return the enclosing method, or null if none.
InstListType::iterator iterator
Instruction iterators...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
const Instruction & back() const
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
std::optional< uint64_t > getBlockProfileCount(const BasicBlock *BB, bool AllowSynthetic=false) const
Returns the estimated profile count of BB.
uint64_t getFrequency() const
Returns the frequency as a fixpoint number scaled by the entry frequency.
Conditional or Unconditional Branch instruction.
Analysis providing branch probability information.
static BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
void setCallingConv(CallingConv::ID CC)
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Function * getCaller()
Helper to get the caller (the parent function).
This class represents a function call, abstracting a target machine's calling convention.
A parsed version of the target data layout string in and methods for querying it.
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Implements a dense probed hash-table based set.
void recalculate(ParentType &Func)
recalculate - compute a dominator tree for the given function
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Convenience struct for specifying and reasoning about fast-math flags.
Class to represent profile counts.
const BasicBlock & back() const
void setCallingConv(CallingConv::ID CC)
Module * getParent()
Get the module that this global value is contained inside of...
Represents the cost of inlining a function.
int getCost() const
Get the inline cost estimate.
int getCostDelta() const
Get the cost delta from the threshold for inlining.
This class captures the data input to the InlineFunction call, and records the auxiliary results prod...
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
auto map(const Function &F) const -> InstructionCost
std::optional< CostType > getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
const BasicBlock * getParent() const
A Module instance is used to store all the information related to an LLVM module.
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Value * removeIncomingValue(unsigned Idx, bool DeletePHIIfEmpty=true)
Remove an incoming value.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
Value * getIncomingValueForBlock(const BasicBlock *BB) const
unsigned getNumIncomingValues() const
Return the number of incoming edges.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &)
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
A vector that has set insertion semantics.
size_type size() const
Determine the number of elements in the SetVector.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
An efficient, type-erasing, non-owning reference to a callable.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Cold
Attempts to make code in the caller as efficient as possible under the assumption that the call is no...
@ BR
Control flow instructions. These all have token chains.
@ CE
Windows NT (Windows on ARM)
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
DiagnosticInfoOptimizationBase::Argument NV
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
const_iterator end(StringRef path)
Get end iterator over path.
This is an optimization pass for GlobalISel generic memory operations.
Interval::succ_iterator succ_end(Interval *I)
auto successors(const MachineBasicBlock *BB)
int getCallsiteCost(const CallBase &Call, const DataLayout &DL)
Return the cost associated with a callsite, including parameter passing and the call/return instructi...
Interval::succ_iterator succ_begin(Interval *I)
succ_begin/succ_end - define methods so that Intervals may be used just like BasicBlocks can with the...
InlineResult isInlineViable(Function &Callee)
Minimal filter to detect invalid constructs for inlining.
InlineCost getInlineCost(CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI, function_ref< AssumptionCache &(Function &)> GetAssumptionCache, function_ref< const TargetLibraryInfo &(Function &)> GetTLI, function_ref< BlockFrequencyInfo &(Function &)> GetBFI=nullptr, ProfileSummaryInfo *PSI=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
Get an InlineCost object representing the cost of inlining this callsite.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
InlineResult InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, bool MergeAttributes=false, AAResults *CalleeAAR=nullptr, bool InsertLifetime=true, Function *ForwardVarArgsTo=nullptr)
This function inlines the called function into the basic block of the caller.
InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options.
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
iterator_range< df_iterator< T > > depth_first(const T &G)
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list.
unsigned succ_size(const MachineBasicBlock *BB)
bool hasBranchWeightMD(const Instruction &I)
Checks if an instructions has Branch Weight Metadata.
Function * CloneFunction(Function *F, ValueToValueMapTy &VMap, ClonedCodeInfo *CodeInfo=nullptr)
Return a copy of the specified function and add it to that function's module.