31#include "llvm/Config/llvm-config.h"
52#define DEBUG_TYPE "inline-cost"
54STATISTIC(NumCallsAnalyzed,
"Number of call sites analyzed");
58 cl::desc(
"Default amount of inlining to perform"));
67 cl::desc(
"Ignore TTI attributes compatibility check between callee/caller "
68 "during inline cost calculation"));
72 cl::desc(
"Prints comments for instruction based on inline cost analysis"));
76 cl::desc(
"Control the amount of inlining to perform (default = 225)"));
80 cl::desc(
"Threshold for inlining functions with inline hint"));
85 cl::desc(
"Threshold for inlining cold callsites"));
89 cl::desc(
"Enable the cost-benefit analysis for the inliner"));
96 cl::desc(
"Multiplier to multiply cycle savings by during inlining"));
103 cl::desc(
"A multiplier on top of cycle savings to decide whether the "
104 "savings won't justify the cost"));
108 cl::desc(
"The maximum size of a callee that get's "
109 "inlined without sufficient cycle savings"));
116 cl::desc(
"Threshold for inlining functions with cold attribute"));
120 cl::desc(
"Threshold for hot callsites "));
124 cl::desc(
"Threshold for locally hot callsites "));
128 cl::desc(
"Maximum block frequency, expressed as a percentage of caller's "
129 "entry frequency, for a callsite to be cold in the absence of "
130 "profile information."));
134 cl::desc(
"Minimum block frequency, expressed as a multiple of caller's "
135 "entry frequency, for a callsite to be hot in the absence of "
136 "profile information."));
140 cl::desc(
"Cost of a single instruction when inlining"));
144 cl::desc(
"Cost of load/store instruction when inlining"));
148 cl::desc(
"Call penalty that is applied per callsite when inlining"));
152 cl::init(std::numeric_limits<size_t>::max()),
153 cl::desc(
"Do not inline functions with a stack size "
154 "that exceeds the specified limit"));
159 cl::desc(
"Do not inline recursive functions with a stack "
160 "size that exceeds the specified limit"));
164 cl::desc(
"Compute the full inline cost of a call site even when the cost "
165 "exceeds the threshold."));
169 cl::desc(
"Allow inlining when caller has a superset of callee's nobuiltin "
174 cl::desc(
"Disables evaluation of GetElementPtr with constant operands"));
194namespace InlineConstants {
202class InlineCostCallAnalyzer;
206struct InstructionCostDetail {
209 int ThresholdBefore = 0;
210 int ThresholdAfter = 0;
212 int getThresholdDelta()
const {
return ThresholdAfter - ThresholdBefore; }
214 int getCostDelta()
const {
return CostAfter - CostBefore; }
216 bool hasThresholdChanged()
const {
return ThresholdAfter != ThresholdBefore; }
221 InlineCostCallAnalyzer *
const ICCA;
224 InlineCostAnnotationWriter(InlineCostCallAnalyzer *ICCA) : ICCA(ICCA) {}
237class CallAnalyzer :
public InstVisitor<CallAnalyzer, bool> {
242 virtual ~CallAnalyzer() =
default;
271 virtual void onBlockStart(
const BasicBlock *BB) {}
274 virtual void onBlockAnalyzed(
const BasicBlock *BB) {}
277 virtual void onInstructionAnalysisStart(
const Instruction *
I) {}
280 virtual void onInstructionAnalysisFinish(
const Instruction *
I) {}
290 virtual bool shouldStop() {
return false; }
299 virtual void onDisableSROA(
AllocaInst *Arg) {}
302 virtual void onDisableLoadElimination() {}
306 virtual bool onCallBaseVisitStart(
CallBase &Call) {
return true; }
309 virtual void onCallPenalty() {}
312 virtual void onMemAccess(){};
316 virtual void onLoadEliminationOpportunity() {}
320 virtual void onCallArgumentSetup(
const CallBase &Call) {}
323 virtual void onLoadRelativeIntrinsic() {}
331 virtual bool onJumpTable(
unsigned JumpTableSize) {
return true; }
335 virtual bool onCaseCluster(
unsigned NumCaseCluster) {
return true; }
339 virtual void onFinalizeSwitch(
unsigned JumpTableSize,
340 unsigned NumCaseCluster) {}
344 virtual void onMissedSimplification() {}
347 virtual void onInitializeSROAArg(
AllocaInst *Arg) {}
350 virtual void onAggregateSROAUse(
AllocaInst *V) {}
352 bool handleSROA(
Value *V,
bool DoNotDisable) {
354 if (
auto *SROAArg = getSROAArgForValueOrNull(V)) {
356 onAggregateSROAUse(SROAArg);
359 disableSROAForArg(SROAArg);
364 bool IsCallerRecursive =
false;
365 bool IsRecursiveCall =
false;
366 bool ExposesReturnsTwice =
false;
367 bool HasDynamicAlloca =
false;
368 bool ContainsNoDuplicateCall =
false;
369 bool HasReturn =
false;
370 bool HasIndirectBr =
false;
371 bool HasUninlineableIntrinsic =
false;
372 bool InitsVargArgs =
false;
376 unsigned NumInstructions = 0;
377 unsigned NumVectorInstructions = 0;
408 bool EnableLoadElimination =
true;
411 bool AllowRecursiveCall =
false;
416 auto It = SROAArgValues.
find(V);
417 if (It == SROAArgValues.
end() || EnabledSROAAllocas.
count(It->second) == 0)
423 bool isAllocaDerivedArg(
Value *V);
425 void disableSROA(
Value *V);
427 void disableLoadElimination();
433 bool simplifyIntrinsicCallIsConstant(
CallBase &CB);
434 bool simplifyIntrinsicCallObjectSize(
CallBase &CB);
446 bool isKnownNonNullInCallee(
Value *V);
449 bool allowSizeGrowth(
CallBase &Call);
500 :
TTI(
TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI),
502 CandidateCall(
Call) {}
506 std::optional<Constant *> getSimplifiedValue(
Instruction *
I) {
508 return SimplifiedValues[
I];
514 unsigned NumConstantArgs = 0;
515 unsigned NumConstantOffsetPtrArgs = 0;
516 unsigned NumAllocaArgs = 0;
517 unsigned NumConstantPtrCmps = 0;
518 unsigned NumConstantPtrDiffs = 0;
519 unsigned NumInstructionsSimplified = 0;
539int64_t getExpectedNumberOfCompare(
int NumCaseCluster) {
540 return 3 *
static_cast<int64_t
>(NumCaseCluster) / 2 - 1;
545class InlineCostCallAnalyzer final :
public CallAnalyzer {
546 const bool ComputeFullInlineCost;
547 int LoadEliminationCost = 0;
552 int SingleBBBonus = 0;
567 int StaticBonusApplied = 0;
570 const bool BoostIndirectCalls;
573 const bool IgnoreThreshold;
576 const bool CostBenefitAnalysisEnabled;
587 int CostAtBBStart = 0;
594 bool DecidedByCostThreshold =
false;
597 bool DecidedByCostBenefit =
false;
602 bool SingleBB =
true;
604 unsigned SROACostSavings = 0;
605 unsigned SROACostSavingsLost = 0;
621 std::optional<int> getHotCallSiteThreshold(
CallBase &Call,
625 void addCost(int64_t Inc) {
626 Inc = std::clamp<int64_t>(Inc, INT_MIN, INT_MAX);
627 Cost = std::clamp<int64_t>(Inc +
Cost, INT_MIN, INT_MAX);
630 void onDisableSROA(
AllocaInst *Arg)
override {
631 auto CostIt = SROAArgCosts.
find(Arg);
632 if (CostIt == SROAArgCosts.
end())
634 addCost(CostIt->second);
635 SROACostSavings -= CostIt->second;
636 SROACostSavingsLost += CostIt->second;
637 SROAArgCosts.
erase(CostIt);
640 void onDisableLoadElimination()
override {
641 addCost(LoadEliminationCost);
642 LoadEliminationCost = 0;
645 bool onCallBaseVisitStart(
CallBase &Call)
override {
646 if (std::optional<int> AttrCallThresholdBonus =
648 Threshold += *AttrCallThresholdBonus;
650 if (std::optional<int> AttrCallCost =
652 addCost(*AttrCallCost);
660 void onCallPenalty()
override { addCost(
CallPenalty); }
664 void onCallArgumentSetup(
const CallBase &Call)
override {
669 void onLoadRelativeIntrinsic()
override {
674 bool IsIndirectCall)
override {
683 if (IsIndirectCall && BoostIndirectCalls) {
684 auto IndirectCallParams = Params;
689 InlineCostCallAnalyzer CA(*
F, Call, IndirectCallParams,
TTI,
690 GetAssumptionCache, GetBFI, PSI, ORE,
false);
691 if (CA.analyze().isSuccess()) {
694 Cost -= std::max(0, CA.getThreshold() - CA.getCost());
701 void onFinalizeSwitch(
unsigned JumpTableSize,
702 unsigned NumCaseCluster)
override {
714 if (NumCaseCluster <= 3) {
720 int64_t ExpectedNumberOfCompare =
721 getExpectedNumberOfCompare(NumCaseCluster);
722 int64_t SwitchCost = ExpectedNumberOfCompare * 2 *
InstrCost;
726 void onMissedSimplification()
override { addCost(
InstrCost); }
728 void onInitializeSROAArg(
AllocaInst *Arg)
override {
730 "Should not initialize SROA costs for null value.");
732 SROACostSavings += SROAArgCost;
733 SROAArgCosts[Arg] = SROAArgCost;
736 void onAggregateSROAUse(
AllocaInst *SROAArg)
override {
737 auto CostIt = SROAArgCosts.
find(SROAArg);
739 "expected this argument to have a cost");
744 void onBlockStart(
const BasicBlock *BB)
override { CostAtBBStart =
Cost; }
746 void onBlockAnalyzed(
const BasicBlock *BB)
override {
747 if (CostBenefitAnalysisEnabled) {
750 assert(GetBFI &&
"GetBFI must be available");
752 assert(BFI &&
"BFI must be available");
755 ColdSize +=
Cost - CostAtBBStart;
763 if (SingleBB && TI->getNumSuccessors() > 1) {
765 Threshold -= SingleBBBonus;
770 void onInstructionAnalysisStart(
const Instruction *
I)
override {
775 InstructionCostDetailMap[
I].CostBefore =
Cost;
776 InstructionCostDetailMap[
I].ThresholdBefore = Threshold;
779 void onInstructionAnalysisFinish(
const Instruction *
I)
override {
784 InstructionCostDetailMap[
I].CostAfter =
Cost;
785 InstructionCostDetailMap[
I].ThresholdAfter = Threshold;
788 bool isCostBenefitAnalysisEnabled() {
789 if (!PSI || !PSI->hasProfileSummary())
801 if (!(PSI->hasInstrumentationProfile() || PSI->hasSampleProfile()))
806 if (!
Caller->getEntryCount())
814 if (!PSI->isHotCallSite(CandidateCall, CallerBFI))
818 auto EntryCount =
F.getEntryCount();
819 if (!EntryCount || !EntryCount->getCount())
830 unsigned getInliningCostBenefitAnalysisSavingsMultiplier()
const {
837 unsigned getInliningCostBenefitAnalysisProfitableMultiplier()
const {
843 void OverrideCycleSavingsAndSizeForTesting(
APInt &CycleSavings,
int &
Size) {
845 CandidateCall,
"inline-cycle-savings-for-test")) {
846 CycleSavings = *AttrCycleSavings;
850 CandidateCall,
"inline-runtime-cost-for-test")) {
851 Size = *AttrRuntimeCost;
858 std::optional<bool> costBenefitAnalysis() {
859 if (!CostBenefitAnalysisEnabled)
883 APInt CycleSavings(128, 0);
886 APInt CurrentSavings(128, 0);
890 if (BI->isConditional() &&
891 isa_and_nonnull<ConstantInt>(
892 SimplifiedValues.
lookup(BI->getCondition()))) {
895 }
else if (
SwitchInst *SI = dyn_cast<SwitchInst>(&
I)) {
896 if (isa_and_present<ConstantInt>(SimplifiedValues.
lookup(
SI->getCondition())))
898 }
else if (
Value *V = dyn_cast<Value>(&
I)) {
900 if (SimplifiedValues.
count(V)) {
908 CycleSavings += CurrentSavings;
912 auto EntryProfileCount =
F.getEntryCount();
913 assert(EntryProfileCount && EntryProfileCount->getCount());
914 auto EntryCount = EntryProfileCount->getCount();
915 CycleSavings += EntryCount / 2;
916 CycleSavings = CycleSavings.
udiv(EntryCount);
919 auto *CallerBB = CandidateCall.
getParent();
933 OverrideCycleSavingsAndSizeForTesting(CycleSavings,
Size);
957 APInt Threshold(128, PSI->getOrCompHotCountThreshold());
960 APInt UpperBoundCycleSavings = CycleSavings;
961 UpperBoundCycleSavings *= getInliningCostBenefitAnalysisSavingsMultiplier();
962 if (UpperBoundCycleSavings.
uge(Threshold))
965 APInt LowerBoundCycleSavings = CycleSavings;
966 LowerBoundCycleSavings *=
967 getInliningCostBenefitAnalysisProfitableMultiplier();
968 if (LowerBoundCycleSavings.
ult(Threshold))
982 if (
Caller->hasMinSize()) {
988 if (DeadBlocks.
count(
L->getHeader()))
998 if (NumVectorInstructions <= NumInstructions / 10)
999 Threshold -= VectorBonus;
1000 else if (NumVectorInstructions <= NumInstructions / 2)
1001 Threshold -= VectorBonus / 2;
1003 if (std::optional<int> AttrCost =
1010 Cost *= *AttrCostMult;
1012 if (std::optional<int> AttrThreshold =
1014 Threshold = *AttrThreshold;
1016 if (
auto Result = costBenefitAnalysis()) {
1017 DecidedByCostBenefit =
true;
1024 if (IgnoreThreshold)
1027 DecidedByCostThreshold =
true;
1028 return Cost < std::max(1, Threshold)
1033 bool shouldStop()
override {
1034 if (IgnoreThreshold || ComputeFullInlineCost)
1038 if (
Cost < Threshold)
1040 DecidedByCostThreshold =
true;
1044 void onLoadEliminationOpportunity()
override {
1059 assert(NumInstructions == 0);
1060 assert(NumVectorInstructions == 0);
1063 updateThreshold(CandidateCall,
F);
1069 assert(SingleBBBonus >= 0);
1070 assert(VectorBonus >= 0);
1075 Threshold += (SingleBBBonus + VectorBonus);
1089 if (
Cost >= Threshold && !ComputeFullInlineCost)
1096 InlineCostCallAnalyzer(
1103 bool IgnoreThreshold =
false)
1104 : CallAnalyzer(
Callee,
Call,
TTI, GetAssumptionCache, GetBFI, PSI, ORE),
1106 Params.ComputeFullInlineCost || ORE ||
1107 isCostBenefitAnalysisEnabled()),
1109 BoostIndirectCalls(BoostIndirect), IgnoreThreshold(IgnoreThreshold),
1110 CostBenefitAnalysisEnabled(isCostBenefitAnalysisEnabled()),
1116 InlineCostAnnotationWriter Writer;
1124 std::optional<InstructionCostDetail> getCostDetails(
const Instruction *
I) {
1125 if (InstructionCostDetailMap.
contains(
I))
1126 return InstructionCostDetailMap[
I];
1127 return std::nullopt;
1130 virtual ~InlineCostCallAnalyzer() =
default;
1131 int getThreshold()
const {
return Threshold; }
1132 int getCost()
const {
return Cost; }
1133 int getStaticBonusApplied()
const {
return StaticBonusApplied; }
1134 std::optional<CostBenefitPair> getCostBenefitPair() {
return CostBenefit; }
1135 bool wasDecidedByCostBenefit()
const {
return DecidedByCostBenefit; }
1136 bool wasDecidedByCostThreshold()
const {
return DecidedByCostThreshold; }
1140static bool isSoleCallToLocalFunction(
const CallBase &CB,
1142 return Callee.hasLocalLinkage() &&
Callee.hasOneLiveUse() &&
1146class InlineCostFeaturesAnalyzer final :
public CallAnalyzer {
1153 static constexpr int JTCostMultiplier = 4;
1154 static constexpr int CaseClusterCostMultiplier = 2;
1155 static constexpr int SwitchCostMultiplier = 2;
1159 unsigned SROACostSavingOpportunities = 0;
1160 int VectorBonus = 0;
1161 int SingleBBBonus = 0;
1167 Cost[
static_cast<size_t>(Feature)] += Delta;
1171 Cost[
static_cast<size_t>(Feature)] =
Value;
1174 void onDisableSROA(
AllocaInst *Arg)
override {
1175 auto CostIt = SROACosts.
find(Arg);
1176 if (CostIt == SROACosts.
end())
1179 increment(InlineCostFeatureIndex::sroa_losses, CostIt->second);
1180 SROACostSavingOpportunities -= CostIt->second;
1181 SROACosts.
erase(CostIt);
1184 void onDisableLoadElimination()
override {
1185 set(InlineCostFeatureIndex::load_elimination, 1);
1188 void onCallPenalty()
override {
1189 increment(InlineCostFeatureIndex::call_penalty,
CallPenalty);
1192 void onCallArgumentSetup(
const CallBase &Call)
override {
1193 increment(InlineCostFeatureIndex::call_argument_setup,
1197 void onLoadRelativeIntrinsic()
override {
1198 increment(InlineCostFeatureIndex::load_relative_intrinsic, 3 *
InstrCost);
1202 bool IsIndirectCall)
override {
1203 increment(InlineCostFeatureIndex::lowered_call_arg_setup,
1206 if (IsIndirectCall) {
1220 InlineCostCallAnalyzer CA(*
F, Call, IndirectCallParams,
TTI,
1221 GetAssumptionCache, GetBFI, PSI, ORE,
false,
1223 if (CA.analyze().isSuccess()) {
1224 increment(InlineCostFeatureIndex::nested_inline_cost_estimate,
1226 increment(InlineCostFeatureIndex::nested_inlines, 1);
1233 void onFinalizeSwitch(
unsigned JumpTableSize,
1234 unsigned NumCaseCluster)
override {
1236 if (JumpTableSize) {
1237 int64_t JTCost =
static_cast<int64_t
>(JumpTableSize) *
InstrCost +
1239 increment(InlineCostFeatureIndex::jump_table_penalty, JTCost);
1243 if (NumCaseCluster <= 3) {
1244 increment(InlineCostFeatureIndex::case_cluster_penalty,
1245 NumCaseCluster * CaseClusterCostMultiplier *
InstrCost);
1249 int64_t ExpectedNumberOfCompare =
1250 getExpectedNumberOfCompare(NumCaseCluster);
1252 int64_t SwitchCost =
1253 ExpectedNumberOfCompare * SwitchCostMultiplier *
InstrCost;
1254 increment(InlineCostFeatureIndex::switch_penalty, SwitchCost);
1257 void onMissedSimplification()
override {
1258 increment(InlineCostFeatureIndex::unsimplified_common_instructions,
1262 void onInitializeSROAArg(
AllocaInst *Arg)
override {
1264 SROACosts[Arg] = SROAArgCost;
1265 SROACostSavingOpportunities += SROAArgCost;
1268 void onAggregateSROAUse(
AllocaInst *Arg)
override {
1270 SROACostSavingOpportunities +=
InstrCost;
1273 void onBlockAnalyzed(
const BasicBlock *BB)
override {
1275 set(InlineCostFeatureIndex::is_multiple_blocks, 1);
1276 Threshold -= SingleBBBonus;
1281 if (
Caller->hasMinSize()) {
1284 for (
Loop *L : LI) {
1286 if (DeadBlocks.
count(
L->getHeader()))
1288 increment(InlineCostFeatureIndex::num_loops,
1292 set(InlineCostFeatureIndex::dead_blocks, DeadBlocks.
size());
1293 set(InlineCostFeatureIndex::simplified_instructions,
1294 NumInstructionsSimplified);
1295 set(InlineCostFeatureIndex::constant_args, NumConstantArgs);
1296 set(InlineCostFeatureIndex::constant_offset_ptr_args,
1297 NumConstantOffsetPtrArgs);
1298 set(InlineCostFeatureIndex::sroa_savings, SROACostSavingOpportunities);
1300 if (NumVectorInstructions <= NumInstructions / 10)
1301 Threshold -= VectorBonus;
1302 else if (NumVectorInstructions <= NumInstructions / 2)
1303 Threshold -= VectorBonus / 2;
1305 set(InlineCostFeatureIndex::threshold, Threshold);
1310 bool shouldStop()
override {
return false; }
1312 void onLoadEliminationOpportunity()
override {
1313 increment(InlineCostFeatureIndex::load_elimination, 1);
1317 increment(InlineCostFeatureIndex::callsite_cost,
1320 set(InlineCostFeatureIndex::cold_cc_penalty,
1323 set(InlineCostFeatureIndex::last_call_to_static_bonus,
1324 isSoleCallToLocalFunction(CandidateCall,
F));
1329 int SingleBBBonusPercent = 50;
1333 SingleBBBonus = Threshold * SingleBBBonusPercent / 100;
1334 VectorBonus = Threshold * VectorBonusPercent / 100;
1335 Threshold += (SingleBBBonus + VectorBonus);
1341 InlineCostFeaturesAnalyzer(
1347 : CallAnalyzer(
Callee,
Call,
TTI, GetAssumptionCache, GetBFI, PSI) {}
1355bool CallAnalyzer::isAllocaDerivedArg(
Value *V) {
1356 return SROAArgValues.
count(V);
1359void CallAnalyzer::disableSROAForArg(
AllocaInst *SROAArg) {
1360 onDisableSROA(SROAArg);
1361 EnabledSROAAllocas.
erase(SROAArg);
1362 disableLoadElimination();
1365void InlineCostAnnotationWriter::emitInstructionAnnot(
1370 std::optional<InstructionCostDetail>
Record = ICCA->getCostDetails(
I);
1372 OS <<
"; No analysis for the instruction";
1374 OS <<
"; cost before = " <<
Record->CostBefore
1375 <<
", cost after = " <<
Record->CostAfter
1376 <<
", threshold before = " <<
Record->ThresholdBefore
1377 <<
", threshold after = " <<
Record->ThresholdAfter <<
", ";
1378 OS <<
"cost delta = " <<
Record->getCostDelta();
1379 if (
Record->hasThresholdChanged())
1380 OS <<
", threshold delta = " <<
Record->getThresholdDelta();
1382 auto C = ICCA->getSimplifiedValue(
const_cast<Instruction *
>(
I));
1384 OS <<
", simplified to ";
1385 (*C)->print(
OS,
true);
1391void CallAnalyzer::disableSROA(
Value *V) {
1392 if (
auto *SROAArg = getSROAArgForValueOrNull(V)) {
1393 disableSROAForArg(SROAArg);
1397void CallAnalyzer::disableLoadElimination() {
1398 if (EnableLoadElimination) {
1399 onDisableLoadElimination();
1400 EnableLoadElimination =
false;
1409 unsigned IntPtrWidth =
DL.getIndexTypeSizeInBits(
GEP.getType());
1413 GTI != GTE; ++GTI) {
1414 ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
1416 if (
Constant *SimpleOp = SimplifiedValues.
lookup(GTI.getOperand()))
1417 OpC = dyn_cast<ConstantInt>(SimpleOp);
1424 if (
StructType *STy = GTI.getStructTypeOrNull()) {
1431 APInt TypeSize(IntPtrWidth,
DL.getTypeAllocSize(GTI.getIndexedType()));
1443 for (
const Use &
Op :
GEP.indices())
1454 disableSROA(
I.getOperand(0));
1458 if (
I.isArrayAllocation()) {
1460 if (
auto *AllocSize = dyn_cast_or_null<ConstantInt>(
Size)) {
1469 Type *Ty =
I.getAllocatedType();
1471 AllocSize->getLimitedValue(),
1472 DL.getTypeAllocSize(Ty).getKnownMinValue(), AllocatedSize);
1474 HasDynamicAlloca =
true;
1480 if (
I.isStaticAlloca()) {
1481 Type *Ty =
I.getAllocatedType();
1482 AllocatedSize =
SaturatingAdd(
DL.getTypeAllocSize(Ty).getKnownMinValue(),
1490 if (!
I.isStaticAlloca())
1491 HasDynamicAlloca =
true;
1496bool CallAnalyzer::visitPHI(
PHINode &
I) {
1508 bool CheckSROA =
I.getType()->isPointerTy();
1512 std::pair<Value *, APInt> FirstBaseAndOffset = {
nullptr, ZeroOffset};
1513 Value *FirstV =
nullptr;
1515 for (
unsigned i = 0, e =
I.getNumIncomingValues(); i != e; ++i) {
1518 if (DeadBlocks.
count(Pred))
1522 BasicBlock *KnownSuccessor = KnownSuccessors[Pred];
1523 if (KnownSuccessor && KnownSuccessor !=
I.getParent())
1526 Value *
V =
I.getIncomingValue(i);
1533 C = SimplifiedValues.
lookup(V);
1535 std::pair<Value *, APInt> BaseAndOffset = {
nullptr, ZeroOffset};
1536 if (!
C && CheckSROA)
1537 BaseAndOffset = ConstantOffsetPtrs.
lookup(V);
1539 if (!
C && !BaseAndOffset.first)
1556 if (FirstBaseAndOffset == BaseAndOffset)
1570 FirstBaseAndOffset = BaseAndOffset;
1575 SimplifiedValues[&
I] = FirstC;
1580 if (FirstBaseAndOffset.first) {
1581 ConstantOffsetPtrs[&
I] = FirstBaseAndOffset;
1583 if (
auto *SROAArg = getSROAArgForValueOrNull(FirstV))
1584 SROAArgValues[&
I] = SROAArg;
1596 std::pair<Value *, APInt> BaseAndOffset =
1597 ConstantOffsetPtrs.
lookup(
I.getPointerOperand());
1598 if (!BaseAndOffset.first)
1603 if (!accumulateGEPOffset(cast<GEPOperator>(
I), BaseAndOffset.second))
1607 ConstantOffsetPtrs[&
I] = BaseAndOffset;
1613 auto *SROAArg = getSROAArgForValueOrNull(
I.getPointerOperand());
1617 for (
const Use &
Op :
GEP.indices())
1618 if (!isa<Constant>(
Op) && !SimplifiedValues.
lookup(
Op))
1627 if ((
I.isInBounds() && canFoldInboundsGEP(
I)) || IsGEPOffsetConstant(
I)) {
1629 SROAArgValues[&
I] = SROAArg;
1637 disableSROAForArg(SROAArg);
1638 return isGEPFree(
I);
1642bool CallAnalyzer::simplifyInstruction(
Instruction &
I) {
1655 SimplifiedValues[&
I] =
C;
1668bool CallAnalyzer::simplifyIntrinsicCallIsConstant(
CallBase &CB) {
1670 auto *
C = dyn_cast<Constant>(Arg);
1673 C = dyn_cast_or_null<Constant>(SimplifiedValues.
lookup(Arg));
1680bool CallAnalyzer::simplifyIntrinsicCallObjectSize(
CallBase &CB) {
1688 Constant *
C = dyn_cast_or_null<Constant>(V);
1690 SimplifiedValues[&CB] =
C;
1700 std::pair<Value *, APInt> BaseAndOffset =
1701 ConstantOffsetPtrs.
lookup(
I.getOperand(0));
1703 if (BaseAndOffset.first)
1704 ConstantOffsetPtrs[&
I] = BaseAndOffset;
1707 if (
auto *SROAArg = getSROAArgForValueOrNull(
I.getOperand(0)))
1708 SROAArgValues[&
I] = SROAArg;
1721 unsigned IntegerSize =
I.getType()->getScalarSizeInBits();
1722 unsigned AS =
I.getOperand(0)->getType()->getPointerAddressSpace();
1723 if (IntegerSize ==
DL.getPointerSizeInBits(AS)) {
1724 std::pair<Value *, APInt> BaseAndOffset =
1725 ConstantOffsetPtrs.
lookup(
I.getOperand(0));
1726 if (BaseAndOffset.first)
1727 ConstantOffsetPtrs[&
I] = BaseAndOffset;
1737 if (
auto *SROAArg = getSROAArgForValueOrNull(
I.getOperand(0)))
1738 SROAArgValues[&
I] = SROAArg;
1752 unsigned IntegerSize =
Op->getType()->getScalarSizeInBits();
1753 if (IntegerSize <=
DL.getPointerTypeSizeInBits(
I.getType())) {
1754 std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.
lookup(
Op);
1755 if (BaseAndOffset.first)
1756 ConstantOffsetPtrs[&
I] = BaseAndOffset;
1760 if (
auto *SROAArg = getSROAArgForValueOrNull(
Op))
1761 SROAArgValues[&
I] = SROAArg;
1767bool CallAnalyzer::visitCastInst(
CastInst &
I) {
1774 disableSROA(
I.getOperand(0));
1779 switch (
I.getOpcode()) {
1780 case Instruction::FPTrunc:
1781 case Instruction::FPExt:
1782 case Instruction::UIToFP:
1783 case Instruction::SIToFP:
1784 case Instruction::FPToUI:
1785 case Instruction::FPToSI:
1801bool CallAnalyzer::isKnownNonNullInCallee(
Value *V) {
1807 if (
Argument *
A = dyn_cast<Argument>(V))
1808 if (paramHasAttr(
A, Attribute::NonNull))
1814 if (isAllocaDerivedArg(V))
1823bool CallAnalyzer::allowSizeGrowth(
CallBase &Call) {
1839 if (
InvokeInst *II = dyn_cast<InvokeInst>(&Call)) {
1840 if (isa<UnreachableInst>(II->getNormalDest()->getTerminator()))
1842 }
else if (isa<UnreachableInst>(
Call.getParent()->getTerminator()))
1848bool InlineCostCallAnalyzer::isColdCallSite(
CallBase &Call,
1852 if (PSI && PSI->hasProfileSummary())
1853 return PSI->isColdCallSite(Call, CallerBFI);
1864 auto CallSiteBB =
Call.getParent();
1865 auto CallSiteFreq = CallerBFI->
getBlockFreq(CallSiteBB);
1866 auto CallerEntryFreq =
1868 return CallSiteFreq < CallerEntryFreq * ColdProb;
1872InlineCostCallAnalyzer::getHotCallSiteThreshold(
CallBase &Call,
1877 if (PSI && PSI->hasProfileSummary() && PSI->isHotCallSite(Call, CallerBFI))
1883 return std::nullopt;
1893 if (Limit && CallSiteFreq >= *Limit)
1897 return std::nullopt;
1900void InlineCostCallAnalyzer::updateThreshold(
CallBase &Call,
Function &Callee) {
1902 if (!allowSizeGrowth(Call)) {
1910 auto MinIfValid = [](
int A, std::optional<int>
B) {
1911 return B ? std::min(
A, *
B) :
A;
1915 auto MaxIfValid = [](
int A, std::optional<int>
B) {
1916 return B ? std::max(
A, *
B) :
A;
1931 int SingleBBBonusPercent = 50;
1936 auto DisallowAllBonuses = [&]() {
1937 SingleBBBonusPercent = 0;
1938 VectorBonusPercent = 0;
1944 if (
Caller->hasMinSize()) {
1950 SingleBBBonusPercent = 0;
1951 VectorBonusPercent = 0;
1952 }
else if (
Caller->hasOptSize())
1957 if (!
Caller->hasMinSize()) {
1958 if (
Callee.hasFnAttribute(Attribute::InlineHint))
1983 DisallowAllBonuses();
1988 if (PSI->isFunctionEntryHot(&Callee)) {
1994 }
else if (PSI->isFunctionEntryCold(&Callee)) {
2000 DisallowAllBonuses();
2012 SingleBBBonus = Threshold * SingleBBBonusPercent / 100;
2013 VectorBonus = Threshold * VectorBonusPercent / 100;
2018 if (isSoleCallToLocalFunction(Call,
F)) {
2024bool CallAnalyzer::visitCmpInst(
CmpInst &
I) {
2030 if (
I.getOpcode() == Instruction::FCmp)
2035 Value *LHSBase, *RHSBase;
2036 APInt LHSOffset, RHSOffset;
2037 std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.
lookup(LHS);
2039 std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.
lookup(RHS);
2040 if (RHSBase && LHSBase == RHSBase) {
2046 SimplifiedValues[&
I] =
C;
2047 ++NumConstantPtrCmps;
2053 auto isImplicitNullCheckCmp = [](
const CmpInst &
I) {
2054 for (
auto *
User :
I.users())
2055 if (
auto *Instr = dyn_cast<Instruction>(
User))
2056 if (!
Instr->getMetadata(LLVMContext::MD_make_implicit))
2063 if (
I.isEquality() && isa<ConstantPointerNull>(
I.getOperand(1))) {
2064 if (isKnownNonNullInCallee(
I.getOperand(0))) {
2072 if (isImplicitNullCheckCmp(
I))
2075 return handleSROA(
I.getOperand(0), isa<ConstantPointerNull>(
I.getOperand(1)));
2082 Value *LHSBase, *RHSBase;
2083 APInt LHSOffset, RHSOffset;
2084 std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.
lookup(LHS);
2086 std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.
lookup(RHS);
2087 if (RHSBase && LHSBase == RHSBase) {
2093 SimplifiedValues[&
I] =
C;
2094 ++NumConstantPtrDiffs;
2102 return Base::visitSub(
I);
2107 Constant *CLHS = dyn_cast<Constant>(LHS);
2109 CLHS = SimplifiedValues.
lookup(LHS);
2110 Constant *CRHS = dyn_cast<Constant>(RHS);
2112 CRHS = SimplifiedValues.
lookup(RHS);
2114 Value *SimpleV =
nullptr;
2115 if (
auto FI = dyn_cast<FPMathOperator>(&
I))
2116 SimpleV =
simplifyBinOp(
I.getOpcode(), CLHS ? CLHS : LHS, CRHS ? CRHS : RHS,
2117 FI->getFastMathFlags(),
DL);
2122 if (
Constant *
C = dyn_cast_or_null<Constant>(SimpleV))
2123 SimplifiedValues[&
I] =
C;
2136 if (
I.getType()->isFloatingPointTy() &&
2151 COp ? COp :
Op, cast<FPMathOperator>(
I).getFastMathFlags(),
DL);
2153 if (
Constant *
C = dyn_cast_or_null<Constant>(SimpleV))
2154 SimplifiedValues[&
I] =
C;
2165bool CallAnalyzer::visitLoad(
LoadInst &
I) {
2166 if (handleSROA(
I.getPointerOperand(),
I.isSimple()))
2172 if (EnableLoadElimination &&
2173 !LoadAddrSet.
insert(
I.getPointerOperand()).second &&
I.isUnordered()) {
2174 onLoadEliminationOpportunity();
2183 if (handleSROA(
I.getPointerOperand(),
I.isSimple()))
2194 disableLoadElimination();
2206 return Base::visitExtractValue(
I);
2215 return Base::visitInsertValue(
I);
2238 C = dyn_cast_or_null<Constant>(SimplifiedValues.
lookup(
I));
2245 SimplifiedValues[&
Call] =
C;
2252bool CallAnalyzer::visitCallBase(
CallBase &Call) {
2253 if (!onCallBaseVisitStart(Call))
2256 if (
Call.hasFnAttr(Attribute::ReturnsTwice) &&
2257 !
F.hasFnAttribute(Attribute::ReturnsTwice)) {
2259 ExposesReturnsTwice =
true;
2262 if (isa<CallInst>(Call) && cast<CallInst>(Call).cannotDuplicate())
2263 ContainsNoDuplicateCall =
true;
2266 bool IsIndirectCall = !
F;
2267 if (IsIndirectCall) {
2271 F = dyn_cast_or_null<Function>(SimplifiedValues.
lookup(Callee));
2272 if (!
F ||
F->getFunctionType() !=
Call.getFunctionType()) {
2273 onCallArgumentSetup(Call);
2275 if (!
Call.onlyReadsMemory())
2276 disableLoadElimination();
2277 return Base::visitCallBase(Call);
2281 assert(
F &&
"Expected a call to a known function");
2284 if (simplifyCallSite(
F, Call))
2290 switch (II->getIntrinsicID()) {
2293 disableLoadElimination();
2294 return Base::visitCallBase(Call);
2296 case Intrinsic::load_relative:
2297 onLoadRelativeIntrinsic();
2300 case Intrinsic::memset:
2301 case Intrinsic::memcpy:
2302 case Intrinsic::memmove:
2303 disableLoadElimination();
2306 case Intrinsic::icall_branch_funnel:
2307 case Intrinsic::localescape:
2308 HasUninlineableIntrinsic =
true;
2310 case Intrinsic::vastart:
2311 InitsVargArgs =
true;
2313 case Intrinsic::launder_invariant_group:
2314 case Intrinsic::strip_invariant_group:
2315 if (
auto *SROAArg = getSROAArgForValueOrNull(II->getOperand(0)))
2316 SROAArgValues[II] = SROAArg;
2318 case Intrinsic::is_constant:
2319 return simplifyIntrinsicCallIsConstant(Call);
2320 case Intrinsic::objectsize:
2321 return simplifyIntrinsicCallObjectSize(Call);
2325 if (
F ==
Call.getFunction()) {
2328 IsRecursiveCall =
true;
2329 if (!AllowRecursiveCall)
2334 onLoweredCall(
F, Call, IsIndirectCall);
2337 if (!(
Call.onlyReadsMemory() || (IsIndirectCall &&
F->onlyReadsMemory())))
2338 disableLoadElimination();
2339 return Base::visitCallBase(Call);
2342bool CallAnalyzer::visitReturnInst(
ReturnInst &RI) {
2344 bool Free = !HasReturn;
2349bool CallAnalyzer::visitBranchInst(
BranchInst &BI) {
2356 isa_and_nonnull<ConstantInt>(
2360bool CallAnalyzer::visitSelectInst(
SelectInst &SI) {
2361 bool CheckSROA =
SI.getType()->isPointerTy();
2365 Constant *TrueC = dyn_cast<Constant>(TrueVal);
2367 TrueC = SimplifiedValues.
lookup(TrueVal);
2368 Constant *FalseC = dyn_cast<Constant>(FalseVal);
2370 FalseC = SimplifiedValues.
lookup(FalseVal);
2372 dyn_cast_or_null<Constant>(SimplifiedValues.
lookup(
SI.getCondition()));
2376 if (TrueC == FalseC && TrueC) {
2377 SimplifiedValues[&
SI] = TrueC;
2382 return Base::visitSelectInst(SI);
2384 std::pair<Value *, APInt> TrueBaseAndOffset =
2385 ConstantOffsetPtrs.
lookup(TrueVal);
2386 std::pair<Value *, APInt> FalseBaseAndOffset =
2387 ConstantOffsetPtrs.
lookup(FalseVal);
2388 if (TrueBaseAndOffset == FalseBaseAndOffset && TrueBaseAndOffset.first) {
2389 ConstantOffsetPtrs[&
SI] = TrueBaseAndOffset;
2391 if (
auto *SROAArg = getSROAArgForValueOrNull(TrueVal))
2392 SROAArgValues[&
SI] = SROAArg;
2396 return Base::visitSelectInst(SI);
2407 if (TrueC && FalseC) {
2409 SimplifiedValues[&
SI] =
C;
2413 return Base::visitSelectInst(SI);
2417 if (
Constant *SelectedC = dyn_cast<Constant>(SelectedV)) {
2418 SimplifiedValues[&
SI] = SelectedC;
2425 std::pair<Value *, APInt> BaseAndOffset =
2426 ConstantOffsetPtrs.
lookup(SelectedV);
2427 if (BaseAndOffset.first) {
2428 ConstantOffsetPtrs[&
SI] = BaseAndOffset;
2430 if (
auto *SROAArg = getSROAArgForValueOrNull(SelectedV))
2431 SROAArgValues[&
SI] = SROAArg;
2437bool CallAnalyzer::visitSwitchInst(
SwitchInst &SI) {
2440 if (isa<ConstantInt>(
SI.getCondition()))
2443 if (isa<ConstantInt>(V))
2458 unsigned JumpTableSize = 0;
2460 unsigned NumCaseCluster =
2463 onFinalizeSwitch(JumpTableSize, NumCaseCluster);
2476 HasIndirectBr =
true;
2480bool CallAnalyzer::visitResumeInst(
ResumeInst &RI) {
2514 for (
const Use &
Op :
I.operands())
2539 if (
I.isDebugOrPseudoInst())
2547 if (isa<ExtractElementInst>(
I) ||
I.getType()->isVectorTy())
2548 ++NumVectorInstructions;
2555 onInstructionAnalysisStart(&
I);
2557 if (Base::visit(&
I))
2558 ++NumInstructionsSimplified;
2560 onMissedSimplification();
2562 onInstructionAnalysisFinish(&
I);
2563 using namespace ore;
2566 if (IsRecursiveCall && !AllowRecursiveCall)
2568 else if (ExposesReturnsTwice)
2570 else if (HasDynamicAlloca)
2572 else if (HasIndirectBr)
2574 else if (HasUninlineableIntrinsic)
2576 else if (InitsVargArgs)
2578 if (!
IR.isSuccess()) {
2583 <<
NV(
"Callee", &
F) <<
" has uninlinable pattern ("
2584 <<
NV(
"InlineResult",
IR.getFailureReason())
2585 <<
") and cost is not fully computed";
2600 <<
NV(
"Callee", &
F) <<
" is "
2601 <<
NV(
"InlineResult",
IR.getFailureReason())
2602 <<
". Cost is not fully computed";
2609 "Call site analysis is not favorable to inlining.");
2621ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(
Value *&V) {
2622 if (!
V->getType()->isPointerTy())
2625 unsigned AS =
V->getType()->getPointerAddressSpace();
2626 unsigned IntPtrWidth =
DL.getIndexSizeInBits(AS);
2635 if (!
GEP->isInBounds() || !accumulateGEPOffset(*
GEP,
Offset))
2637 V =
GEP->getPointerOperand();
2639 V = cast<Operator>(V)->getOperand(0);
2640 }
else if (
GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
2641 if (GA->isInterposable())
2643 V = GA->getAliasee();
2647 assert(
V->getType()->isPointerTy() &&
"Unexpected operand type!");
2648 }
while (Visited.
insert(V).second);
2650 Type *IdxPtrTy =
DL.getIndexType(
V->getType());
2665 return (DeadBlocks.
count(Pred) ||
2666 (KnownSuccessors[Pred] && KnownSuccessors[Pred] != Succ));
2671 return (!DeadBlocks.
count(BB) &&
2677 if (Succ == NextBB || !IsNewlyDead(Succ))
2681 while (!NewDead.
empty()) {
2683 if (DeadBlocks.
insert(Dead).second)
2702 auto Result = onAnalysisStart();
2713 if (Call &&
Call->getFunction() == Caller) {
2714 IsCallerRecursive =
true;
2724 if (
Constant *
C = dyn_cast<Constant>(CAI))
2725 SimplifiedValues[&FAI] =
C;
2727 Value *PtrArg = *CAI;
2728 if (
ConstantInt *
C = stripAndComputeInBoundsConstantOffsets(PtrArg)) {
2729 ConstantOffsetPtrs[&FAI] = std::make_pair(PtrArg,
C->getValue());
2732 if (
auto *SROAArg = dyn_cast<AllocaInst>(PtrArg)) {
2733 SROAArgValues[&FAI] = SROAArg;
2734 onInitializeSROAArg(SROAArg);
2735 EnabledSROAAllocas.
insert(SROAArg);
2740 NumConstantArgs = SimplifiedValues.
size();
2741 NumConstantOffsetPtrArgs = ConstantOffsetPtrs.
size();
2742 NumAllocaArgs = SROAArgValues.
size();
2759 BBWorklist.
insert(&
F.getEntryBlock());
2782 if (!isa<CallBrInst>(*U))
2788 if (!
IR.isSuccess())
2795 if (
BranchInst *BI = dyn_cast<BranchInst>(TI)) {
2799 dyn_cast_or_null<ConstantInt>(SimplifiedValues.
lookup(
Cond))) {
2801 BBWorklist.
insert(NextBB);
2802 KnownSuccessors[BB] = NextBB;
2803 findDeadBlocks(BB, NextBB);
2807 }
else if (
SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
2810 dyn_cast_or_null<ConstantInt>(SimplifiedValues.
lookup(
Cond))) {
2811 BasicBlock *NextBB =
SI->findCaseValue(SimpleCond)->getCaseSuccessor();
2812 BBWorklist.
insert(NextBB);
2813 KnownSuccessors[BB] = NextBB;
2814 findDeadBlocks(BB, NextBB);
2825 onBlockAnalyzed(BB);
2831 if (!isSoleCallToLocalFunction(CandidateCall,
F) && ContainsNoDuplicateCall)
2841 FinalStackSizeThreshold = *AttrMaxStackSize;
2842 if (AllocatedSize > FinalStackSizeThreshold)
2845 return finalizeAnalysis();
2849#define DEBUG_PRINT_STAT(x) OS << " " #x ": " << x << "\n"
2851 F.print(
OS, &Writer);
2865#undef DEBUG_PRINT_STAT
2868#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2882 auto CalleeTLI = GetTLI(*Callee);
2885 GetTLI(*Caller).areInlineCompatible(CalleeTLI,
2892 for (
unsigned I = 0,
E = Call.arg_size();
I !=
E; ++
I) {
2893 if (Call.isByValArgument(
I)) {
2896 PointerType *PTy = cast<PointerType>(Call.getArgOperand(
I)->getType());
2897 unsigned TypeSize =
DL.getTypeSizeInBits(Call.getParamByValType(
I));
2899 unsigned PointerSize =
DL.getPointerSizeInBits(AS);
2901 unsigned NumStores = (
TypeSize + PointerSize - 1) / PointerSize;
2909 NumStores = std::min(NumStores, 8U);
2921 return std::min<int64_t>(
Cost, INT_MAX);
2930 return getInlineCost(Call, Call.getCalledFunction(), Params, CalleeTTI,
2931 GetAssumptionCache, GetTLI, GetBFI, PSI, ORE);
2950 InlineCostCallAnalyzer CA(*Call.getCalledFunction(), Call, Params, CalleeTTI,
2951 GetAssumptionCache, GetBFI, PSI, ORE,
true,
2953 auto R = CA.analyze();
2955 return std::nullopt;
2956 return CA.getCost();
2964 InlineCostFeaturesAnalyzer CFA(CalleeTTI, GetAssumptionCache, GetBFI, PSI,
2965 ORE, *Call.getCalledFunction(), Call);
2966 auto R = CFA.analyze();
2968 return std::nullopt;
2969 return CFA.features();
2984 if (Callee->isPresplitCoroutine())
2992 unsigned AllocaAS = Callee->getParent()->getDataLayout().getAllocaAddrSpace();
2993 for (
unsigned I = 0,
E = Call.arg_size();
I !=
E; ++
I)
2994 if (Call.isByValArgument(
I)) {
2995 PointerType *PTy = cast<PointerType>(Call.getArgOperand(
I)->getType());
3003 if (Call.hasFnAttr(Attribute::AlwaysInline)) {
3004 if (Call.getAttributes().hasFnAttr(Attribute::NoInline))
3008 if (IsViable.isSuccess())
3015 Function *Caller = Call.getCaller();
3020 if (Caller->hasOptNone())
3025 if (!Caller->nullPointerIsDefined() && Callee->nullPointerIsDefined())
3029 if (Callee->isInterposable())
3033 if (Callee->hasFnAttribute(Attribute::NoInline))
3037 if (Call.isNoInline())
3040 return std::nullopt;
3055 if (UserDecision->isSuccess())
3061 <<
"... (caller:" << Call.getCaller()->getName()
3064 InlineCostCallAnalyzer CA(*Callee, Call, Params, CalleeTTI,
3065 GetAssumptionCache, GetBFI, PSI, ORE);
3073 if (CA.wasDecidedByCostBenefit()) {
3076 CA.getCostBenefitPair());
3081 if (CA.wasDecidedByCostThreshold())
3083 CA.getStaticBonusApplied());
3092 bool ReturnsTwice =
F.hasFnAttribute(Attribute::ReturnsTwice);
3102 if (!isa<CallBrInst>(*U))
3105 for (
auto &II : BB) {
3106 CallBase *Call = dyn_cast<CallBase>(&II);
3111 Function *Callee = Call->getCalledFunction();
3117 if (!ReturnsTwice && isa<CallInst>(Call) &&
3118 cast<CallInst>(Call)->canReturnTwice())
3122 switch (Callee->getIntrinsicID()) {
3125 case llvm::Intrinsic::icall_branch_funnel:
3129 "disallowed inlining of @llvm.icall.branch.funnel");
3130 case llvm::Intrinsic::localescape:
3134 "disallowed inlining of @llvm.localescape");
3135 case llvm::Intrinsic::vastart:
3139 "contains VarArgs initialized with va_start");
3210 unsigned SizeOptLevel) {
3213 if (SizeOptLevel == 1)
3215 if (SizeOptLevel == 2)
3251 if (
CallInst *CI = dyn_cast<CallInst>(&
I)) {
3252 Function *CalledFunction = CI->getCalledFunction();
3256 InlineCostCallAnalyzer ICCA(*CalledFunction, *CI, Params,
TTI,
3257 GetAssumptionCache,
nullptr, &PSI, &ORE);
3259 OS <<
" Analyzing call of " << CalledFunction->
getName()
3260 <<
"... (caller:" << CI->getCaller()->getName() <<
")\n";
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static const Function * getParent(const Value *V)
SetVector< BasicBlock * > BBSetVector
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.
static bool isColdCallSite(CallBase &CB, BlockFrequencyInfo &CallerBFI)
Return true if the block containing the call site has a BlockFrequency of less than ColdCCRelFreq% of...
static cl::opt< int > InlineSavingsMultiplier("inline-savings-multiplier", cl::Hidden, cl::init(8), cl::desc("Multiplier to multiply cycle savings by during inlining"))
static cl::opt< int > InlineThreshold("inline-threshold", cl::Hidden, cl::init(225), cl::desc("Control the amount of inlining to perform (default = 225)"))
static cl::opt< int > CallPenalty("inline-call-penalty", cl::Hidden, cl::init(25), cl::desc("Call penalty that is applied per callsite when inlining"))
static cl::opt< int > HotCallSiteThreshold("hot-callsite-threshold", cl::Hidden, cl::init(3000), cl::desc("Threshold for hot callsites "))
static cl::opt< int > ColdThreshold("inlinecold-threshold", cl::Hidden, cl::init(45), cl::desc("Threshold for inlining functions with cold attribute"))
static cl::opt< size_t > RecurStackSizeThreshold("recursive-inline-max-stacksize", cl::Hidden, cl::init(InlineConstants::TotalAllocaSizeRecursiveCaller), cl::desc("Do not inline recursive functions with a stack " "size that exceeds the specified limit"))
static cl::opt< bool > PrintInstructionComments("print-instruction-comments", cl::Hidden, cl::init(false), cl::desc("Prints comments for instruction based on inline cost analysis"))
static cl::opt< int > LocallyHotCallSiteThreshold("locally-hot-callsite-threshold", cl::Hidden, cl::init(525), cl::desc("Threshold for locally hot callsites "))
static cl::opt< bool > InlineCallerSupersetNoBuiltin("inline-caller-superset-nobuiltin", cl::Hidden, cl::init(true), cl::desc("Allow inlining when caller has a superset of callee's nobuiltin " "attributes."))
static cl::opt< int > HintThreshold("inlinehint-threshold", cl::Hidden, cl::init(325), cl::desc("Threshold for inlining functions with inline hint"))
static cl::opt< size_t > StackSizeThreshold("inline-max-stacksize", cl::Hidden, cl::init(std::numeric_limits< size_t >::max()), cl::desc("Do not inline functions with a stack size " "that exceeds the specified limit"))
static int computeThresholdFromOptLevels(unsigned OptLevel, unsigned SizeOptLevel)
static cl::opt< uint64_t > HotCallSiteRelFreq("hot-callsite-rel-freq", cl::Hidden, cl::init(60), cl::desc("Minimum block frequency, expressed as a multiple of caller's " "entry frequency, for a callsite to be hot in the absence of " "profile information."))
static cl::opt< int > InlineSavingsProfitableMultiplier("inline-savings-profitable-multiplier", cl::Hidden, cl::init(4), cl::desc("A multiplier on top of cycle savings to decide whether the " "savings won't justify the cost"))
static cl::opt< int > MemAccessCost("inline-memaccess-cost", cl::Hidden, cl::init(0), cl::desc("Cost of load/store instruction when inlining"))
static cl::opt< int > ColdCallSiteThreshold("inline-cold-callsite-threshold", cl::Hidden, cl::init(45), cl::desc("Threshold for inlining cold callsites"))
static cl::opt< bool > IgnoreTTIInlineCompatible("ignore-tti-inline-compatible", cl::Hidden, cl::init(false), cl::desc("Ignore TTI attributes compatibility check between callee/caller " "during inline cost calculation"))
static cl::opt< bool > OptComputeFullInlineCost("inline-cost-full", cl::Hidden, cl::desc("Compute the full inline cost of a call site even when the cost " "exceeds the threshold."))
#define DEBUG_PRINT_STAT(x)
static cl::opt< bool > InlineEnableCostBenefitAnalysis("inline-enable-cost-benefit-analysis", cl::Hidden, cl::init(false), cl::desc("Enable the cost-benefit analysis for the inliner"))
static cl::opt< int > InstrCost("inline-instr-cost", cl::Hidden, cl::init(5), cl::desc("Cost of a single instruction when inlining"))
static cl::opt< int > InlineSizeAllowance("inline-size-allowance", cl::Hidden, cl::init(100), cl::desc("The maximum size of a callee that get's " "inlined without sufficient cycle savings"))
static bool functionsHaveCompatibleAttributes(Function *Caller, Function *Callee, TargetTransformInfo &TTI, function_ref< const TargetLibraryInfo &(Function &)> &GetTLI)
Test that there are no attribute conflicts between Caller and Callee that prevent inlining.
static cl::opt< int > ColdCallSiteRelFreq("cold-callsite-rel-freq", cl::Hidden, cl::init(2), cl::desc("Maximum block frequency, expressed as a percentage of caller's " "entry frequency, for a callsite to be cold in the absence of " "profile information."))
static cl::opt< bool > DisableGEPConstOperand("disable-gep-const-evaluation", cl::Hidden, cl::init(false), cl::desc("Disables evaluation of GetElementPtr with constant operands"))
static cl::opt< int > DefaultThreshold("inlinedefault-threshold", cl::Hidden, cl::init(225), cl::desc("Default amount of inlining to perform"))
static Constant * getFalse(Type *Ty)
For a boolean type or a vector of boolean type, return false or a vector with every element false.
Legalize the Machine IR a function's Machine IR
mir Rename Register Operands
FunctionAnalysisManager FAM
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static SymbolRef::Type getType(const Symbol *Sym)
Class for arbitrary precision integers.
APInt udiv(const APInt &RHS) const
Unsigned division operation.
bool ult(const APInt &RHS) const
Unsigned less than comparison.
APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
an instruction to allocate memory on the stack
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
This class represents an incoming formal argument to a Function.
virtual void emitInstructionAnnot(const Instruction *, formatted_raw_ostream &)
emitInstructionAnnot - This may be implemented to emit a string right before an instruction is emitte...
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
StringRef getValueAsString() const
Return the attribute's value as a string.
AttrKind
This enumeration lists the attributes that can be associated with parameters, function results,...
bool isValid() const
Return true if the attribute is any kind of attribute.
LLVM Basic Block Representation.
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches,...
const Function * getParent() const
Return the enclosing method, or null if none.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
This class represents a no-op cast from one type to another.
static BlockAddress * get(Function *F, BasicBlock *BB)
Return a BlockAddress for the specified function and basic block.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
std::optional< uint64_t > getBlockProfileCount(const BasicBlock *BB, bool AllowSynthetic=false) const
Returns the estimated profile count of BB.
uint64_t getEntryFreq() const
BlockFrequency getBlockFreq(const BasicBlock *BB) const
getblockFreq - Return block frequency.
std::optional< BlockFrequency > mul(uint64_t Factor) const
Multiplies frequency with Factor. Returns nullopt in case of overflow.
Conditional or Unconditional Branch instruction.
bool isConditional() const
BasicBlock * getSuccessor(unsigned i) const
bool isUnconditional() const
Value * getCondition() const
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Determine whether the argument or parameter has the given attribute.
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
Attribute getFnAttr(StringRef Kind) const
Get the attribute of a given kind for the function.
Value * getArgOperand(unsigned i) const
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
FunctionType * getFunctionType() const
This class represents a function call, abstracting a target machine's calling convention.
This is the base class for all instructions that perform data casts.
This class is the base class for the comparison instructions.
static Constant * getSub(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
static Constant * getICmp(unsigned short pred, Constant *LHS, Constant *RHS, bool OnlyIfReduced=false)
get* - Return some common constants without having to specify the full Instruction::OPCODE identifier...
This is the shared class of boolean and integer constants.
static ConstantInt * getTrue(LLVMContext &Context)
bool isZero() const
This is just a convenience method to make client code smaller for a common case.
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
const APInt & getValue() const
Return the constant as an APInt value reference.
This is an important base class in LLVM.
bool isAllOnesValue() const
Return true if this is the value that would be returned by getAllOnesValue.
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
iterator find(const_arg_type_t< KeyT > Val)
bool erase(const KeyT &Val)
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Implements a dense probed hash-table based set.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Type * getReturnType() const
Class to represent profile counts.
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Indirect Branch Instruction.
Represents the cost of inlining a function.
static InlineCost getNever(const char *Reason, std::optional< CostBenefitPair > CostBenefit=std::nullopt)
static InlineCost getAlways(const char *Reason, std::optional< CostBenefitPair > CostBenefit=std::nullopt)
static InlineCost get(int Cost, int Threshold, int StaticBonus=0)
InlineResult is basically true or false.
static InlineResult success()
static InlineResult failure(const char *Reason)
const char * getFailureReason() const
This instruction inserts a struct field of array element value into an aggregate value.
Base class for instruction visitors.
RetTy visitIndirectBrInst(IndirectBrInst &I)
RetTy visitCmpInst(CmpInst &I)
RetTy visitCallBase(CallBase &I)
RetTy visitCleanupReturnInst(CleanupReturnInst &I)
RetTy visitUnreachableInst(UnreachableInst &I)
RetTy visitSwitchInst(SwitchInst &I)
void visit(Iterator Start, Iterator End)
RetTy visitReturnInst(ReturnInst &I)
RetTy visitBinaryOperator(BinaryOperator &I)
RetTy visitResumeInst(ResumeInst &I)
RetTy visitCatchReturnInst(CatchReturnInst &I)
RetTy visitCastInst(CastInst &I)
RetTy visitBranchInst(BranchInst &I)
RetTy visitSelectInst(SelectInst &I)
void visitInstruction(Instruction &I)
unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const BasicBlock * getParent() const
const Function * getFunction() const
Return the function this instruction belongs to.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
This class represents a cast from an integer to a pointer.
A wrapper class for inspecting calls to intrinsic functions.
An instruction for reading from memory.
Represents a single loop in the control flow graph.
A Module instance is used to store all the information related to an LLVM module.
unsigned getOpcode() const
Return the opcode for this Instruction or ConstantExpr.
Class to represent pointers.
unsigned getAddressSpace() const
Return the address space of the Pointer type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Analysis providing profile information.
This class represents a cast from a pointer to an integer.
Resume the propagation of an exception.
Return a value (possibly void), from a function.
This class represents the LLVM 'select' instruction.
A vector that has set insertion semantics.
size_type size() const
Determine the number of elements in the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e.
bool getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Used to lazily calculate structure layout information for a target machine, based on the DataLayout s...
TypeSize getElementOffset(unsigned Idx) const
Class to represent struct types.
Provides information about what library functions are available for the current target.
The instances of the Type class are immutable: once they are created, they are never changed.
This function has undefined behavior.
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
LLVMContext & getContext() const
All values hold a context through their type.
StringRef getName() const
Return a constant reference to the value's name.
std::pair< iterator, bool > insert(const ValueT &V)
bool erase(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
An efficient, type-erasing, non-owning reference to a callable.
This class implements an extremely fast bulk output stream that can only output to a stream.
bool areInlineCompatible(const Function &Caller, const Function &Callee)
@ Cold
Attempts to make code in the caller as efficient as possible under the assumption that the call is no...
@ C
The default llvm calling convention, compatible with C.
const char FunctionInlineCostMultiplierAttributeName[]
const int OptSizeThreshold
Use when optsize (-Os) is specified.
const int OptMinSizeThreshold
Use when minsize (-Oz) is specified.
const uint64_t MaxSimplifiedDynamicAllocaToInline
Do not inline dynamic allocas that have been constant propagated to be static allocas above this amou...
const int IndirectCallThreshold
const int OptAggressiveThreshold
Use when -O3 is specified.
const char MaxInlineStackSizeAttributeName[]
const int LastCallToStaticBonus
const unsigned TotalAllocaSizeRecursiveCaller
Do not inline functions which allocate this many bytes on the stack when the caller is recursive.
bool match(Val *V, const Pattern &P)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
initializer< Ty > init(const Ty &Val)
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< InstrNode * > Instr
This is an optimization pass for GlobalISel generic memory operations.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Constant * ConstantFoldSelectInstruction(Constant *Cond, Constant *V1, Constant *V2)
Attempt to constant fold a select instruction with the specified operands.
bool isAssumeLikeIntrinsic(const Instruction *I)
Return true if it is an intrinsic that cannot be speculated but also cannot trap.
bool canConstantFoldCallTo(const CallBase *Call, const Function *F)
canConstantFoldCallTo - Return true if its even possible to fold a call to the specified function.
Function::ProfileCount ProfileCount
std::optional< int > getStringFnAttrAsInt(CallBase &CB, StringRef AttrKind)
auto successors(const MachineBasicBlock *BB)
int getCallsiteCost(const CallBase &Call, const DataLayout &DL)
Return the cost associated with a callsite, including parameter passing and the call/return instructi...
Value * lowerObjectSizeCall(IntrinsicInst *ObjectSize, const DataLayout &DL, const TargetLibraryInfo *TLI, bool MustSucceed)
Try to turn a call to @llvm.objectsize into an integer value of the given Type.
std::optional< InlineCostFeatures > getInliningCostFeatures(CallBase &Call, TargetTransformInfo &CalleeTTI, function_ref< AssumptionCache &(Function &)> GetAssumptionCache, function_ref< BlockFrequencyInfo &(Function &)> GetBFI=nullptr, ProfileSummaryInfo *PSI=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
Get the expanded cost features.
gep_type_iterator gep_type_end(const User *GEP)
Constant * ConstantFoldCall(const CallBase *Call, Function *F, ArrayRef< Constant * > Operands, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldCall - Attempt to constant fold a call to the specified function with the specified argum...
Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
InlineResult isInlineViable(Function &Callee)
Minimal filter to detect invalid constructs for inlining.
InlineCost getInlineCost(CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI, function_ref< AssumptionCache &(Function &)> GetAssumptionCache, function_ref< const TargetLibraryInfo &(Function &)> GetTLI, function_ref< BlockFrequencyInfo &(Function &)> GetBFI=nullptr, ProfileSummaryInfo *PSI=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
Get an InlineCost object representing the cost of inlining this callsite.
Value * simplifyFNegInst(Value *Op, FastMathFlags FMF, const SimplifyQuery &Q)
Given operand for an FNeg, fold the result or return null.
Constant * ConstantFoldInstOperands(Instruction *I, ArrayRef< Constant * > Ops, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldInstOperands - Attempt to constant fold an instruction with the specified operands.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
std::enable_if_t< std::is_unsigned_v< T >, T > SaturatingMultiplyAdd(T X, T Y, T A, bool *ResultOverflowed=nullptr)
Multiply two unsigned integers, X and Y, and add the unsigned integer, A to the product.
std::array< int, static_cast< size_t >(InlineCostFeatureIndex::NumberOfFeatures)> InlineCostFeatures
std::optional< InlineResult > getAttributeBasedInliningDecision(CallBase &Call, Function *Callee, TargetTransformInfo &CalleeTTI, function_ref< const TargetLibraryInfo &(Function &)> GetTLI)
Returns InlineResult::success() if the call site should be always inlined because of user directives,...
Value * simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a BinaryOperator, fold the result or return null.
InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options.
std::optional< int > getInliningCostEstimate(CallBase &Call, TargetTransformInfo &CalleeTTI, function_ref< AssumptionCache &(Function &)> GetAssumptionCache, function_ref< BlockFrequencyInfo &(Function &)> GetBFI=nullptr, ProfileSummaryInfo *PSI=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
Get the cost estimate ignoring thresholds.
gep_type_iterator gep_type_begin(const User *GEP)
auto predecessors(const MachineBasicBlock *BB)
std::enable_if_t< std::is_unsigned_v< T >, T > SaturatingAdd(T X, T Y, bool *ResultOverflowed=nullptr)
Add two unsigned integers, X and Y, of type T.
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM)
Thresholds to tune inline cost analysis.
std::optional< int > OptMinSizeThreshold
Threshold to use when the caller is optimized for minsize.
std::optional< int > OptSizeThreshold
Threshold to use when the caller is optimized for size.
std::optional< int > ColdCallSiteThreshold
Threshold to use when the callsite is considered cold.
std::optional< int > ColdThreshold
Threshold to use for cold callees.
std::optional< int > HotCallSiteThreshold
Threshold to use when the callsite is considered hot.
std::optional< bool > AllowRecursiveCall
Indicate whether we allow inlining for recursive call.
int DefaultThreshold
The default threshold to start with for a callee.
std::optional< int > HintThreshold
Threshold to use for callees with inline hint.
std::optional< int > LocallyHotCallSiteThreshold
Threshold to use when the callsite is considered hot relative to function entry.