31#include "llvm/Config/llvm-config.h"
52#define DEBUG_TYPE "inline-cost"
54STATISTIC(NumCallsAnalyzed,
"Number of call sites analyzed");
58 cl::desc(
"Default amount of inlining to perform"));
67 cl::desc(
"Ignore TTI attributes compatibility check between callee/caller "
68 "during inline cost calculation"));
72 cl::desc(
"Prints comments for instruction based on inline cost analysis"));
76 cl::desc(
"Control the amount of inlining to perform (default = 225)"));
80 cl::desc(
"Threshold for inlining functions with inline hint"));
85 cl::desc(
"Threshold for inlining cold callsites"));
89 cl::desc(
"Enable the cost-benefit analysis for the inliner"));
96 cl::desc(
"Multiplier to multiply cycle savings by during inlining"));
103 cl::desc(
"A multiplier on top of cycle savings to decide whether the "
104 "savings won't justify the cost"));
108 cl::desc(
"The maximum size of a callee that get's "
109 "inlined without sufficient cycle savings"));
116 cl::desc(
"Threshold for inlining functions with cold attribute"));
120 cl::desc(
"Threshold for hot callsites "));
124 cl::desc(
"Threshold for locally hot callsites "));
128 cl::desc(
"Maximum block frequency, expressed as a percentage of caller's "
129 "entry frequency, for a callsite to be cold in the absence of "
130 "profile information."));
134 cl::desc(
"Minimum block frequency, expressed as a multiple of caller's "
135 "entry frequency, for a callsite to be hot in the absence of "
136 "profile information."));
140 cl::desc(
"Cost of a single instruction when inlining"));
144 cl::desc(
"Cost of load/store instruction when inlining"));
148 cl::desc(
"Call penalty that is applied per callsite when inlining"));
152 cl::init(std::numeric_limits<size_t>::max()),
153 cl::desc(
"Do not inline functions with a stack size "
154 "that exceeds the specified limit"));
159 cl::desc(
"Do not inline recursive functions with a stack "
160 "size that exceeds the specified limit"));
164 cl::desc(
"Compute the full inline cost of a call site even when the cost "
165 "exceeds the threshold."));
169 cl::desc(
"Allow inlining when caller has a superset of callee's nobuiltin "
174 cl::desc(
"Disables evaluation of GetElementPtr with constant operands"));
194namespace InlineConstants {
202class InlineCostCallAnalyzer;
206struct InstructionCostDetail {
209 int ThresholdBefore = 0;
210 int ThresholdAfter = 0;
212 int getThresholdDelta()
const {
return ThresholdAfter - ThresholdBefore; }
214 int getCostDelta()
const {
return CostAfter - CostBefore; }
216 bool hasThresholdChanged()
const {
return ThresholdAfter != ThresholdBefore; }
221 InlineCostCallAnalyzer *
const ICCA;
224 InlineCostAnnotationWriter(InlineCostCallAnalyzer *ICCA) : ICCA(ICCA) {}
237class CallAnalyzer :
public InstVisitor<CallAnalyzer, bool> {
242 virtual ~CallAnalyzer() =
default;
271 virtual void onBlockStart(
const BasicBlock *BB) {}
274 virtual void onBlockAnalyzed(
const BasicBlock *BB) {}
277 virtual void onInstructionAnalysisStart(
const Instruction *
I) {}
280 virtual void onInstructionAnalysisFinish(
const Instruction *
I) {}
290 virtual bool shouldStop() {
return false; }
299 virtual void onDisableSROA(
AllocaInst *Arg) {}
302 virtual void onDisableLoadElimination() {}
306 virtual bool onCallBaseVisitStart(
CallBase &Call) {
return true; }
309 virtual void onCallPenalty() {}
312 virtual void onMemAccess(){};
316 virtual void onLoadEliminationOpportunity() {}
320 virtual void onCallArgumentSetup(
const CallBase &Call) {}
323 virtual void onLoadRelativeIntrinsic() {}
331 virtual bool onJumpTable(
unsigned JumpTableSize) {
return true; }
335 virtual bool onCaseCluster(
unsigned NumCaseCluster) {
return true; }
339 virtual void onFinalizeSwitch(
unsigned JumpTableSize,
unsigned NumCaseCluster,
340 bool DefaultDestUndefined) {}
344 virtual void onMissedSimplification() {}
347 virtual void onInitializeSROAArg(
AllocaInst *Arg) {}
350 virtual void onAggregateSROAUse(
AllocaInst *V) {}
352 bool handleSROA(
Value *V,
bool DoNotDisable) {
354 if (
auto *SROAArg = getSROAArgForValueOrNull(V)) {
356 onAggregateSROAUse(SROAArg);
359 disableSROAForArg(SROAArg);
364 bool IsCallerRecursive =
false;
365 bool IsRecursiveCall =
false;
366 bool ExposesReturnsTwice =
false;
367 bool HasDynamicAlloca =
false;
368 bool ContainsNoDuplicateCall =
false;
369 bool HasReturn =
false;
370 bool HasIndirectBr =
false;
371 bool HasUninlineableIntrinsic =
false;
372 bool InitsVargArgs =
false;
376 unsigned NumInstructions = 0;
377 unsigned NumVectorInstructions = 0;
408 bool EnableLoadElimination =
true;
411 bool AllowRecursiveCall =
false;
416 auto It = SROAArgValues.
find(V);
417 if (It == SROAArgValues.
end() || EnabledSROAAllocas.
count(It->second) == 0)
423 bool isAllocaDerivedArg(
Value *V);
425 void disableSROA(
Value *V);
427 void disableLoadElimination();
433 bool simplifyIntrinsicCallIsConstant(
CallBase &CB);
434 bool simplifyIntrinsicCallObjectSize(
CallBase &CB);
446 bool isKnownNonNullInCallee(
Value *V);
449 bool allowSizeGrowth(
CallBase &Call);
500 :
TTI(
TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI),
501 PSI(PSI),
F(
Callee),
DL(
F.getDataLayout()), ORE(ORE),
502 CandidateCall(
Call) {}
506 std::optional<Constant *> getSimplifiedValue(
Instruction *
I) {
508 return SimplifiedValues[
I];
514 unsigned NumConstantArgs = 0;
515 unsigned NumConstantOffsetPtrArgs = 0;
516 unsigned NumAllocaArgs = 0;
517 unsigned NumConstantPtrCmps = 0;
518 unsigned NumConstantPtrDiffs = 0;
519 unsigned NumInstructionsSimplified = 0;
539int64_t getExpectedNumberOfCompare(
int NumCaseCluster) {
540 return 3 *
static_cast<int64_t
>(NumCaseCluster) / 2 - 1;
545class InlineCostCallAnalyzer final :
public CallAnalyzer {
546 const bool ComputeFullInlineCost;
547 int LoadEliminationCost = 0;
552 int SingleBBBonus = 0;
567 int StaticBonusApplied = 0;
570 const bool BoostIndirectCalls;
573 const bool IgnoreThreshold;
576 const bool CostBenefitAnalysisEnabled;
587 int CostAtBBStart = 0;
594 bool DecidedByCostThreshold =
false;
597 bool DecidedByCostBenefit =
false;
602 bool SingleBB =
true;
604 unsigned SROACostSavings = 0;
605 unsigned SROACostSavingsLost = 0;
621 std::optional<int> getHotCallSiteThreshold(
CallBase &Call,
625 void addCost(int64_t Inc) {
626 Inc = std::clamp<int64_t>(Inc, INT_MIN, INT_MAX);
627 Cost = std::clamp<int64_t>(Inc +
Cost, INT_MIN, INT_MAX);
630 void onDisableSROA(
AllocaInst *Arg)
override {
631 auto CostIt = SROAArgCosts.
find(Arg);
632 if (CostIt == SROAArgCosts.
end())
634 addCost(CostIt->second);
635 SROACostSavings -= CostIt->second;
636 SROACostSavingsLost += CostIt->second;
637 SROAArgCosts.
erase(CostIt);
640 void onDisableLoadElimination()
override {
641 addCost(LoadEliminationCost);
642 LoadEliminationCost = 0;
645 bool onCallBaseVisitStart(
CallBase &Call)
override {
646 if (std::optional<int> AttrCallThresholdBonus =
648 Threshold += *AttrCallThresholdBonus;
650 if (std::optional<int> AttrCallCost =
652 addCost(*AttrCallCost);
660 void onCallPenalty()
override { addCost(
CallPenalty); }
664 void onCallArgumentSetup(
const CallBase &Call)
override {
669 void onLoadRelativeIntrinsic()
override {
674 bool IsIndirectCall)
override {
683 if (IsIndirectCall && BoostIndirectCalls) {
684 auto IndirectCallParams = Params;
689 InlineCostCallAnalyzer CA(*
F, Call, IndirectCallParams,
TTI,
690 GetAssumptionCache, GetBFI, PSI, ORE,
false);
691 if (CA.analyze().isSuccess()) {
694 Cost -= std::max(0, CA.getThreshold() - CA.getCost());
702 void onFinalizeSwitch(
unsigned JumpTableSize,
unsigned NumCaseCluster,
703 bool DefaultDestUndefined)
override {
710 if (!DefaultDestUndefined)
719 if (NumCaseCluster <= 3) {
723 addCost((NumCaseCluster - DefaultDestUndefined) * 2 *
InstrCost);
727 int64_t ExpectedNumberOfCompare =
728 getExpectedNumberOfCompare(NumCaseCluster);
729 int64_t SwitchCost = ExpectedNumberOfCompare * 2 *
InstrCost;
733 void onMissedSimplification()
override { addCost(
InstrCost); }
735 void onInitializeSROAArg(
AllocaInst *Arg)
override {
737 "Should not initialize SROA costs for null value.");
739 SROACostSavings += SROAArgCost;
740 SROAArgCosts[Arg] = SROAArgCost;
743 void onAggregateSROAUse(
AllocaInst *SROAArg)
override {
744 auto CostIt = SROAArgCosts.
find(SROAArg);
746 "expected this argument to have a cost");
751 void onBlockStart(
const BasicBlock *BB)
override { CostAtBBStart =
Cost; }
753 void onBlockAnalyzed(
const BasicBlock *BB)
override {
754 if (CostBenefitAnalysisEnabled) {
757 assert(GetBFI &&
"GetBFI must be available");
759 assert(BFI &&
"BFI must be available");
762 ColdSize +=
Cost - CostAtBBStart;
770 if (SingleBB && TI->getNumSuccessors() > 1) {
772 Threshold -= SingleBBBonus;
777 void onInstructionAnalysisStart(
const Instruction *
I)
override {
782 InstructionCostDetailMap[
I].CostBefore =
Cost;
783 InstructionCostDetailMap[
I].ThresholdBefore = Threshold;
786 void onInstructionAnalysisFinish(
const Instruction *
I)
override {
791 InstructionCostDetailMap[
I].CostAfter =
Cost;
792 InstructionCostDetailMap[
I].ThresholdAfter = Threshold;
795 bool isCostBenefitAnalysisEnabled() {
796 if (!PSI || !PSI->hasProfileSummary())
808 if (!PSI->hasInstrumentationProfile())
813 if (!
Caller->getEntryCount())
821 if (!PSI->isHotCallSite(CandidateCall, CallerBFI))
825 auto EntryCount =
F.getEntryCount();
826 if (!EntryCount || !EntryCount->getCount())
837 unsigned getInliningCostBenefitAnalysisSavingsMultiplier()
const {
844 unsigned getInliningCostBenefitAnalysisProfitableMultiplier()
const {
850 void OverrideCycleSavingsAndSizeForTesting(
APInt &CycleSavings,
int &
Size) {
852 CandidateCall,
"inline-cycle-savings-for-test")) {
853 CycleSavings = *AttrCycleSavings;
857 CandidateCall,
"inline-runtime-cost-for-test")) {
858 Size = *AttrRuntimeCost;
865 std::optional<bool> costBenefitAnalysis() {
866 if (!CostBenefitAnalysisEnabled)
890 APInt CycleSavings(128, 0);
893 APInt CurrentSavings(128, 0);
897 if (BI->isConditional() &&
898 isa_and_nonnull<ConstantInt>(
899 SimplifiedValues.
lookup(BI->getCondition()))) {
902 }
else if (
SwitchInst *SI = dyn_cast<SwitchInst>(&
I)) {
903 if (isa_and_present<ConstantInt>(SimplifiedValues.
lookup(
SI->getCondition())))
905 }
else if (
Value *V = dyn_cast<Value>(&
I)) {
907 if (SimplifiedValues.
count(V)) {
915 CycleSavings += CurrentSavings;
919 auto EntryProfileCount =
F.getEntryCount();
920 assert(EntryProfileCount && EntryProfileCount->getCount());
921 auto EntryCount = EntryProfileCount->getCount();
922 CycleSavings += EntryCount / 2;
923 CycleSavings = CycleSavings.
udiv(EntryCount);
926 auto *CallerBB = CandidateCall.
getParent();
940 OverrideCycleSavingsAndSizeForTesting(CycleSavings,
Size);
964 APInt Threshold(128, PSI->getOrCompHotCountThreshold());
967 APInt UpperBoundCycleSavings = CycleSavings;
968 UpperBoundCycleSavings *= getInliningCostBenefitAnalysisSavingsMultiplier();
969 if (UpperBoundCycleSavings.
uge(Threshold))
972 APInt LowerBoundCycleSavings = CycleSavings;
973 LowerBoundCycleSavings *=
974 getInliningCostBenefitAnalysisProfitableMultiplier();
975 if (LowerBoundCycleSavings.
ult(Threshold))
989 if (
Caller->hasMinSize()) {
995 if (DeadBlocks.
count(
L->getHeader()))
1005 if (NumVectorInstructions <= NumInstructions / 10)
1006 Threshold -= VectorBonus;
1007 else if (NumVectorInstructions <= NumInstructions / 2)
1008 Threshold -= VectorBonus / 2;
1010 if (std::optional<int> AttrCost =
1017 Cost *= *AttrCostMult;
1019 if (std::optional<int> AttrThreshold =
1021 Threshold = *AttrThreshold;
1023 if (
auto Result = costBenefitAnalysis()) {
1024 DecidedByCostBenefit =
true;
1031 if (IgnoreThreshold)
1034 DecidedByCostThreshold =
true;
1035 return Cost < std::max(1, Threshold)
1040 bool shouldStop()
override {
1041 if (IgnoreThreshold || ComputeFullInlineCost)
1045 if (
Cost < Threshold)
1047 DecidedByCostThreshold =
true;
1051 void onLoadEliminationOpportunity()
override {
1066 assert(NumInstructions == 0);
1067 assert(NumVectorInstructions == 0);
1070 updateThreshold(CandidateCall,
F);
1076 assert(SingleBBBonus >= 0);
1077 assert(VectorBonus >= 0);
1082 Threshold += (SingleBBBonus + VectorBonus);
1096 if (
Cost >= Threshold && !ComputeFullInlineCost)
1103 InlineCostCallAnalyzer(
1110 bool IgnoreThreshold =
false)
1111 : CallAnalyzer(
Callee,
Call,
TTI, GetAssumptionCache, GetBFI, PSI, ORE),
1113 Params.ComputeFullInlineCost || ORE ||
1114 isCostBenefitAnalysisEnabled()),
1116 BoostIndirectCalls(BoostIndirect), IgnoreThreshold(IgnoreThreshold),
1117 CostBenefitAnalysisEnabled(isCostBenefitAnalysisEnabled()),
1123 InlineCostAnnotationWriter Writer;
1131 std::optional<InstructionCostDetail> getCostDetails(
const Instruction *
I) {
1132 if (InstructionCostDetailMap.
contains(
I))
1133 return InstructionCostDetailMap[
I];
1134 return std::nullopt;
1137 virtual ~InlineCostCallAnalyzer() =
default;
1138 int getThreshold()
const {
return Threshold; }
1139 int getCost()
const {
return Cost; }
1140 int getStaticBonusApplied()
const {
return StaticBonusApplied; }
1141 std::optional<CostBenefitPair> getCostBenefitPair() {
return CostBenefit; }
1142 bool wasDecidedByCostBenefit()
const {
return DecidedByCostBenefit; }
1143 bool wasDecidedByCostThreshold()
const {
return DecidedByCostThreshold; }
1147static bool isSoleCallToLocalFunction(
const CallBase &CB,
1149 return Callee.hasLocalLinkage() &&
Callee.hasOneLiveUse() &&
1153class InlineCostFeaturesAnalyzer final :
public CallAnalyzer {
1160 static constexpr int JTCostMultiplier = 2;
1161 static constexpr int CaseClusterCostMultiplier = 2;
1162 static constexpr int SwitchDefaultDestCostMultiplier = 2;
1163 static constexpr int SwitchCostMultiplier = 2;
1167 unsigned SROACostSavingOpportunities = 0;
1168 int VectorBonus = 0;
1169 int SingleBBBonus = 0;
1175 Cost[
static_cast<size_t>(Feature)] += Delta;
1179 Cost[
static_cast<size_t>(Feature)] =
Value;
1182 void onDisableSROA(
AllocaInst *Arg)
override {
1183 auto CostIt = SROACosts.
find(Arg);
1184 if (CostIt == SROACosts.
end())
1187 increment(InlineCostFeatureIndex::sroa_losses, CostIt->second);
1188 SROACostSavingOpportunities -= CostIt->second;
1189 SROACosts.
erase(CostIt);
1192 void onDisableLoadElimination()
override {
1193 set(InlineCostFeatureIndex::load_elimination, 1);
1196 void onCallPenalty()
override {
1197 increment(InlineCostFeatureIndex::call_penalty,
CallPenalty);
1200 void onCallArgumentSetup(
const CallBase &Call)
override {
1201 increment(InlineCostFeatureIndex::call_argument_setup,
1205 void onLoadRelativeIntrinsic()
override {
1206 increment(InlineCostFeatureIndex::load_relative_intrinsic, 3 *
InstrCost);
1210 bool IsIndirectCall)
override {
1211 increment(InlineCostFeatureIndex::lowered_call_arg_setup,
1214 if (IsIndirectCall) {
1228 InlineCostCallAnalyzer CA(*
F, Call, IndirectCallParams,
TTI,
1229 GetAssumptionCache, GetBFI, PSI, ORE,
false,
1231 if (CA.analyze().isSuccess()) {
1232 increment(InlineCostFeatureIndex::nested_inline_cost_estimate,
1234 increment(InlineCostFeatureIndex::nested_inlines, 1);
1241 void onFinalizeSwitch(
unsigned JumpTableSize,
unsigned NumCaseCluster,
1242 bool DefaultDestUndefined)
override {
1243 if (JumpTableSize) {
1244 if (!DefaultDestUndefined)
1245 increment(InlineCostFeatureIndex::switch_default_dest_penalty,
1246 SwitchDefaultDestCostMultiplier *
InstrCost);
1247 int64_t JTCost =
static_cast<int64_t
>(JumpTableSize) *
InstrCost +
1249 increment(InlineCostFeatureIndex::jump_table_penalty, JTCost);
1253 if (NumCaseCluster <= 3) {
1254 increment(InlineCostFeatureIndex::case_cluster_penalty,
1255 (NumCaseCluster - DefaultDestUndefined) *
1260 int64_t ExpectedNumberOfCompare =
1261 getExpectedNumberOfCompare(NumCaseCluster);
1263 int64_t SwitchCost =
1264 ExpectedNumberOfCompare * SwitchCostMultiplier *
InstrCost;
1265 increment(InlineCostFeatureIndex::switch_penalty, SwitchCost);
1268 void onMissedSimplification()
override {
1269 increment(InlineCostFeatureIndex::unsimplified_common_instructions,
1273 void onInitializeSROAArg(
AllocaInst *Arg)
override {
1275 SROACosts[Arg] = SROAArgCost;
1276 SROACostSavingOpportunities += SROAArgCost;
1279 void onAggregateSROAUse(
AllocaInst *Arg)
override {
1281 SROACostSavingOpportunities +=
InstrCost;
1284 void onBlockAnalyzed(
const BasicBlock *BB)
override {
1286 set(InlineCostFeatureIndex::is_multiple_blocks, 1);
1287 Threshold -= SingleBBBonus;
1292 if (
Caller->hasMinSize()) {
1295 for (
Loop *L : LI) {
1297 if (DeadBlocks.
count(
L->getHeader()))
1299 increment(InlineCostFeatureIndex::num_loops,
1303 set(InlineCostFeatureIndex::dead_blocks, DeadBlocks.
size());
1304 set(InlineCostFeatureIndex::simplified_instructions,
1305 NumInstructionsSimplified);
1306 set(InlineCostFeatureIndex::constant_args, NumConstantArgs);
1307 set(InlineCostFeatureIndex::constant_offset_ptr_args,
1308 NumConstantOffsetPtrArgs);
1309 set(InlineCostFeatureIndex::sroa_savings, SROACostSavingOpportunities);
1311 if (NumVectorInstructions <= NumInstructions / 10)
1312 Threshold -= VectorBonus;
1313 else if (NumVectorInstructions <= NumInstructions / 2)
1314 Threshold -= VectorBonus / 2;
1316 set(InlineCostFeatureIndex::threshold, Threshold);
1321 bool shouldStop()
override {
return false; }
1323 void onLoadEliminationOpportunity()
override {
1324 increment(InlineCostFeatureIndex::load_elimination, 1);
1328 increment(InlineCostFeatureIndex::callsite_cost,
1331 set(InlineCostFeatureIndex::cold_cc_penalty,
1334 set(InlineCostFeatureIndex::last_call_to_static_bonus,
1335 isSoleCallToLocalFunction(CandidateCall,
F));
1340 int SingleBBBonusPercent = 50;
1344 SingleBBBonus = Threshold * SingleBBBonusPercent / 100;
1345 VectorBonus = Threshold * VectorBonusPercent / 100;
1346 Threshold += (SingleBBBonus + VectorBonus);
1352 InlineCostFeaturesAnalyzer(
1358 : CallAnalyzer(
Callee,
Call,
TTI, GetAssumptionCache, GetBFI, PSI) {}
1366bool CallAnalyzer::isAllocaDerivedArg(
Value *V) {
1367 return SROAArgValues.
count(V);
1370void CallAnalyzer::disableSROAForArg(
AllocaInst *SROAArg) {
1371 onDisableSROA(SROAArg);
1372 EnabledSROAAllocas.
erase(SROAArg);
1373 disableLoadElimination();
1376void InlineCostAnnotationWriter::emitInstructionAnnot(
1381 std::optional<InstructionCostDetail>
Record = ICCA->getCostDetails(
I);
1383 OS <<
"; No analysis for the instruction";
1385 OS <<
"; cost before = " <<
Record->CostBefore
1386 <<
", cost after = " <<
Record->CostAfter
1387 <<
", threshold before = " <<
Record->ThresholdBefore
1388 <<
", threshold after = " <<
Record->ThresholdAfter <<
", ";
1389 OS <<
"cost delta = " <<
Record->getCostDelta();
1390 if (
Record->hasThresholdChanged())
1391 OS <<
", threshold delta = " <<
Record->getThresholdDelta();
1393 auto C = ICCA->getSimplifiedValue(
const_cast<Instruction *
>(
I));
1395 OS <<
", simplified to ";
1396 (*C)->print(
OS,
true);
1402void CallAnalyzer::disableSROA(
Value *V) {
1403 if (
auto *SROAArg = getSROAArgForValueOrNull(V)) {
1404 disableSROAForArg(SROAArg);
1408void CallAnalyzer::disableLoadElimination() {
1409 if (EnableLoadElimination) {
1410 onDisableLoadElimination();
1411 EnableLoadElimination =
false;
1420 unsigned IntPtrWidth =
DL.getIndexTypeSizeInBits(
GEP.getType());
1424 GTI != GTE; ++GTI) {
1425 ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
1427 if (
Constant *SimpleOp = SimplifiedValues.
lookup(GTI.getOperand()))
1428 OpC = dyn_cast<ConstantInt>(SimpleOp);
1435 if (
StructType *STy = GTI.getStructTypeOrNull()) {
1454 for (
const Use &
Op :
GEP.indices())
1465 disableSROA(
I.getOperand(0));
1469 if (
I.isArrayAllocation()) {
1471 if (
auto *AllocSize = dyn_cast_or_null<ConstantInt>(
Size)) {
1480 Type *Ty =
I.getAllocatedType();
1482 AllocSize->getLimitedValue(),
1483 DL.getTypeAllocSize(Ty).getKnownMinValue(), AllocatedSize);
1485 HasDynamicAlloca =
true;
1491 if (
I.isStaticAlloca()) {
1492 Type *Ty =
I.getAllocatedType();
1493 AllocatedSize =
SaturatingAdd(
DL.getTypeAllocSize(Ty).getKnownMinValue(),
1501 if (!
I.isStaticAlloca())
1502 HasDynamicAlloca =
true;
1507bool CallAnalyzer::visitPHI(
PHINode &
I) {
1519 bool CheckSROA =
I.getType()->isPointerTy();
1523 std::pair<Value *, APInt> FirstBaseAndOffset = {
nullptr, ZeroOffset};
1524 Value *FirstV =
nullptr;
1526 for (
unsigned i = 0, e =
I.getNumIncomingValues(); i != e; ++i) {
1529 if (DeadBlocks.
count(Pred))
1533 BasicBlock *KnownSuccessor = KnownSuccessors[Pred];
1534 if (KnownSuccessor && KnownSuccessor !=
I.getParent())
1537 Value *
V =
I.getIncomingValue(i);
1544 C = SimplifiedValues.
lookup(V);
1546 std::pair<Value *, APInt> BaseAndOffset = {
nullptr, ZeroOffset};
1547 if (!
C && CheckSROA)
1548 BaseAndOffset = ConstantOffsetPtrs.
lookup(V);
1550 if (!
C && !BaseAndOffset.first)
1567 if (FirstBaseAndOffset == BaseAndOffset)
1581 FirstBaseAndOffset = BaseAndOffset;
1586 SimplifiedValues[&
I] = FirstC;
1591 if (FirstBaseAndOffset.first) {
1592 ConstantOffsetPtrs[&
I] = FirstBaseAndOffset;
1594 if (
auto *SROAArg = getSROAArgForValueOrNull(FirstV))
1595 SROAArgValues[&
I] = SROAArg;
1607 std::pair<Value *, APInt> BaseAndOffset =
1608 ConstantOffsetPtrs.
lookup(
I.getPointerOperand());
1609 if (!BaseAndOffset.first)
1614 if (!accumulateGEPOffset(cast<GEPOperator>(
I), BaseAndOffset.second))
1618 ConstantOffsetPtrs[&
I] = BaseAndOffset;
1624 auto *SROAArg = getSROAArgForValueOrNull(
I.getPointerOperand());
1628 for (
const Use &
Op :
GEP.indices())
1629 if (!isa<Constant>(
Op) && !SimplifiedValues.
lookup(
Op))
1638 if ((
I.isInBounds() && canFoldInboundsGEP(
I)) || IsGEPOffsetConstant(
I)) {
1640 SROAArgValues[&
I] = SROAArg;
1648 disableSROAForArg(SROAArg);
1649 return isGEPFree(
I);
1653bool CallAnalyzer::simplifyInstruction(
Instruction &
I) {
1666 SimplifiedValues[&
I] =
C;
1679bool CallAnalyzer::simplifyIntrinsicCallIsConstant(
CallBase &CB) {
1681 auto *
C = dyn_cast<Constant>(Arg);
1684 C = dyn_cast_or_null<Constant>(SimplifiedValues.
lookup(Arg));
1687 SimplifiedValues[&CB] = ConstantInt::get(RT,
C ? 1 : 0);
1691bool CallAnalyzer::simplifyIntrinsicCallObjectSize(
CallBase &CB) {
1699 Constant *
C = dyn_cast_or_null<Constant>(V);
1701 SimplifiedValues[&CB] =
C;
1711 std::pair<Value *, APInt> BaseAndOffset =
1712 ConstantOffsetPtrs.
lookup(
I.getOperand(0));
1714 if (BaseAndOffset.first)
1715 ConstantOffsetPtrs[&
I] = BaseAndOffset;
1718 if (
auto *SROAArg = getSROAArgForValueOrNull(
I.getOperand(0)))
1719 SROAArgValues[&
I] = SROAArg;
1732 unsigned IntegerSize =
I.getType()->getScalarSizeInBits();
1733 unsigned AS =
I.getOperand(0)->getType()->getPointerAddressSpace();
1734 if (IntegerSize ==
DL.getPointerSizeInBits(AS)) {
1735 std::pair<Value *, APInt> BaseAndOffset =
1736 ConstantOffsetPtrs.
lookup(
I.getOperand(0));
1737 if (BaseAndOffset.first)
1738 ConstantOffsetPtrs[&
I] = BaseAndOffset;
1748 if (
auto *SROAArg = getSROAArgForValueOrNull(
I.getOperand(0)))
1749 SROAArgValues[&
I] = SROAArg;
1763 unsigned IntegerSize =
Op->getType()->getScalarSizeInBits();
1764 if (IntegerSize <=
DL.getPointerTypeSizeInBits(
I.getType())) {
1765 std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.
lookup(
Op);
1766 if (BaseAndOffset.first)
1767 ConstantOffsetPtrs[&
I] = BaseAndOffset;
1771 if (
auto *SROAArg = getSROAArgForValueOrNull(
Op))
1772 SROAArgValues[&
I] = SROAArg;
1778bool CallAnalyzer::visitCastInst(
CastInst &
I) {
1785 disableSROA(
I.getOperand(0));
1790 switch (
I.getOpcode()) {
1791 case Instruction::FPTrunc:
1792 case Instruction::FPExt:
1793 case Instruction::UIToFP:
1794 case Instruction::SIToFP:
1795 case Instruction::FPToUI:
1796 case Instruction::FPToSI:
1812bool CallAnalyzer::isKnownNonNullInCallee(
Value *V) {
1818 if (
Argument *
A = dyn_cast<Argument>(V))
1819 if (paramHasAttr(
A, Attribute::NonNull))
1825 if (isAllocaDerivedArg(V))
1834bool CallAnalyzer::allowSizeGrowth(
CallBase &Call) {
1851 if (isa<UnreachableInst>(
II->getNormalDest()->getTerminator()))
1853 }
else if (isa<UnreachableInst>(
Call.getParent()->getTerminator()))
1859bool InlineCostCallAnalyzer::isColdCallSite(
CallBase &Call,
1863 if (PSI && PSI->hasProfileSummary())
1864 return PSI->isColdCallSite(Call, CallerBFI);
1875 auto CallSiteBB =
Call.getParent();
1876 auto CallSiteFreq = CallerBFI->
getBlockFreq(CallSiteBB);
1877 auto CallerEntryFreq =
1879 return CallSiteFreq < CallerEntryFreq * ColdProb;
1883InlineCostCallAnalyzer::getHotCallSiteThreshold(
CallBase &Call,
1888 if (PSI && PSI->hasProfileSummary() && PSI->isHotCallSite(Call, CallerBFI))
1894 return std::nullopt;
1904 if (Limit && CallSiteFreq >= *Limit)
1908 return std::nullopt;
1911void InlineCostCallAnalyzer::updateThreshold(
CallBase &Call,
Function &Callee) {
1913 if (!allowSizeGrowth(Call)) {
1921 auto MinIfValid = [](
int A, std::optional<int>
B) {
1922 return B ? std::min(
A, *
B) :
A;
1926 auto MaxIfValid = [](
int A, std::optional<int>
B) {
1927 return B ? std::max(
A, *
B) :
A;
1942 int SingleBBBonusPercent = 50;
1947 auto DisallowAllBonuses = [&]() {
1948 SingleBBBonusPercent = 0;
1949 VectorBonusPercent = 0;
1955 if (
Caller->hasMinSize()) {
1961 SingleBBBonusPercent = 0;
1962 VectorBonusPercent = 0;
1963 }
else if (
Caller->hasOptSize())
1968 if (!
Caller->hasMinSize()) {
1969 if (
Callee.hasFnAttribute(Attribute::InlineHint))
1994 DisallowAllBonuses();
1999 if (PSI->isFunctionEntryHot(&Callee)) {
2005 }
else if (PSI->isFunctionEntryCold(&Callee)) {
2011 DisallowAllBonuses();
2023 SingleBBBonus = Threshold * SingleBBBonusPercent / 100;
2024 VectorBonus = Threshold * VectorBonusPercent / 100;
2029 if (isSoleCallToLocalFunction(Call,
F)) {
2035bool CallAnalyzer::visitCmpInst(
CmpInst &
I) {
2041 if (
I.getOpcode() == Instruction::FCmp)
2046 Value *LHSBase, *RHSBase;
2047 APInt LHSOffset, RHSOffset;
2048 std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.
lookup(LHS);
2050 std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.
lookup(RHS);
2051 if (RHSBase && LHSBase == RHSBase) {
2057 ++NumConstantPtrCmps;
2062 auto isImplicitNullCheckCmp = [](
const CmpInst &
I) {
2063 for (
auto *
User :
I.users())
2064 if (
auto *Instr = dyn_cast<Instruction>(
User))
2065 if (!
Instr->getMetadata(LLVMContext::MD_make_implicit))
2072 if (
I.isEquality() && isa<ConstantPointerNull>(
I.getOperand(1))) {
2073 if (isKnownNonNullInCallee(
I.getOperand(0))) {
2081 if (isImplicitNullCheckCmp(
I))
2084 return handleSROA(
I.getOperand(0), isa<ConstantPointerNull>(
I.getOperand(1)));
2091 Value *LHSBase, *RHSBase;
2092 APInt LHSOffset, RHSOffset;
2093 std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.
lookup(LHS);
2095 std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.
lookup(RHS);
2096 if (RHSBase && LHSBase == RHSBase) {
2102 SimplifiedValues[&
I] =
C;
2103 ++NumConstantPtrDiffs;
2111 return Base::visitSub(
I);
2116 Constant *CLHS = dyn_cast<Constant>(LHS);
2118 CLHS = SimplifiedValues.
lookup(LHS);
2119 Constant *CRHS = dyn_cast<Constant>(RHS);
2121 CRHS = SimplifiedValues.
lookup(RHS);
2123 Value *SimpleV =
nullptr;
2124 if (
auto FI = dyn_cast<FPMathOperator>(&
I))
2125 SimpleV =
simplifyBinOp(
I.getOpcode(), CLHS ? CLHS : LHS, CRHS ? CRHS : RHS,
2126 FI->getFastMathFlags(),
DL);
2131 if (
Constant *
C = dyn_cast_or_null<Constant>(SimpleV))
2132 SimplifiedValues[&
I] =
C;
2145 if (
I.getType()->isFloatingPointTy() &&
2162 if (
Constant *
C = dyn_cast_or_null<Constant>(SimpleV))
2163 SimplifiedValues[&
I] =
C;
2174bool CallAnalyzer::visitLoad(
LoadInst &
I) {
2175 if (handleSROA(
I.getPointerOperand(),
I.isSimple()))
2181 if (EnableLoadElimination &&
2182 !LoadAddrSet.
insert(
I.getPointerOperand()).second &&
I.isUnordered()) {
2183 onLoadEliminationOpportunity();
2192 if (handleSROA(
I.getPointerOperand(),
I.isSimple()))
2203 disableLoadElimination();
2215 return Base::visitExtractValue(
I);
2224 return Base::visitInsertValue(
I);
2247 C = dyn_cast_or_null<Constant>(SimplifiedValues.
lookup(
I));
2254 SimplifiedValues[&
Call] =
C;
2261bool CallAnalyzer::visitCallBase(
CallBase &Call) {
2262 if (!onCallBaseVisitStart(Call))
2265 if (
Call.hasFnAttr(Attribute::ReturnsTwice) &&
2266 !
F.hasFnAttribute(Attribute::ReturnsTwice)) {
2268 ExposesReturnsTwice =
true;
2271 if (isa<CallInst>(Call) && cast<CallInst>(Call).cannotDuplicate())
2272 ContainsNoDuplicateCall =
true;
2275 bool IsIndirectCall = !
F;
2276 if (IsIndirectCall) {
2280 F = dyn_cast_or_null<Function>(SimplifiedValues.
lookup(Callee));
2281 if (!
F ||
F->getFunctionType() !=
Call.getFunctionType()) {
2282 onCallArgumentSetup(Call);
2284 if (!
Call.onlyReadsMemory())
2285 disableLoadElimination();
2286 return Base::visitCallBase(Call);
2290 assert(
F &&
"Expected a call to a known function");
2293 if (simplifyCallSite(
F, Call))
2299 switch (
II->getIntrinsicID()) {
2302 disableLoadElimination();
2303 return Base::visitCallBase(Call);
2305 case Intrinsic::load_relative:
2306 onLoadRelativeIntrinsic();
2309 case Intrinsic::memset:
2310 case Intrinsic::memcpy:
2311 case Intrinsic::memmove:
2312 disableLoadElimination();
2315 case Intrinsic::icall_branch_funnel:
2316 case Intrinsic::localescape:
2317 HasUninlineableIntrinsic =
true;
2319 case Intrinsic::vastart:
2320 InitsVargArgs =
true;
2322 case Intrinsic::launder_invariant_group:
2323 case Intrinsic::strip_invariant_group:
2324 if (
auto *SROAArg = getSROAArgForValueOrNull(
II->getOperand(0)))
2325 SROAArgValues[
II] = SROAArg;
2327 case Intrinsic::is_constant:
2328 return simplifyIntrinsicCallIsConstant(Call);
2329 case Intrinsic::objectsize:
2330 return simplifyIntrinsicCallObjectSize(Call);
2334 if (
F ==
Call.getFunction()) {
2337 IsRecursiveCall =
true;
2338 if (!AllowRecursiveCall)
2343 onLoweredCall(
F, Call, IsIndirectCall);
2346 if (!(
Call.onlyReadsMemory() || (IsIndirectCall &&
F->onlyReadsMemory())))
2347 disableLoadElimination();
2348 return Base::visitCallBase(Call);
2351bool CallAnalyzer::visitReturnInst(
ReturnInst &RI) {
2353 bool Free = !HasReturn;
2358bool CallAnalyzer::visitBranchInst(
BranchInst &BI) {
2365 isa_and_nonnull<ConstantInt>(
2369bool CallAnalyzer::visitSelectInst(
SelectInst &SI) {
2370 bool CheckSROA =
SI.getType()->isPointerTy();
2374 Constant *TrueC = dyn_cast<Constant>(TrueVal);
2376 TrueC = SimplifiedValues.
lookup(TrueVal);
2377 Constant *FalseC = dyn_cast<Constant>(FalseVal);
2379 FalseC = SimplifiedValues.
lookup(FalseVal);
2381 dyn_cast_or_null<Constant>(SimplifiedValues.
lookup(
SI.getCondition()));
2385 if (TrueC == FalseC && TrueC) {
2386 SimplifiedValues[&
SI] = TrueC;
2391 return Base::visitSelectInst(SI);
2393 std::pair<Value *, APInt> TrueBaseAndOffset =
2394 ConstantOffsetPtrs.
lookup(TrueVal);
2395 std::pair<Value *, APInt> FalseBaseAndOffset =
2396 ConstantOffsetPtrs.
lookup(FalseVal);
2397 if (TrueBaseAndOffset == FalseBaseAndOffset && TrueBaseAndOffset.first) {
2398 ConstantOffsetPtrs[&
SI] = TrueBaseAndOffset;
2400 if (
auto *SROAArg = getSROAArgForValueOrNull(TrueVal))
2401 SROAArgValues[&
SI] = SROAArg;
2405 return Base::visitSelectInst(SI);
2416 if (TrueC && FalseC) {
2418 SimplifiedValues[&
SI] =
C;
2422 return Base::visitSelectInst(SI);
2426 if (
Constant *SelectedC = dyn_cast<Constant>(SelectedV)) {
2427 SimplifiedValues[&
SI] = SelectedC;
2434 std::pair<Value *, APInt> BaseAndOffset =
2435 ConstantOffsetPtrs.
lookup(SelectedV);
2436 if (BaseAndOffset.first) {
2437 ConstantOffsetPtrs[&
SI] = BaseAndOffset;
2439 if (
auto *SROAArg = getSROAArgForValueOrNull(SelectedV))
2440 SROAArgValues[&
SI] = SROAArg;
2446bool CallAnalyzer::visitSwitchInst(
SwitchInst &SI) {
2449 if (isa<ConstantInt>(
SI.getCondition()))
2452 if (isa<ConstantInt>(V))
2467 unsigned JumpTableSize = 0;
2469 unsigned NumCaseCluster =
2472 onFinalizeSwitch(JumpTableSize, NumCaseCluster,
SI.defaultDestUndefined());
2485 HasIndirectBr =
true;
2489bool CallAnalyzer::visitResumeInst(
ResumeInst &RI) {
2523 for (
const Use &
Op :
I.operands())
2548 if (
I.isDebugOrPseudoInst())
2556 if (isa<ExtractElementInst>(
I) ||
I.getType()->isVectorTy())
2557 ++NumVectorInstructions;
2564 onInstructionAnalysisStart(&
I);
2566 if (Base::visit(&
I))
2567 ++NumInstructionsSimplified;
2569 onMissedSimplification();
2571 onInstructionAnalysisFinish(&
I);
2572 using namespace ore;
2575 if (IsRecursiveCall && !AllowRecursiveCall)
2577 else if (ExposesReturnsTwice)
2579 else if (HasDynamicAlloca)
2581 else if (HasIndirectBr)
2583 else if (HasUninlineableIntrinsic)
2585 else if (InitsVargArgs)
2587 if (!
IR.isSuccess()) {
2592 <<
NV(
"Callee", &
F) <<
" has uninlinable pattern ("
2593 <<
NV(
"InlineResult",
IR.getFailureReason())
2594 <<
") and cost is not fully computed";
2609 <<
NV(
"Callee", &
F) <<
" is "
2610 <<
NV(
"InlineResult",
IR.getFailureReason())
2611 <<
". Cost is not fully computed";
2618 "Call site analysis is not favorable to inlining.");
2630ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(
Value *&V) {
2631 if (!
V->getType()->isPointerTy())
2634 unsigned AS =
V->getType()->getPointerAddressSpace();
2635 unsigned IntPtrWidth =
DL.getIndexSizeInBits(AS);
2644 if (!
GEP->isInBounds() || !accumulateGEPOffset(*
GEP,
Offset))
2646 V =
GEP->getPointerOperand();
2647 }
else if (
GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
2648 if (GA->isInterposable())
2650 V = GA->getAliasee();
2654 assert(
V->getType()->isPointerTy() &&
"Unexpected operand type!");
2655 }
while (Visited.
insert(V).second);
2657 Type *IdxPtrTy =
DL.getIndexType(
V->getType());
2658 return cast<ConstantInt>(ConstantInt::get(IdxPtrTy,
Offset));
2672 return (DeadBlocks.
count(Pred) ||
2673 (KnownSuccessors[Pred] && KnownSuccessors[Pred] != Succ));
2678 return (!DeadBlocks.
count(BB) &&
2684 if (Succ == NextBB || !IsNewlyDead(Succ))
2688 while (!NewDead.
empty()) {
2690 if (DeadBlocks.
insert(Dead).second)
2709 auto Result = onAnalysisStart();
2720 if (Call &&
Call->getFunction() == Caller) {
2721 IsCallerRecursive =
true;
2731 if (
Constant *
C = dyn_cast<Constant>(CAI))
2732 SimplifiedValues[&FAI] =
C;
2734 Value *PtrArg = *CAI;
2735 if (
ConstantInt *
C = stripAndComputeInBoundsConstantOffsets(PtrArg)) {
2736 ConstantOffsetPtrs[&FAI] = std::make_pair(PtrArg,
C->getValue());
2739 if (
auto *SROAArg = dyn_cast<AllocaInst>(PtrArg)) {
2740 SROAArgValues[&FAI] = SROAArg;
2741 onInitializeSROAArg(SROAArg);
2742 EnabledSROAAllocas.
insert(SROAArg);
2747 NumConstantArgs = SimplifiedValues.
size();
2748 NumConstantOffsetPtrArgs = ConstantOffsetPtrs.
size();
2749 NumAllocaArgs = SROAArgValues.
size();
2766 BBWorklist.
insert(&
F.getEntryBlock());
2789 if (!isa<CallBrInst>(*U))
2795 if (!
IR.isSuccess())
2802 if (
BranchInst *BI = dyn_cast<BranchInst>(TI)) {
2806 dyn_cast_or_null<ConstantInt>(SimplifiedValues.
lookup(
Cond))) {
2808 BBWorklist.
insert(NextBB);
2809 KnownSuccessors[BB] = NextBB;
2810 findDeadBlocks(BB, NextBB);
2814 }
else if (
SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
2817 dyn_cast_or_null<ConstantInt>(SimplifiedValues.
lookup(
Cond))) {
2818 BasicBlock *NextBB =
SI->findCaseValue(SimpleCond)->getCaseSuccessor();
2819 BBWorklist.
insert(NextBB);
2820 KnownSuccessors[BB] = NextBB;
2821 findDeadBlocks(BB, NextBB);
2831 onBlockAnalyzed(BB);
2837 if (!isSoleCallToLocalFunction(CandidateCall,
F) && ContainsNoDuplicateCall)
2847 FinalStackSizeThreshold = *AttrMaxStackSize;
2848 if (AllocatedSize > FinalStackSizeThreshold)
2851 return finalizeAnalysis();
2855#define DEBUG_PRINT_STAT(x) OS << " " #x ": " << x << "\n"
2857 F.print(
OS, &Writer);
2871#undef DEBUG_PRINT_STAT
2874#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2888 auto CalleeTLI = GetTLI(*Callee);
2891 GetTLI(*Caller).areInlineCompatible(CalleeTLI,
2899 for (
unsigned I = 0, E = Call.arg_size();
I != E; ++
I) {
2900 if (Call.isByValArgument(
I)) {
2903 PointerType *PTy = cast<PointerType>(Call.getArgOperand(
I)->getType());
2904 unsigned TypeSize =
DL.getTypeSizeInBits(Call.getParamByValType(
I));
2906 unsigned PointerSize =
DL.getPointerSizeInBits(AS);
2908 unsigned NumStores = (
TypeSize + PointerSize - 1) / PointerSize;
2916 NumStores = std::min(NumStores, 8U);
2929 return std::min<int64_t>(
Cost, INT_MAX);
2938 return getInlineCost(Call, Call.getCalledFunction(), Params, CalleeTTI,
2939 GetAssumptionCache, GetTLI, GetBFI, PSI, ORE);
2958 InlineCostCallAnalyzer CA(*Call.getCalledFunction(), Call, Params, CalleeTTI,
2959 GetAssumptionCache, GetBFI, PSI, ORE,
true,
2961 auto R = CA.analyze();
2963 return std::nullopt;
2964 return CA.getCost();
2972 InlineCostFeaturesAnalyzer CFA(CalleeTTI, GetAssumptionCache, GetBFI, PSI,
2973 ORE, *Call.getCalledFunction(), Call);
2974 auto R = CFA.analyze();
2976 return std::nullopt;
2977 return CFA.features();
2992 if (Callee->isPresplitCoroutine())
3000 unsigned AllocaAS = Callee->getDataLayout().getAllocaAddrSpace();
3001 for (
unsigned I = 0, E = Call.arg_size();
I != E; ++
I)
3002 if (Call.isByValArgument(
I)) {
3003 PointerType *PTy = cast<PointerType>(Call.getArgOperand(
I)->getType());
3011 if (Call.hasFnAttr(Attribute::AlwaysInline)) {
3012 if (Call.getAttributes().hasFnAttr(Attribute::NoInline))
3016 if (IsViable.isSuccess())
3023 Function *Caller = Call.getCaller();
3028 if (Caller->hasOptNone())
3033 if (!Caller->nullPointerIsDefined() && Callee->nullPointerIsDefined())
3037 if (Callee->isInterposable())
3041 if (Callee->hasFnAttribute(Attribute::NoInline))
3045 if (Call.isNoInline())
3048 return std::nullopt;
3063 if (UserDecision->isSuccess())
3069 <<
"... (caller:" << Call.getCaller()->getName()
3072 InlineCostCallAnalyzer CA(*Callee, Call, Params, CalleeTTI,
3073 GetAssumptionCache, GetBFI, PSI, ORE);
3081 if (CA.wasDecidedByCostBenefit()) {
3084 CA.getCostBenefitPair());
3089 if (CA.wasDecidedByCostThreshold())
3091 CA.getStaticBonusApplied());
3100 bool ReturnsTwice =
F.hasFnAttribute(Attribute::ReturnsTwice);
3110 if (!isa<CallBrInst>(*U))
3113 for (
auto &
II : BB) {
3119 Function *Callee = Call->getCalledFunction();
3125 if (!ReturnsTwice && isa<CallInst>(Call) &&
3126 cast<CallInst>(Call)->canReturnTwice())
3130 switch (Callee->getIntrinsicID()) {
3133 case llvm::Intrinsic::icall_branch_funnel:
3137 "disallowed inlining of @llvm.icall.branch.funnel");
3138 case llvm::Intrinsic::localescape:
3142 "disallowed inlining of @llvm.localescape");
3143 case llvm::Intrinsic::vastart:
3147 "contains VarArgs initialized with va_start");
3218 unsigned SizeOptLevel) {
3221 if (SizeOptLevel == 1)
3223 if (SizeOptLevel == 2)
3259 if (
CallInst *CI = dyn_cast<CallInst>(&
I)) {
3260 Function *CalledFunction = CI->getCalledFunction();
3264 InlineCostCallAnalyzer ICCA(*CalledFunction, *CI, Params,
TTI,
3265 GetAssumptionCache,
nullptr, &PSI, &ORE);
3267 OS <<
" Analyzing call of " << CalledFunction->
getName()
3268 <<
"... (caller:" << CI->getCaller()->getName() <<
")\n";
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
SetVector< BasicBlock * > BBSetVector
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
static bool isColdCallSite(CallBase &CB, BlockFrequencyInfo &CallerBFI)
Return true if the block containing the call site has a BlockFrequency of less than ColdCCRelFreq% of...
static cl::opt< int > InlineSavingsMultiplier("inline-savings-multiplier", cl::Hidden, cl::init(8), cl::desc("Multiplier to multiply cycle savings by during inlining"))
static cl::opt< int > InlineThreshold("inline-threshold", cl::Hidden, cl::init(225), cl::desc("Control the amount of inlining to perform (default = 225)"))
static cl::opt< int > CallPenalty("inline-call-penalty", cl::Hidden, cl::init(25), cl::desc("Call penalty that is applied per callsite when inlining"))
static cl::opt< int > HotCallSiteThreshold("hot-callsite-threshold", cl::Hidden, cl::init(3000), cl::desc("Threshold for hot callsites "))
static cl::opt< int > ColdThreshold("inlinecold-threshold", cl::Hidden, cl::init(45), cl::desc("Threshold for inlining functions with cold attribute"))
static cl::opt< size_t > RecurStackSizeThreshold("recursive-inline-max-stacksize", cl::Hidden, cl::init(InlineConstants::TotalAllocaSizeRecursiveCaller), cl::desc("Do not inline recursive functions with a stack " "size that exceeds the specified limit"))
static cl::opt< bool > PrintInstructionComments("print-instruction-comments", cl::Hidden, cl::init(false), cl::desc("Prints comments for instruction based on inline cost analysis"))
static cl::opt< int > LocallyHotCallSiteThreshold("locally-hot-callsite-threshold", cl::Hidden, cl::init(525), cl::desc("Threshold for locally hot callsites "))
static cl::opt< bool > InlineCallerSupersetNoBuiltin("inline-caller-superset-nobuiltin", cl::Hidden, cl::init(true), cl::desc("Allow inlining when caller has a superset of callee's nobuiltin " "attributes."))
static cl::opt< int > HintThreshold("inlinehint-threshold", cl::Hidden, cl::init(325), cl::desc("Threshold for inlining functions with inline hint"))
static cl::opt< size_t > StackSizeThreshold("inline-max-stacksize", cl::Hidden, cl::init(std::numeric_limits< size_t >::max()), cl::desc("Do not inline functions with a stack size " "that exceeds the specified limit"))
static int computeThresholdFromOptLevels(unsigned OptLevel, unsigned SizeOptLevel)
static cl::opt< uint64_t > HotCallSiteRelFreq("hot-callsite-rel-freq", cl::Hidden, cl::init(60), cl::desc("Minimum block frequency, expressed as a multiple of caller's " "entry frequency, for a callsite to be hot in the absence of " "profile information."))
static cl::opt< int > InlineSavingsProfitableMultiplier("inline-savings-profitable-multiplier", cl::Hidden, cl::init(4), cl::desc("A multiplier on top of cycle savings to decide whether the " "savings won't justify the cost"))
static cl::opt< int > MemAccessCost("inline-memaccess-cost", cl::Hidden, cl::init(0), cl::desc("Cost of load/store instruction when inlining"))
static cl::opt< int > ColdCallSiteThreshold("inline-cold-callsite-threshold", cl::Hidden, cl::init(45), cl::desc("Threshold for inlining cold callsites"))
static cl::opt< bool > IgnoreTTIInlineCompatible("ignore-tti-inline-compatible", cl::Hidden, cl::init(false), cl::desc("Ignore TTI attributes compatibility check between callee/caller " "during inline cost calculation"))
static cl::opt< bool > OptComputeFullInlineCost("inline-cost-full", cl::Hidden, cl::desc("Compute the full inline cost of a call site even when the cost " "exceeds the threshold."))
#define DEBUG_PRINT_STAT(x)
static cl::opt< bool > InlineEnableCostBenefitAnalysis("inline-enable-cost-benefit-analysis", cl::Hidden, cl::init(false), cl::desc("Enable the cost-benefit analysis for the inliner"))
static cl::opt< int > InstrCost("inline-instr-cost", cl::Hidden, cl::init(5), cl::desc("Cost of a single instruction when inlining"))
static cl::opt< int > InlineSizeAllowance("inline-size-allowance", cl::Hidden, cl::init(100), cl::desc("The maximum size of a callee that get's " "inlined without sufficient cycle savings"))
static bool functionsHaveCompatibleAttributes(Function *Caller, Function *Callee, TargetTransformInfo &TTI, function_ref< const TargetLibraryInfo &(Function &)> &GetTLI)
Test that there are no attribute conflicts between Caller and Callee that prevent inlining.
static cl::opt< int > ColdCallSiteRelFreq("cold-callsite-rel-freq", cl::Hidden, cl::init(2), cl::desc("Maximum block frequency, expressed as a percentage of caller's " "entry frequency, for a callsite to be cold in the absence of " "profile information."))
static cl::opt< bool > DisableGEPConstOperand("disable-gep-const-evaluation", cl::Hidden, cl::init(false), cl::desc("Disables evaluation of GetElementPtr with constant operands"))
static cl::opt< int > DefaultThreshold("inlinedefault-threshold", cl::Hidden, cl::init(225), cl::desc("Default amount of inlining to perform"))
static Constant * getFalse(Type *Ty)
For a boolean type or a vector of boolean type, return false or a vector with every element false.
Legalize the Machine IR a function s Machine IR
mir Rename Register Operands
uint64_t IntrinsicInst * II
FunctionAnalysisManager FAM
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static unsigned getFastMathFlags(const MachineInstr &I)
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static SymbolRef::Type getType(const Symbol *Sym)
Class for arbitrary precision integers.
APInt udiv(const APInt &RHS) const
Unsigned division operation.
bool ult(const APInt &RHS) const
Unsigned less than comparison.
APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
an instruction to allocate memory on the stack
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
This class represents an incoming formal argument to a Function.
virtual void emitInstructionAnnot(const Instruction *, formatted_raw_ostream &)
emitInstructionAnnot - This may be implemented to emit a string right before an instruction is emitte...
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
StringRef getValueAsString() const
Return the attribute's value as a string.
AttrKind
This enumeration lists the attributes that can be associated with parameters, function results,...
bool isValid() const
Return true if the attribute is any kind of attribute.
LLVM Basic Block Representation.
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches,...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
This class represents a no-op cast from one type to another.
static BlockAddress * get(Function *F, BasicBlock *BB)
Return a BlockAddress for the specified function and basic block.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
std::optional< uint64_t > getBlockProfileCount(const BasicBlock *BB, bool AllowSynthetic=false) const
Returns the estimated profile count of BB.
BlockFrequency getEntryFreq() const
BlockFrequency getBlockFreq(const BasicBlock *BB) const
getblockFreq - Return block frequency.
std::optional< BlockFrequency > mul(uint64_t Factor) const
Multiplies frequency with Factor. Returns nullopt in case of overflow.
Conditional or Unconditional Branch instruction.
bool isConditional() const
BasicBlock * getSuccessor(unsigned i) const
bool isUnconditional() const
Value * getCondition() const
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Determine whether the argument or parameter has the given attribute.
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
Attribute getFnAttr(StringRef Kind) const
Get the attribute of a given kind for the function.
Value * getArgOperand(unsigned i) const
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
FunctionType * getFunctionType() const
Function * getCaller()
Helper to get the caller (the parent function).
This class represents a function call, abstracting a target machine's calling convention.
This is the base class for all instructions that perform data casts.
This class is the base class for the comparison instructions.
static Constant * getSub(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
This is the shared class of boolean and integer constants.
static ConstantInt * getTrue(LLVMContext &Context)
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
const APInt & getValue() const
Return the constant as an APInt value reference.
static ConstantInt * getBool(LLVMContext &Context, bool V)
This is an important base class in LLVM.
bool isAllOnesValue() const
Return true if this is the value that would be returned by getAllOnesValue.
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
iterator find(const_arg_type_t< KeyT > Val)
bool erase(const KeyT &Val)
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Implements a dense probed hash-table based set.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Type * getReturnType() const
Class to represent profile counts.
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
static bool compare(const APInt &LHS, const APInt &RHS, ICmpInst::Predicate Pred)
Return result of LHS Pred RHS comparison.
Indirect Branch Instruction.
Represents the cost of inlining a function.
static InlineCost getNever(const char *Reason, std::optional< CostBenefitPair > CostBenefit=std::nullopt)
static InlineCost getAlways(const char *Reason, std::optional< CostBenefitPair > CostBenefit=std::nullopt)
static InlineCost get(int Cost, int Threshold, int StaticBonus=0)
InlineResult is basically true or false.
static InlineResult success()
static InlineResult failure(const char *Reason)
const char * getFailureReason() const
This instruction inserts a struct field of array element value into an aggregate value.
Base class for instruction visitors.
RetTy visitIndirectBrInst(IndirectBrInst &I)
RetTy visitCmpInst(CmpInst &I)
RetTy visitCallBase(CallBase &I)
RetTy visitCleanupReturnInst(CleanupReturnInst &I)
RetTy visitUnreachableInst(UnreachableInst &I)
RetTy visitSwitchInst(SwitchInst &I)
void visit(Iterator Start, Iterator End)
RetTy visitReturnInst(ReturnInst &I)
RetTy visitBinaryOperator(BinaryOperator &I)
RetTy visitResumeInst(ResumeInst &I)
RetTy visitCatchReturnInst(CatchReturnInst &I)
RetTy visitCastInst(CastInst &I)
RetTy visitBranchInst(BranchInst &I)
RetTy visitSelectInst(SelectInst &I)
void visitInstruction(Instruction &I)
unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const Function * getFunction() const
Return the function this instruction belongs to.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
This class represents a cast from an integer to a pointer.
A wrapper class for inspecting calls to intrinsic functions.
An instruction for reading from memory.
Represents a single loop in the control flow graph.
A Module instance is used to store all the information related to an LLVM module.
Class to represent pointers.
unsigned getAddressSpace() const
Return the address space of the Pointer type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Analysis providing profile information.
This class represents a cast from a pointer to an integer.
Resume the propagation of an exception.
Return a value (possibly void), from a function.
This class represents the LLVM 'select' instruction.
A vector that has set insertion semantics.
size_type size() const
Determine the number of elements in the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e.
bool getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Used to lazily calculate structure layout information for a target machine, based on the DataLayout s...
TypeSize getElementOffset(unsigned Idx) const
Class to represent struct types.
Provides information about what library functions are available for the current target.
The instances of the Type class are immutable: once they are created, they are never changed.
This function has undefined behavior.
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
LLVMContext & getContext() const
All values hold a context through their type.
StringRef getName() const
Return a constant reference to the value's name.
std::pair< iterator, bool > insert(const ValueT &V)
bool erase(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
This class implements an extremely fast bulk output stream that can only output to a stream.
bool areInlineCompatible(const Function &Caller, const Function &Callee)
@ Cold
Attempts to make code in the caller as efficient as possible under the assumption that the call is no...
@ C
The default llvm calling convention, compatible with C.
const char FunctionInlineCostMultiplierAttributeName[]
const int OptSizeThreshold
Use when optsize (-Os) is specified.
const int OptMinSizeThreshold
Use when minsize (-Oz) is specified.
const uint64_t MaxSimplifiedDynamicAllocaToInline
Do not inline dynamic allocas that have been constant propagated to be static allocas above this amou...
const int IndirectCallThreshold
const int OptAggressiveThreshold
Use when -O3 is specified.
const char MaxInlineStackSizeAttributeName[]
const int LastCallToStaticBonus
const unsigned TotalAllocaSizeRecursiveCaller
Do not inline functions which allocate this many bytes on the stack when the caller is recursive.
bool match(Val *V, const Pattern &P)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
initializer< Ty > init(const Ty &Val)
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< InstrNode * > Instr
This is an optimization pass for GlobalISel generic memory operations.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Constant * ConstantFoldSelectInstruction(Constant *Cond, Constant *V1, Constant *V2)
Attempt to constant fold a select instruction with the specified operands.
bool isAssumeLikeIntrinsic(const Instruction *I)
Return true if it is an intrinsic that cannot be speculated but also cannot trap.
bool canConstantFoldCallTo(const CallBase *Call, const Function *F)
canConstantFoldCallTo - Return true if its even possible to fold a call to the specified function.
Function::ProfileCount ProfileCount
std::optional< int > getStringFnAttrAsInt(CallBase &CB, StringRef AttrKind)
auto successors(const MachineBasicBlock *BB)
Value * lowerObjectSizeCall(IntrinsicInst *ObjectSize, const DataLayout &DL, const TargetLibraryInfo *TLI, bool MustSucceed)
Try to turn a call to @llvm.objectsize into an integer value of the given Type.
std::optional< InlineCostFeatures > getInliningCostFeatures(CallBase &Call, TargetTransformInfo &CalleeTTI, function_ref< AssumptionCache &(Function &)> GetAssumptionCache, function_ref< BlockFrequencyInfo &(Function &)> GetBFI=nullptr, ProfileSummaryInfo *PSI=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
Get the expanded cost features.
LogicalResult failure(bool IsFailure=true)
Utility function to generate a LogicalResult.
gep_type_iterator gep_type_end(const User *GEP)
Constant * ConstantFoldCall(const CallBase *Call, Function *F, ArrayRef< Constant * > Operands, const TargetLibraryInfo *TLI=nullptr, bool AllowNonDeterministic=true)
ConstantFoldCall - Attempt to constant fold a call to the specified function with the specified argum...
Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
InlineResult isInlineViable(Function &Callee)
Minimal filter to detect invalid constructs for inlining.
InlineCost getInlineCost(CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI, function_ref< AssumptionCache &(Function &)> GetAssumptionCache, function_ref< const TargetLibraryInfo &(Function &)> GetTLI, function_ref< BlockFrequencyInfo &(Function &)> GetBFI=nullptr, ProfileSummaryInfo *PSI=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
Get an InlineCost object representing the cost of inlining this callsite.
Value * simplifyFNegInst(Value *Op, FastMathFlags FMF, const SimplifyQuery &Q)
Given operand for an FNeg, fold the result or return null.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
std::enable_if_t< std::is_unsigned_v< T >, T > SaturatingMultiplyAdd(T X, T Y, T A, bool *ResultOverflowed=nullptr)
Multiply two unsigned integers, X and Y, and add the unsigned integer, A to the product.
Constant * ConstantFoldInstOperands(Instruction *I, ArrayRef< Constant * > Ops, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, bool AllowNonDeterministic=true)
ConstantFoldInstOperands - Attempt to constant fold an instruction with the specified operands.
std::array< int, static_cast< size_t >(InlineCostFeatureIndex::NumberOfFeatures)> InlineCostFeatures
std::optional< InlineResult > getAttributeBasedInliningDecision(CallBase &Call, Function *Callee, TargetTransformInfo &CalleeTTI, function_ref< const TargetLibraryInfo &(Function &)> GetTLI)
Returns InlineResult::success() if the call site should be always inlined because of user directives,...
Value * simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a BinaryOperator, fold the result or return null.
InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options.
int getCallsiteCost(const TargetTransformInfo &TTI, const CallBase &Call, const DataLayout &DL)
Return the cost associated with a callsite, including parameter passing and the call/return instructi...
std::optional< int > getInliningCostEstimate(CallBase &Call, TargetTransformInfo &CalleeTTI, function_ref< AssumptionCache &(Function &)> GetAssumptionCache, function_ref< BlockFrequencyInfo &(Function &)> GetBFI=nullptr, ProfileSummaryInfo *PSI=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
Get the cost estimate ignoring thresholds.
gep_type_iterator gep_type_begin(const User *GEP)
auto predecessors(const MachineBasicBlock *BB)
std::enable_if_t< std::is_unsigned_v< T >, T > SaturatingAdd(T X, T Y, bool *ResultOverflowed=nullptr)
Add two unsigned integers, X and Y, of type T.
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM)
Thresholds to tune inline cost analysis.
std::optional< int > OptMinSizeThreshold
Threshold to use when the caller is optimized for minsize.
std::optional< int > OptSizeThreshold
Threshold to use when the caller is optimized for size.
std::optional< int > ColdCallSiteThreshold
Threshold to use when the callsite is considered cold.
std::optional< int > ColdThreshold
Threshold to use for cold callees.
std::optional< int > HotCallSiteThreshold
Threshold to use when the callsite is considered hot.
std::optional< bool > AllowRecursiveCall
Indicate whether we allow inlining for recursive call.
int DefaultThreshold
The default threshold to start with for a callee.
std::optional< int > HintThreshold
Threshold to use for callees with inline hint.
std::optional< int > LocallyHotCallSiteThreshold
Threshold to use when the callsite is considered hot relative to function entry.