InlineOrder.cpp
//===- InlineOrder.cpp - Inlining order abstraction -*- C++ ---*-----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/InlineOrder.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InlineAdvisor.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Support/CommandLine.h"

using namespace llvm;

#define DEBUG_TYPE "inline-order"

enum class InlinePriorityMode : int { Size, Cost, CostBenefit };

static cl::opt<InlinePriorityMode> UseInlinePriority(
    "inline-priority-mode", cl::init(InlinePriorityMode::Size), cl::Hidden,
    cl::desc("Choose the priority mode to use in module inline"),
    cl::values(clEnumValN(InlinePriorityMode::Size, "size",
                          "Use callee size priority."),
               clEnumValN(InlinePriorityMode::Cost, "cost",
                          "Use inline cost priority."),
               clEnumValN(InlinePriorityMode::CostBenefit, "cost-benefit",
                          "Use cost-benefit ratio.")));

static cl::opt<int> ModuleInlinerTopPriorityThreshold(
    "moudle-inliner-top-priority-threshold", cl::Hidden, cl::init(0),
    cl::desc("The cost threshold for call sites that get inlined without the "
             "cost-benefit analysis"));
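
// Usage sketch: with the module inliner enabled in the pipeline (the pass name
// used below is an assumption, not taken from this file), the priority mode is
// selected on the opt command line, e.g.:
//   opt -passes=module-inline -inline-priority-mode=cost-benefit in.ll -S -o out.ll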

namespace {

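// Helper that computes the inline cost of the call site \p CB, wiring up the
// analysis getters (assumption cache, BFI, TLI) and the cached profile summary
// from the FunctionAnalysisManager before delegating to getInlineCost().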
llvm::InlineCost getInlineCostWrapper(CallBase &CB,
                                      FunctionAnalysisManager &FAM,
                                      const InlineParams &Params) {
  Function &Caller = *CB.getCaller();
  ProfileSummaryInfo *PSI =
      FAM.getResult<ModuleAnalysisManagerFunctionProxy>(Caller)
          .getCachedResult<ProfileSummaryAnalysis>(
              *CB.getParent()->getParent()->getParent());

  auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(Caller);
  auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
    return FAM.getResult<AssumptionAnalysis>(F);
  };
  auto GetBFI = [&](Function &F) -> BlockFrequencyInfo & {
    return FAM.getResult<BlockFrequencyAnalysis>(F);
  };
  auto GetTLI = [&](Function &F) -> const TargetLibraryInfo & {
    return FAM.getResult<TargetLibraryAnalysis>(F);
  };

  Function &Callee = *CB.getCalledFunction();
  auto &CalleeTTI = FAM.getResult<TargetIRAnalysis>(Callee);
  bool RemarksEnabled =
      Callee.getContext().getDiagHandlerPtr()->isMissedOptRemarkEnabled(
          DEBUG_TYPE);
  return getInlineCost(CB, Params, CalleeTTI, GetAssumptionCache, GetTLI,
                       GetBFI, PSI, RemarksEnabled ? &ORE : nullptr);
}

class SizePriority {
public:
  SizePriority() = default;
  SizePriority(const CallBase *CB, FunctionAnalysisManager &,
               const InlineParams &) {
    Function *Callee = CB->getCalledFunction();
    Size = Callee->getInstructionCount();
  }

  static bool isMoreDesirable(const SizePriority &P1, const SizePriority &P2) {
    return P1.Size < P2.Size;
  }

private:
  unsigned Size;
};

class CostPriority {
public:
  CostPriority() = default;
  CostPriority(const CallBase *CB, FunctionAnalysisManager &FAM,
               const InlineParams &Params) {
    auto IC = getInlineCostWrapper(const_cast<CallBase &>(*CB), FAM, Params);
    if (IC.isVariable())
      Cost = IC.getCost();
    else
      Cost = IC.isNever() ? INT_MAX : INT_MIN;
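    // Forced decisions bracket the variable costs: "never" sorts last
    // (INT_MAX, least desirable) and "always" sorts first (INT_MIN, most
    // desirable), since a smaller cost means a more desirable call site.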
  }

  static bool isMoreDesirable(const CostPriority &P1, const CostPriority &P2) {
    return P1.Cost < P2.Cost;
  }

private:
  int Cost;
};

class CostBenefitPriority {
public:
  CostBenefitPriority() = default;
  CostBenefitPriority(const CallBase *CB, FunctionAnalysisManager &FAM,
                      const InlineParams &Params) {
    auto IC = getInlineCostWrapper(const_cast<CallBase &>(*CB), FAM, Params);
    Cost = IC.getCost();
    StaticBonusApplied = IC.getStaticBonusApplied();
    CostBenefit = IC.getCostBenefit();
  }

  static bool isMoreDesirable(const CostBenefitPriority &P1,
                              const CostBenefitPriority &P2) {
    // We prioritize call sites in the dictionary order of the following
    // priorities:
    //
    // 1. Those call sites that are expected to reduce the caller size when
    //    inlined. Within them, we prioritize those call sites with bigger
    //    reduction.
    //
    // 2. Those call sites that have gone through the cost-benefit analysis.
    //    Currently, they are limited to hot call sites. Within them, we
    //    prioritize those call sites with higher benefit-to-cost ratios.
    //
    // 3. Remaining call sites are prioritized according to their costs.

    // We add back StaticBonusApplied to determine whether we expect the caller
    // to shrink (even if we don't delete the callee).
    bool P1ReducesCallerSize =
        P1.Cost + P1.StaticBonusApplied < ModuleInlinerTopPriorityThreshold;
    bool P2ReducesCallerSize =
        P2.Cost + P2.StaticBonusApplied < ModuleInlinerTopPriorityThreshold;
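    // With the default threshold of 0, this holds when the cost with the
    // static bonus added back is negative, i.e. inlining is expected to make
    // the caller smaller.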
    if (P1ReducesCallerSize || P2ReducesCallerSize) {
      // If one reduces the caller size while the other doesn't, then return
      // true iff P1 reduces the caller size.
      if (P1ReducesCallerSize != P2ReducesCallerSize)
        return P1ReducesCallerSize;

      // If they both reduce the caller size, pick the one with the smaller
      // cost.
      return P1.Cost < P2.Cost;
    }

    bool P1HasCB = P1.CostBenefit.has_value();
    bool P2HasCB = P2.CostBenefit.has_value();
    if (P1HasCB || P2HasCB) {
      // If one has undergone the cost-benefit analysis while the other hasn't,
      // then return true iff P1 has.
      if (P1HasCB != P2HasCB)
        return P1HasCB;

      // If they have undergone the cost-benefit analysis, then pick the one
      // with a higher benefit-to-cost ratio.
      APInt LHS = P1.CostBenefit->getBenefit() * P2.CostBenefit->getCost();
      APInt RHS = P2.CostBenefit->getBenefit() * P1.CostBenefit->getCost();
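      // Worked example (hypothetical numbers): if P1 has benefit 300 at cost
      // 100 and P2 has benefit 500 at cost 250, then LHS = 300 * 250 = 75000
      // and RHS = 500 * 100 = 50000, so P1 (ratio 3.0 vs. 2.0) is preferred.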
      return LHS.ugt(RHS);
    }

    // Remaining call sites are ordered according to their costs.
    return P1.Cost < P2.Cost;
  }

private:
  int Cost;
  int StaticBonusApplied;
  Optional<CostBenefitPair> CostBenefit;
};

template <typename PriorityT>
class PriorityInlineOrder : public InlineOrder<std::pair<CallBase *, int>> {
  using T = std::pair<CallBase *, int>;

  bool hasLowerPriority(const CallBase *L, const CallBase *R) const {
    const auto I1 = Priorities.find(L);
    const auto I2 = Priorities.find(R);
    assert(I1 != Priorities.end() && I2 != Priorities.end());
    return PriorityT::isMoreDesirable(I2->second, I1->second);
  }

  bool updateAndCheckDecreased(const CallBase *CB) {
    auto It = Priorities.find(CB);
    const auto OldPriority = It->second;
    It->second = PriorityT(CB, FAM, Params);
    const auto NewPriority = It->second;
    return PriorityT::isMoreDesirable(OldPriority, NewPriority);
  }

  // A call site could become less desirable for inlining because of the size
  // growth from prior inlining into the callee. This method is used to lazily
  // update the desirability of a call site if it's decreasing. It is only
  // called on pop() or front(), not every time the desirability changes. When
  // the desirability of the front call site decreases, an updated one would be
  // pushed right back into the heap. For simplicity, those cases where
  // the desirability of a call site increases are ignored here.
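  // For example, if the front call site's callee has grown from an earlier
  // inlining decision, its freshly recomputed priority may now rank below the
  // next-best entry; the loop below re-sifts it and re-checks the new front.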
  void adjust() {
    while (updateAndCheckDecreased(Heap.front())) {
      std::pop_heap(Heap.begin(), Heap.end(), isLess);
      std::push_heap(Heap.begin(), Heap.end(), isLess);
    }
  }

public:
  PriorityInlineOrder(FunctionAnalysisManager &FAM, const InlineParams &Params)
      : FAM(FAM), Params(Params) {
    isLess = [&](const CallBase *L, const CallBase *R) {
      return hasLowerPriority(L, R);
    };
  }

  size_t size() override { return Heap.size(); }

  void push(const T &Elt) override {
    CallBase *CB = Elt.first;
    const int InlineHistoryID = Elt.second;

    Heap.push_back(CB);
    Priorities[CB] = PriorityT(CB, FAM, Params);
    std::push_heap(Heap.begin(), Heap.end(), isLess);
    InlineHistoryMap[CB] = InlineHistoryID;
  }

  T pop() override {
    assert(size() > 0);
    adjust();

    CallBase *CB = Heap.front();
    T Result = std::make_pair(CB, InlineHistoryMap[CB]);
    InlineHistoryMap.erase(CB);
    std::pop_heap(Heap.begin(), Heap.end(), isLess);
    Heap.pop_back();
    return Result;
  }

  void erase_if(function_ref<bool(T)> Pred) override {
    auto PredWrapper = [=](CallBase *CB) -> bool {
      return Pred(std::make_pair(CB, 0));
    };
    llvm::erase_if(Heap, PredWrapper);
    std::make_heap(Heap.begin(), Heap.end(), isLess);
  }

private:
  SmallVector<CallBase *, 16> Heap;
  std::function<bool(const CallBase *L, const CallBase *R)> isLess;
  DenseMap<CallBase *, int> InlineHistoryMap;
  DenseMap<const CallBase *, PriorityT> Priorities;
  FunctionAnalysisManager &FAM;
  const InlineParams &Params;
};

} // namespace

std::unique_ptr<InlineOrder<std::pair<CallBase *, int>>>
llvm::getInlineOrder(FunctionAnalysisManager &FAM, const InlineParams &Params) {
  switch (UseInlinePriority) {
  case InlinePriorityMode::Size:
    LLVM_DEBUG(dbgs() << " Current used priority: Size priority ---- \n");
    return std::make_unique<PriorityInlineOrder<SizePriority>>(FAM, Params);

  case InlinePriorityMode::Cost:
    LLVM_DEBUG(dbgs() << " Current used priority: Cost priority ---- \n");
    return std::make_unique<PriorityInlineOrder<CostPriority>>(FAM, Params);

  case InlinePriorityMode::CostBenefit:
    LLVM_DEBUG(
        dbgs() << " Current used priority: cost-benefit priority ---- \n");
    return std::make_unique<PriorityInlineOrder<CostBenefitPriority>>(FAM,
                                                                      Params);
  }
  return nullptr;
}
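
// A minimal usage sketch, assuming a driver (such as the module inliner) that
// has already collected the call sites it wants to consider; the container
// name `CandidateCalls` below is hypothetical:
//
//   std::unique_ptr<InlineOrder<std::pair<CallBase *, int>>> Calls =
//       getInlineOrder(FAM, Params);
//   for (CallBase *CB : CandidateCalls)
//     Calls->push({CB, /*InlineHistoryID=*/-1});
//   while (Calls->size() > 0) {
//     auto [CB, InlineHistoryID] = Calls->pop();
//     // ... try to inline *CB; newly exposed call sites get pushed back ...
//   }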