//===- InlineCost.cpp - Cost analysis for inliner -------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements inline cost analysis.
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/InlineCost.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/AssemblyAnnotationWriter.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/raw_ostream.h"
#include <climits>
#include <limits>
#include <optional>

using namespace llvm;

#define DEBUG_TYPE "inline-cost"

STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed");

static cl::opt<int>
    DefaultThreshold("inlinedefault-threshold", cl::Hidden, cl::init(225),
                     cl::desc("Default amount of inlining to perform"));

// We introduce this option since there is a minor compile-time win by avoiding
// addition of TTI attributes (target-features in particular) to inline
// candidates when they are guaranteed to be the same as top level methods in
// some use cases. If we avoid adding the attribute, we need an option to avoid
// checking these attributes.
static cl::opt<bool> IgnoreTTIInlineCompatible(
    "ignore-tti-inline-compatible", cl::Hidden, cl::init(false),
    cl::desc("Ignore TTI attributes compatibility check between callee/caller "
             "during inline cost calculation"));

static cl::opt<bool> PrintInstructionComments(
    "print-instruction-comments", cl::Hidden, cl::init(false),
    cl::desc("Prints comments for instruction based on inline cost analysis"));

static cl::opt<int> InlineThreshold(
    "inline-threshold", cl::Hidden, cl::init(225),
    cl::desc("Control the amount of inlining to perform (default = 225)"));

static cl::opt<int> HintThreshold(
    "inlinehint-threshold", cl::Hidden, cl::init(325),
    cl::desc("Threshold for inlining functions with inline hint"));

static cl::opt<int>
    ColdCallSiteThreshold("inline-cold-callsite-threshold", cl::Hidden,
                          cl::init(45),
                          cl::desc("Threshold for inlining cold callsites"));

static cl::opt<bool> InlineEnableCostBenefitAnalysis(
    "inline-enable-cost-benefit-analysis", cl::Hidden, cl::init(false),
    cl::desc("Enable the cost-benefit analysis for the inliner"));

static cl::opt<int> InlineSavingsMultiplier(
    "inline-savings-multiplier", cl::Hidden, cl::init(8),
    cl::desc("Multiplier to multiply cycle savings by during inlining"));

static cl::opt<int>
    InlineSizeAllowance("inline-size-allowance", cl::Hidden, cl::init(100),
                        cl::desc("The maximum size of a callee that gets "
                                 "inlined without sufficient cycle savings"));

// We introduce this threshold to help the performance of instrumentation-based
// PGO before we actually hook up the inliner with analysis passes such as BPI
// and BFI.
static cl::opt<int> ColdThreshold(
    "inlinecold-threshold", cl::Hidden, cl::init(45),
    cl::desc("Threshold for inlining functions with cold attribute"));

static cl::opt<int>
    HotCallSiteThreshold("hot-callsite-threshold", cl::Hidden, cl::init(3000),
                         cl::desc("Threshold for hot callsites"));

static cl::opt<int> LocallyHotCallSiteThreshold(
    "locally-hot-callsite-threshold", cl::Hidden, cl::init(525),
    cl::desc("Threshold for locally hot callsites"));

static cl::opt<int> ColdCallSiteRelFreq(
    "cold-callsite-rel-freq", cl::Hidden, cl::init(2),
    cl::desc("Maximum block frequency, expressed as a percentage of caller's "
             "entry frequency, for a callsite to be cold in the absence of "
             "profile information."));

static cl::opt<uint64_t> HotCallSiteRelFreq(
    "hot-callsite-rel-freq", cl::Hidden, cl::init(60),
    cl::desc("Minimum block frequency, expressed as a multiple of caller's "
             "entry frequency, for a callsite to be hot in the absence of "
             "profile information."));

static cl::opt<int>
    InstrCost("inline-instr-cost", cl::Hidden, cl::init(5),
              cl::desc("Cost of a single instruction when inlining"));

static cl::opt<int>
    MemAccessCost("inline-memaccess-cost", cl::Hidden, cl::init(0),
                  cl::desc("Cost of load/store instruction when inlining"));

static cl::opt<int> CallPenalty(
    "inline-call-penalty", cl::Hidden, cl::init(25),
    cl::desc("Call penalty that is applied per callsite when inlining"));

static cl::opt<size_t>
    StackSizeThreshold("inline-max-stacksize", cl::Hidden,
                       cl::init(std::numeric_limits<size_t>::max()),
                       cl::desc("Do not inline functions with a stack size "
                                "that exceeds the specified limit"));

static cl::opt<size_t>
    RecurStackSizeThreshold("recursive-inline-max-stacksize", cl::Hidden,
                            cl::init(InlineConstants::TotalAllocaSizeRecursiveCaller),
                            cl::desc("Do not inline recursive functions with a stack "
                                     "size that exceeds the specified limit"));

static cl::opt<bool> OptComputeFullInlineCost(
    "inline-cost-full", cl::Hidden,
    cl::desc("Compute the full inline cost of a call site even when the cost "
             "exceeds the threshold."));

static cl::opt<bool> InlineCallerSupersetNoBuiltin(
    "inline-caller-superset-nobuiltin", cl::Hidden, cl::init(true),
    cl::desc("Allow inlining when caller has a superset of callee's nobuiltin "
             "attributes."));

static cl::opt<bool> DisableGEPConstOperand(
    "disable-gep-const-evaluation", cl::Hidden, cl::init(false),
    cl::desc("Disables evaluation of GetElementPtr with constant operands"));

namespace llvm {
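// Parse an integer-valued string function attribute, e.g.
// "function-inline-cost"="42"; returns std::nullopt when the attribute is
// absent or its value is not a base-10 integer.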
std::optional<int> getStringFnAttrAsInt(const Attribute &Attr) {
  if (Attr.isValid()) {
    int AttrValue = 0;
    if (!Attr.getValueAsString().getAsInteger(10, AttrValue))
      return AttrValue;
  }
  return std::nullopt;
}

std::optional<int> getStringFnAttrAsInt(CallBase &CB, StringRef AttrKind) {
  return getStringFnAttrAsInt(CB.getFnAttr(AttrKind));
}

std::optional<int> getStringFnAttrAsInt(Function *F, StringRef AttrKind) {
  return getStringFnAttrAsInt(F->getFnAttribute(AttrKind));
}

namespace InlineConstants {
int getInstrCost() { return InstrCost; }

} // namespace InlineConstants

} // namespace llvm

namespace {
class InlineCostCallAnalyzer;

// This struct is used to store information about the inline cost of a
// particular instruction.
struct InstructionCostDetail {
  int CostBefore = 0;
  int CostAfter = 0;
  int ThresholdBefore = 0;
  int ThresholdAfter = 0;

  int getThresholdDelta() const { return ThresholdAfter - ThresholdBefore; }

  int getCostDelta() const { return CostAfter - CostBefore; }

  bool hasThresholdChanged() const { return ThresholdAfter != ThresholdBefore; }
};

class InlineCostAnnotationWriter : public AssemblyAnnotationWriter {
private:
  InlineCostCallAnalyzer *const ICCA;

public:
  InlineCostAnnotationWriter(InlineCostCallAnalyzer *ICCA) : ICCA(ICCA) {}
  void emitInstructionAnnot(const Instruction *I,
                            formatted_raw_ostream &OS) override;
};

/// Carry out call site analysis, in order to evaluate inlinability.
/// NOTE: the type is currently used as implementation detail of functions such
/// as llvm::getInlineCost. Note the function_ref constructor parameters - the
/// expectation is that they come from the outer scope, from the wrapper
/// functions. If we want to support constructing CallAnalyzer objects where
/// lambdas are provided inline at construction, or where the object needs to
/// otherwise survive past the scope of the provided functions, we need to
/// revisit the argument types.
class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
  typedef InstVisitor<CallAnalyzer, bool> Base;
  friend class InstVisitor<CallAnalyzer, bool>;

protected:
  virtual ~CallAnalyzer() = default;
  /// The TargetTransformInfo available for this compilation.
  const TargetTransformInfo &TTI;

  /// Getter for the cache of @llvm.assume intrinsics.
  function_ref<AssumptionCache &(Function &)> GetAssumptionCache;

  /// Getter for BlockFrequencyInfo.
  function_ref<BlockFrequencyInfo &(Function &)> GetBFI;

  /// Profile summary information.
  ProfileSummaryInfo *PSI;

  /// The called function.
  Function &F;

  // Cache the DataLayout since we use it a lot.
  const DataLayout &DL;

  /// The OptimizationRemarkEmitter available for this compilation.
  OptimizationRemarkEmitter *ORE;

  /// The candidate callsite being analyzed. Please do not use this to do
  /// analysis in the caller function; we want the inline cost query to be
  /// easily cacheable. Instead, use the cover function paramHasAttr.
  CallBase &CandidateCall;

  /// Extension points for handling callsite features.
  // Called before a basic block was analyzed.
  virtual void onBlockStart(const BasicBlock *BB) {}

  /// Called after a basic block was analyzed.
  virtual void onBlockAnalyzed(const BasicBlock *BB) {}

  /// Called before an instruction was analyzed.
  virtual void onInstructionAnalysisStart(const Instruction *I) {}

  /// Called after an instruction was analyzed.
  virtual void onInstructionAnalysisFinish(const Instruction *I) {}

  /// Called at the end of the analysis of the callsite. Return the outcome of
  /// the analysis, i.e. 'InlineResult(true)' if the inlining may happen, or
  /// the reason it can't.
  virtual InlineResult finalizeAnalysis() { return InlineResult::success(); }
  /// Called when we're about to start processing a basic block, and every time
  /// we are done processing an instruction. Return true if there is no point in
  /// continuing the analysis (e.g. we've determined already the call site is
  /// too expensive to inline).
  virtual bool shouldStop() { return false; }

  /// Called before the analysis of the callee body starts (with callsite
  /// contexts propagated). It checks callsite-specific information. Return a
  /// reason analysis can't continue if that's the case, or 'true' if it may
  /// continue.
  virtual InlineResult onAnalysisStart() { return InlineResult::success(); }
  /// Called if the analysis engine decides SROA cannot be done for the given
  /// alloca.
  virtual void onDisableSROA(AllocaInst *Arg) {}

  /// Called when the analysis engine determines load elimination won't happen.
  virtual void onDisableLoadElimination() {}

  /// Called when we visit a CallBase, before the analysis starts. Return false
  /// to stop further processing of the instruction.
  virtual bool onCallBaseVisitStart(CallBase &Call) { return true; }

  /// Called to account for a call.
  virtual void onCallPenalty() {}

  /// Called to account for a load or store.
  virtual void onMemAccess() {}

  /// Called to account for the expectation the inlining would result in a load
  /// elimination.
  virtual void onLoadEliminationOpportunity() {}

  /// Called to account for the cost of argument setup for the Call in the
  /// callee's body (not the callsite currently under analysis).
  virtual void onCallArgumentSetup(const CallBase &Call) {}

  /// Called to account for a load relative intrinsic.
  virtual void onLoadRelativeIntrinsic() {}

  /// Called to account for a lowered call.
  virtual void onLoweredCall(Function *F, CallBase &Call, bool IsIndirectCall) {
  }

  /// Account for a jump table of given size. Return false to stop further
  /// processing of the switch instruction.
  virtual bool onJumpTable(unsigned JumpTableSize) { return true; }

  /// Account for a case cluster of given size. Return false to stop further
  /// processing of the instruction.
  virtual bool onCaseCluster(unsigned NumCaseCluster) { return true; }

  /// Called at the end of processing a switch instruction, with the given
  /// number of case clusters.
  virtual void onFinalizeSwitch(unsigned JumpTableSize,
                                unsigned NumCaseCluster) {}

  /// Called to account for any other instruction not specifically accounted
  /// for.
  virtual void onMissedSimplification() {}

  /// Start accounting potential benefits due to SROA for the given alloca.
  virtual void onInitializeSROAArg(AllocaInst *Arg) {}

  /// Account SROA savings for the AllocaInst value.
  virtual void onAggregateSROAUse(AllocaInst *V) {}

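  /// Common SROA bookkeeping: if \p V maps back to an SROA-candidate alloca,
  /// either credit the use as an SROA saving (when \p DoNotDisable is true) or
  /// disable SROA for that alloca. Returns true only when a saving was
  /// recorded.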
  bool handleSROA(Value *V, bool DoNotDisable) {
    // Check for SROA candidates in comparisons.
    if (auto *SROAArg = getSROAArgForValueOrNull(V)) {
      if (DoNotDisable) {
        onAggregateSROAUse(SROAArg);
        return true;
      }
      disableSROAForArg(SROAArg);
    }
    return false;
  }

  bool IsCallerRecursive = false;
  bool IsRecursiveCall = false;
  bool ExposesReturnsTwice = false;
  bool HasDynamicAlloca = false;
  bool ContainsNoDuplicateCall = false;
  bool HasReturn = false;
  bool HasIndirectBr = false;
  bool HasUninlineableIntrinsic = false;
  bool InitsVargArgs = false;

  /// Number of bytes allocated statically by the callee.
  uint64_t AllocatedSize = 0;
  unsigned NumInstructions = 0;
  unsigned NumVectorInstructions = 0;

  /// While we walk the potentially-inlined instructions, we build up and
  /// maintain a mapping of simplified values specific to this callsite. The
  /// idea is to propagate any special information we have about arguments to
  /// this call through the inlinable section of the function, and account for
  /// likely simplifications post-inlining. The most important aspect we track
  /// is CFG altering simplifications -- when we prove a basic block dead, that
  /// can cause dramatic shifts in the cost of inlining a function.
  DenseMap<Value *, Constant *> SimplifiedValues;

  /// Keep track of the values which map back (through function arguments) to
  /// allocas on the caller stack which could be simplified through SROA.
  DenseMap<Value *, AllocaInst *> SROAArgValues;

  /// Keep track of Allocas for which we believe we may get SROA optimization.
  DenseSet<AllocaInst *> EnabledSROAAllocas;

  /// Keep track of values which map to a pointer base and constant offset.
  DenseMap<Value *, std::pair<Value *, APInt>> ConstantOffsetPtrs;

  /// Keep track of dead blocks due to the constant arguments.
  SmallPtrSet<BasicBlock *, 16> DeadBlocks;

  /// The mapping of the blocks to their known unique successors due to the
  /// constant arguments.
  DenseMap<BasicBlock *, BasicBlock *> KnownSuccessors;

  /// Model the elimination of repeated loads that is expected to happen
  /// whenever we simplify away the stores that would otherwise cause them to be
  /// loads.
  bool EnableLoadElimination = true;

  /// Whether we allow inlining for recursive call.
  bool AllowRecursiveCall = false;

  SmallPtrSet<Value *, 16> LoadAddrSet;

  AllocaInst *getSROAArgForValueOrNull(Value *V) const {
    auto It = SROAArgValues.find(V);
    if (It == SROAArgValues.end() || EnabledSROAAllocas.count(It->second) == 0)
      return nullptr;
    return It->second;
  }

  // Custom simplification helper routines.
  bool isAllocaDerivedArg(Value *V);
  void disableSROAForArg(AllocaInst *SROAArg);
  void disableSROA(Value *V);
  void findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB);
  void disableLoadElimination();
  bool isGEPFree(GetElementPtrInst &GEP);
  bool canFoldInboundsGEP(GetElementPtrInst &I);
  bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset);
  bool simplifyCallSite(Function *F, CallBase &Call);
  bool simplifyInstruction(Instruction &I);
  bool simplifyIntrinsicCallIsConstant(CallBase &CB);
  ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V);

  /// Return true if the given argument to the function being considered for
  /// inlining has the given attribute set either at the call site or the
  /// function declaration. Primarily used to inspect call site specific
  /// attributes since these can be more precise than the ones on the callee
  /// itself.
  bool paramHasAttr(Argument *A, Attribute::AttrKind Attr);

  /// Return true if the given value is known non-null within the callee if
  /// inlined through this particular callsite.
  bool isKnownNonNullInCallee(Value *V);

  /// Return true if size growth is allowed when inlining the callee at \p Call.
  bool allowSizeGrowth(CallBase &Call);

  // Custom analysis routines.
  InlineResult analyzeBlock(BasicBlock *BB,
                            SmallPtrSetImpl<const Value *> &EphValues);

  // Disable several entry points to the visitor so we don't accidentally use
  // them by declaring but not defining them here.
  void visit(Module *);
  void visit(Module &);
  void visit(Function *);
  void visit(Function &);
  void visit(BasicBlock *);
  void visit(BasicBlock &);

  // Provide base case for our instruction visit.
  bool visitInstruction(Instruction &I);

  // Our visit overrides.
  bool visitAlloca(AllocaInst &I);
  bool visitPHI(PHINode &I);
  bool visitGetElementPtr(GetElementPtrInst &I);
  bool visitBitCast(BitCastInst &I);
  bool visitPtrToInt(PtrToIntInst &I);
  bool visitIntToPtr(IntToPtrInst &I);
  bool visitCastInst(CastInst &I);
  bool visitCmpInst(CmpInst &I);
  bool visitSub(BinaryOperator &I);
  bool visitBinaryOperator(BinaryOperator &I);
  bool visitFNeg(UnaryOperator &I);
  bool visitLoad(LoadInst &I);
  bool visitStore(StoreInst &I);
  bool visitExtractValue(ExtractValueInst &I);
  bool visitInsertValue(InsertValueInst &I);
  bool visitCallBase(CallBase &Call);
  bool visitReturnInst(ReturnInst &RI);
  bool visitBranchInst(BranchInst &BI);
  bool visitSelectInst(SelectInst &SI);
  bool visitSwitchInst(SwitchInst &SI);
  bool visitIndirectBrInst(IndirectBrInst &IBI);
  bool visitResumeInst(ResumeInst &RI);
  bool visitCleanupReturnInst(CleanupReturnInst &RI);
  bool visitCatchReturnInst(CatchReturnInst &RI);
  bool visitUnreachableInst(UnreachableInst &I);

public:
  CallAnalyzer(Function &Callee, CallBase &Call, const TargetTransformInfo &TTI,
               function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
               function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr,
               ProfileSummaryInfo *PSI = nullptr,
               OptimizationRemarkEmitter *ORE = nullptr)
      : TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI),
        PSI(PSI), F(Callee), DL(F.getParent()->getDataLayout()), ORE(ORE),
        CandidateCall(Call) {}

  InlineResult analyze();

  std::optional<Constant *> getSimplifiedValue(Instruction *I) {
    if (SimplifiedValues.find(I) != SimplifiedValues.end())
      return SimplifiedValues[I];
    return std::nullopt;
  }

  // Keep a bunch of stats about the cost savings found so we can print them
  // out when debugging.
  unsigned NumConstantArgs = 0;
  unsigned NumConstantOffsetPtrArgs = 0;
  unsigned NumAllocaArgs = 0;
  unsigned NumConstantPtrCmps = 0;
  unsigned NumConstantPtrDiffs = 0;
  unsigned NumInstructionsSimplified = 0;

  void dump();
};

// When lowering a switch as a binary search, the number of nodes in the
// search tree is the same as the number of comparisons. For a given number
// of clusters, n, we can define a recursive function, f(n), to find the
// number of nodes in the tree. The recursion is:
// f(n) = 1 + f(n/2) + f(n - n/2), when n > 3,
// and f(n) = n, when n <= 3.
// This leads to a binary tree where each leaf is either f(2) or f(3) when
// n > 3. So, the number of comparisons from leaves should be n, while the
// number of non-leaf nodes should be:
// 2^(log2(n) - 1) - 1
// = 2^log2(n) * 2^-1 - 1
// = n / 2 - 1.
// Considering comparisons from leaf and non-leaf nodes, we can estimate the
// number of comparisons in a simple closed form:
// n + n / 2 - 1 = n * 3 / 2 - 1
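// For example, n = 8 case clusters give f(8) = 1 + f(4) + f(4), with
// f(4) = 1 + f(2) + f(2) = 5, so 11 comparisons in total -- matching the
// closed form 3 * 8 / 2 - 1 = 11.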
int64_t getExpectedNumberOfCompare(int NumCaseCluster) {
  return 3 * static_cast<int64_t>(NumCaseCluster) / 2 - 1;
}

/// FIXME: if it is necessary to derive from InlineCostCallAnalyzer, note
/// the FIXME in onLoweredCall, when instantiating an InlineCostCallAnalyzer
class InlineCostCallAnalyzer final : public CallAnalyzer {
  const bool ComputeFullInlineCost;
  int LoadEliminationCost = 0;
  /// Bonus to be applied when percentage of vector instructions in callee is
  /// high (see more details in updateThreshold).
  int VectorBonus = 0;
  /// Bonus to be applied when the callee has only one reachable basic block.
  int SingleBBBonus = 0;

  /// Tunable parameters that control the analysis.
  const InlineParams &Params;

  // This DenseMap stores the delta change in cost and threshold after
  // accounting for the given instruction. The map is filled only with the
  // flag PrintInstructionComments on.
  DenseMap<const Instruction *, InstructionCostDetail> InstructionCostDetailMap;

  /// Upper bound for the inlining cost. Bonuses are being applied to account
  /// for speculative "expected profit" of the inlining decision.
  int Threshold = 0;

  /// The amount of StaticBonus applied.
  int StaticBonusApplied = 0;

  /// Attempt to evaluate indirect calls to boost their inline cost.
  const bool BoostIndirectCalls;

  /// Ignore the threshold when finalizing analysis.
  const bool IgnoreThreshold;

  // True if the cost-benefit-analysis-based inliner is enabled.
  const bool CostBenefitAnalysisEnabled;

  /// Inlining cost measured in abstract units, accounts for all the
  /// instructions expected to be executed for a given function invocation.
  /// Instructions that are statically proven to be dead based on call-site
  /// arguments are not counted here.
  int Cost = 0;

  // The cumulative cost at the beginning of the basic block being analyzed. At
  // the end of analyzing each basic block, "Cost - CostAtBBStart" represents
  // the size of that basic block.
  int CostAtBBStart = 0;

  // The static size of live but cold basic blocks. This is "static" in the
  // sense that it's not weighted by profile counts at all.
  int ColdSize = 0;

  // Whether inlining is decided by cost-threshold analysis.
  bool DecidedByCostThreshold = false;

  // Whether inlining is decided by cost-benefit analysis.
  bool DecidedByCostBenefit = false;

  // The cost-benefit pair computed by cost-benefit analysis.
  Optional<CostBenefitPair> CostBenefit = std::nullopt;

  bool SingleBB = true;

  unsigned SROACostSavings = 0;
  unsigned SROACostSavingsLost = 0;

  /// The mapping of caller Alloca values to their accumulated cost savings. If
  /// we have to disable SROA for one of the allocas, this tells us how much
  /// cost must be added.
  DenseMap<AllocaInst *, int> SROAArgCosts;

  /// Return true if \p Call is a cold callsite.
  bool isColdCallSite(CallBase &Call, BlockFrequencyInfo *CallerBFI);

  /// Update Threshold based on callsite properties such as callee
  /// attributes and callee hotness for PGO builds. The Callee is explicitly
  /// passed to support analyzing indirect calls whose target is inferred by
  /// analysis.
  void updateThreshold(CallBase &Call, Function &Callee);
  /// Return a higher threshold if \p Call is a hot callsite.
  Optional<int> getHotCallSiteThreshold(CallBase &Call,
                                        BlockFrequencyInfo *CallerBFI);

  /// Handle a capped 'int' increment for Cost.
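  /// Both the increment and the running total saturate at the 'int' range, so
  /// e.g. addCost(INT_MAX) followed by addCost(1) leaves Cost at INT_MAX
  /// rather than overflowing.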
  void addCost(int64_t Inc) {
    Inc = std::max<int64_t>(std::min<int64_t>(INT_MAX, Inc), INT_MIN);
    Cost = std::max<int64_t>(std::min<int64_t>(INT_MAX, Inc + Cost), INT_MIN);
  }

  void onDisableSROA(AllocaInst *Arg) override {
    auto CostIt = SROAArgCosts.find(Arg);
    if (CostIt == SROAArgCosts.end())
      return;
    addCost(CostIt->second);
    SROACostSavings -= CostIt->second;
    SROACostSavingsLost += CostIt->second;
    SROAArgCosts.erase(CostIt);
  }

  void onDisableLoadElimination() override {
    addCost(LoadEliminationCost);
    LoadEliminationCost = 0;
  }

  bool onCallBaseVisitStart(CallBase &Call) override {
    if (Optional<int> AttrCallThresholdBonus =
            getStringFnAttrAsInt(Call, "call-threshold-bonus"))
      Threshold += *AttrCallThresholdBonus;

    if (Optional<int> AttrCallCost =
            getStringFnAttrAsInt(Call, "call-inline-cost")) {
      addCost(*AttrCallCost);
      // Prevent further processing of the call since we want to override its
      // inline cost, not just add to it.
      return false;
    }
    return true;
  }

  void onCallPenalty() override { addCost(CallPenalty); }

  void onMemAccess() override { addCost(MemAccessCost); }

  void onCallArgumentSetup(const CallBase &Call) override {
    // Pay the price of the argument setup. We account for the average 1
    // instruction per call argument setup here.
    addCost(Call.arg_size() * InstrCost);
  }
  void onLoadRelativeIntrinsic() override {
    // This is normally lowered to 4 LLVM instructions.
    addCost(3 * InstrCost);
  }
  void onLoweredCall(Function *F, CallBase &Call,
                     bool IsIndirectCall) override {
    // We account for the average 1 instruction per call argument setup here.
    addCost(Call.arg_size() * InstrCost);

    // If we have a constant that we are calling as a function, we can peer
    // through it and see the function target. This happens not infrequently
    // during devirtualization and so we want to give it a hefty bonus for
    // inlining, but cap that bonus in the event that inlining wouldn't pan out.
    // Pretend to inline the function, with a custom threshold.
    if (IsIndirectCall && BoostIndirectCalls) {
      auto IndirectCallParams = Params;
      IndirectCallParams.DefaultThreshold =
          InlineConstants::IndirectCallThreshold;
      /// FIXME: if InlineCostCallAnalyzer is derived from, this may need
      /// to instantiate the derived class.
      InlineCostCallAnalyzer CA(*F, Call, IndirectCallParams, TTI,
                                GetAssumptionCache, GetBFI, PSI, ORE, false);
      if (CA.analyze().isSuccess()) {
        // We were able to inline the indirect call! Subtract the cost from the
        // threshold to get the bonus we want to apply, but don't go below zero.
        Cost -= std::max(0, CA.getThreshold() - CA.getCost());
      }
    } else
      // Otherwise simply add the cost for merely making the call.
      addCost(CallPenalty);
  }

  void onFinalizeSwitch(unsigned JumpTableSize,
                        unsigned NumCaseCluster) override {
    // If suitable for a jump table, consider the cost for the table size and
    // branch to destination.
    // Maximum valid cost increased in this function.
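    // For example, with the default InstrCost of 5, a 16-entry jump table
    // adds (16 + 4) * InstrCost = 100 to the running cost.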
    if (JumpTableSize) {
      int64_t JTCost =
          static_cast<int64_t>(JumpTableSize) * InstrCost + 4 * InstrCost;

      addCost(JTCost);
      return;
    }

    if (NumCaseCluster <= 3) {
      // Suppose a comparison includes one compare and one conditional branch.
      addCost(NumCaseCluster * 2 * InstrCost);
      return;
    }

    int64_t ExpectedNumberOfCompare =
        getExpectedNumberOfCompare(NumCaseCluster);
    int64_t SwitchCost = ExpectedNumberOfCompare * 2 * InstrCost;

    addCost(SwitchCost);
  }
  void onMissedSimplification() override { addCost(InstrCost); }

  void onInitializeSROAArg(AllocaInst *Arg) override {
    assert(Arg != nullptr &&
           "Should not initialize SROA costs for null value.");
    SROAArgCosts[Arg] = 0;
  }

  void onAggregateSROAUse(AllocaInst *SROAArg) override {
    auto CostIt = SROAArgCosts.find(SROAArg);
    assert(CostIt != SROAArgCosts.end() &&
           "expected this argument to have a cost");
    CostIt->second += InstrCost;
    SROACostSavings += InstrCost;
  }

  void onBlockStart(const BasicBlock *BB) override { CostAtBBStart = Cost; }

  void onBlockAnalyzed(const BasicBlock *BB) override {
    if (CostBenefitAnalysisEnabled) {
      // Keep track of the static size of live but cold basic blocks. For now,
      // we define a cold basic block to be one that's never executed.
      assert(GetBFI && "GetBFI must be available");
      BlockFrequencyInfo *BFI = &(GetBFI(F));
      assert(BFI && "BFI must be available");
      auto ProfileCount = BFI->getBlockProfileCount(BB);
      assert(ProfileCount.has_value());
      if (ProfileCount.value() == 0)
        ColdSize += Cost - CostAtBBStart;
    }

    auto *TI = BB->getTerminator();
    // If we had any successors at this point, then post-inlining is likely to
    // have them as well. Note that we assume any basic blocks which existed
    // due to branches or switches which folded above will also fold after
    // inlining.
    if (SingleBB && TI->getNumSuccessors() > 1) {
      // Take off the bonus we applied to the threshold.
      Threshold -= SingleBBBonus;
      SingleBB = false;
    }
  }

  void onInstructionAnalysisStart(const Instruction *I) override {
    // This function is called to store the initial cost of inlining before
    // the given instruction was assessed.
    if (!PrintInstructionComments)
      return;
    InstructionCostDetailMap[I].CostBefore = Cost;
    InstructionCostDetailMap[I].ThresholdBefore = Threshold;
  }

  void onInstructionAnalysisFinish(const Instruction *I) override {
    // This function is called to find new values of cost and threshold after
    // the instruction has been assessed.
    if (!PrintInstructionComments)
      return;
    InstructionCostDetailMap[I].CostAfter = Cost;
    InstructionCostDetailMap[I].ThresholdAfter = Threshold;
  }

  bool isCostBenefitAnalysisEnabled() {
    if (!PSI || !PSI->hasProfileSummary())
      return false;

    if (!GetBFI)
      return false;

    if (InlineEnableCostBenefitAnalysis.getNumOccurrences()) {
      // Honor the explicit request from the user.
      if (!InlineEnableCostBenefitAnalysis)
        return false;
    } else {
      // Otherwise, require instrumentation profile.
      if (!PSI->hasInstrumentationProfile())
        return false;
    }

    auto *Caller = CandidateCall.getParent()->getParent();
    if (!Caller->getEntryCount())
      return false;

    BlockFrequencyInfo *CallerBFI = &(GetBFI(*Caller));
    if (!CallerBFI)
      return false;

    // For now, limit to hot call site.
    if (!PSI->isHotCallSite(CandidateCall, CallerBFI))
      return false;

    // Make sure we have a nonzero entry count.
    auto EntryCount = F.getEntryCount();
    if (!EntryCount || !EntryCount->getCount())
      return false;

    BlockFrequencyInfo *CalleeBFI = &(GetBFI(F));
    if (!CalleeBFI)
      return false;

    return true;
  }

  // Determine whether we should inline the given call site, taking into account
  // both the size cost and the cycle savings. Return std::nullopt if we don't
  // have sufficient profiling information to determine.
  std::optional<bool> costBenefitAnalysis() {
    if (!CostBenefitAnalysisEnabled)
      return std::nullopt;

    // buildInlinerPipeline in the pass builder sets HotCallSiteThreshold to 0
    // for the prelink phase of the AutoFDO + ThinLTO build. Honor the logic by
    // falling back to the cost-based metric.
    // TODO: Improve this hacky condition.
    if (Threshold == 0)
      return std::nullopt;

    assert(GetBFI);
    BlockFrequencyInfo *CalleeBFI = &(GetBFI(F));
    assert(CalleeBFI);

    // The cycle savings expressed as the sum of InstrCost
    // multiplied by the estimated dynamic count of each instruction we can
    // avoid. Savings come from the call site cost, such as argument setup and
    // the call instruction, as well as the instructions that are folded.
    //
    // We use 128-bit APInt here to avoid potential overflow. This variable
    // should stay well below 10^^24 (or 2^^80) in practice. This "worst" case
    // assumes that we can avoid or fold a billion instructions, each with a
    // profile count of 10^^15 -- roughly the number of cycles for a 24-hour
    // period on a 4GHz machine.
    APInt CycleSavings(128, 0);

    for (auto &BB : F) {
      APInt CurrentSavings(128, 0);
      for (auto &I : BB) {
        if (BranchInst *BI = dyn_cast<BranchInst>(&I)) {
          // Count a conditional branch as savings if it becomes unconditional.
          if (BI->isConditional() &&
              isa_and_nonnull<ConstantInt>(
                  SimplifiedValues.lookup(BI->getCondition()))) {
            CurrentSavings += InstrCost;
          }
        } else if (Value *V = dyn_cast<Value>(&I)) {
          // Count an instruction as savings if we can fold it.
          if (SimplifiedValues.count(V)) {
            CurrentSavings += InstrCost;
          }
        }
      }

      auto ProfileCount = CalleeBFI->getBlockProfileCount(&BB);
      assert(ProfileCount.has_value());
      CurrentSavings *= ProfileCount.value();
      CycleSavings += CurrentSavings;
    }

    // Compute the cycle savings per call.
    auto EntryProfileCount = F.getEntryCount();
    assert(EntryProfileCount && EntryProfileCount->getCount());
    auto EntryCount = EntryProfileCount->getCount();
    CycleSavings += EntryCount / 2;
    CycleSavings = CycleSavings.udiv(EntryCount);

    // Compute the total savings for the call site.
    auto *CallerBB = CandidateCall.getParent();
    BlockFrequencyInfo *CallerBFI = &(GetBFI(*(CallerBB->getParent())));
    CycleSavings += getCallsiteCost(this->CandidateCall, DL);
    CycleSavings *= *CallerBFI->getBlockProfileCount(CallerBB);

    // Remove the cost of the cold basic blocks.
    int Size = Cost - ColdSize;

    // Allow tiny callees to be inlined regardless of whether they meet the
    // savings threshold.
    Size = Size > InlineSizeAllowance ? Size - InlineSizeAllowance : 1;

    CostBenefit.emplace(APInt(128, Size), CycleSavings);

    // Return true if the savings justify the cost of inlining. Specifically,
    // we evaluate the following inequality:
    //
    //  CycleSavings      PSI->getOrCompHotCountThreshold()
    // --------------  >=  ---------------------------------
    //       Size               InlineSavingsMultiplier
    //
    // Note that the left hand side is specific to a call site. The right hand
    // side is a constant for the entire executable.
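    // For example, with the default InlineSavingsMultiplier of 8 and a
    // hypothetical hot-count threshold of 16000, a call site with Size = 100
    // is inlined only if it saves at least 100 * 16000 / 8 = 200000 cycles
    // per call.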
    APInt LHS = CycleSavings;
    LHS *= InlineSavingsMultiplier;
    APInt RHS(128, PSI->getOrCompHotCountThreshold());
    RHS *= Size;
    return LHS.uge(RHS);
  }

  InlineResult finalizeAnalysis() override {
    // Loops generally act a lot like calls in that they act like barriers to
    // movement, require a certain amount of setup, etc. So when optimising for
    // size, we penalise any call sites that perform loops. We do this after all
    // other costs here, so will likely only be dealing with relatively small
    // functions (and hence DT and LI will hopefully be cheap).
    auto *Caller = CandidateCall.getFunction();
    if (Caller->hasMinSize()) {
      DominatorTree DT(F);
      LoopInfo LI(DT);
      int NumLoops = 0;
      for (Loop *L : LI) {
        // Ignore loops that will not be executed.
        if (DeadBlocks.count(L->getHeader()))
          continue;
        NumLoops++;
      }
      addCost(NumLoops * InlineConstants::LoopPenalty);
    }

    // We applied the maximum possible vector bonus at the beginning. Now,
    // subtract the excess bonus, if any, from the Threshold before
    // comparing against Cost.
    if (NumVectorInstructions <= NumInstructions / 10)
      Threshold -= VectorBonus;
    else if (NumVectorInstructions <= NumInstructions / 2)
      Threshold -= VectorBonus / 2;

    if (Optional<int> AttrCost =
            getStringFnAttrAsInt(CandidateCall, "function-inline-cost"))
      Cost = *AttrCost;

    if (Optional<int> AttrCostMult = getStringFnAttrAsInt(
            CandidateCall,
            InlineConstants::FunctionInlineCostMultiplierAttributeName))
      Cost *= *AttrCostMult;

    if (Optional<int> AttrThreshold =
            getStringFnAttrAsInt(CandidateCall, "function-inline-threshold"))
      Threshold = *AttrThreshold;

    if (auto Result = costBenefitAnalysis()) {
      DecidedByCostBenefit = true;
      if (*Result)
        return InlineResult::success();
      else
        return InlineResult::failure("Cost over threshold.");
    }

    if (IgnoreThreshold)
      return InlineResult::success();

    DecidedByCostThreshold = true;
    return Cost < std::max(1, Threshold)
               ? InlineResult::success()
               : InlineResult::failure("Cost over threshold.");
  }

  bool shouldStop() override {
    if (IgnoreThreshold || ComputeFullInlineCost)
      return false;
    // Bail out the moment we cross the threshold. This means we'll under-count
    // the cost, but only when undercounting doesn't matter.
    if (Cost < Threshold)
      return false;
    DecidedByCostThreshold = true;
    return true;
  }

  void onLoadEliminationOpportunity() override {
    LoadEliminationCost += InstrCost;
  }

  InlineResult onAnalysisStart() override {
    // Perform some tweaks to the cost and threshold based on the direct
    // callsite information.

    // We want to more aggressively inline vector-dense kernels, so up the
    // threshold, and we'll lower it if the % of vector instructions gets too
    // low. Note that these bonuses are somewhat arbitrary and evolved over
    // time by accident as much as because they are principled bonuses.
    //
    // FIXME: It would be nice to remove all such bonuses. At least it would be
    // nice to base the bonus values on something more scientific.
    assert(NumInstructions == 0);
    assert(NumVectorInstructions == 0);

    // Update the threshold based on callsite properties.
    updateThreshold(CandidateCall, F);

    // While Threshold depends on commandline options that can take negative
    // values, we want to enforce the invariant that the computed threshold and
    // bonuses are non-negative.
    assert(Threshold >= 0);
    assert(SingleBBBonus >= 0);
    assert(VectorBonus >= 0);

    // Speculatively apply all possible bonuses to Threshold. If cost exceeds
    // this Threshold any time, and cost cannot decrease, we can stop processing
    // the rest of the function body.
    Threshold += (SingleBBBonus + VectorBonus);

    // Give out bonuses for the callsite, as the instructions setting them up
    // will be gone after inlining.
    addCost(-getCallsiteCost(this->CandidateCall, DL));

    // If this function uses the coldcc calling convention, prefer not to inline
    // it.
    if (F.getCallingConv() == CallingConv::Cold)
      Cost += InlineConstants::ColdccPenalty;

    LLVM_DEBUG(dbgs() << "      Initial cost: " << Cost << "\n");

    // Check if we're done. This can happen due to bonuses and penalties.
    if (Cost >= Threshold && !ComputeFullInlineCost)
      return InlineResult::failure("high cost");

    return InlineResult::success();
  }

public:
  InlineCostCallAnalyzer(
      Function &Callee, CallBase &Call, const InlineParams &Params,
      const TargetTransformInfo &TTI,
      function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
      function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr,
      ProfileSummaryInfo *PSI = nullptr,
      OptimizationRemarkEmitter *ORE = nullptr, bool BoostIndirect = true,
      bool IgnoreThreshold = false)
      : CallAnalyzer(Callee, Call, TTI, GetAssumptionCache, GetBFI, PSI, ORE),
        ComputeFullInlineCost(OptComputeFullInlineCost ||
                              Params.ComputeFullInlineCost || ORE ||
                              isCostBenefitAnalysisEnabled()),
        Params(Params), Threshold(Params.DefaultThreshold),
        BoostIndirectCalls(BoostIndirect), IgnoreThreshold(IgnoreThreshold),
        CostBenefitAnalysisEnabled(isCostBenefitAnalysisEnabled()),
        Writer(this) {
    AllowRecursiveCall = *Params.AllowRecursiveCall;
  }

  /// Annotation Writer for instruction details.
  InlineCostAnnotationWriter Writer;

  void dump();

  // Prints the same analysis as dump(), but its definition is not dependent
  // on the build.
  void print(raw_ostream &OS);

  Optional<InstructionCostDetail> getCostDetails(const Instruction *I) {
    if (InstructionCostDetailMap.find(I) != InstructionCostDetailMap.end())
      return InstructionCostDetailMap[I];
    return std::nullopt;
  }

  virtual ~InlineCostCallAnalyzer() = default;
  int getThreshold() const { return Threshold; }
  int getCost() const { return Cost; }
  int getStaticBonusApplied() const { return StaticBonusApplied; }
  Optional<CostBenefitPair> getCostBenefitPair() { return CostBenefit; }
  bool wasDecidedByCostBenefit() const { return DecidedByCostBenefit; }
  bool wasDecidedByCostThreshold() const { return DecidedByCostThreshold; }
};

// Return true if CB is the sole call to local function Callee.
static bool isSoleCallToLocalFunction(const CallBase &CB,
                                      const Function &Callee) {
  return Callee.hasLocalLinkage() && Callee.hasOneLiveUse() &&
         &Callee == CB.getCalledFunction();
}

class InlineCostFeaturesAnalyzer final : public CallAnalyzer {
private:
  InlineCostFeatures Cost = {};

  // FIXME: These constants are taken from the heuristic-based cost visitor.
  // These should be removed entirely in a later revision to avoid reliance on
  // heuristics in the ML inliner.
  static constexpr int JTCostMultiplier = 4;
  static constexpr int CaseClusterCostMultiplier = 2;
  static constexpr int SwitchCostMultiplier = 2;

  // FIXME: These are taken from the heuristic-based cost visitor: we should
  // eventually abstract these to the CallAnalyzer to avoid duplication.
  unsigned SROACostSavingOpportunities = 0;
  int VectorBonus = 0;
  int SingleBBBonus = 0;
  int Threshold = 5;

  DenseMap<AllocaInst *, unsigned> SROACosts;

  void increment(InlineCostFeatureIndex Feature, int64_t Delta = 1) {
    Cost[static_cast<size_t>(Feature)] += Delta;
  }

  void set(InlineCostFeatureIndex Feature, int64_t Value) {
    Cost[static_cast<size_t>(Feature)] = Value;
  }

  void onDisableSROA(AllocaInst *Arg) override {
    auto CostIt = SROACosts.find(Arg);
    if (CostIt == SROACosts.end())
      return;

    increment(InlineCostFeatureIndex::SROALosses, CostIt->second);
    SROACostSavingOpportunities -= CostIt->second;
    SROACosts.erase(CostIt);
  }

  void onDisableLoadElimination() override {
    set(InlineCostFeatureIndex::LoadElimination, 1);
  }

  void onCallPenalty() override {
    increment(InlineCostFeatureIndex::CallPenalty, CallPenalty);
  }

  void onCallArgumentSetup(const CallBase &Call) override {
    increment(InlineCostFeatureIndex::CallArgumentSetup,
              Call.arg_size() * InstrCost);
  }

  void onLoadRelativeIntrinsic() override {
    increment(InlineCostFeatureIndex::LoadRelativeIntrinsic, 3 * InstrCost);
  }

  void onLoweredCall(Function *F, CallBase &Call,
                     bool IsIndirectCall) override {
    increment(InlineCostFeatureIndex::LoweredCallArgSetup,
              Call.arg_size() * InstrCost);

    if (IsIndirectCall) {
      InlineParams IndirectCallParams = {/* DefaultThreshold*/ 0,
                                         /*HintThreshold*/ {},
                                         /*ColdThreshold*/ {},
                                         /*OptSizeThreshold*/ {},
                                         /*OptMinSizeThreshold*/ {},
                                         /*HotCallSiteThreshold*/ {},
                                         /*LocallyHotCallSiteThreshold*/ {},
                                         /*ColdCallSiteThreshold*/ {},
                                         /*ComputeFullInlineCost*/ true,
                                         /*EnableDeferral*/ true};
      IndirectCallParams.DefaultThreshold =
          InlineConstants::IndirectCallThreshold;

      InlineCostCallAnalyzer CA(*F, Call, IndirectCallParams, TTI,
                                GetAssumptionCache, GetBFI, PSI, ORE, false,
                                true);
      if (CA.analyze().isSuccess()) {
        increment(InlineCostFeatureIndex::NestedInlineCostEstimate,
                  CA.getCost());
        increment(InlineCostFeatureIndex::NestedInlines, 1);
      }
    } else {
      onCallPenalty();
    }
  }

  void onFinalizeSwitch(unsigned JumpTableSize,
                        unsigned NumCaseCluster) override {

    if (JumpTableSize) {
      int64_t JTCost = static_cast<int64_t>(JumpTableSize) * InstrCost +
                       JTCostMultiplier * InstrCost;
      increment(InlineCostFeatureIndex::JumpTablePenalty, JTCost);
      return;
    }

    if (NumCaseCluster <= 3) {
      increment(InlineCostFeatureIndex::CaseClusterPenalty,
                NumCaseCluster * CaseClusterCostMultiplier * InstrCost);
      return;
    }

    int64_t ExpectedNumberOfCompare =
        getExpectedNumberOfCompare(NumCaseCluster);

    int64_t SwitchCost =
        ExpectedNumberOfCompare * SwitchCostMultiplier * InstrCost;
    increment(InlineCostFeatureIndex::SwitchPenalty, SwitchCost);
  }

  void onMissedSimplification() override {
    increment(InlineCostFeatureIndex::UnsimplifiedCommonInstructions,
              InstrCost);
  }

  void onInitializeSROAArg(AllocaInst *Arg) override { SROACosts[Arg] = 0; }
  void onAggregateSROAUse(AllocaInst *Arg) override {
    SROACosts.find(Arg)->second += InstrCost;
    SROACostSavingOpportunities += InstrCost;
  }

  void onBlockAnalyzed(const BasicBlock *BB) override {
    if (BB->getTerminator()->getNumSuccessors() > 1)
      set(InlineCostFeatureIndex::IsMultipleBlocks, 1);
    Threshold -= SingleBBBonus;
  }

  InlineResult finalizeAnalysis() override {
    auto *Caller = CandidateCall.getFunction();
    if (Caller->hasMinSize()) {
      DominatorTree DT(F);
      LoopInfo LI(DT);
      for (Loop *L : LI) {
        // Ignore loops that will not be executed.
        if (DeadBlocks.count(L->getHeader()))
          continue;
        increment(InlineCostFeatureIndex::NumLoops,
                  InlineConstants::LoopPenalty);
      }
    }
    set(InlineCostFeatureIndex::DeadBlocks, DeadBlocks.size());
    set(InlineCostFeatureIndex::SimplifiedInstructions,
        NumInstructionsSimplified);
    set(InlineCostFeatureIndex::ConstantArgs, NumConstantArgs);
    set(InlineCostFeatureIndex::ConstantOffsetPtrArgs,
        NumConstantOffsetPtrArgs);
    set(InlineCostFeatureIndex::SROASavings, SROACostSavingOpportunities);

    if (NumVectorInstructions <= NumInstructions / 10)
      Threshold -= VectorBonus;
    else if (NumVectorInstructions <= NumInstructions / 2)
      Threshold -= VectorBonus / 2;

    set(InlineCostFeatureIndex::Threshold, Threshold);

    return InlineResult::success();
  }

  bool shouldStop() override { return false; }

  void onLoadEliminationOpportunity() override {
    increment(InlineCostFeatureIndex::LoadElimination, 1);
  }

  InlineResult onAnalysisStart() override {
    increment(InlineCostFeatureIndex::CallSiteCost,
              -1 * getCallsiteCost(this->CandidateCall, DL));

    set(InlineCostFeatureIndex::ColdCcPenalty,
        (F.getCallingConv() == CallingConv::Cold));

    set(InlineCostFeatureIndex::LastCallToStaticBonus,
        isSoleCallToLocalFunction(CandidateCall, F));

    // FIXME: we shouldn't repeat this logic in both the Features and Cost
    // analyzer - instead, we should abstract it to a common method in the
    // CallAnalyzer.
    int SingleBBBonusPercent = 50;
    int VectorBonusPercent = TTI.getInlinerVectorBonusPercent();
    Threshold += TTI.adjustInliningThreshold(&CandidateCall);
    Threshold *= TTI.getInliningThresholdMultiplier();
    SingleBBBonus = Threshold * SingleBBBonusPercent / 100;
    VectorBonus = Threshold * VectorBonusPercent / 100;
    Threshold += (SingleBBBonus + VectorBonus);

    return InlineResult::success();
  }

public:
  InlineCostFeaturesAnalyzer(
      const TargetTransformInfo &TTI,
      function_ref<AssumptionCache &(Function &)> &GetAssumptionCache,
      function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
      ProfileSummaryInfo *PSI, Function &Callee, CallBase &Call)
      : CallAnalyzer(Callee, Call, TTI, GetAssumptionCache, GetBFI, PSI) {}

  const InlineCostFeatures &features() const { return Cost; }
};

} // namespace

/// Test whether the given value is an Alloca-derived function argument.
bool CallAnalyzer::isAllocaDerivedArg(Value *V) {
  return SROAArgValues.count(V);
}

void CallAnalyzer::disableSROAForArg(AllocaInst *SROAArg) {
  onDisableSROA(SROAArg);
  EnabledSROAAllocas.erase(SROAArg);
  disableLoadElimination();
}

void InlineCostAnnotationWriter::emitInstructionAnnot(
    const Instruction *I, formatted_raw_ostream &OS) {
  // The cost of inlining the given instruction is always printed.
  // The threshold delta is printed only when it is non-zero, which happens
  // when we decided to give a bonus at a particular instruction.
  Optional<InstructionCostDetail> Record = ICCA->getCostDetails(I);
  if (!Record)
    OS << "; No analysis for the instruction";
  else {
    OS << "; cost before = " << Record->CostBefore
       << ", cost after = " << Record->CostAfter
       << ", threshold before = " << Record->ThresholdBefore
       << ", threshold after = " << Record->ThresholdAfter << ", ";
    OS << "cost delta = " << Record->getCostDelta();
    if (Record->hasThresholdChanged())
      OS << ", threshold delta = " << Record->getThresholdDelta();
  }
  auto C = ICCA->getSimplifiedValue(const_cast<Instruction *>(I));
  if (C) {
    OS << ", simplified to ";
    (*C)->print(OS, true);
  }
  OS << "\n";
}

/// If 'V' maps to a SROA candidate, disable SROA for it.
void CallAnalyzer::disableSROA(Value *V) {
  if (auto *SROAArg = getSROAArgForValueOrNull(V)) {
    disableSROAForArg(SROAArg);
  }
}

void CallAnalyzer::disableLoadElimination() {
  if (EnableLoadElimination) {
    onDisableLoadElimination();
    EnableLoadElimination = false;
  }
}

/// Accumulate a constant GEP offset into an APInt if possible.
///
/// Returns false if unable to compute the offset for any reason. Respects any
/// simplified values known during the analysis of this callsite.
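/// For example, on a typical 64-bit target, a GEP into a {i32, i64} struct
/// with indices (1, 1) accumulates 16 bytes for the array stride plus 8 bytes
/// for the field offset, i.e. Offset += 24.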
bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
  unsigned IntPtrWidth = DL.getIndexTypeSizeInBits(GEP.getType());
  assert(IntPtrWidth == Offset.getBitWidth());

  for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
       GTI != GTE; ++GTI) {
    ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
    if (!OpC)
      if (Constant *SimpleOp = SimplifiedValues.lookup(GTI.getOperand()))
        OpC = dyn_cast<ConstantInt>(SimpleOp);
    if (!OpC)
      return false;
    if (OpC->isZero())
      continue;

    // Handle a struct index, which adds its field offset to the pointer.
    if (StructType *STy = GTI.getStructTypeOrNull()) {
      unsigned ElementIdx = OpC->getZExtValue();
      const StructLayout *SL = DL.getStructLayout(STy);
      Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx));
      continue;
    }

    APInt TypeSize(IntPtrWidth, DL.getTypeAllocSize(GTI.getIndexedType()));
    Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize;
  }
  return true;
}

/// Use TTI to check whether a GEP is free.
///
/// Respects any simplified values known during the analysis of this callsite.
bool CallAnalyzer::isGEPFree(GetElementPtrInst &GEP) {
  SmallVector<Value *, 4> Operands;
  Operands.push_back(GEP.getOperand(0));
  for (const Use &Op : GEP.indices())
    if (Constant *SimpleOp = SimplifiedValues.lookup(Op))
      Operands.push_back(SimpleOp);
    else
      Operands.push_back(Op);
  return TTI.getInstructionCost(&GEP, Operands,
                                TargetTransformInfo::TCK_SizeAndLatency) ==
         TargetTransformInfo::TCC_Free;
}

bool CallAnalyzer::visitAlloca(AllocaInst &I) {
  disableSROA(I.getOperand(0));

  // Check whether inlining will turn a dynamic alloca into a static
  // alloca and handle that case.
  if (I.isArrayAllocation()) {
    Constant *Size = SimplifiedValues.lookup(I.getArraySize());
    if (auto *AllocSize = dyn_cast_or_null<ConstantInt>(Size)) {
      // Sometimes a dynamic alloca could be converted into a static alloca
      // after this constant prop, and become a huge static alloca on an
      // unconditional CFG path. Avoid inlining if this is going to happen above
      // a threshold.
      // FIXME: If the threshold is removed or lowered too much, we could end up
      // being too pessimistic and prevent inlining non-problematic code. This
      // could result in unintended perf regressions. A better overall strategy
      // is needed to track stack usage during inlining.
      Type *Ty = I.getAllocatedType();
      AllocatedSize = SaturatingMultiplyAdd(
          AllocSize->getLimitedValue(),
          DL.getTypeAllocSize(Ty).getKnownMinSize(), AllocatedSize);
      if (AllocatedSize > InlineConstants::MaxSimplifiedDynamicAllocaToInline)
        HasDynamicAlloca = true;
      return false;
    }
  }

  // Accumulate the allocated size.
  if (I.isStaticAlloca()) {
    Type *Ty = I.getAllocatedType();
    AllocatedSize =
        SaturatingAdd(DL.getTypeAllocSize(Ty).getKnownMinSize(), AllocatedSize);
  }

  // FIXME: This is overly conservative. Dynamic allocas are inefficient for
  // a variety of reasons, and so we would like to not inline them into
  // functions which don't currently have a dynamic alloca. This simply
  // disables inlining altogether in the presence of a dynamic alloca.
  if (!I.isStaticAlloca())
    HasDynamicAlloca = true;

  return false;
}

bool CallAnalyzer::visitPHI(PHINode &I) {
  // FIXME: We need to propagate SROA *disabling* through phi nodes, even
  // though we don't want to propagate its bonuses. The idea is to disable
  // SROA if it *might* be used in an inappropriate manner.

  // Phi nodes are always zero-cost.
  // FIXME: Pointer sizes may differ between different address spaces, so do we
  // need to use correct address space in the call to getPointerSizeInBits here?
  // Or could we skip the getPointerSizeInBits call completely? As far as I can
  // see the ZeroOffset is used as a dummy value, so we can probably use any
  // bit width for the ZeroOffset?
  APInt ZeroOffset = APInt::getZero(DL.getPointerSizeInBits(0));
  bool CheckSROA = I.getType()->isPointerTy();

  // Track the constant or pointer with constant offset we've seen so far.
  Constant *FirstC = nullptr;
  std::pair<Value *, APInt> FirstBaseAndOffset = {nullptr, ZeroOffset};
  Value *FirstV = nullptr;

  for (unsigned i = 0, e = I.getNumIncomingValues(); i != e; ++i) {
    BasicBlock *Pred = I.getIncomingBlock(i);
    // If the incoming block is dead, skip the incoming block.
    if (DeadBlocks.count(Pred))
      continue;
    // If the parent block of phi is not the known successor of the incoming
    // block, skip the incoming block.
    BasicBlock *KnownSuccessor = KnownSuccessors[Pred];
    if (KnownSuccessor && KnownSuccessor != I.getParent())
      continue;

    Value *V = I.getIncomingValue(i);
    // If the incoming value is this phi itself, skip the incoming value.
    if (&I == V)
      continue;

    Constant *C = dyn_cast<Constant>(V);
    if (!C)
      C = SimplifiedValues.lookup(V);

    std::pair<Value *, APInt> BaseAndOffset = {nullptr, ZeroOffset};
    if (!C && CheckSROA)
      BaseAndOffset = ConstantOffsetPtrs.lookup(V);

    if (!C && !BaseAndOffset.first)
      // The incoming value is neither a constant nor a pointer with constant
      // offset, exit early.
      return true;

    if (FirstC) {
      if (FirstC == C)
        // If we've seen a constant incoming value before and it is the same
        // constant we see this time, continue checking the next incoming value.
        continue;
      // Otherwise early exit because we either see a different constant or saw
      // a constant before but we have a pointer with constant offset this time.
      return true;
    }

    if (FirstV) {
      // The same logic as above, but check pointer with constant offset here.
      if (FirstBaseAndOffset == BaseAndOffset)
        continue;
      return true;
    }

    if (C) {
      // This is the 1st time we've seen a constant, record it.
      FirstC = C;
      continue;
    }

    // The remaining case is that this is the 1st time we've seen a pointer with
    // constant offset, record it.
    FirstV = V;
    FirstBaseAndOffset = BaseAndOffset;
  }

  // Check if we can map phi to a constant.
  if (FirstC) {
    SimplifiedValues[&I] = FirstC;
    return true;
  }

  // Check if we can map phi to a pointer with constant offset.
  if (FirstBaseAndOffset.first) {
    ConstantOffsetPtrs[&I] = FirstBaseAndOffset;

    if (auto *SROAArg = getSROAArgForValueOrNull(FirstV))
      SROAArgValues[&I] = SROAArg;
  }

  return true;
}

1517 /// Check we can fold GEPs of constant-offset call site argument pointers.
1518 /// This requires target data and inbounds GEPs.
1519 ///
1520 /// \return true if the specified GEP can be folded.
1521 bool CallAnalyzer::canFoldInboundsGEP(GetElementPtrInst &I) {
1522  // Check if we have a base + offset for the pointer.
1523  std::pair<Value *, APInt> BaseAndOffset =
1524  ConstantOffsetPtrs.lookup(I.getPointerOperand());
1525  if (!BaseAndOffset.first)
1526  return false;
1527 
1528  // Check if the offset of this GEP is constant, and if so accumulate it
1529  // into Offset.
1530  if (!accumulateGEPOffset(cast<GEPOperator>(I), BaseAndOffset.second))
1531  return false;
1532 
1533  // Add the result as a new mapping to Base + Offset.
1534  ConstantOffsetPtrs[&I] = BaseAndOffset;
1535 
1536  return true;
1537 }
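// For example, if %arg is already mapped to (%alloca, 8) in
// ConstantOffsetPtrs, then
//   %g = getelementptr inbounds i32, ptr %arg, i64 2
// accumulates to the mapping (%alloca, 16), and the caller can then treat
// the GEP as free.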
1538 
1539 bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) {
1540  auto *SROAArg = getSROAArgForValueOrNull(I.getPointerOperand());
1541 
1542  // Lambda to check whether a GEP's indices are all constant.
1543  auto IsGEPOffsetConstant = [&](GetElementPtrInst &GEP) {
1544  for (const Use &Op : GEP.indices())
1545  if (!isa<Constant>(Op) && !SimplifiedValues.lookup(Op))
1546  return false;
1547  return true;
1548  };
1549 
1550  if (!DisableGEPConstOperand)
1551  if (simplifyInstruction(I))
1552  return true;
1553 
1554  if ((I.isInBounds() && canFoldInboundsGEP(I)) || IsGEPOffsetConstant(I)) {
1555  if (SROAArg)
1556  SROAArgValues[&I] = SROAArg;
1557 
1558  // Constant GEPs are modeled as free.
1559  return true;
1560  }
1561 
1562  // Variable GEPs will require math and will disable SROA.
1563  if (SROAArg)
1564  disableSROAForArg(SROAArg);
1565  return isGEPFree(I);
1566 }
1567 
1568 /// Simplify \p I if its operands are constants and update SimplifiedValues.
1569 bool CallAnalyzer::simplifyInstruction(Instruction &I) {
1570  SmallVector<Constant *> COps;
1571  for (Value *Op : I.operands()) {
1572  Constant *COp = dyn_cast<Constant>(Op);
1573  if (!COp)
1574  COp = SimplifiedValues.lookup(Op);
1575  if (!COp)
1576  return false;
1577  COps.push_back(COp);
1578  }
1579  auto *C = ConstantFoldInstOperands(&I, COps, DL);
1580  if (!C)
1581  return false;
1582  SimplifiedValues[&I] = C;
1583  return true;
1584 }
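// For example, once %x has been mapped to i32 3 by an earlier
// simplification, `%y = add i32 %x, 4` constant-folds to i32 7 here and is
// recorded in SimplifiedValues so that users of %y can fold in turn.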
1585 
1586 /// Try to simplify a call to llvm.is.constant.
1587 ///
1588 /// Duplicate the argument checking from CallAnalyzer::simplifyCallSite since
1589 /// we expect calls of this specific intrinsic to be infrequent.
1590 ///
1591 /// FIXME: Given that we know CB's parent (F) caller
1592 /// (CandidateCall->getParent()->getParent()), we might be able to determine
1593 /// whether inlining F into F's caller would change how the call to
1594 /// llvm.is.constant would evaluate.
1595 bool CallAnalyzer::simplifyIntrinsicCallIsConstant(CallBase &CB) {
1596  Value *Arg = CB.getArgOperand(0);
1597  auto *C = dyn_cast<Constant>(Arg);
1598 
1599  if (!C)
1600  C = dyn_cast_or_null<Constant>(SimplifiedValues.lookup(Arg));
1601 
1602  Type *RT = CB.getFunctionType()->getReturnType();
1603  SimplifiedValues[&CB] = ConstantInt::get(RT, C ? 1 : 0);
1604  return true;
1605 }
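// For example, if the argument has been mapped to a constant,
//   %r = call i1 @llvm.is.constant.i32(i32 %arg)
// is simplified to i1 true here; with the argument still unknown it is
// simplified to i1 false.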
1606 
1607 bool CallAnalyzer::visitBitCast(BitCastInst &I) {
1608  // Propagate constants through bitcasts.
1609  if (simplifyInstruction(I))
1610  return true;
1611 
1612  // Track base/offsets through casts
1613  std::pair<Value *, APInt> BaseAndOffset =
1614  ConstantOffsetPtrs.lookup(I.getOperand(0));
1615  // Casts don't change the offset, just wrap it up.
1616  if (BaseAndOffset.first)
1617  ConstantOffsetPtrs[&I] = BaseAndOffset;
1618 
1619  // Also look for SROA candidates here.
1620  if (auto *SROAArg = getSROAArgForValueOrNull(I.getOperand(0)))
1621  SROAArgValues[&I] = SROAArg;
1622 
1623  // Bitcasts are always zero cost.
1624  return true;
1625 }
1626 
1627 bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
1628  // Propagate constants through ptrtoint.
1629  if (simplifyInstruction(I))
1630  return true;
1631 
1632  // Track base/offset pairs when converted to a plain integer provided the
1633  // integer is large enough to represent the pointer.
1634  unsigned IntegerSize = I.getType()->getScalarSizeInBits();
1635  unsigned AS = I.getOperand(0)->getType()->getPointerAddressSpace();
1636  if (IntegerSize == DL.getPointerSizeInBits(AS)) {
1637  std::pair<Value *, APInt> BaseAndOffset =
1638  ConstantOffsetPtrs.lookup(I.getOperand(0));
1639  if (BaseAndOffset.first)
1640  ConstantOffsetPtrs[&I] = BaseAndOffset;
1641  }
1642 
1643  // This is really weird. Technically, ptrtoint will disable SROA. However,
1644  // unless that ptrtoint is *used* somewhere in the live basic blocks after
1645  // inlining, it will be nuked, and SROA should proceed. All of the uses which
1646  // would block SROA would also block SROA if applied directly to a pointer,
1647  // and so we can just add the integer in here. The only places where SROA is
1648  // preserved either cannot fire on an integer, or won't in-and-of themselves
1649  // disable SROA (ext) w/o some later use that we would see and disable.
1650  if (auto *SROAArg = getSROAArgForValueOrNull(I.getOperand(0)))
1651  SROAArgValues[&I] = SROAArg;
1652 
1653  return TargetTransformInfo::TCC_Free ==
1654  TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
1655 }
1656 
1657 bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
1658  // Propagate constants through inttoptr.
1659  if (simplifyInstruction(I))
1660  return true;
1661 
1662  // Track base/offset pairs when round-tripped through a pointer without
1663  // modifications provided the integer is not too large.
1664  Value *Op = I.getOperand(0);
1665  unsigned IntegerSize = Op->getType()->getScalarSizeInBits();
1666  if (IntegerSize <= DL.getPointerTypeSizeInBits(I.getType())) {
1667  std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Op);
1668  if (BaseAndOffset.first)
1669  ConstantOffsetPtrs[&I] = BaseAndOffset;
1670  }
1671 
1672  // "Propagate" SROA here in the same manner as we do for ptrtoint above.
1673  if (auto *SROAArg = getSROAArgForValueOrNull(Op))
1674  SROAArgValues[&I] = SROAArg;
1675 
1676  return TargetTransformInfo::TCC_Free ==
1677  TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
1678 }
1679 
1680 bool CallAnalyzer::visitCastInst(CastInst &I) {
1681  // Propagate constants through casts.
1682  if (simplifyInstruction(I))
1683  return true;
1684 
1685  // Disable SROA in the face of arbitrary casts we don't explicitly list
1686  // elsewhere.
1687  disableSROA(I.getOperand(0));
1688 
1689  // If this is a floating-point cast, and the target says this operation
1690  // is expensive, this may eventually become a library call. Treat the cost
1691  // as such.
1692  switch (I.getOpcode()) {
1693  case Instruction::FPTrunc:
1694  case Instruction::FPExt:
1695  case Instruction::UIToFP:
1696  case Instruction::SIToFP:
1697  case Instruction::FPToUI:
1698  case Instruction::FPToSI:
1699  if (TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive)
1700  onCallPenalty();
1701  break;
1702  default:
1703  break;
1704  }
1705 
1706  return TargetTransformInfo::TCC_Free ==
1707  TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
1708 }
1709 
1710 bool CallAnalyzer::paramHasAttr(Argument *A, Attribute::AttrKind Attr) {
1711  return CandidateCall.paramHasAttr(A->getArgNo(), Attr);
1712 }
1713 
1714 bool CallAnalyzer::isKnownNonNullInCallee(Value *V) {
1715  // Does the *call site* have the NonNull attribute set on an argument? We
1716  // use the attribute on the call site to memoize any analysis done in the
1717  // caller. This will also trip if the callee function has a non-null
1718  // parameter attribute, but that's a less interesting case because hopefully
1719  // the callee would already have been simplified based on that.
1720  if (Argument *A = dyn_cast<Argument>(V))
1721  if (paramHasAttr(A, Attribute::NonNull))
1722  return true;
1723 
1724  // Is this an alloca in the caller? This is distinct from the attribute case
1725  // above because attributes aren't updated within the inliner itself and we
1726  // always want to catch the alloca derived case.
1727  if (isAllocaDerivedArg(V))
1728  // We can actually predict the result of comparisons between an
1729  // alloca-derived value and null. Note that this fires regardless of
1730  // SROA firing.
1731  return true;
1732 
1733  return false;
1734 }
1735 
1736 bool CallAnalyzer::allowSizeGrowth(CallBase &Call) {
1737  // If the normal destination of the invoke or the parent block of the call
1738  // site is unreachable-terminated, there is little point in inlining this
1739  // unless there is literally zero cost.
1740  // FIXME: Note that it is possible that an unreachable-terminated block has a
1741  // hot entry. For example, in the scenario below, inlining hot_call_X() may
1742  // be beneficial:
1743  // main() {
1744  // hot_call_1();
1745  // ...
1746  // hot_call_N()
1747  // exit(0);
1748  // }
1749  // For now, we are not handling this corner case here as it is rare in real
1750  // code. In future, we should elaborate this based on BPI and BFI in more
1751  // general threshold adjusting heuristics in updateThreshold().
1752  if (InvokeInst *II = dyn_cast<InvokeInst>(&Call)) {
1753  if (isa<UnreachableInst>(II->getNormalDest()->getTerminator()))
1754  return false;
1755  } else if (isa<UnreachableInst>(Call.getParent()->getTerminator()))
1756  return false;
1757 
1758  return true;
1759 }
1760 
1761 bool InlineCostCallAnalyzer::isColdCallSite(CallBase &Call,
1762  BlockFrequencyInfo *CallerBFI) {
1763  // If global profile summary is available, then callsite's coldness is
1764  // determined based on that.
1765  if (PSI && PSI->hasProfileSummary())
1766  return PSI->isColdCallSite(Call, CallerBFI);
1767 
1768  // Otherwise we need BFI to be available.
1769  if (!CallerBFI)
1770  return false;
1771 
1772  // Determine if the callsite is cold relative to caller's entry. We could
1773  // potentially cache the computation of scaled entry frequency, but the added
1774  // complexity is not worth it unless this scaling shows up high in the
1775  // profiles.
1776  const BranchProbability ColdProb(ColdCallSiteRelFreq, 100);
1777  auto CallSiteBB = Call.getParent();
1778  auto CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB);
1779  auto CallerEntryFreq =
1780  CallerBFI->getBlockFreq(&(Call.getCaller()->getEntryBlock()));
1781  return CallSiteFreq < CallerEntryFreq * ColdProb;
1782 }
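// For example, with ColdCallSiteRelFreq set to 2, a callsite whose block
// frequency is below 2% of the caller's entry frequency is treated as cold
// in the absence of a profile summary.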
1783 
1784 std::optional<int>
1785 InlineCostCallAnalyzer::getHotCallSiteThreshold(CallBase &Call,
1786  BlockFrequencyInfo *CallerBFI) {
1787 
1788  // If global profile summary is available, then callsite's hotness is
1789  // determined based on that.
1790  if (PSI && PSI->hasProfileSummary() && PSI->isHotCallSite(Call, CallerBFI))
1791  return Params.HotCallSiteThreshold;
1792 
1793  // Otherwise we need BFI to be available and to have a locally hot callsite
1794  // threshold.
1795  if (!CallerBFI || !Params.LocallyHotCallSiteThreshold)
1796  return std::nullopt;
1797 
1798  // Determine if the callsite is hot relative to caller's entry. We could
1799  // potentially cache the computation of scaled entry frequency, but the added
1800  // complexity is not worth it unless this scaling shows up high in the
1801  // profiles.
1802  auto CallSiteBB = Call.getParent();
1803  auto CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB).getFrequency();
1804  auto CallerEntryFreq = CallerBFI->getEntryFreq();
1805  if (CallSiteFreq >= CallerEntryFreq * HotCallSiteRelFreq)
1806  return Params.LocallyHotCallSiteThreshold;
1807 
1808  // Otherwise treat it normally.
1809  return std::nullopt;
1810 }
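// For example, with HotCallSiteRelFreq set to 60, a callsite must execute
// at least 60x as often as the caller's entry block before the
// LocallyHotCallSiteThreshold applies.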
1811 
1812 void InlineCostCallAnalyzer::updateThreshold(CallBase &Call, Function &Callee) {
1813  // If no size growth is allowed for this inlining, set Threshold to 0.
1814  if (!allowSizeGrowth(Call)) {
1815  Threshold = 0;
1816  return;
1817  }
1818 
1819  Function *Caller = Call.getCaller();
1820 
1821  // return min(A, B) if B is valid.
1822  auto MinIfValid = [](int A, Optional<int> B) {
1823  return B ? std::min(A, B.value()) : A;
1824  };
1825 
1826  // return max(A, B) if B is valid.
1827  auto MaxIfValid = [](int A, Optional<int> B) {
1828  return B ? std::max(A, B.value()) : A;
1829  };
1830 
1831  // Various bonus percentages. These are multiplied by Threshold to get the
1832  // bonus values.
1833  // SingleBBBonus: This bonus is applied if the callee has a single reachable
1834  // basic block at the given callsite context. This is speculatively applied
1835  // and withdrawn if more than one basic block is seen.
1836  //
1837  // LastCallToStaticBonus: This large bonus is applied to ensure the inlining
1838  // of the last call to a static function as inlining such functions is
1839  // guaranteed to reduce code size.
1840  //
1841  // These bonus percentages may be set to 0 based on properties of the caller
1842  // and the callsite.
1843  int SingleBBBonusPercent = 50;
1844  int VectorBonusPercent = TTI.getInlinerVectorBonusPercent();
1845  int LastCallToStaticBonus = InlineConstants::LastCallToStaticBonus;
1846 
1847  // Lambda to set all the above bonus and bonus percentages to 0.
1848  auto DisallowAllBonuses = [&]() {
1849  SingleBBBonusPercent = 0;
1850  VectorBonusPercent = 0;
1851  LastCallToStaticBonus = 0;
1852  };
1853 
1854  // Use the OptMinSizeThreshold or OptSizeThreshold knob if they are available
1855  // and reduce the threshold if the caller has the necessary attribute.
1856  if (Caller->hasMinSize()) {
1857  Threshold = MinIfValid(Threshold, Params.OptMinSizeThreshold);
1858  // For minsize, we want to disable the single BB bonus and the vector
1859  // bonuses, but not the last-call-to-static bonus. Inlining the last call to
1860  // a static function will, at the minimum, eliminate the parameter setup and
1861  // call/return instructions.
1862  SingleBBBonusPercent = 0;
1863  VectorBonusPercent = 0;
1864  } else if (Caller->hasOptSize())
1865  Threshold = MinIfValid(Threshold, Params.OptSizeThreshold);
1866 
1867  // Adjust the threshold based on inlinehint attribute and profile based
1868  // hotness information if the caller does not have MinSize attribute.
1869  if (!Caller->hasMinSize()) {
1870  if (Callee.hasFnAttribute(Attribute::InlineHint))
1871  Threshold = MaxIfValid(Threshold, Params.HintThreshold);
1872 
1873  // FIXME: After switching to the new passmanager, simplify the logic below
1874  // by checking only the callsite hotness/coldness as we will reliably
1875  // have local profile information.
1876  //
1877  // Callsite hotness and coldness can be determined if sample profile is
1878  // used (which adds hotness metadata to calls) or if caller's
1879  // BlockFrequencyInfo is available.
1880  BlockFrequencyInfo *CallerBFI = GetBFI ? &(GetBFI(*Caller)) : nullptr;
1881  auto HotCallSiteThreshold = getHotCallSiteThreshold(Call, CallerBFI);
1882  if (!Caller->hasOptSize() && HotCallSiteThreshold) {
1883  LLVM_DEBUG(dbgs() << "Hot callsite.\n");
1884  // FIXME: This should update the threshold only if it exceeds the
1885  // current threshold, but AutoFDO + ThinLTO currently relies on this
1886  // behavior to prevent inlining of hot callsites during ThinLTO
1887  // compile phase.
1888  Threshold = *HotCallSiteThreshold;
1889  } else if (isColdCallSite(Call, CallerBFI)) {
1890  LLVM_DEBUG(dbgs() << "Cold callsite.\n");
1891  // Do not apply bonuses for a cold callsite including the
1892  // LastCallToStatic bonus. While this bonus might result in code size
1893  // reduction, it can cause the size of a non-cold caller to increase
1894  // preventing it from being inlined.
1895  DisallowAllBonuses();
1896  Threshold = MinIfValid(Threshold, Params.ColdCallSiteThreshold);
1897  } else if (PSI) {
1898  // Use callee's global profile information only if we have no way of
1899  // determining this via callsite information.
1900  if (PSI->isFunctionEntryHot(&Callee)) {
1901  LLVM_DEBUG(dbgs() << "Hot callee.\n");
1902  // If callsite hotness can not be determined, we may still know
1903  // that the callee is hot and treat it as a weaker hint for threshold
1904  // increase.
1905  Threshold = MaxIfValid(Threshold, Params.HintThreshold);
1906  } else if (PSI->isFunctionEntryCold(&Callee)) {
1907  LLVM_DEBUG(dbgs() << "Cold callee.\n");
1908  // Do not apply bonuses for a cold callee including the
1909  // LastCallToStatic bonus. While this bonus might result in code size
1910  // reduction, it can cause the size of a non-cold caller to increase
1911  // preventing it from being inlined.
1912  DisallowAllBonuses();
1913  Threshold = MinIfValid(Threshold, Params.ColdThreshold);
1914  }
1915  }
1916  }
1917 
1918  Threshold += TTI.adjustInliningThreshold(&Call);
1919 
1920  // Finally, take the target-specific inlining threshold multiplier into
1921  // account.
1922  Threshold *= TTI.getInliningThresholdMultiplier();
1923 
1924  SingleBBBonus = Threshold * SingleBBBonusPercent / 100;
1925  VectorBonus = Threshold * VectorBonusPercent / 100;
1926 
1927  // If there is only one call of the function, and it has internal linkage,
1928  // the cost of inlining it drops dramatically. It may seem odd to update
1929  // Cost in updateThreshold, but the bonus depends on the logic in this method.
1930  if (isSoleCallToLocalFunction(Call, F)) {
1931  Cost -= LastCallToStaticBonus;
1932  StaticBonusApplied = LastCallToStaticBonus;
1933  }
1934 }
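// For example, with Threshold == 225 and SingleBBBonusPercent == 50, the
// speculative single-basic-block bonus is 225 * 50 / 100 == 112; a target
// reporting a 150% vector bonus yields VectorBonus == 225 * 150 / 100 == 337
// (integer division).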
1935 
1936 bool CallAnalyzer::visitCmpInst(CmpInst &I) {
1937  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
1938  // First try to handle simplified comparisons.
1939  if (simplifyInstruction(I))
1940  return true;
1941 
1942  if (I.getOpcode() == Instruction::FCmp)
1943  return false;
1944 
1945  // Otherwise look for a comparison between constant offset pointers with
1946  // a common base.
1947  Value *LHSBase, *RHSBase;
1948  APInt LHSOffset, RHSOffset;
1949  std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
1950  if (LHSBase) {
1951  std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
1952  if (RHSBase && LHSBase == RHSBase) {
1953  // We have common bases, fold the icmp to a constant based on the
1954  // offsets.
1955  Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
1956  Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
1957  if (Constant *C = ConstantExpr::getICmp(I.getPredicate(), CLHS, CRHS)) {
1958  SimplifiedValues[&I] = C;
1959  ++NumConstantPtrCmps;
1960  return true;
1961  }
1962  }
1963  }
1964 
1965  // If the comparison is an equality comparison with null, we can simplify it
1966  // if we know the value (argument) can't be null.
1967  if (I.isEquality() && isa<ConstantPointerNull>(I.getOperand(1)) &&
1968  isKnownNonNullInCallee(I.getOperand(0))) {
1969  bool IsNotEqual = I.getPredicate() == CmpInst::ICMP_NE;
1970  SimplifiedValues[&I] = IsNotEqual ? ConstantInt::getTrue(I.getType())
1971  : ConstantInt::getFalse(I.getType());
1972  return true;
1973  }
1974  return handleSROA(I.getOperand(0), isa<ConstantPointerNull>(I.getOperand(1)));
1975 }
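// For example, with %p mapped to (%base, 0) and %q mapped to (%base, 4),
//   %c = icmp ult ptr %p, %q
// folds to i1 true from the offsets alone, and
//   %c = icmp eq ptr %arg, null
// folds to i1 false when the call site marks %arg nonnull.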
1976 
1977 bool CallAnalyzer::visitSub(BinaryOperator &I) {
1978  // Try to handle a special case: we can fold computing the difference of two
1979  // constant-related pointers.
1980  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
1981  Value *LHSBase, *RHSBase;
1982  APInt LHSOffset, RHSOffset;
1983  std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
1984  if (LHSBase) {
1985  std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
1986  if (RHSBase && LHSBase == RHSBase) {
1987  // We have common bases, fold the subtract to a constant based on the
1988  // offsets.
1989  Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
1990  Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
1991  if (Constant *C = ConstantExpr::getSub(CLHS, CRHS)) {
1992  SimplifiedValues[&I] = C;
1993  ++NumConstantPtrDiffs;
1994  return true;
1995  }
1996  }
1997  }
1998 
1999  // Otherwise, fall back to the generic logic for simplifying and handling
2000  // instructions.
2001  return Base::visitSub(I);
2002 }
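// For example, with both operands tracked against the same base, say
// %pi -> (%base, 12) and %qi -> (%base, 4) after ptrtoint,
//   %d = sub i64 %pi, %qi
// folds to i64 8 from the offsets alone.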
2003 
2004 bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) {
2005  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
2006  Constant *CLHS = dyn_cast<Constant>(LHS);
2007  if (!CLHS)
2008  CLHS = SimplifiedValues.lookup(LHS);
2009  Constant *CRHS = dyn_cast<Constant>(RHS);
2010  if (!CRHS)
2011  CRHS = SimplifiedValues.lookup(RHS);
2012 
2013  Value *SimpleV = nullptr;
2014  if (auto FI = dyn_cast<FPMathOperator>(&I))
2015  SimpleV = simplifyBinOp(I.getOpcode(), CLHS ? CLHS : LHS, CRHS ? CRHS : RHS,
2016  FI->getFastMathFlags(), DL);
2017  else
2018  SimpleV =
2019  simplifyBinOp(I.getOpcode(), CLHS ? CLHS : LHS, CRHS ? CRHS : RHS, DL);
2020 
2021  if (Constant *C = dyn_cast_or_null<Constant>(SimpleV))
2022  SimplifiedValues[&I] = C;
2023 
2024  if (SimpleV)
2025  return true;
2026 
2027  // Disable any SROA on arguments to arbitrary, unsimplified binary operators.
2028  disableSROA(LHS);
2029  disableSROA(RHS);
2030 
2031  // If the instruction is floating point, and the target says this operation
2032  // is expensive, this may eventually become a library call. Treat the cost
2033  // as such. Unless it's fneg which can be implemented with an xor.
2034  using namespace llvm::PatternMatch;
2035  if (I.getType()->isFloatingPointTy() &&
2036  TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive &&
2037  !match(&I, m_FNeg(m_Value())))
2038  onCallPenalty();
2039 
2040  return false;
2041 }
2042 
2043 bool CallAnalyzer::visitFNeg(UnaryOperator &I) {
2044  Value *Op = I.getOperand(0);
2045  Constant *COp = dyn_cast<Constant>(Op);
2046  if (!COp)
2047  COp = SimplifiedValues.lookup(Op);
2048 
2049  Value *SimpleV = simplifyFNegInst(
2050  COp ? COp : Op, cast<FPMathOperator>(I).getFastMathFlags(), DL);
2051 
2052  if (Constant *C = dyn_cast_or_null<Constant>(SimpleV))
2053  SimplifiedValues[&I] = C;
2054 
2055  if (SimpleV)
2056  return true;
2057 
2058  // Disable any SROA on arguments to arbitrary, unsimplified fneg.
2059  disableSROA(Op);
2060 
2061  return false;
2062 }
2063 
2064 bool CallAnalyzer::visitLoad(LoadInst &I) {
2065  if (handleSROA(I.getPointerOperand(), I.isSimple()))
2066  return true;
2067 
2068  // If the data is already loaded from this address and hasn't been clobbered
2069  // by any stores or calls, this load is likely to be redundant and can be
2070  // eliminated.
2071  if (EnableLoadElimination &&
2072  !LoadAddrSet.insert(I.getPointerOperand()).second && I.isUnordered()) {
2073  onLoadEliminationOpportunity();
2074  return true;
2075  }
2076 
2077  onMemAccess();
2078  return false;
2079 }
2080 
2081 bool CallAnalyzer::visitStore(StoreInst &I) {
2082  if (handleSROA(I.getPointerOperand(), I.isSimple()))
2083  return true;
2084 
2085  // The store can potentially clobber loads and prevent repeated loads from
2086  // being eliminated.
2087  // FIXME:
2088  // 1. We can probably keep an initial set of eliminatable loads subtracted
2089  // from the cost even when we finally see a store. We just need to disable
2090  // *further* accumulation of elimination savings.
2091  // 2. We should probably at some point thread MemorySSA for the callee into
2092  // this and then use that to actually compute *really* precise savings.
2093  disableLoadElimination();
2094 
2095  onMemAccess();
2096  return false;
2097 }
2098 
2099 bool CallAnalyzer::visitExtractValue(ExtractValueInst &I) {
2100  // Constant folding for extract value is trivial.
2101  if (simplifyInstruction(I))
2102  return true;
2103 
2104  // SROA can't look through these, but they may be free.
2105  return Base::visitExtractValue(I);
2106 }
2107 
2108 bool CallAnalyzer::visitInsertValue(InsertValueInst &I) {
2109  // Constant folding for insert value is trivial.
2110  if (simplifyInstruction(I))
2111  return true;
2112 
2113  // SROA can't look through these, but they may be free.
2114  return Base::visitInsertValue(I);
2115 }
2116 
2117 /// Try to simplify a call site.
2118 ///
2119 /// Takes a concrete function and callsite and tries to actually simplify it by
2120 /// analyzing the arguments and call itself with instsimplify. Returns true if
2121 /// it has simplified the callsite to some other entity (a constant), making it
2122 /// free.
2123 bool CallAnalyzer::simplifyCallSite(Function *F, CallBase &Call) {
2124  // FIXME: Using the instsimplify logic directly for this is inefficient
2125  // because we have to continually rebuild the argument list even when no
2126  // simplifications can be performed. Until that is fixed with remapping
2127  // inside of instsimplify, directly constant fold calls here.
2128  if (!canConstantFoldCallTo(&Call, F))
2129  return false;
2130 
2131  // Try to re-map the arguments to constants.
2132  SmallVector<Constant *, 4> ConstantArgs;
2133  ConstantArgs.reserve(Call.arg_size());
2134  for (Value *I : Call.args()) {
2135  Constant *C = dyn_cast<Constant>(I);
2136  if (!C)
2137  C = dyn_cast_or_null<Constant>(SimplifiedValues.lookup(I));
2138  if (!C)
2139  return false; // This argument doesn't map to a constant.
2140 
2141  ConstantArgs.push_back(C);
2142  }
2143  if (Constant *C = ConstantFoldCall(&Call, F, ConstantArgs)) {
2144  SimplifiedValues[&Call] = C;
2145  return true;
2146  }
2147 
2148  return false;
2149 }
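// For example, once the argument is known constant,
//   %r = call double @llvm.sqrt.f64(double 4.0)
// constant-folds to double 2.0 here and the call site becomes free.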
2150 
2151 bool CallAnalyzer::visitCallBase(CallBase &Call) {
2152  if (!onCallBaseVisitStart(Call))
2153  return true;
2154 
2155  if (Call.hasFnAttr(Attribute::ReturnsTwice) &&
2156  !F.hasFnAttribute(Attribute::ReturnsTwice)) {
2157  // This aborts the entire analysis.
2158  ExposesReturnsTwice = true;
2159  return false;
2160  }
2161  if (isa<CallInst>(Call) && cast<CallInst>(Call).cannotDuplicate())
2162  ContainsNoDuplicateCall = true;
2163 
2164  Function *F = Call.getCalledFunction();
2165  bool IsIndirectCall = !F;
2166  if (IsIndirectCall) {
2167  // Check if this happens to be an indirect function call to a known function
2168  // in this inline context. If not, we've done all we can.
2169  Value *Callee = Call.getCalledOperand();
2170  F = dyn_cast_or_null<Function>(SimplifiedValues.lookup(Callee));
2171  if (!F || F->getFunctionType() != Call.getFunctionType()) {
2172  onCallArgumentSetup(Call);
2173 
2174  if (!Call.onlyReadsMemory())
2175  disableLoadElimination();
2176  return Base::visitCallBase(Call);
2177  }
2178  }
2179 
2180  assert(F && "Expected a call to a known function");
2181 
2182  // When we have a concrete function, first try to simplify it directly.
2183  if (simplifyCallSite(F, Call))
2184  return true;
2185 
2186  // Next check if it is an intrinsic we know about.
2187  // FIXME: Lift this into part of the InstVisitor.
2188  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&Call)) {
2189  switch (II->getIntrinsicID()) {
2190  default:
2191  if (!Call.onlyReadsMemory() && !isAssumeLikeIntrinsic(II))
2192  disableLoadElimination();
2193  return Base::visitCallBase(Call);
2194 
2195  case Intrinsic::load_relative:
2196  onLoadRelativeIntrinsic();
2197  return false;
2198 
2199  case Intrinsic::memset:
2200  case Intrinsic::memcpy:
2201  case Intrinsic::memmove:
2202  disableLoadElimination();
2203  // SROA can usually chew through these intrinsics, but they aren't free.
2204  return false;
2205  case Intrinsic::icall_branch_funnel:
2206  case Intrinsic::localescape:
2207  HasUninlineableIntrinsic = true;
2208  return false;
2209  case Intrinsic::vastart:
2210  InitsVargArgs = true;
2211  return false;
2212  case Intrinsic::launder_invariant_group:
2213  case Intrinsic::strip_invariant_group:
2214  if (auto *SROAArg = getSROAArgForValueOrNull(II->getOperand(0)))
2215  SROAArgValues[II] = SROAArg;
2216  return true;
2217  case Intrinsic::is_constant:
2218  return simplifyIntrinsicCallIsConstant(Call);
2219  }
2220  }
2221 
2222  if (F == Call.getFunction()) {
2223  // This flag will fully abort the analysis, so don't bother with anything
2224  // else.
2225  IsRecursiveCall = true;
2226  if (!AllowRecursiveCall)
2227  return false;
2228  }
2229 
2230  if (TTI.isLoweredToCall(F)) {
2231  onLoweredCall(F, Call, IsIndirectCall);
2232  }
2233 
2234  if (!(Call.onlyReadsMemory() || (IsIndirectCall && F->onlyReadsMemory())))
2235  disableLoadElimination();
2236  return Base::visitCallBase(Call);
2237 }
2238 
2239 bool CallAnalyzer::visitReturnInst(ReturnInst &RI) {
2240  // At least one return instruction will be free after inlining.
2241  bool Free = !HasReturn;
2242  HasReturn = true;
2243  return Free;
2244 }
2245 
2246 bool CallAnalyzer::visitBranchInst(BranchInst &BI) {
2247  // We model unconditional branches as essentially free -- they really
2248  // shouldn't exist at all, but handling them makes the behavior of the
2249  // inliner more regular and predictable. Interestingly, conditional branches
2250  // which will fold away are also free.
2251  return BI.isUnconditional() || isa<ConstantInt>(BI.getCondition()) ||
2252  isa_and_nonnull<ConstantInt>(
2253  SimplifiedValues.lookup(BI.getCondition()));
2254 }
2255 
2256 bool CallAnalyzer::visitSelectInst(SelectInst &SI) {
2257  bool CheckSROA = SI.getType()->isPointerTy();
2258  Value *TrueVal = SI.getTrueValue();
2259  Value *FalseVal = SI.getFalseValue();
2260 
2261  Constant *TrueC = dyn_cast<Constant>(TrueVal);
2262  if (!TrueC)
2263  TrueC = SimplifiedValues.lookup(TrueVal);
2264  Constant *FalseC = dyn_cast<Constant>(FalseVal);
2265  if (!FalseC)
2266  FalseC = SimplifiedValues.lookup(FalseVal);
2267  Constant *CondC =
2268  dyn_cast_or_null<Constant>(SimplifiedValues.lookup(SI.getCondition()));
2269 
2270  if (!CondC) {
2271  // Select C, X, X => X
2272  if (TrueC == FalseC && TrueC) {
2273  SimplifiedValues[&SI] = TrueC;
2274  return true;
2275  }
2276 
2277  if (!CheckSROA)
2278  return Base::visitSelectInst(SI);
2279 
2280  std::pair<Value *, APInt> TrueBaseAndOffset =
2281  ConstantOffsetPtrs.lookup(TrueVal);
2282  std::pair<Value *, APInt> FalseBaseAndOffset =
2283  ConstantOffsetPtrs.lookup(FalseVal);
2284  if (TrueBaseAndOffset == FalseBaseAndOffset && TrueBaseAndOffset.first) {
2285  ConstantOffsetPtrs[&SI] = TrueBaseAndOffset;
2286 
2287  if (auto *SROAArg = getSROAArgForValueOrNull(TrueVal))
2288  SROAArgValues[&SI] = SROAArg;
2289  return true;
2290  }
2291 
2292  return Base::visitSelectInst(SI);
2293  }
2294 
2295  // Select condition is a constant.
2296  Value *SelectedV = CondC->isAllOnesValue() ? TrueVal
2297  : (CondC->isNullValue()) ? FalseVal
2298  : nullptr;
2299  if (!SelectedV) {
2300  // Condition is a vector constant that is not all 1s or all 0s. If all
2301  // operands are constants, ConstantExpr::getSelect() can handle the cases
2302  // such as select vectors.
2303  if (TrueC && FalseC) {
2304  if (auto *C = ConstantExpr::getSelect(CondC, TrueC, FalseC)) {
2305  SimplifiedValues[&SI] = C;
2306  return true;
2307  }
2308  }
2309  return Base::visitSelectInst(SI);
2310  }
2311 
2312  // Condition is either all 1s or all 0s. SI can be simplified.
2313  if (Constant *SelectedC = dyn_cast<Constant>(SelectedV)) {
2314  SimplifiedValues[&SI] = SelectedC;
2315  return true;
2316  }
2317 
2318  if (!CheckSROA)
2319  return true;
2320 
2321  std::pair<Value *, APInt> BaseAndOffset =
2322  ConstantOffsetPtrs.lookup(SelectedV);
2323  if (BaseAndOffset.first) {
2324  ConstantOffsetPtrs[&SI] = BaseAndOffset;
2325 
2326  if (auto *SROAArg = getSROAArgForValueOrNull(SelectedV))
2327  SROAArgValues[&SI] = SROAArg;
2328  }
2329 
2330  return true;
2331 }
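// For example, once the condition is known,
//   %v = select i1 true, ptr %a, ptr %b
// simplifies to %a; if %a is tracked as (base, offset) or is an SROA
// candidate, those properties carry over to the select.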
2332 
2333 bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
2334  // We model unconditional switches as free, see the comments on handling
2335  // branches.
2336  if (isa<ConstantInt>(SI.getCondition()))
2337  return true;
2338  if (Value *V = SimplifiedValues.lookup(SI.getCondition()))
2339  if (isa<ConstantInt>(V))
2340  return true;
2341 
2342  // Assume the most general case where the switch is lowered into
2343  // either a jump table, bit test, or a balanced binary tree consisting of
2344  // case clusters without merging adjacent clusters with the same
2345  // destination. We do not consider the switches that are lowered with a mix
2346  // of jump table/bit test/binary search tree. The cost of the switch is
2347  // proportional to the size of the tree or the size of jump table range.
2348  //
2349  // NB: We convert large switches which are just used to initialize large phi
2350  // nodes to lookup tables instead in simplifycfg, so this shouldn't prevent
2351  // inlining those. It will prevent inlining in cases where the optimization
2352  // does not (yet) fire.
2353 
2354  unsigned JumpTableSize = 0;
2355  BlockFrequencyInfo *BFI = GetBFI ? &(GetBFI(F)) : nullptr;
2356  unsigned NumCaseCluster =
2357  TTI.getEstimatedNumberOfCaseClusters(SI, JumpTableSize, PSI, BFI);
2358 
2359  onFinalizeSwitch(JumpTableSize, NumCaseCluster);
2360  return false;
2361 }
2362 
2363 bool CallAnalyzer::visitIndirectBrInst(IndirectBrInst &IBI) {
2364  // We never want to inline functions that contain an indirectbr. This is
2365  // incorrect because all the blockaddresses (in static global initializers
2366  // for example) would be referring to the original function, and this
2367  // indirect jump would jump from the inlined copy of the function into the
2368  // original function which is extremely undefined behavior.
2369  // FIXME: This logic isn't really right; we can safely inline functions with
2370  // indirectbr's as long as no other function or global references the
2371  // blockaddress of a block within the current function.
2372  HasIndirectBr = true;
2373  return false;
2374 }
2375 
2376 bool CallAnalyzer::visitResumeInst(ResumeInst &RI) {
2377  // FIXME: It's not clear that a single instruction is an accurate model for
2378  // the inline cost of a resume instruction.
2379  return false;
2380 }
2381 
2382 bool CallAnalyzer::visitCleanupReturnInst(CleanupReturnInst &CRI) {
2383  // FIXME: It's not clear that a single instruction is an accurate model for
2384  // the inline cost of a cleanupret instruction.
2385  return false;
2386 }
2387 
2388 bool CallAnalyzer::visitCatchReturnInst(CatchReturnInst &CRI) {
2389  // FIXME: It's not clear that a single instruction is an accurate model for
2390  // the inline cost of a catchret instruction.
2391  return false;
2392 }
2393 
2394 bool CallAnalyzer::visitUnreachableInst(UnreachableInst &I) {
2395  // FIXME: It might be reasonable to discount the cost of instructions leading
2396  // to unreachable as they have the lowest possible impact on both runtime and
2397  // code size.
2398  return true; // No actual code is needed for unreachable.
2399 }
2400 
2401 bool CallAnalyzer::visitInstruction(Instruction &I) {
2402  // Some instructions are free. All of the free intrinsics can also be
2403  // handled by SROA, etc.
2404  if (TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency) ==
2405  TargetTransformInfo::TCC_Free)
2406  return true;
2407 
2408  // We found something we don't understand or can't handle. Mark any SROA-able
2409  // values in the operand list as no longer viable.
2410  for (const Use &Op : I.operands())
2411  disableSROA(Op);
2412 
2413  return false;
2414 }
2415 
2416 /// Analyze a basic block for its contribution to the inline cost.
2417 ///
2418 /// This method walks the analyzer over every instruction in the given basic
2419 /// block and accounts for their cost during inlining at this callsite. It
2420 /// aborts early if the threshold has been exceeded or an impossible to inline
2421 /// construct has been detected. It returns false if inlining is no longer
2422 /// viable, and true if inlining remains viable.
2423 InlineResult
2424 CallAnalyzer::analyzeBlock(BasicBlock *BB,
2425  SmallPtrSetImpl<const Value *> &EphValues) {
2426  for (Instruction &I : *BB) {
2427  // FIXME: Currently, the number of instructions in a function, regardless of
2428  // our ability to simplify them to constants or dead code during inlining,
2429  // is actually used by the vector bonus heuristic. As long as that's true,
2430  // we have to special-case debug intrinsics here to prevent differences in
2431  // inlining due to debug symbols. Eventually, the number of unsimplified
2432  // instructions shouldn't factor into the cost computation, but until then,
2433  // hack around it here.
2434  // Similarly, skip pseudo-probes.
2435  if (I.isDebugOrPseudoInst())
2436  continue;
2437 
2438  // Skip ephemeral values.
2439  if (EphValues.count(&I))
2440  continue;
2441 
2442  ++NumInstructions;
2443  if (isa<ExtractElementInst>(I) || I.getType()->isVectorTy())
2444  ++NumVectorInstructions;
2445 
2446  // If the instruction simplified to a constant, there is no cost to this
2447  // instruction. Visit the instructions using our InstVisitor to account for
2448  // all of the per-instruction logic. The visit tree returns true if we
2449  // consumed the instruction in any way, and false if the instruction's base
2450  // cost should count against inlining.
2451  onInstructionAnalysisStart(&I);
2452 
2453  if (Base::visit(&I))
2454  ++NumInstructionsSimplified;
2455  else
2456  onMissedSimplification();
2457 
2458  onInstructionAnalysisFinish(&I);
2459  using namespace ore;
2460  // If visiting this instruction detected an uninlinable pattern, abort.
2461  InlineResult IR = InlineResult::success();
2462  if (IsRecursiveCall && !AllowRecursiveCall)
2463  IR = InlineResult::failure("recursive");
2464  else if (ExposesReturnsTwice)
2465  IR = InlineResult::failure("exposes returns twice");
2466  else if (HasDynamicAlloca)
2467  IR = InlineResult::failure("dynamic alloca");
2468  else if (HasIndirectBr)
2469  IR = InlineResult::failure("indirect branch");
2470  else if (HasUninlineableIntrinsic)
2471  IR = InlineResult::failure("uninlinable intrinsic");
2472  else if (InitsVargArgs)
2473  IR = InlineResult::failure("varargs");
2474  if (!IR.isSuccess()) {
2475  if (ORE)
2476  ORE->emit([&]() {
2477  return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline",
2478  &CandidateCall)
2479  << NV("Callee", &F) << " has uninlinable pattern ("
2480  << NV("InlineResult", IR.getFailureReason())
2481  << ") and cost is not fully computed";
2482  });
2483  return IR;
2484  }
2485 
2486  // If the caller is a recursive function then we don't want to inline
2487  // functions which allocate a lot of stack space because it would increase
2488  // the caller stack usage dramatically.
2489  if (IsCallerRecursive && AllocatedSize > RecurStackSizeThreshold) {
2490  auto IR =
2491  InlineResult::failure("recursive and allocates too much stack space");
2492  if (ORE)
2493  ORE->emit([&]() {
2494  return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline",
2495  &CandidateCall)
2496  << NV("Callee", &F) << " is "
2497  << NV("InlineResult", IR.getFailureReason())
2498  << ". Cost is not fully computed";
2499  });
2500  return IR;
2501  }
2502 
2503  if (shouldStop())
2504  return InlineResult::failure(
2505  "Call site analysis is not favorable to inlining.");
2506  }
2507 
2508  return InlineResult::success();
2509 }
2510 
2511 /// Compute the base pointer and cumulative constant offsets for V.
2512 ///
2513 /// This strips all constant offsets off of V, leaving it the base pointer, and
2514 /// accumulates the total constant offset applied in the returned constant. It
2515 /// returns 0 if V is not a pointer, and returns the constant '0' if there are
2516 /// no constant offsets applied.
2517 ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
2518  if (!V->getType()->isPointerTy())
2519  return nullptr;
2520 
2521  unsigned AS = V->getType()->getPointerAddressSpace();
2522  unsigned IntPtrWidth = DL.getIndexSizeInBits(AS);
2523  APInt Offset = APInt::getZero(IntPtrWidth);
2524 
2525  // Even though we don't look through PHI nodes, we could be called on an
2526  // instruction in an unreachable block, which may be on a cycle.
2527  SmallPtrSet<Value *, 4> Visited;
2528  Visited.insert(V);
2529  do {
2530  if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
2531  if (!GEP->isInBounds() || !accumulateGEPOffset(*GEP, Offset))
2532  return nullptr;
2533  V = GEP->getPointerOperand();
2534  } else if (Operator::getOpcode(V) == Instruction::BitCast) {
2535  V = cast<Operator>(V)->getOperand(0);
2536  } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
2537  if (GA->isInterposable())
2538  break;
2539  V = GA->getAliasee();
2540  } else {
2541  break;
2542  }
2543  assert(V->getType()->isPointerTy() && "Unexpected operand type!");
2544  } while (Visited.insert(V).second);
2545 
2546  Type *IdxPtrTy = DL.getIndexType(V->getType());
2547  return cast<ConstantInt>(ConstantInt::get(IdxPtrTy, Offset));
2548 }
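// For example, for
//   %p = getelementptr inbounds [4 x i32], ptr %base, i64 0, i64 3
// V is stripped back to %base and the returned constant is 12 (three i32
// elements) in the pointer's index type.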
2549 
2550 /// Find dead blocks due to deleted CFG edges during inlining.
2551 ///
2552 /// If we know the successor of the current block, \p CurrBB, has to be \p
2553 /// NextBB, the other successors of \p CurrBB are dead if these successors have
2554 /// no live incoming CFG edges. If one block is found to be dead, we can
2555 /// continue growing the dead block list by checking the successors of the dead
2556 /// blocks to see if all their incoming edges are dead or not.
2557 void CallAnalyzer::findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB) {
2558  auto IsEdgeDead = [&](BasicBlock *Pred, BasicBlock *Succ) {
2559  // A CFG edge is dead if the predecessor is dead or the predecessor has a
2560  // known successor which is not the one under exam.
2561  return (DeadBlocks.count(Pred) ||
2562  (KnownSuccessors[Pred] && KnownSuccessors[Pred] != Succ));
2563  };
2564 
2565  auto IsNewlyDead = [&](BasicBlock *BB) {
2566  // If all the edges to a block are dead, the block is also dead.
2567  return (!DeadBlocks.count(BB) &&
2568  llvm::all_of(predecessors(BB),
2569  [&](BasicBlock *P) { return IsEdgeDead(P, BB); }));
2570  };
2571 
2572  for (BasicBlock *Succ : successors(CurrBB)) {
2573  if (Succ == NextBB || !IsNewlyDead(Succ))
2574  continue;
2575  SmallVector<BasicBlock *, 4> NewDead;
2576  NewDead.push_back(Succ);
2577  while (!NewDead.empty()) {
2578  BasicBlock *Dead = NewDead.pop_back_val();
2579  if (DeadBlocks.insert(Dead).second)
2580  // Continue growing the dead block lists.
2581  for (BasicBlock *S : successors(Dead))
2582  if (IsNewlyDead(S))
2583  NewDead.push_back(S);
2584  }
2585  }
2586 }
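// For example, if the condition of `br i1 %c, label %then, label %else`
// simplifies to true, KnownSuccessors[CurrBB] becomes %then, and %else is
// marked dead unless it has another live predecessor; blocks reachable only
// through %else are then marked dead transitively.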
2587 
2588 /// Analyze a call site for potential inlining.
2589 ///
2590 /// Returns true if inlining this call is viable, and false if it is not
2591 /// viable. It computes the cost and adjusts the threshold based on numerous
2592 /// factors and heuristics. If this method returns false but the computed cost
2593 /// is below the computed threshold, then inlining was forcibly disabled by
2594 /// some artifact of the routine.
2595 InlineResult CallAnalyzer::analyze() {
2596  ++NumCallsAnalyzed;
2597 
2598  auto Result = onAnalysisStart();
2599  if (!Result.isSuccess())
2600  return Result;
2601 
2602  if (F.empty())
2603  return InlineResult::success();
2604 
2605  Function *Caller = CandidateCall.getFunction();
2606  // Check if the caller function is recursive itself.
2607  for (User *U : Caller->users()) {
2608  CallBase *Call = dyn_cast<CallBase>(U);
2609  if (Call && Call->getFunction() == Caller) {
2610  IsCallerRecursive = true;
2611  break;
2612  }
2613  }
2614 
2615  // Populate our simplified values by mapping from function arguments to call
2616  // arguments with known important simplifications.
2617  auto CAI = CandidateCall.arg_begin();
2618  for (Argument &FAI : F.args()) {
2619  assert(CAI != CandidateCall.arg_end());
2620  if (Constant *C = dyn_cast<Constant>(CAI))
2621  SimplifiedValues[&FAI] = C;
2622 
2623  Value *PtrArg = *CAI;
2624  if (ConstantInt *C = stripAndComputeInBoundsConstantOffsets(PtrArg)) {
2625  ConstantOffsetPtrs[&FAI] = std::make_pair(PtrArg, C->getValue());
2626 
2627  // We can SROA any pointer arguments derived from alloca instructions.
2628  if (auto *SROAArg = dyn_cast<AllocaInst>(PtrArg)) {
2629  SROAArgValues[&FAI] = SROAArg;
2630  onInitializeSROAArg(SROAArg);
2631  EnabledSROAAllocas.insert(SROAArg);
2632  }
2633  }
2634  ++CAI;
2635  }
2636  NumConstantArgs = SimplifiedValues.size();
2637  NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size();
2638  NumAllocaArgs = SROAArgValues.size();
2639 
2640  // FIXME: If a caller has multiple calls to a callee, we end up recomputing
2641  // the ephemeral values multiple times (and they're completely determined by
2642  // the callee, so this is purely duplicate work).
2643  SmallPtrSet<const Value *, 32> EphValues;
2644  CodeMetrics::collectEphemeralValues(&F, &GetAssumptionCache(F), EphValues);
2645 
2646  // The worklist of live basic blocks in the callee *after* inlining. We avoid
2647  // adding basic blocks of the callee which can be proven to be dead for this
2648  // particular call site in order to get more accurate cost estimates. This
2649  // requires a somewhat heavyweight iteration pattern: we need to walk the
2650  // basic blocks in a breadth-first order as we insert live successors. To
2651  // accomplish this, and because we exit early once we cross our threshold,
2652  // we use a small-size-optimized SetVector.
2653  typedef SetVector<BasicBlock *, SmallVector<BasicBlock *, 16>,
2654  SmallPtrSet<BasicBlock *, 16>>
2655  BBSetVector;
2656  BBSetVector BBWorklist;
2657  BBWorklist.insert(&F.getEntryBlock());
2658 
2659  // Note that we *must not* cache the size, this loop grows the worklist.
2660  for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
2661  if (shouldStop())
2662  break;
2663 
2664  BasicBlock *BB = BBWorklist[Idx];
2665  if (BB->empty())
2666  continue;
2667 
2668  onBlockStart(BB);
2669 
2670  // Disallow inlining a blockaddress with uses other than strictly callbr.
2671  // A blockaddress only has defined behavior for an indirect branch in the
2672  // same function, and we do not currently support inlining indirect
2673  // branches. But, the inliner may not see an indirect branch that ends up
2674  // being dead code at a particular call site. If the blockaddress escapes
2675  // the function, e.g., via a global variable, inlining may lead to an
2676  // invalid cross-function reference.
2677  // FIXME: pr/39560: continue relaxing this overt restriction.
2678  if (BB->hasAddressTaken())
2679  for (User *U : BlockAddress::get(&*BB)->users())
2680  if (!isa<CallBrInst>(*U))
2681  return InlineResult::failure("blockaddress used outside of callbr");
2682 
2683  // Analyze the cost of this block. If we blow through the threshold, this
2684  // returns false, and we can bail out.
2685  InlineResult IR = analyzeBlock(BB, EphValues);
2686  if (!IR.isSuccess())
2687  return IR;
2688 
2689  Instruction *TI = BB->getTerminator();
2690 
2691  // Add in the live successors by first checking whether we have terminator
2692  // that may be simplified based on the values simplified by this call.
2693  if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
2694  if (BI->isConditional()) {
2695  Value *Cond = BI->getCondition();
2696  if (ConstantInt *SimpleCond =
2697  dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
2698  BasicBlock *NextBB = BI->getSuccessor(SimpleCond->isZero() ? 1 : 0);
2699  BBWorklist.insert(NextBB);
2700  KnownSuccessors[BB] = NextBB;
2701  findDeadBlocks(BB, NextBB);
2702  continue;
2703  }
2704  }
2705  } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
2706  Value *Cond = SI->getCondition();
2707  if (ConstantInt *SimpleCond =
2708  dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
2709  BasicBlock *NextBB = SI->findCaseValue(SimpleCond)->getCaseSuccessor();
2710  BBWorklist.insert(NextBB);
2711  KnownSuccessors[BB] = NextBB;
2712  findDeadBlocks(BB, NextBB);
2713  continue;
2714  }
2715  }
2716 
2717  // If we're unable to select a particular successor, just count all of
2718  // them.
2719  for (unsigned TIdx = 0, TSize = TI->getNumSuccessors(); TIdx != TSize;
2720  ++TIdx)
2721  BBWorklist.insert(TI->getSuccessor(TIdx));
2722 
2723  onBlockAnalyzed(BB);
2724  }
2725 
2726  // If this is a noduplicate call, we can still inline as long as
2727  // inlining this would cause the removal of the caller (so the instruction
2728  // is not actually duplicated, just moved).
2729  if (!isSoleCallToLocalFunction(CandidateCall, F) && ContainsNoDuplicateCall)
2730  return InlineResult::failure("noduplicate");
2731 
2732  // If the callee's stack size exceeds the user-specified threshold,
2733  // do not let it be inlined.
2734  // The command line option overrides a limit set in the function attributes.
2735  size_t FinalStackSizeThreshold = StackSizeThreshold;
2736  if (!StackSizeThreshold.getNumOccurrences())
2737  if (Optional<int> AttrMaxStackSize = getStringFnAttrAsInt(
2738  CandidateCall, InlineConstants::MaxInlineStackSizeAttributeName))
2739  FinalStackSizeThreshold = *AttrMaxStackSize;
2740  if (AllocatedSize > FinalStackSizeThreshold)
2741  return InlineResult::failure("stacksize");
2742 
2743  return finalizeAnalysis();
2744 }
2745 
2746 void InlineCostCallAnalyzer::print(raw_ostream &OS) {
2747 #define DEBUG_PRINT_STAT(x) OS << " " #x ": " << x << "\n"
2748  if (PrintInstructionComments)
2749  F.print(OS, &Writer);
2750  DEBUG_PRINT_STAT(NumConstantArgs);
2751  DEBUG_PRINT_STAT(NumConstantOffsetPtrArgs);
2752  DEBUG_PRINT_STAT(NumAllocaArgs);
2753  DEBUG_PRINT_STAT(NumConstantPtrCmps);
2754  DEBUG_PRINT_STAT(NumConstantPtrDiffs);
2755  DEBUG_PRINT_STAT(NumInstructionsSimplified);
2756  DEBUG_PRINT_STAT(NumInstructions);
2757  DEBUG_PRINT_STAT(SROACostSavings);
2758  DEBUG_PRINT_STAT(SROACostSavingsLost);
2759  DEBUG_PRINT_STAT(LoadEliminationCost);
2760  DEBUG_PRINT_STAT(ContainsNoDuplicateCall);
2761  DEBUG_PRINT_STAT(Cost);
2762  DEBUG_PRINT_STAT(Threshold);
2763 #undef DEBUG_PRINT_STAT
2764 }
2765 
2766 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2767 /// Dump stats about this call's analysis.
2768 LLVM_DUMP_METHOD void InlineCostCallAnalyzer::dump() { print(dbgs()); }
2769 #endif
2770 
2771 /// Test that there are no attribute conflicts between Caller and Callee
2772 /// that prevent inlining.
2773 static bool functionsHaveCompatibleAttributes(
2774  Function *Caller, Function *Callee, TargetTransformInfo &TTI,
2775  function_ref<const TargetLibraryInfo &(Function &)> &GetTLI) {
2776  // Note that CalleeTLI must be a copy not a reference. The legacy pass manager
2777  // caches the most recently created TLI in the TargetLibraryInfoWrapperPass
2778  // object, and always returns the same object (which is overwritten on each
2779  // GetTLI call). Therefore we copy the first result.
2780  auto CalleeTLI = GetTLI(*Callee);
2781  return (IgnoreTTIInlineCompatible ||
2782  TTI.areInlineCompatible(Caller, Callee)) &&
2783  GetTLI(*Caller).areInlineCompatible(CalleeTLI,
2784  InlineCallerSupersetNoBuiltin) &&
2785  AttributeFuncs::areInlineCompatible(*Caller, *Callee);
2786 }
2787 
2788 int llvm::getCallsiteCost(const CallBase &Call, const DataLayout &DL) {
2789  int64_t Cost = 0;
2790  for (unsigned I = 0, E = Call.arg_size(); I != E; ++I) {
2791  if (Call.isByValArgument(I)) {
2792  // We approximate the number of loads and stores needed by dividing the
2793  // size of the byval type by the target's pointer size.
2794  PointerType *PTy = cast<PointerType>(Call.getArgOperand(I)->getType());
2795  unsigned TypeSize = DL.getTypeSizeInBits(Call.getParamByValType(I));
2796  unsigned AS = PTy->getAddressSpace();
2797  unsigned PointerSize = DL.getPointerSizeInBits(AS);
2798  // Ceiling division.
2799  unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize;
2800 
2801  // If it generates more than 8 stores it is likely to be expanded as an
2802  // inline memcpy so we take that as an upper bound. Otherwise we assume
2803  // one load and one store per word copied.
2804  // FIXME: The maxStoresPerMemcpy setting from the target should be used
2805  // here instead of a magic number of 8, but it's not available via
2806  // DataLayout.
2807  NumStores = std::min(NumStores, 8U);
2808 
2809  Cost += 2 * NumStores * InstrCost;
2810  } else {
2811  // For non-byval arguments subtract off one instruction per call
2812  // argument.
2813  Cost += InstrCost;
2814  }
2815  }
2816  // The call instruction also disappears after inlining.
2817  Cost += InstrCost;
2818  Cost += CallPenalty;
2819  return std::min<int64_t>(Cost, INT_MAX);
2820 }
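// For example, a byval argument of 128 bits on a target with 64-bit
// pointers needs NumStores == 2, contributing 2 * 2 * InstrCost; assuming
// InstrCost == 5 and CallPenalty == 25, the whole call site then costs
// 20 + 5 + 25 == 50.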
2821 
2822 InlineCost llvm::getInlineCost(
2823  CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI,
2824  function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
2825  function_ref<const TargetLibraryInfo &(Function &)> GetTLI,
2826  function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
2827  ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
2828  return getInlineCost(Call, Call.getCalledFunction(), Params, CalleeTTI,
2829  GetAssumptionCache, GetTLI, GetBFI, PSI, ORE);
2830 }
2831 
2832 std::optional<int> llvm::getInliningCostEstimate(
2833  CallBase &Call, TargetTransformInfo &CalleeTTI,
2834  function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
2835  function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
2836  ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
2837  const InlineParams Params = {/* DefaultThreshold*/ 0,
2838  /*HintThreshold*/ {},
2839  /*ColdThreshold*/ {},
2840  /*OptSizeThreshold*/ {},
2841  /*OptMinSizeThreshold*/ {},
2842  /*HotCallSiteThreshold*/ {},
2843  /*LocallyHotCallSiteThreshold*/ {},
2844  /*ColdCallSiteThreshold*/ {},
2845  /*ComputeFullInlineCost*/ true,
2846  /*EnableDeferral*/ true};
2847 
2848  InlineCostCallAnalyzer CA(*Call.getCalledFunction(), Call, Params, CalleeTTI,
2849  GetAssumptionCache, GetBFI, PSI, ORE, true,
2850  /*IgnoreThreshold*/ true);
2851  auto R = CA.analyze();
2852  if (!R.isSuccess())
2853  return std::nullopt;
2854  return CA.getCost();
2855 }
2856 
2857 std::optional<InlineCostFeatures> llvm::getInliningCostFeatures(
2858  CallBase &Call, TargetTransformInfo &CalleeTTI,
2859  function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
2860  function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
2861  ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
2862  InlineCostFeaturesAnalyzer CFA(CalleeTTI, GetAssumptionCache, GetBFI, PSI,
2863  ORE, *Call.getCalledFunction(), Call);
2864  auto R = CFA.analyze();
2865  if (!R.isSuccess())
2866  return std::nullopt;
2867  return CFA.features();
2868 }
2869 
2870 std::optional<InlineResult> llvm::getAttributeBasedInliningDecision(
2871  CallBase &Call, Function *Callee, TargetTransformInfo &CalleeTTI,
2872  function_ref<const TargetLibraryInfo &(Function &)> GetTLI) {
2873 
2874  // Cannot inline indirect calls.
2875  if (!Callee)
2876  return InlineResult::failure("indirect call");
2877 
2878  // When a callee coroutine function is inlined into a caller coroutine
2879  // function before the coro-split pass, the coro-early pass cannot handle
2880  // it well. So we do not inline a coroutine function that has not yet been
2881  // split.
2882  if (Callee->isPresplitCoroutine())
2883  return InlineResult::failure("unsplited coroutine call");
2884 
2885  // Never inline calls with byval arguments that do not have the alloca
2886  // address space. Since byval arguments can be replaced with a copy to an
2887  // alloca, the inlined code would need to be adjusted to handle that the
2888  // argument is in the alloca address space (so it is a little bit complicated
2889  // to solve).
2890  unsigned AllocaAS = Callee->getParent()->getDataLayout().getAllocaAddrSpace();
2891  for (unsigned I = 0, E = Call.arg_size(); I != E; ++I)
2892  if (Call.isByValArgument(I)) {
2893  PointerType *PTy = cast<PointerType>(Call.getArgOperand(I)->getType());
2894  if (PTy->getAddressSpace() != AllocaAS)
2895  return InlineResult::failure("byval arguments without alloca"
2896  " address space");
2897  }
2898 
2899  // Calls to functions with always-inline attributes should be inlined
2900  // whenever possible.
2901  if (Call.hasFnAttr(Attribute::AlwaysInline)) {
2902  if (Call.getAttributes().hasFnAttr(Attribute::NoInline))
2903  return InlineResult::failure("noinline call site attribute");
2904 
2905  auto IsViable = isInlineViable(*Callee);
2906  if (IsViable.isSuccess())
2907  return InlineResult::success();
2908  return InlineResult::failure(IsViable.getFailureReason());
2909  }
2910 
2911  // Never inline functions with conflicting attributes (unless callee has
2912  // always-inline attribute).
2913  Function *Caller = Call.getCaller();
2914  if (!functionsHaveCompatibleAttributes(Caller, Callee, CalleeTTI, GetTLI))
2915  return InlineResult::failure("conflicting attributes");
2916 
2917  // Don't inline this call if the caller has the optnone attribute.
2918  if (Caller->hasOptNone())
2919  return InlineResult::failure("optnone attribute");
2920 
2921  // Don't inline a function that treats null pointer as valid into a caller
2922  // that does not have this attribute.
2923  if (!Caller->nullPointerIsDefined() && Callee->nullPointerIsDefined())
2924  return InlineResult::failure("nullptr definitions incompatible");
2925 
2926  // Don't inline functions which can be interposed at link-time.
2927  if (Callee->isInterposable())
2928  return InlineResult::failure("interposable");
2929 
2930  // Don't inline functions marked noinline.
2931  if (Callee->hasFnAttribute(Attribute::NoInline))
2932  return InlineResult::failure("noinline function attribute");
2933 
2934  // Don't inline call sites marked noinline.
2935  if (Call.isNoInline())
2936  return InlineResult::failure("noinline call site attribute");
2937 
2938  return std::nullopt;
2939 }
2940 
2941 InlineCost llvm::getInlineCost(
2942  CallBase &Call, Function *Callee, const InlineParams &Params,
2943  TargetTransformInfo &CalleeTTI,
2944  function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
2945  function_ref<const TargetLibraryInfo &(Function &)> GetTLI,
2946  function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
2947  ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
2948 
2949  auto UserDecision =
2950  llvm::getAttributeBasedInliningDecision(Call, Callee, CalleeTTI, GetTLI);
2951 
2952  if (UserDecision) {
2953  if (UserDecision->isSuccess())
2954  return llvm::InlineCost::getAlways("always inline attribute");
2955  return llvm::InlineCost::getNever(UserDecision->getFailureReason());
2956  }
2957 
2958  LLVM_DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName()
2959  << "... (caller:" << Call.getCaller()->getName()
2960  << ")\n");
2961 
2962  InlineCostCallAnalyzer CA(*Callee, Call, Params, CalleeTTI,
2963  GetAssumptionCache, GetBFI, PSI, ORE);
2964  InlineResult ShouldInline = CA.analyze();
2965 
2966  LLVM_DEBUG(CA.dump());
2967 
2968  // Always make cost benefit based decision explicit.
2969  // We use always/never here since threshold is not meaningful,
2970  // as it's not what drives cost-benefit analysis.
2971  if (CA.wasDecidedByCostBenefit()) {
2972  if (ShouldInline.isSuccess())
2973  return InlineCost::getAlways("benefit over cost",
2974  CA.getCostBenefitPair());
2975  else
2976  return InlineCost::getNever("cost over benefit", CA.getCostBenefitPair());
2977  }
2978 
2979  if (CA.wasDecidedByCostThreshold())
2980  return InlineCost::get(CA.getCost(), CA.getThreshold(),
2981  CA.getStaticBonusApplied());
2982 
2983  // No details on how the decision was made; simply return always or never.
2984  return ShouldInline.isSuccess()
2985  ? InlineCost::getAlways("empty function")
2986  : InlineCost::getNever(ShouldInline.getFailureReason());
2987 }
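// Annotation (not part of the original source): a hedged usage sketch. The
// functors below are hypothetical stand-ins for what a
// FunctionAnalysisManager would provide, mirroring how the inliner pass
// wires this up:
//
//   auto GetAC = [&](Function &F) -> AssumptionCache & {
//     return FAM.getResult<AssumptionAnalysis>(F);
//   };
//   auto GetTLI = [&](Function &F) -> const TargetLibraryInfo & {
//     return FAM.getResult<TargetLibraryAnalysis>(F);
//   };
//   InlineCost IC = getInlineCost(CB, Params, CalleeTTI, GetAC, GetTLI,
//                                 /*GetBFI=*/nullptr, /*PSI=*/nullptr,
//                                 /*ORE=*/nullptr);
//   if (IC)  // operator bool(): always-inline, or cost under threshold
//     ;      // profitable to inline CB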
2988 
2989 InlineResult llvm::isInlineViable(Function &F) {
2990  bool ReturnsTwice = F.hasFnAttribute(Attribute::ReturnsTwice);
2991  for (BasicBlock &BB : F) {
2992  // Disallow inlining of functions which contain indirect branches.
2993  if (isa<IndirectBrInst>(BB.getTerminator()))
2994  return InlineResult::failure("contains indirect branches");
2995 
2996  // Disallow inlining of blockaddresses which are used by non-callbr
2997  // instructions.
2998  if (BB.hasAddressTaken())
2999  for (User *U : BlockAddress::get(&BB)->users())
3000  if (!isa<CallBrInst>(*U))
3001  return InlineResult::failure("blockaddress used outside of callbr");
3002 
3003  for (auto &II : BB) {
3004  CallBase *Call = dyn_cast<CallBase>(&II);
3005  if (!Call)
3006  continue;
3007 
3008  // Disallow recursive calls.
3009  Function *Callee = Call->getCalledFunction();
3010  if (&F == Callee)
3011  return InlineResult::failure("recursive call");
3012 
3013  // Disallow calls which expose returns-twice to a function not previously
3014  // attributed as such.
3015  if (!ReturnsTwice && isa<CallInst>(Call) &&
3016  cast<CallInst>(Call)->canReturnTwice())
3017  return InlineResult::failure("exposes returns-twice attribute");
3018 
3019  if (Callee)
3020  switch (Callee->getIntrinsicID()) {
3021  default:
3022  break;
3023  case llvm::Intrinsic::icall_branch_funnel:
3024  // Disallow inlining of @llvm.icall.branch.funnel because the current
3025  // backend can't separate call targets from call arguments.
3026  return InlineResult::failure(
3027  "disallowed inlining of @llvm.icall.branch.funnel");
3028  case llvm::Intrinsic::localescape:
3029  // Disallow inlining functions that call @llvm.localescape. Doing this
3030  // correctly would require major changes to the inliner.
3031  return InlineResult::failure(
3032  "disallowed inlining of @llvm.localescape");
3033  case llvm::Intrinsic::vastart:
3034  // Disallow inlining of functions that initialize VarArgs with
3035  // va_start.
3036  return InlineResult::failure(
3037  "contains VarArgs initialized with va_start");
3038  }
3039  }
3040  }
3041 
3042  return InlineResult::success();
3043 }
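// Annotation (not part of the original source): isInlineViable is a purely
// structural filter, so it can also be queried on its own, e.g. to report
// why an always_inline candidate was rejected:
//
//   InlineResult Viable = isInlineViable(*Callee);
//   if (!Viable.isSuccess())
//     errs() << Callee->getName() << " is not inlinable: "
//            << Viable.getFailureReason() << "\n";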
3044 
3045 // APIs to create InlineParams based on command line flags and/or other
3046 // parameters.
3047 
3048 InlineParams llvm::getInlineParams(int Threshold) {
3049  InlineParams Params;
3050 
3051  // This field is the threshold to use for a callee by default. This is
3052  // derived from one or more of:
3053  // * optimization or size-optimization levels,
3054  // * a value passed to createFunctionInliningPass function, or
3055  // * the -inline-threshold flag.
3056  // If the -inline-threshold flag is explicitly specified, that is used
3057  // irrespective of anything else.
3058  if (InlineThreshold.getNumOccurrences() > 0)
3059  Params.DefaultThreshold = InlineThreshold;
3060  else
3061  Params.DefaultThreshold = Threshold;
3062 
3063  // Set the HintThreshold knob from the -inlinehint-threshold.
3064  Params.HintThreshold = HintThreshold;
3065 
3066  // Set the HotCallSiteThreshold knob from the -hot-callsite-threshold.
3067  Params.HotCallSiteThreshold = HotCallSiteThreshold;
3068 
3069  // If the -locally-hot-callsite-threshold is explicitly specified, use it to
3070  // populate LocallyHotCallSiteThreshold. Later, we populate
3071  // Params.LocallyHotCallSiteThreshold from -locally-hot-callsite-threshold if
3072  // we know that optimization level is O3 (in the getInlineParams variant that
3073  // takes the opt and size levels).
3074  // FIXME: Remove this check (and make the assignment unconditional) after
3075  // addressing size regression issues at O2.
3076  if (LocallyHotCallSiteThreshold.getNumOccurrences() > 0)
3077  Params.LocallyHotCallSiteThreshold = LocallyHotCallSiteThreshold;
3078 
3079  // Set the ColdCallSiteThreshold knob from the
3080  // -inline-cold-callsite-threshold.
3081  Params.ColdCallSiteThreshold = ColdCallSiteThreshold;
3082 
3083  // Set the OptMinSizeThreshold and OptSizeThreshold params only if the
3084  // -inlinehint-threshold commandline option is not explicitly given. If that
3085  // option is present, then its value applies even for callees with size and
3086  // minsize attributes.
3087  // If the -inline-threshold is not specified, set the ColdThreshold from the
3088  // -inlinecold-threshold even if it is not explicitly passed. If
3089  // -inline-threshold is specified, then -inlinecold-threshold needs to be
3090  // explicitly specified to set the ColdThreshold knob.
3091  if (InlineThreshold.getNumOccurrences() == 0) {
3092  Params.OptMinSizeThreshold = InlineConstants::OptMinSizeThreshold;
3093  Params.OptSizeThreshold = InlineConstants::OptSizeThreshold;
3094  Params.ColdThreshold = ColdThreshold;
3095  } else if (ColdThreshold.getNumOccurrences() > 0) {
3096  Params.ColdThreshold = ColdThreshold;
3097  }
3098  return Params;
3099 }
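// Annotation (not part of the original source): a worked example of the
// precedence rules above, assuming default flag values unless noted:
//
//   InlineParams P = getInlineParams(225);
//   // No -inline-threshold given: P.DefaultThreshold == 225 (the argument),
//   // P.ColdThreshold == 45 (the -inlinecold-threshold default), and the
//   // size thresholds come from InlineConstants.
//   // With -inline-threshold=500: P.DefaultThreshold == 500 regardless of
//   // the argument, and P.ColdThreshold is set only if -inlinecold-threshold
//   // was passed explicitly.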
3100 
3101 InlineParams llvm::getInlineParams() {
3102  return getInlineParams(DefaultThreshold);
3103 }
3104 
3105 // Compute the default threshold for inlining based on the opt level and the
3106 // size opt level.
3107 static int computeThresholdFromOptLevels(unsigned OptLevel,
3108  unsigned SizeOptLevel) {
3109  if (OptLevel > 2)
3110  return InlineConstants::OptAggressiveThreshold;
3111  if (SizeOptLevel == 1) // -Os
3112  return InlineConstants::OptSizeThreshold;
3113  if (SizeOptLevel == 2) // -Oz
3114  return InlineConstants::OptMinSizeThreshold;
3115  return DefaultThreshold;
3116 }
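// Annotation (not part of the original source): the mapping this helper
// implements, spelled out:
//
//   computeThresholdFromOptLevels(3, 0);  // -O3 -> OptAggressiveThreshold
//   computeThresholdFromOptLevels(2, 1);  // -Os -> OptSizeThreshold
//   computeThresholdFromOptLevels(2, 2);  // -Oz -> OptMinSizeThreshold
//   computeThresholdFromOptLevels(2, 0);  // -O2 -> DefaultThreshold flag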
3117 
3118 InlineParams llvm::getInlineParams(unsigned OptLevel, unsigned SizeOptLevel) {
3119  auto Params =
3120  getInlineParams(computeThresholdFromOptLevels(OptLevel, SizeOptLevel));
3121  // At O3, use the value of -locally-hot-callsite-threshold option to populate
3122  // Params.LocallyHotCallSiteThreshold. Below O3, this flag has effect only
3123  // when it is specified explicitly.
3124  if (OptLevel > 2)
3125  Params.LocallyHotCallSiteThreshold = LocallyHotCallSiteThreshold;
3126  return Params;
3127 }
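// Annotation (not part of the original source): e.g. a pipeline built for
// -O3 would obtain aggressive parameters plus the locally-hot knob:
//
//   InlineParams P = getInlineParams(/*OptLevel=*/3, /*SizeOptLevel=*/0);
//   // P.DefaultThreshold == InlineConstants::OptAggressiveThreshold, and
//   // P.LocallyHotCallSiteThreshold is populated (O3 only, per the FIXME
//   // in getInlineParams(int) above).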
3128 
3129 PreservedAnalyses
3130 InlineCostAnnotationPrinterPass::run(Function &F,
3131  FunctionAnalysisManager &FAM) {
3132  PrintInstructionComments = true;
3133  std::function<AssumptionCache &(Function &)> GetAssumptionCache =
3134  [&](Function &F) -> AssumptionCache & {
3135  return FAM.getResult<AssumptionAnalysis>(F);
3136  };
3137  Module *M = F.getParent();
3138  ProfileSummaryInfo PSI(*M);
3139  DataLayout DL(M);
3140  TargetTransformInfo TTI(DL);
3141  // FIXME: Redesign the usage of InlineParams to expand the scope of this pass.
3142  // In the current implementation, the type of InlineParams doesn't matter as
3143  // the pass serves only for verification of inliner's decisions.
3144  // We can add a flag which determines InlineParams for this run. Right now,
3145  // the default InlineParams are used.
3146  const InlineParams Params = llvm::getInlineParams();
3147  for (BasicBlock &BB : F) {
3148  for (Instruction &I : BB) {
3149  if (CallInst *CI = dyn_cast<CallInst>(&I)) {
3150  Function *CalledFunction = CI->getCalledFunction();
3151  if (!CalledFunction || CalledFunction->isDeclaration())
3152  continue;
3153  OptimizationRemarkEmitter ORE(CalledFunction);
3154  InlineCostCallAnalyzer ICCA(*CalledFunction, *CI, Params, TTI,
3155  GetAssumptionCache, nullptr, &PSI, &ORE);
3156  ICCA.analyze();
3157  OS << " Analyzing call of " << CalledFunction->getName()
3158  << "... (caller:" << CI->getCaller()->getName() << ")\n";
3159  ICCA.print(OS);
3160  OS << "\n";
3161  }
3162  }
3163  }
3164  return PreservedAnalyses::all();
3165 }
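// Annotation (not part of the original source): this printer pass is
// normally reached through the new pass manager; assuming the usual
// registration name for InlineCostAnnotationPrinterPass, something like
//
//   opt -passes='print<inline-cost>' -disable-output input.ll
//
// prints the analyzer's verdict and per-instruction cost comments for each
// call to a defined function.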