1 //===- InlineCost.cpp - Cost analysis for inliner -------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements inline cost analysis.
10 //
11 //===----------------------------------------------------------------------===//
12 
14 #include "llvm/ADT/STLExtras.h"
15 #include "llvm/ADT/SetVector.h"
16 #include "llvm/ADT/SmallPtrSet.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/Statistic.h"
24 #include "llvm/Analysis/LoopInfo.h"
30 #include "llvm/Config/llvm-config.h"
32 #include "llvm/IR/CallingConv.h"
33 #include "llvm/IR/DataLayout.h"
34 #include "llvm/IR/Dominators.h"
36 #include "llvm/IR/GlobalAlias.h"
37 #include "llvm/IR/InstVisitor.h"
38 #include "llvm/IR/IntrinsicInst.h"
39 #include "llvm/IR/Operator.h"
40 #include "llvm/IR/PatternMatch.h"
42 #include "llvm/Support/Debug.h"
45 #include <limits>
46 
47 using namespace llvm;
48 
49 #define DEBUG_TYPE "inline-cost"
50 
51 STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed");
52 
53 static cl::opt<int>
54  DefaultThreshold("inlinedefault-threshold", cl::Hidden, cl::init(225),
55  cl::desc("Default amount of inlining to perform"));
56 
57 // We introduce this option since there is a minor compile-time win by avoiding
58 // addition of TTI attributes (target-features in particular) to inline
59 // candidates when they are guaranteed to be the same as top level methods in
60 // some use cases. If we avoid adding the attribute, we need an option to avoid
61 // checking these attributes.
63  "ignore-tti-inline-compatible", cl::Hidden, cl::init(false),
64  cl::desc("Ignore TTI attributes compatibility check between callee/caller "
65  "during inline cost calculation"));
66 
68  "print-instruction-comments", cl::Hidden, cl::init(false),
69  cl::desc("Prints comments for instruction based on inline cost analysis"));
70 
72  "inline-threshold", cl::Hidden, cl::init(225),
73  cl::desc("Control the amount of inlining to perform (default = 225)"));
74 
76  "inlinehint-threshold", cl::Hidden, cl::init(325),
77  cl::desc("Threshold for inlining functions with inline hint"));
78 
79 static cl::opt<int>
80  ColdCallSiteThreshold("inline-cold-callsite-threshold", cl::Hidden,
81  cl::init(45),
82  cl::desc("Threshold for inlining cold callsites"));
83 
85  "inline-enable-cost-benefit-analysis", cl::Hidden, cl::init(false),
86  cl::desc("Enable the cost-benefit analysis for the inliner"));
87 
89  "inline-savings-multiplier", cl::Hidden, cl::init(8),
90  cl::desc("Multiplier to multiply cycle savings by during inlining"));
91 
92 static cl::opt<int>
93  InlineSizeAllowance("inline-size-allowance", cl::Hidden, cl::init(100),
94  cl::desc("The maximum size of a callee that get's "
95  "inlined without sufficient cycle savings"));
96 
97 // We introduce this threshold to help performance of instrumentation based
98 // PGO before we actually hook up inliner with analysis passes such as BPI and
99 // BFI.
101  "inlinecold-threshold", cl::Hidden, cl::init(45),
102  cl::desc("Threshold for inlining functions with cold attribute"));
103 
104 static cl::opt<int>
105  HotCallSiteThreshold("hot-callsite-threshold", cl::Hidden, cl::init(3000),
106  cl::desc("Threshold for hot callsites "));
107 
109  "locally-hot-callsite-threshold", cl::Hidden, cl::init(525),
110  cl::desc("Threshold for locally hot callsites "));
111 
113  "cold-callsite-rel-freq", cl::Hidden, cl::init(2),
114  cl::desc("Maximum block frequency, expressed as a percentage of caller's "
115  "entry frequency, for a callsite to be cold in the absence of "
116  "profile information."));
117 
119  "hot-callsite-rel-freq", cl::Hidden, cl::init(60),
120  cl::desc("Minimum block frequency, expressed as a multiple of caller's "
121  "entry frequency, for a callsite to be hot in the absence of "
122  "profile information."));
123 
125  "inline-call-penalty", cl::Hidden, cl::init(25),
126  cl::desc("Call penalty that is applied per callsite when inlining"));
127 
128 static cl::opt<size_t>
129  StackSizeThreshold("inline-max-stacksize", cl::Hidden,
132  cl::desc("Do not inline functions with a stack size "
133  "that exceeds the specified limit"));
134 
136  "inline-cost-full", cl::Hidden,
137  cl::desc("Compute the full inline cost of a call site even when the cost "
138  "exceeds the threshold."));
139 
141  "inline-caller-superset-nobuiltin", cl::Hidden, cl::init(true),
142  cl::desc("Allow inlining when caller has a superset of callee's nobuiltin "
143  "attributes."));
144 
146  "disable-gep-const-evaluation", cl::Hidden, cl::init(false),
147  cl::desc("Disables evaluation of GetElementPtr with constant operands"));
148 
149 namespace llvm {
150 Optional<int> getStringFnAttrAsInt(CallBase &CB, StringRef AttrKind) {
151  Attribute Attr = CB.getFnAttr(AttrKind);
152  int AttrValue;
153  if (Attr.getValueAsString().getAsInteger(10, AttrValue))
154  return None;
155  return AttrValue;
156 }
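// For example, a call site carrying the string attribute
// "function-inline-cost"="42" (consumed in finalizeAnalysis below) makes
// getStringFnAttrAsInt(CB, "function-inline-cost") return 42; a missing or
// non-integer value yields None.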
157 } // namespace llvm
158 
159 namespace {
160 class InlineCostCallAnalyzer;
161 
162 // This struct is used to store information about inline cost of a
163 // particular instruction
164 struct InstructionCostDetail {
165  int CostBefore = 0;
166  int CostAfter = 0;
167  int ThresholdBefore = 0;
168  int ThresholdAfter = 0;
169 
170  int getThresholdDelta() const { return ThresholdAfter - ThresholdBefore; }
171 
172  int getCostDelta() const { return CostAfter - CostBefore; }
173 
174  bool hasThresholdChanged() const { return ThresholdAfter != ThresholdBefore; }
175 };
176 
177 class InlineCostAnnotationWriter : public AssemblyAnnotationWriter {
178 private:
179  InlineCostCallAnalyzer *const ICCA;
180 
181 public:
182  InlineCostAnnotationWriter(InlineCostCallAnalyzer *ICCA) : ICCA(ICCA) {}
183  virtual void emitInstructionAnnot(const Instruction *I,
184  formatted_raw_ostream &OS) override;
185 };
186 
187 /// Carry out call site analysis, in order to evaluate inlinability.
188 /// NOTE: the type is currently used as implementation detail of functions such
189 /// as llvm::getInlineCost. Note the function_ref constructor parameters - the
190 /// expectation is that they come from the outer scope, from the wrapper
191 /// functions. If we want to support constructing CallAnalyzer objects where
192 /// lambdas are provided inline at construction, or where the object needs to
193 /// otherwise survive past the scope of the provided functions, we need to
194 /// revisit the argument types.
195 class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
196  typedef InstVisitor<CallAnalyzer, bool> Base;
197  friend class InstVisitor<CallAnalyzer, bool>;
198 
199 protected:
200  virtual ~CallAnalyzer() = default;
201  /// The TargetTransformInfo available for this compilation.
202  const TargetTransformInfo &TTI;
203 
204  /// Getter for the cache of @llvm.assume intrinsics.
205  function_ref<AssumptionCache &(Function &)> GetAssumptionCache;
206 
207  /// Getter for BlockFrequencyInfo
208  function_ref<BlockFrequencyInfo &(Function &)> GetBFI;
209 
210  /// Profile summary information.
211  ProfileSummaryInfo *PSI;
212 
213  /// The called function.
214  Function &F;
215 
216  // Cache the DataLayout since we use it a lot.
217  const DataLayout &DL;
218 
219  /// The OptimizationRemarkEmitter available for this compilation.
220  OptimizationRemarkEmitter *ORE;
221 
222  /// The candidate callsite being analyzed. Please do not use this to do
223  /// analysis in the caller function; we want the inline cost query to be
224  /// easily cacheable. Instead, use the cover function paramHasAttr.
225  CallBase &CandidateCall;
226 
227  /// Extension points for handling callsite features.
228  // Called before a basic block was analyzed.
229  virtual void onBlockStart(const BasicBlock *BB) {}
230 
231  /// Called after a basic block was analyzed.
232  virtual void onBlockAnalyzed(const BasicBlock *BB) {}
233 
234  /// Called before an instruction was analyzed
235  virtual void onInstructionAnalysisStart(const Instruction *I) {}
236 
237  /// Called after an instruction was analyzed
238  virtual void onInstructionAnalysisFinish(const Instruction *I) {}
239 
240  /// Called at the end of the analysis of the callsite. Return the outcome of
241  /// the analysis, i.e. 'InlineResult(true)' if the inlining may happen, or
242  /// the reason it can't.
243  virtual InlineResult finalizeAnalysis() { return InlineResult::success(); }
244  /// Called when we're about to start processing a basic block, and every time
245  /// we are done processing an instruction. Return true if there is no point in
246  /// continuing the analysis (e.g. we've determined already the call site is
247  /// too expensive to inline)
248  virtual bool shouldStop() { return false; }
249 
250  /// Called before the analysis of the callee body starts (with callsite
251  /// contexts propagated). It checks callsite-specific information. Return a
252  /// reason analysis can't continue if that's the case, or 'true' if it may
253  /// continue.
254  virtual InlineResult onAnalysisStart() { return InlineResult::success(); }
255  /// Called if the analysis engine decides SROA cannot be done for the given
256  /// alloca.
257  virtual void onDisableSROA(AllocaInst *Arg) {}
258 
259  /// Called when the analysis engine determines load elimination won't happen.
260  virtual void onDisableLoadElimination() {}
261 
262  /// Called when we visit a CallBase, before the analysis starts. Return false
263  /// to stop further processing of the instruction.
264  virtual bool onCallBaseVisitStart(CallBase &Call) { return true; }
265 
266  /// Called to account for a call.
267  virtual void onCallPenalty() {}
268 
269  /// Called to account for the expectation the inlining would result in a load
270  /// elimination.
271  virtual void onLoadEliminationOpportunity() {}
272 
273  /// Called to account for the cost of argument setup for the Call in the
274  /// callee's body (not the callsite currently under analysis).
275  virtual void onCallArgumentSetup(const CallBase &Call) {}
276 
277  /// Called to account for a load relative intrinsic.
278  virtual void onLoadRelativeIntrinsic() {}
279 
280  /// Called to account for a lowered call.
281  virtual void onLoweredCall(Function *F, CallBase &Call, bool IsIndirectCall) {
282  }
283 
284  /// Account for a jump table of given size. Return false to stop further
285  /// processing of the switch instruction.
286  virtual bool onJumpTable(unsigned JumpTableSize) { return true; }
287 
288  /// Account for a case cluster of given size. Return false to stop further
289  /// processing of the instruction.
290  virtual bool onCaseCluster(unsigned NumCaseCluster) { return true; }
291 
292  /// Called at the end of processing a switch instruction, with the given
293  /// number of case clusters.
294  virtual void onFinalizeSwitch(unsigned JumpTableSize,
295  unsigned NumCaseCluster) {}
296 
297  /// Called to account for any other instruction not specifically accounted
298  /// for.
299  virtual void onMissedSimplification() {}
300 
301  /// Start accounting potential benefits due to SROA for the given alloca.
302  virtual void onInitializeSROAArg(AllocaInst *Arg) {}
303 
304  /// Account SROA savings for the AllocaInst value.
305  virtual void onAggregateSROAUse(AllocaInst *V) {}
306 
307  bool handleSROA(Value *V, bool DoNotDisable) {
308  // Check for SROA candidates in comparisons.
309  if (auto *SROAArg = getSROAArgForValueOrNull(V)) {
310  if (DoNotDisable) {
311  onAggregateSROAUse(SROAArg);
312  return true;
313  }
314  disableSROAForArg(SROAArg);
315  }
316  return false;
317  }
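// Callers use handleSROA to either credit an SROA candidate alloca with
// additional savings (DoNotDisable == true) or to conservatively disable SROA
// for it when the use cannot be simplified away.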
318 
319  bool IsCallerRecursive = false;
320  bool IsRecursiveCall = false;
321  bool ExposesReturnsTwice = false;
322  bool HasDynamicAlloca = false;
323  bool ContainsNoDuplicateCall = false;
324  bool HasReturn = false;
325  bool HasIndirectBr = false;
326  bool HasUninlineableIntrinsic = false;
327  bool InitsVargArgs = false;
328 
329  /// Number of bytes allocated statically by the callee.
330  uint64_t AllocatedSize = 0;
331  unsigned NumInstructions = 0;
332  unsigned NumVectorInstructions = 0;
333 
334  /// While we walk the potentially-inlined instructions, we build up and
335  /// maintain a mapping of simplified values specific to this callsite. The
336  /// idea is to propagate any special information we have about arguments to
337  /// this call through the inlinable section of the function, and account for
338  /// likely simplifications post-inlining. The most important aspect we track
339  /// is CFG altering simplifications -- when we prove a basic block dead, that
340  /// can cause dramatic shifts in the cost of inlining a function.
341  DenseMap<Value *, Constant *> SimplifiedValues;
342 
343  /// Keep track of the values which map back (through function arguments) to
344  /// allocas on the caller stack which could be simplified through SROA.
345  DenseMap<Value *, AllocaInst *> SROAArgValues;
346 
347  /// Keep track of Allocas for which we believe we may get SROA optimization.
348  DenseSet<AllocaInst *> EnabledSROAAllocas;
349 
350  /// Keep track of values which map to a pointer base and constant offset.
351  DenseMap<Value *, std::pair<Value *, APInt>> ConstantOffsetPtrs;
352 
353  /// Keep track of dead blocks due to the constant arguments.
354  SetVector<BasicBlock *> DeadBlocks;
355 
356  /// The mapping of the blocks to their known unique successors due to the
357  /// constant arguments.
358  DenseMap<BasicBlock *, BasicBlock *> KnownSuccessors;
359 
360  /// Model the elimination of repeated loads that is expected to happen
361  /// whenever we simplify away the stores that would otherwise cause them to be
362  /// loads.
363  bool EnableLoadElimination = true;
364 
365  /// Whether we allow inlining for recursive call.
366  bool AllowRecursiveCall = false;
367 
368  SmallPtrSet<Value *, 16> LoadAddrSet;
369 
370  AllocaInst *getSROAArgForValueOrNull(Value *V) const {
371  auto It = SROAArgValues.find(V);
372  if (It == SROAArgValues.end() || EnabledSROAAllocas.count(It->second) == 0)
373  return nullptr;
374  return It->second;
375  }
376 
377  // Custom simplification helper routines.
378  bool isAllocaDerivedArg(Value *V);
379  void disableSROAForArg(AllocaInst *SROAArg);
380  void disableSROA(Value *V);
381  void findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB);
382  void disableLoadElimination();
383  bool isGEPFree(GetElementPtrInst &GEP);
384  bool canFoldInboundsGEP(GetElementPtrInst &I);
385  bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset);
386  bool simplifyCallSite(Function *F, CallBase &Call);
387  template <typename Callable>
388  bool simplifyInstruction(Instruction &I, Callable Evaluate);
389  bool simplifyIntrinsicCallIsConstant(CallBase &CB);
390  ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V);
391 
392  /// Return true if the given argument to the function being considered for
393  /// inlining has the given attribute set either at the call site or the
394  /// function declaration. Primarily used to inspect call site specific
395  /// attributes since these can be more precise than the ones on the callee
396  /// itself.
397  bool paramHasAttr(Argument *A, Attribute::AttrKind Attr);
398 
399  /// Return true if the given value is known non null within the callee if
400  /// inlined through this particular callsite.
401  bool isKnownNonNullInCallee(Value *V);
402 
403  /// Return true if size growth is allowed when inlining the callee at \p Call.
404  bool allowSizeGrowth(CallBase &Call);
405 
406  // Custom analysis routines.
407  InlineResult analyzeBlock(BasicBlock *BB,
408  SmallPtrSetImpl<const Value *> &EphValues);
409 
410  // Disable several entry points to the visitor so we don't accidentally use
411  // them by declaring but not defining them here.
412  void visit(Module *);
413  void visit(Module &);
414  void visit(Function *);
415  void visit(Function &);
416  void visit(BasicBlock *);
417  void visit(BasicBlock &);
418 
419  // Provide base case for our instruction visit.
420  bool visitInstruction(Instruction &I);
421 
422  // Our visit overrides.
423  bool visitAlloca(AllocaInst &I);
424  bool visitPHI(PHINode &I);
425  bool visitGetElementPtr(GetElementPtrInst &I);
426  bool visitBitCast(BitCastInst &I);
427  bool visitPtrToInt(PtrToIntInst &I);
428  bool visitIntToPtr(IntToPtrInst &I);
429  bool visitCastInst(CastInst &I);
430  bool visitCmpInst(CmpInst &I);
431  bool visitSub(BinaryOperator &I);
432  bool visitBinaryOperator(BinaryOperator &I);
433  bool visitFNeg(UnaryOperator &I);
434  bool visitLoad(LoadInst &I);
435  bool visitStore(StoreInst &I);
436  bool visitExtractValue(ExtractValueInst &I);
437  bool visitInsertValue(InsertValueInst &I);
438  bool visitCallBase(CallBase &Call);
439  bool visitReturnInst(ReturnInst &RI);
440  bool visitBranchInst(BranchInst &BI);
441  bool visitSelectInst(SelectInst &SI);
442  bool visitSwitchInst(SwitchInst &SI);
443  bool visitIndirectBrInst(IndirectBrInst &IBI);
444  bool visitResumeInst(ResumeInst &RI);
445  bool visitCleanupReturnInst(CleanupReturnInst &RI);
446  bool visitCatchReturnInst(CatchReturnInst &RI);
447  bool visitUnreachableInst(UnreachableInst &I);
448 
449 public:
450  CallAnalyzer(Function &Callee, CallBase &Call, const TargetTransformInfo &TTI,
451  function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
452  function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr,
453  ProfileSummaryInfo *PSI = nullptr,
454  OptimizationRemarkEmitter *ORE = nullptr)
455  : TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI),
456  PSI(PSI), F(Callee), DL(F.getParent()->getDataLayout()), ORE(ORE),
457  CandidateCall(Call) {}
458 
459  InlineResult analyze();
460 
461  Optional<Constant *> getSimplifiedValue(Instruction *I) {
462  if (SimplifiedValues.find(I) != SimplifiedValues.end())
463  return SimplifiedValues[I];
464  return None;
465  }
466 
467  // Keep a bunch of stats about the cost savings found so we can print them
468  // out when debugging.
469  unsigned NumConstantArgs = 0;
470  unsigned NumConstantOffsetPtrArgs = 0;
471  unsigned NumAllocaArgs = 0;
472  unsigned NumConstantPtrCmps = 0;
473  unsigned NumConstantPtrDiffs = 0;
474  unsigned NumInstructionsSimplified = 0;
475 
476  void dump();
477 };
478 
479 // When forming a binary search, we should find the number of nodes, which
480 // is the same as the number of comparisons when lowered. For a given
481 // number of clusters, n, we can define a recursive function, f(n), to find
482 // the number of nodes in the tree. The recursion is:
483 // f(n) = 1 + f(n/2) + f(n - n/2), when n > 3,
484 // and f(n) = n, when n <= 3.
485 // This will lead to a binary tree where each leaf is either f(2) or f(3)
486 // when n > 3. So, the number of comparisons from leaves should be n, while
487 // the number from non-leaf nodes should be:
488 // 2^(log2(n) - 1) - 1
489 // = 2^log2(n) * 2^-1 - 1
490 // = n / 2 - 1.
491 // Considering comparisons from leaf and non-leaf nodes, we can estimate the
492 // number of comparisons in a simple closed form:
493 // n + n / 2 - 1 = n * 3 / 2 - 1
494 int64_t getExpectedNumberOfCompare(int NumCaseCluster) {
495  return 3 * static_cast<int64_t>(NumCaseCluster) / 2 - 1;
496 }
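// For example, lowering a switch with 8 case clusters is estimated at
// 3 * 8 / 2 - 1 = 11 compares: 8 from the leaves plus 8 / 2 - 1 = 3 from the
// non-leaf nodes.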
497 
498 /// FIXME: if it is necessary to derive from InlineCostCallAnalyzer, note
499 /// the FIXME in onLoweredCall, when instantiating an InlineCostCallAnalyzer
500 class InlineCostCallAnalyzer final : public CallAnalyzer {
501  const int CostUpperBound = INT_MAX - InlineConstants::InstrCost - 1;
502  const bool ComputeFullInlineCost;
503  int LoadEliminationCost = 0;
504  /// Bonus to be applied when percentage of vector instructions in callee is
505  /// high (see more details in updateThreshold).
506  int VectorBonus = 0;
507  /// Bonus to be applied when the callee has only one reachable basic block.
508  int SingleBBBonus = 0;
509 
510  /// Tunable parameters that control the analysis.
511  const InlineParams &Params;
512 
513  // This DenseMap stores the delta change in cost and threshold after
514  // accounting for the given instruction. The map is filled only with the
515  // flag PrintInstructionComments on.
516  DenseMap<const Instruction *, InstructionCostDetail> InstructionCostDetailMap;
517 
518  /// Upper bound for the inlining cost. Bonuses are being applied to account
519  /// for speculative "expected profit" of the inlining decision.
520  int Threshold = 0;
521 
522  /// Attempt to evaluate indirect calls to boost their inline cost.
523  const bool BoostIndirectCalls;
524 
525  /// Ignore the threshold when finalizing analysis.
526  const bool IgnoreThreshold;
527 
528  // True if the cost-benefit-analysis-based inliner is enabled.
529  const bool CostBenefitAnalysisEnabled;
530 
531  /// Inlining cost measured in abstract units, accounts for all the
532  /// instructions expected to be executed for a given function invocation.
533  /// Instructions that are statically proven to be dead based on call-site
534  /// arguments are not counted here.
535  int Cost = 0;
536 
537  // The cumulative cost at the beginning of the basic block being analyzed. At
538  // the end of analyzing each basic block, "Cost - CostAtBBStart" represents
539  // the size of that basic block.
540  int CostAtBBStart = 0;
541 
542  // The static size of live but cold basic blocks. This is "static" in the
543  // sense that it's not weighted by profile counts at all.
544  int ColdSize = 0;
545 
546  // Whether inlining is decided by cost-threshold analysis.
547  bool DecidedByCostThreshold = false;
548 
549  // Whether inlining is decided by cost-benefit analysis.
550  bool DecidedByCostBenefit = false;
551 
552  // The cost-benefit pair computed by cost-benefit analysis.
553  Optional<CostBenefitPair> CostBenefit = None;
554 
555  bool SingleBB = true;
556 
557  unsigned SROACostSavings = 0;
558  unsigned SROACostSavingsLost = 0;
559 
560  /// The mapping of caller Alloca values to their accumulated cost savings. If
561  /// we have to disable SROA for one of the allocas, this tells us how much
562  /// cost must be added.
563  DenseMap<AllocaInst *, int> SROAArgCosts;
564 
565  /// Return true if \p Call is a cold callsite.
566  bool isColdCallSite(CallBase &Call, BlockFrequencyInfo *CallerBFI);
567 
568  /// Update Threshold based on callsite properties such as callee
569  /// attributes and callee hotness for PGO builds. The Callee is explicitly
570  /// passed to support analyzing indirect calls whose target is inferred by
571  /// analysis.
572  void updateThreshold(CallBase &Call, Function &Callee);
573  /// Return a higher threshold if \p Call is a hot callsite.
574  Optional<int> getHotCallSiteThreshold(CallBase &Call,
575  BlockFrequencyInfo *CallerBFI);
576 
577  /// Handle a capped 'int' increment for Cost.
578  void addCost(int64_t Inc, int64_t UpperBound = INT_MAX) {
579  assert(UpperBound > 0 && UpperBound <= INT_MAX && "invalid upper bound");
580  Cost = std::min<int>(UpperBound, Cost + Inc);
581  }
582 
583  void onDisableSROA(AllocaInst *Arg) override {
584  auto CostIt = SROAArgCosts.find(Arg);
585  if (CostIt == SROAArgCosts.end())
586  return;
587  addCost(CostIt->second);
588  SROACostSavings -= CostIt->second;
589  SROACostSavingsLost += CostIt->second;
590  SROAArgCosts.erase(CostIt);
591  }
592 
593  void onDisableLoadElimination() override {
594  addCost(LoadEliminationCost);
595  LoadEliminationCost = 0;
596  }
597 
598  bool onCallBaseVisitStart(CallBase &Call) override {
599  if (Optional<int> AttrCallThresholdBonus =
600  getStringFnAttrAsInt(Call, "call-threshold-bonus"))
601  Threshold += *AttrCallThresholdBonus;
602 
603  if (Optional<int> AttrCallCost =
604  getStringFnAttrAsInt(Call, "call-inline-cost")) {
605  addCost(*AttrCallCost);
606  // Prevent further processing of the call since we want to override its
607  // inline cost, not just add to it.
608  return false;
609  }
610  return true;
611  }
612 
613  void onCallPenalty() override { addCost(CallPenalty); }
614  void onCallArgumentSetup(const CallBase &Call) override {
615  // Pay the price of the argument setup. We account for the average 1
616  // instruction per call argument setup here.
617  addCost(Call.arg_size() * InlineConstants::InstrCost);
618  }
619  void onLoadRelativeIntrinsic() override {
620  // This is normally lowered to 4 LLVM instructions.
621  addCost(3 * InlineConstants::InstrCost);
622  }
623  void onLoweredCall(Function *F, CallBase &Call,
624  bool IsIndirectCall) override {
625  // We account for the average 1 instruction per call argument setup here.
626  addCost(Call.arg_size() * InlineConstants::InstrCost);
627 
628  // If we have a constant that we are calling as a function, we can peer
629  // through it and see the function target. This happens not infrequently
630  // during devirtualization and so we want to give it a hefty bonus for
631  // inlining, but cap that bonus in the event that inlining wouldn't pan out.
632  // Pretend to inline the function, with a custom threshold.
633  if (IsIndirectCall && BoostIndirectCalls) {
634  auto IndirectCallParams = Params;
635  IndirectCallParams.DefaultThreshold =
636  InlineConstants::IndirectCallThreshold;
637  /// FIXME: if InlineCostCallAnalyzer is derived from, this may need
638  /// to instantiate the derived class.
639  InlineCostCallAnalyzer CA(*F, Call, IndirectCallParams, TTI,
640  GetAssumptionCache, GetBFI, PSI, ORE, false);
641  if (CA.analyze().isSuccess()) {
642  // We were able to inline the indirect call! Subtract the cost from the
643  // threshold to get the bonus we want to apply, but don't go below zero.
644  Cost -= std::max(0, CA.getThreshold() - CA.getCost());
645  }
646  } else
647  // Otherwise simply add the cost for merely making the call.
648  addCost(CallPenalty);
649  }
650 
651  void onFinalizeSwitch(unsigned JumpTableSize,
652  unsigned NumCaseCluster) override {
653  // If suitable for a jump table, consider the cost for the table size and
654  // branch to destination.
655  // The maximum valid cost is increased in this function.
656  if (JumpTableSize) {
657  int64_t JTCost =
658  static_cast<int64_t>(JumpTableSize) * InlineConstants::InstrCost +
659  4 * InlineConstants::InstrCost;
660 
661  addCost(JTCost, static_cast<int64_t>(CostUpperBound));
662  return;
663  }
664 
665  if (NumCaseCluster <= 3) {
666  // Suppose a comparison includes one compare and one conditional branch.
667  addCost(NumCaseCluster * 2 * InlineConstants::InstrCost);
668  return;
669  }
670 
671  int64_t ExpectedNumberOfCompare =
672  getExpectedNumberOfCompare(NumCaseCluster);
673  int64_t SwitchCost =
674  ExpectedNumberOfCompare * 2 * InlineConstants::InstrCost;
675 
676  addCost(SwitchCost, static_cast<int64_t>(CostUpperBound));
677  }
678  void onMissedSimplification() override {
679  addCost(InlineConstants::InstrCost);
680  }
681 
682  void onInitializeSROAArg(AllocaInst *Arg) override {
683  assert(Arg != nullptr &&
684  "Should not initialize SROA costs for null value.");
685  SROAArgCosts[Arg] = 0;
686  }
687 
688  void onAggregateSROAUse(AllocaInst *SROAArg) override {
689  auto CostIt = SROAArgCosts.find(SROAArg);
690  assert(CostIt != SROAArgCosts.end() &&
691  "expected this argument to have a cost");
692  CostIt->second += InlineConstants::InstrCost;
693  SROACostSavings += InlineConstants::InstrCost;
694  }
695 
696  void onBlockStart(const BasicBlock *BB) override { CostAtBBStart = Cost; }
697 
698  void onBlockAnalyzed(const BasicBlock *BB) override {
699  if (CostBenefitAnalysisEnabled) {
700  // Keep track of the static size of live but cold basic blocks. For now,
701  // we define a cold basic block to be one that's never executed.
702  assert(GetBFI && "GetBFI must be available");
703  BlockFrequencyInfo *BFI = &(GetBFI(F));
704  assert(BFI && "BFI must be available");
705  auto ProfileCount = BFI->getBlockProfileCount(BB);
706  assert(ProfileCount.hasValue());
707  if (ProfileCount.getValue() == 0)
708  ColdSize += Cost - CostAtBBStart;
709  }
710 
711  auto *TI = BB->getTerminator();
712  // If we had any successors at this point, then post-inlining is likely to
713  // have them as well. Note that we assume any basic blocks which existed
714  // due to branches or switches which folded above will also fold after
715  // inlining.
716  if (SingleBB && TI->getNumSuccessors() > 1) {
717  // Take off the bonus we applied to the threshold.
718  Threshold -= SingleBBBonus;
719  SingleBB = false;
720  }
721  }
722 
723  void onInstructionAnalysisStart(const Instruction *I) override {
724  // This function is called to store the initial cost of inlining before
725  // the given instruction was assessed.
726  if (!PrintInstructionComments)
727  return;
728  InstructionCostDetailMap[I].CostBefore = Cost;
729  InstructionCostDetailMap[I].ThresholdBefore = Threshold;
730  }
731 
732  void onInstructionAnalysisFinish(const Instruction *I) override {
733  // This function is called to find new values of cost and threshold after
734  // the instruction has been assessed.
735  if (!PrintInstructionComments)
736  return;
737  InstructionCostDetailMap[I].CostAfter = Cost;
738  InstructionCostDetailMap[I].ThresholdAfter = Threshold;
739  }
740 
741  bool isCostBenefitAnalysisEnabled() {
742  if (!PSI || !PSI->hasProfileSummary())
743  return false;
744 
745  if (!GetBFI)
746  return false;
747 
748  if (InlineEnableCostBenefitAnalysis.getNumOccurrences()) {
749  // Honor the explicit request from the user.
750  if (!InlineEnableCostBenefitAnalysis)
751  return false;
752  } else {
753  // Otherwise, require instrumentation profile.
754  if (!PSI->hasInstrumentationProfile())
755  return false;
756  }
757 
758  auto *Caller = CandidateCall.getParent()->getParent();
759  if (!Caller->getEntryCount())
760  return false;
761 
762  BlockFrequencyInfo *CallerBFI = &(GetBFI(*Caller));
763  if (!CallerBFI)
764  return false;
765 
766  // For now, limit to hot call site.
767  if (!PSI->isHotCallSite(CandidateCall, CallerBFI))
768  return false;
769 
770  // Make sure we have a nonzero entry count.
771  auto EntryCount = F.getEntryCount();
772  if (!EntryCount || !EntryCount->getCount())
773  return false;
774 
775  BlockFrequencyInfo *CalleeBFI = &(GetBFI(F));
776  if (!CalleeBFI)
777  return false;
778 
779  return true;
780  }
781 
782  // Determine whether we should inline the given call site, taking into account
783  // both the size cost and the cycle savings. Return None if we don't have
784  // sufficient profiling information to make a determination.
785  Optional<bool> costBenefitAnalysis() {
786  if (!CostBenefitAnalysisEnabled)
787  return None;
788 
789  // buildInlinerPipeline in the pass builder sets HotCallSiteThreshold to 0
790  // for the prelink phase of the AutoFDO + ThinLTO build. Honor the logic by
791  // falling back to the cost-based metric.
792  // TODO: Improve this hacky condition.
793  if (Threshold == 0)
794  return None;
795 
796  assert(GetBFI);
797  BlockFrequencyInfo *CalleeBFI = &(GetBFI(F));
798  assert(CalleeBFI);
799 
800  // The cycle savings expressed as the sum of InlineConstants::InstrCost
801  // multiplied by the estimated dynamic count of each instruction we can
802  // avoid. Savings come from the call site cost, such as argument setup and
803  // the call instruction, as well as the instructions that are folded.
804  //
805  // We use 128-bit APInt here to avoid potential overflow. This variable
806  // should stay well below 10^^24 (or 2^^80) in practice. This "worst" case
807  // assumes that we can avoid or fold a billion instructions, each with a
808  // profile count of 10^^15 -- roughly the number of cycles for a 24-hour
809  // period on a 4GHz machine.
810  APInt CycleSavings(128, 0);
811 
812  for (auto &BB : F) {
813  APInt CurrentSavings(128, 0);
814  for (auto &I : BB) {
815  if (BranchInst *BI = dyn_cast<BranchInst>(&I)) {
816  // Count a conditional branch as savings if it becomes unconditional.
817  if (BI->isConditional() &&
818  isa_and_nonnull<ConstantInt>(
819  SimplifiedValues.lookup(BI->getCondition()))) {
820  CurrentSavings += InlineConstants::InstrCost;
821  }
822  } else if (Value *V = dyn_cast<Value>(&I)) {
823  // Count an instruction as savings if we can fold it.
824  if (SimplifiedValues.count(V)) {
825  CurrentSavings += InlineConstants::InstrCost;
826  }
827  }
828  }
829 
830  auto ProfileCount = CalleeBFI->getBlockProfileCount(&BB);
831  assert(ProfileCount.hasValue());
832  CurrentSavings *= ProfileCount.getValue();
833  CycleSavings += CurrentSavings;
834  }
835 
836  // Compute the cycle savings per call.
837  auto EntryProfileCount = F.getEntryCount();
838  assert(EntryProfileCount && EntryProfileCount->getCount());
839  auto EntryCount = EntryProfileCount->getCount();
840  CycleSavings += EntryCount / 2;
841  CycleSavings = CycleSavings.udiv(EntryCount);
842 
843  // Compute the total savings for the call site.
844  auto *CallerBB = CandidateCall.getParent();
845  BlockFrequencyInfo *CallerBFI = &(GetBFI(*(CallerBB->getParent())));
846  CycleSavings += getCallsiteCost(this->CandidateCall, DL);
847  CycleSavings *= *CallerBFI->getBlockProfileCount(CallerBB);
848 
849  // Remove the cost of the cold basic blocks.
850  int Size = Cost - ColdSize;
851 
852  // Allow tiny callees to be inlined regardless of whether they meet the
853  // savings threshold.
854  Size = Size > InlineSizeAllowance ? Size - InlineSizeAllowance : 1;
855 
856  CostBenefit.emplace(APInt(128, Size), CycleSavings);
857 
858  // Return true if the savings justify the cost of inlining. Specifically,
859  // we evaluate the following inequality:
860  //
861  //  CycleSavings      PSI->getOrCompHotCountThreshold()
862  // -------------- >= -----------------------------------
863  //       Size              InlineSavingsMultiplier
864  //
865  // Note that the left hand side is specific to a call site. The right hand
866  // side is a constant for the entire executable.
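// For example, with the default InlineSavingsMultiplier of 8, a call site
// with CycleSavings = 1000 and Size = 10 passes the check as long as the hot
// count threshold is no larger than 800, since 1000 * 8 >= 800 * 10.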
867  APInt LHS = CycleSavings;
868  LHS *= InlineSavingsMultiplier;
869  APInt RHS(128, PSI->getOrCompHotCountThreshold());
870  RHS *= Size;
871  return LHS.uge(RHS);
872  }
873 
874  InlineResult finalizeAnalysis() override {
875  // Loops generally act a lot like calls in that they act like barriers to
876  // movement, require a certain amount of setup, etc. So when optimising for
877  // size, we penalise any call sites that perform loops. We do this after all
878  // other costs here, so will likely only be dealing with relatively small
879  // functions (and hence DT and LI will hopefully be cheap).
880  auto *Caller = CandidateCall.getFunction();
881  if (Caller->hasMinSize()) {
882  DominatorTree DT(F);
883  LoopInfo LI(DT);
884  int NumLoops = 0;
885  for (Loop *L : LI) {
886  // Ignore loops that will not be executed
887  if (DeadBlocks.count(L->getHeader()))
888  continue;
889  NumLoops++;
890  }
891  addCost(NumLoops * InlineConstants::LoopPenalty);
892  }
893 
894  // We applied the maximum possible vector bonus at the beginning. Now,
895  // subtract the excess bonus, if any, from the Threshold before
896  // comparing against Cost.
897  if (NumVectorInstructions <= NumInstructions / 10)
898  Threshold -= VectorBonus;
899  else if (NumVectorInstructions <= NumInstructions / 2)
900  Threshold -= VectorBonus / 2;
901 
902  if (Optional<int> AttrCost =
903  getStringFnAttrAsInt(CandidateCall, "function-inline-cost"))
904  Cost = *AttrCost;
905 
906  if (Optional<int> AttrCostMult = getStringFnAttrAsInt(
907  CandidateCall,
908  InlineConstants::FunctionInlineCostMultiplierAttributeName))
909  Cost *= *AttrCostMult;
910 
911  if (Optional<int> AttrThreshold =
912  getStringFnAttrAsInt(CandidateCall, "function-inline-threshold"))
913  Threshold = *AttrThreshold;
914 
915  if (auto Result = costBenefitAnalysis()) {
916  DecidedByCostBenefit = true;
917  if (*Result)
918  return InlineResult::success();
919  else
920  return InlineResult::failure("Cost over threshold.");
921  }
922 
923  if (IgnoreThreshold)
924  return InlineResult::success();
925 
926  DecidedByCostThreshold = true;
927  return Cost < std::max(1, Threshold)
928  ? InlineResult::success()
929  : InlineResult::failure("Cost over threshold.");
930  }
931 
932  bool shouldStop() override {
933  if (IgnoreThreshold || ComputeFullInlineCost)
934  return false;
935  // Bail out the moment we cross the threshold. This means we'll under-count
936  // the cost, but only when undercounting doesn't matter.
937  if (Cost < Threshold)
938  return false;
939  DecidedByCostThreshold = true;
940  return true;
941  }
942 
943  void onLoadEliminationOpportunity() override {
944  LoadEliminationCost += InlineConstants::InstrCost;
945  }
946 
947  InlineResult onAnalysisStart() override {
948  // Perform some tweaks to the cost and threshold based on the direct
949  // callsite information.
950 
951  // We want to more aggressively inline vector-dense kernels, so up the
952  // threshold, and we'll lower it if the % of vector instructions gets too
953  // low. Note that these bonuses are somewhat arbitrary and evolved over
954  // time by accident as much as because they are principled bonuses.
955  //
956  // FIXME: It would be nice to remove all such bonuses. At least it would be
957  // nice to base the bonus values on something more scientific.
958  assert(NumInstructions == 0);
959  assert(NumVectorInstructions == 0);
960 
961  // Update the threshold based on callsite properties
962  updateThreshold(CandidateCall, F);
963 
964  // While Threshold depends on commandline options that can take negative
965  // values, we want to enforce the invariant that the computed threshold and
966  // bonuses are non-negative.
967  assert(Threshold >= 0);
968  assert(SingleBBBonus >= 0);
969  assert(VectorBonus >= 0);
970 
971  // Speculatively apply all possible bonuses to Threshold. If cost exceeds
972  // this Threshold any time, and cost cannot decrease, we can stop processing
973  // the rest of the function body.
974  Threshold += (SingleBBBonus + VectorBonus);
975 
976  // Give out bonuses for the callsite, as the instructions setting them up
977  // will be gone after inlining.
978  addCost(-getCallsiteCost(this->CandidateCall, DL));
979 
980  // If this function uses the coldcc calling convention, prefer not to inline
981  // it.
982  if (F.getCallingConv() == CallingConv::Cold)
983  Cost += InlineConstants::ColdccPenalty;
984 
985  LLVM_DEBUG(dbgs() << " Initial cost: " << Cost << "\n");
986 
987  // Check if we're done. This can happen due to bonuses and penalties.
988  if (Cost >= Threshold && !ComputeFullInlineCost)
989  return InlineResult::failure("high cost");
990 
991  return InlineResult::success();
992  }
993 
994 public:
995  InlineCostCallAnalyzer(
996  Function &Callee, CallBase &Call, const InlineParams &Params,
997  const TargetTransformInfo &TTI,
998  function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
999  function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr,
1000  ProfileSummaryInfo *PSI = nullptr,
1001  OptimizationRemarkEmitter *ORE = nullptr, bool BoostIndirect = true,
1002  bool IgnoreThreshold = false)
1003  : CallAnalyzer(Callee, Call, TTI, GetAssumptionCache, GetBFI, PSI, ORE),
1004  ComputeFullInlineCost(OptComputeFullInlineCost ||
1005  Params.ComputeFullInlineCost || ORE ||
1006  isCostBenefitAnalysisEnabled()),
1007  Params(Params), Threshold(Params.DefaultThreshold),
1008  BoostIndirectCalls(BoostIndirect), IgnoreThreshold(IgnoreThreshold),
1009  CostBenefitAnalysisEnabled(isCostBenefitAnalysisEnabled()),
1010  Writer(this) {
1011  AllowRecursiveCall = *Params.AllowRecursiveCall;
1012  }
1013 
1014  /// Annotation Writer for instruction details
1015  InlineCostAnnotationWriter Writer;
1016 
1017  void dump();
1018 
1019  // Prints the same analysis as dump(), but its definition is not dependent
1020  // on the build.
1021  void print(raw_ostream &OS);
1022 
1023  Optional<InstructionCostDetail> getCostDetails(const Instruction *I) {
1024  if (InstructionCostDetailMap.find(I) != InstructionCostDetailMap.end())
1025  return InstructionCostDetailMap[I];
1026  return None;
1027  }
1028 
1029  virtual ~InlineCostCallAnalyzer() = default;
1030  int getThreshold() const { return Threshold; }
1031  int getCost() const { return Cost; }
1032  Optional<CostBenefitPair> getCostBenefitPair() { return CostBenefit; }
1033  bool wasDecidedByCostBenefit() const { return DecidedByCostBenefit; }
1034  bool wasDecidedByCostThreshold() const { return DecidedByCostThreshold; }
1035 };
1036 
1037 class InlineCostFeaturesAnalyzer final : public CallAnalyzer {
1038 private:
1039  InlineCostFeatures Cost = {};
1040 
1041  // FIXME: These constants are taken from the heuristic-based cost visitor.
1042  // These should be removed entirely in a later revision to avoid reliance on
1043  // heuristics in the ML inliner.
1044  static constexpr int JTCostMultiplier = 4;
1045  static constexpr int CaseClusterCostMultiplier = 2;
1046  static constexpr int SwitchCostMultiplier = 2;
1047 
1048  // FIXME: These are taken from the heuristic-based cost visitor: we should
1049  // eventually abstract these to the CallAnalyzer to avoid duplication.
1050  unsigned SROACostSavingOpportunities = 0;
1051  int VectorBonus = 0;
1052  int SingleBBBonus = 0;
1053  int Threshold = 5;
1054 
1055  DenseMap<AllocaInst *, unsigned> SROACosts;
1056 
1057  void increment(InlineCostFeatureIndex Feature, int64_t Delta = 1) {
1058  Cost[static_cast<size_t>(Feature)] += Delta;
1059  }
1060 
1061  void set(InlineCostFeatureIndex Feature, int64_t Value) {
1062  Cost[static_cast<size_t>(Feature)] = Value;
1063  }
1064 
1065  void onDisableSROA(AllocaInst *Arg) override {
1066  auto CostIt = SROACosts.find(Arg);
1067  if (CostIt == SROACosts.end())
1068  return;
1069 
1070  increment(InlineCostFeatureIndex::SROALosses, CostIt->second);
1071  SROACostSavingOpportunities -= CostIt->second;
1072  SROACosts.erase(CostIt);
1073  }
1074 
1075  void onDisableLoadElimination() override {
1076  set(InlineCostFeatureIndex::LoadElimination, 1);
1077  }
1078 
1079  void onCallPenalty() override {
1080  increment(InlineCostFeatureIndex::CallPenalty, CallPenalty);
1081  }
1082 
1083  void onCallArgumentSetup(const CallBase &Call) override {
1084  increment(InlineCostFeatureIndex::CallArgumentSetup,
1085  Call.arg_size() * InlineConstants::InstrCost);
1086  }
1087 
1088  void onLoadRelativeIntrinsic() override {
1089  increment(InlineCostFeatureIndex::LoadRelativeIntrinsic,
1090  3 * InlineConstants::InstrCost);
1091  }
1092 
1093  void onLoweredCall(Function *F, CallBase &Call,
1094  bool IsIndirectCall) override {
1095  increment(InlineCostFeatureIndex::LoweredCallArgSetup,
1096  Call.arg_size() * InlineConstants::InstrCost);
1097 
1098  if (IsIndirectCall) {
1099  InlineParams IndirectCallParams = {/* DefaultThreshold*/ 0,
1100  /*HintThreshold*/ {},
1101  /*ColdThreshold*/ {},
1102  /*OptSizeThreshold*/ {},
1103  /*OptMinSizeThreshold*/ {},
1104  /*HotCallSiteThreshold*/ {},
1105  /*LocallyHotCallSiteThreshold*/ {},
1106  /*ColdCallSiteThreshold*/ {},
1107  /*ComputeFullInlineCost*/ true,
1108  /*EnableDeferral*/ true};
1109  IndirectCallParams.DefaultThreshold =
1110  InlineConstants::IndirectCallThreshold;
1111 
1112  InlineCostCallAnalyzer CA(*F, Call, IndirectCallParams, TTI,
1113  GetAssumptionCache, GetBFI, PSI, ORE, false,
1114  true);
1115  if (CA.analyze().isSuccess()) {
1116  increment(InlineCostFeatureIndex::NestedInlineCostEstimate,
1117  CA.getCost());
1118  increment(InlineCostFeatureIndex::NestedInlines, 1);
1119  }
1120  } else {
1121  onCallPenalty();
1122  }
1123  }
1124 
1125  void onFinalizeSwitch(unsigned JumpTableSize,
1126  unsigned NumCaseCluster) override {
1127 
1128  if (JumpTableSize) {
1129  int64_t JTCost =
1130  static_cast<int64_t>(JumpTableSize) * InlineConstants::InstrCost +
1131  JTCostMultiplier * InlineConstants::InstrCost;
1132  increment(InlineCostFeatureIndex::JumpTablePenalty, JTCost);
1133  return;
1134  }
1135 
1136  if (NumCaseCluster <= 3) {
1137  increment(InlineCostFeatureIndex::CaseClusterPenalty,
1138  NumCaseCluster * CaseClusterCostMultiplier *
1139  InlineConstants::InstrCost);
1140  return;
1141  }
1142 
1143  int64_t ExpectedNumberOfCompare =
1144  getExpectedNumberOfCompare(NumCaseCluster);
1145 
1146  int64_t SwitchCost = ExpectedNumberOfCompare * SwitchCostMultiplier *
1147  InlineConstants::InstrCost;
1148  increment(InlineCostFeatureIndex::SwitchPenalty, SwitchCost);
1149  }
1150 
1151  void onMissedSimplification() override {
1152  increment(InlineCostFeatureIndex::UnsimplifiedCommonInstructions,
1153  1);
1154  }
1155 
1156  void onInitializeSROAArg(AllocaInst *Arg) override { SROACosts[Arg] = 0; }
1157  void onAggregateSROAUse(AllocaInst *Arg) override {
1158  SROACosts.find(Arg)->second += InlineConstants::InstrCost;
1159  SROACostSavingOpportunities += InlineConstants::InstrCost;
1160  }
1161 
1162  void onBlockAnalyzed(const BasicBlock *BB) override {
1163  if (BB->getTerminator()->getNumSuccessors() > 1)
1164  set(InlineCostFeatureIndex::IsMultipleBlocks, 1);
1165  Threshold -= SingleBBBonus;
1166  }
1167 
1168  InlineResult finalizeAnalysis() override {
1169  auto *Caller = CandidateCall.getFunction();
1170  if (Caller->hasMinSize()) {
1171  DominatorTree DT(F);
1172  LoopInfo LI(DT);
1173  for (Loop *L : LI) {
1174  // Ignore loops that will not be executed
1175  if (DeadBlocks.count(L->getHeader()))
1176  continue;
1177  increment(InlineCostFeatureIndex::NumLoops,
1178  InlineConstants::LoopPenalty);
1179  }
1180  }
1181  set(InlineCostFeatureIndex::DeadBlocks, DeadBlocks.size());
1182  set(InlineCostFeatureIndex::SimplifiedInstructions,
1183  NumInstructionsSimplified);
1184  set(InlineCostFeatureIndex::ConstantArgs, NumConstantArgs);
1185  set(InlineCostFeatureIndex::ConstantOffsetPtrArgs,
1186  NumConstantOffsetPtrArgs);
1187  set(InlineCostFeatureIndex::SROASavings, SROACostSavingOpportunities);
1188 
1189  if (NumVectorInstructions <= NumInstructions / 10)
1190  Threshold -= VectorBonus;
1191  else if (NumVectorInstructions <= NumInstructions / 2)
1192  Threshold -= VectorBonus / 2;
1193 
1194  set(InlineCostFeatureIndex::Threshold, Threshold);
1195 
1196  return InlineResult::success();
1197  }
1198 
1199  bool shouldStop() override { return false; }
1200 
1201  void onLoadEliminationOpportunity() override {
1202  increment(InlineCostFeatureIndex::LoadElimination, 1);
1203  }
1204 
1205  InlineResult onAnalysisStart() override {
1206  increment(InlineCostFeatureIndex::CallSiteCost,
1207  -1 * getCallsiteCost(this->CandidateCall, DL));
1208 
1209  set(InlineCostFeatureIndex::ColdCcPenalty,
1210  (F.getCallingConv() == CallingConv::Cold));
1211 
1212  set(InlineCostFeatureIndex::LastCallToStaticBonus,
1213  (F.hasLocalLinkage() && F.hasOneLiveUse() &&
1214  &F == CandidateCall.getCalledFunction()));
1215 
1216  // FIXME: we shouldn't repeat this logic in both the Features and Cost
1217  // analyzer - instead, we should abstract it to a common method in the
1218  // CallAnalyzer
1219  int SingleBBBonusPercent = 50;
1220  int VectorBonusPercent = TTI.getInlinerVectorBonusPercent();
1221  Threshold += TTI.adjustInliningThreshold(&CandidateCall);
1222  Threshold *= TTI.getInliningThresholdMultiplier();
1223  SingleBBBonus = Threshold * SingleBBBonusPercent / 100;
1224  VectorBonus = Threshold * VectorBonusPercent / 100;
1225  Threshold += (SingleBBBonus + VectorBonus);
1226 
1227  return InlineResult::success();
1228  }
1229 
1230 public:
1231  InlineCostFeaturesAnalyzer(
1232  const TargetTransformInfo &TTI,
1233  function_ref<AssumptionCache &(Function &)> &GetAssumptionCache,
1236  CallBase &Call)
1237  : CallAnalyzer(Callee, Call, TTI, GetAssumptionCache, GetBFI, PSI) {}
1238 
1239  const InlineCostFeatures &features() const { return Cost; }
1240 };
1241 
1242 } // namespace
1243 
1244 /// Test whether the given value is an Alloca-derived function argument.
1245 bool CallAnalyzer::isAllocaDerivedArg(Value *V) {
1246  return SROAArgValues.count(V);
1247 }
1248 
1249 void CallAnalyzer::disableSROAForArg(AllocaInst *SROAArg) {
1250  onDisableSROA(SROAArg);
1251  EnabledSROAAllocas.erase(SROAArg);
1252  disableLoadElimination();
1253 }
1254 
1255 void InlineCostAnnotationWriter::emitInstructionAnnot(
1256  const Instruction *I, formatted_raw_ostream &OS) {
1257  // The cost of inlining the given instruction is always printed.
1258  // The threshold delta is printed only when it is non-zero. It happens
1259  // when we decided to give a bonus at a particular instruction.
1260  Optional<InstructionCostDetail> Record = ICCA->getCostDetails(I);
1261  if (!Record)
1262  OS << "; No analysis for the instruction";
1263  else {
1264  OS << "; cost before = " << Record->CostBefore
1265  << ", cost after = " << Record->CostAfter
1266  << ", threshold before = " << Record->ThresholdBefore
1267  << ", threshold after = " << Record->ThresholdAfter << ", ";
1268  OS << "cost delta = " << Record->getCostDelta();
1269  if (Record->hasThresholdChanged())
1270  OS << ", threshold delta = " << Record->getThresholdDelta();
1271  }
1272  auto C = ICCA->getSimplifiedValue(const_cast<Instruction *>(I));
1273  if (C) {
1274  OS << ", simplified to ";
1275  (*C)->print(OS, true);
1276  }
1277  OS << "\n";
1278 }
1279 
1280 /// If 'V' maps to a SROA candidate, disable SROA for it.
1281 void CallAnalyzer::disableSROA(Value *V) {
1282  if (auto *SROAArg = getSROAArgForValueOrNull(V)) {
1283  disableSROAForArg(SROAArg);
1284  }
1285 }
1286 
1287 void CallAnalyzer::disableLoadElimination() {
1288  if (EnableLoadElimination) {
1289  onDisableLoadElimination();
1290  EnableLoadElimination = false;
1291  }
1292 }
1293 
1294 /// Accumulate a constant GEP offset into an APInt if possible.
1295 ///
1296 /// Returns false if unable to compute the offset for any reason. Respects any
1297 /// simplified values known during the analysis of this callsite.
1298 bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
1299  unsigned IntPtrWidth = DL.getIndexTypeSizeInBits(GEP.getType());
1300  assert(IntPtrWidth == Offset.getBitWidth());
1301 
1302  for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
1303  GTI != GTE; ++GTI) {
1304  ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
1305  if (!OpC)
1306  if (Constant *SimpleOp = SimplifiedValues.lookup(GTI.getOperand()))
1307  OpC = dyn_cast<ConstantInt>(SimpleOp);
1308  if (!OpC)
1309  return false;
1310  if (OpC->isZero())
1311  continue;
1312 
1313  // Handle a struct index, which adds its field offset to the pointer.
1314  if (StructType *STy = GTI.getStructTypeOrNull()) {
1315  unsigned ElementIdx = OpC->getZExtValue();
1316  const StructLayout *SL = DL.getStructLayout(STy);
1317  Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx));
1318  continue;
1319  }
1320 
1321  APInt TypeSize(IntPtrWidth, DL.getTypeAllocSize(GTI.getIndexedType()));
1322  Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize;
1323  }
1324  return true;
1325 }
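// As an illustration: a GEP with a (possibly simplified) constant index of 3
// into [10 x i32] adds 3 * 4 = 12 bytes to Offset, while a struct index adds
// the field offset reported by the StructLayout instead.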
1326 
1327 /// Use TTI to check whether a GEP is free.
1328 ///
1329 /// Respects any simplified values known during the analysis of this callsite.
1330 bool CallAnalyzer::isGEPFree(GetElementPtrInst &GEP) {
1331  SmallVector<Value *, 4> Operands;
1332  Operands.push_back(GEP.getOperand(0));
1333  for (const Use &Op : GEP.indices())
1334  if (Constant *SimpleOp = SimplifiedValues.lookup(Op))
1335  Operands.push_back(SimpleOp);
1336  else
1337  Operands.push_back(Op);
1338  return TTI.getUserCost(&GEP, Operands,
1339  TargetTransformInfo::TCK_SizeAndLatency) ==
1340  TargetTransformInfo::TCC_Free;
1341 }
1342 
1343 bool CallAnalyzer::visitAlloca(AllocaInst &I) {
1344  disableSROA(I.getOperand(0));
1345 
1346  // Check whether inlining will turn a dynamic alloca into a static
1347  // alloca and handle that case.
1348  if (I.isArrayAllocation()) {
1349  Constant *Size = SimplifiedValues.lookup(I.getArraySize());
1350  if (auto *AllocSize = dyn_cast_or_null<ConstantInt>(Size)) {
1351  // Sometimes a dynamic alloca could be converted into a static alloca
1352  // after this constant prop, and become a huge static alloca on an
1353  // unconditional CFG path. Avoid inlining if this is going to happen above
1354  // a threshold.
1355  // FIXME: If the threshold is removed or lowered too much, we could end up
1356  // being too pessimistic and prevent inlining non-problematic code. This
1357  // could result in unintended perf regressions. A better overall strategy
1358  // is needed to track stack usage during inlining.
1359  Type *Ty = I.getAllocatedType();
1360  AllocatedSize = SaturatingMultiplyAdd(
1361  AllocSize->getLimitedValue(),
1362  DL.getTypeAllocSize(Ty).getKnownMinSize(), AllocatedSize);
1363  if (AllocatedSize > InlineConstants::MaxSimplifiedDynamicAllocaToInline)
1364  HasDynamicAlloca = true;
1365  return false;
1366  }
1367  }
1368 
1369  // Accumulate the allocated size.
1370  if (I.isStaticAlloca()) {
1371  Type *Ty = I.getAllocatedType();
1372  AllocatedSize =
1373  SaturatingAdd(DL.getTypeAllocSize(Ty).getKnownMinSize(), AllocatedSize);
1374  }
1375 
1376  // FIXME: This is overly conservative. Dynamic allocas are inefficient for
1377  // a variety of reasons, and so we would like to not inline them into
1378  // functions which don't currently have a dynamic alloca. This simply
1379  // disables inlining altogether in the presence of a dynamic alloca.
1380  if (!I.isStaticAlloca())
1381  HasDynamicAlloca = true;
1382 
1383  return false;
1384 }
1385 
1386 bool CallAnalyzer::visitPHI(PHINode &I) {
1387  // FIXME: We need to propagate SROA *disabling* through phi nodes, even
1388  // though we don't want to propagate its bonuses. The idea is to disable
1389  // SROA if it *might* be used in an inappropriate manner.
1390 
1391  // Phi nodes are always zero-cost.
1392  // FIXME: Pointer sizes may differ between different address spaces, so do we
1393  // need to use correct address space in the call to getPointerSizeInBits here?
1394  // Or could we skip the getPointerSizeInBits call completely? As far as I can
1395  // see the ZeroOffset is used as a dummy value, so we can probably use any
1396  // bit width for the ZeroOffset?
1397  APInt ZeroOffset = APInt::getZero(DL.getPointerSizeInBits(0));
1398  bool CheckSROA = I.getType()->isPointerTy();
1399 
1400  // Track the constant or pointer with constant offset we've seen so far.
1401  Constant *FirstC = nullptr;
1402  std::pair<Value *, APInt> FirstBaseAndOffset = {nullptr, ZeroOffset};
1403  Value *FirstV = nullptr;
1404 
1405  for (unsigned i = 0, e = I.getNumIncomingValues(); i != e; ++i) {
1406  BasicBlock *Pred = I.getIncomingBlock(i);
1407  // If the incoming block is dead, skip the incoming block.
1408  if (DeadBlocks.count(Pred))
1409  continue;
1410  // If the parent block of phi is not the known successor of the incoming
1411  // block, skip the incoming block.
1412  BasicBlock *KnownSuccessor = KnownSuccessors[Pred];
1413  if (KnownSuccessor && KnownSuccessor != I.getParent())
1414  continue;
1415 
1416  Value *V = I.getIncomingValue(i);
1417  // If the incoming value is this phi itself, skip the incoming value.
1418  if (&I == V)
1419  continue;
1420 
1421  Constant *C = dyn_cast<Constant>(V);
1422  if (!C)
1423  C = SimplifiedValues.lookup(V);
1424 
1425  std::pair<Value *, APInt> BaseAndOffset = {nullptr, ZeroOffset};
1426  if (!C && CheckSROA)
1427  BaseAndOffset = ConstantOffsetPtrs.lookup(V);
1428 
1429  if (!C && !BaseAndOffset.first)
1430  // The incoming value is neither a constant nor a pointer with constant
1431  // offset, exit early.
1432  return true;
1433 
1434  if (FirstC) {
1435  if (FirstC == C)
1436  // If we've seen a constant incoming value before and it is the same
1437  // constant we see this time, continue checking the next incoming value.
1438  continue;
1439  // Otherwise early exit because we either see a different constant or saw
1440  // a constant before but we have a pointer with constant offset this time.
1441  return true;
1442  }
1443 
1444  if (FirstV) {
1445  // The same logic as above, but check pointer with constant offset here.
1446  if (FirstBaseAndOffset == BaseAndOffset)
1447  continue;
1448  return true;
1449  }
1450 
1451  if (C) {
1452  // This is the 1st time we've seen a constant, record it.
1453  FirstC = C;
1454  continue;
1455  }
1456 
1457  // The remaining case is that this is the 1st time we've seen a pointer with
1458  // constant offset, record it.
1459  FirstV = V;
1460  FirstBaseAndOffset = BaseAndOffset;
1461  }
1462 
1463  // Check if we can map phi to a constant.
1464  if (FirstC) {
1465  SimplifiedValues[&I] = FirstC;
1466  return true;
1467  }
1468 
1469  // Check if we can map phi to a pointer with constant offset.
1470  if (FirstBaseAndOffset.first) {
1471  ConstantOffsetPtrs[&I] = FirstBaseAndOffset;
1472 
1473  if (auto *SROAArg = getSROAArgForValueOrNull(FirstV))
1474  SROAArgValues[&I] = SROAArg;
1475  }
1476 
1477  return true;
1478 }
1479 
1480 /// Check we can fold GEPs of constant-offset call site argument pointers.
1481 /// This requires target data and inbounds GEPs.
1482 ///
1483 /// \return true if the specified GEP can be folded.
1484 bool CallAnalyzer::canFoldInboundsGEP(GetElementPtrInst &I) {
1485  // Check if we have a base + offset for the pointer.
1486  std::pair<Value *, APInt> BaseAndOffset =
1487  ConstantOffsetPtrs.lookup(I.getPointerOperand());
1488  if (!BaseAndOffset.first)
1489  return false;
1490 
1491  // Check if the offset of this GEP is constant, and if so accumulate it
1492  // into Offset.
1493  if (!accumulateGEPOffset(cast<GEPOperator>(I), BaseAndOffset.second))
1494  return false;
1495 
1496  // Add the result as a new mapping to Base + Offset.
1497  ConstantOffsetPtrs[&I] = BaseAndOffset;
1498 
1499  return true;
1500 }
1501 
1502 bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) {
1503  auto *SROAArg = getSROAArgForValueOrNull(I.getPointerOperand());
1504 
1505  // Lambda to check whether a GEP's indices are all constant.
1506  auto IsGEPOffsetConstant = [&](GetElementPtrInst &GEP) {
1507  for (const Use &Op : GEP.indices())
1508  if (!isa<Constant>(Op) && !SimplifiedValues.lookup(Op))
1509  return false;
1510  return true;
1511  };
1512 
1513  if (!DisableGEPConstOperand)
1514  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
1515  SmallVector<Constant *, 2> Indices;
1516  for (unsigned int Index = 1; Index < COps.size(); ++Index)
1517  Indices.push_back(COps[Index]);
1518  return ConstantExpr::getGetElementPtr(
1519  I.getSourceElementType(), COps[0], Indices, I.isInBounds());
1520  }))
1521  return true;
1522 
1523  if ((I.isInBounds() && canFoldInboundsGEP(I)) || IsGEPOffsetConstant(I)) {
1524  if (SROAArg)
1525  SROAArgValues[&I] = SROAArg;
1526 
1527  // Constant GEPs are modeled as free.
1528  return true;
1529  }
1530 
1531  // Variable GEPs will require math and will disable SROA.
1532  if (SROAArg)
1533  disableSROAForArg(SROAArg);
1534  return isGEPFree(I);
1535 }
1536 
1537 /// Simplify \p I if its operands are constants and update SimplifiedValues.
1538 /// \p Evaluate is a callable specific to instruction type that evaluates the
1539 /// instruction when all the operands are constants.
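/// For example, visitBitCast passes a callable that returns
/// ConstantExpr::getBitCast(COps[0], I.getType()); a null result from the
/// callable means the instruction could not be folded.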
1540 template <typename Callable>
1541 bool CallAnalyzer::simplifyInstruction(Instruction &I, Callable Evaluate) {
1542  SmallVector<Constant *, 2> COps;
1543  for (Value *Op : I.operands()) {
1544  Constant *COp = dyn_cast<Constant>(Op);
1545  if (!COp)
1546  COp = SimplifiedValues.lookup(Op);
1547  if (!COp)
1548  return false;
1549  COps.push_back(COp);
1550  }
1551  auto *C = Evaluate(COps);
1552  if (!C)
1553  return false;
1554  SimplifiedValues[&I] = C;
1555  return true;
1556 }
1557 
1558 /// Try to simplify a call to llvm.is.constant.
1559 ///
1560 /// Duplicate the argument checking from CallAnalyzer::simplifyCallSite since
1561 /// we expect calls of this specific intrinsic to be infrequent.
1562 ///
1563 /// FIXME: Given that we know CB's parent (F) caller
1564 /// (CandidateCall->getParent()->getParent()), we might be able to determine
1565 /// whether inlining F into F's caller would change how the call to
1566 /// llvm.is.constant would evaluate.
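/// For example, a call to llvm.is.constant on a value that has been
/// simplified to a constant at this call site is mapped to i1 1; otherwise it
/// is mapped to i1 0.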
1567 bool CallAnalyzer::simplifyIntrinsicCallIsConstant(CallBase &CB) {
1568  Value *Arg = CB.getArgOperand(0);
1569  auto *C = dyn_cast<Constant>(Arg);
1570 
1571  if (!C)
1572  C = dyn_cast_or_null<Constant>(SimplifiedValues.lookup(Arg));
1573 
1574  Type *RT = CB.getFunctionType()->getReturnType();
1575  SimplifiedValues[&CB] = ConstantInt::get(RT, C ? 1 : 0);
1576  return true;
1577 }
1578 
1579 bool CallAnalyzer::visitBitCast(BitCastInst &I) {
1580  // Propagate constants through bitcasts.
1581  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
1582  return ConstantExpr::getBitCast(COps[0], I.getType());
1583  }))
1584  return true;
1585 
1586  // Track base/offsets through casts
1587  std::pair<Value *, APInt> BaseAndOffset =
1588  ConstantOffsetPtrs.lookup(I.getOperand(0));
1589  // Casts don't change the offset, just wrap it up.
1590  if (BaseAndOffset.first)
1591  ConstantOffsetPtrs[&I] = BaseAndOffset;
1592 
1593  // Also look for SROA candidates here.
1594  if (auto *SROAArg = getSROAArgForValueOrNull(I.getOperand(0)))
1595  SROAArgValues[&I] = SROAArg;
1596 
1597  // Bitcasts are always zero cost.
1598  return true;
1599 }
1600 
1601 bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
1602  // Propagate constants through ptrtoint.
1603  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
1604  return ConstantExpr::getPtrToInt(COps[0], I.getType());
1605  }))
1606  return true;
1607 
1608  // Track base/offset pairs when converted to a plain integer provided the
1609  // integer is large enough to represent the pointer.
1610  unsigned IntegerSize = I.getType()->getScalarSizeInBits();
1611  unsigned AS = I.getOperand(0)->getType()->getPointerAddressSpace();
1612  if (IntegerSize == DL.getPointerSizeInBits(AS)) {
1613  std::pair<Value *, APInt> BaseAndOffset =
1614  ConstantOffsetPtrs.lookup(I.getOperand(0));
1615  if (BaseAndOffset.first)
1616  ConstantOffsetPtrs[&I] = BaseAndOffset;
1617  }
1618 
1619  // This is really weird. Technically, ptrtoint will disable SROA. However,
1620  // unless that ptrtoint is *used* somewhere in the live basic blocks after
1621  // inlining, it will be nuked, and SROA should proceed. All of the uses which
1622  // would block SROA would also block SROA if applied directly to a pointer,
1623  // and so we can just add the integer in here. The only places where SROA is
1624  // preserved either cannot fire on an integer, or won't in-and-of themselves
1625  // disable SROA (ext) w/o some later use that we would see and disable.
1626  if (auto *SROAArg = getSROAArgForValueOrNull(I.getOperand(0)))
1627  SROAArgValues[&I] = SROAArg;
1628 
1629  return TargetTransformInfo::TCC_Free ==
1630  TTI.getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
1631 }
1632 
1633 bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
1634  // Propagate constants through inttoptr.
1635  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
1636  return ConstantExpr::getIntToPtr(COps[0], I.getType());
1637  }))
1638  return true;
1639 
1640  // Track base/offset pairs when round-tripped through a pointer without
1641  // modifications provided the integer is not too large.
1642  Value *Op = I.getOperand(0);
1643  unsigned IntegerSize = Op->getType()->getScalarSizeInBits();
1644  if (IntegerSize <= DL.getPointerTypeSizeInBits(I.getType())) {
1645  std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Op);
1646  if (BaseAndOffset.first)
1647  ConstantOffsetPtrs[&I] = BaseAndOffset;
1648  }
1649 
1650  // "Propagate" SROA here in the same manner as we do for ptrtoint above.
1651  if (auto *SROAArg = getSROAArgForValueOrNull(Op))
1652  SROAArgValues[&I] = SROAArg;
1653 
1654  return TargetTransformInfo::TCC_Free ==
1655  TTI.getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
1656 }
1657 
1658 bool CallAnalyzer::visitCastInst(CastInst &I) {
1659  // Propagate constants through casts.
1660  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
1661  return ConstantExpr::getCast(I.getOpcode(), COps[0], I.getType());
1662  }))
1663  return true;
1664 
1665  // Disable SROA in the face of arbitrary casts we don't explicitly list
1666  // elsewhere.
1667  disableSROA(I.getOperand(0));
1668 
1669  // If this is a floating-point cast, and the target says this operation
1670  // is expensive, this may eventually become a library call. Treat the cost
1671  // as such.
1672  switch (I.getOpcode()) {
1673  case Instruction::FPTrunc:
1674  case Instruction::FPExt:
1675  case Instruction::UIToFP:
1676  case Instruction::SIToFP:
1677  case Instruction::FPToUI:
1678  case Instruction::FPToSI:
1679  if (TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive)
1680  onCallPenalty();
1681  break;
1682  default:
1683  break;
1684  }
1685 
1686  return TargetTransformInfo::TCC_Free ==
1687  TTI.getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
1688 }
1689 
1690 bool CallAnalyzer::paramHasAttr(Argument *A, Attribute::AttrKind Attr) {
1691  return CandidateCall.paramHasAttr(A->getArgNo(), Attr);
1692 }
1693 
1694 bool CallAnalyzer::isKnownNonNullInCallee(Value *V) {
1695  // Does the *call site* have the NonNull attribute set on an argument? We
1696  // use the attribute on the call site to memoize any analysis done in the
1697  // caller. This will also trip if the callee function has a non-null
1698  // parameter attribute, but that's a less interesting case because hopefully
1699  // the callee would already have been simplified based on that.
1700  if (Argument *A = dyn_cast<Argument>(V))
1701  if (paramHasAttr(A, Attribute::NonNull))
1702  return true;
1703 
1704  // Is this an alloca in the caller? This is distinct from the attribute case
1705  // above because attributes aren't updated within the inliner itself and we
1706  // always want to catch the alloca derived case.
1707  if (isAllocaDerivedArg(V))
1708  // We can actually predict the result of comparisons between an
1709  // alloca-derived value and null. Note that this fires regardless of
1710  // SROA firing.
1711  return true;
1712 
1713  return false;
1714 }
1715 
1716 bool CallAnalyzer::allowSizeGrowth(CallBase &Call) {
1717  // If the normal destination of the invoke or the parent block of the call
1718  // site is unreachable-terminated, there is little point in inlining this
1719  // unless there is literally zero cost.
1720  // FIXME: Note that it is possible that an unreachable-terminated block has a
1721  // hot entry. For example, in the scenario below, inlining hot_call_X() may
1722  // be beneficial:
1723  // main() {
1724  // hot_call_1();
1725  // ...
1726  // hot_call_N()
1727  // exit(0);
1728  // }
1729  // For now, we are not handling this corner case here as it is rare in real
1730  // code. In the future, we should elaborate on this based on BPI and BFI in
1731  // more general threshold-adjusting heuristics in updateThreshold().
1732  if (InvokeInst *II = dyn_cast<InvokeInst>(&Call)) {
1733  if (isa<UnreachableInst>(II->getNormalDest()->getTerminator()))
1734  return false;
1735  } else if (isa<UnreachableInst>(Call.getParent()->getTerminator()))
1736  return false;
1737 
1738  return true;
1739 }
1740 
1741 bool InlineCostCallAnalyzer::isColdCallSite(CallBase &Call,
1742  BlockFrequencyInfo *CallerBFI) {
1743  // If global profile summary is available, then callsite's coldness is
1744  // determined based on that.
1745  if (PSI && PSI->hasProfileSummary())
1746  return PSI->isColdCallSite(Call, CallerBFI);
1747 
1748  // Otherwise we need BFI to be available.
1749  if (!CallerBFI)
1750  return false;
1751 
1752  // Determine if the callsite is cold relative to caller's entry. We could
1753  // potentially cache the computation of scaled entry frequency, but the added
1754  // complexity is not worth it unless this scaling shows up high in the
1755  // profiles.
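 // For example, with ColdCallSiteRelFreq set to 2, a callsite whose block
 // frequency is below 2% of the caller's entry frequency is treated as cold.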
1756  const BranchProbability ColdProb(ColdCallSiteRelFreq, 100);
1757  auto CallSiteBB = Call.getParent();
1758  auto CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB);
1759  auto CallerEntryFreq =
1760  CallerBFI->getBlockFreq(&(Call.getCaller()->getEntryBlock()));
1761  return CallSiteFreq < CallerEntryFreq * ColdProb;
1762 }
1763 
1764 Optional<int>
1765 InlineCostCallAnalyzer::getHotCallSiteThreshold(CallBase &Call,
1766  BlockFrequencyInfo *CallerBFI) {
1767 
1768  // If global profile summary is available, then callsite's hotness is
1769  // determined based on that.
1770  if (PSI && PSI->hasProfileSummary() && PSI->isHotCallSite(Call, CallerBFI))
1771  return Params.HotCallSiteThreshold;
1772 
1773  // Otherwise we need BFI to be available and to have a locally hot callsite
1774  // threshold.
1775  if (!CallerBFI || !Params.LocallyHotCallSiteThreshold)
1776  return None;
1777 
1778  // Determine if the callsite is hot relative to caller's entry. We could
1779  // potentially cache the computation of scaled entry frequency, but the added
1780  // complexity is not worth it unless this scaling shows up high in the
1781  // profiles.
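 // For example, with HotCallSiteRelFreq set to 60, the callsite block must be
 // at least 60 times as frequent as the caller's entry block to be considered
 // locally hot.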
1782  auto CallSiteBB = Call.getParent();
1783  auto CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB).getFrequency();
1784  auto CallerEntryFreq = CallerBFI->getEntryFreq();
1785  if (CallSiteFreq >= CallerEntryFreq * HotCallSiteRelFreq)
1786  return Params.LocallyHotCallSiteThreshold;
1787 
1788  // Otherwise treat it normally.
1789  return None;
1790 }
1791 
1792 void InlineCostCallAnalyzer::updateThreshold(CallBase &Call, Function &Callee) {
1793  // If no size growth is allowed for this inlining, set Threshold to 0.
1794  if (!allowSizeGrowth(Call)) {
1795  Threshold = 0;
1796  return;
1797  }
1798 
1799  Function *Caller = Call.getCaller();
1800 
1801  // return min(A, B) if B is valid.
1802  auto MinIfValid = [](int A, Optional<int> B) {
1803  return B ? std::min(A, B.getValue()) : A;
1804  };
1805 
1806  // return max(A, B) if B is valid.
1807  auto MaxIfValid = [](int A, Optional<int> B) {
1808  return B ? std::max(A, B.getValue()) : A;
1809  };
1810 
1811  // Various bonus percentages. These are multiplied by Threshold to get the
1812  // bonus values.
1813  // SingleBBBonus: This bonus is applied if the callee has a single reachable
1814  // basic block at the given callsite context. This is speculatively applied
1815  // and withdrawn if more than one basic block is seen.
1816  //
1817  // LastCallToStaticBonus: This large bonus is applied to ensure the inlining
1818  // of the last call to a static function as inlining such functions is
1819  // guaranteed to reduce code size.
1820  //
1821  // These bonus percentages may be set to 0 based on properties of the caller
1822  // and the callsite.
1823  int SingleBBBonusPercent = 50;
1824  int VectorBonusPercent = TTI.getInlinerVectorBonusPercent();
1825  int LastCallToStaticBonus = InlineConstants::LastCallToStaticBonus;
1826 
1827  // Lambda to set all the above bonus and bonus percentages to 0.
1828  auto DisallowAllBonuses = [&]() {
1829  SingleBBBonusPercent = 0;
1830  VectorBonusPercent = 0;
1831  LastCallToStaticBonus = 0;
1832  };
1833 
1834  // Use the OptMinSizeThreshold or OptSizeThreshold knob if they are available
1835  // and reduce the threshold if the caller has the necessary attribute.
1836  if (Caller->hasMinSize()) {
1837  Threshold = MinIfValid(Threshold, Params.OptMinSizeThreshold);
1838  // For minsize, we want to disable the single BB bonus and the vector
1839  // bonuses, but not the last-call-to-static bonus. Inlining the last call to
1840  // a static function will, at the minimum, eliminate the parameter setup and
1841  // call/return instructions.
1842  SingleBBBonusPercent = 0;
1843  VectorBonusPercent = 0;
1844  } else if (Caller->hasOptSize())
1845  Threshold = MinIfValid(Threshold, Params.OptSizeThreshold);
1846 
1847  // Adjust the threshold based on inlinehint attribute and profile based
1848  // hotness information if the caller does not have MinSize attribute.
1849  if (!Caller->hasMinSize()) {
1850  if (Callee.hasFnAttribute(Attribute::InlineHint))
1851  Threshold = MaxIfValid(Threshold, Params.HintThreshold);
1852 
1853  // FIXME: After switching to the new passmanager, simplify the logic below
1854  // by checking only the callsite hotness/coldness as we will reliably
1855  // have local profile information.
1856  //
1857  // Callsite hotness and coldness can be determined if sample profile is
1858  // used (which adds hotness metadata to calls) or if caller's
1859  // BlockFrequencyInfo is available.
1860  BlockFrequencyInfo *CallerBFI = GetBFI ? &(GetBFI(*Caller)) : nullptr;
1861  auto HotCallSiteThreshold = getHotCallSiteThreshold(Call, CallerBFI);
1862  if (!Caller->hasOptSize() && HotCallSiteThreshold) {
1863  LLVM_DEBUG(dbgs() << "Hot callsite.\n");
1864  // FIXME: This should update the threshold only if it exceeds the
1865  // current threshold, but AutoFDO + ThinLTO currently relies on this
1866  // behavior to prevent inlining of hot callsites during ThinLTO
1867  // compile phase.
1868  Threshold = *HotCallSiteThreshold;
1869  } else if (isColdCallSite(Call, CallerBFI)) {
1870  LLVM_DEBUG(dbgs() << "Cold callsite.\n");
1871  // Do not apply bonuses for a cold callsite including the
1872  // LastCallToStatic bonus. While this bonus might result in code size
1873  // reduction, it can cause the size of a non-cold caller to increase
1874  // preventing it from being inlined.
1875  DisallowAllBonuses();
1876  Threshold = MinIfValid(Threshold, Params.ColdCallSiteThreshold);
1877  } else if (PSI) {
1878  // Use callee's global profile information only if we have no way of
1879  // determining this via callsite information.
1880  if (PSI->isFunctionEntryHot(&Callee)) {
1881  LLVM_DEBUG(dbgs() << "Hot callee.\n");
1882  // If callsite hotness can not be determined, we may still know
1883  // that the callee is hot and treat it as a weaker hint for threshold
1884  // increase.
1885  Threshold = MaxIfValid(Threshold, Params.HintThreshold);
1886  } else if (PSI->isFunctionEntryCold(&Callee)) {
1887  LLVM_DEBUG(dbgs() << "Cold callee.\n");
1888  // Do not apply bonuses for a cold callee including the
1889  // LastCallToStatic bonus. While this bonus might result in code size
1890  // reduction, it can cause the size of a non-cold caller to increase
1891  // preventing it from being inlined.
1892  DisallowAllBonuses();
1893  Threshold = MinIfValid(Threshold, Params.ColdThreshold);
1894  }
1895  }
1896  }
1897 
1898  Threshold += TTI.adjustInliningThreshold(&Call);
1899 
1900  // Finally, take the target-specific inlining threshold multiplier into
1901  // account.
1902  Threshold *= TTI.getInliningThresholdMultiplier();
1903 
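 // For example, if Threshold is 225 at this point, SingleBBBonusPercent is 50
 // and VectorBonusPercent is 150, then SingleBBBonus = 112 and
 // VectorBonus = 337 (integer division).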
1904  SingleBBBonus = Threshold * SingleBBBonusPercent / 100;
1905  VectorBonus = Threshold * VectorBonusPercent / 100;
1906 
1907  bool OnlyOneCallAndLocalLinkage = F.hasLocalLinkage() && F.hasOneLiveUse() &&
1908  &F == Call.getCalledFunction();
1909  // If there is only one call of the function, and it has internal linkage,
1910  // the cost of inlining it drops dramatically. It may seem odd to update
1911  // Cost in updateThreshold, but the bonus depends on the logic in this method.
1912  if (OnlyOneCallAndLocalLinkage)
1913  Cost -= LastCallToStaticBonus;
1914 }
1915 
1916 bool CallAnalyzer::visitCmpInst(CmpInst &I) {
1917  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
1918  // First try to handle simplified comparisons.
1919  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
1920  return ConstantExpr::getCompare(I.getPredicate(), COps[0], COps[1]);
1921  }))
1922  return true;
1923 
1924  if (I.getOpcode() == Instruction::FCmp)
1925  return false;
1926 
1927  // Otherwise look for a comparison between constant offset pointers with
1928  // a common base.
1929  Value *LHSBase, *RHSBase;
1930  APInt LHSOffset, RHSOffset;
1931  std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
1932  if (LHSBase) {
1933  std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
1934  if (RHSBase && LHSBase == RHSBase) {
1935  // We have common bases, fold the icmp to a constant based on the
1936  // offsets.
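 // For example, if LHS maps to (%base, 8) and RHS maps to (%base, 16), an
 // 'icmp ult' folds to true and costs nothing after inlining.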
1937  Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
1938  Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
1939  if (Constant *C = ConstantExpr::getICmp(I.getPredicate(), CLHS, CRHS)) {
1940  SimplifiedValues[&I] = C;
1941  ++NumConstantPtrCmps;
1942  return true;
1943  }
1944  }
1945  }
1946 
1947  // If the comparison is an equality comparison with null, we can simplify it
1948  // if we know the value (argument) can't be null
1949  if (I.isEquality() && isa<ConstantPointerNull>(I.getOperand(1)) &&
1950  isKnownNonNullInCallee(I.getOperand(0))) {
1951  bool IsNotEqual = I.getPredicate() == CmpInst::ICMP_NE;
1952  SimplifiedValues[&I] = IsNotEqual ? ConstantInt::getTrue(I.getType())
1953  : ConstantInt::getFalse(I.getType());
1954  return true;
1955  }
1956  return handleSROA(I.getOperand(0), isa<ConstantPointerNull>(I.getOperand(1)));
1957 }
1958 
1959 bool CallAnalyzer::visitSub(BinaryOperator &I) {
1960  // Try to handle a special case: we can fold computing the difference of two
1961  // constant-related pointers.
1962  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
1963  Value *LHSBase, *RHSBase;
1964  APInt LHSOffset, RHSOffset;
1965  std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
1966  if (LHSBase) {
1967  std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
1968  if (RHSBase && LHSBase == RHSBase) {
1969  // We have common bases, fold the subtract to a constant based on the
1970  // offsets.
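 // For example, if LHS maps to (%base, 12) and RHS maps to (%base, 4), the
 // subtraction folds to the constant 8.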
1971  Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
1972  Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
1973  if (Constant *C = ConstantExpr::getSub(CLHS, CRHS)) {
1974  SimplifiedValues[&I] = C;
1975  ++NumConstantPtrDiffs;
1976  return true;
1977  }
1978  }
1979  }
1980 
1981  // Otherwise, fall back to the generic logic for simplifying and handling
1982  // instructions.
1983  return Base::visitSub(I);
1984 }
1985 
1986 bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) {
1987  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
1988  Constant *CLHS = dyn_cast<Constant>(LHS);
1989  if (!CLHS)
1990  CLHS = SimplifiedValues.lookup(LHS);
1991  Constant *CRHS = dyn_cast<Constant>(RHS);
1992  if (!CRHS)
1993  CRHS = SimplifiedValues.lookup(RHS);
1994 
1995  Value *SimpleV = nullptr;
1996  if (auto FI = dyn_cast<FPMathOperator>(&I))
1997  SimpleV = simplifyBinOp(I.getOpcode(), CLHS ? CLHS : LHS, CRHS ? CRHS : RHS,
1998  FI->getFastMathFlags(), DL);
1999  else
2000  SimpleV =
2001  simplifyBinOp(I.getOpcode(), CLHS ? CLHS : LHS, CRHS ? CRHS : RHS, DL);
2002 
2003  if (Constant *C = dyn_cast_or_null<Constant>(SimpleV))
2004  SimplifiedValues[&I] = C;
2005 
2006  if (SimpleV)
2007  return true;
2008 
2009  // Disable any SROA on arguments to arbitrary, unsimplified binary operators.
2010  disableSROA(LHS);
2011  disableSROA(RHS);
2012 
2013  // If the instruction is floating point, and the target says this operation
2014  // is expensive, this may eventually become a library call. Treat the cost
2015  // as such. Unless it's fneg which can be implemented with an xor.
2016  using namespace llvm::PatternMatch;
2017  if (I.getType()->isFloatingPointTy() &&
2018  TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive &&
2019  !match(&I, m_FNeg(m_Value())))
2020  onCallPenalty();
2021 
2022  return false;
2023 }
2024 
2025 bool CallAnalyzer::visitFNeg(UnaryOperator &I) {
2026  Value *Op = I.getOperand(0);
2027  Constant *COp = dyn_cast<Constant>(Op);
2028  if (!COp)
2029  COp = SimplifiedValues.lookup(Op);
2030 
2031  Value *SimpleV = simplifyFNegInst(
2032  COp ? COp : Op, cast<FPMathOperator>(I).getFastMathFlags(), DL);
2033 
2034  if (Constant *C = dyn_cast_or_null<Constant>(SimpleV))
2035  SimplifiedValues[&I] = C;
2036 
2037  if (SimpleV)
2038  return true;
2039 
2040  // Disable any SROA on arguments to arbitrary, unsimplified fneg.
2041  disableSROA(Op);
2042 
2043  return false;
2044 }
2045 
2046 bool CallAnalyzer::visitLoad(LoadInst &I) {
2047  if (handleSROA(I.getPointerOperand(), I.isSimple()))
2048  return true;
2049 
2050  // If the data is already loaded from this address and hasn't been clobbered
2051  // by any stores or calls, this load is likely to be redundant and can be
2052  // eliminated.
2053  if (EnableLoadElimination &&
2054  !LoadAddrSet.insert(I.getPointerOperand()).second && I.isUnordered()) {
2055  onLoadEliminationOpportunity();
2056  return true;
2057  }
2058 
2059  return false;
2060 }
2061 
2062 bool CallAnalyzer::visitStore(StoreInst &I) {
2063  if (handleSROA(I.getPointerOperand(), I.isSimple()))
2064  return true;
2065 
2066  // The store can potentially clobber loads and prevent repeated loads from
2067  // being eliminated.
2068  // FIXME:
2069  // 1. We can probably keep an initial set of eliminable loads subtracted
2070  // from the cost even when we finally see a store. We just need to disable
2071  // *further* accumulation of elimination savings.
2072  // 2. We should probably at some point thread MemorySSA for the callee into
2073  // this and then use that to actually compute *really* precise savings.
2074  disableLoadElimination();
2075  return false;
2076 }
2077 
2078 bool CallAnalyzer::visitExtractValue(ExtractValueInst &I) {
2079  // Constant folding for extract value is trivial.
2080  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
2081  return ConstantExpr::getExtractValue(COps[0], I.getIndices());
2082  }))
2083  return true;
2084 
2085  // SROA can't look through these, but they may be free.
2086  return Base::visitExtractValue(I);
2087 }
2088 
2089 bool CallAnalyzer::visitInsertValue(InsertValueInst &I) {
2090  // Constant folding for insert value is trivial.
2091  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
2092  return ConstantExpr::getInsertValue(/*AggregateOperand*/ COps[0],
2093  /*InsertedValueOperand*/ COps[1],
2094  I.getIndices());
2095  }))
2096  return true;
2097 
2098  // SROA can't look through these, but they may be free.
2099  return Base::visitInsertValue(I);
2100 }
2101 
2102 /// Try to simplify a call site.
2103 ///
2104 /// Takes a concrete function and callsite and tries to actually simplify it by
2105 /// analyzing the arguments and call itself with instsimplify. Returns true if
2106 /// it has simplified the callsite to some other entity (a constant), making it
2107 /// free.
2108 bool CallAnalyzer::simplifyCallSite(Function *F, CallBase &Call) {
2109  // FIXME: Using the instsimplify logic directly for this is inefficient
2110  // because we have to continually rebuild the argument list even when no
2111  // simplifications can be performed. Until that is fixed with remapping
2112  // inside of instsimplify, directly constant fold calls here.
2113  if (!canConstantFoldCallTo(&Call, F))
2114  return false;
2115 
2116  // Try to re-map the arguments to constants.
2117  SmallVector<Constant *, 4> ConstantArgs;
2118  ConstantArgs.reserve(Call.arg_size());
2119  for (Value *I : Call.args()) {
2120  Constant *C = dyn_cast<Constant>(I);
2121  if (!C)
2122  C = dyn_cast_or_null<Constant>(SimplifiedValues.lookup(I));
2123  if (!C)
2124  return false; // This argument doesn't map to a constant.
2125 
2126  ConstantArgs.push_back(C);
2127  }
2128  if (Constant *C = ConstantFoldCall(&Call, F, ConstantArgs)) {
2129  SimplifiedValues[&Call] = C;
2130  return true;
2131  }
2132 
2133  return false;
2134 }
2135 
2136 bool CallAnalyzer::visitCallBase(CallBase &Call) {
2137  if (!onCallBaseVisitStart(Call))
2138  return true;
2139 
2140  if (Call.hasFnAttr(Attribute::ReturnsTwice) &&
2141  !F.hasFnAttribute(Attribute::ReturnsTwice)) {
2142  // This aborts the entire analysis.
2143  ExposesReturnsTwice = true;
2144  return false;
2145  }
2146  if (isa<CallInst>(Call) && cast<CallInst>(Call).cannotDuplicate())
2147  ContainsNoDuplicateCall = true;
2148 
2149  Function *F = Call.getCalledFunction();
2150  bool IsIndirectCall = !F;
2151  if (IsIndirectCall) {
2152  // Check if this happens to be an indirect function call to a known function
2153  // in this inline context. If not, we've done all we can.
2154  Value *Callee = Call.getCalledOperand();
2155  F = dyn_cast_or_null<Function>(SimplifiedValues.lookup(Callee));
2156  if (!F || F->getFunctionType() != Call.getFunctionType()) {
2157  onCallArgumentSetup(Call);
2158 
2159  if (!Call.onlyReadsMemory())
2160  disableLoadElimination();
2161  return Base::visitCallBase(Call);
2162  }
2163  }
2164 
2165  assert(F && "Expected a call to a known function");
2166 
2167  // When we have a concrete function, first try to simplify it directly.
2168  if (simplifyCallSite(F, Call))
2169  return true;
2170 
2171  // Next check if it is an intrinsic we know about.
2172  // FIXME: Lift this into part of the InstVisitor.
2173  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&Call)) {
2174  switch (II->getIntrinsicID()) {
2175  default:
2176  if (!Call.onlyReadsMemory() && !isAssumeLikeIntrinsic(II))
2177  disableLoadElimination();
2178  return Base::visitCallBase(Call);
2179 
2180  case Intrinsic::load_relative:
2181  onLoadRelativeIntrinsic();
2182  return false;
2183 
2184  case Intrinsic::memset:
2185  case Intrinsic::memcpy:
2186  case Intrinsic::memmove:
2187  disableLoadElimination();
2188  // SROA can usually chew through these intrinsics, but they aren't free.
2189  return false;
2190  case Intrinsic::icall_branch_funnel:
2191  case Intrinsic::localescape:
2192  HasUninlineableIntrinsic = true;
2193  return false;
2194  case Intrinsic::vastart:
2195  InitsVargArgs = true;
2196  return false;
2197  case Intrinsic::launder_invariant_group:
2198  case Intrinsic::strip_invariant_group:
2199  if (auto *SROAArg = getSROAArgForValueOrNull(II->getOperand(0)))
2200  SROAArgValues[II] = SROAArg;
2201  return true;
2202  case Intrinsic::is_constant:
2203  return simplifyIntrinsicCallIsConstant(Call);
2204  }
2205  }
2206 
2207  if (F == Call.getFunction()) {
2208  // This flag will fully abort the analysis, so don't bother with anything
2209  // else.
2210  IsRecursiveCall = true;
2211  if (!AllowRecursiveCall)
2212  return false;
2213  }
2214 
2215  if (TTI.isLoweredToCall(F)) {
2216  onLoweredCall(F, Call, IsIndirectCall);
2217  }
2218 
2219  if (!(Call.onlyReadsMemory() || (IsIndirectCall && F->onlyReadsMemory())))
2220  disableLoadElimination();
2221  return Base::visitCallBase(Call);
2222 }
2223 
2224 bool CallAnalyzer::visitReturnInst(ReturnInst &RI) {
2225  // At least one return instruction will be free after inlining.
2226  bool Free = !HasReturn;
2227  HasReturn = true;
2228  return Free;
2229 }
2230 
2231 bool CallAnalyzer::visitBranchInst(BranchInst &BI) {
2232  // We model unconditional branches as essentially free -- they really
2233  // shouldn't exist at all, but handling them makes the behavior of the
2234  // inliner more regular and predictable. Interestingly, conditional branches
2235  // which will fold away are also free.
2236  return BI.isUnconditional() || isa<ConstantInt>(BI.getCondition()) ||
2237  isa_and_nonnull<ConstantInt>(
2238  SimplifiedValues.lookup(BI.getCondition()));
2239 }
2240 
2241 bool CallAnalyzer::visitSelectInst(SelectInst &SI) {
2242  bool CheckSROA = SI.getType()->isPointerTy();
2243  Value *TrueVal = SI.getTrueValue();
2244  Value *FalseVal = SI.getFalseValue();
2245 
2246  Constant *TrueC = dyn_cast<Constant>(TrueVal);
2247  if (!TrueC)
2248  TrueC = SimplifiedValues.lookup(TrueVal);
2249  Constant *FalseC = dyn_cast<Constant>(FalseVal);
2250  if (!FalseC)
2251  FalseC = SimplifiedValues.lookup(FalseVal);
2252  Constant *CondC =
2253  dyn_cast_or_null<Constant>(SimplifiedValues.lookup(SI.getCondition()));
2254 
2255  if (!CondC) {
2256  // Select C, X, X => X
2257  if (TrueC == FalseC && TrueC) {
2258  SimplifiedValues[&SI] = TrueC;
2259  return true;
2260  }
2261 
2262  if (!CheckSROA)
2263  return Base::visitSelectInst(SI);
2264 
2265  std::pair<Value *, APInt> TrueBaseAndOffset =
2266  ConstantOffsetPtrs.lookup(TrueVal);
2267  std::pair<Value *, APInt> FalseBaseAndOffset =
2268  ConstantOffsetPtrs.lookup(FalseVal);
2269  if (TrueBaseAndOffset == FalseBaseAndOffset && TrueBaseAndOffset.first) {
2270  ConstantOffsetPtrs[&SI] = TrueBaseAndOffset;
2271 
2272  if (auto *SROAArg = getSROAArgForValueOrNull(TrueVal))
2273  SROAArgValues[&SI] = SROAArg;
2274  return true;
2275  }
2276 
2277  return Base::visitSelectInst(SI);
2278  }
2279 
2280  // Select condition is a constant.
2281  Value *SelectedV = CondC->isAllOnesValue() ? TrueVal
2282  : (CondC->isNullValue()) ? FalseVal
2283  : nullptr;
2284  if (!SelectedV) {
2285  // Condition is a vector constant that is not all 1s or all 0s. If all
2286  // operands are constants, ConstantExpr::getSelect() can handle the cases
2287  // such as select vectors.
2288  if (TrueC && FalseC) {
2289  if (auto *C = ConstantExpr::getSelect(CondC, TrueC, FalseC)) {
2290  SimplifiedValues[&SI] = C;
2291  return true;
2292  }
2293  }
2294  return Base::visitSelectInst(SI);
2295  }
2296 
2297  // Condition is either all 1s or all 0s. SI can be simplified.
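 // For example, 'select i1 true, i32* %a, i32* %b' simplifies to %a, and any
 // base/offset or SROA information known for %a is propagated below.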
2298  if (Constant *SelectedC = dyn_cast<Constant>(SelectedV)) {
2299  SimplifiedValues[&SI] = SelectedC;
2300  return true;
2301  }
2302 
2303  if (!CheckSROA)
2304  return true;
2305 
2306  std::pair<Value *, APInt> BaseAndOffset =
2307  ConstantOffsetPtrs.lookup(SelectedV);
2308  if (BaseAndOffset.first) {
2309  ConstantOffsetPtrs[&SI] = BaseAndOffset;
2310 
2311  if (auto *SROAArg = getSROAArgForValueOrNull(SelectedV))
2312  SROAArgValues[&SI] = SROAArg;
2313  }
2314 
2315  return true;
2316 }
2317 
2318 bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
2319  // We model unconditional switches as free, see the comments on handling
2320  // branches.
2321  if (isa<ConstantInt>(SI.getCondition()))
2322  return true;
2323  if (Value *V = SimplifiedValues.lookup(SI.getCondition()))
2324  if (isa<ConstantInt>(V))
2325  return true;
2326 
2327  // Assume the most general case where the switch is lowered into
2328  // either a jump table, bit test, or a balanced binary tree consisting of
2329  // case clusters without merging adjacent clusters with the same
2330  // destination. We do not consider the switches that are lowered with a mix
2331  // of jump table/bit test/binary search tree. The cost of the switch is
2332  // proportional to the size of the tree or the size of jump table range.
2333  //
2334  // NB: We convert large switches which are just used to initialize large phi
2335  // nodes to lookup tables instead in simplifycfg, so this shouldn't prevent
2336  // inlining those. It will prevent inlining in cases where the optimization
2337  // does not (yet) fire.
2338 
2339  unsigned JumpTableSize = 0;
2340  BlockFrequencyInfo *BFI = GetBFI ? &(GetBFI(F)) : nullptr;
2341  unsigned NumCaseCluster =
2342  TTI.getEstimatedNumberOfCaseClusters(SI, JumpTableSize, PSI, BFI);
2343 
2344  onFinalizeSwitch(JumpTableSize, NumCaseCluster);
2345  return false;
2346 }
2347 
2348 bool CallAnalyzer::visitIndirectBrInst(IndirectBrInst &IBI) {
2349  // We never want to inline functions that contain an indirectbr. This is
2350  // incorrect because all the blockaddresses (in static global initializers,
2351  // for example) would be referring to the original function, and this
2352  // indirect jump would jump from the inlined copy of the function into the
2353  // original function, which is undefined behavior.
2354  // FIXME: This logic isn't really right; we can safely inline functions with
2355  // indirectbr's as long as no other function or global references the
2356  // blockaddress of a block within the current function.
2357  HasIndirectBr = true;
2358  return false;
2359 }
2360 
2361 bool CallAnalyzer::visitResumeInst(ResumeInst &RI) {
2362  // FIXME: It's not clear that a single instruction is an accurate model for
2363  // the inline cost of a resume instruction.
2364  return false;
2365 }
2366 
2367 bool CallAnalyzer::visitCleanupReturnInst(CleanupReturnInst &CRI) {
2368  // FIXME: It's not clear that a single instruction is an accurate model for
2369  // the inline cost of a cleanupret instruction.
2370  return false;
2371 }
2372 
2373 bool CallAnalyzer::visitCatchReturnInst(CatchReturnInst &CRI) {
2374  // FIXME: It's not clear that a single instruction is an accurate model for
2375  // the inline cost of a catchret instruction.
2376  return false;
2377 }
2378 
2379 bool CallAnalyzer::visitUnreachableInst(UnreachableInst &I) {
2380  // FIXME: It might be reasonable to discount the cost of instructions leading
2381  // to unreachable as they have the lowest possible impact on both runtime and
2382  // code size.
2383  return true; // No actual code is needed for unreachable.
2384 }
2385 
2386 bool CallAnalyzer::visitInstruction(Instruction &I) {
2387  // Some instructions are free. All of the free intrinsics can also be
2388  // handled by SROA, etc.
2389  if (TTI.getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency) ==
2390  TargetTransformInfo::TCC_Free)
2391  return true;
2392 
2393  // We found something we don't understand or can't handle. Mark any SROA-able
2394  // values in the operand list as no longer viable.
2395  for (const Use &Op : I.operands())
2396  disableSROA(Op);
2397 
2398  return false;
2399 }
2400 
2401 /// Analyze a basic block for its contribution to the inline cost.
2402 ///
2403 /// This method walks the analyzer over every instruction in the given basic
2404 /// block and accounts for their cost during inlining at this callsite. It
2405 /// aborts early if the threshold has been exceeded or an impossible to inline
2406 /// construct has been detected. It returns false if inlining is no longer
2407 /// viable, and true if inlining remains viable.
2408 InlineResult
2409 CallAnalyzer::analyzeBlock(BasicBlock *BB,
2410  SmallPtrSetImpl<const Value *> &EphValues) {
2411  for (Instruction &I : *BB) {
2412  // FIXME: Currently, the number of instructions in a function, regardless of
2413  // our ability to simplify them during inlining to constants or dead code,
2414  // is used by the vector bonus heuristic. As long as that's true,
2415  // we have to special case debug intrinsics here to prevent differences in
2416  // inlining due to debug symbols. Eventually, the number of unsimplified
2417  // instructions shouldn't factor into the cost computation, but until then,
2418  // hack around it here.
2419  // Similarly, skip pseudo-probes.
2420  if (I.isDebugOrPseudoInst())
2421  continue;
2422 
2423  // Skip ephemeral values.
2424  if (EphValues.count(&I))
2425  continue;
2426 
2427  ++NumInstructions;
2428  if (isa<ExtractElementInst>(I) || I.getType()->isVectorTy())
2429  ++NumVectorInstructions;
2430 
2431  // If the instruction simplified to a constant, there is no cost to this
2432  // instruction. Visit the instructions using our InstVisitor to account for
2433  // all of the per-instruction logic. The visit tree returns true if we
2434  // consumed the instruction in any way, and false if the instruction's base
2435  // cost should count against inlining.
2436  onInstructionAnalysisStart(&I);
2437 
2438  if (Base::visit(&I))
2439  ++NumInstructionsSimplified;
2440  else
2441  onMissedSimplification();
2442 
2443  onInstructionAnalysisFinish(&I);
2444  using namespace ore;
2445  // If visiting this instruction detected an uninlinable pattern, abort.
2446  InlineResult IR = InlineResult::success();
2447  if (IsRecursiveCall && !AllowRecursiveCall)
2448  IR = InlineResult::failure("recursive");
2449  else if (ExposesReturnsTwice)
2450  IR = InlineResult::failure("exposes returns twice");
2451  else if (HasDynamicAlloca)
2452  IR = InlineResult::failure("dynamic alloca");
2453  else if (HasIndirectBr)
2454  IR = InlineResult::failure("indirect branch");
2455  else if (HasUninlineableIntrinsic)
2456  IR = InlineResult::failure("uninlinable intrinsic");
2457  else if (InitsVargArgs)
2458  IR = InlineResult::failure("varargs");
2459  if (!IR.isSuccess()) {
2460  if (ORE)
2461  ORE->emit([&]() {
2462  return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline",
2463  &CandidateCall)
2464  << NV("Callee", &F) << " has uninlinable pattern ("
2465  << NV("InlineResult", IR.getFailureReason())
2466  << ") and cost is not fully computed";
2467  });
2468  return IR;
2469  }
2470 
2471  // If the caller is a recursive function then we don't want to inline
2472  // functions which allocate a lot of stack space because it would increase
2473  // the caller stack usage dramatically.
2474  if (IsCallerRecursive &&
2475  AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller) {
2476  auto IR =
2477  InlineResult::failure("recursive and allocates too much stack space");
2478  if (ORE)
2479  ORE->emit([&]() {
2480  return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline",
2481  &CandidateCall)
2482  << NV("Callee", &F) << " is "
2483  << NV("InlineResult", IR.getFailureReason())
2484  << ". Cost is not fully computed";
2485  });
2486  return IR;
2487  }
2488 
2489  if (shouldStop())
2490  return InlineResult::failure(
2491  "Call site analysis is not favorable to inlining.");
2492  }
2493 
2494  return InlineResult::success();
2495 }
2496 
2497 /// Compute the base pointer and cumulative constant offsets for V.
2498 ///
2499 /// This strips all constant offsets off of V, leaving it the base pointer, and
2500 /// accumulates the total constant offset applied in the returned constant. It
2501 /// returns null if V is not a pointer, and returns the constant '0' if there
2502 /// are no constant offsets applied.
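/// For example, given V = 'getelementptr inbounds i8, i8* %base, i64 6', V is
/// rewritten to %base and the returned constant is 6 (in the index type).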
2503 ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
2504  if (!V->getType()->isPointerTy())
2505  return nullptr;
2506 
2507  unsigned AS = V->getType()->getPointerAddressSpace();
2508  unsigned IntPtrWidth = DL.getIndexSizeInBits(AS);
2509  APInt Offset = APInt::getZero(IntPtrWidth);
2510 
2511  // Even though we don't look through PHI nodes, we could be called on an
2512  // instruction in an unreachable block, which may be on a cycle.
2513  SmallPtrSet<Value *, 4> Visited;
2514  Visited.insert(V);
2515  do {
2516  if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
2517  if (!GEP->isInBounds() || !accumulateGEPOffset(*GEP, Offset))
2518  return nullptr;
2519  V = GEP->getPointerOperand();
2520  } else if (Operator::getOpcode(V) == Instruction::BitCast) {
2521  V = cast<Operator>(V)->getOperand(0);
2522  } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
2523  if (GA->isInterposable())
2524  break;
2525  V = GA->getAliasee();
2526  } else {
2527  break;
2528  }
2529  assert(V->getType()->isPointerTy() && "Unexpected operand type!");
2530  } while (Visited.insert(V).second);
2531 
2532  Type *IdxPtrTy = DL.getIndexType(V->getType());
2533  return cast<ConstantInt>(ConstantInt::get(IdxPtrTy, Offset));
2534 }
2535 
2536 /// Find dead blocks due to deleted CFG edges during inlining.
2537 ///
2538 /// If we know the successor of the current block, \p CurrBB, has to be \p
2539 /// NextBB, the other successors of \p CurrBB are dead if these successors have
2540 /// no live incoming CFG edges. If one block is found to be dead, we can
2541 /// continue growing the dead block list by checking the successors of the dead
2542 /// blocks to see if all their incoming edges are dead or not.
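/// For example, if a conditional branch in \p CurrBB is known to branch to
/// \p NextBB, the other successor becomes dead unless it still has a live
/// incoming edge, and its own successors are then examined transitively.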
2543 void CallAnalyzer::findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB) {
2544  auto IsEdgeDead = [&](BasicBlock *Pred, BasicBlock *Succ) {
2545  // A CFG edge is dead if the predecessor is dead or the predecessor has a
2546  // known successor which is not the one under exam.
2547  return (DeadBlocks.count(Pred) ||
2548  (KnownSuccessors[Pred] && KnownSuccessors[Pred] != Succ));
2549  };
2550 
2551  auto IsNewlyDead = [&](BasicBlock *BB) {
2552  // If all the edges to a block are dead, the block is also dead.
2553  return (!DeadBlocks.count(BB) &&
2554  llvm::all_of(predecessors(BB),
2555  [&](BasicBlock *P) { return IsEdgeDead(P, BB); }));
2556  };
2557 
2558  for (BasicBlock *Succ : successors(CurrBB)) {
2559  if (Succ == NextBB || !IsNewlyDead(Succ))
2560  continue;
2561  SmallVector<BasicBlock *, 4> NewDead;
2562  NewDead.push_back(Succ);
2563  while (!NewDead.empty()) {
2564  BasicBlock *Dead = NewDead.pop_back_val();
2565  if (DeadBlocks.insert(Dead).second)
2566  // Continue growing the dead block lists.
2567  for (BasicBlock *S : successors(Dead))
2568  if (IsNewlyDead(S))
2569  NewDead.push_back(S);
2570  }
2571  }
2572 }
2573 
2574 /// Analyze a call site for potential inlining.
2575 ///
2576 /// Returns true if inlining this call is viable, and false if it is not
2577 /// viable. It computes the cost and adjusts the threshold based on numerous
2578 /// factors and heuristics. If this method returns false but the computed cost
2579 /// is below the computed threshold, then inlining was forcibly disabled by
2580 /// some artifact of the routine.
2581 InlineResult CallAnalyzer::analyze() {
2582  ++NumCallsAnalyzed;
2583 
2584  auto Result = onAnalysisStart();
2585  if (!Result.isSuccess())
2586  return Result;
2587 
2588  if (F.empty())
2589  return InlineResult::success();
2590 
2591  Function *Caller = CandidateCall.getFunction();
2592  // Check if the caller function is recursive itself.
2593  for (User *U : Caller->users()) {
2594  CallBase *Call = dyn_cast<CallBase>(U);
2595  if (Call && Call->getFunction() == Caller) {
2596  IsCallerRecursive = true;
2597  break;
2598  }
2599  }
2600 
2601  // Populate our simplified values by mapping from function arguments to call
2602  // arguments with known important simplifications.
2603  auto CAI = CandidateCall.arg_begin();
2604  for (Argument &FAI : F.args()) {
2605  assert(CAI != CandidateCall.arg_end());
2606  if (Constant *C = dyn_cast<Constant>(CAI))
2607  SimplifiedValues[&FAI] = C;
2608 
2609  Value *PtrArg = *CAI;
2610  if (ConstantInt *C = stripAndComputeInBoundsConstantOffsets(PtrArg)) {
2611  ConstantOffsetPtrs[&FAI] = std::make_pair(PtrArg, C->getValue());
2612 
2613  // We can SROA any pointer arguments derived from alloca instructions.
2614  if (auto *SROAArg = dyn_cast<AllocaInst>(PtrArg)) {
2615  SROAArgValues[&FAI] = SROAArg;
2616  onInitializeSROAArg(SROAArg);
2617  EnabledSROAAllocas.insert(SROAArg);
2618  }
2619  }
2620  ++CAI;
2621  }
2622  NumConstantArgs = SimplifiedValues.size();
2623  NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size();
2624  NumAllocaArgs = SROAArgValues.size();
2625 
2626  // FIXME: If a caller has multiple calls to a callee, we end up recomputing
2627  // the ephemeral values multiple times (and they're completely determined by
2628  // the callee, so this is purely duplicate work).
2629  SmallPtrSet<const Value *, 32> EphValues;
2630  CodeMetrics::collectEphemeralValues(&F, &GetAssumptionCache(F), EphValues);
2631 
2632  // The worklist of live basic blocks in the callee *after* inlining. We avoid
2633  // adding basic blocks of the callee which can be proven to be dead for this
2634  // particular call site in order to get more accurate cost estimates. This
2635  // requires a somewhat heavyweight iteration pattern: we need to walk the
2636  // basic blocks in a breadth-first order as we insert live successors. To
2637  // accomplish this, and because we typically exit early after crossing our
2638  // threshold, we use a SetVector optimized for small sizes.
2639  typedef SetVector<BasicBlock *, SmallVector<BasicBlock *, 16>,
2640  SmallPtrSet<BasicBlock *, 16>>
2641  BBSetVector;
2642  BBSetVector BBWorklist;
2643  BBWorklist.insert(&F.getEntryBlock());
2644 
2645  // Note that we *must not* cache the size, this loop grows the worklist.
2646  for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
2647  if (shouldStop())
2648  break;
2649 
2650  BasicBlock *BB = BBWorklist[Idx];
2651  if (BB->empty())
2652  continue;
2653 
2654  onBlockStart(BB);
2655 
2656  // Disallow inlining a blockaddress with uses other than strictly callbr.
2657  // A blockaddress only has defined behavior for an indirect branch in the
2658  // same function, and we do not currently support inlining indirect
2659  // branches. But, the inliner may not see an indirect branch that ends up
2660  // being dead code at a particular call site. If the blockaddress escapes
2661  // the function, e.g., via a global variable, inlining may lead to an
2662  // invalid cross-function reference.
2663  // FIXME: pr/39560: continue relaxing this overt restriction.
2664  if (BB->hasAddressTaken())
2665  for (User *U : BlockAddress::get(&*BB)->users())
2666  if (!isa<CallBrInst>(*U))
2667  return InlineResult::failure("blockaddress used outside of callbr");
2668 
2669  // Analyze the cost of this block. If we blow through the threshold, this
2670  // returns false, and we can bail out.
2671  InlineResult IR = analyzeBlock(BB, EphValues);
2672  if (!IR.isSuccess())
2673  return IR;
2674 
2675  Instruction *TI = BB->getTerminator();
2676 
2677  // Add in the live successors by first checking whether we have terminator
2678  // that may be simplified based on the values simplified by this call.
2679  if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
2680  if (BI->isConditional()) {
2681  Value *Cond = BI->getCondition();
2682  if (ConstantInt *SimpleCond =
2683  dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
2684  BasicBlock *NextBB = BI->getSuccessor(SimpleCond->isZero() ? 1 : 0);
2685  BBWorklist.insert(NextBB);
2686  KnownSuccessors[BB] = NextBB;
2687  findDeadBlocks(BB, NextBB);
2688  continue;
2689  }
2690  }
2691  } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
2692  Value *Cond = SI->getCondition();
2693  if (ConstantInt *SimpleCond =
2694  dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
2695  BasicBlock *NextBB = SI->findCaseValue(SimpleCond)->getCaseSuccessor();
2696  BBWorklist.insert(NextBB);
2697  KnownSuccessors[BB] = NextBB;
2698  findDeadBlocks(BB, NextBB);
2699  continue;
2700  }
2701  }
2702 
2703  // If we're unable to select a particular successor, just count all of
2704  // them.
2705  for (unsigned TIdx = 0, TSize = TI->getNumSuccessors(); TIdx != TSize;
2706  ++TIdx)
2707  BBWorklist.insert(TI->getSuccessor(TIdx));
2708 
2709  onBlockAnalyzed(BB);
2710  }
2711 
2712  bool OnlyOneCallAndLocalLinkage = F.hasLocalLinkage() && F.hasOneLiveUse() &&
2713  &F == CandidateCall.getCalledFunction();
2714  // If this is a noduplicate call, we can still inline as long as
2715  // inlining this would cause the removal of the caller (so the instruction
2716  // is not actually duplicated, just moved).
2717  if (!OnlyOneCallAndLocalLinkage && ContainsNoDuplicateCall)
2718  return InlineResult::failure("noduplicate");
2719 
2720  // If the callee's stack size exceeds the user-specified threshold,
2721  // do not let it be inlined.
2722  if (AllocatedSize > StackSizeThreshold)
2723  return InlineResult::failure("stacksize");
2724 
2725  return finalizeAnalysis();
2726 }
2727 
2728 void InlineCostCallAnalyzer::print(raw_ostream &OS) {
2729 #define DEBUG_PRINT_STAT(x) OS << "      " #x ": " << x << "\n"
2730  if (PrintInstructionComments)
2731  F.print(OS, &Writer);
2732  DEBUG_PRINT_STAT(NumConstantArgs);
2733  DEBUG_PRINT_STAT(NumConstantOffsetPtrArgs);
2734  DEBUG_PRINT_STAT(NumAllocaArgs);
2735  DEBUG_PRINT_STAT(NumConstantPtrCmps);
2736  DEBUG_PRINT_STAT(NumConstantPtrDiffs);
2737  DEBUG_PRINT_STAT(NumInstructionsSimplified);
2738  DEBUG_PRINT_STAT(NumInstructions);
2739  DEBUG_PRINT_STAT(SROACostSavings);
2740  DEBUG_PRINT_STAT(SROACostSavingsLost);
2741  DEBUG_PRINT_STAT(LoadEliminationCost);
2742  DEBUG_PRINT_STAT(ContainsNoDuplicateCall);
2743  DEBUG_PRINT_STAT(Cost);
2744  DEBUG_PRINT_STAT(Threshold);
2745 #undef DEBUG_PRINT_STAT
2746 }
2747 
2748 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2749 /// Dump stats about this call's analysis.
2750 LLVM_DUMP_METHOD void InlineCostCallAnalyzer::dump() { print(dbgs()); }
2751 #endif
2752 
2753 /// Test that there are no attribute conflicts between Caller and Callee
2754 /// that prevent inlining.
2755 static bool functionsHaveCompatibleAttributes(
2756  Function *Caller, Function *Callee, TargetTransformInfo &TTI,
2757  function_ref<const TargetLibraryInfo &(Function &)> &GetTLI) {
2758  // Note that CalleeTLI must be a copy not a reference. The legacy pass manager
2759  // caches the most recently created TLI in the TargetLibraryInfoWrapperPass
2760  // object, and always returns the same object (which is overwritten on each
2761  // GetTLI call). Therefore we copy the first result.
2762  auto CalleeTLI = GetTLI(*Callee);
2763  return (IgnoreTTIInlineCompatible ||
2764  TTI.areInlineCompatible(Caller, Callee)) &&
2765  GetTLI(*Caller).areInlineCompatible(CalleeTLI,
2766  InlineCallerSupersetNoBuiltin) &&
2767  AttributeFuncs::areInlineCompatible(*Caller, *Callee);
2768 }
2769 
2770 int llvm::getCallsiteCost(CallBase &Call, const DataLayout &DL) {
2771  int Cost = 0;
2772  for (unsigned I = 0, E = Call.arg_size(); I != E; ++I) {
2773  if (Call.isByValArgument(I)) {
2774  // We approximate the number of loads and stores needed by dividing the
2775  // size of the byval type by the target's pointer size.
2776  PointerType *PTy = cast<PointerType>(Call.getArgOperand(I)->getType());
2777  unsigned TypeSize = DL.getTypeSizeInBits(Call.getParamByValType(I));
2778  unsigned AS = PTy->getAddressSpace();
2779  unsigned PointerSize = DL.getPointerSizeInBits(AS);
2780  // Ceiling division.
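 // For example, a 96-bit byval type copied with 64-bit pointers needs
 // (96 + 63) / 64 = 2 word-sized load/store pairs.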
2781  unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize;
2782 
2783  // If it generates more than 8 stores it is likely to be expanded as an
2784  // inline memcpy so we take that as an upper bound. Otherwise we assume
2785  // one load and one store per word copied.
2786  // FIXME: The maxStoresPerMemcpy setting from the target should be used
2787  // here instead of a magic number of 8, but it's not available via
2788  // DataLayout.
2789  NumStores = std::min(NumStores, 8U);
2790 
2791  Cost += 2 * NumStores * InlineConstants::InstrCost;
2792  } else {
2793  // For non-byval arguments subtract off one instruction per call
2794  // argument.
2795  Cost += InlineConstants::InstrCost;
2796  }
2797  }
2798  // The call instruction also disappears after inlining.
2800  return Cost;
2801 }
2802 
2803 InlineCost llvm::getInlineCost(
2804  CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI,
2805  function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
2806  function_ref<const TargetLibraryInfo &(Function &)> GetTLI,
2807  function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
2808  ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
2809  return getInlineCost(Call, Call.getCalledFunction(), Params, CalleeTTI,
2810  GetAssumptionCache, GetTLI, GetBFI, PSI, ORE);
2811 }
2812 
2813 Optional<int> llvm::getInliningCostEstimate(
2814  CallBase &Call, TargetTransformInfo &CalleeTTI,
2815  function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
2816  function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
2817  ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
2818  const InlineParams Params = {/* DefaultThreshold*/ 0,
2819  /*HintThreshold*/ {},
2820  /*ColdThreshold*/ {},
2821  /*OptSizeThreshold*/ {},
2822  /*OptMinSizeThreshold*/ {},
2823  /*HotCallSiteThreshold*/ {},
2824  /*LocallyHotCallSiteThreshold*/ {},
2825  /*ColdCallSiteThreshold*/ {},
2826  /*ComputeFullInlineCost*/ true,
2827  /*EnableDeferral*/ true};
2828 
2829  InlineCostCallAnalyzer CA(*Call.getCalledFunction(), Call, Params, CalleeTTI,
2830  GetAssumptionCache, GetBFI, PSI, ORE, true,
2831  /*IgnoreThreshold*/ true);
2832  auto R = CA.analyze();
2833  if (!R.isSuccess())
2834  return None;
2835  return CA.getCost();
2836 }
2837 
2838 Optional<InlineCostFeatures> llvm::getInliningCostFeatures(
2839  CallBase &Call, TargetTransformInfo &CalleeTTI,
2840  function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
2841  function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
2842  ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
2843  InlineCostFeaturesAnalyzer CFA(CalleeTTI, GetAssumptionCache, GetBFI, PSI,
2844  ORE, *Call.getCalledFunction(), Call);
2845  auto R = CFA.analyze();
2846  if (!R.isSuccess())
2847  return None;
2848  return CFA.features();
2849 }
2850 
2851 Optional<InlineResult> llvm::getAttributeBasedInliningDecision(
2852  CallBase &Call, Function *Callee, TargetTransformInfo &CalleeTTI,
2853  function_ref<const TargetLibraryInfo &(Function &)> GetTLI) {
2854 
2855  // Cannot inline indirect calls.
2856  if (!Callee)
2857  return InlineResult::failure("indirect call");
2858 
2859  // When a callee coroutine function is inlined into a caller coroutine
2860  // function before the coro-split pass,
2861  // the coro-early pass cannot handle this quite well.
2862  // So we do not inline a coroutine function that has not yet been split.
2863  if (Callee->isPresplitCoroutine())
2864  return InlineResult::failure("unsplited coroutine call");
2865 
2866  // Never inline calls with byval arguments that do not have the alloca
2867  // address space. Since byval arguments can be replaced with a copy to an
2868  // alloca, the inlined code would need to be adjusted to handle that the
2869  // argument is in the alloca address space (so it is a little bit complicated
2870  // to solve).
2871  unsigned AllocaAS = Callee->getParent()->getDataLayout().getAllocaAddrSpace();
2872  for (unsigned I = 0, E = Call.arg_size(); I != E; ++I)
2873  if (Call.isByValArgument(I)) {
2874  PointerType *PTy = cast<PointerType>(Call.getArgOperand(I)->getType());
2875  if (PTy->getAddressSpace() != AllocaAS)
2876  return InlineResult::failure("byval arguments without alloca"
2877  " address space");
2878  }
2879 
2880  // Calls to functions with always-inline attributes should be inlined
2881  // whenever possible.
2882  if (Call.hasFnAttr(Attribute::AlwaysInline)) {
2883  if (Call.getAttributes().hasFnAttr(Attribute::NoInline))
2884  return InlineResult::failure("noinline call site attribute");
2885 
2886  auto IsViable = isInlineViable(*Callee);
2887  if (IsViable.isSuccess())
2888  return InlineResult::success();
2889  return InlineResult::failure(IsViable.getFailureReason());
2890  }
2891 
2892  // Never inline functions with conflicting attributes (unless callee has
2893  // always-inline attribute).
2894  Function *Caller = Call.getCaller();
2895  if (!functionsHaveCompatibleAttributes(Caller, Callee, CalleeTTI, GetTLI))
2896  return InlineResult::failure("conflicting attributes");
2897 
2898  // Don't inline this call if the caller has the optnone attribute.
2899  if (Caller->hasOptNone())
2900  return InlineResult::failure("optnone attribute");
2901 
2902  // Don't inline a function that treats null pointer as valid into a caller
2903  // that does not have this attribute.
2904  if (!Caller->nullPointerIsDefined() && Callee->nullPointerIsDefined())
2905  return InlineResult::failure("nullptr definitions incompatible");
2906 
2907  // Don't inline functions which can be interposed at link-time.
2908  if (Callee->isInterposable())
2909  return InlineResult::failure("interposable");
2910 
2911  // Don't inline functions marked noinline.
2912  if (Callee->hasFnAttribute(Attribute::NoInline))
2913  return InlineResult::failure("noinline function attribute");
2914 
2915  // Don't inline call sites marked noinline.
2916  if (Call.isNoInline())
2917  return InlineResult::failure("noinline call site attribute");
2918 
2919  return None;
2920 }
2921 
2922 InlineCost llvm::getInlineCost(
2923  CallBase &Call, Function *Callee, const InlineParams &Params,
2924  TargetTransformInfo &CalleeTTI,
2925  function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
2926  function_ref<const TargetLibraryInfo &(Function &)> GetTLI,
2927  function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
2928  ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
2929 
2930  auto UserDecision =
2931  llvm::getAttributeBasedInliningDecision(Call, Callee, CalleeTTI, GetTLI);
2932 
2933  if (UserDecision) {
2934  if (UserDecision->isSuccess())
2935  return llvm::InlineCost::getAlways("always inline attribute");
2936  return llvm::InlineCost::getNever(UserDecision->getFailureReason());
2937  }
2938 
2939  LLVM_DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName()
2940  << "... (caller:" << Call.getCaller()->getName()
2941  << ")\n");
2942 
2943  InlineCostCallAnalyzer CA(*Callee, Call, Params, CalleeTTI,
2944  GetAssumptionCache, GetBFI, PSI, ORE);
2945  InlineResult ShouldInline = CA.analyze();
2946 
2947  LLVM_DEBUG(CA.dump());
2948 
2949  // Always make cost benefit based decision explicit.
2950  // We use always/never here since threshold is not meaningful,
2951  // as it's not what drives cost-benefit analysis.
2952  if (CA.wasDecidedByCostBenefit()) {
2953  if (ShouldInline.isSuccess())
2954  return InlineCost::getAlways("benefit over cost",
2955  CA.getCostBenefitPair());
2956  else
2957  return InlineCost::getNever("cost over benefit", CA.getCostBenefitPair());
2958  }
2959 
2960  if (CA.wasDecidedByCostThreshold())
2961  return InlineCost::get(CA.getCost(), CA.getThreshold());
2962 
2963  // No details on how the decision was made, simply return always or never.
2964  return ShouldInline.isSuccess()
2965  ? InlineCost::getAlways("empty function")
2966  : InlineCost::getNever(ShouldInline.getFailureReason());
2967 }
2968 
2969 InlineResult llvm::isInlineViable(Function &F) {
2970  bool ReturnsTwice = F.hasFnAttribute(Attribute::ReturnsTwice);
2971  for (BasicBlock &BB : F) {
2972  // Disallow inlining of functions which contain indirect branches.
2973  if (isa<IndirectBrInst>(BB.getTerminator()))
2974  return InlineResult::failure("contains indirect branches");
2975 
2976  // Disallow inlining of blockaddresses which are used by non-callbr
2977  // instructions.
2978  if (BB.hasAddressTaken())
2979  for (User *U : BlockAddress::get(&BB)->users())
2980  if (!isa<CallBrInst>(*U))
2981  return InlineResult::failure("blockaddress used outside of callbr");
2982 
2983  for (auto &II : BB) {
2984  CallBase *Call = dyn_cast<CallBase>(&II);
2985  if (!Call)
2986  continue;
2987 
2988  // Disallow recursive calls.
2989  Function *Callee = Call->getCalledFunction();
2990  if (&F == Callee)
2991  return InlineResult::failure("recursive call");
2992 
2993  // Disallow calls which expose returns-twice to a function not previously
2994  // attributed as such.
2995  if (!ReturnsTwice && isa<CallInst>(Call) &&
2996  cast<CallInst>(Call)->canReturnTwice())
2997  return InlineResult::failure("exposes returns-twice attribute");
2998 
2999  if (Callee)
3000  switch (Callee->getIntrinsicID()) {
3001  default:
3002  break;
3003  case llvm::Intrinsic::icall_branch_funnel:
3004  // Disallow inlining of @llvm.icall.branch.funnel because current
3005  // backend can't separate call targets from call arguments.
3006  return InlineResult::failure(
3007  "disallowed inlining of @llvm.icall.branch.funnel");
3008  case llvm::Intrinsic::localescape:
3009  // Disallow inlining functions that call @llvm.localescape. Doing this
3010  // correctly would require major changes to the inliner.
3011  return InlineResult::failure(
3012  "disallowed inlining of @llvm.localescape");
3013  case llvm::Intrinsic::vastart:
3014  // Disallow inlining of functions that initialize VarArgs with
3015  // va_start.
3016  return InlineResult::failure(
3017  "contains VarArgs initialized with va_start");
3018  }
3019  }
3020  }
3021 
3022  return InlineResult::success();
3023 }
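A small sketch (the helper name is a placeholder) of using isInlineViable as a cheap structural pre-filter before paying for the full cost analysis:

#include "llvm/Analysis/InlineCost.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

// Reject callees that can never be inlined regardless of cost.
static bool canEverInline(Function &Callee) {
  InlineResult Viable = isInlineViable(Callee);
  if (!Viable.isSuccess()) {
    errs() << Callee.getName()
           << " is not inlinable: " << Viable.getFailureReason() << "\n";
    return false;
  }
  return true;
}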
3024 
3025 // APIs to create InlineParams based on command line flags and/or other
3026 // parameters.
3027 
3028 InlineParams llvm::getInlineParams(int Threshold) {
3029  InlineParams Params;
3030 
3031  // This field is the threshold to use for a callee by default. This is
3032  // derived from one or more of:
3033  // * optimization or size-optimization levels,
3034  // * a value passed to createFunctionInliningPass function, or
3035  // * the -inline-threshold flag.
3036  // If the -inline-threshold flag is explicitly specified, that is used
3037  // irrespective of anything else.
3038  if (InlineThreshold.getNumOccurrences() > 0)
3039  Params.DefaultThreshold = InlineThreshold;
3040  else
3041  Params.DefaultThreshold = Threshold;
3042 
3043  // Set the HintThreshold knob from the -inlinehint-threshold.
3044  Params.HintThreshold = HintThreshold;
3045 
3046  // Set the HotCallSiteThreshold knob from the -hot-callsite-threshold.
3047  Params.HotCallSiteThreshold = HotCallSiteThreshold;
3048 
3049  // If the -locally-hot-callsite-threshold is explicitly specified, use it to
3050  // populate LocallyHotCallSiteThreshold. Later, we populate
3051  // Params.LocallyHotCallSiteThreshold from -locally-hot-callsite-threshold if
3052  // we know that optimization level is O3 (in the getInlineParams variant that
3053  // takes the opt and size levels).
3054  // FIXME: Remove this check (and make the assignment unconditional) after
3055  // addressing size regression issues at O2.
3056  if (LocallyHotCallSiteThreshold.getNumOccurrences() > 0)
3057  Params.LocallyHotCallSiteThreshold = LocallyHotCallSiteThreshold;
3058 
3059  // Set the ColdCallSiteThreshold knob from the
3060  // -inline-cold-callsite-threshold.
3061  Params.ColdCallSiteThreshold = ColdCallSiteThreshold;
3062 
3063  // Set the OptMinSizeThreshold and OptSizeThreshold params only if the
3064  // -inlinehint-threshold commandline option is not explicitly given. If that
3065  // option is present, then its value applies even for callees with size and
3066  // minsize attributes.
3067  // If the -inline-threshold is not specified, set the ColdThreshold from the
3068  // -inlinecold-threshold even if it is not explicitly passed. If
3069  // -inline-threshold is specified, then -inlinecold-threshold needs to be
3070  // explicitly specified to set the ColdThreshold knob
3071  if (InlineThreshold.getNumOccurrences() == 0) {
3072  Params.OptMinSizeThreshold = InlineConstants::OptMinSizeThreshold;
3073  Params.OptSizeThreshold = InlineConstants::OptSizeThreshold;
3074  Params.ColdThreshold = ColdThreshold;
3075  } else if (ColdThreshold.getNumOccurrences() > 0) {
3076  Params.ColdThreshold = ColdThreshold;
3077  }
3078  return Params;
3079 }
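As a sketch, a client can start from the flag-derived parameters returned by this routine and then override individual knobs. The helper name and the numeric values below are arbitrary examples; the field names come from InlineParams in InlineCost.h.

#include "llvm/Analysis/InlineCost.h"

using namespace llvm;

static InlineParams makeCustomInlineParams() {
  // Start from the flag-derived defaults, then override individual knobs.
  InlineParams Params = getInlineParams(/*Threshold=*/275); // arbitrary example value
  Params.HintThreshold = 400;        // more willing to inline callees marked inlinehint
  Params.ColdCallSiteThreshold = 20; // less willing to inline at cold call sites
  return Params;
}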
3080 
3081 InlineParams llvm::getInlineParams() {
3082  return getInlineParams(DefaultThreshold);
3083 }
3084 
3085 // Compute the default threshold for inlining based on the opt level and the
3086 // size opt level.
3087 static int computeThresholdFromOptLevels(unsigned OptLevel,
3088  unsigned SizeOptLevel) {
3089  if (OptLevel > 2)
3090  return InlineConstants::OptAggressiveThreshold;
3091  if (SizeOptLevel == 1) // -Os
3092  return InlineConstants::OptSizeThreshold;
3093  if (SizeOptLevel == 2) // -Oz
3094  return InlineConstants::OptMinSizeThreshold;
3095  return DefaultThreshold;
3096 }
3097 
3098 InlineParams llvm::getInlineParams(unsigned OptLevel, unsigned SizeOptLevel) {
3099  auto Params =
3100  getInlineParams(computeThresholdFromOptLevels(OptLevel, SizeOptLevel));
3101  // At O3, use the value of -locally-hot-callsite-threshold option to populate
3102  // Params.LocallyHotCallSiteThreshold. Below O3, this flag has effect only
3103  // when it is specified explicitly.
3104  if (OptLevel > 2)
3105  Params.LocallyHotCallSiteThreshold = LocallyHotCallSiteThreshold;
3106  return Params;
3107 }
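Putting the two routines above together, an illustrative mapping from opt/size levels to the starting threshold (variable names are placeholders; constant names come from InlineConstants in InlineCost.h; no -inline-threshold override assumed):

#include "llvm/Analysis/InlineCost.h"

using namespace llvm;

InlineParams AtO2 = getInlineParams(/*OptLevel=*/2, /*SizeOptLevel=*/0);
// AtO2.DefaultThreshold comes from the -inlinedefault-threshold option.
InlineParams AtO3 = getInlineParams(3, 0);
// AtO3.DefaultThreshold == InlineConstants::OptAggressiveThreshold;
// LocallyHotCallSiteThreshold is additionally populated at O3.
InlineParams AtOs = getInlineParams(2, 1);
// AtOs.DefaultThreshold == InlineConstants::OptSizeThreshold (-Os).
InlineParams AtOz = getInlineParams(2, 2);
// AtOz.DefaultThreshold == InlineConstants::OptMinSizeThreshold (-Oz).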
3108 
3109 PreservedAnalyses
3110 InlineCostAnnotationPrinterPass::run(Function &F,
3111  FunctionAnalysisManager &FAM) {
3112  PrintInstructionComments = true;
3113  std::function<AssumptionCache &(Function &)> GetAssumptionCache =
3114  [&](Function &F) -> AssumptionCache & {
3115  return FAM.getResult<AssumptionAnalysis>(F);
3116  };
3117  Module *M = F.getParent();
3118  ProfileSummaryInfo PSI(*M);
3119  DataLayout DL(M);
3120  TargetTransformInfo TTI(DL);
3121  // FIXME: Redesign the usage of InlineParams to expand the scope of this pass.
3122  // In the current implementation, the type of InlineParams doesn't matter as
3123  // the pass serves only for verification of inliner's decisions.
3124  // We can add a flag which determines InlineParams for this run. Right now,
3125  // the default InlineParams are used.
3126  const InlineParams Params = llvm::getInlineParams();
3127  for (BasicBlock &BB : F) {
3128  for (Instruction &I : BB) {
3129  if (CallInst *CI = dyn_cast<CallInst>(&I)) {
3130  Function *CalledFunction = CI->getCalledFunction();
3131  if (!CalledFunction || CalledFunction->isDeclaration())
3132  continue;
3133  OptimizationRemarkEmitter ORE(CalledFunction);
3134  InlineCostCallAnalyzer ICCA(*CalledFunction, *CI, Params, TTI,
3135  GetAssumptionCache, nullptr, &PSI, &ORE);
3136  ICCA.analyze();
3137  OS << " Analyzing call of " << CalledFunction->getName()
3138  << "... (caller:" << CI->getCaller()->getName() << ")\n";
3139  ICCA.print(OS);
3140  OS << "\n";
3141  }
3142  }
3143  }
3144  return PreservedAnalyses::all();
3145 }
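A sketch of driving this printer programmatically; the usual way is via opt with the pass name believed to be registered as print<inline-cost> in PassRegistry.def (treat that name as an assumption). The helper below is illustrative and uses the standard PassBuilder boilerplate.

#include "llvm/Analysis/InlineCost.h"
#include "llvm/IR/Module.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

static void printInlineCosts(Module &M) {
  PassBuilder PB;
  LoopAnalysisManager LAM;
  FunctionAnalysisManager FAM;
  CGSCCAnalysisManager CGAM;
  ModuleAnalysisManager MAM;
  PB.registerModuleAnalyses(MAM);
  PB.registerCGSCCAnalyses(CGAM);
  PB.registerFunctionAnalyses(FAM);
  PB.registerLoopAnalyses(LAM);
  PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);

  // Run the annotation printer over every function in the module.
  FunctionPassManager FPM;
  FPM.addPass(InlineCostAnnotationPrinterPass(errs()));

  ModulePassManager MPM;
  MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
  MPM.run(M, MAM);
}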
llvm::Check::Size
@ Size
Definition: FileCheck.h:77
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
llvm::Argument
This class represents an incoming formal argument to a Function.
Definition: Argument.h:28
AssumptionCache.h
llvm::Constant::isAllOnesValue
bool isAllOnesValue() const
Return true if this is the value that would be returned by getAllOnesValue.
Definition: Constants.cpp:93
llvm::InlineResult::success
static InlineResult success()
Definition: InlineCost.h:169
llvm::OptimizationRemarkMissed
Diagnostic information for missed-optimization remarks.
Definition: DiagnosticInfo.h:735
llvm::TargetTransformInfo::TCC_Expensive
@ TCC_Expensive
The cost of a 'div' instruction on x86.
Definition: TargetTransformInfo.h:264
LLVM_DUMP_METHOD
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:494
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:17
llvm::getStringFnAttrAsInt
Optional< int > getStringFnAttrAsInt(CallBase &CB, StringRef AttrKind)
Definition: InlineCost.cpp:150
llvm::SaturatingAdd
std::enable_if_t< std::is_unsigned< T >::value, T > SaturatingAdd(T X, T Y, bool *ResultOverflowed=nullptr)
Add two unsigned integers, X and Y, of type T.
Definition: MathExtras.h:837
llvm::ReturnInst
Return a value (possibly void), from a function.
Definition: Instructions.h:3004
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:113
llvm::generic_gep_type_iterator
Definition: GetElementPtrTypeIterator.h:31
print
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
Definition: ArchiveWriter.cpp:189
llvm::InlineParams::DefaultThreshold
int DefaultThreshold
The default threshold to start with for a callee.
Definition: InlineCost.h:192
isColdCallSite
static bool isColdCallSite(CallBase &CB, BlockFrequencyInfo &CallerBFI)
Return true if the block containing the call site has a BlockFrequency of less than ColdCCRelFreq% of...
Definition: GlobalOpt.cpp:1767
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:104
IntrinsicInst.h
llvm::Type::isPointerTy
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:218
llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:780
llvm::Function
Definition: Function.h:60
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:546
llvm::Attribute
Definition: Attributes.h:65
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::lookup
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:199
llvm::SetVector::size
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:77
llvm::BitCastInst
This class represents a no-op cast from one type to another.
Definition: Instructions.h:5212
GetElementPtrTypeIterator.h
llvm::ConstantInt::getValue
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:133
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1185
Statistic.h
llvm::ConstantExpr::getICmp
static Constant * getICmp(unsigned short pred, Constant *LHS, Constant *RHS, bool OnlyIfReduced=false)
get* - Return some common constants without having to specify the full Instruction::OPCODE identifier...
Definition: Constants.cpp:2535
llvm::InlineParams::LocallyHotCallSiteThreshold
Optional< int > LocallyHotCallSiteThreshold
Threshold to use when the callsite is considered hot relative to function entry.
Definition: InlineCost.h:211
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:168
llvm::Type::getPointerAddressSpace
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:729
StackSizeThreshold
static cl::opt< size_t > StackSizeThreshold("inline-max-stacksize", cl::Hidden, cl::init(std::numeric_limits< size_t >::max()), cl::ZeroOrMore, cl::desc("Do not inline functions with a stack size " "that exceeds the specified limit"))
llvm::PointerType::getAddressSpace
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Definition: DerivedTypes.h:682
llvm::CmpInst::ICMP_NE
@ ICMP_NE
not equal
Definition: InstrTypes.h:741
llvm::ConstantExpr::getBitCast
static Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2266
llvm::GlobalAlias
Definition: GlobalAlias.h:28
ValueTracking.h
OptimizationRemarkEmitter.h
llvm::ConstantExpr::getSelect
static Constant * getSelect(Constant *C, Constant *V1, Constant *V2, Type *OnlyIfReducedTy=nullptr)
Select constant expr.
Definition: Constants.cpp:2453
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
llvm::ConstantExpr::getInsertValue
static Constant * getInsertValue(Constant *Agg, Constant *Val, ArrayRef< unsigned > Idxs, Type *OnlyIfReducedTy=nullptr)
Definition: Constants.cpp:2656
FAM
FunctionAnalysisManager FAM
Definition: PassBuilderBindings.cpp:59
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:139
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::erase
bool erase(const KeyT &Val)
Definition: DenseMap.h:304
llvm::InlineConstants::OptSizeThreshold
const int OptSizeThreshold
Use when optsize (-Os) is specified.
Definition: InlineCost.h:38
InlineCallerSupersetNoBuiltin
static cl::opt< bool > InlineCallerSupersetNoBuiltin("inline-caller-superset-nobuiltin", cl::Hidden, cl::init(true), cl::desc("Allow inlining when caller has a superset of callee's nobuiltin " "attributes."))
BBSetVector
SetVector< BasicBlock * > BBSetVector
Definition: BasicBlockUtils.cpp:1618
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::InlineCost::getAlways
static InlineCost getAlways(const char *Reason, Optional< CostBenefitPair > CostBenefit=None)
Definition: InlineCost.h:117
llvm::CallBase::getFunctionType
FunctionType * getFunctionType() const
Definition: InstrTypes.h:1254
llvm::Optional< int >
llvm::isInlineViable
InlineResult isInlineViable(Function &Callee)
Minimal filter to detect invalid constructs for inlining.
Definition: InlineCost.cpp:2969
llvm::InlineParams
Thresholds to tune inline cost analysis.
Definition: InlineCost.h:190
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::count
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:147
Offset
uint64_t Offset
Definition: ELFObjHandler.cpp:79
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
llvm::ore::NV
DiagnosticInfoOptimizationBase::Argument NV
Definition: OptimizationRemarkEmitter.h:136
llvm::tgtok::FalseVal
@ FalseVal
Definition: TGLexer.h:62
Operator.h
llvm::InlineConstants::FunctionInlineCostMultiplierAttributeName
const char FunctionInlineCostMultiplierAttributeName[]
Definition: InlineCost.h:59
llvm::successors
auto successors(MachineBasicBlock *BB)
Definition: MachineSSAContext.h:29
llvm::InlineParams::ColdThreshold
Optional< int > ColdThreshold
Threshold to use for cold callees.
Definition: InlineCost.h:198
llvm::max
Expected< ExpressionValue > max(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:337
llvm::TargetTransformInfo::getInlinerVectorBonusPercent
int getInlinerVectorBonusPercent() const
Definition: TargetTransformInfo.cpp:204
llvm::dump
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
Definition: SparseBitVector.h:877
STLExtras.h
llvm::CallBase::arg_begin
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
Definition: InstrTypes.h:1316
llvm::SmallVectorImpl::pop_back_val
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:654
RHS
Value * RHS
Definition: X86PartialReduction.cpp:76
llvm::InlineParams::OptMinSizeThreshold
Optional< int > OptMinSizeThreshold
Threshold to use when the caller is optimized for minsize.
Definition: InlineCost.h:204
llvm::UnaryOperator
Definition: InstrTypes.h:101
llvm::gep_type_begin
gep_type_iterator gep_type_begin(const User *GEP)
Definition: GetElementPtrTypeIterator.h:123
llvm::detail::DenseSetImpl< ValueT, DenseMap< ValueT, detail::DenseSetEmpty, DenseMapInfo< ValueT >, detail::DenseSetPair< ValueT > >, DenseMapInfo< ValueT > >::insert
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:206
llvm::detail::DenseSetImpl< ValueT, DenseMap< ValueT, detail::DenseSetEmpty, DenseMapInfo< ValueT >, detail::DenseSetPair< ValueT > >, DenseMapInfo< ValueT > >::count
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition: DenseSet.h:97
ConstantFolding.h
llvm::APInt::getZero
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:177
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
functionsHaveCompatibleAttributes
static bool functionsHaveCompatibleAttributes(Function *Caller, Function *Callee, TargetTransformInfo &TTI, function_ref< const TargetLibraryInfo &(Function &)> &GetTLI)
Test that there are no attribute conflicts between Caller and Callee that prevent inlining.
Definition: InlineCost.cpp:2755
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
llvm::gep_type_end
gep_type_iterator gep_type_end(const User *GEP)
Definition: GetElementPtrTypeIterator.h:130
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::ConstantExpr::getSub
static Constant * getSub(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
Definition: Constants.cpp:2734
CommandLine.h
llvm::BlockFrequencyInfo::getEntryFreq
uint64_t getEntryFreq() const
Definition: BlockFrequencyInfo.cpp:280
CodeMetrics.h
FormattedStream.h
LHS
Value * LHS
Definition: X86PartialReduction.cpp:75
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
llvm::TargetTransformInfo::areInlineCompatible
bool areInlineCompatible(const Function *Caller, const Function *Callee) const
Definition: TargetTransformInfo.cpp:1014
llvm::Instruction::getNumSuccessors
unsigned getNumSuccessors() const
Return the number of successors that this instruction has.
Definition: Instruction.cpp:777
llvm::Intrinsic::getType
FunctionType * getType(LLVMContext &Context, ID id, ArrayRef< Type * > Tys=None)
Return the function type for an intrinsic.
Definition: Function.cpp:1374
llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1617
llvm::BlockFrequencyInfo
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Definition: BlockFrequencyInfo.h:37
llvm::GlobalValue::isDeclaration
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:264
llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
llvm::getInlineCost
InlineCost getInlineCost(CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI, function_ref< AssumptionCache &(Function &)> GetAssumptionCache, function_ref< const TargetLibraryInfo &(Function &)> GetTLI, function_ref< BlockFrequencyInfo &(Function &)> GetBFI=nullptr, ProfileSummaryInfo *PSI=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
Get an InlineCost object representing the cost of inlining this callsite.
Definition: InlineCost.cpp:2803
llvm::Constant::isNullValue
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition: Constants.cpp:76
InlineThreshold
static cl::opt< int > InlineThreshold("inline-threshold", cl::Hidden, cl::init(225), cl::desc("Control the amount of inlining to perform (default = 225)"))
llvm::User
Definition: User.h:44
llvm::ConstantExpr::getIntToPtr
static Constant * getIntToPtr(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2252
llvm::CallBase::getCalledFunction
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Definition: InstrTypes.h:1396
llvm::ConstantExpr::getPtrToInt
static Constant * getPtrToInt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2238
llvm::InlineCost
Represents the cost of inlining a function.
Definition: InlineCost.h:87
llvm::ConstantFoldCall
Constant * ConstantFoldCall(const CallBase *Call, Function *F, ArrayRef< Constant * > Operands, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldCall - Attempt to constant fold a call to the specified function with the specified argum...
Definition: ConstantFolding.cpp:3186
InlineEnableCostBenefitAnalysis
static cl::opt< bool > InlineEnableCostBenefitAnalysis("inline-enable-cost-benefit-analysis", cl::Hidden, cl::init(false), cl::desc("Enable the cost-benefit analysis for the inliner"))
llvm::ms_demangle::QualifierMangleMode::Result
@ Result
llvm::simplifyBinOp
Value * simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a BinaryOperator, fold the result or return null.
Definition: InstructionSimplify.cpp:5559
llvm::canConstantFoldCallTo
bool canConstantFoldCallTo(const CallBase *Call, const Function *F)
canConstantFoldCallTo - Return true if its even possible to fold a call to the specified function.
Definition: ConstantFolding.cpp:1490
TargetLibraryInfo.h
llvm::InlineCostFeatureIndex
InlineCostFeatureIndex
Definition: InlineModelFeatureMaps.h:51
ColdCallSiteThreshold
static cl::opt< int > ColdCallSiteThreshold("inline-cold-callsite-threshold", cl::Hidden, cl::init(45), cl::desc("Threshold for inlining cold callsites"))
llvm::dwarf::Index
Index
Definition: Dwarf.h:472
llvm::PatternMatch::m_FNeg
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
Definition: PatternMatch.h:1027
llvm::pdb::PDB_SymType::Caller
@ Caller
llvm::Instruction
Definition: Instruction.h:42
llvm::Operator::getOpcode
unsigned getOpcode() const
Return the opcode for this Instruction or ConstantExpr.
Definition: Operator.h:42
DefaultThreshold
static cl::opt< int > DefaultThreshold("inlinedefault-threshold", cl::Hidden, cl::init(225), cl::desc("Default amount of inlining to perform"))
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::predecessors
auto predecessors(MachineBasicBlock *BB)
Definition: MachineSSAContext.h:30
llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:54
llvm::InlineResult::isSuccess
bool isSuccess() const
Definition: InlineCost.h:173
llvm::cl::Option::getNumOccurrences
int getNumOccurrences() const
Definition: CommandLine.h:395
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:928
llvm::InlineCostAnnotationPrinterPass::run
PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM)
Definition: InlineCost.cpp:3110
llvm::CodeMetrics::collectEphemeralValues
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
Definition: CodeMetrics.cpp:70
SmallPtrSet.h
llvm::Instruction::getSuccessor
BasicBlock * getSuccessor(unsigned Idx) const
Return the specified successor. This instruction must be a terminator.
Definition: Instruction.cpp:789
llvm::InlineCost::get
static InlineCost get(int Cost, int Threshold)
Definition: InlineCost.h:112
llvm::getInlineParams
InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options.
Definition: InlineCost.cpp:3081
llvm::InlineConstants::InstrCost
const int InstrCost
Definition: InlineCost.h:47
PatternMatch.h
llvm::MCID::Call
@ Call
Definition: MCInstrDesc.h:155
HintThreshold
static cl::opt< int > HintThreshold("inlinehint-threshold", cl::Hidden, cl::init(325), cl::desc("Threshold for inlining functions with inline hint"))
llvm::Attribute::getValueAsString
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:304
llvm::None
const NoneType None
Definition: None.h:24
llvm::StringRef::getAsInteger
std::enable_if_t< std::numeric_limits< T >::is_signed, bool > getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition: StringRef.h:514
OptComputeFullInlineCost
static cl::opt< bool > OptComputeFullInlineCost("inline-cost-full", cl::Hidden, cl::desc("Compute the full inline cost of a call site even when the cost " "exceeds the threshold."))
IgnoreTTIInlineCompatible
static cl::opt< bool > IgnoreTTIInlineCompatible("ignore-tti-inline-compatible", cl::Hidden, cl::init(false), cl::desc("Ignore TTI attributes compatibility check between callee/caller " "during inline cost calculation"))
LoopInfo.h
llvm::ProfileSummaryInfo
Analysis providing profile information.
Definition: ProfileSummaryInfo.h:39
llvm::BranchInst::getCondition
Value * getCondition() const
Definition: Instructions.h:3167
llvm::InvokeInst
Invoke instruction.
Definition: Instructions.h:3763
llvm::CmpInst
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:709
llvm::cl::ZeroOrMore
@ ZeroOrMore
Definition: CommandLine.h:116
AssemblyAnnotationWriter.h
llvm::function_ref
An efficient, type-erasing, non-owning reference to a callable.
Definition: STLFunctionalExtras.h:36
llvm::DenseSet
Implements a dense probed hash-table based set.
Definition: DenseSet.h:268
llvm::cl::opt
Definition: CommandLine.h:1392
llvm::AllocFnKind::Free
@ Free
llvm::ConstantExpr::getCompare
static Constant * getCompare(unsigned short pred, Constant *C1, Constant *C2, bool OnlyIfReduced=false)
Return an ICmp or FCmp comparison operator constant expression.
Definition: Constants.cpp:2431
llvm::StoreInst
An instruction for storing to memory.
Definition: Instructions.h:297
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::AMDGPU::Hwreg::Offset
Offset
Definition: SIDefines.h:413
llvm::InlineConstants::IndirectCallThreshold
const int IndirectCallThreshold
Definition: InlineCost.h:48
llvm::StructLayout
Used to lazily calculate structure layout information for a target machine, based on the DataLayout s...
Definition: DataLayout.h:622
uint64_t
ProfileSummaryInfo.h
llvm::CatchReturnInst
Definition: Instructions.h:4538
llvm::AssumptionAnalysis
A function analysis which provides an AssumptionCache.
Definition: AssumptionCache.h:173
llvm::Optional::emplace
void emplace(ArgTypes &&... Args)
Create a new object by constructing it in place with the given arguments.
Definition: Optional.h:287
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:57
llvm::DenseMap
Definition: DenseMap.h:716
llvm::BlockFrequency::getFrequency
uint64_t getFrequency() const
Returns the frequency as a fixpoint number scaled by the entry frequency.
Definition: BlockFrequency.h:34
llvm::SaturatingMultiplyAdd
std::enable_if_t< std::is_unsigned< T >::value, T > SaturatingMultiplyAdd(T X, T Y, T A, bool *ResultOverflowed=nullptr)
Multiply two unsigned integers, X and Y, and add the unsigned integer, A to the product.
Definition: MathExtras.h:900
llvm::GetElementPtrInst
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:916
llvm::Attribute::AttrKind
AttrKind
This enumeration lists the attributes that can be associated with parameters, function results,...
Definition: Attributes.h:84
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
llvm::InlineConstants::OptAggressiveThreshold
const int OptAggressiveThreshold
Use when -O3 is specified.
Definition: InlineCost.h:44
llvm::PointerType
Class to represent pointers.
Definition: DerivedTypes.h:632
llvm::simplifyInstruction
Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q, OptimizationRemarkEmitter *ORE=nullptr)
See if we can compute a simplified version of this instruction.
Definition: InstructionSimplify.cpp:6486
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:152
llvm::formatted_raw_ostream
formatted_raw_ostream - A raw_ostream that wraps another one and keeps track of line and column posit...
Definition: FormattedStream.h:30
llvm::InlineCostAnnotationPrinterPass::OS
raw_ostream & OS
Definition: InlineCost.h:325
ColdThreshold
static cl::opt< int > ColdThreshold("inlinecold-threshold", cl::Hidden, cl::init(45), cl::desc("Threshold for inlining functions with cold attribute"))
getFalse
static Constant * getFalse(Type *Ty)
For a boolean type or a vector of boolean type, return false or a vector with every element false.
Definition: InstructionSimplify.cpp:121
llvm::RegState::Dead
@ Dead
Unused definition.
Definition: MachineInstrBuilder.h:50
llvm::OptimizationRemarkEmitter::emit
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Definition: OptimizationRemarkEmitter.cpp:77
llvm::ConstantExpr::getCast
static Constant * getCast(unsigned ops, Constant *C, Type *Ty, bool OnlyIfReduced=false)
Convenience function for getting a Cast operation.
Definition: Constants.cpp:2010
InlineCost.h
llvm::SelectInst
This class represents the LLVM 'select' instruction.
Definition: Instructions.h:1724
llvm::TTI
TargetTransformInfo TTI
Definition: TargetTransformInfo.h:163
llvm::TargetTransformInfo::isLoweredToCall
bool isLoweredToCall(const Function *F) const
Test whether calls to a function lower to actual program function calls.
Definition: TargetTransformInfo.cpp:284
llvm::Record
Definition: Record.h:1543
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
llvm::InlineParams::ColdCallSiteThreshold
Optional< int > ColdCallSiteThreshold
Threshold to use when the callsite is considered cold.
Definition: InlineCost.h:214
llvm::InlineCost::getNever
static InlineCost getNever(const char *Reason, Optional< CostBenefitPair > CostBenefit=None)
Definition: InlineCost.h:121
llvm::GEPOperator
Definition: Operator.h:375
llvm::CallBase::arg_end
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
Definition: InstrTypes.h:1322
llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:383
llvm::BranchInst::isUnconditional
bool isUnconditional() const
Definition: Instructions.h:3164
llvm::PatternMatch::m_Value
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:76
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:75
llvm::SetVector::insert
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
llvm::TargetTransformInfo::TCC_Free
@ TCC_Free
Expected to fold away in lowering.
Definition: TargetTransformInfo.h:262
llvm::CallBase::getFnAttr
Attribute getFnAttr(StringRef Kind) const
Get the attribute of a given kind for the function.
Definition: InstrTypes.h:1615
llvm::LoopInfo
Definition: LoopInfo.h:1102
llvm::BinaryOperator
Definition: InstrTypes.h:188
llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: OptimizationRemarkEmitter.h:33
llvm::min
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:357
DataLayout.h
llvm::StructType
Class to represent struct types.
Definition: DerivedTypes.h:213
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
LocallyHotCallSiteThreshold
static cl::opt< int > LocallyHotCallSiteThreshold("locally-hot-callsite-threshold", cl::Hidden, cl::init(525), cl::desc("Threshold for locally hot callsites "))
InstVisitor.h
llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:42
llvm::TargetTransformInfo::TCK_SizeAndLatency
@ TCK_SizeAndLatency
The weighted sum of size and latency.
Definition: TargetTransformInfo.h:215
DEBUG_PRINT_STAT
#define DEBUG_PRINT_STAT(x)
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
llvm::ConstantInt::isZero
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:194
llvm::TargetTransformInfo::getUserCost
InstructionCost getUserCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind) const
Estimate the cost of a given IR user when lowered.
Definition: TargetTransformInfo.cpp:222
llvm::Instruction::getFunction
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:69
getParent
static const Function * getParent(const Value *V)
Definition: BasicAliasAnalysis.cpp:845
llvm::BranchProbability
Definition: BranchProbability.h:30
llvm::Value::getContext
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:991
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::PtrToIntInst
This class represents a cast from a pointer to an integer.
Definition: Instructions.h:5161
llvm::InlineResult::getFailureReason
const char * getFailureReason() const
Definition: InlineCost.h:174
llvm::InstVisitor
Base class for instruction visitors.
Definition: InstVisitor.h:78
llvm::CastInst
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:429
BlockFrequencyInfo.h
llvm::AMDGPUISD::BFI
@ BFI
Definition: AMDGPUISelLowering.h:429
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:305
DisableGEPConstOperand
static cl::opt< bool > DisableGEPConstOperand("disable-gep-const-evaluation", cl::Hidden, cl::init(false), cl::desc("Disables evaluation of GetElementPtr with constant operands"))
llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:173
llvm::CallBase::paramHasAttr
bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Determine whether the argument or parameter has the given attribute.
Definition: Instructions.cpp:341
llvm::isAssumeLikeIntrinsic
bool isAssumeLikeIntrinsic(const Instruction *I)
Return true if it is an intrinsic that cannot be speculated but also cannot trap.
Definition: ValueTracking.cpp:559
llvm::SmallPtrSetImplBase::size
size_type size() const
Definition: SmallPtrSet.h:93
llvm::ConstantInt::getZExtValue
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:142
llvm::InlineConstants::MaxSimplifiedDynamicAllocaToInline
const uint64_t MaxSimplifiedDynamicAllocaToInline
Do not inline dynamic allocas that have been constant propagated to be static allocas above this amou...
Definition: InlineCost.h:57
llvm::simplifyFNegInst
Value * simplifyFNegInst(Value *Op, FastMathFlags FMF, const SimplifyQuery &Q)
Given operand for an FNeg, fold the result or return null.
Definition: InstructionSimplify.cpp:5108
CallingConv.h
llvm::getAttributeBasedInliningDecision
Optional< InlineResult > getAttributeBasedInliningDecision(CallBase &Call, Function *Callee, TargetTransformInfo &CalleeTTI, function_ref< const TargetLibraryInfo &(Function &)> GetTLI)
Returns InlineResult::success() if the call site should be always inlined because of user directives,...
Definition: InlineCost.cpp:2851
llvm::ResumeInst
Resume the propagation of an exception.
Definition: Instructions.h:4211
llvm::InlineParams::HintThreshold
Optional< int > HintThreshold
Threshold to use for callees with inline hint.
Definition: InlineCost.h:195
llvm::ConstantInt::getTrue
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:876
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::end
iterator end()
Definition: DenseMap.h:84
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:345
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
llvm::StructLayout::getElementOffset
uint64_t getElementOffset(unsigned Idx) const
Definition: DataLayout.h:652
llvm::AssemblyAnnotationWriter
Definition: AssemblyAnnotationWriter.h:27
llvm::InlineConstants::ColdccPenalty
const int ColdccPenalty
Definition: InlineCost.h:51
llvm::getInliningCostFeatures
Optional< InlineCostFeatures > getInliningCostFeatures(CallBase &Call, TargetTransformInfo &CalleeTTI, function_ref< AssumptionCache &(Function &)> GetAssumptionCache, function_ref< BlockFrequencyInfo &(Function &)> GetBFI=nullptr, ProfileSummaryInfo *PSI=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
Get the expanded cost features.
Definition: InlineCost.cpp:2838
llvm::APInt::sextOrTrunc
APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
Definition: APInt.cpp:1002
llvm::ExtractValueInst
This instruction extracts a struct member or array element value from an aggregate value.
Definition: Instructions.h:2398
llvm::TypeSize
Definition: TypeSize.h:435
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::size
unsigned size() const
Definition: DenseMap.h:101
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:222
llvm::InlineResult::failure
static InlineResult failure(const char *Reason)
Definition: InlineCost.h:170
llvm::ConstantExpr::getGetElementPtr
static Constant * getGetElementPtr(Type *Ty, Constant *C, ArrayRef< Constant * > IdxList, bool InBounds=false, Optional< unsigned > InRangeIndex=None, Type *OnlyIfReducedTy=nullptr)
Getelementptr form.
Definition: Constants.h:1247
InlineSizeAllowance
static cl::opt< int > InlineSizeAllowance("inline-size-allowance", cl::Hidden, cl::init(100), cl::desc("The maximum size of a callee that get's " "inlined without sufficient cycle savings"))
llvm::TargetTransformInfo::getInliningThresholdMultiplier
unsigned getInliningThresholdMultiplier() const
Definition: TargetTransformInfo.cpp:195
llvm::InlineConstants::OptMinSizeThreshold
const int OptMinSizeThreshold
Use when minsize (-Oz) is specified.
Definition: InlineCost.h:41
InlineSavingsMultiplier
static cl::opt< int > InlineSavingsMultiplier("inline-savings-multiplier", cl::Hidden, cl::init(8), cl::desc("Multiplier to multiply cycle savings by during inlining"))
llvm::CleanupReturnInst
Definition: Instructions.h:4619
llvm::BlockFrequencyInfo::getBlockFreq
BlockFrequency getBlockFreq(const BasicBlock *BB) const
getblockFreq - Return block frequency.
Definition: BlockFrequencyInfo.cpp:203
llvm::IntToPtrInst
This class represents a cast from an integer to a pointer.
Definition: Instructions.h:5118
llvm::IndirectBrInst
Indirect Branch Instruction.
Definition: Instructions.h:3628
GlobalAlias.h
llvm::TargetTransformInfo::adjustInliningThreshold
unsigned adjustInliningThreshold(const CallBase *CB) const
Definition: TargetTransformInfo.cpp:200
llvm::IntrinsicInst
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:46
computeThresholdFromOptLevels
static int computeThresholdFromOptLevels(unsigned OptLevel, unsigned SizeOptLevel)
Definition: InlineCost.cpp:3087
llvm::TargetTransformInfo::getEstimatedNumberOfCaseClusters
unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) const
Definition: TargetTransformInfo.cpp:215
llvm::getInliningCostEstimate
Optional< int > getInliningCostEstimate(CallBase &Call, TargetTransformInfo &CalleeTTI, function_ref< AssumptionCache &(Function &)> GetAssumptionCache, function_ref< BlockFrequencyInfo &(Function &)> GetBFI=nullptr, ProfileSummaryInfo *PSI=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
Get the cost estimate ignoring thresholds.
Definition: InlineCost.cpp:2813
llvm::AttributeFuncs::areInlineCompatible
bool areInlineCompatible(const Function &Caller, const Function &Callee)
Definition: Attributes.cpp:2040
llvm::getCallsiteCost
int getCallsiteCost(CallBase &Call, const DataLayout &DL)
Return the cost associated with a callsite, including parameter passing and the call/return instructi...
Definition: InlineCost.cpp:2770
SmallVector.h
llvm::InlineConstants::LastCallToStaticBonus
const int LastCallToStaticBonus
Definition: InlineCost.h:50
llvm::BlockFrequencyInfo::getBlockProfileCount
Optional< uint64_t > getBlockProfileCount(const BasicBlock *BB, bool AllowSynthetic=false) const
Returns the estimated profile count of BB.
Definition: BlockFrequencyInfo.cpp:208
Dominators.h
llvm::CallBase::getArgOperand
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1341
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:91
InstructionSimplify.h
llvm::BlockAddress::get
static BlockAddress * get(Function *F, BasicBlock *BB)
Return a BlockAddress for the specified function and basic block.
Definition: Constants.cpp:1815
TargetTransformInfo.h
llvm::PHINode
Definition: Instructions.h:2651
llvm::PatternMatch
Definition: PatternMatch.h:47
llvm::detail::DenseSetImpl< ValueT, DenseMap< ValueT, detail::DenseSetEmpty, DenseMapInfo< ValueT >, detail::DenseSetPair< ValueT > >, DenseMapInfo< ValueT > >::erase
bool erase(const ValueT &V)
Definition: DenseSet.h:101
llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
llvm::CallBase
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1174
llvm::SmallPtrSetImpl< const Value * >
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:42
HotCallSiteThreshold
static cl::opt< int > HotCallSiteThreshold("hot-callsite-threshold", cl::Hidden, cl::init(3000), cl::desc("Threshold for hot callsites "))
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1461
llvm::UnreachableInst
This function has undefined behavior.
Definition: Instructions.h:4727
llvm::InlineParams::HotCallSiteThreshold
Optional< int > HotCallSiteThreshold
Threshold to use when the callsite is considered hot.
Definition: InlineCost.h:207
llvm::SwitchInst
Multiway switch.
Definition: Instructions.h:3230
llvm::Function::ProfileCount
Class to represent profile counts.
Definition: Function.h:253
llvm::AllocaInst
an instruction to allocate memory on the stack
Definition: Instructions.h:58
CallPenalty
static cl::opt< int > CallPenalty("inline-call-penalty", cl::Hidden, cl::init(25), cl::desc("Call penalty that is applied per callsite when inlining"))
llvm::cl::desc
Definition: CommandLine.h:405
llvm::InlineCostFeatures
std::array< int, static_cast< size_t >(InlineCostFeatureIndex::NumberOfFeatures)> InlineCostFeatures
Definition: InlineModelFeatureMaps.h:62
llvm::BranchInst
Conditional or Unconditional Branch instruction.
Definition: Instructions.h:3086
raw_ostream.h
DEBUG_TYPE
#define DEBUG_TYPE
Definition: InlineCost.cpp:49
llvm::InlineConstants::TotalAllocaSizeRecursiveCaller
const unsigned TotalAllocaSizeRecursiveCaller
Do not inline functions which allocate this many bytes on the stack when the caller is recursive.
Definition: InlineCost.h:54
llvm::SetVector
A vector that has set insertion semantics.
Definition: SetVector.h:40
llvm::InlineParams::AllowRecursiveCall
Optional< bool > AllowRecursiveCall
Indicate whether we allow inlining for recursive call.
Definition: InlineCost.h:223
llvm::SmallVectorImpl::reserve
void reserve(size_type N)
Definition: SmallVector.h:644
llvm::InsertValueInst
This instruction inserts a struct field of array element value into an aggregate value.
Definition: Instructions.h:2509
llvm::InlineConstants::LoopPenalty
const int LoopPenalty
Definition: InlineCost.h:49
llvm::tgtok::TrueVal
@ TrueVal
Definition: TGLexer.h:62
PrintInstructionComments
static cl::opt< bool > PrintInstructionComments("print-instruction-comments", cl::Hidden, cl::init(false), cl::desc("Prints comments for instruction based on inline cost analysis"))
HotCallSiteRelFreq
static cl::opt< int > HotCallSiteRelFreq("hot-callsite-rel-freq", cl::Hidden, cl::init(60), cl::desc("Minimum block frequency, expressed as a multiple of caller's " "entry frequency, for a callsite to be hot in the absence of " "profile information."))
llvm::FunctionType::getReturnType
Type * getReturnType() const
Definition: DerivedTypes.h:124
llvm::TargetTransformInfo::getFPOpCost
InstructionCost getFPOpCost(Type *Ty) const
Return the expected cost of supporting the floating point operation of the specified type.
Definition: TargetTransformInfo.cpp:569
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::InlineResult
InlineResult is basically true or false.
Definition: InlineCost.h:164
Debug.h
ColdCallSiteRelFreq
static cl::opt< int > ColdCallSiteRelFreq("cold-callsite-rel-freq", cl::Hidden, cl::init(2), cl::desc("Maximum block frequency, expressed as a percentage of caller's " "entry frequency, for a callsite to be cold in the absence of " "profile information."))
llvm::InlineParams::OptSizeThreshold
Optional< int > OptSizeThreshold
Threshold to use when the caller is optimized for size.
Definition: InlineCost.h:201
llvm::BranchInst::isConditional
bool isConditional() const
Definition: Instructions.h:3165
llvm::ConstantExpr::getExtractValue
static Constant * getExtractValue(Constant *Agg, ArrayRef< unsigned > Idxs, Type *OnlyIfReducedTy=nullptr)
Definition: Constants.cpp:2680
llvm::BranchInst::getSuccessor
BasicBlock * getSuccessor(unsigned i) const
Definition: Instructions.h:3179
SetVector.h
llvm::sampleprof::Base
@ Base
Definition: Discriminator.h:58
llvm::CallingConv::Cold
@ Cold
Definition: CallingConv.h:48
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365