//===- InlineCost.cpp - Cost analysis for inliner -------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements inline cost analysis.
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/InlineCost.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/AssemblyAnnotationWriter.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "inline-cost"

STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed");

static cl::opt<int>
    DefaultThreshold("inlinedefault-threshold", cl::Hidden, cl::init(225),
                     cl::ZeroOrMore,
                     cl::desc("Default amount of inlining to perform"));

static cl::opt<bool> PrintInstructionComments(
    "print-instruction-comments", cl::Hidden, cl::init(false),
    cl::desc("Prints comments for instruction based on inline cost analysis"));

static cl::opt<int> InlineThreshold(
    "inline-threshold", cl::Hidden, cl::init(225), cl::ZeroOrMore,
    cl::desc("Control the amount of inlining to perform (default = 225)"));

static cl::opt<int> HintThreshold(
    "inlinehint-threshold", cl::Hidden, cl::init(325), cl::ZeroOrMore,
    cl::desc("Threshold for inlining functions with inline hint"));

static cl::opt<int>
    ColdCallSiteThreshold("inline-cold-callsite-threshold", cl::Hidden,
                          cl::init(45), cl::ZeroOrMore,
                          cl::desc("Threshold for inlining cold callsites"));

static cl::opt<bool> InlineEnableCostBenefitAnalysis(
    "inline-enable-cost-benefit-analysis", cl::Hidden, cl::init(false),
    cl::desc("Enable the cost-benefit analysis for the inliner"));

static cl::opt<int> InlineSavingsMultiplier(
    "inline-savings-multiplier", cl::Hidden, cl::init(8), cl::ZeroOrMore,
    cl::desc("Multiplier to multiply cycle savings by during inlining"));

static cl::opt<int>
    InlineSizeAllowance("inline-size-allowance", cl::Hidden, cl::init(100),
                        cl::ZeroOrMore,
                        cl::desc("The maximum size of a callee that gets "
                                 "inlined without sufficient cycle savings"));

// We introduce this threshold to help performance of instrumentation based
// PGO before we actually hook up inliner with analysis passes such as BPI and
// BFI.
static cl::opt<int> ColdThreshold(
    "inlinecold-threshold", cl::Hidden, cl::init(45), cl::ZeroOrMore,
    cl::desc("Threshold for inlining functions with cold attribute"));

static cl::opt<int>
    HotCallSiteThreshold("hot-callsite-threshold", cl::Hidden, cl::init(3000),
                         cl::ZeroOrMore,
                         cl::desc("Threshold for hot callsites"));

static cl::opt<int> LocallyHotCallSiteThreshold(
    "locally-hot-callsite-threshold", cl::Hidden, cl::init(525), cl::ZeroOrMore,
    cl::desc("Threshold for locally hot callsites"));

static cl::opt<int> ColdCallSiteRelFreq(
    "cold-callsite-rel-freq", cl::Hidden, cl::init(2), cl::ZeroOrMore,
    cl::desc("Maximum block frequency, expressed as a percentage of caller's "
             "entry frequency, for a callsite to be cold in the absence of "
             "profile information."));

static cl::opt<int> HotCallSiteRelFreq(
    "hot-callsite-rel-freq", cl::Hidden, cl::init(60), cl::ZeroOrMore,
    cl::desc("Minimum block frequency, expressed as a multiple of caller's "
             "entry frequency, for a callsite to be hot in the absence of "
             "profile information."));

static cl::opt<int> CallPenalty(
    "inline-call-penalty", cl::Hidden, cl::init(25),
    cl::desc("Call penalty that is applied per callsite when inlining"));

static cl::opt<bool> OptComputeFullInlineCost(
    "inline-cost-full", cl::Hidden, cl::init(false), cl::ZeroOrMore,
    cl::desc("Compute the full inline cost of a call site even when the cost "
             "exceeds the threshold."));

static cl::opt<bool> InlineCallerSupersetNoBuiltin(
    "inline-caller-superset-nobuiltin", cl::Hidden, cl::init(true),
    cl::ZeroOrMore,
    cl::desc("Allow inlining when caller has a superset of callee's nobuiltin "
             "attributes."));

static cl::opt<bool> DisableGEPConstOperand(
    "disable-gep-const-evaluation", cl::Hidden, cl::init(false),
    cl::desc("Disables evaluation of GetElementPtr with constant operands"));

namespace {
class InlineCostCallAnalyzer;

/// This function behaves more like CallBase::hasFnAttr: when it looks for the
/// requested attribute, it checks both the call instruction and the called
/// function (if it's available and operand bundles don't prohibit that).
Attribute getFnAttr(CallBase &CB, StringRef AttrKind) {
  Attribute CallAttr = CB.getFnAttr(AttrKind);
  if (CallAttr.isValid())
    return CallAttr;

  // Operand bundles override attributes on the called function, but don't
  // override attributes directly present on the call instruction.
  if (!CB.isFnAttrDisallowedByOpBundle(AttrKind))
    if (const Function *F = CB.getCalledFunction())
      return F->getFnAttribute(AttrKind);

  return {};
}

Optional<int> getStringFnAttrAsInt(CallBase &CB, StringRef AttrKind) {
  Attribute Attr = getFnAttr(CB, AttrKind);
  int AttrValue;
  if (Attr.getValueAsString().getAsInteger(10, AttrValue))
    return None;
  return AttrValue;
}
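
// Illustrative sketch (assumed example, not part of the upstream file):
// these helpers let string attributes steer cost decisions. For a call site
// such as
//
//   %r = call i32 @callee(i32 %x) "call-inline-cost"="42"
//
// getStringFnAttrAsInt(CB, "call-inline-cost") yields 42. getAsInteger
// returns true on failure, so a missing or non-numeric attribute value maps
// to None rather than a garbage integer.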

// This struct is used to store information about the inline cost of a
// particular instruction.
struct InstructionCostDetail {
  int CostBefore = 0;
  int CostAfter = 0;
  int ThresholdBefore = 0;
  int ThresholdAfter = 0;

  int getThresholdDelta() const { return ThresholdAfter - ThresholdBefore; }

  int getCostDelta() const { return CostAfter - CostBefore; }

  bool hasThresholdChanged() const { return ThresholdAfter != ThresholdBefore; }
};

class InlineCostAnnotationWriter : public AssemblyAnnotationWriter {
private:
  InlineCostCallAnalyzer *const ICCA;

public:
  InlineCostAnnotationWriter(InlineCostCallAnalyzer *ICCA) : ICCA(ICCA) {}
  virtual void emitInstructionAnnot(const Instruction *I,
                                    formatted_raw_ostream &OS) override;
};

/// Carry out call site analysis, in order to evaluate inlinability.
/// NOTE: the type is currently used as an implementation detail of functions
/// such as llvm::getInlineCost. Note the function_ref constructor parameters -
/// the expectation is that they come from the outer scope, from the wrapper
/// functions. If we want to support constructing CallAnalyzer objects where
/// lambdas are provided inline at construction, or where the object needs to
/// otherwise survive past the scope of the provided functions, we need to
/// revisit the argument types.
class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
  typedef InstVisitor<CallAnalyzer, bool> Base;
  friend class InstVisitor<CallAnalyzer, bool>;

protected:
  virtual ~CallAnalyzer() {}
  /// The TargetTransformInfo available for this compilation.
  const TargetTransformInfo &TTI;

  /// Getter for the cache of @llvm.assume intrinsics.
  function_ref<AssumptionCache &(Function &)> GetAssumptionCache;

  /// Getter for BlockFrequencyInfo.
  function_ref<BlockFrequencyInfo &(Function &)> GetBFI;

  /// Profile summary information.
  ProfileSummaryInfo *PSI;

  /// The called function.
  Function &F;

  // Cache the DataLayout since we use it a lot.
  const DataLayout &DL;

  /// The OptimizationRemarkEmitter available for this compilation.
  OptimizationRemarkEmitter *ORE;

  /// The candidate callsite being analyzed. Please do not use this to do
  /// analysis in the caller function; we want the inline cost query to be
  /// easily cacheable. Instead, use the cover function paramHasAttr.
  CallBase &CandidateCall;

  /// Extension points for handling callsite features.
  // Called before a basic block was analyzed.
  virtual void onBlockStart(const BasicBlock *BB) {}

  /// Called after a basic block was analyzed.
  virtual void onBlockAnalyzed(const BasicBlock *BB) {}

  /// Called before an instruction was analyzed.
  virtual void onInstructionAnalysisStart(const Instruction *I) {}

  /// Called after an instruction was analyzed.
  virtual void onInstructionAnalysisFinish(const Instruction *I) {}

  /// Called at the end of the analysis of the callsite. Return the outcome of
  /// the analysis, i.e. 'InlineResult(true)' if the inlining may happen, or
  /// the reason it can't.
  virtual InlineResult finalizeAnalysis() { return InlineResult::success(); }

  /// Called when we're about to start processing a basic block, and every
  /// time we are done processing an instruction. Return true if there is no
  /// point in continuing the analysis (e.g. we've determined already the call
  /// site is too expensive to inline).
  virtual bool shouldStop() { return false; }

  /// Called before the analysis of the callee body starts (with callsite
  /// contexts propagated). It checks callsite-specific information. Return a
  /// reason analysis can't continue if that's the case, or 'true' if it may
  /// continue.
  virtual InlineResult onAnalysisStart() { return InlineResult::success(); }

  /// Called if the analysis engine decides SROA cannot be done for the given
  /// alloca.
  virtual void onDisableSROA(AllocaInst *Arg) {}

  /// Called when the analysis engine determines load elimination won't happen.
  virtual void onDisableLoadElimination() {}

  /// Called when we visit a CallBase, before the analysis starts. Return false
  /// to stop further processing of the instruction.
  virtual bool onCallBaseVisitStart(CallBase &Call) { return true; }

  /// Called to account for a call.
  virtual void onCallPenalty() {}

  /// Called to account for the expectation the inlining would result in a load
  /// elimination.
  virtual void onLoadEliminationOpportunity() {}

  /// Called to account for the cost of argument setup for the Call in the
  /// callee's body (not the callsite currently under analysis).
  virtual void onCallArgumentSetup(const CallBase &Call) {}

  /// Called to account for a load relative intrinsic.
  virtual void onLoadRelativeIntrinsic() {}

  /// Called to account for a lowered call.
  virtual void onLoweredCall(Function *F, CallBase &Call, bool IsIndirectCall) {
  }

  /// Account for a jump table of given size. Return false to stop further
  /// processing of the switch instruction.
  virtual bool onJumpTable(unsigned JumpTableSize) { return true; }

  /// Account for a case cluster of given size. Return false to stop further
  /// processing of the instruction.
  virtual bool onCaseCluster(unsigned NumCaseCluster) { return true; }

  /// Called at the end of processing a switch instruction, with the given
  /// number of case clusters.
  virtual void onFinalizeSwitch(unsigned JumpTableSize,
                                unsigned NumCaseCluster) {}

  /// Called to account for any other instruction not specifically accounted
  /// for.
  virtual void onMissedSimplification() {}

  /// Start accounting potential benefits due to SROA for the given alloca.
  virtual void onInitializeSROAArg(AllocaInst *Arg) {}

  /// Account SROA savings for the AllocaInst value.
  virtual void onAggregateSROAUse(AllocaInst *V) {}

  bool handleSROA(Value *V, bool DoNotDisable) {
    // Check for SROA candidates in comparisons.
    if (auto *SROAArg = getSROAArgForValueOrNull(V)) {
      if (DoNotDisable) {
        onAggregateSROAUse(SROAArg);
        return true;
      }
      disableSROAForArg(SROAArg);
    }
    return false;
  }

  bool IsCallerRecursive = false;
  bool IsRecursiveCall = false;
  bool ExposesReturnsTwice = false;
  bool HasDynamicAlloca = false;
  bool ContainsNoDuplicateCall = false;
  bool HasReturn = false;
  bool HasIndirectBr = false;
  bool HasUninlineableIntrinsic = false;
  bool InitsVargArgs = false;

  /// Number of bytes allocated statically by the callee.
  uint64_t AllocatedSize = 0;
  unsigned NumInstructions = 0;
  unsigned NumVectorInstructions = 0;

  /// While we walk the potentially-inlined instructions, we build up and
  /// maintain a mapping of simplified values specific to this callsite. The
  /// idea is to propagate any special information we have about arguments to
  /// this call through the inlinable section of the function, and account for
  /// likely simplifications post-inlining. The most important aspect we track
  /// is CFG altering simplifications -- when we prove a basic block dead, that
  /// can cause dramatic shifts in the cost of inlining a function.
  DenseMap<Value *, Constant *> SimplifiedValues;

  /// Keep track of the values which map back (through function arguments) to
  /// allocas on the caller stack which could be simplified through SROA.
  DenseMap<Value *, AllocaInst *> SROAArgValues;

  /// Keep track of Allocas for which we believe we may get SROA optimization.
  DenseSet<AllocaInst *> EnabledSROAAllocas;

  /// Keep track of values which map to a pointer base and constant offset.
  DenseMap<Value *, std::pair<Value *, APInt>> ConstantOffsetPtrs;

  /// Keep track of dead blocks due to the constant arguments.
  SetVector<BasicBlock *> DeadBlocks;

  /// The mapping of the blocks to their known unique successors due to the
  /// constant arguments.
  DenseMap<BasicBlock *, BasicBlock *> KnownSuccessors;

  /// Model the elimination of repeated loads that is expected to happen
  /// whenever we simplify away the stores that would otherwise cause them to
  /// be loads.
  bool EnableLoadElimination;

  /// Whether we allow inlining for recursive call.
  bool AllowRecursiveCall;

  SmallPtrSet<Value *, 16> LoadAddrSet;

  AllocaInst *getSROAArgForValueOrNull(Value *V) const {
    auto It = SROAArgValues.find(V);
    if (It == SROAArgValues.end() || EnabledSROAAllocas.count(It->second) == 0)
      return nullptr;
    return It->second;
  }

  // Custom simplification helper routines.
  bool isAllocaDerivedArg(Value *V);
  void disableSROAForArg(AllocaInst *SROAArg);
  void disableSROA(Value *V);
  void findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB);
  void disableLoadElimination();
  bool isGEPFree(GetElementPtrInst &GEP);
  bool canFoldInboundsGEP(GetElementPtrInst &I);
  bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset);
  bool simplifyCallSite(Function *F, CallBase &Call);
  template <typename Callable>
  bool simplifyInstruction(Instruction &I, Callable Evaluate);
  ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V);

  /// Return true if the given argument to the function being considered for
  /// inlining has the given attribute set either at the call site or the
  /// function declaration. Primarily used to inspect call site specific
  /// attributes since these can be more precise than the ones on the callee
  /// itself.
  bool paramHasAttr(Argument *A, Attribute::AttrKind Attr);

  /// Return true if the given value is known non-null within the callee if
  /// inlined through this particular callsite.
  bool isKnownNonNullInCallee(Value *V);

  /// Return true if size growth is allowed when inlining the callee at
  /// \p Call.
  bool allowSizeGrowth(CallBase &Call);

  // Custom analysis routines.
  InlineResult analyzeBlock(BasicBlock *BB,
                            SmallPtrSetImpl<const Value *> &EphValues);

  // Disable several entry points to the visitor so we don't accidentally use
  // them by declaring but not defining them here.
  void visit(Module *);
  void visit(Module &);
  void visit(Function *);
  void visit(Function &);
  void visit(BasicBlock *);
  void visit(BasicBlock &);

  // Provide base case for our instruction visit.
  bool visitInstruction(Instruction &I);

  // Our visit overrides.
  bool visitAlloca(AllocaInst &I);
  bool visitPHI(PHINode &I);
  bool visitGetElementPtr(GetElementPtrInst &I);
  bool visitBitCast(BitCastInst &I);
  bool visitPtrToInt(PtrToIntInst &I);
  bool visitIntToPtr(IntToPtrInst &I);
  bool visitCastInst(CastInst &I);
  bool visitCmpInst(CmpInst &I);
  bool visitSub(BinaryOperator &I);
  bool visitBinaryOperator(BinaryOperator &I);
  bool visitFNeg(UnaryOperator &I);
  bool visitLoad(LoadInst &I);
  bool visitStore(StoreInst &I);
  bool visitExtractValue(ExtractValueInst &I);
  bool visitInsertValue(InsertValueInst &I);
  bool visitCallBase(CallBase &Call);
  bool visitReturnInst(ReturnInst &RI);
  bool visitBranchInst(BranchInst &BI);
  bool visitSelectInst(SelectInst &SI);
  bool visitSwitchInst(SwitchInst &SI);
  bool visitIndirectBrInst(IndirectBrInst &IBI);
  bool visitResumeInst(ResumeInst &RI);
  bool visitCleanupReturnInst(CleanupReturnInst &RI);
  bool visitCatchReturnInst(CatchReturnInst &RI);
  bool visitUnreachableInst(UnreachableInst &I);

public:
  CallAnalyzer(Function &Callee, CallBase &Call, const TargetTransformInfo &TTI,
               function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
               function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr,
               ProfileSummaryInfo *PSI = nullptr,
               OptimizationRemarkEmitter *ORE = nullptr)
      : TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI),
        PSI(PSI), F(Callee), DL(F.getParent()->getDataLayout()), ORE(ORE),
        CandidateCall(Call), EnableLoadElimination(true),
        AllowRecursiveCall(false) {}

  InlineResult analyze();

  Optional<Constant *> getSimplifiedValue(Instruction *I) {
    if (SimplifiedValues.find(I) != SimplifiedValues.end())
      return SimplifiedValues[I];
    return None;
  }

  // Keep a bunch of stats about the cost savings found so we can print them
  // out when debugging.
  unsigned NumConstantArgs = 0;
  unsigned NumConstantOffsetPtrArgs = 0;
  unsigned NumAllocaArgs = 0;
  unsigned NumConstantPtrCmps = 0;
  unsigned NumConstantPtrDiffs = 0;
  unsigned NumInstructionsSimplified = 0;

  void dump();
};

// Considering forming a binary search, we should find the number of nodes
// which is the same as the number of comparisons when lowered. For a given
// number of clusters, n, we can define a recursive function, f(n), to find
// the number of nodes in the tree. The recursion is:
// f(n) = 1 + f(n/2) + f(n - n/2), when n > 3,
// and f(n) = n, when n <= 3.
// This will lead to a binary tree where the leaf should be either f(2) or f(3)
// when n > 3. So, the number of comparisons from leaves should be n, while
// the number of non-leaf nodes should be:
// 2^(log2(n) - 1) - 1
// = 2^log2(n) * 2^-1 - 1
// = n / 2 - 1.
// Considering comparisons from leaf and non-leaf nodes, we can estimate the
// number of comparisons in a simple closed form:
// n + n / 2 - 1 = n * 3 / 2 - 1
int64_t getExpectedNumberOfCompare(int NumCaseCluster) {
  return 3 * static_cast<int64_t>(NumCaseCluster) / 2 - 1;
}
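
// Sanity check of the closed form above (illustrative, not in the original
// file): for n = 8 clusters, f(2) = 2 and f(4) = 1 + f(2) + f(2) = 5, so
// f(8) = 1 + f(4) + f(4) = 11, matching n * 3 / 2 - 1 = 8 * 3 / 2 - 1 = 11.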

/// FIXME: if it is necessary to derive from InlineCostCallAnalyzer, note
/// the FIXME in onLoweredCall, when instantiating an InlineCostCallAnalyzer
class InlineCostCallAnalyzer final : public CallAnalyzer {
  const int CostUpperBound = INT_MAX - InlineConstants::InstrCost - 1;
  const bool ComputeFullInlineCost;
  int LoadEliminationCost = 0;
  /// Bonus to be applied when percentage of vector instructions in callee is
  /// high (see more details in updateThreshold).
  int VectorBonus = 0;
  /// Bonus to be applied when the callee has only one reachable basic block.
  int SingleBBBonus = 0;

  /// Tunable parameters that control the analysis.
  const InlineParams &Params;

  // This DenseMap stores the delta change in cost and threshold after
  // accounting for the given instruction. The map is filled only with the
  // flag PrintInstructionComments on.
  DenseMap<const Instruction *, InstructionCostDetail> InstructionCostDetailMap;

  /// Upper bound for the inlining cost. Bonuses are being applied to account
  /// for speculative "expected profit" of the inlining decision.
  int Threshold = 0;

  /// Attempt to evaluate indirect calls to boost their inline cost.
  const bool BoostIndirectCalls;

  /// Ignore the threshold when finalizing analysis.
  const bool IgnoreThreshold;

  // True if the cost-benefit-analysis-based inliner is enabled.
  const bool CostBenefitAnalysisEnabled;

  /// Inlining cost measured in abstract units, accounts for all the
  /// instructions expected to be executed for a given function invocation.
  /// Instructions that are statically proven to be dead based on call-site
  /// arguments are not counted here.
  int Cost = 0;

  // The cumulative cost at the beginning of the basic block being analyzed. At
  // the end of analyzing each basic block, "Cost - CostAtBBStart" represents
  // the size of that basic block.
  int CostAtBBStart = 0;

  // The static size of live but cold basic blocks. This is "static" in the
  // sense that it's not weighted by profile counts at all.
  int ColdSize = 0;

  // Whether inlining is decided by cost-benefit analysis.
  bool DecidedByCostBenefit = false;

  // The cost-benefit pair computed by cost-benefit analysis.
  Optional<CostBenefitPair> CostBenefit = None;

  bool SingleBB = true;

  unsigned SROACostSavings = 0;
  unsigned SROACostSavingsLost = 0;

  /// The mapping of caller Alloca values to their accumulated cost savings. If
  /// we have to disable SROA for one of the allocas, this tells us how much
  /// cost must be added.
  DenseMap<AllocaInst *, int> SROAArgCosts;

  /// Return true if \p Call is a cold callsite.
  bool isColdCallSite(CallBase &Call, BlockFrequencyInfo *CallerBFI);

  /// Update Threshold based on callsite properties such as callee
  /// attributes and callee hotness for PGO builds. The Callee is explicitly
  /// passed to support analyzing indirect calls whose target is inferred by
  /// analysis.
  void updateThreshold(CallBase &Call, Function &Callee);
  /// Return a higher threshold if \p Call is a hot callsite.
  Optional<int> getHotCallSiteThreshold(CallBase &Call,
                                        BlockFrequencyInfo *CallerBFI);

  /// Handle a capped 'int' increment for Cost.
  void addCost(int64_t Inc, int64_t UpperBound = INT_MAX) {
    assert(UpperBound > 0 && UpperBound <= INT_MAX && "invalid upper bound");
    Cost = std::min<int>(UpperBound, Cost + Inc);
  }

  void onDisableSROA(AllocaInst *Arg) override {
    auto CostIt = SROAArgCosts.find(Arg);
    if (CostIt == SROAArgCosts.end())
      return;
    addCost(CostIt->second);
    SROACostSavings -= CostIt->second;
    SROACostSavingsLost += CostIt->second;
    SROAArgCosts.erase(CostIt);
  }

  void onDisableLoadElimination() override {
    addCost(LoadEliminationCost);
    LoadEliminationCost = 0;
  }

  bool onCallBaseVisitStart(CallBase &Call) override {
    if (Optional<int> AttrCallThresholdBonus =
            getStringFnAttrAsInt(Call, "call-threshold-bonus"))
      Threshold += *AttrCallThresholdBonus;

    if (Optional<int> AttrCallCost =
            getStringFnAttrAsInt(Call, "call-inline-cost")) {
      addCost(*AttrCallCost);
      // Prevent further processing of the call since we want to override its
      // inline cost, not just add to it.
      return false;
    }
    return true;
  }

  void onCallPenalty() override { addCost(CallPenalty); }
  void onCallArgumentSetup(const CallBase &Call) override {
    // Pay the price of the argument setup. We account for the average 1
    // instruction per call argument setup here.
    addCost(Call.arg_size() * InlineConstants::InstrCost);
  }
  void onLoadRelativeIntrinsic() override {
    // This is normally lowered to 4 LLVM instructions.
    addCost(3 * InlineConstants::InstrCost);
  }
  void onLoweredCall(Function *F, CallBase &Call,
                     bool IsIndirectCall) override {
    // We account for the average 1 instruction per call argument setup here.
    addCost(Call.arg_size() * InlineConstants::InstrCost);

    // If we have a constant that we are calling as a function, we can peer
    // through it and see the function target. This happens not infrequently
    // during devirtualization and so we want to give it a hefty bonus for
    // inlining, but cap that bonus in the event that inlining wouldn't pan out.
    // Pretend to inline the function, with a custom threshold.
    if (IsIndirectCall && BoostIndirectCalls) {
      auto IndirectCallParams = Params;
      IndirectCallParams.DefaultThreshold =
          InlineConstants::IndirectCallThreshold;
      /// FIXME: if InlineCostCallAnalyzer is derived from, this may need
      /// to instantiate the derived class.
      InlineCostCallAnalyzer CA(*F, Call, IndirectCallParams, TTI,
                                GetAssumptionCache, GetBFI, PSI, ORE, false);
      if (CA.analyze().isSuccess()) {
        // We were able to inline the indirect call! Subtract the cost from the
        // threshold to get the bonus we want to apply, but don't go below zero.
        Cost -= std::max(0, CA.getThreshold() - CA.getCost());
      }
    } else
      // Otherwise simply add the cost for merely making the call.
      addCost(CallPenalty);
  }

  void onFinalizeSwitch(unsigned JumpTableSize,
                        unsigned NumCaseCluster) override {
    // If suitable for a jump table, consider the cost for the table size and
    // branch to destination.
    // Maximum valid cost increased in this function.
    if (JumpTableSize) {
      int64_t JTCost =
          static_cast<int64_t>(JumpTableSize) * InlineConstants::InstrCost +
          4 * InlineConstants::InstrCost;

      addCost(JTCost, static_cast<int64_t>(CostUpperBound));
      return;
    }

    if (NumCaseCluster <= 3) {
      // Suppose a comparison includes one compare and one conditional branch.
      addCost(NumCaseCluster * 2 * InlineConstants::InstrCost);
      return;
    }

    int64_t ExpectedNumberOfCompare =
        getExpectedNumberOfCompare(NumCaseCluster);
    int64_t SwitchCost =
        ExpectedNumberOfCompare * 2 * InlineConstants::InstrCost;

    addCost(SwitchCost, static_cast<int64_t>(CostUpperBound));
  }
  void onMissedSimplification() override {
    addCost(InlineConstants::InstrCost);
  }

  void onInitializeSROAArg(AllocaInst *Arg) override {
    assert(Arg != nullptr &&
           "Should not initialize SROA costs for null value.");
    SROAArgCosts[Arg] = 0;
  }

  void onAggregateSROAUse(AllocaInst *SROAArg) override {
    auto CostIt = SROAArgCosts.find(SROAArg);
    assert(CostIt != SROAArgCosts.end() &&
           "expected this argument to have a cost");
    CostIt->second += InlineConstants::InstrCost;
    SROACostSavings += InlineConstants::InstrCost;
  }

  void onBlockStart(const BasicBlock *BB) override { CostAtBBStart = Cost; }

  void onBlockAnalyzed(const BasicBlock *BB) override {
    if (CostBenefitAnalysisEnabled) {
      // Keep track of the static size of live but cold basic blocks. For now,
      // we define a cold basic block to be one that's never executed.
      assert(GetBFI && "GetBFI must be available");
      BlockFrequencyInfo *BFI = &(GetBFI(F));
      assert(BFI && "BFI must be available");
      auto ProfileCount = BFI->getBlockProfileCount(BB);
      assert(ProfileCount.hasValue());
      if (ProfileCount.getValue() == 0)
        ColdSize += Cost - CostAtBBStart;
    }

    auto *TI = BB->getTerminator();
    // If we had any successors at this point, then post-inlining is likely to
    // have them as well. Note that we assume any basic blocks which existed
    // due to branches or switches which folded above will also fold after
    // inlining.
    if (SingleBB && TI->getNumSuccessors() > 1) {
      // Take off the bonus we applied to the threshold.
      Threshold -= SingleBBBonus;
      SingleBB = false;
    }
  }

  void onInstructionAnalysisStart(const Instruction *I) override {
    // This function is called to store the initial cost of inlining before
    // the given instruction was assessed.
    if (!PrintInstructionComments)
      return;
    InstructionCostDetailMap[I].CostBefore = Cost;
    InstructionCostDetailMap[I].ThresholdBefore = Threshold;
  }

  void onInstructionAnalysisFinish(const Instruction *I) override {
    // This function is called to find new values of cost and threshold after
    // the instruction has been assessed.
    if (!PrintInstructionComments)
      return;
    InstructionCostDetailMap[I].CostAfter = Cost;
    InstructionCostDetailMap[I].ThresholdAfter = Threshold;
  }

  bool isCostBenefitAnalysisEnabled() {
    if (!PSI || !PSI->hasProfileSummary())
      return false;

    if (!GetBFI)
      return false;

    if (InlineEnableCostBenefitAnalysis.getNumOccurrences()) {
      // Honor the explicit request from the user.
      if (!InlineEnableCostBenefitAnalysis)
        return false;
    } else {
      // Otherwise, require instrumentation profile.
      if (!PSI->hasInstrumentationProfile())
        return false;
    }

    auto *Caller = CandidateCall.getParent()->getParent();
    if (!Caller->getEntryCount())
      return false;

    BlockFrequencyInfo *CallerBFI = &(GetBFI(*Caller));
    if (!CallerBFI)
      return false;

    // For now, limit to hot call site.
    if (!PSI->isHotCallSite(CandidateCall, CallerBFI))
      return false;

    // Make sure we have a nonzero entry count.
    auto EntryCount = F.getEntryCount();
    if (!EntryCount || !EntryCount.getCount())
      return false;

    BlockFrequencyInfo *CalleeBFI = &(GetBFI(F));
    if (!CalleeBFI)
      return false;

    return true;
  }

  // Determine whether we should inline the given call site, taking into
  // account both the size cost and the cycle savings. Return None if we don't
  // have sufficient profiling information to determine.
  Optional<bool> costBenefitAnalysis() {
    if (!CostBenefitAnalysisEnabled)
      return None;

    // buildInlinerPipeline in the pass builder sets HotCallSiteThreshold to 0
    // for the prelink phase of the AutoFDO + ThinLTO build. Honor the logic by
    // falling back to the cost-based metric.
    // TODO: Improve this hacky condition.
    if (Threshold == 0)
      return None;

    assert(GetBFI);
    BlockFrequencyInfo *CalleeBFI = &(GetBFI(F));
    assert(CalleeBFI);

    // The cycle savings expressed as the sum of InlineConstants::InstrCost
    // multiplied by the estimated dynamic count of each instruction we can
    // avoid. Savings come from the call site cost, such as argument setup and
    // the call instruction, as well as the instructions that are folded.
    //
    // We use 128-bit APInt here to avoid potential overflow. This variable
    // should stay well below 10^^24 (or 2^^80) in practice. This "worst" case
    // assumes that we can avoid or fold a billion instructions, each with a
    // profile count of 10^^15 -- roughly the number of cycles for a 24-hour
    // period on a 4GHz machine.
    APInt CycleSavings(128, 0);

    for (auto &BB : F) {
      APInt CurrentSavings(128, 0);
      for (auto &I : BB) {
        if (BranchInst *BI = dyn_cast<BranchInst>(&I)) {
          // Count a conditional branch as savings if it becomes unconditional.
          if (BI->isConditional() &&
              dyn_cast_or_null<ConstantInt>(
                  SimplifiedValues.lookup(BI->getCondition()))) {
            CurrentSavings += InlineConstants::InstrCost;
          }
        } else if (Value *V = dyn_cast<Value>(&I)) {
          // Count an instruction as savings if we can fold it.
          if (SimplifiedValues.count(V)) {
            CurrentSavings += InlineConstants::InstrCost;
          }
        }
      }

      auto ProfileCount = CalleeBFI->getBlockProfileCount(&BB);
      assert(ProfileCount.hasValue());
      CurrentSavings *= ProfileCount.getValue();
      CycleSavings += CurrentSavings;
    }

    // Compute the cycle savings per call.
    auto EntryProfileCount = F.getEntryCount();
    assert(EntryProfileCount.hasValue() && EntryProfileCount.getCount());
    auto EntryCount = EntryProfileCount.getCount();
    CycleSavings += EntryCount / 2;
    CycleSavings = CycleSavings.udiv(EntryCount);

    // Compute the total savings for the call site.
    auto *CallerBB = CandidateCall.getParent();
    BlockFrequencyInfo *CallerBFI = &(GetBFI(*(CallerBB->getParent())));
    CycleSavings += getCallsiteCost(this->CandidateCall, DL);
    CycleSavings *= CallerBFI->getBlockProfileCount(CallerBB).getValue();

    // Remove the cost of the cold basic blocks.
    int Size = Cost - ColdSize;

    // Allow tiny callees to be inlined regardless of whether they meet the
    // savings threshold.
    Size = Size > InlineSizeAllowance ? Size - InlineSizeAllowance : 1;

    CostBenefit.emplace(APInt(128, Size), CycleSavings);

    // Return true if the savings justify the cost of inlining. Specifically,
    // we evaluate the following inequality:
    //
    //  CycleSavings      PSI->getOrCompHotCountThreshold()
    // -------------- >= -----------------------------------
    //       Size              InlineSavingsMultiplier
    //
    // Note that the left hand side is specific to a call site. The right hand
    // side is a constant for the entire executable.
    APInt LHS = CycleSavings;
    LHS *= InlineSavingsMultiplier;
    APInt RHS(128, PSI->getOrCompHotCountThreshold());
    RHS *= Size;
    return LHS.uge(RHS);
  }
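
  // Worked example of the inequality above (illustrative numbers, not from
  // the original file): with CycleSavings = 10000 per call, Size = 20,
  // InlineSavingsMultiplier = 8 (the default), and a hot count threshold of
  // 3000, we compare LHS = 10000 * 8 = 80000 against RHS = 3000 * 20 = 60000;
  // LHS >= RHS, so the cycle savings justify inlining this call site.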

  InlineResult finalizeAnalysis() override {
    // Loops generally act a lot like calls in that they act like barriers to
    // movement, require a certain amount of setup, etc. So when optimising for
    // size, we penalise any call sites that perform loops. We do this after
    // all other costs here, so will likely only be dealing with relatively
    // small functions (and hence DT and LI will hopefully be cheap).
    auto *Caller = CandidateCall.getFunction();
    if (Caller->hasMinSize()) {
      DominatorTree DT(F);
      LoopInfo LI(DT);
      int NumLoops = 0;
      for (Loop *L : LI) {
        // Ignore loops that will not be executed.
        if (DeadBlocks.count(L->getHeader()))
          continue;
        NumLoops++;
      }
      addCost(NumLoops * InlineConstants::LoopPenalty);
    }

    // We applied the maximum possible vector bonus at the beginning. Now,
    // subtract the excess bonus, if any, from the Threshold before
    // comparing against Cost.
    if (NumVectorInstructions <= NumInstructions / 10)
      Threshold -= VectorBonus;
    else if (NumVectorInstructions <= NumInstructions / 2)
      Threshold -= VectorBonus / 2;

    if (Optional<int> AttrCost =
            getStringFnAttrAsInt(CandidateCall, "function-inline-cost"))
      Cost = *AttrCost;

    if (Optional<int> AttrThreshold =
            getStringFnAttrAsInt(CandidateCall, "function-inline-threshold"))
      Threshold = *AttrThreshold;

    if (auto Result = costBenefitAnalysis()) {
      DecidedByCostBenefit = true;
      if (Result.getValue())
        return InlineResult::success();
      else
        return InlineResult::failure("Cost over threshold.");
    }

    if (IgnoreThreshold || Cost < std::max(1, Threshold))
      return InlineResult::success();
    return InlineResult::failure("Cost over threshold.");
  }

  bool shouldStop() override {
    // Bail out the moment we cross the threshold. This means we'll under-count
    // the cost, but only when undercounting doesn't matter.
    return !IgnoreThreshold && Cost >= Threshold && !ComputeFullInlineCost;
  }

  void onLoadEliminationOpportunity() override {
    LoadEliminationCost += InlineConstants::InstrCost;
  }

  InlineResult onAnalysisStart() override {
    // Perform some tweaks to the cost and threshold based on the direct
    // callsite information.

    // We want to more aggressively inline vector-dense kernels, so up the
    // threshold, and we'll lower it if the % of vector instructions gets too
    // low. Note that these bonuses are somewhat arbitrary and evolved over
    // time by accident as much as because they are principled bonuses.
    //
    // FIXME: It would be nice to remove all such bonuses. At least it would be
    // nice to base the bonus values on something more scientific.
    assert(NumInstructions == 0);
    assert(NumVectorInstructions == 0);

    // Update the threshold based on callsite properties.
    updateThreshold(CandidateCall, F);

    // While Threshold depends on commandline options that can take negative
    // values, we want to enforce the invariant that the computed threshold and
    // bonuses are non-negative.
    assert(Threshold >= 0);
    assert(SingleBBBonus >= 0);
    assert(VectorBonus >= 0);

    // Speculatively apply all possible bonuses to Threshold. If cost exceeds
    // this Threshold any time, and cost cannot decrease, we can stop processing
    // the rest of the function body.
    Threshold += (SingleBBBonus + VectorBonus);

    // Give out bonuses for the callsite, as the instructions setting them up
    // will be gone after inlining.
    addCost(-getCallsiteCost(this->CandidateCall, DL));

    // If this function uses the coldcc calling convention, prefer not to
    // inline it.
    if (F.getCallingConv() == CallingConv::Cold)
      Cost += InlineConstants::ColdccPenalty;

    // Check if we're done. This can happen due to bonuses and penalties.
    if (Cost >= Threshold && !ComputeFullInlineCost)
      return InlineResult::failure("high cost");

    return InlineResult::success();
  }

public:
  InlineCostCallAnalyzer(
      Function &Callee, CallBase &Call, const InlineParams &Params,
      const TargetTransformInfo &TTI,
      function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
      function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr,
      ProfileSummaryInfo *PSI = nullptr,
      OptimizationRemarkEmitter *ORE = nullptr, bool BoostIndirect = true,
      bool IgnoreThreshold = false)
      : CallAnalyzer(Callee, Call, TTI, GetAssumptionCache, GetBFI, PSI, ORE),
        ComputeFullInlineCost(OptComputeFullInlineCost ||
                              Params.ComputeFullInlineCost || ORE ||
                              isCostBenefitAnalysisEnabled()),
        Params(Params), Threshold(Params.DefaultThreshold),
        BoostIndirectCalls(BoostIndirect), IgnoreThreshold(IgnoreThreshold),
        CostBenefitAnalysisEnabled(isCostBenefitAnalysisEnabled()),
        Writer(this) {
    AllowRecursiveCall = Params.AllowRecursiveCall.getValue();
  }

  /// Annotation Writer for instruction details.
  InlineCostAnnotationWriter Writer;

  void dump();

  // Prints the same analysis as dump(), but its definition is not dependent
  // on the build.
  void print();

  Optional<InstructionCostDetail> getCostDetails(const Instruction *I) {
    if (InstructionCostDetailMap.find(I) != InstructionCostDetailMap.end())
      return InstructionCostDetailMap[I];
    return None;
  }

  virtual ~InlineCostCallAnalyzer() {}
  int getThreshold() const { return Threshold; }
  int getCost() const { return Cost; }
  Optional<CostBenefitPair> getCostBenefitPair() { return CostBenefit; }
  bool wasDecidedByCostBenefit() const { return DecidedByCostBenefit; }
};

class InlineCostFeaturesAnalyzer final : public CallAnalyzer {
private:
  InlineCostFeatures Cost = {};

  // FIXME: These constants are taken from the heuristic-based cost visitor.
  // These should be removed entirely in a later revision to avoid reliance on
  // heuristics in the ML inliner.
  static constexpr int JTCostMultiplier = 4;
  static constexpr int CaseClusterCostMultiplier = 2;
  static constexpr int SwitchCostMultiplier = 2;

  // FIXME: These are taken from the heuristic-based cost visitor: we should
  // eventually abstract these to the CallAnalyzer to avoid duplication.
  unsigned SROACostSavingOpportunities = 0;
  int VectorBonus = 0;
  int SingleBBBonus = 0;
  int Threshold = 5;

  DenseMap<AllocaInst *, unsigned> SROACosts;

  void increment(InlineCostFeatureIndex Feature, int64_t Delta = 1) {
    Cost[static_cast<size_t>(Feature)] += Delta;
  }

  void set(InlineCostFeatureIndex Feature, int64_t Value) {
    Cost[static_cast<size_t>(Feature)] = Value;
  }

  void onDisableSROA(AllocaInst *Arg) override {
    auto CostIt = SROACosts.find(Arg);
    if (CostIt == SROACosts.end())
      return;

    increment(InlineCostFeatureIndex::SROALosses, CostIt->second);
    SROACostSavingOpportunities -= CostIt->second;
    SROACosts.erase(CostIt);
  }

  void onDisableLoadElimination() override {
    set(InlineCostFeatureIndex::LoadElimination, 1);
  }

  void onCallPenalty() override {
    increment(InlineCostFeatureIndex::CallPenalty, CallPenalty);
  }

  void onCallArgumentSetup(const CallBase &Call) override {
    increment(InlineCostFeatureIndex::CallArgumentSetup,
              Call.arg_size() * InlineConstants::InstrCost);
  }

  void onLoadRelativeIntrinsic() override {
    increment(InlineCostFeatureIndex::LoadRelativeIntrinsic,
              3 * InlineConstants::InstrCost);
  }

  void onLoweredCall(Function *F, CallBase &Call,
                     bool IsIndirectCall) override {
    increment(InlineCostFeatureIndex::LoweredCallArgSetup,
              Call.arg_size() * InlineConstants::InstrCost);

    if (IsIndirectCall) {
      InlineParams IndirectCallParams = {/* DefaultThreshold*/ 0,
                                         /*HintThreshold*/ {},
                                         /*ColdThreshold*/ {},
                                         /*OptSizeThreshold*/ {},
                                         /*OptMinSizeThreshold*/ {},
                                         /*HotCallSiteThreshold*/ {},
                                         /*LocallyHotCallSiteThreshold*/ {},
                                         /*ColdCallSiteThreshold*/ {},
                                         /*ComputeFullInlineCost*/ true,
                                         /*EnableDeferral*/ true};
      IndirectCallParams.DefaultThreshold =
          InlineConstants::IndirectCallThreshold;

      InlineCostCallAnalyzer CA(*F, Call, IndirectCallParams, TTI,
                                GetAssumptionCache, GetBFI, PSI, ORE, false,
                                true);
      if (CA.analyze().isSuccess()) {
        increment(InlineCostFeatureIndex::NestedInlineCostEstimate,
                  CA.getCost());
        increment(InlineCostFeatureIndex::NestedInlines, 1);
      }
    } else {
      onCallPenalty();
    }
  }

  void onFinalizeSwitch(unsigned JumpTableSize,
                        unsigned NumCaseCluster) override {
    if (JumpTableSize) {
      int64_t JTCost =
          static_cast<int64_t>(JumpTableSize) * InlineConstants::InstrCost +
          JTCostMultiplier * InlineConstants::InstrCost;
      increment(InlineCostFeatureIndex::JumpTablePenalty, JTCost);
      return;
    }

    if (NumCaseCluster <= 3) {
      increment(InlineCostFeatureIndex::CaseClusterPenalty,
                NumCaseCluster * CaseClusterCostMultiplier *
                    InlineConstants::InstrCost);
      return;
    }

    int64_t ExpectedNumberOfCompare =
        getExpectedNumberOfCompare(NumCaseCluster);

    int64_t SwitchCost = ExpectedNumberOfCompare * SwitchCostMultiplier *
                         InlineConstants::InstrCost;
    increment(InlineCostFeatureIndex::SwitchPenalty, SwitchCost);
  }

  void onMissedSimplification() override {
    increment(InlineCostFeatureIndex::UnsimplifiedCommonInstructions,
              InlineConstants::InstrCost);
  }

  void onInitializeSROAArg(AllocaInst *Arg) override { SROACosts[Arg] = 0; }
  void onAggregateSROAUse(AllocaInst *Arg) override {
    SROACosts.find(Arg)->second += InlineConstants::InstrCost;
    SROACostSavingOpportunities += InlineConstants::InstrCost;
  }

  void onBlockAnalyzed(const BasicBlock *BB) override {
    if (BB->getTerminator()->getNumSuccessors() > 1)
      set(InlineCostFeatureIndex::IsMultipleBlocks, 1);
    Threshold -= SingleBBBonus;
  }

  InlineResult finalizeAnalysis() override {
    auto *Caller = CandidateCall.getFunction();
    if (Caller->hasMinSize()) {
      DominatorTree DT(F);
      LoopInfo LI(DT);
      for (Loop *L : LI) {
        // Ignore loops that will not be executed.
        if (DeadBlocks.count(L->getHeader()))
          continue;
        increment(InlineCostFeatureIndex::NumLoops,
                  InlineConstants::LoopPenalty);
      }
    }
    set(InlineCostFeatureIndex::DeadBlocks, DeadBlocks.size());
    set(InlineCostFeatureIndex::SimplifiedInstructions,
        NumInstructionsSimplified);
    set(InlineCostFeatureIndex::ConstantArgs, NumConstantArgs);
    set(InlineCostFeatureIndex::ConstantOffsetPtrArgs,
        NumConstantOffsetPtrArgs);
    set(InlineCostFeatureIndex::SROASavings, SROACostSavingOpportunities);

    if (NumVectorInstructions <= NumInstructions / 10)
      Threshold -= VectorBonus;
    else if (NumVectorInstructions <= NumInstructions / 2)
      Threshold -= VectorBonus / 2;

    set(InlineCostFeatureIndex::Threshold, Threshold);

    return InlineResult::success();
  }

  bool shouldStop() override { return false; }

  void onLoadEliminationOpportunity() override {
    increment(InlineCostFeatureIndex::LoadElimination, 1);
  }

  InlineResult onAnalysisStart() override {
    increment(InlineCostFeatureIndex::CallSiteCost,
              -1 * getCallsiteCost(this->CandidateCall, DL));

    set(InlineCostFeatureIndex::ColdCcPenalty,
        (F.getCallingConv() == CallingConv::Cold));

    // FIXME: we shouldn't repeat this logic in both the Features and Cost
    // analyzer - instead, we should abstract it to a common method in the
    // CallAnalyzer.
    int SingleBBBonusPercent = 50;
    int VectorBonusPercent = TTI.getInlinerVectorBonusPercent();
    Threshold += TTI.adjustInliningThreshold(&CandidateCall);
    Threshold *= TTI.getInliningThresholdMultiplier();
    SingleBBBonus = Threshold * SingleBBBonusPercent / 100;
    VectorBonus = Threshold * VectorBonusPercent / 100;
    Threshold += (SingleBBBonus + VectorBonus);

    return InlineResult::success();
  }

public:
  InlineCostFeaturesAnalyzer(
      const TargetTransformInfo &TTI,
      function_ref<AssumptionCache &(Function &)> &GetAssumptionCache,
      function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
      ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE,
      Function &Callee, CallBase &Call)
      : CallAnalyzer(Callee, Call, TTI, GetAssumptionCache, GetBFI, PSI) {}

  const InlineCostFeatures &features() const { return Cost; }
};

} // namespace

/// Test whether the given value is an Alloca-derived function argument.
bool CallAnalyzer::isAllocaDerivedArg(Value *V) {
  return SROAArgValues.count(V);
}

void CallAnalyzer::disableSROAForArg(AllocaInst *SROAArg) {
  onDisableSROA(SROAArg);
  EnabledSROAAllocas.erase(SROAArg);
  disableLoadElimination();
}

void InlineCostAnnotationWriter::emitInstructionAnnot(
    const Instruction *I, formatted_raw_ostream &OS) {
  // The cost of inlining of the given instruction is always printed.
  // The threshold delta is printed only when it is non-zero. It happens
  // when we decided to give a bonus at a particular instruction.
  Optional<InstructionCostDetail> Record = ICCA->getCostDetails(I);
  if (!Record)
    OS << "; No analysis for the instruction";
  else {
    OS << "; cost before = " << Record->CostBefore
       << ", cost after = " << Record->CostAfter
       << ", threshold before = " << Record->ThresholdBefore
       << ", threshold after = " << Record->ThresholdAfter << ", ";
    OS << "cost delta = " << Record->getCostDelta();
    if (Record->hasThresholdChanged())
      OS << ", threshold delta = " << Record->getThresholdDelta();
  }
  auto C = ICCA->getSimplifiedValue(const_cast<Instruction *>(I));
  if (C) {
    OS << ", simplified to ";
    C.getValue()->print(OS, true);
  }
  OS << "\n";
}
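
// With -print-instruction-comments enabled, each instruction in the dump is
// prefixed by an annotation of this shape (values here are made up for
// illustration, and the annotation is emitted as a single line):
//
//   ; cost before = 20, cost after = 25, threshold before = 225,
//   ;   threshold after = 225, cost delta = 5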

/// If 'V' maps to a SROA candidate, disable SROA for it.
void CallAnalyzer::disableSROA(Value *V) {
  if (auto *SROAArg = getSROAArgForValueOrNull(V)) {
    disableSROAForArg(SROAArg);
  }
}

void CallAnalyzer::disableLoadElimination() {
  if (EnableLoadElimination) {
    onDisableLoadElimination();
    EnableLoadElimination = false;
  }
}

/// Accumulate a constant GEP offset into an APInt if possible.
///
/// Returns false if unable to compute the offset for any reason. Respects any
/// simplified values known during the analysis of this callsite.
bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
  unsigned IntPtrWidth = DL.getIndexTypeSizeInBits(GEP.getType());
  assert(IntPtrWidth == Offset.getBitWidth());

  for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
       GTI != GTE; ++GTI) {
    ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
    if (!OpC)
      if (Constant *SimpleOp = SimplifiedValues.lookup(GTI.getOperand()))
        OpC = dyn_cast<ConstantInt>(SimpleOp);
    if (!OpC)
      return false;
    if (OpC->isZero())
      continue;

    // Handle a struct index, which adds its field offset to the pointer.
    if (StructType *STy = GTI.getStructTypeOrNull()) {
      unsigned ElementIdx = OpC->getZExtValue();
      const StructLayout *SL = DL.getStructLayout(STy);
      Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx));
      continue;
    }

    APInt TypeSize(IntPtrWidth, DL.getTypeAllocSize(GTI.getIndexedType()));
    Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize;
  }
  return true;
}
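
// For example (an assumed DataLayout, not from the original file), given
//
//   %f = getelementptr inbounds { i32, i64 }, { i32, i64 }* %p, i32 0, i32 1
//
// the struct index adds the field offset of element 1 -- 8 bytes once the
// i64 is aligned -- to Offset, while an array-style index would instead add
// index * alloc-size of the indexed type, sign-extended to the index width.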

/// Use TTI to check whether a GEP is free.
///
/// Respects any simplified values known during the analysis of this callsite.
bool CallAnalyzer::isGEPFree(GetElementPtrInst &GEP) {
  SmallVector<Value *, 4> Operands;
  Operands.push_back(GEP.getOperand(0));
  for (const Use &Op : GEP.indices())
    if (Constant *SimpleOp = SimplifiedValues.lookup(Op))
      Operands.push_back(SimpleOp);
    else
      Operands.push_back(Op);
  return TTI.getUserCost(&GEP, Operands,
                         TargetTransformInfo::TCK_SizeAndLatency) ==
         TargetTransformInfo::TCC_Free;
}

bool CallAnalyzer::visitAlloca(AllocaInst &I) {
  disableSROA(I.getOperand(0));

  // Check whether inlining will turn a dynamic alloca into a static
  // alloca and handle that case.
  if (I.isArrayAllocation()) {
    Constant *Size = SimplifiedValues.lookup(I.getArraySize());
    if (auto *AllocSize = dyn_cast_or_null<ConstantInt>(Size)) {
      // Sometimes a dynamic alloca could be converted into a static alloca
      // after this constant prop, and become a huge static alloca on an
      // unconditional CFG path. Avoid inlining if this is going to happen above
      // a threshold.
      // FIXME: If the threshold is removed or lowered too much, we could end up
      // being too pessimistic and prevent inlining non-problematic code. This
      // could result in unintended perf regressions. A better overall strategy
      // is needed to track stack usage during inlining.
      Type *Ty = I.getAllocatedType();
      AllocatedSize = SaturatingMultiplyAdd(
          AllocSize->getLimitedValue(),
          DL.getTypeAllocSize(Ty).getKnownMinSize(), AllocatedSize);
      if (AllocatedSize > InlineConstants::MaxSimplifiedDynamicAllocaToInline)
        HasDynamicAlloca = true;
      return false;
    }
  }

  // Accumulate the allocated size.
  if (I.isStaticAlloca()) {
    Type *Ty = I.getAllocatedType();
    AllocatedSize =
        SaturatingAdd(DL.getTypeAllocSize(Ty).getKnownMinSize(), AllocatedSize);
  }

  // FIXME: This is overly conservative. Dynamic allocas are inefficient for
  // a variety of reasons, and so we would like to not inline them into
  // functions which don't currently have a dynamic alloca. This simply
  // disables inlining altogether in the presence of a dynamic alloca.
  if (!I.isStaticAlloca())
    HasDynamicAlloca = true;

  return false;
}
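
// A dynamic alloca can become static here when its size operand simplifies
// to a constant at this call site. A minimal sketch (hypothetical IR, not
// from the original file):
//
//   define void @callee(i32 %n) {
//     %buf = alloca i32, i32 %n    ; dynamic in the callee in isolation ...
//     ...
//   }
//   call void @callee(i32 16)      ; ... but static once %n = 16 is known, so
//                                  ; 64 bytes are accumulated and checked
//                                  ; against the size threshold above.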

bool CallAnalyzer::visitPHI(PHINode &I) {
  // FIXME: We need to propagate SROA *disabling* through phi nodes, even
  // though we don't want to propagate its bonuses. The idea is to disable
  // SROA if it *might* be used in an inappropriate manner.

  // Phi nodes are always zero-cost.
  // FIXME: Pointer sizes may differ between different address spaces, so do we
  // need to use correct address space in the call to getPointerSizeInBits here?
  // Or could we skip the getPointerSizeInBits call completely? As far as I can
  // see the ZeroOffset is used as a dummy value, so we can probably use any
  // bit width for the ZeroOffset?
  APInt ZeroOffset = APInt::getZero(DL.getPointerSizeInBits(0));
  bool CheckSROA = I.getType()->isPointerTy();

  // Track the constant or pointer with constant offset we've seen so far.
  Constant *FirstC = nullptr;
  std::pair<Value *, APInt> FirstBaseAndOffset = {nullptr, ZeroOffset};
  Value *FirstV = nullptr;

  for (unsigned i = 0, e = I.getNumIncomingValues(); i != e; ++i) {
    BasicBlock *Pred = I.getIncomingBlock(i);
    // If the incoming block is dead, skip the incoming block.
    if (DeadBlocks.count(Pred))
      continue;
    // If the parent block of phi is not the known successor of the incoming
    // block, skip the incoming block.
    BasicBlock *KnownSuccessor = KnownSuccessors[Pred];
    if (KnownSuccessor && KnownSuccessor != I.getParent())
      continue;

    Value *V = I.getIncomingValue(i);
    // If the incoming value is this phi itself, skip the incoming value.
    if (&I == V)
      continue;

    Constant *C = dyn_cast<Constant>(V);
    if (!C)
      C = SimplifiedValues.lookup(V);

    std::pair<Value *, APInt> BaseAndOffset = {nullptr, ZeroOffset};
    if (!C && CheckSROA)
      BaseAndOffset = ConstantOffsetPtrs.lookup(V);

    if (!C && !BaseAndOffset.first)
      // The incoming value is neither a constant nor a pointer with constant
      // offset, exit early.
      return true;

    if (FirstC) {
      if (FirstC == C)
        // If we've seen a constant incoming value before and it is the same
        // constant we see this time, continue checking the next incoming value.
        continue;
      // Otherwise early exit because we either see a different constant or saw
      // a constant before but we have a pointer with constant offset this time.
      return true;
    }

    if (FirstV) {
      // The same logic as above, but check pointer with constant offset here.
      if (FirstBaseAndOffset == BaseAndOffset)
        continue;
      return true;
    }

    if (C) {
      // This is the 1st time we've seen a constant, record it.
      FirstC = C;
      continue;
    }

    // The remaining case is that this is the 1st time we've seen a pointer
    // with constant offset, record it.
    FirstV = V;
    FirstBaseAndOffset = BaseAndOffset;
  }

  // Check if we can map phi to a constant.
  if (FirstC) {
    SimplifiedValues[&I] = FirstC;
    return true;
  }

  // Check if we can map phi to a pointer with constant offset.
  if (FirstBaseAndOffset.first) {
    ConstantOffsetPtrs[&I] = FirstBaseAndOffset;

    if (auto *SROAArg = getSROAArgForValueOrNull(FirstV))
      SROAArgValues[&I] = SROAArg;
  }

  return true;
}

/// Check we can fold GEPs of constant-offset call site argument pointers.
/// This requires target data and inbounds GEPs.
///
/// \return true if the specified GEP can be folded.
bool CallAnalyzer::canFoldInboundsGEP(GetElementPtrInst &I) {
  // Check if we have a base + offset for the pointer.
  std::pair<Value *, APInt> BaseAndOffset =
      ConstantOffsetPtrs.lookup(I.getPointerOperand());
  if (!BaseAndOffset.first)
    return false;

  // Check if the offset of this GEP is constant, and if so accumulate it
  // into Offset.
  if (!accumulateGEPOffset(cast<GEPOperator>(I), BaseAndOffset.second))
    return false;

  // Add the result as a new mapping to Base + Offset.
  ConstantOffsetPtrs[&I] = BaseAndOffset;

  return true;
}

bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) {
  auto *SROAArg = getSROAArgForValueOrNull(I.getPointerOperand());

  // Lambda to check whether a GEP's indices are all constant.
  auto IsGEPOffsetConstant = [&](GetElementPtrInst &GEP) {
    for (const Use &Op : GEP.indices())
      if (!isa<Constant>(Op) && !SimplifiedValues.lookup(Op))
        return false;
    return true;
  };

  if (!DisableGEPConstOperand)
    if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
          SmallVector<Constant *, 2> Indices;
          for (unsigned int Index = 1; Index < COps.size(); ++Index)
            Indices.push_back(COps[Index]);
          return ConstantExpr::getGetElementPtr(
              I.getSourceElementType(), COps[0], Indices, I.isInBounds());
        }))
      return true;

  if ((I.isInBounds() && canFoldInboundsGEP(I)) || IsGEPOffsetConstant(I)) {
    if (SROAArg)
      SROAArgValues[&I] = SROAArg;

    // Constant GEPs are modeled as free.
    return true;
  }

  // Variable GEPs will require math and will disable SROA.
  if (SROAArg)
    disableSROAForArg(SROAArg);
  return isGEPFree(I);
}
1512 
1513 /// Simplify \p I if its operands are constants and update SimplifiedValues.
1514 /// \p Evaluate is a callable specific to instruction type that evaluates the
1515 /// instruction when all the operands are constants.
1516 template <typename Callable>
1517 bool CallAnalyzer::simplifyInstruction(Instruction &I, Callable Evaluate) {
1518  SmallVector<Constant *, 2> COps;
1519  for (Value *Op : I.operands()) {
1520  Constant *COp = dyn_cast<Constant>(Op);
1521  if (!COp)
1522  COp = SimplifiedValues.lookup(Op);
1523  if (!COp)
1524  return false;
1525  COps.push_back(COp);
1526  }
1527  auto *C = Evaluate(COps);
1528  if (!C)
1529  return false;
1530  SimplifiedValues[&I] = C;
1531  return true;
1532 }
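
// A typical use, as in the cast visitors below, passes a lambda that folds
// the collected constant operands, e.g.:
//
//   simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
//     return ConstantExpr::getBitCast(COps[0], I.getType());
//   });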
1533 
1534 bool CallAnalyzer::visitBitCast(BitCastInst &I) {
1535  // Propagate constants through bitcasts.
1536  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
1537  return ConstantExpr::getBitCast(COps[0], I.getType());
1538  }))
1539  return true;
1540 
1541  // Track base/offsets through casts
1542  std::pair<Value *, APInt> BaseAndOffset =
1543  ConstantOffsetPtrs.lookup(I.getOperand(0));
1544  // Casts don't change the offset, just wrap it up.
1545  if (BaseAndOffset.first)
1546  ConstantOffsetPtrs[&I] = BaseAndOffset;
1547 
1548  // Also look for SROA candidates here.
1549  if (auto *SROAArg = getSROAArgForValueOrNull(I.getOperand(0)))
1550  SROAArgValues[&I] = SROAArg;
1551 
1552  // Bitcasts are always zero cost.
1553  return true;
1554 }
1555 
1556 bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
1557  // Propagate constants through ptrtoint.
1558  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
1559  return ConstantExpr::getPtrToInt(COps[0], I.getType());
1560  }))
1561  return true;
1562 
1563  // Track base/offset pairs when converted to a plain integer provided the
1564  // integer is large enough to represent the pointer.
1565  unsigned IntegerSize = I.getType()->getScalarSizeInBits();
1566  unsigned AS = I.getOperand(0)->getType()->getPointerAddressSpace();
1567  if (IntegerSize == DL.getPointerSizeInBits(AS)) {
1568  std::pair<Value *, APInt> BaseAndOffset =
1569  ConstantOffsetPtrs.lookup(I.getOperand(0));
1570  if (BaseAndOffset.first)
1571  ConstantOffsetPtrs[&I] = BaseAndOffset;
1572  }
1573 
1574  // This is really weird. Technically, ptrtoint will disable SROA. However,
1575  // unless that ptrtoint is *used* somewhere in the live basic blocks after
1576  // inlining, it will be nuked, and SROA should proceed. All of the uses which
1577  // would block SROA would also block SROA if applied directly to a pointer,
1578  // and so we can just add the integer in here. The only places where SROA is
1579  // preserved either cannot fire on an integer, or won't in-and-of themselves
1580  // disable SROA (ext) w/o some later use that we would see and disable.
1581  if (auto *SROAArg = getSROAArgForValueOrNull(I.getOperand(0)))
1582  SROAArgValues[&I] = SROAArg;
1583 
1584  return TargetTransformInfo::TCC_Free ==
1585  TTI.getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
1586 }
1587 
1588 bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
1589  // Propagate constants through inttoptr.
1590  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
1591  return ConstantExpr::getIntToPtr(COps[0], I.getType());
1592  }))
1593  return true;
1594 
1595  // Track base/offset pairs when round-tripped through a pointer without
1596  // modifications provided the integer is not too large.
1597  Value *Op = I.getOperand(0);
1598  unsigned IntegerSize = Op->getType()->getScalarSizeInBits();
1599  if (IntegerSize <= DL.getPointerTypeSizeInBits(I.getType())) {
1600  std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Op);
1601  if (BaseAndOffset.first)
1602  ConstantOffsetPtrs[&I] = BaseAndOffset;
1603  }
1604 
1605  // "Propagate" SROA here in the same manner as we do for ptrtoint above.
1606  if (auto *SROAArg = getSROAArgForValueOrNull(Op))
1607  SROAArgValues[&I] = SROAArg;
1608 
1609  return TargetTransformInfo::TCC_Free ==
1610  TTI.getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
1611 }
1612 
1613 bool CallAnalyzer::visitCastInst(CastInst &I) {
1614  // Propagate constants through casts.
1615  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
1616  return ConstantExpr::getCast(I.getOpcode(), COps[0], I.getType());
1617  }))
1618  return true;
1619 
1620  // Disable SROA in the face of arbitrary casts we don't explicitly list
1621  // elsewhere.
1622  disableSROA(I.getOperand(0));
1623 
1624  // If this is a floating-point cast, and the target says this operation
1625  // is expensive, this may eventually become a library call. Treat the cost
1626  // as such.
1627  switch (I.getOpcode()) {
1628  case Instruction::FPTrunc:
1629  case Instruction::FPExt:
1630  case Instruction::UIToFP:
1631  case Instruction::SIToFP:
1632  case Instruction::FPToUI:
1633  case Instruction::FPToSI:
1634  if (TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive)
1635  onCallPenalty();
1636  break;
1637  default:
1638  break;
1639  }
1640 
1641  return TargetTransformInfo::TCC_Free ==
1642  TTI.getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
1643 }
1644 
1645 bool CallAnalyzer::paramHasAttr(Argument *A, Attribute::AttrKind Attr) {
1646  return CandidateCall.paramHasAttr(A->getArgNo(), Attr);
1647 }
1648 
1649 bool CallAnalyzer::isKnownNonNullInCallee(Value *V) {
1650  // Does the *call site* have the NonNull attribute set on an argument? We
1651  // use the attribute on the call site to memoize any analysis done in the
1652  // caller. This will also trip if the callee function has a non-null
1653  // parameter attribute, but that's a less interesting case because hopefully
1654  // the callee would already have been simplified based on that.
1655  if (Argument *A = dyn_cast<Argument>(V))
1656  if (paramHasAttr(A, Attribute::NonNull))
1657  return true;
1658 
1659  // Is this an alloca in the caller? This is distinct from the attribute case
1660  // above because attributes aren't updated within the inliner itself and we
1661  // always want to catch the alloca derived case.
1662  if (isAllocaDerivedArg(V))
1663  // We can actually predict the result of comparisons between an
1664  // alloca-derived value and null. Note that this fires regardless of
1665  // SROA firing.
1666  return true;
1667 
1668  return false;
1669 }
1670 
1671 bool CallAnalyzer::allowSizeGrowth(CallBase &Call) {
1672  // If the normal destination of the invoke or the parent block of the call
1673  // site is unreachable-terminated, there is little point in inlining this
1674  // unless there is literally zero cost.
1675  // FIXME: Note that it is possible that an unreachable-terminated block has a
1676  // hot entry. For example, in the scenario below, inlining hot_call_X() may
1677  // be beneficial:
1678  // main() {
1679  // hot_call_1();
1680  // ...
1681  // hot_call_N()
1682  // exit(0);
1683  // }
1684  // For now, we are not handling this corner case here as it is rare in real
1685  // code. In the future, we should elaborate this based on BPI and BFI in more
1686  // general threshold adjusting heuristics in updateThreshold().
1687  if (InvokeInst *II = dyn_cast<InvokeInst>(&Call)) {
1688  if (isa<UnreachableInst>(II->getNormalDest()->getTerminator()))
1689  return false;
1690  } else if (isa<UnreachableInst>(Call.getParent()->getTerminator()))
1691  return false;
1692 
1693  return true;
1694 }
1695 
1696 bool InlineCostCallAnalyzer::isColdCallSite(CallBase &Call,
1697  BlockFrequencyInfo *CallerBFI) {
1698  // If global profile summary is available, then callsite's coldness is
1699  // determined based on that.
1700  if (PSI && PSI->hasProfileSummary())
1701  return PSI->isColdCallSite(Call, CallerBFI);
1702 
1703  // Otherwise we need BFI to be available.
1704  if (!CallerBFI)
1705  return false;
1706 
1707  // Determine if the callsite is cold relative to caller's entry. We could
1708  // potentially cache the computation of scaled entry frequency, but the added
1709  // complexity is not worth it unless this scaling shows up high in the
1710  // profiles.
1711  const BranchProbability ColdProb(ColdCallSiteRelFreq, 100);
1712  auto CallSiteBB = Call.getParent();
1713  auto CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB);
1714  auto CallerEntryFreq =
1715  CallerBFI->getBlockFreq(&(Call.getCaller()->getEntryBlock()));
1716  return CallSiteFreq < CallerEntryFreq * ColdProb;
1717 }
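
// E.g. with the default -cold-callsite-rel-freq of 2, a callsite whose block
// frequency is below 2% of the caller's entry frequency counts as cold when
// no profile summary is available.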
1718 
1719 Optional<int>
1720 InlineCostCallAnalyzer::getHotCallSiteThreshold(CallBase &Call,
1721  BlockFrequencyInfo *CallerBFI) {
1722 
1723  // If global profile summary is available, then callsite's hotness is
1724  // determined based on that.
1725  if (PSI && PSI->hasProfileSummary() && PSI->isHotCallSite(Call, CallerBFI))
1726  return Params.HotCallSiteThreshold;
1727 
1728  // Otherwise we need BFI to be available and to have a locally hot callsite
1729  // threshold.
1730  if (!CallerBFI || !Params.LocallyHotCallSiteThreshold)
1731  return None;
1732 
1733  // Determine if the callsite is hot relative to caller's entry. We could
1734  // potentially cache the computation of scaled entry frequency, but the added
1735  // complexity is not worth it unless this scaling shows up high in the
1736  // profiles.
1737  auto CallSiteBB = Call.getParent();
1738  auto CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB).getFrequency();
1739  auto CallerEntryFreq = CallerBFI->getEntryFreq();
1740  if (CallSiteFreq >= CallerEntryFreq * HotCallSiteRelFreq)
1741  return Params.LocallyHotCallSiteThreshold;
1742 
1743  // Otherwise treat it normally.
1744  return None;
1745 }
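
// E.g. with the default -hot-callsite-rel-freq of 60, a callsite executed at
// least 60x as often as the caller's entry block is locally hot and receives
// Params.LocallyHotCallSiteThreshold (default 525) instead of the base
// threshold.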
1746 
1747 void InlineCostCallAnalyzer::updateThreshold(CallBase &Call, Function &Callee) {
1748  // If no size growth is allowed for this inlining, set Threshold to 0.
1749  if (!allowSizeGrowth(Call)) {
1750  Threshold = 0;
1751  return;
1752  }
1753 
1754  Function *Caller = Call.getCaller();
1755 
1756  // return min(A, B) if B is valid.
1757  auto MinIfValid = [](int A, Optional<int> B) {
1758  return B ? std::min(A, B.getValue()) : A;
1759  };
1760 
1761  // return max(A, B) if B is valid.
1762  auto MaxIfValid = [](int A, Optional<int> B) {
1763  return B ? std::max(A, B.getValue()) : A;
1764  };
1765 
1766  // Various bonus percentages. These are multiplied by Threshold to get the
1767  // bonus values.
1768  // SingleBBBonus: This bonus is applied if the callee has a single reachable
1769  // basic block at the given callsite context. This is speculatively applied
1770  // and withdrawn if more than one basic block is seen.
1771  //
1772  // LastCallToStaticBonus: This large bonus is applied to ensure the inlining
1773  // of the last call to a static function as inlining such functions is
1774  // guaranteed to reduce code size.
1775  //
1776  // These bonus percentages may be set to 0 based on properties of the caller
1777  // and the callsite.
1778  int SingleBBBonusPercent = 50;
1779  int VectorBonusPercent = TTI.getInlinerVectorBonusPercent();
1780  int LastCallToStaticBonus = InlineConstants::LastCallToStaticBonus;
1781 
1782  // Lambda to set all the above bonus and bonus percentages to 0.
1783  auto DisallowAllBonuses = [&]() {
1784  SingleBBBonusPercent = 0;
1785  VectorBonusPercent = 0;
1786  LastCallToStaticBonus = 0;
1787  };
1788 
1789  // Use the OptMinSizeThreshold or OptSizeThreshold knob if they are available
1790  // and reduce the threshold if the caller has the necessary attribute.
1791  if (Caller->hasMinSize()) {
1792  Threshold = MinIfValid(Threshold, Params.OptMinSizeThreshold);
1793  // For minsize, we want to disable the single BB bonus and the vector
1794  // bonuses, but not the last-call-to-static bonus. Inlining the last call to
1795  // a static function will, at the minimum, eliminate the parameter setup and
1796  // call/return instructions.
1797  SingleBBBonusPercent = 0;
1798  VectorBonusPercent = 0;
1799  } else if (Caller->hasOptSize())
1800  Threshold = MinIfValid(Threshold, Params.OptSizeThreshold);
1801 
1802  // Adjust the threshold based on inlinehint attribute and profile based
1803  // hotness information if the caller does not have MinSize attribute.
1804  if (!Caller->hasMinSize()) {
1805  if (Callee.hasFnAttribute(Attribute::InlineHint))
1806  Threshold = MaxIfValid(Threshold, Params.HintThreshold);
1807 
1808  // FIXME: After switching to the new passmanager, simplify the logic below
1809  // by checking only the callsite hotness/coldness as we will reliably
1810  // have local profile information.
1811  //
1812  // Callsite hotness and coldness can be determined if sample profile is
1813  // used (which adds hotness metadata to calls) or if caller's
1814  // BlockFrequencyInfo is available.
1815  BlockFrequencyInfo *CallerBFI = GetBFI ? &(GetBFI(*Caller)) : nullptr;
1816  auto HotCallSiteThreshold = getHotCallSiteThreshold(Call, CallerBFI);
1817  if (!Caller->hasOptSize() && HotCallSiteThreshold) {
1818  LLVM_DEBUG(dbgs() << "Hot callsite.\n");
1819  // FIXME: This should update the threshold only if it exceeds the
1820  // current threshold, but AutoFDO + ThinLTO currently relies on this
1821  // behavior to prevent inlining of hot callsites during ThinLTO
1822  // compile phase.
1823  Threshold = HotCallSiteThreshold.getValue();
1824  } else if (isColdCallSite(Call, CallerBFI)) {
1825  LLVM_DEBUG(dbgs() << "Cold callsite.\n");
1826  // Do not apply bonuses for a cold callsite including the
1827  // LastCallToStatic bonus. While this bonus might result in code size
1828  // reduction, it can cause the size of a non-cold caller to increase
1829  // preventing it from being inlined.
1830  DisallowAllBonuses();
1831  Threshold = MinIfValid(Threshold, Params.ColdCallSiteThreshold);
1832  } else if (PSI) {
1833  // Use callee's global profile information only if we have no way of
1834  // determining this via callsite information.
1835  if (PSI->isFunctionEntryHot(&Callee)) {
1836  LLVM_DEBUG(dbgs() << "Hot callee.\n");
1837  // If callsite hotness can not be determined, we may still know
1838  // that the callee is hot and treat it as a weaker hint for threshold
1839  // increase.
1840  Threshold = MaxIfValid(Threshold, Params.HintThreshold);
1841  } else if (PSI->isFunctionEntryCold(&Callee)) {
1842  LLVM_DEBUG(dbgs() << "Cold callee.\n");
1843  // Do not apply bonuses for a cold callee including the
1844  // LastCallToStatic bonus. While this bonus might result in code size
1845  // reduction, it can cause the size of a non-cold caller to increase
1846  // preventing it from being inlined.
1847  DisallowAllBonuses();
1848  Threshold = MinIfValid(Threshold, Params.ColdThreshold);
1849  }
1850  }
1851  }
1852 
1853  Threshold += TTI.adjustInliningThreshold(&Call);
1854 
1855  // Finally, take the target-specific inlining threshold multiplier into
1856  // account.
1857  Threshold *= TTI.getInliningThresholdMultiplier();
1858 
1859  SingleBBBonus = Threshold * SingleBBBonusPercent / 100;
1860  VectorBonus = Threshold * VectorBonusPercent / 100;
1861 
1862  bool OnlyOneCallAndLocalLinkage =
1863  F.hasLocalLinkage() && F.hasOneUse() && &F == Call.getCalledFunction();
1864  // If there is only one call of the function, and it has internal linkage,
1865  // the cost of inlining it drops dramatically. It may seem odd to update
1866  // Cost in updateThreshold, but the bonus depends on the logic in this method.
1867  if (OnlyOneCallAndLocalLinkage)
1868  Cost -= LastCallToStaticBonus;
1869 }
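
// For illustration, with the default threshold of 225: SingleBBBonus is
// 225 * 50 / 100 = 112, and a target reporting a 150% vector bonus would give
// VectorBonus = 225 * 150 / 100 = 337. Both bonuses are granted speculatively
// and withdrawn later if the callee turns out not to qualify.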
1870 
1871 bool CallAnalyzer::visitCmpInst(CmpInst &I) {
1872  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
1873  // First try to handle simplified comparisons.
1874  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
1875  return ConstantExpr::getCompare(I.getPredicate(), COps[0], COps[1]);
1876  }))
1877  return true;
1878 
1879  if (I.getOpcode() == Instruction::FCmp)
1880  return false;
1881 
1882  // Otherwise look for a comparison between constant offset pointers with
1883  // a common base.
1884  Value *LHSBase, *RHSBase;
1885  APInt LHSOffset, RHSOffset;
1886  std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
1887  if (LHSBase) {
1888  std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
1889  if (RHSBase && LHSBase == RHSBase) {
1890  // We have common bases, fold the icmp to a constant based on the
1891  // offsets.
1892  Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
1893  Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
1894  if (Constant *C = ConstantExpr::getICmp(I.getPredicate(), CLHS, CRHS)) {
1895  SimplifiedValues[&I] = C;
1896  ++NumConstantPtrCmps;
1897  return true;
1898  }
1899  }
1900  }
1901 
1902  // If the comparison is an equality comparison with null, we can simplify it
1903  // if we know the value (argument) can't be null
1904  if (I.isEquality() && isa<ConstantPointerNull>(I.getOperand(1)) &&
1905  isKnownNonNullInCallee(I.getOperand(0))) {
1906  bool IsNotEqual = I.getPredicate() == CmpInst::ICMP_NE;
1907  SimplifiedValues[&I] = IsNotEqual ? ConstantInt::getTrue(I.getType())
1908  : ConstantInt::getFalse(I.getType());
1909  return true;
1910  }
1911  return handleSROA(I.getOperand(0), isa<ConstantPointerNull>(I.getOperand(1)));
1912 }
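
// For example, with %a tracked as (%base, 0) and %b tracked as (%base, 4),
//
//   %c = icmp ult i8* %a, %b   ; folds to (icmp ult 0, 4), i.e. true
//
// and the compare is recorded in SimplifiedValues instead of costing anything.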
1913 
1914 bool CallAnalyzer::visitSub(BinaryOperator &I) {
1915  // Try to handle a special case: we can fold computing the difference of two
1916  // constant-related pointers.
1917  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
1918  Value *LHSBase, *RHSBase;
1919  APInt LHSOffset, RHSOffset;
1920  std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
1921  if (LHSBase) {
1922  std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
1923  if (RHSBase && LHSBase == RHSBase) {
1924  // We have common bases, fold the subtract to a constant based on the
1925  // offsets.
1926  Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
1927  Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
1928  if (Constant *C = ConstantExpr::getSub(CLHS, CRHS)) {
1929  SimplifiedValues[&I] = C;
1930  ++NumConstantPtrDiffs;
1931  return true;
1932  }
1933  }
1934  }
1935 
1936  // Otherwise, fall back to the generic logic for simplifying and handling
1937  // instructions.
1938  return Base::visitSub(I);
1939 }
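
// For example, if both operands track the same pointer base, say (%base, 12)
// and (%base, 4), the subtraction folds to the constant 8.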
1940 
1941 bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) {
1942  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
1943  Constant *CLHS = dyn_cast<Constant>(LHS);
1944  if (!CLHS)
1945  CLHS = SimplifiedValues.lookup(LHS);
1946  Constant *CRHS = dyn_cast<Constant>(RHS);
1947  if (!CRHS)
1948  CRHS = SimplifiedValues.lookup(RHS);
1949 
1950  Value *SimpleV = nullptr;
1951  if (auto FI = dyn_cast<FPMathOperator>(&I))
1952  SimpleV = SimplifyBinOp(I.getOpcode(), CLHS ? CLHS : LHS, CRHS ? CRHS : RHS,
1953  FI->getFastMathFlags(), DL);
1954  else
1955  SimpleV =
1956  SimplifyBinOp(I.getOpcode(), CLHS ? CLHS : LHS, CRHS ? CRHS : RHS, DL);
1957 
1958  if (Constant *C = dyn_cast_or_null<Constant>(SimpleV))
1959  SimplifiedValues[&I] = C;
1960 
1961  if (SimpleV)
1962  return true;
1963 
1964  // Disable any SROA on arguments to arbitrary, unsimplified binary operators.
1965  disableSROA(LHS);
1966  disableSROA(RHS);
1967 
1968  // If the instruction is floating point, and the target says this operation
1969  // is expensive, this may eventually become a library call. Treat the cost
1970  // as such. Unless it's fneg which can be implemented with an xor.
1971  using namespace llvm::PatternMatch;
1972  if (I.getType()->isFloatingPointTy() &&
1973  TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive &&
1974  !match(&I, m_FNeg(m_Value())))
1975  onCallPenalty();
1976 
1977  return false;
1978 }
1979 
1980 bool CallAnalyzer::visitFNeg(UnaryOperator &I) {
1981  Value *Op = I.getOperand(0);
1982  Constant *COp = dyn_cast<Constant>(Op);
1983  if (!COp)
1984  COp = SimplifiedValues.lookup(Op);
1985 
1986  Value *SimpleV = SimplifyFNegInst(
1987  COp ? COp : Op, cast<FPMathOperator>(I).getFastMathFlags(), DL);
1988 
1989  if (Constant *C = dyn_cast_or_null<Constant>(SimpleV))
1990  SimplifiedValues[&I] = C;
1991 
1992  if (SimpleV)
1993  return true;
1994 
1995  // Disable any SROA on arguments to arbitrary, unsimplified fneg.
1996  disableSROA(Op);
1997 
1998  return false;
1999 }
2000 
2001 bool CallAnalyzer::visitLoad(LoadInst &I) {
2002  if (handleSROA(I.getPointerOperand(), I.isSimple()))
2003  return true;
2004 
2005  // If the data is already loaded from this address and hasn't been clobbered
2006  // by any stores or calls, this load is likely to be redundant and can be
2007  // eliminated.
2008  if (EnableLoadElimination &&
2009  !LoadAddrSet.insert(I.getPointerOperand()).second && I.isUnordered()) {
2010  onLoadEliminationOpportunity();
2011  return true;
2012  }
2013 
2014  return false;
2015 }
2016 
2017 bool CallAnalyzer::visitStore(StoreInst &I) {
2018  if (handleSROA(I.getPointerOperand(), I.isSimple()))
2019  return true;
2020 
2021  // The store can potentially clobber loads and prevent repeated loads from
2022  // being eliminated.
2023  // FIXME:
2024  // 1. We can probably keep an initial set of eliminatable loads subtracted
2025  // from the cost even when we finally see a store. We just need to disable
2026  // *further* accumulation of elimination savings.
2027  // 2. We should probably at some point thread MemorySSA for the callee into
2028  // this and then use that to actually compute *really* precise savings.
2029  disableLoadElimination();
2030  return false;
2031 }
2032 
2033 bool CallAnalyzer::visitExtractValue(ExtractValueInst &I) {
2034  // Constant folding for extract value is trivial.
2035  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
2036  return ConstantExpr::getExtractValue(COps[0], I.getIndices());
2037  }))
2038  return true;
2039 
2040  // SROA can't look through these, but they may be free.
2041  return Base::visitExtractValue(I);
2042 }
2043 
2044 bool CallAnalyzer::visitInsertValue(InsertValueInst &I) {
2045  // Constant folding for insert value is trivial.
2046  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
2047  return ConstantExpr::getInsertValue(/*AggregateOperand*/ COps[0],
2048  /*InsertedValueOperand*/ COps[1],
2049  I.getIndices());
2050  }))
2051  return true;
2052 
2053  // SROA can't look through these, but they may be free.
2054  return Base::visitInsertValue(I);
2055 }
2056 
2057 /// Try to simplify a call site.
2058 ///
2059 /// Takes a concrete function and callsite and tries to actually simplify it by
2060 /// analyzing the arguments and call itself with instsimplify. Returns true if
2061 /// it has simplified the callsite to some other entity (a constant), making it
2062 /// free.
2063 bool CallAnalyzer::simplifyCallSite(Function *F, CallBase &Call) {
2064  // FIXME: Using the instsimplify logic directly for this is inefficient
2065  // because we have to continually rebuild the argument list even when no
2066  // simplifications can be performed. Until that is fixed with remapping
2067  // inside of instsimplify, directly constant fold calls here.
2068  if (!canConstantFoldCallTo(&Call, F))
2069  return false;
2070 
2071  // Try to re-map the arguments to constants.
2072  SmallVector<Constant *, 4> ConstantArgs;
2073  ConstantArgs.reserve(Call.arg_size());
2074  for (Value *I : Call.args()) {
2075  Constant *C = dyn_cast<Constant>(I);
2076  if (!C)
2077  C = dyn_cast_or_null<Constant>(SimplifiedValues.lookup(I));
2078  if (!C)
2079  return false; // This argument doesn't map to a constant.
2080 
2081  ConstantArgs.push_back(C);
2082  }
2083  if (Constant *C = ConstantFoldCall(&Call, F, ConstantArgs)) {
2084  SimplifiedValues[&Call] = C;
2085  return true;
2086  }
2087 
2088  return false;
2089 }
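
// For instance, a call whose arguments all map to constants, such as
//
//   %r = call i32 @llvm.smax.i32(i32 3, i32 5)
//
// is constant-folded (here to i32 5) and the callsite becomes free.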
2090 
2091 bool CallAnalyzer::visitCallBase(CallBase &Call) {
2092  if (!onCallBaseVisitStart(Call))
2093  return true;
2094 
2095  if (Call.hasFnAttr(Attribute::ReturnsTwice) &&
2096  !F.hasFnAttribute(Attribute::ReturnsTwice)) {
2097  // This aborts the entire analysis.
2098  ExposesReturnsTwice = true;
2099  return false;
2100  }
2101  if (isa<CallInst>(Call) && cast<CallInst>(Call).cannotDuplicate())
2102  ContainsNoDuplicateCall = true;
2103 
2104  Value *Callee = Call.getCalledOperand();
2105  Function *F = dyn_cast_or_null<Function>(Callee);
2106  bool IsIndirectCall = !F;
2107  if (IsIndirectCall) {
2108  // Check if this happens to be an indirect function call to a known function
2109  // in this inline context. If not, we've done all we can.
2110  F = dyn_cast_or_null<Function>(SimplifiedValues.lookup(Callee));
2111  if (!F) {
2112  onCallArgumentSetup(Call);
2113 
2114  if (!Call.onlyReadsMemory())
2115  disableLoadElimination();
2116  return Base::visitCallBase(Call);
2117  }
2118  }
2119 
2120  assert(F && "Expected a call to a known function");
2121 
2122  // When we have a concrete function, first try to simplify it directly.
2123  if (simplifyCallSite(F, Call))
2124  return true;
2125 
2126  // Next check if it is an intrinsic we know about.
2127  // FIXME: Lift this into part of the InstVisitor.
2128  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&Call)) {
2129  switch (II->getIntrinsicID()) {
2130  default:
2131  if (!Call.onlyReadsMemory() && !isAssumeLikeIntrinsic(II))
2132  disableLoadElimination();
2133  return Base::visitCallBase(Call);
2134 
2135  case Intrinsic::load_relative:
2136  onLoadRelativeIntrinsic();
2137  return false;
2138 
2139  case Intrinsic::memset:
2140  case Intrinsic::memcpy:
2141  case Intrinsic::memmove:
2142  disableLoadElimination();
2143  // SROA can usually chew through these intrinsics, but they aren't free.
2144  return false;
2145  case Intrinsic::icall_branch_funnel:
2146  case Intrinsic::localescape:
2147  HasUninlineableIntrinsic = true;
2148  return false;
2149  case Intrinsic::vastart:
2150  InitsVargArgs = true;
2151  return false;
2152  case Intrinsic::launder_invariant_group:
2153  case Intrinsic::strip_invariant_group:
2154  if (auto *SROAArg = getSROAArgForValueOrNull(II->getOperand(0)))
2155  SROAArgValues[II] = SROAArg;
2156  return true;
2157  }
2158  }
2159 
2160  if (F == Call.getFunction()) {
2161  // This flag will fully abort the analysis, so don't bother with anything
2162  // else.
2163  IsRecursiveCall = true;
2164  if (!AllowRecursiveCall)
2165  return false;
2166  }
2167 
2168  if (TTI.isLoweredToCall(F)) {
2169  onLoweredCall(F, Call, IsIndirectCall);
2170  }
2171 
2172  if (!(Call.onlyReadsMemory() || (IsIndirectCall && F->onlyReadsMemory())))
2173  disableLoadElimination();
2174  return Base::visitCallBase(Call);
2175 }
2176 
2177 bool CallAnalyzer::visitReturnInst(ReturnInst &RI) {
2178  // At least one return instruction will be free after inlining.
2179  bool Free = !HasReturn;
2180  HasReturn = true;
2181  return Free;
2182 }
2183 
2184 bool CallAnalyzer::visitBranchInst(BranchInst &BI) {
2185  // We model unconditional branches as essentially free -- they really
2186  // shouldn't exist at all, but handling them makes the behavior of the
2187  // inliner more regular and predictable. Interestingly, conditional branches
2188  // which will fold away are also free.
2189  return BI.isUnconditional() || isa<ConstantInt>(BI.getCondition()) ||
2190  dyn_cast_or_null<ConstantInt>(
2191  SimplifiedValues.lookup(BI.getCondition()));
2192 }
2193 
2194 bool CallAnalyzer::visitSelectInst(SelectInst &SI) {
2195  bool CheckSROA = SI.getType()->isPointerTy();
2196  Value *TrueVal = SI.getTrueValue();
2197  Value *FalseVal = SI.getFalseValue();
2198 
2199  Constant *TrueC = dyn_cast<Constant>(TrueVal);
2200  if (!TrueC)
2201  TrueC = SimplifiedValues.lookup(TrueVal);
2202  Constant *FalseC = dyn_cast<Constant>(FalseVal);
2203  if (!FalseC)
2204  FalseC = SimplifiedValues.lookup(FalseVal);
2205  Constant *CondC =
2206  dyn_cast_or_null<Constant>(SimplifiedValues.lookup(SI.getCondition()));
2207 
2208  if (!CondC) {
2209  // Select C, X, X => X
2210  if (TrueC == FalseC && TrueC) {
2211  SimplifiedValues[&SI] = TrueC;
2212  return true;
2213  }
2214 
2215  if (!CheckSROA)
2216  return Base::visitSelectInst(SI);
2217 
2218  std::pair<Value *, APInt> TrueBaseAndOffset =
2219  ConstantOffsetPtrs.lookup(TrueVal);
2220  std::pair<Value *, APInt> FalseBaseAndOffset =
2221  ConstantOffsetPtrs.lookup(FalseVal);
2222  if (TrueBaseAndOffset == FalseBaseAndOffset && TrueBaseAndOffset.first) {
2223  ConstantOffsetPtrs[&SI] = TrueBaseAndOffset;
2224 
2225  if (auto *SROAArg = getSROAArgForValueOrNull(TrueVal))
2226  SROAArgValues[&SI] = SROAArg;
2227  return true;
2228  }
2229 
2230  return Base::visitSelectInst(SI);
2231  }
2232 
2233  // Select condition is a constant.
2234  Value *SelectedV = CondC->isAllOnesValue() ? TrueVal
2235  : (CondC->isNullValue()) ? FalseVal
2236  : nullptr;
2237  if (!SelectedV) {
2238  // Condition is a vector constant that is not all 1s or all 0s. If all
2239  // operands are constants, ConstantExpr::getSelect() can handle the cases
2240  // such as select vectors.
2241  if (TrueC && FalseC) {
2242  if (auto *C = ConstantExpr::getSelect(CondC, TrueC, FalseC)) {
2243  SimplifiedValues[&SI] = C;
2244  return true;
2245  }
2246  }
2247  return Base::visitSelectInst(SI);
2248  }
2249 
2250  // Condition is either all 1s or all 0s. SI can be simplified.
2251  if (Constant *SelectedC = dyn_cast<Constant>(SelectedV)) {
2252  SimplifiedValues[&SI] = SelectedC;
2253  return true;
2254  }
2255 
2256  if (!CheckSROA)
2257  return true;
2258 
2259  std::pair<Value *, APInt> BaseAndOffset =
2260  ConstantOffsetPtrs.lookup(SelectedV);
2261  if (BaseAndOffset.first) {
2262  ConstantOffsetPtrs[&SI] = BaseAndOffset;
2263 
2264  if (auto *SROAArg = getSROAArgForValueOrNull(SelectedV))
2265  SROAArgValues[&SI] = SROAArg;
2266  }
2267 
2268  return true;
2269 }
2270 
2271 bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
2272  // We model unconditional switches as free, see the comments on handling
2273  // branches.
2274  if (isa<ConstantInt>(SI.getCondition()))
2275  return true;
2276  if (Value *V = SimplifiedValues.lookup(SI.getCondition()))
2277  if (isa<ConstantInt>(V))
2278  return true;
2279 
2280  // Assume the most general case where the switch is lowered into
2281  // either a jump table, bit test, or a balanced binary tree consisting of
2282  // case clusters without merging adjacent clusters with the same
2283  // destination. We do not consider the switches that are lowered with a mix
2284  // of jump table/bit test/binary search tree. The cost of the switch is
2285  // proportional to the size of the tree or the size of jump table range.
2286  //
2287  // NB: We convert large switches which are just used to initialize large phi
2288  // nodes to lookup tables instead in simplifycfg, so this shouldn't prevent
2289  // inlining those. It will prevent inlining in cases where the optimization
2290  // does not (yet) fire.
2291 
2292  unsigned JumpTableSize = 0;
2293  BlockFrequencyInfo *BFI = GetBFI ? &(GetBFI(F)) : nullptr;
2294  unsigned NumCaseCluster =
2295  TTI.getEstimatedNumberOfCaseClusters(SI, JumpTableSize, PSI, BFI);
2296 
2297  onFinalizeSwitch(JumpTableSize, NumCaseCluster);
2298  return false;
2299 }
2300 
2301 bool CallAnalyzer::visitIndirectBrInst(IndirectBrInst &IBI) {
2302  // We never want to inline functions that contain an indirectbr. This is
2303  // incorrect because all the blockaddresses (in static global initializers
2304  // for example) would be referring to the original function, and this
2305  // indirect jump would jump from the inlined copy of the function into the
2306  // original function which is extremely undefined behavior.
2307  // FIXME: This logic isn't really right; we can safely inline functions with
2308  // indirectbr's as long as no other function or global references the
2309  // blockaddress of a block within the current function.
2310  HasIndirectBr = true;
2311  return false;
2312 }
2313 
2314 bool CallAnalyzer::visitResumeInst(ResumeInst &RI) {
2315  // FIXME: It's not clear that a single instruction is an accurate model for
2316  // the inline cost of a resume instruction.
2317  return false;
2318 }
2319 
2320 bool CallAnalyzer::visitCleanupReturnInst(CleanupReturnInst &CRI) {
2321  // FIXME: It's not clear that a single instruction is an accurate model for
2322  // the inline cost of a cleanupret instruction.
2323  return false;
2324 }
2325 
2326 bool CallAnalyzer::visitCatchReturnInst(CatchReturnInst &CRI) {
2327  // FIXME: It's not clear that a single instruction is an accurate model for
2328  // the inline cost of a catchret instruction.
2329  return false;
2330 }
2331 
2332 bool CallAnalyzer::visitUnreachableInst(UnreachableInst &I) {
2333  // FIXME: It might be reasonable to discount the cost of instructions leading
2334  // to unreachable as they have the lowest possible impact on both runtime and
2335  // code size.
2336  return true; // No actual code is needed for unreachable.
2337 }
2338 
2339 bool CallAnalyzer::visitInstruction(Instruction &I) {
2340  // Some instructions are free. All of the free intrinsics can also be
2341  // handled by SROA, etc.
2342  if (TargetTransformInfo::TCC_Free ==
2343  TTI.getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency))
2344  return true;
2345 
2346  // We found something we don't understand or can't handle. Mark any SROA-able
2347  // values in the operand list as no longer viable.
2348  for (const Use &Op : I.operands())
2349  disableSROA(Op);
2350 
2351  return false;
2352 }
2353 
2354 /// Analyze a basic block for its contribution to the inline cost.
2355 ///
2356 /// This method walks the analyzer over every instruction in the given basic
2357 /// block and accounts for their cost during inlining at this callsite. It
2358 /// aborts early if the threshold has been exceeded or an impossible to inline
2359 /// construct has been detected. It returns false if inlining is no longer
2360 /// viable, and true if inlining remains viable.
2361 InlineResult
2362 CallAnalyzer::analyzeBlock(BasicBlock *BB,
2363  SmallPtrSetImpl<const Value *> &EphValues) {
2364  for (Instruction &I : *BB) {
2365  // FIXME: Currently, the number of instructions in a function, regardless of
2366  // our ability to simplify them during inlining to constants or dead code,
2367  // is actually used by the vector bonus heuristic. As long as that's true,
2368  // we have to special case debug intrinsics here to prevent differences in
2369  // inlining due to debug symbols. Eventually, the number of unsimplified
2370  // instructions shouldn't factor into the cost computation, but until then,
2371  // hack around it here.
2372  if (isa<DbgInfoIntrinsic>(I))
2373  continue;
2374 
2375  // Skip pseudo-probes.
2376  if (isa<PseudoProbeInst>(I))
2377  continue;
2378 
2379  // Skip ephemeral values.
2380  if (EphValues.count(&I))
2381  continue;
2382 
2383  ++NumInstructions;
2384  if (isa<ExtractElementInst>(I) || I.getType()->isVectorTy())
2385  ++NumVectorInstructions;
2386 
2387  // If the instruction simplified to a constant, there is no cost to this
2388  // instruction. Visit the instructions using our InstVisitor to account for
2389  // all of the per-instruction logic. The visit tree returns true if we
2390  // consumed the instruction in any way, and false if the instruction's base
2391  // cost should count against inlining.
2392  onInstructionAnalysisStart(&I);
2393 
2394  if (Base::visit(&I))
2395  ++NumInstructionsSimplified;
2396  else
2397  onMissedSimplification();
2398 
2399  onInstructionAnalysisFinish(&I);
2400  using namespace ore;
2401  // If visiting this instruction detected an uninlinable pattern, abort.
2402  InlineResult IR = InlineResult::success();
2403  if (IsRecursiveCall && !AllowRecursiveCall)
2404  IR = InlineResult::failure("recursive");
2405  else if (ExposesReturnsTwice)
2406  IR = InlineResult::failure("exposes returns twice");
2407  else if (HasDynamicAlloca)
2408  IR = InlineResult::failure("dynamic alloca");
2409  else if (HasIndirectBr)
2410  IR = InlineResult::failure("indirect branch");
2411  else if (HasUninlineableIntrinsic)
2412  IR = InlineResult::failure("uninlinable intrinsic");
2413  else if (InitsVargArgs)
2414  IR = InlineResult::failure("varargs");
2415  if (!IR.isSuccess()) {
2416  if (ORE)
2417  ORE->emit([&]() {
2418  return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline",
2419  &CandidateCall)
2420  << NV("Callee", &F) << " has uninlinable pattern ("
2421  << NV("InlineResult", IR.getFailureReason())
2422  << ") and cost is not fully computed";
2423  });
2424  return IR;
2425  }
2426 
2427  // If the caller is a recursive function then we don't want to inline
2428  // functions which allocate a lot of stack space because it would increase
2429  // the caller stack usage dramatically.
2430  if (IsCallerRecursive &&
2431  AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller) {
2432  auto IR =
2433  InlineResult::failure("recursive and allocates too much stack space");
2434  if (ORE)
2435  ORE->emit([&]() {
2436  return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline",
2437  &CandidateCall)
2438  << NV("Callee", &F) << " is "
2439  << NV("InlineResult", IR.getFailureReason())
2440  << ". Cost is not fully computed";
2441  });
2442  return IR;
2443  }
2444 
2445  if (shouldStop())
2446  return InlineResult::failure(
2447  "Call site analysis is not favorable to inlining.");
2448  }
2449 
2450  return InlineResult::success();
2451 }
2452 
2453 /// Compute the base pointer and cumulative constant offsets for V.
2454 ///
2455 /// This strips all constant offsets off of V, leaving it the base pointer, and
2456 /// accumulates the total constant offset applied in the returned constant. It
2457 /// returns null if V is not a pointer, and returns the constant '0' if there are
2458 /// no constant offsets applied.
2459 ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
2460  if (!V->getType()->isPointerTy())
2461  return nullptr;
2462 
2463  unsigned AS = V->getType()->getPointerAddressSpace();
2464  unsigned IntPtrWidth = DL.getIndexSizeInBits(AS);
2465  APInt Offset = APInt::getZero(IntPtrWidth);
2466 
2467  // Even though we don't look through PHI nodes, we could be called on an
2468  // instruction in an unreachable block, which may be on a cycle.
2469  SmallPtrSet<Value *, 4> Visited;
2470  Visited.insert(V);
2471  do {
2472  if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
2473  if (!GEP->isInBounds() || !accumulateGEPOffset(*GEP, Offset))
2474  return nullptr;
2475  V = GEP->getPointerOperand();
2476  } else if (Operator::getOpcode(V) == Instruction::BitCast) {
2477  V = cast<Operator>(V)->getOperand(0);
2478  } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
2479  if (GA->isInterposable())
2480  break;
2481  V = GA->getAliasee();
2482  } else {
2483  break;
2484  }
2485  assert(V->getType()->isPointerTy() && "Unexpected operand type!");
2486  } while (Visited.insert(V).second);
2487 
2488  Type *IdxPtrTy = DL.getIndexType(V->getType());
2489  return cast<ConstantInt>(ConstantInt::get(IdxPtrTy, Offset));
2490 }
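
// For example, given
//
//   %a = alloca [4 x i32]
//   %g = getelementptr inbounds [4 x i32], [4 x i32]* %a, i32 0, i32 2
//
// passing %g in rewrites V to %a and returns a constant offset of 8 bytes.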
2491 
2492 /// Find dead blocks due to deleted CFG edges during inlining.
2493 ///
2494 /// If we know the successor of the current block, \p CurrBB, has to be \p
2495 /// NextBB, the other successors of \p CurrBB are dead if these successors have
2496 /// no live incoming CFG edges. If one block is found to be dead, we can
2497 /// continue growing the dead block list by checking the successors of the dead
2498 /// blocks to see if all their incoming edges are dead or not.
2499 void CallAnalyzer::findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB) {
2500  auto IsEdgeDead = [&](BasicBlock *Pred, BasicBlock *Succ) {
2501  // A CFG edge is dead if the predecessor is dead or the predecessor has a
2502  // known successor which is not the one under exam.
2503  return (DeadBlocks.count(Pred) ||
2504  (KnownSuccessors[Pred] && KnownSuccessors[Pred] != Succ));
2505  };
2506 
2507  auto IsNewlyDead = [&](BasicBlock *BB) {
2508  // If all the edges to a block are dead, the block is also dead.
2509  return (!DeadBlocks.count(BB) &&
2510  llvm::all_of(predecessors(BB),
2511  [&](BasicBlock *P) { return IsEdgeDead(P, BB); }));
2512  };
2513 
2514  for (BasicBlock *Succ : successors(CurrBB)) {
2515  if (Succ == NextBB || !IsNewlyDead(Succ))
2516  continue;
2517  SmallVector<BasicBlock *, 4> NewDead;
2518  NewDead.push_back(Succ);
2519  while (!NewDead.empty()) {
2520  BasicBlock *Dead = NewDead.pop_back_val();
2521  if (DeadBlocks.insert(Dead))
2522  // Continue growing the dead block lists.
2523  for (BasicBlock *S : successors(Dead))
2524  if (IsNewlyDead(S))
2525  NewDead.push_back(S);
2526  }
2527  }
2528 }
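
// For example, if the conditional branch ending %entry is known to take its
// edge to %then, then %else is dead unless some other live block still
// branches to it, and blocks reachable only through %else die with it.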
2529 
2530 /// Analyze a call site for potential inlining.
2531 ///
2532 /// Returns true if inlining this call is viable, and false if it is not
2533 /// viable. It computes the cost and adjusts the threshold based on numerous
2534 /// factors and heuristics. If this method returns false but the computed cost
2535 /// is below the computed threshold, then inlining was forcibly disabled by
2536 /// some artifact of the routine.
2537 InlineResult CallAnalyzer::analyze() {
2538  ++NumCallsAnalyzed;
2539 
2540  auto Result = onAnalysisStart();
2541  if (!Result.isSuccess())
2542  return Result;
2543 
2544  if (F.empty())
2545  return InlineResult::success();
2546 
2547  Function *Caller = CandidateCall.getFunction();
2548  // Check if the caller function is recursive itself.
2549  for (User *U : Caller->users()) {
2550  CallBase *Call = dyn_cast<CallBase>(U);
2551  if (Call && Call->getFunction() == Caller) {
2552  IsCallerRecursive = true;
2553  break;
2554  }
2555  }
2556 
2557  // Populate our simplified values by mapping from function arguments to call
2558  // arguments with known important simplifications.
2559  auto CAI = CandidateCall.arg_begin();
2560  for (Argument &FAI : F.args()) {
2561  assert(CAI != CandidateCall.arg_end());
2562  if (Constant *C = dyn_cast<Constant>(CAI))
2563  SimplifiedValues[&FAI] = C;
2564 
2565  Value *PtrArg = *CAI;
2566  if (ConstantInt *C = stripAndComputeInBoundsConstantOffsets(PtrArg)) {
2567  ConstantOffsetPtrs[&FAI] = std::make_pair(PtrArg, C->getValue());
2568 
2569  // We can SROA any pointer arguments derived from alloca instructions.
2570  if (auto *SROAArg = dyn_cast<AllocaInst>(PtrArg)) {
2571  SROAArgValues[&FAI] = SROAArg;
2572  onInitializeSROAArg(SROAArg);
2573  EnabledSROAAllocas.insert(SROAArg);
2574  }
2575  }
2576  ++CAI;
2577  }
2578  NumConstantArgs = SimplifiedValues.size();
2579  NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size();
2580  NumAllocaArgs = SROAArgValues.size();
2581 
2582  // FIXME: If a caller has multiple calls to a callee, we end up recomputing
2583  // the ephemeral values multiple times (and they're completely determined by
2584  // the callee, so this is purely duplicate work).
2585  SmallPtrSet<const Value *, 32> EphValues;
2586  CodeMetrics::collectEphemeralValues(&F, &GetAssumptionCache(F), EphValues);
2587 
2588  // The worklist of live basic blocks in the callee *after* inlining. We avoid
2589  // adding basic blocks of the callee which can be proven to be dead for this
2590  // particular call site in order to get more accurate cost estimates. This
2591  // requires a somewhat heavyweight iteration pattern: we need to walk the
2592  // basic blocks in a breadth-first order as we insert live successors. To
2593  // accomplish this, prioritizing for small iterations because we exit after
2594  // crossing our threshold, we use a small-size optimized SetVector.
2595  typedef SetVector<BasicBlock *, SmallVector<BasicBlock *, 16>,
2596  SmallPtrSet<BasicBlock *, 16>>
2597  BBSetVector;
2598  BBSetVector BBWorklist;
2599  BBWorklist.insert(&F.getEntryBlock());
2600 
2601  // Note that we *must not* cache the size, this loop grows the worklist.
2602  for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
2603  if (shouldStop())
2604  break;
2605 
2606  BasicBlock *BB = BBWorklist[Idx];
2607  if (BB->empty())
2608  continue;
2609 
2610  onBlockStart(BB);
2611 
2612  // Disallow inlining a blockaddress with uses other than strictly callbr.
2613  // A blockaddress only has defined behavior for an indirect branch in the
2614  // same function, and we do not currently support inlining indirect
2615  // branches. But, the inliner may not see an indirect branch that ends up
2616  // being dead code at a particular call site. If the blockaddress escapes
2617  // the function, e.g., via a global variable, inlining may lead to an
2618  // invalid cross-function reference.
2619  // FIXME: pr/39560: continue relaxing this overt restriction.
2620  if (BB->hasAddressTaken())
2621  for (User *U : BlockAddress::get(&*BB)->users())
2622  if (!isa<CallBrInst>(*U))
2623  return InlineResult::failure("blockaddress used outside of callbr");
2624 
2625  // Analyze the cost of this block. If we blow through the threshold, this
2626  // returns false, and we can bail out.
2627  InlineResult IR = analyzeBlock(BB, EphValues);
2628  if (!IR.isSuccess())
2629  return IR;
2630 
2631  Instruction *TI = BB->getTerminator();
2632 
2633  // Add in the live successors by first checking whether we have terminator
2634  // that may be simplified based on the values simplified by this call.
2635  if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
2636  if (BI->isConditional()) {
2637  Value *Cond = BI->getCondition();
2638  if (ConstantInt *SimpleCond =
2639  dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
2640  BasicBlock *NextBB = BI->getSuccessor(SimpleCond->isZero() ? 1 : 0);
2641  BBWorklist.insert(NextBB);
2642  KnownSuccessors[BB] = NextBB;
2643  findDeadBlocks(BB, NextBB);
2644  continue;
2645  }
2646  }
2647  } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
2648  Value *Cond = SI->getCondition();
2649  if (ConstantInt *SimpleCond =
2650  dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
2651  BasicBlock *NextBB = SI->findCaseValue(SimpleCond)->getCaseSuccessor();
2652  BBWorklist.insert(NextBB);
2653  KnownSuccessors[BB] = NextBB;
2654  findDeadBlocks(BB, NextBB);
2655  continue;
2656  }
2657  }
2658 
2659  // If we're unable to select a particular successor, just count all of
2660  // them.
2661  for (unsigned TIdx = 0, TSize = TI->getNumSuccessors(); TIdx != TSize;
2662  ++TIdx)
2663  BBWorklist.insert(TI->getSuccessor(TIdx));
2664 
2665  onBlockAnalyzed(BB);
2666  }
2667 
2668  bool OnlyOneCallAndLocalLinkage = F.hasLocalLinkage() && F.hasOneUse() &&
2669  &F == CandidateCall.getCalledFunction();
2670  // If this is a noduplicate call, we can still inline as long as
2671  // inlining this would cause the removal of the caller (so the instruction
2672  // is not actually duplicated, just moved).
2673  if (!OnlyOneCallAndLocalLinkage && ContainsNoDuplicateCall)
2674  return InlineResult::failure("noduplicate");
2675 
2676  return finalizeAnalysis();
2677 }
2678 
2679 void InlineCostCallAnalyzer::print() {
2680 #define DEBUG_PRINT_STAT(x) dbgs() << " " #x ": " << x << "\n"
2681  if (PrintInstructionComments)
2682  F.print(dbgs(), &Writer);
2683  DEBUG_PRINT_STAT(NumConstantArgs);
2684  DEBUG_PRINT_STAT(NumConstantOffsetPtrArgs);
2685  DEBUG_PRINT_STAT(NumAllocaArgs);
2686  DEBUG_PRINT_STAT(NumConstantPtrCmps);
2687  DEBUG_PRINT_STAT(NumConstantPtrDiffs);
2688  DEBUG_PRINT_STAT(NumInstructionsSimplified);
2689  DEBUG_PRINT_STAT(NumInstructions);
2690  DEBUG_PRINT_STAT(SROACostSavings);
2691  DEBUG_PRINT_STAT(SROACostSavingsLost);
2692  DEBUG_PRINT_STAT(LoadEliminationCost);
2693  DEBUG_PRINT_STAT(ContainsNoDuplicateCall);
2694  DEBUG_PRINT_STAT(Cost);
2695  DEBUG_PRINT_STAT(Threshold);
2696 #undef DEBUG_PRINT_STAT
2697 }
2698 
2699 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2700 /// Dump stats about this call's analysis.
2701 LLVM_DUMP_METHOD void InlineCostCallAnalyzer::dump() { print(); }
2702 #endif
2703 
2704 /// Test that there are no attribute conflicts between Caller and Callee
2705 /// that prevent inlining.
2706 static bool functionsHaveCompatibleAttributes(
2707  Function *Caller, Function *Callee, TargetTransformInfo &TTI,
2708  function_ref<const TargetLibraryInfo &(Function &)> &GetTLI) {
2709  // Note that CalleeTLI must be a copy not a reference. The legacy pass manager
2710  // caches the most recently created TLI in the TargetLibraryInfoWrapperPass
2711  // object, and always returns the same object (which is overwritten on each
2712  // GetTLI call). Therefore we copy the first result.
2713  auto CalleeTLI = GetTLI(*Callee);
2714  return TTI.areInlineCompatible(Caller, Callee) &&
2715  GetTLI(*Caller).areInlineCompatible(CalleeTLI,
2716  InlineCallerSupersetNoBuiltin) &&
2717  AttributeFuncs::areInlineCompatible(*Caller, *Callee);
2718 }
2719 
2720 int llvm::getCallsiteCost(CallBase &Call, const DataLayout &DL) {
2721  int Cost = 0;
2722  for (unsigned I = 0, E = Call.arg_size(); I != E; ++I) {
2723  if (Call.isByValArgument(I)) {
2724  // We approximate the number of loads and stores needed by dividing the
2725  // size of the byval type by the target's pointer size.
2726  PointerType *PTy = cast<PointerType>(Call.getArgOperand(I)->getType());
2727  unsigned TypeSize = DL.getTypeSizeInBits(Call.getParamByValType(I));
2728  unsigned AS = PTy->getAddressSpace();
2729  unsigned PointerSize = DL.getPointerSizeInBits(AS);
2730  // Ceiling division.
2731  unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize;
2732 
2733  // If it generates more than 8 stores it is likely to be expanded as an
2734  // inline memcpy so we take that as an upper bound. Otherwise we assume
2735  // one load and one store per word copied.
2736  // FIXME: The maxStoresPerMemcpy setting from the target should be used
2737  // here instead of a magic number of 8, but it's not available via
2738  // DataLayout.
2739  NumStores = std::min(NumStores, 8U);
2740 
2741  Cost += 2 * NumStores * InlineConstants::InstrCost;
2742  } else {
2743  // For non-byval arguments subtract off one instruction per call
2744  // argument.
2745  Cost += InlineConstants::InstrCost;
2746  }
2747  }
2748  // The call instruction also disappears after inlining.
2749  Cost += InlineConstants::InstrCost + InlineConstants::CallPenalty;
2750  return Cost;
2751 }
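
// Worked example: a byval argument copying a 128-byte (1024-bit) struct on a
// 64-bit target needs ceil(1024 / 64) = 16 word copies, capped at 8, so it
// contributes 2 * 8 * InstrCost; each non-byval argument adds one InstrCost,
// and the removed call itself adds InstrCost plus a call penalty.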
2752 
2753 InlineCost llvm::getInlineCost(
2754  CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI,
2755  function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
2756  function_ref<const TargetLibraryInfo &(Function &)> GetTLI,
2757  function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
2758  ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
2759  return getInlineCost(Call, Call.getCalledFunction(), Params, CalleeTTI,
2760  GetAssumptionCache, GetTLI, GetBFI, PSI, ORE);
2761 }
2762 
2763 Optional<int> llvm::getInliningCostEstimate(
2764  CallBase &Call, TargetTransformInfo &CalleeTTI,
2765  function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
2766  function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
2767  ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
2768  const InlineParams Params = {/* DefaultThreshold*/ 0,
2769  /*HintThreshold*/ {},
2770  /*ColdThreshold*/ {},
2771  /*OptSizeThreshold*/ {},
2772  /*OptMinSizeThreshold*/ {},
2773  /*HotCallSiteThreshold*/ {},
2774  /*LocallyHotCallSiteThreshold*/ {},
2775  /*ColdCallSiteThreshold*/ {},
2776  /*ComputeFullInlineCost*/ true,
2777  /*EnableDeferral*/ true};
2778 
2779  InlineCostCallAnalyzer CA(*Call.getCalledFunction(), Call, Params, CalleeTTI,
2780  GetAssumptionCache, GetBFI, PSI, ORE, true,
2781  /*IgnoreThreshold*/ true);
2782  auto R = CA.analyze();
2783  if (!R.isSuccess())
2784  return None;
2785  return CA.getCost();
2786 }
2787 
2788 Optional<InlineCostFeatures> llvm::getInliningCostFeatures(
2789  CallBase &Call, TargetTransformInfo &CalleeTTI,
2790  function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
2791  function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
2792  ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
2793  InlineCostFeaturesAnalyzer CFA(CalleeTTI, GetAssumptionCache, GetBFI, PSI,
2794  ORE, *Call.getCalledFunction(), Call);
2795  auto R = CFA.analyze();
2796  if (!R.isSuccess())
2797  return None;
2798  return CFA.features();
2799 }
2800 
2801 Optional<InlineResult> llvm::getAttributeBasedInliningDecision(
2802  CallBase &Call, Function *Callee, TargetTransformInfo &CalleeTTI,
2803  function_ref<const TargetLibraryInfo &(Function &)> GetTLI) {
2804 
2805  // Cannot inline indirect calls.
2806  if (!Callee)
2807  return InlineResult::failure("indirect call");
2808 
2809  // When a callee coroutine function is inlined into a caller coroutine
2810  // function before the coro-split pass, the coro-early pass cannot handle
2811  // it well.
2812  // So we won't inline a coroutine function that has not yet been split.
2813  if (Callee->isPresplitCoroutine())
2814  return InlineResult::failure("unsplited coroutine call");
2815 
2816  // Never inline calls with byval arguments that do not have the alloca
2817  // address space. Since byval arguments can be replaced with a copy to an
2818  // alloca, the inlined code would need to be adjusted to handle the fact
2819  // that the argument is in the alloca address space (so it is a little bit
2820  // complicated to solve).
2821  unsigned AllocaAS = Callee->getParent()->getDataLayout().getAllocaAddrSpace();
2822  for (unsigned I = 0, E = Call.arg_size(); I != E; ++I)
2823  if (Call.isByValArgument(I)) {
2824  PointerType *PTy = cast<PointerType>(Call.getArgOperand(I)->getType());
2825  if (PTy->getAddressSpace() != AllocaAS)
2826  return InlineResult::failure("byval arguments without alloca"
2827  " address space");
2828  }
2829 
2830  // Calls to functions with always-inline attributes should be inlined
2831  // whenever possible.
2832  if (Call.hasFnAttr(Attribute::AlwaysInline)) {
2833  auto IsViable = isInlineViable(*Callee);
2834  if (IsViable.isSuccess())
2835  return InlineResult::success();
2836  return InlineResult::failure(IsViable.getFailureReason());
2837  }
2838 
2839  // Never inline functions with conflicting attributes (unless callee has
2840  // always-inline attribute).
2841  Function *Caller = Call.getCaller();
2842  if (!functionsHaveCompatibleAttributes(Caller, Callee, CalleeTTI, GetTLI))
2843  return InlineResult::failure("conflicting attributes");
2844 
2845  // Don't inline this call if the caller has the optnone attribute.
2846  if (Caller->hasOptNone())
2847  return InlineResult::failure("optnone attribute");
2848 
2849  // Don't inline a function that treats null pointer as valid into a caller
2850  // that does not have this attribute.
2851  if (!Caller->nullPointerIsDefined() && Callee->nullPointerIsDefined())
2852  return InlineResult::failure("nullptr definitions incompatible");
2853 
2854  // Don't inline functions which can be interposed at link-time.
2855  if (Callee->isInterposable())
2856  return InlineResult::failure("interposable");
2857 
2858  // Don't inline functions marked noinline.
2859  if (Callee->hasFnAttribute(Attribute::NoInline))
2860  return InlineResult::failure("noinline function attribute");
2861 
2862  // Don't inline call sites marked noinline.
2863  if (Call.isNoInline())
2864  return InlineResult::failure("noinline call site attribute");
2865 
2866  // Don't inline functions if one does not have any stack protector attribute
2867  // but the other does.
2868  if (Caller->hasStackProtectorFnAttr() && !Callee->hasStackProtectorFnAttr())
2869  return InlineResult::failure(
2870  "stack protected caller but callee requested no stack protector");
2871  if (Callee->hasStackProtectorFnAttr() && !Caller->hasStackProtectorFnAttr())
2872  return InlineResult::failure(
2873  "stack protected callee but caller requested no stack protector");
2874 
2875  return None;
2876 }
2877 
2878 InlineCost llvm::getInlineCost(
2879  CallBase &Call, Function *Callee, const InlineParams &Params,
2880  TargetTransformInfo &CalleeTTI,
2881  function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
2882  function_ref<const TargetLibraryInfo &(Function &)> GetTLI,
2883  function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
2884  ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
2885 
2886  auto UserDecision =
2887  llvm::getAttributeBasedInliningDecision(Call, Callee, CalleeTTI, GetTLI);
2888 
2889  if (UserDecision.hasValue()) {
2890  if (UserDecision->isSuccess())
2891  return llvm::InlineCost::getAlways("always inline attribute");
2892  return llvm::InlineCost::getNever(UserDecision->getFailureReason());
2893  }
2894 
2895  LLVM_DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName()
2896  << "... (caller:" << Call.getCaller()->getName()
2897  << ")\n");
2898 
2899  InlineCostCallAnalyzer CA(*Callee, Call, Params, CalleeTTI,
2900  GetAssumptionCache, GetBFI, PSI, ORE);
2901  InlineResult ShouldInline = CA.analyze();
2902 
2903  LLVM_DEBUG(CA.dump());
2904 
2905  // Always make cost benefit based decision explicit.
2906  // We use always/never here since threshold is not meaningful,
2907  // as it's not what drives cost-benefit analysis.
2908  if (CA.wasDecidedByCostBenefit()) {
2909  if (ShouldInline.isSuccess())
2910  return InlineCost::getAlways("benefit over cost",
2911  CA.getCostBenefitPair());
2912  else
2913  return InlineCost::getNever("cost over benefit", CA.getCostBenefitPair());
2914  }
2915 
2916  // Check if there was a reason to force inlining or no inlining.
2917  if (!ShouldInline.isSuccess() && CA.getCost() < CA.getThreshold())
2918  return InlineCost::getNever(ShouldInline.getFailureReason());
2919  if (ShouldInline.isSuccess() && CA.getCost() >= CA.getThreshold())
2920  return InlineCost::getAlways("empty function");
2921 
2922  return llvm::InlineCost::get(CA.getCost(), CA.getThreshold());
2923 }
2924 
2925 InlineResult llvm::isInlineViable(Function &F) {
2926  bool ReturnsTwice = F.hasFnAttribute(Attribute::ReturnsTwice);
2927  for (BasicBlock &BB : F) {
2928  // Disallow inlining of functions which contain indirect branches.
2929  if (isa<IndirectBrInst>(BB.getTerminator()))
2930  return InlineResult::failure("contains indirect branches");
2931 
2932  // Disallow inlining of blockaddresses which are used by non-callbr
2933  // instructions.
2934  if (BB.hasAddressTaken())
2935  for (User *U : BlockAddress::get(&BB)->users())
2936  if (!isa<CallBrInst>(*U))
2937  return InlineResult::failure("blockaddress used outside of callbr");
2938 
2939  for (auto &II : BB) {
2940  CallBase *Call = dyn_cast<CallBase>(&II);
2941  if (!Call)
2942  continue;
2943 
2944  // Disallow recursive calls.
2945  Function *Callee = Call->getCalledFunction();
2946  if (&F == Callee)
2947  return InlineResult::failure("recursive call");
2948 
2949  // Disallow calls which expose returns-twice to a function not previously
2950  // attributed as such.
2951  if (!ReturnsTwice && isa<CallInst>(Call) &&
2952  cast<CallInst>(Call)->canReturnTwice())
2953  return InlineResult::failure("exposes returns-twice attribute");
2954 
2955  if (Callee)
2956  switch (Callee->getIntrinsicID()) {
2957  default:
2958  break;
2959  case llvm::Intrinsic::icall_branch_funnel:
2960  // Disallow inlining of @llvm.icall.branch.funnel because current
2961  // backend can't separate call targets from call arguments.
2962  return InlineResult::failure(
2963  "disallowed inlining of @llvm.icall.branch.funnel");
2964  case llvm::Intrinsic::localescape:
2965  // Disallow inlining functions that call @llvm.localescape. Doing this
2966  // correctly would require major changes to the inliner.
2967  return InlineResult::failure(
2968  "disallowed inlining of @llvm.localescape");
2969  case llvm::Intrinsic::vastart:
2970  // Disallow inlining of functions that initialize VarArgs with
2971  // va_start.
2972  return InlineResult::failure(
2973  "contains VarArgs initialized with va_start");
2974  }
2975  }
2976  }
2977 
2978  return InlineResult::success();
2979 }
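
isInlineViable is the cheap structural filter: it rejects constructs the inliner can never handle (indirect branches, escaped block addresses, recursion, returns-twice exposure, and the listed intrinsics) without running any cost model. A usage sketch, assuming a Function &Callee is in scope:

    InlineResult Viable = isInlineViable(Callee);
    if (!Viable.isSuccess())
      LLVM_DEBUG(dbgs() << Callee.getName() << " is not inlinable: "
                        << Viable.getFailureReason() << "\n");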
2980 
2981 // APIs to create InlineParams based on command line flags and/or other
2982 // parameters.
2983 
2984 InlineParams llvm::getInlineParams(int Threshold) {
2985  InlineParams Params;
2986 
2987  // This field is the threshold to use for a callee by default. This is
2988  // derived from one or more of:
2989  // * optimization or size-optimization levels,
2990  // * a value passed to createFunctionInliningPass function, or
2991  // * the -inline-threshold flag.
2992  // If the -inline-threshold flag is explicitly specified, that is used
2993  // irrespective of anything else.
2994  if (InlineThreshold.getNumOccurrences() > 0)
2995  Params.DefaultThreshold = InlineThreshold;
2996  else
2997  Params.DefaultThreshold = Threshold;
2998 
2999  // Set the HintThreshold knob from the -inlinehint-threshold.
3000  Params.HintThreshold = HintThreshold;
3001 
3002  // Set the HotCallSiteThreshold knob from the -hot-callsite-threshold.
3003  Params.HotCallSiteThreshold = HotCallSiteThreshold;
3004 
3005  // If the -locally-hot-callsite-threshold is explicitly specified, use it to
3006  // populate LocallyHotCallSiteThreshold. Later, we populate
3007  // Params.LocallyHotCallSiteThreshold from -locally-hot-callsite-threshold if
3008  // we know that optimization level is O3 (in the getInlineParams variant that
3009  // takes the opt and size levels).
3010  // FIXME: Remove this check (and make the assignment unconditional) after
3011  // addressing size regression issues at O2.
3012  if (LocallyHotCallSiteThreshold.getNumOccurrences() > 0)
3013  Params.LocallyHotCallSiteThreshold = LocallyHotCallSiteThreshold;
3014 
3015  // Set the ColdCallSiteThreshold knob from the
3016  // -inline-cold-callsite-threshold.
3017  Params.ColdCallSiteThreshold = ColdCallSiteThreshold;
3018 
3019  // Set the OptMinSizeThreshold and OptSizeThreshold params only if the
3020  // -inlinehint-threshold commandline option is not explicitly given. If that
3021  // option is present, then its value applies even for callees with size and
3022  // minsize attributes.
3023  // If the -inline-threshold is not specified, set the ColdThreshold from the
3024  // -inlinecold-threshold even if it is not explicitly passed. If
3025  // -inline-threshold is specified, then -inlinecold-threshold needs to be
3026  // explicitly specified to set the ColdThreshold knob
3027  if (InlineThreshold.getNumOccurrences() == 0) {
3028  Params.OptMinSizeThreshold = InlineConstants::OptMinSizeThreshold;
3029  Params.OptSizeThreshold = InlineConstants::OptSizeThreshold;
3030  Params.ColdThreshold = ColdThreshold;
3031  } else if (ColdThreshold.getNumOccurrences() > 0) {
3032  Params.ColdThreshold = ColdThreshold;
3033  }
3034  return Params;
3035 }
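
A short sketch of the precedence described in the comments above: an explicit -inline-threshold on the command line overrides the programmatic value.

    InlineParams P = getInlineParams(/*Threshold=*/500);
    // Without -inline-threshold given: P.DefaultThreshold == 500.
    // With -inline-threshold=100 given: P.DefaultThreshold == 100.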
3036 
3037 InlineParams llvm::getInlineParams() {
3038  return getInlineParams(DefaultThreshold);
3039 }
3040 
3041 // Compute the default threshold for inlining based on the opt level and the
3042 // size opt level.
3043 static int computeThresholdFromOptLevels(unsigned OptLevel,
3044  unsigned SizeOptLevel) {
3045  if (OptLevel > 2)
3046  return InlineConstants::OptAggressiveThreshold;
3047  if (SizeOptLevel == 1) // -Os
3048  return InlineConstants::OptSizeThreshold;
3049  if (SizeOptLevel == 2) // -Oz
3050  return InlineConstants::OptMinSizeThreshold;
3051  return DefaultThreshold;
3052 }
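
Worked examples of the mapping, assuming the InlineConstants defaults from InlineCost.h at the time of writing (OptAggressiveThreshold = 250, OptSizeThreshold = 50, OptMinSizeThreshold = 5) and the 225 default threshold:

    computeThresholdFromOptLevels(/*OptLevel=*/3, /*SizeOptLevel=*/0); // 250
    computeThresholdFromOptLevels(/*OptLevel=*/2, /*SizeOptLevel=*/1); // 50, -Os
    computeThresholdFromOptLevels(/*OptLevel=*/2, /*SizeOptLevel=*/2); // 5, -Oz
    computeThresholdFromOptLevels(/*OptLevel=*/2, /*SizeOptLevel=*/0); // 225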
3053 
3054 InlineParams llvm::getInlineParams(unsigned OptLevel, unsigned SizeOptLevel) {
3055  auto Params =
3056  getInlineParams(computeThresholdFromOptLevels(OptLevel, SizeOptLevel));
3057  // At O3, use the value of -locally-hot-callsite-threshold option to populate
3058  // Params.LocallyHotCallSiteThreshold. Below O3, this flag has effect only
3059  // when it is specified explicitly.
3060  if (OptLevel > 2)
3061  Params.LocallyHotCallSiteThreshold = LocallyHotCallSiteThreshold;
3062  return Params;
3063 }
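
A sketch of the O3-only behavior described above; the 525 default comes from the -locally-hot-callsite-threshold flag's initializer:

    InlineParams AtO3 = getInlineParams(/*OptLevel=*/3, /*SizeOptLevel=*/0);
    // AtO3.LocallyHotCallSiteThreshold is populated (525 by default).
    InlineParams AtO2 = getInlineParams(/*OptLevel=*/2, /*SizeOptLevel=*/0);
    // AtO2.LocallyHotCallSiteThreshold stays unset unless the flag was given.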
3064 
3065 PreservedAnalyses
3066 InlineCostAnnotationPrinterPass::run(Function &F,
3067  FunctionAnalysisManager &FAM) {
3068  PrintInstructionComments = true;
3069  std::function<AssumptionCache &(Function &)> GetAssumptionCache =
3070  [&](Function &F) -> AssumptionCache & {
3071  return FAM.getResult<AssumptionAnalysis>(F);
3072  };
3073  Module *M = F.getParent();
3074  ProfileSummaryInfo PSI(*M);
3075  DataLayout DL(M);
3076  TargetTransformInfo TTI(DL);
3077  // FIXME: Redesign the usage of InlineParams to expand the scope of this pass.
3078  // In the current implementation, the type of InlineParams doesn't matter as
3079  // the pass serves only for verification of inliner's decisions.
3080  // We can add a flag which determines InlineParams for this run. Right now,
3081  // the default InlineParams are used.
3082  const InlineParams Params = llvm::getInlineParams();
3083  for (BasicBlock &BB : F) {
3084  for (Instruction &I : BB) {
3085  if (CallInst *CI = dyn_cast<CallInst>(&I)) {
3086  Function *CalledFunction = CI->getCalledFunction();
3087  if (!CalledFunction || CalledFunction->isDeclaration())
3088  continue;
3089  OptimizationRemarkEmitter ORE(CalledFunction);
3090  InlineCostCallAnalyzer ICCA(*CalledFunction, *CI, Params, TTI,
3091  GetAssumptionCache, nullptr, &PSI, &ORE);
3092  ICCA.analyze();
3093  OS << " Analyzing call of " << CalledFunction->getName()
3094  << "... (caller:" << CI->getCaller()->getName() << ")\n";
3095  ICCA.print();
3096  }
3097  }
3098  }
3099  return PreservedAnalyses::all();
3100 }
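
A minimal driver sketch for this printer using the new pass manager; the registration boilerplate is the standard PassBuilder setup, and the pass should also be reachable from the command line as print<inline-cost> (e.g. opt -passes='print<inline-cost>' -disable-output test.ll):

    #include "llvm/Analysis/InlineCost.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Passes/PassBuilder.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    static void printInlineCosts(Module &M) {
      // Standard new-pass-manager analysis setup.
      PassBuilder PB;
      LoopAnalysisManager LAM;
      FunctionAnalysisManager FAM;
      CGSCCAnalysisManager CGAM;
      ModuleAnalysisManager MAM;
      PB.registerModuleAnalyses(MAM);
      PB.registerCGSCCAnalyses(CGAM);
      PB.registerFunctionAnalyses(FAM);
      PB.registerLoopAnalyses(LAM);
      PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);

      // Annotate every defined function's call sites with inline costs.
      FunctionPassManager FPM;
      FPM.addPass(InlineCostAnnotationPrinterPass(errs()));
      for (Function &F : M)
        if (!F.isDeclaration())
          FPM.run(F, FAM);
    }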