InlineCost.cpp (LLVM 13.0.0git)
//===- InlineCost.cpp - Cost analysis for inliner -------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements inline cost analysis.
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/InlineCost.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/AssemblyAnnotationWriter.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "inline-cost"

STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed");

static cl::opt<int>
    DefaultThreshold("inlinedefault-threshold", cl::Hidden, cl::init(225),
                     cl::ZeroOrMore,
                     cl::desc("Default amount of inlining to perform"));

static cl::opt<bool> PrintInstructionComments(
    "print-instruction-comments", cl::Hidden, cl::init(false),
    cl::desc("Prints comments for instruction based on inline cost analysis"));

static cl::opt<int> InlineThreshold(
    "inline-threshold", cl::Hidden, cl::init(225), cl::ZeroOrMore,
    cl::desc("Control the amount of inlining to perform (default = 225)"));

static cl::opt<int> HintThreshold(
    "inlinehint-threshold", cl::Hidden, cl::init(325), cl::ZeroOrMore,
    cl::desc("Threshold for inlining functions with inline hint"));

static cl::opt<int>
    ColdCallSiteThreshold("inline-cold-callsite-threshold", cl::Hidden,
                          cl::init(45), cl::ZeroOrMore,
                          cl::desc("Threshold for inlining cold callsites"));

static cl::opt<bool> InlineEnableCostBenefitAnalysis(
    "inline-enable-cost-benefit-analysis", cl::Hidden, cl::init(false),
    cl::desc("Enable the cost-benefit analysis for the inliner"));

static cl::opt<int> InlineSavingsMultiplier(
    "inline-savings-multiplier", cl::Hidden, cl::init(8), cl::ZeroOrMore,
    cl::desc("Multiplier to multiply cycle savings by during inlining"));

static cl::opt<int>
    InlineSizeAllowance("inline-size-allowance", cl::Hidden, cl::init(100),
                        cl::ZeroOrMore,
                        cl::desc("The maximum size of a callee that gets "
                                 "inlined without sufficient cycle savings"));

// We introduce this threshold to help performance of instrumentation based
// PGO before we actually hook up inliner with analysis passes such as BPI and
// BFI.
static cl::opt<int> ColdThreshold(
    "inlinecold-threshold", cl::Hidden, cl::init(45), cl::ZeroOrMore,
    cl::desc("Threshold for inlining functions with cold attribute"));

static cl::opt<int>
    HotCallSiteThreshold("hot-callsite-threshold", cl::Hidden, cl::init(3000),
                         cl::ZeroOrMore,
                         cl::desc("Threshold for hot callsites "));

static cl::opt<int> LocallyHotCallSiteThreshold(
    "locally-hot-callsite-threshold", cl::Hidden, cl::init(525), cl::ZeroOrMore,
    cl::desc("Threshold for locally hot callsites "));

static cl::opt<int> ColdCallSiteRelFreq(
    "cold-callsite-rel-freq", cl::Hidden, cl::init(2), cl::ZeroOrMore,
    cl::desc("Maximum block frequency, expressed as a percentage of caller's "
             "entry frequency, for a callsite to be cold in the absence of "
             "profile information."));

static cl::opt<int> HotCallSiteRelFreq(
    "hot-callsite-rel-freq", cl::Hidden, cl::init(60), cl::ZeroOrMore,
    cl::desc("Minimum block frequency, expressed as a multiple of caller's "
             "entry frequency, for a callsite to be hot in the absence of "
             "profile information."));

static cl::opt<bool> OptComputeFullInlineCost(
    "inline-cost-full", cl::Hidden, cl::init(false), cl::ZeroOrMore,
    cl::desc("Compute the full inline cost of a call site even when the cost "
             "exceeds the threshold."));

static cl::opt<bool> InlineCallerSupersetNoBuiltin(
    "inline-caller-superset-nobuiltin", cl::Hidden, cl::init(true),
    cl::ZeroOrMore,
    cl::desc("Allow inlining when caller has a superset of callee's nobuiltin "
             "attributes."));

static cl::opt<bool> DisableGEPConstOperand(
    "disable-gep-const-evaluation", cl::Hidden, cl::init(false),
    cl::desc("Disables evaluation of GetElementPtr with constant operands"));
130 
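// Example: these flags can be supplied to any tool that runs the inliner; a
// hypothetical invocation (for illustration only) might look like
//   opt -passes='cgscc(inline)' -inline-threshold=500 -S input.ll
// The flag names and defaults are the ones defined above.
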
namespace {
class InlineCostCallAnalyzer;

// This struct is used to store information about inline cost of a
// particular instruction
struct InstructionCostDetail {
  int CostBefore = 0;
  int CostAfter = 0;
  int ThresholdBefore = 0;
  int ThresholdAfter = 0;

  int getThresholdDelta() const { return ThresholdAfter - ThresholdBefore; }

  int getCostDelta() const { return CostAfter - CostBefore; }

  bool hasThresholdChanged() const { return ThresholdAfter != ThresholdBefore; }
};

class InlineCostAnnotationWriter : public AssemblyAnnotationWriter {
private:
  InlineCostCallAnalyzer *const ICCA;

public:
  InlineCostAnnotationWriter(InlineCostCallAnalyzer *ICCA) : ICCA(ICCA) {}
  virtual void emitInstructionAnnot(const Instruction *I,
                                    formatted_raw_ostream &OS) override;
};

/// Carry out call site analysis, in order to evaluate inlinability.
/// NOTE: the type is currently used as implementation detail of functions such
/// as llvm::getInlineCost. Note the function_ref constructor parameters - the
/// expectation is that they come from the outer scope, from the wrapper
/// functions. If we want to support constructing CallAnalyzer objects where
/// lambdas are provided inline at construction, or where the object needs to
/// otherwise survive past the scope of the provided functions, we need to
/// revisit the argument types.
class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
  typedef InstVisitor<CallAnalyzer, bool> Base;
  friend class InstVisitor<CallAnalyzer, bool>;

protected:
  virtual ~CallAnalyzer() {}
  /// The TargetTransformInfo available for this compilation.
  const TargetTransformInfo &TTI;

  /// Getter for the cache of @llvm.assume intrinsics.
  function_ref<AssumptionCache &(Function &)> GetAssumptionCache;

  /// Getter for BlockFrequencyInfo
  function_ref<BlockFrequencyInfo &(Function &)> GetBFI;

  /// Profile summary information.
  ProfileSummaryInfo *PSI;

  /// The called function.
  Function &F;

  // Cache the DataLayout since we use it a lot.
  const DataLayout &DL;

  /// The OptimizationRemarkEmitter available for this compilation.
  OptimizationRemarkEmitter *ORE;

  /// The candidate callsite being analyzed. Please do not use this to do
  /// analysis in the caller function; we want the inline cost query to be
  /// easily cacheable. Instead, use the cover function paramHasAttr.
  CallBase &CandidateCall;

  /// Extension points for handling callsite features.
  // Called before a basic block was analyzed.
  virtual void onBlockStart(const BasicBlock *BB) {}

  /// Called after a basic block was analyzed.
  virtual void onBlockAnalyzed(const BasicBlock *BB) {}

  /// Called before an instruction was analyzed.
  virtual void onInstructionAnalysisStart(const Instruction *I) {}

  /// Called after an instruction was analyzed.
  virtual void onInstructionAnalysisFinish(const Instruction *I) {}

  /// Called at the end of the analysis of the callsite. Return the outcome of
  /// the analysis, i.e. 'InlineResult(true)' if the inlining may happen, or
  /// the reason it can't.
  virtual InlineResult finalizeAnalysis() { return InlineResult::success(); }
  /// Called when we're about to start processing a basic block, and every time
  /// we are done processing an instruction. Return true if there is no point in
  /// continuing the analysis (e.g. we've determined already the call site is
  /// too expensive to inline).
  virtual bool shouldStop() { return false; }

  /// Called before the analysis of the callee body starts (with callsite
  /// contexts propagated). It checks callsite-specific information. Return a
  /// reason analysis can't continue if that's the case, or 'true' if it may
  /// continue.
  virtual InlineResult onAnalysisStart() { return InlineResult::success(); }
  /// Called if the analysis engine decides SROA cannot be done for the given
  /// alloca.
  virtual void onDisableSROA(AllocaInst *Arg) {}

  /// Called when the analysis engine determines load elimination won't happen.
  virtual void onDisableLoadElimination() {}

  /// Called to account for a call.
  virtual void onCallPenalty() {}

  /// Called to account for the expectation the inlining would result in a load
  /// elimination.
  virtual void onLoadEliminationOpportunity() {}

  /// Called to account for the cost of argument setup for the Call in the
  /// callee's body (not the callsite currently under analysis).
  virtual void onCallArgumentSetup(const CallBase &Call) {}

  /// Called to account for a load relative intrinsic.
  virtual void onLoadRelativeIntrinsic() {}

  /// Called to account for a lowered call.
  virtual void onLoweredCall(Function *F, CallBase &Call, bool IsIndirectCall) {
  }

  /// Account for a jump table of given size. Return false to stop further
  /// processing of the switch instruction.
  virtual bool onJumpTable(unsigned JumpTableSize) { return true; }

  /// Account for a case cluster of given size. Return false to stop further
  /// processing of the instruction.
  virtual bool onCaseCluster(unsigned NumCaseCluster) { return true; }

  /// Called at the end of processing a switch instruction, with the given
  /// number of case clusters.
  virtual void onFinalizeSwitch(unsigned JumpTableSize,
                                unsigned NumCaseCluster) {}

  /// Called to account for any other instruction not specifically accounted
  /// for.
  virtual void onMissedSimplification() {}

  /// Start accounting potential benefits due to SROA for the given alloca.
  virtual void onInitializeSROAArg(AllocaInst *Arg) {}

  /// Account SROA savings for the AllocaInst value.
  virtual void onAggregateSROAUse(AllocaInst *V) {}

  bool handleSROA(Value *V, bool DoNotDisable) {
    // Check for SROA candidates in comparisons.
    if (auto *SROAArg = getSROAArgForValueOrNull(V)) {
      if (DoNotDisable) {
        onAggregateSROAUse(SROAArg);
        return true;
      }
      disableSROAForArg(SROAArg);
    }
    return false;
  }

  bool IsCallerRecursive = false;
  bool IsRecursiveCall = false;
  bool ExposesReturnsTwice = false;
  bool HasDynamicAlloca = false;
  bool ContainsNoDuplicateCall = false;
  bool HasReturn = false;
  bool HasIndirectBr = false;
  bool HasUninlineableIntrinsic = false;
  bool InitsVargArgs = false;

  /// Number of bytes allocated statically by the callee.
  uint64_t AllocatedSize = 0;
  unsigned NumInstructions = 0;
  unsigned NumVectorInstructions = 0;

  /// While we walk the potentially-inlined instructions, we build up and
  /// maintain a mapping of simplified values specific to this callsite. The
  /// idea is to propagate any special information we have about arguments to
  /// this call through the inlinable section of the function, and account for
  /// likely simplifications post-inlining. The most important aspect we track
  /// is CFG altering simplifications -- when we prove a basic block dead, that
  /// can cause dramatic shifts in the cost of inlining a function.
  DenseMap<Value *, Constant *> SimplifiedValues;

  /// Keep track of the values which map back (through function arguments) to
  /// allocas on the caller stack which could be simplified through SROA.
  DenseMap<Value *, AllocaInst *> SROAArgValues;

  /// Keep track of Allocas for which we believe we may get SROA optimization.
  DenseSet<AllocaInst *> EnabledSROAAllocas;

  /// Keep track of values which map to a pointer base and constant offset.
  DenseMap<Value *, std::pair<Value *, APInt>> ConstantOffsetPtrs;

  /// Keep track of dead blocks due to the constant arguments.
  SetVector<BasicBlock *> DeadBlocks;

  /// The mapping of the blocks to their known unique successors due to the
  /// constant arguments.
  DenseMap<BasicBlock *, BasicBlock *> KnownSuccessors;

  /// Model the elimination of repeated loads that is expected to happen
  /// whenever we simplify away the stores that would otherwise cause them to
  /// be loads.
  bool EnableLoadElimination;
  SmallPtrSet<Value *, 16> LoadAddrSet;

  AllocaInst *getSROAArgForValueOrNull(Value *V) const {
    auto It = SROAArgValues.find(V);
    if (It == SROAArgValues.end() || EnabledSROAAllocas.count(It->second) == 0)
      return nullptr;
    return It->second;
  }

  // Custom simplification helper routines.
  bool isAllocaDerivedArg(Value *V);
  void disableSROAForArg(AllocaInst *SROAArg);
  void disableSROA(Value *V);
  void findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB);
  void disableLoadElimination();
  bool isGEPFree(GetElementPtrInst &GEP);
  bool canFoldInboundsGEP(GetElementPtrInst &I);
  bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset);
  bool simplifyCallSite(Function *F, CallBase &Call);
  template <typename Callable>
  bool simplifyInstruction(Instruction &I, Callable Evaluate);
  ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V);

  /// Return true if the given argument to the function being considered for
  /// inlining has the given attribute set either at the call site or the
  /// function declaration. Primarily used to inspect call site specific
  /// attributes since these can be more precise than the ones on the callee
  /// itself.
  bool paramHasAttr(Argument *A, Attribute::AttrKind Attr);

  /// Return true if the given value is known non null within the callee if
  /// inlined through this particular callsite.
  bool isKnownNonNullInCallee(Value *V);

  /// Return true if size growth is allowed when inlining the callee at \p Call.
  bool allowSizeGrowth(CallBase &Call);

  // Custom analysis routines.
  InlineResult analyzeBlock(BasicBlock *BB,
                            SmallPtrSetImpl<const Value *> &EphValues);

  // Disable several entry points to the visitor so we don't accidentally use
  // them by declaring but not defining them here.
  void visit(Module *);
  void visit(Module &);
  void visit(Function *);
  void visit(Function &);
  void visit(BasicBlock *);
  void visit(BasicBlock &);

  // Provide base case for our instruction visit.
  bool visitInstruction(Instruction &I);

  // Our visit overrides.
  bool visitAlloca(AllocaInst &I);
  bool visitPHI(PHINode &I);
  bool visitGetElementPtr(GetElementPtrInst &I);
  bool visitBitCast(BitCastInst &I);
  bool visitPtrToInt(PtrToIntInst &I);
  bool visitIntToPtr(IntToPtrInst &I);
  bool visitCastInst(CastInst &I);
  bool visitCmpInst(CmpInst &I);
  bool visitSub(BinaryOperator &I);
  bool visitBinaryOperator(BinaryOperator &I);
  bool visitFNeg(UnaryOperator &I);
  bool visitLoad(LoadInst &I);
  bool visitStore(StoreInst &I);
  bool visitExtractValue(ExtractValueInst &I);
  bool visitInsertValue(InsertValueInst &I);
  bool visitCallBase(CallBase &Call);
  bool visitReturnInst(ReturnInst &RI);
  bool visitBranchInst(BranchInst &BI);
  bool visitSelectInst(SelectInst &SI);
  bool visitSwitchInst(SwitchInst &SI);
  bool visitIndirectBrInst(IndirectBrInst &IBI);
  bool visitResumeInst(ResumeInst &RI);
  bool visitCleanupReturnInst(CleanupReturnInst &RI);
  bool visitCatchReturnInst(CatchReturnInst &RI);
  bool visitUnreachableInst(UnreachableInst &I);

public:
  CallAnalyzer(
      Function &Callee, CallBase &Call, const TargetTransformInfo &TTI,
      function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
      function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr,
      ProfileSummaryInfo *PSI = nullptr,
      OptimizationRemarkEmitter *ORE = nullptr)
      : TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI),
        PSI(PSI), F(Callee), DL(F.getParent()->getDataLayout()), ORE(ORE),
        CandidateCall(Call), EnableLoadElimination(true) {}

  InlineResult analyze();

  Optional<Constant *> getSimplifiedValue(Instruction *I) {
    if (SimplifiedValues.find(I) != SimplifiedValues.end())
      return SimplifiedValues[I];
    return None;
  }

  // Keep a bunch of stats about the cost savings found so we can print them
  // out when debugging.
  unsigned NumConstantArgs = 0;
  unsigned NumConstantOffsetPtrArgs = 0;
  unsigned NumAllocaArgs = 0;
  unsigned NumConstantPtrCmps = 0;
  unsigned NumConstantPtrDiffs = 0;
  unsigned NumInstructionsSimplified = 0;

  void dump();
};

/// FIXME: if it is necessary to derive from InlineCostCallAnalyzer, note
/// the FIXME in onLoweredCall, when instantiating an InlineCostCallAnalyzer
class InlineCostCallAnalyzer final : public CallAnalyzer {
  const int CostUpperBound = INT_MAX - InlineConstants::InstrCost - 1;
  const bool ComputeFullInlineCost;
  int LoadEliminationCost = 0;
  /// Bonus to be applied when percentage of vector instructions in callee is
  /// high (see more details in updateThreshold).
  int VectorBonus = 0;
  /// Bonus to be applied when the callee has only one reachable basic block.
  int SingleBBBonus = 0;

  /// Tunable parameters that control the analysis.
  const InlineParams &Params;

  // This DenseMap stores the delta change in cost and threshold after
  // accounting for the given instruction. The map is filled only with the
  // flag PrintInstructionComments on.
  DenseMap<const Instruction *, InstructionCostDetail> InstructionCostDetailMap;

  /// Upper bound for the inlining cost. Bonuses are being applied to account
  /// for speculative "expected profit" of the inlining decision.
  int Threshold = 0;

  /// Attempt to evaluate indirect calls to boost their inline cost.
  const bool BoostIndirectCalls;

  /// Ignore the threshold when finalizing analysis.
  const bool IgnoreThreshold;

  // True if the cost-benefit-analysis-based inliner is enabled.
  const bool CostBenefitAnalysisEnabled;

  /// Inlining cost measured in abstract units, accounts for all the
  /// instructions expected to be executed for a given function invocation.
  /// Instructions that are statically proven to be dead based on call-site
  /// arguments are not counted here.
  int Cost = 0;

  // The cumulative cost at the beginning of the basic block being analyzed. At
  // the end of analyzing each basic block, "Cost - CostAtBBStart" represents
  // the size of that basic block.
  int CostAtBBStart = 0;

  // The static size of live but cold basic blocks. This is "static" in the
  // sense that it's not weighted by profile counts at all.
  int ColdSize = 0;

  // Whether inlining is decided by cost-benefit analysis.
  bool DecidedByCostBenefit = false;

  bool SingleBB = true;

  unsigned SROACostSavings = 0;
  unsigned SROACostSavingsLost = 0;

  /// The mapping of caller Alloca values to their accumulated cost savings. If
  /// we have to disable SROA for one of the allocas, this tells us how much
  /// cost must be added.
  DenseMap<AllocaInst *, int> SROAArgCosts;

  /// Return true if \p Call is a cold callsite.
  bool isColdCallSite(CallBase &Call, BlockFrequencyInfo *CallerBFI);

  /// Update Threshold based on callsite properties such as callee
  /// attributes and callee hotness for PGO builds. The Callee is explicitly
  /// passed to support analyzing indirect calls whose target is inferred by
  /// analysis.
  void updateThreshold(CallBase &Call, Function &Callee);
  /// Return a higher threshold if \p Call is a hot callsite.
  Optional<int> getHotCallSiteThreshold(CallBase &Call,
                                        BlockFrequencyInfo *CallerBFI);

  /// Handle a capped 'int' increment for Cost.
  void addCost(int64_t Inc, int64_t UpperBound = INT_MAX) {
    assert(UpperBound > 0 && UpperBound <= INT_MAX && "invalid upper bound");
    Cost = (int)std::min(UpperBound, Cost + Inc);
  }

  void onDisableSROA(AllocaInst *Arg) override {
    auto CostIt = SROAArgCosts.find(Arg);
    if (CostIt == SROAArgCosts.end())
      return;
    addCost(CostIt->second);
    SROACostSavings -= CostIt->second;
    SROACostSavingsLost += CostIt->second;
    SROAArgCosts.erase(CostIt);
  }

  void onDisableLoadElimination() override {
    addCost(LoadEliminationCost);
    LoadEliminationCost = 0;
  }
  void onCallPenalty() override { addCost(InlineConstants::CallPenalty); }
  void onCallArgumentSetup(const CallBase &Call) override {
    // Pay the price of the argument setup. We account for the average 1
    // instruction per call argument setup here.
    addCost(Call.arg_size() * InlineConstants::InstrCost);
  }
  void onLoadRelativeIntrinsic() override {
    // This is normally lowered to 4 LLVM instructions.
    addCost(3 * InlineConstants::InstrCost);
  }
  void onLoweredCall(Function *F, CallBase &Call,
                     bool IsIndirectCall) override {
    // We account for the average 1 instruction per call argument setup here.
    addCost(Call.arg_size() * InlineConstants::InstrCost);

    // If we have a constant that we are calling as a function, we can peer
    // through it and see the function target. This happens not infrequently
    // during devirtualization and so we want to give it a hefty bonus for
    // inlining, but cap that bonus in the event that inlining wouldn't pan out.
    // Pretend to inline the function, with a custom threshold.
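    // For example, if the pretend inlining below finishes with Cost = 50
    // against Threshold = 100, this callsite's cost is reduced by the unspent
    // 50 units of threshold.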
    if (IsIndirectCall && BoostIndirectCalls) {
      auto IndirectCallParams = Params;
      IndirectCallParams.DefaultThreshold =
          InlineConstants::IndirectCallThreshold;
      /// FIXME: if InlineCostCallAnalyzer is derived from, this may need
      /// to instantiate the derived class.
      InlineCostCallAnalyzer CA(*F, Call, IndirectCallParams, TTI,
                                GetAssumptionCache, GetBFI, PSI, ORE, false);
      if (CA.analyze().isSuccess()) {
        // We were able to inline the indirect call! Subtract the cost from the
        // threshold to get the bonus we want to apply, but don't go below zero.
        Cost -= std::max(0, CA.getThreshold() - CA.getCost());
      }
    } else
      // Otherwise simply add the cost for merely making the call.
      addCost(InlineConstants::CallPenalty);
  }

  void onFinalizeSwitch(unsigned JumpTableSize,
                        unsigned NumCaseCluster) override {
    // If suitable for a jump table, consider the cost for the table size and
    // branch to destination.
    // Maximum valid cost increased in this function.
    if (JumpTableSize) {
      int64_t JTCost = (int64_t)JumpTableSize * InlineConstants::InstrCost +
                       4 * InlineConstants::InstrCost;

      addCost(JTCost, (int64_t)CostUpperBound);
      return;
    }
    // Considering forming a binary search, we should find the number of nodes
    // which is same as the number of comparisons when lowered. For a given
    // number of clusters, n, we can define a recursive function, f(n), to find
    // the number of nodes in the tree. The recursion is:
    //   f(n) = 1 + f(n/2) + f(n - n/2), when n > 3,
    //   f(n) = n, when n <= 3.
    // This will lead to a binary tree where the leaf should be either f(2) or
    // f(3) when n > 3. So, the number of comparisons from leaves should be n,
    // while the number of non-leaf nodes should be:
    //   2^(log2(n) - 1) - 1
    //     = 2^log2(n) * 2^-1 - 1
    //     = n / 2 - 1.
    // Considering comparisons from leaf and non-leaf nodes, we can estimate the
    // number of comparisons in a simple closed form:
    //   n + n / 2 - 1 = n * 3 / 2 - 1
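    // For example, n = 8 case clusters yield 8 * 3 / 2 - 1 = 11 expected
    // comparisons, i.e. 22 instruction-cost units at one compare plus one
    // branch each, which is what the code below charges.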
    if (NumCaseCluster <= 3) {
      // Suppose a comparison includes one compare and one conditional branch.
      addCost(NumCaseCluster * 2 * InlineConstants::InstrCost);
      return;
    }

    int64_t ExpectedNumberOfCompare = 3 * (int64_t)NumCaseCluster / 2 - 1;
    int64_t SwitchCost =
        ExpectedNumberOfCompare * 2 * InlineConstants::InstrCost;

    addCost(SwitchCost, (int64_t)CostUpperBound);
  }
  void onMissedSimplification() override {
    addCost(InlineConstants::InstrCost);
  }

  void onInitializeSROAArg(AllocaInst *Arg) override {
    assert(Arg != nullptr &&
           "Should not initialize SROA costs for null value.");
    SROAArgCosts[Arg] = 0;
  }

  void onAggregateSROAUse(AllocaInst *SROAArg) override {
    auto CostIt = SROAArgCosts.find(SROAArg);
    assert(CostIt != SROAArgCosts.end() &&
           "expected this argument to have a cost");
    CostIt->second += InlineConstants::InstrCost;
    SROACostSavings += InlineConstants::InstrCost;
  }

  void onBlockStart(const BasicBlock *BB) override { CostAtBBStart = Cost; }

  void onBlockAnalyzed(const BasicBlock *BB) override {
    if (CostBenefitAnalysisEnabled) {
      // Keep track of the static size of live but cold basic blocks. For now,
      // we define a cold basic block to be one that's never executed.
      assert(GetBFI && "GetBFI must be available");
      BlockFrequencyInfo *BFI = &(GetBFI(F));
      assert(BFI && "BFI must be available");
      auto ProfileCount = BFI->getBlockProfileCount(BB);
      assert(ProfileCount.hasValue());
      if (ProfileCount.getValue() == 0)
        ColdSize += Cost - CostAtBBStart;
    }

    auto *TI = BB->getTerminator();
    // If we had any successors at this point, then post-inlining is likely to
    // have them as well. Note that we assume any basic blocks which existed
    // due to branches or switches which folded above will also fold after
    // inlining.
    if (SingleBB && TI->getNumSuccessors() > 1) {
      // Take off the bonus we applied to the threshold.
      Threshold -= SingleBBBonus;
      SingleBB = false;
    }
  }

  void onInstructionAnalysisStart(const Instruction *I) override {
    // This function is called to store the initial cost of inlining before
    // the given instruction was assessed.
    if (!PrintInstructionComments)
      return;
    InstructionCostDetailMap[I].CostBefore = Cost;
    InstructionCostDetailMap[I].ThresholdBefore = Threshold;
  }

  void onInstructionAnalysisFinish(const Instruction *I) override {
    // This function is called to find new values of cost and threshold after
    // the instruction has been assessed.
    if (!PrintInstructionComments)
      return;
    InstructionCostDetailMap[I].CostAfter = Cost;
    InstructionCostDetailMap[I].ThresholdAfter = Threshold;
  }

  bool isCostBenefitAnalysisEnabled() {
    if (!PSI || !PSI->hasProfileSummary())
      return false;

    if (!GetBFI)
      return false;

    if (InlineEnableCostBenefitAnalysis.getNumOccurrences()) {
      // Honor the explicit request from the user.
      if (!InlineEnableCostBenefitAnalysis)
        return false;
    } else {
      // Otherwise, require instrumentation profile.
      if (!PSI->hasInstrumentationProfile())
        return false;
    }

    auto *Caller = CandidateCall.getParent()->getParent();
    if (!Caller->getEntryCount())
      return false;

    BlockFrequencyInfo *CallerBFI = &(GetBFI(*Caller));
    if (!CallerBFI)
      return false;

    // For now, limit to hot call site.
    if (!PSI->isHotCallSite(CandidateCall, CallerBFI))
      return false;

    // Make sure we have a nonzero entry count.
    auto EntryCount = F.getEntryCount();
    if (!EntryCount || !EntryCount.getCount())
      return false;

    BlockFrequencyInfo *CalleeBFI = &(GetBFI(F));
    if (!CalleeBFI)
      return false;

    return true;
  }

  // Determine whether we should inline the given call site, taking into account
  // both the size cost and the cycle savings. Return None if we don't have
  // sufficient profiling information to determine.
  Optional<bool> costBenefitAnalysis() {
    if (!CostBenefitAnalysisEnabled)
      return None;

    // buildInlinerPipeline in the pass builder sets HotCallSiteThreshold to 0
    // for the prelink phase of the AutoFDO + ThinLTO build. Honor the logic by
    // falling back to the cost-based metric.
    // TODO: Improve this hacky condition.
    if (Threshold == 0)
      return None;

    assert(GetBFI);
    BlockFrequencyInfo *CalleeBFI = &(GetBFI(F));
    assert(CalleeBFI);

    // The cycle savings expressed as the sum of InlineConstants::InstrCost
    // multiplied by the estimated dynamic count of each instruction we can
    // avoid. Savings come from the call site cost, such as argument setup and
    // the call instruction, as well as the instructions that are folded.
    //
    // We use 128-bit APInt here to avoid potential overflow. This variable
    // should stay well below 10^^24 (or 2^^80) in practice. This "worst" case
    // assumes that we can avoid or fold a billion instructions, each with a
    // profile count of 10^^15 -- roughly the number of cycles for a 24-hour
    // period on a 4GHz machine.
    APInt CycleSavings(128, 0);

    for (auto &BB : F) {
      APInt CurrentSavings(128, 0);
      for (auto &I : BB) {
        if (BranchInst *BI = dyn_cast<BranchInst>(&I)) {
          // Count a conditional branch as savings if it becomes unconditional.
          if (BI->isConditional() &&
              dyn_cast_or_null<ConstantInt>(
                  SimplifiedValues.lookup(BI->getCondition()))) {
            CurrentSavings += InlineConstants::InstrCost;
          }
        } else if (Value *V = dyn_cast<Value>(&I)) {
          // Count an instruction as savings if we can fold it.
          if (SimplifiedValues.count(V)) {
            CurrentSavings += InlineConstants::InstrCost;
          }
        }
      }

      auto ProfileCount = CalleeBFI->getBlockProfileCount(&BB);
      assert(ProfileCount.hasValue());
      CurrentSavings *= ProfileCount.getValue();
      CycleSavings += CurrentSavings;
    }

    // Compute the cycle savings per call.
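    // Adding EntryCount / 2 before the unsigned division below rounds the
    // per-call savings to the nearest integer instead of truncating.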
    auto EntryProfileCount = F.getEntryCount();
    assert(EntryProfileCount.hasValue() && EntryProfileCount.getCount());
    auto EntryCount = EntryProfileCount.getCount();
    CycleSavings += EntryCount / 2;
    CycleSavings = CycleSavings.udiv(EntryCount);

    // Compute the total savings for the call site.
    auto *CallerBB = CandidateCall.getParent();
    BlockFrequencyInfo *CallerBFI = &(GetBFI(*(CallerBB->getParent())));
    CycleSavings += getCallsiteCost(this->CandidateCall, DL);
    CycleSavings *= CallerBFI->getBlockProfileCount(CallerBB).getValue();

    // Remove the cost of the cold basic blocks.
    int Size = Cost - ColdSize;

    // Allow tiny callees to be inlined regardless of whether they meet the
    // savings threshold.
    Size = Size > InlineSizeAllowance ? Size - InlineSizeAllowance : 1;

    // Return true if the savings justify the cost of inlining. Specifically,
    // we evaluate the following inequality:
    //
    //  CycleSavings      PSI->getOrCompHotCountThreshold()
    // -------------- >= -----------------------------------
    //       Size              InlineSavingsMultiplier
    //
    // Note that the left hand side is specific to a call site. The right hand
    // side is a constant for the entire executable.
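    //
    // Equivalently, cross-multiplying:
    //
    //   CycleSavings * InlineSavingsMultiplier
    //       >= Size * PSI->getOrCompHotCountThreshold()
    //
    // which is the form the APInt arithmetic below evaluates.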
    APInt LHS = CycleSavings;
    LHS *= InlineSavingsMultiplier;
    APInt RHS(128, PSI->getOrCompHotCountThreshold());
    RHS *= Size;
    return LHS.uge(RHS);
  }

  InlineResult finalizeAnalysis() override {
    // Loops generally act a lot like calls in that they act like barriers to
    // movement, require a certain amount of setup, etc. So when optimising for
    // size, we penalise any call sites that perform loops. We do this after all
    // other costs here, so will likely only be dealing with relatively small
    // functions (and hence DT and LI will hopefully be cheap).
    auto *Caller = CandidateCall.getFunction();
    if (Caller->hasMinSize()) {
      DominatorTree DT(F);
      LoopInfo LI(DT);
      int NumLoops = 0;
      for (Loop *L : LI) {
        // Ignore loops that will not be executed
        if (DeadBlocks.count(L->getHeader()))
          continue;
        NumLoops++;
      }
      addCost(NumLoops * InlineConstants::CallPenalty);
    }

    // We applied the maximum possible vector bonus at the beginning. Now,
    // subtract the excess bonus, if any, from the Threshold before
    // comparing against Cost.
    if (NumVectorInstructions <= NumInstructions / 10)
      Threshold -= VectorBonus;
    else if (NumVectorInstructions <= NumInstructions / 2)
      Threshold -= VectorBonus / 2;

    if (auto Result = costBenefitAnalysis()) {
      DecidedByCostBenefit = true;
      if (Result.getValue())
        return InlineResult::success();
      else
        return InlineResult::failure("Cost over threshold.");
    }

    if (IgnoreThreshold || Cost < std::max(1, Threshold))
      return InlineResult::success();
    return InlineResult::failure("Cost over threshold.");
  }
  bool shouldStop() override {
    // Bail out the moment we cross the threshold. This means we'll under-count
    // the cost, but only when undercounting doesn't matter.
    return !IgnoreThreshold && Cost >= Threshold && !ComputeFullInlineCost;
  }

  void onLoadEliminationOpportunity() override {
    LoadEliminationCost += InlineConstants::InstrCost;
  }

  InlineResult onAnalysisStart() override {
    // Perform some tweaks to the cost and threshold based on the direct
    // callsite information.

    // We want to more aggressively inline vector-dense kernels, so up the
    // threshold, and we'll lower it if the % of vector instructions gets too
    // low. Note that these bonuses are somewhat arbitrary and evolved over
    // time by accident as much as because they are principled bonuses.
    //
    // FIXME: It would be nice to remove all such bonuses. At least it would be
    // nice to base the bonus values on something more scientific.
    assert(NumInstructions == 0);
    assert(NumVectorInstructions == 0);

    // Update the threshold based on callsite properties
    updateThreshold(CandidateCall, F);

    // While Threshold depends on commandline options that can take negative
    // values, we want to enforce the invariant that the computed threshold and
    // bonuses are non-negative.
    assert(Threshold >= 0);
    assert(SingleBBBonus >= 0);
    assert(VectorBonus >= 0);

    // Speculatively apply all possible bonuses to Threshold. If cost exceeds
    // this Threshold any time, and cost cannot decrease, we can stop processing
    // the rest of the function body.
    Threshold += (SingleBBBonus + VectorBonus);

    // Give out bonuses for the callsite, as the instructions setting them up
    // will be gone after inlining.
    addCost(-getCallsiteCost(this->CandidateCall, DL));

    // If this function uses the coldcc calling convention, prefer not to inline
    // it.
    if (F.getCallingConv() == CallingConv::Cold)
      addCost(InlineConstants::ColdccPenalty);

    // Check if we're done. This can happen due to bonuses and penalties.
    if (Cost >= Threshold && !ComputeFullInlineCost)
      return InlineResult::failure("high cost");

    return InlineResult::success();
  }

public:
  InlineCostCallAnalyzer(
      Function &Callee, CallBase &Call, const InlineParams &Params,
      const TargetTransformInfo &TTI,
      function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
      function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr,
      ProfileSummaryInfo *PSI = nullptr,
      OptimizationRemarkEmitter *ORE = nullptr, bool BoostIndirect = true,
      bool IgnoreThreshold = false)
      : CallAnalyzer(Callee, Call, TTI, GetAssumptionCache, GetBFI, PSI, ORE),
        ComputeFullInlineCost(OptComputeFullInlineCost ||
                              Params.ComputeFullInlineCost || ORE ||
                              isCostBenefitAnalysisEnabled()),
        Params(Params), Threshold(Params.DefaultThreshold),
        BoostIndirectCalls(BoostIndirect), IgnoreThreshold(IgnoreThreshold),
        CostBenefitAnalysisEnabled(isCostBenefitAnalysisEnabled()),
        Writer(this) {}

  /// Annotation Writer for instruction details
  InlineCostAnnotationWriter Writer;

  void dump();

  // Prints the same analysis as dump(), but its definition is not dependent
  // on the build.
  void print();

  Optional<InstructionCostDetail> getCostDetails(const Instruction *I) {
    if (InstructionCostDetailMap.find(I) != InstructionCostDetailMap.end())
      return InstructionCostDetailMap[I];
    return None;
  }

  virtual ~InlineCostCallAnalyzer() {}
  int getThreshold() { return Threshold; }
  int getCost() { return Cost; }
  bool wasDecidedByCostBenefit() { return DecidedByCostBenefit; }
};
} // namespace

/// Test whether the given value is an Alloca-derived function argument.
bool CallAnalyzer::isAllocaDerivedArg(Value *V) {
  return SROAArgValues.count(V);
}

void CallAnalyzer::disableSROAForArg(AllocaInst *SROAArg) {
  onDisableSROA(SROAArg);
  EnabledSROAAllocas.erase(SROAArg);
  disableLoadElimination();
}

void InlineCostAnnotationWriter::emitInstructionAnnot(
    const Instruction *I, formatted_raw_ostream &OS) {
  // The cost of inlining the given instruction is always printed.
  // The threshold delta is printed only when it is non-zero, which happens
  // when we decided to give a bonus at a particular instruction.
  Optional<InstructionCostDetail> Record = ICCA->getCostDetails(I);
  if (!Record)
    OS << "; No analysis for the instruction";
  else {
    OS << "; cost before = " << Record->CostBefore
       << ", cost after = " << Record->CostAfter
       << ", threshold before = " << Record->ThresholdBefore
       << ", threshold after = " << Record->ThresholdAfter << ", ";
    OS << "cost delta = " << Record->getCostDelta();
    if (Record->hasThresholdChanged())
      OS << ", threshold delta = " << Record->getThresholdDelta();
  }
  auto C = ICCA->getSimplifiedValue(const_cast<Instruction *>(I));
  if (C) {
    OS << ", simplified to ";
    C.getValue()->print(OS, true);
  }
  OS << "\n";
}

/// If 'V' maps to a SROA candidate, disable SROA for it.
void CallAnalyzer::disableSROA(Value *V) {
  if (auto *SROAArg = getSROAArgForValueOrNull(V)) {
    disableSROAForArg(SROAArg);
  }
}

void CallAnalyzer::disableLoadElimination() {
  if (EnableLoadElimination) {
    onDisableLoadElimination();
    EnableLoadElimination = false;
  }
}

/// Accumulate a constant GEP offset into an APInt if possible.
///
/// Returns false if unable to compute the offset for any reason. Respects any
/// simplified values known during the analysis of this callsite.
bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
  unsigned IntPtrWidth = DL.getIndexTypeSizeInBits(GEP.getType());
  assert(IntPtrWidth == Offset.getBitWidth());

  for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
       GTI != GTE; ++GTI) {
    ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
    if (!OpC)
      if (Constant *SimpleOp = SimplifiedValues.lookup(GTI.getOperand()))
        OpC = dyn_cast<ConstantInt>(SimpleOp);
    if (!OpC)
      return false;
    if (OpC->isZero())
      continue;

    // Handle a struct index, which adds its field offset to the pointer.
    if (StructType *STy = GTI.getStructTypeOrNull()) {
      unsigned ElementIdx = OpC->getZExtValue();
      const StructLayout *SL = DL.getStructLayout(STy);
      Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx));
      continue;
    }

    APInt TypeSize(IntPtrWidth, DL.getTypeAllocSize(GTI.getIndexedType()));
    Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize;
  }
  return true;
}
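
// Worked example (hypothetical IR, assuming a typical 64-bit data layout): for
//   %f = getelementptr inbounds {i32, i64}, {i32, i64}* %base, i32 0, i32 1
// the first index contributes nothing and the struct index adds the field
// offset of member 1, which is 8 bytes once alignment padding is accounted for.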

/// Use TTI to check whether a GEP is free.
///
/// Respects any simplified values known during the analysis of this callsite.
bool CallAnalyzer::isGEPFree(GetElementPtrInst &GEP) {
  SmallVector<Value *, 4> Operands;
  Operands.push_back(GEP.getOperand(0));
  for (const Use &Op : GEP.indices())
    if (Constant *SimpleOp = SimplifiedValues.lookup(Op))
      Operands.push_back(SimpleOp);
    else
      Operands.push_back(Op);
  return TTI.getUserCost(&GEP, Operands,
                         TargetTransformInfo::TCK_SizeAndLatency) ==
         TargetTransformInfo::TCC_Free;
}

bool CallAnalyzer::visitAlloca(AllocaInst &I) {
  disableSROA(I.getOperand(0));

  // Check whether inlining will turn a dynamic alloca into a static
  // alloca and handle that case.
  if (I.isArrayAllocation()) {
    Constant *Size = SimplifiedValues.lookup(I.getArraySize());
    if (auto *AllocSize = dyn_cast_or_null<ConstantInt>(Size)) {
      // Sometimes a dynamic alloca could be converted into a static alloca
      // after this constant prop, and become a huge static alloca on an
      // unconditional CFG path. Avoid inlining if this is going to happen above
      // a threshold.
      // FIXME: If the threshold is removed or lowered too much, we could end up
      // being too pessimistic and prevent inlining non-problematic code. This
      // could result in unintended perf regressions. A better overall strategy
      // is needed to track stack usage during inlining.
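      // For example (hypothetical IR), "%buf = alloca i64, i32 %n" where %n
      // simplifies to the caller-side constant 1000 effectively becomes an
      // 8000-byte static alloca after inlining.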
      Type *Ty = I.getAllocatedType();
      AllocatedSize = SaturatingMultiplyAdd(
          AllocSize->getLimitedValue(),
          DL.getTypeAllocSize(Ty).getKnownMinSize(), AllocatedSize);
      if (AllocatedSize > InlineConstants::MaxSimplifiedDynamicAllocaToInline)
        HasDynamicAlloca = true;
      return false;
    }
  }

  // Accumulate the allocated size.
  if (I.isStaticAlloca()) {
    Type *Ty = I.getAllocatedType();
    AllocatedSize =
        SaturatingAdd(DL.getTypeAllocSize(Ty).getKnownMinSize(), AllocatedSize);
  }

  // FIXME: This is overly conservative. Dynamic allocas are inefficient for
  // a variety of reasons, and so we would like to not inline them into
  // functions which don't currently have a dynamic alloca. This simply
  // disables inlining altogether in the presence of a dynamic alloca.
  if (!I.isStaticAlloca())
    HasDynamicAlloca = true;

  return false;
}

bool CallAnalyzer::visitPHI(PHINode &I) {
  // FIXME: We need to propagate SROA *disabling* through phi nodes, even
  // though we don't want to propagate its bonuses. The idea is to disable
  // SROA if it *might* be used in an inappropriate manner.

  // Phi nodes are always zero-cost.
  // FIXME: Pointer sizes may differ between different address spaces, so do we
  // need to use correct address space in the call to getPointerSizeInBits here?
  // Or could we skip the getPointerSizeInBits call completely? As far as I can
  // see the ZeroOffset is used as a dummy value, so we can probably use any
  // bit width for the ZeroOffset?
  APInt ZeroOffset = APInt::getNullValue(DL.getPointerSizeInBits(0));
  bool CheckSROA = I.getType()->isPointerTy();

  // Track the constant or pointer with constant offset we've seen so far.
  Constant *FirstC = nullptr;
  std::pair<Value *, APInt> FirstBaseAndOffset = {nullptr, ZeroOffset};
  Value *FirstV = nullptr;

  for (unsigned i = 0, e = I.getNumIncomingValues(); i != e; ++i) {
    BasicBlock *Pred = I.getIncomingBlock(i);
    // If the incoming block is dead, skip the incoming block.
    if (DeadBlocks.count(Pred))
      continue;
    // If the parent block of phi is not the known successor of the incoming
    // block, skip the incoming block.
    BasicBlock *KnownSuccessor = KnownSuccessors[Pred];
    if (KnownSuccessor && KnownSuccessor != I.getParent())
      continue;

    Value *V = I.getIncomingValue(i);
    // If the incoming value is this phi itself, skip the incoming value.
    if (&I == V)
      continue;

    Constant *C = dyn_cast<Constant>(V);
    if (!C)
      C = SimplifiedValues.lookup(V);

    std::pair<Value *, APInt> BaseAndOffset = {nullptr, ZeroOffset};
    if (!C && CheckSROA)
      BaseAndOffset = ConstantOffsetPtrs.lookup(V);

    if (!C && !BaseAndOffset.first)
      // The incoming value is neither a constant nor a pointer with constant
      // offset, exit early.
      return true;

    if (FirstC) {
      if (FirstC == C)
        // If we've seen a constant incoming value before and it is the same
        // constant we see this time, continue checking the next incoming value.
        continue;
      // Otherwise early exit because we either see a different constant or saw
      // a constant before but we have a pointer with constant offset this time.
      return true;
    }

    if (FirstV) {
      // The same logic as above, but check pointer with constant offset here.
      if (FirstBaseAndOffset == BaseAndOffset)
        continue;
      return true;
    }

    if (C) {
      // This is the 1st time we've seen a constant, record it.
      FirstC = C;
      continue;
    }

    // The remaining case is that this is the 1st time we've seen a pointer with
    // constant offset, record it.
    FirstV = V;
    FirstBaseAndOffset = BaseAndOffset;
  }

  // Check if we can map phi to a constant.
  if (FirstC) {
    SimplifiedValues[&I] = FirstC;
    return true;
  }

  // Check if we can map phi to a pointer with constant offset.
  if (FirstBaseAndOffset.first) {
    ConstantOffsetPtrs[&I] = FirstBaseAndOffset;

    if (auto *SROAArg = getSROAArgForValueOrNull(FirstV))
      SROAArgValues[&I] = SROAArg;
  }

  return true;
}

/// Check we can fold GEPs of constant-offset call site argument pointers.
/// This requires target data and inbounds GEPs.
///
/// \return true if the specified GEP can be folded.
bool CallAnalyzer::canFoldInboundsGEP(GetElementPtrInst &I) {
  // Check if we have a base + offset for the pointer.
  std::pair<Value *, APInt> BaseAndOffset =
      ConstantOffsetPtrs.lookup(I.getPointerOperand());
  if (!BaseAndOffset.first)
    return false;

  // Check if the offset of this GEP is constant, and if so accumulate it
  // into Offset.
  if (!accumulateGEPOffset(cast<GEPOperator>(I), BaseAndOffset.second))
    return false;

  // Add the result as a new mapping to Base + Offset.
  ConstantOffsetPtrs[&I] = BaseAndOffset;

  return true;
}

bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) {
  auto *SROAArg = getSROAArgForValueOrNull(I.getPointerOperand());

  // Lambda to check whether a GEP's indices are all constant.
  auto IsGEPOffsetConstant = [&](GetElementPtrInst &GEP) {
    for (const Use &Op : GEP.indices())
      if (!isa<Constant>(Op) && !SimplifiedValues.lookup(Op))
        return false;
    return true;
  };

  if (!DisableGEPConstOperand)
    if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
          SmallVector<Constant *, 2> Indices;
          for (unsigned int Index = 1; Index < COps.size(); ++Index)
            Indices.push_back(COps[Index]);
          return ConstantExpr::getGetElementPtr(I.getSourceElementType(),
                                                COps[0], Indices,
                                                I.isInBounds());
        }))
      return true;

  if ((I.isInBounds() && canFoldInboundsGEP(I)) || IsGEPOffsetConstant(I)) {
    if (SROAArg)
      SROAArgValues[&I] = SROAArg;

    // Constant GEPs are modeled as free.
    return true;
  }

  // Variable GEPs will require math and will disable SROA.
  if (SROAArg)
    disableSROAForArg(SROAArg);
  return isGEPFree(I);
}

/// Simplify \p I if its operands are constants and update SimplifiedValues.
/// \p Evaluate is a callable specific to instruction type that evaluates the
/// instruction when all the operands are constants.
template <typename Callable>
bool CallAnalyzer::simplifyInstruction(Instruction &I, Callable Evaluate) {
  SmallVector<Constant *, 2> COps;
  for (Value *Op : I.operands()) {
    Constant *COp = dyn_cast<Constant>(Op);
    if (!COp)
      COp = SimplifiedValues.lookup(Op);
    if (!COp)
      return false;
    COps.push_back(COp);
  }
  auto *C = Evaluate(COps);
  if (!C)
    return false;
  SimplifiedValues[&I] = C;
  return true;
}

bool CallAnalyzer::visitBitCast(BitCastInst &I) {
  // Propagate constants through bitcasts.
  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
        return ConstantExpr::getBitCast(COps[0], I.getType());
      }))
    return true;

  // Track base/offsets through casts
  std::pair<Value *, APInt> BaseAndOffset =
      ConstantOffsetPtrs.lookup(I.getOperand(0));
  // Casts don't change the offset, just wrap it up.
  if (BaseAndOffset.first)
    ConstantOffsetPtrs[&I] = BaseAndOffset;

  // Also look for SROA candidates here.
  if (auto *SROAArg = getSROAArgForValueOrNull(I.getOperand(0)))
    SROAArgValues[&I] = SROAArg;

  // Bitcasts are always zero cost.
  return true;
}

bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
  // Propagate constants through ptrtoint.
  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
        return ConstantExpr::getPtrToInt(COps[0], I.getType());
      }))
    return true;

  // Track base/offset pairs when converted to a plain integer provided the
  // integer is large enough to represent the pointer.
  unsigned IntegerSize = I.getType()->getScalarSizeInBits();
  unsigned AS = I.getOperand(0)->getType()->getPointerAddressSpace();
  if (IntegerSize == DL.getPointerSizeInBits(AS)) {
    std::pair<Value *, APInt> BaseAndOffset =
        ConstantOffsetPtrs.lookup(I.getOperand(0));
    if (BaseAndOffset.first)
      ConstantOffsetPtrs[&I] = BaseAndOffset;
  }

  // This is really weird. Technically, ptrtoint will disable SROA. However,
  // unless that ptrtoint is *used* somewhere in the live basic blocks after
  // inlining, it will be nuked, and SROA should proceed. All of the uses which
  // would block SROA would also block SROA if applied directly to a pointer,
  // and so we can just add the integer in here. The only places where SROA is
  // preserved either cannot fire on an integer, or won't in-and-of themselves
  // disable SROA (ext) w/o some later use that we would see and disable.
  if (auto *SROAArg = getSROAArgForValueOrNull(I.getOperand(0)))
    SROAArgValues[&I] = SROAArg;

  return TTI.getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency) ==
         TargetTransformInfo::TCC_Free;
}

bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
  // Propagate constants through inttoptr.
  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
        return ConstantExpr::getIntToPtr(COps[0], I.getType());
      }))
    return true;

  // Track base/offset pairs when round-tripped through a pointer without
  // modifications provided the integer is not too large.
  Value *Op = I.getOperand(0);
  unsigned IntegerSize = Op->getType()->getScalarSizeInBits();
  if (IntegerSize <= DL.getPointerTypeSizeInBits(I.getType())) {
    std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Op);
    if (BaseAndOffset.first)
      ConstantOffsetPtrs[&I] = BaseAndOffset;
  }

  // "Propagate" SROA here in the same manner as we do for ptrtoint above.
  if (auto *SROAArg = getSROAArgForValueOrNull(Op))
    SROAArgValues[&I] = SROAArg;

  return TTI.getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency) ==
         TargetTransformInfo::TCC_Free;
}

bool CallAnalyzer::visitCastInst(CastInst &I) {
  // Propagate constants through casts.
  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
        return ConstantExpr::getCast(I.getOpcode(), COps[0], I.getType());
      }))
    return true;

  // Disable SROA in the face of arbitrary casts we don't explicitly list
  // elsewhere.
  disableSROA(I.getOperand(0));

  // If this is a floating-point cast, and the target says this operation
  // is expensive, this may eventually become a library call. Treat the cost
  // as such.
  switch (I.getOpcode()) {
  case Instruction::FPTrunc:
  case Instruction::FPExt:
  case Instruction::UIToFP:
  case Instruction::SIToFP:
  case Instruction::FPToUI:
  case Instruction::FPToSI:
    if (TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive)
      onCallPenalty();
    break;
  default:
    break;
  }

  return TTI.getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency) ==
         TargetTransformInfo::TCC_Free;
}

bool CallAnalyzer::paramHasAttr(Argument *A, Attribute::AttrKind Attr) {
  return CandidateCall.paramHasAttr(A->getArgNo(), Attr);
}

bool CallAnalyzer::isKnownNonNullInCallee(Value *V) {
  // Does the *call site* have the NonNull attribute set on an argument? We
  // use the attribute on the call site to memoize any analysis done in the
  // caller. This will also trip if the callee function has a non-null
  // parameter attribute, but that's a less interesting case because hopefully
  // the callee would already have been simplified based on that.
  if (Argument *A = dyn_cast<Argument>(V))
    if (paramHasAttr(A, Attribute::NonNull))
      return true;

  // Is this an alloca in the caller? This is distinct from the attribute case
  // above because attributes aren't updated within the inliner itself and we
  // always want to catch the alloca derived case.
  if (isAllocaDerivedArg(V))
    // We can actually predict the result of comparisons between an
    // alloca-derived value and null. Note that this fires regardless of
    // SROA firing.
    return true;

  return false;
}

bool CallAnalyzer::allowSizeGrowth(CallBase &Call) {
  // If the normal destination of the invoke or the parent block of the call
  // site is unreachable-terminated, there is little point in inlining this
  // unless there is literally zero cost.
  // FIXME: Note that it is possible that an unreachable-terminated block has a
  // hot entry. For example, in the scenario below, inlining hot_call_X() may be
  // beneficial:
  // main() {
  //   hot_call_1();
  //   ...
  //   hot_call_N()
  //   exit(0);
  // }
  // For now, we are not handling this corner case here as it is rare in real
  // code. In future, we should elaborate this based on BPI and BFI in more
  // general threshold adjusting heuristics in updateThreshold().
  if (InvokeInst *II = dyn_cast<InvokeInst>(&Call)) {
    if (isa<UnreachableInst>(II->getNormalDest()->getTerminator()))
      return false;
  } else if (isa<UnreachableInst>(Call.getParent()->getTerminator()))
    return false;

  return true;
}

bool InlineCostCallAnalyzer::isColdCallSite(CallBase &Call,
                                            BlockFrequencyInfo *CallerBFI) {
  // If global profile summary is available, then callsite's coldness is
  // determined based on that.
  if (PSI && PSI->hasProfileSummary())
    return PSI->isColdCallSite(Call, CallerBFI);

  // Otherwise we need BFI to be available.
  if (!CallerBFI)
    return false;

  // Determine if the callsite is cold relative to caller's entry. We could
  // potentially cache the computation of scaled entry frequency, but the added
  // complexity is not worth it unless this scaling shows up high in the
  // profiles.
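  // For example, with the default cold-callsite-rel-freq of 2, a callsite is
  // treated as cold when its block frequency is below 2% of the caller's
  // entry frequency.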
  const BranchProbability ColdProb(ColdCallSiteRelFreq, 100);
  auto CallSiteBB = Call.getParent();
  auto CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB);
  auto CallerEntryFreq =
      CallerBFI->getBlockFreq(&(Call.getCaller()->getEntryBlock()));
  return CallSiteFreq < CallerEntryFreq * ColdProb;
}

Optional<int>
InlineCostCallAnalyzer::getHotCallSiteThreshold(CallBase &Call,
                                                BlockFrequencyInfo *CallerBFI) {

  // If global profile summary is available, then callsite's hotness is
  // determined based on that.
  if (PSI && PSI->hasProfileSummary() && PSI->isHotCallSite(Call, CallerBFI))
    return Params.HotCallSiteThreshold;

  // Otherwise we need BFI to be available and to have a locally hot callsite
  // threshold.
  if (!CallerBFI || !Params.LocallyHotCallSiteThreshold)
    return None;

  // Determine if the callsite is hot relative to caller's entry. We could
  // potentially cache the computation of scaled entry frequency, but the added
  // complexity is not worth it unless this scaling shows up high in the
  // profiles.
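  // For example, with the default hot-callsite-rel-freq of 60, a callsite is
  // treated as locally hot when its block frequency is at least 60 times the
  // caller's entry frequency.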
  auto CallSiteBB = Call.getParent();
  auto CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB).getFrequency();
  auto CallerEntryFreq = CallerBFI->getEntryFreq();
  if (CallSiteFreq >= CallerEntryFreq * HotCallSiteRelFreq)
    return Params.LocallyHotCallSiteThreshold;

  // Otherwise treat it normally.
  return None;
}
1468 
1469 void InlineCostCallAnalyzer::updateThreshold(CallBase &Call, Function &Callee) {
1470  // If no size growth is allowed for this inlining, set Threshold to 0.
1471  if (!allowSizeGrowth(Call)) {
1472  Threshold = 0;
1473  return;
1474  }
1475 
1476  Function *Caller = Call.getCaller();
1477 
1478  // return min(A, B) if B is valid.
1479  auto MinIfValid = [](int A, Optional<int> B) {
1480  return B ? std::min(A, B.getValue()) : A;
1481  };
1482 
1483  // return max(A, B) if B is valid.
1484  auto MaxIfValid = [](int A, Optional<int> B) {
1485  return B ? std::max(A, B.getValue()) : A;
1486  };
1487 
1488  // Various bonus percentages. These are multiplied by Threshold to get the
1489  // bonus values.
1490  // SingleBBBonus: This bonus is applied if the callee has a single reachable
1491  // basic block at the given callsite context. This is speculatively applied
1492  // and withdrawn if more than one basic block is seen.
1493  //
1494  // LastCallToStaticBonus: This large bonus is applied to ensure the inlining
1495  // of the last call to a static function as inlining such functions is
1496  // guaranteed to reduce code size.
1497  //
1498  // These bonus percentages may be set to 0 based on properties of the caller
1499  // and the callsite.
1500  int SingleBBBonusPercent = 50;
1501  int VectorBonusPercent = TTI.getInlinerVectorBonusPercent();
1502  int LastCallToStaticBonus = InlineConstants::LastCallToStaticBonus;
1503 
1504  // Lambda to set all the above bonus and bonus percentages to 0.
1505  auto DisallowAllBonuses = [&]() {
1506  SingleBBBonusPercent = 0;
1507  VectorBonusPercent = 0;
1508  LastCallToStaticBonus = 0;
1509  };
1510 
1511  // Use the OptMinSizeThreshold or OptSizeThreshold knob if they are available
1512  // and reduce the threshold if the caller has the necessary attribute.
1513  if (Caller->hasMinSize()) {
1514  Threshold = MinIfValid(Threshold, Params.OptMinSizeThreshold);
1515  // For minsize, we want to disable the single BB bonus and the vector
1516  // bonuses, but not the last-call-to-static bonus. Inlining the last call to
1517  // a static function will, at the minimum, eliminate the parameter setup and
1518  // call/return instructions.
1519  SingleBBBonusPercent = 0;
1520  VectorBonusPercent = 0;
1521  } else if (Caller->hasOptSize())
1522  Threshold = MinIfValid(Threshold, Params.OptSizeThreshold);
1523 
1524  // Adjust the threshold based on inlinehint attribute and profile based
1525  // hotness information if the caller does not have MinSize attribute.
1526  if (!Caller->hasMinSize()) {
1527  if (Callee.hasFnAttribute(Attribute::InlineHint))
1528  Threshold = MaxIfValid(Threshold, Params.HintThreshold);
1529 
1530  // FIXME: After switching to the new passmanager, simplify the logic below
1531  // by checking only the callsite hotness/coldness as we will reliably
1532  // have local profile information.
1533  //
1534  // Callsite hotness and coldness can be determined if sample profile is
1535  // used (which adds hotness metadata to calls) or if caller's
1536  // BlockFrequencyInfo is available.
1537  BlockFrequencyInfo *CallerBFI = GetBFI ? &(GetBFI(*Caller)) : nullptr;
1538  auto HotCallSiteThreshold = getHotCallSiteThreshold(Call, CallerBFI);
1539  if (!Caller->hasOptSize() && HotCallSiteThreshold) {
1540  LLVM_DEBUG(dbgs() << "Hot callsite.\n");
1541  // FIXME: This should update the threshold only if it exceeds the
1542  // current threshold, but AutoFDO + ThinLTO currently relies on this
1543  // behavior to prevent inlining of hot callsites during ThinLTO
1544  // compile phase.
1545  Threshold = HotCallSiteThreshold.getValue();
1546  } else if (isColdCallSite(Call, CallerBFI)) {
1547  LLVM_DEBUG(dbgs() << "Cold callsite.\n");
1548  // Do not apply bonuses for a cold callsite including the
1549  // LastCallToStatic bonus. While this bonus might result in code size
1550  // reduction, it can cause the size of a non-cold caller to increase
1551  // preventing it from being inlined.
1552  DisallowAllBonuses();
1553  Threshold = MinIfValid(Threshold, Params.ColdCallSiteThreshold);
1554  } else if (PSI) {
1555  // Use callee's global profile information only if we have no way of
1556  // determining this via callsite information.
1557  if (PSI->isFunctionEntryHot(&Callee)) {
1558  LLVM_DEBUG(dbgs() << "Hot callee.\n");
1559  // If callsite hotness can not be determined, we may still know
1560  // that the callee is hot and treat it as a weaker hint for threshold
1561  // increase.
1562  Threshold = MaxIfValid(Threshold, Params.HintThreshold);
1563  } else if (PSI->isFunctionEntryCold(&Callee)) {
1564  LLVM_DEBUG(dbgs() << "Cold callee.\n");
1565  // Do not apply bonuses for a cold callee including the
1566  // LastCallToStatic bonus. While this bonus might result in code size
1567  // reduction, it can cause the size of a non-cold caller to increase
1568  // preventing it from being inlined.
1569  DisallowAllBonuses();
1570  Threshold = MinIfValid(Threshold, Params.ColdThreshold);
1571  }
1572  }
1573  }
1574 
1575  Threshold += TTI.adjustInliningThreshold(&Call);
1576 
1577  // Finally, take the target-specific inlining threshold multiplier into
1578  // account.
1579  Threshold *= TTI.getInliningThresholdMultiplier();
1580 
1581  SingleBBBonus = Threshold * SingleBBBonusPercent / 100;
1582  VectorBonus = Threshold * VectorBonusPercent / 100;
1583 
1584  bool OnlyOneCallAndLocalLinkage =
1585  F.hasLocalLinkage() && F.hasOneUse() && &F == Call.getCalledFunction();
1586  // If there is only one call of the function, and it has internal linkage,
1587  // the cost of inlining it drops dramatically. It may seem odd to update
1588  // Cost in updateThreshold, but the bonus depends on the logic in this method.
1589  if (OnlyOneCallAndLocalLinkage)
1590  Cost -= LastCallToStaticBonus;
1591 }
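
// [Editorial worked example, not part of InlineCost.cpp] For a plain -O2
// callsite with Threshold = 225, SingleBBBonusPercent = 50, and the generic
// TTI vector bonus of 150 percent, the bonuses computed above are:
//
//   SingleBBBonus = 225 * 50 / 100  = 112
//   VectorBonus   = 225 * 150 / 100 = 337
//
// so a single-block, heavily vectorized callee may cost up to
// 225 + 112 + 337 = 674 units before inlining is rejected. Both bonuses are
// speculative and are withdrawn if the callee turns out to have multiple
// blocks or few vector instructions.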
1592 
1593 bool CallAnalyzer::visitCmpInst(CmpInst &I) {
1594  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
1595  // First try to handle simplified comparisons.
1596  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
1597  return ConstantExpr::getCompare(I.getPredicate(), COps[0], COps[1]);
1598  }))
1599  return true;
1600 
1601  if (I.getOpcode() == Instruction::FCmp)
1602  return false;
1603 
1604  // Otherwise look for a comparison between constant offset pointers with
1605  // a common base.
1606  Value *LHSBase, *RHSBase;
1607  APInt LHSOffset, RHSOffset;
1608  std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
1609  if (LHSBase) {
1610  std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
1611  if (RHSBase && LHSBase == RHSBase) {
1612  // We have common bases, fold the icmp to a constant based on the
1613  // offsets.
1614  Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
1615  Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
1616  if (Constant *C = ConstantExpr::getICmp(I.getPredicate(), CLHS, CRHS)) {
1617  SimplifiedValues[&I] = C;
1618  ++NumConstantPtrCmps;
1619  return true;
1620  }
1621  }
1622  }
1623 
1624  // If the comparison is an equality comparison with null, we can simplify it
1625  // if we know the value (argument) can't be null
1626  if (I.isEquality() && isa<ConstantPointerNull>(I.getOperand(1)) &&
1627  isKnownNonNullInCallee(I.getOperand(0))) {
1628  bool IsNotEqual = I.getPredicate() == CmpInst::ICMP_NE;
1629  SimplifiedValues[&I] = IsNotEqual ? ConstantInt::getTrue(I.getType())
1630  : ConstantInt::getFalse(I.getType());
1631  return true;
1632  }
1633  return handleSROA(I.getOperand(0), isa<ConstantPointerNull>(I.getOperand(1)));
1634 }
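
// [Editorial note, not part of InlineCost.cpp] Example of the common-base
// fold above: once %base is tracked in ConstantOffsetPtrs (e.g. via the GEP
// visitor), the comparison in
//
//   %p = getelementptr inbounds i8, i8* %base, i64 4
//   %q = getelementptr inbounds i8, i8* %base, i64 8
//   %c = icmp ult i8* %p, %q
//
// reduces both operands to (%base, offset) pairs with equal bases, so the
// predicate is evaluated on the constant offsets 4 and 8 and %c folds to
// true at zero cost.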
1635 
1636 bool CallAnalyzer::visitSub(BinaryOperator &I) {
1637  // Try to handle a special case: we can fold computing the difference of two
1638  // constant-related pointers.
1639  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
1640  Value *LHSBase, *RHSBase;
1641  APInt LHSOffset, RHSOffset;
1642  std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
1643  if (LHSBase) {
1644  std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
1645  if (RHSBase && LHSBase == RHSBase) {
1646  // We have common bases, fold the subtract to a constant based on the
1647  // offsets.
1648  Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
1649  Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
1650  if (Constant *C = ConstantExpr::getSub(CLHS, CRHS)) {
1651  SimplifiedValues[&I] = C;
1652  ++NumConstantPtrDiffs;
1653  return true;
1654  }
1655  }
1656  }
1657 
1658  // Otherwise, fall back to the generic logic for simplifying and handling
1659  // instructions.
1660  return Base::visitSub(I);
1661 }
1662 
1663 bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) {
1664  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
1665  Constant *CLHS = dyn_cast<Constant>(LHS);
1666  if (!CLHS)
1667  CLHS = SimplifiedValues.lookup(LHS);
1668  Constant *CRHS = dyn_cast<Constant>(RHS);
1669  if (!CRHS)
1670  CRHS = SimplifiedValues.lookup(RHS);
1671 
1672  Value *SimpleV = nullptr;
1673  if (auto FI = dyn_cast<FPMathOperator>(&I))
1674  SimpleV = SimplifyBinOp(I.getOpcode(), CLHS ? CLHS : LHS, CRHS ? CRHS : RHS,
1675  FI->getFastMathFlags(), DL);
1676  else
1677  SimpleV =
1678  SimplifyBinOp(I.getOpcode(), CLHS ? CLHS : LHS, CRHS ? CRHS : RHS, DL);
1679 
1680  if (Constant *C = dyn_cast_or_null<Constant>(SimpleV))
1681  SimplifiedValues[&I] = C;
1682 
1683  if (SimpleV)
1684  return true;
1685 
1686  // Disable any SROA on arguments to arbitrary, unsimplified binary operators.
1687  disableSROA(LHS);
1688  disableSROA(RHS);
1689 
1690  // If the instruction is floating point, and the target says this operation
1691  // is expensive, this may eventually become a library call. Treat the cost
1692  // as such. Unless it's fneg which can be implemented with an xor.
1693  using namespace llvm::PatternMatch;
1694  if (I.getType()->isFloatingPointTy() &&
1695  TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive &&
1696  !match(&I, m_FNeg(m_Value())))
1697  onCallPenalty();
1698 
1699  return false;
1700 }
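
// [Editorial note, not part of InlineCost.cpp] For instance, if the call
// passes the constant 7 for an argument %x, SimplifiedValues maps %x to
// i32 7, and for "%y = add i32 %x, 1" SimplifyBinOp is queried with (7, 1),
// returns 8, and %y is recorded as a constant: the add is free. If nothing
// simplifies, SROA is pessimistically disabled on both operands and an
// expensive FP operation is charged a call penalty.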
1701 
1702 bool CallAnalyzer::visitFNeg(UnaryOperator &I) {
1703  Value *Op = I.getOperand(0);
1704  Constant *COp = dyn_cast<Constant>(Op);
1705  if (!COp)
1706  COp = SimplifiedValues.lookup(Op);
1707 
1708  Value *SimpleV = SimplifyFNegInst(
1709  COp ? COp : Op, cast<FPMathOperator>(I).getFastMathFlags(), DL);
1710 
1711  if (Constant *C = dyn_cast_or_null<Constant>(SimpleV))
1712  SimplifiedValues[&I] = C;
1713 
1714  if (SimpleV)
1715  return true;
1716 
1717  // Disable any SROA on arguments to arbitrary, unsimplified fneg.
1718  disableSROA(Op);
1719 
1720  return false;
1721 }
1722 
1723 bool CallAnalyzer::visitLoad(LoadInst &I) {
1724  if (handleSROA(I.getPointerOperand(), I.isSimple()))
1725  return true;
1726 
1727  // If the data is already loaded from this address and hasn't been clobbered
1728  // by any stores or calls, this load is likely to be redundant and can be
1729  // eliminated.
1730  if (EnableLoadElimination &&
1731  !LoadAddrSet.insert(I.getPointerOperand()).second && I.isUnordered()) {
1732  onLoadEliminationOpportunity();
1733  return true;
1734  }
1735 
1736  return false;
1737 }
1738 
1739 bool CallAnalyzer::visitStore(StoreInst &I) {
1740  if (handleSROA(I.getPointerOperand(), I.isSimple()))
1741  return true;
1742 
1743  // The store can potentially clobber loads and prevent repeated loads from
1744  // being eliminated.
1745  // FIXME:
1746  // 1. We can probably keep an initial set of eliminatable loads subtracted
1747  // from the cost even when we finally see a store. We just need to disable
1748  // *further* accumulation of elimination savings.
1749  // 2. We should probably at some point thread MemorySSA for the callee into
1750  // this and then use that to actually compute *really* precise savings.
1751  disableLoadElimination();
1752  return false;
1753 }
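
// [Editorial note, not part of InlineCost.cpp] The load-elimination model in
// the two visitors above is deliberately coarse: the second of two unordered
// loads from the same not-yet-clobbered address is credited via
// onLoadEliminationOpportunity(), while any visited store, or any call that
// may write memory, turns EnableLoadElimination off and stops all further
// credit, even for unrelated addresses.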
1754 
1755 bool CallAnalyzer::visitExtractValue(ExtractValueInst &I) {
1756  // Constant folding for extract value is trivial.
1757  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
1758  return ConstantExpr::getExtractValue(COps[0], I.getIndices());
1759  }))
1760  return true;
1761 
1762  // SROA can't look through these, but they may be free.
1763  return Base::visitExtractValue(I);
1764 }
1765 
1766 bool CallAnalyzer::visitInsertValue(InsertValueInst &I) {
1767  // Constant folding for insert value is trivial.
1768  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
1769  return ConstantExpr::getInsertValue(/*AggregateOperand*/ COps[0],
1770  /*InsertedValueOperand*/ COps[1],
1771  I.getIndices());
1772  }))
1773  return true;
1774 
1775  // SROA can't look through these, but they may be free.
1776  return Base::visitInsertValue(I);
1777 }
1778 
1779 /// Try to simplify a call site.
1780 ///
1781 /// Takes a concrete function and callsite and tries to actually simplify it by
1782 /// analyzing the arguments and call itself with instsimplify. Returns true if
1783 /// it has simplified the callsite to some other entity (a constant), making it
1784 /// free.
1785 bool CallAnalyzer::simplifyCallSite(Function *F, CallBase &Call) {
1786  // FIXME: Using the instsimplify logic directly for this is inefficient
1787  // because we have to continually rebuild the argument list even when no
1788  // simplifications can be performed. Until that is fixed with remapping
1789  // inside of instsimplify, directly constant fold calls here.
1790  if (!canConstantFoldCallTo(&Call, F))
1791  return false;
1792 
1793  // Try to re-map the arguments to constants.
1794  SmallVector<Constant *, 4> ConstantArgs;
1795  ConstantArgs.reserve(Call.arg_size());
1796  for (Value *I : Call.args()) {
1797  Constant *C = dyn_cast<Constant>(I);
1798  if (!C)
1799  C = dyn_cast_or_null<Constant>(SimplifiedValues.lookup(I));
1800  if (!C)
1801  return false; // This argument doesn't map to a constant.
1802 
1803  ConstantArgs.push_back(C);
1804  }
1805  if (Constant *C = ConstantFoldCall(&Call, F, ConstantArgs)) {
1806  SimplifiedValues[&Call] = C;
1807  return true;
1808  }
1809 
1810  return false;
1811 }
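
// [Editorial note, not part of InlineCost.cpp] Example: if the argument %a
// simplifies to the constant -4.0 at this callsite, then
//
//   %r = call float @llvm.fabs.f32(float %a)
//
// is constant-folded by ConstantFoldCall to 4.0, recorded in
// SimplifiedValues, and contributes no cost to the analysis.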
1812 
1813 bool CallAnalyzer::visitCallBase(CallBase &Call) {
1814  if (Call.hasFnAttr(Attribute::ReturnsTwice) &&
1815  !F.hasFnAttribute(Attribute::ReturnsTwice)) {
1816  // This aborts the entire analysis.
1817  ExposesReturnsTwice = true;
1818  return false;
1819  }
1820  if (isa<CallInst>(Call) && cast<CallInst>(Call).cannotDuplicate())
1821  ContainsNoDuplicateCall = true;
1822 
1823  Value *Callee = Call.getCalledOperand();
1824  Function *F = dyn_cast_or_null<Function>(Callee);
1825  bool IsIndirectCall = !F;
1826  if (IsIndirectCall) {
1827  // Check if this happens to be an indirect function call to a known function
1828  // in this inline context. If not, we've done all we can.
1829  F = dyn_cast_or_null<Function>(SimplifiedValues.lookup(Callee));
1830  if (!F) {
1831  onCallArgumentSetup(Call);
1832 
1833  if (!Call.onlyReadsMemory())
1834  disableLoadElimination();
1835  return Base::visitCallBase(Call);
1836  }
1837  }
1838 
1839  assert(F && "Expected a call to a known function");
1840 
1841  // When we have a concrete function, first try to simplify it directly.
1842  if (simplifyCallSite(F, Call))
1843  return true;
1844 
1845  // Next check if it is an intrinsic we know about.
1846  // FIXME: Lift this into part of the InstVisitor.
1847  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&Call)) {
1848  switch (II->getIntrinsicID()) {
1849  default:
1850  if (!Call.onlyReadsMemory() && !isAssumeLikeIntrinsic(II))
1851  disableLoadElimination();
1852  return Base::visitCallBase(Call);
1853 
1854  case Intrinsic::load_relative:
1855  onLoadRelativeIntrinsic();
1856  return false;
1857 
1858  case Intrinsic::memset:
1859  case Intrinsic::memcpy:
1860  case Intrinsic::memmove:
1861  disableLoadElimination();
1862  // SROA can usually chew through these intrinsics, but they aren't free.
1863  return false;
1864  case Intrinsic::icall_branch_funnel:
1865  case Intrinsic::localescape:
1866  HasUninlineableIntrinsic = true;
1867  return false;
1868  case Intrinsic::vastart:
1869  InitsVargArgs = true;
1870  return false;
1871  case Intrinsic::launder_invariant_group:
1872  case Intrinsic::strip_invariant_group:
1873  if (auto *SROAArg = getSROAArgForValueOrNull(II->getOperand(0)))
1874  SROAArgValues[II] = SROAArg;
1875  return true;
1876  }
1877  }
1878 
1879  if (F == Call.getFunction()) {
1880  // This flag will fully abort the analysis, so don't bother with anything
1881  // else.
1882  IsRecursiveCall = true;
1883  return false;
1884  }
1885 
1886  if (TTI.isLoweredToCall(F)) {
1887  onLoweredCall(F, Call, IsIndirectCall);
1888  }
1889 
1890  if (!(Call.onlyReadsMemory() || (IsIndirectCall && F->onlyReadsMemory())))
1891  disableLoadElimination();
1892  return Base::visitCallBase(Call);
1893 }
1894 
1895 bool CallAnalyzer::visitReturnInst(ReturnInst &RI) {
1896  // At least one return instruction will be free after inlining.
1897  bool Free = !HasReturn;
1898  HasReturn = true;
1899  return Free;
1900 }
1901 
1902 bool CallAnalyzer::visitBranchInst(BranchInst &BI) {
1903  // We model unconditional branches as essentially free -- they really
1904  // shouldn't exist at all, but handling them makes the behavior of the
1905  // inliner more regular and predictable. Interestingly, conditional branches
1906  // which will fold away are also free.
1907  return BI.isUnconditional() || isa<ConstantInt>(BI.getCondition()) ||
1908  dyn_cast_or_null<ConstantInt>(
1909  SimplifiedValues.lookup(BI.getCondition()));
1910 }
1911 
1912 bool CallAnalyzer::visitSelectInst(SelectInst &SI) {
1913  bool CheckSROA = SI.getType()->isPointerTy();
1914  Value *TrueVal = SI.getTrueValue();
1915  Value *FalseVal = SI.getFalseValue();
1916 
1917  Constant *TrueC = dyn_cast<Constant>(TrueVal);
1918  if (!TrueC)
1919  TrueC = SimplifiedValues.lookup(TrueVal);
1920  Constant *FalseC = dyn_cast<Constant>(FalseVal);
1921  if (!FalseC)
1922  FalseC = SimplifiedValues.lookup(FalseVal);
1923  Constant *CondC =
1924  dyn_cast_or_null<Constant>(SimplifiedValues.lookup(SI.getCondition()));
1925 
1926  if (!CondC) {
1927  // Select C, X, X => X
1928  if (TrueC == FalseC && TrueC) {
1929  SimplifiedValues[&SI] = TrueC;
1930  return true;
1931  }
1932 
1933  if (!CheckSROA)
1934  return Base::visitSelectInst(SI);
1935 
1936  std::pair<Value *, APInt> TrueBaseAndOffset =
1937  ConstantOffsetPtrs.lookup(TrueVal);
1938  std::pair<Value *, APInt> FalseBaseAndOffset =
1939  ConstantOffsetPtrs.lookup(FalseVal);
1940  if (TrueBaseAndOffset == FalseBaseAndOffset && TrueBaseAndOffset.first) {
1941  ConstantOffsetPtrs[&SI] = TrueBaseAndOffset;
1942 
1943  if (auto *SROAArg = getSROAArgForValueOrNull(TrueVal))
1944  SROAArgValues[&SI] = SROAArg;
1945  return true;
1946  }
1947 
1948  return Base::visitSelectInst(SI);
1949  }
1950 
1951  // Select condition is a constant.
1952  Value *SelectedV = CondC->isAllOnesValue()
1953  ? TrueVal
1954  : (CondC->isNullValue()) ? FalseVal : nullptr;
1955  if (!SelectedV) {
1956  // Condition is a vector constant that is not all 1s or all 0s. If all
1957  // operands are constants, ConstantExpr::getSelect() can handle the cases
1958  // such as select vectors.
1959  if (TrueC && FalseC) {
1960  if (auto *C = ConstantExpr::getSelect(CondC, TrueC, FalseC)) {
1961  SimplifiedValues[&SI] = C;
1962  return true;
1963  }
1964  }
1965  return Base::visitSelectInst(SI);
1966  }
1967 
1968  // Condition is either all 1s or all 0s. SI can be simplified.
1969  if (Constant *SelectedC = dyn_cast<Constant>(SelectedV)) {
1970  SimplifiedValues[&SI] = SelectedC;
1971  return true;
1972  }
1973 
1974  if (!CheckSROA)
1975  return true;
1976 
1977  std::pair<Value *, APInt> BaseAndOffset =
1978  ConstantOffsetPtrs.lookup(SelectedV);
1979  if (BaseAndOffset.first) {
1980  ConstantOffsetPtrs[&SI] = BaseAndOffset;
1981 
1982  if (auto *SROAArg = getSROAArgForValueOrNull(SelectedV))
1983  SROAArgValues[&SI] = SROAArg;
1984  }
1985 
1986  return true;
1987 }
1988 
1989 bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
1990  // We model unconditional switches as free, see the comments on handling
1991  // branches.
1992  if (isa<ConstantInt>(SI.getCondition()))
1993  return true;
1994  if (Value *V = SimplifiedValues.lookup(SI.getCondition()))
1995  if (isa<ConstantInt>(V))
1996  return true;
1997 
1998  // Assume the most general case where the switch is lowered into
1999  // either a jump table, bit test, or a balanced binary tree consisting of
2000  // case clusters without merging adjacent clusters with the same
2001  // destination. We do not consider the switches that are lowered with a mix
2002  // of jump table/bit test/binary search tree. The cost of the switch is
2003  // proportional to the size of the tree or the size of jump table range.
2004  //
2005  // NB: We convert large switches which are just used to initialize large phi
2006  // nodes to lookup tables instead in simplify-cfg, so this shouldn't prevent
2007  // inlining those. It will prevent inlining in cases where the optimization
2008  // does not (yet) fire.
2009 
2010  unsigned JumpTableSize = 0;
2011  BlockFrequencyInfo *BFI = GetBFI ? &(GetBFI(F)) : nullptr;
2012  unsigned NumCaseCluster =
2013  TTI.getEstimatedNumberOfCaseClusters(SI, JumpTableSize, PSI, BFI);
2014 
2015  onFinalizeSwitch(JumpTableSize, NumCaseCluster);
2016  return false;
2017 }
2018 
2019 bool CallAnalyzer::visitIndirectBrInst(IndirectBrInst &IBI) {
2020  // We never want to inline functions that contain an indirectbr. This is
2021  // incorrect because all the blockaddress's (in static global initializers
2022  // for example) would be referring to the original function, and this
2023  // indirect jump would jump from the inlined copy of the function into the
2024  // original function which is extremely undefined behavior.
2025  // FIXME: This logic isn't really right; we can safely inline functions with
2026  // indirectbr's as long as no other function or global references the
2027  // blockaddress of a block within the current function.
2028  HasIndirectBr = true;
2029  return false;
2030 }
2031 
2032 bool CallAnalyzer::visitResumeInst(ResumeInst &RI) {
2033  // FIXME: It's not clear that a single instruction is an accurate model for
2034  // the inline cost of a resume instruction.
2035  return false;
2036 }
2037 
2038 bool CallAnalyzer::visitCleanupReturnInst(CleanupReturnInst &CRI) {
2039  // FIXME: It's not clear that a single instruction is an accurate model for
2040  // the inline cost of a cleanupret instruction.
2041  return false;
2042 }
2043 
2044 bool CallAnalyzer::visitCatchReturnInst(CatchReturnInst &CRI) {
2045  // FIXME: It's not clear that a single instruction is an accurate model for
2046  // the inline cost of a catchret instruction.
2047  return false;
2048 }
2049 
2050 bool CallAnalyzer::visitUnreachableInst(UnreachableInst &I) {
2051  // FIXME: It might be reasonable to discount the cost of instructions leading
2052  // to unreachable as they have the lowest possible impact on both runtime and
2053  // code size.
2054  return true; // No actual code is needed for unreachable.
2055 }
2056 
2057 bool CallAnalyzer::visitInstruction(Instruction &I) {
2058  // Some instructions are free. All of the free intrinsics can also be
2059  // handled by SROA, etc.
2060  if (TTI.getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency) ==
2061  TargetTransformInfo::TCC_Free)
2062  return true;
2063 
2064  // We found something we don't understand or can't handle. Mark any SROA-able
2065  // values in the operand list as no longer viable.
2066  for (const Use &Op : I.operands())
2067  disableSROA(Op);
2068 
2069  return false;
2070 }
2071 
2072 /// Analyze a basic block for its contribution to the inline cost.
2073 ///
2074 /// This method walks the analyzer over every instruction in the given basic
2075 /// block and accounts for their cost during inlining at this callsite. It
2076 /// aborts early if the threshold has been exceeded or an impossible to inline
2077 /// construct has been detected. It returns false if inlining is no longer
2078 /// viable, and true if inlining remains viable.
2079 InlineResult
2080 CallAnalyzer::analyzeBlock(BasicBlock *BB,
2081  SmallPtrSetImpl<const Value *> &EphValues) {
2082  for (Instruction &I : *BB) {
2083  // FIXME: Currently, the number of instructions in a function, regardless of
2084  // our ability to simplify them to constants or dead code during inlining,
2085  // is actually used by the vector bonus heuristic. As long as that's true,
2086  // we have to special case debug intrinsics here to prevent differences in
2087  // inlining due to debug symbols. Eventually, the number of unsimplified
2088  // instructions shouldn't factor into the cost computation, but until then,
2089  // hack around it here.
2090  if (isa<DbgInfoIntrinsic>(I))
2091  continue;
2092 
2093  // Skip pseudo-probes.
2094  if (isa<PseudoProbeInst>(I))
2095  continue;
2096 
2097  // Skip ephemeral values.
2098  if (EphValues.count(&I))
2099  continue;
2100 
2101  ++NumInstructions;
2102  if (isa<ExtractElementInst>(I) || I.getType()->isVectorTy())
2103  ++NumVectorInstructions;
2104 
2105  // If the instruction simplified to a constant, there is no cost to this
2106  // instruction. Visit the instructions using our InstVisitor to account for
2107  // all of the per-instruction logic. The visit tree returns true if we
2108  // consumed the instruction in any way, and false if the instruction's base
2109  // cost should count against inlining.
2110  onInstructionAnalysisStart(&I);
2111 
2112  if (Base::visit(&I))
2113  ++NumInstructionsSimplified;
2114  else
2115  onMissedSimplification();
2116 
2117  onInstructionAnalysisFinish(&I);
2118  using namespace ore;
2119  // If visiting this instruction detected an uninlinable pattern, abort.
2120  InlineResult IR = InlineResult::success();
2121  if (IsRecursiveCall)
2122  IR = InlineResult::failure("recursive");
2123  else if (ExposesReturnsTwice)
2124  IR = InlineResult::failure("exposes returns twice");
2125  else if (HasDynamicAlloca)
2126  IR = InlineResult::failure("dynamic alloca");
2127  else if (HasIndirectBr)
2128  IR = InlineResult::failure("indirect branch");
2129  else if (HasUninlineableIntrinsic)
2130  IR = InlineResult::failure("uninlinable intrinsic");
2131  else if (InitsVargArgs)
2132  IR = InlineResult::failure("varargs");
2133  if (!IR.isSuccess()) {
2134  if (ORE)
2135  ORE->emit([&]() {
2136  return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline",
2137  &CandidateCall)
2138  << NV("Callee", &F) << " has uninlinable pattern ("
2139  << NV("InlineResult", IR.getFailureReason())
2140  << ") and cost is not fully computed";
2141  });
2142  return IR;
2143  }
2144 
2145  // If the caller is a recursive function then we don't want to inline
2146  // functions which allocate a lot of stack space because it would increase
2147  // the caller stack usage dramatically.
2148  if (IsCallerRecursive &&
2149  AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller) {
2150  auto IR =
2151  InlineResult::failure("recursive and allocates too much stack space");
2152  if (ORE)
2153  ORE->emit([&]() {
2154  return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline",
2155  &CandidateCall)
2156  << NV("Callee", &F) << " is "
2157  << NV("InlineResult", IR.getFailureReason())
2158  << ". Cost is not fully computed";
2159  });
2160  return IR;
2161  }
2162 
2163  if (shouldStop())
2164  return InlineResult::failure(
2165  "Call site analysis is not favorable to inlining.");
2166  }
2167 
2168  return InlineResult::success();
2169 }
2170 
2171 /// Compute the base pointer and cumulative constant offsets for V.
2172 ///
2173 /// This strips all constant offsets off of V, leaving it the base pointer, and
2174 /// accumulates the total constant offset applied in the returned constant. It
2175 /// returns 0 if V is not a pointer, and returns the constant '0' if there are
2176 /// no constant offsets applied.
2177 ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
2178  if (!V->getType()->isPointerTy())
2179  return nullptr;
2180 
2181  unsigned AS = V->getType()->getPointerAddressSpace();
2182  unsigned IntPtrWidth = DL.getIndexSizeInBits(AS);
2183  APInt Offset = APInt::getNullValue(IntPtrWidth);
2184 
2185  // Even though we don't look through PHI nodes, we could be called on an
2186  // instruction in an unreachable block, which may be on a cycle.
2187  SmallPtrSet<Value *, 4> Visited;
2188  Visited.insert(V);
2189  do {
2190  if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
2191  if (!GEP->isInBounds() || !accumulateGEPOffset(*GEP, Offset))
2192  return nullptr;
2193  V = GEP->getPointerOperand();
2194  } else if (Operator::getOpcode(V) == Instruction::BitCast) {
2195  V = cast<Operator>(V)->getOperand(0);
2196  } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
2197  if (GA->isInterposable())
2198  break;
2199  V = GA->getAliasee();
2200  } else {
2201  break;
2202  }
2203  assert(V->getType()->isPointerTy() && "Unexpected operand type!");
2204  } while (Visited.insert(V).second);
2205 
2206  Type *IdxPtrTy = DL.getIndexType(V->getType());
2207  return cast<ConstantInt>(ConstantInt::get(IdxPtrTy, Offset));
2208 }
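
// [Editorial note, not part of InlineCost.cpp] Example: for
//
//   %q = getelementptr inbounds i8, i8* %p, i64 8
//   %v = bitcast i8* %q to i32*
//
// passing %v in strips the bitcast, then the inbounds GEP, leaving V = %p
// and returning a ConstantInt of the index type holding 8. A non-inbounds
// GEP, or one whose offset cannot be accumulated as a constant, aborts and
// returns nullptr.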
2209 
2210 /// Find dead blocks due to deleted CFG edges during inlining.
2211 ///
2212 /// If we know the successor of the current block, \p CurrBB, has to be \p
2213 /// NextBB, the other successors of \p CurrBB are dead if these successors have
2214 /// no live incoming CFG edges. If one block is found to be dead, we can
2215 /// continue growing the dead block list by checking the successors of the dead
2216 /// blocks to see if all their incoming edges are dead or not.
2217 void CallAnalyzer::findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB) {
2218  auto IsEdgeDead = [&](BasicBlock *Pred, BasicBlock *Succ) {
2219  // A CFG edge is dead if the predecessor is dead or the predecessor has a
2220  // known successor which is not the one under exam.
2221  return (DeadBlocks.count(Pred) ||
2222  (KnownSuccessors[Pred] && KnownSuccessors[Pred] != Succ));
2223  };
2224 
2225  auto IsNewlyDead = [&](BasicBlock *BB) {
2226  // If all the edges to a block are dead, the block is also dead.
2227  return (!DeadBlocks.count(BB) &&
2228  llvm::all_of(predecessors(BB),
2229  [&](BasicBlock *P) { return IsEdgeDead(P, BB); }));
2230  };
2231 
2232  for (BasicBlock *Succ : successors(CurrBB)) {
2233  if (Succ == NextBB || !IsNewlyDead(Succ))
2234  continue;
2235  SmallVector<BasicBlock *, 4> NewDead;
2236  NewDead.push_back(Succ);
2237  while (!NewDead.empty()) {
2238  BasicBlock *Dead = NewDead.pop_back_val();
2239  if (DeadBlocks.insert(Dead))
2240  // Continue growing the dead block lists.
2241  for (BasicBlock *S : successors(Dead))
2242  if (IsNewlyDead(S))
2243  NewDead.push_back(S);
2244  }
2245  }
2246 }
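
// [Editorial note, not part of InlineCost.cpp] Sketch: if CurrBB's branch is
// known to take NextBB, its other successor B1 becomes newly dead once every
// predecessor edge into B1 is dead; if B1 was in turn the only live
// predecessor of B2, the worklist marks B2 dead as well, so entire regions
// guarded by a now-constant condition drop out of the cost walk in analyze().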
2247 
2248 /// Analyze a call site for potential inlining.
2249 ///
2250 /// Returns true if inlining this call is viable, and false if it is not
2251 /// viable. It computes the cost and adjusts the threshold based on numerous
2252 /// factors and heuristics. If this method returns false but the computed cost
2253 /// is below the computed threshold, then inlining was forcibly disabled by
2254 /// some artifact of the routine.
2255 InlineResult CallAnalyzer::analyze() {
2256  ++NumCallsAnalyzed;
2257 
2258  auto Result = onAnalysisStart();
2259  if (!Result.isSuccess())
2260  return Result;
2261 
2262  if (F.empty())
2263  return InlineResult::success();
2264 
2265  Function *Caller = CandidateCall.getFunction();
2266  // Check if the caller function is recursive itself.
2267  for (User *U : Caller->users()) {
2268  CallBase *Call = dyn_cast<CallBase>(U);
2269  if (Call && Call->getFunction() == Caller) {
2270  IsCallerRecursive = true;
2271  break;
2272  }
2273  }
2274 
2275  // Populate our simplified values by mapping from function arguments to call
2276  // arguments with known important simplifications.
2277  auto CAI = CandidateCall.arg_begin();
2278  for (Argument &FAI : F.args()) {
2279  assert(CAI != CandidateCall.arg_end());
2280  if (Constant *C = dyn_cast<Constant>(CAI))
2281  SimplifiedValues[&FAI] = C;
2282 
2283  Value *PtrArg = *CAI;
2284  if (ConstantInt *C = stripAndComputeInBoundsConstantOffsets(PtrArg)) {
2285  ConstantOffsetPtrs[&FAI] = std::make_pair(PtrArg, C->getValue());
2286 
2287  // We can SROA any pointer arguments derived from alloca instructions.
2288  if (auto *SROAArg = dyn_cast<AllocaInst>(PtrArg)) {
2289  SROAArgValues[&FAI] = SROAArg;
2290  onInitializeSROAArg(SROAArg);
2291  EnabledSROAAllocas.insert(SROAArg);
2292  }
2293  }
2294  ++CAI;
2295  }
2296  NumConstantArgs = SimplifiedValues.size();
2297  NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size();
2298  NumAllocaArgs = SROAArgValues.size();
2299 
2300  // FIXME: If a caller has multiple calls to a callee, we end up recomputing
2301  // the ephemeral values multiple times (and they're completely determined by
2302  // the callee, so this is purely duplicate work).
2303  SmallPtrSet<const Value *, 32> EphValues;
2304  CodeMetrics::collectEphemeralValues(&F, &GetAssumptionCache(F), EphValues);
2305 
2306  // The worklist of live basic blocks in the callee *after* inlining. We avoid
2307  // adding basic blocks of the callee which can be proven to be dead for this
2308  // particular call site in order to get more accurate cost estimates. This
2309  // requires a somewhat heavyweight iteration pattern: we need to walk the
2310  // basic blocks in a breadth-first order as we insert live successors. To
2311  // accomplish this, prioritizing for small iterations because we exit after
2312  // crossing our threshold, we use a small-size optimized SetVector.
2313  typedef SetVector<BasicBlock *, SmallVector<BasicBlock *, 16>,
2314  SmallPtrSet<BasicBlock *, 16>>
2315  BBSetVector;
2316  BBSetVector BBWorklist;
2317  BBWorklist.insert(&F.getEntryBlock());
2318 
2319  // Note that we *must not* cache the size, this loop grows the worklist.
2320  for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
2321  if (shouldStop())
2322  break;
2323 
2324  BasicBlock *BB = BBWorklist[Idx];
2325  if (BB->empty())
2326  continue;
2327 
2328  onBlockStart(BB);
2329 
2330  // Disallow inlining a blockaddress with uses other than strictly callbr.
2331  // A blockaddress only has defined behavior for an indirect branch in the
2332  // same function, and we do not currently support inlining indirect
2333  // branches. But, the inliner may not see an indirect branch that ends up
2334  // being dead code at a particular call site. If the blockaddress escapes
2335  // the function, e.g., via a global variable, inlining may lead to an
2336  // invalid cross-function reference.
2337  // FIXME: pr/39560: continue relaxing this overt restriction.
2338  if (BB->hasAddressTaken())
2339  for (User *U : BlockAddress::get(&*BB)->users())
2340  if (!isa<CallBrInst>(*U))
2341  return InlineResult::failure("blockaddress used outside of callbr");
2342 
2343  // Analyze the cost of this block. If we blow through the threshold, this
2344  // returns false, and we can bail out.
2345  InlineResult IR = analyzeBlock(BB, EphValues);
2346  if (!IR.isSuccess())
2347  return IR;
2348 
2349  Instruction *TI = BB->getTerminator();
2350 
2351  // Add in the live successors by first checking whether we have terminator
2352  // that may be simplified based on the values simplified by this call.
2353  if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
2354  if (BI->isConditional()) {
2355  Value *Cond = BI->getCondition();
2356  if (ConstantInt *SimpleCond =
2357  dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
2358  BasicBlock *NextBB = BI->getSuccessor(SimpleCond->isZero() ? 1 : 0);
2359  BBWorklist.insert(NextBB);
2360  KnownSuccessors[BB] = NextBB;
2361  findDeadBlocks(BB, NextBB);
2362  continue;
2363  }
2364  }
2365  } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
2366  Value *Cond = SI->getCondition();
2367  if (ConstantInt *SimpleCond =
2368  dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
2369  BasicBlock *NextBB = SI->findCaseValue(SimpleCond)->getCaseSuccessor();
2370  BBWorklist.insert(NextBB);
2371  KnownSuccessors[BB] = NextBB;
2372  findDeadBlocks(BB, NextBB);
2373  continue;
2374  }
2375  }
2376 
2377  // If we're unable to select a particular successor, just count all of
2378  // them.
2379  for (unsigned TIdx = 0, TSize = TI->getNumSuccessors(); TIdx != TSize;
2380  ++TIdx)
2381  BBWorklist.insert(TI->getSuccessor(TIdx));
2382 
2383  onBlockAnalyzed(BB);
2384  }
2385 
2386  bool OnlyOneCallAndLocalLinkage = F.hasLocalLinkage() && F.hasOneUse() &&
2387  &F == CandidateCall.getCalledFunction();
2388  // If this is a noduplicate call, we can still inline as long as
2389  // inlining this would cause the removal of the caller (so the instruction
2390  // is not actually duplicated, just moved).
2391  if (!OnlyOneCallAndLocalLinkage && ContainsNoDuplicateCall)
2392  return InlineResult::failure("noduplicate");
2393 
2394  return finalizeAnalysis();
2395 }
2396 
2397 void InlineCostCallAnalyzer::print() {
2398 #define DEBUG_PRINT_STAT(x) dbgs() << " " #x ": " << x << "\n"
2399  if (PrintInstructionComments)
2400  F.print(dbgs(), &Writer);
2401  DEBUG_PRINT_STAT(NumConstantArgs);
2402  DEBUG_PRINT_STAT(NumConstantOffsetPtrArgs);
2403  DEBUG_PRINT_STAT(NumAllocaArgs);
2404  DEBUG_PRINT_STAT(NumConstantPtrCmps);
2405  DEBUG_PRINT_STAT(NumConstantPtrDiffs);
2406  DEBUG_PRINT_STAT(NumInstructionsSimplified);
2407  DEBUG_PRINT_STAT(NumInstructions);
2408  DEBUG_PRINT_STAT(SROACostSavings);
2409  DEBUG_PRINT_STAT(SROACostSavingsLost);
2410  DEBUG_PRINT_STAT(LoadEliminationCost);
2411  DEBUG_PRINT_STAT(ContainsNoDuplicateCall);
2412  DEBUG_PRINT_STAT(Cost);
2413  DEBUG_PRINT_STAT(Threshold);
2414 #undef DEBUG_PRINT_STAT
2415 }
2416 
2417 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2418 /// Dump stats about this call's analysis.
2419 LLVM_DUMP_METHOD void InlineCostCallAnalyzer::dump() {
2420  print();
2421 }
2422 #endif
2423 
2424 /// Test that there are no attribute conflicts between Caller and Callee
2425 /// that prevent inlining.
2426 static bool functionsHaveCompatibleAttributes(
2427  Function *Caller, Function *Callee, TargetTransformInfo &TTI,
2428  function_ref<const TargetLibraryInfo &(Function &)> &GetTLI) {
2429  // Note that CalleeTLI must be a copy not a reference. The legacy pass manager
2430  // caches the most recently created TLI in the TargetLibraryInfoWrapperPass
2431  // object, and always returns the same object (which is overwritten on each
2432  // GetTLI call). Therefore we copy the first result.
2433  auto CalleeTLI = GetTLI(*Callee);
2434  return TTI.areInlineCompatible(Caller, Callee) &&
2435  GetTLI(*Caller).areInlineCompatible(CalleeTLI,
2436  InlineCallerSupersetNoBuiltin) &&
2437  AttributeFuncs::areInlineCompatible(*Caller, *Callee);
2438 }
2439 
2440 int llvm::getCallsiteCost(CallBase &Call, const DataLayout &DL) {
2441  int Cost = 0;
2442  for (unsigned I = 0, E = Call.arg_size(); I != E; ++I) {
2443  if (Call.isByValArgument(I)) {
2444  // We approximate the number of loads and stores needed by dividing the
2445  // size of the byval type by the target's pointer size.
2446  PointerType *PTy = cast<PointerType>(Call.getArgOperand(I)->getType());
2447  unsigned TypeSize = DL.getTypeSizeInBits(Call.getParamByValType(I));
2448  unsigned AS = PTy->getAddressSpace();
2449  unsigned PointerSize = DL.getPointerSizeInBits(AS);
2450  // Ceiling division.
2451  unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize;
2452 
2453  // If it generates more than 8 stores it is likely to be expanded as an
2454  // inline memcpy so we take that as an upper bound. Otherwise we assume
2455  // one load and one store per word copied.
2456  // FIXME: The maxStoresPerMemcpy setting from the target should be used
2457  // here instead of a magic number of 8, but it's not available via
2458  // DataLayout.
2459  NumStores = std::min(NumStores, 8U);
2460 
2461  Cost += 2 * NumStores * InlineConstants::InstrCost;
2462  } else {
2463  // For non-byval arguments subtract off one instruction per call
2464  // argument.
2465  Cost += InlineConstants::InstrCost;
2466  }
2467  }
2468  // The call instruction also disappears after inlining.
2469  Cost += InlineConstants::InstrCost + InlineConstants::CallPenalty;
2470  return Cost;
2471 }
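
// [Editorial worked example, not part of InlineCost.cpp] For a callsite
// passing one 128-byte struct byval on a 64-bit target: TypeSize = 1024,
// PointerSize = 64, so NumStores = 16, clamped to 8, contributing
// 2 * 8 * InstrCost = 80 with the usual InstrCost of 5. Adding the final
// InstrCost + CallPenalty (5 + 25) gives a callsite cost of 110 units that
// inlining would eliminate.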
2472 
2473 InlineCost llvm::getInlineCost(
2474  CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI,
2475  function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
2476  function_ref<const TargetLibraryInfo &(Function &)> GetTLI,
2477  function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
2478  ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
2479  return getInlineCost(Call, Call.getCalledFunction(), Params, CalleeTTI,
2480  GetAssumptionCache, GetTLI, GetBFI, PSI, ORE);
2481 }
2482 
2483 Optional<int> llvm::getInliningCostEstimate(
2484  CallBase &Call, TargetTransformInfo &CalleeTTI,
2485  function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
2486  function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
2487  ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
2488  const InlineParams Params = {/* DefaultThreshold*/ 0,
2489  /*HintThreshold*/ {},
2490  /*ColdThreshold*/ {},
2491  /*OptSizeThreshold*/ {},
2492  /*OptMinSizeThreshold*/ {},
2493  /*HotCallSiteThreshold*/ {},
2494  /*LocallyHotCallSiteThreshold*/ {},
2495  /*ColdCallSiteThreshold*/ {},
2496  /*ComputeFullInlineCost*/ true,
2497  /*EnableDeferral*/ true};
2498 
2499  InlineCostCallAnalyzer CA(*Call.getCalledFunction(), Call, Params, CalleeTTI,
2500  GetAssumptionCache, GetBFI, PSI, ORE, true,
2501  /*IgnoreThreshold*/ true);
2502  auto R = CA.analyze();
2503  if (!R.isSuccess())
2504  return None;
2505  return CA.getCost();
2506 }
2507 
2508 Optional<InlineResult> llvm::getAttributeBasedInliningDecision(
2509  CallBase &Call, Function *Callee, TargetTransformInfo &CalleeTTI,
2510  function_ref<const TargetLibraryInfo &(Function &)> GetTLI) {
2511 
2512  // Cannot inline indirect calls.
2513  if (!Callee)
2514  return InlineResult::failure("indirect call");
2515 
2516  // When a callee coroutine function is inlined into a caller coroutine
2517  // function before the coro-split pass, the coro-early pass cannot handle
2518  // this quite well.
2519  // So we don't inline the coroutine function if it has not yet been split.
2520  if (Callee->isPresplitCoroutine())
2521  return InlineResult::failure("unsplited coroutine call");
2522 
2523  // Never inline calls with byval arguments that do not have the alloca
2524  // address space. Since byval arguments can be replaced with a copy to an
2525  // alloca, the inlined code would need to be adjusted to handle that the
2526  // argument is in the alloca address space (so it is a little bit complicated
2527  // to solve).
2528  unsigned AllocaAS = Callee->getParent()->getDataLayout().getAllocaAddrSpace();
2529  for (unsigned I = 0, E = Call.arg_size(); I != E; ++I)
2530  if (Call.isByValArgument(I)) {
2531  PointerType *PTy = cast<PointerType>(Call.getArgOperand(I)->getType());
2532  if (PTy->getAddressSpace() != AllocaAS)
2533  return InlineResult::failure("byval arguments without alloca"
2534  " address space");
2535  }
2536 
2537  // Calls to functions with always-inline attributes should be inlined
2538  // whenever possible.
2539  if (Call.hasFnAttr(Attribute::AlwaysInline)) {
2540  auto IsViable = isInlineViable(*Callee);
2541  if (IsViable.isSuccess())
2542  return InlineResult::success();
2543  return InlineResult::failure(IsViable.getFailureReason());
2544  }
2545 
2546  // Never inline functions with conflicting attributes (unless callee has
2547  // always-inline attribute).
2548  Function *Caller = Call.getCaller();
2549  if (!functionsHaveCompatibleAttributes(Caller, Callee, CalleeTTI, GetTLI))
2550  return InlineResult::failure("conflicting attributes");
2551 
2552  // Don't inline this call if the caller has the optnone attribute.
2553  if (Caller->hasOptNone())
2554  return InlineResult::failure("optnone attribute");
2555 
2556  // Don't inline a function that treats null pointer as valid into a caller
2557  // that does not have this attribute.
2558  if (!Caller->nullPointerIsDefined() && Callee->nullPointerIsDefined())
2559  return InlineResult::failure("nullptr definitions incompatible");
2560 
2561  // Don't inline functions which can be interposed at link-time.
2562  if (Callee->isInterposable())
2563  return InlineResult::failure("interposable");
2564 
2565  // Don't inline functions marked noinline.
2566  if (Callee->hasFnAttribute(Attribute::NoInline))
2567  return InlineResult::failure("noinline function attribute");
2568 
2569  // Don't inline call sites marked noinline.
2570  if (Call.isNoInline())
2571  return InlineResult::failure("noinline call site attribute");
2572 
2573  // Don't inline functions if one does not have any stack protector attribute
2574  // but the other does.
2575  if (Caller->hasStackProtectorFnAttr() && !Callee->hasStackProtectorFnAttr())
2576  return InlineResult::failure(
2577  "stack protected caller but callee requested no stack protector");
2578  if (Callee->hasStackProtectorFnAttr() && !Caller->hasStackProtectorFnAttr())
2579  return InlineResult::failure(
2580  "stack protected callee but caller requested no stack protector");
2581 
2582  return None;
2583 }
2584 
2585 InlineCost llvm::getInlineCost(
2586  CallBase &Call, Function *Callee, const InlineParams &Params,
2587  TargetTransformInfo &CalleeTTI,
2588  function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
2589  function_ref<const TargetLibraryInfo &(Function &)> GetTLI,
2590  function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
2591  ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
2592 
2593  auto UserDecision =
2594  llvm::getAttributeBasedInliningDecision(Call, Callee, CalleeTTI, GetTLI);
2595 
2596  if (UserDecision.hasValue()) {
2597  if (UserDecision->isSuccess())
2598  return llvm::InlineCost::getAlways("always inline attribute");
2599  return llvm::InlineCost::getNever(UserDecision->getFailureReason());
2600  }
2601 
2602  LLVM_DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName()
2603  << "... (caller:" << Call.getCaller()->getName()
2604  << ")\n");
2605 
2606  InlineCostCallAnalyzer CA(*Callee, Call, Params, CalleeTTI,
2607  GetAssumptionCache, GetBFI, PSI, ORE);
2608  InlineResult ShouldInline = CA.analyze();
2609 
2610  LLVM_DEBUG(CA.dump());
2611 
2612  // Always make cost benefit based decision explicit.
2613  // We use always/never here since threshold is not meaningful,
2614  // as it's not what drives cost-benefit analysis.
2615  if (CA.wasDecidedByCostBenefit()) {
2616  if (ShouldInline.isSuccess())
2617  return InlineCost::getAlways("benefit over cost");
2618  else
2619  return InlineCost::getNever("cost over benefit");
2620  }
2621 
2622  // Check if there was a reason to force inlining or no inlining.
2623  if (!ShouldInline.isSuccess() && CA.getCost() < CA.getThreshold())
2624  return InlineCost::getNever(ShouldInline.getFailureReason());
2625  if (ShouldInline.isSuccess() && CA.getCost() >= CA.getThreshold())
2626  return InlineCost::getAlways("empty function");
2627 
2628  return llvm::InlineCost::get(CA.getCost(), CA.getThreshold());
2629 }
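
// [Editorial usage sketch, not part of InlineCost.cpp; caller-side names are
// hypothetical] A client pass drives this API roughly as follows:
//
//   InlineParams Params = getInlineParams();  // from flags / opt level
//   InlineCost IC = getInlineCost(CB, Params, CalleeTTI, GetAC, GetTLI,
//                                 GetBFI, &PSI, &ORE);
//   if (IC.isAlways() ||
//       (IC.isVariable() && IC.getCost() < IC.getThreshold()))
//     ; // profitable: hand CB to InlineFunction()
//
// The production inliners layer deferral and call-site history on top of
// this single-call answer.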
2630 
2631 InlineResult llvm::isInlineViable(Function &F) {
2632  bool ReturnsTwice = F.hasFnAttribute(Attribute::ReturnsTwice);
2633  for (BasicBlock &BB : F) {
2634  // Disallow inlining of functions which contain indirect branches.
2635  if (isa<IndirectBrInst>(BB.getTerminator()))
2636  return InlineResult::failure("contains indirect branches");
2637 
2638  // Disallow inlining of blockaddresses which are used by non-callbr
2639  // instructions.
2640  if (BB.hasAddressTaken())
2641  for (User *U : BlockAddress::get(&BB)->users())
2642  if (!isa<CallBrInst>(*U))
2643  return InlineResult::failure("blockaddress used outside of callbr");
2644 
2645  for (auto &II : BB) {
2646  CallBase *Call = dyn_cast<CallBase>(&II);
2647  if (!Call)
2648  continue;
2649 
2650  // Disallow recursive calls.
2651  Function *Callee = Call->getCalledFunction();
2652  if (&F == Callee)
2653  return InlineResult::failure("recursive call");
2654 
2655  // Disallow calls which expose returns-twice to a function not previously
2656  // attributed as such.
2657  if (!ReturnsTwice && isa<CallInst>(Call) &&
2658  cast<CallInst>(Call)->canReturnTwice())
2659  return InlineResult::failure("exposes returns-twice attribute");
2660 
2661  if (Callee)
2662  switch (Callee->getIntrinsicID()) {
2663  default:
2664  break;
2665  case llvm::Intrinsic::icall_branch_funnel:
2666  // Disallow inlining of @llvm.icall.branch.funnel because current
2667  // backend can't separate call targets from call arguments.
2668  return InlineResult::failure(
2669  "disallowed inlining of @llvm.icall.branch.funnel");
2670  case llvm::Intrinsic::localescape:
2671  // Disallow inlining functions that call @llvm.localescape. Doing this
2672  // correctly would require major changes to the inliner.
2673  return InlineResult::failure(
2674  "disallowed inlining of @llvm.localescape");
2675  case llvm::Intrinsic::vastart:
2676  // Disallow inlining of functions that initialize VarArgs with
2677  // va_start.
2678  return InlineResult::failure(
2679  "contains VarArgs initialized with va_start");
2680  }
2681  }
2682  }
2683 
2684  return InlineResult::success();
2685 }
2686 
2687 // APIs to create InlineParams based on command line flags and/or other
2688 // parameters.
2689 
2690 InlineParams llvm::getInlineParams(int Threshold) {
2691  InlineParams Params;
2692 
2693  // This field is the threshold to use for a callee by default. This is
2694  // derived from one or more of:
2695  // * optimization or size-optimization levels,
2696  // * a value passed to createFunctionInliningPass function, or
2697  // * the -inline-threshold flag.
2698  // If the -inline-threshold flag is explicitly specified, that is used
2699  // irrespective of anything else.
2700  if (InlineThreshold.getNumOccurrences() > 0)
2701  Params.DefaultThreshold = InlineThreshold;
2702  else
2703  Params.DefaultThreshold = Threshold;
2704 
2705  // Set the HintThreshold knob from the -inlinehint-threshold.
2706  Params.HintThreshold = HintThreshold;
2707 
2708  // Set the HotCallSiteThreshold knob from the -hot-callsite-threshold.
2709  Params.HotCallSiteThreshold = HotCallSiteThreshold;
2710 
2711  // If the -locally-hot-callsite-threshold is explicitly specified, use it to
2712  // populate LocallyHotCallSiteThreshold. Later, we populate
2713  // Params.LocallyHotCallSiteThreshold from -locally-hot-callsite-threshold if
2714  // we know that optimization level is O3 (in the getInlineParams variant that
2715  // takes the opt and size levels).
2716  // FIXME: Remove this check (and make the assignment unconditional) after
2717  // addressing size regression issues at O2.
2718  if (LocallyHotCallSiteThreshold.getNumOccurrences() > 0)
2719  Params.LocallyHotCallSiteThreshold = LocallyHotCallSiteThreshold;
2720 
2721  // Set the ColdCallSiteThreshold knob from the
2722  // -inline-cold-callsite-threshold.
2723  Params.ColdCallSiteThreshold = ColdCallSiteThreshold;
2724 
2725  // Set the OptMinSizeThreshold and OptSizeThreshold params only if the
2726  // -inlinehint-threshold commandline option is not explicitly given. If that
2727  // option is present, then its value applies even for callees with size and
2728  // minsize attributes.
2729  // If the -inline-threshold is not specified, set the ColdThreshold from the
2730  // -inlinecold-threshold even if it is not explicitly passed. If
2731  // -inline-threshold is specified, then -inlinecold-threshold needs to be
2732  // explicitly specified to set the ColdThreshold knob
2733  if (InlineThreshold.getNumOccurrences() == 0) {
2734  Params.OptMinSizeThreshold = InlineConstants::OptMinSizeThreshold;
2735  Params.OptSizeThreshold = InlineConstants::OptSizeThreshold;
2736  Params.ColdThreshold = ColdThreshold;
2737  } else if (ColdThreshold.getNumOccurrences() > 0) {
2738  Params.ColdThreshold = ColdThreshold;
2739  }
2740  return Params;
2741 }
2742 
2743 InlineParams llvm::getInlineParams() {
2744  return getInlineParams(DefaultThreshold);
2745 }
2746 
2747 // Compute the default threshold for inlining based on the opt level and the
2748 // size opt level.
2749 static int computeThresholdFromOptLevels(unsigned OptLevel,
2750  unsigned SizeOptLevel) {
2751  if (OptLevel > 2)
2752  return InlineConstants::OptAggressiveThreshold;
2753  if (SizeOptLevel == 1) // -Os
2754  return InlineConstants::OptSizeThreshold;
2755  if (SizeOptLevel == 2) // -Oz
2756  return InlineConstants::OptMinSizeThreshold;
2757  return DefaultThreshold;
2758 }
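
// [Editorial worked example, not part of InlineCost.cpp] With the constants
// from InlineCost.h: computeThresholdFromOptLevels(3, 0) yields the -O3
// threshold of 250, (2, 1) yields the -Os threshold of 50, (2, 2) yields the
// -Oz threshold of 5, and anything else falls back to the
// -inlinedefault-threshold value of 225.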
2759 
2760 InlineParams llvm::getInlineParams(unsigned OptLevel, unsigned SizeOptLevel) {
2761  auto Params =
2762  getInlineParams(computeThresholdFromOptLevels(OptLevel, SizeOptLevel));
2763  // At O3, use the value of -locally-hot-callsite-threshold option to populate
2764  // Params.LocallyHotCallSiteThreshold. Below O3, this flag has effect only
2765  // when it is specified explicitly.
2766  if (OptLevel > 2)
2767  Params.LocallyHotCallSiteThreshold = LocallyHotCallSiteThreshold;
2768  return Params;
2769 }
2770 
2771 PreservedAnalyses
2772 InlineCostAnnotationPrinterPass::run(Function &F,
2773  FunctionAnalysisManager &FAM) {
2774  PrintInstructionComments = true;
2775  std::function<AssumptionCache &(Function &)> GetAssumptionCache = [&](
2776  Function &F) -> AssumptionCache & {
2777  return FAM.getResult<AssumptionAnalysis>(F);
2778  };
2779  Module *M = F.getParent();
2780  ProfileSummaryInfo PSI(*M);
2781  DataLayout DL(M);
2782  TargetTransformInfo TTI(DL);
2783  // FIXME: Redesign the usage of InlineParams to expand the scope of this pass.
2784  // In the current implementation, the type of InlineParams doesn't matter as
2785  // the pass serves only for verification of inliner's decisions.
2786  // We can add a flag which determines InlineParams for this run. Right now,
2787  // the default InlineParams are used.
2788  const InlineParams Params = llvm::getInlineParams();
2789  for (BasicBlock &BB : F) {
2790  for (Instruction &I : BB) {
2791  if (CallInst *CI = dyn_cast<CallInst>(&I)) {
2792  Function *CalledFunction = CI->getCalledFunction();
2793  if (!CalledFunction || CalledFunction->isDeclaration())
2794  continue;
2795  OptimizationRemarkEmitter ORE(CalledFunction);
2796  InlineCostCallAnalyzer ICCA(*CalledFunction, *CI, Params, TTI,
2797  GetAssumptionCache, nullptr, &PSI, &ORE);
2798  ICCA.analyze();
2799  OS << " Analyzing call of " << CalledFunction->getName()
2800  << "... (caller:" << CI->getCaller()->getName() << ")\n";
2801  ICCA.print();
2802  }
2803  }
2804  }
2805  return PreservedAnalyses::all();
2806 }
llvm::Check::Size
@ Size
Definition: FileCheck.h:73
i
i
Definition: README.txt:29
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:155
llvm::Argument
This class represents an incoming formal argument to a Function.
Definition: Argument.h:29
AssumptionCache.h
llvm::Constant::isAllOnesValue
bool isAllOnesValue() const
Return true if this is the value that would be returned by getAllOnesValue.
Definition: Constants.cpp:101
llvm::InlineResult::success
static InlineResult success()
Definition: InlineCost.h:139
OptComputeFullInlineCost
static cl::opt< bool > OptComputeFullInlineCost("inline-cost-full", cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::desc("Compute the full inline cost of a call site even when the cost " "exceeds the threshold."))
llvm::OptimizationRemarkMissed
Diagnostic information for missed-optimization remarks.
Definition: DiagnosticInfo.h:729
llvm::predecessors
pred_range predecessors(BasicBlock *BB)
Definition: CFG.h:127
llvm::TargetTransformInfo::TCC_Expensive
@ TCC_Expensive
The cost of a 'div' instruction on x86.
Definition: TargetTransformInfo.h:263
LLVM_DUMP_METHOD
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:499
llvm
Definition: AllocatorList.h:23
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
ColdThreshold
static cl::opt< int > ColdThreshold("inlinecold-threshold", cl::Hidden, cl::init(45), cl::ZeroOrMore, cl::desc("Threshold for inlining functions with cold attribute"))
llvm::SaturatingAdd
std::enable_if_t< std::is_unsigned< T >::value, T > SaturatingAdd(T X, T Y, bool *ResultOverflowed=nullptr)
Add two unsigned integers, X and Y, of type T.
Definition: MathExtras.h:803
llvm::ReturnInst
Return a value (possibly void), from a function.
Definition: Instructions.h:2925
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:112
llvm::generic_gep_type_iterator
Definition: GetElementPtrTypeIterator.h:31
print
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
Definition: ArchiveWriter.cpp:147
llvm::InlineParams::DefaultThreshold
int DefaultThreshold
The default threshold to start with for a callee.
Definition: InlineCost.h:162
isColdCallSite
static bool isColdCallSite(CallBase &CB, BlockFrequencyInfo &CallerBFI)
Return true if the block containing the call site has a BlockFrequency of less than ColdCCRelFreq% of...
Definition: GlobalOpt.cpp:2202
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
IntrinsicInst.h
llvm::Type::isPointerTy
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:229
llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:785
llvm::Function
Definition: Function.h:61
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:530
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::lookup
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:197
llvm::SetVector::size
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:77
llvm::BitCastInst
This class represents a no-op cast from one type to another.
Definition: Instructions.h:5138
GetElementPtrTypeIterator.h
llvm::ConstantInt::getValue
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:131
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
Statistic.h
llvm::ConstantExpr::getICmp
static Constant * getICmp(unsigned short pred, Constant *LHS, Constant *RHS, bool OnlyIfReduced=false)
get* - Return some common constants without having to specify the full Instruction::OPCODE identifier.
Definition: Constants.cpp:2476
llvm::InlineParams::LocallyHotCallSiteThreshold
Optional< int > LocallyHotCallSiteThreshold
Threshold to use when the callsite is considered hot relative to function entry.
Definition: InlineCost.h:181
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:167
llvm::Type::getPointerAddressSpace
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:693
HotCallSiteThreshold
static cl::opt< int > HotCallSiteThreshold("hot-callsite-threshold", cl::Hidden, cl::init(3000), cl::ZeroOrMore, cl::desc("Threshold for hot callsites"))
llvm::PointerType::getAddressSpace
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Definition: DerivedTypes.h:662
InlineSizeAllowance
static cl::opt< int > InlineSizeAllowance("inline-size-allowance", cl::Hidden, cl::init(100), cl::ZeroOrMore, cl::desc("The maximum size of a callee that gets " "inlined without sufficient cycle savings"))
llvm::CmpInst::ICMP_NE
@ ICMP_NE
not equal
Definition: InstrTypes.h:744
llvm::ConstantExpr::getBitCast
static Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2207
llvm::GlobalAlias
Definition: GlobalAlias.h:27
ValueTracking.h
llvm::ConstantExpr::getSelect
static Constant * getSelect(Constant *C, Constant *V1, Constant *V2, Type *OnlyIfReducedTy=nullptr)
Select constant expr.
Definition: Constants.cpp:2394
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:151
llvm::ConstantExpr::getInsertValue
static Constant * getInsertValue(Constant *Agg, Constant *Val, ArrayRef< unsigned > Idxs, Type *OnlyIfReducedTy=nullptr)
Definition: Constants.cpp:2597
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::erase
bool erase(const KeyT &Val)
Definition: DenseMap.h:302
llvm::InlineConstants::OptSizeThreshold
const int OptSizeThreshold
Use when optsize (-Os) is specified.
Definition: InlineCost.h:35
DefaultThreshold
static cl::opt< int > DefaultThreshold("inlinedefault-threshold", cl::Hidden, cl::init(225), cl::ZeroOrMore, cl::desc("Default amount of inlining to perform"))
BBSetVector
SetVector< BasicBlock * > BBSetVector
Definition: BasicBlockUtils.cpp:1596
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:46
llvm::SimplifyFNegInst
Value * SimplifyFNegInst(Value *Op, FastMathFlags FMF, const SimplifyQuery &Q)
Given operand for an FNeg, fold the result or return null.
Definition: InstructionSimplify.cpp:4783
llvm::InlineCost::getNever
static InlineCost getNever(const char *Reason)
Definition: InlineCost.h:95
llvm::Optional
Definition: APInt.h:33
llvm::isInlineViable
InlineResult isInlineViable(Function &Callee)
Minimal filter to detect invalid constructs for inlining.
Definition: InlineCost.cpp:2631
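isInlineViable is the cheap structural gate run before any cost modeling. A sketch of how a client might consume its result (the logging is illustrative):
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Support/raw_ostream.h"
static bool structurallyInlinable(llvm::Function &Callee) {
  llvm::InlineResult IR = llvm::isInlineViable(Callee);
  if (!IR.isSuccess()) {
    // Reasons include indirectbr, returns_twice calls, and similar constructs.
    llvm::errs() << "not viable: " << IR.getFailureReason() << "\n";
    return false;
  }
  return true;
}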
llvm::InlineParams
Thresholds to tune inline cost analysis.
Definition: InlineCost.h:160
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::count
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:145
Offset
uint64_t Offset
Definition: ELFObjHandler.cpp:81
llvm::SmallPtrSet< Value *, 16 >
llvm::ore::NV
DiagnosticInfoOptimizationBase::Argument NV
Definition: OptimizationRemarkEmitter.h:128
llvm::tgtok::FalseVal
@ FalseVal
Definition: TGLexer.h:61
Operator.h
llvm::InlineParams::ColdThreshold
Optional< int > ColdThreshold
Threshold to use for cold callees.
Definition: InlineCost.h:168
llvm::TargetTransformInfo::getInlinerVectorBonusPercent
int getInlinerVectorBonusPercent() const
Definition: TargetTransformInfo.cpp:205
llvm::dump
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
Definition: SparseBitVector.h:876
STLExtras.h
llvm::CallBase::arg_begin
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
Definition: InstrTypes.h:1306
llvm::SmallVectorImpl::pop_back_val
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:635
llvm::InlineParams::OptMinSizeThreshold
Optional< int > OptMinSizeThreshold
Threshold to use when the caller is optimized for minsize.
Definition: InlineCost.h:174
llvm::UnaryOperator
Definition: InstrTypes.h:103
llvm::successors
succ_range successors(Instruction *I)
Definition: CFG.h:262
llvm::gep_type_begin
gep_type_iterator gep_type_begin(const User *GEP)
Definition: GetElementPtrTypeIterator.h:139
llvm::detail::DenseSetImpl< ValueT, DenseMap< ValueT, detail::DenseSetEmpty, DenseMapInfo< ValueT >, detail::DenseSetPair< ValueT > >, DenseMapInfo< ValueT > >::insert
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:206
llvm::detail::DenseSetImpl< ValueT, DenseMap< ValueT, detail::DenseSetEmpty, DenseMapInfo< ValueT >, detail::DenseSetPair< ValueT > >, DenseMapInfo< ValueT > >::count
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition: DenseSet.h:97
ConstantFolding.h
llvm::InlineConstants::CallPenalty
const int CallPenalty
Definition: InlineCost.h:46
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:122
functionsHaveCompatibleAttributes
static bool functionsHaveCompatibleAttributes(Function *Caller, Function *Callee, TargetTransformInfo &TTI, function_ref< const TargetLibraryInfo &(Function &)> &GetTLI)
Test that there are no attribute conflicts between Caller and Callee that prevent inlining.
Definition: InlineCost.cpp:2426
F
#define F(x, y, z)
Definition: MD5.cpp:56
InlineSavingsMultiplier
static cl::opt< int > InlineSavingsMultiplier("inline-savings-multiplier", cl::Hidden, cl::init(8), cl::ZeroOrMore, cl::desc("Multiplier to multiply cycle savings by during inlining"))
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::gep_type_end
gep_type_iterator gep_type_end(const User *GEP)
Definition: GetElementPtrTypeIterator.h:146
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
llvm::ConstantExpr::getSub
static Constant * getSub(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
Definition: Constants.cpp:2675
CommandLine.h
llvm::BlockFrequencyInfo::getEntryFreq
uint64_t getEntryFreq() const
Definition: BlockFrequencyInfo.cpp:281
CodeMetrics.h
FormattedStream.h
llvm::APInt::uge
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1313
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:77
llvm::TargetTransformInfo::areInlineCompatible
bool areInlineCompatible(const Function *Caller, const Function *Callee) const
Definition: TargetTransformInfo.cpp:955
llvm::Instruction::getNumSuccessors
unsigned getNumSuccessors() const
Return the number of successors that this instruction has.
Definition: Instruction.cpp:736
llvm::Intrinsic::getType
FunctionType * getType(LLVMContext &Context, ID id, ArrayRef< Type * > Tys=None)
Return the function type for an intrinsic.
Definition: Function.cpp:1274
llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1482
llvm::BlockFrequencyInfo
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequencies.
Definition: BlockFrequencyInfo.h:37
llvm::GlobalValue::isDeclaration
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit.
Definition: Globals.cpp:228
llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
llvm::getInlineCost
InlineCost getInlineCost(CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI, function_ref< AssumptionCache &(Function &)> GetAssumptionCache, function_ref< const TargetLibraryInfo &(Function &)> GetTLI, function_ref< BlockFrequencyInfo &(Function &)> GetBFI=nullptr, ProfileSummaryInfo *PSI=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
Get an InlineCost object representing the cost of inlining this callsite.
Definition: InlineCost.cpp:2473
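A sketch of driving this entry point for one call site; the callbacks simply hand back analyses the caller already owns, and shouldInline is an illustrative name, not an API.
#include "llvm/Analysis/InlineCost.h"
using namespace llvm;
static bool shouldInline(CallBase &CB, TargetTransformInfo &CalleeTTI,
                         function_ref<AssumptionCache &(Function &)> GetAC,
                         function_ref<const TargetLibraryInfo &(Function &)> GetTLI) {
  InlineCost IC =
      getInlineCost(CB, getInlineParams(), CalleeTTI, GetAC, GetTLI);
  if (IC.isAlways())
    return true;
  if (IC.isNever())
    return false;
  return IC.getCost() < IC.getThreshold(); // variable cost vs. threshold
}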
llvm::Constant::isNullValue
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition: Constants.cpp:86
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::User
Definition: User.h:44
llvm::ARM_PROC::A
@ A
Definition: ARMBaseInfo.h:34
llvm::ConstantExpr::getIntToPtr
static Constant * getIntToPtr(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2193
llvm::CallBase::getCalledFunction
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation.
Definition: InstrTypes.h:1396
SI
@ SI
Definition: SIInstrInfo.cpp:7463
llvm::ConstantExpr::getPtrToInt
static Constant * getPtrToInt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2179
llvm::InlineCost
Represents the cost of inlining a function.
Definition: InlineCost.h:67
llvm::ConstantFoldCall
Constant * ConstantFoldCall(const CallBase *Call, Function *F, ArrayRef< Constant * > Operands, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldCall - Attempt to constant fold a call to the specified function with the specified arguments.
Definition: ConstantFolding.cpp:2981
InlineEnableCostBenefitAnalysis
static cl::opt< bool > InlineEnableCostBenefitAnalysis("inline-enable-cost-benefit-analysis", cl::Hidden, cl::init(false), cl::desc("Enable the cost-benefit analysis for the inliner"))
llvm::ms_demangle::QualifierMangleMode::Result
@ Result
llvm::canConstantFoldCallTo
bool canConstantFoldCallTo(const CallBase *Call, const Function *F)
canConstantFoldCallTo - Return true if it's even possible to fold a call to the specified function.
Definition: ConstantFolding.cpp:1428
TargetLibraryInfo.h
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::PatternMatch::m_FNeg
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
Definition: PatternMatch.h:1063
llvm::pdb::PDB_SymType::Caller
@ Caller
llvm::Instruction
Definition: Instruction.h:45
llvm::Operator::getOpcode
unsigned getOpcode() const
Return the opcode for this Instruction or ConstantExpr.
Definition: Operator.h:40
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::InlineResult::isSuccess
bool isSuccess() const
Definition: InlineCost.h:143
llvm::cl::Option::getNumOccurrences
int getNumOccurrences() const
Definition: CommandLine.h:404
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:885
llvm::InlineCostAnnotationPrinterPass::run
PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM)
Definition: InlineCost.cpp:2772
llvm::CodeMetrics::collectEphemeralValues
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
Definition: CodeMetrics.cpp:70
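The cost analysis skips "ephemeral" values when sizing a callee, since they vanish after inlining. A sketch of collecting them for a whole function, using the Function overload that exists alongside the Loop overload shown above:
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CodeMetrics.h"
static void gatherEphemerals(const llvm::Function &F, llvm::AssumptionCache &AC,
                             llvm::SmallPtrSetImpl<const llvm::Value *> &EphValues) {
  // Values used only by @llvm.assume (and similar) contribute no runtime cost.
  llvm::CodeMetrics::collectEphemeralValues(&F, &AC, EphValues);
}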
SmallPtrSet.h
llvm::Instruction::getSuccessor
BasicBlock * getSuccessor(unsigned Idx) const
Return the specified successor. This instruction must be a terminator.
Definition: Instruction.cpp:748
llvm::InlineCost::get
static InlineCost get(int Cost, int Threshold)
Definition: InlineCost.h:87
llvm::getInlineParams
InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options.
Definition: InlineCost.cpp:2743
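A sketch of starting from the command-line defaults and overriding a knob programmatically; the values mirror the option defaults listed on this page, and the function name is illustrative.
#include "llvm/Analysis/InlineCost.h"
static llvm::InlineParams makeTunedParams() {
  llvm::InlineParams Params = llvm::getInlineParams();
  Params.DefaultThreshold = 225; // same as -inline-threshold's default
  Params.HintThreshold = 325;    // same as -inlinehint-threshold's default
  return Params;
}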
llvm::InlineConstants::InstrCost
const int InstrCost
Definition: InlineCost.h:44
PatternMatch.h
llvm::MCID::Call
@ Call
Definition: MCInstrDesc.h:153
llvm::None
const NoneType None
Definition: None.h:23
LoopInfo.h
llvm::ProfileSummaryInfo
Analysis providing profile information.
Definition: ProfileSummaryInfo.h:39
llvm::BranchInst::getCondition
Value * getCondition() const
Definition: Instructions.h:3088
llvm::InvokeInst
Invoke instruction.
Definition: Instructions.h:3688
llvm::CmpInst
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:712
llvm::cl::ZeroOrMore
@ ZeroOrMore
Definition: CommandLine.h:120
AssemblyAnnotationWriter.h
llvm::function_ref
An efficient, type-erasing, non-owning reference to a callable.
Definition: STLExtras.h:168
llvm::DenseSet
Implements a dense probed hash-table based set.
Definition: DenseSet.h:268
llvm::cl::opt
Definition: CommandLine.h:1422
llvm::ConstantExpr::getCompare
static Constant * getCompare(unsigned short pred, Constant *C1, Constant *C2, bool OnlyIfReduced=false)
Return an ICmp or FCmp comparison operator constant expression.
Definition: Constants.cpp:2372
llvm::StoreInst
An instruction for storing to memory.
Definition: Instructions.h:303
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
InlineThreshold
static cl::opt< int > InlineThreshold("inline-threshold", cl::Hidden, cl::init(225), cl::ZeroOrMore, cl::desc("Control the amount of inlining to perform (default = 225)"))
Index
uint32_t Index
Definition: ELFObjHandler.cpp:84
llvm::InlineConstants::IndirectCallThreshold
const int IndirectCallThreshold
Definition: InlineCost.h:45
llvm::StructLayout
Used to lazily calculate structure layout information for a target machine, based on the DataLayout structure.
Definition: DataLayout.h:604
ProfileSummaryInfo.h
llvm::CatchReturnInst
Definition: Instructions.h:4463
llvm::AssumptionAnalysis
A function analysis which provides an AssumptionCache.
Definition: AssumptionCache.h:169
llvm::SimplifyBinOp
Value * SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a BinaryOperator, fold the result or return null.
Definition: InstructionSimplify.cpp:5157
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:57
llvm::DenseMap
Definition: DenseMap.h:714
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::BlockFrequency::getFrequency
uint64_t getFrequency() const
Returns the frequency as a fixpoint number scaled by the entry frequency.
Definition: BlockFrequency.h:35
llvm::SaturatingMultiplyAdd
std::enable_if_t< std::is_unsigned< T >::value, T > SaturatingMultiplyAdd(T X, T Y, T A, bool *ResultOverflowed=nullptr)
Multiply two unsigned integers, X and Y, and add the unsigned integer, A to the product.
Definition: MathExtras.h:866
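A sketch of the weighted-accumulation pattern the cost-benefit analysis relies on: per-block savings scaled by block frequency, accumulated without wrapping (the names here are illustrative).
#include "llvm/Support/MathExtras.h"
#include <cstdint>
static uint64_t addWeightedSavings(uint64_t Total, uint64_t SavingsInBlock,
                                   uint64_t Freq) {
  // Total + SavingsInBlock * Freq, clamped at UINT64_MAX instead of wrapping.
  return llvm::SaturatingMultiplyAdd(SavingsInBlock, Freq, Total);
}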
llvm::GetElementPtrInst
An instruction for type-safe pointer arithmetic to access elements of arrays and structs.
Definition: Instructions.h:905
llvm::Attribute::AttrKind
AttrKind
This enumeration lists the attributes that can be associated with parameters, function results, or the function itself.
Definition: Attributes.h:71
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
llvm::InlineConstants::OptAggressiveThreshold
const int OptAggressiveThreshold
Use when -O3 is specified.
Definition: InlineCost.h:41
llvm::PointerType
Class to represent pointers.
Definition: DerivedTypes.h:634
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:150
llvm::formatted_raw_ostream
formatted_raw_ostream - A raw_ostream that wraps another one and keeps track of line and column position.
Definition: FormattedStream.h:30
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::InlineCostAnnotationPrinterPass::OS
raw_ostream & OS
Definition: InlineCost.h:281
getFalse
static Constant * getFalse(Type *Ty)
For a boolean type or a vector of boolean type, return false or a vector with every element false.
Definition: InstructionSimplify.cpp:119
llvm::OptimizationRemarkEmitter::emit
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Definition: OptimizationRemarkEmitter.cpp:77
llvm::ConstantExpr::getCast
static Constant * getCast(unsigned ops, Constant *C, Type *Ty, bool OnlyIfReduced=false)
Convenience function for getting a Cast operation.
Definition: Constants.cpp:1962
InlineCost.h
llvm::SelectInst
This class represents the LLVM 'select' instruction.
Definition: Instructions.h:1715
llvm::TTI
TargetTransformInfo TTI
Definition: TargetTransformInfo.h:162
llvm::TargetTransformInfo::isLoweredToCall
bool isLoweredToCall(const Function *F) const
Test whether calls to a function lower to actual program function calls.
Definition: TargetTransformInfo.cpp:275
llvm::Record
Definition: Record.h:1472
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
llvm::InlineParams::ColdCallSiteThreshold
Optional< int > ColdCallSiteThreshold
Threshold to use when the callsite is considered cold.
Definition: InlineCost.h:184
llvm::GEPOperator
Definition: Operator.h:457
llvm::CallBase::arg_end
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
Definition: InstrTypes.h:1312
llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:382
llvm::BranchInst::isUnconditional
bool isUnconditional() const
Definition: Instructions.h:3085
llvm::PatternMatch::m_Value
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:76
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:70
llvm::SetVector::insert
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
CFG.h
llvm::TargetTransformInfo::TCC_Free
@ TCC_Free
Expected to fold away in lowering.
Definition: TargetTransformInfo.h:261
llvm::LoopInfo
Definition: LoopInfo.h:1080
llvm::BinaryOperator
Definition: InstrTypes.h:190
llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: OptimizationRemarkEmitter.h:33
llvm::min
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:357
DataLayout.h
llvm::StructType
Class to represent struct types.
Definition: DerivedTypes.h:212
Cond
SmallVector< MachineOperand, 4 > Cond
Definition: BasicBlockSections.cpp:167
InstVisitor.h
llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:41
llvm::TargetTransformInfo::TCK_SizeAndLatency
@ TCK_SizeAndLatency
The weighted sum of size and latency.
Definition: TargetTransformInfo.h:214
llvm::CallingConv::Cold
@ Cold
Definition: CallingConv.h:48
DEBUG_PRINT_STAT
#define DEBUG_PRINT_STAT(x)
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
llvm::ConstantInt::isZero
bool isZero() const
This is just a convenience method to make client code smaller for a common case.
Definition: Constants.h:192
llvm::TargetTransformInfo::getUserCost
InstructionCost getUserCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind) const
Estimate the cost of a given IR user when lowered.
Definition: TargetTransformInfo.cpp:223
llvm::Instruction::getFunction
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:70
getParent
static const Function * getParent(const Value *V)
Definition: BasicAliasAnalysis.cpp:767
llvm::BranchProbability
Definition: BranchProbability.h:30
llvm::Value::getContext
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:952
llvm::PtrToIntInst
This class represents a cast from a pointer to an integer.
Definition: Instructions.h:5087
llvm::InlineResult::getFailureReason
const char * getFailureReason() const
Definition: InlineCost.h:144
llvm::InstVisitor
Base class for instruction visitors.
Definition: InstVisitor.h:79
llvm::CastInst
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:432
BlockFrequencyInfo.h
llvm::AMDGPUISD::BFI
@ BFI
Definition: AMDGPUISelLowering.h:418
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:299
DisableGEPConstOperand
static cl::opt< bool > DisableGEPConstOperand("disable-gep-const-evaluation", cl::Hidden, cl::init(false), cl::desc("Disables evaluation of GetElementPtr with constant operands"))
llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:174
llvm::CallBase::paramHasAttr
bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Determine whether the argument or parameter has the given attribute.
Definition: Instructions.cpp:339
llvm::isAssumeLikeIntrinsic
bool isAssumeLikeIntrinsic(const Instruction *I)
Return true if it is an intrinsic that cannot be speculated but also cannot trap.
Definition: ValueTracking.cpp:513
llvm::RegState::Dead
@ Dead
Unused definition.
Definition: MachineInstrBuilder.h:50
llvm::ConstantInt::getZExtValue
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate for the type of this constant.
Definition: Constants.h:140
llvm::InlineConstants::MaxSimplifiedDynamicAllocaToInline
const uint64_t MaxSimplifiedDynamicAllocaToInline
Do not inline dynamic allocas that have been constant propagated to be static allocas above this amount.
Definition: InlineCost.h:54
InlineCallerSupersetNoBuiltin
static cl::opt< bool > InlineCallerSupersetNoBuiltin("inline-caller-superset-nobuiltin", cl::Hidden, cl::init(true), cl::ZeroOrMore, cl::desc("Allow inlining when caller has a superset of callee's nobuiltin " "attributes."))
ColdCallSiteRelFreq
static cl::opt< int > ColdCallSiteRelFreq("cold-callsite-rel-freq", cl::Hidden, cl::init(2), cl::ZeroOrMore, cl::desc("Maximum block frequency, expressed as a percentage of caller's " "entry frequency, for a callsite to be cold in the absence of " "profile information."))
llvm::APInt::getNullValue
static APInt getNullValue(unsigned numBits)
Get the '0' value.
Definition: APInt.h:574
ColdCallSiteThreshold
static cl::opt< int > ColdCallSiteThreshold("inline-cold-callsite-threshold", cl::Hidden, cl::init(45), cl::ZeroOrMore, cl::desc("Threshold for inlining cold callsites"))
CallingConv.h
llvm::getAttributeBasedInliningDecision
Optional< InlineResult > getAttributeBasedInliningDecision(CallBase &Call, Function *Callee, TargetTransformInfo &CalleeTTI, function_ref< const TargetLibraryInfo &(Function &)> GetTLI)
Returns InlineResult::success() if the call site should be always inlined because of user directives, InlineResult::failure() if inlining may never happen, and None when the decision should fall through to the cost model.
Definition: InlineCost.cpp:2508
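A sketch of the three-way protocol a caller follows around this entry point; the wrapper name is illustrative.
#include "llvm/ADT/Optional.h"
#include "llvm/Analysis/InlineCost.h"
using namespace llvm;
// Returns true/false for a forced decision, or None to defer to cost modeling.
static Optional<bool> attributeDecision(
    CallBase &CB, Function *Callee, TargetTransformInfo &CalleeTTI,
    function_ref<const TargetLibraryInfo &(Function &)> GetTLI) {
  if (Optional<InlineResult> Decision =
          getAttributeBasedInliningDecision(CB, Callee, CalleeTTI, GetTLI))
    return Decision->isSuccess(); // e.g. alwaysinline vs. noinline
  return None;
}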
llvm::ResumeInst
Resume the propagation of an exception.
Definition: Instructions.h:4138
LocallyHotCallSiteThreshold
static cl::opt< int > LocallyHotCallSiteThreshold("locally-hot-callsite-threshold", cl::Hidden, cl::init(525), cl::ZeroOrMore, cl::desc("Threshold for locally hot callsites"))
llvm::InlineParams::HintThreshold
Optional< int > HintThreshold
Threshold to use for callees with inline hint.
Definition: InlineCost.h:165
llvm::ConstantInt::getTrue
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:833
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::end
iterator end()
Definition: DenseMap.h:83
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:314
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:161
llvm::StructLayout::getElementOffset
uint64_t getElementOffset(unsigned Idx) const
Definition: DataLayout.h:634
llvm::AssemblyAnnotationWriter
Definition: AssemblyAnnotationWriter.h:27
llvm::InlineConstants::ColdccPenalty
const int ColdccPenalty
Definition: InlineCost.h:48
HintThreshold
static cl::opt< int > HintThreshold("inlinehint-threshold", cl::Hidden, cl::init(325), cl::ZeroOrMore, cl::desc("Threshold for inlining functions with inline hint"))
llvm::APInt::sextOrTrunc
APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
Definition: APInt.cpp:956
llvm::ExtractValueInst
This instruction extracts a struct member or array element value from an aggregate value.
Definition: Instructions.h:2318
llvm::InlineCost::getAlways
static InlineCost getAlways(const char *Reason)
Definition: InlineCost.h:92
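The three kinds of result the analysis can report, sketched with illustrative reason strings and values:
#include "llvm/Analysis/InlineCost.h"
using namespace llvm;
void inlineCostKinds() {
  InlineCost Always = InlineCost::getAlways("always inline attribute");
  InlineCost Never = InlineCost::getNever("noinline function attribute");
  InlineCost Variable = InlineCost::get(/*Cost=*/120, /*Threshold=*/225);
  (void)Always; (void)Never; (void)Variable;
}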
llvm::TypeSize
Definition: TypeSize.h:417
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::size
unsigned size() const
Definition: DenseMap.h:100
llvm::SetVector::count
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:215
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:207
llvm::InlineResult::failure
static InlineResult failure(const char *Reason)
Definition: InlineCost.h:140
llvm::ConstantExpr::getGetElementPtr
static Constant * getGetElementPtr(Type *Ty, Constant *C, ArrayRef< Constant * > IdxList, bool InBounds=false, Optional< unsigned > InRangeIndex=None, Type *OnlyIfReducedTy=nullptr)
Getelementptr form.
Definition: Constants.h:1205
llvm::TargetTransformInfo::getInliningThresholdMultiplier
unsigned getInliningThresholdMultiplier() const
Definition: TargetTransformInfo.cpp:196
llvm::InlineConstants::OptMinSizeThreshold
const int OptMinSizeThreshold
Use when minsize (-Oz) is specified.
Definition: InlineCost.h:38
llvm::CleanupReturnInst
Definition: Instructions.h:4544
llvm::BlockFrequencyInfo::getBlockFreq
BlockFrequency getBlockFreq(const BasicBlock *BB) const
getBlockFreq - Return the block frequency.
Definition: BlockFrequencyInfo.cpp:204
llvm::IntToPtrInst
This class represents a cast from an integer to a pointer.
Definition: Instructions.h:5044
llvm::IndirectBrInst
Indirect Branch Instruction.
Definition: Instructions.h:3553
GlobalAlias.h
llvm::TargetTransformInfo::adjustInliningThreshold
unsigned adjustInliningThreshold(const CallBase *CB) const
Definition: TargetTransformInfo.cpp:201
llvm::IntrinsicInst
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:45
computeThresholdFromOptLevels
static int computeThresholdFromOptLevels(unsigned OptLevel, unsigned SizeOptLevel)
Definition: InlineCost.cpp:2749
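A sketch of how optimization levels select among the threshold constants above, via the two-argument getInlineParams overload:
#include "llvm/Analysis/InlineCost.h"
// -O3 picks OptAggressiveThreshold; -Os (SizeOptLevel=1) picks
// OptSizeThreshold; -Oz (SizeOptLevel=2) picks OptMinSizeThreshold.
llvm::InlineParams ParamsO3 = llvm::getInlineParams(/*OptLevel=*/3,
                                                    /*SizeOptLevel=*/0);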
llvm::TargetTransformInfo::getEstimatedNumberOfCaseClusters
unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) const
Definition: TargetTransformInfo.cpp:216
llvm::getInliningCostEstimate
Optional< int > getInliningCostEstimate(CallBase &Call, TargetTransformInfo &CalleeTTI, function_ref< AssumptionCache &(Function &)> GetAssumptionCache, function_ref< BlockFrequencyInfo &(Function &)> GetBFI=nullptr, ProfileSummaryInfo *PSI=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
Get the cost estimate ignoring thresholds.
Definition: InlineCost.cpp:2483
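A sketch of pulling a raw cost number for logging or an advisor layer, without applying any threshold; the helper name is illustrative.
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
static void reportCost(CallBase &CB, TargetTransformInfo &CalleeTTI,
                       function_ref<AssumptionCache &(Function &)> GetAC) {
  if (Optional<int> Cost = getInliningCostEstimate(CB, CalleeTTI, GetAC))
    errs() << "estimated inlining cost: " << *Cost << "\n";
}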
llvm::AttributeFuncs::areInlineCompatible
bool areInlineCompatible(const Function &Caller, const Function &Callee)
Definition: Attributes.cpp:2302
llvm::getCallsiteCost
int getCallsiteCost(CallBase &Call, const DataLayout &DL)
Return the cost associated with a callsite, including parameter passing and the call/return instruction.
Definition: InlineCost.cpp:2440
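A sketch of querying the call-site cost, which credits the caller for the argument setup and call/return overhead that inlining removes; the helper name is illustrative.
#include "llvm/Analysis/InlineCost.h"
#include "llvm/IR/Module.h"
static int callOverhead(llvm::CallBase &CB) {
  const llvm::DataLayout &DL = CB.getModule()->getDataLayout();
  return llvm::getCallsiteCost(CB, DL);
}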
SmallVector.h
llvm::InlineConstants::LastCallToStaticBonus
const int LastCallToStaticBonus
Definition: InlineCost.h:47
llvm::BlockFrequencyInfo::getBlockProfileCount
Optional< uint64_t > getBlockProfileCount(const BasicBlock *BB, bool AllowSynthetic=false) const
Returns the estimated profile count of BB.
Definition: BlockFrequencyInfo.cpp:209
Dominators.h
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:94
InstructionSimplify.h
llvm::BlockAddress::get
static BlockAddress * get(Function *F, BasicBlock *BB)
Return a BlockAddress for the specified function and basic block.
Definition: Constants.cpp:1808
llvm::max
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:340
TargetTransformInfo.h
llvm::PHINode
Definition: Instructions.h:2572
Threshold
static cl::opt< unsigned > Threshold("loop-unswitch-threshold", cl::desc("Max loop size to unswitch"), cl::init(100), cl::Hidden)
llvm::PatternMatch
Definition: PatternMatch.h:47
llvm::detail::DenseSetImpl< ValueT, DenseMap< ValueT, detail::DenseSetEmpty, DenseMapInfo< ValueT >, detail::DenseSetPair< ValueT > >, DenseMapInfo< ValueT > >::erase
bool erase(const ValueT &V)
Definition: DenseSet.h:101
llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.
Definition: APFloat.h:43
llvm::CallBase
Base class for all callable instructions (InvokeInst and CallInst). Holds everything related to calling a function.
Definition: InstrTypes.h:1164
llvm::SmallPtrSetImpl< const Value * >
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:44
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1450
HotCallSiteRelFreq
static cl::opt< int > HotCallSiteRelFreq("hot-callsite-rel-freq", cl::Hidden, cl::init(60), cl::ZeroOrMore, cl::desc("Minimum block frequency, expressed as a multiple of caller's " "entry frequency, for a callsite to be hot in the absence of " "profile information."))
llvm::UnreachableInst
This function has undefined behavior.
Definition: Instructions.h:4652
llvm::InlineParams::HotCallSiteThreshold
Optional< int > HotCallSiteThreshold
Threshold to use when the callsite is considered hot.
Definition: InlineCost.h:177
llvm::SwitchInst
Multiway switch.
Definition: Instructions.h:3151
llvm::Function::ProfileCount
Class to represent profile counts.
Definition: Function.h:292
llvm::AllocaInst
An instruction to allocate memory on the stack.
Definition: Instructions.h:61
llvm::cl::desc
Definition: CommandLine.h:414
llvm::BranchInst
Conditional or Unconditional Branch instruction.
Definition: Instructions.h:3007
raw_ostream.h
DEBUG_TYPE
#define DEBUG_TYPE
Definition: InlineCost.cpp:48
llvm::InlineConstants::TotalAllocaSizeRecursiveCaller
const unsigned TotalAllocaSizeRecursiveCaller
Do not inline functions which allocate this many bytes on the stack when the caller is recursive.
Definition: InlineCost.h:51
llvm::Function::ProfileCount::hasValue
bool hasValue() const
Definition: Function.h:302
llvm::SetVector
A vector that has set insertion semantics.
Definition: SetVector.h:40
llvm::SmallVectorImpl::reserve
void reserve(size_type N)
Definition: SmallVector.h:624
llvm::InsertValueInst
This instruction inserts a struct field or array element value into an aggregate value.
Definition: Instructions.h:2429
llvm::tgtok::TrueVal
@ TrueVal
Definition: TGLexer.h:61
PrintInstructionComments
static cl::opt< bool > PrintInstructionComments("print-instruction-comments", cl::Hidden, cl::init(false), cl::desc("Prints comments for instruction based on inline cost analysis"))
llvm::TargetTransformInfo::getFPOpCost
InstructionCost getFPOpCost(Type *Ty) const
Return the expected cost of supporting the floating point operation of the specified type.
Definition: TargetTransformInfo.cpp:533
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
llvm::InlineResult
InlineResult is basically true or false; for failure results, a message describes the reason.
Definition: InlineCost.h:134
Debug.h
llvm::InlineParams::OptSizeThreshold
Optional< int > OptSizeThreshold
Threshold to use when the caller is optimized for size.
Definition: InlineCost.h:171
llvm::BranchInst::isConditional
bool isConditional() const
Definition: Instructions.h:3086
llvm::ConstantExpr::getExtractValue
static Constant * getExtractValue(Constant *Agg, ArrayRef< unsigned > Idxs, Type *OnlyIfReducedTy=nullptr)
Definition: Constants.cpp:2621
llvm::Optional::getValue
constexpr const T & getValue() const LLVM_LVALUE_FUNCTION
Definition: Optional.h:280
llvm::BranchInst::getSuccessor
BasicBlock * getSuccessor(unsigned i) const
Definition: Instructions.h:3100
SetVector.h
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:44
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:364