//===- InlineCost.cpp - Cost analysis for inliner -------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements inline cost analysis.
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/InlineCost.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "inline-cost"

STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed");

static cl::opt<int> InlineThreshold(
    "inline-threshold", cl::Hidden, cl::init(225), cl::ZeroOrMore,
    cl::desc("Control the amount of inlining to perform (default = 225)"));

static cl::opt<int> HintThreshold(
    "inlinehint-threshold", cl::Hidden, cl::init(325), cl::ZeroOrMore,
    cl::desc("Threshold for inlining functions with inline hint"));

static cl::opt<int>
    ColdCallSiteThreshold("inline-cold-callsite-threshold", cl::Hidden,
                          cl::init(45), cl::ZeroOrMore,
                          cl::desc("Threshold for inlining cold callsites"));

// We introduce this threshold to help performance of instrumentation based
// PGO before we actually hook up inliner with analysis passes such as BPI and
// BFI.
static cl::opt<int> ColdThreshold(
    "inlinecold-threshold", cl::Hidden, cl::init(45), cl::ZeroOrMore,
    cl::desc("Threshold for inlining functions with cold attribute"));

static cl::opt<int>
    HotCallSiteThreshold("hot-callsite-threshold", cl::Hidden, cl::init(3000),
                         cl::ZeroOrMore,
                         cl::desc("Threshold for hot callsites "));

static cl::opt<int> LocallyHotCallSiteThreshold(
    "locally-hot-callsite-threshold", cl::Hidden, cl::init(525), cl::ZeroOrMore,
    cl::desc("Threshold for locally hot callsites "));

static cl::opt<int> ColdCallSiteRelFreq(
    "cold-callsite-rel-freq", cl::Hidden, cl::init(2), cl::ZeroOrMore,
    cl::desc("Maximum block frequency, expressed as a percentage of caller's "
             "entry frequency, for a callsite to be cold in the absence of "
             "profile information."));

static cl::opt<int> HotCallSiteRelFreq(
    "hot-callsite-rel-freq", cl::Hidden, cl::init(60), cl::ZeroOrMore,
    cl::desc("Minimum block frequency, expressed as a multiple of caller's "
             "entry frequency, for a callsite to be hot in the absence of "
             "profile information."));

static cl::opt<bool> OptComputeFullInlineCost(
    "inline-cost-full", cl::Hidden, cl::init(false), cl::ZeroOrMore,
    cl::desc("Compute the full inline cost of a call site even when the cost "
             "exceeds the threshold."));

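// Illustrative usage note: the knobs above are ordinary cl::opt flags, so they
// can be tuned on the opt command line, e.g. something along the lines of
//   opt -passes='cgscc(inline)' -inline-threshold=500 -S in.ll
// to raise the base threshold for one run. The pipeline spelling here is only
// an example; the flag names are the ones defined directly above.
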
namespace {

class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
  typedef InstVisitor<CallAnalyzer, bool> Base;
  friend class InstVisitor<CallAnalyzer, bool>;

  /// The TargetTransformInfo available for this compilation.
  const TargetTransformInfo &TTI;

  /// Getter for the cache of @llvm.assume intrinsics.
  std::function<AssumptionCache &(Function &)> &GetAssumptionCache;

  /// Getter for BlockFrequencyInfo.
  Optional<function_ref<BlockFrequencyInfo &(Function &)>> &GetBFI;

  /// Profile summary information.
  ProfileSummaryInfo *PSI;

  /// The called function.
  Function &F;

  // Cache the DataLayout since we use it a lot.
  const DataLayout &DL;

  /// The OptimizationRemarkEmitter available for this compilation.
  OptimizationRemarkEmitter *ORE;

  /// The candidate callsite being analyzed. Please do not use this to do
  /// analysis in the caller function; we want the inline cost query to be
  /// easily cacheable. Instead, use the cover function paramHasAttr.
  CallBase &CandidateCall;

  /// Tunable parameters that control the analysis.
  const InlineParams &Params;

  /// Upper bound for the inlining cost. Bonuses are being applied to account
  /// for speculative "expected profit" of the inlining decision.
  int Threshold;

  /// Inlining cost measured in abstract units, accounts for all the
  /// instructions expected to be executed for a given function invocation.
  /// Instructions that are statically proven to be dead based on call-site
  /// arguments are not counted here.
  int Cost = 0;

  bool ComputeFullInlineCost;

  bool IsCallerRecursive = false;
  bool IsRecursiveCall = false;
  bool ExposesReturnsTwice = false;
  bool HasDynamicAlloca = false;
  bool ContainsNoDuplicateCall = false;
  bool HasReturn = false;
  bool HasIndirectBr = false;
  bool HasUninlineableIntrinsic = false;
  bool InitsVargArgs = false;

  /// Number of bytes allocated statically by the callee.
  uint64_t AllocatedSize = 0;
  unsigned NumInstructions = 0;
  unsigned NumVectorInstructions = 0;

  /// Bonus to be applied when percentage of vector instructions in callee is
  /// high (see more details in updateThreshold).
  int VectorBonus = 0;
  /// Bonus to be applied when the callee has only one reachable basic block.
  int SingleBBBonus = 0;

  /// While we walk the potentially-inlined instructions, we build up and
  /// maintain a mapping of simplified values specific to this callsite. The
  /// idea is to propagate any special information we have about arguments to
  /// this call through the inlinable section of the function, and account for
  /// likely simplifications post-inlining. The most important aspect we track
  /// is CFG altering simplifications -- when we prove a basic block dead, that
  /// can cause dramatic shifts in the cost of inlining a function.
  DenseMap<Value *, Constant *> SimplifiedValues;

  /// Keep track of the values which map back (through function arguments) to
  /// allocas on the caller stack which could be simplified through SROA.
  DenseMap<Value *, Value *> SROAArgValues;

  /// The mapping of caller Alloca values to their accumulated cost savings. If
  /// we have to disable SROA for one of the allocas, this tells us how much
  /// cost must be added.
  DenseMap<Value *, int> SROAArgCosts;

  /// Keep track of values which map to a pointer base and constant offset.
  DenseMap<Value *, std::pair<Value *, APInt>> ConstantOffsetPtrs;

  /// Keep track of dead blocks due to the constant arguments.
  SetVector<BasicBlock *> DeadBlocks;

  /// The mapping of the blocks to their known unique successors due to the
  /// constant arguments.
  DenseMap<BasicBlock *, BasicBlock *> KnownSuccessors;

  /// Model the elimination of repeated loads that is expected to happen
  /// whenever we simplify away the stores that would otherwise cause them to
  /// be loads.
  bool EnableLoadElimination;
  SmallPtrSet<Value *, 16> LoadAddrSet;
  int LoadEliminationCost = 0;

  // Custom simplification helper routines.
  bool isAllocaDerivedArg(Value *V);
  bool lookupSROAArgAndCost(Value *V, Value *&Arg,
                            DenseMap<Value *, int>::iterator &CostIt);
  void disableSROA(DenseMap<Value *, int>::iterator CostIt);
  void disableSROA(Value *V);
  void findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB);
  void accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
                          int InstructionCost);
  void disableLoadElimination();
  bool isGEPFree(GetElementPtrInst &GEP);
  bool canFoldInboundsGEP(GetElementPtrInst &I);
  bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset);
  bool simplifyCallSite(Function *F, CallBase &Call);
  template <typename Callable>
  bool simplifyInstruction(Instruction &I, Callable Evaluate);
  ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V);

  /// Return true if the given argument to the function being considered for
  /// inlining has the given attribute set either at the call site or the
  /// function declaration. Primarily used to inspect call site specific
  /// attributes since these can be more precise than the ones on the callee
  /// itself.
  bool paramHasAttr(Argument *A, Attribute::AttrKind Attr);

  /// Return true if the given value is known non null within the callee if
  /// inlined through this particular callsite.
  bool isKnownNonNullInCallee(Value *V);

  /// Update Threshold based on callsite properties such as callee
  /// attributes and callee hotness for PGO builds. The Callee is explicitly
  /// passed to support analyzing indirect calls whose target is inferred by
  /// analysis.
  void updateThreshold(CallBase &Call, Function &Callee);

  /// Return true if size growth is allowed when inlining the callee at \p Call.
  bool allowSizeGrowth(CallBase &Call);

  /// Return true if \p Call is a cold callsite.
  bool isColdCallSite(CallBase &Call, BlockFrequencyInfo *CallerBFI);

  /// Return a higher threshold if \p Call is a hot callsite.
  Optional<int> getHotCallSiteThreshold(CallBase &Call,
                                        BlockFrequencyInfo *CallerBFI);

  // Custom analysis routines.
  InlineResult analyzeBlock(BasicBlock *BB,
                            SmallPtrSetImpl<const Value *> &EphValues);

  /// Handle a capped 'int' increment for Cost.
  void addCost(int64_t Inc, int64_t UpperBound = INT_MAX) {
    assert(UpperBound > 0 && UpperBound <= INT_MAX && "invalid upper bound");
    Cost = (int)std::min(UpperBound, Cost + Inc);
  }
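
  // For instance, addCost(SwitchCost, CostUpperBound) in visitSwitchInst
  // saturates Cost at the given bound; because the sum is formed in int64_t
  // before the min, the default bound keeps Cost from wrapping past INT_MAX.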

  // Disable several entry points to the visitor so we don't accidentally use
  // them by declaring but not defining them here.
  void visit(Module *);
  void visit(Module &);
  void visit(Function *);
  void visit(Function &);
  void visit(BasicBlock *);
  void visit(BasicBlock &);

  // Provide base case for our instruction visit.
  bool visitInstruction(Instruction &I);

  // Our visit overrides.
  bool visitAlloca(AllocaInst &I);
  bool visitPHI(PHINode &I);
  bool visitGetElementPtr(GetElementPtrInst &I);
  bool visitBitCast(BitCastInst &I);
  bool visitPtrToInt(PtrToIntInst &I);
  bool visitIntToPtr(IntToPtrInst &I);
  bool visitCastInst(CastInst &I);
  bool visitUnaryInstruction(UnaryInstruction &I);
  bool visitCmpInst(CmpInst &I);
  bool visitSub(BinaryOperator &I);
  bool visitBinaryOperator(BinaryOperator &I);
  bool visitFNeg(UnaryOperator &I);
  bool visitLoad(LoadInst &I);
  bool visitStore(StoreInst &I);
  bool visitExtractValue(ExtractValueInst &I);
  bool visitInsertValue(InsertValueInst &I);
  bool visitCallBase(CallBase &Call);
  bool visitReturnInst(ReturnInst &RI);
  bool visitBranchInst(BranchInst &BI);
  bool visitSelectInst(SelectInst &SI);
  bool visitSwitchInst(SwitchInst &SI);
  bool visitIndirectBrInst(IndirectBrInst &IBI);
  bool visitResumeInst(ResumeInst &RI);
  bool visitCleanupReturnInst(CleanupReturnInst &RI);
  bool visitCatchReturnInst(CatchReturnInst &RI);
  bool visitUnreachableInst(UnreachableInst &I);

public:
  CallAnalyzer(const TargetTransformInfo &TTI,
               std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
               Optional<function_ref<BlockFrequencyInfo &(Function &)>> &GetBFI,
               ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE,
               Function &Callee, CallBase &Call, const InlineParams &Params)
      : TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI),
        PSI(PSI), F(Callee), DL(F.getParent()->getDataLayout()), ORE(ORE),
        CandidateCall(Call), Params(Params), Threshold(Params.DefaultThreshold),
        ComputeFullInlineCost(OptComputeFullInlineCost ||
                              Params.ComputeFullInlineCost || ORE),
        EnableLoadElimination(true) {}

  InlineResult analyzeCall(CallBase &Call);

  int getThreshold() { return Threshold; }
  int getCost() { return Cost; }

  // Keep a bunch of stats about the cost savings found so we can print them
  // out when debugging.
  unsigned NumConstantArgs = 0;
  unsigned NumConstantOffsetPtrArgs = 0;
  unsigned NumAllocaArgs = 0;
  unsigned NumConstantPtrCmps = 0;
  unsigned NumConstantPtrDiffs = 0;
  unsigned NumInstructionsSimplified = 0;
  unsigned SROACostSavings = 0;
  unsigned SROACostSavingsLost = 0;

  void dump();
};

} // namespace

/// Test whether the given value is an Alloca-derived function argument.
bool CallAnalyzer::isAllocaDerivedArg(Value *V) {
  return SROAArgValues.count(V);
}

/// Lookup the SROA-candidate argument and cost iterator which V maps to.
/// Returns false if V does not map to a SROA-candidate.
bool CallAnalyzer::lookupSROAArgAndCost(
    Value *V, Value *&Arg, DenseMap<Value *, int>::iterator &CostIt) {
  if (SROAArgValues.empty() || SROAArgCosts.empty())
    return false;

  DenseMap<Value *, Value *>::iterator ArgIt = SROAArgValues.find(V);
  if (ArgIt == SROAArgValues.end())
    return false;

  Arg = ArgIt->second;
  CostIt = SROAArgCosts.find(Arg);
  return CostIt != SROAArgCosts.end();
}

/// Disable SROA for the candidate marked by this cost iterator.
///
/// This marks the candidate as no longer viable for SROA, and adds the cost
/// savings associated with it back into the inline cost measurement.
void CallAnalyzer::disableSROA(DenseMap<Value *, int>::iterator CostIt) {
  // If we're no longer able to perform SROA we need to undo its cost savings
  // and prevent subsequent analysis.
  addCost(CostIt->second);
  SROACostSavings -= CostIt->second;
  SROACostSavingsLost += CostIt->second;
  SROAArgCosts.erase(CostIt);
  disableLoadElimination();
}

/// If 'V' maps to a SROA candidate, disable SROA for it.
void CallAnalyzer::disableSROA(Value *V) {
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(V, SROAArg, CostIt))
    disableSROA(CostIt);
}

/// Accumulate the given cost for a particular SROA candidate.
void CallAnalyzer::accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
                                      int InstructionCost) {
  CostIt->second += InstructionCost;
  SROACostSavings += InstructionCost;
}

void CallAnalyzer::disableLoadElimination() {
  if (EnableLoadElimination) {
    addCost(LoadEliminationCost);
    LoadEliminationCost = 0;
    EnableLoadElimination = false;
  }
}
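
// Bookkeeping sketch (illustrative): while an alloca-derived pointer is only
// used by simple loads and stores, each such use calls accumulateSROACost()
// and the expected saving is parked in SROAArgCosts. If the pointer later
// escapes (an unknown call, an arbitrary cast, ...), disableSROA() pays the
// parked savings back into Cost, so the estimate never double-counts.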

/// Accumulate a constant GEP offset into an APInt if possible.
///
/// Returns false if unable to compute the offset for any reason. Respects any
/// simplified values known during the analysis of this callsite.
bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
  unsigned IntPtrWidth = DL.getIndexTypeSizeInBits(GEP.getType());
  assert(IntPtrWidth == Offset.getBitWidth());

  for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
       GTI != GTE; ++GTI) {
    ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
    if (!OpC)
      if (Constant *SimpleOp = SimplifiedValues.lookup(GTI.getOperand()))
        OpC = dyn_cast<ConstantInt>(SimpleOp);
    if (!OpC)
      return false;
    if (OpC->isZero())
      continue;

    // Handle a struct index, which adds its field offset to the pointer.
    if (StructType *STy = GTI.getStructTypeOrNull()) {
      unsigned ElementIdx = OpC->getZExtValue();
      const StructLayout *SL = DL.getStructLayout(STy);
      Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx));
      continue;
    }

    APInt TypeSize(IntPtrWidth, DL.getTypeAllocSize(GTI.getIndexedType()));
    Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize;
  }
  return true;
}
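
// Worked example (illustrative): for
//   %p = getelementptr inbounds {i32, i64}, {i32, i64}* %base, i64 0, i32 1
// the struct step adds the field offset of element 1 from StructLayout
// (8 bytes on typical targets), while an array-style index such as i64 3 over
// i64 elements adds 3 * 8 = 24 to Offset.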
416 
417 /// Use TTI to check whether a GEP is free.
418 ///
419 /// Respects any simplified values known during the analysis of this callsite.
420 bool CallAnalyzer::isGEPFree(GetElementPtrInst &GEP) {
422  Operands.push_back(GEP.getOperand(0));
423  for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I)
424  if (Constant *SimpleOp = SimplifiedValues.lookup(*I))
425  Operands.push_back(SimpleOp);
426  else
427  Operands.push_back(*I);
428  return TargetTransformInfo::TCC_Free == TTI.getUserCost(&GEP, Operands);
429 }
430 
431 bool CallAnalyzer::visitAlloca(AllocaInst &I) {
432  // Check whether inlining will turn a dynamic alloca into a static
433  // alloca and handle that case.
434  if (I.isArrayAllocation()) {
435  Constant *Size = SimplifiedValues.lookup(I.getArraySize());
436  if (auto *AllocSize = dyn_cast_or_null<ConstantInt>(Size)) {
437  Type *Ty = I.getAllocatedType();
438  AllocatedSize = SaturatingMultiplyAdd(
439  AllocSize->getLimitedValue(), DL.getTypeAllocSize(Ty).getFixedSize(),
440  AllocatedSize);
441  return Base::visitAlloca(I);
442  }
443  }
444 
445  // Accumulate the allocated size.
446  if (I.isStaticAlloca()) {
447  Type *Ty = I.getAllocatedType();
448  AllocatedSize = SaturatingAdd(DL.getTypeAllocSize(Ty).getFixedSize(),
449  AllocatedSize);
450  }
451 
452  // We will happily inline static alloca instructions.
453  if (I.isStaticAlloca())
454  return Base::visitAlloca(I);
455 
456  // FIXME: This is overly conservative. Dynamic allocas are inefficient for
457  // a variety of reasons, and so we would like to not inline them into
458  // functions which don't currently have a dynamic alloca. This simply
459  // disables inlining altogether in the presence of a dynamic alloca.
460  HasDynamicAlloca = true;
461  return false;
462 }
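
// For example, a static 'alloca [128 x i32]' adds 512 bytes to AllocatedSize
// (saturating rather than overflowing), whereas an 'alloca i32, i32 %n' whose
// size does not simplify to a constant sets HasDynamicAlloca and blocks
// inlining entirely.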

bool CallAnalyzer::visitPHI(PHINode &I) {
  // FIXME: We need to propagate SROA *disabling* through phi nodes, even
  // though we don't want to propagate its bonuses. The idea is to disable
  // SROA if it *might* be used in an inappropriate manner.

  // Phi nodes are always zero-cost.
  // FIXME: Pointer sizes may differ between different address spaces, so do we
  // need to use correct address space in the call to getPointerSizeInBits here?
  // Or could we skip the getPointerSizeInBits call completely? As far as I can
  // see the ZeroOffset is used as a dummy value, so we can probably use any
  // bit width for the ZeroOffset?
  APInt ZeroOffset = APInt::getNullValue(DL.getPointerSizeInBits(0));
  bool CheckSROA = I.getType()->isPointerTy();

  // Track the constant or pointer with constant offset we've seen so far.
  Constant *FirstC = nullptr;
  std::pair<Value *, APInt> FirstBaseAndOffset = {nullptr, ZeroOffset};
  Value *FirstV = nullptr;

  for (unsigned i = 0, e = I.getNumIncomingValues(); i != e; ++i) {
    BasicBlock *Pred = I.getIncomingBlock(i);
    // If the incoming block is dead, skip the incoming block.
    if (DeadBlocks.count(Pred))
      continue;
    // If the parent block of phi is not the known successor of the incoming
    // block, skip the incoming block.
    BasicBlock *KnownSuccessor = KnownSuccessors[Pred];
    if (KnownSuccessor && KnownSuccessor != I.getParent())
      continue;

    Value *V = I.getIncomingValue(i);
    // If the incoming value is this phi itself, skip the incoming value.
    if (&I == V)
      continue;

    Constant *C = dyn_cast<Constant>(V);
    if (!C)
      C = SimplifiedValues.lookup(V);

    std::pair<Value *, APInt> BaseAndOffset = {nullptr, ZeroOffset};
    if (!C && CheckSROA)
      BaseAndOffset = ConstantOffsetPtrs.lookup(V);

    if (!C && !BaseAndOffset.first)
      // The incoming value is neither a constant nor a pointer with constant
      // offset, exit early.
      return true;

    if (FirstC) {
      if (FirstC == C)
        // If we've seen a constant incoming value before and it is the same
        // constant we see this time, continue checking the next incoming value.
        continue;
      // Otherwise early exit because we either see a different constant or saw
      // a constant before but we have a pointer with constant offset this time.
      return true;
    }

    if (FirstV) {
      // The same logic as above, but check pointer with constant offset here.
      if (FirstBaseAndOffset == BaseAndOffset)
        continue;
      return true;
    }

    if (C) {
      // This is the 1st time we've seen a constant, record it.
      FirstC = C;
      continue;
    }

    // The remaining case is that this is the 1st time we've seen a pointer
    // with constant offset, record it.
    FirstV = V;
    FirstBaseAndOffset = BaseAndOffset;
  }

  // Check if we can map phi to a constant.
  if (FirstC) {
    SimplifiedValues[&I] = FirstC;
    return true;
  }

  // Check if we can map phi to a pointer with constant offset.
  if (FirstBaseAndOffset.first) {
    ConstantOffsetPtrs[&I] = FirstBaseAndOffset;

    Value *SROAArg;
    DenseMap<Value *, int>::iterator CostIt;
    if (lookupSROAArgAndCost(FirstV, SROAArg, CostIt))
      SROAArgValues[&I] = SROAArg;
  }

  return true;
}
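
// For example, a phi that merges the same caller argument plus offset 8 from
// every live predecessor keeps its ConstantOffsetPtrs entry (and remains a
// SROA candidate), and a phi whose live incoming values all simplify to the
// same constant is itself recorded as that constant.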

/// Check we can fold GEPs of constant-offset call site argument pointers.
/// This requires target data and inbounds GEPs.
///
/// \return true if the specified GEP can be folded.
bool CallAnalyzer::canFoldInboundsGEP(GetElementPtrInst &I) {
  // Check if we have a base + offset for the pointer.
  std::pair<Value *, APInt> BaseAndOffset =
      ConstantOffsetPtrs.lookup(I.getPointerOperand());
  if (!BaseAndOffset.first)
    return false;

  // Check if the offset of this GEP is constant, and if so accumulate it
  // into Offset.
  if (!accumulateGEPOffset(cast<GEPOperator>(I), BaseAndOffset.second))
    return false;

  // Add the result as a new mapping to Base + Offset.
  ConstantOffsetPtrs[&I] = BaseAndOffset;

  return true;
}

bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) {
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  bool SROACandidate =
      lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt);

  // Lambda to check whether a GEP's indices are all constant.
  auto IsGEPOffsetConstant = [&](GetElementPtrInst &GEP) {
    for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I)
      if (!isa<Constant>(*I) && !SimplifiedValues.lookup(*I))
        return false;
    return true;
  };

  if ((I.isInBounds() && canFoldInboundsGEP(I)) || IsGEPOffsetConstant(I)) {
    if (SROACandidate)
      SROAArgValues[&I] = SROAArg;

    // Constant GEPs are modeled as free.
    return true;
  }

  // Variable GEPs will require math and will disable SROA.
  if (SROACandidate)
    disableSROA(CostIt);
  return isGEPFree(I);
}

/// Simplify \p I if its operands are constants and update SimplifiedValues.
/// \p Evaluate is a callable specific to instruction type that evaluates the
/// instruction when all the operands are constants.
template <typename Callable>
bool CallAnalyzer::simplifyInstruction(Instruction &I, Callable Evaluate) {
  SmallVector<Constant *, 2> COps;
  for (Value *Op : I.operands()) {
    Constant *COp = dyn_cast<Constant>(Op);
    if (!COp)
      COp = SimplifiedValues.lookup(Op);
    if (!COp)
      return false;
    COps.push_back(COp);
  }
  auto *C = Evaluate(COps);
  if (!C)
    return false;
  SimplifiedValues[&I] = C;
  return true;
}

bool CallAnalyzer::visitBitCast(BitCastInst &I) {
  // Propagate constants through bitcasts.
  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
        return ConstantExpr::getBitCast(COps[0], I.getType());
      }))
    return true;

  // Track base/offsets through casts.
  std::pair<Value *, APInt> BaseAndOffset =
      ConstantOffsetPtrs.lookup(I.getOperand(0));
  // Casts don't change the offset, just wrap it up.
  if (BaseAndOffset.first)
    ConstantOffsetPtrs[&I] = BaseAndOffset;

  // Also look for SROA candidates here.
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt))
    SROAArgValues[&I] = SROAArg;

  // Bitcasts are always zero cost.
  return true;
}

bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
  // Propagate constants through ptrtoint.
  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
        return ConstantExpr::getPtrToInt(COps[0], I.getType());
      }))
    return true;

  // Track base/offset pairs when converted to a plain integer provided the
  // integer is large enough to represent the pointer.
  unsigned IntegerSize = I.getType()->getScalarSizeInBits();
  unsigned AS = I.getOperand(0)->getType()->getPointerAddressSpace();
  if (IntegerSize >= DL.getPointerSizeInBits(AS)) {
    std::pair<Value *, APInt> BaseAndOffset =
        ConstantOffsetPtrs.lookup(I.getOperand(0));
    if (BaseAndOffset.first)
      ConstantOffsetPtrs[&I] = BaseAndOffset;
  }

  // This is really weird. Technically, ptrtoint will disable SROA. However,
  // unless that ptrtoint is *used* somewhere in the live basic blocks after
  // inlining, it will be nuked, and SROA should proceed. All of the uses which
  // would block SROA would also block SROA if applied directly to a pointer,
  // and so we can just add the integer in here. The only places where SROA is
  // preserved either cannot fire on an integer, or won't in-and-of themselves
  // disable SROA (ext) w/o some later use that we would see and disable.
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt))
    SROAArgValues[&I] = SROAArg;

  return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
}

bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
  // Propagate constants through inttoptr.
  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
        return ConstantExpr::getIntToPtr(COps[0], I.getType());
      }))
    return true;

  // Track base/offset pairs when round-tripped through a pointer without
  // modifications provided the integer is not too large.
  Value *Op = I.getOperand(0);
  unsigned IntegerSize = Op->getType()->getScalarSizeInBits();
  if (IntegerSize <= DL.getPointerTypeSizeInBits(I.getType())) {
    std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Op);
    if (BaseAndOffset.first)
      ConstantOffsetPtrs[&I] = BaseAndOffset;
  }

  // "Propagate" SROA here in the same manner as we do for ptrtoint above.
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(Op, SROAArg, CostIt))
    SROAArgValues[&I] = SROAArg;

  return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
}

bool CallAnalyzer::visitCastInst(CastInst &I) {
  // Propagate constants through casts.
  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
        return ConstantExpr::getCast(I.getOpcode(), COps[0], I.getType());
      }))
    return true;

  // Disable SROA in the face of arbitrary casts we don't whitelist elsewhere.
  disableSROA(I.getOperand(0));

  // If this is a floating-point cast, and the target says this operation
  // is expensive, this may eventually become a library call. Treat the cost
  // as such.
  switch (I.getOpcode()) {
  case Instruction::FPTrunc:
  case Instruction::FPExt:
  case Instruction::UIToFP:
  case Instruction::SIToFP:
  case Instruction::FPToUI:
  case Instruction::FPToSI:
    if (TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive)
      addCost(InlineConstants::CallPenalty);
    break;
  default:
    break;
  }

  return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
}

bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) {
  Value *Operand = I.getOperand(0);
  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
        return ConstantFoldInstOperands(&I, COps[0], DL);
      }))
    return true;

  // Disable any SROA on the argument to arbitrary unary instructions.
  disableSROA(Operand);

  return false;
}

bool CallAnalyzer::paramHasAttr(Argument *A, Attribute::AttrKind Attr) {
  return CandidateCall.paramHasAttr(A->getArgNo(), Attr);
}

bool CallAnalyzer::isKnownNonNullInCallee(Value *V) {
  // Does the *call site* have the NonNull attribute set on an argument? We
  // use the attribute on the call site to memoize any analysis done in the
  // caller. This will also trip if the callee function has a non-null
  // parameter attribute, but that's a less interesting case because hopefully
  // the callee would already have been simplified based on that.
  if (Argument *A = dyn_cast<Argument>(V))
    if (paramHasAttr(A, Attribute::NonNull))
      return true;

  // Is this an alloca in the caller? This is distinct from the attribute case
  // above because attributes aren't updated within the inliner itself and we
  // always want to catch the alloca derived case.
  if (isAllocaDerivedArg(V))
    // We can actually predict the result of comparisons between an
    // alloca-derived value and null. Note that this fires regardless of
    // SROA firing.
    return true;

  return false;
}

bool CallAnalyzer::allowSizeGrowth(CallBase &Call) {
  // If the normal destination of the invoke or the parent block of the call
  // site is unreachable-terminated, there is little point in inlining this
  // unless there is literally zero cost.
  // FIXME: Note that it is possible that an unreachable-terminated block has a
  // hot entry. For example, in below scenario inlining hot_call_X() may be
  // beneficial :
  // main() {
  //   hot_call_1();
  //   ...
  //   hot_call_N()
  //   exit(0);
  // }
  // For now, we are not handling this corner case here as it is rare in real
  // code. In future, we should elaborate this based on BPI and BFI in more
  // general threshold adjusting heuristics in updateThreshold().
  if (InvokeInst *II = dyn_cast<InvokeInst>(&Call)) {
    if (isa<UnreachableInst>(II->getNormalDest()->getTerminator()))
      return false;
  } else if (isa<UnreachableInst>(Call.getParent()->getTerminator()))
    return false;

  return true;
}

bool CallAnalyzer::isColdCallSite(CallBase &Call,
                                  BlockFrequencyInfo *CallerBFI) {
  // If global profile summary is available, then callsite's coldness is
  // determined based on that.
  if (PSI && PSI->hasProfileSummary())
    return PSI->isColdCallSite(CallSite(&Call), CallerBFI);

  // Otherwise we need BFI to be available.
  if (!CallerBFI)
    return false;

  // Determine if the callsite is cold relative to caller's entry. We could
  // potentially cache the computation of scaled entry frequency, but the added
  // complexity is not worth it unless this scaling shows up high in the
  // profiles.
  const BranchProbability ColdProb(ColdCallSiteRelFreq, 100);
  auto CallSiteBB = Call.getParent();
  auto CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB);
  auto CallerEntryFreq =
      CallerBFI->getBlockFreq(&(Call.getCaller()->getEntryBlock()));
  return CallSiteFreq < CallerEntryFreq * ColdProb;
}
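
// Concretely (illustrative): with the default cold-callsite-rel-freq of 2, a
// call that executes less than 2% as often as its caller's entry block is
// treated as cold when no profile summary is available.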

Optional<int>
CallAnalyzer::getHotCallSiteThreshold(CallBase &Call,
                                      BlockFrequencyInfo *CallerBFI) {

  // If global profile summary is available, then callsite's hotness is
  // determined based on that.
  if (PSI && PSI->hasProfileSummary() &&
      PSI->isHotCallSite(CallSite(&Call), CallerBFI))
    return Params.HotCallSiteThreshold;

  // Otherwise we need BFI to be available and to have a locally hot callsite
  // threshold.
  if (!CallerBFI || !Params.LocallyHotCallSiteThreshold)
    return None;

  // Determine if the callsite is hot relative to caller's entry. We could
  // potentially cache the computation of scaled entry frequency, but the added
  // complexity is not worth it unless this scaling shows up high in the
  // profiles.
  auto CallSiteBB = Call.getParent();
  auto CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB).getFrequency();
  auto CallerEntryFreq = CallerBFI->getEntryFreq();
  if (CallSiteFreq >= CallerEntryFreq * HotCallSiteRelFreq)
    return Params.LocallyHotCallSiteThreshold;

  // Otherwise treat it normally.
  return None;
}
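
// Concretely (illustrative): with the default hot-callsite-rel-freq of 60, a
// call that runs at least 60x more often than its caller's entry block gets
// the locally-hot threshold (525 by default) even without a profile summary;
// a summary-proven hot callsite gets the full hot-callsite threshold instead.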

void CallAnalyzer::updateThreshold(CallBase &Call, Function &Callee) {
  // If no size growth is allowed for this inlining, set Threshold to 0.
  if (!allowSizeGrowth(Call)) {
    Threshold = 0;
    return;
  }

  Function *Caller = Call.getCaller();

  // return min(A, B) if B is valid.
  auto MinIfValid = [](int A, Optional<int> B) {
    return B ? std::min(A, B.getValue()) : A;
  };

  // return max(A, B) if B is valid.
  auto MaxIfValid = [](int A, Optional<int> B) {
    return B ? std::max(A, B.getValue()) : A;
  };

  // Various bonus percentages. These are multiplied by Threshold to get the
  // bonus values.
  // SingleBBBonus: This bonus is applied if the callee has a single reachable
  // basic block at the given callsite context. This is speculatively applied
  // and withdrawn if more than one basic block is seen.
  //
  // LastCallToStaticBonus: This large bonus is applied to ensure the inlining
  // of the last call to a static function as inlining such functions is
  // guaranteed to reduce code size.
  //
  // These bonus percentages may be set to 0 based on properties of the caller
  // and the callsite.
  int SingleBBBonusPercent = 50;
  int VectorBonusPercent = TTI.getInlinerVectorBonusPercent();
  int LastCallToStaticBonus = InlineConstants::LastCallToStaticBonus;

  // Lambda to set all the above bonus and bonus percentages to 0.
  auto DisallowAllBonuses = [&]() {
    SingleBBBonusPercent = 0;
    VectorBonusPercent = 0;
    LastCallToStaticBonus = 0;
  };

  // Use the OptMinSizeThreshold or OptSizeThreshold knob if they are available
  // and reduce the threshold if the caller has the necessary attribute.
  if (Caller->hasMinSize()) {
    Threshold = MinIfValid(Threshold, Params.OptMinSizeThreshold);
    // For minsize, we want to disable the single BB bonus and the vector
    // bonuses, but not the last-call-to-static bonus. Inlining the last call
    // to a static function will, at the minimum, eliminate the parameter setup
    // and call/return instructions.
    SingleBBBonusPercent = 0;
    VectorBonusPercent = 0;
  } else if (Caller->hasOptSize())
    Threshold = MinIfValid(Threshold, Params.OptSizeThreshold);

  // Adjust the threshold based on inlinehint attribute and profile based
  // hotness information if the caller does not have MinSize attribute.
  if (!Caller->hasMinSize()) {
    if (Callee.hasFnAttribute(Attribute::InlineHint))
      Threshold = MaxIfValid(Threshold, Params.HintThreshold);

    // FIXME: After switching to the new passmanager, simplify the logic below
    // by checking only the callsite hotness/coldness as we will reliably
    // have local profile information.
    //
    // Callsite hotness and coldness can be determined if sample profile is
    // used (which adds hotness metadata to calls) or if caller's
    // BlockFrequencyInfo is available.
    BlockFrequencyInfo *CallerBFI = GetBFI ? &((*GetBFI)(*Caller)) : nullptr;
    auto HotCallSiteThreshold = getHotCallSiteThreshold(Call, CallerBFI);
    if (!Caller->hasOptSize() && HotCallSiteThreshold) {
      LLVM_DEBUG(dbgs() << "Hot callsite.\n");
      // FIXME: This should update the threshold only if it exceeds the
      // current threshold, but AutoFDO + ThinLTO currently relies on this
      // behavior to prevent inlining of hot callsites during ThinLTO
      // compile phase.
      Threshold = HotCallSiteThreshold.getValue();
    } else if (isColdCallSite(Call, CallerBFI)) {
      LLVM_DEBUG(dbgs() << "Cold callsite.\n");
      // Do not apply bonuses for a cold callsite including the
      // LastCallToStatic bonus. While this bonus might result in code size
      // reduction, it can cause the size of a non-cold caller to increase
      // preventing it from being inlined.
      DisallowAllBonuses();
      Threshold = MinIfValid(Threshold, Params.ColdCallSiteThreshold);
    } else if (PSI) {
      // Use callee's global profile information only if we have no way of
      // determining this via callsite information.
      if (PSI->isFunctionEntryHot(&Callee)) {
        LLVM_DEBUG(dbgs() << "Hot callee.\n");
        // If callsite hotness can not be determined, we may still know
        // that the callee is hot and treat it as a weaker hint for threshold
        // increase.
        Threshold = MaxIfValid(Threshold, Params.HintThreshold);
      } else if (PSI->isFunctionEntryCold(&Callee)) {
        LLVM_DEBUG(dbgs() << "Cold callee.\n");
        // Do not apply bonuses for a cold callee including the
        // LastCallToStatic bonus. While this bonus might result in code size
        // reduction, it can cause the size of a non-cold caller to increase
        // preventing it from being inlined.
        DisallowAllBonuses();
        Threshold = MinIfValid(Threshold, Params.ColdThreshold);
      }
    }
  }

  // Finally, take the target-specific inlining threshold multiplier into
  // account.
  Threshold *= TTI.getInliningThresholdMultiplier();

  SingleBBBonus = Threshold * SingleBBBonusPercent / 100;
  VectorBonus = Threshold * VectorBonusPercent / 100;

  bool OnlyOneCallAndLocalLinkage =
      F.hasLocalLinkage() && F.hasOneUse() && &F == Call.getCalledFunction();
  // If there is only one call of the function, and it has internal linkage,
  // the cost of inlining it drops dramatically. It may seem odd to update
  // Cost in updateThreshold, but the bonus depends on the logic in this method.
  if (OnlyOneCallAndLocalLinkage)
    Cost -= LastCallToStaticBonus;
}
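
// Worked example (illustrative, with the default parameters): a callee marked
// inlinehint raises Threshold from 225 to 325; with a target multiplier of 1
// the speculative SingleBBBonus is then 325 * 50 / 100 = 162, and it is later
// withdrawn if the callee turns out to have more than one reachable block. A
// cold callsite instead clamps Threshold down and drops every bonus.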

bool CallAnalyzer::visitCmpInst(CmpInst &I) {
  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
  // First try to handle simplified comparisons.
  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
        return ConstantExpr::getCompare(I.getPredicate(), COps[0], COps[1]);
      }))
    return true;

  if (I.getOpcode() == Instruction::FCmp)
    return false;

  // Otherwise look for a comparison between constant offset pointers with
  // a common base.
  Value *LHSBase, *RHSBase;
  APInt LHSOffset, RHSOffset;
  std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
  if (LHSBase) {
    std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
    if (RHSBase && LHSBase == RHSBase) {
      // We have common bases, fold the icmp to a constant based on the
      // offsets.
      Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
      Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
      if (Constant *C = ConstantExpr::getICmp(I.getPredicate(), CLHS, CRHS)) {
        SimplifiedValues[&I] = C;
        ++NumConstantPtrCmps;
        return true;
      }
    }
  }

  // If the comparison is an equality comparison with null, we can simplify it
  // if we know the value (argument) can't be null.
  if (I.isEquality() && isa<ConstantPointerNull>(I.getOperand(1)) &&
      isKnownNonNullInCallee(I.getOperand(0))) {
    bool IsNotEqual = I.getPredicate() == CmpInst::ICMP_NE;
    SimplifiedValues[&I] = IsNotEqual ? ConstantInt::getTrue(I.getType())
                                      : ConstantInt::getFalse(I.getType());
    return true;
  }
  // Finally check for SROA candidates in comparisons.
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) {
    if (isa<ConstantPointerNull>(I.getOperand(1))) {
      accumulateSROACost(CostIt, InlineConstants::InstrCost);
      return true;
    }

    disableSROA(CostIt);
  }

  return false;
}
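
// For example, 'icmp ult (%arg + 4), (%arg + 16)' over the same base folds to
// true via the offset comparison above, and 'icmp eq %p, null' folds to false
// when %p is a nonnull argument or maps back to a caller alloca.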

bool CallAnalyzer::visitSub(BinaryOperator &I) {
  // Try to handle a special case: we can fold computing the difference of two
  // constant-related pointers.
  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
  Value *LHSBase, *RHSBase;
  APInt LHSOffset, RHSOffset;
  std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
  if (LHSBase) {
    std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
    if (RHSBase && LHSBase == RHSBase) {
      // We have common bases, fold the subtract to a constant based on the
      // offsets.
      Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
      Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
      if (Constant *C = ConstantExpr::getSub(CLHS, CRHS)) {
        SimplifiedValues[&I] = C;
        ++NumConstantPtrDiffs;
        return true;
      }
    }
  }

  // Otherwise, fall back to the generic logic for simplifying and handling
  // instructions.
  return Base::visitSub(I);
}

bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) {
  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
  Constant *CLHS = dyn_cast<Constant>(LHS);
  if (!CLHS)
    CLHS = SimplifiedValues.lookup(LHS);
  Constant *CRHS = dyn_cast<Constant>(RHS);
  if (!CRHS)
    CRHS = SimplifiedValues.lookup(RHS);

  Value *SimpleV = nullptr;
  if (auto FI = dyn_cast<FPMathOperator>(&I))
    SimpleV = SimplifyBinOp(I.getOpcode(), CLHS ? CLHS : LHS,
                            CRHS ? CRHS : RHS, FI->getFastMathFlags(), DL);
  else
    SimpleV =
        SimplifyBinOp(I.getOpcode(), CLHS ? CLHS : LHS, CRHS ? CRHS : RHS, DL);

  if (Constant *C = dyn_cast_or_null<Constant>(SimpleV))
    SimplifiedValues[&I] = C;

  if (SimpleV)
    return true;

  // Disable any SROA on arguments to arbitrary, unsimplified binary operators.
  disableSROA(LHS);
  disableSROA(RHS);

  // If the instruction is floating point, and the target says this operation
  // is expensive, this may eventually become a library call. Treat the cost
  // as such. Unless it's fneg which can be implemented with an xor.
  using namespace llvm::PatternMatch;
  if (I.getType()->isFloatingPointTy() &&
      TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive &&
      !match(&I, m_FNeg(m_Value())))
    addCost(InlineConstants::CallPenalty);

  return false;
}

bool CallAnalyzer::visitFNeg(UnaryOperator &I) {
  Value *Op = I.getOperand(0);
  Constant *COp = dyn_cast<Constant>(Op);
  if (!COp)
    COp = SimplifiedValues.lookup(Op);

  Value *SimpleV = SimplifyFNegInst(COp ? COp : Op,
                                    cast<FPMathOperator>(I).getFastMathFlags(),
                                    DL);

  if (Constant *C = dyn_cast_or_null<Constant>(SimpleV))
    SimplifiedValues[&I] = C;

  if (SimpleV)
    return true;

  // Disable any SROA on arguments to arbitrary, unsimplified fneg.
  disableSROA(Op);

  return false;
}

bool CallAnalyzer::visitLoad(LoadInst &I) {
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt)) {
    if (I.isSimple()) {
      accumulateSROACost(CostIt, InlineConstants::InstrCost);
      return true;
    }

    disableSROA(CostIt);
  }

  // If the data is already loaded from this address and hasn't been clobbered
  // by any stores or calls, this load is likely to be redundant and can be
  // eliminated.
  if (EnableLoadElimination &&
      !LoadAddrSet.insert(I.getPointerOperand()).second && I.isUnordered()) {
    LoadEliminationCost += InlineConstants::InstrCost;
    return true;
  }

  return false;
}

bool CallAnalyzer::visitStore(StoreInst &I) {
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt)) {
    if (I.isSimple()) {
      accumulateSROACost(CostIt, InlineConstants::InstrCost);
      return true;
    }

    disableSROA(CostIt);
  }

  // The store can potentially clobber loads and prevent repeated loads from
  // being eliminated.
  // FIXME:
  // 1. We can probably keep an initial set of eliminatable loads subtracted
  // from the cost even when we finally see a store. We just need to disable
  // *further* accumulation of elimination savings.
  // 2. We should probably at some point thread MemorySSA for the callee into
  // this and then use that to actually compute *really* precise savings.
  disableLoadElimination();
  return false;
}

bool CallAnalyzer::visitExtractValue(ExtractValueInst &I) {
  // Constant folding for extract value is trivial.
  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
        return ConstantExpr::getExtractValue(COps[0], I.getIndices());
      }))
    return true;

  // SROA can look through these but give them a cost.
  return false;
}

bool CallAnalyzer::visitInsertValue(InsertValueInst &I) {
  // Constant folding for insert value is trivial.
  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
        return ConstantExpr::getInsertValue(/*AggregateOperand*/ COps[0],
                                            /*InsertedValueOperand*/ COps[1],
                                            I.getIndices());
      }))
    return true;

  // SROA can look through these but give them a cost.
  return false;
}

/// Try to simplify a call site.
///
/// Takes a concrete function and callsite and tries to actually simplify it by
/// analyzing the arguments and call itself with instsimplify. Returns true if
/// it has simplified the callsite to some other entity (a constant), making it
/// free.
bool CallAnalyzer::simplifyCallSite(Function *F, CallBase &Call) {
  // FIXME: Using the instsimplify logic directly for this is inefficient
  // because we have to continually rebuild the argument list even when no
  // simplifications can be performed. Until that is fixed with remapping
  // inside of instsimplify, directly constant fold calls here.
  if (!canConstantFoldCallTo(&Call, F))
    return false;

  // Try to re-map the arguments to constants.
  SmallVector<Constant *, 4> ConstantArgs;
  ConstantArgs.reserve(Call.arg_size());
  for (Value *I : Call.args()) {
    Constant *C = dyn_cast<Constant>(I);
    if (!C)
      C = dyn_cast_or_null<Constant>(SimplifiedValues.lookup(I));
    if (!C)
      return false; // This argument doesn't map to a constant.

    ConstantArgs.push_back(C);
  }
  if (Constant *C = ConstantFoldCall(&Call, F, ConstantArgs)) {
    SimplifiedValues[&Call] = C;
    return true;
  }

  return false;
}

bool CallAnalyzer::visitCallBase(CallBase &Call) {
  if (Call.hasFnAttr(Attribute::ReturnsTwice) &&
      !F.hasFnAttribute(Attribute::ReturnsTwice)) {
    // This aborts the entire analysis.
    ExposesReturnsTwice = true;
    return false;
  }
  if (isa<CallInst>(Call) && cast<CallInst>(Call).cannotDuplicate())
    ContainsNoDuplicateCall = true;

  if (Function *F = Call.getCalledFunction()) {
    // When we have a concrete function, first try to simplify it directly.
    if (simplifyCallSite(F, Call))
      return true;

    // Next check if it is an intrinsic we know about.
    // FIXME: Lift this into part of the InstVisitor.
    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&Call)) {
      switch (II->getIntrinsicID()) {
      default:
        if (!Call.onlyReadsMemory() && !isAssumeLikeIntrinsic(II))
          disableLoadElimination();
        return Base::visitCallBase(Call);

      case Intrinsic::load_relative:
        // This is normally lowered to 4 LLVM instructions.
        addCost(3 * InlineConstants::InstrCost);
        return false;

      case Intrinsic::memset:
      case Intrinsic::memcpy:
      case Intrinsic::memmove:
        disableLoadElimination();
        // SROA can usually chew through these intrinsics, but they aren't free.
        return false;
      case Intrinsic::icall_branch_funnel:
      case Intrinsic::localescape:
        HasUninlineableIntrinsic = true;
        return false;
      case Intrinsic::vastart:
        InitsVargArgs = true;
        return false;
      }
    }

    if (F == Call.getFunction()) {
      // This flag will fully abort the analysis, so don't bother with anything
      // else.
      IsRecursiveCall = true;
      return false;
    }

    if (TTI.isLoweredToCall(F)) {
      // We account for the average 1 instruction per call argument setup
      // here.
      addCost(Call.arg_size() * InlineConstants::InstrCost);

      // Everything other than inline ASM will also have a significant cost
      // merely from making the call.
      if (!isa<InlineAsm>(Call.getCalledValue()))
        addCost(InlineConstants::CallPenalty);
    }

    if (!Call.onlyReadsMemory())
      disableLoadElimination();
    return Base::visitCallBase(Call);
  }

  // Otherwise we're in a very special case -- an indirect function call. See
  // if we can be particularly clever about this.
  Value *Callee = Call.getCalledValue();

  // First, pay the price of the argument setup. We account for the average
  // 1 instruction per call argument setup here.
  addCost(Call.arg_size() * InlineConstants::InstrCost);

  // Next, check if this happens to be an indirect function call to a known
  // function in this inline context. If not, we've done all we can.
  Function *F = dyn_cast_or_null<Function>(SimplifiedValues.lookup(Callee));
  if (!F) {
    if (!Call.onlyReadsMemory())
      disableLoadElimination();
    return Base::visitCallBase(Call);
  }

  // If we have a constant that we are calling as a function, we can peer
  // through it and see the function target. This happens not infrequently
  // during devirtualization and so we want to give it a hefty bonus for
  // inlining, but cap that bonus in the event that inlining wouldn't pan
  // out. Pretend to inline the function, with a custom threshold.
  auto IndirectCallParams = Params;
  IndirectCallParams.DefaultThreshold = InlineConstants::IndirectCallThreshold;
  CallAnalyzer CA(TTI, GetAssumptionCache, GetBFI, PSI, ORE, *F, Call,
                  IndirectCallParams);
  if (CA.analyzeCall(Call)) {
    // We were able to inline the indirect call! Subtract the cost from the
    // threshold to get the bonus we want to apply, but don't go below zero.
    Cost -= std::max(0, CA.getThreshold() - CA.getCost());
  }

  if (!F->onlyReadsMemory())
    disableLoadElimination();
  return Base::visitCallBase(Call);
}
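
// Illustrative: if the nested analysis of the devirtualized target succeeds
// with a Cost well under its reduced indirect-call threshold, the difference
// is credited against this call's Cost, making the caller cheaper to inline
// so the devirtualized call can actually be cleaned up afterwards.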
1335 
1336 bool CallAnalyzer::visitReturnInst(ReturnInst &RI) {
1337  // At least one return instruction will be free after inlining.
1338  bool Free = !HasReturn;
1339  HasReturn = true;
1340  return Free;
1341 }
1342 
1343 bool CallAnalyzer::visitBranchInst(BranchInst &BI) {
1344  // We model unconditional branches as essentially free -- they really
1345  // shouldn't exist at all, but handling them makes the behavior of the
1346  // inliner more regular and predictable. Interestingly, conditional branches
1347  // which will fold away are also free.
1348  return BI.isUnconditional() || isa<ConstantInt>(BI.getCondition()) ||
1349  dyn_cast_or_null<ConstantInt>(
1350  SimplifiedValues.lookup(BI.getCondition()));
1351 }
1352 
1353 bool CallAnalyzer::visitSelectInst(SelectInst &SI) {
1354  bool CheckSROA = SI.getType()->isPointerTy();
1355  Value *TrueVal = SI.getTrueValue();
1356  Value *FalseVal = SI.getFalseValue();
1357 
1358  Constant *TrueC = dyn_cast<Constant>(TrueVal);
1359  if (!TrueC)
1360  TrueC = SimplifiedValues.lookup(TrueVal);
1361  Constant *FalseC = dyn_cast<Constant>(FalseVal);
1362  if (!FalseC)
1363  FalseC = SimplifiedValues.lookup(FalseVal);
1364  Constant *CondC =
1365  dyn_cast_or_null<Constant>(SimplifiedValues.lookup(SI.getCondition()));
1366 
1367  if (!CondC) {
1368  // Select C, X, X => X
1369  if (TrueC == FalseC && TrueC) {
1370  SimplifiedValues[&SI] = TrueC;
1371  return true;
1372  }
1373 
1374  if (!CheckSROA)
1375  return Base::visitSelectInst(SI);
1376 
1377  std::pair<Value *, APInt> TrueBaseAndOffset =
1378  ConstantOffsetPtrs.lookup(TrueVal);
1379  std::pair<Value *, APInt> FalseBaseAndOffset =
1380  ConstantOffsetPtrs.lookup(FalseVal);
1381  if (TrueBaseAndOffset == FalseBaseAndOffset && TrueBaseAndOffset.first) {
1382  ConstantOffsetPtrs[&SI] = TrueBaseAndOffset;
1383 
1384  Value *SROAArg;
1386  if (lookupSROAArgAndCost(TrueVal, SROAArg, CostIt))
1387  SROAArgValues[&SI] = SROAArg;
1388  return true;
1389  }
1390 
1391  return Base::visitSelectInst(SI);
1392  }
1393 
1394  // Select condition is a constant.
1395  Value *SelectedV = CondC->isAllOnesValue()
1396  ? TrueVal
1397  : (CondC->isNullValue()) ? FalseVal : nullptr;
1398  if (!SelectedV) {
1399  // Condition is a vector constant that is not all 1s or all 0s. If all
1400  // operands are constants, ConstantExpr::getSelect() can handle the cases
1401  // such as select vectors.
1402  if (TrueC && FalseC) {
1403  if (auto *C = ConstantExpr::getSelect(CondC, TrueC, FalseC)) {
1404  SimplifiedValues[&SI] = C;
1405  return true;
1406  }
1407  }
1408  return Base::visitSelectInst(SI);
1409  }
1410 
1411  // Condition is either all 1s or all 0s. SI can be simplified.
1412  if (Constant *SelectedC = dyn_cast<Constant>(SelectedV)) {
1413  SimplifiedValues[&SI] = SelectedC;
1414  return true;
1415  }
1416 
1417  if (!CheckSROA)
1418  return true;
1419 
1420  std::pair<Value *, APInt> BaseAndOffset =
1421  ConstantOffsetPtrs.lookup(SelectedV);
1422  if (BaseAndOffset.first) {
1423  ConstantOffsetPtrs[&SI] = BaseAndOffset;
1424 
1425  Value *SROAArg;
1427  if (lookupSROAArgAndCost(SelectedV, SROAArg, CostIt))
1428  SROAArgValues[&SI] = SROAArg;
1429  }
1430 
1431  return true;
1432 }
1433 
1434 bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
1435  // We model unconditional switches as free, see the comments on handling
1436  // branches.
1437  if (isa<ConstantInt>(SI.getCondition()))
1438  return true;
1439  if (Value *V = SimplifiedValues.lookup(SI.getCondition()))
1440  if (isa<ConstantInt>(V))
1441  return true;
1442 
1443  // Assume the most general case where the switch is lowered into
1444  // either a jump table, bit test, or a balanced binary tree consisting of
1445  // case clusters without merging adjacent clusters with the same
1446  // destination. We do not consider the switches that are lowered with a mix
1447  // of jump table/bit test/binary search tree. The cost of the switch is
1448  // proportional to the size of the tree or the size of jump table range.
1449  //
1450  // NB: We convert large switches which are just used to initialize large phi
1451  // nodes to lookup tables instead in simplify-cfg, so this shouldn't prevent
1452  // inlining those. It will prevent inlining in cases where the optimization
1453  // does not (yet) fire.
1454 
1455  // Maximum valid cost increased in this function.
1456  int CostUpperBound = INT_MAX - InlineConstants::InstrCost - 1;
1457 
1458  unsigned JumpTableSize = 0;
1459  unsigned NumCaseCluster =
1460  TTI.getEstimatedNumberOfCaseClusters(SI, JumpTableSize);
1461 
1462  // If suitable for a jump table, consider the cost for the table size and
1463  // branch to destination.
1464  if (JumpTableSize) {
1465  int64_t JTCost = (int64_t)JumpTableSize * InlineConstants::InstrCost +
1466  4 * InlineConstants::InstrCost;
1467 
1468  addCost(JTCost, (int64_t)CostUpperBound);
1469  return false;
1470  }
1471 
1472  // Considering forming a binary search, we should find the number of nodes
1473  // which is same as the number of comparisons when lowered. For a given
1474  // number of clusters, n, we can define a recursive function, f(n), to find
1475  // the number of nodes in the tree. The recursion is :
1476  // f(n) = 1 + f(n/2) + f (n - n/2), when n > 3,
1477  // and f(n) = n, when n <= 3.
1478  // This will lead a binary tree where the leaf should be either f(2) or f(3)
1479  // when n > 3. So, the number of comparisons from leaves should be n, while
1480  // the number of non-leaf should be :
1481  // 2^(log2(n) - 1) - 1
1482  // = 2^log2(n) * 2^-1 - 1
1483  // = n / 2 - 1.
1484  // Considering comparisons from leaf and non-leaf nodes, we can estimate the
1485  // number of comparisons in a simple closed form :
1486  // n + n / 2 - 1 = n * 3 / 2 - 1
1487  if (NumCaseCluster <= 3) {
1488  // Suppose a comparison includes one compare and one conditional branch.
1489  addCost(NumCaseCluster * 2 * InlineConstants::InstrCost);
1490  return false;
1491  }
1492 
1493  int64_t ExpectedNumberOfCompare = 3 * (int64_t)NumCaseCluster / 2 - 1;
1494  int64_t SwitchCost =
1495  ExpectedNumberOfCompare * 2 * InlineConstants::InstrCost;
1496 
1497  addCost(SwitchCost, (int64_t)CostUpperBound);
1498  return false;
1499 }
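// Worked example of the switch costs above (illustrative): with a jump table
// of JumpTableSize == 16, JTCost = (16 + 4) * InstrCost. Without a jump table
// and NumCaseCluster == 8, the recursion gives f(4) = 1 + f(2) + f(2) = 5 and
// f(8) = 1 + f(4) + f(4) = 11 comparisons, matching the closed form
// 3 * 8 / 2 - 1 = 11, so the charge is 11 * 2 * InstrCost.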
1500 
1501 bool CallAnalyzer::visitIndirectBrInst(IndirectBrInst &IBI) {
1502  // We never want to inline functions that contain an indirectbr. This is
1503  // incorrect because all the blockaddress's (in static global initializers
1504  // for example) would be referring to the original function, and this
1505  // indirect jump would jump from the inlined copy of the function into the
1506  // original function which is extremely undefined behavior.
1507  // FIXME: This logic isn't really right; we can safely inline functions with
1508  // indirectbr's as long as no other function or global references the
1509  // blockaddress of a block within the current function.
1510  HasIndirectBr = true;
1511  return false;
1512 }
1513 
1514 bool CallAnalyzer::visitResumeInst(ResumeInst &RI) {
1515  // FIXME: It's not clear that a single instruction is an accurate model for
1516  // the inline cost of a resume instruction.
1517  return false;
1518 }
1519 
1520 bool CallAnalyzer::visitCleanupReturnInst(CleanupReturnInst &CRI) {
1521  // FIXME: It's not clear that a single instruction is an accurate model for
1522  // the inline cost of a cleanupret instruction.
1523  return false;
1524 }
1525 
1526 bool CallAnalyzer::visitCatchReturnInst(CatchReturnInst &CRI) {
1527  // FIXME: It's not clear that a single instruction is an accurate model for
1528  // the inline cost of a catchret instruction.
1529  return false;
1530 }
1531 
1532 bool CallAnalyzer::visitUnreachableInst(UnreachableInst &I) {
1533  // FIXME: It might be reasonable to discount the cost of instructions leading
1534  // to unreachable as they have the lowest possible impact on both runtime and
1535  // code size.
1536  return true; // No actual code is needed for unreachable.
1537 }
1538 
1539 bool CallAnalyzer::visitInstruction(Instruction &I) {
1540  // Some instructions are free. All of the free intrinsics can also be
1541  // handled by SROA, etc.
1542  if (TargetTransformInfo::TCC_Free == TTI.getUserCost(&I))
1543  return true;
1544 
1545  // We found something we don't understand or can't handle. Mark any SROA-able
1546  // values in the operand list as no longer viable.
1547  for (User::op_iterator OI = I.op_begin(), OE = I.op_end(); OI != OE; ++OI)
1548  disableSROA(*OI);
1549 
1550  return false;
1551 }
1552 
1553 /// Analyze a basic block for its contribution to the inline cost.
1554 ///
1555 /// This method walks the analyzer over every instruction in the given basic
1556 /// block and accounts for their cost during inlining at this callsite. It
1557 /// aborts early if the threshold has been exceeded or an impossible to inline
1558 /// construct has been detected. It returns false if inlining is no longer
1559 /// viable, and true if inlining remains viable.
1560 InlineResult
1561 CallAnalyzer::analyzeBlock(BasicBlock *BB,
1562  SmallPtrSetImpl<const Value *> &EphValues) {
1563  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
1564  // FIXME: Currently, the number of instructions in a function, regardless of
1565  // our ability to simplify them during inlining to constants or dead code,
1566  // is actually used by the vector bonus heuristic. As long as that's true,
1567  // we have to special case debug intrinsics here to prevent differences in
1568  // inlining due to debug symbols. Eventually, the number of unsimplified
1569  // instructions shouldn't factor into the cost computation, but until then,
1570  // hack around it here.
1571  if (isa<DbgInfoIntrinsic>(I))
1572  continue;
1573 
1574  // Skip ephemeral values.
1575  if (EphValues.count(&*I))
1576  continue;
1577 
1578  ++NumInstructions;
1579  if (isa<ExtractElementInst>(I) || I->getType()->isVectorTy())
1580  ++NumVectorInstructions;
1581 
1582  // If the instruction simplified to a constant, there is no cost to this
1583  // instruction. Visit the instructions using our InstVisitor to account for
1584  // all of the per-instruction logic. The visit tree returns true if we
1585  // consumed the instruction in any way, and false if the instruction's base
1586  // cost should count against inlining.
1587  if (Base::visit(&*I))
1588  ++NumInstructionsSimplified;
1589  else
1590  addCost(InlineConstants::InstrCost);
1591 
1592  using namespace ore;
1593  // If visiting this instruction detected an uninlinable pattern, abort.
1594  InlineResult IR;
1595  if (IsRecursiveCall)
1596  IR = "recursive";
1597  else if (ExposesReturnsTwice)
1598  IR = "exposes returns twice";
1599  else if (HasDynamicAlloca)
1600  IR = "dynamic alloca";
1601  else if (HasIndirectBr)
1602  IR = "indirect branch";
1603  else if (HasUninlineableIntrinsic)
1604  IR = "uninlinable intrinsic";
1605  else if (InitsVargArgs)
1606  IR = "varargs";
1607  if (!IR) {
1608  if (ORE)
1609  ORE->emit([&]() {
1610  return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline",
1611  &CandidateCall)
1612  << NV("Callee", &F) << " has uninlinable pattern ("
1613  << NV("InlineResult", IR.message)
1614  << ") and cost is not fully computed";
1615  });
1616  return IR;
1617  }
1618 
1619  // If the caller is a recursive function then we don't want to inline
1620  // functions which allocate a lot of stack space because it would increase
1621  // the caller stack usage dramatically.
1622  if (IsCallerRecursive &&
1623      AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller) {
1624  InlineResult IR = "recursive and allocates too much stack space";
1625  if (ORE)
1626  ORE->emit([&]() {
1627  return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline",
1628  &CandidateCall)
1629  << NV("Callee", &F) << " is " << NV("InlineResult", IR.message)
1630  << ". Cost is not fully computed";
1631  });
1632  return IR;
1633  }
1634 
1635  // Check if we've passed the maximum possible threshold so we don't spin in
1636  // huge basic blocks that will never inline.
1637  if (Cost >= Threshold && !ComputeFullInlineCost)
1638  return false;
1639  }
1640 
1641  return true;
1642 }
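// Illustrative summary of the accounting above: every instruction the visitor
// fails to fold away charges InlineConstants::InstrCost, simplified
// instructions are free, and the walk aborts early either on an uninlinable
// pattern or once Cost reaches Threshold (unless -inline-cost-full forces a
// complete computation).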
1643 
1644 /// Compute the base pointer and cumulative constant offsets for V.
1645 ///
1646 /// This strips all constant offsets off of V, leaving it the base pointer, and
1647 /// accumulates the total constant offset applied in the returned constant. It
1648 /// returns 0 if V is not a pointer, and returns the constant '0' if there are
1649 /// no constant offsets applied.
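 ///
 /// Illustrative example: for
 ///   %p = getelementptr inbounds [8 x i32], [8 x i32]* %a, i64 0, i64 3
 /// passing V = %p rewrites V to %a and returns a constant 12 (3 * 4 bytes) of
 /// pointer-sized integer type.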
1650 ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
1651  if (!V->getType()->isPointerTy())
1652  return nullptr;
1653 
1654  unsigned AS = V->getType()->getPointerAddressSpace();
1655  unsigned IntPtrWidth = DL.getIndexSizeInBits(AS);
1656  APInt Offset = APInt::getNullValue(IntPtrWidth);
1657 
1658  // Even though we don't look through PHI nodes, we could be called on an
1659  // instruction in an unreachable block, which may be on a cycle.
1660  SmallPtrSet<Value *, 4> Visited;
1661  Visited.insert(V);
1662  do {
1663  if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
1664  if (!GEP->isInBounds() || !accumulateGEPOffset(*GEP, Offset))
1665  return nullptr;
1666  V = GEP->getPointerOperand();
1667  } else if (Operator::getOpcode(V) == Instruction::BitCast) {
1668  V = cast<Operator>(V)->getOperand(0);
1669  } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
1670  if (GA->isInterposable())
1671  break;
1672  V = GA->getAliasee();
1673  } else {
1674  break;
1675  }
1676  assert(V->getType()->isPointerTy() && "Unexpected operand type!");
1677  } while (Visited.insert(V).second);
1678 
1679  Type *IntPtrTy = DL.getIntPtrType(V->getContext(), AS);
1680  return cast<ConstantInt>(ConstantInt::get(IntPtrTy, Offset));
1681 }
1682 
1683 /// Find dead blocks due to deleted CFG edges during inlining.
1684 ///
1685 /// If we know the successor of the current block, \p CurrBB, has to be \p
1686 /// NextBB, the other successors of \p CurrBB are dead if these successors have
1687 /// no live incoming CFG edges. If one block is found to be dead, we can
1688 /// continue growing the dead block list by checking the successors of the dead
1689 /// blocks to see if all their incoming edges are dead or not.
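 ///
 /// Illustrative example: if CurrBB's conditional branch folds so that NextBB
 /// is its only live successor, then a second successor S whose sole
 /// predecessor is CurrBB becomes dead, and any block reachable only through S
 /// is added to DeadBlocks in turn.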
1690 void CallAnalyzer::findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB) {
1691  auto IsEdgeDead = [&](BasicBlock *Pred, BasicBlock *Succ) {
1692  // A CFG edge is dead if the predecessor is dead or the predecessor has a
1693  // known successor which is not the one under exam.
1694  return (DeadBlocks.count(Pred) ||
1695  (KnownSuccessors[Pred] && KnownSuccessors[Pred] != Succ));
1696  };
1697 
1698  auto IsNewlyDead = [&](BasicBlock *BB) {
1699  // If all the edges to a block are dead, the block is also dead.
1700  return (!DeadBlocks.count(BB) &&
1701  llvm::all_of(predecessors(BB),
1702  [&](BasicBlock *P) { return IsEdgeDead(P, BB); }));
1703  };
1704 
1705  for (BasicBlock *Succ : successors(CurrBB)) {
1706  if (Succ == NextBB || !IsNewlyDead(Succ))
1707  continue;
1708  SmallVector<BasicBlock *, 4> NewDead;
1709  NewDead.push_back(Succ);
1710  while (!NewDead.empty()) {
1711  BasicBlock *Dead = NewDead.pop_back_val();
1712  if (DeadBlocks.insert(Dead))
1713  // Continue growing the dead block lists.
1714  for (BasicBlock *S : successors(Dead))
1715  if (IsNewlyDead(S))
1716  NewDead.push_back(S);
1717  }
1718  }
1719 }
1720 
1721 /// Analyze a call site for potential inlining.
1722 ///
1723 /// Returns true if inlining this call is viable, and false if it is not
1724 /// viable. It computes the cost and adjusts the threshold based on numerous
1725 /// factors and heuristics. If this method returns false but the computed cost
1726 /// is below the computed threshold, then inlining was forcibly disabled by
1727 /// some artifact of the routine.
1728 InlineResult CallAnalyzer::analyzeCall(CallBase &Call) {
1729  ++NumCallsAnalyzed;
1730 
1731  // Perform some tweaks to the cost and threshold based on the direct
1732  // callsite information.
1733 
1734  // We want to more aggressively inline vector-dense kernels, so up the
1735  // threshold, and we'll lower it if the % of vector instructions gets too
1736  // low. Note that these bonuses are somewhat arbitrary and evolved over time
1737  // by accident as much as because they are principled bonuses.
1738  //
1739  // FIXME: It would be nice to remove all such bonuses. At least it would be
1740  // nice to base the bonus values on something more scientific.
1741  assert(NumInstructions == 0);
1742  assert(NumVectorInstructions == 0);
1743 
1744  // Update the threshold based on callsite properties
1745  updateThreshold(Call, F);
1746 
1747  // While Threshold depends on commandline options that can take negative
1748  // values, we want to enforce the invariant that the computed threshold and
1749  // bonuses are non-negative.
1750  assert(Threshold >= 0);
1751  assert(SingleBBBonus >= 0);
1752  assert(VectorBonus >= 0);
1753 
1754  // Speculatively apply all possible bonuses to Threshold. If cost exceeds
1755  // this Threshold any time, and cost cannot decrease, we can stop processing
1756  // the rest of the function body.
1757  Threshold += (SingleBBBonus + VectorBonus);
1758 
1759  // Give out bonuses for the callsite, as the instructions setting them up
1760  // will be gone after inlining.
1761  addCost(-getCallsiteCost(Call, DL));
1762 
1763  // If this function uses the coldcc calling convention, prefer not to inline
1764  // it.
1765  if (F.getCallingConv() == CallingConv::Cold)
1766  Cost += InlineConstants::ColdccPenalty;
1767 
1768  // Check if we're done. This can happen due to bonuses and penalties.
1769  if (Cost >= Threshold && !ComputeFullInlineCost)
1770  return "high cost";
1771 
1772  if (F.empty())
1773  return true;
1774 
1775  Function *Caller = Call.getFunction();
1776  // Check if the caller function is recursive itself.
1777  for (User *U : Caller->users()) {
1778  CallBase *Call = dyn_cast<CallBase>(U);
1779  if (Call && Call->getFunction() == Caller) {
1780  IsCallerRecursive = true;
1781  break;
1782  }
1783  }
1784 
1785  // Populate our simplified values by mapping from function arguments to call
1786  // arguments with known important simplifications.
1787  auto CAI = Call.arg_begin();
1788  for (Function::arg_iterator FAI = F.arg_begin(), FAE = F.arg_end();
1789  FAI != FAE; ++FAI, ++CAI) {
1790  assert(CAI != Call.arg_end());
1791  if (Constant *C = dyn_cast<Constant>(CAI))
1792  SimplifiedValues[&*FAI] = C;
1793 
1794  Value *PtrArg = *CAI;
1795  if (ConstantInt *C = stripAndComputeInBoundsConstantOffsets(PtrArg)) {
1796  ConstantOffsetPtrs[&*FAI] = std::make_pair(PtrArg, C->getValue());
1797 
1798  // We can SROA any pointer arguments derived from alloca instructions.
1799  if (isa<AllocaInst>(PtrArg)) {
1800  SROAArgValues[&*FAI] = PtrArg;
1801  SROAArgCosts[PtrArg] = 0;
1802  }
1803  }
1804  }
1805  NumConstantArgs = SimplifiedValues.size();
1806  NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size();
1807  NumAllocaArgs = SROAArgValues.size();
1808 
1809  // FIXME: If a caller has multiple calls to a callee, we end up recomputing
1810  // the ephemeral values multiple times (and they're completely determined by
1811  // the callee, so this is purely duplicate work).
1812  SmallPtrSet<const Value *, 32> EphValues;
1813  CodeMetrics::collectEphemeralValues(&F, &GetAssumptionCache(F), EphValues);
1814 
1815  // The worklist of live basic blocks in the callee *after* inlining. We avoid
1816  // adding basic blocks of the callee which can be proven to be dead for this
1817  // particular call site in order to get more accurate cost estimates. This
1818  // requires a somewhat heavyweight iteration pattern: we need to walk the
1819  // basic blocks in a breadth-first order as we insert live successors. To
1820  // accomplish this, prioritizing for small iterations because we exit after
1821  // crossing our threshold, we use a small-size optimized SetVector.
1822  typedef SetVector<BasicBlock *, SmallVector<BasicBlock *, 16>,
1823  SmallPtrSet<BasicBlock *, 16>>
1824  BBSetVector;
1825  BBSetVector BBWorklist;
1826  BBWorklist.insert(&F.getEntryBlock());
1827  bool SingleBB = true;
1828  // Note that we *must not* cache the size, this loop grows the worklist.
1829  for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
1830  // Bail out the moment we cross the threshold. This means we'll under-count
1831  // the cost, but only when undercounting doesn't matter.
1832  if (Cost >= Threshold && !ComputeFullInlineCost)
1833  break;
1834 
1835  BasicBlock *BB = BBWorklist[Idx];
1836  if (BB->empty())
1837  continue;
1838 
1839  // Disallow inlining a blockaddress with uses other than strictly callbr.
1840  // A blockaddress only has defined behavior for an indirect branch in the
1841  // same function, and we do not currently support inlining indirect
1842  // branches. But, the inliner may not see an indirect branch that ends up
1843  // being dead code at a particular call site. If the blockaddress escapes
1844  // the function, e.g., via a global variable, inlining may lead to an
1845  // invalid cross-function reference.
1846  // FIXME: pr/39560: continue relaxing this overt restriction.
1847  if (BB->hasAddressTaken())
1848  for (User *U : BlockAddress::get(&*BB)->users())
1849  if (!isa<CallBrInst>(*U))
1850  return "blockaddress used outside of callbr";
1851 
1852  // Analyze the cost of this block. If we blow through the threshold, this
1853  // returns false, and we can bail out.
1854  InlineResult IR = analyzeBlock(BB, EphValues);
1855  if (!IR)
1856  return IR;
1857 
1858  Instruction *TI = BB->getTerminator();
1859 
1860  // Add in the live successors by first checking whether we have terminator
1861  // that may be simplified based on the values simplified by this call.
1862  if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
1863  if (BI->isConditional()) {
1864  Value *Cond = BI->getCondition();
1865  if (ConstantInt *SimpleCond =
1866  dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
1867  BasicBlock *NextBB = BI->getSuccessor(SimpleCond->isZero() ? 1 : 0);
1868  BBWorklist.insert(NextBB);
1869  KnownSuccessors[BB] = NextBB;
1870  findDeadBlocks(BB, NextBB);
1871  continue;
1872  }
1873  }
1874  } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
1875  Value *Cond = SI->getCondition();
1876  if (ConstantInt *SimpleCond =
1877  dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
1878  BasicBlock *NextBB = SI->findCaseValue(SimpleCond)->getCaseSuccessor();
1879  BBWorklist.insert(NextBB);
1880  KnownSuccessors[BB] = NextBB;
1881  findDeadBlocks(BB, NextBB);
1882  continue;
1883  }
1884  }
1885 
1886  // If we're unable to select a particular successor, just count all of
1887  // them.
1888  for (unsigned TIdx = 0, TSize = TI->getNumSuccessors(); TIdx != TSize;
1889  ++TIdx)
1890  BBWorklist.insert(TI->getSuccessor(TIdx));
1891 
1892  // If we had any successors at this point, then post-inlining is likely to
1893  // have them as well. Note that we assume any basic blocks which existed
1894  // due to branches or switches which folded above will also fold after
1895  // inlining.
1896  if (SingleBB && TI->getNumSuccessors() > 1) {
1897  // Take off the bonus we applied to the threshold.
1898  Threshold -= SingleBBBonus;
1899  SingleBB = false;
1900  }
1901  }
1902 
1903  bool OnlyOneCallAndLocalLinkage =
1904  F.hasLocalLinkage() && F.hasOneUse() && &F == Call.getCalledFunction();
1905  // If this is a noduplicate call, we can still inline as long as
1906  // inlining this would cause the removal of the caller (so the instruction
1907  // is not actually duplicated, just moved).
1908  if (!OnlyOneCallAndLocalLinkage && ContainsNoDuplicateCall)
1909  return "noduplicate";
1910 
1911  // Loops generally act a lot like calls in that they act like barriers to
1912  // movement, require a certain amount of setup, etc. So when optimising for
1913  // size, we penalise any call sites that perform loops. We do this after all
1914  // other costs here, so will likely only be dealing with relatively small
1915  // functions (and hence DT and LI will hopefully be cheap).
1916  if (Caller->hasMinSize()) {
1917  DominatorTree DT(F);
1918  LoopInfo LI(DT);
1919  int NumLoops = 0;
1920  for (Loop *L : LI) {
1921  // Ignore loops that will not be executed
1922  if (DeadBlocks.count(L->getHeader()))
1923  continue;
1924  NumLoops++;
1925  }
1926  addCost(NumLoops * InlineConstants::CallPenalty);
1927  }
1928 
1929  // We applied the maximum possible vector bonus at the beginning. Now,
1930  // subtract the excess bonus, if any, from the Threshold before
1931  // comparing against Cost.
1932  if (NumVectorInstructions <= NumInstructions / 10)
1933  Threshold -= VectorBonus;
1934  else if (NumVectorInstructions <= NumInstructions / 2)
1935  Threshold -= VectorBonus/2;
1936 
1937  return Cost < std::max(1, Threshold);
1938 }
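// Illustrative threshold arithmetic for analyzeCall above: the threshold
// starts as the updated base plus SingleBBBonus plus VectorBonus. A callee
// that turns out to have more than one block gives back SingleBBBonus; a
// callee whose vector instructions are at most 10% (respectively 50%) of all
// instructions gives back the whole (respectively half of the) VectorBonus
// before the final Cost < max(1, Threshold) test.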
1939 
1940 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1941 /// Dump stats about this call's analysis.
1942 LLVM_DUMP_METHOD void CallAnalyzer::dump() {
1943 #define DEBUG_PRINT_STAT(x) dbgs() << " " #x ": " << x << "\n"
1944  DEBUG_PRINT_STAT(NumConstantArgs);
1945  DEBUG_PRINT_STAT(NumConstantOffsetPtrArgs);
1946  DEBUG_PRINT_STAT(NumAllocaArgs);
1947  DEBUG_PRINT_STAT(NumConstantPtrCmps);
1948  DEBUG_PRINT_STAT(NumConstantPtrDiffs);
1949  DEBUG_PRINT_STAT(NumInstructionsSimplified);
1950  DEBUG_PRINT_STAT(NumInstructions);
1951  DEBUG_PRINT_STAT(SROACostSavings);
1952  DEBUG_PRINT_STAT(SROACostSavingsLost);
1953  DEBUG_PRINT_STAT(LoadEliminationCost);
1954  DEBUG_PRINT_STAT(ContainsNoDuplicateCall);
1955  DEBUG_PRINT_STAT(Cost);
1956  DEBUG_PRINT_STAT(Threshold);
1957 #undef DEBUG_PRINT_STAT
1958 }
1959 #endif
1960 
1961 /// Test that there are no attribute conflicts between Caller and Callee
1962 /// that prevent inlining.
1963 static bool functionsHaveCompatibleAttributes(Function *Caller,
1964  Function *Callee,
1965  TargetTransformInfo &TTI) {
1966  return TTI.areInlineCompatible(Caller, Callee) &&
1967  AttributeFuncs::areInlineCompatible(*Caller, *Callee);
1968 }
1969 
1970 int llvm::getCallsiteCost(CallBase &Call, const DataLayout &DL) {
1971  int Cost = 0;
1972  for (unsigned I = 0, E = Call.arg_size(); I != E; ++I) {
1973  if (Call.isByValArgument(I)) {
1974  // We approximate the number of loads and stores needed by dividing the
1975  // size of the byval type by the target's pointer size.
1976  PointerType *PTy = cast<PointerType>(Call.getArgOperand(I)->getType());
1977  unsigned TypeSize = DL.getTypeSizeInBits(PTy->getElementType());
1978  unsigned AS = PTy->getAddressSpace();
1979  unsigned PointerSize = DL.getPointerSizeInBits(AS);
1980  // Ceiling division.
1981  unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize;
1982 
1983  // If it generates more than 8 stores it is likely to be expanded as an
1984  // inline memcpy so we take that as an upper bound. Otherwise we assume
1985  // one load and one store per word copied.
1986  // FIXME: The maxStoresPerMemcpy setting from the target should be used
1987  // here instead of a magic number of 8, but it's not available via
1988  // DataLayout.
1989  NumStores = std::min(NumStores, 8U);
1990 
1991  Cost += 2 * NumStores * InlineConstants::InstrCost;
1992  } else {
1993  // For non-byval arguments subtract off one instruction per call
1994  // argument.
1995  Cost += InlineConstants::InstrCost;
1996  }
1997  }
1998  // The call instruction also disappears after inlining.
1999  Cost += InlineConstants::InstrCost + InlineConstants::CallPenalty;
2000  return Cost;
2001 }
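// Worked example for the byval accounting above (illustrative): a 256-bit
// byval struct on a target with 64-bit pointers needs (256 + 63) / 64 = 4
// stores, contributing 2 * 4 * InstrCost; a 4096-bit struct would need 64
// stores but is capped at the 8-store memcpy bound, i.e. 2 * 8 * InstrCost.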
2002 
2003 InlineCost llvm::getInlineCost(
2004  CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI,
2005  std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
2006  Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI,
2007  ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
2008  return getInlineCost(Call, Call.getCalledFunction(), Params, CalleeTTI,
2009  GetAssumptionCache, GetBFI, PSI, ORE);
2010 }
2011 
2012 InlineCost llvm::getInlineCost(
2013  CallBase &Call, Function *Callee, const InlineParams &Params,
2014  TargetTransformInfo &CalleeTTI,
2015  std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
2016  Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI,
2017  ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
2018 
2019  // Cannot inline indirect calls.
2020  if (!Callee)
2021  return llvm::InlineCost::getNever("indirect call");
2022 
2023  // Never inline calls with byval arguments that do not have the alloca
2024  // address space. Since byval arguments can be replaced with a copy to an
2025  // alloca, the inlined code would need to be adjusted to handle that the
2026  // argument is in the alloca address space (so it is a little bit complicated
2027  // to solve).
2028  unsigned AllocaAS = Callee->getParent()->getDataLayout().getAllocaAddrSpace();
2029  for (unsigned I = 0, E = Call.arg_size(); I != E; ++I)
2030  if (Call.isByValArgument(I)) {
2031  PointerType *PTy = cast<PointerType>(Call.getArgOperand(I)->getType());
2032  if (PTy->getAddressSpace() != AllocaAS)
2033  return llvm::InlineCost::getNever("byval arguments without alloca"
2034  " address space");
2035  }
2036 
2037  // Calls to functions with always-inline attributes should be inlined
2038  // whenever possible.
2039  if (Call.hasFnAttr(Attribute::AlwaysInline)) {
2040  auto IsViable = isInlineViable(*Callee);
2041  if (IsViable)
2042  return llvm::InlineCost::getAlways("always inline attribute");
2043  return llvm::InlineCost::getNever(IsViable.message);
2044  }
2045 
2046  // Never inline functions with conflicting attributes (unless callee has
2047  // always-inline attribute).
2048  Function *Caller = Call.getCaller();
2049  if (!functionsHaveCompatibleAttributes(Caller, Callee, CalleeTTI))
2050  return llvm::InlineCost::getNever("conflicting attributes");
2051 
2052  // Don't inline this call if the caller has the optnone attribute.
2053  if (Caller->hasOptNone())
2054  return llvm::InlineCost::getNever("optnone attribute");
2055 
2056  // Don't inline a function that treats null pointer as valid into a caller
2057  // that does not have this attribute.
2058  if (!Caller->nullPointerIsDefined() && Callee->nullPointerIsDefined())
2059  return llvm::InlineCost::getNever("nullptr definitions incompatible");
2060 
2061  // Don't inline functions which can be interposed at link-time.
2062  if (Callee->isInterposable())
2063  return llvm::InlineCost::getNever("interposable");
2064 
2065  // Don't inline functions marked noinline.
2066  if (Callee->hasFnAttribute(Attribute::NoInline))
2067  return llvm::InlineCost::getNever("noinline function attribute");
2068 
2069  // Don't inline call sites marked noinline.
2070  if (Call.isNoInline())
2071  return llvm::InlineCost::getNever("noinline call site attribute");
2072 
2073  LLVM_DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName()
2074  << "... (caller:" << Caller->getName() << ")\n");
2075 
2076  CallAnalyzer CA(CalleeTTI, GetAssumptionCache, GetBFI, PSI, ORE, *Callee,
2077  Call, Params);
2078  InlineResult ShouldInline = CA.analyzeCall(Call);
2079 
2080  LLVM_DEBUG(CA.dump());
2081 
2082  // Check if there was a reason to force inlining or no inlining.
2083  if (!ShouldInline && CA.getCost() < CA.getThreshold())
2084  return InlineCost::getNever(ShouldInline.message);
2085  if (ShouldInline && CA.getCost() >= CA.getThreshold())
2086  return InlineCost::getAlways("empty function");
2087 
2088  return llvm::InlineCost::get(CA.getCost(), CA.getThreshold());
2089 }
2090 
2091 InlineResult llvm::isInlineViable(Function &F) {
2092  bool ReturnsTwice = F.hasFnAttribute(Attribute::ReturnsTwice);
2093  for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
2094  // Disallow inlining of functions which contain indirect branches.
2095  if (isa<IndirectBrInst>(BI->getTerminator()))
2096  return "contains indirect branches";
2097 
2098  // Disallow inlining of blockaddresses which are used by non-callbr
2099  // instructions.
2100  if (BI->hasAddressTaken())
2101  for (User *U : BlockAddress::get(&*BI)->users())
2102  if (!isa<CallBrInst>(*U))
2103  return "blockaddress used outside of callbr";
2104 
2105  for (auto &II : *BI) {
2106  CallBase *Call = dyn_cast<CallBase>(&II);
2107  if (!Call)
2108  continue;
2109 
2110  // Disallow recursive calls.
2111  if (&F == Call->getCalledFunction())
2112  return "recursive call";
2113 
2114  // Disallow calls which expose returns-twice to a function not previously
2115  // attributed as such.
2116  if (!ReturnsTwice && isa<CallInst>(Call) &&
2117  cast<CallInst>(Call)->canReturnTwice())
2118  return "exposes returns-twice attribute";
2119 
2120  if (Call->getCalledFunction())
2121  switch (Call->getCalledFunction()->getIntrinsicID()) {
2122  default:
2123  break;
2124  // Disallow inlining of @llvm.icall.branch.funnel because the current
2125  // backend can't separate call targets from call arguments.
2126  case llvm::Intrinsic::icall_branch_funnel:
2127  return "disallowed inlining of @llvm.icall.branch.funnel";
2128  // Disallow inlining functions that call @llvm.localescape. Doing this
2129  // correctly would require major changes to the inliner.
2130  case llvm::Intrinsic::localescape:
2131  return "disallowed inlining of @llvm.localescape";
2132  // Disallow inlining of functions that initialize VarArgs with va_start.
2133  case llvm::Intrinsic::vastart:
2134  return "contains VarArgs initialized with va_start";
2135  }
2136  }
2137  }
2138 
2139  return true;
2140 }
2141 
2142 // APIs to create InlineParams based on command line flags and/or other
2143 // parameters.
2144 
2145 InlineParams llvm::getInlineParams(int Threshold) {
2146  InlineParams Params;
2147 
2148  // This field is the threshold to use for a callee by default. This is
2149  // derived from one or more of:
2150  // * optimization or size-optimization levels,
2151  // * a value passed to createFunctionInliningPass function, or
2152  // * the -inline-threshold flag.
2153  // If the -inline-threshold flag is explicitly specified, that is used
2154  // irrespective of anything else.
2155  if (InlineThreshold.getNumOccurrences() > 0)
2156  Params.DefaultThreshold = InlineThreshold;
2157  else
2158  Params.DefaultThreshold = Threshold;
2159 
2160  // Set the HintThreshold knob from the -inlinehint-threshold.
2161  Params.HintThreshold = HintThreshold;
2162 
2163  // Set the HotCallSiteThreshold knob from the -hot-callsite-threshold.
2164  Params.HotCallSiteThreshold = HotCallSiteThreshold;
2165 
2166  // If the -locally-hot-callsite-threshold is explicitly specified, use it to
2167  // populate LocallyHotCallSiteThreshold. Later, we populate
2168  // Params.LocallyHotCallSiteThreshold from -locally-hot-callsite-threshold if
2169  // we know that optimization level is O3 (in the getInlineParams variant that
2170  // takes the opt and size levels).
2171  // FIXME: Remove this check (and make the assignment unconditional) after
2172  // addressing size regression issues at O2.
2173  if (LocallyHotCallSiteThreshold.getNumOccurrences() > 0)
2174  Params.LocallyHotCallSiteThreshold = LocallyHotCallSiteThreshold;
2175 
2176  // Set the ColdCallSiteThreshold knob from the -inline-cold-callsite-threshold.
2177  Params.ColdCallSiteThreshold = ColdCallSiteThreshold;
2178 
2179  // Set the OptMinSizeThreshold and OptSizeThreshold params only if the
2180  // -inlinehint-threshold commandline option is not explicitly given. If that
2181  // option is present, then its value applies even for callees with size and
2182  // minsize attributes.
2183  // If the -inline-threshold is not specified, set the ColdThreshold from the
2184  // -inlinecold-threshold even if it is not explicitly passed. If
2185  // -inline-threshold is specified, then -inlinecold-threshold needs to be
2186  // explicitly specified to set the ColdThreshold knob
2187  if (InlineThreshold.getNumOccurrences() == 0) {
2188  Params.OptMinSizeThreshold = InlineConstants::OptMinSizeThreshold;
2189  Params.OptSizeThreshold = InlineConstants::OptSizeThreshold;
2190  Params.ColdThreshold = ColdThreshold;
2191  } else if (ColdThreshold.getNumOccurrences() > 0) {
2192  Params.ColdThreshold = ColdThreshold;
2193  }
2194  return Params;
2195 }
2196 
2197 InlineParams llvm::getInlineParams() {
2198  return getInlineParams(InlineThreshold);
2199 }
2200 
2201 // Compute the default threshold for inlining based on the opt level and the
2202 // size opt level.
2203 static int computeThresholdFromOptLevels(unsigned OptLevel,
2204  unsigned SizeOptLevel) {
2205  if (OptLevel > 2)
2206  return InlineConstants::OptAggressiveThreshold;
2207  if (SizeOptLevel == 1) // -Os
2208  return InlineConstants::OptSizeThreshold;
2209  if (SizeOptLevel == 2) // -Oz
2210  return InlineConstants::OptMinSizeThreshold;
2211  return InlineThreshold;
2212 }
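// For example (illustrative): getInlineParams(3, 0) starts from
// InlineConstants::OptAggressiveThreshold, getInlineParams(2, 1) (-Os) from
// InlineConstants::OptSizeThreshold, getInlineParams(2, 2) (-Oz) from
// InlineConstants::OptMinSizeThreshold, and plain -O2 falls back to the
// -inline-threshold default.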
2213 
2214 InlineParams llvm::getInlineParams(unsigned OptLevel, unsigned SizeOptLevel) {
2215  auto Params =
2216  getInlineParams(computeThresholdFromOptLevels(OptLevel, SizeOptLevel));
2217  // At O3, use the value of -locally-hot-callsite-threshold option to populate
2218  // Params.LocallyHotCallSiteThreshold. Below O3, this flag has effect only
2219  // when it is specified explicitly.
2220  if (OptLevel > 2)
2221  Params.LocallyHotCallSiteThreshold = LocallyHotCallSiteThreshold;
2222  return Params;
2223 }