LLVM  9.0.0svn
PartialInlining.cpp
Go to the documentation of this file.
1 //===- PartialInlining.cpp - Inline parts of functions --------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass performs partial inlining, typically by inlining an if statement
11 // that surrounds the body of the function.
12 //
13 //===----------------------------------------------------------------------===//
14 
16 #include "llvm/ADT/DenseMap.h"
17 #include "llvm/ADT/DenseSet.h"
18 #include "llvm/ADT/None.h"
19 #include "llvm/ADT/Optional.h"
20 #include "llvm/ADT/STLExtras.h"
21 #include "llvm/ADT/SmallVector.h"
22 #include "llvm/ADT/Statistic.h"
26 #include "llvm/Analysis/LoopInfo.h"
31 #include "llvm/IR/Attributes.h"
32 #include "llvm/IR/BasicBlock.h"
33 #include "llvm/IR/CFG.h"
34 #include "llvm/IR/CallSite.h"
35 #include "llvm/IR/DebugLoc.h"
36 #include "llvm/IR/DiagnosticInfo.h"
37 #include "llvm/IR/Dominators.h"
38 #include "llvm/IR/Function.h"
39 #include "llvm/IR/InstrTypes.h"
40 #include "llvm/IR/Instruction.h"
41 #include "llvm/IR/Instructions.h"
42 #include "llvm/IR/IntrinsicInst.h"
43 #include "llvm/IR/Intrinsics.h"
44 #include "llvm/IR/Module.h"
45 #include "llvm/IR/User.h"
46 #include "llvm/Pass.h"
49 #include "llvm/Support/Casting.h"
52 #include "llvm/Transforms/IPO.h"
56 #include <algorithm>
57 #include <cassert>
58 #include <cstdint>
59 #include <functional>
60 #include <iterator>
61 #include <memory>
62 #include <tuple>
63 #include <vector>
64 
65 using namespace llvm;
66 
67 #define DEBUG_TYPE "partial-inlining"
68 
69 STATISTIC(NumPartialInlined,
70  "Number of callsites functions partially inlined into.");
71 STATISTIC(NumColdOutlinePartialInlined, "Number of times functions with "
72  "cold outlined regions were partially "
73  "inlined into its caller(s).");
74 STATISTIC(NumColdRegionsFound,
75  "Number of cold single entry/exit regions found.");
76 STATISTIC(NumColdRegionsOutlined,
77  "Number of cold single entry/exit regions outlined.");
78 
79 // Command line option to disable partial-inlining. The default is false:
80 static cl::opt<bool>
81  DisablePartialInlining("disable-partial-inlining", cl::init(false),
82  cl::Hidden, cl::desc("Disable partial inlining"));
83 // Command line option to disable multi-region partial-inlining. The default is
84 // false:
86  "disable-mr-partial-inlining", cl::init(false), cl::Hidden,
87  cl::desc("Disable multi-region partial inlining"));
88 
89 // Command line option to force outlining in regions with live exit variables.
90 // The default is false:
91 static cl::opt<bool>
92  ForceLiveExit("pi-force-live-exit-outline", cl::init(false), cl::Hidden,
93  cl::desc("Force outline regions with live exits"));
94 
95 // Command line option to enable marking outline functions with Cold Calling
96 // Convention. The default is false:
97 static cl::opt<bool>
98  MarkOutlinedColdCC("pi-mark-coldcc", cl::init(false), cl::Hidden,
99  cl::desc("Mark outline function calls with ColdCC"));
100 
101 #ifndef NDEBUG
102 // Command line option to debug partial-inlining. The default is none:
103 static cl::opt<bool> TracePartialInlining("trace-partial-inlining",
104  cl::init(false), cl::Hidden,
105  cl::desc("Trace partial inlining."));
106 #endif
107 
108 // This is an option used by testing:
109 static cl::opt<bool> SkipCostAnalysis("skip-partial-inlining-cost-analysis",
110  cl::init(false), cl::ZeroOrMore,
112  cl::desc("Skip Cost Analysis"));
113 // Used to determine if a cold region is worth outlining based on
114 // its inlining cost compared to the original function. Default is set at 10%.
115 // ie. if the cold region reduces the inlining cost of the original function by
116 // at least 10%.
118  "min-region-size-ratio", cl::init(0.1), cl::Hidden,
119  cl::desc("Minimum ratio comparing relative sizes of each "
120  "outline candidate and original function"));
121 // Used to tune the minimum number of execution counts needed in the predecessor
122 // block to the cold edge. ie. confidence interval.
123 static cl::opt<unsigned>
124  MinBlockCounterExecution("min-block-execution", cl::init(100), cl::Hidden,
125  cl::desc("Minimum block executions to consider "
126  "its BranchProbabilityInfo valid"));
127 // Used to determine when an edge is considered cold. Default is set to 10%. ie.
128 // if the branch probability is 10% or less, then it is deemed as 'cold'.
130  "cold-branch-ratio", cl::init(0.1), cl::Hidden,
131  cl::desc("Minimum BranchProbability to consider a region cold."));
132 
134  "max-num-inline-blocks", cl::init(5), cl::Hidden,
135  cl::desc("Max number of blocks to be partially inlined"));
136 
137 // Command line option to set the maximum number of partial inlining allowed
138 // for the module. The default value of -1 means no limit.
140  "max-partial-inlining", cl::init(-1), cl::Hidden, cl::ZeroOrMore,
141  cl::desc("Max number of partial inlining. The default is unlimited"));
142 
143 // Used only when PGO or user annotated branch data is absent. It is
144 // the least value that is used to weigh the outline region. If BFI
145 // produces larger value, the BFI value will be used.
146 static cl::opt<int>
147  OutlineRegionFreqPercent("outline-region-freq-percent", cl::init(75),
149  cl::desc("Relative frequency of outline region to "
150  "the entry block"));
151 
153  "partial-inlining-extra-penalty", cl::init(0), cl::Hidden,
154  cl::desc("A debug option to add additional penalty to the computed one."));
155 
156 namespace {
157 
158 struct FunctionOutliningInfo {
159  FunctionOutliningInfo() = default;
160 
161  // Returns the number of blocks to be inlined including all blocks
162  // in Entries and one return block.
163  unsigned GetNumInlinedBlocks() const { return Entries.size() + 1; }
164 
165  // A set of blocks including the function entry that guard
166  // the region to be outlined.
168 
169  // The return block that is not included in the outlined region.
170  BasicBlock *ReturnBlock = nullptr;
171 
172  // The dominating block of the region to be outlined.
173  BasicBlock *NonReturnBlock = nullptr;
174 
175  // The set of blocks in Entries that that are predecessors to ReturnBlock
176  SmallVector<BasicBlock *, 4> ReturnBlockPreds;
177 };
178 
179 struct FunctionOutliningMultiRegionInfo {
180  FunctionOutliningMultiRegionInfo()
181  : ORI() {}
182 
183  // Container for outline regions
184  struct OutlineRegionInfo {
185  OutlineRegionInfo(SmallVector<BasicBlock *, 8> Region,
186  BasicBlock *EntryBlock, BasicBlock *ExitBlock,
187  BasicBlock *ReturnBlock)
188  : Region(Region), EntryBlock(EntryBlock), ExitBlock(ExitBlock),
189  ReturnBlock(ReturnBlock) {}
191  BasicBlock *EntryBlock;
192  BasicBlock *ExitBlock;
193  BasicBlock *ReturnBlock;
194  };
195 
197 };
198 
199 struct PartialInlinerImpl {
200 
201  PartialInlinerImpl(
205  ProfileSummaryInfo *ProfSI)
206  : GetAssumptionCache(GetAC), GetTTI(GTTI), GetBFI(GBFI), PSI(ProfSI) {}
207 
208  bool run(Module &M);
209  // Main part of the transformation that calls helper functions to find
210  // outlining candidates, clone & outline the function, and attempt to
211  // partially inline the resulting function. Returns true if
212  // inlining was successful, false otherwise. Also returns the outline
213  // function (only if we partially inlined early returns) as there is a
214  // possibility to further "peel" early return statements that were left in the
215  // outline function due to code size.
216  std::pair<bool, Function *> unswitchFunction(Function *F);
217 
218  // This class speculatively clones the function to be partial inlined.
219  // At the end of partial inlining, the remaining callsites to the cloned
220  // function that are not partially inlined will be fixed up to reference
221  // the original function, and the cloned function will be erased.
222  struct FunctionCloner {
223  // Two constructors, one for single region outlining, the other for
224  // multi-region outlining.
225  FunctionCloner(Function *F, FunctionOutliningInfo *OI,
227  FunctionCloner(Function *F, FunctionOutliningMultiRegionInfo *OMRI,
229  ~FunctionCloner();
230 
231  // Prepare for function outlining: making sure there is only
232  // one incoming edge from the extracted/outlined region to
233  // the return block.
234  void NormalizeReturnBlock();
235 
236  // Do function outlining for cold regions.
237  bool doMultiRegionFunctionOutlining();
238  // Do function outlining for region after early return block(s).
239  // NOTE: For vararg functions that do the vararg handling in the outlined
240  // function, we temporarily generate IR that does not properly
241  // forward varargs to the outlined function. Calling InlineFunction
242  // will update calls to the outlined functions to properly forward
243  // the varargs.
244  Function *doSingleRegionFunctionOutlining();
245 
246  Function *OrigFunc = nullptr;
247  Function *ClonedFunc = nullptr;
248 
249  typedef std::pair<Function *, BasicBlock *> FuncBodyCallerPair;
250  // Keep track of Outlined Functions and the basic block they're called from.
251  SmallVector<FuncBodyCallerPair, 4> OutlinedFunctions;
252 
253  // ClonedFunc is inlined in one of its callers after function
254  // outlining.
255  bool IsFunctionInlined = false;
256  // The cost of the region to be outlined.
257  int OutlinedRegionCost = 0;
258  // ClonedOI is specific to outlining non-early return blocks.
259  std::unique_ptr<FunctionOutliningInfo> ClonedOI = nullptr;
260  // ClonedOMRI is specific to outlining cold regions.
261  std::unique_ptr<FunctionOutliningMultiRegionInfo> ClonedOMRI = nullptr;
262  std::unique_ptr<BlockFrequencyInfo> ClonedFuncBFI = nullptr;
264  };
265 
266 private:
267  int NumPartialInlining = 0;
268  std::function<AssumptionCache &(Function &)> *GetAssumptionCache;
269  std::function<TargetTransformInfo &(Function &)> *GetTTI;
271  ProfileSummaryInfo *PSI;
272 
273  // Return the frequency of the OutlininingBB relative to F's entry point.
274  // The result is no larger than 1 and is represented using BP.
275  // (Note that the outlined region's 'head' block can only have incoming
276  // edges from the guarding entry blocks).
277  BranchProbability getOutliningCallBBRelativeFreq(FunctionCloner &Cloner);
278 
279  // Return true if the callee of CS should be partially inlined with
280  // profit.
281  bool shouldPartialInline(CallSite CS, FunctionCloner &Cloner,
282  BlockFrequency WeightedOutliningRcost,
284 
285  // Try to inline DuplicateFunction (cloned from F with call to
286  // the OutlinedFunction into its callers. Return true
287  // if there is any successful inlining.
288  bool tryPartialInline(FunctionCloner &Cloner);
289 
290  // Compute the mapping from use site of DuplicationFunction to the enclosing
291  // BB's profile count.
292  void computeCallsiteToProfCountMap(Function *DuplicateFunction,
293  DenseMap<User *, uint64_t> &SiteCountMap);
294 
295  bool IsLimitReached() {
296  return (MaxNumPartialInlining != -1 &&
297  NumPartialInlining >= MaxNumPartialInlining);
298  }
299 
300  static CallSite getCallSite(User *U) {
301  CallSite CS;
302  if (CallInst *CI = dyn_cast<CallInst>(U))
303  CS = CallSite(CI);
304  else if (InvokeInst *II = dyn_cast<InvokeInst>(U))
305  CS = CallSite(II);
306  else
307  llvm_unreachable("All uses must be calls");
308  return CS;
309  }
310 
311  static CallSite getOneCallSiteTo(Function *F) {
312  User *User = *F->user_begin();
313  return getCallSite(User);
314  }
315 
316  std::tuple<DebugLoc, BasicBlock *> getOneDebugLoc(Function *F) {
317  CallSite CS = getOneCallSiteTo(F);
318  DebugLoc DLoc = CS.getInstruction()->getDebugLoc();
319  BasicBlock *Block = CS.getParent();
320  return std::make_tuple(DLoc, Block);
321  }
322 
323  // Returns the costs associated with function outlining:
324  // - The first value is the non-weighted runtime cost for making the call
325  // to the outlined function, including the addtional setup cost in the
326  // outlined function itself;
327  // - The second value is the estimated size of the new call sequence in
328  // basic block Cloner.OutliningCallBB;
329  std::tuple<int, int> computeOutliningCosts(FunctionCloner &Cloner);
330 
331  // Compute the 'InlineCost' of block BB. InlineCost is a proxy used to
332  // approximate both the size and runtime cost (Note that in the current
333  // inline cost analysis, there is no clear distinction there either).
334  static int computeBBInlineCost(BasicBlock *BB);
335 
336  std::unique_ptr<FunctionOutliningInfo> computeOutliningInfo(Function *F);
337  std::unique_ptr<FunctionOutliningMultiRegionInfo>
338  computeOutliningColdRegionsInfo(Function *F, OptimizationRemarkEmitter &ORE);
339 };
340 
341 struct PartialInlinerLegacyPass : public ModulePass {
342  static char ID; // Pass identification, replacement for typeid
343 
344  PartialInlinerLegacyPass() : ModulePass(ID) {
346  }
347 
348  void getAnalysisUsage(AnalysisUsage &AU) const override {
352  }
353 
354  bool runOnModule(Module &M) override {
355  if (skipModule(M))
356  return false;
357 
358  AssumptionCacheTracker *ACT = &getAnalysis<AssumptionCacheTracker>();
360  &getAnalysis<TargetTransformInfoWrapperPass>();
361  ProfileSummaryInfo *PSI =
362  &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
363 
364  std::function<AssumptionCache &(Function &)> GetAssumptionCache =
365  [&ACT](Function &F) -> AssumptionCache & {
366  return ACT->getAssumptionCache(F);
367  };
368 
369  std::function<TargetTransformInfo &(Function &)> GetTTI =
370  [&TTIWP](Function &F) -> TargetTransformInfo & {
371  return TTIWP->getTTI(F);
372  };
373 
374  return PartialInlinerImpl(&GetAssumptionCache, &GetTTI, NoneType::None, PSI)
375  .run(M);
376  }
377 };
378 
379 } // end anonymous namespace
380 
381 std::unique_ptr<FunctionOutliningMultiRegionInfo>
382 PartialInlinerImpl::computeOutliningColdRegionsInfo(Function *F,
384  BasicBlock *EntryBlock = &F->front();
385 
386  DominatorTree DT(*F);
387  LoopInfo LI(DT);
388  BranchProbabilityInfo BPI(*F, LI);
389  std::unique_ptr<BlockFrequencyInfo> ScopedBFI;
391  if (!GetBFI) {
392  ScopedBFI.reset(new BlockFrequencyInfo(*F, BPI, LI));
393  BFI = ScopedBFI.get();
394  } else
395  BFI = &(*GetBFI)(*F);
396 
397  // Return if we don't have profiling information.
398  if (!PSI->hasInstrumentationProfile())
399  return std::unique_ptr<FunctionOutliningMultiRegionInfo>();
400 
401  std::unique_ptr<FunctionOutliningMultiRegionInfo> OutliningInfo =
402  llvm::make_unique<FunctionOutliningMultiRegionInfo>();
403 
404  auto IsSingleEntry = [](SmallVectorImpl<BasicBlock *> &BlockList) {
405  BasicBlock *Dom = BlockList.front();
406  return BlockList.size() > 1 && Dom->hasNPredecessors(1);
407  };
408 
409  auto IsSingleExit =
410  [&ORE](SmallVectorImpl<BasicBlock *> &BlockList) -> BasicBlock * {
411  BasicBlock *ExitBlock = nullptr;
412  for (auto *Block : BlockList) {
413  for (auto SI = succ_begin(Block); SI != succ_end(Block); ++SI) {
414  if (!is_contained(BlockList, *SI)) {
415  if (ExitBlock) {
416  ORE.emit([&]() {
417  return OptimizationRemarkMissed(DEBUG_TYPE, "MultiExitRegion",
418  &SI->front())
419  << "Region dominated by "
420  << ore::NV("Block", BlockList.front()->getName())
421  << " has more than one region exit edge.";
422  });
423  return nullptr;
424  } else
425  ExitBlock = Block;
426  }
427  }
428  }
429  return ExitBlock;
430  };
431 
432  auto BBProfileCount = [BFI](BasicBlock *BB) {
433  return BFI->getBlockProfileCount(BB)
434  ? BFI->getBlockProfileCount(BB).getValue()
435  : 0;
436  };
437 
438  // Use the same computeBBInlineCost function to compute the cost savings of
439  // the outlining the candidate region.
440  int OverallFunctionCost = 0;
441  for (auto &BB : *F)
442  OverallFunctionCost += computeBBInlineCost(&BB);
443 
444 #ifndef NDEBUG
446  dbgs() << "OverallFunctionCost = " << OverallFunctionCost << "\n";
447 #endif
448  int MinOutlineRegionCost =
449  static_cast<int>(OverallFunctionCost * MinRegionSizeRatio);
450  BranchProbability MinBranchProbability(
451  static_cast<int>(ColdBranchRatio * MinBlockCounterExecution),
452  MinBlockCounterExecution);
453  bool ColdCandidateFound = false;
454  BasicBlock *CurrEntry = EntryBlock;
455  std::vector<BasicBlock *> DFS;
456  DenseMap<BasicBlock *, bool> VisitedMap;
457  DFS.push_back(CurrEntry);
458  VisitedMap[CurrEntry] = true;
459  // Use Depth First Search on the basic blocks to find CFG edges that are
460  // considered cold.
461  // Cold regions considered must also have its inline cost compared to the
462  // overall inline cost of the original function. The region is outlined only
463  // if it reduced the inline cost of the function by 'MinOutlineRegionCost' or
464  // more.
465  while (!DFS.empty()) {
466  auto *thisBB = DFS.back();
467  DFS.pop_back();
468  // Only consider regions with predecessor blocks that are considered
469  // not-cold (default: part of the top 99.99% of all block counters)
470  // AND greater than our minimum block execution count (default: 100).
471  if (PSI->isColdBlock(thisBB, BFI) ||
472  BBProfileCount(thisBB) < MinBlockCounterExecution)
473  continue;
474  for (auto SI = succ_begin(thisBB); SI != succ_end(thisBB); ++SI) {
475  if (VisitedMap[*SI])
476  continue;
477  VisitedMap[*SI] = true;
478  DFS.push_back(*SI);
479  // If branch isn't cold, we skip to the next one.
480  BranchProbability SuccProb = BPI.getEdgeProbability(thisBB, *SI);
481  if (SuccProb > MinBranchProbability)
482  continue;
483 #ifndef NDEBUG
484  if (TracePartialInlining) {
485  dbgs() << "Found cold edge: " << thisBB->getName() << "->"
486  << (*SI)->getName() << "\nBranch Probability = " << SuccProb
487  << "\n";
488  }
489 #endif
490  SmallVector<BasicBlock *, 8> DominateVector;
491  DT.getDescendants(*SI, DominateVector);
492  // We can only outline single entry regions (for now).
493  if (!IsSingleEntry(DominateVector))
494  continue;
495  BasicBlock *ExitBlock = nullptr;
496  // We can only outline single exit regions (for now).
497  if (!(ExitBlock = IsSingleExit(DominateVector)))
498  continue;
499  int OutlineRegionCost = 0;
500  for (auto *BB : DominateVector)
501  OutlineRegionCost += computeBBInlineCost(BB);
502 
503 #ifndef NDEBUG
505  dbgs() << "OutlineRegionCost = " << OutlineRegionCost << "\n";
506 #endif
507 
508  if (OutlineRegionCost < MinOutlineRegionCost) {
509  ORE.emit([&]() {
510  return OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly",
511  &SI->front())
512  << ore::NV("Callee", F) << " inline cost-savings smaller than "
513  << ore::NV("Cost", MinOutlineRegionCost);
514  });
515  continue;
516  }
517  // For now, ignore blocks that belong to a SISE region that is a
518  // candidate for outlining. In the future, we may want to look
519  // at inner regions because the outer region may have live-exit
520  // variables.
521  for (auto *BB : DominateVector)
522  VisitedMap[BB] = true;
523  // ReturnBlock here means the block after the outline call
524  BasicBlock *ReturnBlock = ExitBlock->getSingleSuccessor();
525  // assert(ReturnBlock && "ReturnBlock is NULL somehow!");
526  FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegInfo(
527  DominateVector, DominateVector.front(), ExitBlock, ReturnBlock);
528  RegInfo.Region = DominateVector;
529  OutliningInfo->ORI.push_back(RegInfo);
530 #ifndef NDEBUG
531  if (TracePartialInlining) {
532  dbgs() << "Found Cold Candidate starting at block: "
533  << DominateVector.front()->getName() << "\n";
534  }
535 #endif
536  ColdCandidateFound = true;
537  NumColdRegionsFound++;
538  }
539  }
540  if (ColdCandidateFound)
541  return OutliningInfo;
542  else
543  return std::unique_ptr<FunctionOutliningMultiRegionInfo>();
544 }
545 
546 std::unique_ptr<FunctionOutliningInfo>
547 PartialInlinerImpl::computeOutliningInfo(Function *F) {
548  BasicBlock *EntryBlock = &F->front();
549  BranchInst *BR = dyn_cast<BranchInst>(EntryBlock->getTerminator());
550  if (!BR || BR->isUnconditional())
551  return std::unique_ptr<FunctionOutliningInfo>();
552 
553  // Returns true if Succ is BB's successor
554  auto IsSuccessor = [](BasicBlock *Succ, BasicBlock *BB) {
555  return is_contained(successors(BB), Succ);
556  };
557 
558  auto IsReturnBlock = [](BasicBlock *BB) {
559  Instruction *TI = BB->getTerminator();
560  return isa<ReturnInst>(TI);
561  };
562 
563  auto GetReturnBlock = [&](BasicBlock *Succ1, BasicBlock *Succ2) {
564  if (IsReturnBlock(Succ1))
565  return std::make_tuple(Succ1, Succ2);
566  if (IsReturnBlock(Succ2))
567  return std::make_tuple(Succ2, Succ1);
568 
569  return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr);
570  };
571 
572  // Detect a triangular shape:
573  auto GetCommonSucc = [&](BasicBlock *Succ1, BasicBlock *Succ2) {
574  if (IsSuccessor(Succ1, Succ2))
575  return std::make_tuple(Succ1, Succ2);
576  if (IsSuccessor(Succ2, Succ1))
577  return std::make_tuple(Succ2, Succ1);
578 
579  return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr);
580  };
581 
582  std::unique_ptr<FunctionOutliningInfo> OutliningInfo =
583  llvm::make_unique<FunctionOutliningInfo>();
584 
585  BasicBlock *CurrEntry = EntryBlock;
586  bool CandidateFound = false;
587  do {
588  // The number of blocks to be inlined has already reached
589  // the limit. When MaxNumInlineBlocks is set to 0 or 1, this
590  // disables partial inlining for the function.
591  if (OutliningInfo->GetNumInlinedBlocks() >= MaxNumInlineBlocks)
592  break;
593 
594  if (succ_size(CurrEntry) != 2)
595  break;
596 
597  BasicBlock *Succ1 = *succ_begin(CurrEntry);
598  BasicBlock *Succ2 = *(succ_begin(CurrEntry) + 1);
599 
600  BasicBlock *ReturnBlock, *NonReturnBlock;
601  std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);
602 
603  if (ReturnBlock) {
604  OutliningInfo->Entries.push_back(CurrEntry);
605  OutliningInfo->ReturnBlock = ReturnBlock;
606  OutliningInfo->NonReturnBlock = NonReturnBlock;
607  CandidateFound = true;
608  break;
609  }
610 
611  BasicBlock *CommSucc;
613  std::tie(CommSucc, OtherSucc) = GetCommonSucc(Succ1, Succ2);
614 
615  if (!CommSucc)
616  break;
617 
618  OutliningInfo->Entries.push_back(CurrEntry);
619  CurrEntry = OtherSucc;
620  } while (true);
621 
622  if (!CandidateFound)
623  return std::unique_ptr<FunctionOutliningInfo>();
624 
625  // Do sanity check of the entries: threre should not
626  // be any successors (not in the entry set) other than
627  // {ReturnBlock, NonReturnBlock}
628  assert(OutliningInfo->Entries[0] == &F->front() &&
629  "Function Entry must be the first in Entries vector");
630  DenseSet<BasicBlock *> Entries;
631  for (BasicBlock *E : OutliningInfo->Entries)
632  Entries.insert(E);
633 
634  // Returns true of BB has Predecessor which is not
635  // in Entries set.
636  auto HasNonEntryPred = [Entries](BasicBlock *BB) {
637  for (auto Pred : predecessors(BB)) {
638  if (!Entries.count(Pred))
639  return true;
640  }
641  return false;
642  };
643  auto CheckAndNormalizeCandidate =
644  [Entries, HasNonEntryPred](FunctionOutliningInfo *OutliningInfo) {
645  for (BasicBlock *E : OutliningInfo->Entries) {
646  for (auto Succ : successors(E)) {
647  if (Entries.count(Succ))
648  continue;
649  if (Succ == OutliningInfo->ReturnBlock)
650  OutliningInfo->ReturnBlockPreds.push_back(E);
651  else if (Succ != OutliningInfo->NonReturnBlock)
652  return false;
653  }
654  // There should not be any outside incoming edges either:
655  if (HasNonEntryPred(E))
656  return false;
657  }
658  return true;
659  };
660 
661  if (!CheckAndNormalizeCandidate(OutliningInfo.get()))
662  return std::unique_ptr<FunctionOutliningInfo>();
663 
664  // Now further growing the candidate's inlining region by
665  // peeling off dominating blocks from the outlining region:
666  while (OutliningInfo->GetNumInlinedBlocks() < MaxNumInlineBlocks) {
667  BasicBlock *Cand = OutliningInfo->NonReturnBlock;
668  if (succ_size(Cand) != 2)
669  break;
670 
671  if (HasNonEntryPred(Cand))
672  break;
673 
674  BasicBlock *Succ1 = *succ_begin(Cand);
675  BasicBlock *Succ2 = *(succ_begin(Cand) + 1);
676 
677  BasicBlock *ReturnBlock, *NonReturnBlock;
678  std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);
679  if (!ReturnBlock || ReturnBlock != OutliningInfo->ReturnBlock)
680  break;
681 
682  if (NonReturnBlock->getSinglePredecessor() != Cand)
683  break;
684 
685  // Now grow and update OutlininigInfo:
686  OutliningInfo->Entries.push_back(Cand);
687  OutliningInfo->NonReturnBlock = NonReturnBlock;
688  OutliningInfo->ReturnBlockPreds.push_back(Cand);
689  Entries.insert(Cand);
690  }
691 
692  return OutliningInfo;
693 }
694 
695 // Check if there is PGO data or user annoated branch data:
696 static bool hasProfileData(Function *F, FunctionOutliningInfo *OI) {
697  if (F->hasProfileData())
698  return true;
699  // Now check if any of the entry block has MD_prof data:
700  for (auto *E : OI->Entries) {
702  if (!BR || BR->isUnconditional())
703  continue;
704  uint64_t T, F;
705  if (BR->extractProfMetadata(T, F))
706  return true;
707  }
708  return false;
709 }
710 
712 PartialInlinerImpl::getOutliningCallBBRelativeFreq(FunctionCloner &Cloner) {
713  BasicBlock *OutliningCallBB = Cloner.OutlinedFunctions.back().second;
714  auto EntryFreq =
715  Cloner.ClonedFuncBFI->getBlockFreq(&Cloner.ClonedFunc->getEntryBlock());
716  auto OutliningCallFreq =
717  Cloner.ClonedFuncBFI->getBlockFreq(OutliningCallBB);
718  // FIXME Hackery needed because ClonedFuncBFI is based on the function BEFORE
719  // we outlined any regions, so we may encounter situations where the
720  // OutliningCallFreq is *slightly* bigger than the EntryFreq.
721  if (OutliningCallFreq.getFrequency() > EntryFreq.getFrequency()) {
722  OutliningCallFreq = EntryFreq;
723  }
724  auto OutlineRegionRelFreq = BranchProbability::getBranchProbability(
725  OutliningCallFreq.getFrequency(), EntryFreq.getFrequency());
726 
727  if (hasProfileData(Cloner.OrigFunc, Cloner.ClonedOI.get()))
728  return OutlineRegionRelFreq;
729 
730  // When profile data is not available, we need to be conservative in
731  // estimating the overall savings. Static branch prediction can usually
732  // guess the branch direction right (taken/non-taken), but the guessed
733  // branch probability is usually not biased enough. In case when the
734  // outlined region is predicted to be likely, its probability needs
735  // to be made higher (more biased) to not under-estimate the cost of
736  // function outlining. On the other hand, if the outlined region
737  // is predicted to be less likely, the predicted probablity is usually
738  // higher than the actual. For instance, the actual probability of the
739  // less likely target is only 5%, but the guessed probablity can be
740  // 40%. In the latter case, there is no need for further adjustement.
741  // FIXME: add an option for this.
742  if (OutlineRegionRelFreq < BranchProbability(45, 100))
743  return OutlineRegionRelFreq;
744 
745  OutlineRegionRelFreq = std::max(
746  OutlineRegionRelFreq, BranchProbability(OutlineRegionFreqPercent, 100));
747 
748  return OutlineRegionRelFreq;
749 }
750 
751 bool PartialInlinerImpl::shouldPartialInline(
752  CallSite CS, FunctionCloner &Cloner,
753  BlockFrequency WeightedOutliningRcost,
755  using namespace ore;
756 
757  Instruction *Call = CS.getInstruction();
759  assert(Callee == Cloner.ClonedFunc);
760 
761  if (SkipCostAnalysis)
762  return isInlineViable(*Callee);
763 
764  Function *Caller = CS.getCaller();
765  auto &CalleeTTI = (*GetTTI)(*Callee);
766  InlineCost IC = getInlineCost(CS, getInlineParams(), CalleeTTI,
767  *GetAssumptionCache, GetBFI, PSI, &ORE);
768 
769  if (IC.isAlways()) {
770  ORE.emit([&]() {
771  return OptimizationRemarkAnalysis(DEBUG_TYPE, "AlwaysInline", Call)
772  << NV("Callee", Cloner.OrigFunc)
773  << " should always be fully inlined, not partially";
774  });
775  return false;
776  }
777 
778  if (IC.isNever()) {
779  ORE.emit([&]() {
780  return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call)
781  << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
782  << NV("Caller", Caller)
783  << " because it should never be inlined (cost=never)";
784  });
785  return false;
786  }
787 
788  if (!IC) {
789  ORE.emit([&]() {
790  return OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly", Call)
791  << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
792  << NV("Caller", Caller) << " because too costly to inline (cost="
793  << NV("Cost", IC.getCost()) << ", threshold="
794  << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")";
795  });
796  return false;
797  }
798  const DataLayout &DL = Caller->getParent()->getDataLayout();
799 
800  // The savings of eliminating the call:
801  int NonWeightedSavings = getCallsiteCost(CS, DL);
802  BlockFrequency NormWeightedSavings(NonWeightedSavings);
803 
804  // Weighted saving is smaller than weighted cost, return false
805  if (NormWeightedSavings < WeightedOutliningRcost) {
806  ORE.emit([&]() {
807  return OptimizationRemarkAnalysis(DEBUG_TYPE, "OutliningCallcostTooHigh",
808  Call)
809  << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
810  << NV("Caller", Caller) << " runtime overhead (overhead="
811  << NV("Overhead", (unsigned)WeightedOutliningRcost.getFrequency())
812  << ", savings="
813  << NV("Savings", (unsigned)NormWeightedSavings.getFrequency())
814  << ")"
815  << " of making the outlined call is too high";
816  });
817 
818  return false;
819  }
820 
821  ORE.emit([&]() {
822  return OptimizationRemarkAnalysis(DEBUG_TYPE, "CanBePartiallyInlined", Call)
823  << NV("Callee", Cloner.OrigFunc) << " can be partially inlined into "
824  << NV("Caller", Caller) << " with cost=" << NV("Cost", IC.getCost())
825  << " (threshold="
826  << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")";
827  });
828  return true;
829 }
830 
831 // TODO: Ideally we should share Inliner's InlineCost Analysis code.
832 // For now use a simplified version. The returned 'InlineCost' will be used
833 // to esimate the size cost as well as runtime cost of the BB.
834 int PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB) {
835  int InlineCost = 0;
836  const DataLayout &DL = BB->getParent()->getParent()->getDataLayout();
837  for (Instruction &I : BB->instructionsWithoutDebug()) {
838  // Skip free instructions.
839  switch (I.getOpcode()) {
840  case Instruction::BitCast:
841  case Instruction::PtrToInt:
842  case Instruction::IntToPtr:
843  case Instruction::Alloca:
844  case Instruction::PHI:
845  continue;
846  case Instruction::GetElementPtr:
847  if (cast<GetElementPtrInst>(&I)->hasAllZeroIndices())
848  continue;
849  break;
850  default:
851  break;
852  }
853 
854  if (I.isLifetimeStartOrEnd())
855  continue;
856 
857  if (CallInst *CI = dyn_cast<CallInst>(&I)) {
858  InlineCost += getCallsiteCost(CallSite(CI), DL);
859  continue;
860  }
861 
862  if (InvokeInst *II = dyn_cast<InvokeInst>(&I)) {
863  InlineCost += getCallsiteCost(CallSite(II), DL);
864  continue;
865  }
866 
867  if (SwitchInst *SI = dyn_cast<SwitchInst>(&I)) {
868  InlineCost += (SI->getNumCases() + 1) * InlineConstants::InstrCost;
869  continue;
870  }
871  InlineCost += InlineConstants::InstrCost;
872  }
873  return InlineCost;
874 }
875 
876 std::tuple<int, int>
877 PartialInlinerImpl::computeOutliningCosts(FunctionCloner &Cloner) {
878  int OutliningFuncCallCost = 0, OutlinedFunctionCost = 0;
879  for (auto FuncBBPair : Cloner.OutlinedFunctions) {
880  Function *OutlinedFunc = FuncBBPair.first;
881  BasicBlock* OutliningCallBB = FuncBBPair.second;
882  // Now compute the cost of the call sequence to the outlined function
883  // 'OutlinedFunction' in BB 'OutliningCallBB':
884  OutliningFuncCallCost += computeBBInlineCost(OutliningCallBB);
885 
886  // Now compute the cost of the extracted/outlined function itself:
887  for (BasicBlock &BB : *OutlinedFunc)
888  OutlinedFunctionCost += computeBBInlineCost(&BB);
889  }
890  assert(OutlinedFunctionCost >= Cloner.OutlinedRegionCost &&
891  "Outlined function cost should be no less than the outlined region");
892 
893  // The code extractor introduces a new root and exit stub blocks with
894  // additional unconditional branches. Those branches will be eliminated
895  // later with bb layout. The cost should be adjusted accordingly:
896  OutlinedFunctionCost -=
897  2 * InlineConstants::InstrCost * Cloner.OutlinedFunctions.size();
898 
899  int OutliningRuntimeOverhead =
900  OutliningFuncCallCost +
901  (OutlinedFunctionCost - Cloner.OutlinedRegionCost) +
903 
904  return std::make_tuple(OutliningFuncCallCost, OutliningRuntimeOverhead);
905 }
906 
907 // Create the callsite to profile count map which is
908 // used to update the original function's entry count,
909 // after the function is partially inlined into the callsite.
910 void PartialInlinerImpl::computeCallsiteToProfCountMap(
911  Function *DuplicateFunction,
912  DenseMap<User *, uint64_t> &CallSiteToProfCountMap) {
913  std::vector<User *> Users(DuplicateFunction->user_begin(),
914  DuplicateFunction->user_end());
915  Function *CurrentCaller = nullptr;
916  std::unique_ptr<BlockFrequencyInfo> TempBFI;
917  BlockFrequencyInfo *CurrentCallerBFI = nullptr;
918 
919  auto ComputeCurrBFI = [&,this](Function *Caller) {
920  // For the old pass manager:
921  if (!GetBFI) {
922  DominatorTree DT(*Caller);
923  LoopInfo LI(DT);
924  BranchProbabilityInfo BPI(*Caller, LI);
925  TempBFI.reset(new BlockFrequencyInfo(*Caller, BPI, LI));
926  CurrentCallerBFI = TempBFI.get();
927  } else {
928  // New pass manager:
929  CurrentCallerBFI = &(*GetBFI)(*Caller);
930  }
931  };
932 
933  for (User *User : Users) {
934  CallSite CS = getCallSite(User);
935  Function *Caller = CS.getCaller();
936  if (CurrentCaller != Caller) {
937  CurrentCaller = Caller;
938  ComputeCurrBFI(Caller);
939  } else {
940  assert(CurrentCallerBFI && "CallerBFI is not set");
941  }
942  BasicBlock *CallBB = CS.getInstruction()->getParent();
943  auto Count = CurrentCallerBFI->getBlockProfileCount(CallBB);
944  if (Count)
945  CallSiteToProfCountMap[User] = *Count;
946  else
947  CallSiteToProfCountMap[User] = 0;
948  }
949 }
950 
951 PartialInlinerImpl::FunctionCloner::FunctionCloner(
952  Function *F, FunctionOutliningInfo *OI, OptimizationRemarkEmitter &ORE)
953  : OrigFunc(F), ORE(ORE) {
954  ClonedOI = llvm::make_unique<FunctionOutliningInfo>();
955 
956  // Clone the function, so that we can hack away on it.
957  ValueToValueMapTy VMap;
958  ClonedFunc = CloneFunction(F, VMap);
959 
960  ClonedOI->ReturnBlock = cast<BasicBlock>(VMap[OI->ReturnBlock]);
961  ClonedOI->NonReturnBlock = cast<BasicBlock>(VMap[OI->NonReturnBlock]);
962  for (BasicBlock *BB : OI->Entries) {
963  ClonedOI->Entries.push_back(cast<BasicBlock>(VMap[BB]));
964  }
965  for (BasicBlock *E : OI->ReturnBlockPreds) {
966  BasicBlock *NewE = cast<BasicBlock>(VMap[E]);
967  ClonedOI->ReturnBlockPreds.push_back(NewE);
968  }
969  // Go ahead and update all uses to the duplicate, so that we can just
970  // use the inliner functionality when we're done hacking.
971  F->replaceAllUsesWith(ClonedFunc);
972 }
973 
974 PartialInlinerImpl::FunctionCloner::FunctionCloner(
975  Function *F, FunctionOutliningMultiRegionInfo *OI,
977  : OrigFunc(F), ORE(ORE) {
978  ClonedOMRI = llvm::make_unique<FunctionOutliningMultiRegionInfo>();
979 
980  // Clone the function, so that we can hack away on it.
981  ValueToValueMapTy VMap;
982  ClonedFunc = CloneFunction(F, VMap);
983 
984  // Go through all Outline Candidate Regions and update all BasicBlock
985  // information.
986  for (FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegionInfo :
987  OI->ORI) {
989  for (BasicBlock *BB : RegionInfo.Region) {
990  Region.push_back(cast<BasicBlock>(VMap[BB]));
991  }
992  BasicBlock *NewEntryBlock = cast<BasicBlock>(VMap[RegionInfo.EntryBlock]);
993  BasicBlock *NewExitBlock = cast<BasicBlock>(VMap[RegionInfo.ExitBlock]);
994  BasicBlock *NewReturnBlock = nullptr;
995  if (RegionInfo.ReturnBlock)
996  NewReturnBlock = cast<BasicBlock>(VMap[RegionInfo.ReturnBlock]);
997  FunctionOutliningMultiRegionInfo::OutlineRegionInfo MappedRegionInfo(
998  Region, NewEntryBlock, NewExitBlock, NewReturnBlock);
999  ClonedOMRI->ORI.push_back(MappedRegionInfo);
1000  }
1001  // Go ahead and update all uses to the duplicate, so that we can just
1002  // use the inliner functionality when we're done hacking.
1003  F->replaceAllUsesWith(ClonedFunc);
1004 }
1005 
1006 void PartialInlinerImpl::FunctionCloner::NormalizeReturnBlock() {
1007  auto getFirstPHI = [](BasicBlock *BB) {
1008  BasicBlock::iterator I = BB->begin();
1009  PHINode *FirstPhi = nullptr;
1010  while (I != BB->end()) {
1011  PHINode *Phi = dyn_cast<PHINode>(I);
1012  if (!Phi)
1013  break;
1014  if (!FirstPhi) {
1015  FirstPhi = Phi;
1016  break;
1017  }
1018  }
1019  return FirstPhi;
1020  };
1021 
1022  // Shouldn't need to normalize PHIs if we're not outlining non-early return
1023  // blocks.
1024  if (!ClonedOI)
1025  return;
1026 
1027  // Special hackery is needed with PHI nodes that have inputs from more than
1028  // one extracted block. For simplicity, just split the PHIs into a two-level
1029  // sequence of PHIs, some of which will go in the extracted region, and some
1030  // of which will go outside.
1031  BasicBlock *PreReturn = ClonedOI->ReturnBlock;
1032  // only split block when necessary:
1033  PHINode *FirstPhi = getFirstPHI(PreReturn);
1034  unsigned NumPredsFromEntries = ClonedOI->ReturnBlockPreds.size();
1035 
1036  if (!FirstPhi || FirstPhi->getNumIncomingValues() <= NumPredsFromEntries + 1)
1037  return;
1038 
1039  auto IsTrivialPhi = [](PHINode *PN) -> Value * {
1040  Value *CommonValue = PN->getIncomingValue(0);
1041  if (all_of(PN->incoming_values(),
1042  [&](Value *V) { return V == CommonValue; }))
1043  return CommonValue;
1044  return nullptr;
1045  };
1046 
1047  ClonedOI->ReturnBlock = ClonedOI->ReturnBlock->splitBasicBlock(
1048  ClonedOI->ReturnBlock->getFirstNonPHI()->getIterator());
1049  BasicBlock::iterator I = PreReturn->begin();
1050  Instruction *Ins = &ClonedOI->ReturnBlock->front();
1052  while (I != PreReturn->end()) {
1053  PHINode *OldPhi = dyn_cast<PHINode>(I);
1054  if (!OldPhi)
1055  break;
1056 
1057  PHINode *RetPhi =
1058  PHINode::Create(OldPhi->getType(), NumPredsFromEntries + 1, "", Ins);
1059  OldPhi->replaceAllUsesWith(RetPhi);
1060  Ins = ClonedOI->ReturnBlock->getFirstNonPHI();
1061 
1062  RetPhi->addIncoming(&*I, PreReturn);
1063  for (BasicBlock *E : ClonedOI->ReturnBlockPreds) {
1064  RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(E), E);
1065  OldPhi->removeIncomingValue(E);
1066  }
1067 
1068  // After incoming values splitting, the old phi may become trivial.
1069  // Keeping the trivial phi can introduce definition inside the outline
1070  // region which is live-out, causing necessary overhead (load, store
1071  // arg passing etc).
1072  if (auto *OldPhiVal = IsTrivialPhi(OldPhi)) {
1073  OldPhi->replaceAllUsesWith(OldPhiVal);
1074  DeadPhis.push_back(OldPhi);
1075  }
1076  ++I;
1077  }
1078  for (auto *DP : DeadPhis)
1079  DP->eraseFromParent();
1080 
1081  for (auto E : ClonedOI->ReturnBlockPreds) {
1082  E->getTerminator()->replaceUsesOfWith(PreReturn, ClonedOI->ReturnBlock);
1083  }
1084 }
1085 
1086 bool PartialInlinerImpl::FunctionCloner::doMultiRegionFunctionOutlining() {
1087 
1088  auto ComputeRegionCost = [](SmallVectorImpl<BasicBlock *> &Region) {
1089  int Cost = 0;
1090  for (BasicBlock* BB : Region)
1091  Cost += computeBBInlineCost(BB);
1092  return Cost;
1093  };
1094 
1095  assert(ClonedOMRI && "Expecting OutlineInfo for multi region outline");
1096 
1097  if (ClonedOMRI->ORI.empty())
1098  return false;
1099 
1100  // The CodeExtractor needs a dominator tree.
1101  DominatorTree DT;
1102  DT.recalculate(*ClonedFunc);
1103 
1104  // Manually calculate a BlockFrequencyInfo and BranchProbabilityInfo.
1105  LoopInfo LI(DT);
1106  BranchProbabilityInfo BPI(*ClonedFunc, LI);
1107  ClonedFuncBFI.reset(new BlockFrequencyInfo(*ClonedFunc, BPI, LI));
1108 
1109  SetVector<Value *> Inputs, Outputs, Sinks;
1110  for (FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegionInfo :
1111  ClonedOMRI->ORI) {
1112  int CurrentOutlinedRegionCost = ComputeRegionCost(RegionInfo.Region);
1113 
1114  CodeExtractor CE(RegionInfo.Region, &DT, /*AggregateArgs*/ false,
1115  ClonedFuncBFI.get(), &BPI, /* AllowVarargs */ false);
1116 
1117  CE.findInputsOutputs(Inputs, Outputs, Sinks);
1118 
1119 #ifndef NDEBUG
1120  if (TracePartialInlining) {
1121  dbgs() << "inputs: " << Inputs.size() << "\n";
1122  dbgs() << "outputs: " << Outputs.size() << "\n";
1123  for (Value *value : Inputs)
1124  dbgs() << "value used in func: " << *value << "\n";
1125  for (Value *output : Outputs)
1126  dbgs() << "instr used in func: " << *output << "\n";
1127  }
1128 #endif
1129  // Do not extract regions that have live exit variables.
1130  if (Outputs.size() > 0 && !ForceLiveExit)
1131  continue;
1132 
1133  Function *OutlinedFunc = CE.extractCodeRegion();
1134 
1135  if (OutlinedFunc) {
1136  CallSite OCS = PartialInlinerImpl::getOneCallSiteTo(OutlinedFunc);
1137  BasicBlock *OutliningCallBB = OCS.getInstruction()->getParent();
1138  assert(OutliningCallBB->getParent() == ClonedFunc);
1139  OutlinedFunctions.push_back(std::make_pair(OutlinedFunc,OutliningCallBB));
1140  NumColdRegionsOutlined++;
1141  OutlinedRegionCost += CurrentOutlinedRegionCost;
1142 
1143  if (MarkOutlinedColdCC) {
1144  OutlinedFunc->setCallingConv(CallingConv::Cold);
1146  }
1147  } else
1148  ORE.emit([&]() {
1149  return OptimizationRemarkMissed(DEBUG_TYPE, "ExtractFailed",
1150  &RegionInfo.Region.front()->front())
1151  << "Failed to extract region at block "
1152  << ore::NV("Block", RegionInfo.Region.front());
1153  });
1154  }
1155 
1156  return !OutlinedFunctions.empty();
1157 }
1158 
1159 Function *
1160 PartialInlinerImpl::FunctionCloner::doSingleRegionFunctionOutlining() {
1161  // Returns true if the block is to be partial inlined into the caller
1162  // (i.e. not to be extracted to the out of line function)
1163  auto ToBeInlined = [&, this](BasicBlock *BB) {
1164  return BB == ClonedOI->ReturnBlock ||
1165  (std::find(ClonedOI->Entries.begin(), ClonedOI->Entries.end(), BB) !=
1166  ClonedOI->Entries.end());
1167  };
1168 
1169  assert(ClonedOI && "Expecting OutlineInfo for single region outline");
1170  // The CodeExtractor needs a dominator tree.
1171  DominatorTree DT;
1172  DT.recalculate(*ClonedFunc);
1173 
1174  // Manually calculate a BlockFrequencyInfo and BranchProbabilityInfo.
1175  LoopInfo LI(DT);
1176  BranchProbabilityInfo BPI(*ClonedFunc, LI);
1177  ClonedFuncBFI.reset(new BlockFrequencyInfo(*ClonedFunc, BPI, LI));
1178 
1179  // Gather up the blocks that we're going to extract.
1180  std::vector<BasicBlock *> ToExtract;
1181  ToExtract.push_back(ClonedOI->NonReturnBlock);
1182  OutlinedRegionCost +=
1183  PartialInlinerImpl::computeBBInlineCost(ClonedOI->NonReturnBlock);
1184  for (BasicBlock &BB : *ClonedFunc)
1185  if (!ToBeInlined(&BB) && &BB != ClonedOI->NonReturnBlock) {
1186  ToExtract.push_back(&BB);
1187  // FIXME: the code extractor may hoist/sink more code
1188  // into the outlined function which may make the outlining
1189  // overhead (the difference of the outlined function cost
1190  // and OutliningRegionCost) look larger.
1191  OutlinedRegionCost += computeBBInlineCost(&BB);
1192  }
1193 
1194  // Extract the body of the if.
1195  Function *OutlinedFunc =
1196  CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false,
1197  ClonedFuncBFI.get(), &BPI,
1198  /* AllowVarargs */ true)
1199  .extractCodeRegion();
1200 
1201  if (OutlinedFunc) {
1202  BasicBlock *OutliningCallBB =
1203  PartialInlinerImpl::getOneCallSiteTo(OutlinedFunc)
1204  .getInstruction()
1205  ->getParent();
1206  assert(OutliningCallBB->getParent() == ClonedFunc);
1207  OutlinedFunctions.push_back(std::make_pair(OutlinedFunc, OutliningCallBB));
1208  } else
1209  ORE.emit([&]() {
1210  return OptimizationRemarkMissed(DEBUG_TYPE, "ExtractFailed",
1211  &ToExtract.front()->front())
1212  << "Failed to extract region at block "
1213  << ore::NV("Block", ToExtract.front());
1214  });
1215 
1216  return OutlinedFunc;
1217 }
1218 
1219 PartialInlinerImpl::FunctionCloner::~FunctionCloner() {
1220  // Ditch the duplicate, since we're done with it, and rewrite all remaining
1221  // users (function pointers, etc.) back to the original function.
1222  ClonedFunc->replaceAllUsesWith(OrigFunc);
1223  ClonedFunc->eraseFromParent();
1224  if (!IsFunctionInlined) {
1225  // Remove each function that was speculatively created if there is no
1226  // reference.
1227  for (auto FuncBBPair : OutlinedFunctions) {
1228  Function *Func = FuncBBPair.first;
1229  Func->eraseFromParent();
1230  }
1231  }
1232 }
1233 
1234 std::pair<bool, Function *> PartialInlinerImpl::unswitchFunction(Function *F) {
1235 
1236  if (F->hasAddressTaken())
1237  return {false, nullptr};
1238 
1239  // Let inliner handle it
1240  if (F->hasFnAttribute(Attribute::AlwaysInline))
1241  return {false, nullptr};
1242 
1243  if (F->hasFnAttribute(Attribute::NoInline))
1244  return {false, nullptr};
1245 
1246  if (PSI->isFunctionEntryCold(F))
1247  return {false, nullptr};
1248 
1249  if (empty(F->users()))
1250  return {false, nullptr};
1251 
1253 
1254  // Only try to outline cold regions if we have a profile summary, which
1255  // implies we have profiling information.
1256  if (PSI->hasProfileSummary() && F->hasProfileData() &&
1258  std::unique_ptr<FunctionOutliningMultiRegionInfo> OMRI =
1259  computeOutliningColdRegionsInfo(F, ORE);
1260  if (OMRI) {
1261  FunctionCloner Cloner(F, OMRI.get(), ORE);
1262 
1263 #ifndef NDEBUG
1264  if (TracePartialInlining) {
1265  dbgs() << "HotCountThreshold = " << PSI->getHotCountThreshold() << "\n";
1266  dbgs() << "ColdCountThreshold = " << PSI->getColdCountThreshold()
1267  << "\n";
1268  }
1269 #endif
1270  bool DidOutline = Cloner.doMultiRegionFunctionOutlining();
1271 
1272  if (DidOutline) {
1273 #ifndef NDEBUG
1274  if (TracePartialInlining) {
1275  dbgs() << ">>>>>> Outlined (Cloned) Function >>>>>>\n";
1276  Cloner.ClonedFunc->print(dbgs());
1277  dbgs() << "<<<<<< Outlined (Cloned) Function <<<<<<\n";
1278  }
1279 #endif
1280 
1281  if (tryPartialInline(Cloner))
1282  return {true, nullptr};
1283  }
1284  }
1285  }
1286 
1287  // Fall-thru to regular partial inlining if we:
1288  // i) can't find any cold regions to outline, or
1289  // ii) can't inline the outlined function anywhere.
1290  std::unique_ptr<FunctionOutliningInfo> OI = computeOutliningInfo(F);
1291  if (!OI)
1292  return {false, nullptr};
1293 
1294  FunctionCloner Cloner(F, OI.get(), ORE);
1295  Cloner.NormalizeReturnBlock();
1296 
1297  Function *OutlinedFunction = Cloner.doSingleRegionFunctionOutlining();
1298 
1299  if (!OutlinedFunction)
1300  return {false, nullptr};
1301 
1302  bool AnyInline = tryPartialInline(Cloner);
1303 
1304  if (AnyInline)
1305  return {true, OutlinedFunction};
1306 
1307  return {false, nullptr};
1308 }
1309 
1310 bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) {
1311  if (Cloner.OutlinedFunctions.empty())
1312  return false;
1313 
1314  int SizeCost = 0;
1315  BlockFrequency WeightedRcost;
1316  int NonWeightedRcost;
1317  std::tie(SizeCost, NonWeightedRcost) = computeOutliningCosts(Cloner);
1318 
1319  // Only calculate RelativeToEntryFreq when we are doing single region
1320  // outlining.
1321  BranchProbability RelativeToEntryFreq;
1322  if (Cloner.ClonedOI) {
1323  RelativeToEntryFreq = getOutliningCallBBRelativeFreq(Cloner);
1324  } else
1325  // RelativeToEntryFreq doesn't make sense when we have more than one
1326  // outlined call because each call will have a different relative frequency
1327  // to the entry block. We can consider using the average, but the
1328  // usefulness of that information is questionable. For now, assume we never
1329  // execute the calls to outlined functions.
1330  RelativeToEntryFreq = BranchProbability(0, 1);
1331 
1332  WeightedRcost = BlockFrequency(NonWeightedRcost) * RelativeToEntryFreq;
1333 
1334  // The call sequence(s) to the outlined function(s) are larger than the sum of
1335  // the original outlined region size(s), it does not increase the chances of
1336  // inlining the function with outlining (The inliner uses the size increase to
1337  // model the cost of inlining a callee).
1338  if (!SkipCostAnalysis && Cloner.OutlinedRegionCost < SizeCost) {
1339  OptimizationRemarkEmitter OrigFuncORE(Cloner.OrigFunc);
1340  DebugLoc DLoc;
1341  BasicBlock *Block;
1342  std::tie(DLoc, Block) = getOneDebugLoc(Cloner.ClonedFunc);
1343  OrigFuncORE.emit([&]() {
1344  return OptimizationRemarkAnalysis(DEBUG_TYPE, "OutlineRegionTooSmall",
1345  DLoc, Block)
1346  << ore::NV("Function", Cloner.OrigFunc)
1347  << " not partially inlined into callers (Original Size = "
1348  << ore::NV("OutlinedRegionOriginalSize", Cloner.OutlinedRegionCost)
1349  << ", Size of call sequence to outlined function = "
1350  << ore::NV("NewSize", SizeCost) << ")";
1351  });
1352  return false;
1353  }
1354 
1355  assert(empty(Cloner.OrigFunc->users()) &&
1356  "F's users should all be replaced!");
1357 
1358  std::vector<User *> Users(Cloner.ClonedFunc->user_begin(),
1359  Cloner.ClonedFunc->user_end());
1360 
1361  DenseMap<User *, uint64_t> CallSiteToProfCountMap;
1362  auto CalleeEntryCount = Cloner.OrigFunc->getEntryCount();
1363  if (CalleeEntryCount)
1364  computeCallsiteToProfCountMap(Cloner.ClonedFunc, CallSiteToProfCountMap);
1365 
1366  uint64_t CalleeEntryCountV =
1367  (CalleeEntryCount ? CalleeEntryCount.getCount() : 0);
1368 
1369  bool AnyInline = false;
1370  for (User *User : Users) {
1371  CallSite CS = getCallSite(User);
1372 
1373  if (IsLimitReached())
1374  continue;
1375 
1376  OptimizationRemarkEmitter CallerORE(CS.getCaller());
1377  if (!shouldPartialInline(CS, Cloner, WeightedRcost, CallerORE))
1378  continue;
1379 
1380  // Construct remark before doing the inlining, as after successful inlining
1381  // the callsite is removed.
1382  OptimizationRemark OR(DEBUG_TYPE, "PartiallyInlined", CS.getInstruction());
1383  OR << ore::NV("Callee", Cloner.OrigFunc) << " partially inlined into "
1384  << ore::NV("Caller", CS.getCaller());
1385 
1386  InlineFunctionInfo IFI(nullptr, GetAssumptionCache, PSI);
1387  // We can only forward varargs when we outlined a single region, else we
1388  // bail on vararg functions.
1389  if (!InlineFunction(CS, IFI, nullptr, true,
1390  (Cloner.ClonedOI ? Cloner.OutlinedFunctions.back().first
1391  : nullptr)))
1392  continue;
1393 
1394  CallerORE.emit(OR);
1395 
1396  // Now update the entry count:
1397  if (CalleeEntryCountV && CallSiteToProfCountMap.count(User)) {
1398  uint64_t CallSiteCount = CallSiteToProfCountMap[User];
1399  CalleeEntryCountV -= std::min(CalleeEntryCountV, CallSiteCount);
1400  }
1401 
1402  AnyInline = true;
1403  NumPartialInlining++;
1404  // Update the stats
1405  if (Cloner.ClonedOI)
1406  NumPartialInlined++;
1407  else
1408  NumColdOutlinePartialInlined++;
1409 
1410  }
1411 
1412  if (AnyInline) {
1413  Cloner.IsFunctionInlined = true;
1414  if (CalleeEntryCount)
1415  Cloner.OrigFunc->setEntryCount(
1416  CalleeEntryCount.setCount(CalleeEntryCountV));
1417  OptimizationRemarkEmitter OrigFuncORE(Cloner.OrigFunc);
1418  OrigFuncORE.emit([&]() {
1419  return OptimizationRemark(DEBUG_TYPE, "PartiallyInlined", Cloner.OrigFunc)
1420  << "Partially inlined into at least one caller";
1421  });
1422 
1423  }
1424 
1425  return AnyInline;
1426 }
1427 
1428 bool PartialInlinerImpl::run(Module &M) {
1430  return false;
1431 
1432  std::vector<Function *> Worklist;
1433  Worklist.reserve(M.size());
1434  for (Function &F : M)
1435  if (!F.use_empty() && !F.isDeclaration())
1436  Worklist.push_back(&F);
1437 
1438  bool Changed = false;
1439  while (!Worklist.empty()) {
1440  Function *CurrFunc = Worklist.back();
1441  Worklist.pop_back();
1442 
1443  if (CurrFunc->use_empty())
1444  continue;
1445 
1446  bool Recursive = false;
1447  for (User *U : CurrFunc->users())
1448  if (Instruction *I = dyn_cast<Instruction>(U))
1449  if (I->getParent()->getParent() == CurrFunc) {
1450  Recursive = true;
1451  break;
1452  }
1453  if (Recursive)
1454  continue;
1455 
1456  std::pair<bool, Function * > Result = unswitchFunction(CurrFunc);
1457  if (Result.second)
1458  Worklist.push_back(Result.second);
1459  Changed |= Result.first;
1460  }
1461 
1462  return Changed;
1463 }
1464 
1466 
1467 INITIALIZE_PASS_BEGIN(PartialInlinerLegacyPass, "partial-inliner",
1468  "Partial Inliner", false, false)
1472 INITIALIZE_PASS_END(PartialInlinerLegacyPass, "partial-inliner",
1473  "Partial Inliner", false, false)
1474 
1476  return new PartialInlinerLegacyPass();
1477 }
1478 
1480  ModuleAnalysisManager &AM) {
1481  auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
1482 
1483  std::function<AssumptionCache &(Function &)> GetAssumptionCache =
1484  [&FAM](Function &F) -> AssumptionCache & {
1485  return FAM.getResult<AssumptionAnalysis>(F);
1486  };
1487 
1488  std::function<BlockFrequencyInfo &(Function &)> GetBFI =
1489  [&FAM](Function &F) -> BlockFrequencyInfo & {
1490  return FAM.getResult<BlockFrequencyAnalysis>(F);
1491  };
1492 
1493  std::function<TargetTransformInfo &(Function &)> GetTTI =
1494  [&FAM](Function &F) -> TargetTransformInfo & {
1495  return FAM.getResult<TargetIRAnalysis>(F);
1496  };
1497 
1499 
1500  if (PartialInlinerImpl(&GetAssumptionCache, &GetTTI, {GetBFI}, PSI)
1501  .run(M))
1502  return PreservedAnalyses::none();
1503  return PreservedAnalyses::all();
1504 }
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:111
Utility class for extracting code into a new function.
Definition: CodeExtractor.h:52
Diagnostic information for missed-optimization remarks.
bool hasNPredecessors(unsigned N) const
Return true if this block has exactly N predecessors.
Definition: BasicBlock.cpp:261
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
DiagnosticInfoOptimizationBase::Argument NV
bool isNever() const
Definition: InlineCost.h:105
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:770
partial Partial Inliner
This class represents lattice values for constants.
Definition: AllocatorList.h:24
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:78
void initializePartialInlinerLegacyPassPass(PassRegistry &)
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:65
static cl::opt< int > MaxNumPartialInlining("max-partial-inlining", cl::init(-1), cl::Hidden, cl::ZeroOrMore, cl::desc("Max number of partial inlining. The default is unlimited"))
Implements a dense probed hash-table based set.
Definition: DenseSet.h:250
void recalculate(ParentType &Func)
recalculate - compute a dominator tree for the given function
Analysis providing profile information.
This class represents a function call, abstracting a target machine&#39;s calling convention.
Function * CloneFunction(Function *F, ValueToValueMapTy &VMap, ClonedCodeInfo *CodeInfo=nullptr)
Return a copy of the specified function and add it to that function&#39;s module.
An immutable pass that tracks lazily created AssumptionCache objects.
An efficient, type-erasing, non-owning reference to a callable.
Definition: STLExtras.h:117
A cache of @llvm.assume calls within a function.
Analysis pass providing the TargetTransformInfo.
uint64_t getFrequency() const
Returns the frequency as a fixpoint number scaled by the entry frequency.
const BasicBlock & back() const
Definition: Function.h:665
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.h:321
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1186
STATISTIC(NumFunctions, "Total number of functions")
A debug info location.
Definition: DebugLoc.h:34
F(f)
InlineResult InlineFunction(CallInst *C, InlineFunctionInfo &IFI, AAResults *CalleeAAR=nullptr, bool InsertLifetime=true)
This function inlines the called function into the basic block of the caller.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:138
iv Induction Variable Users
Definition: IVUsers.cpp:52
This class captures the data input to the InlineFunction call, and records the auxiliary results prod...
Definition: Cloning.h:174
Represents the cost of inlining a function.
Definition: InlineCost.h:64
static cl::opt< bool > ForceLiveExit("pi-force-live-exit-outline", cl::init(false), cl::Hidden, cl::desc("Force outline regions with live exits"))
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:269
AnalysisUsage & addRequired()
ModulePass * createPartialInliningPass()
createPartialInliningPass - This pass inlines parts of functions.
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:51
Value * removeIncomingValue(unsigned Idx, bool DeletePHIIfEmpty=true)
Remove an incoming value.
const DataLayout & getDataLayout() const
Get the data layout for the module&#39;s target platform.
Definition: Module.cpp:371
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
bool isInlineViable(Function &Callee)
Minimal filter to detect invalid constructs for inlining.
Diagnostic information for optimization analysis remarks.
static cl::opt< int > OutlineRegionFreqPercent("outline-region-freq-percent", cl::init(75), cl::Hidden, cl::ZeroOrMore, cl::desc("Relative frequency of outline region to " "the entry block"))
This file contains the simple types necessary to represent the attributes associated with functions a...
InstrTy * getInstruction() const
Definition: CallSite.h:92
partial inliner
Interval::succ_iterator succ_begin(Interval *I)
succ_begin/succ_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:103
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
static cl::opt< bool > SkipCostAnalysis("skip-partial-inlining-cost-analysis", cl::init(false), cl::ZeroOrMore, cl::ReallyHidden, cl::desc("Skip Cost Analysis"))
#define T
const T & getValue() const LLVM_LVALUE_FUNCTION
Definition: Optional.h:163
const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
Definition: BasicBlock.cpp:269
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:429
static cl::opt< unsigned > MinBlockCounterExecution("min-block-execution", cl::init(100), cl::Hidden, cl::desc("Minimum block executions to consider " "its BranchProbabilityInfo valid"))
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:145
amdgpu Simplify well known AMD library false Value * Callee
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: PassManager.h:157
Interval::succ_iterator succ_end(Interval *I)
Definition: Interval.h:106
void replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition: User.cpp:21
PreservedAnalyses run(Module &M, ModuleAnalysisManager &)
static cl::opt< unsigned > ExtraOutliningPenalty("partial-inlining-extra-penalty", cl::init(0), cl::Hidden, cl::desc("A debug option to add additional penalty to the computed one."))
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
void setCallingConv(CallingConv::ID CC)
Definition: Function.h:217
static cl::opt< bool > MarkOutlinedColdCC("pi-mark-coldcc", cl::init(false), cl::Hidden, cl::desc("Mark outline function calls with ColdCC"))
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:423
bool isAlways() const
Definition: InlineCost.h:104
Control flow instructions. These all have token chains.
Definition: ISDOpcodes.h:629
static cl::opt< bool > DisablePartialInlining("disable-partial-inlining", cl::init(false), cl::Hidden, cl::desc("Disable partial inlining"))
Wrapper pass for TargetTransformInfo.
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:154
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:234
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
Conditional or Unconditional Branch instruction.
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
static cl::opt< float > ColdBranchRatio("cold-branch-ratio", cl::init(0.1), cl::Hidden, cl::desc("Minimum BranchProbability to consider a region cold."))
iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug() const
Return a const iterator range over the instructions in the block, skipping any debug instructions...
Definition: BasicBlock.cpp:95
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Value * getIncomingValueForBlock(const BasicBlock *BB) const
const Instruction & front() const
Definition: BasicBlock.h:281
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:188
static ManagedStatic< OptionRegistry > OR
Definition: Options.cpp:31
Diagnostic information for applied optimization remarks.
Represent the analysis usage information of a pass.
void setCallingConv(CallingConv::ID CC)
Set the calling convention of the call.
Definition: CallSite.h:316
Used in the streaming interface as the general argument type.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:160
auto find(R &&Range, const T &Val) -> decltype(adl_begin(Range))
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1207
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
InlineCost getInlineCost(CallSite CS, const InlineParams &Params, TargetTransformInfo &CalleeTTI, std::function< AssumptionCache &(Function &)> &GetAssumptionCache, Optional< function_ref< BlockFrequencyInfo &(Function &)>> GetBFI, ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE=nullptr)
Get an InlineCost object representing the cost of inlining this callsite.
constexpr bool empty(const T &RangeOrContainer)
Test whether RangeOrContainer is empty. Similar to C++17 std::empty.
Definition: STLExtras.h:210
A function analysis which provides an AssumptionCache.
Analysis pass which computes BlockFrequencyInfo.
Iterator for intrusive lists based on ilist_node.
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file. ...
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
iterator end()
Definition: BasicBlock.h:271
InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options...
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:847
Module.h This file contains the declarations for the Module class.
static BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
pred_range predecessors(BasicBlock *BB)
Definition: CFG.h:125
unsigned getNumIncomingValues() const
Return the number of incoming edges.
#define DEBUG_TYPE
BBTy * getParent() const
Get the basic block containing the call site.
Definition: CallSite.h:97
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:133
static cl::opt< unsigned > MaxNumInlineBlocks("max-num-inline-blocks", cl::init(5), cl::Hidden, cl::desc("Max number of blocks to be partially inlined"))
iterator_range< user_iterator > users()
Definition: Value.h:400
int getCost() const
Get the inline cost estimate.
Definition: InlineCost.h:110
static void DFS(BasicBlock *Root, SetVector< BasicBlock *> &Set)
int getCostDelta() const
Get the cost delta from the threshold for inlining.
Definition: InlineCost.h:131
static cl::opt< bool > DisableMultiRegionPartialInline("disable-mr-partial-inlining", cl::init(false), cl::Hidden, cl::desc("Disable multi-region partial inlining"))
INITIALIZE_PASS_BEGIN(PartialInlinerLegacyPass, "partial-inliner", "Partial Inliner", false, false) INITIALIZE_PASS_END(PartialInlinerLegacyPass
unsigned succ_size(const Instruction *I)
Definition: CFG.h:261
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
FunTy * getCaller() const
Return the caller function for this call site.
Definition: CallSite.h:267
Analysis providing branch probability information.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:311
int getCallsiteCost(CallSite CS, const DataLayout &DL)
Return the cost associated with a callsite, including parameter passing and the call/return instructi...
static cl::opt< bool > TracePartialInlining("trace-partial-inlining", cl::init(false), cl::Hidden, cl::desc("Trace partial inlining."))
size_t size() const
Definition: Module.h:603
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
#define I(x, y, z)
Definition: MD5.cpp:58
static bool hasProfileData(Function *F, FunctionOutliningInfo *OI)
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
Definition: Pass.h:225
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
void eraseFromParent()
eraseFromParent - This method unlinks &#39;this&#39; from the containing module and deletes it...
Definition: Function.cpp:214
bool isUnconditional() const
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:206
FunTy * getCalledFunction() const
Return the function being called if this is a direct call, otherwise return null (if it&#39;s an indirect...
Definition: CallSite.h:107
Multiway switch.
bool hasAddressTaken(const User **=nullptr) const
hasAddressTaken - returns true if there are any uses of this function other than direct calls or invo...
Definition: Function.cpp:1254
static cl::opt< float > MinRegionSizeRatio("min-region-size-ratio", cl::init(0.1), cl::Hidden, cl::desc("Minimum ratio comparing relative sizes of each " "outline candidate and original function"))
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
user_iterator user_begin()
Definition: Value.h:376
const BasicBlock & front() const
Definition: Function.h:663
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:566
LLVM Value Representation.
Definition: Value.h:73
succ_range successors(Instruction *I)
Definition: CFG.h:264
Invoke instruction.
AssumptionCache & getAssumptionCache(Function &F)
Get the cached assumptions for a function.
print Print MemDeps of function
A container for analyses that lazily runs them and caches their results.
static MachineBasicBlock * OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ)
This pass exposes codegen information to IR-level passes.
bool extractProfMetadata(uint64_t &TrueVal, uint64_t &FalseVal) const
Retrieve the raw weight values of a conditional branch or select.
Definition: Metadata.cpp:1315
The optimization diagnostic interface.
bool hasProfileData() const
Return true if the function is annotated with profile data.
Definition: Function.h:308
bool use_empty() const
Definition: Value.h:323
Optional< uint64_t > getBlockProfileCount(const BasicBlock *BB) const
Returns the estimated profile count of BB.
const BasicBlock * getParent() const
Definition: Instruction.h:67
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:1038
user_iterator user_end()
Definition: Value.h:384
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
Definition: STLExtras.h:1245