LLVM  14.0.0git
PartialInlining.cpp
Go to the documentation of this file.
1 //===- PartialInlining.cpp - Inline parts of functions --------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass performs partial inlining, typically by inlining an if statement
10 // that surrounds the body of the function.
11 //
12 //===----------------------------------------------------------------------===//
13 
15 #include "llvm/ADT/DenseMap.h"
16 #include "llvm/ADT/DenseSet.h"
17 #include "llvm/ADT/None.h"
18 #include "llvm/ADT/Optional.h"
19 #include "llvm/ADT/STLExtras.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/ADT/Statistic.h"
25 #include "llvm/Analysis/LoopInfo.h"
30 #include "llvm/IR/Attributes.h"
31 #include "llvm/IR/BasicBlock.h"
32 #include "llvm/IR/CFG.h"
33 #include "llvm/IR/DebugLoc.h"
34 #include "llvm/IR/DiagnosticInfo.h"
35 #include "llvm/IR/Dominators.h"
36 #include "llvm/IR/Function.h"
37 #include "llvm/IR/InstrTypes.h"
38 #include "llvm/IR/Instruction.h"
39 #include "llvm/IR/Instructions.h"
40 #include "llvm/IR/IntrinsicInst.h"
41 #include "llvm/IR/Intrinsics.h"
42 #include "llvm/IR/Module.h"
43 #include "llvm/IR/User.h"
44 #include "llvm/InitializePasses.h"
45 #include "llvm/Pass.h"
48 #include "llvm/Support/Casting.h"
51 #include "llvm/Transforms/IPO.h"
55 #include <algorithm>
56 #include <cassert>
57 #include <cstdint>
58 #include <functional>
59 #include <iterator>
60 #include <memory>
61 #include <tuple>
62 #include <vector>
63 
64 using namespace llvm;
65 
66 #define DEBUG_TYPE "partial-inlining"
67 
68 STATISTIC(NumPartialInlined,
69  "Number of callsites functions partially inlined into.");
70 STATISTIC(NumColdOutlinePartialInlined, "Number of times functions with "
71  "cold outlined regions were partially "
72  "inlined into its caller(s).");
73 STATISTIC(NumColdRegionsFound,
74  "Number of cold single entry/exit regions found.");
75 STATISTIC(NumColdRegionsOutlined,
76  "Number of cold single entry/exit regions outlined.");
77 
78 // Command line option to disable partial-inlining. The default is false:
79 static cl::opt<bool>
80  DisablePartialInlining("disable-partial-inlining", cl::init(false),
81  cl::Hidden, cl::desc("Disable partial inlining"));
82 // Command line option to disable multi-region partial-inlining. The default is
83 // false:
85  "disable-mr-partial-inlining", cl::init(false), cl::Hidden,
86  cl::desc("Disable multi-region partial inlining"));
87 
88 // Command line option to force outlining in regions with live exit variables.
89 // The default is false:
90 static cl::opt<bool>
91  ForceLiveExit("pi-force-live-exit-outline", cl::init(false), cl::Hidden,
92  cl::desc("Force outline regions with live exits"));
93 
94 // Command line option to enable marking outline functions with Cold Calling
95 // Convention. The default is false:
96 static cl::opt<bool>
97  MarkOutlinedColdCC("pi-mark-coldcc", cl::init(false), cl::Hidden,
98  cl::desc("Mark outline function calls with ColdCC"));
99 
100 // This is an option used by testing:
101 static cl::opt<bool> SkipCostAnalysis("skip-partial-inlining-cost-analysis",
102  cl::init(false), cl::ZeroOrMore,
104  cl::desc("Skip Cost Analysis"));
105 // Used to determine if a cold region is worth outlining based on
106 // its inlining cost compared to the original function. Default is set at 10%.
107 // ie. if the cold region reduces the inlining cost of the original function by
108 // at least 10%.
110  "min-region-size-ratio", cl::init(0.1), cl::Hidden,
111  cl::desc("Minimum ratio comparing relative sizes of each "
112  "outline candidate and original function"));
113 // Used to tune the minimum number of execution counts needed in the predecessor
114 // block to the cold edge. ie. confidence interval.
115 static cl::opt<unsigned>
116  MinBlockCounterExecution("min-block-execution", cl::init(100), cl::Hidden,
117  cl::desc("Minimum block executions to consider "
118  "its BranchProbabilityInfo valid"));
119 // Used to determine when an edge is considered cold. Default is set to 10%. ie.
120 // if the branch probability is 10% or less, then it is deemed as 'cold'.
122  "cold-branch-ratio", cl::init(0.1), cl::Hidden,
123  cl::desc("Minimum BranchProbability to consider a region cold."));
124 
126  "max-num-inline-blocks", cl::init(5), cl::Hidden,
127  cl::desc("Max number of blocks to be partially inlined"));
128 
129 // Command line option to set the maximum number of partial inlining allowed
130 // for the module. The default value of -1 means no limit.
132  "max-partial-inlining", cl::init(-1), cl::Hidden, cl::ZeroOrMore,
133  cl::desc("Max number of partial inlining. The default is unlimited"));
134 
135 // Used only when PGO or user annotated branch data is absent. It is
136 // the least value that is used to weigh the outline region. If BFI
137 // produces larger value, the BFI value will be used.
138 static cl::opt<int>
139  OutlineRegionFreqPercent("outline-region-freq-percent", cl::init(75),
141  cl::desc("Relative frequency of outline region to "
142  "the entry block"));
143 
145  "partial-inlining-extra-penalty", cl::init(0), cl::Hidden,
146  cl::desc("A debug option to add additional penalty to the computed one."));
147 
148 namespace {
149 
150 struct FunctionOutliningInfo {
151  FunctionOutliningInfo() = default;
152 
153  // Returns the number of blocks to be inlined including all blocks
154  // in Entries and one return block.
155  unsigned getNumInlinedBlocks() const { return Entries.size() + 1; }
156 
157  // A set of blocks including the function entry that guard
158  // the region to be outlined.
160 
161  // The return block that is not included in the outlined region.
162  BasicBlock *ReturnBlock = nullptr;
163 
164  // The dominating block of the region to be outlined.
165  BasicBlock *NonReturnBlock = nullptr;
166 
167  // The set of blocks in Entries that that are predecessors to ReturnBlock
168  SmallVector<BasicBlock *, 4> ReturnBlockPreds;
169 };
170 
171 struct FunctionOutliningMultiRegionInfo {
172  FunctionOutliningMultiRegionInfo()
173  : ORI() {}
174 
175  // Container for outline regions
176  struct OutlineRegionInfo {
177  OutlineRegionInfo(ArrayRef<BasicBlock *> Region,
178  BasicBlock *EntryBlock, BasicBlock *ExitBlock,
179  BasicBlock *ReturnBlock)
180  : Region(Region.begin(), Region.end()), EntryBlock(EntryBlock),
181  ExitBlock(ExitBlock), ReturnBlock(ReturnBlock) {}
183  BasicBlock *EntryBlock;
184  BasicBlock *ExitBlock;
185  BasicBlock *ReturnBlock;
186  };
187 
189 };
190 
191 struct PartialInlinerImpl {
192 
193  PartialInlinerImpl(
195  function_ref<AssumptionCache *(Function &)> LookupAC,
197  function_ref<const TargetLibraryInfo &(Function &)> GTLI,
198  ProfileSummaryInfo &ProfSI,
199  function_ref<BlockFrequencyInfo &(Function &)> GBFI = nullptr)
200  : GetAssumptionCache(GetAC), LookupAssumptionCache(LookupAC),
201  GetTTI(GTTI), GetBFI(GBFI), GetTLI(GTLI), PSI(ProfSI) {}
202 
203  bool run(Module &M);
204  // Main part of the transformation that calls helper functions to find
205  // outlining candidates, clone & outline the function, and attempt to
206  // partially inline the resulting function. Returns true if
207  // inlining was successful, false otherwise. Also returns the outline
208  // function (only if we partially inlined early returns) as there is a
209  // possibility to further "peel" early return statements that were left in the
210  // outline function due to code size.
211  std::pair<bool, Function *> unswitchFunction(Function &F);
212 
213  // This class speculatively clones the function to be partial inlined.
214  // At the end of partial inlining, the remaining callsites to the cloned
215  // function that are not partially inlined will be fixed up to reference
216  // the original function, and the cloned function will be erased.
217  struct FunctionCloner {
218  // Two constructors, one for single region outlining, the other for
219  // multi-region outlining.
220  FunctionCloner(Function *F, FunctionOutliningInfo *OI,
222  function_ref<AssumptionCache *(Function &)> LookupAC,
224  FunctionCloner(Function *F, FunctionOutliningMultiRegionInfo *OMRI,
226  function_ref<AssumptionCache *(Function &)> LookupAC,
228 
229  ~FunctionCloner();
230 
231  // Prepare for function outlining: making sure there is only
232  // one incoming edge from the extracted/outlined region to
233  // the return block.
234  void normalizeReturnBlock() const;
235 
236  // Do function outlining for cold regions.
237  bool doMultiRegionFunctionOutlining();
238  // Do function outlining for region after early return block(s).
239  // NOTE: For vararg functions that do the vararg handling in the outlined
240  // function, we temporarily generate IR that does not properly
241  // forward varargs to the outlined function. Calling InlineFunction
242  // will update calls to the outlined functions to properly forward
243  // the varargs.
244  Function *doSingleRegionFunctionOutlining();
245 
246  Function *OrigFunc = nullptr;
247  Function *ClonedFunc = nullptr;
248 
249  typedef std::pair<Function *, BasicBlock *> FuncBodyCallerPair;
250  // Keep track of Outlined Functions and the basic block they're called from.
251  SmallVector<FuncBodyCallerPair, 4> OutlinedFunctions;
252 
253  // ClonedFunc is inlined in one of its callers after function
254  // outlining.
255  bool IsFunctionInlined = false;
256  // The cost of the region to be outlined.
257  InstructionCost OutlinedRegionCost = 0;
258  // ClonedOI is specific to outlining non-early return blocks.
259  std::unique_ptr<FunctionOutliningInfo> ClonedOI = nullptr;
260  // ClonedOMRI is specific to outlining cold regions.
261  std::unique_ptr<FunctionOutliningMultiRegionInfo> ClonedOMRI = nullptr;
262  std::unique_ptr<BlockFrequencyInfo> ClonedFuncBFI = nullptr;
264  function_ref<AssumptionCache *(Function &)> LookupAC;
266  };
267 
268 private:
269  int NumPartialInlining = 0;
270  function_ref<AssumptionCache &(Function &)> GetAssumptionCache;
271  function_ref<AssumptionCache *(Function &)> LookupAssumptionCache;
274  function_ref<const TargetLibraryInfo &(Function &)> GetTLI;
275  ProfileSummaryInfo &PSI;
276 
277  // Return the frequency of the OutlininingBB relative to F's entry point.
278  // The result is no larger than 1 and is represented using BP.
279  // (Note that the outlined region's 'head' block can only have incoming
280  // edges from the guarding entry blocks).
282  getOutliningCallBBRelativeFreq(FunctionCloner &Cloner) const;
283 
284  // Return true if the callee of CB should be partially inlined with
285  // profit.
286  bool shouldPartialInline(CallBase &CB, FunctionCloner &Cloner,
287  BlockFrequency WeightedOutliningRcost,
288  OptimizationRemarkEmitter &ORE) const;
289 
290  // Try to inline DuplicateFunction (cloned from F with call to
291  // the OutlinedFunction into its callers. Return true
292  // if there is any successful inlining.
293  bool tryPartialInline(FunctionCloner &Cloner);
294 
295  // Compute the mapping from use site of DuplicationFunction to the enclosing
296  // BB's profile count.
297  void
298  computeCallsiteToProfCountMap(Function *DuplicateFunction,
299  DenseMap<User *, uint64_t> &SiteCountMap) const;
300 
301  bool isLimitReached() const {
302  return (MaxNumPartialInlining != -1 &&
303  NumPartialInlining >= MaxNumPartialInlining);
304  }
305 
306  static CallBase *getSupportedCallBase(User *U) {
307  if (isa<CallInst>(U) || isa<InvokeInst>(U))
308  return cast<CallBase>(U);
309  llvm_unreachable("All uses must be calls");
310  return nullptr;
311  }
312 
313  static CallBase *getOneCallSiteTo(Function &F) {
314  User *User = *F.user_begin();
315  return getSupportedCallBase(User);
316  }
317 
318  std::tuple<DebugLoc, BasicBlock *> getOneDebugLoc(Function &F) const {
319  CallBase *CB = getOneCallSiteTo(F);
320  DebugLoc DLoc = CB->getDebugLoc();
321  BasicBlock *Block = CB->getParent();
322  return std::make_tuple(DLoc, Block);
323  }
324 
325  // Returns the costs associated with function outlining:
326  // - The first value is the non-weighted runtime cost for making the call
327  // to the outlined function, including the addtional setup cost in the
328  // outlined function itself;
329  // - The second value is the estimated size of the new call sequence in
330  // basic block Cloner.OutliningCallBB;
331  std::tuple<InstructionCost, InstructionCost>
332  computeOutliningCosts(FunctionCloner &Cloner) const;
333 
334  // Compute the 'InlineCost' of block BB. InlineCost is a proxy used to
335  // approximate both the size and runtime cost (Note that in the current
336  // inline cost analysis, there is no clear distinction there either).
337  static InstructionCost computeBBInlineCost(BasicBlock *BB,
339 
340  std::unique_ptr<FunctionOutliningInfo>
341  computeOutliningInfo(Function &F) const;
342 
343  std::unique_ptr<FunctionOutliningMultiRegionInfo>
344  computeOutliningColdRegionsInfo(Function &F,
345  OptimizationRemarkEmitter &ORE) const;
346 };
347 
348 struct PartialInlinerLegacyPass : public ModulePass {
349  static char ID; // Pass identification, replacement for typeid
350 
351  PartialInlinerLegacyPass() : ModulePass(ID) {
353  }
354 
355  void getAnalysisUsage(AnalysisUsage &AU) const override {
360  }
361 
362  bool runOnModule(Module &M) override {
363  if (skipModule(M))
364  return false;
365 
366  AssumptionCacheTracker *ACT = &getAnalysis<AssumptionCacheTracker>();
368  &getAnalysis<TargetTransformInfoWrapperPass>();
369  ProfileSummaryInfo &PSI =
370  getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
371 
372  auto GetAssumptionCache = [&ACT](Function &F) -> AssumptionCache & {
373  return ACT->getAssumptionCache(F);
374  };
375 
376  auto LookupAssumptionCache = [ACT](Function &F) -> AssumptionCache * {
377  return ACT->lookupAssumptionCache(F);
378  };
379 
380  auto GetTTI = [&TTIWP](Function &F) -> TargetTransformInfo & {
381  return TTIWP->getTTI(F);
382  };
383 
384  auto GetTLI = [this](Function &F) -> TargetLibraryInfo & {
385  return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
386  };
387 
388  return PartialInlinerImpl(GetAssumptionCache, LookupAssumptionCache, GetTTI,
389  GetTLI, PSI)
390  .run(M);
391  }
392 };
393 
394 } // end anonymous namespace
395 
396 std::unique_ptr<FunctionOutliningMultiRegionInfo>
397 PartialInlinerImpl::computeOutliningColdRegionsInfo(
398  Function &F, OptimizationRemarkEmitter &ORE) const {
399  BasicBlock *EntryBlock = &F.front();
400 
401  DominatorTree DT(F);
402  LoopInfo LI(DT);
403  BranchProbabilityInfo BPI(F, LI);
404  std::unique_ptr<BlockFrequencyInfo> ScopedBFI;
406  if (!GetBFI) {
407  ScopedBFI.reset(new BlockFrequencyInfo(F, BPI, LI));
408  BFI = ScopedBFI.get();
409  } else
410  BFI = &(GetBFI(F));
411 
412  // Return if we don't have profiling information.
413  if (!PSI.hasInstrumentationProfile())
414  return std::unique_ptr<FunctionOutliningMultiRegionInfo>();
415 
416  std::unique_ptr<FunctionOutliningMultiRegionInfo> OutliningInfo =
417  std::make_unique<FunctionOutliningMultiRegionInfo>();
418 
419  auto IsSingleExit =
420  [&ORE](SmallVectorImpl<BasicBlock *> &BlockList) -> BasicBlock * {
421  BasicBlock *ExitBlock = nullptr;
422  for (auto *Block : BlockList) {
423  for (BasicBlock *Succ : successors(Block)) {
424  if (!is_contained(BlockList, Succ)) {
425  if (ExitBlock) {
426  ORE.emit([&]() {
427  return OptimizationRemarkMissed(DEBUG_TYPE, "MultiExitRegion",
428  &Succ->front())
429  << "Region dominated by "
430  << ore::NV("Block", BlockList.front()->getName())
431  << " has more than one region exit edge.";
432  });
433  return nullptr;
434  }
435 
436  ExitBlock = Block;
437  }
438  }
439  }
440  return ExitBlock;
441  };
442 
443  auto BBProfileCount = [BFI](BasicBlock *BB) {
444  return BFI->getBlockProfileCount(BB)
445  ? BFI->getBlockProfileCount(BB).getValue()
446  : 0;
447  };
448 
449  // Use the same computeBBInlineCost function to compute the cost savings of
450  // the outlining the candidate region.
451  TargetTransformInfo *FTTI = &GetTTI(F);
452  InstructionCost OverallFunctionCost = 0;
453  for (auto &BB : F)
454  OverallFunctionCost += computeBBInlineCost(&BB, FTTI);
455 
456  LLVM_DEBUG(dbgs() << "OverallFunctionCost = " << OverallFunctionCost
457  << "\n";);
458 
459  InstructionCost MinOutlineRegionCost = OverallFunctionCost.map(
460  [&](auto Cost) { return Cost * MinRegionSizeRatio; });
461 
462  BranchProbability MinBranchProbability(
463  static_cast<int>(ColdBranchRatio * MinBlockCounterExecution),
465  bool ColdCandidateFound = false;
466  BasicBlock *CurrEntry = EntryBlock;
467  std::vector<BasicBlock *> DFS;
468  DenseMap<BasicBlock *, bool> VisitedMap;
469  DFS.push_back(CurrEntry);
470  VisitedMap[CurrEntry] = true;
471 
472  // Use Depth First Search on the basic blocks to find CFG edges that are
473  // considered cold.
474  // Cold regions considered must also have its inline cost compared to the
475  // overall inline cost of the original function. The region is outlined only
476  // if it reduced the inline cost of the function by 'MinOutlineRegionCost' or
477  // more.
478  while (!DFS.empty()) {
479  auto *ThisBB = DFS.back();
480  DFS.pop_back();
481  // Only consider regions with predecessor blocks that are considered
482  // not-cold (default: part of the top 99.99% of all block counters)
483  // AND greater than our minimum block execution count (default: 100).
484  if (PSI.isColdBlock(ThisBB, BFI) ||
485  BBProfileCount(ThisBB) < MinBlockCounterExecution)
486  continue;
487  for (auto SI = succ_begin(ThisBB); SI != succ_end(ThisBB); ++SI) {
488  if (VisitedMap[*SI])
489  continue;
490  VisitedMap[*SI] = true;
491  DFS.push_back(*SI);
492  // If branch isn't cold, we skip to the next one.
493  BranchProbability SuccProb = BPI.getEdgeProbability(ThisBB, *SI);
494  if (SuccProb > MinBranchProbability)
495  continue;
496 
497  LLVM_DEBUG(dbgs() << "Found cold edge: " << ThisBB->getName() << "->"
498  << SI->getName()
499  << "\nBranch Probability = " << SuccProb << "\n";);
500 
501  SmallVector<BasicBlock *, 8> DominateVector;
502  DT.getDescendants(*SI, DominateVector);
503  assert(!DominateVector.empty() &&
504  "SI should be reachable and have at least itself as descendant");
505 
506  // We can only outline single entry regions (for now).
507  if (!DominateVector.front()->hasNPredecessors(1)) {
508  LLVM_DEBUG(dbgs() << "ABORT: Block " << SI->getName()
509  << " doesn't have a single predecessor in the "
510  "dominator tree\n";);
511  continue;
512  }
513 
514  BasicBlock *ExitBlock = nullptr;
515  // We can only outline single exit regions (for now).
516  if (!(ExitBlock = IsSingleExit(DominateVector))) {
517  LLVM_DEBUG(dbgs() << "ABORT: Block " << SI->getName()
518  << " doesn't have a unique successor\n";);
519  continue;
520  }
521 
522  InstructionCost OutlineRegionCost = 0;
523  for (auto *BB : DominateVector)
524  OutlineRegionCost += computeBBInlineCost(BB, &GetTTI(*BB->getParent()));
525 
526  LLVM_DEBUG(dbgs() << "OutlineRegionCost = " << OutlineRegionCost
527  << "\n";);
528 
529  if (!SkipCostAnalysis && OutlineRegionCost < MinOutlineRegionCost) {
530  ORE.emit([&]() {
531  return OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly",
532  &SI->front())
533  << ore::NV("Callee", &F)
534  << " inline cost-savings smaller than "
535  << ore::NV("Cost", MinOutlineRegionCost);
536  });
537 
538  LLVM_DEBUG(dbgs() << "ABORT: Outline region cost is smaller than "
539  << MinOutlineRegionCost << "\n";);
540  continue;
541  }
542 
543  // For now, ignore blocks that belong to a SISE region that is a
544  // candidate for outlining. In the future, we may want to look
545  // at inner regions because the outer region may have live-exit
546  // variables.
547  for (auto *BB : DominateVector)
548  VisitedMap[BB] = true;
549 
550  // ReturnBlock here means the block after the outline call
551  BasicBlock *ReturnBlock = ExitBlock->getSingleSuccessor();
552  FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegInfo(
553  DominateVector, DominateVector.front(), ExitBlock, ReturnBlock);
554  OutliningInfo->ORI.push_back(RegInfo);
555  LLVM_DEBUG(dbgs() << "Found Cold Candidate starting at block: "
556  << DominateVector.front()->getName() << "\n";);
557  ColdCandidateFound = true;
558  NumColdRegionsFound++;
559  }
560  }
561 
562  if (ColdCandidateFound)
563  return OutliningInfo;
564 
565  return std::unique_ptr<FunctionOutliningMultiRegionInfo>();
566 }
567 
568 std::unique_ptr<FunctionOutliningInfo>
569 PartialInlinerImpl::computeOutliningInfo(Function &F) const {
570  BasicBlock *EntryBlock = &F.front();
571  BranchInst *BR = dyn_cast<BranchInst>(EntryBlock->getTerminator());
572  if (!BR || BR->isUnconditional())
573  return std::unique_ptr<FunctionOutliningInfo>();
574 
575  // Returns true if Succ is BB's successor
576  auto IsSuccessor = [](BasicBlock *Succ, BasicBlock *BB) {
577  return is_contained(successors(BB), Succ);
578  };
579 
580  auto IsReturnBlock = [](BasicBlock *BB) {
581  Instruction *TI = BB->getTerminator();
582  return isa<ReturnInst>(TI);
583  };
584 
585  auto GetReturnBlock = [&](BasicBlock *Succ1, BasicBlock *Succ2) {
586  if (IsReturnBlock(Succ1))
587  return std::make_tuple(Succ1, Succ2);
588  if (IsReturnBlock(Succ2))
589  return std::make_tuple(Succ2, Succ1);
590 
591  return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr);
592  };
593 
594  // Detect a triangular shape:
595  auto GetCommonSucc = [&](BasicBlock *Succ1, BasicBlock *Succ2) {
596  if (IsSuccessor(Succ1, Succ2))
597  return std::make_tuple(Succ1, Succ2);
598  if (IsSuccessor(Succ2, Succ1))
599  return std::make_tuple(Succ2, Succ1);
600 
601  return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr);
602  };
603 
604  std::unique_ptr<FunctionOutliningInfo> OutliningInfo =
605  std::make_unique<FunctionOutliningInfo>();
606 
607  BasicBlock *CurrEntry = EntryBlock;
608  bool CandidateFound = false;
609  do {
610  // The number of blocks to be inlined has already reached
611  // the limit. When MaxNumInlineBlocks is set to 0 or 1, this
612  // disables partial inlining for the function.
613  if (OutliningInfo->getNumInlinedBlocks() >= MaxNumInlineBlocks)
614  break;
615 
616  if (succ_size(CurrEntry) != 2)
617  break;
618 
619  BasicBlock *Succ1 = *succ_begin(CurrEntry);
620  BasicBlock *Succ2 = *(succ_begin(CurrEntry) + 1);
621 
622  BasicBlock *ReturnBlock, *NonReturnBlock;
623  std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);
624 
625  if (ReturnBlock) {
626  OutliningInfo->Entries.push_back(CurrEntry);
627  OutliningInfo->ReturnBlock = ReturnBlock;
628  OutliningInfo->NonReturnBlock = NonReturnBlock;
629  CandidateFound = true;
630  break;
631  }
632 
633  BasicBlock *CommSucc, *OtherSucc;
634  std::tie(CommSucc, OtherSucc) = GetCommonSucc(Succ1, Succ2);
635 
636  if (!CommSucc)
637  break;
638 
639  OutliningInfo->Entries.push_back(CurrEntry);
640  CurrEntry = OtherSucc;
641  } while (true);
642 
643  if (!CandidateFound)
644  return std::unique_ptr<FunctionOutliningInfo>();
645 
646  // Do sanity check of the entries: threre should not
647  // be any successors (not in the entry set) other than
648  // {ReturnBlock, NonReturnBlock}
649  assert(OutliningInfo->Entries[0] == &F.front() &&
650  "Function Entry must be the first in Entries vector");
651  DenseSet<BasicBlock *> Entries;
652  for (BasicBlock *E : OutliningInfo->Entries)
653  Entries.insert(E);
654 
655  // Returns true of BB has Predecessor which is not
656  // in Entries set.
657  auto HasNonEntryPred = [Entries](BasicBlock *BB) {
658  for (auto *Pred : predecessors(BB)) {
659  if (!Entries.count(Pred))
660  return true;
661  }
662  return false;
663  };
664  auto CheckAndNormalizeCandidate =
665  [Entries, HasNonEntryPred](FunctionOutliningInfo *OutliningInfo) {
666  for (BasicBlock *E : OutliningInfo->Entries) {
667  for (auto *Succ : successors(E)) {
668  if (Entries.count(Succ))
669  continue;
670  if (Succ == OutliningInfo->ReturnBlock)
671  OutliningInfo->ReturnBlockPreds.push_back(E);
672  else if (Succ != OutliningInfo->NonReturnBlock)
673  return false;
674  }
675  // There should not be any outside incoming edges either:
676  if (HasNonEntryPred(E))
677  return false;
678  }
679  return true;
680  };
681 
682  if (!CheckAndNormalizeCandidate(OutliningInfo.get()))
683  return std::unique_ptr<FunctionOutliningInfo>();
684 
685  // Now further growing the candidate's inlining region by
686  // peeling off dominating blocks from the outlining region:
687  while (OutliningInfo->getNumInlinedBlocks() < MaxNumInlineBlocks) {
688  BasicBlock *Cand = OutliningInfo->NonReturnBlock;
689  if (succ_size(Cand) != 2)
690  break;
691 
692  if (HasNonEntryPred(Cand))
693  break;
694 
695  BasicBlock *Succ1 = *succ_begin(Cand);
696  BasicBlock *Succ2 = *(succ_begin(Cand) + 1);
697 
698  BasicBlock *ReturnBlock, *NonReturnBlock;
699  std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);
700  if (!ReturnBlock || ReturnBlock != OutliningInfo->ReturnBlock)
701  break;
702 
703  if (NonReturnBlock->getSinglePredecessor() != Cand)
704  break;
705 
706  // Now grow and update OutlininigInfo:
707  OutliningInfo->Entries.push_back(Cand);
708  OutliningInfo->NonReturnBlock = NonReturnBlock;
709  OutliningInfo->ReturnBlockPreds.push_back(Cand);
710  Entries.insert(Cand);
711  }
712 
713  return OutliningInfo;
714 }
715 
716 // Check if there is PGO data or user annotated branch data:
717 static bool hasProfileData(const Function &F, const FunctionOutliningInfo &OI) {
718  if (F.hasProfileData())
719  return true;
720  // Now check if any of the entry block has MD_prof data:
721  for (auto *E : OI.Entries) {
722  BranchInst *BR = dyn_cast<BranchInst>(E->getTerminator());
723  if (!BR || BR->isUnconditional())
724  continue;
725  uint64_t T, F;
726  if (BR->extractProfMetadata(T, F))
727  return true;
728  }
729  return false;
730 }
731 
732 BranchProbability PartialInlinerImpl::getOutliningCallBBRelativeFreq(
733  FunctionCloner &Cloner) const {
734  BasicBlock *OutliningCallBB = Cloner.OutlinedFunctions.back().second;
735  auto EntryFreq =
736  Cloner.ClonedFuncBFI->getBlockFreq(&Cloner.ClonedFunc->getEntryBlock());
737  auto OutliningCallFreq =
738  Cloner.ClonedFuncBFI->getBlockFreq(OutliningCallBB);
739  // FIXME Hackery needed because ClonedFuncBFI is based on the function BEFORE
740  // we outlined any regions, so we may encounter situations where the
741  // OutliningCallFreq is *slightly* bigger than the EntryFreq.
742  if (OutliningCallFreq.getFrequency() > EntryFreq.getFrequency())
743  OutliningCallFreq = EntryFreq;
744 
745  auto OutlineRegionRelFreq = BranchProbability::getBranchProbability(
746  OutliningCallFreq.getFrequency(), EntryFreq.getFrequency());
747 
748  if (hasProfileData(*Cloner.OrigFunc, *Cloner.ClonedOI.get()))
749  return OutlineRegionRelFreq;
750 
751  // When profile data is not available, we need to be conservative in
752  // estimating the overall savings. Static branch prediction can usually
753  // guess the branch direction right (taken/non-taken), but the guessed
754  // branch probability is usually not biased enough. In case when the
755  // outlined region is predicted to be likely, its probability needs
756  // to be made higher (more biased) to not under-estimate the cost of
757  // function outlining. On the other hand, if the outlined region
758  // is predicted to be less likely, the predicted probablity is usually
759  // higher than the actual. For instance, the actual probability of the
760  // less likely target is only 5%, but the guessed probablity can be
761  // 40%. In the latter case, there is no need for further adjustement.
762  // FIXME: add an option for this.
763  if (OutlineRegionRelFreq < BranchProbability(45, 100))
764  return OutlineRegionRelFreq;
765 
766  OutlineRegionRelFreq = std::max(
767  OutlineRegionRelFreq, BranchProbability(OutlineRegionFreqPercent, 100));
768 
769  return OutlineRegionRelFreq;
770 }
771 
772 bool PartialInlinerImpl::shouldPartialInline(
773  CallBase &CB, FunctionCloner &Cloner, BlockFrequency WeightedOutliningRcost,
774  OptimizationRemarkEmitter &ORE) const {
775  using namespace ore;
776 
778  assert(Callee == Cloner.ClonedFunc);
779 
780  if (SkipCostAnalysis)
781  return isInlineViable(*Callee).isSuccess();
782 
783  Function *Caller = CB.getCaller();
784  auto &CalleeTTI = GetTTI(*Callee);
785  bool RemarksEnabled =
786  Callee->getContext().getDiagHandlerPtr()->isMissedOptRemarkEnabled(
787  DEBUG_TYPE);
788  InlineCost IC =
789  getInlineCost(CB, getInlineParams(), CalleeTTI, GetAssumptionCache,
790  GetTLI, GetBFI, &PSI, RemarksEnabled ? &ORE : nullptr);
791 
792  if (IC.isAlways()) {
793  ORE.emit([&]() {
794  return OptimizationRemarkAnalysis(DEBUG_TYPE, "AlwaysInline", &CB)
795  << NV("Callee", Cloner.OrigFunc)
796  << " should always be fully inlined, not partially";
797  });
798  return false;
799  }
800 
801  if (IC.isNever()) {
802  ORE.emit([&]() {
803  return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", &CB)
804  << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
805  << NV("Caller", Caller)
806  << " because it should never be inlined (cost=never)";
807  });
808  return false;
809  }
810 
811  if (!IC) {
812  ORE.emit([&]() {
813  return OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly", &CB)
814  << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
815  << NV("Caller", Caller) << " because too costly to inline (cost="
816  << NV("Cost", IC.getCost()) << ", threshold="
817  << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")";
818  });
819  return false;
820  }
821  const DataLayout &DL = Caller->getParent()->getDataLayout();
822 
823  // The savings of eliminating the call:
824  int NonWeightedSavings = getCallsiteCost(CB, DL);
825  BlockFrequency NormWeightedSavings(NonWeightedSavings);
826 
827  // Weighted saving is smaller than weighted cost, return false
828  if (NormWeightedSavings < WeightedOutliningRcost) {
829  ORE.emit([&]() {
830  return OptimizationRemarkAnalysis(DEBUG_TYPE, "OutliningCallcostTooHigh",
831  &CB)
832  << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
833  << NV("Caller", Caller) << " runtime overhead (overhead="
834  << NV("Overhead", (unsigned)WeightedOutliningRcost.getFrequency())
835  << ", savings="
836  << NV("Savings", (unsigned)NormWeightedSavings.getFrequency())
837  << ")"
838  << " of making the outlined call is too high";
839  });
840 
841  return false;
842  }
843 
844  ORE.emit([&]() {
845  return OptimizationRemarkAnalysis(DEBUG_TYPE, "CanBePartiallyInlined", &CB)
846  << NV("Callee", Cloner.OrigFunc) << " can be partially inlined into "
847  << NV("Caller", Caller) << " with cost=" << NV("Cost", IC.getCost())
848  << " (threshold="
849  << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")";
850  });
851  return true;
852 }
853 
854 // TODO: Ideally we should share Inliner's InlineCost Analysis code.
855 // For now use a simplified version. The returned 'InlineCost' will be used
856 // to esimate the size cost as well as runtime cost of the BB.
858 PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB,
861  const DataLayout &DL = BB->getParent()->getParent()->getDataLayout();
862  for (Instruction &I : BB->instructionsWithoutDebug()) {
863  // Skip free instructions.
864  switch (I.getOpcode()) {
865  case Instruction::BitCast:
866  case Instruction::PtrToInt:
867  case Instruction::IntToPtr:
868  case Instruction::Alloca:
869  case Instruction::PHI:
870  continue;
871  case Instruction::GetElementPtr:
872  if (cast<GetElementPtrInst>(&I)->hasAllZeroIndices())
873  continue;
874  break;
875  default:
876  break;
877  }
878 
879  if (I.isLifetimeStartOrEnd())
880  continue;
881 
882  if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
883  Intrinsic::ID IID = II->getIntrinsicID();
885  FastMathFlags FMF;
886  for (Value *Val : II->args())
887  Tys.push_back(Val->getType());
888 
889  if (auto *FPMO = dyn_cast<FPMathOperator>(II))
890  FMF = FPMO->getFastMathFlags();
891 
892  IntrinsicCostAttributes ICA(IID, II->getType(), Tys, FMF);
894  continue;
895  }
896 
897  if (CallInst *CI = dyn_cast<CallInst>(&I)) {
898  InlineCost += getCallsiteCost(*CI, DL);
899  continue;
900  }
901 
902  if (InvokeInst *II = dyn_cast<InvokeInst>(&I)) {
903  InlineCost += getCallsiteCost(*II, DL);
904  continue;
905  }
906 
907  if (SwitchInst *SI = dyn_cast<SwitchInst>(&I)) {
908  InlineCost += (SI->getNumCases() + 1) * InlineConstants::InstrCost;
909  continue;
910  }
912  }
913 
914  return InlineCost;
915 }
916 
917 std::tuple<InstructionCost, InstructionCost>
918 PartialInlinerImpl::computeOutliningCosts(FunctionCloner &Cloner) const {
919  InstructionCost OutliningFuncCallCost = 0, OutlinedFunctionCost = 0;
920  for (auto FuncBBPair : Cloner.OutlinedFunctions) {
921  Function *OutlinedFunc = FuncBBPair.first;
922  BasicBlock* OutliningCallBB = FuncBBPair.second;
923  // Now compute the cost of the call sequence to the outlined function
924  // 'OutlinedFunction' in BB 'OutliningCallBB':
925  auto *OutlinedFuncTTI = &GetTTI(*OutlinedFunc);
926  OutliningFuncCallCost +=
927  computeBBInlineCost(OutliningCallBB, OutlinedFuncTTI);
928 
929  // Now compute the cost of the extracted/outlined function itself:
930  for (BasicBlock &BB : *OutlinedFunc)
931  OutlinedFunctionCost += computeBBInlineCost(&BB, OutlinedFuncTTI);
932  }
933  assert(OutlinedFunctionCost >= Cloner.OutlinedRegionCost &&
934  "Outlined function cost should be no less than the outlined region");
935 
936  // The code extractor introduces a new root and exit stub blocks with
937  // additional unconditional branches. Those branches will be eliminated
938  // later with bb layout. The cost should be adjusted accordingly:
939  OutlinedFunctionCost -=
940  2 * InlineConstants::InstrCost * Cloner.OutlinedFunctions.size();
941 
942  InstructionCost OutliningRuntimeOverhead =
943  OutliningFuncCallCost +
944  (OutlinedFunctionCost - Cloner.OutlinedRegionCost) +
945  ExtraOutliningPenalty.getValue();
946 
947  return std::make_tuple(OutliningFuncCallCost, OutliningRuntimeOverhead);
948 }
949 
950 // Create the callsite to profile count map which is
951 // used to update the original function's entry count,
952 // after the function is partially inlined into the callsite.
953 void PartialInlinerImpl::computeCallsiteToProfCountMap(
954  Function *DuplicateFunction,
955  DenseMap<User *, uint64_t> &CallSiteToProfCountMap) const {
956  std::vector<User *> Users(DuplicateFunction->user_begin(),
957  DuplicateFunction->user_end());
958  Function *CurrentCaller = nullptr;
959  std::unique_ptr<BlockFrequencyInfo> TempBFI;
960  BlockFrequencyInfo *CurrentCallerBFI = nullptr;
961 
962  auto ComputeCurrBFI = [&,this](Function *Caller) {
963  // For the old pass manager:
964  if (!GetBFI) {
965  DominatorTree DT(*Caller);
966  LoopInfo LI(DT);
967  BranchProbabilityInfo BPI(*Caller, LI);
968  TempBFI.reset(new BlockFrequencyInfo(*Caller, BPI, LI));
969  CurrentCallerBFI = TempBFI.get();
970  } else {
971  // New pass manager:
972  CurrentCallerBFI = &(GetBFI(*Caller));
973  }
974  };
975 
976  for (User *User : Users) {
977  CallBase *CB = getSupportedCallBase(User);
978  Function *Caller = CB->getCaller();
979  if (CurrentCaller != Caller) {
980  CurrentCaller = Caller;
981  ComputeCurrBFI(Caller);
982  } else {
983  assert(CurrentCallerBFI && "CallerBFI is not set");
984  }
985  BasicBlock *CallBB = CB->getParent();
986  auto Count = CurrentCallerBFI->getBlockProfileCount(CallBB);
987  if (Count)
988  CallSiteToProfCountMap[User] = *Count;
989  else
990  CallSiteToProfCountMap[User] = 0;
991  }
992 }
993 
994 PartialInlinerImpl::FunctionCloner::FunctionCloner(
995  Function *F, FunctionOutliningInfo *OI, OptimizationRemarkEmitter &ORE,
996  function_ref<AssumptionCache *(Function &)> LookupAC,
998  : OrigFunc(F), ORE(ORE), LookupAC(LookupAC), GetTTI(GetTTI) {
999  ClonedOI = std::make_unique<FunctionOutliningInfo>();
1000 
1001  // Clone the function, so that we can hack away on it.
1002  ValueToValueMapTy VMap;
1003  ClonedFunc = CloneFunction(F, VMap);
1004 
1005  ClonedOI->ReturnBlock = cast<BasicBlock>(VMap[OI->ReturnBlock]);
1006  ClonedOI->NonReturnBlock = cast<BasicBlock>(VMap[OI->NonReturnBlock]);
1007  for (BasicBlock *BB : OI->Entries)
1008  ClonedOI->Entries.push_back(cast<BasicBlock>(VMap[BB]));
1009 
1010  for (BasicBlock *E : OI->ReturnBlockPreds) {
1011  BasicBlock *NewE = cast<BasicBlock>(VMap[E]);
1012  ClonedOI->ReturnBlockPreds.push_back(NewE);
1013  }
1014  // Go ahead and update all uses to the duplicate, so that we can just
1015  // use the inliner functionality when we're done hacking.
1016  F->replaceAllUsesWith(ClonedFunc);
1017 }
1018 
1019 PartialInlinerImpl::FunctionCloner::FunctionCloner(
1020  Function *F, FunctionOutliningMultiRegionInfo *OI,
1022  function_ref<AssumptionCache *(Function &)> LookupAC,
1024  : OrigFunc(F), ORE(ORE), LookupAC(LookupAC), GetTTI(GetTTI) {
1025  ClonedOMRI = std::make_unique<FunctionOutliningMultiRegionInfo>();
1026 
1027  // Clone the function, so that we can hack away on it.
1028  ValueToValueMapTy VMap;
1029  ClonedFunc = CloneFunction(F, VMap);
1030 
1031  // Go through all Outline Candidate Regions and update all BasicBlock
1032  // information.
1033  for (FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegionInfo :
1034  OI->ORI) {
1036  for (BasicBlock *BB : RegionInfo.Region)
1037  Region.push_back(cast<BasicBlock>(VMap[BB]));
1038 
1039  BasicBlock *NewEntryBlock = cast<BasicBlock>(VMap[RegionInfo.EntryBlock]);
1040  BasicBlock *NewExitBlock = cast<BasicBlock>(VMap[RegionInfo.ExitBlock]);
1041  BasicBlock *NewReturnBlock = nullptr;
1042  if (RegionInfo.ReturnBlock)
1043  NewReturnBlock = cast<BasicBlock>(VMap[RegionInfo.ReturnBlock]);
1044  FunctionOutliningMultiRegionInfo::OutlineRegionInfo MappedRegionInfo(
1045  Region, NewEntryBlock, NewExitBlock, NewReturnBlock);
1046  ClonedOMRI->ORI.push_back(MappedRegionInfo);
1047  }
1048  // Go ahead and update all uses to the duplicate, so that we can just
1049  // use the inliner functionality when we're done hacking.
1050  F->replaceAllUsesWith(ClonedFunc);
1051 }
1052 
1053 void PartialInlinerImpl::FunctionCloner::normalizeReturnBlock() const {
1054  auto GetFirstPHI = [](BasicBlock *BB) {
1055  BasicBlock::iterator I = BB->begin();
1056  PHINode *FirstPhi = nullptr;
1057  while (I != BB->end()) {
1058  PHINode *Phi = dyn_cast<PHINode>(I);
1059  if (!Phi)
1060  break;
1061  if (!FirstPhi) {
1062  FirstPhi = Phi;
1063  break;
1064  }
1065  }
1066  return FirstPhi;
1067  };
1068 
1069  // Shouldn't need to normalize PHIs if we're not outlining non-early return
1070  // blocks.
1071  if (!ClonedOI)
1072  return;
1073 
1074  // Special hackery is needed with PHI nodes that have inputs from more than
1075  // one extracted block. For simplicity, just split the PHIs into a two-level
1076  // sequence of PHIs, some of which will go in the extracted region, and some
1077  // of which will go outside.
1078  BasicBlock *PreReturn = ClonedOI->ReturnBlock;
1079  // only split block when necessary:
1080  PHINode *FirstPhi = GetFirstPHI(PreReturn);
1081  unsigned NumPredsFromEntries = ClonedOI->ReturnBlockPreds.size();
1082 
1083  if (!FirstPhi || FirstPhi->getNumIncomingValues() <= NumPredsFromEntries + 1)
1084  return;
1085 
1086  auto IsTrivialPhi = [](PHINode *PN) -> Value * {
1087  Value *CommonValue = PN->getIncomingValue(0);
1088  if (all_of(PN->incoming_values(),
1089  [&](Value *V) { return V == CommonValue; }))
1090  return CommonValue;
1091  return nullptr;
1092  };
1093 
1094  ClonedOI->ReturnBlock = ClonedOI->ReturnBlock->splitBasicBlock(
1095  ClonedOI->ReturnBlock->getFirstNonPHI()->getIterator());
1096  BasicBlock::iterator I = PreReturn->begin();
1097  Instruction *Ins = &ClonedOI->ReturnBlock->front();
1099  while (I != PreReturn->end()) {
1100  PHINode *OldPhi = dyn_cast<PHINode>(I);
1101  if (!OldPhi)
1102  break;
1103 
1104  PHINode *RetPhi =
1105  PHINode::Create(OldPhi->getType(), NumPredsFromEntries + 1, "", Ins);
1106  OldPhi->replaceAllUsesWith(RetPhi);
1107  Ins = ClonedOI->ReturnBlock->getFirstNonPHI();
1108 
1109  RetPhi->addIncoming(&*I, PreReturn);
1110  for (BasicBlock *E : ClonedOI->ReturnBlockPreds) {
1111  RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(E), E);
1112  OldPhi->removeIncomingValue(E);
1113  }
1114 
1115  // After incoming values splitting, the old phi may become trivial.
1116  // Keeping the trivial phi can introduce definition inside the outline
1117  // region which is live-out, causing necessary overhead (load, store
1118  // arg passing etc).
1119  if (auto *OldPhiVal = IsTrivialPhi(OldPhi)) {
1120  OldPhi->replaceAllUsesWith(OldPhiVal);
1121  DeadPhis.push_back(OldPhi);
1122  }
1123  ++I;
1124  }
1125  for (auto *DP : DeadPhis)
1126  DP->eraseFromParent();
1127 
1128  for (auto *E : ClonedOI->ReturnBlockPreds)
1129  E->getTerminator()->replaceUsesOfWith(PreReturn, ClonedOI->ReturnBlock);
1130 }
1131 
1132 bool PartialInlinerImpl::FunctionCloner::doMultiRegionFunctionOutlining() {
1133 
1134  auto ComputeRegionCost =
1136  InstructionCost Cost = 0;
1137  for (BasicBlock* BB : Region)
1138  Cost += computeBBInlineCost(BB, &GetTTI(*BB->getParent()));
1139  return Cost;
1140  };
1141 
1142  assert(ClonedOMRI && "Expecting OutlineInfo for multi region outline");
1143 
1144  if (ClonedOMRI->ORI.empty())
1145  return false;
1146 
1147  // The CodeExtractor needs a dominator tree.
1148  DominatorTree DT;
1149  DT.recalculate(*ClonedFunc);
1150 
1151  // Manually calculate a BlockFrequencyInfo and BranchProbabilityInfo.
1152  LoopInfo LI(DT);
1153  BranchProbabilityInfo BPI(*ClonedFunc, LI);
1154  ClonedFuncBFI.reset(new BlockFrequencyInfo(*ClonedFunc, BPI, LI));
1155 
1156  // Cache and recycle the CodeExtractor analysis to avoid O(n^2) compile-time.
1157  CodeExtractorAnalysisCache CEAC(*ClonedFunc);
1158 
1159  SetVector<Value *> Inputs, Outputs, Sinks;
1160  for (FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegionInfo :
1161  ClonedOMRI->ORI) {
1162  InstructionCost CurrentOutlinedRegionCost =
1163  ComputeRegionCost(RegionInfo.Region);
1164 
1165  CodeExtractor CE(RegionInfo.Region, &DT, /*AggregateArgs*/ false,
1166  ClonedFuncBFI.get(), &BPI,
1167  LookupAC(*RegionInfo.EntryBlock->getParent()),
1168  /* AllowVarargs */ false);
1169 
1170  CE.findInputsOutputs(Inputs, Outputs, Sinks);
1171 
1172  LLVM_DEBUG({
1173  dbgs() << "inputs: " << Inputs.size() << "\n";
1174  dbgs() << "outputs: " << Outputs.size() << "\n";
1175  for (Value *value : Inputs)
1176  dbgs() << "value used in func: " << *value << "\n";
1177  for (Value *output : Outputs)
1178  dbgs() << "instr used in func: " << *output << "\n";
1179  });
1180 
1181  // Do not extract regions that have live exit variables.
1182  if (Outputs.size() > 0 && !ForceLiveExit)
1183  continue;
1184 
1185  if (Function *OutlinedFunc = CE.extractCodeRegion(CEAC)) {
1186  CallBase *OCS = PartialInlinerImpl::getOneCallSiteTo(*OutlinedFunc);
1187  BasicBlock *OutliningCallBB = OCS->getParent();
1188  assert(OutliningCallBB->getParent() == ClonedFunc);
1189  OutlinedFunctions.push_back(std::make_pair(OutlinedFunc,OutliningCallBB));
1190  NumColdRegionsOutlined++;
1191  OutlinedRegionCost += CurrentOutlinedRegionCost;
1192 
1193  if (MarkOutlinedColdCC) {
1194  OutlinedFunc->setCallingConv(CallingConv::Cold);
1196  }
1197  } else
1198  ORE.emit([&]() {
1199  return OptimizationRemarkMissed(DEBUG_TYPE, "ExtractFailed",
1200  &RegionInfo.Region.front()->front())
1201  << "Failed to extract region at block "
1202  << ore::NV("Block", RegionInfo.Region.front());
1203  });
1204  }
1205 
1206  return !OutlinedFunctions.empty();
1207 }
1208 
1209 Function *
1210 PartialInlinerImpl::FunctionCloner::doSingleRegionFunctionOutlining() {
1211  // Returns true if the block is to be partial inlined into the caller
1212  // (i.e. not to be extracted to the out of line function)
1213  auto ToBeInlined = [&, this](BasicBlock *BB) {
1214  return BB == ClonedOI->ReturnBlock ||
1215  llvm::is_contained(ClonedOI->Entries, BB);
1216  };
1217 
1218  assert(ClonedOI && "Expecting OutlineInfo for single region outline");
1219  // The CodeExtractor needs a dominator tree.
1220  DominatorTree DT;
1221  DT.recalculate(*ClonedFunc);
1222 
1223  // Manually calculate a BlockFrequencyInfo and BranchProbabilityInfo.
1224  LoopInfo LI(DT);
1225  BranchProbabilityInfo BPI(*ClonedFunc, LI);
1226  ClonedFuncBFI.reset(new BlockFrequencyInfo(*ClonedFunc, BPI, LI));
1227 
1228  // Gather up the blocks that we're going to extract.
1229  std::vector<BasicBlock *> ToExtract;
1230  auto *ClonedFuncTTI = &GetTTI(*ClonedFunc);
1231  ToExtract.push_back(ClonedOI->NonReturnBlock);
1232  OutlinedRegionCost += PartialInlinerImpl::computeBBInlineCost(
1233  ClonedOI->NonReturnBlock, ClonedFuncTTI);
1234  for (BasicBlock &BB : *ClonedFunc)
1235  if (!ToBeInlined(&BB) && &BB != ClonedOI->NonReturnBlock) {
1236  ToExtract.push_back(&BB);
1237  // FIXME: the code extractor may hoist/sink more code
1238  // into the outlined function which may make the outlining
1239  // overhead (the difference of the outlined function cost
1240  // and OutliningRegionCost) look larger.
1241  OutlinedRegionCost += computeBBInlineCost(&BB, ClonedFuncTTI);
1242  }
1243 
1244  // Extract the body of the if.
1245  CodeExtractorAnalysisCache CEAC(*ClonedFunc);
1246  Function *OutlinedFunc =
1247  CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false,
1248  ClonedFuncBFI.get(), &BPI, LookupAC(*ClonedFunc),
1249  /* AllowVarargs */ true)
1250  .extractCodeRegion(CEAC);
1251 
1252  if (OutlinedFunc) {
1253  BasicBlock *OutliningCallBB =
1254  PartialInlinerImpl::getOneCallSiteTo(*OutlinedFunc)->getParent();
1255  assert(OutliningCallBB->getParent() == ClonedFunc);
1256  OutlinedFunctions.push_back(std::make_pair(OutlinedFunc, OutliningCallBB));
1257  } else
1258  ORE.emit([&]() {
1259  return OptimizationRemarkMissed(DEBUG_TYPE, "ExtractFailed",
1260  &ToExtract.front()->front())
1261  << "Failed to extract region at block "
1262  << ore::NV("Block", ToExtract.front());
1263  });
1264 
1265  return OutlinedFunc;
1266 }
1267 
1268 PartialInlinerImpl::FunctionCloner::~FunctionCloner() {
1269  // Ditch the duplicate, since we're done with it, and rewrite all remaining
1270  // users (function pointers, etc.) back to the original function.
1271  ClonedFunc->replaceAllUsesWith(OrigFunc);
1272  ClonedFunc->eraseFromParent();
1273  if (!IsFunctionInlined) {
1274  // Remove each function that was speculatively created if there is no
1275  // reference.
1276  for (auto FuncBBPair : OutlinedFunctions) {
1277  Function *Func = FuncBBPair.first;
1278  Func->eraseFromParent();
1279  }
1280  }
1281 }
1282 
1283 std::pair<bool, Function *> PartialInlinerImpl::unswitchFunction(Function &F) {
1284  if (F.hasAddressTaken())
1285  return {false, nullptr};
1286 
1287  // Let inliner handle it
1288  if (F.hasFnAttribute(Attribute::AlwaysInline))
1289  return {false, nullptr};
1290 
1291  if (F.hasFnAttribute(Attribute::NoInline))
1292  return {false, nullptr};
1293 
1294  if (PSI.isFunctionEntryCold(&F))
1295  return {false, nullptr};
1296 
1297  if (F.users().empty())
1298  return {false, nullptr};
1299 
1301 
1302  // Only try to outline cold regions if we have a profile summary, which
1303  // implies we have profiling information.
1304  if (PSI.hasProfileSummary() && F.hasProfileData() &&
1306  std::unique_ptr<FunctionOutliningMultiRegionInfo> OMRI =
1307  computeOutliningColdRegionsInfo(F, ORE);
1308  if (OMRI) {
1309  FunctionCloner Cloner(&F, OMRI.get(), ORE, LookupAssumptionCache, GetTTI);
1310 
1311  LLVM_DEBUG({
1312  dbgs() << "HotCountThreshold = " << PSI.getHotCountThreshold() << "\n";
1313  dbgs() << "ColdCountThreshold = " << PSI.getColdCountThreshold()
1314  << "\n";
1315  });
1316 
1317  bool DidOutline = Cloner.doMultiRegionFunctionOutlining();
1318 
1319  if (DidOutline) {
1320  LLVM_DEBUG({
1321  dbgs() << ">>>>>> Outlined (Cloned) Function >>>>>>\n";
1322  Cloner.ClonedFunc->print(dbgs());
1323  dbgs() << "<<<<<< Outlined (Cloned) Function <<<<<<\n";
1324  });
1325 
1326  if (tryPartialInline(Cloner))
1327  return {true, nullptr};
1328  }
1329  }
1330  }
1331 
1332  // Fall-thru to regular partial inlining if we:
1333  // i) can't find any cold regions to outline, or
1334  // ii) can't inline the outlined function anywhere.
1335  std::unique_ptr<FunctionOutliningInfo> OI = computeOutliningInfo(F);
1336  if (!OI)
1337  return {false, nullptr};
1338 
1339  FunctionCloner Cloner(&F, OI.get(), ORE, LookupAssumptionCache, GetTTI);
1340  Cloner.normalizeReturnBlock();
1341 
1342  Function *OutlinedFunction = Cloner.doSingleRegionFunctionOutlining();
1343 
1344  if (!OutlinedFunction)
1345  return {false, nullptr};
1346 
1347  if (tryPartialInline(Cloner))
1348  return {true, OutlinedFunction};
1349 
1350  return {false, nullptr};
1351 }
1352 
1353 bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) {
1354  if (Cloner.OutlinedFunctions.empty())
1355  return false;
1356 
1357  int SizeCost = 0;
1358  BlockFrequency WeightedRcost;
1359  int NonWeightedRcost;
1360 
1361  auto OutliningCosts = computeOutliningCosts(Cloner);
1362  assert(std::get<0>(OutliningCosts).isValid() &&
1363  std::get<1>(OutliningCosts).isValid() && "Expected valid costs");
1364 
1365  SizeCost = *std::get<0>(OutliningCosts).getValue();
1366  NonWeightedRcost = *std::get<1>(OutliningCosts).getValue();
1367 
1368  // Only calculate RelativeToEntryFreq when we are doing single region
1369  // outlining.
1370  BranchProbability RelativeToEntryFreq;
1371  if (Cloner.ClonedOI)
1372  RelativeToEntryFreq = getOutliningCallBBRelativeFreq(Cloner);
1373  else
1374  // RelativeToEntryFreq doesn't make sense when we have more than one
1375  // outlined call because each call will have a different relative frequency
1376  // to the entry block. We can consider using the average, but the
1377  // usefulness of that information is questionable. For now, assume we never
1378  // execute the calls to outlined functions.
1379  RelativeToEntryFreq = BranchProbability(0, 1);
1380 
1381  WeightedRcost = BlockFrequency(NonWeightedRcost) * RelativeToEntryFreq;
1382 
1383  // The call sequence(s) to the outlined function(s) are larger than the sum of
1384  // the original outlined region size(s), it does not increase the chances of
1385  // inlining the function with outlining (The inliner uses the size increase to
1386  // model the cost of inlining a callee).
1387  if (!SkipCostAnalysis && Cloner.OutlinedRegionCost < SizeCost) {
1388  OptimizationRemarkEmitter OrigFuncORE(Cloner.OrigFunc);
1389  DebugLoc DLoc;
1390  BasicBlock *Block;
1391  std::tie(DLoc, Block) = getOneDebugLoc(*Cloner.ClonedFunc);
1392  OrigFuncORE.emit([&]() {
1393  return OptimizationRemarkAnalysis(DEBUG_TYPE, "OutlineRegionTooSmall",
1394  DLoc, Block)
1395  << ore::NV("Function", Cloner.OrigFunc)
1396  << " not partially inlined into callers (Original Size = "
1397  << ore::NV("OutlinedRegionOriginalSize", Cloner.OutlinedRegionCost)
1398  << ", Size of call sequence to outlined function = "
1399  << ore::NV("NewSize", SizeCost) << ")";
1400  });
1401  return false;
1402  }
1403 
1404  assert(Cloner.OrigFunc->users().empty() &&
1405  "F's users should all be replaced!");
1406 
1407  std::vector<User *> Users(Cloner.ClonedFunc->user_begin(),
1408  Cloner.ClonedFunc->user_end());
1409 
1410  DenseMap<User *, uint64_t> CallSiteToProfCountMap;
1411  auto CalleeEntryCount = Cloner.OrigFunc->getEntryCount();
1412  if (CalleeEntryCount)
1413  computeCallsiteToProfCountMap(Cloner.ClonedFunc, CallSiteToProfCountMap);
1414 
1415  uint64_t CalleeEntryCountV =
1416  (CalleeEntryCount ? CalleeEntryCount.getCount() : 0);
1417 
1418  bool AnyInline = false;
1419  for (User *User : Users) {
1420  CallBase *CB = getSupportedCallBase(User);
1421 
1422  if (isLimitReached())
1423  continue;
1424 
1425  OptimizationRemarkEmitter CallerORE(CB->getCaller());
1426  if (!shouldPartialInline(*CB, Cloner, WeightedRcost, CallerORE))
1427  continue;
1428 
1429  // Construct remark before doing the inlining, as after successful inlining
1430  // the callsite is removed.
1431  OptimizationRemark OR(DEBUG_TYPE, "PartiallyInlined", CB);
1432  OR << ore::NV("Callee", Cloner.OrigFunc) << " partially inlined into "
1433  << ore::NV("Caller", CB->getCaller());
1434 
1435  InlineFunctionInfo IFI(nullptr, GetAssumptionCache, &PSI);
1436  // We can only forward varargs when we outlined a single region, else we
1437  // bail on vararg functions.
1438  if (!InlineFunction(*CB, IFI, nullptr, true,
1439  (Cloner.ClonedOI ? Cloner.OutlinedFunctions.back().first
1440  : nullptr))
1441  .isSuccess())
1442  continue;
1443 
1444  CallerORE.emit(OR);
1445 
1446  // Now update the entry count:
1447  if (CalleeEntryCountV && CallSiteToProfCountMap.count(User)) {
1448  uint64_t CallSiteCount = CallSiteToProfCountMap[User];
1449  CalleeEntryCountV -= std::min(CalleeEntryCountV, CallSiteCount);
1450  }
1451 
1452  AnyInline = true;
1453  NumPartialInlining++;
1454  // Update the stats
1455  if (Cloner.ClonedOI)
1456  NumPartialInlined++;
1457  else
1458  NumColdOutlinePartialInlined++;
1459  }
1460 
1461  if (AnyInline) {
1462  Cloner.IsFunctionInlined = true;
1463  if (CalleeEntryCount)
1464  Cloner.OrigFunc->setEntryCount(
1465  CalleeEntryCount.setCount(CalleeEntryCountV));
1466  OptimizationRemarkEmitter OrigFuncORE(Cloner.OrigFunc);
1467  OrigFuncORE.emit([&]() {
1468  return OptimizationRemark(DEBUG_TYPE, "PartiallyInlined", Cloner.OrigFunc)
1469  << "Partially inlined into at least one caller";
1470  });
1471  }
1472 
1473  return AnyInline;
1474 }
1475 
1476 bool PartialInlinerImpl::run(Module &M) {
1478  return false;
1479 
1480  std::vector<Function *> Worklist;
1481  Worklist.reserve(M.size());
1482  for (Function &F : M)
1483  if (!F.use_empty() && !F.isDeclaration())
1484  Worklist.push_back(&F);
1485 
1486  bool Changed = false;
1487  while (!Worklist.empty()) {
1488  Function *CurrFunc = Worklist.back();
1489  Worklist.pop_back();
1490 
1491  if (CurrFunc->use_empty())
1492  continue;
1493 
1494  bool Recursive = false;
1495  for (User *U : CurrFunc->users())
1496  if (Instruction *I = dyn_cast<Instruction>(U))
1497  if (I->getParent()->getParent() == CurrFunc) {
1498  Recursive = true;
1499  break;
1500  }
1501  if (Recursive)
1502  continue;
1503 
1504  std::pair<bool, Function *> Result = unswitchFunction(*CurrFunc);
1505  if (Result.second)
1506  Worklist.push_back(Result.second);
1507  Changed |= Result.first;
1508  }
1509 
1510  return Changed;
1511 }
1512 
1514 
1515 INITIALIZE_PASS_BEGIN(PartialInlinerLegacyPass, "partial-inliner",
1516  "Partial Inliner", false, false)
1521 INITIALIZE_PASS_END(PartialInlinerLegacyPass, "partial-inliner",
1522  "Partial Inliner", false, false)
1523 
1525  return new PartialInlinerLegacyPass();
1526 }
1527 
1528 PreservedAnalyses PartialInlinerPass::run(Module &M,
1529  ModuleAnalysisManager &AM) {
1530  auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
1531 
1532  auto GetAssumptionCache = [&FAM](Function &F) -> AssumptionCache & {
1533  return FAM.getResult<AssumptionAnalysis>(F);
1534  };
1535 
1536  auto LookupAssumptionCache = [&FAM](Function &F) -> AssumptionCache * {
1538  };
1539 
1540  auto GetBFI = [&FAM](Function &F) -> BlockFrequencyInfo & {
1542  };
1543 
1544  auto GetTTI = [&FAM](Function &F) -> TargetTransformInfo & {
1545  return FAM.getResult<TargetIRAnalysis>(F);
1546  };
1547 
1548  auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
1550  };
1551 
1553 
1554  if (PartialInlinerImpl(GetAssumptionCache, LookupAssumptionCache, GetTTI,
1555  GetTLI, PSI, GetBFI)
1556  .run(M))
1557  return PreservedAnalyses::none();
1558  return PreservedAnalyses::all();
1559 }
llvm::InstructionCost
Definition: InstructionCost.h:29
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:155
llvm::InlineCost::isAlways
bool isAlways() const
Definition: InlineCost.h:124
SkipCostAnalysis
static cl::opt< bool > SkipCostAnalysis("skip-partial-inlining-cost-analysis", cl::init(false), cl::ZeroOrMore, cl::ReallyHidden, cl::desc("Skip Cost Analysis"))
llvm::InlineCost::getCost
int getCost() const
Get the inline cost estimate.
Definition: InlineCost.h:130
llvm::BasicBlock::end
iterator end()
Definition: BasicBlock.h:298
llvm::TargetIRAnalysis
Analysis pass providing the TargetTransformInfo.
Definition: TargetTransformInfo.h:2331
llvm::OptimizationRemarkMissed
Diagnostic information for missed-optimization remarks.
Definition: DiagnosticInfo.h:730
llvm::predecessors
pred_range predecessors(BasicBlock *BB)
Definition: CFG.h:127
OutlineRegionFreqPercent
static cl::opt< int > OutlineRegionFreqPercent("outline-region-freq-percent", cl::init(75), cl::Hidden, cl::ZeroOrMore, cl::desc("Relative frequency of outline region to " "the entry block"))
llvm
This file implements support for optimizing divisions by a constant.
Definition: AllocatorList.h:23
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
Optional.h
ValueMapper.h
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:113
llvm::ISD::OR
@ OR
Definition: ISDOpcodes.h:633
llvm::BasicBlock::iterator
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:90
llvm::ModulePass
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
Definition: Pass.h:238
llvm::InstructionCost::map
auto map(const Function &F) const -> InstructionCost
Definition: InstructionCost.h:245
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
IntrinsicInst.h
llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:783
PartialInlining.h
llvm::succ_end
Interval::succ_iterator succ_end(Interval *I)
Definition: Interval.h:102
T
llvm::Function
Definition: Function.h:62
DEBUG_TYPE
#define DEBUG_TYPE
Definition: PartialInlining.cpp:66
Pass.h
llvm::CodeExtractor::extractCodeRegion
Function * extractCodeRegion(const CodeExtractorAnalysisCache &CEAC)
Perform the extraction, returning the new function.
Definition: CodeExtractor.cpp:1590
llvm::SetVector::size
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:77
llvm::TargetTransformInfoWrapperPass::getTTI
TargetTransformInfo & getTTI(const Function &F)
Definition: TargetTransformInfo.cpp:1164
llvm::ProfileSummaryInfo::hasProfileSummary
bool hasProfileSummary() const
Returns true if profile summary is available.
Definition: ProfileSummaryInfo.h:68
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
Statistic.h
output
Current output
Definition: README.txt:1350
ErrorHandling.h
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:168
llvm::TargetTransformInfo::getIntrinsicInstrCost
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const
Definition: TargetTransformInfo.cpp:868
llvm::PHINode::removeIncomingValue
Value * removeIncomingValue(unsigned Idx, bool DeletePHIIfEmpty=true)
Remove an incoming value.
Definition: Instructions.cpp:113
OptimizationRemarkEmitter.h
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:151
FAM
FunctionAnalysisManager FAM
Definition: PassBuilderBindings.cpp:59
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
MinBlockCounterExecution
static cl::opt< unsigned > MinBlockCounterExecution("min-block-execution", cl::init(100), cl::Hidden, cl::desc("Minimum block executions to consider " "its BranchProbabilityInfo valid"))
llvm::PreservedAnalyses::none
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: PassManager.h:158
BlockFrequency.h
DenseMap.h
Module.h
llvm::sys::path::end
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:233
llvm::sys::path::begin
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:224
llvm::BasicBlock::getSingleSuccessor
const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
Definition: BasicBlock.cpp:298
llvm::ProfileSummaryInfo::isFunctionEntryCold
bool isFunctionEntryCold(const Function *F) const
Returns true if F has cold function entry.
Definition: ProfileSummaryInfo.cpp:222
llvm::isInlineViable
InlineResult isInlineViable(Function &Callee)
Minimal filter to detect invalid constructs for inlining.
Definition: InlineCost.cpp:2960
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::count
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:145
llvm::BranchProbability::getBranchProbability
static BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
Definition: BranchProbability.cpp:53
llvm::ore::NV
DiagnosticInfoOptimizationBase::Argument NV
Definition: OptimizationRemarkEmitter.h:136
llvm::Value::user_begin
user_iterator user_begin()
Definition: Value.h:397
CodeExtractor.h
llvm::BasicBlock::getSinglePredecessor
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:268
STLExtras.h
llvm::InlineCost::isNever
bool isNever() const
Definition: InlineCost.h:125
llvm::successors
succ_range successors(Instruction *I)
Definition: CFG.h:262
llvm::FastMathFlags
Convenience struct for specifying and reasoning about fast-math flags.
Definition: Operator.h:161
llvm::cl::ReallyHidden
@ ReallyHidden
Definition: CommandLine.h:144
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
F
#define F(x, y, z)
Definition: MD5.cpp:56
ColdBranchRatio
static cl::opt< float > ColdBranchRatio("cold-branch-ratio", cl::init(0.1), cl::Hidden, cl::desc("Minimum BranchProbability to consider a region cold."))
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
Instruction.h
CommandLine.h
llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1551
llvm::BlockFrequencyInfo
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Definition: BlockFrequencyInfo.h:37
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
llvm::getInlineCost
InlineCost getInlineCost(CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI, function_ref< AssumptionCache &(Function &)> GetAssumptionCache, function_ref< const TargetLibraryInfo &(Function &)> GetTLI, function_ref< BlockFrequencyInfo &(Function &)> GetBFI=nullptr, ProfileSummaryInfo *PSI=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
Get an InlineCost object representing the cost of inlining this callsite.
Definition: InlineCost.cpp:2788
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::User
Definition: User.h:44
llvm::succ_size
unsigned succ_size(const Instruction *I)
Definition: CFG.h:259
Intrinsics.h
InstrTypes.h
llvm::CallBase::getCalledFunction
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation.
Definition: InstrTypes.h:1383
llvm::BasicBlock::begin
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:296
llvm::BranchProbabilityInfo
Analysis providing branch probability information.
Definition: BranchProbabilityInfo.h:115
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::InlineCost
Represents the cost of inlining a function.
Definition: InlineCost.h:82
llvm::ms_demangle::QualifierMangleMode::Result
@ Result
TargetLibraryInfo.h
DenseSet.h
false
Definition: StackSlotColoring.cpp:142
llvm::AssumptionCacheTracker::lookupAssumptionCache
AssumptionCache * lookupAssumptionCache(Function &F)
Return the cached assumptions for a function if it has already been scanned.
Definition: AssumptionCache.cpp:289
MaxNumPartialInlining
static cl::opt< int > MaxNumPartialInlining("max-partial-inlining", cl::init(-1), cl::Hidden, cl::ZeroOrMore, cl::desc("Max number of partial inlining. The default is unlimited"))
llvm::PHINode::getIncomingValueForBlock
Value * getIncomingValueForBlock(const BasicBlock *BB) const
Definition: Instructions.h:2818
llvm::pdb::PDB_SymType::Caller
@ Caller
llvm::Instruction
Definition: Instruction.h:45
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::InlineResult::isSuccess
bool isSuccess() const
Definition: InlineCost.h:168
DebugLoc.h
llvm::getInlineParams
InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options.
Definition: InlineCost.cpp:3072
llvm::InlineConstants::InstrCost
const int InstrCost
Definition: InlineCost.h:45
llvm::BlockFrequencyAnalysis
Analysis pass which computes BlockFrequencyInfo.
Definition: BlockFrequencyInfo.h:112
llvm::PHINode::getNumIncomingValues
unsigned getNumIncomingValues() const
Return the number of incoming edges.
Definition: Instructions.h:2725
MinRegionSizeRatio
static cl::opt< float > MinRegionSizeRatio("min-region-size-ratio", cl::init(0.1), cl::Hidden, cl::desc("Minimum ratio comparing relative sizes of each " "outline candidate and original function"))
llvm::Value::use_empty
bool use_empty() const
Definition: Value.h:344
llvm::IntrinsicCostAttributes
Definition: TargetTransformInfo.h:118
BranchProbability.h
llvm::CallBase::getCaller
Function * getCaller()
Helper to get the caller (the parent function).
Definition: Instructions.cpp:282
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
CFG.h
LoopInfo.h
llvm::ProfileSummaryInfo
Analysis providing profile information.
Definition: ProfileSummaryInfo.h:39
llvm::InvokeInst
Invoke instruction.
Definition: Instructions.h:3749
llvm::cl::ZeroOrMore
@ ZeroOrMore
Definition: CommandLine.h:120
llvm::function_ref
An efficient, type-erasing, non-owning reference to a callable.
Definition: STLExtras.h:168
llvm::Value::user_end
user_iterator user_end()
Definition: Value.h:405
llvm::DenseSet
Implements a dense probed hash-table based set.
Definition: DenseSet.h:268
BasicBlock.h
llvm::cl::opt< bool >
llvm::BlockFrequency
Definition: BlockFrequency.h:24
BranchProbabilityInfo.h
llvm::TargetLibraryInfoWrapperPass
Definition: TargetLibraryInfo.h:465
uint64_t
ProfileSummaryInfo.h
llvm::TargetTransformInfoWrapperPass
Wrapper pass for TargetTransformInfo.
Definition: TargetTransformInfo.h:2387
llvm::AssumptionAnalysis
A function analysis which provides an AssumptionCache.
Definition: AssumptionCache.h:169
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
IPO.h
llvm::PHINode::addIncoming
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Definition: Instructions.h:2783
llvm::DenseMap
Definition: DenseMap.h:714
I
#define I(x, y, z)
Definition: MD5.cpp:59
Cloning.h
llvm::succ_begin
Interval::succ_iterator succ_begin(Interval *I)
succ_begin/succ_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:99
llvm::BlockFrequency::getFrequency
uint64_t getFrequency() const
Returns the frequency as a fixpoint number scaled by the entry frequency.
Definition: BlockFrequency.h:35
MarkOutlinedColdCC
static cl::opt< bool > MarkOutlinedColdCC("pi-mark-coldcc", cl::init(false), cl::Hidden, cl::desc("Mark outline function calls with ColdCC"))
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:441
llvm::ProfileSummaryInfo::getColdCountThreshold
uint64_t getColdCountThreshold() const
Returns ColdCountThreshold if set.
Definition: ProfileSummaryInfo.h:176
llvm::ProfileSummaryInfoWrapperPass
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
Definition: ProfileSummaryInfo.h:193
llvm::InlineCost::getCostDelta
int getCostDelta() const
Get the cost delta from the threshold for inlining.
Definition: InlineCost.h:154
llvm::is_contained
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
Definition: STLExtras.h:1616
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
SI
StandardInstrumentations SI(Debug, VerifyEach)
llvm::OptimizationRemarkEmitter::emit
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Definition: OptimizationRemarkEmitter.cpp:77
InlineCost.h
llvm::ProfileSummaryInfo::hasInstrumentationProfile
bool hasInstrumentationProfile() const
Returns true if module M has instrumentation profile.
Definition: ProfileSummaryInfo.h:77
llvm::WinEH::EncodingType::CE
@ CE
Windows NT (Windows on ARM)
inliner
partial inliner
Definition: PartialInlining.cpp:1521
llvm::CallingConv::Cold
@ Cold
Definition: CallingConv.h:48
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
OtherSucc
static MachineBasicBlock * OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ)
Definition: ARMISelLowering.cpp:10958
DisablePartialInlining
static cl::opt< bool > DisablePartialInlining("disable-partial-inlining", cl::init(false), cl::Hidden, cl::desc("Disable partial inlining"))
hasProfileData
static bool hasProfileData(const Function &F, const FunctionOutliningInfo &OI)
Definition: PartialInlining.cpp:717
llvm::ProfileSummaryAnalysis
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Definition: ProfileSummaryInfo.h:211
llvm::initializePartialInlinerLegacyPassPass
void initializePartialInlinerLegacyPassPass(PassRegistry &)
MaxNumInlineBlocks
static cl::opt< unsigned > MaxNumInlineBlocks("max-num-inline-blocks", cl::init(5), cl::Hidden, cl::desc("Max number of blocks to be partially inlined"))
llvm::DominatorTreeBase::recalculate
void recalculate(ParentType &Func)
recalculate - compute a dominator tree for the given function
Definition: GenericDomTree.h:778
llvm::AssumptionCacheTracker
An immutable pass that tracks lazily created AssumptionCache objects.
Definition: AssumptionCache.h:200
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::LoopInfo
Definition: LoopInfo.h:1083
None.h
llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: OptimizationRemarkEmitter.h:33
llvm::min
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:357
llvm::Function::setCallingConv
void setCallingConv(CallingConv::ID CC)
Definition: Function.h:244
llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:41
llvm::TargetTransformInfo::TCK_SizeAndLatency
@ TCK_SizeAndLatency
The weighted sum of size and latency.
Definition: TargetTransformInfo.h:215
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:134
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
llvm::Value::replaceAllUsesWith
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:532
llvm::BranchProbability
Definition: BranchProbability.h:30
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::CodeExtractor
Utility class for extracting code into a new function.
Definition: CodeExtractor.h:85
llvm::RegionInfo
Definition: RegionInfo.h:900
llvm::CloneFunction
Function * CloneFunction(Function *F, ValueToValueMapTy &VMap, ClonedCodeInfo *CodeInfo=nullptr)
Return a copy of the specified function and add it to that function's module.
Definition: CloneFunction.cpp:283
BlockFrequencyInfo.h
llvm::AMDGPUISD::BFI
@ BFI
Definition: AMDGPUISelLowering.h:421
llvm::OptimizationRemarkAnalysis
Diagnostic information for optimization analysis remarks.
Definition: DiagnosticInfo.h:776
llvm::ifs::IFSSymbolType::Func
@ Func
llvm::ValueMap< const Value *, WeakTrackingVH >
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:152
Callee
amdgpu Simplify well known AMD library false FunctionCallee Callee
Definition: AMDGPULibCalls.cpp:206
ForceLiveExit
static cl::opt< bool > ForceLiveExit("pi-force-live-exit-outline", cl::init(false), cl::Hidden, cl::desc("Force outline regions with live exits"))
Attributes.h
llvm::AssumptionCacheTracker::getAssumptionCache
AssumptionCache & getAssumptionCache(Function &F)
Get the cached assumptions for a function.
Definition: AssumptionCache.cpp:272
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:161
llvm::ISD::BR
@ BR
Control flow instructions. These all have token chains.
Definition: ISDOpcodes.h:937
llvm::Function::back
const BasicBlock & back() const
Definition: Function.h:743
llvm::AnalysisManager::getCachedResult
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
Definition: PassManager.h:802
Casting.h
DiagnosticInfo.h
Function.h
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:221
llvm::InlineFunctionInfo
This class captures the data input to the InlineFunction call, and records the auxiliary results prod...
Definition: Cloning.h:201
DP
So we should use XX3Form_Rcr to implement instrinsic Convert DP outs ins xscvdpsp No builtin are required Round &Convert QP DP(dword[1] is set to zero) No builtin are required Round to Quad Precision because you need to assign rounding mode in instruction Provide builtin(set f128:$vT,(int_ppc_vsx_xsrqpi f128:$vB))(set f128 yields< n x< ty > >< result > yields< ty >< result > No builtin are required Load Store load store see def memrix16 in PPCInstrInfo td Load Store Vector load store outs ins lxsdx set load store with conversion from to DP
Definition: README_P9.txt:520
Inliner
partial Partial Inliner
Definition: PartialInlining.cpp:1522
llvm::CodeExtractorAnalysisCache
A cache for the CodeExtractor analysis.
Definition: CodeExtractor.h:46
llvm::ProfileSummaryInfo::isColdBlock
bool isColdBlock(const BasicBlock *BB, BlockFrequencyInfo *BFI) const
Returns true if BasicBlock BB is considered cold.
Definition: ProfileSummaryInfo.cpp:332
DisableMultiRegionPartialInline
static cl::opt< bool > DisableMultiRegionPartialInline("disable-mr-partial-inlining", cl::init(false), cl::Hidden, cl::desc("Disable multi-region partial inlining"))
llvm::OptimizationRemark
Diagnostic information for applied optimization remarks.
Definition: DiagnosticInfo.h:685
isValid
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
Definition: RustDemangle.cpp:217
Instructions.h
llvm::getCallsiteCost
int getCallsiteCost(CallBase &Call, const DataLayout &DL)
Return the cost associated with a callsite, including parameter passing and the call/return instructi...
Definition: InlineCost.cpp:2755
SmallVector.h
llvm::Instruction::getDebugLoc
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:370
User.h
llvm::BlockFrequencyInfo::getBlockProfileCount
Optional< uint64_t > getBlockProfileCount(const BasicBlock *BB, bool AllowSynthetic=false) const
Returns the estimated profile count of BB.
Definition: BlockFrequencyInfo.cpp:209
Dominators.h
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:94
llvm::max
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:340
Users
iv Induction Variable Users
Definition: IVUsers.cpp:52
TargetTransformInfo.h
llvm::PHINode
Definition: Instructions.h:2633
llvm::ProfileSummaryInfo::getHotCountThreshold
uint64_t getHotCountThreshold() const
Returns HotCountThreshold if set.
Definition: ProfileSummaryInfo.h:172
llvm::MipsISD::Ins
@ Ins
Definition: MipsISelLowering.h:157
llvm::SmallVectorImpl< BasicBlock * >
llvm::InlineFunction
InlineResult InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, AAResults *CalleeAAR=nullptr, bool InsertLifetime=true, Function *ForwardVarArgsTo=nullptr)
This function inlines the called function into the basic block of the caller.
Definition: InlineFunction.cpp:1754
llvm::CallBase
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1161
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:44
llvm::createPartialInliningPass
ModulePass * createPartialInliningPass()
createPartialInliningPass - This pass inlines parts of functions.
Definition: PartialInlining.cpp:1524
llvm::InnerAnalysisManagerProxy
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:940
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1475
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(PartialInlinerLegacyPass, "partial-inliner", "Partial Inliner", false, false) INITIALIZE_PASS_END(PartialInlinerLegacyPass
ExtraOutliningPenalty
static cl::opt< unsigned > ExtraOutliningPenalty("partial-inlining-extra-penalty", cl::init(0), cl::Hidden, cl::desc("A debug option to add additional penalty to the computed one."))
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::SwitchInst
Multiway switch.
Definition: Instructions.h:3212
llvm::DebugLoc
A debug info location.
Definition: DebugLoc.h:33
llvm::cl::desc
Definition: CommandLine.h:412
llvm::Region
Definition: RegionInfo.h:889
llvm::BranchInst
Conditional or Unconditional Branch instruction.
Definition: Instructions.h:3068
llvm::SetVector< Value * >
llvm::pdb::PDB_SymType::Block
@ Block
InitializePasses.h
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::TargetLibraryAnalysis
Analysis pass providing the TargetLibraryInfo.
Definition: TargetLibraryInfo.h:440
llvm::Value::users
iterator_range< user_iterator > users()
Definition: Value.h:421
llvm::CallBase::setCallingConv
void setCallingConv(CallingConv::ID CC)
Definition: InstrTypes.h:1443
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:37