PartialInlining.cpp (LLVM 20.0.0git)
1//===- PartialInlining.cpp - Inline parts of functions --------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass performs partial inlining, typically by inlining an if statement
10// that surrounds the body of the function.
11//
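// For example (illustrative C, not actual IR):
//
//   void foo() {
//     if (!cheap_test())
//       return;              // early return guarding the heavy body
//     ...heavy body...
//   }
//
// The heavy body is extracted into a separate (outlined) function, and the
// cheap guard plus a call to that outlined function are inlined into foo's
// callers, so a caller only pays for the call when the guard passes.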
12//===----------------------------------------------------------------------===//
13
14#include "llvm/Transforms/IPO/PartialInlining.h"
15#include "llvm/ADT/DenseMap.h"
16#include "llvm/ADT/DenseSet.h"
17#include "llvm/ADT/DepthFirstIterator.h"
18#include "llvm/ADT/STLExtras.h"
19#include "llvm/ADT/SmallVector.h"
20#include "llvm/ADT/Statistic.h"
21#include "llvm/Analysis/BlockFrequencyInfo.h"
22#include "llvm/Analysis/BranchProbabilityInfo.h"
23#include "llvm/Analysis/InlineCost.h"
24#include "llvm/Analysis/LoopInfo.h"
25#include "llvm/Analysis/OptimizationRemarkEmitter.h"
26#include "llvm/Analysis/ProfileSummaryInfo.h"
27#include "llvm/Analysis/TargetLibraryInfo.h"
28#include "llvm/Analysis/TargetTransformInfo.h"
29#include "llvm/IR/Attributes.h"
30#include "llvm/IR/BasicBlock.h"
31#include "llvm/IR/CFG.h"
32#include "llvm/IR/DebugLoc.h"
34#include "llvm/IR/Dominators.h"
35#include "llvm/IR/Function.h"
36#include "llvm/IR/InstrTypes.h"
37#include "llvm/IR/Instruction.h"
38#include "llvm/IR/Instructions.h"
39#include "llvm/IR/IntrinsicInst.h"
40#include "llvm/IR/Intrinsics.h"
41#include "llvm/IR/Module.h"
42#include "llvm/IR/Operator.h"
43#include "llvm/IR/ProfDataUtils.h"
44#include "llvm/IR/User.h"
50#include "llvm/Transforms/IPO.h"
51#include "llvm/Transforms/Utils/Cloning.h"
52#include "llvm/Transforms/Utils/CodeExtractor.h"
53#include "llvm/Transforms/Utils/ValueMapper.h"
54#include <algorithm>
55#include <cassert>
56#include <cstdint>
57#include <memory>
58#include <tuple>
59#include <vector>
60
61using namespace llvm;
62
63#define DEBUG_TYPE "partial-inlining"
64
65STATISTIC(NumPartialInlined,
66 "Number of callsites functions partially inlined into.");
67STATISTIC(NumColdOutlinePartialInlined, "Number of times functions with "
68 "cold outlined regions were partially "
69 "inlined into its caller(s).");
70STATISTIC(NumColdRegionsFound,
71 "Number of cold single entry/exit regions found.");
72STATISTIC(NumColdRegionsOutlined,
73 "Number of cold single entry/exit regions outlined.");
74
75// Command line option to disable partial-inlining. The default is false:
76static cl::opt<bool>
77 DisablePartialInlining("disable-partial-inlining", cl::init(false),
78 cl::Hidden, cl::desc("Disable partial inlining"));
79// Command line option to disable multi-region partial-inlining. The default is
80// false:
82 "disable-mr-partial-inlining", cl::init(false), cl::Hidden,
83 cl::desc("Disable multi-region partial inlining"));
84
85// Command line option to force outlining in regions with live exit variables.
86// The default is false:
87static cl::opt<bool>
88 ForceLiveExit("pi-force-live-exit-outline", cl::init(false), cl::Hidden,
89 cl::desc("Force outline regions with live exits"));
90
91// Command line option to enable marking outline functions with Cold Calling
92// Convention. The default is false:
93static cl::opt<bool>
94 MarkOutlinedColdCC("pi-mark-coldcc", cl::init(false), cl::Hidden,
95 cl::desc("Mark outline function calls with ColdCC"));
96
97// This is an option used by testing:
98static cl::opt<bool> SkipCostAnalysis("skip-partial-inlining-cost-analysis",
99
101 cl::desc("Skip Cost Analysis"));
102// Used to determine if a cold region is worth outlining based on
103// its inlining cost compared to the original function. Default is set at 10%.
104// ie. if the cold region reduces the inlining cost of the original function by
105// at least 10%.
107 "min-region-size-ratio", cl::init(0.1), cl::Hidden,
108 cl::desc("Minimum ratio comparing relative sizes of each "
109 "outline candidate and original function"));
110// Used to tune the minimum number of execution counts needed in the predecessor
111// block to the cold edge. ie. confidence interval.
112static cl::opt<unsigned>
113 MinBlockCounterExecution("min-block-execution", cl::init(100), cl::Hidden,
114 cl::desc("Minimum block executions to consider "
115 "its BranchProbabilityInfo valid"));
116// Used to determine when an edge is considered cold. Default is set to 10%. ie.
117// if the branch probability is 10% or less, then it is deemed as 'cold'.
119 "cold-branch-ratio", cl::init(0.1), cl::Hidden,
120 cl::desc("Minimum BranchProbability to consider a region cold."));
121
123 "max-num-inline-blocks", cl::init(5), cl::Hidden,
124 cl::desc("Max number of blocks to be partially inlined"));
125
126// Command line option to set the maximum number of partial inlining allowed
127// for the module. The default value of -1 means no limit.
129 "max-partial-inlining", cl::init(-1), cl::Hidden,
130 cl::desc("Max number of partial inlining. The default is unlimited"));
131
132// Used only when PGO or user annotated branch data is absent. It is
133// the least value that is used to weigh the outline region. If BFI
134// produces larger value, the BFI value will be used.
135static cl::opt<int>
136 OutlineRegionFreqPercent("outline-region-freq-percent", cl::init(75),
137 cl::Hidden,
138 cl::desc("Relative frequency of outline region to "
139 "the entry block"));
140
142 "partial-inlining-extra-penalty", cl::init(0), cl::Hidden,
143 cl::desc("A debug option to add additional penalty to the computed one."));
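// For illustration, these knobs are typically exercised via 'opt', e.g.:
//   opt -passes=partial-inliner -max-num-inline-blocks=3 -S input.ll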
144
145namespace {
146
147struct FunctionOutliningInfo {
148 FunctionOutliningInfo() = default;
149
150 // Returns the number of blocks to be inlined including all blocks
151 // in Entries and one return block.
152 unsigned getNumInlinedBlocks() const { return Entries.size() + 1; }
153
154 // A set of blocks including the function entry that guard
155 // the region to be outlined.
156 SmallVector<BasicBlock *, 4> Entries;
157
158 // The return block that is not included in the outlined region.
159 BasicBlock *ReturnBlock = nullptr;
160
161 // The dominating block of the region to be outlined.
162 BasicBlock *NonReturnBlock = nullptr;
163
164 // The set of blocks in Entries that are predecessors to ReturnBlock
165 SmallVector<BasicBlock *, 4> ReturnBlockPreds;
166};
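// Illustrative shape captured above (a guard block branching either to the
// return block, which stays with the caller, or to the region that is
// extracted):
//
//   entry:       br i1 %c, label %return, label %non_return
//   non_return:  ...        ; NonReturnBlock, moved into the outlined function
//   return:      ret ...    ; ReturnBlock, kept for partial inlining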
167
168struct FunctionOutliningMultiRegionInfo {
169 FunctionOutliningMultiRegionInfo() = default;
170
171 // Container for outline regions
172 struct OutlineRegionInfo {
173 OutlineRegionInfo(ArrayRef<BasicBlock *> Region, BasicBlock *EntryBlock,
174 BasicBlock *ExitBlock, BasicBlock *ReturnBlock)
175 : Region(Region), EntryBlock(EntryBlock), ExitBlock(ExitBlock),
176 ReturnBlock(ReturnBlock) {}
177 SmallVector<BasicBlock *, 8> Region;
178 BasicBlock *EntryBlock;
179 BasicBlock *ExitBlock;
180 BasicBlock *ReturnBlock;
181 };
182
183 SmallVector<OutlineRegionInfo, 4> ORI;
184};
185
186struct PartialInlinerImpl {
187
188 PartialInlinerImpl(
189 function_ref<AssumptionCache &(Function &)> GetAC,
190 function_ref<AssumptionCache *(Function &)> LookupAC,
191 function_ref<TargetTransformInfo &(Function &)> GTTI,
192 function_ref<const TargetLibraryInfo &(Function &)> GTLI,
193 ProfileSummaryInfo &ProfSI,
194 function_ref<BlockFrequencyInfo &(Function &)> GBFI = nullptr)
195 : GetAssumptionCache(GetAC), LookupAssumptionCache(LookupAC),
196 GetTTI(GTTI), GetBFI(GBFI), GetTLI(GTLI), PSI(ProfSI) {}
197
198 bool run(Module &M);
199 // Main part of the transformation that calls helper functions to find
200 // outlining candidates, clone & outline the function, and attempt to
201 // partially inline the resulting function. Returns true if
202 // inlining was successful, false otherwise. Also returns the outline
203 // function (only if we partially inlined early returns) as there is a
204 // possibility to further "peel" early return statements that were left in the
205 // outline function due to code size.
206 std::pair<bool, Function *> unswitchFunction(Function &F);
207
208 // This class speculatively clones the function to be partial inlined.
209 // At the end of partial inlining, the remaining callsites to the cloned
210 // function that are not partially inlined will be fixed up to reference
211 // the original function, and the cloned function will be erased.
212 struct FunctionCloner {
213 // Two constructors, one for single region outlining, the other for
214 // multi-region outlining.
215 FunctionCloner(Function *F, FunctionOutliningInfo *OI,
216 OptimizationRemarkEmitter &ORE,
217 function_ref<AssumptionCache *(Function &)> LookupAC,
218 function_ref<TargetTransformInfo &(Function &)> GetTTI);
219 FunctionCloner(Function *F, FunctionOutliningMultiRegionInfo *OMRI,
220 OptimizationRemarkEmitter &ORE,
221 function_ref<AssumptionCache *(Function &)> LookupAC,
222 function_ref<TargetTransformInfo &(Function &)> GetTTI);
223
224 ~FunctionCloner();
225
226 // Prepare for function outlining: making sure there is only
227 // one incoming edge from the extracted/outlined region to
228 // the return block.
229 void normalizeReturnBlock() const;
230
231 // Do function outlining for cold regions.
232 bool doMultiRegionFunctionOutlining();
233 // Do function outlining for region after early return block(s).
234 // NOTE: For vararg functions that do the vararg handling in the outlined
235 // function, we temporarily generate IR that does not properly
236 // forward varargs to the outlined function. Calling InlineFunction
237 // will update calls to the outlined functions to properly forward
238 // the varargs.
239 Function *doSingleRegionFunctionOutlining();
240
241 Function *OrigFunc = nullptr;
242 Function *ClonedFunc = nullptr;
243
244 typedef std::pair<Function *, BasicBlock *> FuncBodyCallerPair;
245 // Keep track of Outlined Functions and the basic block they're called from.
246 SmallVector<FuncBodyCallerPair, 4> OutlinedFunctions;
247
248 // ClonedFunc is inlined in one of its callers after function
249 // outlining.
250 bool IsFunctionInlined = false;
251 // The cost of the region to be outlined.
252 InstructionCost OutlinedRegionCost = 0;
253 // ClonedOI is specific to outlining non-early return blocks.
254 std::unique_ptr<FunctionOutliningInfo> ClonedOI = nullptr;
255 // ClonedOMRI is specific to outlining cold regions.
256 std::unique_ptr<FunctionOutliningMultiRegionInfo> ClonedOMRI = nullptr;
257 std::unique_ptr<BlockFrequencyInfo> ClonedFuncBFI = nullptr;
258 OptimizationRemarkEmitter &ORE;
259 function_ref<AssumptionCache *(Function &)> LookupAC;
260 function_ref<TargetTransformInfo &(Function &)> GetTTI;
261 };
262
263private:
264 int NumPartialInlining = 0;
265 function_ref<AssumptionCache &(Function &)> GetAssumptionCache;
266 function_ref<AssumptionCache *(Function &)> LookupAssumptionCache;
267 function_ref<TargetTransformInfo &(Function &)> GetTTI;
268 function_ref<BlockFrequencyInfo &(Function &)> GetBFI;
269 function_ref<const TargetLibraryInfo &(Function &)> GetTLI;
270 ProfileSummaryInfo &PSI;
271
272 // Return the frequency of the OutliningBB relative to F's entry point.
273 // The result is no larger than 1 and is represented using BP.
274 // (Note that the outlined region's 'head' block can only have incoming
275 // edges from the guarding entry blocks).
276 BranchProbability
277 getOutliningCallBBRelativeFreq(FunctionCloner &Cloner) const;
278
279 // Return true if the callee of CB should be partially inlined with
280 // profit.
281 bool shouldPartialInline(CallBase &CB, FunctionCloner &Cloner,
282 BlockFrequency WeightedOutliningRcost,
283 OptimizationRemarkEmitter &ORE) const;
284
285 // Try to inline DuplicateFunction (cloned from F with call to
286 // the OutlinedFunction into its callers. Return true
287 // if there is any successful inlining.
288 bool tryPartialInline(FunctionCloner &Cloner);
289
290 // Compute the mapping from use site of DuplicationFunction to the enclosing
291 // BB's profile count.
292 void
293 computeCallsiteToProfCountMap(Function *DuplicateFunction,
294 DenseMap<User *, uint64_t> &SiteCountMap) const;
295
296 bool isLimitReached() const {
297 return (MaxNumPartialInlining != -1 &&
298 NumPartialInlining >= MaxNumPartialInlining);
299 }
300
301 static CallBase *getSupportedCallBase(User *U) {
302 if (isa<CallInst>(U) || isa<InvokeInst>(U))
303 return cast<CallBase>(U);
304 llvm_unreachable("All uses must be calls");
305 return nullptr;
306 }
307
308 static CallBase *getOneCallSiteTo(Function &F) {
309 User *User = *F.user_begin();
310 return getSupportedCallBase(User);
311 }
312
313 std::tuple<DebugLoc, BasicBlock *> getOneDebugLoc(Function &F) const {
314 CallBase *CB = getOneCallSiteTo(F);
315 DebugLoc DLoc = CB->getDebugLoc();
316 BasicBlock *Block = CB->getParent();
317 return std::make_tuple(DLoc, Block);
318 }
319
320 // Returns the costs associated with function outlining:
321 // - The first value is the non-weighted runtime cost for making the call
322 // to the outlined function, including the additional setup cost in the
323 // outlined function itself;
324 // - The second value is the estimated size of the new call sequence in
325 // basic block Cloner.OutliningCallBB;
326 std::tuple<InstructionCost, InstructionCost>
327 computeOutliningCosts(FunctionCloner &Cloner) const;
328
329 // Compute the 'InlineCost' of block BB. InlineCost is a proxy used to
330 // approximate both the size and runtime cost (Note that in the current
331 // inline cost analysis, there is no clear distinction there either).
332 static InstructionCost computeBBInlineCost(BasicBlock *BB,
333 TargetTransformInfo *TTI);
334
335 std::unique_ptr<FunctionOutliningInfo>
336 computeOutliningInfo(Function &F) const;
337
338 std::unique_ptr<FunctionOutliningMultiRegionInfo>
339 computeOutliningColdRegionsInfo(Function &F,
340 OptimizationRemarkEmitter &ORE) const;
341};
342
343} // end anonymous namespace
344
345std::unique_ptr<FunctionOutliningMultiRegionInfo>
346PartialInlinerImpl::computeOutliningColdRegionsInfo(
347 Function &F, OptimizationRemarkEmitter &ORE) const {
348 BasicBlock *EntryBlock = &F.front();
349
350 DominatorTree DT(F);
351 LoopInfo LI(DT);
352 BranchProbabilityInfo BPI(F, LI);
353 std::unique_ptr<BlockFrequencyInfo> ScopedBFI;
354 BlockFrequencyInfo *BFI;
355 if (!GetBFI) {
356 ScopedBFI.reset(new BlockFrequencyInfo(F, BPI, LI));
357 BFI = ScopedBFI.get();
358 } else
359 BFI = &(GetBFI(F));
360
361 // Return if we don't have profiling information.
362 if (!PSI.hasInstrumentationProfile())
363 return std::unique_ptr<FunctionOutliningMultiRegionInfo>();
364
365 std::unique_ptr<FunctionOutliningMultiRegionInfo> OutliningInfo =
366 std::make_unique<FunctionOutliningMultiRegionInfo>();
367
368 auto IsSingleExit =
369 [&ORE](SmallVectorImpl<BasicBlock *> &BlockList) -> BasicBlock * {
370 BasicBlock *ExitBlock = nullptr;
371 for (auto *Block : BlockList) {
372 for (BasicBlock *Succ : successors(Block)) {
373 if (!is_contained(BlockList, Succ)) {
374 if (ExitBlock) {
375 ORE.emit([&]() {
376 return OptimizationRemarkMissed(DEBUG_TYPE, "MultiExitRegion",
377 &Succ->front())
378 << "Region dominated by "
379 << ore::NV("Block", BlockList.front()->getName())
380 << " has more than one region exit edge.";
381 });
382 return nullptr;
383 }
384
385 ExitBlock = Block;
386 }
387 }
388 }
389 return ExitBlock;
390 };
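// Note: on success the lambda yields the region block that owns the single
// exiting edge; the block it branches to outside the region is recovered
// later via ExitBlock->getSingleSuccessor() and becomes the region's
// ReturnBlock.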
391
392 auto BBProfileCount = [BFI](BasicBlock *BB) {
393 return BFI->getBlockProfileCount(BB).value_or(0);
394 };
395
396 // Use the same computeBBInlineCost function to compute the cost savings of
397 // the outlining the candidate region.
398 TargetTransformInfo *FTTI = &GetTTI(F);
399 InstructionCost OverallFunctionCost = 0;
400 for (auto &BB : F)
401 OverallFunctionCost += computeBBInlineCost(&BB, FTTI);
402
403 LLVM_DEBUG(dbgs() << "OverallFunctionCost = " << OverallFunctionCost
404 << "\n";);
405
406 InstructionCost MinOutlineRegionCost = OverallFunctionCost.map(
407 [&](auto Cost) { return Cost * MinRegionSizeRatio; });
408
409 BranchProbability MinBranchProbability(
410 static_cast<int>(ColdBranchRatio * MinBlockCounterExecution),
411 MinBlockCounterExecution);
412 bool ColdCandidateFound = false;
413 BasicBlock *CurrEntry = EntryBlock;
414 std::vector<BasicBlock *> DFS;
415 DenseMap<BasicBlock *, bool> VisitedMap;
416 DFS.push_back(CurrEntry);
417 VisitedMap[CurrEntry] = true;
418
419 // Use Depth First Search on the basic blocks to find CFG edges that are
420 // considered cold.
421 // Cold regions considered must also have its inline cost compared to the
422 // overall inline cost of the original function. The region is outlined only
423 // if it reduced the inline cost of the function by 'MinOutlineRegionCost' or
424 // more.
425 while (!DFS.empty()) {
426 auto *ThisBB = DFS.back();
427 DFS.pop_back();
428 // Only consider regions with predecessor blocks that are considered
429 // not-cold (default: part of the top 99.99% of all block counters)
430 // AND greater than our minimum block execution count (default: 100).
431 if (PSI.isColdBlock(ThisBB, BFI) ||
432 BBProfileCount(ThisBB) < MinBlockCounterExecution)
433 continue;
434 for (auto SI = succ_begin(ThisBB); SI != succ_end(ThisBB); ++SI) {
435 if (VisitedMap[*SI])
436 continue;
437 VisitedMap[*SI] = true;
438 DFS.push_back(*SI);
439 // If branch isn't cold, we skip to the next one.
440 BranchProbability SuccProb = BPI.getEdgeProbability(ThisBB, *SI);
441 if (SuccProb > MinBranchProbability)
442 continue;
443
444 LLVM_DEBUG(dbgs() << "Found cold edge: " << ThisBB->getName() << "->"
445 << SI->getName()
446 << "\nBranch Probability = " << SuccProb << "\n";);
447
448 SmallVector<BasicBlock *, 8> DominateVector;
449 DT.getDescendants(*SI, DominateVector);
450 assert(!DominateVector.empty() &&
451 "SI should be reachable and have at least itself as descendant");
452
453 // We can only outline single entry regions (for now).
454 if (!DominateVector.front()->hasNPredecessors(1)) {
455 LLVM_DEBUG(dbgs() << "ABORT: Block " << SI->getName()
456 << " doesn't have a single predecessor in the "
457 "dominator tree\n";);
458 continue;
459 }
460
461 BasicBlock *ExitBlock = nullptr;
462 // We can only outline single exit regions (for now).
463 if (!(ExitBlock = IsSingleExit(DominateVector))) {
464 LLVM_DEBUG(dbgs() << "ABORT: Block " << SI->getName()
465 << " doesn't have a unique successor\n";);
466 continue;
467 }
468
469 InstructionCost OutlineRegionCost = 0;
470 for (auto *BB : DominateVector)
471 OutlineRegionCost += computeBBInlineCost(BB, &GetTTI(*BB->getParent()));
472
473 LLVM_DEBUG(dbgs() << "OutlineRegionCost = " << OutlineRegionCost
474 << "\n";);
475
476 if (!SkipCostAnalysis && OutlineRegionCost < MinOutlineRegionCost) {
477 ORE.emit([&]() {
478 return OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly",
479 &SI->front())
480 << ore::NV("Callee", &F)
481 << " inline cost-savings smaller than "
482 << ore::NV("Cost", MinOutlineRegionCost);
483 });
484
485 LLVM_DEBUG(dbgs() << "ABORT: Outline region cost is smaller than "
486 << MinOutlineRegionCost << "\n";);
487 continue;
488 }
489
490 // For now, ignore blocks that belong to a SISE region that is a
491 // candidate for outlining. In the future, we may want to look
492 // at inner regions because the outer region may have live-exit
493 // variables.
494 for (auto *BB : DominateVector)
495 VisitedMap[BB] = true;
496
497 // ReturnBlock here means the block after the outline call
498 BasicBlock *ReturnBlock = ExitBlock->getSingleSuccessor();
499 FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegInfo(
500 DominateVector, DominateVector.front(), ExitBlock, ReturnBlock);
501 OutliningInfo->ORI.push_back(RegInfo);
502 LLVM_DEBUG(dbgs() << "Found Cold Candidate starting at block: "
503 << DominateVector.front()->getName() << "\n";);
504 ColdCandidateFound = true;
505 NumColdRegionsFound++;
506 }
507 }
508
509 if (ColdCandidateFound)
510 return OutliningInfo;
511
512 return std::unique_ptr<FunctionOutliningMultiRegionInfo>();
513}
514
515std::unique_ptr<FunctionOutliningInfo>
516PartialInlinerImpl::computeOutliningInfo(Function &F) const {
517 BasicBlock *EntryBlock = &F.front();
518 BranchInst *BR = dyn_cast<BranchInst>(EntryBlock->getTerminator());
519 if (!BR || BR->isUnconditional())
520 return std::unique_ptr<FunctionOutliningInfo>();
521
522 // Returns true if Succ is BB's successor
523 auto IsSuccessor = [](BasicBlock *Succ, BasicBlock *BB) {
524 return is_contained(successors(BB), Succ);
525 };
526
527 auto IsReturnBlock = [](BasicBlock *BB) {
528 Instruction *TI = BB->getTerminator();
529 return isa<ReturnInst>(TI);
530 };
531
532 auto GetReturnBlock = [&](BasicBlock *Succ1, BasicBlock *Succ2) {
533 if (IsReturnBlock(Succ1))
534 return std::make_tuple(Succ1, Succ2);
535 if (IsReturnBlock(Succ2))
536 return std::make_tuple(Succ2, Succ1);
537
538 return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr);
539 };
540
541 // Detect a triangular shape:
542 auto GetCommonSucc = [&](BasicBlock *Succ1, BasicBlock *Succ2) {
543 if (IsSuccessor(Succ1, Succ2))
544 return std::make_tuple(Succ1, Succ2);
545 if (IsSuccessor(Succ2, Succ1))
546 return std::make_tuple(Succ2, Succ1);
547
548 return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr);
549 };
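// Illustrative triangle: CurrEntry branches to Succ1 and Succ2, and Succ1 is
// also a successor of Succ2. Succ1 is then the common successor (CommSucc) and
// Succ2 (OtherSucc) becomes the next guard-block candidate in the loop below.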
550
551 std::unique_ptr<FunctionOutliningInfo> OutliningInfo =
552 std::make_unique<FunctionOutliningInfo>();
553
554 BasicBlock *CurrEntry = EntryBlock;
555 bool CandidateFound = false;
556 do {
557 // The number of blocks to be inlined has already reached
558 // the limit. When MaxNumInlineBlocks is set to 0 or 1, this
559 // disables partial inlining for the function.
560 if (OutliningInfo->getNumInlinedBlocks() >= MaxNumInlineBlocks)
561 break;
562
563 if (succ_size(CurrEntry) != 2)
564 break;
565
566 BasicBlock *Succ1 = *succ_begin(CurrEntry);
567 BasicBlock *Succ2 = *(succ_begin(CurrEntry) + 1);
568
569 BasicBlock *ReturnBlock, *NonReturnBlock;
570 std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);
571
572 if (ReturnBlock) {
573 OutliningInfo->Entries.push_back(CurrEntry);
574 OutliningInfo->ReturnBlock = ReturnBlock;
575 OutliningInfo->NonReturnBlock = NonReturnBlock;
576 CandidateFound = true;
577 break;
578 }
579
580 BasicBlock *CommSucc, *OtherSucc;
581 std::tie(CommSucc, OtherSucc) = GetCommonSucc(Succ1, Succ2);
582
583 if (!CommSucc)
584 break;
585
586 OutliningInfo->Entries.push_back(CurrEntry);
587 CurrEntry = OtherSucc;
588 } while (true);
589
590 if (!CandidateFound)
591 return std::unique_ptr<FunctionOutliningInfo>();
592
593 // There should not be any successors (not in the entry set) other than
594 // {ReturnBlock, NonReturnBlock}
595 assert(OutliningInfo->Entries[0] == &F.front() &&
596 "Function Entry must be the first in Entries vector");
597 DenseSet<BasicBlock *> Entries;
598 for (BasicBlock *E : OutliningInfo->Entries)
599 Entries.insert(E);
600
601 // Returns true if BB has a predecessor which is not
602 // in the Entries set.
603 auto HasNonEntryPred = [Entries](BasicBlock *BB) {
604 for (auto *Pred : predecessors(BB)) {
605 if (!Entries.count(Pred))
606 return true;
607 }
608 return false;
609 };
610 auto CheckAndNormalizeCandidate =
611 [Entries, HasNonEntryPred](FunctionOutliningInfo *OutliningInfo) {
612 for (BasicBlock *E : OutliningInfo->Entries) {
613 for (auto *Succ : successors(E)) {
614 if (Entries.count(Succ))
615 continue;
616 if (Succ == OutliningInfo->ReturnBlock)
617 OutliningInfo->ReturnBlockPreds.push_back(E);
618 else if (Succ != OutliningInfo->NonReturnBlock)
619 return false;
620 }
621 // There should not be any outside incoming edges either:
622 if (HasNonEntryPred(E))
623 return false;
624 }
625 return true;
626 };
627
628 if (!CheckAndNormalizeCandidate(OutliningInfo.get()))
629 return std::unique_ptr<FunctionOutliningInfo>();
630
631 // Now further growing the candidate's inlining region by
632 // peeling off dominating blocks from the outlining region:
633 while (OutliningInfo->getNumInlinedBlocks() < MaxNumInlineBlocks) {
634 BasicBlock *Cand = OutliningInfo->NonReturnBlock;
635 if (succ_size(Cand) != 2)
636 break;
637
638 if (HasNonEntryPred(Cand))
639 break;
640
641 BasicBlock *Succ1 = *succ_begin(Cand);
642 BasicBlock *Succ2 = *(succ_begin(Cand) + 1);
643
644 BasicBlock *ReturnBlock, *NonReturnBlock;
645 std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);
646 if (!ReturnBlock || ReturnBlock != OutliningInfo->ReturnBlock)
647 break;
648
649 if (NonReturnBlock->getSinglePredecessor() != Cand)
650 break;
651
652 // Now grow and update OutliningInfo:
653 OutliningInfo->Entries.push_back(Cand);
654 OutliningInfo->NonReturnBlock = NonReturnBlock;
655 OutliningInfo->ReturnBlockPreds.push_back(Cand);
656 Entries.insert(Cand);
657 }
658
659 return OutliningInfo;
660}
661
662// Check if there is PGO data or user annotated branch data:
663static bool hasProfileData(const Function &F, const FunctionOutliningInfo &OI) {
664 if (F.hasProfileData())
665 return true;
666 // Now check if any of the entry block has MD_prof data:
667 for (auto *E : OI.Entries) {
668 BranchInst *BR = dyn_cast<BranchInst>(E->getTerminator());
669 if (!BR || BR->isUnconditional())
670 continue;
671 if (hasBranchWeightMD(*BR))
672 return true;
673 }
674 return false;
675}
676
677BranchProbability PartialInlinerImpl::getOutliningCallBBRelativeFreq(
678 FunctionCloner &Cloner) const {
679 BasicBlock *OutliningCallBB = Cloner.OutlinedFunctions.back().second;
680 auto EntryFreq =
681 Cloner.ClonedFuncBFI->getBlockFreq(&Cloner.ClonedFunc->getEntryBlock());
682 auto OutliningCallFreq =
683 Cloner.ClonedFuncBFI->getBlockFreq(OutliningCallBB);
684 // FIXME Hackery needed because ClonedFuncBFI is based on the function BEFORE
685 // we outlined any regions, so we may encounter situations where the
686 // OutliningCallFreq is *slightly* bigger than the EntryFreq.
687 if (OutliningCallFreq.getFrequency() > EntryFreq.getFrequency())
688 OutliningCallFreq = EntryFreq;
689
690 auto OutlineRegionRelFreq = BranchProbability::getBranchProbability(
691 OutliningCallFreq.getFrequency(), EntryFreq.getFrequency());
692
693 if (hasProfileData(*Cloner.OrigFunc, *Cloner.ClonedOI))
694 return OutlineRegionRelFreq;
695
696 // When profile data is not available, we need to be conservative in
697 // estimating the overall savings. Static branch prediction can usually
698 // guess the branch direction right (taken/non-taken), but the guessed
699 // branch probability is usually not biased enough. In case when the
700 // outlined region is predicted to be likely, its probability needs
701 // to be made higher (more biased) to not under-estimate the cost of
702 // function outlining. On the other hand, if the outlined region
703 // is predicted to be less likely, the predicted probability is usually
704 // higher than the actual. For instance, the actual probability of the
705 // less likely target is only 5%, but the guessed probability can be
706 // 40%. In the latter case, there is no need for further adjustment.
707 // FIXME: add an option for this.
708 if (OutlineRegionRelFreq < BranchProbability(45, 100))
709 return OutlineRegionRelFreq;
710
711 OutlineRegionRelFreq = std::max(
712 OutlineRegionRelFreq, BranchProbability(OutlineRegionFreqPercent, 100));
713
714 return OutlineRegionRelFreq;
715}
716
717bool PartialInlinerImpl::shouldPartialInline(
718 CallBase &CB, FunctionCloner &Cloner, BlockFrequency WeightedOutliningRcost,
719 OptimizationRemarkEmitter &ORE) const {
720 using namespace ore;
721
722 Function *Callee = CB.getCalledFunction();
723 assert(Callee == Cloner.ClonedFunc);
724
725 if (SkipCostAnalysis)
726 return isInlineViable(*Callee).isSuccess();
727
728 Function *Caller = CB.getCaller();
729 auto &CalleeTTI = GetTTI(*Callee);
730 bool RemarksEnabled =
731 Callee->getContext().getDiagHandlerPtr()->isMissedOptRemarkEnabled(
732 DEBUG_TYPE);
733 InlineCost IC =
734 getInlineCost(CB, getInlineParams(), CalleeTTI, GetAssumptionCache,
735 GetTLI, GetBFI, &PSI, RemarksEnabled ? &ORE : nullptr);
736
737 if (IC.isAlways()) {
738 ORE.emit([&]() {
739 return OptimizationRemarkAnalysis(DEBUG_TYPE, "AlwaysInline", &CB)
740 << NV("Callee", Cloner.OrigFunc)
741 << " should always be fully inlined, not partially";
742 });
743 return false;
744 }
745
746 if (IC.isNever()) {
747 ORE.emit([&]() {
748 return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", &CB)
749 << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
750 << NV("Caller", Caller)
751 << " because it should never be inlined (cost=never)";
752 });
753 return false;
754 }
755
756 if (!IC) {
757 ORE.emit([&]() {
758 return OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly", &CB)
759 << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
760 << NV("Caller", Caller) << " because too costly to inline (cost="
761 << NV("Cost", IC.getCost()) << ", threshold="
762 << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")";
763 });
764 return false;
765 }
766 const DataLayout &DL = Caller->getDataLayout();
767
768 // The savings of eliminating the call:
769 int NonWeightedSavings = getCallsiteCost(CalleeTTI, CB, DL);
770 BlockFrequency NormWeightedSavings(NonWeightedSavings);
771
772 // Weighted saving is smaller than weighted cost, return false
773 if (NormWeightedSavings < WeightedOutliningRcost) {
774 ORE.emit([&]() {
775 return OptimizationRemarkAnalysis(DEBUG_TYPE, "OutliningCallcostTooHigh",
776 &CB)
777 << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
778 << NV("Caller", Caller) << " runtime overhead (overhead="
779 << NV("Overhead", (unsigned)WeightedOutliningRcost.getFrequency())
780 << ", savings="
781 << NV("Savings", (unsigned)NormWeightedSavings.getFrequency())
782 << ")"
783 << " of making the outlined call is too high";
784 });
785
786 return false;
787 }
788
789 ORE.emit([&]() {
790 return OptimizationRemarkAnalysis(DEBUG_TYPE, "CanBePartiallyInlined", &CB)
791 << NV("Callee", Cloner.OrigFunc) << " can be partially inlined into "
792 << NV("Caller", Caller) << " with cost=" << NV("Cost", IC.getCost())
793 << " (threshold="
794 << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")";
795 });
796 return true;
797}
798
799// TODO: Ideally we should share Inliner's InlineCost Analysis code.
800// For now use a simplified version. The returned 'InlineCost' will be used
801// to estimate the size cost as well as runtime cost of the BB.
802InstructionCost
803PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB,
804 TargetTransformInfo *TTI) {
805 InstructionCost InlineCost = 0;
806 const DataLayout &DL = BB->getDataLayout();
807 int InstrCost = InlineConstants::getInstrCost();
808 for (Instruction &I : BB->instructionsWithoutDebug()) {
809 // Skip free instructions.
810 switch (I.getOpcode()) {
811 case Instruction::BitCast:
812 case Instruction::PtrToInt:
813 case Instruction::IntToPtr:
814 case Instruction::Alloca:
815 case Instruction::PHI:
816 continue;
817 case Instruction::GetElementPtr:
818 if (cast<GetElementPtrInst>(&I)->hasAllZeroIndices())
819 continue;
820 break;
821 default:
822 break;
823 }
824
825 if (I.isLifetimeStartOrEnd())
826 continue;
827
828 if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
829 Intrinsic::ID IID = II->getIntrinsicID();
830 SmallVector<Type *, 4> Tys;
831 FastMathFlags FMF;
832 for (Value *Val : II->args())
833 Tys.push_back(Val->getType());
834
835 if (auto *FPMO = dyn_cast<FPMathOperator>(II))
836 FMF = FPMO->getFastMathFlags();
837
838 IntrinsicCostAttributes ICA(IID, II->getType(), Tys, FMF);
839 InlineCost += TTI->getIntrinsicInstrCost(ICA, TargetTransformInfo::TCK_SizeAndLatency);
840 continue;
841 }
842
843 if (CallInst *CI = dyn_cast<CallInst>(&I)) {
844 InlineCost += getCallsiteCost(*TTI, *CI, DL);
845 continue;
846 }
847
848 if (InvokeInst *II = dyn_cast<InvokeInst>(&I)) {
849 InlineCost += getCallsiteCost(*TTI, *II, DL);
850 continue;
851 }
852
853 if (SwitchInst *SI = dyn_cast<SwitchInst>(&I)) {
854 InlineCost += (SI->getNumCases() + 1) * InstrCost;
855 continue;
856 }
857 InlineCost += InstrCost;
858 }
859
860 return InlineCost;
861}
862
863std::tuple<InstructionCost, InstructionCost>
864PartialInlinerImpl::computeOutliningCosts(FunctionCloner &Cloner) const {
865 InstructionCost OutliningFuncCallCost = 0, OutlinedFunctionCost = 0;
866 for (auto FuncBBPair : Cloner.OutlinedFunctions) {
867 Function *OutlinedFunc = FuncBBPair.first;
868 BasicBlock* OutliningCallBB = FuncBBPair.second;
869 // Now compute the cost of the call sequence to the outlined function
870 // 'OutlinedFunction' in BB 'OutliningCallBB':
871 auto *OutlinedFuncTTI = &GetTTI(*OutlinedFunc);
872 OutliningFuncCallCost +=
873 computeBBInlineCost(OutliningCallBB, OutlinedFuncTTI);
874
875 // Now compute the cost of the extracted/outlined function itself:
876 for (BasicBlock &BB : *OutlinedFunc)
877 OutlinedFunctionCost += computeBBInlineCost(&BB, OutlinedFuncTTI);
878 }
879 assert(OutlinedFunctionCost >= Cloner.OutlinedRegionCost &&
880 "Outlined function cost should be no less than the outlined region");
881
882 // The code extractor introduces a new root and exit stub blocks with
883 // additional unconditional branches. Those branches will be eliminated
884 // later with bb layout. The cost should be adjusted accordingly:
885 OutlinedFunctionCost -=
886 2 * InlineConstants::getInstrCost() * Cloner.OutlinedFunctions.size();
887
888 InstructionCost OutliningRuntimeOverhead =
889 OutliningFuncCallCost +
890 (OutlinedFunctionCost - Cloner.OutlinedRegionCost) +
891 ExtraOutliningPenalty.getValue();
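// E.g. (illustrative numbers): a call sequence costing 10 for an outlined
// function costing 55, extracted from a region that originally cost 50, gives
// a runtime overhead of 10 + (55 - 50) = 15 before any extra penalty.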
892
893 return std::make_tuple(OutliningFuncCallCost, OutliningRuntimeOverhead);
894}
895
896// Create the callsite to profile count map which is
897// used to update the original function's entry count,
898// after the function is partially inlined into the callsite.
899void PartialInlinerImpl::computeCallsiteToProfCountMap(
900 Function *DuplicateFunction,
901 DenseMap<User *, uint64_t> &CallSiteToProfCountMap) const {
902 std::vector<User *> Users(DuplicateFunction->user_begin(),
903 DuplicateFunction->user_end());
904 Function *CurrentCaller = nullptr;
905 std::unique_ptr<BlockFrequencyInfo> TempBFI;
906 BlockFrequencyInfo *CurrentCallerBFI = nullptr;
907
908 auto ComputeCurrBFI = [&,this](Function *Caller) {
909 // For the old pass manager:
910 if (!GetBFI) {
911 DominatorTree DT(*Caller);
912 LoopInfo LI(DT);
913 BranchProbabilityInfo BPI(*Caller, LI);
914 TempBFI.reset(new BlockFrequencyInfo(*Caller, BPI, LI));
915 CurrentCallerBFI = TempBFI.get();
916 } else {
917 // New pass manager:
918 CurrentCallerBFI = &(GetBFI(*Caller));
919 }
920 };
921
922 for (User *User : Users) {
923 // Don't bother with BlockAddress used by CallBr for asm goto.
924 if (isa<BlockAddress>(User))
925 continue;
926 CallBase *CB = getSupportedCallBase(User);
927 Function *Caller = CB->getCaller();
928 if (CurrentCaller != Caller) {
929 CurrentCaller = Caller;
930 ComputeCurrBFI(Caller);
931 } else {
932 assert(CurrentCallerBFI && "CallerBFI is not set");
933 }
934 BasicBlock *CallBB = CB->getParent();
935 auto Count = CurrentCallerBFI->getBlockProfileCount(CallBB);
936 if (Count)
937 CallSiteToProfCountMap[User] = *Count;
938 else
939 CallSiteToProfCountMap[User] = 0;
940 }
941}
942
943PartialInlinerImpl::FunctionCloner::FunctionCloner(
944 Function *F, FunctionOutliningInfo *OI, OptimizationRemarkEmitter &ORE,
945 function_ref<AssumptionCache *(Function &)> LookupAC,
946 function_ref<TargetTransformInfo &(Function &)> GetTTI)
947 : OrigFunc(F), ORE(ORE), LookupAC(LookupAC), GetTTI(GetTTI) {
948 ClonedOI = std::make_unique<FunctionOutliningInfo>();
949
950 // Clone the function, so that we can hack away on it.
951 ValueToValueMapTy VMap;
952 ClonedFunc = CloneFunction(F, VMap);
953
954 ClonedOI->ReturnBlock = cast<BasicBlock>(VMap[OI->ReturnBlock]);
955 ClonedOI->NonReturnBlock = cast<BasicBlock>(VMap[OI->NonReturnBlock]);
956 for (BasicBlock *BB : OI->Entries)
957 ClonedOI->Entries.push_back(cast<BasicBlock>(VMap[BB]));
958
959 for (BasicBlock *E : OI->ReturnBlockPreds) {
960 BasicBlock *NewE = cast<BasicBlock>(VMap[E]);
961 ClonedOI->ReturnBlockPreds.push_back(NewE);
962 }
963 // Go ahead and update all uses to the duplicate, so that we can just
964 // use the inliner functionality when we're done hacking.
965 F->replaceAllUsesWith(ClonedFunc);
966}
967
968PartialInlinerImpl::FunctionCloner::FunctionCloner(
969 Function *F, FunctionOutliningMultiRegionInfo *OI,
970 OptimizationRemarkEmitter &ORE,
971 function_ref<AssumptionCache *(Function &)> LookupAC,
972 function_ref<TargetTransformInfo &(Function &)> GetTTI)
973 : OrigFunc(F), ORE(ORE), LookupAC(LookupAC), GetTTI(GetTTI) {
974 ClonedOMRI = std::make_unique<FunctionOutliningMultiRegionInfo>();
975
976 // Clone the function, so that we can hack away on it.
977 ValueToValueMapTy VMap;
978 ClonedFunc = CloneFunction(F, VMap);
979
980 // Go through all Outline Candidate Regions and update all BasicBlock
981 // information.
982 for (const FunctionOutliningMultiRegionInfo::OutlineRegionInfo &RegionInfo :
983 OI->ORI) {
984 SmallVector<BasicBlock *, 8> Region;
985 for (BasicBlock *BB : RegionInfo.Region)
986 Region.push_back(cast<BasicBlock>(VMap[BB]));
987
988 BasicBlock *NewEntryBlock = cast<BasicBlock>(VMap[RegionInfo.EntryBlock]);
989 BasicBlock *NewExitBlock = cast<BasicBlock>(VMap[RegionInfo.ExitBlock]);
990 BasicBlock *NewReturnBlock = nullptr;
991 if (RegionInfo.ReturnBlock)
992 NewReturnBlock = cast<BasicBlock>(VMap[RegionInfo.ReturnBlock]);
993 FunctionOutliningMultiRegionInfo::OutlineRegionInfo MappedRegionInfo(
994 Region, NewEntryBlock, NewExitBlock, NewReturnBlock);
995 ClonedOMRI->ORI.push_back(MappedRegionInfo);
996 }
997 // Go ahead and update all uses to the duplicate, so that we can just
998 // use the inliner functionality when we're done hacking.
999 F->replaceAllUsesWith(ClonedFunc);
1000}
1001
1002void PartialInlinerImpl::FunctionCloner::normalizeReturnBlock() const {
1003 auto GetFirstPHI = [](BasicBlock *BB) {
1004 BasicBlock::iterator I = BB->begin();
1005 PHINode *FirstPhi = nullptr;
1006 while (I != BB->end()) {
1007 PHINode *Phi = dyn_cast<PHINode>(I);
1008 if (!Phi)
1009 break;
1010 if (!FirstPhi) {
1011 FirstPhi = Phi;
1012 break;
1013 }
1014 }
1015 return FirstPhi;
1016 };
1017
1018 // Shouldn't need to normalize PHIs if we're not outlining non-early return
1019 // blocks.
1020 if (!ClonedOI)
1021 return;
1022
1023 // Special hackery is needed with PHI nodes that have inputs from more than
1024 // one extracted block. For simplicity, just split the PHIs into a two-level
1025 // sequence of PHIs, some of which will go in the extracted region, and some
1026 // of which will go outside.
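// For illustration: a return-block PHI such as
//   %r = phi [ %a, %guard1 ], [ %b, %guard2 ], [ %x, %body1 ], [ %y, %body2 ]
// is split so the incoming values from the extracted body feed a PHI that
// stays in the outlined region, while the return block keeps a second PHI
// over the guard blocks plus that single forwarded value.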
1027 BasicBlock *PreReturn = ClonedOI->ReturnBlock;
1028 // only split block when necessary:
1029 PHINode *FirstPhi = GetFirstPHI(PreReturn);
1030 unsigned NumPredsFromEntries = ClonedOI->ReturnBlockPreds.size();
1031
1032 if (!FirstPhi || FirstPhi->getNumIncomingValues() <= NumPredsFromEntries + 1)
1033 return;
1034
1035 auto IsTrivialPhi = [](PHINode *PN) -> Value * {
1036 if (llvm::all_equal(PN->incoming_values()))
1037 return PN->getIncomingValue(0);
1038 return nullptr;
1039 };
1040
1041 ClonedOI->ReturnBlock = ClonedOI->ReturnBlock->splitBasicBlock(
1042 ClonedOI->ReturnBlock->getFirstNonPHI()->getIterator());
1043 BasicBlock::iterator I = PreReturn->begin();
1044 BasicBlock::iterator Ins = ClonedOI->ReturnBlock->begin();
1045 SmallVector<Instruction *, 4> DeadPhis;
1046 while (I != PreReturn->end()) {
1047 PHINode *OldPhi = dyn_cast<PHINode>(I);
1048 if (!OldPhi)
1049 break;
1050
1051 PHINode *RetPhi =
1052 PHINode::Create(OldPhi->getType(), NumPredsFromEntries + 1, "");
1053 RetPhi->insertBefore(Ins);
1054 OldPhi->replaceAllUsesWith(RetPhi);
1055 Ins = ClonedOI->ReturnBlock->getFirstNonPHIIt();
1056
1057 RetPhi->addIncoming(&*I, PreReturn);
1058 for (BasicBlock *E : ClonedOI->ReturnBlockPreds) {
1059 RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(E), E);
1060 OldPhi->removeIncomingValue(E);
1061 }
1062
1063 // After incoming values splitting, the old phi may become trivial.
1064 // Keeping the trivial phi can introduce definition inside the outline
1065 // region which is live-out, causing necessary overhead (load, store
1066 // arg passing etc).
1067 if (auto *OldPhiVal = IsTrivialPhi(OldPhi)) {
1068 OldPhi->replaceAllUsesWith(OldPhiVal);
1069 DeadPhis.push_back(OldPhi);
1070 }
1071 ++I;
1072 }
1073 for (auto *DP : DeadPhis)
1074 DP->eraseFromParent();
1075
1076 for (auto *E : ClonedOI->ReturnBlockPreds)
1077 E->getTerminator()->replaceUsesOfWith(PreReturn, ClonedOI->ReturnBlock);
1078}
1079
1080bool PartialInlinerImpl::FunctionCloner::doMultiRegionFunctionOutlining() {
1081
1082 auto ComputeRegionCost =
1083 [&](ArrayRef<BasicBlock *> Region) -> InstructionCost {
1084 InstructionCost Cost = 0;
1085 for (BasicBlock* BB : Region)
1086 Cost += computeBBInlineCost(BB, &GetTTI(*BB->getParent()));
1087 return Cost;
1088 };
1089
1090 assert(ClonedOMRI && "Expecting OutlineInfo for multi region outline");
1091
1092 if (ClonedOMRI->ORI.empty())
1093 return false;
1094
1095 // The CodeExtractor needs a dominator tree.
1096 DominatorTree DT;
1097 DT.recalculate(*ClonedFunc);
1098
1099 // Manually calculate a BlockFrequencyInfo and BranchProbabilityInfo.
1100 LoopInfo LI(DT);
1101 BranchProbabilityInfo BPI(*ClonedFunc, LI);
1102 ClonedFuncBFI.reset(new BlockFrequencyInfo(*ClonedFunc, BPI, LI));
1103
1104 // Cache and recycle the CodeExtractor analysis to avoid O(n^2) compile-time.
1105 CodeExtractorAnalysisCache CEAC(*ClonedFunc);
1106
1107 SetVector<Value *> Inputs, Outputs, Sinks;
1108 for (FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegionInfo :
1109 ClonedOMRI->ORI) {
1110 InstructionCost CurrentOutlinedRegionCost =
1111 ComputeRegionCost(RegionInfo.Region);
1112
1113 CodeExtractor CE(RegionInfo.Region, &DT, /*AggregateArgs*/ false,
1114 ClonedFuncBFI.get(), &BPI,
1115 LookupAC(*RegionInfo.EntryBlock->getParent()),
1116 /* AllowVarargs */ false);
1117
1118 CE.findInputsOutputs(Inputs, Outputs, Sinks);
1119
1120 LLVM_DEBUG({
1121 dbgs() << "inputs: " << Inputs.size() << "\n";
1122 dbgs() << "outputs: " << Outputs.size() << "\n";
1123 for (Value *value : Inputs)
1124 dbgs() << "value used in func: " << *value << "\n";
1125 for (Value *output : Outputs)
1126 dbgs() << "instr used in func: " << *output << "\n";
1127 });
1128
1129 // Do not extract regions that have live exit variables.
1130 if (Outputs.size() > 0 && !ForceLiveExit)
1131 continue;
1132
1133 if (Function *OutlinedFunc = CE.extractCodeRegion(CEAC)) {
1134 CallBase *OCS = PartialInlinerImpl::getOneCallSiteTo(*OutlinedFunc);
1135 BasicBlock *OutliningCallBB = OCS->getParent();
1136 assert(OutliningCallBB->getParent() == ClonedFunc);
1137 OutlinedFunctions.push_back(std::make_pair(OutlinedFunc,OutliningCallBB));
1138 NumColdRegionsOutlined++;
1139 OutlinedRegionCost += CurrentOutlinedRegionCost;
1140
1141 if (MarkOutlinedColdCC) {
1142 OutlinedFunc->setCallingConv(CallingConv::Cold);
1143 OCS->setCallingConv(CallingConv::Cold);
1144 }
1145 } else
1146 ORE.emit([&]() {
1147 return OptimizationRemarkMissed(DEBUG_TYPE, "ExtractFailed",
1148 &RegionInfo.Region.front()->front())
1149 << "Failed to extract region at block "
1150 << ore::NV("Block", RegionInfo.Region.front());
1151 });
1152 }
1153
1154 return !OutlinedFunctions.empty();
1155}
1156
1157Function *
1158PartialInlinerImpl::FunctionCloner::doSingleRegionFunctionOutlining() {
1159 // Returns true if the block is to be partially inlined into the caller
1160 // (i.e. not to be extracted to the out of line function)
1161 auto ToBeInlined = [&, this](BasicBlock *BB) {
1162 return BB == ClonedOI->ReturnBlock ||
1163 llvm::is_contained(ClonedOI->Entries, BB);
1164 };
1165
1166 assert(ClonedOI && "Expecting OutlineInfo for single region outline");
1167 // The CodeExtractor needs a dominator tree.
1168 DominatorTree DT;
1169 DT.recalculate(*ClonedFunc);
1170
1171 // Manually calculate a BlockFrequencyInfo and BranchProbabilityInfo.
1172 LoopInfo LI(DT);
1173 BranchProbabilityInfo BPI(*ClonedFunc, LI);
1174 ClonedFuncBFI.reset(new BlockFrequencyInfo(*ClonedFunc, BPI, LI));
1175
1176 // Gather up the blocks that we're going to extract.
1177 std::vector<BasicBlock *> ToExtract;
1178 auto *ClonedFuncTTI = &GetTTI(*ClonedFunc);
1179 ToExtract.push_back(ClonedOI->NonReturnBlock);
1180 OutlinedRegionCost += PartialInlinerImpl::computeBBInlineCost(
1181 ClonedOI->NonReturnBlock, ClonedFuncTTI);
1182 for (BasicBlock *BB : depth_first(&ClonedFunc->getEntryBlock()))
1183 if (!ToBeInlined(BB) && BB != ClonedOI->NonReturnBlock) {
1184 ToExtract.push_back(BB);
1185 // FIXME: the code extractor may hoist/sink more code
1186 // into the outlined function which may make the outlining
1187 // overhead (the difference of the outlined function cost
1188 // and OutliningRegionCost) look larger.
1189 OutlinedRegionCost += computeBBInlineCost(BB, ClonedFuncTTI);
1190 }
1191
1192 // Extract the body of the if.
1193 CodeExtractorAnalysisCache CEAC(*ClonedFunc);
1194 Function *OutlinedFunc =
1195 CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false,
1196 ClonedFuncBFI.get(), &BPI, LookupAC(*ClonedFunc),
1197 /* AllowVarargs */ true)
1198 .extractCodeRegion(CEAC);
1199
1200 if (OutlinedFunc) {
1201 BasicBlock *OutliningCallBB =
1202 PartialInlinerImpl::getOneCallSiteTo(*OutlinedFunc)->getParent();
1203 assert(OutliningCallBB->getParent() == ClonedFunc);
1204 OutlinedFunctions.push_back(std::make_pair(OutlinedFunc, OutliningCallBB));
1205 } else
1206 ORE.emit([&]() {
1207 return OptimizationRemarkMissed(DEBUG_TYPE, "ExtractFailed",
1208 &ToExtract.front()->front())
1209 << "Failed to extract region at block "
1210 << ore::NV("Block", ToExtract.front());
1211 });
1212
1213 return OutlinedFunc;
1214}
1215
1216PartialInlinerImpl::FunctionCloner::~FunctionCloner() {
1217 // Ditch the duplicate, since we're done with it, and rewrite all remaining
1218 // users (function pointers, etc.) back to the original function.
1219 ClonedFunc->replaceAllUsesWith(OrigFunc);
1220 ClonedFunc->eraseFromParent();
1221 if (!IsFunctionInlined) {
1222 // Remove each function that was speculatively created if there is no
1223 // reference.
1224 for (auto FuncBBPair : OutlinedFunctions) {
1225 Function *Func = FuncBBPair.first;
1226 Func->eraseFromParent();
1227 }
1228 }
1229}
1230
1231std::pair<bool, Function *> PartialInlinerImpl::unswitchFunction(Function &F) {
1232 if (F.hasAddressTaken())
1233 return {false, nullptr};
1234
1235 // Let inliner handle it
1236 if (F.hasFnAttribute(Attribute::AlwaysInline))
1237 return {false, nullptr};
1238
1239 if (F.hasFnAttribute(Attribute::NoInline))
1240 return {false, nullptr};
1241
1242 if (PSI.isFunctionEntryCold(&F))
1243 return {false, nullptr};
1244
1245 if (F.users().empty())
1246 return {false, nullptr};
1247
1248 OptimizationRemarkEmitter ORE(&F);
1249
1250 // Only try to outline cold regions if we have a profile summary, which
1251 // implies we have profiling information.
1252 if (PSI.hasProfileSummary() && F.hasProfileData() &&
1253 !DisableMultiRegionPartialInline) {
1254 std::unique_ptr<FunctionOutliningMultiRegionInfo> OMRI =
1255 computeOutliningColdRegionsInfo(F, ORE);
1256 if (OMRI) {
1257 FunctionCloner Cloner(&F, OMRI.get(), ORE, LookupAssumptionCache, GetTTI);
1258
1259 LLVM_DEBUG({
1260 dbgs() << "HotCountThreshold = " << PSI.getHotCountThreshold() << "\n";
1261 dbgs() << "ColdCountThreshold = " << PSI.getColdCountThreshold()
1262 << "\n";
1263 });
1264
1265 bool DidOutline = Cloner.doMultiRegionFunctionOutlining();
1266
1267 if (DidOutline) {
1268 LLVM_DEBUG({
1269 dbgs() << ">>>>>> Outlined (Cloned) Function >>>>>>\n";
1270 Cloner.ClonedFunc->print(dbgs());
1271 dbgs() << "<<<<<< Outlined (Cloned) Function <<<<<<\n";
1272 });
1273
1274 if (tryPartialInline(Cloner))
1275 return {true, nullptr};
1276 }
1277 }
1278 }
1279
1280 // Fall-thru to regular partial inlining if we:
1281 // i) can't find any cold regions to outline, or
1282 // ii) can't inline the outlined function anywhere.
1283 std::unique_ptr<FunctionOutliningInfo> OI = computeOutliningInfo(F);
1284 if (!OI)
1285 return {false, nullptr};
1286
1287 FunctionCloner Cloner(&F, OI.get(), ORE, LookupAssumptionCache, GetTTI);
1288 Cloner.normalizeReturnBlock();
1289
1290 Function *OutlinedFunction = Cloner.doSingleRegionFunctionOutlining();
1291
1292 if (!OutlinedFunction)
1293 return {false, nullptr};
1294
1295 if (tryPartialInline(Cloner))
1296 return {true, OutlinedFunction};
1297
1298 return {false, nullptr};
1299}
1300
1301bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) {
1302 if (Cloner.OutlinedFunctions.empty())
1303 return false;
1304
1305 auto OutliningCosts = computeOutliningCosts(Cloner);
1306
1307 InstructionCost SizeCost = std::get<0>(OutliningCosts);
1308 InstructionCost NonWeightedRcost = std::get<1>(OutliningCosts);
1309
1310 assert(SizeCost.isValid() && NonWeightedRcost.isValid() &&
1311 "Expected valid costs");
1312
1313 // Only calculate RelativeToEntryFreq when we are doing single region
1314 // outlining.
1315 BranchProbability RelativeToEntryFreq;
1316 if (Cloner.ClonedOI)
1317 RelativeToEntryFreq = getOutliningCallBBRelativeFreq(Cloner);
1318 else
1319 // RelativeToEntryFreq doesn't make sense when we have more than one
1320 // outlined call because each call will have a different relative frequency
1321 // to the entry block. We can consider using the average, but the
1322 // usefulness of that information is questionable. For now, assume we never
1323 // execute the calls to outlined functions.
1324 RelativeToEntryFreq = BranchProbability(0, 1);
1325
1326 BlockFrequency WeightedRcost =
1327 BlockFrequency(*NonWeightedRcost.getValue()) * RelativeToEntryFreq;
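// E.g. (illustrative): with NonWeightedRcost = 20 and an outlining call that
// executes on 25% of entries, WeightedRcost = 20 * 25% = 5.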
1328
1329 // If the call sequence(s) to the outlined function(s) are larger than the sum
1330 // of the original outlined region size(s), outlining does not increase the
1331 // chances of inlining the function (the inliner uses the size increase to
1332 // model the cost of inlining a callee).
1333 if (!SkipCostAnalysis && Cloner.OutlinedRegionCost < SizeCost) {
1334 OptimizationRemarkEmitter OrigFuncORE(Cloner.OrigFunc);
1335 DebugLoc DLoc;
1336 BasicBlock *Block;
1337 std::tie(DLoc, Block) = getOneDebugLoc(*Cloner.ClonedFunc);
1338 OrigFuncORE.emit([&]() {
1339 return OptimizationRemarkAnalysis(DEBUG_TYPE, "OutlineRegionTooSmall",
1340 DLoc, Block)
1341 << ore::NV("Function", Cloner.OrigFunc)
1342 << " not partially inlined into callers (Original Size = "
1343 << ore::NV("OutlinedRegionOriginalSize", Cloner.OutlinedRegionCost)
1344 << ", Size of call sequence to outlined function = "
1345 << ore::NV("NewSize", SizeCost) << ")";
1346 });
1347 return false;
1348 }
1349
1350 assert(Cloner.OrigFunc->users().empty() &&
1351 "F's users should all be replaced!");
1352
1353 std::vector<User *> Users(Cloner.ClonedFunc->user_begin(),
1354 Cloner.ClonedFunc->user_end());
1355
1356 DenseMap<User *, uint64_t> CallSiteToProfCountMap;
1357 auto CalleeEntryCount = Cloner.OrigFunc->getEntryCount();
1358 if (CalleeEntryCount)
1359 computeCallsiteToProfCountMap(Cloner.ClonedFunc, CallSiteToProfCountMap);
1360
1361 uint64_t CalleeEntryCountV =
1362 (CalleeEntryCount ? CalleeEntryCount->getCount() : 0);
1363
1364 bool AnyInline = false;
1365 for (User *User : Users) {
1366 // Don't bother with BlockAddress used by CallBr for asm goto.
1367 if (isa<BlockAddress>(User))
1368 continue;
1369
1370 CallBase *CB = getSupportedCallBase(User);
1371
1372 if (isLimitReached())
1373 continue;
1374
1375 OptimizationRemarkEmitter CallerORE(CB->getCaller());
1376 if (!shouldPartialInline(*CB, Cloner, WeightedRcost, CallerORE))
1377 continue;
1378
1379 // Construct remark before doing the inlining, as after successful inlining
1380 // the callsite is removed.
1381 OptimizationRemark OR(DEBUG_TYPE, "PartiallyInlined", CB);
1382 OR << ore::NV("Callee", Cloner.OrigFunc) << " partially inlined into "
1383 << ore::NV("Caller", CB->getCaller());
1384
1385 InlineFunctionInfo IFI(GetAssumptionCache, &PSI);
1386 // We can only forward varargs when we outlined a single region, else we
1387 // bail on vararg functions.
1388 if (!InlineFunction(*CB, IFI, /*MergeAttributes=*/false, nullptr, true,
1389 (Cloner.ClonedOI ? Cloner.OutlinedFunctions.back().first
1390 : nullptr))
1391 .isSuccess())
1392 continue;
1393
1394 CallerORE.emit(OR);
1395
1396 // Now update the entry count:
1397 if (CalleeEntryCountV && CallSiteToProfCountMap.count(User)) {
1398 uint64_t CallSiteCount = CallSiteToProfCountMap[User];
1399 CalleeEntryCountV -= std::min(CalleeEntryCountV, CallSiteCount);
1400 }
1401
1402 AnyInline = true;
1403 NumPartialInlining++;
1404 // Update the stats
1405 if (Cloner.ClonedOI)
1406 NumPartialInlined++;
1407 else
1408 NumColdOutlinePartialInlined++;
1409 }
1410
1411 if (AnyInline) {
1412 Cloner.IsFunctionInlined = true;
1413 if (CalleeEntryCount)
1414 Cloner.OrigFunc->setEntryCount(Function::ProfileCount(
1415 CalleeEntryCountV, CalleeEntryCount->getType()));
1416 OptimizationRemarkEmitter OrigFuncORE(Cloner.OrigFunc);
1417 OrigFuncORE.emit([&]() {
1418 return OptimizationRemark(DEBUG_TYPE, "PartiallyInlined", Cloner.OrigFunc)
1419 << "Partially inlined into at least one caller";
1420 });
1421 }
1422
1423 return AnyInline;
1424}
1425
1426bool PartialInlinerImpl::run(Module &M) {
1427 if (DisablePartialInlining)
1428 return false;
1429
1430 std::vector<Function *> Worklist;
1431 Worklist.reserve(M.size());
1432 for (Function &F : M)
1433 if (!F.use_empty() && !F.isDeclaration())
1434 Worklist.push_back(&F);
1435
1436 bool Changed = false;
1437 while (!Worklist.empty()) {
1438 Function *CurrFunc = Worklist.back();
1439 Worklist.pop_back();
1440
1441 if (CurrFunc->use_empty())
1442 continue;
1443
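// unswitchFunction may hand back the newly outlined function; it is re-queued
// so early returns left in it (for code-size reasons) can be peeled further.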
1444 std::pair<bool, Function *> Result = unswitchFunction(*CurrFunc);
1445 if (Result.second)
1446 Worklist.push_back(Result.second);
1447 Changed |= Result.first;
1448 }
1449
1450 return Changed;
1451}
1452
1453PreservedAnalyses PartialInlinerPass::run(Module &M,
1454 ModuleAnalysisManager &AM) {
1455 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
1456
1457 auto GetAssumptionCache = [&FAM](Function &F) -> AssumptionCache & {
1458 return FAM.getResult<AssumptionAnalysis>(F);
1459 };
1460
1461 auto LookupAssumptionCache = [&FAM](Function &F) -> AssumptionCache * {
1462 return FAM.getCachedResult<AssumptionAnalysis>(F);
1463 };
1464
1465 auto GetBFI = [&FAM](Function &F) -> BlockFrequencyInfo & {
1466 return FAM.getResult<BlockFrequencyAnalysis>(F);
1467 };
1468
1469 auto GetTTI = [&FAM](Function &F) -> TargetTransformInfo & {
1470 return FAM.getResult<TargetIRAnalysis>(F);
1471 };
1472
1473 auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
1474 return FAM.getResult<TargetLibraryAnalysis>(F);
1475 };
1476
1477 ProfileSummaryInfo &PSI = AM.getResult<ProfileSummaryAnalysis>(M);
1478
1479 if (PartialInlinerImpl(GetAssumptionCache, LookupAssumptionCache, GetTTI,
1480 GetTLI, PSI, GetBFI)
1481 .run(M))
1482 return PreservedAnalyses::none();
1483 return PreservedAnalyses::all();
1484}
static MachineBasicBlock * OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the simple types necessary to represent the attributes associated with functions a...
Given that RA is a live value
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
This file builds on the ADT/GraphTraits.h file to build generic depth first graph iterator.
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
iv Induction Variable Users
Definition: IVUsers.cpp:48
static cl::opt< int > InstrCost("inline-instr-cost", cl::Hidden, cl::init(5), cl::desc("Cost of a single instruction when inlining"))
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
Module.h This file contains the declarations for the Module class.
uint64_t IntrinsicInst * II
static cl::opt< unsigned > MaxNumInlineBlocks("max-num-inline-blocks", cl::init(5), cl::Hidden, cl::desc("Max number of blocks to be partially inlined"))
static cl::opt< int > OutlineRegionFreqPercent("outline-region-freq-percent", cl::init(75), cl::Hidden, cl::desc("Relative frequency of outline region to " "the entry block"))
static cl::opt< bool > MarkOutlinedColdCC("pi-mark-coldcc", cl::init(false), cl::Hidden, cl::desc("Mark outline function calls with ColdCC"))
static cl::opt< float > MinRegionSizeRatio("min-region-size-ratio", cl::init(0.1), cl::Hidden, cl::desc("Minimum ratio comparing relative sizes of each " "outline candidate and original function"))
static cl::opt< bool > DisableMultiRegionPartialInline("disable-mr-partial-inlining", cl::init(false), cl::Hidden, cl::desc("Disable multi-region partial inlining"))
static cl::opt< unsigned > MinBlockCounterExecution("min-block-execution", cl::init(100), cl::Hidden, cl::desc("Minimum block executions to consider " "its BranchProbabilityInfo valid"))
static cl::opt< int > MaxNumPartialInlining("max-partial-inlining", cl::init(-1), cl::Hidden, cl::desc("Max number of partial inlining. The default is unlimited"))
static cl::opt< bool > DisablePartialInlining("disable-partial-inlining", cl::init(false), cl::Hidden, cl::desc("Disable partial inlining"))
static bool hasProfileData(const Function &F, const FunctionOutliningInfo &OI)
static cl::opt< float > ColdBranchRatio("cold-branch-ratio", cl::init(0.1), cl::Hidden, cl::desc("Minimum BranchProbability to consider a region cold."))
#define DEBUG_TYPE
static cl::opt< bool > ForceLiveExit("pi-force-live-exit-outline", cl::init(false), cl::Hidden, cl::desc("Force outline regions with live exits"))
static cl::opt< unsigned > ExtraOutliningPenalty("partial-inlining-extra-penalty", cl::init(0), cl::Hidden, cl::desc("A debug option to add additional penalty to the computed one."))
static cl::opt< bool > SkipCostAnalysis("skip-partial-inlining-cost-analysis", cl::ReallyHidden, cl::desc("Skip Cost Analysis"))
FunctionAnalysisManager FAM
This file contains the declarations for profiling metadata utility functions.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
This pass exposes codegen information to IR-level passes.
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
Definition: PassManager.h:424
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:405
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
iterator end()
Definition: BasicBlock.h:461
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:448
iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
Definition: BasicBlock.cpp:250
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:459
const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
Definition: BasicBlock.cpp:489
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:219
const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
Definition: BasicBlock.cpp:296
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:177
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:239
const Instruction & back() const
Definition: BasicBlock.h:473
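A minimal sketch combining the BasicBlock queries above to recognize a pass-through block while ignoring debug instructions; the helper name is made up:

#include "llvm/IR/BasicBlock.h"
#include <iterator>

static bool isEmptyPassThroughBlock(const llvm::BasicBlock &BB) {
  if (!BB.getSinglePredecessor() || !BB.getSingleSuccessor())
    return false;
  // With debug instructions skipped, only the terminator should remain.
  auto Insts = BB.instructionsWithoutDebug();
  return std::distance(Insts.begin(), Insts.end()) == 1;
}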
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
std::optional< uint64_t > getBlockProfileCount(const BasicBlock *BB, bool AllowSynthetic=false) const
Returns the estimated profile count of BB.
uint64_t getFrequency() const
Returns the frequency as a fixpoint number scaled by the entry frequency.
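A minimal sketch of the profile-count query above; the 100-execution floor mirrors the min-block-execution option documented earlier, but the helper itself is hypothetical:

#include "llvm/Analysis/BlockFrequencyInfo.h"
#include <cstdint>
#include <optional>

static bool blockIsHotEnough(const llvm::BasicBlock *BB,
                             llvm::BlockFrequencyInfo &BFI,
                             uint64_t MinCount = 100) {
  if (std::optional<uint64_t> Count = BFI.getBlockProfileCount(BB))
    return *Count >= MinCount;
  return false; // No usable profile data for this block.
}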
Conditional or Unconditional Branch instruction.
Analysis providing branch probability information.
static BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
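A minimal sketch of turning a ratio such as cold-branch-ratio into a BranchProbability and comparing it against an edge probability from BranchProbabilityInfo; the 10/100 split and helper name are illustrative:

#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Support/BranchProbability.h"

static bool edgeLooksCold(const llvm::BasicBlock *Src, const llvm::BasicBlock *Dst,
                          const llvm::BranchProbabilityInfo &BPI) {
  llvm::BranchProbability EdgeProb = BPI.getEdgeProbability(Src, Dst);
  // 10% expressed as a numerator/denominator pair.
  llvm::BranchProbability Threshold =
      llvm::BranchProbability::getBranchProbability(10, 100);
  return EdgeProb < Threshold;
}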
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1236
void setCallingConv(CallingConv::ID CC)
Definition: InstrTypes.h:1527
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Definition: InstrTypes.h:1465
Function * getCaller()
Helper to get the caller (the parent function).
This class represents a function call, abstracting a target machine's calling convention.
A cache for the CodeExtractor analysis.
Definition: CodeExtractor.h:45
Utility class for extracting code into a new function.
Definition: CodeExtractor.h:84
Function * extractCodeRegion(const CodeExtractorAnalysisCache &CEAC)
Perform the extraction, returning the new function.
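A minimal sketch of driving CodeExtractor over an already-chosen single-entry/single-exit region; the eligibility check and the shared analysis cache are the two pieces the interface above exposes:

#include "llvm/IR/Dominators.h"
#include "llvm/Transforms/Utils/CodeExtractor.h"

static llvm::Function *outlineRegion(llvm::Function &F, llvm::DominatorTree &DT,
                                     llvm::ArrayRef<llvm::BasicBlock *> Blocks) {
  llvm::CodeExtractor CE(Blocks, &DT);
  if (!CE.isEligible())
    return nullptr;
  // The cache lets several extractions from the same function reuse analysis work.
  llvm::CodeExtractorAnalysisCache CEAC(F);
  return CE.extractCodeRegion(CEAC); // New outlined function, or nullptr on failure.
}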
A parsed version of the target data layout string and methods for querying it.
Definition: DataLayout.h:63
A debug info location.
Definition: DebugLoc.h:33
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:151
Implements a dense probed hash-table based set.
Definition: DenseSet.h:271
void recalculate(ParentType &Func)
recalculate - compute a dominator tree for the given function
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
Convenience struct for specifying and reasoning about fast-math flags.
Definition: FMF.h:20
Class to represent profile counts.
Definition: Function.h:296
const BasicBlock & back() const
Definition: Function.h:860
void setCallingConv(CallingConv::ID CC)
Definition: Function.h:285
Represents the cost of inlining a function.
Definition: InlineCost.h:90
int getCost() const
Get the inline cost estimate.
Definition: InlineCost.h:145
bool isAlways() const
Definition: InlineCost.h:139
int getCostDelta() const
Get the cost delta from the threshold for inlining.
Definition: InlineCost.h:175
bool isNever() const
Definition: InlineCost.h:140
This class captures the data input to the InlineFunction call, and records the auxiliary results prod...
Definition: Cloning.h:203
bool isSuccess() const
Definition: InlineCost.h:189
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:563
auto map(const Function &F) const -> InstructionCost
std::optional< CostType > getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
Definition: Instruction.cpp:97
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:466
Invoke instruction.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
Diagnostic information for optimization analysis remarks.
The optimization diagnostic interface.
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Diagnostic information for missed-optimization remarks.
Diagnostic information for applied optimization remarks.
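A minimal sketch of emitting a missed-optimization remark through this interface; the pass name, remark name, and message are placeholders, and the lambda form defers building the remark until remarks are actually enabled:

#include "llvm/Analysis/OptimizationRemarkEmitter.h"

static void reportNotInlined(llvm::OptimizationRemarkEmitter &ORE,
                             llvm::Function &Callee, llvm::CallBase &CB) {
  ORE.emit([&]() {
    return llvm::OptimizationRemarkMissed("example-pass", "NotPartiallyInlined", &CB)
           << llvm::ore::NV("Callee", &Callee) << " was not partially inlined";
  });
}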
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Value * removeIncomingValue(unsigned Idx, bool DeletePHIIfEmpty=true)
Remove an incoming value.
Value * getIncomingValueForBlock(const BasicBlock *BB) const
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
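A minimal sketch of the PHINode interface above: create the node with a reserved-edge hint at the top of the merge block, then add one incoming value per predecessor; all names are illustrative:

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instructions.h"

static llvm::PHINode *mergeValues(llvm::BasicBlock *MergeBB,
                                  llvm::Value *FromLeft, llvm::BasicBlock *LeftBB,
                                  llvm::Value *FromRight, llvm::BasicBlock *RightBB) {
  llvm::PHINode *PN = llvm::PHINode::Create(FromLeft->getType(),
                                            /*NumReservedValues=*/2, "merged",
                                            MergeBB->begin());
  PN->addIncoming(FromLeft, LeftBB);
  PN->addIncoming(FromRight, RightBB);
  return PN;
}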
PreservedAnalyses run(Module &M, ModuleAnalysisManager &)
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: Analysis.h:114
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117
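A minimal sketch of the run/PreservedAnalyses contract above for a new-PM module pass; the pass itself is a placeholder:

#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"

namespace {
struct ExamplePass : llvm::PassInfoMixin<ExamplePass> {
  llvm::PreservedAnalyses run(llvm::Module &M, llvm::ModuleAnalysisManager &) {
    bool Changed = false;
    // ... transform M, setting Changed whenever IR is modified ...
    return Changed ? llvm::PreservedAnalyses::none()
                   : llvm::PreservedAnalyses::all();
  }
};
} // namespace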
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
A vector that has set insertion semantics.
Definition: SetVector.h:57
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:98
bool empty() const
Definition: SmallVector.h:95
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:587
void push_back(const T &Elt)
Definition: SmallVector.h:427
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1210
Multiway switch.
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const
@ TCK_SizeAndLatency
The weighted sum of size and latency.
bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition: User.cpp:21
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
user_iterator user_begin()
Definition: Value.h:397
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
bool use_empty() const
Definition: Value.h:344
user_iterator user_end()
Definition: Value.h:405
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
Definition: ilist_node.h:32
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Cold
Attempts to make code in the caller as efficient as possible under the assumption that the call is no...
Definition: CallingConv.h:47
@ ReallyHidden
Definition: CommandLine.h:138
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
DiagnosticInfoOptimizationBase::Argument NV
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto successors(const MachineBasicBlock *BB)
InlineResult isInlineViable(Function &Callee)
Minimal filter to detect invalid constructs for inlining.
InlineCost getInlineCost(CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI, function_ref< AssumptionCache &(Function &)> GetAssumptionCache, function_ref< const TargetLibraryInfo &(Function &)> GetTLI, function_ref< BlockFrequencyInfo &(Function &)> GetBFI=nullptr, ProfileSummaryInfo *PSI=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
Get an InlineCost object representing the cost of inlining this callsite.
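A minimal sketch of wiring the callbacks that getInlineCost expects out of a FunctionAnalysisManager; the profitability test at the end is one plausible reading of the InlineCost accessors listed above, not the pass's own policy:

#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/PassManager.h"

static bool worthInlining(llvm::CallBase &CB, llvm::FunctionAnalysisManager &FAM) {
  llvm::Function *Callee = CB.getCalledFunction();
  if (!Callee || !llvm::isInlineViable(*Callee).isSuccess())
    return false;

  auto GetAC = [&](llvm::Function &F) -> llvm::AssumptionCache & {
    return FAM.getResult<llvm::AssumptionAnalysis>(F);
  };
  auto GetTLI = [&](llvm::Function &F) -> const llvm::TargetLibraryInfo & {
    return FAM.getResult<llvm::TargetLibraryAnalysis>(F);
  };
  llvm::TargetTransformInfo &CalleeTTI =
      FAM.getResult<llvm::TargetIRAnalysis>(*Callee);

  llvm::InlineCost IC = llvm::getInlineCost(CB, llvm::getInlineParams(),
                                            CalleeTTI, GetAC, GetTLI);
  // Positive cost delta means the estimated cost is below the threshold.
  return IC.isAlways() || (!IC.isNever() && IC.getCostDelta() > 0);
}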
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
InlineResult InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, bool MergeAttributes=false, AAResults *CalleeAAR=nullptr, bool InsertLifetime=true, Function *ForwardVarArgsTo=nullptr)
This function inlines the called function into the basic block of the caller.
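A minimal sketch of performing the inline step itself once a callsite has been judged profitable; a default-constructed InlineFunctionInfo with no BFI/PSI is the simplest legal setup:

#include "llvm/Analysis/InlineCost.h"
#include "llvm/Transforms/Utils/Cloning.h"

static bool inlineOneCall(llvm::CallBase &CB) {
  llvm::InlineFunctionInfo IFI;
  llvm::InlineResult Result =
      llvm::InlineFunction(CB, IFI, /*MergeAttributes=*/false);
  return Result.isSuccess(); // On success, the call instruction has been removed.
}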
InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options.
int getCallsiteCost(const TargetTransformInfo &TTI, const CallBase &Call, const DataLayout &DL)
Return the cost associated with a callsite, including parameter passing and the call/return instructi...
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1879
iterator_range< df_iterator< T > > depth_first(const T &G)
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list is empty.
Definition: STLExtras.h:2045
unsigned succ_size(const MachineBasicBlock *BB)
bool hasBranchWeightMD(const Instruction &I)
Checks if an instruction has Branch Weight Metadata.
Function * CloneFunction(Function *F, ValueToValueMapTy &VMap, ClonedCodeInfo *CodeInfo=nullptr)
Return a copy of the specified function and add it to that function's module.
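A minimal sketch of CloneFunction as described above: the clone is added to the same module as the original and VMap records the old-to-new value mapping; the naming suffix is hypothetical:

#include "llvm/ADT/Twine.h"
#include "llvm/Transforms/Utils/Cloning.h"

static llvm::Function *cloneForSpeculation(llvm::Function *F) {
  llvm::ValueToValueMapTy VMap;
  llvm::Function *Clone = llvm::CloneFunction(F, VMap);
  Clone->setName(F->getName() + ".clone"); // Hypothetical suffix.
  return Clone;
}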