LLVM 19.0.0git
PartialInlining.cpp
Go to the documentation of this file.
1//===- PartialInlining.cpp - Inline parts of functions --------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass performs partial inlining, typically by inlining an if statement
10// that surrounds the body of the function.
11//
12//===----------------------------------------------------------------------===//
13
15#include "llvm/ADT/DenseMap.h"
16#include "llvm/ADT/DenseSet.h"
18#include "llvm/ADT/STLExtras.h"
20#include "llvm/ADT/Statistic.h"
29#include "llvm/IR/Attributes.h"
30#include "llvm/IR/BasicBlock.h"
31#include "llvm/IR/CFG.h"
32#include "llvm/IR/DebugLoc.h"
34#include "llvm/IR/Dominators.h"
35#include "llvm/IR/Function.h"
36#include "llvm/IR/InstrTypes.h"
37#include "llvm/IR/Instruction.h"
40#include "llvm/IR/Intrinsics.h"
41#include "llvm/IR/Module.h"
42#include "llvm/IR/Operator.h"
44#include "llvm/IR/User.h"
50#include "llvm/Transforms/IPO.h"
54#include <algorithm>
55#include <cassert>
56#include <cstdint>
57#include <memory>
58#include <tuple>
59#include <vector>
60
61using namespace llvm;
62
63#define DEBUG_TYPE "partial-inlining"
64
65STATISTIC(NumPartialInlined,
66 "Number of callsites functions partially inlined into.");
67STATISTIC(NumColdOutlinePartialInlined, "Number of times functions with "
68 "cold outlined regions were partially "
69 "inlined into its caller(s).");
70STATISTIC(NumColdRegionsFound,
71 "Number of cold single entry/exit regions found.");
72STATISTIC(NumColdRegionsOutlined,
73 "Number of cold single entry/exit regions outlined.");
74
75// Command line option to disable partial-inlining. The default is false:
76static cl::opt<bool>
77 DisablePartialInlining("disable-partial-inlining", cl::init(false),
78 cl::Hidden, cl::desc("Disable partial inlining"));
79// Command line option to disable multi-region partial-inlining. The default is
80// false:
82 "disable-mr-partial-inlining", cl::init(false), cl::Hidden,
83 cl::desc("Disable multi-region partial inlining"));
84
85// Command line option to force outlining in regions with live exit variables.
86// The default is false:
87static cl::opt<bool>
88 ForceLiveExit("pi-force-live-exit-outline", cl::init(false), cl::Hidden,
89 cl::desc("Force outline regions with live exits"));
90
91// Command line option to enable marking outline functions with Cold Calling
92// Convention. The default is false:
93static cl::opt<bool>
94 MarkOutlinedColdCC("pi-mark-coldcc", cl::init(false), cl::Hidden,
95 cl::desc("Mark outline function calls with ColdCC"));
96
97// This is an option used by testing:
98static cl::opt<bool> SkipCostAnalysis("skip-partial-inlining-cost-analysis",
99
101 cl::desc("Skip Cost Analysis"));
102// Used to determine if a cold region is worth outlining based on
103// its inlining cost compared to the original function. Default is set at 10%.
104// ie. if the cold region reduces the inlining cost of the original function by
105// at least 10%.
107 "min-region-size-ratio", cl::init(0.1), cl::Hidden,
108 cl::desc("Minimum ratio comparing relative sizes of each "
109 "outline candidate and original function"));
110// Used to tune the minimum number of execution counts needed in the predecessor
111// block to the cold edge. ie. confidence interval.
113 MinBlockCounterExecution("min-block-execution", cl::init(100), cl::Hidden,
114 cl::desc("Minimum block executions to consider "
115 "its BranchProbabilityInfo valid"));
116// Used to determine when an edge is considered cold. Default is set to 10%. ie.
117// if the branch probability is 10% or less, then it is deemed as 'cold'.
119 "cold-branch-ratio", cl::init(0.1), cl::Hidden,
120 cl::desc("Minimum BranchProbability to consider a region cold."));
121
123 "max-num-inline-blocks", cl::init(5), cl::Hidden,
124 cl::desc("Max number of blocks to be partially inlined"));
125
126// Command line option to set the maximum number of partial inlining allowed
127// for the module. The default value of -1 means no limit.
129 "max-partial-inlining", cl::init(-1), cl::Hidden,
130 cl::desc("Max number of partial inlining. The default is unlimited"));
131
132// Used only when PGO or user annotated branch data is absent. It is
133// the least value that is used to weigh the outline region. If BFI
134// produces larger value, the BFI value will be used.
135static cl::opt<int>
136 OutlineRegionFreqPercent("outline-region-freq-percent", cl::init(75),
138 cl::desc("Relative frequency of outline region to "
139 "the entry block"));
140
142 "partial-inlining-extra-penalty", cl::init(0), cl::Hidden,
143 cl::desc("A debug option to add additional penalty to the computed one."));
144
145namespace {
146
147struct FunctionOutliningInfo {
148 FunctionOutliningInfo() = default;
149
150 // Returns the number of blocks to be inlined including all blocks
151 // in Entries and one return block.
152 unsigned getNumInlinedBlocks() const { return Entries.size() + 1; }
153
154 // A set of blocks including the function entry that guard
155 // the region to be outlined.
157
158 // The return block that is not included in the outlined region.
159 BasicBlock *ReturnBlock = nullptr;
160
161 // The dominating block of the region to be outlined.
162 BasicBlock *NonReturnBlock = nullptr;
163
164 // The set of blocks in Entries that are predecessors to ReturnBlock
165 SmallVector<BasicBlock *, 4> ReturnBlockPreds;
166};
167
168struct FunctionOutliningMultiRegionInfo {
169 FunctionOutliningMultiRegionInfo() = default;
170
171 // Container for outline regions
172 struct OutlineRegionInfo {
173 OutlineRegionInfo(ArrayRef<BasicBlock *> Region,
174 BasicBlock *EntryBlock, BasicBlock *ExitBlock,
175 BasicBlock *ReturnBlock)
176 : Region(Region.begin(), Region.end()), EntryBlock(EntryBlock),
177 ExitBlock(ExitBlock), ReturnBlock(ReturnBlock) {}
179 BasicBlock *EntryBlock;
180 BasicBlock *ExitBlock;
181 BasicBlock *ReturnBlock;
182 };
183
185};
186
187struct PartialInlinerImpl {
188
189 PartialInlinerImpl(
193 function_ref<const TargetLibraryInfo &(Function &)> GTLI,
194 ProfileSummaryInfo &ProfSI,
195 function_ref<BlockFrequencyInfo &(Function &)> GBFI = nullptr)
196 : GetAssumptionCache(GetAC), LookupAssumptionCache(LookupAC),
197 GetTTI(GTTI), GetBFI(GBFI), GetTLI(GTLI), PSI(ProfSI) {}
198
199 bool run(Module &M);
200 // Main part of the transformation that calls helper functions to find
201 // outlining candidates, clone & outline the function, and attempt to
202 // partially inline the resulting function. Returns true if
203 // inlining was successful, false otherwise. Also returns the outline
204 // function (only if we partially inlined early returns) as there is a
205 // possibility to further "peel" early return statements that were left in the
206 // outline function due to code size.
207 std::pair<bool, Function *> unswitchFunction(Function &F);
208
209 // This class speculatively clones the function to be partial inlined.
210 // At the end of partial inlining, the remaining callsites to the cloned
211 // function that are not partially inlined will be fixed up to reference
212 // the original function, and the cloned function will be erased.
213 struct FunctionCloner {
214 // Two constructors, one for single region outlining, the other for
215 // multi-region outlining.
216 FunctionCloner(Function *F, FunctionOutliningInfo *OI,
220 FunctionCloner(Function *F, FunctionOutliningMultiRegionInfo *OMRI,
224
225 ~FunctionCloner();
226
227 // Prepare for function outlining: making sure there is only
228 // one incoming edge from the extracted/outlined region to
229 // the return block.
230 void normalizeReturnBlock() const;
231
232 // Do function outlining for cold regions.
233 bool doMultiRegionFunctionOutlining();
234 // Do function outlining for region after early return block(s).
235 // NOTE: For vararg functions that do the vararg handling in the outlined
236 // function, we temporarily generate IR that does not properly
237 // forward varargs to the outlined function. Calling InlineFunction
238 // will update calls to the outlined functions to properly forward
239 // the varargs.
240 Function *doSingleRegionFunctionOutlining();
241
242 Function *OrigFunc = nullptr;
243 Function *ClonedFunc = nullptr;
244
245 typedef std::pair<Function *, BasicBlock *> FuncBodyCallerPair;
246 // Keep track of Outlined Functions and the basic block they're called from.
247 SmallVector<FuncBodyCallerPair, 4> OutlinedFunctions;
248
249 // ClonedFunc is inlined in one of its callers after function
250 // outlining.
251 bool IsFunctionInlined = false;
252 // The cost of the region to be outlined.
253 InstructionCost OutlinedRegionCost = 0;
254 // ClonedOI is specific to outlining non-early return blocks.
255 std::unique_ptr<FunctionOutliningInfo> ClonedOI = nullptr;
256 // ClonedOMRI is specific to outlining cold regions.
257 std::unique_ptr<FunctionOutliningMultiRegionInfo> ClonedOMRI = nullptr;
258 std::unique_ptr<BlockFrequencyInfo> ClonedFuncBFI = nullptr;
262 };
263
264private:
265 int NumPartialInlining = 0;
266 function_ref<AssumptionCache &(Function &)> GetAssumptionCache;
267 function_ref<AssumptionCache *(Function &)> LookupAssumptionCache;
270 function_ref<const TargetLibraryInfo &(Function &)> GetTLI;
272
273 // Return the frequency of the OutlininingBB relative to F's entry point.
274 // The result is no larger than 1 and is represented using BP.
275 // (Note that the outlined region's 'head' block can only have incoming
276 // edges from the guarding entry blocks).
278 getOutliningCallBBRelativeFreq(FunctionCloner &Cloner) const;
279
280 // Return true if the callee of CB should be partially inlined with
281 // profit.
282 bool shouldPartialInline(CallBase &CB, FunctionCloner &Cloner,
283 BlockFrequency WeightedOutliningRcost,
284 OptimizationRemarkEmitter &ORE) const;
285
286 // Try to inline DuplicateFunction (cloned from F with call to
287 // the OutlinedFunction into its callers. Return true
288 // if there is any successful inlining.
289 bool tryPartialInline(FunctionCloner &Cloner);
290
291 // Compute the mapping from use site of DuplicationFunction to the enclosing
292 // BB's profile count.
293 void
294 computeCallsiteToProfCountMap(Function *DuplicateFunction,
295 DenseMap<User *, uint64_t> &SiteCountMap) const;
296
297 bool isLimitReached() const {
298 return (MaxNumPartialInlining != -1 &&
299 NumPartialInlining >= MaxNumPartialInlining);
300 }
301
302 static CallBase *getSupportedCallBase(User *U) {
303 if (isa<CallInst>(U) || isa<InvokeInst>(U))
304 return cast<CallBase>(U);
305 llvm_unreachable("All uses must be calls");
306 return nullptr;
307 }
308
309 static CallBase *getOneCallSiteTo(Function &F) {
310 User *User = *F.user_begin();
311 return getSupportedCallBase(User);
312 }
313
314 std::tuple<DebugLoc, BasicBlock *> getOneDebugLoc(Function &F) const {
315 CallBase *CB = getOneCallSiteTo(F);
316 DebugLoc DLoc = CB->getDebugLoc();
317 BasicBlock *Block = CB->getParent();
318 return std::make_tuple(DLoc, Block);
319 }
320
321 // Returns the costs associated with function outlining:
322 // - The first value is the non-weighted runtime cost for making the call
323 // to the outlined function, including the addtional setup cost in the
324 // outlined function itself;
325 // - The second value is the estimated size of the new call sequence in
326 // basic block Cloner.OutliningCallBB;
327 std::tuple<InstructionCost, InstructionCost>
328 computeOutliningCosts(FunctionCloner &Cloner) const;
329
330 // Compute the 'InlineCost' of block BB. InlineCost is a proxy used to
331 // approximate both the size and runtime cost (Note that in the current
332 // inline cost analysis, there is no clear distinction there either).
333 static InstructionCost computeBBInlineCost(BasicBlock *BB,
335
336 std::unique_ptr<FunctionOutliningInfo>
337 computeOutliningInfo(Function &F) const;
338
339 std::unique_ptr<FunctionOutliningMultiRegionInfo>
340 computeOutliningColdRegionsInfo(Function &F,
341 OptimizationRemarkEmitter &ORE) const;
342};
343
344} // end anonymous namespace
345
346std::unique_ptr<FunctionOutliningMultiRegionInfo>
347PartialInlinerImpl::computeOutliningColdRegionsInfo(
348 Function &F, OptimizationRemarkEmitter &ORE) const {
349 BasicBlock *EntryBlock = &F.front();
350
351 DominatorTree DT(F);
352 LoopInfo LI(DT);
353 BranchProbabilityInfo BPI(F, LI);
354 std::unique_ptr<BlockFrequencyInfo> ScopedBFI;
356 if (!GetBFI) {
357 ScopedBFI.reset(new BlockFrequencyInfo(F, BPI, LI));
358 BFI = ScopedBFI.get();
359 } else
360 BFI = &(GetBFI(F));
361
362 // Return if we don't have profiling information.
363 if (!PSI.hasInstrumentationProfile())
364 return std::unique_ptr<FunctionOutliningMultiRegionInfo>();
365
366 std::unique_ptr<FunctionOutliningMultiRegionInfo> OutliningInfo =
367 std::make_unique<FunctionOutliningMultiRegionInfo>();
368
369 auto IsSingleExit =
370 [&ORE](SmallVectorImpl<BasicBlock *> &BlockList) -> BasicBlock * {
371 BasicBlock *ExitBlock = nullptr;
372 for (auto *Block : BlockList) {
373 for (BasicBlock *Succ : successors(Block)) {
374 if (!is_contained(BlockList, Succ)) {
375 if (ExitBlock) {
376 ORE.emit([&]() {
377 return OptimizationRemarkMissed(DEBUG_TYPE, "MultiExitRegion",
378 &Succ->front())
379 << "Region dominated by "
380 << ore::NV("Block", BlockList.front()->getName())
381 << " has more than one region exit edge.";
382 });
383 return nullptr;
384 }
385
386 ExitBlock = Block;
387 }
388 }
389 }
390 return ExitBlock;
391 };
392
393 auto BBProfileCount = [BFI](BasicBlock *BB) {
394 return BFI->getBlockProfileCount(BB).value_or(0);
395 };
396
397 // Use the same computeBBInlineCost function to compute the cost savings of
398 // the outlining the candidate region.
399 TargetTransformInfo *FTTI = &GetTTI(F);
400 InstructionCost OverallFunctionCost = 0;
401 for (auto &BB : F)
402 OverallFunctionCost += computeBBInlineCost(&BB, FTTI);
403
404 LLVM_DEBUG(dbgs() << "OverallFunctionCost = " << OverallFunctionCost
405 << "\n";);
406
407 InstructionCost MinOutlineRegionCost = OverallFunctionCost.map(
408 [&](auto Cost) { return Cost * MinRegionSizeRatio; });
409
410 BranchProbability MinBranchProbability(
411 static_cast<int>(ColdBranchRatio * MinBlockCounterExecution),
413 bool ColdCandidateFound = false;
414 BasicBlock *CurrEntry = EntryBlock;
415 std::vector<BasicBlock *> DFS;
417 DFS.push_back(CurrEntry);
418 VisitedMap[CurrEntry] = true;
419
420 // Use Depth First Search on the basic blocks to find CFG edges that are
421 // considered cold.
422 // Cold regions considered must also have its inline cost compared to the
423 // overall inline cost of the original function. The region is outlined only
424 // if it reduced the inline cost of the function by 'MinOutlineRegionCost' or
425 // more.
426 while (!DFS.empty()) {
427 auto *ThisBB = DFS.back();
428 DFS.pop_back();
429 // Only consider regions with predecessor blocks that are considered
430 // not-cold (default: part of the top 99.99% of all block counters)
431 // AND greater than our minimum block execution count (default: 100).
432 if (PSI.isColdBlock(ThisBB, BFI) ||
433 BBProfileCount(ThisBB) < MinBlockCounterExecution)
434 continue;
435 for (auto SI = succ_begin(ThisBB); SI != succ_end(ThisBB); ++SI) {
436 if (VisitedMap[*SI])
437 continue;
438 VisitedMap[*SI] = true;
439 DFS.push_back(*SI);
440 // If branch isn't cold, we skip to the next one.
441 BranchProbability SuccProb = BPI.getEdgeProbability(ThisBB, *SI);
442 if (SuccProb > MinBranchProbability)
443 continue;
444
445 LLVM_DEBUG(dbgs() << "Found cold edge: " << ThisBB->getName() << "->"
446 << SI->getName()
447 << "\nBranch Probability = " << SuccProb << "\n";);
448
449 SmallVector<BasicBlock *, 8> DominateVector;
450 DT.getDescendants(*SI, DominateVector);
451 assert(!DominateVector.empty() &&
452 "SI should be reachable and have at least itself as descendant");
453
454 // We can only outline single entry regions (for now).
455 if (!DominateVector.front()->hasNPredecessors(1)) {
456 LLVM_DEBUG(dbgs() << "ABORT: Block " << SI->getName()
457 << " doesn't have a single predecessor in the "
458 "dominator tree\n";);
459 continue;
460 }
461
462 BasicBlock *ExitBlock = nullptr;
463 // We can only outline single exit regions (for now).
464 if (!(ExitBlock = IsSingleExit(DominateVector))) {
465 LLVM_DEBUG(dbgs() << "ABORT: Block " << SI->getName()
466 << " doesn't have a unique successor\n";);
467 continue;
468 }
469
470 InstructionCost OutlineRegionCost = 0;
471 for (auto *BB : DominateVector)
472 OutlineRegionCost += computeBBInlineCost(BB, &GetTTI(*BB->getParent()));
473
474 LLVM_DEBUG(dbgs() << "OutlineRegionCost = " << OutlineRegionCost
475 << "\n";);
476
477 if (!SkipCostAnalysis && OutlineRegionCost < MinOutlineRegionCost) {
478 ORE.emit([&]() {
479 return OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly",
480 &SI->front())
481 << ore::NV("Callee", &F)
482 << " inline cost-savings smaller than "
483 << ore::NV("Cost", MinOutlineRegionCost);
484 });
485
486 LLVM_DEBUG(dbgs() << "ABORT: Outline region cost is smaller than "
487 << MinOutlineRegionCost << "\n";);
488 continue;
489 }
490
491 // For now, ignore blocks that belong to a SISE region that is a
492 // candidate for outlining. In the future, we may want to look
493 // at inner regions because the outer region may have live-exit
494 // variables.
495 for (auto *BB : DominateVector)
496 VisitedMap[BB] = true;
497
498 // ReturnBlock here means the block after the outline call
499 BasicBlock *ReturnBlock = ExitBlock->getSingleSuccessor();
500 FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegInfo(
501 DominateVector, DominateVector.front(), ExitBlock, ReturnBlock);
502 OutliningInfo->ORI.push_back(RegInfo);
503 LLVM_DEBUG(dbgs() << "Found Cold Candidate starting at block: "
504 << DominateVector.front()->getName() << "\n";);
505 ColdCandidateFound = true;
506 NumColdRegionsFound++;
507 }
508 }
509
510 if (ColdCandidateFound)
511 return OutliningInfo;
512
513 return std::unique_ptr<FunctionOutliningMultiRegionInfo>();
514}
515
516std::unique_ptr<FunctionOutliningInfo>
517PartialInlinerImpl::computeOutliningInfo(Function &F) const {
518 BasicBlock *EntryBlock = &F.front();
519 BranchInst *BR = dyn_cast<BranchInst>(EntryBlock->getTerminator());
520 if (!BR || BR->isUnconditional())
521 return std::unique_ptr<FunctionOutliningInfo>();
522
523 // Returns true if Succ is BB's successor
524 auto IsSuccessor = [](BasicBlock *Succ, BasicBlock *BB) {
525 return is_contained(successors(BB), Succ);
526 };
527
528 auto IsReturnBlock = [](BasicBlock *BB) {
529 Instruction *TI = BB->getTerminator();
530 return isa<ReturnInst>(TI);
531 };
532
533 auto GetReturnBlock = [&](BasicBlock *Succ1, BasicBlock *Succ2) {
534 if (IsReturnBlock(Succ1))
535 return std::make_tuple(Succ1, Succ2);
536 if (IsReturnBlock(Succ2))
537 return std::make_tuple(Succ2, Succ1);
538
539 return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr);
540 };
541
542 // Detect a triangular shape:
543 auto GetCommonSucc = [&](BasicBlock *Succ1, BasicBlock *Succ2) {
544 if (IsSuccessor(Succ1, Succ2))
545 return std::make_tuple(Succ1, Succ2);
546 if (IsSuccessor(Succ2, Succ1))
547 return std::make_tuple(Succ2, Succ1);
548
549 return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr);
550 };
551
552 std::unique_ptr<FunctionOutliningInfo> OutliningInfo =
553 std::make_unique<FunctionOutliningInfo>();
554
555 BasicBlock *CurrEntry = EntryBlock;
556 bool CandidateFound = false;
557 do {
558 // The number of blocks to be inlined has already reached
559 // the limit. When MaxNumInlineBlocks is set to 0 or 1, this
560 // disables partial inlining for the function.
561 if (OutliningInfo->getNumInlinedBlocks() >= MaxNumInlineBlocks)
562 break;
563
564 if (succ_size(CurrEntry) != 2)
565 break;
566
567 BasicBlock *Succ1 = *succ_begin(CurrEntry);
568 BasicBlock *Succ2 = *(succ_begin(CurrEntry) + 1);
569
570 BasicBlock *ReturnBlock, *NonReturnBlock;
571 std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);
572
573 if (ReturnBlock) {
574 OutliningInfo->Entries.push_back(CurrEntry);
575 OutliningInfo->ReturnBlock = ReturnBlock;
576 OutliningInfo->NonReturnBlock = NonReturnBlock;
577 CandidateFound = true;
578 break;
579 }
580
581 BasicBlock *CommSucc, *OtherSucc;
582 std::tie(CommSucc, OtherSucc) = GetCommonSucc(Succ1, Succ2);
583
584 if (!CommSucc)
585 break;
586
587 OutliningInfo->Entries.push_back(CurrEntry);
588 CurrEntry = OtherSucc;
589 } while (true);
590
591 if (!CandidateFound)
592 return std::unique_ptr<FunctionOutliningInfo>();
593
594 // There should not be any successors (not in the entry set) other than
595 // {ReturnBlock, NonReturnBlock}
596 assert(OutliningInfo->Entries[0] == &F.front() &&
597 "Function Entry must be the first in Entries vector");
599 for (BasicBlock *E : OutliningInfo->Entries)
600 Entries.insert(E);
601
602 // Returns true of BB has Predecessor which is not
603 // in Entries set.
604 auto HasNonEntryPred = [Entries](BasicBlock *BB) {
605 for (auto *Pred : predecessors(BB)) {
606 if (!Entries.count(Pred))
607 return true;
608 }
609 return false;
610 };
611 auto CheckAndNormalizeCandidate =
612 [Entries, HasNonEntryPred](FunctionOutliningInfo *OutliningInfo) {
613 for (BasicBlock *E : OutliningInfo->Entries) {
614 for (auto *Succ : successors(E)) {
615 if (Entries.count(Succ))
616 continue;
617 if (Succ == OutliningInfo->ReturnBlock)
618 OutliningInfo->ReturnBlockPreds.push_back(E);
619 else if (Succ != OutliningInfo->NonReturnBlock)
620 return false;
621 }
622 // There should not be any outside incoming edges either:
623 if (HasNonEntryPred(E))
624 return false;
625 }
626 return true;
627 };
628
629 if (!CheckAndNormalizeCandidate(OutliningInfo.get()))
630 return std::unique_ptr<FunctionOutliningInfo>();
631
632 // Now further growing the candidate's inlining region by
633 // peeling off dominating blocks from the outlining region:
634 while (OutliningInfo->getNumInlinedBlocks() < MaxNumInlineBlocks) {
635 BasicBlock *Cand = OutliningInfo->NonReturnBlock;
636 if (succ_size(Cand) != 2)
637 break;
638
639 if (HasNonEntryPred(Cand))
640 break;
641
642 BasicBlock *Succ1 = *succ_begin(Cand);
643 BasicBlock *Succ2 = *(succ_begin(Cand) + 1);
644
645 BasicBlock *ReturnBlock, *NonReturnBlock;
646 std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);
647 if (!ReturnBlock || ReturnBlock != OutliningInfo->ReturnBlock)
648 break;
649
650 if (NonReturnBlock->getSinglePredecessor() != Cand)
651 break;
652
653 // Now grow and update OutlininigInfo:
654 OutliningInfo->Entries.push_back(Cand);
655 OutliningInfo->NonReturnBlock = NonReturnBlock;
656 OutliningInfo->ReturnBlockPreds.push_back(Cand);
657 Entries.insert(Cand);
658 }
659
660 return OutliningInfo;
661}
662
663// Check if there is PGO data or user annotated branch data:
664static bool hasProfileData(const Function &F, const FunctionOutliningInfo &OI) {
665 if (F.hasProfileData())
666 return true;
667 // Now check if any of the entry block has MD_prof data:
668 for (auto *E : OI.Entries) {
669 BranchInst *BR = dyn_cast<BranchInst>(E->getTerminator());
670 if (!BR || BR->isUnconditional())
671 continue;
672 if (hasBranchWeightMD(*BR))
673 return true;
674 }
675 return false;
676}
677
678BranchProbability PartialInlinerImpl::getOutliningCallBBRelativeFreq(
679 FunctionCloner &Cloner) const {
680 BasicBlock *OutliningCallBB = Cloner.OutlinedFunctions.back().second;
681 auto EntryFreq =
682 Cloner.ClonedFuncBFI->getBlockFreq(&Cloner.ClonedFunc->getEntryBlock());
683 auto OutliningCallFreq =
684 Cloner.ClonedFuncBFI->getBlockFreq(OutliningCallBB);
685 // FIXME Hackery needed because ClonedFuncBFI is based on the function BEFORE
686 // we outlined any regions, so we may encounter situations where the
687 // OutliningCallFreq is *slightly* bigger than the EntryFreq.
688 if (OutliningCallFreq.getFrequency() > EntryFreq.getFrequency())
689 OutliningCallFreq = EntryFreq;
690
691 auto OutlineRegionRelFreq = BranchProbability::getBranchProbability(
692 OutliningCallFreq.getFrequency(), EntryFreq.getFrequency());
693
694 if (hasProfileData(*Cloner.OrigFunc, *Cloner.ClonedOI))
695 return OutlineRegionRelFreq;
696
697 // When profile data is not available, we need to be conservative in
698 // estimating the overall savings. Static branch prediction can usually
699 // guess the branch direction right (taken/non-taken), but the guessed
700 // branch probability is usually not biased enough. In case when the
701 // outlined region is predicted to be likely, its probability needs
702 // to be made higher (more biased) to not under-estimate the cost of
703 // function outlining. On the other hand, if the outlined region
704 // is predicted to be less likely, the predicted probablity is usually
705 // higher than the actual. For instance, the actual probability of the
706 // less likely target is only 5%, but the guessed probablity can be
707 // 40%. In the latter case, there is no need for further adjustment.
708 // FIXME: add an option for this.
709 if (OutlineRegionRelFreq < BranchProbability(45, 100))
710 return OutlineRegionRelFreq;
711
712 OutlineRegionRelFreq = std::max(
713 OutlineRegionRelFreq, BranchProbability(OutlineRegionFreqPercent, 100));
714
715 return OutlineRegionRelFreq;
716}
717
718bool PartialInlinerImpl::shouldPartialInline(
719 CallBase &CB, FunctionCloner &Cloner, BlockFrequency WeightedOutliningRcost,
720 OptimizationRemarkEmitter &ORE) const {
721 using namespace ore;
722
724 assert(Callee == Cloner.ClonedFunc);
725
727 return isInlineViable(*Callee).isSuccess();
728
729 Function *Caller = CB.getCaller();
730 auto &CalleeTTI = GetTTI(*Callee);
731 bool RemarksEnabled =
732 Callee->getContext().getDiagHandlerPtr()->isMissedOptRemarkEnabled(
733 DEBUG_TYPE);
734 InlineCost IC =
735 getInlineCost(CB, getInlineParams(), CalleeTTI, GetAssumptionCache,
736 GetTLI, GetBFI, &PSI, RemarksEnabled ? &ORE : nullptr);
737
738 if (IC.isAlways()) {
739 ORE.emit([&]() {
740 return OptimizationRemarkAnalysis(DEBUG_TYPE, "AlwaysInline", &CB)
741 << NV("Callee", Cloner.OrigFunc)
742 << " should always be fully inlined, not partially";
743 });
744 return false;
745 }
746
747 if (IC.isNever()) {
748 ORE.emit([&]() {
749 return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", &CB)
750 << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
751 << NV("Caller", Caller)
752 << " because it should never be inlined (cost=never)";
753 });
754 return false;
755 }
756
757 if (!IC) {
758 ORE.emit([&]() {
759 return OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly", &CB)
760 << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
761 << NV("Caller", Caller) << " because too costly to inline (cost="
762 << NV("Cost", IC.getCost()) << ", threshold="
763 << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")";
764 });
765 return false;
766 }
767 const DataLayout &DL = Caller->getParent()->getDataLayout();
768
769 // The savings of eliminating the call:
770 int NonWeightedSavings = getCallsiteCost(CalleeTTI, CB, DL);
771 BlockFrequency NormWeightedSavings(NonWeightedSavings);
772
773 // Weighted saving is smaller than weighted cost, return false
774 if (NormWeightedSavings < WeightedOutliningRcost) {
775 ORE.emit([&]() {
776 return OptimizationRemarkAnalysis(DEBUG_TYPE, "OutliningCallcostTooHigh",
777 &CB)
778 << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
779 << NV("Caller", Caller) << " runtime overhead (overhead="
780 << NV("Overhead", (unsigned)WeightedOutliningRcost.getFrequency())
781 << ", savings="
782 << NV("Savings", (unsigned)NormWeightedSavings.getFrequency())
783 << ")"
784 << " of making the outlined call is too high";
785 });
786
787 return false;
788 }
789
790 ORE.emit([&]() {
791 return OptimizationRemarkAnalysis(DEBUG_TYPE, "CanBePartiallyInlined", &CB)
792 << NV("Callee", Cloner.OrigFunc) << " can be partially inlined into "
793 << NV("Caller", Caller) << " with cost=" << NV("Cost", IC.getCost())
794 << " (threshold="
795 << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")";
796 });
797 return true;
798}
799
800// TODO: Ideally we should share Inliner's InlineCost Analysis code.
801// For now use a simplified version. The returned 'InlineCost' will be used
802// to esimate the size cost as well as runtime cost of the BB.
804PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB,
807 const DataLayout &DL = BB->getParent()->getParent()->getDataLayout();
809 for (Instruction &I : BB->instructionsWithoutDebug()) {
810 // Skip free instructions.
811 switch (I.getOpcode()) {
812 case Instruction::BitCast:
813 case Instruction::PtrToInt:
814 case Instruction::IntToPtr:
815 case Instruction::Alloca:
816 case Instruction::PHI:
817 continue;
818 case Instruction::GetElementPtr:
819 if (cast<GetElementPtrInst>(&I)->hasAllZeroIndices())
820 continue;
821 break;
822 default:
823 break;
824 }
825
826 if (I.isLifetimeStartOrEnd())
827 continue;
828
829 if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
830 Intrinsic::ID IID = II->getIntrinsicID();
832 FastMathFlags FMF;
833 for (Value *Val : II->args())
834 Tys.push_back(Val->getType());
835
836 if (auto *FPMO = dyn_cast<FPMathOperator>(II))
837 FMF = FPMO->getFastMathFlags();
838
839 IntrinsicCostAttributes ICA(IID, II->getType(), Tys, FMF);
841 continue;
842 }
843
844 if (CallInst *CI = dyn_cast<CallInst>(&I)) {
845 InlineCost += getCallsiteCost(*TTI, *CI, DL);
846 continue;
847 }
848
849 if (InvokeInst *II = dyn_cast<InvokeInst>(&I)) {
851 continue;
852 }
853
854 if (SwitchInst *SI = dyn_cast<SwitchInst>(&I)) {
855 InlineCost += (SI->getNumCases() + 1) * InstrCost;
856 continue;
857 }
859 }
860
861 return InlineCost;
862}
863
864std::tuple<InstructionCost, InstructionCost>
865PartialInlinerImpl::computeOutliningCosts(FunctionCloner &Cloner) const {
866 InstructionCost OutliningFuncCallCost = 0, OutlinedFunctionCost = 0;
867 for (auto FuncBBPair : Cloner.OutlinedFunctions) {
868 Function *OutlinedFunc = FuncBBPair.first;
869 BasicBlock* OutliningCallBB = FuncBBPair.second;
870 // Now compute the cost of the call sequence to the outlined function
871 // 'OutlinedFunction' in BB 'OutliningCallBB':
872 auto *OutlinedFuncTTI = &GetTTI(*OutlinedFunc);
873 OutliningFuncCallCost +=
874 computeBBInlineCost(OutliningCallBB, OutlinedFuncTTI);
875
876 // Now compute the cost of the extracted/outlined function itself:
877 for (BasicBlock &BB : *OutlinedFunc)
878 OutlinedFunctionCost += computeBBInlineCost(&BB, OutlinedFuncTTI);
879 }
880 assert(OutlinedFunctionCost >= Cloner.OutlinedRegionCost &&
881 "Outlined function cost should be no less than the outlined region");
882
883 // The code extractor introduces a new root and exit stub blocks with
884 // additional unconditional branches. Those branches will be eliminated
885 // later with bb layout. The cost should be adjusted accordingly:
886 OutlinedFunctionCost -=
887 2 * InlineConstants::getInstrCost() * Cloner.OutlinedFunctions.size();
888
889 InstructionCost OutliningRuntimeOverhead =
890 OutliningFuncCallCost +
891 (OutlinedFunctionCost - Cloner.OutlinedRegionCost) +
892 ExtraOutliningPenalty.getValue();
893
894 return std::make_tuple(OutliningFuncCallCost, OutliningRuntimeOverhead);
895}
896
897// Create the callsite to profile count map which is
898// used to update the original function's entry count,
899// after the function is partially inlined into the callsite.
900void PartialInlinerImpl::computeCallsiteToProfCountMap(
901 Function *DuplicateFunction,
902 DenseMap<User *, uint64_t> &CallSiteToProfCountMap) const {
903 std::vector<User *> Users(DuplicateFunction->user_begin(),
904 DuplicateFunction->user_end());
905 Function *CurrentCaller = nullptr;
906 std::unique_ptr<BlockFrequencyInfo> TempBFI;
907 BlockFrequencyInfo *CurrentCallerBFI = nullptr;
908
909 auto ComputeCurrBFI = [&,this](Function *Caller) {
910 // For the old pass manager:
911 if (!GetBFI) {
912 DominatorTree DT(*Caller);
913 LoopInfo LI(DT);
914 BranchProbabilityInfo BPI(*Caller, LI);
915 TempBFI.reset(new BlockFrequencyInfo(*Caller, BPI, LI));
916 CurrentCallerBFI = TempBFI.get();
917 } else {
918 // New pass manager:
919 CurrentCallerBFI = &(GetBFI(*Caller));
920 }
921 };
922
923 for (User *User : Users) {
924 // Don't bother with BlockAddress used by CallBr for asm goto.
925 if (isa<BlockAddress>(User))
926 continue;
927 CallBase *CB = getSupportedCallBase(User);
928 Function *Caller = CB->getCaller();
929 if (CurrentCaller != Caller) {
930 CurrentCaller = Caller;
931 ComputeCurrBFI(Caller);
932 } else {
933 assert(CurrentCallerBFI && "CallerBFI is not set");
934 }
935 BasicBlock *CallBB = CB->getParent();
936 auto Count = CurrentCallerBFI->getBlockProfileCount(CallBB);
937 if (Count)
938 CallSiteToProfCountMap[User] = *Count;
939 else
940 CallSiteToProfCountMap[User] = 0;
941 }
942}
943
944PartialInlinerImpl::FunctionCloner::FunctionCloner(
945 Function *F, FunctionOutliningInfo *OI, OptimizationRemarkEmitter &ORE,
948 : OrigFunc(F), ORE(ORE), LookupAC(LookupAC), GetTTI(GetTTI) {
949 ClonedOI = std::make_unique<FunctionOutliningInfo>();
950
951 // Clone the function, so that we can hack away on it.
953 ClonedFunc = CloneFunction(F, VMap);
954
955 ClonedOI->ReturnBlock = cast<BasicBlock>(VMap[OI->ReturnBlock]);
956 ClonedOI->NonReturnBlock = cast<BasicBlock>(VMap[OI->NonReturnBlock]);
957 for (BasicBlock *BB : OI->Entries)
958 ClonedOI->Entries.push_back(cast<BasicBlock>(VMap[BB]));
959
960 for (BasicBlock *E : OI->ReturnBlockPreds) {
961 BasicBlock *NewE = cast<BasicBlock>(VMap[E]);
962 ClonedOI->ReturnBlockPreds.push_back(NewE);
963 }
964 // Go ahead and update all uses to the duplicate, so that we can just
965 // use the inliner functionality when we're done hacking.
966 F->replaceAllUsesWith(ClonedFunc);
967}
968
969PartialInlinerImpl::FunctionCloner::FunctionCloner(
970 Function *F, FunctionOutliningMultiRegionInfo *OI,
974 : OrigFunc(F), ORE(ORE), LookupAC(LookupAC), GetTTI(GetTTI) {
975 ClonedOMRI = std::make_unique<FunctionOutliningMultiRegionInfo>();
976
977 // Clone the function, so that we can hack away on it.
979 ClonedFunc = CloneFunction(F, VMap);
980
981 // Go through all Outline Candidate Regions and update all BasicBlock
982 // information.
983 for (const FunctionOutliningMultiRegionInfo::OutlineRegionInfo &RegionInfo :
984 OI->ORI) {
986 for (BasicBlock *BB : RegionInfo.Region)
987 Region.push_back(cast<BasicBlock>(VMap[BB]));
988
989 BasicBlock *NewEntryBlock = cast<BasicBlock>(VMap[RegionInfo.EntryBlock]);
990 BasicBlock *NewExitBlock = cast<BasicBlock>(VMap[RegionInfo.ExitBlock]);
991 BasicBlock *NewReturnBlock = nullptr;
992 if (RegionInfo.ReturnBlock)
993 NewReturnBlock = cast<BasicBlock>(VMap[RegionInfo.ReturnBlock]);
994 FunctionOutliningMultiRegionInfo::OutlineRegionInfo MappedRegionInfo(
995 Region, NewEntryBlock, NewExitBlock, NewReturnBlock);
996 ClonedOMRI->ORI.push_back(MappedRegionInfo);
997 }
998 // Go ahead and update all uses to the duplicate, so that we can just
999 // use the inliner functionality when we're done hacking.
1000 F->replaceAllUsesWith(ClonedFunc);
1001}
1002
1003void PartialInlinerImpl::FunctionCloner::normalizeReturnBlock() const {
1004 auto GetFirstPHI = [](BasicBlock *BB) {
1005 BasicBlock::iterator I = BB->begin();
1006 PHINode *FirstPhi = nullptr;
1007 while (I != BB->end()) {
1008 PHINode *Phi = dyn_cast<PHINode>(I);
1009 if (!Phi)
1010 break;
1011 if (!FirstPhi) {
1012 FirstPhi = Phi;
1013 break;
1014 }
1015 }
1016 return FirstPhi;
1017 };
1018
1019 // Shouldn't need to normalize PHIs if we're not outlining non-early return
1020 // blocks.
1021 if (!ClonedOI)
1022 return;
1023
1024 // Special hackery is needed with PHI nodes that have inputs from more than
1025 // one extracted block. For simplicity, just split the PHIs into a two-level
1026 // sequence of PHIs, some of which will go in the extracted region, and some
1027 // of which will go outside.
1028 BasicBlock *PreReturn = ClonedOI->ReturnBlock;
1029 // only split block when necessary:
1030 PHINode *FirstPhi = GetFirstPHI(PreReturn);
1031 unsigned NumPredsFromEntries = ClonedOI->ReturnBlockPreds.size();
1032
1033 if (!FirstPhi || FirstPhi->getNumIncomingValues() <= NumPredsFromEntries + 1)
1034 return;
1035
1036 auto IsTrivialPhi = [](PHINode *PN) -> Value * {
1037 if (llvm::all_equal(PN->incoming_values()))
1038 return PN->getIncomingValue(0);
1039 return nullptr;
1040 };
1041
1042 ClonedOI->ReturnBlock = ClonedOI->ReturnBlock->splitBasicBlock(
1043 ClonedOI->ReturnBlock->getFirstNonPHI()->getIterator());
1044 BasicBlock::iterator I = PreReturn->begin();
1045 BasicBlock::iterator Ins = ClonedOI->ReturnBlock->begin();
1047 while (I != PreReturn->end()) {
1048 PHINode *OldPhi = dyn_cast<PHINode>(I);
1049 if (!OldPhi)
1050 break;
1051
1052 PHINode *RetPhi =
1053 PHINode::Create(OldPhi->getType(), NumPredsFromEntries + 1, "");
1054 RetPhi->insertBefore(Ins);
1055 OldPhi->replaceAllUsesWith(RetPhi);
1056 Ins = ClonedOI->ReturnBlock->getFirstNonPHIIt();
1057
1058 RetPhi->addIncoming(&*I, PreReturn);
1059 for (BasicBlock *E : ClonedOI->ReturnBlockPreds) {
1060 RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(E), E);
1061 OldPhi->removeIncomingValue(E);
1062 }
1063
1064 // After incoming values splitting, the old phi may become trivial.
1065 // Keeping the trivial phi can introduce definition inside the outline
1066 // region which is live-out, causing necessary overhead (load, store
1067 // arg passing etc).
1068 if (auto *OldPhiVal = IsTrivialPhi(OldPhi)) {
1069 OldPhi->replaceAllUsesWith(OldPhiVal);
1070 DeadPhis.push_back(OldPhi);
1071 }
1072 ++I;
1073 }
1074 for (auto *DP : DeadPhis)
1075 DP->eraseFromParent();
1076
1077 for (auto *E : ClonedOI->ReturnBlockPreds)
1078 E->getTerminator()->replaceUsesOfWith(PreReturn, ClonedOI->ReturnBlock);
1079}
1080
1081bool PartialInlinerImpl::FunctionCloner::doMultiRegionFunctionOutlining() {
1082
1083 auto ComputeRegionCost =
1086 for (BasicBlock* BB : Region)
1087 Cost += computeBBInlineCost(BB, &GetTTI(*BB->getParent()));
1088 return Cost;
1089 };
1090
1091 assert(ClonedOMRI && "Expecting OutlineInfo for multi region outline");
1092
1093 if (ClonedOMRI->ORI.empty())
1094 return false;
1095
1096 // The CodeExtractor needs a dominator tree.
1097 DominatorTree DT;
1098 DT.recalculate(*ClonedFunc);
1099
1100 // Manually calculate a BlockFrequencyInfo and BranchProbabilityInfo.
1101 LoopInfo LI(DT);
1102 BranchProbabilityInfo BPI(*ClonedFunc, LI);
1103 ClonedFuncBFI.reset(new BlockFrequencyInfo(*ClonedFunc, BPI, LI));
1104
1105 // Cache and recycle the CodeExtractor analysis to avoid O(n^2) compile-time.
1106 CodeExtractorAnalysisCache CEAC(*ClonedFunc);
1107
1108 SetVector<Value *> Inputs, Outputs, Sinks;
1109 for (FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegionInfo :
1110 ClonedOMRI->ORI) {
1111 InstructionCost CurrentOutlinedRegionCost =
1112 ComputeRegionCost(RegionInfo.Region);
1113
1114 CodeExtractor CE(RegionInfo.Region, &DT, /*AggregateArgs*/ false,
1115 ClonedFuncBFI.get(), &BPI,
1116 LookupAC(*RegionInfo.EntryBlock->getParent()),
1117 /* AllowVarargs */ false);
1118
1119 CE.findInputsOutputs(Inputs, Outputs, Sinks);
1120
1121 LLVM_DEBUG({
1122 dbgs() << "inputs: " << Inputs.size() << "\n";
1123 dbgs() << "outputs: " << Outputs.size() << "\n";
1124 for (Value *value : Inputs)
1125 dbgs() << "value used in func: " << *value << "\n";
1126 for (Value *output : Outputs)
1127 dbgs() << "instr used in func: " << *output << "\n";
1128 });
1129
1130 // Do not extract regions that have live exit variables.
1131 if (Outputs.size() > 0 && !ForceLiveExit)
1132 continue;
1133
1134 if (Function *OutlinedFunc = CE.extractCodeRegion(CEAC)) {
1135 CallBase *OCS = PartialInlinerImpl::getOneCallSiteTo(*OutlinedFunc);
1136 BasicBlock *OutliningCallBB = OCS->getParent();
1137 assert(OutliningCallBB->getParent() == ClonedFunc);
1138 OutlinedFunctions.push_back(std::make_pair(OutlinedFunc,OutliningCallBB));
1139 NumColdRegionsOutlined++;
1140 OutlinedRegionCost += CurrentOutlinedRegionCost;
1141
1142 if (MarkOutlinedColdCC) {
1143 OutlinedFunc->setCallingConv(CallingConv::Cold);
1145 }
1146 } else
1147 ORE.emit([&]() {
1148 return OptimizationRemarkMissed(DEBUG_TYPE, "ExtractFailed",
1149 &RegionInfo.Region.front()->front())
1150 << "Failed to extract region at block "
1151 << ore::NV("Block", RegionInfo.Region.front());
1152 });
1153 }
1154
1155 return !OutlinedFunctions.empty();
1156}
1157
1158Function *
1159PartialInlinerImpl::FunctionCloner::doSingleRegionFunctionOutlining() {
1160 // Returns true if the block is to be partial inlined into the caller
1161 // (i.e. not to be extracted to the out of line function)
1162 auto ToBeInlined = [&, this](BasicBlock *BB) {
1163 return BB == ClonedOI->ReturnBlock ||
1164 llvm::is_contained(ClonedOI->Entries, BB);
1165 };
1166
1167 assert(ClonedOI && "Expecting OutlineInfo for single region outline");
1168 // The CodeExtractor needs a dominator tree.
1169 DominatorTree DT;
1170 DT.recalculate(*ClonedFunc);
1171
1172 // Manually calculate a BlockFrequencyInfo and BranchProbabilityInfo.
1173 LoopInfo LI(DT);
1174 BranchProbabilityInfo BPI(*ClonedFunc, LI);
1175 ClonedFuncBFI.reset(new BlockFrequencyInfo(*ClonedFunc, BPI, LI));
1176
1177 // Gather up the blocks that we're going to extract.
1178 std::vector<BasicBlock *> ToExtract;
1179 auto *ClonedFuncTTI = &GetTTI(*ClonedFunc);
1180 ToExtract.push_back(ClonedOI->NonReturnBlock);
1181 OutlinedRegionCost += PartialInlinerImpl::computeBBInlineCost(
1182 ClonedOI->NonReturnBlock, ClonedFuncTTI);
1183 for (BasicBlock *BB : depth_first(&ClonedFunc->getEntryBlock()))
1184 if (!ToBeInlined(BB) && BB != ClonedOI->NonReturnBlock) {
1185 ToExtract.push_back(BB);
1186 // FIXME: the code extractor may hoist/sink more code
1187 // into the outlined function which may make the outlining
1188 // overhead (the difference of the outlined function cost
1189 // and OutliningRegionCost) look larger.
1190 OutlinedRegionCost += computeBBInlineCost(BB, ClonedFuncTTI);
1191 }
1192
1193 // Extract the body of the if.
1194 CodeExtractorAnalysisCache CEAC(*ClonedFunc);
1195 Function *OutlinedFunc =
1196 CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false,
1197 ClonedFuncBFI.get(), &BPI, LookupAC(*ClonedFunc),
1198 /* AllowVarargs */ true)
1199 .extractCodeRegion(CEAC);
1200
1201 if (OutlinedFunc) {
1202 BasicBlock *OutliningCallBB =
1203 PartialInlinerImpl::getOneCallSiteTo(*OutlinedFunc)->getParent();
1204 assert(OutliningCallBB->getParent() == ClonedFunc);
1205 OutlinedFunctions.push_back(std::make_pair(OutlinedFunc, OutliningCallBB));
1206 } else
1207 ORE.emit([&]() {
1208 return OptimizationRemarkMissed(DEBUG_TYPE, "ExtractFailed",
1209 &ToExtract.front()->front())
1210 << "Failed to extract region at block "
1211 << ore::NV("Block", ToExtract.front());
1212 });
1213
1214 return OutlinedFunc;
1215}
1216
1217PartialInlinerImpl::FunctionCloner::~FunctionCloner() {
1218 // Ditch the duplicate, since we're done with it, and rewrite all remaining
1219 // users (function pointers, etc.) back to the original function.
1220 ClonedFunc->replaceAllUsesWith(OrigFunc);
1221 ClonedFunc->eraseFromParent();
1222 if (!IsFunctionInlined) {
1223 // Remove each function that was speculatively created if there is no
1224 // reference.
1225 for (auto FuncBBPair : OutlinedFunctions) {
1226 Function *Func = FuncBBPair.first;
1227 Func->eraseFromParent();
1228 }
1229 }
1230}
1231
1232std::pair<bool, Function *> PartialInlinerImpl::unswitchFunction(Function &F) {
1233 if (F.hasAddressTaken())
1234 return {false, nullptr};
1235
1236 // Let inliner handle it
1237 if (F.hasFnAttribute(Attribute::AlwaysInline))
1238 return {false, nullptr};
1239
1240 if (F.hasFnAttribute(Attribute::NoInline))
1241 return {false, nullptr};
1242
1243 if (PSI.isFunctionEntryCold(&F))
1244 return {false, nullptr};
1245
1246 if (F.users().empty())
1247 return {false, nullptr};
1248
1250
1251 // Only try to outline cold regions if we have a profile summary, which
1252 // implies we have profiling information.
1253 if (PSI.hasProfileSummary() && F.hasProfileData() &&
1255 std::unique_ptr<FunctionOutliningMultiRegionInfo> OMRI =
1256 computeOutliningColdRegionsInfo(F, ORE);
1257 if (OMRI) {
1258 FunctionCloner Cloner(&F, OMRI.get(), ORE, LookupAssumptionCache, GetTTI);
1259
1260 LLVM_DEBUG({
1261 dbgs() << "HotCountThreshold = " << PSI.getHotCountThreshold() << "\n";
1262 dbgs() << "ColdCountThreshold = " << PSI.getColdCountThreshold()
1263 << "\n";
1264 });
1265
1266 bool DidOutline = Cloner.doMultiRegionFunctionOutlining();
1267
1268 if (DidOutline) {
1269 LLVM_DEBUG({
1270 dbgs() << ">>>>>> Outlined (Cloned) Function >>>>>>\n";
1271 Cloner.ClonedFunc->print(dbgs());
1272 dbgs() << "<<<<<< Outlined (Cloned) Function <<<<<<\n";
1273 });
1274
1275 if (tryPartialInline(Cloner))
1276 return {true, nullptr};
1277 }
1278 }
1279 }
1280
1281 // Fall-thru to regular partial inlining if we:
1282 // i) can't find any cold regions to outline, or
1283 // ii) can't inline the outlined function anywhere.
1284 std::unique_ptr<FunctionOutliningInfo> OI = computeOutliningInfo(F);
1285 if (!OI)
1286 return {false, nullptr};
1287
1288 FunctionCloner Cloner(&F, OI.get(), ORE, LookupAssumptionCache, GetTTI);
1289 Cloner.normalizeReturnBlock();
1290
1291 Function *OutlinedFunction = Cloner.doSingleRegionFunctionOutlining();
1292
1293 if (!OutlinedFunction)
1294 return {false, nullptr};
1295
1296 if (tryPartialInline(Cloner))
1297 return {true, OutlinedFunction};
1298
1299 return {false, nullptr};
1300}
1301
1302bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) {
1303 if (Cloner.OutlinedFunctions.empty())
1304 return false;
1305
1306 auto OutliningCosts = computeOutliningCosts(Cloner);
1307
1308 InstructionCost SizeCost = std::get<0>(OutliningCosts);
1309 InstructionCost NonWeightedRcost = std::get<1>(OutliningCosts);
1310
1311 assert(SizeCost.isValid() && NonWeightedRcost.isValid() &&
1312 "Expected valid costs");
1313
1314 // Only calculate RelativeToEntryFreq when we are doing single region
1315 // outlining.
1316 BranchProbability RelativeToEntryFreq;
1317 if (Cloner.ClonedOI)
1318 RelativeToEntryFreq = getOutliningCallBBRelativeFreq(Cloner);
1319 else
1320 // RelativeToEntryFreq doesn't make sense when we have more than one
1321 // outlined call because each call will have a different relative frequency
1322 // to the entry block. We can consider using the average, but the
1323 // usefulness of that information is questionable. For now, assume we never
1324 // execute the calls to outlined functions.
1325 RelativeToEntryFreq = BranchProbability(0, 1);
1326
1327 BlockFrequency WeightedRcost =
1328 BlockFrequency(*NonWeightedRcost.getValue()) * RelativeToEntryFreq;
1329
1330 // The call sequence(s) to the outlined function(s) are larger than the sum of
1331 // the original outlined region size(s), it does not increase the chances of
1332 // inlining the function with outlining (The inliner uses the size increase to
1333 // model the cost of inlining a callee).
1334 if (!SkipCostAnalysis && Cloner.OutlinedRegionCost < SizeCost) {
1335 OptimizationRemarkEmitter OrigFuncORE(Cloner.OrigFunc);
1336 DebugLoc DLoc;
1338 std::tie(DLoc, Block) = getOneDebugLoc(*Cloner.ClonedFunc);
1339 OrigFuncORE.emit([&]() {
1340 return OptimizationRemarkAnalysis(DEBUG_TYPE, "OutlineRegionTooSmall",
1341 DLoc, Block)
1342 << ore::NV("Function", Cloner.OrigFunc)
1343 << " not partially inlined into callers (Original Size = "
1344 << ore::NV("OutlinedRegionOriginalSize", Cloner.OutlinedRegionCost)
1345 << ", Size of call sequence to outlined function = "
1346 << ore::NV("NewSize", SizeCost) << ")";
1347 });
1348 return false;
1349 }
1350
1351 assert(Cloner.OrigFunc->users().empty() &&
1352 "F's users should all be replaced!");
1353
1354 std::vector<User *> Users(Cloner.ClonedFunc->user_begin(),
1355 Cloner.ClonedFunc->user_end());
1356
1357 DenseMap<User *, uint64_t> CallSiteToProfCountMap;
1358 auto CalleeEntryCount = Cloner.OrigFunc->getEntryCount();
1359 if (CalleeEntryCount)
1360 computeCallsiteToProfCountMap(Cloner.ClonedFunc, CallSiteToProfCountMap);
1361
1362 uint64_t CalleeEntryCountV =
1363 (CalleeEntryCount ? CalleeEntryCount->getCount() : 0);
1364
1365 bool AnyInline = false;
1366 for (User *User : Users) {
1367 // Don't bother with BlockAddress used by CallBr for asm goto.
1368 if (isa<BlockAddress>(User))
1369 continue;
1370
1371 CallBase *CB = getSupportedCallBase(User);
1372
1373 if (isLimitReached())
1374 continue;
1375
1376 OptimizationRemarkEmitter CallerORE(CB->getCaller());
1377 if (!shouldPartialInline(*CB, Cloner, WeightedRcost, CallerORE))
1378 continue;
1379
1380 // Construct remark before doing the inlining, as after successful inlining
1381 // the callsite is removed.
1382 OptimizationRemark OR(DEBUG_TYPE, "PartiallyInlined", CB);
1383 OR << ore::NV("Callee", Cloner.OrigFunc) << " partially inlined into "
1384 << ore::NV("Caller", CB->getCaller());
1385
1386 InlineFunctionInfo IFI(GetAssumptionCache, &PSI);
1387 // We can only forward varargs when we outlined a single region, else we
1388 // bail on vararg functions.
1389 if (!InlineFunction(*CB, IFI, /*MergeAttributes=*/false, nullptr, true,
1390 (Cloner.ClonedOI ? Cloner.OutlinedFunctions.back().first
1391 : nullptr))
1392 .isSuccess())
1393 continue;
1394
1395 CallerORE.emit(OR);
1396
1397 // Now update the entry count:
1398 if (CalleeEntryCountV && CallSiteToProfCountMap.count(User)) {
1399 uint64_t CallSiteCount = CallSiteToProfCountMap[User];
1400 CalleeEntryCountV -= std::min(CalleeEntryCountV, CallSiteCount);
1401 }
1402
1403 AnyInline = true;
1404 NumPartialInlining++;
1405 // Update the stats
1406 if (Cloner.ClonedOI)
1407 NumPartialInlined++;
1408 else
1409 NumColdOutlinePartialInlined++;
1410 }
1411
1412 if (AnyInline) {
1413 Cloner.IsFunctionInlined = true;
1414 if (CalleeEntryCount)
1415 Cloner.OrigFunc->setEntryCount(Function::ProfileCount(
1416 CalleeEntryCountV, CalleeEntryCount->getType()));
1417 OptimizationRemarkEmitter OrigFuncORE(Cloner.OrigFunc);
1418 OrigFuncORE.emit([&]() {
1419 return OptimizationRemark(DEBUG_TYPE, "PartiallyInlined", Cloner.OrigFunc)
1420 << "Partially inlined into at least one caller";
1421 });
1422 }
1423
1424 return AnyInline;
1425}
1426
1427bool PartialInlinerImpl::run(Module &M) {
1429 return false;
1430
1431 std::vector<Function *> Worklist;
1432 Worklist.reserve(M.size());
1433 for (Function &F : M)
1434 if (!F.use_empty() && !F.isDeclaration())
1435 Worklist.push_back(&F);
1436
1437 bool Changed = false;
1438 while (!Worklist.empty()) {
1439 Function *CurrFunc = Worklist.back();
1440 Worklist.pop_back();
1441
1442 if (CurrFunc->use_empty())
1443 continue;
1444
1445 std::pair<bool, Function *> Result = unswitchFunction(*CurrFunc);
1446 if (Result.second)
1447 Worklist.push_back(Result.second);
1448 Changed |= Result.first;
1449 }
1450
1451 return Changed;
1452}
1453
1456 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
1457
1458 auto GetAssumptionCache = [&FAM](Function &F) -> AssumptionCache & {
1460 };
1461
1462 auto LookupAssumptionCache = [&FAM](Function &F) -> AssumptionCache * {
1464 };
1465
1466 auto GetBFI = [&FAM](Function &F) -> BlockFrequencyInfo & {
1468 };
1469
1470 auto GetTTI = [&FAM](Function &F) -> TargetTransformInfo & {
1472 };
1473
1474 auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
1476 };
1477
1479
1480 if (PartialInlinerImpl(GetAssumptionCache, LookupAssumptionCache, GetTTI,
1481 GetTLI, PSI, GetBFI)
1482 .run(M))
1483 return PreservedAnalyses::none();
1484 return PreservedAnalyses::all();
1485}
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static MachineBasicBlock * OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ)
This file contains the simple types necessary to represent the attributes associated with functions a...
Given that RA is a live value
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
This file builds on the ADT/GraphTraits.h file to build generic depth first graph iterator.
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
iv Induction Variable Users
Definition: IVUsers.cpp:48
static cl::opt< int > InstrCost("inline-instr-cost", cl::Hidden, cl::init(5), cl::desc("Cost of a single instruction when inlining"))
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
Module.h This file contains the declarations for the Module class.
uint64_t IntrinsicInst * II
static cl::opt< unsigned > MaxNumInlineBlocks("max-num-inline-blocks", cl::init(5), cl::Hidden, cl::desc("Max number of blocks to be partially inlined"))
static cl::opt< int > OutlineRegionFreqPercent("outline-region-freq-percent", cl::init(75), cl::Hidden, cl::desc("Relative frequency of outline region to " "the entry block"))
static cl::opt< bool > MarkOutlinedColdCC("pi-mark-coldcc", cl::init(false), cl::Hidden, cl::desc("Mark outline function calls with ColdCC"))
static cl::opt< float > MinRegionSizeRatio("min-region-size-ratio", cl::init(0.1), cl::Hidden, cl::desc("Minimum ratio comparing relative sizes of each " "outline candidate and original function"))
static cl::opt< bool > DisableMultiRegionPartialInline("disable-mr-partial-inlining", cl::init(false), cl::Hidden, cl::desc("Disable multi-region partial inlining"))
static cl::opt< unsigned > MinBlockCounterExecution("min-block-execution", cl::init(100), cl::Hidden, cl::desc("Minimum block executions to consider " "its BranchProbabilityInfo valid"))
static cl::opt< int > MaxNumPartialInlining("max-partial-inlining", cl::init(-1), cl::Hidden, cl::desc("Max number of partial inlining. The default is unlimited"))
static cl::opt< bool > DisablePartialInlining("disable-partial-inlining", cl::init(false), cl::Hidden, cl::desc("Disable partial inlining"))
static bool hasProfileData(const Function &F, const FunctionOutliningInfo &OI)
static cl::opt< float > ColdBranchRatio("cold-branch-ratio", cl::init(0.1), cl::Hidden, cl::desc("Minimum BranchProbability to consider a region cold."))
#define DEBUG_TYPE
static cl::opt< bool > ForceLiveExit("pi-force-live-exit-outline", cl::init(false), cl::Hidden, cl::desc("Force outline regions with live exits"))
static cl::opt< unsigned > ExtraOutliningPenalty("partial-inlining-extra-penalty", cl::init(0), cl::Hidden, cl::desc("A debug option to add additional penalty to the computed one."))
static cl::opt< bool > SkipCostAnalysis("skip-partial-inlining-cost-analysis", cl::ReallyHidden, cl::desc("Skip Cost Analysis"))
FunctionAnalysisManager FAM
This file contains the declarations for profiling metadata utility functions.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
This pass exposes codegen information to IR-level passes.
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:321
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
Definition: PassManager.h:492
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:473
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
iterator end()
Definition: BasicBlock.h:443
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:430
iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
Definition: BasicBlock.cpp:247
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:452
const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
Definition: BasicBlock.cpp:482
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:206
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:165
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:221
const Instruction & back() const
Definition: BasicBlock.h:455
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
std::optional< uint64_t > getBlockProfileCount(const BasicBlock *BB, bool AllowSynthetic=false) const
Returns the estimated profile count of BB.
uint64_t getFrequency() const
Returns the frequency as a fixpoint number scaled by the entry frequency.
Conditional or Unconditional Branch instruction.
Analysis providing branch probability information.
static BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1494
void setCallingConv(CallingConv::ID CC)
Definition: InstrTypes.h:1812
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Definition: InstrTypes.h:1750
Function * getCaller()
Helper to get the caller (the parent function).
This class represents a function call, abstracting a target machine's calling convention.
A cache for the CodeExtractor analysis.
Definition: CodeExtractor.h:46
Utility class for extracting code into a new function.
Definition: CodeExtractor.h:85
Function * extractCodeRegion(const CodeExtractorAnalysisCache &CEAC)
Perform the extraction, returning the new function.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
A debug info location.
Definition: DebugLoc.h:33
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:151
Implements a dense probed hash-table based set.
Definition: DenseSet.h:271
void recalculate(ParentType &Func)
recalculate - compute a dominator tree for the given function
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
Convenience struct for specifying and reasoning about fast-math flags.
Definition: FMF.h:20
Class to represent profile counts.
Definition: Function.h:279
const BasicBlock & back() const
Definition: Function.h:815
void setCallingConv(CallingConv::ID CC)
Definition: Function.h:268
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:655
Represents the cost of inlining a function.
Definition: InlineCost.h:90
int getCost() const
Get the inline cost estimate.
Definition: InlineCost.h:145
bool isAlways() const
Definition: InlineCost.h:139
int getCostDelta() const
Get the cost delta from the threshold for inlining.
Definition: InlineCost.h:175
bool isNever() const
Definition: InlineCost.h:140
This class captures the data input to the InlineFunction call, and records the auxiliary results prod...
Definition: Cloning.h:202
bool isSuccess() const
Definition: InlineCost.h:189
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:631
auto map(const Function &F) const -> InstructionCost
std::optional< CostType > getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:454
const BasicBlock * getParent() const
Definition: Instruction.h:152
Invoke instruction.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.h:293
Diagnostic information for optimization analysis remarks.
The optimization diagnostic interface.
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Diagnostic information for missed-optimization remarks.
Diagnostic information for applied optimization remarks.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr, BasicBlock::iterator InsertBefore)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
Value * removeIncomingValue(unsigned Idx, bool DeletePHIIfEmpty=true)
Remove an incoming value.
Value * getIncomingValueForBlock(const BasicBlock *BB) const
unsigned getNumIncomingValues() const
Return the number of incoming edges.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &)
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:109
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:115
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
A vector that has set insertion semantics.
Definition: SetVector.h:57
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:98
bool empty() const
Definition: SmallVector.h:94
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
Multiway switch.
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const
@ TCK_SizeAndLatency
The weighted sum of size and latency.
bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition: User.cpp:21
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
user_iterator user_begin()
Definition: Value.h:397
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
bool use_empty() const
Definition: Value.h:344
user_iterator user_end()
Definition: Value.h:405
An efficient, type-erasing, non-owning reference to a callable.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Cold
Attempts to make code in the caller as efficient as possible under the assumption that the call is no...
Definition: CallingConv.h:47
@ BR
Control flow instructions. These all have token chains.
Definition: ISDOpcodes.h:1056
@ CE
Windows NT (Windows on ARM)
@ ReallyHidden
Definition: CommandLine.h:139
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< PhiNode * > Phi
Definition: RDFGraph.h:390
NodeAddr< FuncNode * > Func
Definition: RDFGraph.h:393
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:227
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:236
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto successors(const MachineBasicBlock *BB)
InlineResult isInlineViable(Function &Callee)
Minimal filter to detect invalid constructs for inlining.
InlineCost getInlineCost(CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI, function_ref< AssumptionCache &(Function &)> GetAssumptionCache, function_ref< const TargetLibraryInfo &(Function &)> GetTLI, function_ref< BlockFrequencyInfo &(Function &)> GetBFI=nullptr, ProfileSummaryInfo *PSI=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
Get an InlineCost object representing the cost of inlining this callsite.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
InlineResult InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, bool MergeAttributes=false, AAResults *CalleeAAR=nullptr, bool InsertLifetime=true, Function *ForwardVarArgsTo=nullptr)
This function inlines the called function into the basic block of the caller.
InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options.
int getCallsiteCost(const TargetTransformInfo &TTI, const CallBase &Call, const DataLayout &DL)
Return the cost associated with a callsite, including parameter passing and the call/return instructi...
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1879
iterator_range< df_iterator< T > > depth_first(const T &G)
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list.
Definition: STLExtras.h:2039
unsigned succ_size(const MachineBasicBlock *BB)
bool hasBranchWeightMD(const Instruction &I)
Checks if an instructions has Branch Weight Metadata.
Function * CloneFunction(Function *F, ValueToValueMapTy &VMap, ClonedCodeInfo *CodeInfo=nullptr)
Return a copy of the specified function and add it to that function's module.