// (Doxygen page header, kept as a comment — extraction artifact.)
// LLVM 23.0.0git — PartialInlining.cpp
1//===- PartialInlining.cpp - Inline parts of functions --------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass performs partial inlining, typically by inlining an if statement
10// that surrounds the body of the function.
11//
12//===----------------------------------------------------------------------===//
13
15#include "llvm/ADT/DenseMap.h"
16#include "llvm/ADT/DenseSet.h"
18#include "llvm/ADT/STLExtras.h"
20#include "llvm/ADT/Statistic.h"
29#include "llvm/IR/Attributes.h"
30#include "llvm/IR/BasicBlock.h"
31#include "llvm/IR/CFG.h"
32#include "llvm/IR/DebugLoc.h"
34#include "llvm/IR/Dominators.h"
35#include "llvm/IR/Function.h"
36#include "llvm/IR/InstrTypes.h"
37#include "llvm/IR/Instruction.h"
40#include "llvm/IR/Intrinsics.h"
41#include "llvm/IR/Module.h"
42#include "llvm/IR/Operator.h"
44#include "llvm/IR/User.h"
50#include "llvm/Transforms/IPO.h"
54#include <algorithm>
55#include <cassert>
56#include <cstdint>
57#include <memory>
58#include <tuple>
59#include <vector>
60
61using namespace llvm;
62
63#define DEBUG_TYPE "partial-inlining"
64
65STATISTIC(NumPartialInlined,
66 "Number of callsites functions partially inlined into.");
67STATISTIC(NumColdOutlinePartialInlined, "Number of times functions with "
68 "cold outlined regions were partially "
69 "inlined into its caller(s).");
70STATISTIC(NumColdRegionsFound,
71 "Number of cold single entry/exit regions found.");
72STATISTIC(NumColdRegionsOutlined,
73 "Number of cold single entry/exit regions outlined.");
74
75// Command line option to disable partial-inlining. The default is false:
76static cl::opt<bool>
77 DisablePartialInlining("disable-partial-inlining", cl::init(false),
78 cl::Hidden, cl::desc("Disable partial inlining"));
79// Command line option to disable multi-region partial-inlining. The default is
80// false:
82 "disable-mr-partial-inlining", cl::init(false), cl::Hidden,
83 cl::desc("Disable multi-region partial inlining"));
84
85// Command line option to force outlining in regions with live exit variables.
86// The default is false:
87static cl::opt<bool>
88 ForceLiveExit("pi-force-live-exit-outline", cl::init(false), cl::Hidden,
89 cl::desc("Force outline regions with live exits"));
90
91// Command line option to enable marking outline functions with Cold Calling
92// Convention. The default is false:
93static cl::opt<bool>
94 MarkOutlinedColdCC("pi-mark-coldcc", cl::init(false), cl::Hidden,
95 cl::desc("Mark outline function calls with ColdCC"));
96
97// This is an option used by testing:
98static cl::opt<bool> SkipCostAnalysis("skip-partial-inlining-cost-analysis",
99
101 cl::desc("Skip Cost Analysis"));
102// Used to determine if a cold region is worth outlining based on
103// its inlining cost compared to the original function. Default is set at 10%.
104// ie. if the cold region reduces the inlining cost of the original function by
105// at least 10%.
107 "min-region-size-ratio", cl::init(0.1), cl::Hidden,
108 cl::desc("Minimum ratio comparing relative sizes of each "
109 "outline candidate and original function"));
110// Used to tune the minimum number of execution counts needed in the predecessor
111// block to the cold edge. ie. confidence interval.
113 MinBlockCounterExecution("min-block-execution", cl::init(100), cl::Hidden,
114 cl::desc("Minimum block executions to consider "
115 "its BranchProbabilityInfo valid"));
116// Used to determine when an edge is considered cold. Default is set to 10%. ie.
117// if the branch probability is 10% or less, then it is deemed as 'cold'.
119 "cold-branch-ratio", cl::init(0.1), cl::Hidden,
120 cl::desc("Minimum BranchProbability to consider a region cold."));
121
123 "max-num-inline-blocks", cl::init(5), cl::Hidden,
124 cl::desc("Max number of blocks to be partially inlined"));
125
126// Command line option to set the maximum number of partial inlining allowed
127// for the module. The default value of -1 means no limit.
129 "max-partial-inlining", cl::init(-1), cl::Hidden,
130 cl::desc("Max number of partial inlining. The default is unlimited"));
131
132// Used only when PGO or user annotated branch data is absent. It is
133// the least value that is used to weigh the outline region. If BFI
134// produces larger value, the BFI value will be used.
135static cl::opt<int>
136 OutlineRegionFreqPercent("outline-region-freq-percent", cl::init(75),
138 cl::desc("Relative frequency of outline region to "
139 "the entry block"));
140
142 "partial-inlining-extra-penalty", cl::init(0), cl::Hidden,
143 cl::desc("A debug option to add additional penalty to the computed one."));
144
145namespace {
146
147struct FunctionOutliningInfo {
148 FunctionOutliningInfo() = default;
149
150 // Returns the number of blocks to be inlined including all blocks
151 // in Entries and one return block.
152 unsigned getNumInlinedBlocks() const { return Entries.size() + 1; }
153
154 // A set of blocks including the function entry that guard
155 // the region to be outlined.
157
158 // The return block that is not included in the outlined region.
159 BasicBlock *ReturnBlock = nullptr;
160
161 // The dominating block of the region to be outlined.
162 BasicBlock *NonReturnBlock = nullptr;
163
164 // The set of blocks in Entries that are predecessors to ReturnBlock
165 SmallVector<BasicBlock *, 4> ReturnBlockPreds;
166};
167
168struct FunctionOutliningMultiRegionInfo {
169 FunctionOutliningMultiRegionInfo() = default;
170
171 // Container for outline regions
172 struct OutlineRegionInfo {
173 OutlineRegionInfo(ArrayRef<BasicBlock *> Region, BasicBlock *EntryBlock,
174 BasicBlock *ExitBlock, BasicBlock *ReturnBlock)
175 : Region(Region), EntryBlock(EntryBlock), ExitBlock(ExitBlock),
176 ReturnBlock(ReturnBlock) {}
177 SmallVector<BasicBlock *, 8> Region;
178 BasicBlock *EntryBlock;
179 BasicBlock *ExitBlock;
180 BasicBlock *ReturnBlock;
181 };
182
184};
185
186struct PartialInlinerImpl {
187
188 PartialInlinerImpl(
189 function_ref<AssumptionCache &(Function &)> GetAC,
190 function_ref<AssumptionCache *(Function &)> LookupAC,
191 function_ref<TargetTransformInfo &(Function &)> GTTI,
192 function_ref<const TargetLibraryInfo &(Function &)> GTLI,
193 ProfileSummaryInfo &ProfSI,
194 function_ref<BlockFrequencyInfo &(Function &)> GBFI = nullptr)
195 : GetAssumptionCache(GetAC), LookupAssumptionCache(LookupAC),
196 GetTTI(GTTI), GetBFI(GBFI), GetTLI(GTLI), PSI(ProfSI) {}
197
198 bool run(Module &M);
199 // Main part of the transformation that calls helper functions to find
200 // outlining candidates, clone & outline the function, and attempt to
201 // partially inline the resulting function. Returns true if
202 // inlining was successful, false otherwise. Also returns the outline
203 // function (only if we partially inlined early returns) as there is a
204 // possibility to further "peel" early return statements that were left in the
205 // outline function due to code size.
206 std::pair<bool, Function *> unswitchFunction(Function &F);
207
208 // This class speculatively clones the function to be partial inlined.
209 // At the end of partial inlining, the remaining callsites to the cloned
210 // function that are not partially inlined will be fixed up to reference
211 // the original function, and the cloned function will be erased.
212 struct FunctionCloner {
213 // Two constructors, one for single region outlining, the other for
214 // multi-region outlining.
215 FunctionCloner(Function *F, FunctionOutliningInfo *OI,
216 OptimizationRemarkEmitter &ORE,
217 function_ref<AssumptionCache *(Function &)> LookupAC,
218 function_ref<TargetTransformInfo &(Function &)> GetTTI);
219 FunctionCloner(Function *F, FunctionOutliningMultiRegionInfo *OMRI,
220 OptimizationRemarkEmitter &ORE,
221 function_ref<AssumptionCache *(Function &)> LookupAC,
222 function_ref<TargetTransformInfo &(Function &)> GetTTI);
223
224 ~FunctionCloner();
225
226 // Prepare for function outlining: making sure there is only
227 // one incoming edge from the extracted/outlined region to
228 // the return block.
229 void normalizeReturnBlock() const;
230
231 // Do function outlining for cold regions.
232 bool doMultiRegionFunctionOutlining();
233 // Do function outlining for region after early return block(s).
234 // NOTE: For vararg functions that do the vararg handling in the outlined
235 // function, we temporarily generate IR that does not properly
236 // forward varargs to the outlined function. Calling InlineFunction
237 // will update calls to the outlined functions to properly forward
238 // the varargs.
239 Function *doSingleRegionFunctionOutlining();
240
241 Function *OrigFunc = nullptr;
242 Function *ClonedFunc = nullptr;
243
244 typedef std::pair<Function *, BasicBlock *> FuncBodyCallerPair;
245 // Keep track of Outlined Functions and the basic block they're called from.
246 SmallVector<FuncBodyCallerPair, 4> OutlinedFunctions;
247
248 // ClonedFunc is inlined in one of its callers after function
249 // outlining.
250 bool IsFunctionInlined = false;
251 // The cost of the region to be outlined.
252 InstructionCost OutlinedRegionCost = 0;
253 // ClonedOI is specific to outlining non-early return blocks.
254 std::unique_ptr<FunctionOutliningInfo> ClonedOI = nullptr;
255 // ClonedOMRI is specific to outlining cold regions.
256 std::unique_ptr<FunctionOutliningMultiRegionInfo> ClonedOMRI = nullptr;
257 std::unique_ptr<BlockFrequencyInfo> ClonedFuncBFI = nullptr;
258 OptimizationRemarkEmitter &ORE;
259 function_ref<AssumptionCache *(Function &)> LookupAC;
260 function_ref<TargetTransformInfo &(Function &)> GetTTI;
261 };
262
263private:
264 int NumPartialInlining = 0;
265 function_ref<AssumptionCache &(Function &)> GetAssumptionCache;
266 function_ref<AssumptionCache *(Function &)> LookupAssumptionCache;
267 function_ref<TargetTransformInfo &(Function &)> GetTTI;
268 function_ref<BlockFrequencyInfo &(Function &)> GetBFI;
269 function_ref<const TargetLibraryInfo &(Function &)> GetTLI;
270 ProfileSummaryInfo &PSI;
271
272 // Return the frequency of the OutlininingBB relative to F's entry point.
273 // The result is no larger than 1 and is represented using BP.
274 // (Note that the outlined region's 'head' block can only have incoming
275 // edges from the guarding entry blocks).
276 BranchProbability
277 getOutliningCallBBRelativeFreq(FunctionCloner &Cloner) const;
278
279 // Return true if the callee of CB should be partially inlined with
280 // profit.
281 bool shouldPartialInline(CallBase &CB, FunctionCloner &Cloner,
282 BlockFrequency WeightedOutliningRcost,
283 OptimizationRemarkEmitter &ORE) const;
284
285 // Try to inline DuplicateFunction (cloned from F with call to
286 // the OutlinedFunction into its callers. Return true
287 // if there is any successful inlining.
288 bool tryPartialInline(FunctionCloner &Cloner);
289
290 // Compute the mapping from use site of DuplicationFunction to the enclosing
291 // BB's profile count.
292 void
293 computeCallsiteToProfCountMap(Function *DuplicateFunction,
294 DenseMap<User *, uint64_t> &SiteCountMap) const;
295
296 bool isLimitReached() const {
297 return (MaxNumPartialInlining != -1 &&
298 NumPartialInlining >= MaxNumPartialInlining);
299 }
300
301 static CallBase *getSupportedCallBase(User *U) {
302 if (isa<CallInst>(U) || isa<InvokeInst>(U))
303 return cast<CallBase>(U);
304 llvm_unreachable("All uses must be calls");
305 return nullptr;
306 }
307
308 static CallBase *getOneCallSiteTo(Function &F) {
309 User *User = *F.user_begin();
310 return getSupportedCallBase(User);
311 }
312
313 std::tuple<DebugLoc, BasicBlock *> getOneDebugLoc(Function &F) const {
314 CallBase *CB = getOneCallSiteTo(F);
315 DebugLoc DLoc = CB->getDebugLoc();
316 BasicBlock *Block = CB->getParent();
317 return std::make_tuple(DLoc, Block);
318 }
319
320 // Returns the costs associated with function outlining:
321 // - The first value is the non-weighted runtime cost for making the call
322 // to the outlined function, including the addtional setup cost in the
323 // outlined function itself;
324 // - The second value is the estimated size of the new call sequence in
325 // basic block Cloner.OutliningCallBB;
326 std::tuple<InstructionCost, InstructionCost>
327 computeOutliningCosts(FunctionCloner &Cloner) const;
328
329 // Compute the 'InlineCost' of block BB. InlineCost is a proxy used to
330 // approximate both the size and runtime cost (Note that in the current
331 // inline cost analysis, there is no clear distinction there either).
332 static InstructionCost computeBBInlineCost(BasicBlock *BB,
333 TargetTransformInfo *TTI);
334
335 std::unique_ptr<FunctionOutliningInfo>
336 computeOutliningInfo(Function &F) const;
337
338 std::unique_ptr<FunctionOutliningMultiRegionInfo>
339 computeOutliningColdRegionsInfo(Function &F,
340 OptimizationRemarkEmitter &ORE) const;
341};
342
343} // end anonymous namespace
344
345std::unique_ptr<FunctionOutliningMultiRegionInfo>
346PartialInlinerImpl::computeOutliningColdRegionsInfo(
347 Function &F, OptimizationRemarkEmitter &ORE) const {
348 BasicBlock *EntryBlock = &F.front();
349
350 DominatorTree DT(F);
351 LoopInfo LI(DT);
352 BranchProbabilityInfo BPI(F, LI);
353 std::unique_ptr<BlockFrequencyInfo> ScopedBFI;
354 BlockFrequencyInfo *BFI;
355 if (!GetBFI) {
356 ScopedBFI.reset(new BlockFrequencyInfo(F, BPI, LI));
357 BFI = ScopedBFI.get();
358 } else
359 BFI = &(GetBFI(F));
360
361 // Return if we don't have profiling information.
362 if (!PSI.hasInstrumentationProfile())
363 return std::unique_ptr<FunctionOutliningMultiRegionInfo>();
364
365 std::unique_ptr<FunctionOutliningMultiRegionInfo> OutliningInfo =
366 std::make_unique<FunctionOutliningMultiRegionInfo>();
367
368 auto IsSingleExit =
369 [&ORE](SmallVectorImpl<BasicBlock *> &BlockList) -> BasicBlock * {
370 BasicBlock *ExitBlock = nullptr;
371 for (auto *Block : BlockList) {
372 for (BasicBlock *Succ : successors(Block)) {
373 if (!is_contained(BlockList, Succ)) {
374 if (ExitBlock) {
375 ORE.emit([&]() {
376 return OptimizationRemarkMissed(DEBUG_TYPE, "MultiExitRegion",
377 &Succ->front())
378 << "Region dominated by "
379 << ore::NV("Block", BlockList.front()->getName())
380 << " has more than one region exit edge.";
381 });
382 return nullptr;
383 }
384
385 ExitBlock = Block;
386 }
387 }
388 }
389 return ExitBlock;
390 };
391
392 auto BBProfileCount = [BFI](BasicBlock *BB) {
393 return BFI->getBlockProfileCount(BB).value_or(0);
394 };
395
396 // Use the same computeBBInlineCost function to compute the cost savings of
397 // the outlining the candidate region.
398 TargetTransformInfo *FTTI = &GetTTI(F);
399 InstructionCost OverallFunctionCost = 0;
400 for (auto &BB : F)
401 OverallFunctionCost += computeBBInlineCost(&BB, FTTI);
402
403 LLVM_DEBUG(dbgs() << "OverallFunctionCost = " << OverallFunctionCost
404 << "\n";);
405
406 InstructionCost MinOutlineRegionCost = OverallFunctionCost.map(
407 [&](auto Cost) { return Cost * MinRegionSizeRatio; });
408
409 BranchProbability MinBranchProbability(
410 static_cast<int>(ColdBranchRatio * MinBlockCounterExecution),
412 bool ColdCandidateFound = false;
413 BasicBlock *CurrEntry = EntryBlock;
414 std::vector<BasicBlock *> DFS;
415 SmallPtrSet<BasicBlock *, 8> VisitedSet;
416 DFS.push_back(CurrEntry);
417 VisitedSet.insert(CurrEntry);
418
419 // Use Depth First Search on the basic blocks to find CFG edges that are
420 // considered cold.
421 // Cold regions considered must also have its inline cost compared to the
422 // overall inline cost of the original function. The region is outlined only
423 // if it reduced the inline cost of the function by 'MinOutlineRegionCost' or
424 // more.
425 while (!DFS.empty()) {
426 auto *ThisBB = DFS.back();
427 DFS.pop_back();
428 // Only consider regions with predecessor blocks that are considered
429 // not-cold (default: part of the top 99.99% of all block counters)
430 // AND greater than our minimum block execution count (default: 100).
431 if (PSI.isColdBlock(ThisBB, BFI) ||
432 BBProfileCount(ThisBB) < MinBlockCounterExecution)
433 continue;
434 for (auto SI = succ_begin(ThisBB); SI != succ_end(ThisBB); ++SI) {
435 if (!VisitedSet.insert(*SI).second)
436 continue;
437 DFS.push_back(*SI);
438 // If branch isn't cold, we skip to the next one.
439 BranchProbability SuccProb = BPI.getEdgeProbability(ThisBB, *SI);
440 if (SuccProb > MinBranchProbability)
441 continue;
442
443 LLVM_DEBUG(dbgs() << "Found cold edge: " << ThisBB->getName() << "->"
444 << SI->getName()
445 << "\nBranch Probability = " << SuccProb << "\n";);
446
447 SmallVector<BasicBlock *, 8> DominateVector;
448 DT.getDescendants(*SI, DominateVector);
449 assert(!DominateVector.empty() &&
450 "SI should be reachable and have at least itself as descendant");
451
452 // We can only outline single entry regions (for now).
453 if (!DominateVector.front()->hasNPredecessors(1)) {
454 LLVM_DEBUG(dbgs() << "ABORT: Block " << SI->getName()
455 << " doesn't have a single predecessor in the "
456 "dominator tree\n";);
457 continue;
458 }
459
460 BasicBlock *ExitBlock = nullptr;
461 // We can only outline single exit regions (for now).
462 if (!(ExitBlock = IsSingleExit(DominateVector))) {
463 LLVM_DEBUG(dbgs() << "ABORT: Block " << SI->getName()
464 << " doesn't have a unique successor\n";);
465 continue;
466 }
467
468 InstructionCost OutlineRegionCost = 0;
469 for (auto *BB : DominateVector)
470 OutlineRegionCost += computeBBInlineCost(BB, &GetTTI(*BB->getParent()));
471
472 LLVM_DEBUG(dbgs() << "OutlineRegionCost = " << OutlineRegionCost
473 << "\n";);
474
475 if (!SkipCostAnalysis && OutlineRegionCost < MinOutlineRegionCost) {
476 ORE.emit([&]() {
477 return OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly",
478 &SI->front())
479 << ore::NV("Callee", &F)
480 << " inline cost-savings smaller than "
481 << ore::NV("Cost", MinOutlineRegionCost);
482 });
483
484 LLVM_DEBUG(dbgs() << "ABORT: Outline region cost is smaller than "
485 << MinOutlineRegionCost << "\n";);
486 continue;
487 }
488
489 // For now, ignore blocks that belong to a SISE region that is a
490 // candidate for outlining. In the future, we may want to look
491 // at inner regions because the outer region may have live-exit
492 // variables.
493 VisitedSet.insert_range(DominateVector);
494
495 // ReturnBlock here means the block after the outline call
496 BasicBlock *ReturnBlock = ExitBlock->getSingleSuccessor();
497 FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegInfo(
498 DominateVector, DominateVector.front(), ExitBlock, ReturnBlock);
499 OutliningInfo->ORI.push_back(RegInfo);
500 LLVM_DEBUG(dbgs() << "Found Cold Candidate starting at block: "
501 << DominateVector.front()->getName() << "\n";);
502 ColdCandidateFound = true;
503 NumColdRegionsFound++;
504 }
505 }
506
507 if (ColdCandidateFound)
508 return OutliningInfo;
509
510 return std::unique_ptr<FunctionOutliningMultiRegionInfo>();
511}
512
513std::unique_ptr<FunctionOutliningInfo>
514PartialInlinerImpl::computeOutliningInfo(Function &F) const {
515 BasicBlock *EntryBlock = &F.front();
516 CondBrInst *BR = dyn_cast<CondBrInst>(EntryBlock->getTerminator());
517 if (!BR)
518 return std::unique_ptr<FunctionOutliningInfo>();
519
520 // Returns true if Succ is BB's successor
521 auto IsSuccessor = [](BasicBlock *Succ, BasicBlock *BB) {
522 return is_contained(successors(BB), Succ);
523 };
524
525 auto IsReturnBlock = [](BasicBlock *BB) {
526 Instruction *TI = BB->getTerminator();
527 return isa<ReturnInst>(TI);
528 };
529
530 auto GetReturnBlock = [&](BasicBlock *Succ1, BasicBlock *Succ2) {
531 if (IsReturnBlock(Succ1))
532 return std::make_tuple(Succ1, Succ2);
533 if (IsReturnBlock(Succ2))
534 return std::make_tuple(Succ2, Succ1);
535
536 return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr);
537 };
538
539 // Detect a triangular shape:
540 auto GetCommonSucc = [&](BasicBlock *Succ1, BasicBlock *Succ2) {
541 if (IsSuccessor(Succ1, Succ2))
542 return std::make_tuple(Succ1, Succ2);
543 if (IsSuccessor(Succ2, Succ1))
544 return std::make_tuple(Succ2, Succ1);
545
546 return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr);
547 };
548
549 std::unique_ptr<FunctionOutliningInfo> OutliningInfo =
550 std::make_unique<FunctionOutliningInfo>();
551
552 BasicBlock *CurrEntry = EntryBlock;
553 bool CandidateFound = false;
554 do {
555 // The number of blocks to be inlined has already reached
556 // the limit. When MaxNumInlineBlocks is set to 0 or 1, this
557 // disables partial inlining for the function.
558 if (OutliningInfo->getNumInlinedBlocks() >= MaxNumInlineBlocks)
559 break;
560
561 if (succ_size(CurrEntry) != 2)
562 break;
563
564 BasicBlock *Succ1 = *succ_begin(CurrEntry);
565 BasicBlock *Succ2 = *(succ_begin(CurrEntry) + 1);
566
567 BasicBlock *ReturnBlock, *NonReturnBlock;
568 std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);
569
570 if (ReturnBlock) {
571 OutliningInfo->Entries.push_back(CurrEntry);
572 OutliningInfo->ReturnBlock = ReturnBlock;
573 OutliningInfo->NonReturnBlock = NonReturnBlock;
574 CandidateFound = true;
575 break;
576 }
577
578 BasicBlock *CommSucc, *OtherSucc;
579 std::tie(CommSucc, OtherSucc) = GetCommonSucc(Succ1, Succ2);
580
581 if (!CommSucc)
582 break;
583
584 OutliningInfo->Entries.push_back(CurrEntry);
585 CurrEntry = OtherSucc;
586 } while (true);
587
588 if (!CandidateFound)
589 return std::unique_ptr<FunctionOutliningInfo>();
590
591 // There should not be any successors (not in the entry set) other than
592 // {ReturnBlock, NonReturnBlock}
593 assert(OutliningInfo->Entries[0] == &F.front() &&
594 "Function Entry must be the first in Entries vector");
595 DenseSet<BasicBlock *> Entries(llvm::from_range, OutliningInfo->Entries);
596
597 // Returns true of BB has Predecessor which is not
598 // in Entries set.
599 auto HasNonEntryPred = [Entries](BasicBlock *BB) {
600 for (auto *Pred : predecessors(BB)) {
601 if (!Entries.count(Pred))
602 return true;
603 }
604 return false;
605 };
606 auto CheckAndNormalizeCandidate =
607 [Entries, HasNonEntryPred](FunctionOutliningInfo *OutliningInfo) {
608 for (BasicBlock *E : OutliningInfo->Entries) {
609 for (auto *Succ : successors(E)) {
610 if (Entries.count(Succ))
611 continue;
612 if (Succ == OutliningInfo->ReturnBlock)
613 OutliningInfo->ReturnBlockPreds.push_back(E);
614 else if (Succ != OutliningInfo->NonReturnBlock)
615 return false;
616 }
617 // There should not be any outside incoming edges either:
618 if (HasNonEntryPred(E))
619 return false;
620 }
621 return true;
622 };
623
624 if (!CheckAndNormalizeCandidate(OutliningInfo.get()))
625 return std::unique_ptr<FunctionOutliningInfo>();
626
627 // Now further growing the candidate's inlining region by
628 // peeling off dominating blocks from the outlining region:
629 while (OutliningInfo->getNumInlinedBlocks() < MaxNumInlineBlocks) {
630 BasicBlock *Cand = OutliningInfo->NonReturnBlock;
631 if (succ_size(Cand) != 2)
632 break;
633
634 if (HasNonEntryPred(Cand))
635 break;
636
637 BasicBlock *Succ1 = *succ_begin(Cand);
638 BasicBlock *Succ2 = *(succ_begin(Cand) + 1);
639
640 BasicBlock *ReturnBlock, *NonReturnBlock;
641 std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);
642 if (!ReturnBlock || ReturnBlock != OutliningInfo->ReturnBlock)
643 break;
644
645 if (NonReturnBlock->getSinglePredecessor() != Cand)
646 break;
647
648 // Now grow and update OutlininigInfo:
649 OutliningInfo->Entries.push_back(Cand);
650 OutliningInfo->NonReturnBlock = NonReturnBlock;
651 OutliningInfo->ReturnBlockPreds.push_back(Cand);
652 Entries.insert(Cand);
653 }
654
655 return OutliningInfo;
656}
657
658// Check if there is PGO data or user annotated branch data:
659static bool hasProfileData(const Function &F, const FunctionOutliningInfo &OI) {
660 if (F.hasProfileData())
661 return true;
662 // Now check if any of the entry block has MD_prof data:
663 for (auto *E : OI.Entries) {
664 CondBrInst *BR = dyn_cast<CondBrInst>(E->getTerminator());
665 if (BR && hasBranchWeightMD(*BR))
666 return true;
667 }
668 return false;
669}
670
671BranchProbability PartialInlinerImpl::getOutliningCallBBRelativeFreq(
672 FunctionCloner &Cloner) const {
673 BasicBlock *OutliningCallBB = Cloner.OutlinedFunctions.back().second;
674 auto EntryFreq =
675 Cloner.ClonedFuncBFI->getBlockFreq(&Cloner.ClonedFunc->getEntryBlock());
676 auto OutliningCallFreq =
677 Cloner.ClonedFuncBFI->getBlockFreq(OutliningCallBB);
678 // FIXME Hackery needed because ClonedFuncBFI is based on the function BEFORE
679 // we outlined any regions, so we may encounter situations where the
680 // OutliningCallFreq is *slightly* bigger than the EntryFreq.
681 if (OutliningCallFreq.getFrequency() > EntryFreq.getFrequency())
682 OutliningCallFreq = EntryFreq;
683
684 auto OutlineRegionRelFreq = BranchProbability::getBranchProbability(
685 OutliningCallFreq.getFrequency(), EntryFreq.getFrequency());
686
687 if (hasProfileData(*Cloner.OrigFunc, *Cloner.ClonedOI))
688 return OutlineRegionRelFreq;
689
690 // When profile data is not available, we need to be conservative in
691 // estimating the overall savings. Static branch prediction can usually
692 // guess the branch direction right (taken/non-taken), but the guessed
693 // branch probability is usually not biased enough. In case when the
694 // outlined region is predicted to be likely, its probability needs
695 // to be made higher (more biased) to not under-estimate the cost of
696 // function outlining. On the other hand, if the outlined region
697 // is predicted to be less likely, the predicted probablity is usually
698 // higher than the actual. For instance, the actual probability of the
699 // less likely target is only 5%, but the guessed probablity can be
700 // 40%. In the latter case, there is no need for further adjustment.
701 // FIXME: add an option for this.
702 if (OutlineRegionRelFreq < BranchProbability(45, 100))
703 return OutlineRegionRelFreq;
704
705 OutlineRegionRelFreq = std::max(
706 OutlineRegionRelFreq, BranchProbability(OutlineRegionFreqPercent, 100));
707
708 return OutlineRegionRelFreq;
709}
710
711bool PartialInlinerImpl::shouldPartialInline(
712 CallBase &CB, FunctionCloner &Cloner, BlockFrequency WeightedOutliningRcost,
713 OptimizationRemarkEmitter &ORE) const {
714 using namespace ore;
715
717 assert(Callee == Cloner.ClonedFunc);
718
720 return isInlineViable(*Callee).isSuccess();
721
722 Function *Caller = CB.getCaller();
723 auto &CalleeTTI = GetTTI(*Callee);
724 bool RemarksEnabled =
725 Callee->getContext().getDiagHandlerPtr()->isMissedOptRemarkEnabled(
726 DEBUG_TYPE);
727 InlineCost IC =
728 getInlineCost(CB, getInlineParams(), CalleeTTI, GetAssumptionCache,
729 GetTLI, GetBFI, &PSI, RemarksEnabled ? &ORE : nullptr);
730
731 if (IC.isAlways()) {
732 ORE.emit([&]() {
733 return OptimizationRemarkAnalysis(DEBUG_TYPE, "AlwaysInline", &CB)
734 << NV("Callee", Cloner.OrigFunc)
735 << " should always be fully inlined, not partially";
736 });
737 return false;
738 }
739
740 if (IC.isNever()) {
741 ORE.emit([&]() {
742 return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", &CB)
743 << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
744 << NV("Caller", Caller)
745 << " because it should never be inlined (cost=never)";
746 });
747 return false;
748 }
749
750 if (!IC) {
751 ORE.emit([&]() {
752 return OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly", &CB)
753 << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
754 << NV("Caller", Caller) << " because too costly to inline (cost="
755 << NV("Cost", IC.getCost()) << ", threshold="
756 << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")";
757 });
758 return false;
759 }
760 const DataLayout &DL = Caller->getDataLayout();
761
762 // The savings of eliminating the call:
763 int NonWeightedSavings = getCallsiteCost(CalleeTTI, CB, DL);
764 BlockFrequency NormWeightedSavings(NonWeightedSavings);
765
766 // Weighted saving is smaller than weighted cost, return false
767 if (NormWeightedSavings < WeightedOutliningRcost) {
768 ORE.emit([&]() {
769 return OptimizationRemarkAnalysis(DEBUG_TYPE, "OutliningCallcostTooHigh",
770 &CB)
771 << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
772 << NV("Caller", Caller) << " runtime overhead (overhead="
773 << NV("Overhead", (unsigned)WeightedOutliningRcost.getFrequency())
774 << ", savings="
775 << NV("Savings", (unsigned)NormWeightedSavings.getFrequency())
776 << ")"
777 << " of making the outlined call is too high";
778 });
779
780 return false;
781 }
782
783 ORE.emit([&]() {
784 return OptimizationRemarkAnalysis(DEBUG_TYPE, "CanBePartiallyInlined", &CB)
785 << NV("Callee", Cloner.OrigFunc) << " can be partially inlined into "
786 << NV("Caller", Caller) << " with cost=" << NV("Cost", IC.getCost())
787 << " (threshold="
788 << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")";
789 });
790 return true;
791}
792
793// TODO: Ideally we should share Inliner's InlineCost Analysis code.
794// For now use a simplified version. The returned 'InlineCost' will be used
795// to esimate the size cost as well as runtime cost of the BB.
797PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB,
798 TargetTransformInfo *TTI) {
799 InstructionCost InlineCost = 0;
800 const DataLayout &DL = BB->getDataLayout();
802 for (Instruction &I : *BB) {
803 // Skip free instructions.
804 switch (I.getOpcode()) {
805 case Instruction::BitCast:
806 case Instruction::PtrToInt:
807 case Instruction::IntToPtr:
808 case Instruction::Alloca:
809 case Instruction::PHI:
810 continue;
811 case Instruction::GetElementPtr:
812 if (cast<GetElementPtrInst>(&I)->hasAllZeroIndices())
813 continue;
814 break;
815 default:
816 break;
817 }
818
819 if (I.isLifetimeStartOrEnd())
820 continue;
821
822 if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
823 Intrinsic::ID IID = II->getIntrinsicID();
825 FastMathFlags FMF;
826 for (Value *Val : II->args())
827 Tys.push_back(Val->getType());
828
829 if (auto *FPMO = dyn_cast<FPMathOperator>(II))
830 FMF = FPMO->getFastMathFlags();
831
832 IntrinsicCostAttributes ICA(IID, II->getType(), Tys, FMF);
834 continue;
835 }
836
837 if (CallInst *CI = dyn_cast<CallInst>(&I)) {
838 InlineCost += getCallsiteCost(*TTI, *CI, DL);
839 continue;
840 }
841
842 if (InvokeInst *II = dyn_cast<InvokeInst>(&I)) {
843 InlineCost += getCallsiteCost(*TTI, *II, DL);
844 continue;
845 }
846
847 if (SwitchInst *SI = dyn_cast<SwitchInst>(&I)) {
848 InlineCost += (SI->getNumCases() + 1) * InstrCost;
849 continue;
850 }
851 InlineCost += InstrCost;
852 }
853
854 return InlineCost;
855}
856
857std::tuple<InstructionCost, InstructionCost>
858PartialInlinerImpl::computeOutliningCosts(FunctionCloner &Cloner) const {
859 InstructionCost OutliningFuncCallCost = 0, OutlinedFunctionCost = 0;
860 for (auto FuncBBPair : Cloner.OutlinedFunctions) {
861 Function *OutlinedFunc = FuncBBPair.first;
862 BasicBlock* OutliningCallBB = FuncBBPair.second;
863 // Now compute the cost of the call sequence to the outlined function
864 // 'OutlinedFunction' in BB 'OutliningCallBB':
865 auto *OutlinedFuncTTI = &GetTTI(*OutlinedFunc);
866 OutliningFuncCallCost +=
867 computeBBInlineCost(OutliningCallBB, OutlinedFuncTTI);
868
869 // Now compute the cost of the extracted/outlined function itself:
870 for (BasicBlock &BB : *OutlinedFunc)
871 OutlinedFunctionCost += computeBBInlineCost(&BB, OutlinedFuncTTI);
872 }
873 assert(OutlinedFunctionCost >= Cloner.OutlinedRegionCost &&
874 "Outlined function cost should be no less than the outlined region");
875
876 // The code extractor introduces a new root and exit stub blocks with
877 // additional unconditional branches. Those branches will be eliminated
878 // later with bb layout. The cost should be adjusted accordingly:
879 OutlinedFunctionCost -=
880 2 * InlineConstants::getInstrCost() * Cloner.OutlinedFunctions.size();
881
882 InstructionCost OutliningRuntimeOverhead =
883 OutliningFuncCallCost +
884 (OutlinedFunctionCost - Cloner.OutlinedRegionCost) +
886
887 return std::make_tuple(OutliningFuncCallCost, OutliningRuntimeOverhead);
888}
889
890// Create the callsite to profile count map which is
891// used to update the original function's entry count,
892// after the function is partially inlined into the callsite.
893void PartialInlinerImpl::computeCallsiteToProfCountMap(
894 Function *DuplicateFunction,
895 DenseMap<User *, uint64_t> &CallSiteToProfCountMap) const {
896 std::vector<User *> Users(DuplicateFunction->user_begin(),
897 DuplicateFunction->user_end());
898 Function *CurrentCaller = nullptr;
899 std::unique_ptr<BlockFrequencyInfo> TempBFI;
900 BlockFrequencyInfo *CurrentCallerBFI = nullptr;
901
902 auto ComputeCurrBFI = [&,this](Function *Caller) {
903 // For the old pass manager:
904 if (!GetBFI) {
905 DominatorTree DT(*Caller);
906 LoopInfo LI(DT);
907 BranchProbabilityInfo BPI(*Caller, LI);
908 TempBFI.reset(new BlockFrequencyInfo(*Caller, BPI, LI));
909 CurrentCallerBFI = TempBFI.get();
910 } else {
911 // New pass manager:
912 CurrentCallerBFI = &(GetBFI(*Caller));
913 }
914 };
915
916 for (User *User : Users) {
917 CallBase *CB = getSupportedCallBase(User);
918 Function *Caller = CB->getCaller();
919 if (CurrentCaller != Caller) {
920 CurrentCaller = Caller;
921 ComputeCurrBFI(Caller);
922 } else {
923 assert(CurrentCallerBFI && "CallerBFI is not set");
924 }
925 BasicBlock *CallBB = CB->getParent();
926 auto Count = CurrentCallerBFI->getBlockProfileCount(CallBB);
927 if (Count)
928 CallSiteToProfCountMap[User] = *Count;
929 else
930 CallSiteToProfCountMap[User] = 0;
931 }
932}
933
934PartialInlinerImpl::FunctionCloner::FunctionCloner(
935 Function *F, FunctionOutliningInfo *OI, OptimizationRemarkEmitter &ORE,
936 function_ref<AssumptionCache *(Function &)> LookupAC,
937 function_ref<TargetTransformInfo &(Function &)> GetTTI)
938 : OrigFunc(F), ORE(ORE), LookupAC(LookupAC), GetTTI(GetTTI) {
939 ClonedOI = std::make_unique<FunctionOutliningInfo>();
940
941 // Clone the function, so that we can hack away on it.
943 ClonedFunc = CloneFunction(F, VMap);
944
945 ClonedOI->ReturnBlock = cast<BasicBlock>(VMap[OI->ReturnBlock]);
946 ClonedOI->NonReturnBlock = cast<BasicBlock>(VMap[OI->NonReturnBlock]);
947 for (BasicBlock *BB : OI->Entries)
948 ClonedOI->Entries.push_back(cast<BasicBlock>(VMap[BB]));
949
950 for (BasicBlock *E : OI->ReturnBlockPreds) {
951 BasicBlock *NewE = cast<BasicBlock>(VMap[E]);
952 ClonedOI->ReturnBlockPreds.push_back(NewE);
953 }
954 // Go ahead and update all uses to the duplicate, so that we can just
955 // use the inliner functionality when we're done hacking.
956 F->replaceAllUsesWith(ClonedFunc);
957}
958
959PartialInlinerImpl::FunctionCloner::FunctionCloner(
960 Function *F, FunctionOutliningMultiRegionInfo *OI,
964 : OrigFunc(F), ORE(ORE), LookupAC(LookupAC), GetTTI(GetTTI) {
965 ClonedOMRI = std::make_unique<FunctionOutliningMultiRegionInfo>();
966
967 // Clone the function, so that we can hack away on it.
969 ClonedFunc = CloneFunction(F, VMap);
970
971 // Go through all Outline Candidate Regions and update all BasicBlock
972 // information.
973 for (const FunctionOutliningMultiRegionInfo::OutlineRegionInfo &RegionInfo :
974 OI->ORI) {
976 for (BasicBlock *BB : RegionInfo.Region)
977 Region.push_back(cast<BasicBlock>(VMap[BB]));
978
979 BasicBlock *NewEntryBlock = cast<BasicBlock>(VMap[RegionInfo.EntryBlock]);
980 BasicBlock *NewExitBlock = cast<BasicBlock>(VMap[RegionInfo.ExitBlock]);
981 BasicBlock *NewReturnBlock = nullptr;
982 if (RegionInfo.ReturnBlock)
983 NewReturnBlock = cast<BasicBlock>(VMap[RegionInfo.ReturnBlock]);
984 FunctionOutliningMultiRegionInfo::OutlineRegionInfo MappedRegionInfo(
985 Region, NewEntryBlock, NewExitBlock, NewReturnBlock);
986 ClonedOMRI->ORI.push_back(MappedRegionInfo);
987 }
988 // Go ahead and update all uses to the duplicate, so that we can just
989 // use the inliner functionality when we're done hacking.
990 F->replaceAllUsesWith(ClonedFunc);
991}
992
// Split the cloned return block so that PHI inputs coming from the entry
// blocks (which stay in the caller) are separated from the inputs coming out
// of the region to be extracted, keeping the extracted region self-contained.
void PartialInlinerImpl::FunctionCloner::normalizeReturnBlock() const {
  // Returns BB's first instruction if it is a PHI, nullptr otherwise.
  // (Note: both branches break on the first iteration, so at most one
  // instruction is examined.)
  auto GetFirstPHI = [](BasicBlock *BB) {
    BasicBlock::iterator I = BB->begin();
    PHINode *FirstPhi = nullptr;
    while (I != BB->end()) {
      PHINode *Phi = dyn_cast<PHINode>(I);
      if (!Phi)
        break;
      if (!FirstPhi) {
        FirstPhi = Phi;
        break;
      }
    }
    return FirstPhi;
  };

  // Shouldn't need to normalize PHIs if we're not outlining non-early return
  // blocks.
  if (!ClonedOI)
    return;

  // Special hackery is needed with PHI nodes that have inputs from more than
  // one extracted block. For simplicity, just split the PHIs into a two-level
  // sequence of PHIs, some of which will go in the extracted region, and some
  // of which will go outside.
  BasicBlock *PreReturn = ClonedOI->ReturnBlock;
  // only split block when necessary:
  PHINode *FirstPhi = GetFirstPHI(PreReturn);
  unsigned NumPredsFromEntries = ClonedOI->ReturnBlockPreds.size();

  // No PHI, or each PHI has at most one incoming edge beyond the entry
  // predecessors: nothing to normalize.
  if (!FirstPhi || FirstPhi->getNumIncomingValues() <= NumPredsFromEntries + 1)
    return;

  // A PHI whose incoming values are all identical reduces to that value.
  auto IsTrivialPhi = [](PHINode *PN) -> Value * {
    if (llvm::all_equal(PN->incoming_values()))
      return PN->getIncomingValue(0);
    return nullptr;
  };

  // Split after the PHIs: PreReturn keeps the original PHIs, the new
  // ReturnBlock receives the remaining instructions.
  ClonedOI->ReturnBlock = ClonedOI->ReturnBlock->splitBasicBlock(
      ClonedOI->ReturnBlock->getFirstNonPHIIt());
  BasicBlock::iterator I = PreReturn->begin();
  BasicBlock::iterator Ins = ClonedOI->ReturnBlock->begin();
  SmallVector<Instruction *, 4> DeadPhis;
  while (I != PreReturn->end()) {
    PHINode *OldPhi = dyn_cast<PHINode>(I);
    if (!OldPhi)
      break;

    // Second-level PHI in the new return block: one input from PreReturn
    // plus one per entry-block predecessor.
    PHINode *RetPhi =
        PHINode::Create(OldPhi->getType(), NumPredsFromEntries + 1, "");
    RetPhi->insertBefore(Ins);
    OldPhi->replaceAllUsesWith(RetPhi);
    Ins = ClonedOI->ReturnBlock->getFirstNonPHIIt();

    RetPhi->addIncoming(&*I, PreReturn);
    // Move the entry-predecessor inputs from the old PHI to the new one.
    for (BasicBlock *E : ClonedOI->ReturnBlockPreds) {
      RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(E), E);
      OldPhi->removeIncomingValue(E);
    }

    // After incoming values splitting, the old phi may become trivial.
    // Keeping the trivial phi can introduce definition inside the outline
    // region which is live-out, causing necessary overhead (load, store
    // arg passing etc).
    if (auto *OldPhiVal = IsTrivialPhi(OldPhi)) {
      OldPhi->replaceAllUsesWith(OldPhiVal);
      DeadPhis.push_back(OldPhi);
    }
    ++I;
  }
  // Erase collapsed PHIs only after the walk to avoid invalidating I.
  for (auto *DP : DeadPhis)
    DP->eraseFromParent();

  // Re-point the entry predecessors' terminators at the new return block so
  // they bypass the PHI block that goes with the extracted region.
  for (auto *E : ClonedOI->ReturnBlockPreds)
    E->getTerminator()->replaceUsesOfWith(PreReturn, ClonedOI->ReturnBlock);
}
1070
1071bool PartialInlinerImpl::FunctionCloner::doMultiRegionFunctionOutlining() {
1072
1073 auto ComputeRegionCost =
1074 [&](SmallVectorImpl<BasicBlock *> &Region) -> InstructionCost {
1076 for (BasicBlock* BB : Region)
1077 Cost += computeBBInlineCost(BB, &GetTTI(*BB->getParent()));
1078 return Cost;
1079 };
1080
1081 assert(ClonedOMRI && "Expecting OutlineInfo for multi region outline");
1082
1083 if (ClonedOMRI->ORI.empty())
1084 return false;
1085
1086 // The CodeExtractor needs a dominator tree.
1087 DominatorTree DT;
1088 DT.recalculate(*ClonedFunc);
1089
1090 // Manually calculate a BlockFrequencyInfo and BranchProbabilityInfo.
1091 LoopInfo LI(DT);
1092 BranchProbabilityInfo BPI(*ClonedFunc, LI);
1093 ClonedFuncBFI.reset(new BlockFrequencyInfo(*ClonedFunc, BPI, LI));
1094
1095 // Cache and recycle the CodeExtractor analysis to avoid O(n^2) compile-time.
1096 CodeExtractorAnalysisCache CEAC(*ClonedFunc);
1097
1098 SetVector<Value *> Inputs, Outputs, Sinks;
1099 for (FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegionInfo :
1100 ClonedOMRI->ORI) {
1101 InstructionCost CurrentOutlinedRegionCost =
1102 ComputeRegionCost(RegionInfo.Region);
1103
1104 CodeExtractor CE(RegionInfo.Region, &DT, /*AggregateArgs*/ false,
1105 ClonedFuncBFI.get(), &BPI,
1106 LookupAC(*RegionInfo.EntryBlock->getParent()),
1107 /* AllowVarargs */ false, /* AllowAlloca */ false,
1108 /* AllocaBlock */ nullptr, /* Suffix */ "",
1109 /* ArgsInZeroAddressSpace */ false,
1110 /* VoidReturnWithSingleOutput */ false);
1111
1112 CE.findInputsOutputs(Inputs, Outputs, Sinks);
1113
1114 LLVM_DEBUG({
1115 dbgs() << "inputs: " << Inputs.size() << "\n";
1116 dbgs() << "outputs: " << Outputs.size() << "\n";
1117 for (Value *value : Inputs)
1118 dbgs() << "value used in func: " << *value << "\n";
1119 for (Value *output : Outputs)
1120 dbgs() << "instr used in func: " << *output << "\n";
1121 });
1122
1123 // Do not extract regions that have live exit variables.
1124 if (Outputs.size() > 0 && !ForceLiveExit)
1125 continue;
1126
1127 if (Function *OutlinedFunc = CE.extractCodeRegion(CEAC)) {
1128 CallBase *OCS = PartialInlinerImpl::getOneCallSiteTo(*OutlinedFunc);
1129 BasicBlock *OutliningCallBB = OCS->getParent();
1130 assert(OutliningCallBB->getParent() == ClonedFunc);
1131 OutlinedFunctions.push_back(std::make_pair(OutlinedFunc,OutliningCallBB));
1132 NumColdRegionsOutlined++;
1133 OutlinedRegionCost += CurrentOutlinedRegionCost;
1134
1135 if (MarkOutlinedColdCC) {
1136 OutlinedFunc->setCallingConv(CallingConv::Cold);
1137 OCS->setCallingConv(CallingConv::Cold);
1138 }
1139 } else
1140 ORE.emit([&]() {
1141 return OptimizationRemarkMissed(DEBUG_TYPE, "ExtractFailed",
1142 &RegionInfo.Region.front()->front())
1143 << "Failed to extract region at block "
1144 << ore::NV("Block", RegionInfo.Region.front());
1145 });
1146 }
1147
1148 return !OutlinedFunctions.empty();
1149}
1150
1151Function *
1152PartialInlinerImpl::FunctionCloner::doSingleRegionFunctionOutlining() {
1153 // Returns true if the block is to be partial inlined into the caller
1154 // (i.e. not to be extracted to the out of line function)
1155 auto ToBeInlined = [&, this](BasicBlock *BB) {
1156 return BB == ClonedOI->ReturnBlock ||
1157 llvm::is_contained(ClonedOI->Entries, BB);
1158 };
1159
1160 assert(ClonedOI && "Expecting OutlineInfo for single region outline");
1161 // The CodeExtractor needs a dominator tree.
1162 DominatorTree DT;
1163 DT.recalculate(*ClonedFunc);
1164
1165 // Manually calculate a BlockFrequencyInfo and BranchProbabilityInfo.
1166 LoopInfo LI(DT);
1167 BranchProbabilityInfo BPI(*ClonedFunc, LI);
1168 ClonedFuncBFI.reset(new BlockFrequencyInfo(*ClonedFunc, BPI, LI));
1169
1170 // Gather up the blocks that we're going to extract.
1171 std::vector<BasicBlock *> ToExtract;
1172 auto *ClonedFuncTTI = &GetTTI(*ClonedFunc);
1173 ToExtract.push_back(ClonedOI->NonReturnBlock);
1174 OutlinedRegionCost += PartialInlinerImpl::computeBBInlineCost(
1175 ClonedOI->NonReturnBlock, ClonedFuncTTI);
1176 for (BasicBlock *BB : depth_first(&ClonedFunc->getEntryBlock()))
1177 if (!ToBeInlined(BB) && BB != ClonedOI->NonReturnBlock) {
1178 ToExtract.push_back(BB);
1179 // FIXME: the code extractor may hoist/sink more code
1180 // into the outlined function which may make the outlining
1181 // overhead (the difference of the outlined function cost
1182 // and OutliningRegionCost) look larger.
1183 OutlinedRegionCost += computeBBInlineCost(BB, ClonedFuncTTI);
1184 }
1185
1186 // Extract the body of the if.
1187 CodeExtractorAnalysisCache CEAC(*ClonedFunc);
1188 Function *OutlinedFunc =
1189 CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false,
1190 ClonedFuncBFI.get(), &BPI, LookupAC(*ClonedFunc),
1191 /* AllowVarargs */ true, /* AllowAlloca */ false,
1192 /* AllocaBlock */ nullptr, /* Suffix */ "",
1193 /* ArgsInZeroAddressSpace */ false,
1194 /* VoidReturnWithSingleOutput */ false)
1195 .extractCodeRegion(CEAC);
1196
1197 if (OutlinedFunc) {
1198 BasicBlock *OutliningCallBB =
1199 PartialInlinerImpl::getOneCallSiteTo(*OutlinedFunc)->getParent();
1200 assert(OutliningCallBB->getParent() == ClonedFunc);
1201 OutlinedFunctions.push_back(std::make_pair(OutlinedFunc, OutliningCallBB));
1202 } else
1203 ORE.emit([&]() {
1204 return OptimizationRemarkMissed(DEBUG_TYPE, "ExtractFailed",
1205 &ToExtract.front()->front())
1206 << "Failed to extract region at block "
1207 << ore::NV("Block", ToExtract.front());
1208 });
1209
1210 return OutlinedFunc;
1211}
1212
1213PartialInlinerImpl::FunctionCloner::~FunctionCloner() {
1214 // Ditch the duplicate, since we're done with it, and rewrite all remaining
1215 // users (function pointers, etc.) back to the original function.
1216 ClonedFunc->replaceAllUsesWith(OrigFunc);
1217 ClonedFunc->eraseFromParent();
1218 if (!IsFunctionInlined) {
1219 // Remove each function that was speculatively created if there is no
1220 // reference.
1221 for (auto FuncBBPair : OutlinedFunctions) {
1222 Function *Func = FuncBBPair.first;
1223 Func->eraseFromParent();
1224 }
1225 }
1226}
1227
1228std::pair<bool, Function *> PartialInlinerImpl::unswitchFunction(Function &F) {
1229 if (F.hasAddressTaken())
1230 return {false, nullptr};
1231
1232 // Let inliner handle it
1233 if (F.hasFnAttribute(Attribute::AlwaysInline))
1234 return {false, nullptr};
1235
1236 if (F.hasFnAttribute(Attribute::NoInline))
1237 return {false, nullptr};
1238
1239 if (PSI.isFunctionEntryCold(&F))
1240 return {false, nullptr};
1241
1242 if (F.users().empty())
1243 return {false, nullptr};
1244
1245 OptimizationRemarkEmitter ORE(&F);
1246
1247 // Only try to outline cold regions if we have a profile summary, which
1248 // implies we have profiling information.
1249 if (PSI.hasProfileSummary() && F.hasProfileData() &&
1251 std::unique_ptr<FunctionOutliningMultiRegionInfo> OMRI =
1252 computeOutliningColdRegionsInfo(F, ORE);
1253 if (OMRI) {
1254 FunctionCloner Cloner(&F, OMRI.get(), ORE, LookupAssumptionCache, GetTTI);
1255
1256 LLVM_DEBUG({
1257 dbgs() << "HotCountThreshold = " << PSI.getHotCountThreshold() << "\n";
1258 dbgs() << "ColdCountThreshold = " << PSI.getColdCountThreshold()
1259 << "\n";
1260 });
1261
1262 bool DidOutline = Cloner.doMultiRegionFunctionOutlining();
1263
1264 if (DidOutline) {
1265 LLVM_DEBUG({
1266 dbgs() << ">>>>>> Outlined (Cloned) Function >>>>>>\n";
1267 Cloner.ClonedFunc->print(dbgs());
1268 dbgs() << "<<<<<< Outlined (Cloned) Function <<<<<<\n";
1269 });
1270
1271 if (tryPartialInline(Cloner))
1272 return {true, nullptr};
1273 }
1274 }
1275 }
1276
1277 // Fall-thru to regular partial inlining if we:
1278 // i) can't find any cold regions to outline, or
1279 // ii) can't inline the outlined function anywhere.
1280 std::unique_ptr<FunctionOutliningInfo> OI = computeOutliningInfo(F);
1281 if (!OI)
1282 return {false, nullptr};
1283
1284 FunctionCloner Cloner(&F, OI.get(), ORE, LookupAssumptionCache, GetTTI);
1285 Cloner.normalizeReturnBlock();
1286
1287 Function *OutlinedFunction = Cloner.doSingleRegionFunctionOutlining();
1288
1289 if (!OutlinedFunction)
1290 return {false, nullptr};
1291
1292 if (tryPartialInline(Cloner))
1293 return {true, OutlinedFunction};
1294
1295 return {false, nullptr};
1296}
1297
1298bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) {
1299 if (Cloner.OutlinedFunctions.empty())
1300 return false;
1301
1302 auto OutliningCosts = computeOutliningCosts(Cloner);
1303
1304 InstructionCost SizeCost = std::get<0>(OutliningCosts);
1305 InstructionCost NonWeightedRcost = std::get<1>(OutliningCosts);
1306
1307 assert(SizeCost.isValid() && NonWeightedRcost.isValid() &&
1308 "Expected valid costs");
1309
1310 // Only calculate RelativeToEntryFreq when we are doing single region
1311 // outlining.
1312 BranchProbability RelativeToEntryFreq;
1313 if (Cloner.ClonedOI)
1314 RelativeToEntryFreq = getOutliningCallBBRelativeFreq(Cloner);
1315 else
1316 // RelativeToEntryFreq doesn't make sense when we have more than one
1317 // outlined call because each call will have a different relative frequency
1318 // to the entry block. We can consider using the average, but the
1319 // usefulness of that information is questionable. For now, assume we never
1320 // execute the calls to outlined functions.
1321 RelativeToEntryFreq = BranchProbability(0, 1);
1322
1323 BlockFrequency WeightedRcost =
1324 BlockFrequency(NonWeightedRcost.getValue()) * RelativeToEntryFreq;
1325
1326 // The call sequence(s) to the outlined function(s) are larger than the sum of
1327 // the original outlined region size(s), it does not increase the chances of
1328 // inlining the function with outlining (The inliner uses the size increase to
1329 // model the cost of inlining a callee).
1330 if (!SkipCostAnalysis && Cloner.OutlinedRegionCost < SizeCost) {
1331 OptimizationRemarkEmitter OrigFuncORE(Cloner.OrigFunc);
1332 DebugLoc DLoc;
1334 std::tie(DLoc, Block) = getOneDebugLoc(*Cloner.ClonedFunc);
1335 OrigFuncORE.emit([&]() {
1336 return OptimizationRemarkAnalysis(DEBUG_TYPE, "OutlineRegionTooSmall",
1337 DLoc, Block)
1338 << ore::NV("Function", Cloner.OrigFunc)
1339 << " not partially inlined into callers (Original Size = "
1340 << ore::NV("OutlinedRegionOriginalSize", Cloner.OutlinedRegionCost)
1341 << ", Size of call sequence to outlined function = "
1342 << ore::NV("NewSize", SizeCost) << ")";
1343 });
1344 return false;
1345 }
1346
1347 assert(Cloner.OrigFunc->users().empty() &&
1348 "F's users should all be replaced!");
1349
1350 std::vector<User *> Users(Cloner.ClonedFunc->user_begin(),
1351 Cloner.ClonedFunc->user_end());
1352
1353 DenseMap<User *, uint64_t> CallSiteToProfCountMap;
1354 auto CalleeEntryCount = Cloner.OrigFunc->getEntryCount();
1355 if (CalleeEntryCount)
1356 computeCallsiteToProfCountMap(Cloner.ClonedFunc, CallSiteToProfCountMap);
1357
1358 uint64_t CalleeEntryCountV =
1359 (CalleeEntryCount ? CalleeEntryCount->getCount() : 0);
1360
1361 bool AnyInline = false;
1362 for (User *User : Users) {
1363 CallBase *CB = getSupportedCallBase(User);
1364
1365 if (isLimitReached())
1366 continue;
1367
1368 OptimizationRemarkEmitter CallerORE(CB->getCaller());
1369 if (!shouldPartialInline(*CB, Cloner, WeightedRcost, CallerORE))
1370 continue;
1371
1372 // Construct remark before doing the inlining, as after successful inlining
1373 // the callsite is removed.
1374 OptimizationRemark OR(DEBUG_TYPE, "PartiallyInlined", CB);
1375 OR << ore::NV("Callee", Cloner.OrigFunc) << " partially inlined into "
1376 << ore::NV("Caller", CB->getCaller());
1377
1378 InlineFunctionInfo IFI(GetAssumptionCache, &PSI);
1379 // We can only forward varargs when we outlined a single region, else we
1380 // bail on vararg functions.
1381 if (!InlineFunction(*CB, IFI, /*MergeAttributes=*/false, nullptr,
1382 /*InsertLifetime=*/true, /*TrackInlineHistory=*/true,
1383 (Cloner.ClonedOI ? Cloner.OutlinedFunctions.back().first
1384 : nullptr))
1385 .isSuccess())
1386 continue;
1387
1388 CallerORE.emit(OR);
1389
1390 // Now update the entry count:
1391 if (CalleeEntryCountV) {
1392 if (auto It = CallSiteToProfCountMap.find(User);
1393 It != CallSiteToProfCountMap.end()) {
1394 uint64_t CallSiteCount = It->second;
1395 CalleeEntryCountV -= std::min(CalleeEntryCountV, CallSiteCount);
1396 }
1397 }
1398
1399 AnyInline = true;
1400 NumPartialInlining++;
1401 // Update the stats
1402 if (Cloner.ClonedOI)
1403 NumPartialInlined++;
1404 else
1405 NumColdOutlinePartialInlined++;
1406 }
1407
1408 if (AnyInline) {
1409 Cloner.IsFunctionInlined = true;
1410 if (CalleeEntryCount)
1411 Cloner.OrigFunc->setEntryCount(Function::ProfileCount(
1412 CalleeEntryCountV, CalleeEntryCount->getType()));
1413 OptimizationRemarkEmitter OrigFuncORE(Cloner.OrigFunc);
1414 OrigFuncORE.emit([&]() {
1415 return OptimizationRemark(DEBUG_TYPE, "PartiallyInlined", Cloner.OrigFunc)
1416 << "Partially inlined into at least one caller";
1417 });
1418 }
1419
1420 return AnyInline;
1421}
1422
1423bool PartialInlinerImpl::run(Module &M) {
1425 return false;
1426
1427 std::vector<Function *> Worklist;
1428 Worklist.reserve(M.size());
1429 for (Function &F : M)
1430 if (!F.use_empty() && !F.isDeclaration())
1431 Worklist.push_back(&F);
1432
1433 bool Changed = false;
1434 while (!Worklist.empty()) {
1435 Function *CurrFunc = Worklist.back();
1436 Worklist.pop_back();
1437
1438 if (CurrFunc->use_empty())
1439 continue;
1440
1441 std::pair<bool, Function *> Result = unswitchFunction(*CurrFunc);
1442 if (Result.second)
1443 Worklist.push_back(Result.second);
1444 Changed |= Result.first;
1445 }
1446
1447 return Changed;
1448}
1449
1452 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
1453
1454 auto GetAssumptionCache = [&FAM](Function &F) -> AssumptionCache & {
1455 return FAM.getResult<AssumptionAnalysis>(F);
1456 };
1457
1458 auto LookupAssumptionCache = [&FAM](Function &F) -> AssumptionCache * {
1459 return FAM.getCachedResult<AssumptionAnalysis>(F);
1460 };
1461
1462 auto GetBFI = [&FAM](Function &F) -> BlockFrequencyInfo & {
1463 return FAM.getResult<BlockFrequencyAnalysis>(F);
1464 };
1465
1466 auto GetTTI = [&FAM](Function &F) -> TargetTransformInfo & {
1467 return FAM.getResult<TargetIRAnalysis>(F);
1468 };
1469
1470 auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
1471 return FAM.getResult<TargetLibraryAnalysis>(F);
1472 };
1473
1475
1476 if (PartialInlinerImpl(GetAssumptionCache, LookupAssumptionCache, GetTTI,
1477 GetTLI, PSI, GetBFI)
1478 .run(M))
1479 return PreservedAnalyses::none();
1480 return PreservedAnalyses::all();
1481}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static MachineBasicBlock * OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the simple types necessary to represent the attributes associated with functions a...
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
cl::opt< unsigned > MinBlockCounterExecution
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
This file builds on the ADT/GraphTraits.h file to build generic depth first graph iterator.
#define DEBUG_TYPE
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
Module.h This file contains the declarations for the Module class.
iv Induction Variable Users
Definition IVUsers.cpp:48
static cl::opt< int > InstrCost("inline-instr-cost", cl::Hidden, cl::init(5), cl::desc("Cost of a single instruction when inlining"))
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Machine Check Debug Module
uint64_t IntrinsicInst * II
static cl::opt< unsigned > MaxNumInlineBlocks("max-num-inline-blocks", cl::init(5), cl::Hidden, cl::desc("Max number of blocks to be partially inlined"))
static cl::opt< int > OutlineRegionFreqPercent("outline-region-freq-percent", cl::init(75), cl::Hidden, cl::desc("Relative frequency of outline region to " "the entry block"))
static cl::opt< bool > MarkOutlinedColdCC("pi-mark-coldcc", cl::init(false), cl::Hidden, cl::desc("Mark outline function calls with ColdCC"))
static cl::opt< float > MinRegionSizeRatio("min-region-size-ratio", cl::init(0.1), cl::Hidden, cl::desc("Minimum ratio comparing relative sizes of each " "outline candidate and original function"))
static cl::opt< bool > DisableMultiRegionPartialInline("disable-mr-partial-inlining", cl::init(false), cl::Hidden, cl::desc("Disable multi-region partial inlining"))
cl::opt< unsigned > MinBlockCounterExecution("min-block-execution", cl::init(100), cl::Hidden, cl::desc("Minimum block executions to consider " "its BranchProbabilityInfo valid"))
static cl::opt< int > MaxNumPartialInlining("max-partial-inlining", cl::init(-1), cl::Hidden, cl::desc("Max number of partial inlining. The default is unlimited"))
static cl::opt< bool > DisablePartialInlining("disable-partial-inlining", cl::init(false), cl::Hidden, cl::desc("Disable partial inlining"))
static bool hasProfileData(const Function &F, const FunctionOutliningInfo &OI)
static cl::opt< float > ColdBranchRatio("cold-branch-ratio", cl::init(0.1), cl::Hidden, cl::desc("Minimum BranchProbability to consider a region cold."))
static cl::opt< bool > ForceLiveExit("pi-force-live-exit-outline", cl::init(false), cl::Hidden, cl::desc("Force outline regions with live exits"))
static cl::opt< unsigned > ExtraOutliningPenalty("partial-inlining-extra-penalty", cl::init(0), cl::Hidden, cl::desc("A debug option to add additional penalty to the computed one."))
static cl::opt< bool > SkipCostAnalysis("skip-partial-inlining-cost-analysis", cl::ReallyHidden, cl::desc("Skip Cost Analysis"))
FunctionAnalysisManager FAM
This file contains the declarations for profiling metadata utility functions.
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
This pass exposes codegen information to IR-level passes.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator end()
Definition BasicBlock.h:474
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:461
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
const Instruction & front() const
Definition BasicBlock.h:484
LLVM_ABI const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
Definition BasicBlock.h:237
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
LLVM_ABI std::optional< uint64_t > getBlockProfileCount(const BasicBlock *BB, bool AllowSynthetic=false) const
Returns the estimated profile count of BB.
uint64_t getFrequency() const
Returns the frequency as a fixpoint number scaled by the entry frequency.
static LLVM_ABI BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
void setCallingConv(CallingConv::ID CC)
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
LLVM_ABI Function * getCaller()
Helper to get the caller (the parent function).
Conditional Branch instruction.
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
iterator end()
Definition DenseMap.h:81
void recalculate(ParentType &Func)
recalculate - compute a dominator tree for the given function
void setCallingConv(CallingConv::ID CC)
Definition Function.h:276
int getCost() const
Get the inline cost estimate.
Definition InlineCost.h:146
bool isAlways() const
Definition InlineCost.h:140
int getCostDelta() const
Get the cost delta from the threshold for inlining.
Definition InlineCost.h:176
bool isNever() const
Definition InlineCost.h:141
bool isSuccess() const
Definition InlineCost.h:190
auto map(const Function &F) const -> InstructionCost
CostType getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI void insertBefore(InstListType::iterator InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified position.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
The optimization diagnostic interface.
LLVM_ABI void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
LLVM_ABI Value * removeIncomingValue(unsigned Idx, bool DeletePHIIfEmpty=true)
Remove an incoming value.
Value * getIncomingValueForBlock(const BasicBlock *BB) const
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
PreservedAnalyses run(Module &M, ModuleAnalysisManager &)
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition Analysis.h:115
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
size_type size() const
Determine the number of elements in the SetVector.
Definition SetVector.h:103
void insert_range(Range &&R)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
LLVM_ABI InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const
@ TCK_SizeAndLatency
The weighted sum of size and latency.
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
user_iterator user_begin()
Definition Value.h:402
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:549
bool use_empty() const
Definition Value.h:346
user_iterator user_end()
Definition Value.h:410
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
Definition ilist_node.h:34
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ BR
Control flow instructions. These all have token chains.
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
LLVM_ABI int getInstrCost()
@ CE
Windows NT (Windows on ARM)
Definition MCAsmInfo.h:48
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
@ User
could "use" a pointer
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< PhiNode * > Phi
Definition RDFGraph.h:390
NodeAddr< FuncNode * > Func
Definition RDFGraph.h:393
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
InstructionCost Cost
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
auto successors(const MachineBasicBlock *BB)
constexpr from_range_t from_range
InnerAnalysisManagerProxy< FunctionAnalysisManager, Module > FunctionAnalysisManagerModuleProxy
Provide the FunctionAnalysisManager to Module proxy.
LLVM_ABI InlineResult InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, bool MergeAttributes=false, AAResults *CalleeAAR=nullptr, bool InsertLifetime=true, bool TrackInlineHistory=false, Function *ForwardVarArgsTo=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
This function inlines the called function into the basic block of the caller.
LLVM_ABI InlineResult isInlineViable(Function &Callee)
Check if it is mechanically possible to inline the function Callee, based on the contents of the func...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
auto succ_size(const MachineBasicBlock *BB)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
LLVM_ABI InlineCost getInlineCost(CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI, function_ref< AssumptionCache &(Function &)> GetAssumptionCache, function_ref< const TargetLibraryInfo &(Function &)> GetTLI, function_ref< BlockFrequencyInfo &(Function &)> GetBFI=nullptr, ProfileSummaryInfo *PSI=nullptr, OptimizationRemarkEmitter *ORE=nullptr, function_ref< EphemeralValuesCache &(Function &)> GetEphValuesCache=nullptr)
Get an InlineCost object representing the cost of inlining this callsite.
TargetTransformInfo TTI
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options.
LLVM_ABI int getCallsiteCost(const TargetTransformInfo &TTI, const CallBase &Call, const DataLayout &DL)
Return the cost associated with a callsite, including parameter passing and the call/return instructi...
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1947
iterator_range< df_iterator< T > > depth_first(const T &G)
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer list are equal or the list is empty.
Definition STLExtras.h:2166
LLVM_ABI bool hasBranchWeightMD(const Instruction &I)
Checks if an instructions has Branch Weight Metadata.
LLVM_ABI Function * CloneFunction(Function *F, ValueToValueMapTy &VMap, ClonedCodeInfo *CodeInfo=nullptr)
Return a copy of the specified function and add it to that function's module.
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
Definition MIRParser.h:39