LLVM 23.0.0git
Inliner.cpp
Go to the documentation of this file.
1//===- Inliner.cpp - Code common to all inliners --------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the mechanics required to implement inlining without
10// missing any calls and updating the call graph. The decisions of which calls
11// are profitable to inline are implemented elsewhere.
12//
13//===----------------------------------------------------------------------===//
14
17#include "llvm/ADT/STLExtras.h"
18#include "llvm/ADT/ScopeExit.h"
19#include "llvm/ADT/SetVector.h"
22#include "llvm/ADT/Statistic.h"
24#include "llvm/ADT/StringRef.h"
33#include "llvm/Analysis/Loads.h"
38#include "llvm/IR/Attributes.h"
39#include "llvm/IR/BasicBlock.h"
40#include "llvm/IR/DebugLoc.h"
43#include "llvm/IR/Function.h"
45#include "llvm/IR/Instruction.h"
48#include "llvm/IR/Metadata.h"
49#include "llvm/IR/Module.h"
50#include "llvm/IR/PassManager.h"
51#include "llvm/IR/Value.h"
52#include "llvm/Pass.h"
55#include "llvm/Support/Debug.h"
61#include <algorithm>
62#include <cassert>
63#include <utility>
64
65using namespace llvm;
66
67#define DEBUG_TYPE "inline"
68
69STATISTIC(NumInlined, "Number of functions inlined");
70STATISTIC(NumDeleted, "Number of functions deleted because all callers found");
71
73 "intra-scc-cost-multiplier", cl::init(2), cl::Hidden,
75 "Cost multiplier to multiply onto inlined call sites where the "
76 "new call was previously an intra-SCC call (not relevant when the "
77 "original call was already intra-SCC). This can accumulate over "
78 "multiple inlinings (e.g. if a call site already had a cost "
79 "multiplier and one of its inlined calls was also subject to "
80 "this, the inlined call would have the original multiplier "
81 "multiplied by intra-scc-cost-multiplier). This is to prevent tons of "
82 "inlining through a child SCC which can cause terrible compile times"));
83
85 "inliner-forwarding-scan-limit", cl::init(16), cl::Hidden,
86 cl::desc("Maximum number of instructions to scan backward for "
87 "store-to-load forwarding in subsequent inlining decisions. "
88 "DefMaxInstsToScan=6 is not enough and misses inlining "
89 "opportunities (e.g. when class stores into mutiple members in "
90 "ctor and afterwards calls a function reading those members)"));
91
92/// A flag for testing, so we can print the content of the advisor when running
93/// it as part of the default (e.g. -O3) pipeline.
94static cl::opt<bool> KeepAdvisorForPrinting("keep-inline-advisor-for-printing",
95 cl::init(false), cl::Hidden);
96
97/// Allows printing the contents of the advisor after each SCC inliner pass.
98static cl::opt<bool>
99 EnablePostSCCAdvisorPrinting("enable-scc-inline-advisor-printing",
100 cl::init(false), cl::Hidden);
101
102
104 "cgscc-inline-replay", cl::init(""), cl::value_desc("filename"),
105 cl::desc(
106 "Optimization remarks file containing inline remarks to be replayed "
107 "by cgscc inlining."),
108 cl::Hidden);
109
111 "cgscc-inline-replay-scope",
114 "Replay on functions that have remarks associated "
115 "with them (default)"),
117 "Replay on the entire module")),
118 cl::desc("Whether inline replay should be applied to the entire "
119 "Module or just the Functions (default) that are present as "
120 "callers in remarks during cgscc inlining."),
121 cl::Hidden);
122
124 "cgscc-inline-replay-fallback",
129 "All decisions not in replay send to original advisor (default)"),
131 "AlwaysInline", "All decisions not in replay are inlined"),
133 "All decisions not in replay are not inlined")),
134 cl::desc(
135 "How cgscc inline replay treats sites that don't come from the replay. "
136 "Original: defers to original advisor, AlwaysInline: inline all sites "
137 "not in replay, NeverInline: inline no sites not in replay"),
138 cl::Hidden);
139
141 "cgscc-inline-replay-format",
144 clEnumValN(CallSiteFormat::Format::Line, "Line", "<Line Number>"),
146 "<Line Number>:<Column Number>"),
148 "LineDiscriminator", "<Line Number>.<Discriminator>"),
150 "LineColumnDiscriminator",
151 "<Line Number>:<Column Number>.<Discriminator> (default)")),
152 cl::desc("How cgscc inline replay file is formatted"), cl::Hidden);
153
155InlinerPass::getAdvisor(const ModuleAnalysisManagerCGSCCProxy::Result &MAM,
157 if (OwnedAdvisor)
158 return *OwnedAdvisor;
159
160 auto *IAA = MAM.getCachedResult<InlineAdvisorAnalysis>(M);
161 if (!IAA) {
162 // It should still be possible to run the inliner as a stand-alone SCC pass,
163 // for test scenarios. In that case, we default to the
164 // DefaultInlineAdvisor, which doesn't need to keep state between SCC pass
165 // runs. It also uses just the default InlineParams.
166 // In this case, we need to use the provided FAM, which is valid for the
167 // duration of the inliner pass, and thus the lifetime of the owned advisor.
168 // The one we would get from the MAM can be invalidated as a result of the
169 // inliner's activity.
170 OwnedAdvisor = std::make_unique<DefaultInlineAdvisor>(
171 M, FAM, getInlineParams(),
173
174 if (!CGSCCInlineReplayFile.empty())
175 OwnedAdvisor = getReplayInlineAdvisor(
176 M, FAM, M.getContext(), std::move(OwnedAdvisor),
177 ReplayInlinerSettings{CGSCCInlineReplayFile,
178 CGSCCInlineReplayScope,
179 CGSCCInlineReplayFallback,
180 {CGSCCInlineReplayFormat}},
181 /*EmitRemarks=*/true,
182 InlineContext{LTOPhase, InlinePass::ReplayCGSCCInliner});
183
184 return *OwnedAdvisor;
185 }
186 assert(IAA->getAdvisor() &&
187 "Expected a present InlineAdvisorAnalysis also have an "
188 "InlineAdvisor initialized");
189 return *IAA->getAdvisor();
190}
191
193 F.dropAllReferences();
195 BB.eraseFromParent();
196 BasicBlock *BB = BasicBlock::Create(F.getContext(), "", &F);
197 new UnreachableInst(F.getContext(), BB);
198}
199
202 CGSCCUpdateResult &UR) {
203 const auto &MAMProxy =
205 bool Changed = false;
206
207 assert(InitialC.size() > 0 && "Cannot handle an empty SCC!");
208 Module &M = *InitialC.begin()->getFunction().getParent();
209 ProfileSummaryInfo *PSI = MAMProxy.getCachedResult<ProfileSummaryAnalysis>(M);
210
213 .getManager();
214
215 InlineAdvisor &Advisor = getAdvisor(MAMProxy, FAM, M);
216 Advisor.onPassEntry(&InitialC);
217
218 // We use a single common worklist for calls across the entire SCC. We
219 // process these in-order and append new calls introduced during inlining to
220 // the end. Using a PriorityInlineOrder is optional here; when it is used,
221 // smaller callees get a higher priority to inline.
222 //
223 // Note that this particular order of processing is actually critical to
224 // avoid very bad behaviors. Consider *highly connected* call graphs where
225 // each function contains a small amount of code and a couple of calls to
226 // other functions. Because the LLVM inliner is fundamentally a bottom-up
227 // inliner, it can handle gracefully the fact that these all appear to be
228 // reasonable inlining candidates as it will flatten things until they become
229 // too big to inline, and then move on and flatten another batch.
230 //
231 // However, when processing call edges *within* an SCC we cannot rely on this
232 // bottom-up behavior. As a consequence, with heavily connected *SCCs* of
233 // functions we can end up incrementally inlining N calls into each of
234 // N functions because each incremental inlining decision looks good and we
235 // don't have a topological ordering to prevent explosions.
236 //
237 // To compensate for this, we don't process transitive edges made immediate
238 // by inlining until we've done one pass of inlining across the entire SCC.
239 // Large, highly connected SCCs still lead to some amount of code bloat in
240 // this model, but it is uniformly spread across all the functions in the SCC
241 // and eventually they all become too large to inline, rather than
242 // incrementally making a single function grow in a super linear fashion.
244
245 // Populate the initial list of calls in this SCC.
246 for (auto &N : InitialC) {
247 auto &ORE =
248 FAM.getResult<OptimizationRemarkEmitterAnalysis>(N.getFunction());
249 // We want to generally process call sites top-down in order for
250 // simplifications stemming from replacing the call with the returned value
251 // after inlining to be visible to subsequent inlining decisions.
252 // FIXME: Using the instruction sequence is a really bad way to do this.
253 // Instead we should do an actual RPO walk of the function body.
254 for (Instruction &I : instructions(N.getFunction()))
255 if (auto *CB = dyn_cast<CallBase>(&I))
256 if (Function *Callee = CB->getCalledFunction()) {
257 if (!Callee->isDeclaration())
258 Calls.push_back(CB);
259 else if (!isa<IntrinsicInst>(I)) {
260 using namespace ore;
261 setInlineRemark(*CB, "unavailable definition");
262 ORE.emit([&]() {
263 return OptimizationRemarkMissed(DEBUG_TYPE, "NoDefinition", &I)
264 << NV("Callee", Callee) << " will not be inlined into "
265 << NV("Caller", CB->getCaller())
266 << " because its definition is unavailable"
267 << setIsVerbose();
268 });
269 }
270 }
271 }
272
273 // Capture updatable variable for the current SCC.
274 auto *C = &InitialC;
275
276 llvm::scope_exit AdvisorOnExit([&] { Advisor.onPassExit(C); });
277
278 if (Calls.empty())
279 return PreservedAnalyses::all();
280
281 // Track a set vector of inlined callees so that we can augment the caller
282 // with all of their edges in the call graph before pruning out the ones that
283 // got simplified away.
284 SmallSetVector<Function *, 4> InlinedCallees;
285
286 // Track the dead functions to delete once finished with inlining calls. We
287 // defer deleting these to make it easier to handle the call graph updates.
288 SmallVector<Function *, 4> DeadFunctions;
289
290 // Track potentially dead non-local functions with comdats to see if they can
291 // be deleted as a batch after inlining.
292 SmallVector<Function *, 4> DeadFunctionsInComdats;
293
294 // Loop forward over all of the calls. Note that we cannot cache the size as
295 // inlining can introduce new calls that need to be processed.
296 for (int I = 0; I < (int)Calls.size(); ++I) {
297 // We expect the calls to typically be batched with sequences of calls that
298 // have the same caller, so we first set up some shared infrastructure for
299 // this caller. We also do any pruning we can at this layer on the caller
300 // alone.
301 Function &F = *Calls[I]->getCaller();
303 if (CG.lookupSCC(N) != C)
304 continue;
305
306 LLVM_DEBUG(dbgs() << "Inlining calls in: " << F.getName() << "\n"
307 << " Function size: " << F.getInstructionCount()
308 << "\n");
309
310 auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
311 return FAM.getResult<AssumptionAnalysis>(F);
312 };
313
314 // Now process as many calls as we have within this caller in the sequence.
315 // We bail out as soon as the caller has to change so we can update the
316 // call graph and prepare the context of that new caller.
317 bool DidInline = false;
318 for (; I < (int)Calls.size() && Calls[I]->getCaller() == &F; ++I) {
319 CallBase *CB = Calls[I];
320 Function &Callee = *CB->getCalledFunction();
321
322 // Check if this inlining may repeat breaking an SCC apart that has
323 // already been split once before. In that case, inlining here may
324 // trigger infinite inlining, much like is prevented within the inliner
325 // itself by the InlineHistory above, but spread across CGSCC iterations
326 // and thus hidden from the full inline history.
327 LazyCallGraph::Node &CalleeN = *CG.lookup(Callee);
328 LazyCallGraph::SCC *CalleeSCC = CG.lookupSCC(CalleeN);
329 if (CalleeSCC == C && UR.InlinedInternalEdges.count({&N, C})) {
330 LLVM_DEBUG(dbgs() << "Skipping inlining internal SCC edge from a node "
331 "previously split out of this SCC by inlining: "
332 << F.getName() << " -> " << Callee.getName() << "\n");
333 setInlineRemark(*CB, "recursive SCC split");
334 continue;
335 }
336
337 // Store-to-load forwarding: loads can sometimes be simplified to
338 // constants from stores introduced by previous inlining.
339 if (DidInline) {
340 for (Value *Arg : CB->args()) {
341 auto *LI = dyn_cast<LoadInst>(Arg);
342 if (!LI || !LI->isSimple())
343 continue;
344 BasicBlock::iterator BBI = LI->getIterator();
346 LI, LI->getParent(), BBI, InlinerForwardingScanLimit);
347 if (!Available)
348 continue;
350 if (!C)
351 continue;
352 // Handle type mismatches from memset forwarding (e.g. memset
353 // writes i64 0 but the load type is ptr).
354 if (C->getType() != LI->getType()) {
355 if (C->isNullValue())
356 C = Constant::getNullValue(LI->getType());
357 else
358 continue;
359 }
360 LI->replaceAllUsesWith(C);
361 LI->eraseFromParent();
362 }
363 }
364
365 std::unique_ptr<InlineAdvice> Advice =
366 Advisor.getAdvice(*CB, OnlyMandatory);
367
368 // Check whether we want to inline this callsite.
369 if (!Advice)
370 continue;
371
372 if (!Advice->isInliningRecommended()) {
373 Advice->recordUnattemptedInlining();
374 continue;
375 }
376
377 int CBCostMult =
380 .value_or(1);
381
382 // Setup the data structure used to plumb customization into the
383 // `InlineFunction` routine.
385 GetAssumptionCache, PSI,
386 &FAM.getResult<BlockFrequencyAnalysis>(*(CB->getCaller())),
387 &FAM.getResult<BlockFrequencyAnalysis>(Callee));
388
389 // For compile time reasons we try to only track inline history for the
390 // calls where it may actually prevent inlining, which is inlining through
391 // an SCC. This can happen if the callee is in a non-trivial SCC/RefSCC,
392 // or if an inlined call site was an indirect call, which can be
393 // devirtualized to call any target by replacing the indirectly called
394 // function with a function pointer referenced by the caller. The indirect
395 // call case is handled within InlineFunction.
396 bool TrackInlineHistory = CalleeSCC->size() != 1 ||
397 CalleeSCC->getOuterRefSCC().size() != 1 ||
398 CalleeN->lookup(CalleeN) != nullptr;
399
401 *CB, IFI, /*MergeAttributes=*/true,
402 &FAM.getResult<AAManager>(*CB->getCaller()), /*InsertLifetime=*/true,
403 TrackInlineHistory, nullptr,
405 if (!IR.isSuccess()) {
406 Advice->recordUnsuccessfulInlining(IR);
407 continue;
408 }
409 // TODO: Shouldn't we be invalidating all analyses on F here?
410 // The caller was modified, so invalidate Ephemeral Values.
411 FAM.getResult<EphemeralValuesAnalysis>(F).clear();
412
413 DidInline = true;
414 InlinedCallees.insert(&Callee);
415 ++NumInlined;
416
417 LLVM_DEBUG(dbgs() << " Size after inlining: "
418 << F.getInstructionCount() << "\n");
419
420 // Add any new callsites to defined functions to the worklist.
421 if (!IFI.InlinedCallSites.empty()) {
422 for (CallBase *ICB : reverse(IFI.InlinedCallSites)) {
423 Function *NewCallee = ICB->getCalledFunction();
424 assert(!(NewCallee && NewCallee->isIntrinsic()) &&
425 "Intrinsic calls should not be tracked.");
426 if (!NewCallee) {
427 // Try to promote an indirect (virtual) call without waiting for
428 // the post-inline cleanup and the next DevirtSCCRepeatedPass
429 // iteration because the next iteration may not happen and we may
430 // miss inlining it.
431 if (tryPromoteCall(*ICB))
432 NewCallee = ICB->getCalledFunction();
433 }
434 if (NewCallee) {
435 if (!NewCallee->isDeclaration()) {
436 Calls.push_back(ICB);
437 // Continually inlining through an SCC can result in huge compile
438 // times and bloated code since we arbitrarily stop at some point
439 // when the inliner decides it's not profitable to inline anymore.
440 // We attempt to mitigate this by making these calls exponentially
441 // more expensive.
442 // This doesn't apply to calls in the same SCC since if we do
443 // inline through the SCC the function will end up being
444 // self-recursive which the inliner bails out on, and inlining
445 // within an SCC is necessary for performance.
446 if (CalleeSCC != C &&
447 CalleeSCC == CG.lookupSCC(CG.get(*NewCallee))) {
448 Attribute NewCBCostMult = Attribute::get(
449 M.getContext(),
451 itostr(CBCostMult * IntraSCCCostMultiplier));
452 ICB->addFnAttr(NewCBCostMult);
453 }
454 }
455 }
456 }
457 }
458
459 // For local functions or discardable functions without comdats, check
460 // whether this makes the callee trivially dead. In that case, we can drop
461 // the body of the function eagerly which may reduce the number of callers
462 // of other functions to one, changing inline cost thresholds. Non-local
463 // discardable functions with comdats are checked later on.
464 bool CalleeWasDeleted = false;
465 if (Callee.isDiscardableIfUnused() && Callee.hasZeroLiveUses() &&
466 !CG.isLibFunction(Callee)) {
467 if (Callee.hasLocalLinkage() || !Callee.hasComdat()) {
468 Calls.erase(std::remove_if(Calls.begin() + I + 1, Calls.end(),
469 [&](const CallBase *CB) {
470 return CB->getCaller() == &Callee;
471 }),
472 Calls.end());
473
474 // Report inlining decision BEFORE deleting function contents, so we
475 // can still access e.g. the DebugLoc
476 Advice->recordInliningWithCalleeDeleted();
477 // Clear the body and queue the function itself for call graph
478 // updating when we finish inlining.
480 assert(!is_contained(DeadFunctions, &Callee) &&
481 "Cannot put cause a function to become dead twice!");
482 DeadFunctions.push_back(&Callee);
483 CalleeWasDeleted = true;
484 } else {
485 DeadFunctionsInComdats.push_back(&Callee);
486 }
487 }
488 if (!CalleeWasDeleted)
489 Advice->recordInlining();
490 }
491
492 // Back the call index up by one to put us in a good position to go around
493 // the outer loop.
494 --I;
495
496 if (!DidInline)
497 continue;
498 Changed = true;
499
500 // At this point, since we have made changes we have at least removed
501 // a call instruction. However, in the process we do some incremental
502 // simplification of the surrounding code. This simplification can
503 // essentially do all of the same things as a function pass and we can
504 // re-use the exact same logic for updating the call graph to reflect the
505 // change.
506
507 // Inside the update, we also update the FunctionAnalysisManager in the
508 // proxy for this particular SCC. We do this as the SCC may have changed and
509 // as we're going to mutate this particular function we want to make sure
510 // the proxy is in place to forward any invalidation events.
511 LazyCallGraph::SCC *OldC = C;
513 LLVM_DEBUG(dbgs() << "Updated inlining SCC: " << *C << "\n");
514
515 // If this causes an SCC to split apart into multiple smaller SCCs, there
516 // is a subtle risk we need to prepare for. Other transformations may
517 // expose an "infinite inlining" opportunity later, and because of the SCC
518 // mutation, we will revisit this function and potentially re-inline. If we
519 // do, and that re-inlining also has the potential to mutate the SCC
520 // structure, the infinite inlining problem can manifest through infinite
521 // SCC splits and merges. To avoid this, we capture the originating caller
522 // node and the SCC containing the call edge. This is a slight over-
523 // approximation of the possible inlining decisions that must be avoided,
524 // but is relatively efficient to store. We use C != OldC to know when
525 // a new SCC is generated and the original SCC may be generated via merge
526 // in later iterations.
527 //
528 // It is also possible that even if no new SCC is generated
529 // (i.e., C == OldC), the original SCC could be split and then merged
530 // into the same one as itself, and the original SCC will be added into
531 // UR.CWorklist again; we want to catch such cases too.
532 //
533 // FIXME: This seems like a very heavyweight way of retaining the inline
534 // history, we should look for a more efficient way of tracking it.
535 if ((C != OldC || UR.CWorklist.count(OldC)) &&
536 llvm::any_of(InlinedCallees, [&](Function *Callee) {
537 return CG.lookupSCC(*CG.lookup(*Callee)) == OldC;
538 })) {
539 LLVM_DEBUG(dbgs() << "Inlined an internal call edge and split an SCC, "
540 "retaining this to avoid infinite inlining.\n");
541 UR.InlinedInternalEdges.insert({&N, OldC});
542 }
543 InlinedCallees.clear();
544
545 // Invalidate analyses for this function now so that we don't have to
546 // invalidate analyses for all functions in this SCC later.
547 FAM.invalidate(F, PreservedAnalyses::none());
548 }
549
550 // We must ensure that we only delete functions with comdats if every function
551 // in the comdat is going to be deleted.
552 if (!DeadFunctionsInComdats.empty()) {
553 filterDeadComdatFunctions(DeadFunctionsInComdats);
554 for (auto *Callee : DeadFunctionsInComdats)
556 DeadFunctions.append(DeadFunctionsInComdats);
557 }
558
559 // Now that we've finished inlining all of the calls across this SCC, delete
560 // all of the trivially dead functions, updating the call graph and the CGSCC
561 // pass manager in the process.
562 //
563 // Note that this walks a pointer set which has non-deterministic order but
564 // that is OK as all we do is delete things and add pointers to unordered
565 // sets.
566 for (Function *DeadF : DeadFunctions) {
567 CG.markDeadFunction(*DeadF);
568 // Get the necessary information out of the call graph and nuke the
569 // function there. Also, clear out any cached analyses.
570 auto &DeadC = *CG.lookupSCC(*CG.lookup(*DeadF));
571 FAM.clear(*DeadF, DeadF->getName());
572 AM.clear(DeadC, DeadC.getName());
573
574 // Mark the relevant parts of the call graph as invalid so we don't visit
575 // them.
576 UR.InvalidatedSCCs.insert(&DeadC);
577
578 UR.DeadFunctions.push_back(DeadF);
579
580 ++NumDeleted;
581 }
582
583 if (!Changed)
584 return PreservedAnalyses::all();
585
587 // Even if we change the IR, we update the core CGSCC data structures and so
588 // can preserve the proxy to the function analysis manager.
590 // We have already invalidated all analyses on modified functions.
592 return PA;
593}
594
596 bool MandatoryFirst,
597 InlineContext IC,
599 unsigned MaxDevirtIterations)
600 : Params(Params), IC(IC), Mode(Mode),
601 MaxDevirtIterations(MaxDevirtIterations) {
602 // Run the inliner first. The theory is that we are walking bottom-up and so
603 // the callees have already been fully optimized, and we want to inline them
604 // into the callers so that our optimizations can reflect that.
605 // For PreLinkThinLTO pass, we disable hot-caller heuristic for sample PGO
606 // because it makes profile annotation in the backend inaccurate.
607 if (MandatoryFirst) {
608 PM.addPass(InlinerPass(/*OnlyMandatory*/ true));
611 }
612 PM.addPass(InlinerPass());
615}
616
619 auto &IAA = MAM.getResult<InlineAdvisorAnalysis>(M);
620 if (!IAA.tryCreate(Params, Mode,
621 {CGSCCInlineReplayFile,
622 CGSCCInlineReplayScope,
623 CGSCCInlineReplayFallback,
624 {CGSCCInlineReplayFormat}},
625 IC)) {
626 M.getContext().emitError(
627 "Could not setup Inlining Advisor for the requested "
628 "mode and/or options");
629 return PreservedAnalyses::all();
630 }
631
632 // We wrap the CGSCC pipeline in a devirtualization repeater. This will try
633 // to detect when we devirtualize indirect calls and iterate the SCC passes
634 // in that case to try and catch knock-on inlining or function attrs
635 // opportunities. Then we add it to the module pipeline by walking the SCCs
636 // in postorder (or bottom-up).
637 // If MaxDevirtIterations is 0, we just don't use the devirtualization
638 // wrapper.
639 if (MaxDevirtIterations == 0)
640 MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(PM)));
641 else
644
645 MPM.addPass(std::move(AfterCGMPM));
646 MPM.run(M, MAM);
647
648 // Discard the InlineAdvisor, a subsequent inlining session should construct
649 // its own.
650 auto PA = PreservedAnalyses::all();
652 PA.abandon<InlineAdvisorAnalysis>();
653 return PA;
654}
655
657 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
658 static_cast<PassInfoMixin<InlinerPass> *>(this)->printPipeline(
659 OS, MapClassName2PassName);
660 if (OnlyMandatory)
661 OS << "<only-mandatory>";
662}
663
665 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
666 // Print some info about passes added to the wrapper. This is, however,
667 // incomplete, as the InlineAdvisorAnalysis part isn't included (which also
668 // depends on Params and Mode).
669 if (!MPM.isEmpty()) {
670 MPM.printPipeline(OS, MapClassName2PassName);
671 OS << ',';
672 }
673 OS << "cgscc(";
674 if (MaxDevirtIterations != 0)
675 OS << "devirt<" << MaxDevirtIterations << ">(";
676 PM.printPipeline(OS, MapClassName2PassName);
677 if (MaxDevirtIterations != 0)
678 OS << ')';
679 OS << ')';
680}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Expand Atomic instructions
This file contains the simple types necessary to represent the attributes associated with functions a...
This is the interface for LLVM's primary stateless and local alias analysis.
This header provides classes for managing passes over SCCs of the call graph.
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
@ Available
We know the block is fully available. This is a fixpoint.
Definition GVN.cpp:945
#define DEBUG_TYPE
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
static cl::opt< unsigned > InlinerForwardingScanLimit("inliner-forwarding-scan-limit", cl::init(16), cl::Hidden, cl::desc("Maximum number of instructions to scan backward for " "store-to-load forwarding in subsequent inlining decisions. " "DefMaxInstsToScan=6 is not enough and misses inlining " "opportunities (e.g. when class stores into mutiple members in " "ctor and afterwards calls a function reading those members)"))
static cl::opt< ReplayInlinerSettings::Scope > CGSCCInlineReplayScope("cgscc-inline-replay-scope", cl::init(ReplayInlinerSettings::Scope::Function), cl::values(clEnumValN(ReplayInlinerSettings::Scope::Function, "Function", "Replay on functions that have remarks associated " "with them (default)"), clEnumValN(ReplayInlinerSettings::Scope::Module, "Module", "Replay on the entire module")), cl::desc("Whether inline replay should be applied to the entire " "Module or just the Functions (default) that are present as " "callers in remarks during cgscc inlining."), cl::Hidden)
static cl::opt< bool > KeepAdvisorForPrinting("keep-inline-advisor-for-printing", cl::init(false), cl::Hidden)
A flag for test, so we can print the content of the advisor when running it as part of the default (e...
static cl::opt< std::string > CGSCCInlineReplayFile("cgscc-inline-replay", cl::init(""), cl::value_desc("filename"), cl::desc("Optimization remarks file containing inline remarks to be replayed " "by cgscc inlining."), cl::Hidden)
static cl::opt< bool > EnablePostSCCAdvisorPrinting("enable-scc-inline-advisor-printing", cl::init(false), cl::Hidden)
Allows printing the contents of the advisor after each SCC inliner pass.
static cl::opt< int > IntraSCCCostMultiplier("intra-scc-cost-multiplier", cl::init(2), cl::Hidden, cl::desc("Cost multiplier to multiply onto inlined call sites where the " "new call was previously an intra-SCC call (not relevant when the " "original call was already intra-SCC). This can accumulate over " "multiple inlinings (e.g. if a call site already had a cost " "multiplier and one of its inlined calls was also subject to " "this, the inlined call would have the original multiplier " "multiplied by intra-scc-cost-multiplier). This is to prevent tons of " "inlining through a child SCC which can cause terrible compile times"))
static cl::opt< CallSiteFormat::Format > CGSCCInlineReplayFormat("cgscc-inline-replay-format", cl::init(CallSiteFormat::Format::LineColumnDiscriminator), cl::values(clEnumValN(CallSiteFormat::Format::Line, "Line", "<Line Number>"), clEnumValN(CallSiteFormat::Format::LineColumn, "LineColumn", "<Line Number>:<Column Number>"), clEnumValN(CallSiteFormat::Format::LineDiscriminator, "LineDiscriminator", "<Line Number>.<Discriminator>"), clEnumValN(CallSiteFormat::Format::LineColumnDiscriminator, "LineColumnDiscriminator", "<Line Number>:<Column Number>.<Discriminator> (default)")), cl::desc("How cgscc inline replay file is formatted"), cl::Hidden)
void makeFunctionBodyUnreachable(Function &F)
Definition Inliner.cpp:192
static cl::opt< ReplayInlinerSettings::Fallback > CGSCCInlineReplayFallback("cgscc-inline-replay-fallback", cl::init(ReplayInlinerSettings::Fallback::Original), cl::values(clEnumValN(ReplayInlinerSettings::Fallback::Original, "Original", "All decisions not in replay send to original advisor (default)"), clEnumValN(ReplayInlinerSettings::Fallback::AlwaysInline, "AlwaysInline", "All decisions not in replay are inlined"), clEnumValN(ReplayInlinerSettings::Fallback::NeverInline, "NeverInline", "All decisions not in replay are not inlined")), cl::desc("How cgscc inline replay treats sites that don't come from the replay. " "Original: defers to original advisor, AlwaysInline: inline all sites " "not in replay, NeverInline: inline no sites not in replay"), cl::Hidden)
Implements a lazy call graph analysis and related passes for the new pass manager.
Legalize the Machine IR a function s Machine IR
Definition Legalizer.cpp:81
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
This file contains the declarations for metadata subclasses.
FunctionAnalysisManager FAM
ModuleAnalysisManager MAM
This file provides a priority worklist.
This file contains some templates that are useful if you are working with the STL at all.
This file defines the make_scope_exit function, which executes user-defined cleanup logic at scope ex...
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
This file contains some functions that are useful when dealing with strings.
#define LLVM_DEBUG(...)
Definition Debug.h:119
A manager for alias analyses.
This templated class represents "all analyses that operate over <a particular IR unit>" (e....
Definition Analysis.h:50
void clear(IRUnitT &IR, llvm::StringRef Name)
Clear any cached analysis results for a single unit of IR.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
Functions, function parameters, and return types can have attributes to indicate how they should be t...
Definition Attributes.h:105
static LLVM_ABI Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
Analysis pass which computes BlockFrequencyInfo.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
LLVM_ABI Function * getCaller()
Helper to get the caller (the parent function).
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A proxy from a FunctionAnalysisManager to an SCC.
bool isIntrinsic() const
isIntrinsic - Returns true if the function's name starts with "llvm.".
Definition Function.h:251
LLVM_ABI bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition Globals.cpp:337
Printer pass for the InlineAdvisorAnalysis results.
The InlineAdvisorAnalysis is a module pass because the InlineAdvisor needs to capture state right bef...
Result run(Module &M, ModuleAnalysisManager &MAM)
Interface for deciding whether to inline a call site or not.
virtual void onPassEntry(LazyCallGraph::SCC *SCC=nullptr)
This must be called when the Inliner pass is entered, to allow the InlineAdvisor to update internal stat...
virtual void onPassExit(LazyCallGraph::SCC *SCC=nullptr)
This must be called when the Inliner pass is exited, as function passes may be run subsequently.
std::unique_ptr< InlineAdvice > getAdvice(CallBase &CB, bool MandatoryOnly=false)
Get an InlineAdvice containing a recommendation on whether to inline or not.
This class captures the data input to the InlineFunction call, and records the auxiliary results prod...
Definition Cloning.h:259
SmallVector< CallBase *, 8 > InlinedCallSites
All of the new call sites inlined into the caller.
Definition Cloning.h:282
InlineResult is basically true or false.
Definition InlineCost.h:181
The inliner pass for the new pass manager.
Definition Inliner.h:36
LLVM_ABI void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
Definition Inliner.cpp:656
LLVM_ABI PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM, LazyCallGraph &CG, CGSCCUpdateResult &UR)
Definition Inliner.cpp:200
A node in the call graph.
An SCC of the call graph.
RefSCC & getOuterRefSCC() const
A lazily constructed view of the call graph of a module.
bool isLibFunction(Function &F) const
Test whether a function is a known and defined library function tracked by the call graph.
LLVM_ABI void markDeadFunction(Function &F)
Mark a function as dead to be removed later by removeDeadFunctions().
Node & get(Function &F)
Get a graph node for a given function, scanning it to populate the graph data as necessary.
SCC * lookupSCC(Node &N) const
Lookup a function's SCC in the graph.
Node * lookup(const Function &F) const
Lookup a function in the graph which has already been scanned and added.
LLVM_ABI PreservedAnalyses run(Module &, ModuleAnalysisManager &)
Definition Inliner.cpp:617
LLVM_ABI void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
Definition Inliner.cpp:664
LLVM_ABI ModuleInlinerWrapperPass(InlineParams Params=getInlineParams(), bool MandatoryFirst=true, InlineContext IC={}, InliningAdvisorMode Mode=InliningAdvisorMode::Default, unsigned MaxDevirtIterations=0)
Definition Inliner.cpp:595
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
Diagnostic information for missed-optimization remarks.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition Analysis.h:115
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Definition Analysis.h:151
PreservedAnalyses & preserve()
Mark an analysis as preserved.
Definition Analysis.h:132
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
void clear()
Completely clear the SetVector.
Definition SetVector.h:267
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:339
iterator erase(const_iterator CI)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
This function has undefined behavior.
LLVM Value Representation.
Definition Value.h:75
An efficient, type-erasing, non-owning reference to a callable.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
Changed
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
const char FunctionInlineCostMultiplierAttributeName[]
Definition InlineCost.h:60
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
Add a small namespace to avoid name clashes with the classes used in the streaming interface.
This is an optimization pass for GlobalISel generic memory operations.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
InliningAdvisorMode
There are 4 scenarios in which we can use the InlineAdvisor:
LLVM_ABI std::optional< int > getStringFnAttrAsInt(CallBase &CB, StringRef AttrKind)
DevirtSCCRepeatedPass createDevirtSCCRepeatedPass(CGSCCPassT &&Pass, int MaxIterations)
A function to deduce a function pass type and wrap it in the templated adaptor.
LLVM_ABI LazyCallGraph::SCC & updateCGAndAnalysisManagerForCGSCCPass(LazyCallGraph &G, LazyCallGraph::SCC &C, LazyCallGraph::Node &N, CGSCCAnalysisManager &AM, CGSCCUpdateResult &UR, FunctionAnalysisManager &FAM)
Helper to update the call graph after running a CGSCC pass.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:633
LLVM_ABI void setInlineRemark(CallBase &CB, StringRef Message)
Set the inline-remark attribute.
LLVM_ABI Value * FindAvailableLoadedValue(LoadInst *Load, BasicBlock *ScanBB, BasicBlock::iterator &ScanFrom, unsigned MaxInstsToScan=DefMaxInstsToScan, BatchAAResults *AA=nullptr, bool *IsLoadCSE=nullptr, unsigned *NumScanedInst=nullptr)
Scan backwards to see if we have the value of the given load available locally within a small number ...
Definition Loads.cpp:553
LLVM_ABI InlineResult InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, bool MergeAttributes=false, AAResults *CalleeAAR=nullptr, bool InsertLifetime=true, bool TrackInlineHistory=false, Function *ForwardVarArgsTo=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
This function inlines the called function into the basic block of the caller.
AnalysisManager< LazyCallGraph::SCC, LazyCallGraph & > CGSCCAnalysisManager
The CGSCC analysis manager.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
ModuleToPostOrderCGSCCPassAdaptor createModuleToPostOrderCGSCCPassAdaptor(CGSCCPassT &&Pass)
A function to deduce a function pass type and wrap it in the templated adaptor.
auto reverse(ContainerTy &&C)
Definition STLExtras.h:407
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
std::unique_ptr< InlineAdvisor > getReplayInlineAdvisor(Module &M, FunctionAnalysisManager &FAM, LLVMContext &Context, std::unique_ptr< InlineAdvisor > OriginalAdvisor, const ReplayInlinerSettings &ReplaySettings, bool EmitRemarks, InlineContext IC)
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
OuterAnalysisManagerProxy< ModuleAnalysisManager, LazyCallGraph::SCC, LazyCallGraph & > ModuleAnalysisManagerCGSCCProxy
A proxy from a ModuleAnalysisManager to an SCC.
LLVM_ABI InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1946
LLVM_ABI void filterDeadComdatFunctions(SmallVectorImpl< Function * > &DeadComdatFunctions)
Filter out potentially dead comdat functions where other entries keep the entire comdat group alive.
LLVM_ABI bool tryPromoteCall(CallBase &CB)
Try to promote (devirtualize) a virtual call on an Alloca.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
cl::opt< unsigned > MaxDevirtIterations("max-devirt-iterations", cl::ReallyHidden, cl::init(4))
std::string itostr(int64_t X)
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
Definition MIRParser.h:39
#define N
Support structure for SCC passes to communicate updates to the call graph back to the CGSCC pass manager...
SmallPriorityWorklist< LazyCallGraph::SCC *, 1 > & CWorklist
Worklist of the SCCs queued for processing.
SmallDenseSet< std::pair< LazyCallGraph::Node *, LazyCallGraph::SCC * >, 4 > & InlinedInternalEdges
A hacky area where the inliner can retain history about inlining decisions that mutated the call grap...
SmallVector< Function *, 4 > & DeadFunctions
Functions that a pass has considered to be dead to be removed at the end of the call graph walk in ba...
SmallPtrSetImpl< LazyCallGraph::SCC * > & InvalidatedSCCs
The set of invalidated SCCs which should be skipped if they are found in CWorklist.
Provides context on when an inline advisor is constructed in the pipeline (e.g., link phase,...
Thresholds to tune inline cost analysis.
Definition InlineCost.h:207
A CRTP mix-in to automatically provide informational APIs needed for passes.
Definition PassManager.h:89