LLVM 22.0.0git
PassBuilderPipelines.cpp
Go to the documentation of this file.
1//===- Construction of pass pipelines -------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9///
10/// This file provides the implementation of the PassBuilder based on our
11/// static pass registry as well as related functionality. It also provides
12/// helpers to aid in analyzing, debugging, and testing passes and pass
13/// pipelines.
14///
15//===----------------------------------------------------------------------===//
16
17#include "llvm/ADT/Statistic.h"
27#include "llvm/IR/PassManager.h"
28#include "llvm/Pass.h"
151
152using namespace llvm;
153
154namespace llvm {
155
157 "enable-ml-inliner", cl::init(InliningAdvisorMode::Default), cl::Hidden,
158 cl::desc("Enable ML policy for inliner. Currently trained for -Oz only"),
160 "Heuristics-based inliner version"),
162 "Use development mode (runtime-loadable model)"),
164 "Use release mode (AOT-compiled model)")));
165
166/// Flag to enable inline deferral during PGO.
167static cl::opt<bool>
168 EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true),
170 cl::desc("Enable inline deferral during PGO"));
171
/// Boolean command-line option `enable-module-inliner`: requests the module
/// inliner. Initialized to false and marked cl::Hidden.
172static cl::opt<bool> EnableModuleInliner("enable-module-inliner",
173 cl::init(false), cl::Hidden,
174 cl::desc("Enable module inliner"));
175
177 "mandatory-inlining-first", cl::init(false), cl::Hidden,
178 cl::desc("Perform mandatory inlinings module-wide, before performing "
179 "inlining"));
180
182 "eagerly-invalidate-analyses", cl::init(true), cl::Hidden,
183 cl::desc("Eagerly invalidate more analyses in default pipelines"));
184
186 "enable-merge-functions", cl::init(false), cl::Hidden,
187 cl::desc("Enable function merging as part of the optimization pipeline"));
188
190 "enable-post-pgo-loop-rotation", cl::init(true), cl::Hidden,
191 cl::desc("Run the loop rotation transformation after PGO instrumentation"));
192
194 "enable-global-analyses", cl::init(true), cl::Hidden,
195 cl::desc("Enable inter-procedural analyses"));
196
/// Boolean command-line option `enable-partial-inlining`: requests the partial
/// inlining pass. Initialized to false and marked cl::Hidden.
197static cl::opt<bool> RunPartialInlining("enable-partial-inlining",
198 cl::init(false), cl::Hidden,
199 cl::desc("Run Partial inlining pass"));
200
202 "extra-vectorizer-passes", cl::init(false), cl::Hidden,
203 cl::desc("Run cleanup optimization passes after vectorization"));
204
/// Boolean command-line option `enable-newgvn`. Initialized to false and
/// marked cl::Hidden. The function simplification pipeline in this file adds
/// NewGVNPass when this is set and GVNPass otherwise.
205static cl::opt<bool> RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden,
206 cl::desc("Run the NewGVN pass"));
207
/// Boolean command-line option `enable-loopinterchange`: requests the
/// LoopInterchange pass. Initialized to false and marked cl::Hidden.
208static cl::opt<bool>
209 EnableLoopInterchange("enable-loopinterchange", cl::init(false), cl::Hidden,
210 cl::desc("Enable the LoopInterchange Pass"));
211
/// Boolean command-line option `enable-unroll-and-jam`: requests the Unroll
/// And Jam pass. Initialized to false and marked cl::Hidden.
212static cl::opt<bool> EnableUnrollAndJam("enable-unroll-and-jam",
213 cl::init(false), cl::Hidden,
214 cl::desc("Enable Unroll And Jam Pass"));
215
216static cl::opt<bool> EnableLoopFlatten("enable-loop-flatten", cl::init(false),
218 cl::desc("Enable the LoopFlatten Pass"));
219
220// Experimentally allow loop header duplication. This should allow for better
221// optimization at Oz, since loop-idiom recognition can then recognize things
222// like memcpy. If this ends up being useful for many targets, we should drop
223// this flag and make a code generation option that can be controlled
224// independent of the opt level and exposed through the frontend.
226 "enable-loop-header-duplication", cl::init(false), cl::Hidden,
227 cl::desc("Enable loop header duplication at any optimization level"));
228
/// Boolean command-line option `enable-dfa-jump-thread`: requests DFA jump
/// threading. Initialized to false and marked cl::Hidden.
229static cl::opt<bool>
230 EnableDFAJumpThreading("enable-dfa-jump-thread",
231 cl::desc("Enable DFA jump threading"),
232 cl::init(false), cl::Hidden);
233
/// Boolean command-line option `hot-cold-split`: requests the hot-cold
/// splitting pass. Unlike most options in this file, this declaration passes
/// neither cl::init nor cl::Hidden.
234static cl::opt<bool>
235 EnableHotColdSplit("hot-cold-split",
236 cl::desc("Enable hot-cold splitting pass"));
237
238static cl::opt<bool> EnableIROutliner("ir-outliner", cl::init(false),
240 cl::desc("Enable ir outliner pass"));
241
/// Boolean command-line option `disable-preinline`: turns off the
/// pre-instrumentation inliner. Initialized to false (pre-inliner not
/// disabled) and marked cl::Hidden.
242static cl::opt<bool>
243 DisablePreInliner("disable-preinline", cl::init(false), cl::Hidden,
244 cl::desc("Disable pre-instrumentation inliner"));
245
247 "preinline-threshold", cl::Hidden, cl::init(75),
248 cl::desc("Control the amount of inlining in pre-instrumentation inliner "
249 "(default = 75)"));
250
/// Boolean command-line option `enable-gvn-hoist`. No cl::init is passed; the
/// description documents the default as off. When set, the function
/// simplification pipeline in this file adds GVNHoistPass.
251static cl::opt<bool>
252 EnableGVNHoist("enable-gvn-hoist",
253 cl::desc("Enable the GVN hoisting pass (default = off)"));
254
/// Boolean command-line option `enable-gvn-sink`. No cl::init is passed; the
/// description documents the default as off. When set, the function
/// simplification pipeline in this file adds GVNSinkPass followed by a
/// SimplifyCFGPass run.
255static cl::opt<bool>
256 EnableGVNSink("enable-gvn-sink",
257 cl::desc("Enable the GVN sinking pass (default = off)"));
258
260 "enable-jump-table-to-switch",
261 cl::desc("Enable JumpTableToSwitch pass (default = off)"));
262
263// This option is used in simplifying testing SampleFDO optimizations for
264// profile loading.
// NOTE(review): the sentence above talks about SampleFDO profile loading,
// while the option declared directly below is `enable-chr`; it may have been
// intended for a neighbouring option -- confirm before relying on it.
/// Boolean command-line option `enable-chr`: controls the control height
/// reduction (CHR) optimization. Initialized to true and marked cl::Hidden.
265static cl::opt<bool>
266 EnableCHR("enable-chr", cl::init(true), cl::Hidden,
267 cl::desc("Enable control height reduction optimization (CHR)"));
268
270 "flattened-profile-used", cl::init(false), cl::Hidden,
271 cl::desc("Indicate the sample profile being used is flattened, i.e., "
272 "no inline hierarchy exists in the profile"));
273
/// Boolean command-line option `enable-matrix`: requests lowering of the
/// matrix intrinsics. Initialized to false and marked cl::Hidden.
274static cl::opt<bool>
275 EnableMatrix("enable-matrix", cl::init(false), cl::Hidden,
276 cl::desc("Enable lowering of the matrix intrinsics"));
277
279 "enable-constraint-elimination", cl::init(true), cl::Hidden,
280 cl::desc(
281 "Enable pass to eliminate conditions based on linear constraints"));
282
284 "attributor-enable", cl::Hidden, cl::init(AttributorRunOption::NONE),
285 cl::desc("Enable the attributor inter-procedural deduction pass"),
287 "enable all attributor runs"),
289 "enable module-wide attributor runs"),
291 "enable call graph SCC attributor runs"),
292 clEnumValN(AttributorRunOption::NONE, "none",
293 "disable attributor runs")));
294
296 "enable-sampled-instrumentation", cl::init(false), cl::Hidden,
297 cl::desc("Enable profile instrumentation sampling (default = off)"));
299 "enable-loop-versioning-licm", cl::init(false), cl::Hidden,
300 cl::desc("Enable the experimental Loop Versioning LICM pass"));
301
303 "instrument-cold-function-only-path", cl::init(""),
304 cl::desc("File path for cold function only instrumentation(requires use "
305 "with --pgo-instrument-cold-function-only)"),
306 cl::Hidden);
307
310
312} // namespace llvm
313
330
331namespace llvm {
333} // namespace llvm
334
336 OptimizationLevel Level) {
337 for (auto &C : PeepholeEPCallbacks)
338 C(FPM, Level);
339}
342 for (auto &C : LateLoopOptimizationsEPCallbacks)
343 C(LPM, Level);
344}
346 OptimizationLevel Level) {
347 for (auto &C : LoopOptimizerEndEPCallbacks)
348 C(LPM, Level);
349}
352 for (auto &C : ScalarOptimizerLateEPCallbacks)
353 C(FPM, Level);
354}
356 OptimizationLevel Level) {
357 for (auto &C : CGSCCOptimizerLateEPCallbacks)
358 C(CGPM, Level);
359}
361 OptimizationLevel Level) {
362 for (auto &C : VectorizerStartEPCallbacks)
363 C(FPM, Level);
364}
366 OptimizationLevel Level) {
367 for (auto &C : VectorizerEndEPCallbacks)
368 C(FPM, Level);
369}
371 OptimizationLevel Level,
373 for (auto &C : OptimizerEarlyEPCallbacks)
374 C(MPM, Level, Phase);
375}
377 OptimizationLevel Level,
379 for (auto &C : OptimizerLastEPCallbacks)
380 C(MPM, Level, Phase);
381}
384 for (auto &C : FullLinkTimeOptimizationEarlyEPCallbacks)
385 C(MPM, Level);
386}
389 for (auto &C : FullLinkTimeOptimizationLastEPCallbacks)
390 C(MPM, Level);
391}
393 OptimizationLevel Level) {
394 for (auto &C : PipelineStartEPCallbacks)
395 C(MPM, Level);
396}
399 for (auto &C : PipelineEarlySimplificationEPCallbacks)
400 C(MPM, Level, Phase);
401}
402
403// Helper to add AnnotationRemarksPass.
407
408// Helper to check if the current compilation phase is preparing for LTO
413
414// Helper to check if the current compilation phase is LTO backend
419
420// Helper to wrap conditionally Coro passes.
422 // TODO: Skip passes according to Phase.
423 ModulePassManager CoroPM;
424 CoroPM.addPass(CoroEarlyPass());
425 CGSCCPassManager CGPM;
426 CGPM.addPass(CoroSplitPass());
427 CoroPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
428 CoroPM.addPass(CoroCleanupPass());
429 CoroPM.addPass(GlobalDCEPass());
430 return CoroConditionalWrapper(std::move(CoroPM));
431}
432
433// TODO: Investigate the cost/benefit of tail call elimination on debugging.
435PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
437
439
441 FPM.addPass(CountVisitsPass());
442
443 // Form SSA out of local memory accesses after breaking apart aggregates into
444 // scalars.
445 FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
446
447 // Catch trivial redundancies
448 FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
449
450 // Hoisting of scalars and load expressions.
451 FPM.addPass(
452 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
453 FPM.addPass(InstCombinePass());
454
455 FPM.addPass(LibCallsShrinkWrapPass());
456
457 invokePeepholeEPCallbacks(FPM, Level);
458
459 FPM.addPass(
460 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
461
462 // Form canonically associated expression trees, and simplify the trees using
463 // basic mathematical properties. For example, this will form (nearly)
464 // minimal multiplication trees.
465 FPM.addPass(ReassociatePass());
466
467 // Add the primary loop simplification pipeline.
468 // FIXME: Currently this is split into two loop pass pipelines because we run
469 // some function passes in between them. These can and should be removed
470 // and/or replaced by scheduling the loop pass equivalents in the correct
471 // positions. But those equivalent passes aren't powerful enough yet.
472 // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
473 // used. We have `LoopSimplifyCFGPass` which isn't powerful enough yet to
474 // fully replace `SimplifyCFGPass`, and the closest to the other we have is
475 // `LoopInstSimplify`.
476 LoopPassManager LPM1, LPM2;
477
478 // Simplify the loop body. We do this initially to clean up after other loop
479 // passes run, either when iterating on a loop or on inner loops with
480 // implications on the outer loop.
481 LPM1.addPass(LoopInstSimplifyPass());
482 LPM1.addPass(LoopSimplifyCFGPass());
483
484 // Try to remove as much code from the loop header as possible,
485 // to reduce amount of IR that will have to be duplicated. However,
486 // do not perform speculative hoisting the first time as LICM
487 // will destroy metadata that may not need to be destroyed if run
488 // after loop rotation.
489 // TODO: Investigate promotion cap for O1.
490 LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
491 /*AllowSpeculation=*/false));
492
493 LPM1.addPass(LoopRotatePass(/* Disable header duplication */ true,
495 // TODO: Investigate promotion cap for O1.
496 LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
497 /*AllowSpeculation=*/true));
498 LPM1.addPass(SimpleLoopUnswitchPass());
500 LPM1.addPass(LoopFlattenPass());
501
502 LPM2.addPass(LoopIdiomRecognizePass());
503 LPM2.addPass(IndVarSimplifyPass());
504
506
507 LPM2.addPass(LoopDeletionPass());
508
509 // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
510 // because it changes IR to make profile annotation in back compile
511 // inaccurate. The normal unroller doesn't pay attention to forced full unroll
512 // attributes so we need to make sure and allow the full unroll pass to pay
513 // attention to it.
514 if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
515 PGOOpt->Action != PGOOptions::SampleUse)
516 LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
517 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
518 PTO.ForgetAllSCEVInLoopUnroll));
519
521
522 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
523 /*UseMemorySSA=*/true));
524 FPM.addPass(
525 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
526 FPM.addPass(InstCombinePass());
527 // The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA.
528 // *All* loop passes must preserve it, in order to be able to use it.
529 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
530 /*UseMemorySSA=*/false));
531
532 // Delete small array after loop unroll.
533 FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
534
535 // Specially optimize memory movement as it doesn't look like dataflow in SSA.
536 FPM.addPass(MemCpyOptPass());
537
538 // Sparse conditional constant propagation.
539 // FIXME: It isn't clear why we do this *after* loop passes rather than
540 // before...
541 FPM.addPass(SCCPPass());
542
543 // Delete dead bit computations (instcombine runs after to fold away the dead
544 // computations, and then ADCE will run later to exploit any new DCE
545 // opportunities that creates).
546 FPM.addPass(BDCEPass());
547
548 // Run instcombine after redundancy and dead bit elimination to exploit
549 // opportunities opened up by them.
550 FPM.addPass(InstCombinePass());
551 invokePeepholeEPCallbacks(FPM, Level);
552
553 FPM.addPass(CoroElidePass());
554
556
557 // Finally, do an expensive DCE pass to catch all the dead code exposed by
558 // the simplifications and basic cleanup after all the simplifications.
559 // TODO: Investigate if this is too expensive.
560 FPM.addPass(ADCEPass());
561 FPM.addPass(
562 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
563 FPM.addPass(InstCombinePass());
564 invokePeepholeEPCallbacks(FPM, Level);
565
566 return FPM;
567}
568
572 assert(Level != OptimizationLevel::O0 && "Must request optimizations!");
573
574 // The O1 pipeline has a separate pipeline creation function to simplify
575 // construction readability.
576 if (Level.getSpeedupLevel() == 1)
577 return buildO1FunctionSimplificationPipeline(Level, Phase);
578
580
583
584 // Form SSA out of local memory accesses after breaking apart aggregates into
585 // scalars.
587
588 // Catch trivial redundancies
589 FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
592
593 // Hoisting of scalars and load expressions.
594 if (EnableGVNHoist)
595 FPM.addPass(GVNHoistPass());
596
597 // Global value numbering based sinking.
598 if (EnableGVNSink) {
599 FPM.addPass(GVNSinkPass());
600 FPM.addPass(
601 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
602 }
603
604 // Speculative execution if the target has divergent branches; otherwise nop.
605 FPM.addPass(SpeculativeExecutionPass(/* OnlyIfDivergentTarget =*/true));
606
607 // Optimize based on known information about branches, and cleanup afterward.
610
611 // Jump table to switch conversion.
616
617 FPM.addPass(
618 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
621
622 if (!Level.isOptimizingForSize())
624
625 invokePeepholeEPCallbacks(FPM, Level);
626
627 // For PGO use pipeline, try to optimize memory intrinsics such as memcpy
628 // using the size value profile. Don't perform this when optimizing for size.
629 if (PGOOpt && PGOOpt->Action == PGOOptions::IRUse &&
630 !Level.isOptimizingForSize())
632
633 FPM.addPass(TailCallElimPass(/*UpdateFunctionEntryCount=*/
634 isInstrumentedPGOUse()));
635 FPM.addPass(
636 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
637
638 // Form canonically associated expression trees, and simplify the trees using
639 // basic mathematical properties. For example, this will form (nearly)
640 // minimal multiplication trees.
642
645
646 // Add the primary loop simplification pipeline.
647 // FIXME: Currently this is split into two loop pass pipelines because we run
648 // some function passes in between them. These can and should be removed
649 // and/or replaced by scheduling the loop pass equivalents in the correct
650 // positions. But those equivalent passes aren't powerful enough yet.
651 // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
652 // used. We have `LoopSimplifyCFGPass` which isn't powerful enough yet to
653 // fully replace `SimplifyCFGPass`, and the closest to the other we have is
654 // `LoopInstSimplify`.
655 LoopPassManager LPM1, LPM2;
656
657 // Simplify the loop body. We do this initially to clean up after other loop
658 // passes run, either when iterating on a loop or on inner loops with
659 // implications on the outer loop.
660 LPM1.addPass(LoopInstSimplifyPass());
661 LPM1.addPass(LoopSimplifyCFGPass());
662
663 // Try to remove as much code from the loop header as possible,
664 // to reduce amount of IR that will have to be duplicated. However,
665 // do not perform speculative hoisting the first time as LICM
666 // will destroy metadata that may not need to be destroyed if run
667 // after loop rotation.
668 // TODO: Investigate promotion cap for O1.
669 LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
670 /*AllowSpeculation=*/false));
671
672 // Disable header duplication in loop rotation at -Oz.
674 Level != OptimizationLevel::Oz,
676 // TODO: Investigate promotion cap for O1.
677 LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
678 /*AllowSpeculation=*/true));
679 LPM1.addPass(
680 SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3));
682 LPM1.addPass(LoopFlattenPass());
683
684 LPM2.addPass(LoopIdiomRecognizePass());
685 LPM2.addPass(IndVarSimplifyPass());
686
687 {
689 ExtraPasses.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
691 LPM2.addPass(std::move(ExtraPasses));
692 }
693
695
696 LPM2.addPass(LoopDeletionPass());
697
698 // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
699 // because it changes IR to make profile annotation in back compile
700 // inaccurate. The normal unroller doesn't pay attention to forced full unroll
701 // attributes so we need to make sure and allow the full unroll pass to pay
702 // attention to it.
703 if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
704 PGOOpt->Action != PGOOptions::SampleUse)
705 LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
706 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
707 PTO.ForgetAllSCEVInLoopUnroll));
708
710
711 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
712 /*UseMemorySSA=*/true));
713 FPM.addPass(
714 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
716 // The loop passes in LPM2 (LoopIdiomRecognizePass, IndVarSimplifyPass,
717 // LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA.
718 // *All* loop passes must preserve it, in order to be able to use it.
719 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
720 /*UseMemorySSA=*/false));
721
722 // Delete small array after loop unroll.
724
725 // Try vectorization/scalarization transforms that are both improvements
726 // themselves and can allow further folds with GVN and InstCombine.
727 FPM.addPass(VectorCombinePass(/*TryEarlyFoldsOnly=*/true));
728
729 // Eliminate redundancies.
731 if (RunNewGVN)
732 FPM.addPass(NewGVNPass());
733 else
734 FPM.addPass(GVNPass());
735
736 // Sparse conditional constant propagation.
737 // FIXME: It isn't clear why we do this *after* loop passes rather than
738 // before...
739 FPM.addPass(SCCPPass());
740
741 // Delete dead bit computations (instcombine runs after to fold away the dead
742 // computations, and then ADCE will run later to exploit any new DCE
743 // opportunities that creates).
744 FPM.addPass(BDCEPass());
745
746 // Run instcombine after redundancy and dead bit elimination to exploit
747 // opportunities opened up by them.
749 invokePeepholeEPCallbacks(FPM, Level);
750
751 // Re-consider control flow based optimizations after redundancy elimination,
752 // redo DCE, etc.
755
758
759 // Finally, do an expensive DCE pass to catch all the dead code exposed by
760 // the simplifications and basic cleanup after all the simplifications.
761 // TODO: Investigate if this is too expensive.
762 FPM.addPass(ADCEPass());
763
764 // Specially optimize memory movement as it doesn't look like dataflow in SSA.
765 FPM.addPass(MemCpyOptPass());
766
767 FPM.addPass(DSEPass());
769
771 LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
772 /*AllowSpeculation=*/true),
773 /*UseMemorySSA=*/true));
774
775 FPM.addPass(CoroElidePass());
776
778
780 .convertSwitchRangeToICmp(true)
781 .convertSwitchToArithmetic(true)
782 .hoistCommonInsts(true)
783 .sinkCommonInsts(true)));
785 invokePeepholeEPCallbacks(FPM, Level);
786
787 return FPM;
788}
789
790void PassBuilder::addRequiredLTOPreLinkPasses(ModulePassManager &MPM) {
793}
794
795void PassBuilder::addPreInlinerPasses(ModulePassManager &MPM,
796 OptimizationLevel Level,
797 ThinOrFullLTOPhase LTOPhase) {
798 assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
800 return;
801 InlineParams IP;
802
804
805 // FIXME: The hint threshold has the same value used by the regular inliner
806 // when not optimizing for size. This should probably be lowered after
807 // performance testing.
808 // FIXME: This comment is cargo-culted from the old pass manager; revisit.
809 IP.HintThreshold = Level.isOptimizingForSize() ? PreInlineThreshold : 325;
811 IP, /* MandatoryFirst */ true,
813 CGSCCPassManager &CGPipeline = MIWP.getPM();
814
816 FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
817 FPM.addPass(EarlyCSEPass()); // Catch trivial redundancies.
818 FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(
819 true))); // Merge & remove basic blocks.
820 FPM.addPass(InstCombinePass()); // Combine silly sequences.
821 invokePeepholeEPCallbacks(FPM, Level);
822
823 CGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
824 std::move(FPM), PTO.EagerlyInvalidateAnalyses));
825
826 MPM.addPass(std::move(MIWP));
827
828 // Delete anything that is now dead to make sure that we don't instrument
829 // dead code. Instrumentation can end up keeping dead code around and
830 // dramatically increase code size.
831 MPM.addPass(GlobalDCEPass());
832}
833
834void PassBuilder::addPostPGOLoopRotation(ModulePassManager &MPM,
835 OptimizationLevel Level) {
837 // Disable header duplication in loop rotation at -Oz.
840 LoopRotatePass(EnableLoopHeaderDuplication ||
841 Level != OptimizationLevel::Oz),
842 /*UseMemorySSA=*/false),
843 PTO.EagerlyInvalidateAnalyses));
844 }
845}
846
847void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM,
848 OptimizationLevel Level, bool RunProfileGen,
849 bool IsCS, bool AtomicCounterUpdate,
850 std::string ProfileFile,
851 std::string ProfileRemappingFile) {
852 assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
853
854 if (!RunProfileGen) {
855 assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
856 MPM.addPass(
857 PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS));
858 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
859 // RequireAnalysisPass for PSI before subsequent non-module passes.
860 MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
861 return;
862 }
863
864 // Perform PGO instrumentation.
865 MPM.addPass(PGOInstrumentationGen(IsCS ? PGOInstrumentationType::CSFDO
867
868 addPostPGOLoopRotation(MPM, Level);
869 // Add the profile lowering pass.
870 InstrProfOptions Options;
871 if (!ProfileFile.empty())
872 Options.InstrProfileOutput = ProfileFile;
873 // Do counter promotion at Level greater than O0.
874 Options.DoCounterPromotion = true;
875 Options.UseBFIInPromotion = IsCS;
876 if (EnableSampledInstr) {
877 Options.Sampling = true;
878 // With sampling, there is little benefit to enabling counter promotion.
879 // But note that sampling does work with counter promotion.
880 Options.DoCounterPromotion = false;
881 }
882 Options.Atomic = AtomicCounterUpdate;
883 MPM.addPass(InstrProfilingLoweringPass(Options, IsCS));
884}
885
887 bool RunProfileGen, bool IsCS,
888 bool AtomicCounterUpdate,
889 std::string ProfileFile,
890 std::string ProfileRemappingFile) {
891 if (!RunProfileGen) {
892 assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
893 MPM.addPass(
894 PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS));
895 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
896 // RequireAnalysisPass for PSI before subsequent non-module passes.
898 return;
899 }
900
901 // Perform PGO instrumentation.
904 // Add the profile lowering pass.
906 if (!ProfileFile.empty())
907 Options.InstrProfileOutput = ProfileFile;
908 // Do not do counter promotion at O0.
909 Options.DoCounterPromotion = false;
910 Options.UseBFIInPromotion = IsCS;
911 Options.Atomic = AtomicCounterUpdate;
913}
914
916 return getInlineParams(Level.getSpeedupLevel(), Level.getSizeLevel());
917}
918
922 InlineParams IP;
923 if (PTO.InlinerThreshold == -1)
924 IP = getInlineParamsFromOptLevel(Level);
925 else
926 IP = getInlineParams(PTO.InlinerThreshold);
927 // For PreLinkThinLTO + SamplePGO or PreLinkFullLTO + SamplePGO,
928 // set hot-caller threshold to 0 to disable hot
929 // callsite inline (as much as possible [1]) because it makes
930 // profile annotation in the backend inaccurate.
931 //
932 // [1] Note the cost of a function could be below zero due to erased
933 // prologue / epilogue.
934 if (isLTOPreLink(Phase) && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)
936
937 if (PGOOpt)
939
943
944 // Require the GlobalsAA analysis for the module so we can query it within
945 // the CGSCC pipeline.
947 MIWP.addModulePass(RequireAnalysisPass<GlobalsAA, Module>());
948 // Invalidate AAManager so it can be recreated and pick up the newly
949 // available GlobalsAA.
950 MIWP.addModulePass(
952 }
953
954 // Require the ProfileSummaryAnalysis for the module so we can query it within
955 // the inliner pass.
957
958 // Now begin the main postorder CGSCC pipeline.
959 // FIXME: The current CGSCC pipeline has its origins in the legacy pass
960 // manager and trying to emulate its precise behavior. Much of this doesn't
961 // make a lot of sense and we should revisit the core CGSCC structure.
962 CGSCCPassManager &MainCGPipeline = MIWP.getPM();
963
964 // Note: historically, the PruneEH pass was run first to deduce nounwind and
965 // generally clean up exception handling overhead. It isn't clear this is
966 // valuable as the inliner doesn't currently care whether it is inlining an
967 // invoke or a call.
968
970 MainCGPipeline.addPass(AttributorCGSCCPass());
971
972 // Deduce function attributes. We do another run of this after the function
973 // simplification pipeline, so this only needs to run when it could affect the
974 // function simplification pipeline, which is only the case with recursive
975 // functions.
976 MainCGPipeline.addPass(PostOrderFunctionAttrsPass(/*SkipNonRecursive*/ true));
977
978 // When at O3 add argument promotion to the pass pipeline.
979 // FIXME: It isn't at all clear why this should be limited to O3.
980 if (Level == OptimizationLevel::O3)
981 MainCGPipeline.addPass(ArgumentPromotionPass());
982
983 // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if
984 // there are no OpenMP runtime calls present in the module.
985 if (Level == OptimizationLevel::O2 || Level == OptimizationLevel::O3)
986 MainCGPipeline.addPass(OpenMPOptCGSCCPass(Phase));
987
988 invokeCGSCCOptimizerLateEPCallbacks(MainCGPipeline, Level);
989
990 // Add the core function simplification pipeline nested inside the
991 // CGSCC walk.
994 PTO.EagerlyInvalidateAnalyses, /*NoRerun=*/true));
995
996 // Finally, deduce any function attributes based on the fully simplified
997 // function.
998 MainCGPipeline.addPass(PostOrderFunctionAttrsPass());
999
1000 // Mark that the function is fully simplified and that it shouldn't be
1001 // simplified again if we somehow revisit it due to CGSCC mutations unless
1002 // it's been modified since.
1005
1007 MainCGPipeline.addPass(CoroSplitPass(Level != OptimizationLevel::O0));
1008 MainCGPipeline.addPass(CoroAnnotationElidePass());
1009 }
1010
1011 // Make sure we don't affect potential future NoRerun CGSCC adaptors.
1012 MIWP.addLateModulePass(createModuleToFunctionPassAdaptor(
1014
1015 return MIWP;
1016}
1017
1022
1024 // For PreLinkThinLTO + SamplePGO or PreLinkFullLTO + SamplePGO,
1025 // set hot-caller threshold to 0 to disable hot
1026 // callsite inline (as much as possible [1]) because it makes
1027 // profile annotation in the backend inaccurate.
1028 //
1029 // [1] Note the cost of a function could be below zero due to erased
1030 // prologue / epilogue.
1031 if (isLTOPreLink(Phase) && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)
1032 IP.HotCallSiteThreshold = 0;
1033
1034 if (PGOOpt)
1036
1037 // The inline deferral logic is used to avoid losing some
1038 // inlining chance in future. It is helpful in SCC inliner, in which
1039 // inlining is processed in bottom-up order.
1040 // While in module inliner, the inlining order is a priority-based order
1041 // by default. The inline deferral is unnecessary there. So we disable the
1042 // inline deferral logic in module inliner.
1043 IP.EnableDeferral = false;
1044
1047 MPM.addPass(GlobalOptPass());
1048 MPM.addPass(GlobalDCEPass());
1049 MPM.addPass(PGOCtxProfFlatteningPass(/*IsPreThinlink=*/false));
1050 }
1051
1054 PTO.EagerlyInvalidateAnalyses));
1055
1059 MPM.addPass(
1061 }
1062
1063 return MPM;
1064}
1065
1069 assert(Level != OptimizationLevel::O0 &&
1070 "Should not be used for O0 pipeline");
1071
1073 "FullLTOPostLink shouldn't call buildModuleSimplificationPipeline!");
1074
1076
1077 // Place pseudo probe instrumentation as the first pass of the pipeline to
1078 // minimize the impact of optimization changes.
1079 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1082
1083 bool HasSampleProfile = PGOOpt && (PGOOpt->Action == PGOOptions::SampleUse);
1084
1085 // In ThinLTO mode, when flattened profile is used, all the available
1086 // profile information will be annotated in PreLink phase so there is
1087 // no need to load the profile again in PostLink.
1088 bool LoadSampleProfile =
1089 HasSampleProfile &&
1091
1092 // During the ThinLTO backend phase we perform early indirect call promotion
1093 // here, before globalopt. Otherwise imported available_externally functions
1094 // look unreferenced and are removed. If we are going to load the sample
1095 // profile then defer until later.
1096 // TODO: See if we can move later and consolidate with the location where
1097 // we perform ICP when we are loading a sample profile.
1098 // TODO: We pass HasSampleProfile (whether there was a sample profile file
1099 // passed to the compile) to the SamplePGO flag of ICP. This is used to
1100 // determine whether the new direct calls are annotated with prof metadata.
1101 // Ideally this should be determined from whether the IR is annotated with
1102 // sample profile, and not whether a sample profile was provided on the
1103 // command line. E.g. for flattened profiles where we will not be reloading
1104 // the sample profile in the ThinLTO backend, we ideally shouldn't have to
1105 // provide the sample profile file.
1106 if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink && !LoadSampleProfile)
1107 MPM.addPass(PGOIndirectCallPromotion(true /* InLTO */, HasSampleProfile));
1108
1109 // Create an early function pass manager to cleanup the output of the
1110 // frontend. Not necessary with LTO post link pipelines since the pre link
1111 // pipeline already cleaned up the frontend output.
1113 // Do basic inference of function attributes from known properties of system
1114 // libraries and other oracles.
1116 MPM.addPass(CoroEarlyPass());
1117
1118 FunctionPassManager EarlyFPM;
1119 EarlyFPM.addPass(EntryExitInstrumenterPass(/*PostInlining=*/false));
1120 // Lower llvm.expect to metadata before attempting transforms.
1121 // Compare/branch metadata may alter the behavior of passes like
1122 // SimplifyCFG.
1124 EarlyFPM.addPass(SimplifyCFGPass());
1126 EarlyFPM.addPass(EarlyCSEPass());
1127 if (Level == OptimizationLevel::O3)
1128 EarlyFPM.addPass(CallSiteSplittingPass());
1130 std::move(EarlyFPM), PTO.EagerlyInvalidateAnalyses));
1131 }
1132
1133 if (LoadSampleProfile) {
1134 // Annotate sample profile right after early FPM to ensure freshness of
1135 // the debug info.
1137 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile, Phase, FS));
1138 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
1139 // RequireAnalysisPass for PSI before subsequent non-module passes.
1141 // Do not invoke ICP in the LTOPrelink phase as it makes it hard
1142 // for the profile annotation to be accurate in the LTO backend.
1143 if (!isLTOPreLink(Phase))
1144 // We perform early indirect call promotion here, before globalopt.
1145 // This is important for the ThinLTO backend phase because otherwise
1146 // imported available_externally functions look unreferenced and are
1147 // removed.
1148 MPM.addPass(
1149 PGOIndirectCallPromotion(true /* IsInLTO */, true /* SamplePGO */));
1150 }
1151
1152 // Try to perform OpenMP specific optimizations on the module. This is a
1153 // (quick!) no-op if there are no OpenMP runtime calls present in the module.
1155
1157 MPM.addPass(AttributorPass());
1158
1159 // Lower type metadata and the type.test intrinsic in the ThinLTO
1160 // post link pipeline after ICP. This is to enable usage of the type
1161 // tests in ICP sequences.
1163 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr,
1165
1167
1168 // Interprocedural constant propagation now that basic cleanup has occurred
1169 // and prior to optimizing globals.
1170 // FIXME: This position in the pipeline hasn't been carefully considered in
1171 // years, it should be re-analyzed.
1172 MPM.addPass(IPSCCPPass(
1173 IPSCCPOptions(/*AllowFuncSpec=*/
1174 Level != OptimizationLevel::Os &&
1175 Level != OptimizationLevel::Oz &&
1176 !isLTOPreLink(Phase))));
1177
1178 // Attach metadata to indirect call sites indicating the set of functions
1179 // they may target at run-time. This should follow IPSCCP.
1181
1182 // Optimize globals to try and fold them into constants.
1183 MPM.addPass(GlobalOptPass());
1184
1185 // Create a small function pass pipeline to cleanup after all the global
1186 // optimizations.
1187 FunctionPassManager GlobalCleanupPM;
1188 // FIXME: Should this instead be a run of SROA?
1189 GlobalCleanupPM.addPass(PromotePass());
1190 GlobalCleanupPM.addPass(InstCombinePass());
1191 invokePeepholeEPCallbacks(GlobalCleanupPM, Level);
1192 GlobalCleanupPM.addPass(
1193 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
1194 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(GlobalCleanupPM),
1195 PTO.EagerlyInvalidateAnalyses));
1196
1197 // We already asserted this happens in non-FullLTOPostLink earlier.
1198 const bool IsPreLink = Phase != ThinOrFullLTOPhase::ThinLTOPostLink;
1199 // Enable contextual profiling instrumentation.
1200 const bool IsCtxProfGen =
1202 const bool IsPGOPreLink = !IsCtxProfGen && PGOOpt && IsPreLink;
1203 const bool IsPGOInstrGen =
1204 IsPGOPreLink && PGOOpt->Action == PGOOptions::IRInstr;
1205 const bool IsPGOInstrUse =
1206 IsPGOPreLink && PGOOpt->Action == PGOOptions::IRUse;
1207 const bool IsMemprofUse = IsPGOPreLink && !PGOOpt->MemoryProfile.empty();
1208 // We don't want to mix pgo ctx gen and pgo gen; we also don't currently
1209 // enable ctx profiling from the frontend.
1211 "Enabling both instrumented PGO and contextual instrumentation is not "
1212 "supported.");
1213 const bool IsCtxProfUse =
1215
1216 assert(
1218 "--instrument-cold-function-only-path is provided but "
1219 "--pgo-instrument-cold-function-only is not enabled");
1220 const bool IsColdFuncOnlyInstrGen = PGOInstrumentColdFunctionOnly &&
1221 IsPGOPreLink &&
1223
1224 if (IsPGOInstrGen || IsPGOInstrUse || IsMemprofUse || IsCtxProfGen ||
1225 IsCtxProfUse || IsColdFuncOnlyInstrGen)
1226 addPreInlinerPasses(MPM, Level, Phase);
1227
1228 // Add all the requested passes for instrumentation PGO, if requested.
1229 if (IsPGOInstrGen || IsPGOInstrUse) {
1230 addPGOInstrPasses(MPM, Level,
1231 /*RunProfileGen=*/IsPGOInstrGen,
1232 /*IsCS=*/false, PGOOpt->AtomicCounterUpdate,
1233 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile);
1234 } else if (IsCtxProfGen || IsCtxProfUse) {
1236 // In pre-link, we just want the instrumented IR. We use the contextual
1237 // profile in the post-thinlink phase.
1238 // The instrumentation will be removed in post-thinlink after IPO.
1239 // FIXME(mtrofin): move AssignGUIDPass if there is agreement to use this
1240 // mechanism for GUIDs.
1241 MPM.addPass(AssignGUIDPass());
1242 if (IsCtxProfUse) {
1243 MPM.addPass(PGOCtxProfFlatteningPass(/*IsPreThinlink=*/true));
1244 return MPM;
1245 }
1246 // Block further inlining in the instrumented ctxprof case. This avoids
1247 // confusingly collecting profiles for the same GUID corresponding to
1248 // different variants of the function. We could do like PGO and identify
1249 // functions by a (GUID, Hash) tuple, but since the ctxprof "use" waits for
1250 // thinlto to happen before performing any further optimizations, it's
1251 // unnecessary to collect profiles for non-prevailing copies.
1253 addPostPGOLoopRotation(MPM, Level);
1255 } else if (IsColdFuncOnlyInstrGen) {
1256 addPGOInstrPasses(MPM, Level, /* RunProfileGen */ true, /* IsCS */ false,
1257 /* AtomicCounterUpdate */ false,
1259 /* ProfileRemappingFile */ "");
1260 }
1261
1262 if (IsPGOInstrGen || IsPGOInstrUse || IsCtxProfGen)
1263 MPM.addPass(PGOIndirectCallPromotion(false, false));
1264
1265 if (IsPGOPreLink && PGOOpt->CSAction == PGOOptions::CSIRInstr)
1266 MPM.addPass(PGOInstrumentationGenCreateVar(PGOOpt->CSProfileGenFile,
1268
1269 if (IsMemprofUse)
1270 MPM.addPass(MemProfUsePass(PGOOpt->MemoryProfile, FS));
1271
1272 if (PGOOpt && (PGOOpt->Action == PGOOptions::IRUse ||
1273 PGOOpt->Action == PGOOptions::SampleUse))
1274 MPM.addPass(PGOForceFunctionAttrsPass(PGOOpt->ColdOptType));
1275
1276 MPM.addPass(AlwaysInlinerPass(/*InsertLifetimeIntrinsics=*/true));
1277
1280 else
1281 MPM.addPass(buildInlinerPipeline(Level, Phase));
1282
1283 // Remove any dead arguments exposed by cleanups, constant folding globals,
1284 // and argument promotion.
1286
1289
1291 MPM.addPass(CoroCleanupPass());
1292
1293 // Optimize globals now that functions are fully simplified.
1294 MPM.addPass(GlobalOptPass());
1295 MPM.addPass(GlobalDCEPass());
1296
1297 return MPM;
1298}
1299
/// Appends the vectorization-related function passes to FPM; every visible
/// statement below adds a pass to FPM or to a pass manager that is then moved
/// into FPM.
/// IsFullLTO (LTOPhase == FullLTOPostLink) selects where the unroll/SROA
/// sequence sits: inside the early IsFullLTO branch for full LTO, and after
/// VectorCombinePass otherwise.
/// NOTE(review): this listing skips some source lines (e.g. 1302, 1306-1307,
/// 1314), so statements on those lines are not described here.
1300/// TODO: Should LTO cause any differences to this set of passes?
1301void PassBuilder::addVectorPasses(OptimizationLevel Level,
1303 ThinOrFullLTOPhase LTOPhase) {
1304 const bool IsFullLTO = LTOPhase == ThinOrFullLTOPhase::FullLTOPostLink;
1305
1308
1309 // Drop dereferenceable assumes after vectorization, as they are no longer
1310 // needed and can inhibit further optimization.
1311 if (!isLTOPreLink(LTOPhase))
1312 FPM.addPass(DropUnnecessaryAssumesPass(/*DropDereferenceable=*/true));
1313
1315 if (IsFullLTO) {
1316 // The vectorizer may have significantly shortened a loop body; unroll
1317 // again. Unroll small loops to hide loop backedge latency and saturate any
1318 // parallel execution resources of an out-of-order processor. We also then
1319 // need to clean up redundancies and loop invariant code.
1320 // FIXME: It would be really good to use a loop-integrated instruction
1321 // combiner for cleanup here so that the unrolling and LICM can be pipelined
1322 // across the loop nests.
1323 // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
1326 LoopUnrollAndJamPass(Level.getSpeedupLevel())));
1328 Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
1331 // Now that we are done with loop unrolling, be it either by LoopVectorizer,
1332 // or LoopUnroll passes, some variable-offset GEP's into alloca's could have
1333 // become constant-offset, thus enabling SROA and alloca promotion. Do so.
1334 // NOTE: we are very late in the pipeline, and we don't have any LICM
1335 // or SimplifyCFG passes scheduled after us, that would cleanup
1336 // the CFG mess this may create if allowed to modify CFG, so forbid that.
1338 }
1339
1340 if (!IsFullLTO) {
1341 // Eliminate loads by forwarding stores from the previous iteration to loads
1342 // of the current iteration.
1344 }
1345 // Cleanup after the loop optimization passes.
1346 FPM.addPass(InstCombinePass());
1347
1348 if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
1349 ExtraFunctionPassManager<ShouldRunExtraVectorPasses> ExtraPasses;
1350 // At higher optimization levels, try to clean up any runtime overlap and
1351 // alignment checks inserted by the vectorizer. We want to track correlated
1352 // runtime checks for two inner loops in the same outer loop, fold any
1353 // common computations, hoist loop-invariant aspects out of any outer loop,
1354 // and unswitch the runtime checks if possible. Once hoisted, we may have
1355 // dead (or speculatable) control flows or more combining opportunities.
1356 ExtraPasses.addPass(EarlyCSEPass());
1357 ExtraPasses.addPass(CorrelatedValuePropagationPass());
1358 ExtraPasses.addPass(InstCombinePass());
1359 LoopPassManager LPM;
1360 LPM.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
1361 /*AllowSpeculation=*/true));
1362 LPM.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
1364 ExtraPasses.addPass(
1365 createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/true));
1366 ExtraPasses.addPass(
1367 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
1368 ExtraPasses.addPass(InstCombinePass());
1369 FPM.addPass(std::move(ExtraPasses));
1370 }
1371
1372 // Now that we've formed fast to execute loop structures, we do further
1373 // optimizations. These are run afterward as they might block doing complex
1374 // analyses and transforms such as what are needed for loop vectorization.
1375
1376 // Cleanup after loop vectorization, etc. Simplification passes like CVP and
1377 // GVN, loop transforms, and others have already run, so it's now better to
1378 // convert to more optimized IR using more aggressive simplify CFG options.
1379 // The extra sinking transform can create larger basic blocks, so do this
1380 // before SLP vectorization.
1381 FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
1382 .forwardSwitchCondToPhi(true)
1383 .convertSwitchRangeToICmp(true)
1384 .convertSwitchToArithmetic(true)
1385 .convertSwitchToLookupTable(true)
1386 .needCanonicalLoops(false)
1387 .hoistCommonInsts(true)
1388 .sinkCommonInsts(true)));
1389
1390 if (IsFullLTO) {
1391 FPM.addPass(SCCPPass());
1392 FPM.addPass(InstCombinePass());
1393 FPM.addPass(BDCEPass());
1394 }
1395
1396 // Optimize parallel scalar instruction chains into SIMD instructions.
1397 if (PTO.SLPVectorization) {
1398 FPM.addPass(SLPVectorizerPass());
1399 if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
1400 FPM.addPass(EarlyCSEPass());
1401 }
1402 }
1403 // Enhance/cleanup vector code.
1404 FPM.addPass(VectorCombinePass());
1405
1406 if (!IsFullLTO) {
1407 FPM.addPass(InstCombinePass());
1408 // Unroll small loops to hide loop backedge latency and saturate any
1409 // parallel execution resources of an out-of-order processor. We also then
1410 // need to clean up redundancies and loop invariant code.
1411 // FIXME: It would be really good to use a loop-integrated instruction
1412 // combiner for cleanup here so that the unrolling and LICM can be pipelined
1413 // across the loop nests.
1414 // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
1415 if (EnableUnrollAndJam && PTO.LoopUnrolling) {
1417 LoopUnrollAndJamPass(Level.getSpeedupLevel())));
1418 }
1419 FPM.addPass(LoopUnrollPass(LoopUnrollOptions(
1420 Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
1421 PTO.ForgetAllSCEVInLoopUnroll)));
1422 FPM.addPass(WarnMissedTransformationsPass());
1423 // Now that we are done with loop unrolling, be it either by LoopVectorizer,
1424 // or LoopUnroll passes, some variable-offset GEP's into alloca's could have
1425 // become constant-offset, thus enabling SROA and alloca promotion. Do so.
1426 // NOTE: we are very late in the pipeline, and we don't have any LICM
1427 // or SimplifyCFG passes scheduled after us, that would cleanup
1428 // the CFG mess this may create if allowed to modify CFG, so forbid that.
1429 FPM.addPass(SROAPass(SROAOptions::PreserveCFG));
1430 }
1431
1432 FPM.addPass(InferAlignmentPass());
1433 FPM.addPass(InstCombinePass());
1434
1435 // This is needed for two reasons:
1436 // 1. It works around problems that instcombine introduces, such as sinking
1437 // expensive FP divides into loops containing multiplications using the
1438 // divide result.
1439 // 2. It helps to clean up some loop-invariant code created by the loop
1440 // unroll pass when IsFullLTO=false.
1442 LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
1443 /*AllowSpeculation=*/true),
1444 /*UseMemorySSA=*/true));
1445
1446 // Now that we've vectorized and unrolled loops, we may have more refined
1447 // alignment information, try to re-derive it here.
1448 FPM.addPass(AlignmentFromAssumptionsPass());
1449}
1450
1453 ThinOrFullLTOPhase LTOPhase) {
1454 const bool LTOPreLink = isLTOPreLink(LTOPhase);
1456
1457 // Run partial inlining pass to partially inline functions that have
1458 // large bodies.
1461
1462 // Remove avail extern fns and globals definitions since we aren't compiling
1463 // an object file for later LTO. For LTO we want to preserve these so they
1464 // are eligible for inlining at link-time. Note if they are unreferenced they
1465 // will be removed by GlobalDCE later, so this only impacts referenced
1466 // available externally globals. Eventually they will be suppressed during
1467 // codegen, but eliminating here enables more opportunity for GlobalDCE as it
1468 // may make globals referenced by available external functions dead and saves
1469 // running remaining passes on the eliminated functions. These should be
1470 // preserved during prelinking for link-time inlining decisions.
1471 if (!LTOPreLink)
1473
1474 // Do RPO function attribute inference across the module to forward-propagate
1475 // attributes where applicable.
1476 // FIXME: Is this really an optimization rather than a canonicalization?
1478
1479 // Do a post inline PGO instrumentation and use pass. This is a context
1480 // sensitive PGO pass. We don't want to do this in LTOPreLink phase as
1481 // cross-module inline has not been done yet. The context sensitive
1482 // instrumentation is after all the inlines are done.
1483 if (!LTOPreLink && PGOOpt) {
1484 if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
1485 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true,
1486 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
1487 PGOOpt->CSProfileGenFile, PGOOpt->ProfileRemappingFile);
1488 else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
1489 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false,
1490 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
1491 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile);
1492 }
1493
1494 // Re-compute GlobalsAA here prior to function passes. This is particularly
1495 // useful as the above will have inlined, DCE'ed, and function-attr
1496 // propagated everything. We should at this point have a reasonably minimal
1497 // and richly annotated call graph. By computing aliasing and mod/ref
1498 // information for all local globals here, the late loop passes and notably
1499 // the vectorizer will be able to use them to help recognize vectorizable
1500 // memory operations.
1503
1504 invokeOptimizerEarlyEPCallbacks(MPM, Level, LTOPhase);
1505
1506 FunctionPassManager OptimizePM;
1507
1508 // Only drop unnecessary assumes post-inline and post-link, as otherwise
1509 // additional uses of the affected value may be introduced through inlining
1510 // and CSE.
1511 if (!isLTOPreLink(LTOPhase))
1512 OptimizePM.addPass(DropUnnecessaryAssumesPass());
1513
1514 // Scheduling LoopVersioningLICM when inlining is over, because after that
1515 // we may see more accurate aliasing. Reason to run this late is that too
1516 // early versioning may prevent further inlining due to increase of code
1517 // size. Other optimizations which run later might benefit from the no-alias
1518 // assumption in the cloned loop.
1520 OptimizePM.addPass(
1522 // LoopVersioningLICM pass might increase new LICM opportunities.
1524 LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
1525 /*AllowSpeculation=*/true),
1526 /*USeMemorySSA=*/true));
1527 }
1528
1529 OptimizePM.addPass(Float2IntPass());
1531
1532 if (EnableMatrix) {
1533 OptimizePM.addPass(LowerMatrixIntrinsicsPass());
1534 OptimizePM.addPass(EarlyCSEPass());
1535 }
1536
1537 // CHR pass should only be applied with the profile information.
1538 // The check is to check the profile summary information in CHR.
1539 if (EnableCHR && Level == OptimizationLevel::O3)
1540 OptimizePM.addPass(ControlHeightReductionPass());
1541
1542 // FIXME: We need to run some loop optimizations to re-rotate loops after
1543 // simplifycfg and others undo their rotation.
1544
1545 // Optimize the loop execution. These passes operate on entire loop nests
1546 // rather than on each loop in an inside-out manner, and so they are actually
1547 // function passes.
1548
1549 invokeVectorizerStartEPCallbacks(OptimizePM, Level);
1550
1551 LoopPassManager LPM;
1552 // First rotate loops that may have been un-rotated by prior passes.
1553 // Disable header duplication at -Oz.
1555 Level != OptimizationLevel::Oz,
1556 LTOPreLink));
1557 // Some loops may have become dead by now. Try to delete them.
1558 // FIXME: see discussion in https://reviews.llvm.org/D112851,
1559 // this may need to be revisited once we run GVN before loop deletion
1560 // in the simplification pipeline.
1561 LPM.addPass(LoopDeletionPass());
1562
1563 if (PTO.LoopInterchange)
1564 LPM.addPass(LoopInterchangePass());
1565
1566 OptimizePM.addPass(
1567 createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/false));
1568
1569 // FIXME: This may not be the right place in the pipeline.
1570 // We need to have the data to support the right place.
1571 if (PTO.LoopFusion)
1572 OptimizePM.addPass(LoopFusePass());
1573
1574 // Distribute loops to allow partial vectorization. I.e. isolate dependences
1575 // into separate loop that would otherwise inhibit vectorization. This is
1576 // currently only performed for loops marked with the metadata
1577 // llvm.loop.distribute=true or when -enable-loop-distribute is specified.
1578 OptimizePM.addPass(LoopDistributePass());
1579
1580 // Populates the VFABI attribute with the scalar-to-vector mappings
1581 // from the TargetLibraryInfo.
1582 OptimizePM.addPass(InjectTLIMappings());
1583
1584 addVectorPasses(Level, OptimizePM, LTOPhase);
1585
1586 invokeVectorizerEndEPCallbacks(OptimizePM, Level);
1587
1588 // LoopSink pass sinks instructions hoisted by LICM, which serves as a
1589 // canonicalization pass that enables other optimizations. As a result,
1590 // LoopSink pass needs to be a very late IR pass to avoid undoing LICM
1591 // result too early.
1592 OptimizePM.addPass(LoopSinkPass());
1593
1594 // And finally clean up LCSSA form before generating code.
1595 OptimizePM.addPass(InstSimplifyPass());
1596
1597 // This hoists/decomposes div/rem ops. It should run after other sink/hoist
1598 // passes to avoid re-sinking, but before SimplifyCFG because it can allow
1599 // flattening of blocks.
1600 OptimizePM.addPass(DivRemPairsPass());
1601
1602 // Try to annotate calls that were created during optimization.
1603 OptimizePM.addPass(
1604 TailCallElimPass(/*UpdateFunctionEntryCount=*/isInstrumentedPGOUse()));
1605
1606 // LoopSink (and other loop passes since the last simplifyCFG) might have
1607 // resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
1608 OptimizePM.addPass(
1610 .convertSwitchRangeToICmp(true)
1611 .convertSwitchToArithmetic(true)
1612 .speculateUnpredictables(true)
1613 .hoistLoadsStoresWithCondFaulting(true)));
1614
1615 // Add the core optimizing pipeline.
1616 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM),
1617 PTO.EagerlyInvalidateAnalyses));
1618
1619 // AllocToken transforms heap allocation calls; this needs to run late after
1620 // other allocation call transformations (such as those in InstCombine).
1621 if (!LTOPreLink)
1622 MPM.addPass(AllocTokenPass());
1623
1624 invokeOptimizerLastEPCallbacks(MPM, Level, LTOPhase);
1625
1626 // Split out cold code. Splitting is done late to avoid hiding context from
1627 // other optimizations and inadvertently regressing performance. The tradeoff
1628 // is that this has a higher code size cost than splitting early.
1629 if (EnableHotColdSplit && !LTOPreLink)
1631
1632 // Search the code for similar regions of code. If enough similar regions can
1633 // be found where extracting the regions into their own function will decrease
1634 // the size of the program, we extract the regions, and deduplicate the
1635 // structurally similar regions.
1636 if (EnableIROutliner)
1637 MPM.addPass(IROutlinerPass());
1638
1639 // Now we need to do some global optimization transforms.
1640 // FIXME: It would seem like these should come first in the optimization
1641 // pipeline and maybe be the bottom of the canonicalization pipeline? Weird
1642 // ordering here.
1643 MPM.addPass(GlobalDCEPass());
1645
1646 // Merge functions if requested. It has a better chance to merge functions
1647 // after ConstantMerge folded jump tables.
1648 if (PTO.MergeFunctions)
1650
1651 if (PTO.CallGraphProfile && !LTOPreLink)
1652 MPM.addPass(CGProfilePass(isLTOPostLink(LTOPhase)));
1653
1654 // RelLookupTableConverterPass runs later in LTO post-link pipeline.
1655 if (!LTOPreLink)
1657
1658 return MPM;
1659}
1660
1664 if (Level == OptimizationLevel::O0)
1665 return buildO0DefaultPipeline(Level, Phase);
1666
1668
1669 // Currently this pipeline is only invoked in an LTO pre link pass or when we
1670 // are not running LTO. If that changes the below checks may need updating.
1672
1673 // If we are invoking this in non-LTO mode, remove any MemProf related
1674 // attributes and metadata, as we don't know whether we are linking with
1675 // a library containing the necessary interfaces.
1678
1679 // Convert @llvm.global.annotations to !annotation metadata.
1681
1682 // Force any function attributes we want the rest of the pipeline to observe.
1684
1685 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
1687
1688 // Apply module pipeline start EP callback.
1690
1691 // Add the core simplification pipeline.
1693
1694 // Now add the optimization pipeline.
1696
1697 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1698 PGOOpt->Action == PGOOptions::SampleUse)
1700
1701 // Emit annotation remarks.
1703
1704 if (isLTOPreLink(Phase))
1705 addRequiredLTOPreLinkPasses(MPM);
1706 return MPM;
1707}
1708
1711 bool EmitSummary) {
1713 if (ThinLTO)
1715 else
1717 MPM.addPass(EmbedBitcodePass(ThinLTO, EmitSummary));
1718
1719 // Perform any cleanups to the IR that aren't suitable for per TU compilation,
1720 // like removing CFI/WPD related instructions. Note, we reuse
1721 // LowerTypeTestsPass to clean up type tests rather than duplicate that logic
1722 // in FatLtoCleanup.
1723 MPM.addPass(FatLtoCleanup());
1724
1725 // If we're doing FatLTO w/ CFI enabled, we don't want the type tests in the
1726 // object code, only in the bitcode section, so drop it before we run
1727 // module optimization and generate machine code. If llvm.type.test() isn't in
1728 // the IR, this won't do anything.
1729 MPM.addPass(
1731
1732 // Use the ThinLTO post-link pipeline with sample profiling
1733 if (ThinLTO && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)
1734 MPM.addPass(buildThinLTODefaultPipeline(Level, /*ImportSummary=*/nullptr));
1735 else {
1736 // ModuleSimplification does not run the coroutine passes for
1737 // ThinLTOPreLink, so we need the coroutine passes to run for ThinLTO
1738 // builds, otherwise they will miscompile.
1739 if (ThinLTO) {
1740 // TODO: replace w/ buildCoroWrapper() when it takes phase and level into
1741 // consideration.
1742 CGSCCPassManager CGPM;
1746 MPM.addPass(CoroCleanupPass());
1747 }
1748
1749 // otherwise, just use module optimization
1750 MPM.addPass(
1752 // Emit annotation remarks.
1754 }
1755 return MPM;
1756}
1757
1760 if (Level == OptimizationLevel::O0)
1762
1764
1765 // Convert @llvm.global.annotations to !annotation metadata.
1767
1768 // Force any function attributes we want the rest of the pipeline to observe.
1770
1771 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
1773
1774 // Apply module pipeline start EP callback.
1776
1777 // If we are planning to perform ThinLTO later, we don't bloat the code with
1778 // unrolling/vectorization/... now. Just simplify the module as much as we
1779 // can.
1782 // In pre-link, for ctx prof use, we stop here with an instrumented IR. We let
1783 // thinlto use the contextual info to perform imports; then use the contextual
1784 // profile in the post-thinlink phase.
1785 if (!UseCtxProfile.empty()) {
1786 addRequiredLTOPreLinkPasses(MPM);
1787 return MPM;
1788 }
1789
1790 // Run partial inlining pass to partially inline functions that have
1791 // large bodies.
1792 // FIXME: It isn't clear whether this is really the right place to run this
1793 // in ThinLTO. Because there is another canonicalization and simplification
1794 // phase that will run after the thin link, running this here ends up with
1795 // less information than will be available later and it may grow functions in
1796 // ways that aren't beneficial.
1799
1800 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1801 PGOOpt->Action == PGOOptions::SampleUse)
1803
1804 // Handle Optimizer{Early,Last}EPCallbacks added by clang on PreLink. Actual
1805 // optimization is going to be done in PostLink stage, but clang can't add
1806 // callbacks there in case of in-process ThinLTO called by linker.
1811
1812 // Emit annotation remarks.
1814
1815 addRequiredLTOPreLinkPasses(MPM);
1816
1817 return MPM;
1818}
1819
1821 OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary) {
1823
1824 // If we are invoking this without a summary index noting that we are linking
1825 // with a library containing the necessary APIs, remove any MemProf related
1826 // attributes and metadata.
1827 if (!ImportSummary || !ImportSummary->withSupportsHotColdNew())
1829
1830 if (ImportSummary) {
1831 // For ThinLTO we must apply the context disambiguation decisions early, to
1832 // ensure we can correctly match the callsites to summary data.
1835 ImportSummary, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
1836
1837 // These passes import type identifier resolutions for whole-program
1838 // devirtualization and CFI. They must run early because other passes may
1839 // disturb the specific instruction patterns that these passes look for,
1840 // creating dependencies on resolutions that may not appear in the summary.
1841 //
1842 // For example, GVN may transform the pattern assume(type.test) appearing in
1843 // two basic blocks into assume(phi(type.test, type.test)), which would
1844 // transform a dependency on a WPD resolution into a dependency on a type
1845 // identifier resolution for CFI.
1846 //
1847 // Also, WPD has access to more precise information than ICP and can
1848 // devirtualize more effectively, so it should operate on the IR first.
1849 //
1850 // The WPD and LowerTypeTest passes need to run at -O0 to lower type
1851 // metadata and intrinsics.
1852 MPM.addPass(WholeProgramDevirtPass(nullptr, ImportSummary));
1853 MPM.addPass(LowerTypeTestsPass(nullptr, ImportSummary));
1854 }
1855
1856 if (Level == OptimizationLevel::O0) {
1857 // Run a second time to clean up any type tests left behind by WPD for use
1858 // in ICP.
1859 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr,
1862
1863 // AllocToken transforms heap allocation calls; this needs to run late after
1864 // other allocation call transformations (such as those in InstCombine).
1865 MPM.addPass(AllocTokenPass());
1866
1867 // Drop available_externally and unreferenced globals. This is necessary
1868 // with ThinLTO in order to avoid leaving undefined references to dead
1869 // globals in the object file.
1871 MPM.addPass(GlobalDCEPass());
1872 return MPM;
1873 }
1874 if (!UseCtxProfile.empty()) {
1875 MPM.addPass(
1877 } else {
1878 // Add the core simplification pipeline.
1881 }
1882 // Now add the optimization pipeline.
1885
1886 // Emit annotation remarks.
1888
1889 return MPM;
1890}
1891
1894 // FIXME: We should use a customized pre-link pipeline!
1895 return buildPerModuleDefaultPipeline(Level,
1897}
1898
1901 ModuleSummaryIndex *ExportSummary) {
1903
1905
1906 // If we are invoking this without a summary index noting that we are linking
1907 // with a library containing the necessary APIs, remove any MemProf related
1908 // attributes and metadata.
1909 if (!ExportSummary || !ExportSummary->withSupportsHotColdNew())
1911
1912 // Create a function that performs CFI checks for cross-DSO calls with targets
1913 // in the current module.
1914 MPM.addPass(CrossDSOCFIPass());
1915
1916 if (Level == OptimizationLevel::O0) {
1917 // The WPD and LowerTypeTest passes need to run at -O0 to lower type
1918 // metadata and intrinsics.
1919 MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
1920 MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
1921 // Run a second time to clean up any type tests left behind by WPD for use
1922 // in ICP.
1923 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr,
1925
1927
1928 // AllocToken transforms heap allocation calls; this needs to run late after
1929 // other allocation call transformations (such as those in InstCombine).
1930 MPM.addPass(AllocTokenPass());
1931
1933
1934 // Emit annotation remarks.
1936
1937 return MPM;
1938 }
1939
1940 if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) {
1941 // Load sample profile before running the LTO optimization pipeline.
1942 MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
1943 PGOOpt->ProfileRemappingFile,
1945 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
1946 // RequireAnalysisPass for PSI before subsequent non-module passes.
1948 }
1949
1950 // Try to run OpenMP optimizations, quick no-op if no OpenMP metadata present.
1952
1953 // Remove unused virtual tables to improve the quality of code generated by
1954 // whole-program devirtualization and bitset lowering.
1955 MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));
1956
1957 // Do basic inference of function attributes from known properties of system
1958 // libraries and other oracles.
1960
1961 if (Level.getSpeedupLevel() > 1) {
1963 CallSiteSplittingPass(), PTO.EagerlyInvalidateAnalyses));
1964
1965 // Indirect call promotion. This should promote all the targets that are
1966 // left by the earlier promotion pass that promotes intra-module targets.
1967 // This two-step promotion is to save compile time. For LTO, it should
1968 // produce the same result as if we only do promotion here.
1970 true /* InLTO */, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
1971
1972 // Promoting by-reference arguments to by-value exposes more constants to
1973 // IPSCCP.
1974 CGSCCPassManager CGPM;
1977 CGPM.addPass(
1980
1981 // Propagate constants at call sites into the functions they call. This
1982 // opens opportunities for globalopt (and inlining) by substituting function
1983 // pointers passed as arguments to direct uses of functions.
1984 MPM.addPass(IPSCCPPass(IPSCCPOptions(/*AllowFuncSpec=*/
1985 Level != OptimizationLevel::Os &&
1986 Level != OptimizationLevel::Oz)));
1987
1988 // Attach metadata to indirect call sites indicating the set of functions
1989 // they may target at run-time. This should follow IPSCCP.
1991 }
1992
1993 // Do RPO function attribute inference across the module to forward-propagate
1994 // attributes where applicable.
1995 // FIXME: Is this really an optimization rather than a canonicalization?
1997
1998 // Use in-range annotations on GEP indices to split globals where beneficial.
1999 MPM.addPass(GlobalSplitPass());
2000
2001 // Run whole program optimization of virtual call when the list of callees
2002 // is fixed.
2003 MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
2004
2006 // Stop here at -O1.
2007 if (Level == OptimizationLevel::O1) {
2008 // The LowerTypeTestsPass needs to run to lower type metadata and the
2009 // type.test intrinsics. The pass does nothing if CFI is disabled.
2010 MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
2011 // Run a second time to clean up any type tests left behind by WPD for use
2012 // in ICP (which is performed earlier than this in the regular LTO
2013 // pipeline).
2014 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr,
2016
2018
2019 // AllocToken transforms heap allocation calls; this needs to run late after
2020 // other allocation call transformations (such as those in InstCombine).
2021 MPM.addPass(AllocTokenPass());
2022
2024
2025 // Emit annotation remarks.
2027
2028 return MPM;
2029 }
2030
2031 // TODO: Skip to match buildCoroWrapper.
2032 MPM.addPass(CoroEarlyPass());
2033
2034 // Optimize globals to try and fold them into constants.
2035 MPM.addPass(GlobalOptPass());
2036
2037 // Promote any localized globals to SSA registers.
2039
2040 // Linking modules together can lead to duplicate global constants; only
2041 // keep one copy of each constant.
2043
2044 // Remove unused arguments from functions.
2046
2047 // Reduce the code after globalopt and ipsccp. Both can open up significant
2048 // simplification opportunities, and both can propagate functions through
2049 // function pointers. When this happens, we often have to resolve varargs
2050 // calls, etc, so let instcombine do this.
2051 FunctionPassManager PeepholeFPM;
2052 PeepholeFPM.addPass(InstCombinePass());
2053 if (Level.getSpeedupLevel() > 1)
2054 PeepholeFPM.addPass(AggressiveInstCombinePass());
2055 invokePeepholeEPCallbacks(PeepholeFPM, Level);
2056
2057 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(PeepholeFPM),
2058 PTO.EagerlyInvalidateAnalyses));
2059
2060 // Lower variadic functions for supported targets prior to inlining.
2062
2063 // Note: historically, the PruneEH pass was run first to deduce nounwind and
2064 // generally clean up exception handling overhead. It isn't clear this is
2065 // valuable as the inliner doesn't currently care whether it is inlining an
2066 // invoke or a call.
2067 // Run the inliner now.
2068 if (EnableModuleInliner) {
2072 } else {
2075 /* MandatoryFirst */ true,
2078 }
2079
2080 // Perform context disambiguation after inlining, since that would reduce the
2081 // amount of additional cloning required to distinguish the allocation
2082 // contexts.
2085 /*Summary=*/nullptr,
2086 PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
2087
2088 // Optimize globals again after we ran the inliner.
2089 MPM.addPass(GlobalOptPass());
2090
2091 // Run the OpenMPOpt pass again after global optimizations.
2093
2094 // Garbage collect dead functions.
2095 MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));
2096
2097 // If we didn't decide to inline a function, check to see if we can
2098 // transform it to pass arguments by value instead of by reference.
2099 CGSCCPassManager CGPM;
2104
2106 // The IPO Passes may leave cruft around. Clean up after them.
2107 FPM.addPass(InstCombinePass());
2108 invokePeepholeEPCallbacks(FPM, Level);
2109
2112
2114
2115 // Do a post inline PGO instrumentation and use pass. This is a context
2116 // sensitive PGO pass.
2117 if (PGOOpt) {
2118 if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
2119 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true,
2120 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
2121 PGOOpt->CSProfileGenFile, PGOOpt->ProfileRemappingFile);
2122 else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
2123 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false,
2124 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
2125 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile);
2126 }
2127
2128 // Break up allocas
2130
2131 // LTO provides additional opportunities for tailcall elimination due to
2132 // link-time inlining, and visibility of nocapture attribute.
2133 FPM.addPass(
2134 TailCallElimPass(/*UpdateFunctionEntryCount=*/isInstrumentedPGOUse()));
2135
2136 // Run a few AA driver optimizations here and now to cleanup the code.
2137 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM),
2138 PTO.EagerlyInvalidateAnalyses));
2139
2140 MPM.addPass(
2142
2143 // Require the GlobalsAA analysis for the module so we can query it within
2144 // MainFPM.
2147 // Invalidate AAManager so it can be recreated and pick up the newly
2148 // available GlobalsAA.
2149 MPM.addPass(
2151 }
2152
2153 FunctionPassManager MainFPM;
2155 LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
2156 /*AllowSpeculation=*/true),
2157 /*UseMemorySSA=*/true));
2158
2159 if (RunNewGVN)
2160 MainFPM.addPass(NewGVNPass());
2161 else
2162 MainFPM.addPass(GVNPass());
2163
2164 // Remove dead memcpy()'s.
2165 MainFPM.addPass(MemCpyOptPass());
2166
2167 // Nuke dead stores.
2168 MainFPM.addPass(DSEPass());
2169 MainFPM.addPass(MoveAutoInitPass());
2171
2172 invokeVectorizerStartEPCallbacks(MainFPM, Level);
2173
2174 LoopPassManager LPM;
2175 if (EnableLoopFlatten && Level.getSpeedupLevel() > 1)
2176 LPM.addPass(LoopFlattenPass());
2177 LPM.addPass(IndVarSimplifyPass());
2178 LPM.addPass(LoopDeletionPass());
2179 // FIXME: Add loop interchange.
2180
2181 // Unroll small loops and perform peeling.
2182 LPM.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
2183 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
2184 PTO.ForgetAllSCEVInLoopUnroll));
2185 // The loop passes in LPM (LoopFullUnrollPass) do not preserve MemorySSA.
2186 // *All* loop passes must preserve it, in order to be able to use it.
2187 MainFPM.addPass(
2188 createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/false));
2189
2190 MainFPM.addPass(LoopDistributePass());
2191
2192 addVectorPasses(Level, MainFPM, ThinOrFullLTOPhase::FullLTOPostLink);
2193
2194 invokeVectorizerEndEPCallbacks(MainFPM, Level);
2195
2196 // Run the OpenMPOpt CGSCC pass again late.
2199
2200 invokePeepholeEPCallbacks(MainFPM, Level);
2201 MainFPM.addPass(JumpThreadingPass());
2202 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(MainFPM),
2203 PTO.EagerlyInvalidateAnalyses));
2204
2205 // Lower type metadata and the type.test intrinsic. This pass supports
2206 // clang's control flow integrity mechanisms (-fsanitize=cfi*) and needs
2207 // to be run at link time if CFI is enabled. This pass does nothing if
2208 // CFI is disabled.
2209 MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
2210 // Run a second time to clean up any type tests left behind by WPD for use
2211 // in ICP (which is performed earlier than this in the regular LTO pipeline).
2212 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr,
2214
2215 // Enable splitting late in the FullLTO post-link pipeline.
2218
2219 // Add late LTO optimization passes.
2220 FunctionPassManager LateFPM;
2221
2222 // LoopSink pass sinks instructions hoisted by LICM, which serves as a
2223 // canonicalization pass that enables other optimizations. As a result,
2224 // LoopSink pass needs to be a very late IR pass to avoid undoing LICM
2225 // result too early.
2226 LateFPM.addPass(LoopSinkPass());
2227
2228 // This hoists/decomposes div/rem ops. It should run after other sink/hoist
2229 // passes to avoid re-sinking, but before SimplifyCFG because it can allow
2230 // flattening of blocks.
2231 LateFPM.addPass(DivRemPairsPass());
2232
2233 // Delete basic blocks, which optimization passes may have killed.
2235 .convertSwitchRangeToICmp(true)
2236 .convertSwitchToArithmetic(true)
2237 .hoistCommonInsts(true)
2238 .speculateUnpredictables(true)));
2239 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(LateFPM)));
2240
2241 // Drop bodies of available externally objects to improve GlobalDCE.
2243
2244 // Now that we have optimized the program, discard unreachable functions.
2245 MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));
2246
2247 if (PTO.MergeFunctions)
2249
2251
2252 if (PTO.CallGraphProfile)
2253 MPM.addPass(CGProfilePass(/*InLTOPostLink=*/true));
2254
2255 MPM.addPass(CoroCleanupPass());
2256
2257 // AllocToken transforms heap allocation calls; this needs to run late after
2258 // other allocation call transformations (such as those in InstCombine).
2259 MPM.addPass(AllocTokenPass());
2260
2262
2263 // Emit annotation remarks.
2265
2266 return MPM;
2267}
2268
2272 assert(Level == OptimizationLevel::O0 &&
2273 "buildO0DefaultPipeline should only be used with O0");
2274
2276
2277 // Perform pseudo probe instrumentation in O0 mode. This is for the
2278 // consistency between different build modes. For example, a LTO build can be
2279 // mixed with an O0 prelink and an O2 postlink. Loading a sample profile in
2280 // the postlink will require pseudo probe instrumentation in the prelink.
2281 if (PGOOpt && PGOOpt->PseudoProbeForProfiling)
2283
2284 if (PGOOpt && (PGOOpt->Action == PGOOptions::IRInstr ||
2285 PGOOpt->Action == PGOOptions::IRUse))
2287 MPM,
2288 /*RunProfileGen=*/(PGOOpt->Action == PGOOptions::IRInstr),
2289 /*IsCS=*/false, PGOOpt->AtomicCounterUpdate, PGOOpt->ProfileFile,
2290 PGOOpt->ProfileRemappingFile);
2291
2292 // Instrument function entry and exit before all inlining.
2294 EntryExitInstrumenterPass(/*PostInlining=*/false)));
2295
2297
2298 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
2300
2301 if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) {
2302 // Explicitly disable sample loader inlining and use flattened profile in O0
2303 // pipeline.
2304 MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
2305 PGOOpt->ProfileRemappingFile,
2306 ThinOrFullLTOPhase::None, nullptr,
2307 /*DisableSampleProfileInlining=*/true,
2308 /*UseFlattenedProfile=*/true));
2309 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
2310 // RequireAnalysisPass for PSI before subsequent non-module passes.
2312 }
2313
2315
2316 // Build a minimal pipeline based on the semantics required by LLVM,
2317 // which is just that always inlining occurs. Further, disable generating
2318 // lifetime intrinsics to avoid enabling further optimizations during
2319 // code generation.
2321 /*InsertLifetimeIntrinsics=*/false));
2322
2323 if (PTO.MergeFunctions)
2325
2326 if (EnableMatrix)
2327 MPM.addPass(
2329
2330 if (!CGSCCOptimizerLateEPCallbacks.empty()) {
2331 CGSCCPassManager CGPM;
2333 if (!CGPM.isEmpty())
2335 }
2336 if (!LateLoopOptimizationsEPCallbacks.empty()) {
2337 LoopPassManager LPM;
2339 if (!LPM.isEmpty()) {
2341 createFunctionToLoopPassAdaptor(std::move(LPM))));
2342 }
2343 }
2344 if (!LoopOptimizerEndEPCallbacks.empty()) {
2345 LoopPassManager LPM;
2347 if (!LPM.isEmpty()) {
2349 createFunctionToLoopPassAdaptor(std::move(LPM))));
2350 }
2351 }
2352 if (!ScalarOptimizerLateEPCallbacks.empty()) {
2355 if (!FPM.isEmpty())
2356 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
2357 }
2358
2360
2361 if (!VectorizerStartEPCallbacks.empty()) {
2364 if (!FPM.isEmpty())
2365 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
2366 }
2367
2368 if (!VectorizerEndEPCallbacks.empty()) {
2371 if (!FPM.isEmpty())
2372 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
2373 }
2374
2376
2377 // AllocToken transforms heap allocation calls; this needs to run late after
2378 // other allocation call transformations (such as those in InstCombine).
2379 if (!isLTOPreLink(Phase))
2380 MPM.addPass(AllocTokenPass());
2381
2383
2384 if (isLTOPreLink(Phase))
2385 addRequiredLTOPreLinkPasses(MPM);
2386
2388
2389 return MPM;
2390}
2391
2393 AAManager AA;
2394
2395 // The order in which these are registered determines their priority when
2396 // being queried.
2397
2398 // Add any target-specific alias analyses that should be run early.
2399 if (TM)
2400 TM->registerEarlyDefaultAliasAnalyses(AA);
2401
2402 // First we register the basic alias analysis that provides the majority of
2403 // per-function local AA logic. This is a stateless, on-demand local set of
2404 // AA techniques.
2405 AA.registerFunctionAnalysis<BasicAA>();
2406
2407 // Next we query fast, specialized alias analyses that wrap IR-embedded
2408 // information about aliasing.
2409 AA.registerFunctionAnalysis<ScopedNoAliasAA>();
2410 AA.registerFunctionAnalysis<TypeBasedAA>();
2411
2412 // Add support for querying global aliasing information when available.
2413 // Because the `AAManager` is a function analysis and `GlobalsAA` is a module
2414 // analysis, all that the `AAManager` can do is query for any *cached*
2415 // results from `GlobalsAA` through a readonly proxy.
2417 AA.registerModuleAnalysis<GlobalsAA>();
2418
2419 // Add target-specific alias analyses.
2420 if (TM)
2421 TM->registerDefaultAliasAnalyses(AA);
2422
2423 return AA;
2424}
2425
2426bool PassBuilder::isInstrumentedPGOUse() const {
2427 return (PGOOpt && PGOOpt->Action == PGOOptions::IRUse) ||
2428 !UseCtxProfile.empty();
2429}
aarch64 falkor hwpf fix Falkor HW Prefetch Fix Late Phase
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AggressiveInstCombiner - Combine expression patterns to form expressions with fewer,...
Provides passes to inlining "always_inline" functions.
This is the interface for LLVM's primary stateless and local alias analysis.
This file provides the interface for LLVM's Call Graph Profile pass.
This header provides classes for managing passes over SCCs of the call graph.
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
This file provides the interface for a simple, fast CSE pass.
This file provides a pass which clones the current module and runs the provided pass pipeline on the ...
This file provides a pass manager that only runs its passes if the provided marker analysis has been ...
Super simple passes to force specific function attrs from the commandline into the IR for debugging p...
Provides passes for computing function attributes based on interprocedural analyses.
This file provides the interface for LLVM's Global Value Numbering pass which eliminates fully redund...
This is the interface for a simple mod/ref and alias analysis over globals.
AcceleratorCodeSelection - Identify all functions reachable from a kernel, removing those that are un...
This header defines various interfaces for pass management in LLVM.
Interfaces for passes which infer implicit function attributes from the name and signature of functio...
This file provides the primary interface to the instcombine pass.
Defines passes for running instruction simplification across chunks of IR.
This file provides the interface for LLVM's PGO Instrumentation lowering pass.
See the comments on JumpThreadingPass.
static LVOptions Options
Definition LVOptions.cpp:25
This file implements the Loop Fusion pass.
This header defines the LoopLoadEliminationPass object.
This header provides classes for managing a pipeline of passes over loops in LLVM IR.
The header file for the LowerConstantIntrinsics pass as used by the new pass manager.
The header file for the LowerExpectIntrinsic pass as used by the new pass manager.
This pass performs merges of loads and stores on both sides of a diamond (hammock).
This file provides the interface for LLVM's Global Value Numbering pass.
This header enumerates the LLVM-provided high-level optimization levels.
This file provides the interface for IR based instrumentation passes ( (profile-gen,...
Define option tunables for PGO.
static void addAnnotationRemarksPass(ModulePassManager &MPM)
static InlineParams getInlineParamsFromOptLevel(OptimizationLevel Level)
static CoroConditionalWrapper buildCoroWrapper(ThinOrFullLTOPhase Phase)
static bool isLTOPreLink(ThinOrFullLTOPhase Phase)
static bool isLTOPostLink(ThinOrFullLTOPhase Phase)
This file implements relative lookup table converter that converts lookup tables to relative lookup t...
This file provides the interface for LLVM's Scalar Replacement of Aggregates pass.
This file provides the interface for the pseudo probe implementation for AutoFDO.
This file provides the interface for the sampled PGO loader pass.
This is the interface for a metadata-based scoped no-alias analysis.
This file provides the interface for the pass responsible for both simplifying and canonicalizing the...
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
This is the interface for a metadata-based TBAA.
Defines the virtual file system interface vfs::FileSystem.
A manager for alias analyses.
A module pass that rewrites heap allocations to use token-enabled allocation functions based on vario...
Definition AllocToken.h:36
Inlines functions marked as "always_inline".
Argument promotion pass.
Assign a GUID to functions as metadata.
Analysis pass providing a never-invalidated alias analysis result.
Simple pass that canonicalizes aliases.
A pass that merges duplicate global constants into a single constant.
This class implements a trivial dead store elimination.
Eliminate dead arguments (and return values) from functions.
A pass that transforms external global definitions into declarations.
Pass embeds a copy of the module optimized with the provided pass pipeline into a global variable.
A pass manager to run a set of extra loop passes if the MarkerTy analysis is present.
The core GVN pass object.
Definition GVN.h:128
Pass to remove unused function declarations.
Definition GlobalDCE.h:38
Optimize globals that never have their address taken.
Definition GlobalOpt.h:25
Pass to perform split of global variables.
Definition GlobalSplit.h:26
Analysis pass providing a never-invalidated alias analysis result.
Pass to outline cold regions.
Pass to perform interprocedural constant propagation.
Definition SCCP.h:48
Pass to outline similar regions.
Definition IROutliner.h:444
Run instruction simplification across each instruction in the function.
Instrumentation based profiling lowering pass.
This pass performs 'jump threading', which looks at blocks that have multiple predecessors and multip...
Performs Loop Invariant Code Motion Pass.
Definition LICM.h:66
Loop unroll pass that only does full loop unrolling and peeling.
Performs Loop Idiom Recognize Pass.
Performs Loop Inst Simplify Pass.
A simple loop rotation transformation.
Performs basic CFG simplifications to assist other loop passes.
A pass that does profile-guided sinking of instructions into loops.
Definition LoopSink.h:33
A simple loop rotation transformation.
Loop unroll pass that will support both full and partial unrolling.
Strips MemProf attributes and metadata.
Merge identical functions.
The module inliner pass for the new pass manager.
Module pass, wrapping the inliner pass.
Definition Inliner.h:65
void addModulePass(T Pass)
Add a module pass that runs before the CGSCC passes.
Definition Inliner.h:81
Class to hold module path string table and global value map, and encapsulate methods for operating on...
Simple pass that provides a name to every anonymous globals.
Additional 'norecurse' attribute deduction during postlink LTO phase.
OpenMP optimizations pass.
Definition OpenMPOpt.h:42
static LLVM_ABI const OptimizationLevel O3
Optimize for fast execution as much as possible.
static LLVM_ABI const OptimizationLevel Oz
A very specialized mode that will optimize for code size at any and all costs.
static LLVM_ABI const OptimizationLevel O0
Disable as many optimizations as possible.
static LLVM_ABI const OptimizationLevel Os
Similar to O2 but tries to optimize for small code size instead of fast execution without triggering ...
static LLVM_ABI const OptimizationLevel O2
Optimize for fast execution as much as possible without triggering significant incremental compile ti...
static LLVM_ABI const OptimizationLevel O1
Optimize quickly without destroying debuggability.
The indirect function call promotion pass.
The instrumentation (profile-instr-gen) pass for IR based PGO.
The instrumentation (profile-instr-gen) pass for IR based PGO.
The profile annotation (profile-instr-use) pass for IR based PGO.
The profile size based optimization pass for memory intrinsics.
Pass to remove unused function declarations.
LLVM_ABI void invokeFullLinkTimeOptimizationLastEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level)
LLVM_ABI ModuleInlinerWrapperPass buildInlinerPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the module pipeline that performs inlining as well as the inlining-driven cleanups.
LLVM_ABI void invokeOptimizerEarlyEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level, ThinOrFullLTOPhase Phase)
LLVM_ABI void invokeVectorizerStartEPCallbacks(FunctionPassManager &FPM, OptimizationLevel Level)
LLVM_ABI AAManager buildDefaultAAPipeline()
Build the default AAManager with the default alias analysis pipeline registered.
LLVM_ABI void invokeCGSCCOptimizerLateEPCallbacks(CGSCCPassManager &CGPM, OptimizationLevel Level)
LLVM_ABI ModulePassManager buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level)
Build a pre-link, ThinLTO-targeting default optimization pipeline to a pass manager.
LLVM_ABI void addPGOInstrPassesForO0(ModulePassManager &MPM, bool RunProfileGen, bool IsCS, bool AtomicCounterUpdate, std::string ProfileFile, std::string ProfileRemappingFile)
Add PGOInstrumentation passes for O0 only.
LLVM_ABI void invokeScalarOptimizerLateEPCallbacks(FunctionPassManager &FPM, OptimizationLevel Level)
LLVM_ABI ModulePassManager buildPerModuleDefaultPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase=ThinOrFullLTOPhase::None)
Build a per-module default optimization pipeline.
LLVM_ABI void invokePipelineStartEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level)
LLVM_ABI void invokeVectorizerEndEPCallbacks(FunctionPassManager &FPM, OptimizationLevel Level)
LLVM_ABI ModulePassManager buildO0DefaultPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase=ThinOrFullLTOPhase::None)
Build an O0 pipeline with the minimal semantically required passes.
LLVM_ABI FunctionPassManager buildFunctionSimplificationPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the core LLVM function canonicalization and simplification pipeline.
LLVM_ABI void invokePeepholeEPCallbacks(FunctionPassManager &FPM, OptimizationLevel Level)
LLVM_ABI void invokePipelineEarlySimplificationEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level, ThinOrFullLTOPhase Phase)
LLVM_ABI void invokeLoopOptimizerEndEPCallbacks(LoopPassManager &LPM, OptimizationLevel Level)
LLVM_ABI ModulePassManager buildLTODefaultPipeline(OptimizationLevel Level, ModuleSummaryIndex *ExportSummary)
Build an LTO default optimization pipeline to a pass manager.
LLVM_ABI ModulePassManager buildModuleInlinerPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the module pipeline that performs inlining with module inliner pass.
LLVM_ABI ModulePassManager buildThinLTODefaultPipeline(OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary)
Build a ThinLTO default optimization pipeline to a pass manager.
LLVM_ABI void invokeLateLoopOptimizationsEPCallbacks(LoopPassManager &LPM, OptimizationLevel Level)
LLVM_ABI void invokeFullLinkTimeOptimizationEarlyEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level)
LLVM_ABI ModulePassManager buildFatLTODefaultPipeline(OptimizationLevel Level, bool ThinLTO, bool EmitSummary)
Build a fat object default optimization pipeline.
LLVM_ABI ModulePassManager buildModuleSimplificationPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the core LLVM module canonicalization and simplification pipeline.
LLVM_ABI ModulePassManager buildModuleOptimizationPipeline(OptimizationLevel Level, ThinOrFullLTOPhase LTOPhase)
Construct the core LLVM module optimization pipeline.
LLVM_ABI void invokeOptimizerLastEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level, ThinOrFullLTOPhase Phase)
LLVM_ABI ModulePassManager buildLTOPreLinkDefaultPipeline(OptimizationLevel Level)
Build a pre-link, LTO-targeting default optimization pipeline to a pass manager.
LLVM_ATTRIBUTE_MINSIZE std::enable_if_t<!std::is_same_v< PassT, PassManager > > addPass(PassT &&Pass)
bool isEmpty() const
Returns if the pass manager contains any passes.
unsigned LicmMssaNoAccForPromotionCap
Tuning option to disable promotion to scalars in LICM with MemorySSA, if the number of access is too ...
Definition PassBuilder.h:78
bool SLPVectorization
Tuning option to enable/disable SLP vectorization, set based on opt level.
Definition PassBuilder.h:56
int InlinerThreshold
Tuning option to override the default inliner threshold.
Definition PassBuilder.h:92
bool LoopFusion
Tuning option to enable/disable loop fusion. Its default value is false.
Definition PassBuilder.h:66
bool CallGraphProfile
Tuning option to enable/disable call graph profile.
Definition PassBuilder.h:82
bool MergeFunctions
Tuning option to enable/disable function merging.
Definition PassBuilder.h:89
bool ForgetAllSCEVInLoopUnroll
Tuning option to forget all SCEV loops in LoopUnroll.
Definition PassBuilder.h:70
unsigned LicmMssaOptCap
Tuning option to cap the number of calls to retrieve clobbering accesses in MemorySSA,...
Definition PassBuilder.h:74
bool LoopInterleaving
Tuning option to set loop interleaving on/off, set based on opt level.
Definition PassBuilder.h:48
LLVM_ABI PipelineTuningOptions()
Constructor sets pipeline tuning defaults based on cl::opts.
bool LoopUnrolling
Tuning option to enable/disable loop unrolling. Its default value is true.
Definition PassBuilder.h:59
bool LoopInterchange
Tuning option to enable/disable loop interchange.
Definition PassBuilder.h:63
bool LoopVectorization
Tuning option to enable/disable loop vectorization, set based on opt level.
Definition PassBuilder.h:52
Reassociate commutative expressions.
Definition Reassociate.h:74
A pass to do RPO deduction and propagation of function attributes.
This pass performs function-level constant propagation and merging.
Definition SCCP.h:30
The sample profiler data loader pass.
Analysis pass providing a never-invalidated alias analysis result.
This pass transforms loops that contain branches or switches on loop- invariant conditions to have mu...
A pass to simplify and canonicalize the CFG of a function.
Definition SimplifyCFG.h:30
Analysis pass providing a never-invalidated alias analysis result.
Optimize scalar/vector interactions in IR using target cost models.
Interfaces for registering analysis passes, producing common pass manager configurations,...
Abstract Attribute helper functions.
Definition Attributor.h:165
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
@ Assume
Do not drop type tests (default).
@ All
Drop only llvm.assumes using type test value.
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI cl::opt< bool > EnableKnowledgeRetention
static cl::opt< bool > RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden, cl::desc("Run the NewGVN pass"))
static cl::opt< bool > DisablePreInliner("disable-preinline", cl::init(false), cl::Hidden, cl::desc("Disable pre-instrumentation inliner"))
static cl::opt< bool > EnableJumpTableToSwitch("enable-jump-table-to-switch", cl::desc("Enable JumpTableToSwitch pass (default = off)"))
static cl::opt< bool > PerformMandatoryInliningsFirst("mandatory-inlining-first", cl::init(false), cl::Hidden, cl::desc("Perform mandatory inlinings module-wide, before performing " "inlining"))
static cl::opt< bool > RunPartialInlining("enable-partial-inlining", cl::init(false), cl::Hidden, cl::desc("Run Partial inlining pass"))
static cl::opt< bool > EnableGVNSink("enable-gvn-sink", cl::desc("Enable the GVN sinking pass (default = off)"))
static cl::opt< bool > EnableLoopHeaderDuplication("enable-loop-header-duplication", cl::init(false), cl::Hidden, cl::desc("Enable loop header duplication at any optimization level"))
static cl::opt< bool > EnableModuleInliner("enable-module-inliner", cl::init(false), cl::Hidden, cl::desc("Enable module inliner"))
static cl::opt< bool > EnableEagerlyInvalidateAnalyses("eagerly-invalidate-analyses", cl::init(true), cl::Hidden, cl::desc("Eagerly invalidate more analyses in default pipelines"))
static cl::opt< bool > EnableMatrix("enable-matrix", cl::init(false), cl::Hidden, cl::desc("Enable lowering of the matrix intrinsics"))
ModuleToFunctionPassAdaptor createModuleToFunctionPassAdaptor(FunctionPassT &&Pass, bool EagerlyInvalidate=false)
A function to deduce a function pass type and wrap it in the templated adaptor.
cl::opt< std::string > UseCtxProfile("use-ctx-profile", cl::init(""), cl::Hidden, cl::desc("Use the specified contextual profile file"))
static cl::opt< bool > EnableSampledInstr("enable-sampled-instrumentation", cl::init(false), cl::Hidden, cl::desc("Enable profile instrumentation sampling (default = off)"))
static cl::opt< bool > EnableLoopFlatten("enable-loop-flatten", cl::init(false), cl::Hidden, cl::desc("Enable the LoopFlatten Pass"))
static cl::opt< InliningAdvisorMode > UseInlineAdvisor("enable-ml-inliner", cl::init(InliningAdvisorMode::Default), cl::Hidden, cl::desc("Enable ML policy for inliner. Currently trained for -Oz only"), cl::values(clEnumValN(InliningAdvisorMode::Default, "default", "Heuristics-based inliner version"), clEnumValN(InliningAdvisorMode::Development, "development", "Use development mode (runtime-loadable model)"), clEnumValN(InliningAdvisorMode::Release, "release", "Use release mode (AOT-compiled model)")))
PassManager< LazyCallGraph::SCC, CGSCCAnalysisManager, LazyCallGraph &, CGSCCUpdateResult & > CGSCCPassManager
The CGSCC pass manager.
static cl::opt< bool > EnableUnrollAndJam("enable-unroll-and-jam", cl::init(false), cl::Hidden, cl::desc("Enable Unroll And Jam Pass"))
ThinOrFullLTOPhase
This enumerates the LLVM full LTO or ThinLTO optimization phases.
Definition Pass.h:77
@ FullLTOPreLink
Full LTO prelink phase.
Definition Pass.h:85
@ ThinLTOPostLink
ThinLTO postlink (backend compile) phase.
Definition Pass.h:83
@ None
No LTO/ThinLTO behavior needed.
Definition Pass.h:79
@ FullLTOPostLink
Full LTO postlink (backend compile) phase.
Definition Pass.h:87
@ ThinLTOPreLink
ThinLTO prelink (summary) phase.
Definition Pass.h:81
PassManager< Loop, LoopAnalysisManager, LoopStandardAnalysisResults &, LPMUpdater & > LoopPassManager
The Loop pass manager.
static cl::opt< bool > EnableConstraintElimination("enable-constraint-elimination", cl::init(true), cl::Hidden, cl::desc("Enable pass to eliminate conditions based on linear constraints"))
ModuleToPostOrderCGSCCPassAdaptor createModuleToPostOrderCGSCCPassAdaptor(CGSCCPassT &&Pass)
A function to deduce a function pass type and wrap it in the templated adaptor.
static cl::opt< bool > EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true), cl::Hidden, cl::desc("Enable inline deferral during PGO"))
Flag to enable inline deferral during PGO.
FunctionToLoopPassAdaptor createFunctionToLoopPassAdaptor(LoopPassT &&Pass, bool UseMemorySSA=false)
A function to deduce a loop pass type and wrap it in the templated adaptor.
CGSCCToFunctionPassAdaptor createCGSCCToFunctionPassAdaptor(FunctionPassT &&Pass, bool EagerlyInvalidate=false, bool NoRerun=false)
A function to deduce a function pass type and wrap it in the templated adaptor.
cl::opt< bool > ForgetSCEVInLoopUnroll
PassManager< Module > ModulePassManager
Convenience typedef for a pass manager over modules.
static cl::opt< bool > EnablePostPGOLoopRotation("enable-post-pgo-loop-rotation", cl::init(true), cl::Hidden, cl::desc("Run the loop rotation transformation after PGO instrumentation"))
LLVM_ABI bool AreStatisticsEnabled()
Check if statistics are enabled.
static cl::opt< std::string > InstrumentColdFuncOnlyPath("instrument-cold-function-only-path", cl::init(""), cl::desc("File path for cold function only instrumentation(requires use " "with --pgo-instrument-cold-function-only)"), cl::Hidden)
static cl::opt< bool > EnableGlobalAnalyses("enable-global-analyses", cl::init(true), cl::Hidden, cl::desc("Enable inter-procedural analyses"))
static cl::opt< bool > EnableDFAJumpThreading("enable-dfa-jump-thread", cl::desc("Enable DFA jump threading"), cl::init(false), cl::Hidden)
static cl::opt< bool > FlattenedProfileUsed("flattened-profile-used", cl::init(false), cl::Hidden, cl::desc("Indicate the sample profile being used is flattened, i.e., " "no inline hierarchy exists in the profile"))
static cl::opt< bool > ExtraVectorizerPasses("extra-vectorizer-passes", cl::init(false), cl::Hidden, cl::desc("Run cleanup optimization passes after vectorization"))
static cl::opt< bool > EnableHotColdSplit("hot-cold-split", cl::desc("Enable hot-cold splitting pass"))
cl::opt< bool > EnableMemProfContextDisambiguation
Enable MemProf context disambiguation for thin link.
PassManager< Function > FunctionPassManager
Convenience typedef for a pass manager over functions.
LLVM_ABI InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options.
cl::opt< bool > PGOInstrumentColdFunctionOnly
static cl::opt< bool > EnableLoopInterchange("enable-loopinterchange", cl::init(false), cl::Hidden, cl::desc("Enable the LoopInterchange Pass"))
static cl::opt< bool > EnableCHR("enable-chr", cl::init(true), cl::Hidden, cl::desc("Enable control height reduction optimization (CHR)"))
static cl::opt< bool > EnableMergeFunctions("enable-merge-functions", cl::init(false), cl::Hidden, cl::desc("Enable function merging as part of the optimization pipeline"))
static cl::opt< bool > EnableGVNHoist("enable-gvn-hoist", cl::desc("Enable the GVN hoisting pass (default = off)"))
cl::opt< unsigned > SetLicmMssaNoAccForPromotionCap
static cl::opt< bool > EnableIROutliner("ir-outliner", cl::init(false), cl::Hidden, cl::desc("Enable ir outliner pass"))
static cl::opt< AttributorRunOption > AttributorRun("attributor-enable", cl::Hidden, cl::init(AttributorRunOption::NONE), cl::desc("Enable the attributor inter-procedural deduction pass"), cl::values(clEnumValN(AttributorRunOption::ALL, "all", "enable all attributor runs"), clEnumValN(AttributorRunOption::MODULE, "module", "enable module-wide attributor runs"), clEnumValN(AttributorRunOption::CGSCC, "cgscc", "enable call graph SCC attributor runs"), clEnumValN(AttributorRunOption::NONE, "none", "disable attributor runs")))
static cl::opt< int > PreInlineThreshold("preinline-threshold", cl::Hidden, cl::init(75), cl::desc("Control the amount of inlining in pre-instrumentation inliner " "(default = 75)"))
static cl::opt< bool > UseLoopVersioningLICM("enable-loop-versioning-licm", cl::init(false), cl::Hidden, cl::desc("Enable the experimental Loop Versioning LICM pass"))
cl::opt< unsigned > MaxDevirtIterations("max-devirt-iterations", cl::ReallyHidden, cl::init(4))
cl::opt< unsigned > SetLicmMssaOptCap
A DCE pass that assumes instructions are dead until proven otherwise.
Definition ADCE.h:31
Pass to convert @llvm.global.annotations to !annotation metadata.
This pass attempts to minimize the number of assumes without losing any information.
Hoist/decompose integer division and remainder instructions to enable CFG improvements and better codegen.
Definition DivRemPairs.h:23
A simple and fast domtree-based CSE pass.
Definition EarlyCSE.h:31
Pass which forces specific function attributes into the IR, primarily as a debugging tool.
A simple and fast domtree-based GVN pass to hoist common expressions from sibling branches.
Definition GVN.h:415
Uses an "inverted" value numbering to decide the similarity of expressions and sinks similar expressions into successors.
Definition GVN.h:422
A set of parameters to control various transforms performed by IPSCCP pass.
Definition SCCP.h:35
A pass which infers function attributes from the names and signatures of function declarations in a module.
Provides context on when an inline advisor is constructed in the pipeline (e.g., link phase, inline driver).
Thresholds to tune inline cost analysis.
Definition InlineCost.h:207
std::optional< int > HotCallSiteThreshold
Threshold to use when the callsite is considered hot.
Definition InlineCost.h:224
int DefaultThreshold
The default threshold to start with for a callee.
Definition InlineCost.h:209
std::optional< bool > EnableDeferral
Indicate whether we should allow inline deferral.
Definition InlineCost.h:237
std::optional< int > HintThreshold
Threshold to use for callees with inline hint.
Definition InlineCost.h:212
Options for the frontend instrumentation based profiling pass.
A no-op pass template which simply forces a specific analysis result to be invalidated.
Pass to forward loads in a loop around the backedge to subsequent iterations.
A set of parameters used to control various transforms performed by the LoopUnroll pass.
The LoopVectorize Pass.
Computes function attributes in post-order over the call graph.
A utility pass template to force an analysis result to be available.