LLVM 23.0.0git
PassBuilderPipelines.cpp
Go to the documentation of this file.
1//===- Construction of pass pipelines -------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9///
10/// This file provides the implementation of the PassBuilder based on our
11/// static pass registry as well as related functionality. It also provides
12/// helpers to aid in analyzing, debugging, and testing passes and pass
13/// pipelines.
14///
15//===----------------------------------------------------------------------===//
16
17#include "llvm/ADT/Statistic.h"
29#include "llvm/IR/PassManager.h"
30#include "llvm/Pass.h"
156
157using namespace llvm;
158
159namespace llvm {
160
162 "enable-ml-inliner", cl::init(InliningAdvisorMode::Default), cl::Hidden,
163 cl::desc("Enable ML policy for inliner. Currently trained for -Oz only"),
165 "Heuristics-based inliner version"),
167 "Use development mode (runtime-loadable model)"),
169 "Use release mode (AOT-compiled model)")));
170
171/// Flag to enable inline deferral during PGO.
172static cl::opt<bool>
173 EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true),
175 cl::desc("Enable inline deferral during PGO"));
176
/// Hidden boolean option `enable-module-inliner`, initialized to false.
/// Per its description, it enables the module inliner.
177static cl::opt<bool> EnableModuleInliner("enable-module-inliner",
178 cl::init(false), cl::Hidden,
179 cl::desc("Enable module inliner"));
180
182 "mandatory-inlining-first", cl::init(false), cl::Hidden,
183 cl::desc("Perform mandatory inlinings module-wide, before performing "
184 "inlining"));
185
187 "eagerly-invalidate-analyses", cl::init(true), cl::Hidden,
188 cl::desc("Eagerly invalidate more analyses in default pipelines"));
189
191 "enable-merge-functions", cl::init(false), cl::Hidden,
192 cl::desc("Enable function merging as part of the optimization pipeline"));
193
195 "enable-post-pgo-loop-rotation", cl::init(true), cl::Hidden,
196 cl::desc("Run the loop rotation transformation after PGO instrumentation"));
197
199 "enable-global-analyses", cl::init(true), cl::Hidden,
200 cl::desc("Enable inter-procedural analyses"));
201
/// Hidden boolean option `enable-partial-inlining`, initialized to false.
/// Per its description, it requests that the partial inlining pass be run.
202static cl::opt<bool> RunPartialInlining("enable-partial-inlining",
203 cl::init(false), cl::Hidden,
204 cl::desc("Run Partial inlining pass"));
205
207 "extra-vectorizer-passes", cl::init(false), cl::Hidden,
208 cl::desc("Run cleanup optimization passes after vectorization"));
209
/// Hidden boolean option `enable-newgvn`, initialized to false. When set, the
/// function simplification pipeline in this file adds NewGVNPass where it
/// would otherwise add GVNPass.
210static cl::opt<bool> RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden,
211 cl::desc("Run the NewGVN pass"));
212
/// Hidden boolean option `enable-loopinterchange`, initialized to false.
/// Per its description, it enables the LoopInterchange pass.
213static cl::opt<bool>
214 EnableLoopInterchange("enable-loopinterchange", cl::init(false), cl::Hidden,
215 cl::desc("Enable the LoopInterchange Pass"));
216
/// Hidden boolean option `enable-unroll-and-jam`, initialized to false.
/// Per its description, it enables the Unroll And Jam pass.
217static cl::opt<bool> EnableUnrollAndJam("enable-unroll-and-jam",
218 cl::init(false), cl::Hidden,
219 cl::desc("Enable Unroll And Jam Pass"));
220
221static cl::opt<bool> EnableLoopFlatten("enable-loop-flatten", cl::init(false),
223 cl::desc("Enable the LoopFlatten Pass"));
224
/// Hidden boolean option `enable-instrumentor`, initialized to false.
/// Per its description, it enables the Instrumentor pass.
225static cl::opt<bool>
226 EnableInstrumentor("enable-instrumentor", cl::init(false), cl::Hidden,
227 cl::desc("Enable the Instrumentor Pass"));
228
/// Hidden boolean option `enable-dfa-jump-thread`, initialized to false.
/// Per its description, it enables DFA jump threading.
229static cl::opt<bool>
230 EnableDFAJumpThreading("enable-dfa-jump-thread",
231 cl::desc("Enable DFA jump threading"),
232 cl::init(false), cl::Hidden);
233
/// Boolean option `hot-cold-split`. Per its description, it enables the
/// hot-cold splitting pass. Unlike most neighbouring options, this declaration
/// carries neither an explicit cl::init nor cl::Hidden.
234static cl::opt<bool>
235 EnableHotColdSplit("hot-cold-split",
236 cl::desc("Enable hot-cold splitting pass"));
237
238static cl::opt<bool> EnableIROutliner("ir-outliner", cl::init(false),
240 cl::desc("Enable ir outliner pass"));
241
/// Hidden boolean option `disable-preinline`, initialized to false.
/// Per its description, setting it disables the pre-instrumentation inliner.
242static cl::opt<bool>
243 DisablePreInliner("disable-preinline", cl::init(false), cl::Hidden,
244 cl::desc("Disable pre-instrumentation inliner"));
245
247 "preinline-threshold", cl::Hidden, cl::init(75),
248 cl::desc("Control the amount of inlining in pre-instrumentation inliner "
249 "(default = 75)"));
250
/// Boolean option `enable-gvn-hoist` (described as off by default; no explicit
/// cl::init is given here). When set, the function simplification pipeline in
/// this file adds GVNHoistPass.
251static cl::opt<bool>
252 EnableGVNHoist("enable-gvn-hoist",
253 cl::desc("Enable the GVN hoisting pass (default = off)"));
254
/// Boolean option `enable-gvn-sink` (described as off by default; no explicit
/// cl::init is given here). When set, the function simplification pipeline in
/// this file adds GVNSinkPass followed by a SimplifyCFGPass.
255static cl::opt<bool>
256 EnableGVNSink("enable-gvn-sink",
257 cl::desc("Enable the GVN sinking pass (default = off)"));
258
260 "enable-jump-table-to-switch", cl::init(true),
261 cl::desc("Enable JumpTableToSwitch pass (default = true)"));
262
263// This option is used in simplifying testing SampleFDO optimizations for
264// profile loading.
// NOTE(review): the SampleFDO remark above does not obviously describe the CHR
// flag declared below -- confirm it is attached to the intended option.
// `enable-chr` is a hidden boolean option, initialized to true.
265static cl::opt<bool>
266 EnableCHR("enable-chr", cl::init(true), cl::Hidden,
267 cl::desc("Enable control height reduction optimization (CHR)"));
268
270 "flattened-profile-used", cl::init(false), cl::Hidden,
271 cl::desc("Indicate the sample profile being used is flattened, i.e., "
272 "no inline hierarchy exists in the profile"));
273
/// Hidden boolean option `enable-matrix`, initialized to false.
/// Per its description, it enables lowering of the matrix intrinsics.
274static cl::opt<bool>
275 EnableMatrix("enable-matrix", cl::init(false), cl::Hidden,
276 cl::desc("Enable lowering of the matrix intrinsics"));
277
279 "enable-mergeicmps", cl::init(true), cl::Hidden,
280 cl::desc("Enable MergeICmps pass in the optimization pipeline"));
281
283 "enable-constraint-elimination", cl::init(true), cl::Hidden,
284 cl::desc(
285 "Enable pass to eliminate conditions based on linear constraints"));
286
288 "attributor-enable", cl::Hidden, cl::init(AttributorRunOption::NONE),
289 cl::desc("Enable the attributor inter-procedural deduction pass"),
291 "enable all full attributor runs"),
293 "enable all attributor-light runs"),
295 "enable module-wide attributor runs"),
297 "enable module-wide attributor-light runs"),
299 "enable call graph SCC attributor runs"),
301 "enable call graph SCC attributor-light runs"),
302 clEnumValN(AttributorRunOption::NONE, "none",
303 "disable attributor runs")));
304
306 "enable-sampled-instrumentation", cl::init(false), cl::Hidden,
307 cl::desc("Enable profile instrumentation sampling (default = off)"));
309 "enable-loop-versioning-licm", cl::init(false), cl::Hidden,
310 cl::desc("Enable the experimental Loop Versioning LICM pass"));
311
313 "instrument-cold-function-only-path", cl::init(""),
314 cl::desc("File path for cold function only instrumentation(requires use "
315 "with --pgo-instrument-cold-function-only)"),
316 cl::Hidden);
317
318// TODO: There is a similar flag in WPD pass, we should consolidate them by
319// parsing the option only once in PassBuilder and share it across both places.
321 "enable-devirtualize-speculatively",
322 cl::desc("Enable speculative devirtualization optimization"),
323 cl::init(false));
324
327
329} // namespace llvm
330
348
349namespace llvm {
351} // namespace llvm
352
354 OptimizationLevel Level) {
355 for (auto &C : PeepholeEPCallbacks)
356 C(FPM, Level);
357}
360 for (auto &C : LateLoopOptimizationsEPCallbacks)
361 C(LPM, Level);
362}
364 OptimizationLevel Level) {
365 for (auto &C : LoopOptimizerEndEPCallbacks)
366 C(LPM, Level);
367}
370 for (auto &C : ScalarOptimizerLateEPCallbacks)
371 C(FPM, Level);
372}
374 OptimizationLevel Level) {
375 for (auto &C : CGSCCOptimizerLateEPCallbacks)
376 C(CGPM, Level);
377}
379 OptimizationLevel Level) {
380 for (auto &C : VectorizerStartEPCallbacks)
381 C(FPM, Level);
382}
384 OptimizationLevel Level) {
385 for (auto &C : VectorizerEndEPCallbacks)
386 C(FPM, Level);
387}
389 OptimizationLevel Level,
391 for (auto &C : OptimizerEarlyEPCallbacks)
392 C(MPM, Level, Phase);
393}
395 OptimizationLevel Level,
397 for (auto &C : OptimizerLastEPCallbacks)
398 C(MPM, Level, Phase);
399}
402 for (auto &C : FullLinkTimeOptimizationEarlyEPCallbacks)
403 C(MPM, Level);
404}
407 for (auto &C : FullLinkTimeOptimizationLastEPCallbacks)
408 C(MPM, Level);
409}
411 OptimizationLevel Level) {
412 for (auto &C : PipelineStartEPCallbacks)
413 C(MPM, Level);
414}
417 for (auto &C : PipelineEarlySimplificationEPCallbacks)
418 C(MPM, Level, Phase);
419}
420
421// Helper to add AnnotationRemarksPass.
424 // Count the stats for InstCount and FunctionPropertiesAnalysis
425 if (AreStatisticsEnabled()) {
427 MPM.addPass(
429 }
430}
431
432// Helper to check if the current compilation phase is preparing for LTO
437
438// Helper to check if the current compilation phase is LTO backend
443
444// Helper to wrap conditionally Coro passes.
446 // TODO: Skip passes according to Phase.
447 ModulePassManager CoroPM;
448 CoroPM.addPass(CoroEarlyPass());
449 CGSCCPassManager CGPM;
450 CGPM.addPass(CoroSplitPass());
451 CoroPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
452 CoroPM.addPass(CoroCleanupPass());
453 CoroPM.addPass(GlobalDCEPass());
454 return CoroConditionalWrapper(std::move(CoroPM));
455}
456
457// TODO: Investigate the cost/benefit of tail call elimination on debugging.
459PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
461
463
465 FPM.addPass(CountVisitsPass());
466
467 // Form SSA out of local memory accesses after breaking apart aggregates into
468 // scalars.
469 FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
470
471 // Catch trivial redundancies
472 FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
473
474 // Hoisting of scalars and load expressions.
475 FPM.addPass(
476 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
477 FPM.addPass(InstCombinePass());
478
479 FPM.addPass(LibCallsShrinkWrapPass());
480
481 invokePeepholeEPCallbacks(FPM, Level);
482
483 FPM.addPass(
484 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
485
486 // Form canonically associated expression trees, and simplify the trees using
487 // basic mathematical properties. For example, this will form (nearly)
488 // minimal multiplication trees.
489 FPM.addPass(ReassociatePass());
490
491 // Add the primary loop simplification pipeline.
492 // FIXME: Currently this is split into two loop pass pipelines because we run
493 // some function passes in between them. These can and should be removed
494 // and/or replaced by scheduling the loop pass equivalents in the correct
495 // positions. But those equivalent passes aren't powerful enough yet.
496 // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
497 // used. We have `LoopSimplifyCFGPass`, which isn't yet powerful enough to
498 // fully replace `SimplifyCFGPass`, and the closest to the other we have is
499 // `LoopInstSimplify`.
500 LoopPassManager LPM1, LPM2;
501
502 // Simplify the loop body. We do this initially to clean up after other loop
503 // passes run, either when iterating on a loop or on inner loops with
504 // implications on the outer loop.
505 LPM1.addPass(LoopInstSimplifyPass());
506 LPM1.addPass(LoopSimplifyCFGPass());
507
508 // Try to remove as much code from the loop header as possible,
509 // to reduce amount of IR that will have to be duplicated. However,
510 // do not perform speculative hoisting the first time as LICM
511 // will destroy metadata that may not need to be destroyed if run
512 // after loop rotation.
513 // TODO: Investigate promotion cap for O1.
514 LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
515 /*AllowSpeculation=*/false));
516
517 LPM1.addPass(
518 LoopRotatePass(/*EnableHeaderDuplication=*/true, isLTOPreLink(Phase)));
519 // TODO: Investigate promotion cap for O1.
520 LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
521 /*AllowSpeculation=*/true));
522 LPM1.addPass(SimpleLoopUnswitchPass());
524 LPM1.addPass(LoopFlattenPass());
525
526 LPM2.addPass(LoopIdiomRecognizePass());
527 LPM2.addPass(IndVarSimplifyPass());
528
530
531 LPM2.addPass(LoopDeletionPass());
532
533 // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
534 // because it changes the IR, which makes profile annotation in back compile
535 // inaccurate. The normal unroller doesn't pay attention to forced full unroll
536 // attributes so we need to make sure and allow the full unroll pass to pay
537 // attention to it.
538 if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
539 PGOOpt->Action != PGOOptions::SampleUse)
540 LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
541 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
542 PTO.ForgetAllSCEVInLoopUnroll));
543
545
546 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
547 /*UseMemorySSA=*/true));
548 FPM.addPass(
549 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
550 FPM.addPass(InstCombinePass());
551 // The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA.
552 // *All* loop passes must preserve it, in order to be able to use it.
553 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
554 /*UseMemorySSA=*/false));
555
556 // Delete small array after loop unroll.
557 FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
558
559 // Specially optimize memory movement as it doesn't look like dataflow in SSA.
560 FPM.addPass(MemCpyOptPass());
561
562 // Sparse conditional constant propagation.
563 // FIXME: It isn't clear why we do this *after* loop passes rather than
564 // before...
565 FPM.addPass(SCCPPass());
566
567 // Delete dead bit computations (instcombine runs after to fold away the dead
568 // computations, and then ADCE will run later to exploit any new DCE
569 // opportunities that creates).
570 FPM.addPass(BDCEPass());
571
572 // Run instcombine after redundancy and dead bit elimination to exploit
573 // opportunities opened up by them.
574 FPM.addPass(InstCombinePass());
575 invokePeepholeEPCallbacks(FPM, Level);
576
577 FPM.addPass(CoroElidePass());
578
580
581 // Finally, do an expensive DCE pass to catch all the dead code exposed by
582 // the simplifications and basic cleanup after all the simplifications.
583 // TODO: Investigate if this is too expensive.
584 FPM.addPass(ADCEPass());
585 FPM.addPass(
586 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
587 FPM.addPass(InstCombinePass());
588 invokePeepholeEPCallbacks(FPM, Level);
589
590 return FPM;
591}
592
596 assert(Level != OptimizationLevel::O0 && "Must request optimizations!");
597
598 // The O1 pipeline has a separate pipeline creation function to simplify
599 // construction readability.
600 if (Level.getSpeedupLevel() == 1)
601 return buildO1FunctionSimplificationPipeline(Level, Phase);
602
604
607
608 // Form SSA out of local memory accesses after breaking apart aggregates into
609 // scalars.
611
612 // Catch trivial redundancies
613 FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
616
617 // Hoisting of scalars and load expressions.
618 if (EnableGVNHoist)
619 FPM.addPass(GVNHoistPass());
620
621 // Global value numbering based sinking.
622 if (EnableGVNSink) {
623 FPM.addPass(GVNSinkPass());
624 FPM.addPass(
625 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
626 }
627
628 // Speculative execution if the target has divergent branches; otherwise nop.
629 FPM.addPass(SpeculativeExecutionPass(/* OnlyIfDivergentTarget =*/true));
630
631 // Optimize based on known information about branches, and cleanup afterward.
634
635 // Jump table to switch conversion.
640
641 FPM.addPass(
642 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
646
647 invokePeepholeEPCallbacks(FPM, Level);
648
649 // For PGO use pipeline, try to optimize memory intrinsics such as memcpy
650 // using the size value profile. Don't perform this when optimizing for size.
651 if (PGOOpt && PGOOpt->Action == PGOOptions::IRUse)
653
654 FPM.addPass(TailCallElimPass(/*UpdateFunctionEntryCount=*/
655 isInstrumentedPGOUse()));
656 FPM.addPass(
657 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
658
659 // Form canonically associated expression trees, and simplify the trees using
660 // basic mathematical properties. For example, this will form (nearly)
661 // minimal multiplication trees.
663
666
667 // Add the primary loop simplification pipeline.
668 // FIXME: Currently this is split into two loop pass pipelines because we run
669 // some function passes in between them. These can and should be removed
670 // and/or replaced by scheduling the loop pass equivalents in the correct
671 // positions. But those equivalent passes aren't powerful enough yet.
672 // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
673 // used. We have `LoopSimplifyCFGPass`, which isn't yet powerful enough to
674 // fully replace `SimplifyCFGPass`, and the closest to the other we have is
675 // `LoopInstSimplify`.
676 LoopPassManager LPM1, LPM2;
677
678 // Simplify the loop body. We do this initially to clean up after other loop
679 // passes run, either when iterating on a loop or on inner loops with
680 // implications on the outer loop.
681 LPM1.addPass(LoopInstSimplifyPass());
682 LPM1.addPass(LoopSimplifyCFGPass());
683
684 // Try to remove as much code from the loop header as possible,
685 // to reduce amount of IR that will have to be duplicated. However,
686 // do not perform speculative hoisting the first time as LICM
687 // will destroy metadata that may not need to be destroyed if run
688 // after loop rotation.
689 // TODO: Investigate promotion cap for O1.
690 LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
691 /*AllowSpeculation=*/false));
692
693 LPM1.addPass(
694 LoopRotatePass(/*EnableHeaderDuplication=*/true, isLTOPreLink(Phase)));
695 // TODO: Investigate promotion cap for O1.
696 LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
697 /*AllowSpeculation=*/true));
698 LPM1.addPass(
699 SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3));
701 LPM1.addPass(LoopFlattenPass());
702
703 LPM2.addPass(LoopIdiomRecognizePass());
704 LPM2.addPass(IndVarSimplifyPass());
705
706 {
708 ExtraPasses.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
710 LPM2.addPass(std::move(ExtraPasses));
711 }
712
714
715 LPM2.addPass(LoopDeletionPass());
716
717 // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
718 // because it changes the IR, which makes profile annotation in back compile
719 // inaccurate. The normal unroller doesn't pay attention to forced full unroll
720 // attributes so we need to make sure and allow the full unroll pass to pay
721 // attention to it.
722 if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
723 PGOOpt->Action != PGOOptions::SampleUse)
724 LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
725 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
726 PTO.ForgetAllSCEVInLoopUnroll));
727
729
730 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
731 /*UseMemorySSA=*/true));
732 FPM.addPass(
733 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
735 // The loop passes in LPM2 (LoopIdiomRecognizePass, IndVarSimplifyPass,
736 // LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA.
737 // *All* loop passes must preserve it, in order to be able to use it.
738 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
739 /*UseMemorySSA=*/false));
740
741 // Delete small array after loop unroll.
743
744 // Try vectorization/scalarization transforms that are both improvements
745 // themselves and can allow further folds with GVN and InstCombine.
746 FPM.addPass(VectorCombinePass(/*TryEarlyFoldsOnly=*/true));
747
748 // Eliminate redundancies.
750 if (RunNewGVN)
751 FPM.addPass(NewGVNPass());
752 else
753 FPM.addPass(GVNPass());
754
755 // Sparse conditional constant propagation.
756 // FIXME: It isn't clear why we do this *after* loop passes rather than
757 // before...
758 FPM.addPass(SCCPPass());
759
760 // Delete dead bit computations (instcombine runs after to fold away the dead
761 // computations, and then ADCE will run later to exploit any new DCE
762 // opportunities that creates).
763 FPM.addPass(BDCEPass());
764
765 // Run instcombine after redundancy and dead bit elimination to exploit
766 // opportunities opened up by them.
768 invokePeepholeEPCallbacks(FPM, Level);
769
770 // Re-consider control flow based optimizations after redundancy elimination,
771 // redo DCE, etc.
774
777
778 // Finally, do an expensive DCE pass to catch all the dead code exposed by
779 // the simplifications and basic cleanup after all the simplifications.
780 // TODO: Investigate if this is too expensive.
781 FPM.addPass(ADCEPass());
782
783 // Specially optimize memory movement as it doesn't look like dataflow in SSA.
784 FPM.addPass(MemCpyOptPass());
785
786 FPM.addPass(DSEPass());
788
790 LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
791 /*AllowSpeculation=*/true),
792 /*UseMemorySSA=*/true));
793
794 FPM.addPass(CoroElidePass());
795
797
799 .convertSwitchRangeToICmp(true)
800 .convertSwitchToArithmetic(true)
801 .hoistCommonInsts(true)
802 .sinkCommonInsts(true)));
804 invokePeepholeEPCallbacks(FPM, Level);
805
806 return FPM;
807}
808
809void PassBuilder::addRequiredLTOPreLinkPasses(ModulePassManager &MPM) {
812}
813
/// Adds the pre-inliner passes to \p MPM, followed by a GlobalDCEPass.
///
/// Asserts that \p Level is not O0. The inline hint threshold is set to 325,
/// and a small function cleanup pipeline (SROA, EarlyCSE, SimplifyCFG,
/// InstCombine, then the peephole extension-point callbacks) is added to the
/// CGSCC pipeline obtained from MIWP.getPM(). MIWP is then moved into \p MPM,
/// and GlobalDCEPass is added last so that dead code is not instrumented.
///
/// NOTE(review): several statements of this function (the early-return
/// condition and the declarations of MIWP and FPM) are not visible in this
/// listing; confirm the details against the full source.
814void PassBuilder::addPreInlinerPasses(ModulePassManager &MPM,
815 OptimizationLevel Level,
816 ThinOrFullLTOPhase LTOPhase) {
817 assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
819 return;
820 InlineParams IP;
821
823
824 // FIXME: The hint threshold has the same value used by the regular inliner
825 // when not optimizing for size. This should probably be lowered after
826 // performance testing.
827 // FIXME: this comment is cargo culted from the old pass manager, revisit.
828 IP.HintThreshold = 325;
831 IP, /* MandatoryFirst */ true,
833 CGSCCPassManager &CGPipeline = MIWP.getPM();
834
836 FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
837 FPM.addPass(EarlyCSEPass()); // Catch trivial redundancies.
838 FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(
839 true))); // Merge & remove basic blocks.
840 FPM.addPass(InstCombinePass()); // Combine silly sequences.
841 invokePeepholeEPCallbacks(FPM, Level);
842
843 CGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
844 std::move(FPM), PTO.EagerlyInvalidateAnalyses));
845
846 MPM.addPass(std::move(MIWP));
847
848 // Delete anything that is now dead to make sure that we don't instrument
849 // dead code. Instrumentation can end up keeping dead code around and
850 // dramatically increase code size.
851 MPM.addPass(GlobalDCEPass());
852}
853
/// Adds a LoopRotatePass, wrapped in a function-to-loop pass adaptor that does
/// not use MemorySSA, passing PTO.EagerlyInvalidateAnalyses to the enclosing
/// adaptor.
///
/// NOTE(review): the condition guarding this block and the outer addPass call
/// are not visible in this listing; presumably the guard is the
/// `enable-post-pgo-loop-rotation` option -- confirm against the full source.
854void PassBuilder::addPostPGOLoopRotation(ModulePassManager &MPM,
855 OptimizationLevel Level) {
857 // Disable header duplication in loop rotation at -Oz.
859 createFunctionToLoopPassAdaptor(LoopRotatePass(),
860 /*UseMemorySSA=*/false),
861 PTO.EagerlyInvalidateAnalyses));
862 }
863}
864
/// Adds the PGO passes for either profile use or profile generation to
/// \p MPM. Asserts that \p Level is not O0.
///
/// \param RunProfileGen When false, the profile-use path is taken:
///        PGOInstrumentationUse is added, ProfileSummaryAnalysis is required
///        once at module level, and the function returns. When true,
///        PGOInstrumentationGen, the post-PGO loop rotation and
///        InstrProfilingLoweringPass are added.
/// \param IsCS Forwarded to the use and lowering passes, selects the CSFDO
///        instrumentation type when true, and sets
///        Options.UseBFIInPromotion.
/// \param AtomicCounterUpdate Copied into Options.Atomic.
/// \param ProfileFile Must be non-empty for profile use (asserted). For
///        profile generation it becomes Options.InstrProfileOutput when
///        non-empty.
/// \param ProfileRemappingFile Forwarded to PGOInstrumentationUse.
865void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM,
866 OptimizationLevel Level, bool RunProfileGen,
867 bool IsCS, bool AtomicCounterUpdate,
868 std::string ProfileFile,
869 std::string ProfileRemappingFile) {
870 assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
871
872 if (!RunProfileGen) {
873 assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
874 MPM.addPass(
875 PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS));
876 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
877 // RequireAnalysisPass for PSI before subsequent non-module passes.
878 MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
879 return;
880 }
881
882 // Perform PGO instrumentation.
883 MPM.addPass(PGOInstrumentationGen(IsCS ? PGOInstrumentationType::CSFDO
885
886 addPostPGOLoopRotation(MPM, Level);
887 // Add the profile lowering pass.
888 InstrProfOptions Options;
889 if (!ProfileFile.empty())
890 Options.InstrProfileOutput = ProfileFile;
891 // Do counter promotion at Level greater than O0.
892 Options.DoCounterPromotion = true;
893 Options.UseBFIInPromotion = IsCS;
894 if (EnableSampledInstr) {
895 Options.Sampling = true;
896 // With sampling, there is little benefit to enabling counter promotion.
897 // But note that sampling does work with counter promotion.
898 Options.DoCounterPromotion = false;
899 }
900 Options.Atomic = AtomicCounterUpdate;
901 MPM.addPass(InstrProfilingLoweringPass(Options, IsCS));
902}
903
905 bool RunProfileGen, bool IsCS,
906 bool AtomicCounterUpdate,
907 std::string ProfileFile,
908 std::string ProfileRemappingFile) {
909 if (!RunProfileGen) {
910 assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
911 MPM.addPass(
912 PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS));
913 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
914 // RequireAnalysisPass for PSI before subsequent non-module passes.
916 return;
917 }
918
919 // Perform PGO instrumentation.
922 // Add the profile lowering pass.
924 if (!ProfileFile.empty())
925 Options.InstrProfileOutput = ProfileFile;
926 // Do not do counter promotion at O0.
927 Options.DoCounterPromotion = false;
928 Options.UseBFIInPromotion = IsCS;
929 Options.Atomic = AtomicCounterUpdate;
931}
932
934 return getInlineParamsFromOptLevel(Level.getSpeedupLevel());
935}
936
940 InlineParams IP;
941 if (PTO.InlinerThreshold == -1)
943 else
944 IP = getInlineParams(PTO.InlinerThreshold);
945 // For PreLinkThinLTO + SamplePGO or PreLinkFullLTO + SamplePGO,
946 // set hot-caller threshold to 0 to disable hot
947 // callsite inline (as much as possible [1]) because it makes
948 // profile annotation in the backend inaccurate.
949 //
950 // [1] Note the cost of a function could be below zero due to erased
951 // prologue / epilogue.
952 if (isLTOPreLink(Phase) && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)
954
955 if (PGOOpt)
957
961
962 // Require the GlobalsAA analysis for the module so we can query it within
963 // the CGSCC pipeline.
965 MIWP.addModulePass(RequireAnalysisPass<GlobalsAA, Module>());
966 // Invalidate AAManager so it can be recreated and pick up the newly
967 // available GlobalsAA.
968 MIWP.addModulePass(
970 }
971
972 // Require the ProfileSummaryAnalysis for the module so we can query it within
973 // the inliner pass.
975
976 // Now begin the main postorder CGSCC pipeline.
977 // FIXME: The current CGSCC pipeline has its origins in the legacy pass
978 // manager and trying to emulate its precise behavior. Much of this doesn't
979 // make a lot of sense and we should revisit the core CGSCC structure.
980 CGSCCPassManager &MainCGPipeline = MIWP.getPM();
981
982 // Note: historically, the PruneEH pass was run first to deduce nounwind and
983 // generally clean up exception handling overhead. It isn't clear this is
984 // valuable as the inliner doesn't currently care whether it is inlining an
985 // invoke or a call.
986
988 MainCGPipeline.addPass(AttributorCGSCCPass());
990 MainCGPipeline.addPass(AttributorLightCGSCCPass());
991
992 // Deduce function attributes. We do another run of this after the function
993 // simplification pipeline, so this only needs to run when it could affect the
994 // function simplification pipeline, which is only the case with recursive
995 // functions.
996 MainCGPipeline.addPass(PostOrderFunctionAttrsPass(/*SkipNonRecursive*/ true));
997
998 // When at O3 add argument promotion to the pass pipeline.
999 // FIXME: It isn't at all clear why this should be limited to O3.
1000 if (Level == OptimizationLevel::O3)
1001 MainCGPipeline.addPass(ArgumentPromotionPass());
1002
1003 // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if
1004 // there are no OpenMP runtime calls present in the module.
1005 if (Level == OptimizationLevel::O2 || Level == OptimizationLevel::O3)
1006 MainCGPipeline.addPass(OpenMPOptCGSCCPass(Phase));
1007
1008 invokeCGSCCOptimizerLateEPCallbacks(MainCGPipeline, Level);
1009
1010 // Add the core function simplification pipeline nested inside the
1011 // CGSCC walk.
1014 PTO.EagerlyInvalidateAnalyses, /*NoRerun=*/true));
1015
1016 // Finally, deduce any function attributes based on the fully simplified
1017 // function.
1018 MainCGPipeline.addPass(PostOrderFunctionAttrsPass());
1019
1020 // Mark that the function is fully simplified and that it shouldn't be
1021 // simplified again if we somehow revisit it due to CGSCC mutations unless
1022 // it's been modified since.
1025
1027 MainCGPipeline.addPass(CoroSplitPass(Level != OptimizationLevel::O0));
1028 MainCGPipeline.addPass(CoroAnnotationElidePass());
1029 }
1030
1031 // Make sure we don't affect potential future NoRerun CGSCC adaptors.
1032 MIWP.addLateModulePass(createModuleToFunctionPassAdaptor(
1034
1035 return MIWP;
1036}
1037
1042
1044 // For PreLinkThinLTO + SamplePGO or PreLinkFullLTO + SamplePGO,
1045 // set hot-caller threshold to 0 to disable hot
1046 // callsite inline (as much as possible [1]) because it makes
1047 // profile annotation in the backend inaccurate.
1048 //
1049 // [1] Note the cost of a function could be below zero due to erased
1050 // prologue / epilogue.
1051 if (isLTOPreLink(Phase) && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)
1052 IP.HotCallSiteThreshold = 0;
1053
1054 if (PGOOpt)
1056
1057 // The inline deferral logic is used to avoid losing some
1058 // inlining chance in future. It is helpful in SCC inliner, in which
1059 // inlining is processed in bottom-up order.
1060 // While in module inliner, the inlining order is a priority-based order
1061 // by default. The inline deferral is unnecessary there. So we disable the
1062 // inline deferral logic in module inliner.
1063 IP.EnableDeferral = false;
1064
1067 MPM.addPass(GlobalOptPass());
1068 MPM.addPass(GlobalDCEPass());
1069 MPM.addPass(PGOCtxProfFlatteningPass(/*IsPreThinlink=*/false));
1070 }
1071
1074 PTO.EagerlyInvalidateAnalyses));
1075
1079 MPM.addPass(
1081 }
1082
1083 return MPM;
1084}
1085
1089 assert(Level != OptimizationLevel::O0 &&
1090 "Should not be used for O0 pipeline");
1091
1093 "FullLTOPostLink shouldn't call buildModuleSimplificationPipeline!");
1094
1096
1097 // Place pseudo probe instrumentation as the first pass of the pipeline to
1098 // minimize the impact of optimization changes.
1099 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1102
1103 bool HasSampleProfile = PGOOpt && (PGOOpt->Action == PGOOptions::SampleUse);
1104
1105 // In ThinLTO mode, when flattened profile is used, all the available
1106 // profile information will be annotated in PreLink phase so there is
1107 // no need to load the profile again in PostLink.
1108 bool LoadSampleProfile =
1109 HasSampleProfile &&
1111
1112 // During the ThinLTO backend phase we perform early indirect call promotion
1113 // here, before globalopt. Otherwise imported available_externally functions
1114 // look unreferenced and are removed. If we are going to load the sample
1115 // profile then defer until later.
1116 // TODO: See if we can move later and consolidate with the location where
1117 // we perform ICP when we are loading a sample profile.
1118 // TODO: We pass HasSampleProfile (whether there was a sample profile file
1119 // passed to the compile) to the SamplePGO flag of ICP. This is used to
1120 // determine whether the new direct calls are annotated with prof metadata.
1121 // Ideally this should be determined from whether the IR is annotated with
1122 // sample profile, and not whether a sample profile was provided on the
1123 // command line. E.g. for flattened profiles where we will not be reloading
1124 // the sample profile in the ThinLTO backend, we ideally shouldn't have to
1125 // provide the sample profile file.
1126 if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink && !LoadSampleProfile)
1127 MPM.addPass(PGOIndirectCallPromotion(true /* InLTO */, HasSampleProfile));
1128
1129 // Create an early function pass manager to cleanup the output of the
1130 // frontend. Not necessary with LTO post link pipelines since the pre link
1131 // pipeline already cleaned up the frontend output.
1133 // Do basic inference of function attributes from known properties of system
1134 // libraries and other oracles.
1136 MPM.addPass(CoroEarlyPass());
1137
1138 FunctionPassManager EarlyFPM;
1139 EarlyFPM.addPass(EntryExitInstrumenterPass(/*PostInlining=*/false));
1140 // Lower llvm.expect to metadata before attempting transforms.
1141 // Compare/branch metadata may alter the behavior of passes like
1142 // SimplifyCFG.
1144 EarlyFPM.addPass(SimplifyCFGPass());
1146 EarlyFPM.addPass(EarlyCSEPass());
1147 if (Level == OptimizationLevel::O3)
1148 EarlyFPM.addPass(CallSiteSplittingPass());
1150 std::move(EarlyFPM), PTO.EagerlyInvalidateAnalyses));
1151 }
1152
1153 if (LoadSampleProfile) {
1154 // Annotate sample profile right after early FPM to ensure freshness of
1155 // the debug info.
1157 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile, Phase, FS));
1158 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
1159 // RequireAnalysisPass for PSI before subsequent non-module passes.
1161 // Do not invoke ICP in the LTOPrelink phase as it makes it hard
1162 // for the profile annotation to be accurate in the LTO backend.
1163 if (!isLTOPreLink(Phase))
1164 // We perform early indirect call promotion here, before globalopt.
1165 // This is important for the ThinLTO backend phase because otherwise
1166 // imported available_externally functions look unreferenced and are
1167 // removed.
1168 MPM.addPass(
1169 PGOIndirectCallPromotion(true /* IsInLTO */, true /* SamplePGO */));
1170 }
1171
1172 // Try to perform OpenMP specific optimizations on the module. This is a
1173 // (quick!) no-op if there are no OpenMP runtime calls present in the module.
1175
1177 MPM.addPass(AttributorPass());
1180
1181 // Lower type metadata and the type.test intrinsic in the ThinLTO
1182 // post link pipeline after ICP. This is to enable usage of the type
1183 // tests in ICP sequences.
1186
1188
1189 // Interprocedural constant propagation now that basic cleanup has occurred
1190 // and prior to optimizing globals.
1191 // FIXME: This position in the pipeline hasn't been carefully considered in
1192 // years, it should be re-analyzed.
1193 MPM.addPass(
1194 IPSCCPPass(IPSCCPOptions(/*AllowFuncSpec=*/!isLTOPreLink(Phase))));
1195
1196 // Attach metadata to indirect call sites indicating the set of functions
1197 // they may target at run-time. This should follow IPSCCP.
1199
1200 // Optimize globals to try and fold them into constants.
1201 MPM.addPass(GlobalOptPass());
1202
1203 // Create a small function pass pipeline to cleanup after all the global
1204 // optimizations.
1205 FunctionPassManager GlobalCleanupPM;
1206 // FIXME: Should this instead be a run of SROA?
1207 GlobalCleanupPM.addPass(PromotePass());
1208 GlobalCleanupPM.addPass(InstCombinePass());
1209 invokePeepholeEPCallbacks(GlobalCleanupPM, Level);
1210 GlobalCleanupPM.addPass(
1211 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
1212 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(GlobalCleanupPM),
1213 PTO.EagerlyInvalidateAnalyses));
1214
1215 // We already asserted this happens in non-FullLTOPostLink earlier.
1216 const bool IsPreLink = Phase != ThinOrFullLTOPhase::ThinLTOPostLink;
1217 // Enable contextual profiling instrumentation.
1218 const bool IsCtxProfGen =
1220 const bool IsPGOPreLink = !IsCtxProfGen && PGOOpt && IsPreLink;
1221 const bool IsPGOInstrGen =
1222 IsPGOPreLink && PGOOpt->Action == PGOOptions::IRInstr;
1223 const bool IsPGOInstrUse =
1224 IsPGOPreLink && PGOOpt->Action == PGOOptions::IRUse;
1225 const bool IsMemprofUse = IsPGOPreLink && !PGOOpt->MemoryProfile.empty();
1226 // We don't want to mix pgo ctx gen and pgo gen; we also don't currently
1227 // enable ctx profiling from the frontend.
1229 "Enabling both instrumented PGO and contextual instrumentation is not "
1230 "supported.");
1231 const bool IsCtxProfUse =
1233
1234 assert(
1236 "--instrument-cold-function-only-path is provided but "
1237 "--pgo-instrument-cold-function-only is not enabled");
1238 const bool IsColdFuncOnlyInstrGen = PGOInstrumentColdFunctionOnly &&
1239 IsPGOPreLink &&
1241
1242 if (IsPGOInstrGen || IsPGOInstrUse || IsMemprofUse || IsCtxProfGen ||
1243 IsCtxProfUse || IsColdFuncOnlyInstrGen)
1244 addPreInlinerPasses(MPM, Level, Phase);
1245
1246 // Add all the requested passes for instrumentation PGO, if requested.
1247 if (IsPGOInstrGen || IsPGOInstrUse) {
1248 addPGOInstrPasses(MPM, Level,
1249 /*RunProfileGen=*/IsPGOInstrGen,
1250 /*IsCS=*/false, PGOOpt->AtomicCounterUpdate,
1251 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile);
1252 } else if (IsCtxProfGen || IsCtxProfUse) {
1254 // In pre-link, we just want the instrumented IR. We use the contextual
1255 // profile in the post-thinlink phase.
1256 // The instrumentation will be removed in post-thinlink after IPO.
1257 // FIXME(mtrofin): move AssignGUIDPass if there is agreement to use this
1258 // mechanism for GUIDs.
1259 MPM.addPass(AssignGUIDPass());
1260 if (IsCtxProfUse) {
1261 MPM.addPass(PGOCtxProfFlatteningPass(/*IsPreThinlink=*/true));
1262 return MPM;
1263 }
1264 // Block further inlining in the instrumented ctxprof case. This avoids
1265 // confusingly collecting profiles for the same GUID corresponding to
1266 // different variants of the function. We could do like PGO and identify
1267 // functions by a (GUID, Hash) tuple, but since the ctxprof "use" waits for
1268 // thinlto to happen before performing any further optimizations, it's
1269 // unnecessary to collect profiles for non-prevailing copies.
1271 addPostPGOLoopRotation(MPM, Level);
1273 } else if (IsColdFuncOnlyInstrGen) {
1274 addPGOInstrPasses(MPM, Level, /* RunProfileGen */ true, /* IsCS */ false,
1275 /* AtomicCounterUpdate */ false,
1277 /* ProfileRemappingFile */ "");
1278 }
1279
1280 if (IsPGOInstrGen || IsPGOInstrUse || IsCtxProfGen)
1281 MPM.addPass(PGOIndirectCallPromotion(false, false));
1282
1283 if (IsPGOPreLink && PGOOpt->CSAction == PGOOptions::CSIRInstr)
1284 MPM.addPass(PGOInstrumentationGenCreateVar(PGOOpt->CSProfileGenFile,
1286
1287 if (IsMemprofUse)
1288 MPM.addPass(MemProfUsePass(PGOOpt->MemoryProfile, FS));
1289
1290 if (PGOOpt && (PGOOpt->Action == PGOOptions::IRUse ||
1291 PGOOpt->Action == PGOOptions::SampleUse))
1292 MPM.addPass(PGOForceFunctionAttrsPass(PGOOpt->ColdOptType));
1293
1294 MPM.addPass(AlwaysInlinerPass(/*InsertLifetimeIntrinsics=*/true));
1295
1298 else
1299 MPM.addPass(buildInlinerPipeline(Level, Phase));
1300
1301 // Remove any dead arguments exposed by cleanups, constant folding globals,
1302 // and argument promotion.
1304
1307
1309 MPM.addPass(CoroCleanupPass());
1310
1311 // Optimize globals now that functions are fully simplified.
1312 MPM.addPass(GlobalOptPass());
1313 MPM.addPass(GlobalDCEPass());
1314
1315 return MPM;
1316}
1317
/// Adds the vectorization-stage function passes to FPM.
///
/// In the order visible in this body: drop dereferenceable assumes (skipped in
/// the LTO pre-link phase), an InstCombine cleanup, an optional extra cleanup
/// pipeline (speedup level > 1 with ExtraVectorizerPasses set), an aggressive
/// SimplifyCFG run, SCCP/InstCombine/BDCE (full LTO post-link only), SLP
/// vectorization (when PTO.SLPVectorization is set), VectorCombine, loop
/// unrolling followed by CFG-preserving SROA (non-full-LTO path), InferAlignment
/// plus InstCombine, a LICM run, and finally AlignmentFromAssumptions.
///
/// \param Level    Optimization level; its speedup level gates the extra
///                 cleanup passes and is forwarded to the unroll passes.
/// \param LTOPhase Current LTO phase; selects between the full-LTO and
///                 non-full-LTO variants of the sequence.
///
/// NOTE(review): this listing appears to be missing some source lines (the
/// embedded numbering skips, e.g. the declaration of FPM and several addPass
/// calls); confirm against the complete file before relying on the summary.
1318/// TODO: Should LTO cause any differences to this set of passes?
1319void PassBuilder::addVectorPasses(OptimizationLevel Level,
1321 ThinOrFullLTOPhase LTOPhase) {
1322 const bool IsFullLTO = LTOPhase == ThinOrFullLTOPhase::FullLTOPostLink;
1323
1326
1327 // Drop dereferenceable assumes after vectorization, as they are no longer
1328 // needed and can inhibit further optimization.
1329 if (!isLTOPreLink(LTOPhase))
1330 FPM.addPass(DropUnnecessaryAssumesPass(/*DropDereferenceable=*/true));
1331
1333 if (IsFullLTO) {
1334 // The vectorizer may have significantly shortened a loop body; unroll
1335 // again. Unroll small loops to hide loop backedge latency and saturate any
1336 // parallel execution resources of an out-of-order processor. We also then
1337 // need to clean up redundancies and loop invariant code.
1338 // FIXME: It would be really good to use a loop-integrated instruction
1339 // combiner for cleanup here so that the unrolling and LICM can be pipelined
1340 // across the loop nests.
1341 // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
1344 LoopUnrollAndJamPass(Level.getSpeedupLevel())));
1346 Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
1349 // Now that we are done with loop unrolling, be it either by LoopVectorizer,
1350 // or LoopUnroll passes, some variable-offset GEP's into alloca's could have
1351 // become constant-offset, thus enabling SROA and alloca promotion. Do so.
1352 // NOTE: we are very late in the pipeline, and we don't have any LICM
1353 // or SimplifyCFG passes scheduled after us, that would cleanup
1354 // the CFG mess this may create if allowed to modify CFG, so forbid that.
1356 }
1357
1358 if (!IsFullLTO) {
1359 // Eliminate loads by forwarding stores from the previous iteration to loads
1360 // of the current iteration.
1362 }
1363 // Cleanup after the loop optimization passes.
1364 FPM.addPass(InstCombinePass());
1365
1366 if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
1367 ExtraFunctionPassManager<ShouldRunExtraVectorPasses> ExtraPasses;
1368 // At higher optimization levels, try to clean up any runtime overlap and
1369 // alignment checks inserted by the vectorizer. We want to track correlated
1370 // runtime checks for two inner loops in the same outer loop, fold any
1371 // common computations, hoist loop-invariant aspects out of any outer loop,
1372 // and unswitch the runtime checks if possible. Once hoisted, we may have
1373 // dead (or speculatable) control flows or more combining opportunities.
1374 ExtraPasses.addPass(EarlyCSEPass());
1375 ExtraPasses.addPass(CorrelatedValuePropagationPass());
1376 ExtraPasses.addPass(InstCombinePass());
1377 LoopPassManager LPM;
1378 LPM.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
1379 /*AllowSpeculation=*/true));
1380 LPM.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
1382 ExtraPasses.addPass(
1383 createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/true));
1384 ExtraPasses.addPass(
1385 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
1386 ExtraPasses.addPass(InstCombinePass());
1387 FPM.addPass(std::move(ExtraPasses));
1388 }
1389
1390 // Now that we've formed fast to execute loop structures, we do further
1391 // optimizations. These are run afterward as they might block doing complex
1392 // analyses and transforms such as what are needed for loop vectorization.
1393
1394 // Cleanup after loop vectorization, etc. Simplification passes like CVP and
1395 // GVN, loop transforms, and others have already run, so it's now better to
1396 // convert to more optimized IR using more aggressive simplify CFG options.
1397 // The extra sinking transform can create larger basic blocks, so do this
1398 // before SLP vectorization.
1399 FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
1400 .forwardSwitchCondToPhi(true)
1401 .convertSwitchRangeToICmp(true)
1402 .convertSwitchToArithmetic(true)
1403 .convertSwitchToLookupTable(true)
1404 .needCanonicalLoops(false)
1405 .hoistCommonInsts(true)
1406 .sinkCommonInsts(true)));
1407
 // Full LTO post-link only: run SCCP, InstCombine and BDCE before the SLP
 // vectorizer.
1408 if (IsFullLTO) {
1409 FPM.addPass(SCCPPass());
1410 FPM.addPass(InstCombinePass());
1411 FPM.addPass(BDCEPass());
1412 }
1413
1414 // Optimize parallel scalar instruction chains into SIMD instructions.
1415 if (PTO.SLPVectorization) {
1416 FPM.addPass(SLPVectorizerPass());
 // With the extra vectorizer passes enabled, follow SLP with an EarlyCSE run.
1417 if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
1418 FPM.addPass(EarlyCSEPass());
1419 }
1420 }
1421 // Enhance/cleanup vector code.
1422 FPM.addPass(VectorCombinePass());
1423
1424 if (!IsFullLTO) {
1425 FPM.addPass(InstCombinePass());
1426 // Unroll small loops to hide loop backedge latency and saturate any
1427 // parallel execution resources of an out-of-order processor. We also then
1428 // need to clean up redundancies and loop invariant code.
1429 // FIXME: It would be really good to use a loop-integrated instruction
1430 // combiner for cleanup here so that the unrolling and LICM can be pipelined
1431 // across the loop nests.
1432 // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
1433 if (EnableUnrollAndJam && PTO.LoopUnrolling) {
1435 LoopUnrollAndJamPass(Level.getSpeedupLevel())));
1436 }
1437 FPM.addPass(LoopUnrollPass(LoopUnrollOptions(
1438 Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
1439 PTO.ForgetAllSCEVInLoopUnroll)));
1440 FPM.addPass(WarnMissedTransformationsPass());
1441 // Now that we are done with loop unrolling, be it either by LoopVectorizer,
1442 // or LoopUnroll passes, some variable-offset GEP's into alloca's could have
1443 // become constant-offset, thus enabling SROA and alloca promotion. Do so.
1444 // NOTE: we are very late in the pipeline, and we don't have any LICM
1445 // or SimplifyCFG passes scheduled after us, that would cleanup
1446 // the CFG mess this may create if allowed to modify CFG, so forbid that.
1447 FPM.addPass(SROAPass(SROAOptions::PreserveCFG));
1448 }
1449
1450 FPM.addPass(InferAlignmentPass());
1451 FPM.addPass(InstCombinePass());
1452
1453 // This is needed for two reasons:
1454 // 1. It works around problems that instcombine introduces, such as sinking
1455 // expensive FP divides into loops containing multiplications using the
1456 // divide result.
1457 // 2. It helps to clean up some loop-invariant code created by the loop
1458 // unroll pass when IsFullLTO=false.
1460 LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
1461 /*AllowSpeculation=*/true),
1462 /*UseMemorySSA=*/true));
1463
1464 // Now that we've vectorized and unrolled loops, we may have more refined
1465 // alignment information, try to re-derive it here.
1466 FPM.addPass(AlignmentFromAssumptionsPass());
1467}
1468
1471 ThinOrFullLTOPhase LTOPhase) {
1472 const bool LTOPreLink = isLTOPreLink(LTOPhase);
1474
1475 // Run partial inlining pass to partially inline functions that have
1476 // large bodies.
1479
1480 // Remove avail extern fns and globals definitions since we aren't compiling
1481 // an object file for later LTO. For LTO we want to preserve these so they
1482 // are eligible for inlining at link-time. Note if they are unreferenced they
1483 // will be removed by GlobalDCE later, so this only impacts referenced
1484 // available externally globals. Eventually they will be suppressed during
1485 // codegen, but eliminating here enables more opportunity for GlobalDCE as it
1486 // may make globals referenced by available external functions dead and saves
1487 // running remaining passes on the eliminated functions. These should be
1488 // preserved during prelinking for link-time inlining decisions.
1489 if (!LTOPreLink)
1491
1492 // Do RPO function attribute inference across the module to forward-propagate
1493 // attributes where applicable.
1494 // FIXME: Is this really an optimization rather than a canonicalization?
1496
1497 // Do a post inline PGO instrumentation and use pass. This is a context
1498 // sensitive PGO pass. We don't want to do this in LTOPreLink phase as
1499 // cross-module inline has not been done yet. The context sensitive
1500 // instrumentation is after all the inlines are done.
1501 if (!LTOPreLink && PGOOpt) {
1502 if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
1503 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true,
1504 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
1505 PGOOpt->CSProfileGenFile, PGOOpt->ProfileRemappingFile);
1506 else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
1507 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false,
1508 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
1509 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile);
1510 }
1511
1512 // Re-compute GlobalsAA here prior to function passes. This is particularly
1513 // useful as the above will have inlined, DCE'ed, and function-attr
1514 // propagated everything. We should at this point have a reasonably minimal
1515 // and richly annotated call graph. By computing aliasing and mod/ref
1516 // information for all local globals here, the late loop passes and notably
1517 // the vectorizer will be able to use them to help recognize vectorizable
1518 // memory operations.
1521
1522 invokeOptimizerEarlyEPCallbacks(MPM, Level, LTOPhase);
1523
1524 FunctionPassManager OptimizePM;
1525
1526 // Only drop unnecessary assumes post-inline and post-link, as otherwise
1527 // additional uses of the affected value may be introduced through inlining
1528 // and CSE.
1529 if (!isLTOPreLink(LTOPhase))
1530 OptimizePM.addPass(DropUnnecessaryAssumesPass());
1531
1532 // Scheduling LoopVersioningLICM when inlining is over, because after that
1533 // we may see more accurate aliasing. Reason to run this late is that too
1534 // early versioning may prevent further inlining due to increase of code
1535 // size. Other optimizations which run later might benefit from the no-alias
1536 // assumption in the cloned loop.
1538 OptimizePM.addPass(
1540 // LoopVersioningLICM pass might increase new LICM opportunities.
1542 LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
1543 /*AllowSpeculation=*/true),
1544 /*USeMemorySSA=*/true));
1545 }
1546
1547 OptimizePM.addPass(Float2IntPass());
1549
1550 if (EnableMatrix) {
1551 OptimizePM.addPass(LowerMatrixIntrinsicsPass());
1552 OptimizePM.addPass(EarlyCSEPass());
1553 }
1554
1555 // CHR pass should only be applied with the profile information.
1556 // The check is to check the profile summary information in CHR.
1557 if (EnableCHR && Level == OptimizationLevel::O3)
1558 OptimizePM.addPass(ControlHeightReductionPass());
1559
1560 // FIXME: We need to run some loop optimizations to re-rotate loops after
1561 // simplifycfg and others undo their rotation.
1562
1563 // Optimize the loop execution. These passes operate on entire loop nests
1564 // rather than on each loop in an inside-out manner, and so they are actually
1565 // function passes.
1566
1567 invokeVectorizerStartEPCallbacks(OptimizePM, Level);
1568
1569 LoopPassManager LPM;
1570 // First rotate loops that may have been un-rotated by prior passes.
1571 // Disable header duplication at -Oz.
1572 LPM.addPass(LoopRotatePass(/*EnableLoopHeaderDuplication=*/true, LTOPreLink,
1573 /*CheckExitCount=*/true));
1574 // Some loops may have become dead by now. Try to delete them.
1575 // FIXME: see discussion in https://reviews.llvm.org/D112851,
1576 // this may need to be revisited once we run GVN before loop deletion
1577 // in the simplification pipeline.
1578 LPM.addPass(LoopDeletionPass());
1579
1580 if (PTO.LoopInterchange)
1581 LPM.addPass(LoopInterchangePass());
1582
1583 OptimizePM.addPass(
1584 createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/false));
1585
1586 // FIXME: This may not be the right place in the pipeline.
1587 // We need to have the data to support the right place.
1588 if (PTO.LoopFusion)
1589 OptimizePM.addPass(LoopFusePass());
1590
1591 // Distribute loops to allow partial vectorization. I.e. isolate dependences
1592 // into separate loop that would otherwise inhibit vectorization. This is
1593 // currently only performed for loops marked with the metadata
1594 // llvm.loop.distribute=true or when -enable-loop-distribute is specified.
1595 OptimizePM.addPass(LoopDistributePass());
1596
1597 // Populates the VFABI attribute with the scalar-to-vector mappings
1598 // from the TargetLibraryInfo.
1599 OptimizePM.addPass(InjectTLIMappings());
1600
1601 addVectorPasses(Level, OptimizePM, LTOPhase);
1602
1603 invokeVectorizerEndEPCallbacks(OptimizePM, Level);
1604
1605 // LoopSink pass sinks instructions hoisted by LICM, which serves as a
1606 // canonicalization pass that enables other optimizations. As a result,
1607 // LoopSink pass needs to be a very late IR pass to avoid undoing LICM
1608 // result too early.
1609 OptimizePM.addPass(LoopSinkPass());
1610
1611 // And finally clean up LCSSA form before generating code.
1612 OptimizePM.addPass(InstSimplifyPass());
1613
1614 // This hoists/decomposes div/rem ops. It should run after other sink/hoist
1615 // passes to avoid re-sinking, but before SimplifyCFG because it can allow
1616 // flattening of blocks.
1617 OptimizePM.addPass(DivRemPairsPass());
1618
1619 // Merge adjacent icmps into memcmp, then expand memcmp to loads/compares.
1620 // TODO: move this further up so that it can be optimized by GVN, etc.
1621 if (EnableMergeICmps)
1622 OptimizePM.addPass(MergeICmpsPass());
1623 OptimizePM.addPass(ExpandMemCmpPass());
1624
1625 // Try to annotate calls that were created during optimization.
1626 OptimizePM.addPass(
1627 TailCallElimPass(/*UpdateFunctionEntryCount=*/isInstrumentedPGOUse()));
1628
1629 // LoopSink (and other loop passes since the last simplifyCFG) might have
1630 // resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
1631 OptimizePM.addPass(
1633 .convertSwitchRangeToICmp(true)
1634 .convertSwitchToArithmetic(true)
1635 .speculateUnpredictables(true)
1636 .hoistLoadsStoresWithCondFaulting(true)));
1637
1638 // Add the core optimizing pipeline.
1639 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM),
1640 PTO.EagerlyInvalidateAnalyses));
1641
1642 // AllocToken transforms heap allocation calls; this needs to run late after
1643 // other allocation call transformations (such as those in InstCombine).
1644 if (!LTOPreLink)
1645 MPM.addPass(AllocTokenPass());
1646
1647 invokeOptimizerLastEPCallbacks(MPM, Level, LTOPhase);
1648
1649 // Run the Instrumentor pass late.
1652
1653 // Split out cold code. Splitting is done late to avoid hiding context from
1654 // other optimizations and inadvertently regressing performance. The tradeoff
1655 // is that this has a higher code size cost than splitting early.
1656 if (EnableHotColdSplit && !LTOPreLink)
1658
1659 // Search the code for similar regions of code. If enough similar regions can
1660 // be found where extracting the regions into their own function will decrease
1661 // the size of the program, we extract the regions, and deduplicate the
1662 // structurally similar regions.
1663 if (EnableIROutliner)
1664 MPM.addPass(IROutlinerPass());
1665
1666 // Now we need to do some global optimization transforms.
1667 // FIXME: It would seem like these should come first in the optimization
1668 // pipeline and maybe be the bottom of the canonicalization pipeline? Weird
1669 // ordering here.
1670 MPM.addPass(GlobalDCEPass());
1672
1673 // Merge functions if requested. It has a better chance to merge functions
1674 // after ConstantMerge folded jump tables.
1675 if (PTO.MergeFunctions)
1677
1678 if (PTO.CallGraphProfile && !LTOPreLink)
1679 MPM.addPass(CGProfilePass(isLTOPostLink(LTOPhase)));
1680
1681 // RelLookupTableConverterPass runs later in LTO post-link pipeline.
1682 if (!LTOPreLink)
1684
1685 // Add devirtualization pass only when LTO is not enabled, as otherwise
1686 // the pass is already enabled in the LTO pipeline.
1687 if (PTO.DevirtualizeSpeculatively && LTOPhase == ThinOrFullLTOPhase::None) {
1688 // TODO: explore a better pipeline configuration that can improve
1689 // compilation time overhead.
1691 /*ExportSummary*/ nullptr,
1692 /*ImportSummary*/ nullptr,
1693 /*DevirtSpeculatively*/ PTO.DevirtualizeSpeculatively));
1695 // Given that the devirtualization creates more opportunities for inlining,
1696 // we run the Inliner again here to maximize the optimization gain we
1697 // get from devirtualization.
1698 // Also, we can't run devirtualization before inlining because the
1699 // devirtualization depends on the passes optimizing/eliminating vtable GVs
1700 // and those passes are only effective after inlining.
1701 if (EnableModuleInliner) {
1705 } else {
1708 /* MandatoryFirst */ true,
1710 }
1711 }
1712 return MPM;
1713}
1714
1718 if (Level == OptimizationLevel::O0)
1719 return buildO0DefaultPipeline(Level, Phase);
1720
1722
1723 // Currently this pipeline is only invoked in an LTO pre link pass or when we
1724 // are not running LTO. If that changes the below checks may need updating.
1726
1727 // If we are invoking this in non-LTO mode, remove any MemProf related
1728 // attributes and metadata, as we don't know whether we are linking with
1729 // a library containing the necessary interfaces.
1732
1733 // Convert @llvm.global.annotations to !annotation metadata.
1735
1736 // Force any function attributes we want the rest of the pipeline to observe.
1738
1739 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
1741
1742 // Apply module pipeline start EP callback.
1744
1745 // Add the core simplification pipeline.
1747
1748 // Now add the optimization pipeline.
1750
1751 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1752 PGOOpt->Action == PGOOptions::SampleUse)
1754
1755 // Emit annotation remarks.
1757
1758 if (isLTOPreLink(Phase))
1759 addRequiredLTOPreLinkPasses(MPM);
1760 return MPM;
1761}
1762
1765 bool EmitSummary) {
1767 if (ThinLTO)
1769 else
1771 MPM.addPass(EmbedBitcodePass(ThinLTO, EmitSummary));
1772
1773 // Perform any cleanups to the IR that aren't suitable for per TU compilation,
1774 // like removing CFI/WPD related instructions. Note, we reuse
1775 // DropTypeTestsPass to clean up type tests rather than duplicate that logic
1776 // in FatLtoCleanup.
1777 MPM.addPass(FatLtoCleanup());
1778
1779 // If we're doing FatLTO w/ CFI enabled, we don't want the type tests in the
1780 // object code, only in the bitcode section, so drop it before we run
1781 // module optimization and generate machine code. If llvm.type.test() isn't in
1782 // the IR, this won't do anything.
1784
1785 // Use the ThinLTO post-link pipeline with sample profiling
1786 if (ThinLTO && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)
1787 MPM.addPass(buildThinLTODefaultPipeline(Level, /*ImportSummary=*/nullptr));
1788 else {
1789 // ModuleSimplification does not run the coroutine passes for
1790 // ThinLTOPreLink, so we need the coroutine passes to run for ThinLTO
1791 // builds, otherwise they will miscompile.
1792 if (ThinLTO) {
1793 // TODO: replace w/ buildCoroWrapper() when it takes phase and level into
1794 // consideration.
1795 CGSCCPassManager CGPM;
1799 MPM.addPass(CoroCleanupPass());
1800 }
1801
1802 // otherwise, just use module optimization
1803 MPM.addPass(
1805 // Emit annotation remarks.
1807 }
1808 return MPM;
1809}
1810
1813 if (Level == OptimizationLevel::O0)
1815
1817
1818 // Convert @llvm.global.annotations to !annotation metadata.
1820
1821 // Force any function attributes we want the rest of the pipeline to observe.
1823
1824 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
1826
1827 // Apply module pipeline start EP callback.
1829
1830 // If we are planning to perform ThinLTO later, we don't bloat the code with
1831 // unrolling/vectorization/... now. Just simplify the module as much as we
1832 // can.
1835 // In pre-link, for ctx prof use, we stop here with an instrumented IR. We let
1836 // thinlto use the contextual info to perform imports; then use the contextual
1837 // profile in the post-thinlink phase.
1838 if (!UseCtxProfile.empty()) {
1839 addRequiredLTOPreLinkPasses(MPM);
1840 return MPM;
1841 }
1842
1843 // Run partial inlining pass to partially inline functions that have
1844 // large bodies.
1845 // FIXME: It isn't clear whether this is really the right place to run this
1846 // in ThinLTO. Because there is another canonicalization and simplification
1847 // phase that will run after the thin link, running this here ends up with
1848 // less information than will be available later and it may grow functions in
1849 // ways that aren't beneficial.
1852
1853 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1854 PGOOpt->Action == PGOOptions::SampleUse)
1856
1857 // Handle Optimizer{Early,Last}EPCallbacks added by clang on PreLink. Actual
1858 // optimization is going to be done in PostLink stage, but clang can't add
1859 // callbacks there in case of in-process ThinLTO called by linker.
1864
1865 // Emit annotation remarks.
1867
1868 addRequiredLTOPreLinkPasses(MPM);
1869
1870 return MPM;
1871}
1872
1874 OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary) {
1876
1877 // If we are invoking this without a summary index noting that we are linking
1878 // with a library containing the necessary APIs, remove any MemProf related
1879 // attributes and metadata.
1880 if (!ImportSummary || !ImportSummary->withSupportsHotColdNew())
1882
1883 if (ImportSummary) {
1884 // For ThinLTO we must apply the context disambiguation decisions early, to
1885 // ensure we can correctly match the callsites to summary data.
1888 ImportSummary, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
1889
1890 // These passes import type identifier resolutions for whole-program
1891 // devirtualization and CFI. They must run early because other passes may
1892 // disturb the specific instruction patterns that these passes look for,
1893 // creating dependencies on resolutions that may not appear in the summary.
1894 //
1895 // For example, GVN may transform the pattern assume(type.test) appearing in
1896 // two basic blocks into assume(phi(type.test, type.test)), which would
1897 // transform a dependency on a WPD resolution into a dependency on a type
1898 // identifier resolution for CFI.
1899 //
1900 // Also, WPD has access to more precise information than ICP and can
1901 // devirtualize more effectively, so it should operate on the IR first.
1902 //
1903 // The WPD and LowerTypeTest passes need to run at -O0 to lower type
1904 // metadata and intrinsics.
1905 MPM.addPass(WholeProgramDevirtPass(nullptr, ImportSummary));
1906 MPM.addPass(LowerTypeTestsPass(nullptr, ImportSummary));
1907 }
1908
1909 if (Level == OptimizationLevel::O0) {
1910 // Run a second time to clean up any type tests left behind by WPD for use
1911 // in ICP.
1914
1915 // AllocToken transforms heap allocation calls; this needs to run late after
1916 // other allocation call transformations (such as those in InstCombine).
1917 MPM.addPass(AllocTokenPass());
1918
1919 // Drop available_externally and unreferenced globals. This is necessary
1920 // with ThinLTO in order to avoid leaving undefined references to dead
1921 // globals in the object file.
1923 MPM.addPass(GlobalDCEPass());
1924 return MPM;
1925 }
1926 if (!UseCtxProfile.empty()) {
1927 MPM.addPass(
1929 } else {
1930 // Add the core simplification pipeline.
1933 }
1934 // Now add the optimization pipeline.
1937
1938 // Emit annotation remarks.
1940
1941 return MPM;
1942}
1943
1946 // FIXME: We should use a customized pre-link pipeline!
1947 return buildPerModuleDefaultPipeline(Level,
1949}
1950
1953 ModuleSummaryIndex *ExportSummary) {
1955
1957
1958 // If we are invoking this without a summary index noting that we are linking
1959 // with a library containing the necessary APIs, remove any MemProf related
1960 // attributes and metadata.
1961 if (!ExportSummary || !ExportSummary->withSupportsHotColdNew())
1963
1964 // Create a function that performs CFI checks for cross-DSO calls with targets
1965 // in the current module.
1966 MPM.addPass(CrossDSOCFIPass());
1967
1968 if (Level == OptimizationLevel::O0) {
1969 // The WPD and LowerTypeTest passes need to run at -O0 to lower type
1970 // metadata and intrinsics.
1971 MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
1972 MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
1973 // Run a second time to clean up any type tests left behind by WPD for use
1974 // in ICP.
1976
1978
1979 // AllocToken transforms heap allocation calls; this needs to run late after
1980 // other allocation call transformations (such as those in InstCombine).
1981 MPM.addPass(AllocTokenPass());
1982
1984
1985 // Emit annotation remarks.
1987
1988 return MPM;
1989 }
1990
1991 if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) {
1992 // Load sample profile before running the LTO optimization pipeline.
1993 MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
1994 PGOOpt->ProfileRemappingFile,
1996 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
1997 // RequireAnalysisPass for PSI before subsequent non-module passes.
1999 }
2000
2001 // Try to run OpenMP optimizations, quick no-op if no OpenMP metadata present.
2003
2004 // Remove unused virtual tables to improve the quality of code generated by
2005 // whole-program devirtualization and bitset lowering.
2006 MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));
2007
2008 // Do basic inference of function attributes from known properties of system
2009 // libraries and other oracles.
2011
2012 if (Level.getSpeedupLevel() > 1) {
2014 CallSiteSplittingPass(), PTO.EagerlyInvalidateAnalyses));
2015
2016 // Indirect call promotion. This should promote all the targets that are
2017 // left by the earlier promotion pass that promotes intra-module targets.
2018 // This two-step promotion is to save the compile time. For LTO, it should
2019 // produce the same result as if we only do promotion here.
2021 true /* InLTO */, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
2022
2023 // Promoting by-reference arguments to by-value exposes more constants to
2024 // IPSCCP.
2025 CGSCCPassManager CGPM;
2028 CGPM.addPass(
2031
2032 // Propagate constants at call sites into the functions they call. This
2033 // opens opportunities for globalopt (and inlining) by substituting function
2034 // pointers passed as arguments to direct uses of functions.
2035 MPM.addPass(IPSCCPPass(IPSCCPOptions(/*AllowFuncSpec=*/true)));
2036
2037 // Attach metadata to indirect call sites indicating the set of functions
2038 // they may target at run-time. This should follow IPSCCP.
2040 }
2041
2042 // Do RPO function attribute inference across the module to forward-propagate
2043 // attributes where applicable.
2044 // FIXME: Is this really an optimization rather than a canonicalization?
2046
2047 // Use in-range annotations on GEP indices to split globals where beneficial.
2048 MPM.addPass(GlobalSplitPass());
2049
2050 // Run whole program optimization of virtual call when the list of callees
2051 // is fixed.
2052 MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
2053
2055 // Stop here at -O1.
2056 if (Level == OptimizationLevel::O1) {
2057 // The LowerTypeTestsPass needs to run to lower type metadata and the
2058 // type.test intrinsics. The pass does nothing if CFI is disabled.
2059 MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
2060 // Run a second time to clean up any type tests left behind by WPD for use
2061 // in ICP (which is performed earlier than this in the regular LTO
2062 // pipeline).
2064
2066
2067 // AllocToken transforms heap allocation calls; this needs to run late after
2068 // other allocation call transformations (such as those in InstCombine).
2069 MPM.addPass(AllocTokenPass());
2070
2072
2073 // Emit annotation remarks.
2075
2076 return MPM;
2077 }
2078
2079 // TODO: Skip to match buildCoroWrapper.
2080 MPM.addPass(CoroEarlyPass());
2081
2082 // Optimize globals to try and fold them into constants.
2083 MPM.addPass(GlobalOptPass());
2084
2085 // Promote any localized globals to SSA registers.
2087
2088 // Linking modules together can lead to duplicate global constants; only
2089 // keep one copy of each constant.
2091
2092 // Remove unused arguments from functions.
2094
2095 // Reduce the code after globalopt and ipsccp. Both can open up significant
2096 // simplification opportunities, and both can propagate functions through
2097 // function pointers. When this happens, we often have to resolve varargs
2098 // calls, etc, so let instcombine do this.
2099 FunctionPassManager PeepholeFPM;
2100 PeepholeFPM.addPass(InstCombinePass());
2101 if (Level.getSpeedupLevel() > 1)
2102 PeepholeFPM.addPass(AggressiveInstCombinePass());
2103 invokePeepholeEPCallbacks(PeepholeFPM, Level);
2104
2105 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(PeepholeFPM),
2106 PTO.EagerlyInvalidateAnalyses));
2107
2108 // Lower variadic functions for supported targets prior to inlining.
2110
2111 // Note: historically, the PruneEH pass was run first to deduce nounwind and
2112 // generally clean up exception handling overhead. It isn't clear this is
2113 // valuable as the inliner doesn't currently care whether it is inlining an
2114 // invoke or a call.
2115 // Run the inliner now.
2116 if (EnableModuleInliner) {
2120 } else {
2123 /* MandatoryFirst */ true,
2126 }
2127
2128 // Perform context disambiguation after inlining, since that would reduce the
2129 // amount of additional cloning required to distinguish the allocation
2130 // contexts.
2133 /*Summary=*/nullptr,
2134 PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
2135
2136 // Optimize globals again after we ran the inliner.
2137 MPM.addPass(GlobalOptPass());
2138
2139 // Run the OpenMPOpt pass again after global optimizations.
2141
2142 // Garbage collect dead functions.
2143 MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));
2144
2145 // If we didn't decide to inline a function, check to see if we can
2146 // transform it to pass arguments by value instead of by reference.
2147 CGSCCPassManager CGPM;
2152
2154 // The IPO Passes may leave cruft around. Clean up after them.
2155 FPM.addPass(InstCombinePass());
2156 invokePeepholeEPCallbacks(FPM, Level);
2157
2160
2162
2163 // Do a post inline PGO instrumentation and use pass. This is a context
2164 // sensitive PGO pass.
2165 if (PGOOpt) {
2166 if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
2167 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true,
2168 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
2169 PGOOpt->CSProfileGenFile, PGOOpt->ProfileRemappingFile);
2170 else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
2171 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false,
2172 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
2173 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile);
2174 }
2175
2176 // Break up allocas
2178
2179 // LTO provides additional opportunities for tailcall elimination due to
2180 // link-time inlining, and visibility of nocapture attribute.
2181 FPM.addPass(
2182 TailCallElimPass(/*UpdateFunctionEntryCount=*/isInstrumentedPGOUse()));
2183
2184 // Run a few AA driver optimizations here and now to cleanup the code.
2185 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM),
2186 PTO.EagerlyInvalidateAnalyses));
2187
2188 MPM.addPass(
2190
2191 // Require the GlobalsAA analysis for the module so we can query it within
2192 // MainFPM.
2195 // Invalidate AAManager so it can be recreated and pick up the newly
2196 // available GlobalsAA.
2197 MPM.addPass(
2199 }
2200
2201 FunctionPassManager MainFPM;
2203 LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
2204 /*AllowSpeculation=*/true),
2205 /*UseMemorySSA=*/true));
2206
2207 if (RunNewGVN)
2208 MainFPM.addPass(NewGVNPass());
2209 else
2210 MainFPM.addPass(GVNPass());
2211
2212 // Remove dead memcpy()'s.
2213 MainFPM.addPass(MemCpyOptPass());
2214
2215 // Nuke dead stores.
2216 MainFPM.addPass(DSEPass());
2217 MainFPM.addPass(MoveAutoInitPass());
2219
2220 invokeVectorizerStartEPCallbacks(MainFPM, Level);
2221
2222 LoopPassManager LPM;
2223 if (EnableLoopFlatten && Level.getSpeedupLevel() > 1)
2224 LPM.addPass(LoopFlattenPass());
2225 LPM.addPass(IndVarSimplifyPass());
2226 LPM.addPass(LoopDeletionPass());
2227 // FIXME: Add loop interchange.
2228
2229 // Unroll small loops and perform peeling.
2230 LPM.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
2231 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
2232 PTO.ForgetAllSCEVInLoopUnroll));
2233 // The loop passes in LPM (LoopFullUnrollPass) do not preserve MemorySSA.
2234 // *All* loop passes must preserve it, in order to be able to use it.
2235 MainFPM.addPass(
2236 createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/false));
2237
2238 MainFPM.addPass(LoopDistributePass());
2239
2240 addVectorPasses(Level, MainFPM, ThinOrFullLTOPhase::FullLTOPostLink);
2241
2242 invokeVectorizerEndEPCallbacks(MainFPM, Level);
2243
2244 // Run the OpenMPOpt CGSCC pass again late.
2247
2248 invokePeepholeEPCallbacks(MainFPM, Level);
2249 MainFPM.addPass(JumpThreadingPass());
2250 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(MainFPM),
2251 PTO.EagerlyInvalidateAnalyses));
2252
2253 // Lower type metadata and the type.test intrinsic. This pass supports
2254 // clang's control flow integrity mechanisms (-fsanitize=cfi*) and needs
2255 // to be run at link time if CFI is enabled. This pass does nothing if
2256 // CFI is disabled.
2257 MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
2258 // Run a second time to clean up any type tests left behind by WPD for use
2259 // in ICP (which is performed earlier than this in the regular LTO pipeline).
2261
2262 // Enable splitting late in the FullLTO post-link pipeline.
2265
2266 // Add late LTO optimization passes.
2267 FunctionPassManager LateFPM;
2268
2269 // LoopSink pass sinks instructions hoisted by LICM, which serves as a
2270 // canonicalization pass that enables other optimizations. As a result,
2271 // LoopSink pass needs to be a very late IR pass to avoid undoing LICM
2272 // result too early.
2273 LateFPM.addPass(LoopSinkPass());
2274
2275 // This hoists/decomposes div/rem ops. It should run after other sink/hoist
2276 // passes to avoid re-sinking, but before SimplifyCFG because it can allow
2277 // flattening of blocks.
2278 LateFPM.addPass(DivRemPairsPass());
2279
2280 // Delete basic blocks, which optimization passes may have killed.
2282 .convertSwitchRangeToICmp(true)
2283 .convertSwitchToArithmetic(true)
2284 .hoistCommonInsts(true)
2285 .speculateUnpredictables(true)));
2286 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(LateFPM)));
2287
2288 // Drop bodies of available externally objects to improve GlobalDCE.
2290
2291 // Now that we have optimized the program, discard unreachable functions.
2292 MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));
2293
2294 if (PTO.MergeFunctions)
2296
2298
2299 if (PTO.CallGraphProfile)
2300 MPM.addPass(CGProfilePass(/*InLTOPostLink=*/true));
2301
2302 MPM.addPass(CoroCleanupPass());
2303
2304 // AllocToken transforms heap allocation calls; this needs to run late after
2305 // other allocation call transformations (such as those in InstCombine).
2306 MPM.addPass(AllocTokenPass());
2307
2309
2310 // Emit annotation remarks.
2312
2313 return MPM;
2314}
2315
2319 assert(Level == OptimizationLevel::O0 &&
2320 "buildO0DefaultPipeline should only be used with O0");
2321
2323
2324 // Perform pseudo probe instrumentation in O0 mode. This is for the
2325 // consistency between different build modes. For example, a LTO build can be
2326 // mixed with an O0 prelink and an O2 postlink. Loading a sample profile in
2327 // the postlink will require pseudo probe instrumentation in the prelink.
2328 if (PGOOpt && PGOOpt->PseudoProbeForProfiling)
2330
2331 if (PGOOpt && (PGOOpt->Action == PGOOptions::IRInstr ||
2332 PGOOpt->Action == PGOOptions::IRUse))
2334 MPM,
2335 /*RunProfileGen=*/(PGOOpt->Action == PGOOptions::IRInstr),
2336 /*IsCS=*/false, PGOOpt->AtomicCounterUpdate, PGOOpt->ProfileFile,
2337 PGOOpt->ProfileRemappingFile);
2338
2339 // Instrument function entry and exit before all inlining.
2341 EntryExitInstrumenterPass(/*PostInlining=*/false)));
2342
2344
2345 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
2347
2348 if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) {
2349 // Explicitly disable sample loader inlining and use flattened profile in O0
2350 // pipeline.
2351 MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
2352 PGOOpt->ProfileRemappingFile,
2354 /*DisableSampleProfileInlining=*/true,
2355 /*UseFlattenedProfile=*/true));
2356 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
2357 // RequireAnalysisPass for PSI before subsequent non-module passes.
2359 }
2360
2362
2363 // Build a minimal pipeline based on the semantics required by LLVM,
2364 // which is just that always inlining occurs. Further, disable generating
2365 // lifetime intrinsics to avoid enabling further optimizations during
2366 // code generation.
2368 /*InsertLifetimeIntrinsics=*/false));
2369
2370 if (PTO.MergeFunctions)
2372
2373 if (EnableMatrix)
2374 MPM.addPass(
2376
2377 if (!CGSCCOptimizerLateEPCallbacks.empty()) {
2378 CGSCCPassManager CGPM;
2380 if (!CGPM.isEmpty())
2382 }
2383 if (!LateLoopOptimizationsEPCallbacks.empty()) {
2384 LoopPassManager LPM;
2386 if (!LPM.isEmpty()) {
2388 createFunctionToLoopPassAdaptor(std::move(LPM))));
2389 }
2390 }
2391 if (!LoopOptimizerEndEPCallbacks.empty()) {
2392 LoopPassManager LPM;
2394 if (!LPM.isEmpty()) {
2396 createFunctionToLoopPassAdaptor(std::move(LPM))));
2397 }
2398 }
2399 if (!ScalarOptimizerLateEPCallbacks.empty()) {
2402 if (!FPM.isEmpty())
2403 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
2404 }
2405
2407
2408 if (!VectorizerStartEPCallbacks.empty()) {
2411 if (!FPM.isEmpty())
2412 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
2413 }
2414
2415 if (!VectorizerEndEPCallbacks.empty()) {
2418 if (!FPM.isEmpty())
2419 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
2420 }
2421
2423
2424 // AllocToken transforms heap allocation calls; this needs to run late after
2425 // other allocation call transformations (such as those in InstCombine).
2426 if (!isLTOPreLink(Phase))
2427 MPM.addPass(AllocTokenPass());
2428
2430
2433
2434 if (isLTOPreLink(Phase))
2435 addRequiredLTOPreLinkPasses(MPM);
2436
2437 // Emit annotation remarks.
2439
2440 return MPM;
2441}
2442
2444 AAManager AA;
2445
2446 // The order in which these are registered determines their priority when
2447 // being queried.
2448
2449 // Add any target-specific alias analyses that should be run early.
2450 if (TM)
2451 TM->registerEarlyDefaultAliasAnalyses(AA);
2452
2453 // First we register the basic alias analysis that provides the majority of
2454 // per-function local AA logic. This is a stateless, on-demand local set of
2455 // AA techniques.
2456 AA.registerFunctionAnalysis<BasicAA>();
2457
2458 // Next we query fast, specialized alias analyses that wrap IR-embedded
2459 // information about aliasing.
2460 AA.registerFunctionAnalysis<ScopedNoAliasAA>();
2461 AA.registerFunctionAnalysis<TypeBasedAA>();
2462
2463 // Add support for querying global aliasing information when available.
2464 // Because the `AAManager` is a function analysis and `GlobalsAA` is a module
2465 // analysis, all that the `AAManager` can do is query for any *cached*
2466 // results from `GlobalsAA` through a readonly proxy.
2468 AA.registerModuleAnalysis<GlobalsAA>();
2469
2470 // Add target-specific alias analyses.
2471 if (TM)
2472 TM->registerDefaultAliasAnalyses(AA);
2473
2474 return AA;
2475}
2476
2477bool PassBuilder::isInstrumentedPGOUse() const {
2478 return (PGOOpt && PGOOpt->Action == PGOOptions::IRUse) ||
2479 !UseCtxProfile.empty();
2480}
aarch64 falkor hwpf fix Falkor HW Prefetch Fix Late Phase
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AggressiveInstCombiner - Combine expression patterns to form expressions with fewer,...
Provides passes to inlining "always_inline" functions.
This is the interface for LLVM's primary stateless and local alias analysis.
This file provides the interface for LLVM's Call Graph Profile pass.
This header provides classes for managing passes over SCCs of the call graph.
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
This file provides the interface for a simple, fast CSE pass.
This file provides a pass which clones the current module and runs the provided pass pipeline on the ...
This file provides a pass manager that only runs its passes if the provided marker analysis has been ...
Super simple passes to force specific function attrs from the commandline into the IR for debugging p...
Provides passes for computing function attributes based on interprocedural analyses.
This file provides the interface for LLVM's Global Value Numbering pass which eliminates fully redund...
This is the interface for a simple mod/ref and alias analysis over globals.
AcceleratorCodeSelection - Identify all functions reachable from a kernel, removing those that are un...
This header defines various interfaces for pass management in LLVM.
Interfaces for passes which infer implicit function attributes from the name and signature of functio...
This file provides the primary interface to the instcombine pass.
Defines passes for running instruction simplification across chunks of IR.
This file provides the interface for LLVM's PGO Instrumentation lowering pass.
See the comments on JumpThreadingPass.
static LVOptions Options
Definition LVOptions.cpp:25
This file implements the Loop Fusion pass.
This header defines the LoopLoadEliminationPass object.
This header provides classes for managing a pipeline of passes over loops in LLVM IR.
The header file for the LowerConstantIntrinsics pass as used by the new pass manager.
The header file for the LowerExpectIntrinsic pass as used by the new pass manager.
This pass performs merges of loads and stores on both sides of a diamond (hammock).
This file provides the interface for LLVM's Global Value Numbering pass.
This header enumerates the LLVM-provided high-level optimization levels.
This file provides the interface for IR based instrumentation passes ( (profile-gen,...
Define option tunables for PGO.
static void addAnnotationRemarksPass(ModulePassManager &MPM)
static CoroConditionalWrapper buildCoroWrapper(ThinOrFullLTOPhase Phase)
static bool isLTOPreLink(ThinOrFullLTOPhase Phase)
static bool isLTOPostLink(ThinOrFullLTOPhase Phase)
This file implements relative lookup table converter that converts lookup tables to relative lookup t...
This file provides the interface for LLVM's Scalar Replacement of Aggregates pass.
This file provides the interface for the pseudo probe implementation for AutoFDO.
This file provides the interface for the sampled PGO loader pass.
This is the interface for a metadata-based scoped no-alias analysis.
This file provides the interface for the pass responsible for both simplifying and canonicalizing the...
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
This is the interface for a metadata-based TBAA.
Defines the virtual file system interface vfs::FileSystem.
A manager for alias analyses.
A module pass that rewrites heap allocations to use token-enabled allocation functions based on vario...
Definition AllocToken.h:36
Inlines functions marked as "always_inline".
Argument promotion pass.
Assign a GUID to functions as metadata.
Analysis pass providing a never-invalidated alias analysis result.
Simple pass that canonicalizes aliases.
A pass that merges duplicate global constants into a single constant.
This class implements a trivial dead store elimination.
Eliminate dead arguments (and return values) from functions.
A pass that transforms external global definitions into declarations.
Pass embeds a copy of the module optimized with the provided pass pipeline into a global variable.
A pass manager to run a set of extra loop passes if the MarkerTy analysis is present.
Statistics pass for the FunctionPropertiesAnalysis results.
The core GVN pass object.
Definition GVN.h:128
Pass to remove unused function declarations.
Definition GlobalDCE.h:38
Optimize globals that never have their address taken.
Definition GlobalOpt.h:25
Pass to perform split of global variables.
Definition GlobalSplit.h:26
Analysis pass providing a never-invalidated alias analysis result.
Pass to outline cold regions.
Pass to perform interprocedural constant propagation.
Definition SCCP.h:48
Pass to outline similar regions.
Definition IROutliner.h:468
Run instruction simplification across each instruction in the function.
Instrumentation based profiling lowering pass.
The Instrumentor pass.
This pass performs 'jump threading', which looks at blocks that have multiple predecessors and multip...
Performs Loop Invariant Code Motion Pass.
Definition LICM.h:66
Loop unroll pass that only does full loop unrolling and peeling.
Performs Loop Idiom Recognize Pass.
Performs Loop Inst Simplify Pass.
A simple loop rotation transformation.
Performs basic CFG simplifications to assist other loop passes.
A pass that does profile-guided sinking of instructions into loops.
Definition LoopSink.h:33
A simple loop rotation transformation.
Loop unroll pass that will support both full and partial unrolling.
Strips MemProf attributes and metadata.
Merge identical functions.
The module inliner pass for the new pass manager.
Module pass, wrapping the inliner pass.
Definition Inliner.h:65
void addModulePass(T Pass)
Add a module pass that runs before the CGSCC passes.
Definition Inliner.h:81
Class to hold module path string table and global value map, and encapsulate methods for operating on...
Simple pass that provides a name to every anonymous globals.
Additional 'norecurse' attribute deduction during postlink LTO phase.
OpenMP optimizations pass.
Definition OpenMPOpt.h:42
static LLVM_ABI const OptimizationLevel O3
Optimize for fast execution as much as possible.
static LLVM_ABI const OptimizationLevel O0
Disable as many optimizations as possible.
static LLVM_ABI const OptimizationLevel O2
Optimize for fast execution as much as possible without triggering significant incremental compile ti...
static LLVM_ABI const OptimizationLevel O1
Optimize quickly without destroying debuggability.
The indirect function call promotion pass.
The instrumentation (profile-instr-gen) pass for IR based PGO.
The instrumentation (profile-instr-gen) pass for IR based PGO.
The profile annotation (profile-instr-use) pass for IR based PGO.
The profile size based optimization pass for memory intrinsics.
Pass to remove unused function declarations.
LLVM_ABI void invokeFullLinkTimeOptimizationLastEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level)
LLVM_ABI ModuleInlinerWrapperPass buildInlinerPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the module pipeline that performs inlining as well as the inlining-driven cleanups.
LLVM_ABI void invokeOptimizerEarlyEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level, ThinOrFullLTOPhase Phase)
LLVM_ABI void invokeVectorizerStartEPCallbacks(FunctionPassManager &FPM, OptimizationLevel Level)
LLVM_ABI AAManager buildDefaultAAPipeline()
Build the default AAManager with the default alias analysis pipeline registered.
LLVM_ABI void invokeCGSCCOptimizerLateEPCallbacks(CGSCCPassManager &CGPM, OptimizationLevel Level)
LLVM_ABI ModulePassManager buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level)
Build a pre-link, ThinLTO-targeting default optimization pipeline to a pass manager.
LLVM_ABI void addPGOInstrPassesForO0(ModulePassManager &MPM, bool RunProfileGen, bool IsCS, bool AtomicCounterUpdate, std::string ProfileFile, std::string ProfileRemappingFile)
Add PGOInstrumentation passes for O0 only.
LLVM_ABI void invokeScalarOptimizerLateEPCallbacks(FunctionPassManager &FPM, OptimizationLevel Level)
LLVM_ABI ModulePassManager buildPerModuleDefaultPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase=ThinOrFullLTOPhase::None)
Build a per-module default optimization pipeline.
LLVM_ABI void invokePipelineStartEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level)
LLVM_ABI void invokeVectorizerEndEPCallbacks(FunctionPassManager &FPM, OptimizationLevel Level)
LLVM_ABI ModulePassManager buildO0DefaultPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase=ThinOrFullLTOPhase::None)
Build an O0 pipeline with the minimal semantically required passes.
LLVM_ABI FunctionPassManager buildFunctionSimplificationPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the core LLVM function canonicalization and simplification pipeline.
LLVM_ABI void invokePeepholeEPCallbacks(FunctionPassManager &FPM, OptimizationLevel Level)
LLVM_ABI void invokePipelineEarlySimplificationEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level, ThinOrFullLTOPhase Phase)
LLVM_ABI void invokeLoopOptimizerEndEPCallbacks(LoopPassManager &LPM, OptimizationLevel Level)
LLVM_ABI ModulePassManager buildLTODefaultPipeline(OptimizationLevel Level, ModuleSummaryIndex *ExportSummary)
Build an LTO default optimization pipeline to a pass manager.
LLVM_ABI ModulePassManager buildModuleInlinerPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the module pipeline that performs inlining with module inliner pass.
LLVM_ABI ModulePassManager buildThinLTODefaultPipeline(OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary)
Build a ThinLTO default optimization pipeline to a pass manager.
LLVM_ABI void invokeLateLoopOptimizationsEPCallbacks(LoopPassManager &LPM, OptimizationLevel Level)
LLVM_ABI void invokeFullLinkTimeOptimizationEarlyEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level)
LLVM_ABI ModulePassManager buildFatLTODefaultPipeline(OptimizationLevel Level, bool ThinLTO, bool EmitSummary)
Build a fat object default optimization pipeline.
LLVM_ABI ModulePassManager buildModuleSimplificationPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the core LLVM module canonicalization and simplification pipeline.
LLVM_ABI ModulePassManager buildModuleOptimizationPipeline(OptimizationLevel Level, ThinOrFullLTOPhase LTOPhase)
Construct the core LLVM module optimization pipeline.
LLVM_ABI void invokeOptimizerLastEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level, ThinOrFullLTOPhase Phase)
LLVM_ABI ModulePassManager buildLTOPreLinkDefaultPipeline(OptimizationLevel Level)
Build a pre-link, LTO-targeting default optimization pipeline to a pass manager.
LLVM_ATTRIBUTE_MINSIZE std::enable_if_t<!std::is_same_v< PassT, PassManager > > addPass(PassT &&Pass)
bool isEmpty() const
Returns if the pass manager contains any passes.
unsigned LicmMssaNoAccForPromotionCap
Tuning option to disable promotion to scalars in LICM with MemorySSA, if the number of accesses is too ...
Definition PassBuilder.h:78
bool SLPVectorization
Tuning option to enable/disable SLP vectorization, set based on opt level.
Definition PassBuilder.h:56
int InlinerThreshold
Tuning option to override the default inliner threshold.
Definition PassBuilder.h:92
bool LoopFusion
Tuning option to enable/disable loop fusion. Its default value is false.
Definition PassBuilder.h:66
bool CallGraphProfile
Tuning option to enable/disable call graph profile.
Definition PassBuilder.h:82
bool MergeFunctions
Tuning option to enable/disable function merging.
Definition PassBuilder.h:89
bool ForgetAllSCEVInLoopUnroll
Tuning option to forget all SCEV loops in LoopUnroll.
Definition PassBuilder.h:70
unsigned LicmMssaOptCap
Tuning option to cap the number of calls to retrieve clobbering accesses in MemorySSA,...
Definition PassBuilder.h:74
bool LoopInterleaving
Tuning option to set loop interleaving on/off, set based on opt level.
Definition PassBuilder.h:48
LLVM_ABI PipelineTuningOptions()
Constructor sets pipeline tuning defaults based on cl::opts.
bool LoopUnrolling
Tuning option to enable/disable loop unrolling. Its default value is true.
Definition PassBuilder.h:59
bool LoopInterchange
Tuning option to enable/disable loop interchange.
Definition PassBuilder.h:63
bool LoopVectorization
Tuning option to enable/disable loop vectorization, set based on opt level.
Definition PassBuilder.h:52
Reassociate commutative expressions.
Definition Reassociate.h:74
A pass to do RPO deduction and propagation of function attributes.
This pass performs function-level constant propagation and merging.
Definition SCCP.h:30
The sample profiler data loader pass.
Analysis pass providing a never-invalidated alias analysis result.
This pass transforms loops that contain branches or switches on loop- invariant conditions to have mu...
A pass to simplify and canonicalize the CFG of a function.
Definition SimplifyCFG.h:30
Analysis pass providing a never-invalidated alias analysis result.
Optimize scalar/vector interactions in IR using target cost models.
Interfaces for registering analysis passes, producing common pass manager configurations,...
Abstract Attribute helper functions.
Definition Attributor.h:165
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
@ All
Drop only llvm.assumes using type test value.
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI cl::opt< bool > EnableKnowledgeRetention
static cl::opt< bool > RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden, cl::desc("Run the NewGVN pass"))
static cl::opt< bool > DisablePreInliner("disable-preinline", cl::init(false), cl::Hidden, cl::desc("Disable pre-instrumentation inliner"))
static cl::opt< bool > PerformMandatoryInliningsFirst("mandatory-inlining-first", cl::init(false), cl::Hidden, cl::desc("Perform mandatory inlinings module-wide, before performing " "inlining"))
static cl::opt< bool > RunPartialInlining("enable-partial-inlining", cl::init(false), cl::Hidden, cl::desc("Run Partial inlining pass"))
static cl::opt< bool > EnableGVNSink("enable-gvn-sink", cl::desc("Enable the GVN sinking pass (default = off)"))
static cl::opt< bool > EnableModuleInliner("enable-module-inliner", cl::init(false), cl::Hidden, cl::desc("Enable module inliner"))
static cl::opt< bool > EnableEagerlyInvalidateAnalyses("eagerly-invalidate-analyses", cl::init(true), cl::Hidden, cl::desc("Eagerly invalidate more analyses in default pipelines"))
static cl::opt< bool > EnableMatrix("enable-matrix", cl::init(false), cl::Hidden, cl::desc("Enable lowering of the matrix intrinsics"))
ModuleToFunctionPassAdaptor createModuleToFunctionPassAdaptor(FunctionPassT &&Pass, bool EagerlyInvalidate=false)
A function to deduce a function pass type and wrap it in the templated adaptor.
cl::opt< std::string > UseCtxProfile("use-ctx-profile", cl::init(""), cl::Hidden, cl::desc("Use the specified contextual profile file"))
static cl::opt< bool > EnableSampledInstr("enable-sampled-instrumentation", cl::init(false), cl::Hidden, cl::desc("Enable profile instrumentation sampling (default = off)"))
static cl::opt< bool > EnableLoopFlatten("enable-loop-flatten", cl::init(false), cl::Hidden, cl::desc("Enable the LoopFlatten Pass"))
static cl::opt< InliningAdvisorMode > UseInlineAdvisor("enable-ml-inliner", cl::init(InliningAdvisorMode::Default), cl::Hidden, cl::desc("Enable ML policy for inliner. Currently trained for -Oz only"), cl::values(clEnumValN(InliningAdvisorMode::Default, "default", "Heuristics-based inliner version"), clEnumValN(InliningAdvisorMode::Development, "development", "Use development mode (runtime-loadable model)"), clEnumValN(InliningAdvisorMode::Release, "release", "Use release mode (AOT-compiled model)")))
static cl::opt< bool > EnableJumpTableToSwitch("enable-jump-table-to-switch", cl::init(true), cl::desc("Enable JumpTableToSwitch pass (default = true)"))
PassManager< LazyCallGraph::SCC, CGSCCAnalysisManager, LazyCallGraph &, CGSCCUpdateResult & > CGSCCPassManager
The CGSCC pass manager.
static cl::opt< bool > EnableUnrollAndJam("enable-unroll-and-jam", cl::init(false), cl::Hidden, cl::desc("Enable Unroll And Jam Pass"))
@ CGSCC_LIGHT
@ MODULE_LIGHT
ThinOrFullLTOPhase
This enumerates the LLVM full LTO or ThinLTO optimization phases.
Definition Pass.h:77
@ FullLTOPreLink
Full LTO prelink phase.
Definition Pass.h:85
@ ThinLTOPostLink
ThinLTO postlink (backend compile) phase.
Definition Pass.h:83
@ None
No LTO/ThinLTO behavior needed.
Definition Pass.h:79
@ FullLTOPostLink
Full LTO postlink (backend compile) phase.
Definition Pass.h:87
@ ThinLTOPreLink
ThinLTO prelink (summary) phase.
Definition Pass.h:81
PassManager< Loop, LoopAnalysisManager, LoopStandardAnalysisResults &, LPMUpdater & > LoopPassManager
The Loop pass manager.
static cl::opt< bool > EnableConstraintElimination("enable-constraint-elimination", cl::init(true), cl::Hidden, cl::desc("Enable pass to eliminate conditions based on linear constraints"))
ModuleToPostOrderCGSCCPassAdaptor createModuleToPostOrderCGSCCPassAdaptor(CGSCCPassT &&Pass)
A function to deduce a CGSCC pass type and wrap it in the templated adaptor.
static cl::opt< bool > EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true), cl::Hidden, cl::desc("Enable inline deferral during PGO"))
Flag to enable inline deferral during PGO.
FunctionToLoopPassAdaptor createFunctionToLoopPassAdaptor(LoopPassT &&Pass, bool UseMemorySSA=false)
A function to deduce a loop pass type and wrap it in the templated adaptor.
CGSCCToFunctionPassAdaptor createCGSCCToFunctionPassAdaptor(FunctionPassT &&Pass, bool EagerlyInvalidate=false, bool NoRerun=false)
A function to deduce a function pass type and wrap it in the templated adaptor.
cl::opt< bool > ForgetSCEVInLoopUnroll
PassManager< Module > ModulePassManager
Convenience typedef for a pass manager over modules.
static cl::opt< bool > EnablePostPGOLoopRotation("enable-post-pgo-loop-rotation", cl::init(true), cl::Hidden, cl::desc("Run the loop rotation transformation after PGO instrumentation"))
LLVM_ABI bool AreStatisticsEnabled()
Check if statistics are enabled.
static cl::opt< std::string > InstrumentColdFuncOnlyPath("instrument-cold-function-only-path", cl::init(""), cl::desc("File path for cold function only instrumentation(requires use " "with --pgo-instrument-cold-function-only)"), cl::Hidden)
static cl::opt< bool > EnableGlobalAnalyses("enable-global-analyses", cl::init(true), cl::Hidden, cl::desc("Enable inter-procedural analyses"))
static cl::opt< bool > EnableDFAJumpThreading("enable-dfa-jump-thread", cl::desc("Enable DFA jump threading"), cl::init(false), cl::Hidden)
static cl::opt< bool > FlattenedProfileUsed("flattened-profile-used", cl::init(false), cl::Hidden, cl::desc("Indicate the sample profile being used is flattened, i.e., " "no inline hierarchy exists in the profile"))
static cl::opt< AttributorRunOption > AttributorRun("attributor-enable", cl::Hidden, cl::init(AttributorRunOption::NONE), cl::desc("Enable the attributor inter-procedural deduction pass"), cl::values(clEnumValN(AttributorRunOption::FULL, "full", "enable all full attributor runs"), clEnumValN(AttributorRunOption::LIGHT, "light", "enable all attributor-light runs"), clEnumValN(AttributorRunOption::MODULE, "module", "enable module-wide attributor runs"), clEnumValN(AttributorRunOption::MODULE_LIGHT, "module-light", "enable module-wide attributor-light runs"), clEnumValN(AttributorRunOption::CGSCC, "cgscc", "enable call graph SCC attributor runs"), clEnumValN(AttributorRunOption::CGSCC_LIGHT, "cgscc-light", "enable call graph SCC attributor-light runs"), clEnumValN(AttributorRunOption::NONE, "none", "disable attributor runs")))
static cl::opt< bool > ExtraVectorizerPasses("extra-vectorizer-passes", cl::init(false), cl::Hidden, cl::desc("Run cleanup optimization passes after vectorization"))
static cl::opt< bool > EnableHotColdSplit("hot-cold-split", cl::desc("Enable hot-cold splitting pass"))
cl::opt< bool > EnableMemProfContextDisambiguation
Enable MemProf context disambiguation for thin link.
PassManager< Function > FunctionPassManager
Convenience typedef for a pass manager over functions.
LLVM_ABI InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options.
cl::opt< bool > PGOInstrumentColdFunctionOnly
static cl::opt< bool > EnableLoopInterchange("enable-loopinterchange", cl::init(false), cl::Hidden, cl::desc("Enable the LoopInterchange Pass"))
static cl::opt< bool > EnableCHR("enable-chr", cl::init(true), cl::Hidden, cl::desc("Enable control height reduction optimization (CHR)"))
static cl::opt< bool > EnableMergeFunctions("enable-merge-functions", cl::init(false), cl::Hidden, cl::desc("Enable function merging as part of the optimization pipeline"))
static cl::opt< bool > EnableDevirtualizeSpeculatively("enable-devirtualize-speculatively", cl::desc("Enable speculative devirtualization optimization"), cl::init(false))
static cl::opt< bool > EnableGVNHoist("enable-gvn-hoist", cl::desc("Enable the GVN hoisting pass (default = off)"))
cl::opt< unsigned > SetLicmMssaNoAccForPromotionCap
LLVM_ABI InlineParams getInlineParamsFromOptLevel(unsigned OptLevel)
Generate the parameters to tune the inline cost analysis based on command line options.
static cl::opt< bool > EnableIROutliner("ir-outliner", cl::init(false), cl::Hidden, cl::desc("Enable ir outliner pass"))
static cl::opt< int > PreInlineThreshold("preinline-threshold", cl::Hidden, cl::init(75), cl::desc("Control the amount of inlining in pre-instrumentation inliner " "(default = 75)"))
static cl::opt< bool > UseLoopVersioningLICM("enable-loop-versioning-licm", cl::init(false), cl::Hidden, cl::desc("Enable the experimental Loop Versioning LICM pass"))
cl::opt< unsigned > MaxDevirtIterations("max-devirt-iterations", cl::ReallyHidden, cl::init(4))
cl::opt< unsigned > SetLicmMssaOptCap
static cl::opt< bool > EnableInstrumentor("enable-instrumentor", cl::init(false), cl::Hidden, cl::desc("Enable the Instrumentor Pass"))
static cl::opt< bool > EnableMergeICmps("enable-mergeicmps", cl::init(true), cl::Hidden, cl::desc("Enable MergeICmps pass in the optimization pipeline"))
A DCE pass that assumes instructions are dead until proven otherwise.
Definition ADCE.h:31
Pass to convert @llvm.global.annotations to !annotation metadata.
This pass attempts to minimize the number of assumes without losing any information.
A more lightweight version of the Attributor which only runs attribute inference but no simplificatio...
A more lightweight version of the Attributor which only runs attribute inference but no simplificatio...
Hoist/decompose integer division and remainder instructions to enable CFG improvements and better cod...
Definition DivRemPairs.h:23
A simple and fast domtree-based CSE pass.
Definition EarlyCSE.h:31
Pass which forces specific function attributes into the IR, primarily as a debugging tool.
A simple and fast domtree-based GVN pass to hoist common expressions from sibling branches.
Definition GVN.h:431
Uses an "inverted" value numbering to decide the similarity of expressions and sinks similar expressi...
Definition GVN.h:438
A set of parameters to control various transforms performed by IPSCCP pass.
Definition SCCP.h:35
A pass which infers function attributes from the names and signatures of function declarations in a m...
Provides context on when an inline advisor is constructed in the pipeline (e.g., link phase,...
Thresholds to tune inline cost analysis.
Definition InlineCost.h:207
std::optional< int > OptSizeHintThreshold
Threshold to use for callees with inline hint, when the caller is optimized for size.
Definition InlineCost.h:216
std::optional< int > HotCallSiteThreshold
Threshold to use when the callsite is considered hot.
Definition InlineCost.h:228
int DefaultThreshold
The default threshold to start with for a callee.
Definition InlineCost.h:209
std::optional< bool > EnableDeferral
Indicate whether we should allow inline deferral.
Definition InlineCost.h:241
std::optional< int > HintThreshold
Threshold to use for callees with inline hint.
Definition InlineCost.h:212
Options for the frontend instrumentation based profiling pass.
A no-op pass template which simply forces a specific analysis result to be invalidated.
Pass to forward loads in a loop around the backedge to subsequent iterations.
A set of parameters used to control various transforms performed by the LoopUnroll pass.
The LoopVectorize Pass.
Computes function attributes in post-order over the call graph.
A utility pass template to force an analysis result to be available.