LLVM 22.0.0git
PassBuilderPipelines.cpp
Go to the documentation of this file.
1//===- Construction of pass pipelines -------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9///
10/// This file provides the implementation of the PassBuilder based on our
11/// static pass registry as well as related functionality. It also provides
12/// helpers to aid in analyzing, debugging, and testing passes and pass
13/// pipelines.
14///
15//===----------------------------------------------------------------------===//
16
17#include "llvm/ADT/Statistic.h"
28#include "llvm/IR/PassManager.h"
29#include "llvm/Pass.h"
152
153using namespace llvm;
154
155namespace llvm {
156
158 "enable-ml-inliner", cl::init(InliningAdvisorMode::Default), cl::Hidden,
159 cl::desc("Enable ML policy for inliner. Currently trained for -Oz only"),
161 "Heuristics-based inliner version"),
163 "Use development mode (runtime-loadable model)"),
165 "Use release mode (AOT-compiled model)")));
166
167/// Flag to enable inline deferral during PGO.
168static cl::opt<bool>
169 EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true),
171 cl::desc("Enable inline deferral during PGO"));
172
/// Hidden boolean flag `enable-module-inliner`; off unless set explicitly
/// (cl::init(false)).
173static cl::opt<bool> EnableModuleInliner("enable-module-inliner",
174 cl::init(false), cl::Hidden,
175 cl::desc("Enable module inliner"));
176
178 "mandatory-inlining-first", cl::init(false), cl::Hidden,
179 cl::desc("Perform mandatory inlinings module-wide, before performing "
180 "inlining"));
181
183 "eagerly-invalidate-analyses", cl::init(true), cl::Hidden,
184 cl::desc("Eagerly invalidate more analyses in default pipelines"));
185
187 "enable-merge-functions", cl::init(false), cl::Hidden,
188 cl::desc("Enable function merging as part of the optimization pipeline"));
189
191 "enable-post-pgo-loop-rotation", cl::init(true), cl::Hidden,
192 cl::desc("Run the loop rotation transformation after PGO instrumentation"));
193
195 "enable-global-analyses", cl::init(true), cl::Hidden,
196 cl::desc("Enable inter-procedural analyses"));
197
/// Hidden boolean flag `enable-partial-inlining`; off unless set explicitly
/// (cl::init(false)). Requests that the partial inlining pass be run.
198static cl::opt<bool> RunPartialInlining("enable-partial-inlining",
199 cl::init(false), cl::Hidden,
200 cl::desc("Run Partial inlining pass"));
201
203 "extra-vectorizer-passes", cl::init(false), cl::Hidden,
204 cl::desc("Run cleanup optimization passes after vectorization"));
205
/// Hidden boolean flag `enable-newgvn` (default false). When set, the function
/// simplification pipeline adds NewGVNPass where it would otherwise add
/// GVNPass.
206static cl::opt<bool> RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden,
207 cl::desc("Run the NewGVN pass"));
208
/// Hidden boolean flag `enable-loopinterchange`; off unless set explicitly
/// (cl::init(false)).
209static cl::opt<bool>
210 EnableLoopInterchange("enable-loopinterchange", cl::init(false), cl::Hidden,
211 cl::desc("Enable the LoopInterchange Pass"));
212
/// Hidden boolean flag `enable-unroll-and-jam`; off unless set explicitly
/// (cl::init(false)).
213static cl::opt<bool> EnableUnrollAndJam("enable-unroll-and-jam",
214 cl::init(false), cl::Hidden,
215 cl::desc("Enable Unroll And Jam Pass"));
216
217static cl::opt<bool> EnableLoopFlatten("enable-loop-flatten", cl::init(false),
219 cl::desc("Enable the LoopFlatten Pass"));
220
221// Experimentally allow loop header duplication. This should allow for better
222// optimization at Oz, since loop-idiom recognition can then recognize things
223// like memcpy. If this ends up being useful for many targets, we should drop
224// this flag and make a code generation option that can be controlled
225// independent of the opt level and exposed through the frontend.
227 "enable-loop-header-duplication", cl::init(false), cl::Hidden,
228 cl::desc("Enable loop header duplication at any optimization level"));
229
/// Hidden boolean flag `enable-dfa-jump-thread`; off unless set explicitly
/// (cl::init(false)).
230static cl::opt<bool>
231 EnableDFAJumpThreading("enable-dfa-jump-thread",
232 cl::desc("Enable DFA jump threading"),
233 cl::init(false), cl::Hidden);
234
/// Boolean flag `hot-cold-split`. Unlike most options in this file it is not
/// marked cl::Hidden and has no cl::init.
// NOTE(review): with no cl::init this presumably starts out false — confirm
// against the cl::opt documentation.
235static cl::opt<bool>
236 EnableHotColdSplit("hot-cold-split",
237 cl::desc("Enable hot-cold splitting pass"));
238
239static cl::opt<bool> EnableIROutliner("ir-outliner", cl::init(false),
241 cl::desc("Enable ir outliner pass"));
242
/// Hidden boolean flag `disable-preinline` (default false) for turning off the
/// pre-instrumentation inliner.
243static cl::opt<bool>
244 DisablePreInliner("disable-preinline", cl::init(false), cl::Hidden,
245 cl::desc("Disable pre-instrumentation inliner"));
246
248 "preinline-threshold", cl::Hidden, cl::init(75),
249 cl::desc("Control the amount of inlining in pre-instrumentation inliner "
250 "(default = 75)"));
251
/// Boolean flag `enable-gvn-hoist` (documented default: off). When set, the
/// function simplification pipeline adds GVNHoistPass.
252static cl::opt<bool>
253 EnableGVNHoist("enable-gvn-hoist",
254 cl::desc("Enable the GVN hoisting pass (default = off)"));
255
/// Boolean flag `enable-gvn-sink` (documented default: off). When set, the
/// function simplification pipeline adds GVNSinkPass followed by a
/// SimplifyCFGPass run.
256static cl::opt<bool>
257 EnableGVNSink("enable-gvn-sink",
258 cl::desc("Enable the GVN sinking pass (default = off)"));
259
261 "enable-jump-table-to-switch",
262 cl::desc("Enable JumpTableToSwitch pass (default = off)"));
263
264// This option is used in simplifying testing SampleFDO optimizations for
265// profile loading.
// NOTE(review): the remark above talks about SampleFDO profile loading, while
// this option's own description is about control height reduction — confirm
// that the remark belongs to this option.
/// Hidden boolean flag `enable-chr`; on by default (cl::init(true)).
266static cl::opt<bool>
267 EnableCHR("enable-chr", cl::init(true), cl::Hidden,
268 cl::desc("Enable control height reduction optimization (CHR)"));
269
271 "flattened-profile-used", cl::init(false), cl::Hidden,
272 cl::desc("Indicate the sample profile being used is flattened, i.e., "
273 "no inline hierarchy exists in the profile"));
274
/// Hidden boolean flag `enable-matrix`; off unless set explicitly
/// (cl::init(false)). Requests lowering of the matrix intrinsics.
275static cl::opt<bool>
276 EnableMatrix("enable-matrix", cl::init(false), cl::Hidden,
277 cl::desc("Enable lowering of the matrix intrinsics"));
278
280 "enable-constraint-elimination", cl::init(true), cl::Hidden,
281 cl::desc(
282 "Enable pass to eliminate conditions based on linear constraints"));
283
285 "attributor-enable", cl::Hidden, cl::init(AttributorRunOption::NONE),
286 cl::desc("Enable the attributor inter-procedural deduction pass"),
288 "enable all attributor runs"),
290 "enable module-wide attributor runs"),
292 "enable call graph SCC attributor runs"),
293 clEnumValN(AttributorRunOption::NONE, "none",
294 "disable attributor runs")));
295
297 "enable-sampled-instrumentation", cl::init(false), cl::Hidden,
298 cl::desc("Enable profile instrumentation sampling (default = off)"));
300 "enable-loop-versioning-licm", cl::init(false), cl::Hidden,
301 cl::desc("Enable the experimental Loop Versioning LICM pass"));
302
304 "instrument-cold-function-only-path", cl::init(""),
305 cl::desc("File path for cold function only instrumentation(requires use "
306 "with --pgo-instrument-cold-function-only)"),
307 cl::Hidden);
308
309// TODO: There is a similar flag in the WPD pass; we should consolidate them by
310// parsing the option only once in PassBuilder and sharing it across both places.
312 "enable-devirtualize-speculatively",
313 cl::desc("Enable speculative devirtualization optimization"),
314 cl::init(false));
315
318
320} // namespace llvm
321
339
340namespace llvm {
342} // namespace llvm
343
345 OptimizationLevel Level) {
346 for (auto &C : PeepholeEPCallbacks)
347 C(FPM, Level);
348}
351 for (auto &C : LateLoopOptimizationsEPCallbacks)
352 C(LPM, Level);
353}
355 OptimizationLevel Level) {
356 for (auto &C : LoopOptimizerEndEPCallbacks)
357 C(LPM, Level);
358}
361 for (auto &C : ScalarOptimizerLateEPCallbacks)
362 C(FPM, Level);
363}
365 OptimizationLevel Level) {
366 for (auto &C : CGSCCOptimizerLateEPCallbacks)
367 C(CGPM, Level);
368}
370 OptimizationLevel Level) {
371 for (auto &C : VectorizerStartEPCallbacks)
372 C(FPM, Level);
373}
375 OptimizationLevel Level) {
376 for (auto &C : VectorizerEndEPCallbacks)
377 C(FPM, Level);
378}
380 OptimizationLevel Level,
382 for (auto &C : OptimizerEarlyEPCallbacks)
383 C(MPM, Level, Phase);
384}
386 OptimizationLevel Level,
388 for (auto &C : OptimizerLastEPCallbacks)
389 C(MPM, Level, Phase);
390}
393 for (auto &C : FullLinkTimeOptimizationEarlyEPCallbacks)
394 C(MPM, Level);
395}
398 for (auto &C : FullLinkTimeOptimizationLastEPCallbacks)
399 C(MPM, Level);
400}
402 OptimizationLevel Level) {
403 for (auto &C : PipelineStartEPCallbacks)
404 C(MPM, Level);
405}
408 for (auto &C : PipelineEarlySimplificationEPCallbacks)
409 C(MPM, Level, Phase);
410}
411
412// Helper to add AnnotationRemarksPass.
419
420// Helper to check if the current compilation phase is preparing for LTO
425
426// Helper to check if the current compilation phase is LTO backend
431
432// Helper to conditionally wrap Coro passes.
434 // TODO: Skip passes according to Phase.
435 ModulePassManager CoroPM;
436 CoroPM.addPass(CoroEarlyPass());
437 CGSCCPassManager CGPM;
438 CGPM.addPass(CoroSplitPass());
439 CoroPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
440 CoroPM.addPass(CoroCleanupPass());
441 CoroPM.addPass(GlobalDCEPass());
442 return CoroConditionalWrapper(std::move(CoroPM));
443}
444
445// TODO: Investigate the cost/benefit of tail call elimination on debugging.
447PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
449
451
453 FPM.addPass(CountVisitsPass());
454
455 // Form SSA out of local memory accesses after breaking apart aggregates into
456 // scalars.
457 FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
458
459 // Catch trivial redundancies
460 FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
461
462 // Hoisting of scalars and load expressions.
463 FPM.addPass(
464 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
465 FPM.addPass(InstCombinePass());
466
467 FPM.addPass(LibCallsShrinkWrapPass());
468
469 invokePeepholeEPCallbacks(FPM, Level);
470
471 FPM.addPass(
472 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
473
474 // Form canonically associated expression trees, and simplify the trees using
475 // basic mathematical properties. For example, this will form (nearly)
476 // minimal multiplication trees.
477 FPM.addPass(ReassociatePass());
478
479 // Add the primary loop simplification pipeline.
480 // FIXME: Currently this is split into two loop pass pipelines because we run
481 // some function passes in between them. These can and should be removed
482 // and/or replaced by scheduling the loop pass equivalents in the correct
483 // positions. But those equivalent passes aren't powerful enough yet.
484 // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
485 // used. We have `LoopSimplifyCFGPass`, which isn't powerful enough yet to
486 // fully replace `SimplifyCFGPass`, and the closest to the other we have is
487 // `LoopInstSimplify`.
488 LoopPassManager LPM1, LPM2;
489
490 // Simplify the loop body. We do this initially to clean up after other loop
491 // passes run, either when iterating on a loop or on inner loops with
492 // implications on the outer loop.
493 LPM1.addPass(LoopInstSimplifyPass());
494 LPM1.addPass(LoopSimplifyCFGPass());
495
496 // Try to remove as much code from the loop header as possible,
497 // to reduce amount of IR that will have to be duplicated. However,
498 // do not perform speculative hoisting the first time as LICM
499 // will destroy metadata that may not need to be destroyed if run
500 // after loop rotation.
501 // TODO: Investigate promotion cap for O1.
502 LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
503 /*AllowSpeculation=*/false));
504
505 LPM1.addPass(LoopRotatePass(/* Disable header duplication */ true,
507 // TODO: Investigate promotion cap for O1.
508 LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
509 /*AllowSpeculation=*/true));
510 LPM1.addPass(SimpleLoopUnswitchPass());
512 LPM1.addPass(LoopFlattenPass());
513
514 LPM2.addPass(LoopIdiomRecognizePass());
515 LPM2.addPass(IndVarSimplifyPass());
516
518
519 LPM2.addPass(LoopDeletionPass());
520
521 // Do not enable unrolling in the PreLinkThinLTO phase during sample PGO
522 // because it changes the IR in ways that make profile annotation in the
523 // backend compile inaccurate. The normal unroller doesn't pay attention to
524 // forced full unroll attributes, so we need to make sure to allow the full
525 // unroll pass to pay attention to them.
526 if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
527 PGOOpt->Action != PGOOptions::SampleUse)
528 LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
529 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
530 PTO.ForgetAllSCEVInLoopUnroll));
531
533
534 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
535 /*UseMemorySSA=*/true));
536 FPM.addPass(
537 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
538 FPM.addPass(InstCombinePass());
539 // The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA.
540 // *All* loop passes must preserve it, in order to be able to use it.
541 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
542 /*UseMemorySSA=*/false));
543
544 // Delete small array after loop unroll.
545 FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
546
547 // Specially optimize memory movement as it doesn't look like dataflow in SSA.
548 FPM.addPass(MemCpyOptPass());
549
550 // Sparse conditional constant propagation.
551 // FIXME: It isn't clear why we do this *after* loop passes rather than
552 // before...
553 FPM.addPass(SCCPPass());
554
555 // Delete dead bit computations (instcombine runs after to fold away the dead
556 // computations, and then ADCE will run later to exploit any new DCE
557 // opportunities that creates).
558 FPM.addPass(BDCEPass());
559
560 // Run instcombine after redundancy and dead bit elimination to exploit
561 // opportunities opened up by them.
562 FPM.addPass(InstCombinePass());
563 invokePeepholeEPCallbacks(FPM, Level);
564
565 FPM.addPass(CoroElidePass());
566
568
569 // Finally, do an expensive DCE pass to catch all the dead code exposed by
570 // the simplifications and basic cleanup after all the simplifications.
571 // TODO: Investigate if this is too expensive.
572 FPM.addPass(ADCEPass());
573 FPM.addPass(
574 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
575 FPM.addPass(InstCombinePass());
576 invokePeepholeEPCallbacks(FPM, Level);
577
578 return FPM;
579}
580
584 assert(Level != OptimizationLevel::O0 && "Must request optimizations!");
585
586 // The O1 pipeline has a separate pipeline creation function to simplify
587 // construction readability.
588 if (Level.getSpeedupLevel() == 1)
589 return buildO1FunctionSimplificationPipeline(Level, Phase);
590
592
595
596 // Form SSA out of local memory accesses after breaking apart aggregates into
597 // scalars.
599
600 // Catch trivial redundancies
601 FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
604
605 // Hoisting of scalars and load expressions.
606 if (EnableGVNHoist)
607 FPM.addPass(GVNHoistPass());
608
609 // Global value numbering based sinking.
610 if (EnableGVNSink) {
611 FPM.addPass(GVNSinkPass());
612 FPM.addPass(
613 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
614 }
615
616 // Speculative execution if the target has divergent branches; otherwise nop.
617 FPM.addPass(SpeculativeExecutionPass(/* OnlyIfDivergentTarget =*/true));
618
619 // Optimize based on known information about branches, and cleanup afterward.
622
623 // Jump table to switch conversion.
628
629 FPM.addPass(
630 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
633
634 if (!Level.isOptimizingForSize())
636
637 invokePeepholeEPCallbacks(FPM, Level);
638
639 // For PGO use pipeline, try to optimize memory intrinsics such as memcpy
640 // using the size value profile. Don't perform this when optimizing for size.
641 if (PGOOpt && PGOOpt->Action == PGOOptions::IRUse &&
642 !Level.isOptimizingForSize())
644
645 FPM.addPass(TailCallElimPass(/*UpdateFunctionEntryCount=*/
646 isInstrumentedPGOUse()));
647 FPM.addPass(
648 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
649
650 // Form canonically associated expression trees, and simplify the trees using
651 // basic mathematical properties. For example, this will form (nearly)
652 // minimal multiplication trees.
654
657
658 // Add the primary loop simplification pipeline.
659 // FIXME: Currently this is split into two loop pass pipelines because we run
660 // some function passes in between them. These can and should be removed
661 // and/or replaced by scheduling the loop pass equivalents in the correct
662 // positions. But those equivalent passes aren't powerful enough yet.
663 // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
664 // used. We have `LoopSimplifyCFGPass`, which isn't powerful enough yet to
665 // fully replace `SimplifyCFGPass`, and the closest to the other we have is
666 // `LoopInstSimplify`.
667 LoopPassManager LPM1, LPM2;
668
669 // Simplify the loop body. We do this initially to clean up after other loop
670 // passes run, either when iterating on a loop or on inner loops with
671 // implications on the outer loop.
672 LPM1.addPass(LoopInstSimplifyPass());
673 LPM1.addPass(LoopSimplifyCFGPass());
674
675 // Try to remove as much code from the loop header as possible,
676 // to reduce amount of IR that will have to be duplicated. However,
677 // do not perform speculative hoisting the first time as LICM
678 // will destroy metadata that may not need to be destroyed if run
679 // after loop rotation.
680 // TODO: Investigate promotion cap for O1.
681 LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
682 /*AllowSpeculation=*/false));
683
684 // Disable header duplication in loop rotation at -Oz.
686 Level != OptimizationLevel::Oz,
688 // TODO: Investigate promotion cap for O1.
689 LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
690 /*AllowSpeculation=*/true));
691 LPM1.addPass(
692 SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3));
694 LPM1.addPass(LoopFlattenPass());
695
696 LPM2.addPass(LoopIdiomRecognizePass());
697 LPM2.addPass(IndVarSimplifyPass());
698
699 {
701 ExtraPasses.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
703 LPM2.addPass(std::move(ExtraPasses));
704 }
705
707
708 LPM2.addPass(LoopDeletionPass());
709
710 // Do not enable unrolling in the PreLinkThinLTO phase during sample PGO
711 // because it changes the IR in ways that make profile annotation in the
712 // backend compile inaccurate. The normal unroller doesn't pay attention to
713 // forced full unroll attributes, so we need to make sure to allow the full
714 // unroll pass to pay attention to them.
715 if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
716 PGOOpt->Action != PGOOptions::SampleUse)
717 LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
718 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
719 PTO.ForgetAllSCEVInLoopUnroll));
720
722
723 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
724 /*UseMemorySSA=*/true));
725 FPM.addPass(
726 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
728 // The loop passes in LPM2 (LoopIdiomRecognizePass, IndVarSimplifyPass,
729 // LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA.
730 // *All* loop passes must preserve it, in order to be able to use it.
731 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
732 /*UseMemorySSA=*/false));
733
734 // Delete small array after loop unroll.
736
737 // Try vectorization/scalarization transforms that are both improvements
738 // themselves and can allow further folds with GVN and InstCombine.
739 FPM.addPass(VectorCombinePass(/*TryEarlyFoldsOnly=*/true));
740
741 // Eliminate redundancies.
743 if (RunNewGVN)
744 FPM.addPass(NewGVNPass());
745 else
746 FPM.addPass(GVNPass());
747
748 // Sparse conditional constant propagation.
749 // FIXME: It isn't clear why we do this *after* loop passes rather than
750 // before...
751 FPM.addPass(SCCPPass());
752
753 // Delete dead bit computations (instcombine runs after to fold away the dead
754 // computations, and then ADCE will run later to exploit any new DCE
755 // opportunities that creates).
756 FPM.addPass(BDCEPass());
757
758 // Run instcombine after redundancy and dead bit elimination to exploit
759 // opportunities opened up by them.
761 invokePeepholeEPCallbacks(FPM, Level);
762
763 // Re-consider control flow based optimizations after redundancy elimination,
764 // redo DCE, etc.
767
770
771 // Finally, do an expensive DCE pass to catch all the dead code exposed by
772 // the simplifications and basic cleanup after all the simplifications.
773 // TODO: Investigate if this is too expensive.
774 FPM.addPass(ADCEPass());
775
776 // Specially optimize memory movement as it doesn't look like dataflow in SSA.
777 FPM.addPass(MemCpyOptPass());
778
779 FPM.addPass(DSEPass());
781
783 LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
784 /*AllowSpeculation=*/true),
785 /*UseMemorySSA=*/true));
786
787 FPM.addPass(CoroElidePass());
788
790
792 .convertSwitchRangeToICmp(true)
793 .convertSwitchToArithmetic(true)
794 .hoistCommonInsts(true)
795 .sinkCommonInsts(true)));
797 invokePeepholeEPCallbacks(FPM, Level);
798
799 return FPM;
800}
801
802void PassBuilder::addRequiredLTOPreLinkPasses(ModulePassManager &MPM) {
805}
806
/// Add the pre-instrumentation inlining step and its cleanup passes to \p MPM.
///
/// \p Level must not be O0 (asserted). The inline hint threshold is set to
/// PreInlineThreshold when optimizing for size and to 325 otherwise. A small
/// function cleanup pipeline (SROA, EarlyCSE, SimplifyCFG, InstCombine and the
/// peephole extension-point callbacks) is nested into the CGSCC pipeline
/// obtained from MIWP. MIWP is then added to \p MPM, followed by GlobalDCE.
807void PassBuilder::addPreInlinerPasses(ModulePassManager &MPM,
808 OptimizationLevel Level,
809 ThinOrFullLTOPhase LTOPhase) {
810 assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
// NOTE(review): the condition guarding the early return below is not visible
// in this listing — confirm what it tests.
812 return;
813 InlineParams IP;
814
816
817 // FIXME: The hint threshold has the same value used by the regular inliner
818 // when not optimizing for size. This should probably be lowered after
819 // performance testing.
820 // FIXME: this comment is cargo culted from the old pass manager, revisit.
821 IP.HintThreshold = Level.isOptimizingForSize() ? PreInlineThreshold : 325;
823 IP, /* MandatoryFirst */ true,
825 CGSCCPassManager &CGPipeline = MIWP.getPM();
826
828 FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
829 FPM.addPass(EarlyCSEPass()); // Catch trivial redundancies.
830 FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(
831 true))); // Merge & remove basic blocks.
832 FPM.addPass(InstCombinePass()); // Combine silly sequences.
833 invokePeepholeEPCallbacks(FPM, Level);
834
// Run the function cleanup pipeline from inside MIWP's CGSCC pipeline.
835 CGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
836 std::move(FPM), PTO.EagerlyInvalidateAnalyses));
837
838 MPM.addPass(std::move(MIWP));
839
840 // Delete anything that is now dead to make sure that we don't instrument
841 // dead code. Instrumentation can end up keeping dead code around and
842 // dramatically increase code size.
843 MPM.addPass(GlobalDCEPass());
844}
845
846void PassBuilder::addPostPGOLoopRotation(ModulePassManager &MPM,
847 OptimizationLevel Level) {
849 // Disable header duplication in loop rotation at -Oz.
852 LoopRotatePass(EnableLoopHeaderDuplication ||
853 Level != OptimizationLevel::Oz),
854 /*UseMemorySSA=*/false),
855 PTO.EagerlyInvalidateAnalyses));
856 }
857}
858
/// Add the PGO profile-use or instrumentation-generation passes to \p MPM for
/// an optimizing pipeline. \p Level must not be O0 (asserted).
///
/// \param RunProfileGen  if false, only PGOInstrumentationUse and a
///        RequireAnalysisPass for ProfileSummaryAnalysis are added, and the
///        function returns. If true, PGOInstrumentationGen is added, then
///        addPostPGOLoopRotation() is called, then InstrProfilingLoweringPass
///        is added.
/// \param IsCS  forwarded to PGOInstrumentationUse and
///        InstrProfilingLoweringPass; also selects the CSFDO instrumentation
///        type and is copied into InstrProfOptions::UseBFIInPromotion.
/// \param AtomicCounterUpdate  copied into InstrProfOptions::Atomic.
/// \param ProfileFile  profile to read in use mode (asserted non-empty); in
///        generation mode, if non-empty, it becomes
///        InstrProfOptions::InstrProfileOutput.
/// \param ProfileRemappingFile  only passed to PGOInstrumentationUse.
859void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM,
860 OptimizationLevel Level, bool RunProfileGen,
861 bool IsCS, bool AtomicCounterUpdate,
862 std::string ProfileFile,
863 std::string ProfileRemappingFile) {
864 assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
865
866 if (!RunProfileGen) {
867 assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
// NOTE(review): FS is not a parameter or local of this function; presumably
// a PassBuilder member — confirm.
868 MPM.addPass(
869 PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS));
870 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
871 // RequireAnalysisPass for PSI before subsequent non-module passes.
872 MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
873 return;
874 }
875
876 // Perform PGO instrumentation.
877 MPM.addPass(PGOInstrumentationGen(IsCS ? PGOInstrumentationType::CSFDO
879
880 addPostPGOLoopRotation(MPM, Level);
881 // Add the profile lowering pass.
882 InstrProfOptions Options;
883 if (!ProfileFile.empty())
884 Options.InstrProfileOutput = ProfileFile;
885 // Do counter promotion at Level greater than O0.
886 Options.DoCounterPromotion = true;
887 Options.UseBFIInPromotion = IsCS;
888 if (EnableSampledInstr) {
889 Options.Sampling = true;
890 // With sampling, there is little benefit to enable counter promotion.
891 // But note that sampling does work with counter promotion.
892 Options.DoCounterPromotion = false;
893 }
894 Options.Atomic = AtomicCounterUpdate;
895 MPM.addPass(InstrProfilingLoweringPass(Options, IsCS));
896}
897
899 bool RunProfileGen, bool IsCS,
900 bool AtomicCounterUpdate,
901 std::string ProfileFile,
902 std::string ProfileRemappingFile) {
903 if (!RunProfileGen) {
904 assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
905 MPM.addPass(
906 PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS));
907 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
908 // RequireAnalysisPass for PSI before subsequent non-module passes.
910 return;
911 }
912
913 // Perform PGO instrumentation.
916 // Add the profile lowering pass.
918 if (!ProfileFile.empty())
919 Options.InstrProfileOutput = ProfileFile;
920 // Do not do counter promotion at O0.
921 Options.DoCounterPromotion = false;
922 Options.UseBFIInPromotion = IsCS;
923 Options.Atomic = AtomicCounterUpdate;
925}
926
928 return getInlineParams(Level.getSpeedupLevel(), Level.getSizeLevel());
929}
930
934 InlineParams IP;
935 if (PTO.InlinerThreshold == -1)
936 IP = getInlineParamsFromOptLevel(Level);
937 else
938 IP = getInlineParams(PTO.InlinerThreshold);
939 // For PreLinkThinLTO + SamplePGO or PreLinkFullLTO + SamplePGO,
940 // set hot-caller threshold to 0 to disable hot
941 // callsite inline (as much as possible [1]) because it makes
942 // profile annotation in the backend inaccurate.
943 //
944 // [1] Note the cost of a function could be below zero due to erased
945 // prologue / epilogue.
946 if (isLTOPreLink(Phase) && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)
948
949 if (PGOOpt)
951
955
956 // Require the GlobalsAA analysis for the module so we can query it within
957 // the CGSCC pipeline.
959 MIWP.addModulePass(RequireAnalysisPass<GlobalsAA, Module>());
960 // Invalidate AAManager so it can be recreated and pick up the newly
961 // available GlobalsAA.
962 MIWP.addModulePass(
964 }
965
966 // Require the ProfileSummaryAnalysis for the module so we can query it within
967 // the inliner pass.
969
970 // Now begin the main postorder CGSCC pipeline.
971 // FIXME: The current CGSCC pipeline has its origins in the legacy pass
972 // manager and trying to emulate its precise behavior. Much of this doesn't
973 // make a lot of sense and we should revisit the core CGSCC structure.
974 CGSCCPassManager &MainCGPipeline = MIWP.getPM();
975
976 // Note: historically, the PruneEH pass was run first to deduce nounwind and
977 // generally clean up exception handling overhead. It isn't clear this is
978 // valuable as the inliner doesn't currently care whether it is inlining an
979 // invoke or a call.
980
982 MainCGPipeline.addPass(AttributorCGSCCPass());
983
984 // Deduce function attributes. We do another run of this after the function
985 // simplification pipeline, so this only needs to run when it could affect the
986 // function simplification pipeline, which is only the case with recursive
987 // functions.
988 MainCGPipeline.addPass(PostOrderFunctionAttrsPass(/*SkipNonRecursive*/ true));
989
990 // When at O3 add argument promotion to the pass pipeline.
991 // FIXME: It isn't at all clear why this should be limited to O3.
992 if (Level == OptimizationLevel::O3)
993 MainCGPipeline.addPass(ArgumentPromotionPass());
994
995 // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if
996 // there are no OpenMP runtime calls present in the module.
997 if (Level == OptimizationLevel::O2 || Level == OptimizationLevel::O3)
998 MainCGPipeline.addPass(OpenMPOptCGSCCPass(Phase));
999
1000 invokeCGSCCOptimizerLateEPCallbacks(MainCGPipeline, Level);
1001
1002 // Add the core function simplification pipeline nested inside the
1003 // CGSCC walk.
1006 PTO.EagerlyInvalidateAnalyses, /*NoRerun=*/true));
1007
1008 // Finally, deduce any function attributes based on the fully simplified
1009 // function.
1010 MainCGPipeline.addPass(PostOrderFunctionAttrsPass());
1011
1012 // Mark that the function is fully simplified and that it shouldn't be
1013 // simplified again if we somehow revisit it due to CGSCC mutations unless
1014 // it's been modified since.
1017
1019 MainCGPipeline.addPass(CoroSplitPass(Level != OptimizationLevel::O0));
1020 MainCGPipeline.addPass(CoroAnnotationElidePass());
1021 }
1022
1023 // Make sure we don't affect potential future NoRerun CGSCC adaptors.
1024 MIWP.addLateModulePass(createModuleToFunctionPassAdaptor(
1026
1027 return MIWP;
1028}
1029
1034
1036 // For PreLinkThinLTO + SamplePGO or PreLinkFullLTO + SamplePGO,
1037 // set hot-caller threshold to 0 to disable hot
1038 // callsite inline (as much as possible [1]) because it makes
1039 // profile annotation in the backend inaccurate.
1040 //
1041 // [1] Note the cost of a function could be below zero due to erased
1042 // prologue / epilogue.
1043 if (isLTOPreLink(Phase) && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)
1044 IP.HotCallSiteThreshold = 0;
1045
1046 if (PGOOpt)
1048
1049 // The inline deferral logic is used to avoid losing some inlining
1050 // opportunities in the future. It is helpful in the SCC inliner, in which
1051 // inlining is processed in bottom-up order.
1052 // In the module inliner, by contrast, the inlining order is priority-based
1053 // by default, so inline deferral is unnecessary there. We therefore disable
1054 // the inline deferral logic in the module inliner.
1055 IP.EnableDeferral = false;
1056
1059 MPM.addPass(GlobalOptPass());
1060 MPM.addPass(GlobalDCEPass());
1061 MPM.addPass(PGOCtxProfFlatteningPass(/*IsPreThinlink=*/false));
1062 }
1063
1066 PTO.EagerlyInvalidateAnalyses));
1067
1071 MPM.addPass(
1073 }
1074
1075 return MPM;
1076}
1077
1081 assert(Level != OptimizationLevel::O0 &&
1082 "Should not be used for O0 pipeline");
1083
1085 "FullLTOPostLink shouldn't call buildModuleSimplificationPipeline!");
1086
1088
1089 // Place pseudo probe instrumentation as the first pass of the pipeline to
1090 // minimize the impact of optimization changes.
1091 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1094
1095 bool HasSampleProfile = PGOOpt && (PGOOpt->Action == PGOOptions::SampleUse);
1096
1097 // In ThinLTO mode, when flattened profile is used, all the available
1098 // profile information will be annotated in PreLink phase so there is
1099 // no need to load the profile again in PostLink.
1100 bool LoadSampleProfile =
1101 HasSampleProfile &&
1103
1104 // During the ThinLTO backend phase we perform early indirect call promotion
1105 // here, before globalopt. Otherwise imported available_externally functions
1106 // look unreferenced and are removed. If we are going to load the sample
1107 // profile then defer until later.
1108 // TODO: See if we can move later and consolidate with the location where
1109 // we perform ICP when we are loading a sample profile.
1110 // TODO: We pass HasSampleProfile (whether there was a sample profile file
1111 // passed to the compile) to the SamplePGO flag of ICP. This is used to
1112 // determine whether the new direct calls are annotated with prof metadata.
1113 // Ideally this should be determined from whether the IR is annotated with
1114 // sample profile, and not whether a sample profile was provided on the
1115 // command line. E.g. for flattened profiles where we will not be reloading
1116 // the sample profile in the ThinLTO backend, we ideally shouldn't have to
1117 // provide the sample profile file.
1118 if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink && !LoadSampleProfile)
1119 MPM.addPass(PGOIndirectCallPromotion(true /* InLTO */, HasSampleProfile));
1120
1121 // Create an early function pass manager to cleanup the output of the
1122 // frontend. Not necessary with LTO post link pipelines since the pre link
1123 // pipeline already cleaned up the frontend output.
1125 // Do basic inference of function attributes from known properties of system
1126 // libraries and other oracles.
1128 MPM.addPass(CoroEarlyPass());
1129
1130 FunctionPassManager EarlyFPM;
1131 EarlyFPM.addPass(EntryExitInstrumenterPass(/*PostInlining=*/false));
1132 // Lower llvm.expect to metadata before attempting transforms.
1133 // Compare/branch metadata may alter the behavior of passes like
1134 // SimplifyCFG.
1136 EarlyFPM.addPass(SimplifyCFGPass());
1138 EarlyFPM.addPass(EarlyCSEPass());
1139 if (Level == OptimizationLevel::O3)
1140 EarlyFPM.addPass(CallSiteSplittingPass());
1142 std::move(EarlyFPM), PTO.EagerlyInvalidateAnalyses));
1143 }
1144
1145 if (LoadSampleProfile) {
1146 // Annotate sample profile right after early FPM to ensure freshness of
1147 // the debug info.
1149 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile, Phase, FS));
1150 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
1151 // RequireAnalysisPass for PSI before subsequent non-module passes.
1153 // Do not invoke ICP in the LTOPrelink phase as it makes it hard
1154 // for the profile annotation to be accurate in the LTO backend.
1155 if (!isLTOPreLink(Phase))
1156 // We perform early indirect call promotion here, before globalopt.
1157 // This is important for the ThinLTO backend phase because otherwise
1158 // imported available_externally functions look unreferenced and are
1159 // removed.
1160 MPM.addPass(
1161 PGOIndirectCallPromotion(true /* IsInLTO */, true /* SamplePGO */));
1162 }
1163
1164 // Try to perform OpenMP specific optimizations on the module. This is a
1165 // (quick!) no-op if there are no OpenMP runtime calls present in the module.
1167
1169 MPM.addPass(AttributorPass());
1170
1171 // Lower type metadata and the type.test intrinsic in the ThinLTO
1172 // post link pipeline after ICP. This is to enable usage of the type
1173 // tests in ICP sequences.
1175 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr,
1177
1179
1180 // Interprocedural constant propagation now that basic cleanup has occurred
1181 // and prior to optimizing globals.
1182 // FIXME: This position in the pipeline hasn't been carefully considered in
1183 // years, it should be re-analyzed.
1184 MPM.addPass(IPSCCPPass(
1185 IPSCCPOptions(/*AllowFuncSpec=*/
1186 Level != OptimizationLevel::Os &&
1187 Level != OptimizationLevel::Oz &&
1188 !isLTOPreLink(Phase))));
1189
1190 // Attach metadata to indirect call sites indicating the set of functions
1191 // they may target at run-time. This should follow IPSCCP.
1193
1194 // Optimize globals to try and fold them into constants.
1195 MPM.addPass(GlobalOptPass());
1196
1197 // Create a small function pass pipeline to cleanup after all the global
1198 // optimizations.
1199 FunctionPassManager GlobalCleanupPM;
1200 // FIXME: Should this instead be a run of SROA?
1201 GlobalCleanupPM.addPass(PromotePass());
1202 GlobalCleanupPM.addPass(InstCombinePass());
1203 invokePeepholeEPCallbacks(GlobalCleanupPM, Level);
1204 GlobalCleanupPM.addPass(
1205 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
1206 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(GlobalCleanupPM),
1207 PTO.EagerlyInvalidateAnalyses));
1208
1209 // We already asserted this happens in non-FullLTOPostLink earlier.
1210 const bool IsPreLink = Phase != ThinOrFullLTOPhase::ThinLTOPostLink;
1211 // Enable contextual profiling instrumentation.
1212 const bool IsCtxProfGen =
1214 const bool IsPGOPreLink = !IsCtxProfGen && PGOOpt && IsPreLink;
1215 const bool IsPGOInstrGen =
1216 IsPGOPreLink && PGOOpt->Action == PGOOptions::IRInstr;
1217 const bool IsPGOInstrUse =
1218 IsPGOPreLink && PGOOpt->Action == PGOOptions::IRUse;
1219 const bool IsMemprofUse = IsPGOPreLink && !PGOOpt->MemoryProfile.empty();
1220 // We don't want to mix pgo ctx gen and pgo gen; we also don't currently
1221 // enable ctx profiling from the frontend.
1223 "Enabling both instrumented PGO and contextual instrumentation is not "
1224 "supported.");
1225 const bool IsCtxProfUse =
1227
1228 assert(
1230 "--instrument-cold-function-only-path is provided but "
1231 "--pgo-instrument-cold-function-only is not enabled");
1232 const bool IsColdFuncOnlyInstrGen = PGOInstrumentColdFunctionOnly &&
1233 IsPGOPreLink &&
1235
1236 if (IsPGOInstrGen || IsPGOInstrUse || IsMemprofUse || IsCtxProfGen ||
1237 IsCtxProfUse || IsColdFuncOnlyInstrGen)
1238 addPreInlinerPasses(MPM, Level, Phase);
1239
1240 // Add all the requested passes for instrumentation PGO, if requested.
1241 if (IsPGOInstrGen || IsPGOInstrUse) {
1242 addPGOInstrPasses(MPM, Level,
1243 /*RunProfileGen=*/IsPGOInstrGen,
1244 /*IsCS=*/false, PGOOpt->AtomicCounterUpdate,
1245 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile);
1246 } else if (IsCtxProfGen || IsCtxProfUse) {
1248 // In pre-link, we just want the instrumented IR. We use the contextual
1249 // profile in the post-thinlink phase.
1250 // The instrumentation will be removed in post-thinlink after IPO.
1251 // FIXME(mtrofin): move AssignGUIDPass if there is agreement to use this
1252 // mechanism for GUIDs.
1253 MPM.addPass(AssignGUIDPass());
1254 if (IsCtxProfUse) {
1255 MPM.addPass(PGOCtxProfFlatteningPass(/*IsPreThinlink=*/true));
1256 return MPM;
1257 }
1258 // Block further inlining in the instrumented ctxprof case. This avoids
1259 // confusingly collecting profiles for the same GUID corresponding to
1260 // different variants of the function. We could do like PGO and identify
1261 // functions by a (GUID, Hash) tuple, but since the ctxprof "use" waits for
1262 // thinlto to happen before performing any further optimizations, it's
1263 // unnecessary to collect profiles for non-prevailing copies.
1265 addPostPGOLoopRotation(MPM, Level);
1267 } else if (IsColdFuncOnlyInstrGen) {
1268 addPGOInstrPasses(MPM, Level, /* RunProfileGen */ true, /* IsCS */ false,
1269 /* AtomicCounterUpdate */ false,
1271 /* ProfileRemappingFile */ "");
1272 }
1273
1274 if (IsPGOInstrGen || IsPGOInstrUse || IsCtxProfGen)
1275 MPM.addPass(PGOIndirectCallPromotion(false, false));
1276
1277 if (IsPGOPreLink && PGOOpt->CSAction == PGOOptions::CSIRInstr)
1278 MPM.addPass(PGOInstrumentationGenCreateVar(PGOOpt->CSProfileGenFile,
1280
1281 if (IsMemprofUse)
1282 MPM.addPass(MemProfUsePass(PGOOpt->MemoryProfile, FS));
1283
1284 if (PGOOpt && (PGOOpt->Action == PGOOptions::IRUse ||
1285 PGOOpt->Action == PGOOptions::SampleUse))
1286 MPM.addPass(PGOForceFunctionAttrsPass(PGOOpt->ColdOptType));
1287
1288 MPM.addPass(AlwaysInlinerPass(/*InsertLifetimeIntrinsics=*/true));
1289
1292 else
1293 MPM.addPass(buildInlinerPipeline(Level, Phase));
1294
1295 // Remove any dead arguments exposed by cleanups, constant folding globals,
1296 // and argument promotion.
1298
1301
1303 MPM.addPass(CoroCleanupPass());
1304
1305 // Optimize globals now that functions are fully simplified.
1306 MPM.addPass(GlobalOptPass());
1307 MPM.addPass(GlobalDCEPass());
1308
1309 return MPM;
1310}
1311
/// Appends the vectorization-stage function passes to FPM.
///
/// LTOPhase selects between two orderings. For FullLTOPostLink the unroll and
/// cleanup block is placed early and SCCP, InstCombine and BDCE are added
/// ahead of SLP vectorization; for every other phase loop unrolling and the
/// CFG-preserving SROA are added after VectorCombine. Dereferenceable assumes
/// are only dropped outside of the LTO pre-link phases.
///
1312/// TODO: Should LTO cause any differences to this set of passes?
1313void PassBuilder::addVectorPasses(OptimizationLevel Level,
1315 ThinOrFullLTOPhase LTOPhase) {
1316 const bool IsFullLTO = LTOPhase == ThinOrFullLTOPhase::FullLTOPostLink;
1317
1320
1321 // Drop dereferenceable assumes after vectorization, as they are no longer
1322 // needed and can inhibit further optimization.
1323 if (!isLTOPreLink(LTOPhase))
1324 FPM.addPass(DropUnnecessaryAssumesPass(/*DropDereferenceable=*/true));
1325
1327 if (IsFullLTO) {
1328 // The vectorizer may have significantly shortened a loop body; unroll
1329 // again. Unroll small loops to hide loop backedge latency and saturate any
1330 // parallel execution resources of an out-of-order processor. We also then
1331 // need to clean up redundancies and loop invariant code.
1332 // FIXME: It would be really good to use a loop-integrated instruction
1333 // combiner for cleanup here so that the unrolling and LICM can be pipelined
1334 // across the loop nests.
1335 // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
1338 LoopUnrollAndJamPass(Level.getSpeedupLevel())));
1340 Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
1343 // Now that we are done with loop unrolling, be it either by LoopVectorizer,
1344 // or LoopUnroll passes, some variable-offset GEP's into alloca's could have
1345 // become constant-offset, thus enabling SROA and alloca promotion. Do so.
1346 // NOTE: we are very late in the pipeline, and we don't have any LICM
1347 // or SimplifyCFG passes scheduled after us, that would cleanup
1348 // the CFG mess this may create if allowed to modify CFG, so forbid that.
1350 }
1351
1352 if (!IsFullLTO) {
1353 // Eliminate loads by forwarding stores from the previous iteration to loads
1354 // of the current iteration.
1356 }
1357 // Cleanup after the loop optimization passes.
1358 FPM.addPass(InstCombinePass());
1359
1360 if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
1361 ExtraFunctionPassManager<ShouldRunExtraVectorPasses> ExtraPasses;
1362 // At higher optimization levels, try to clean up any runtime overlap and
1363 // alignment checks inserted by the vectorizer. We want to track correlated
1364 // runtime checks for two inner loops in the same outer loop, fold any
1365 // common computations, hoist loop-invariant aspects out of any outer loop,
1366 // and unswitch the runtime checks if possible. Once hoisted, we may have
1367 // dead (or speculatable) control flows or more combining opportunities.
1368 ExtraPasses.addPass(EarlyCSEPass());
1369 ExtraPasses.addPass(CorrelatedValuePropagationPass());
1370 ExtraPasses.addPass(InstCombinePass());
1371 LoopPassManager LPM;
1372 LPM.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
1373 /*AllowSpeculation=*/true));
1374 LPM.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
1376 ExtraPasses.addPass(
1377 createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/true));
1378 ExtraPasses.addPass(
1379 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
1380 ExtraPasses.addPass(InstCombinePass());
1381 FPM.addPass(std::move(ExtraPasses));
1382 }
1383
1384 // Now that we've formed fast to execute loop structures, we do further
1385 // optimizations. These are run afterward as they might block doing complex
1386 // analyses and transforms such as what are needed for loop vectorization.
1387
1388 // Cleanup after loop vectorization, etc. Simplification passes like CVP and
1389 // GVN, loop transforms, and others have already run, so it's now better to
1390 // convert to more optimized IR using more aggressive simplify CFG options.
1391 // The extra sinking transform can create larger basic blocks, so do this
1392 // before SLP vectorization.
1393 FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
1394 .forwardSwitchCondToPhi(true)
1395 .convertSwitchRangeToICmp(true)
1396 .convertSwitchToArithmetic(true)
1397 .convertSwitchToLookupTable(true)
1398 .needCanonicalLoops(false)
1399 .hoistCommonInsts(true)
1400 .sinkCommonInsts(true)));
1401
1402 if (IsFullLTO) {
1403 FPM.addPass(SCCPPass());
1404 FPM.addPass(InstCombinePass());
1405 FPM.addPass(BDCEPass());
1406 }
1407
1408 // Optimize parallel scalar instruction chains into SIMD instructions.
1409 if (PTO.SLPVectorization) {
1410 FPM.addPass(SLPVectorizerPass());
// With the extra vectorizer passes enabled (and a speedup level above 1),
// follow SLP vectorization with an EarlyCSE run.
1411 if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
1412 FPM.addPass(EarlyCSEPass());
1413 }
1414 }
1415 // Enhance/cleanup vector code.
1416 FPM.addPass(VectorCombinePass());
1417
1418 if (!IsFullLTO) {
1419 FPM.addPass(InstCombinePass());
1420 // Unroll small loops to hide loop backedge latency and saturate any
1421 // parallel execution resources of an out-of-order processor. We also then
1422 // need to clean up redundancies and loop invariant code.
1423 // FIXME: It would be really good to use a loop-integrated instruction
1424 // combiner for cleanup here so that the unrolling and LICM can be pipelined
1425 // across the loop nests.
1426 // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
1427 if (EnableUnrollAndJam && PTO.LoopUnrolling) {
1429 LoopUnrollAndJamPass(Level.getSpeedupLevel())));
1430 }
1431 FPM.addPass(LoopUnrollPass(LoopUnrollOptions(
1432 Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
1433 PTO.ForgetAllSCEVInLoopUnroll)));
1434 FPM.addPass(WarnMissedTransformationsPass());
1435 // Now that we are done with loop unrolling, be it either by LoopVectorizer,
1436 // or LoopUnroll passes, some variable-offset GEP's into alloca's could have
1437 // become constant-offset, thus enabling SROA and alloca promotion. Do so.
1438 // NOTE: we are very late in the pipeline, and we don't have any LICM
1439 // or SimplifyCFG passes scheduled after us, that would cleanup
1440 // the CFG mess this may create if allowed to modify CFG, so forbid that.
1441 FPM.addPass(SROAPass(SROAOptions::PreserveCFG));
1442 }
1443
1444 FPM.addPass(InferAlignmentPass());
1445 FPM.addPass(InstCombinePass());
1446
1447 // This is needed for two reasons:
1448 // 1. It works around problems that instcombine introduces, such as sinking
1449 // expensive FP divides into loops containing multiplications using the
1450 // divide result.
1451 // 2. It helps to clean up some loop-invariant code created by the loop
1452 // unroll pass when IsFullLTO=false.
1454 LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
1455 /*AllowSpeculation=*/true),
1456 /*UseMemorySSA=*/true));
1457
1458 // Now that we've vectorized and unrolled loops, we may have more refined
1459 // alignment information, try to re-derive it here.
1460 FPM.addPass(AlignmentFromAssumptionsPass());
1461}
1462
1465 ThinOrFullLTOPhase LTOPhase) {
// Overview: assembles the late, module-level optimization stage into MPM and
// returns it. In order: optional context-sensitive PGO instrumentation/use
// (skipped in LTO pre-link), the OptimizePM function pipeline (loop deletion,
// optional interchange/fusion, loop distribution, the vector passes from
// addVectorPasses, LoopSink, InstSimplify, DivRemPairs, TailCallElim and a
// final CFG cleanup), then module passes such as AllocToken, the IR outliner,
// GlobalDCE, CGProfile and, when no LTO phase is active, speculative
// devirtualization.
1466 const bool LTOPreLink = isLTOPreLink(LTOPhase);
1468
1469 // Run partial inlining pass to partially inline functions that have
1470 // large bodies.
1473
1474 // Remove avail extern fns and globals definitions since we aren't compiling
1475 // an object file for later LTO. For LTO we want to preserve these so they
1476 // are eligible for inlining at link-time. Note if they are unreferenced they
1477 // will be removed by GlobalDCE later, so this only impacts referenced
1478 // available externally globals. Eventually they will be suppressed during
1479 // codegen, but eliminating here enables more opportunity for GlobalDCE as it
1480 // may make globals referenced by available external functions dead and saves
1481 // running remaining passes on the eliminated functions. These should be
1482 // preserved during prelinking for link-time inlining decisions.
1483 if (!LTOPreLink)
1485
1486 // Do RPO function attribute inference across the module to forward-propagate
1487 // attributes where applicable.
1488 // FIXME: Is this really an optimization rather than a canonicalization?
1490
1491 // Do a post inline PGO instrumentation and use pass. This is a context
1492 // sensitive PGO pass. We don't want to do this in LTOPreLink phase as
1493 // cross-module inline has not been done yet. The context sensitive
1494 // instrumentation is after all the inlines are done.
1495 if (!LTOPreLink && PGOOpt) {
1496 if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
1497 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true,
1498 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
1499 PGOOpt->CSProfileGenFile, PGOOpt->ProfileRemappingFile);
1500 else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
1501 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false,
1502 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
1503 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile);
1504 }
1505
1506 // Re-compute GlobalsAA here prior to function passes. This is particularly
1507 // useful as the above will have inlined, DCE'ed, and function-attr
1508 // propagated everything. We should at this point have a reasonably minimal
1509 // and richly annotated call graph. By computing aliasing and mod/ref
1510 // information for all local globals here, the late loop passes and notably
1511 // the vectorizer will be able to use them to help recognize vectorizable
1512 // memory operations.
1515
1516 invokeOptimizerEarlyEPCallbacks(MPM, Level, LTOPhase);
1517
1518 FunctionPassManager OptimizePM;
1519
1520 // Only drop unnecessary assumes post-inline and post-link, as otherwise
1521 // additional uses of the affected value may be introduced through inlining
1522 // and CSE.
1523 if (!isLTOPreLink(LTOPhase))
1524 OptimizePM.addPass(DropUnnecessaryAssumesPass());
1525
1526 // Scheduling LoopVersioningLICM when inlining is over, because after that
1527 // we may see more accurate aliasing. Reason to run this late is that too
1528 // early versioning may prevent further inlining due to increase of code
1529 // size. Other optimizations which run later might benefit from the no-alias
1530 // assumption in the cloned loop.
1532 OptimizePM.addPass(
1534 // LoopVersioningLICM pass might increase new LICM opportunities.
1536 LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
1537 /*AllowSpeculation=*/true),
1538 /*UseMemorySSA=*/true));
1539 }
1540
1541 OptimizePM.addPass(Float2IntPass());
1543
1544 if (EnableMatrix) {
1545 OptimizePM.addPass(LowerMatrixIntrinsicsPass());
1546 OptimizePM.addPass(EarlyCSEPass());
1547 }
1548
1549 // CHR pass should only be applied with the profile information.
1550 // The check is to check the profile summary information in CHR.
1551 if (EnableCHR && Level == OptimizationLevel::O3)
1552 OptimizePM.addPass(ControlHeightReductionPass());
1553
1554 // FIXME: We need to run some loop optimizations to re-rotate loops after
1555 // simplifycfg and others undo their rotation.
1556
1557 // Optimize the loop execution. These passes operate on entire loop nests
1558 // rather than on each loop in an inside-out manner, and so they are actually
1559 // function passes.
1560
1561 invokeVectorizerStartEPCallbacks(OptimizePM, Level);
1562
1563 LoopPassManager LPM;
1564 // First rotate loops that may have been un-rotated by prior passes.
1565 // Disable header duplication at -Oz.
1567 Level != OptimizationLevel::Oz,
1568 LTOPreLink));
1569 // Some loops may have become dead by now. Try to delete them.
1570 // FIXME: see discussion in https://reviews.llvm.org/D112851,
1571 // this may need to be revisited once we run GVN before loop deletion
1572 // in the simplification pipeline.
1573 LPM.addPass(LoopDeletionPass());
1574
1575 if (PTO.LoopInterchange)
1576 LPM.addPass(LoopInterchangePass());
1577
1578 OptimizePM.addPass(
1579 createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/false));
1580
1581 // FIXME: This may not be the right place in the pipeline.
1582 // We need to have the data to support the right place.
1583 if (PTO.LoopFusion)
1584 OptimizePM.addPass(LoopFusePass());
1585
1586 // Distribute loops to allow partial vectorization. I.e. isolate dependences
1587 // into separate loop that would otherwise inhibit vectorization. This is
1588 // currently only performed for loops marked with the metadata
1589 // llvm.loop.distribute=true or when -enable-loop-distribute is specified.
1590 OptimizePM.addPass(LoopDistributePass());
1591
1592 // Populates the VFABI attribute with the scalar-to-vector mappings
1593 // from the TargetLibraryInfo.
1594 OptimizePM.addPass(InjectTLIMappings());
1595
1596 addVectorPasses(Level, OptimizePM, LTOPhase);
1597
1598 invokeVectorizerEndEPCallbacks(OptimizePM, Level);
1599
1600 // LoopSink pass sinks instructions hoisted by LICM, which serves as a
1601 // canonicalization pass that enables other optimizations. As a result,
1602 // LoopSink pass needs to be a very late IR pass to avoid undoing LICM
1603 // result too early.
1604 OptimizePM.addPass(LoopSinkPass());
1605
1606 // And finally clean up LCSSA form before generating code.
1607 OptimizePM.addPass(InstSimplifyPass());
1608
1609 // This hoists/decomposes div/rem ops. It should run after other sink/hoist
1610 // passes to avoid re-sinking, but before SimplifyCFG because it can allow
1611 // flattening of blocks.
1612 OptimizePM.addPass(DivRemPairsPass());
1613
1614 // Try to annotate calls that were created during optimization.
1615 OptimizePM.addPass(
1616 TailCallElimPass(/*UpdateFunctionEntryCount=*/isInstrumentedPGOUse()));
1617
1618 // LoopSink (and other loop passes since the last simplifyCFG) might have
1619 // resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
1620 OptimizePM.addPass(
1622 .convertSwitchRangeToICmp(true)
1623 .convertSwitchToArithmetic(true)
1624 .speculateUnpredictables(true)
1625 .hoistLoadsStoresWithCondFaulting(true)));
1626
1627 // Add the core optimizing pipeline.
1628 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM),
1629 PTO.EagerlyInvalidateAnalyses));
1630
1631 // AllocToken transforms heap allocation calls; this needs to run late after
1632 // other allocation call transformations (such as those in InstCombine).
1633 if (!LTOPreLink)
1634 MPM.addPass(AllocTokenPass());
1635
1636 invokeOptimizerLastEPCallbacks(MPM, Level, LTOPhase);
1637
1638 // Split out cold code. Splitting is done late to avoid hiding context from
1639 // other optimizations and inadvertently regressing performance. The tradeoff
1640 // is that this has a higher code size cost than splitting early.
1641 if (EnableHotColdSplit && !LTOPreLink)
1643
1644 // Search the code for similar regions of code. If enough similar regions can
1645 // be found where extracting the regions into their own function will decrease
1646 // the size of the program, we extract the regions, and deduplicate the
1647 // structurally similar regions.
1648 if (EnableIROutliner)
1649 MPM.addPass(IROutlinerPass());
1650
1651 // Now we need to do some global optimization transforms.
1652 // FIXME: It would seem like these should come first in the optimization
1653 // pipeline and maybe be the bottom of the canonicalization pipeline? Weird
1654 // ordering here.
1655 MPM.addPass(GlobalDCEPass());
1657
1658 // Merge functions if requested. It has a better chance to merge functions
1659 // after ConstantMerge folded jump tables.
1660 if (PTO.MergeFunctions)
1662
1663 if (PTO.CallGraphProfile && !LTOPreLink)
1664 MPM.addPass(CGProfilePass(isLTOPostLink(LTOPhase)));
1665
1666 // RelLookupTableConverterPass runs later in LTO post-link pipeline.
1667 if (!LTOPreLink)
1669
1670 // Add devirtualization pass only when LTO is not enabled, as otherwise
1671 // the pass is already enabled in the LTO pipeline.
1672 if (PTO.DevirtualizeSpeculatively && LTOPhase == ThinOrFullLTOPhase::None) {
1673 // TODO: explore a better pipeline configuration that can improve
1674 // compilation time overhead.
1676 /*ExportSummary*/ nullptr,
1677 /*ImportSummary*/ nullptr,
1678 /*DevirtSpeculatively*/ PTO.DevirtualizeSpeculatively));
1679 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr,
1681 // Given that the devirtualization creates more opportunities for inlining,
1682 // we run the Inliner again here to maximize the optimization gain we
1683 // get from devirtualization.
1684 // Also, we can't run devirtualization before inlining because the
1685 // devirtualization depends on the passes optimizing/eliminating vtable GVs
1686 // and those passes are only effective after inlining.
1687 if (EnableModuleInliner) {
1691 } else {
1694 /* MandatoryFirst */ true,
1696 }
1697 }
1698 return MPM;
1699}
1700
// At O0 the dedicated O0 pipeline is returned instead of everything below.
1704 if (Level == OptimizationLevel::O0)
1705 return buildO0DefaultPipeline(Level, Phase);
1706
1708
1709 // Currently this pipeline is only invoked in an LTO pre link pass or when we
1710 // are not running LTO. If that changes the below checks may need updating.
1712
1713 // If we are invoking this in non-LTO mode, remove any MemProf related
1714 // attributes and metadata, as we don't know whether we are linking with
1715 // a library containing the necessary interfaces.
1718
1719 // Convert @llvm.global.annotations to !annotation metadata.
1721
1722 // Force any function attributes we want the rest of the pipeline to observe.
1724
1725 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
1727
1728 // Apply module pipeline start EP callback.
1730
1731 // Add the core simplification pipeline.
1733
1734 // Now add the optimization pipeline.
1736
1737 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1738 PGOOpt->Action == PGOOptions::SampleUse)
1740
1741 // Emit annotation remarks.
1743
// LTO pre-link phases additionally get the required LTO pre-link passes
// before the finished pipeline is returned.
1744 if (isLTOPreLink(Phase))
1745 addRequiredLTOPreLinkPasses(MPM);
1746 return MPM;
1747}
1748
1751 bool EmitSummary) {
// Overview: embeds bitcode for the module (EmbedBitcodePass), runs the
// FatLTO cleanup, and then optimizes the module either through the ThinLTO
// post-link pipeline (ThinLTO together with a sample profile) or through the
// module optimization path. Returns MPM.
1753 if (ThinLTO)
1755 else
1757 MPM.addPass(EmbedBitcodePass(ThinLTO, EmitSummary));
1758
1759 // Perform any cleanups to the IR that aren't suitable for per TU compilation,
1760 // like removing CFI/WPD related instructions. Note, we reuse
1761 // LowerTypeTestsPass to clean up type tests rather than duplicate that logic
1762 // in FatLtoCleanup.
1763 MPM.addPass(FatLtoCleanup());
1764
1765 // If we're doing FatLTO w/ CFI enabled, we don't want the type tests in the
1766 // object code, only in the bitcode section, so drop it before we run
1767 // module optimization and generate machine code. If llvm.type.test() isn't in
1768 // the IR, this won't do anything.
1769 MPM.addPass(
1771
1772 // Use the ThinLTO post-link pipeline with sample profiling.
1773 if (ThinLTO && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)
1774 MPM.addPass(buildThinLTODefaultPipeline(Level, /*ImportSummary=*/nullptr));
1775 else {
1776 // ModuleSimplification does not run the coroutine passes for
1777 // ThinLTOPreLink, so we need the coroutine passes to run for ThinLTO
1778 // builds, otherwise they will miscompile.
1779 if (ThinLTO) {
1780 // TODO: replace w/ buildCoroWrapper() when it takes phase and level into
1781 // consideration.
1782 CGSCCPassManager CGPM;
1786 MPM.addPass(CoroCleanupPass());
1787 }
1788
1789 // Otherwise, just use module optimization.
1790 MPM.addPass(
1792 // Emit annotation remarks.
1794 }
1795 return MPM;
1796}
1797
// ThinLTO pre-link pipeline: simplify only and leave the heavier optimization
// to the post-link stage (see the comments below). Both exits visible here
// add the required LTO pre-link passes before returning MPM.
1800 if (Level == OptimizationLevel::O0)
1802
1804
1805 // Convert @llvm.global.annotations to !annotation metadata.
1807
1808 // Force any function attributes we want the rest of the pipeline to observe.
1810
1811 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
1813
1814 // Apply module pipeline start EP callback.
1816
1817 // If we are planning to perform ThinLTO later, we don't bloat the code with
1818 // unrolling/vectorization/... now. Just simplify the module as much as we
1819 // can.
1822 // In pre-link, for ctx prof use, we stop here with an instrumented IR. We let
1823 // thinlto use the contextual info to perform imports; then use the contextual
1824 // profile in the post-thinlink phase.
1825 if (!UseCtxProfile.empty()) {
1826 addRequiredLTOPreLinkPasses(MPM);
1827 return MPM;
1828 }
1829
1830 // Run partial inlining pass to partially inline functions that have
1831 // large bodies.
1832 // FIXME: It isn't clear whether this is really the right place to run this
1833 // in ThinLTO. Because there is another canonicalization and simplification
1834 // phase that will run after the thin link, running this here ends up with
1835 // less information than will be available later and it may grow functions in
1836 // ways that aren't beneficial.
1839
1840 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1841 PGOOpt->Action == PGOOptions::SampleUse)
1843
1844 // Handle Optimizer{Early,Last}EPCallbacks added by clang on PreLink. Actual
1845 // optimization is going to be done in PostLink stage, but clang can't add
1846 // callbacks there in case of in-process ThinLTO called by linker.
1851
1852 // Emit annotation remarks.
1854
1855 addRequiredLTOPreLinkPasses(MPM);
1856
1857 return MPM;
1858}
1859
1861 OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary) {
// Overview: when ImportSummary is provided, whole-program devirtualization
// and type-test lowering import their resolutions first. At O0 the pipeline
// then only cleans up leftover type tests, adds AllocToken and GlobalDCE, and
// returns. Otherwise a simplification stage (a different one when
// UseCtxProfile is set) is followed by the optimization pipeline. Returns MPM.
1863
1864 // If we are invoking this without a summary index noting that we are linking
1865 // with a library containing the necessary APIs, remove any MemProf related
1866 // attributes and metadata.
1867 if (!ImportSummary || !ImportSummary->withSupportsHotColdNew())
1869
1870 if (ImportSummary) {
1871 // For ThinLTO we must apply the context disambiguation decisions early, to
1872 // ensure we can correctly match the callsites to summary data.
1875 ImportSummary, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
1876
1877 // These passes import type identifier resolutions for whole-program
1878 // devirtualization and CFI. They must run early because other passes may
1879 // disturb the specific instruction patterns that these passes look for,
1880 // creating dependencies on resolutions that may not appear in the summary.
1881 //
1882 // For example, GVN may transform the pattern assume(type.test) appearing in
1883 // two basic blocks into assume(phi(type.test, type.test)), which would
1884 // transform a dependency on a WPD resolution into a dependency on a type
1885 // identifier resolution for CFI.
1886 //
1887 // Also, WPD has access to more precise information than ICP and can
1888 // devirtualize more effectively, so it should operate on the IR first.
1889 //
1890 // The WPD and LowerTypeTest passes need to run at -O0 to lower type
1891 // metadata and intrinsics.
1892 MPM.addPass(WholeProgramDevirtPass(nullptr, ImportSummary));
1893 MPM.addPass(LowerTypeTestsPass(nullptr, ImportSummary));
1894 }
1895
1896 if (Level == OptimizationLevel::O0) {
1897 // Run a second time to clean up any type tests left behind by WPD for use
1898 // in ICP.
1899 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr,
1902
1903 // AllocToken transforms heap allocation calls; this needs to run late after
1904 // other allocation call transformations (such as those in InstCombine).
1905 MPM.addPass(AllocTokenPass());
1906
1907 // Drop available_externally and unreferenced globals. This is necessary
1908 // with ThinLTO in order to avoid leaving undefined references to dead
1909 // globals in the object file.
1911 MPM.addPass(GlobalDCEPass());
1912 return MPM;
1913 }
1914 if (!UseCtxProfile.empty()) {
1915 MPM.addPass(
1917 } else {
1918 // Add the core simplification pipeline.
1921 }
1922 // Now add the optimization pipeline.
1925
1926 // Emit annotation remarks.
1928
1929 return MPM;
1930}
1931
1934 // FIXME: We should use a customized pre-link pipeline!
// Until then, this simply forwards to the per-module default pipeline.
1935 return buildPerModuleDefaultPipeline(Level,
1937}
1938
1941 ModuleSummaryIndex *ExportSummary) {
1943
1945
1946 // If we are invoking this without a summary index noting that we are linking
1947 // with a library containing the necessary APIs, remove any MemProf related
1948 // attributes and metadata.
1949 if (!ExportSummary || !ExportSummary->withSupportsHotColdNew())
1951
1952 // Create a function that performs CFI checks for cross-DSO calls with targets
1953 // in the current module.
1954 MPM.addPass(CrossDSOCFIPass());
1955
1956 if (Level == OptimizationLevel::O0) {
1957 // The WPD and LowerTypeTest passes need to run at -O0 to lower type
1958 // metadata and intrinsics.
1959 MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
1960 MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
1961 // Run a second time to clean up any type tests left behind by WPD for use
1962 // in ICP.
1963 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr,
1965
1967
1968 // AllocToken transforms heap allocation calls; this needs to run late after
1969 // other allocation call transformations (such as those in InstCombine).
1970 MPM.addPass(AllocTokenPass());
1971
1973
1974 // Emit annotation remarks.
1976
1977 return MPM;
1978 }
1979
1980 if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) {
1981 // Load sample profile before running the LTO optimization pipeline.
1982 MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
1983 PGOOpt->ProfileRemappingFile,
1985 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
1986 // RequireAnalysisPass for PSI before subsequent non-module passes.
1988 }
1989
1990 // Try to run OpenMP optimizations, quick no-op if no OpenMP metadata present.
1992
1993 // Remove unused virtual tables to improve the quality of code generated by
1994 // whole-program devirtualization and bitset lowering.
1995 MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));
1996
1997 // Do basic inference of function attributes from known properties of system
1998 // libraries and other oracles.
2000
2001 if (Level.getSpeedupLevel() > 1) {
2003 CallSiteSplittingPass(), PTO.EagerlyInvalidateAnalyses));
2004
2005 // Indirect call promotion. This should promote all the targets that are
2006 // left by the earlier promotion pass that promotes intra-module targets.
2007 // This two-step promotion is to save the compile time. For LTO, it should
2008 // produce the same result as if we only do promotion here.
2010 true /* InLTO */, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
2011
2012 // Promoting by-reference arguments to by-value exposes more constants to
2013 // IPSCCP.
2014 CGSCCPassManager CGPM;
2017 CGPM.addPass(
2020
2021 // Propagate constants at call sites into the functions they call. This
2022 // opens opportunities for globalopt (and inlining) by substituting function
2023 // pointers passed as arguments to direct uses of functions.
2024 MPM.addPass(IPSCCPPass(IPSCCPOptions(/*AllowFuncSpec=*/
2025 Level != OptimizationLevel::Os &&
2026 Level != OptimizationLevel::Oz)));
2027
2028 // Attach metadata to indirect call sites indicating the set of functions
2029 // they may target at run-time. This should follow IPSCCP.
2031 }
2032
2033 // Do RPO function attribute inference across the module to forward-propagate
2034 // attributes where applicable.
2035 // FIXME: Is this really an optimization rather than a canonicalization?
2037
2038 // Use in-range annotations on GEP indices to split globals where beneficial.
2039 MPM.addPass(GlobalSplitPass());
2040
2041 // Run whole program optimization of virtual call when the list of callees
2042 // is fixed.
2043 MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
2044
2046 // Stop here at -O1.
2047 if (Level == OptimizationLevel::O1) {
2048 // The LowerTypeTestsPass needs to run to lower type metadata and the
2049 // type.test intrinsics. The pass does nothing if CFI is disabled.
2050 MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
2051 // Run a second time to clean up any type tests left behind by WPD for use
2052 // in ICP (which is performed earlier than this in the regular LTO
2053 // pipeline).
2054 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr,
2056
2058
2059 // AllocToken transforms heap allocation calls; this needs to run late after
2060 // other allocation call transformations (such as those in InstCombine).
2061 MPM.addPass(AllocTokenPass());
2062
2064
2065 // Emit annotation remarks.
2067
2068 return MPM;
2069 }
2070
2071 // TODO: Skip to match buildCoroWrapper.
2072 MPM.addPass(CoroEarlyPass());
2073
2074 // Optimize globals to try and fold them into constants.
2075 MPM.addPass(GlobalOptPass());
2076
2077 // Promote any localized globals to SSA registers.
2079
2080 // Linking modules together can lead to duplicate global constants; only
2081 // keep one copy of each constant.
2083
2084 // Remove unused arguments from functions.
2086
2087 // Reduce the code after globalopt and ipsccp. Both can open up significant
2088 // simplification opportunities, and both can propagate functions through
2089 // function pointers. When this happens, we often have to resolve varargs
2090 // calls, etc, so let instcombine do this.
2091 FunctionPassManager PeepholeFPM;
2092 PeepholeFPM.addPass(InstCombinePass());
2093 if (Level.getSpeedupLevel() > 1)
2094 PeepholeFPM.addPass(AggressiveInstCombinePass());
2095 invokePeepholeEPCallbacks(PeepholeFPM, Level);
2096
2097 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(PeepholeFPM),
2098 PTO.EagerlyInvalidateAnalyses));
2099
2100 // Lower variadic functions for supported targets prior to inlining.
2102
2103 // Note: historically, the PruneEH pass was run first to deduce nounwind and
2104 // generally clean up exception handling overhead. It isn't clear this is
2105 // valuable as the inliner doesn't currently care whether it is inlining an
2106 // invoke or a call.
2107 // Run the inliner now.
2108 if (EnableModuleInliner) {
2112 } else {
2115 /* MandatoryFirst */ true,
2118 }
2119
2120 // Perform context disambiguation after inlining, since that would reduce the
2121 // amount of additional cloning required to distinguish the allocation
2122 // contexts.
2125 /*Summary=*/nullptr,
2126 PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
2127
2128 // Optimize globals again after we ran the inliner.
2129 MPM.addPass(GlobalOptPass());
2130
2131 // Run the OpenMPOpt pass again after global optimizations.
2133
2134 // Garbage collect dead functions.
2135 MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));
2136
2137 // If we didn't decide to inline a function, check to see if we can
2138 // transform it to pass arguments by value instead of by reference.
2139 CGSCCPassManager CGPM;
2144
2146 // The IPO Passes may leave cruft around. Clean up after them.
2147 FPM.addPass(InstCombinePass());
2148 invokePeepholeEPCallbacks(FPM, Level);
2149
2152
2154
2155 // Do a post inline PGO instrumentation and use pass. This is a context
2156 // sensitive PGO pass.
2157 if (PGOOpt) {
2158 if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
2159 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true,
2160 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
2161 PGOOpt->CSProfileGenFile, PGOOpt->ProfileRemappingFile);
2162 else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
2163 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false,
2164 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
2165 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile);
2166 }
2167
2168 // Break up allocas
2170
2171 // LTO provides additional opportunities for tailcall elimination due to
2172 // link-time inlining, and visibility of nocapture attribute.
2173 FPM.addPass(
2174 TailCallElimPass(/*UpdateFunctionEntryCount=*/isInstrumentedPGOUse()));
2175
2176 // Run a few AA driver optimizations here and now to clean up the code.
2177 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM),
2178 PTO.EagerlyInvalidateAnalyses));
2179
2180 MPM.addPass(
2182
2183 // Require the GlobalsAA analysis for the module so we can query it within
2184 // MainFPM.
2187 // Invalidate AAManager so it can be recreated and pick up the newly
2188 // available GlobalsAA.
2189 MPM.addPass(
2191 }
2192
2193 FunctionPassManager MainFPM;
2195 LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
2196 /*AllowSpeculation=*/true),
2197 /*UseMemorySSA=*/true));
2198
2199 if (RunNewGVN)
2200 MainFPM.addPass(NewGVNPass());
2201 else
2202 MainFPM.addPass(GVNPass());
2203
2204 // Remove dead memcpy()'s.
2205 MainFPM.addPass(MemCpyOptPass());
2206
2207 // Nuke dead stores.
2208 MainFPM.addPass(DSEPass());
2209 MainFPM.addPass(MoveAutoInitPass());
2211
2212 invokeVectorizerStartEPCallbacks(MainFPM, Level);
2213
2214 LoopPassManager LPM;
2215 if (EnableLoopFlatten && Level.getSpeedupLevel() > 1)
2216 LPM.addPass(LoopFlattenPass());
2217 LPM.addPass(IndVarSimplifyPass());
2218 LPM.addPass(LoopDeletionPass());
2219 // FIXME: Add loop interchange.
2220
2221 // Unroll small loops and perform peeling.
2222 LPM.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
2223 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
2224 PTO.ForgetAllSCEVInLoopUnroll));
2225 // The loop passes in LPM (LoopFullUnrollPass) do not preserve MemorySSA.
2226 // *All* loop passes must preserve it, in order to be able to use it.
2227 MainFPM.addPass(
2228 createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/false));
2229
2230 MainFPM.addPass(LoopDistributePass());
2231
2232 addVectorPasses(Level, MainFPM, ThinOrFullLTOPhase::FullLTOPostLink);
2233
2234 invokeVectorizerEndEPCallbacks(MainFPM, Level);
2235
2236 // Run the OpenMPOpt CGSCC pass again late.
2239
2240 invokePeepholeEPCallbacks(MainFPM, Level);
2241 MainFPM.addPass(JumpThreadingPass());
2242 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(MainFPM),
2243 PTO.EagerlyInvalidateAnalyses));
2244
2245 // Lower type metadata and the type.test intrinsic. This pass supports
2246 // clang's control flow integrity mechanisms (-fsanitize=cfi*) and needs
2247 // to be run at link time if CFI is enabled. This pass does nothing if
2248 // CFI is disabled.
2249 MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
2250 // Run a second time to clean up any type tests left behind by WPD for use
2251 // in ICP (which is performed earlier than this in the regular LTO pipeline).
2252 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr,
2254
2255 // Enable splitting late in the FullLTO post-link pipeline.
2258
2259 // Add late LTO optimization passes.
2260 FunctionPassManager LateFPM;
2261
2262 // LoopSink pass sinks instructions hoisted by LICM, which serves as a
2263 // canonicalization pass that enables other optimizations. As a result,
2264 // LoopSink pass needs to be a very late IR pass to avoid undoing LICM
2265 // result too early.
2266 LateFPM.addPass(LoopSinkPass());
2267
2268 // This hoists/decomposes div/rem ops. It should run after other sink/hoist
2269 // passes to avoid re-sinking, but before SimplifyCFG because it can allow
2270 // flattening of blocks.
2271 LateFPM.addPass(DivRemPairsPass());
2272
2273 // Delete basic blocks, which optimization passes may have killed.
2275 .convertSwitchRangeToICmp(true)
2276 .convertSwitchToArithmetic(true)
2277 .hoistCommonInsts(true)
2278 .speculateUnpredictables(true)));
2279 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(LateFPM)));
2280
2281 // Drop bodies of available externally objects to improve GlobalDCE.
2283
2284 // Now that we have optimized the program, discard unreachable functions.
2285 MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));
2286
2287 if (PTO.MergeFunctions)
2289
2291
2292 if (PTO.CallGraphProfile)
2293 MPM.addPass(CGProfilePass(/*InLTOPostLink=*/true));
2294
2295 MPM.addPass(CoroCleanupPass());
2296
2297 // AllocToken transforms heap allocation calls; this needs to run late after
2298 // other allocation call transformations (such as those in InstCombine).
2299 MPM.addPass(AllocTokenPass());
2300
2302
2303 // Emit annotation remarks.
2305
2306 return MPM;
2307}
2308
2312 assert(Level == OptimizationLevel::O0 &&
2313 "buildO0DefaultPipeline should only be used with O0");
2314
2316
2317 // Perform pseudo probe instrumentation in O0 mode. This is for the
2318 // consistency between different build modes. For example, a LTO build can be
2319 // mixed with an O0 prelink and an O2 postlink. Loading a sample profile in
2320 // the postlink will require pseudo probe instrumentation in the prelink.
2321 if (PGOOpt && PGOOpt->PseudoProbeForProfiling)
2323
2324 if (PGOOpt && (PGOOpt->Action == PGOOptions::IRInstr ||
2325 PGOOpt->Action == PGOOptions::IRUse))
2327 MPM,
2328 /*RunProfileGen=*/(PGOOpt->Action == PGOOptions::IRInstr),
2329 /*IsCS=*/false, PGOOpt->AtomicCounterUpdate, PGOOpt->ProfileFile,
2330 PGOOpt->ProfileRemappingFile);
2331
2332 // Instrument function entry and exit before all inlining.
2334 EntryExitInstrumenterPass(/*PostInlining=*/false)));
2335
2337
2338 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
2340
2341 if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) {
2342 // Explicitly disable sample loader inlining and use flattened profile in O0
2343 // pipeline.
2344 MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
2345 PGOOpt->ProfileRemappingFile,
2346 ThinOrFullLTOPhase::None, nullptr,
2347 /*DisableSampleProfileInlining=*/true,
2348 /*UseFlattenedProfile=*/true));
2349 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
2350 // RequireAnalysisPass for PSI before subsequent non-module passes.
2352 }
2353
2355
2356 // Build a minimal pipeline based on the semantics required by LLVM,
2357 // which is just that always inlining occurs. Further, disable generating
2358 // lifetime intrinsics to avoid enabling further optimizations during
2359 // code generation.
2361 /*InsertLifetimeIntrinsics=*/false));
2362
2363 if (PTO.MergeFunctions)
2365
2366 if (EnableMatrix)
2367 MPM.addPass(
2369
2370 if (!CGSCCOptimizerLateEPCallbacks.empty()) {
2371 CGSCCPassManager CGPM;
2373 if (!CGPM.isEmpty())
2375 }
2376 if (!LateLoopOptimizationsEPCallbacks.empty()) {
2377 LoopPassManager LPM;
2379 if (!LPM.isEmpty()) {
2381 createFunctionToLoopPassAdaptor(std::move(LPM))));
2382 }
2383 }
2384 if (!LoopOptimizerEndEPCallbacks.empty()) {
2385 LoopPassManager LPM;
2387 if (!LPM.isEmpty()) {
2389 createFunctionToLoopPassAdaptor(std::move(LPM))));
2390 }
2391 }
2392 if (!ScalarOptimizerLateEPCallbacks.empty()) {
2395 if (!FPM.isEmpty())
2396 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
2397 }
2398
2400
2401 if (!VectorizerStartEPCallbacks.empty()) {
2404 if (!FPM.isEmpty())
2405 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
2406 }
2407
2408 if (!VectorizerEndEPCallbacks.empty()) {
2411 if (!FPM.isEmpty())
2412 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
2413 }
2414
2416
2417 // AllocToken transforms heap allocation calls; this needs to run late after
2418 // other allocation call transformations (such as those in InstCombine).
2419 if (!isLTOPreLink(Phase))
2420 MPM.addPass(AllocTokenPass());
2421
2423
2424 if (isLTOPreLink(Phase))
2425 addRequiredLTOPreLinkPasses(MPM);
2426
2427 // Emit annotation remarks.
2429
2430 return MPM;
2431}
2432
2434 AAManager AA;
2435
2436 // The order in which these are registered determines their priority when
2437 // being queried.
2438
2439 // Add any target-specific alias analyses that should be run early.
2440 if (TM)
2441 TM->registerEarlyDefaultAliasAnalyses(AA);
2442
2443 // First we register the basic alias analysis that provides the majority of
2444 // per-function local AA logic. This is a stateless, on-demand local set of
2445 // AA techniques.
2446 AA.registerFunctionAnalysis<BasicAA>();
2447
2448 // Next we query fast, specialized alias analyses that wrap IR-embedded
2449 // information about aliasing.
2450 AA.registerFunctionAnalysis<ScopedNoAliasAA>();
2451 AA.registerFunctionAnalysis<TypeBasedAA>();
2452
2453 // Add support for querying global aliasing information when available.
2454 // Because the `AAManager` is a function analysis and `GlobalsAA` is a module
2455 // analysis, all that the `AAManager` can do is query for any *cached*
2456 // results from `GlobalsAA` through a readonly proxy.
2458 AA.registerModuleAnalysis<GlobalsAA>();
2459
2460 // Add target-specific alias analyses.
2461 if (TM)
2462 TM->registerDefaultAliasAnalyses(AA);
2463
2464 return AA;
2465}
2466
/// Reports whether an instrumentation-based profile is being consumed.
///
/// \returns true when PGOOpt is set and its Action is PGOOptions::IRUse, or
/// when the UseCtxProfile option holds a non-empty string; false otherwise.
2467bool PassBuilder::isInstrumentedPGOUse() const {
2468 return (PGOOpt && PGOOpt->Action == PGOOptions::IRUse) ||
2469 !UseCtxProfile.empty();
2470}
aarch64 falkor hwpf fix Falkor HW Prefetch Fix Late Phase
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AggressiveInstCombiner - Combine expression patterns to form expressions with fewer,...
Provides passes to inlining "always_inline" functions.
This is the interface for LLVM's primary stateless and local alias analysis.
This file provides the interface for LLVM's Call Graph Profile pass.
This header provides classes for managing passes over SCCs of the call graph.
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
This file provides the interface for a simple, fast CSE pass.
This file provides a pass which clones the current module and runs the provided pass pipeline on the ...
This file provides a pass manager that only runs its passes if the provided marker analysis has been ...
Super simple passes to force specific function attrs from the commandline into the IR for debugging p...
Provides passes for computing function attributes based on interprocedural analyses.
This file provides the interface for LLVM's Global Value Numbering pass which eliminates fully redund...
This is the interface for a simple mod/ref and alias analysis over globals.
AcceleratorCodeSelection - Identify all functions reachable from a kernel, removing those that are un...
This header defines various interfaces for pass management in LLVM.
Interfaces for passes which infer implicit function attributes from the name and signature of functio...
This file provides the primary interface to the instcombine pass.
Defines passes for running instruction simplification across chunks of IR.
This file provides the interface for LLVM's PGO Instrumentation lowering pass.
See the comments on JumpThreadingPass.
static LVOptions Options
Definition LVOptions.cpp:25
This file implements the Loop Fusion pass.
This header defines the LoopLoadEliminationPass object.
This header provides classes for managing a pipeline of passes over loops in LLVM IR.
The header file for the LowerConstantIntrinsics pass as used by the new pass manager.
The header file for the LowerExpectIntrinsic pass as used by the new pass manager.
This pass performs merges of loads and stores on both sides of a diamond (hammock).
This file provides the interface for LLVM's Global Value Numbering pass.
This header enumerates the LLVM-provided high-level optimization levels.
This file provides the interface for IR based instrumentation passes ( (profile-gen,...
Define option tunables for PGO.
static void addAnnotationRemarksPass(ModulePassManager &MPM)
static InlineParams getInlineParamsFromOptLevel(OptimizationLevel Level)
static CoroConditionalWrapper buildCoroWrapper(ThinOrFullLTOPhase Phase)
static bool isLTOPreLink(ThinOrFullLTOPhase Phase)
static bool isLTOPostLink(ThinOrFullLTOPhase Phase)
This file implements relative lookup table converter that converts lookup tables to relative lookup t...
This file provides the interface for LLVM's Scalar Replacement of Aggregates pass.
This file provides the interface for the pseudo probe implementation for AutoFDO.
This file provides the interface for the sampled PGO loader pass.
This is the interface for a metadata-based scoped no-alias analysis.
This file provides the interface for the pass responsible for both simplifying and canonicalizing the...
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
This is the interface for a metadata-based TBAA.
Defines the virtual file system interface vfs::FileSystem.
A manager for alias analyses.
A module pass that rewrites heap allocations to use token-enabled allocation functions based on vario...
Definition AllocToken.h:36
Inlines functions marked as "always_inline".
Argument promotion pass.
Assign a GUID to functions as metadata.
Analysis pass providing a never-invalidated alias analysis result.
Simple pass that canonicalizes aliases.
A pass that merges duplicate global constants into a single constant.
This class implements a trivial dead store elimination.
Eliminate dead arguments (and return values) from functions.
A pass that transforms external global definitions into declarations.
Pass embeds a copy of the module optimized with the provided pass pipeline into a global variable.
A pass manager to run a set of extra loop passes if the MarkerTy analysis is present.
The core GVN pass object.
Definition GVN.h:128
Pass to remove unused function declarations.
Definition GlobalDCE.h:38
Optimize globals that never have their address taken.
Definition GlobalOpt.h:25
Pass to perform split of global variables.
Definition GlobalSplit.h:26
Analysis pass providing a never-invalidated alias analysis result.
Pass to outline cold regions.
Pass to perform interprocedural constant propagation.
Definition SCCP.h:48
Pass to outline similar regions.
Definition IROutliner.h:444
Run instruction simplification across each instruction in the function.
Instrumentation based profiling lowering pass.
This pass performs 'jump threading', which looks at blocks that have multiple predecessors and multip...
Performs Loop Invariant Code Motion Pass.
Definition LICM.h:66
Loop unroll pass that only does full loop unrolling and peeling.
Performs Loop Idiom Recognize Pass.
Performs Loop Inst Simplify Pass.
A simple loop rotation transformation.
Performs basic CFG simplifications to assist other loop passes.
A pass that does profile-guided sinking of instructions into loops.
Definition LoopSink.h:33
A simple loop rotation transformation.
Loop unroll pass that will support both full and partial unrolling.
Strips MemProf attributes and metadata.
Merge identical functions.
The module inliner pass for the new pass manager.
Module pass, wrapping the inliner pass.
Definition Inliner.h:65
void addModulePass(T Pass)
Add a module pass that runs before the CGSCC passes.
Definition Inliner.h:81
Class to hold module path string table and global value map, and encapsulate methods for operating on...
Simple pass that provides a name to every anonymous globals.
Additional 'norecurse' attribute deduction during postlink LTO phase.
OpenMP optimizations pass.
Definition OpenMPOpt.h:42
static LLVM_ABI const OptimizationLevel O3
Optimize for fast execution as much as possible.
static LLVM_ABI const OptimizationLevel Oz
A very specialized mode that will optimize for code size at any and all costs.
static LLVM_ABI const OptimizationLevel O0
Disable as many optimizations as possible.
static LLVM_ABI const OptimizationLevel Os
Similar to O2 but tries to optimize for small code size instead of fast execution without triggering ...
static LLVM_ABI const OptimizationLevel O2
Optimize for fast execution as much as possible without triggering significant incremental compile ti...
static LLVM_ABI const OptimizationLevel O1
Optimize quickly without destroying debuggability.
The indirect function call promotion pass.
The instrumentation (profile-instr-gen) pass for IR based PGO.
The instrumentation (profile-instr-gen) pass for IR based PGO.
The profile annotation (profile-instr-use) pass for IR based PGO.
The profile size based optimization pass for memory intrinsics.
Pass to remove unused function declarations.
LLVM_ABI void invokeFullLinkTimeOptimizationLastEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level)
LLVM_ABI ModuleInlinerWrapperPass buildInlinerPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the module pipeline that performs inlining as well as the inlining-driven cleanups.
LLVM_ABI void invokeOptimizerEarlyEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level, ThinOrFullLTOPhase Phase)
LLVM_ABI void invokeVectorizerStartEPCallbacks(FunctionPassManager &FPM, OptimizationLevel Level)
LLVM_ABI AAManager buildDefaultAAPipeline()
Build the default AAManager with the default alias analysis pipeline registered.
LLVM_ABI void invokeCGSCCOptimizerLateEPCallbacks(CGSCCPassManager &CGPM, OptimizationLevel Level)
LLVM_ABI ModulePassManager buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level)
Build a pre-link, ThinLTO-targeting default optimization pipeline to a pass manager.
LLVM_ABI void addPGOInstrPassesForO0(ModulePassManager &MPM, bool RunProfileGen, bool IsCS, bool AtomicCounterUpdate, std::string ProfileFile, std::string ProfileRemappingFile)
Add PGO instrumentation passes for O0 only.
LLVM_ABI void invokeScalarOptimizerLateEPCallbacks(FunctionPassManager &FPM, OptimizationLevel Level)
LLVM_ABI ModulePassManager buildPerModuleDefaultPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase=ThinOrFullLTOPhase::None)
Build a per-module default optimization pipeline.
LLVM_ABI void invokePipelineStartEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level)
LLVM_ABI void invokeVectorizerEndEPCallbacks(FunctionPassManager &FPM, OptimizationLevel Level)
LLVM_ABI ModulePassManager buildO0DefaultPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase=ThinOrFullLTOPhase::None)
Build an O0 pipeline with the minimal semantically required passes.
LLVM_ABI FunctionPassManager buildFunctionSimplificationPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the core LLVM function canonicalization and simplification pipeline.
LLVM_ABI void invokePeepholeEPCallbacks(FunctionPassManager &FPM, OptimizationLevel Level)
LLVM_ABI void invokePipelineEarlySimplificationEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level, ThinOrFullLTOPhase Phase)
LLVM_ABI void invokeLoopOptimizerEndEPCallbacks(LoopPassManager &LPM, OptimizationLevel Level)
LLVM_ABI ModulePassManager buildLTODefaultPipeline(OptimizationLevel Level, ModuleSummaryIndex *ExportSummary)
Build an LTO default optimization pipeline to a pass manager.
LLVM_ABI ModulePassManager buildModuleInlinerPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the module pipeline that performs inlining with module inliner pass.
LLVM_ABI ModulePassManager buildThinLTODefaultPipeline(OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary)
Build a ThinLTO default optimization pipeline to a pass manager.
LLVM_ABI void invokeLateLoopOptimizationsEPCallbacks(LoopPassManager &LPM, OptimizationLevel Level)
LLVM_ABI void invokeFullLinkTimeOptimizationEarlyEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level)
LLVM_ABI ModulePassManager buildFatLTODefaultPipeline(OptimizationLevel Level, bool ThinLTO, bool EmitSummary)
Build a fat object default optimization pipeline.
LLVM_ABI ModulePassManager buildModuleSimplificationPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the core LLVM module canonicalization and simplification pipeline.
LLVM_ABI ModulePassManager buildModuleOptimizationPipeline(OptimizationLevel Level, ThinOrFullLTOPhase LTOPhase)
Construct the core LLVM module optimization pipeline.
LLVM_ABI void invokeOptimizerLastEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level, ThinOrFullLTOPhase Phase)
LLVM_ABI ModulePassManager buildLTOPreLinkDefaultPipeline(OptimizationLevel Level)
Build a pre-link, LTO-targeting default optimization pipeline to a pass manager.
LLVM_ATTRIBUTE_MINSIZE std::enable_if_t<!std::is_same_v< PassT, PassManager > > addPass(PassT &&Pass)
bool isEmpty() const
Returns true if the pass manager contains no passes.
unsigned LicmMssaNoAccForPromotionCap
Tuning option to disable promotion to scalars in LICM with MemorySSA, if the number of access is too ...
Definition PassBuilder.h:78
bool SLPVectorization
Tuning option to enable/disable slp loop vectorization, set based on opt level.
Definition PassBuilder.h:56
int InlinerThreshold
Tuning option to override the default inliner threshold.
Definition PassBuilder.h:92
bool LoopFusion
Tuning option to enable/disable loop fusion. Its default value is false.
Definition PassBuilder.h:66
bool CallGraphProfile
Tuning option to enable/disable call graph profile.
Definition PassBuilder.h:82
bool MergeFunctions
Tuning option to enable/disable function merging.
Definition PassBuilder.h:89
bool ForgetAllSCEVInLoopUnroll
Tuning option to forget all SCEV loops in LoopUnroll.
Definition PassBuilder.h:70
unsigned LicmMssaOptCap
Tuning option to cap the number of calls to retrieve clobbering accesses in MemorySSA,...
Definition PassBuilder.h:74
bool LoopInterleaving
Tuning option to set loop interleaving on/off, set based on opt level.
Definition PassBuilder.h:48
LLVM_ABI PipelineTuningOptions()
Constructor sets pipeline tuning defaults based on cl::opts.
bool LoopUnrolling
Tuning option to enable/disable loop unrolling. Its default value is true.
Definition PassBuilder.h:59
bool LoopInterchange
Tuning option to enable/disable loop interchange.
Definition PassBuilder.h:63
bool LoopVectorization
Tuning option to enable/disable loop vectorization, set based on opt level.
Definition PassBuilder.h:52
Reassociate commutative expressions.
Definition Reassociate.h:74
A pass to do RPO deduction and propagation of function attributes.
This pass performs function-level constant propagation and merging.
Definition SCCP.h:30
The sample profiler data loader pass.
Analysis pass providing a never-invalidated alias analysis result.
This pass transforms loops that contain branches or switches on loop- invariant conditions to have mu...
A pass to simplify and canonicalize the CFG of a function.
Definition SimplifyCFG.h:30
Analysis pass providing a never-invalidated alias analysis result.
Optimize scalar/vector interactions in IR using target cost models.
Interfaces for registering analysis passes, producing common pass manager configurations,...
Abstract Attribute helper functions.
Definition Attributor.h:165
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
@ Assume
Do not drop type tests (default).
@ All
Drop only llvm.assumes using type test value.
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI cl::opt< bool > EnableKnowledgeRetention
static cl::opt< bool > RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden, cl::desc("Run the NewGVN pass"))
static cl::opt< bool > DisablePreInliner("disable-preinline", cl::init(false), cl::Hidden, cl::desc("Disable pre-instrumentation inliner"))
static cl::opt< bool > EnableJumpTableToSwitch("enable-jump-table-to-switch", cl::desc("Enable JumpTableToSwitch pass (default = off)"))
static cl::opt< bool > PerformMandatoryInliningsFirst("mandatory-inlining-first", cl::init(false), cl::Hidden, cl::desc("Perform mandatory inlinings module-wide, before performing " "inlining"))
static cl::opt< bool > RunPartialInlining("enable-partial-inlining", cl::init(false), cl::Hidden, cl::desc("Run Partial inlining pass"))
static cl::opt< bool > EnableGVNSink("enable-gvn-sink", cl::desc("Enable the GVN sinking pass (default = off)"))
static cl::opt< bool > EnableLoopHeaderDuplication("enable-loop-header-duplication", cl::init(false), cl::Hidden, cl::desc("Enable loop header duplication at any optimization level"))
static cl::opt< bool > EnableModuleInliner("enable-module-inliner", cl::init(false), cl::Hidden, cl::desc("Enable module inliner"))
static cl::opt< bool > EnableEagerlyInvalidateAnalyses("eagerly-invalidate-analyses", cl::init(true), cl::Hidden, cl::desc("Eagerly invalidate more analyses in default pipelines"))
static cl::opt< bool > EnableMatrix("enable-matrix", cl::init(false), cl::Hidden, cl::desc("Enable lowering of the matrix intrinsics"))
ModuleToFunctionPassAdaptor createModuleToFunctionPassAdaptor(FunctionPassT &&Pass, bool EagerlyInvalidate=false)
A function to deduce a function pass type and wrap it in the templated adaptor.
cl::opt< std::string > UseCtxProfile("use-ctx-profile", cl::init(""), cl::Hidden, cl::desc("Use the specified contextual profile file"))
static cl::opt< bool > EnableSampledInstr("enable-sampled-instrumentation", cl::init(false), cl::Hidden, cl::desc("Enable profile instrumentation sampling (default = off)"))
static cl::opt< bool > EnableLoopFlatten("enable-loop-flatten", cl::init(false), cl::Hidden, cl::desc("Enable the LoopFlatten Pass"))
static cl::opt< InliningAdvisorMode > UseInlineAdvisor("enable-ml-inliner", cl::init(InliningAdvisorMode::Default), cl::Hidden, cl::desc("Enable ML policy for inliner. Currently trained for -Oz only"), cl::values(clEnumValN(InliningAdvisorMode::Default, "default", "Heuristics-based inliner version"), clEnumValN(InliningAdvisorMode::Development, "development", "Use development mode (runtime-loadable model)"), clEnumValN(InliningAdvisorMode::Release, "release", "Use release mode (AOT-compiled model)")))
PassManager< LazyCallGraph::SCC, CGSCCAnalysisManager, LazyCallGraph &, CGSCCUpdateResult & > CGSCCPassManager
The CGSCC pass manager.
static cl::opt< bool > EnableUnrollAndJam("enable-unroll-and-jam", cl::init(false), cl::Hidden, cl::desc("Enable Unroll And Jam Pass"))
ThinOrFullLTOPhase
This enumerates the LLVM full LTO or ThinLTO optimization phases.
Definition Pass.h:77
@ FullLTOPreLink
Full LTO prelink phase.
Definition Pass.h:85
@ ThinLTOPostLink
ThinLTO postlink (backend compile) phase.
Definition Pass.h:83
@ None
No LTO/ThinLTO behavior needed.
Definition Pass.h:79
@ FullLTOPostLink
Full LTO postlink (backend compile) phase.
Definition Pass.h:87
@ ThinLTOPreLink
ThinLTO prelink (summary) phase.
Definition Pass.h:81
PassManager< Loop, LoopAnalysisManager, LoopStandardAnalysisResults &, LPMUpdater & > LoopPassManager
The Loop pass manager.
static cl::opt< bool > EnableConstraintElimination("enable-constraint-elimination", cl::init(true), cl::Hidden, cl::desc("Enable pass to eliminate conditions based on linear constraints"))
ModuleToPostOrderCGSCCPassAdaptor createModuleToPostOrderCGSCCPassAdaptor(CGSCCPassT &&Pass)
A function to deduce a CGSCC pass type and wrap it in the templated adaptor.
static cl::opt< bool > EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true), cl::Hidden, cl::desc("Enable inline deferral during PGO"))
Flag to enable inline deferral during PGO.
FunctionToLoopPassAdaptor createFunctionToLoopPassAdaptor(LoopPassT &&Pass, bool UseMemorySSA=false)
A function to deduce a loop pass type and wrap it in the templated adaptor.
CGSCCToFunctionPassAdaptor createCGSCCToFunctionPassAdaptor(FunctionPassT &&Pass, bool EagerlyInvalidate=false, bool NoRerun=false)
A function to deduce a function pass type and wrap it in the templated adaptor.
cl::opt< bool > ForgetSCEVInLoopUnroll
PassManager< Module > ModulePassManager
Convenience typedef for a pass manager over modules.
static cl::opt< bool > EnablePostPGOLoopRotation("enable-post-pgo-loop-rotation", cl::init(true), cl::Hidden, cl::desc("Run the loop rotation transformation after PGO instrumentation"))
LLVM_ABI bool AreStatisticsEnabled()
Check if statistics are enabled.
static cl::opt< std::string > InstrumentColdFuncOnlyPath("instrument-cold-function-only-path", cl::init(""), cl::desc("File path for cold function only instrumentation(requires use " "with --pgo-instrument-cold-function-only)"), cl::Hidden)
static cl::opt< bool > EnableGlobalAnalyses("enable-global-analyses", cl::init(true), cl::Hidden, cl::desc("Enable inter-procedural analyses"))
static cl::opt< bool > EnableDFAJumpThreading("enable-dfa-jump-thread", cl::desc("Enable DFA jump threading"), cl::init(false), cl::Hidden)
static cl::opt< bool > FlattenedProfileUsed("flattened-profile-used", cl::init(false), cl::Hidden, cl::desc("Indicate the sample profile being used is flattened, i.e., " "no inline hierarchy exists in the profile"))
static cl::opt< bool > ExtraVectorizerPasses("extra-vectorizer-passes", cl::init(false), cl::Hidden, cl::desc("Run cleanup optimization passes after vectorization"))
static cl::opt< bool > EnableHotColdSplit("hot-cold-split", cl::desc("Enable hot-cold splitting pass"))
cl::opt< bool > EnableMemProfContextDisambiguation
Enable MemProf context disambiguation for thin link.
PassManager< Function > FunctionPassManager
Convenience typedef for a pass manager over functions.
LLVM_ABI InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options.
cl::opt< bool > PGOInstrumentColdFunctionOnly
static cl::opt< bool > EnableLoopInterchange("enable-loopinterchange", cl::init(false), cl::Hidden, cl::desc("Enable the LoopInterchange Pass"))
static cl::opt< bool > EnableCHR("enable-chr", cl::init(true), cl::Hidden, cl::desc("Enable control height reduction optimization (CHR)"))
static cl::opt< bool > EnableMergeFunctions("enable-merge-functions", cl::init(false), cl::Hidden, cl::desc("Enable function merging as part of the optimization pipeline"))
static cl::opt< bool > EnableDevirtualizeSpeculatively("enable-devirtualize-speculatively", cl::desc("Enable speculative devirtualization optimization"), cl::init(false))
static cl::opt< bool > EnableGVNHoist("enable-gvn-hoist", cl::desc("Enable the GVN hoisting pass (default = off)"))
cl::opt< unsigned > SetLicmMssaNoAccForPromotionCap
static cl::opt< bool > EnableIROutliner("ir-outliner", cl::init(false), cl::Hidden, cl::desc("Enable ir outliner pass"))
static cl::opt< AttributorRunOption > AttributorRun("attributor-enable", cl::Hidden, cl::init(AttributorRunOption::NONE), cl::desc("Enable the attributor inter-procedural deduction pass"), cl::values(clEnumValN(AttributorRunOption::ALL, "all", "enable all attributor runs"), clEnumValN(AttributorRunOption::MODULE, "module", "enable module-wide attributor runs"), clEnumValN(AttributorRunOption::CGSCC, "cgscc", "enable call graph SCC attributor runs"), clEnumValN(AttributorRunOption::NONE, "none", "disable attributor runs")))
static cl::opt< int > PreInlineThreshold("preinline-threshold", cl::Hidden, cl::init(75), cl::desc("Control the amount of inlining in pre-instrumentation inliner " "(default = 75)"))
static cl::opt< bool > UseLoopVersioningLICM("enable-loop-versioning-licm", cl::init(false), cl::Hidden, cl::desc("Enable the experimental Loop Versioning LICM pass"))
cl::opt< unsigned > MaxDevirtIterations("max-devirt-iterations", cl::ReallyHidden, cl::init(4))
cl::opt< unsigned > SetLicmMssaOptCap
A DCE pass that assumes instructions are dead until proven otherwise.
Definition ADCE.h:31
Pass to convert @llvm.global.annotations to !annotation metadata.
This pass attempts to minimize the number of assumes without losing any information.
Hoist/decompose integer division and remainder instructions to enable CFG improvements and better cod...
Definition DivRemPairs.h:23
A simple and fast domtree-based CSE pass.
Definition EarlyCSE.h:31
Pass which forces specific function attributes into the IR, primarily as a debugging tool.
A simple and fast domtree-based GVN pass to hoist common expressions from sibling branches.
Definition GVN.h:415
Uses an "inverted" value numbering to decide the similarity of expressions and sinks similar expressi...
Definition GVN.h:422
A set of parameters to control various transforms performed by IPSCCP pass.
Definition SCCP.h:35
A pass which infers function attributes from the names and signatures of function declarations in a m...
Provides context on when an inline advisor is constructed in the pipeline (e.g., link phase,...
Thresholds to tune inline cost analysis.
Definition InlineCost.h:207
std::optional< int > HotCallSiteThreshold
Threshold to use when the callsite is considered hot.
Definition InlineCost.h:224
int DefaultThreshold
The default threshold to start with for a callee.
Definition InlineCost.h:209
std::optional< bool > EnableDeferral
Indicate whether we should allow inline deferral.
Definition InlineCost.h:237
std::optional< int > HintThreshold
Threshold to use for callees with inline hint.
Definition InlineCost.h:212
Options for the frontend instrumentation based profiling pass.
A no-op pass template which simply forces a specific analysis result to be invalidated.
Pass to forward loads in a loop around the backedge to subsequent iterations.
A set of parameters used to control various transforms performed by the LoopUnroll pass.
The LoopVectorize Pass.
Computes function attributes in post-order over the call graph.
A utility pass template to force an analysis result to be available.