LLVM 23.0.0git
PassBuilderPipelines.cpp
Go to the documentation of this file.
1//===- Construction of pass pipelines -------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9///
10/// This file provides the implementation of the PassBuilder based on our
11/// static pass registry as well as related functionality. It also provides
12/// helpers to aid in analyzing, debugging, and testing passes and pass
13/// pipelines.
14///
15//===----------------------------------------------------------------------===//
16
17#include "llvm/ADT/Statistic.h"
29#include "llvm/IR/PassManager.h"
30#include "llvm/Pass.h"
157
158using namespace llvm;
159
160namespace llvm {
161
163 "enable-ml-inliner", cl::init(InliningAdvisorMode::Default), cl::Hidden,
164 cl::desc("Enable ML policy for inliner. Currently trained for -Oz only"),
166 "Heuristics-based inliner version"),
168 "Use development mode (runtime-loadable model)"),
170 "Use release mode (AOT-compiled model)")));
171
172/// Flag to enable inline deferral during PGO. Registered on the command line
/// as "enable-npm-pgo-inline-deferral" with an initial value of true.
173static cl::opt<bool>
174 EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true),
176 cl::desc("Enable inline deferral during PGO"));
177
// Hidden boolean option "enable-module-inliner"; initial value false.
178static cl::opt<bool> EnableModuleInliner("enable-module-inliner",
179 cl::init(false), cl::Hidden,
180 cl::desc("Enable module inliner"));
181
183 "mandatory-inlining-first", cl::init(false), cl::Hidden,
184 cl::desc("Perform mandatory inlinings module-wide, before performing "
185 "inlining"));
186
188 "eagerly-invalidate-analyses", cl::init(true), cl::Hidden,
189 cl::desc("Eagerly invalidate more analyses in default pipelines"));
190
192 "enable-merge-functions", cl::init(false), cl::Hidden,
193 cl::desc("Enable function merging as part of the optimization pipeline"));
194
196 "enable-post-pgo-loop-rotation", cl::init(true), cl::Hidden,
197 cl::desc("Run the loop rotation transformation after PGO instrumentation"));
198
200 "enable-global-analyses", cl::init(true), cl::Hidden,
201 cl::desc("Enable inter-procedural analyses"));
202
// Hidden boolean option "enable-partial-inlining"; initial value false.
203static cl::opt<bool> RunPartialInlining("enable-partial-inlining",
204 cl::init(false), cl::Hidden,
205 cl::desc("Run Partial inlining pass"));
206
208 "extra-vectorizer-passes", cl::init(false), cl::Hidden,
209 cl::desc("Run cleanup optimization passes after vectorization"));
210
// Hidden boolean option "enable-newgvn"; initial value false. When set, the
// function simplification pipeline in this file adds NewGVNPass where it
// would otherwise add GVNPass.
211static cl::opt<bool> RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden,
212 cl::desc("Run the NewGVN pass"));
213
// Hidden boolean option "enable-loopinterchange"; initial value false.
214static cl::opt<bool>
215 EnableLoopInterchange("enable-loopinterchange", cl::init(false), cl::Hidden,
216 cl::desc("Enable the LoopInterchange Pass"));
217
// Hidden boolean option "enable-unroll-and-jam"; initial value false.
218static cl::opt<bool> EnableUnrollAndJam("enable-unroll-and-jam",
219 cl::init(false), cl::Hidden,
220 cl::desc("Enable Unroll And Jam Pass"));
221
// Boolean option "enable-loop-flatten"; initial value false.
222static cl::opt<bool> EnableLoopFlatten("enable-loop-flatten", cl::init(false),
224 cl::desc("Enable the LoopFlatten Pass"));
225
// Hidden boolean option "enable-instrumentor"; initial value false.
226static cl::opt<bool>
227 EnableInstrumentor("enable-instrumentor", cl::init(false), cl::Hidden,
228 cl::desc("Enable the Instrumentor Pass"));
229
// Hidden boolean option "enable-dfa-jump-thread"; initial value false.
230static cl::opt<bool>
231 EnableDFAJumpThreading("enable-dfa-jump-thread",
232 cl::desc("Enable DFA jump threading"),
233 cl::init(false), cl::Hidden);
234
// Boolean option "hot-cold-split". This declaration passes neither cl::init
// nor cl::Hidden, unlike most of the neighbouring options.
235static cl::opt<bool>
236 EnableHotColdSplit("hot-cold-split",
237 cl::desc("Enable hot-cold splitting pass"));
238
// Boolean option "ir-outliner"; initial value false.
239static cl::opt<bool> EnableIROutliner("ir-outliner", cl::init(false),
241 cl::desc("Enable ir outliner pass"));
242
// Hidden boolean option "disable-preinline"; initial value false. Note the
// inverted sense: setting it turns the pre-instrumentation inliner off.
243static cl::opt<bool>
244 DisablePreInliner("disable-preinline", cl::init(false), cl::Hidden,
245 cl::desc("Disable pre-instrumentation inliner"));
246
248 "preinline-threshold", cl::Hidden, cl::init(75),
249 cl::desc("Control the amount of inlining in pre-instrumentation inliner "
250 "(default = 75)"));
251
// Boolean option "enable-gvn-hoist" (no explicit cl::init; described as off by
// default). When set, the function simplification pipeline in this file adds
// GVNHoistPass.
252static cl::opt<bool>
253 EnableGVNHoist("enable-gvn-hoist",
254 cl::desc("Enable the GVN hoisting pass (default = off)"));
255
// Boolean option "enable-gvn-sink" (no explicit cl::init; described as off by
// default). When set, the function simplification pipeline in this file adds
// GVNSinkPass followed by a SimplifyCFGPass run.
256static cl::opt<bool>
257 EnableGVNSink("enable-gvn-sink",
258 cl::desc("Enable the GVN sinking pass (default = off)"));
259
261 "enable-jump-table-to-switch", cl::init(true),
262 cl::desc("Enable JumpTableToSwitch pass (default = true)"));
263
264// This option is used in simplifying testing SampleFDO optimizations for
265// profile loading.
// NOTE(review): the sentence above does not match this option's own
// description (control height reduction); it may belong to a different
// option -- confirm before relying on it.
// Hidden boolean option "enable-chr"; initial value true.
266static cl::opt<bool>
267 EnableCHR("enable-chr", cl::init(true), cl::Hidden,
268 cl::desc("Enable control height reduction optimization (CHR)"));
269
271 "flattened-profile-used", cl::init(false), cl::Hidden,
272 cl::desc("Indicate the sample profile being used is flattened, i.e., "
273 "no inline hierarchy exists in the profile"));
274
// Hidden boolean option "enable-matrix"; initial value false.
275static cl::opt<bool>
276 EnableMatrix("enable-matrix", cl::init(false), cl::Hidden,
277 cl::desc("Enable lowering of the matrix intrinsics"));
278
280 "enable-mergeicmps", cl::init(true), cl::Hidden,
281 cl::desc("Enable MergeICmps pass in the optimization pipeline"));
282
284 "enable-constraint-elimination", cl::init(true), cl::Hidden,
285 cl::desc(
286 "Enable pass to eliminate conditions based on linear constraints"));
287
289 "attributor-enable", cl::Hidden, cl::init(AttributorRunOption::NONE),
290 cl::desc("Enable the attributor inter-procedural deduction pass"),
292 "enable all full attributor runs"),
294 "enable all attributor-light runs"),
296 "enable module-wide attributor runs"),
298 "enable module-wide attributor-light runs"),
300 "enable call graph SCC attributor runs"),
302 "enable call graph SCC attributor-light runs"),
303 clEnumValN(AttributorRunOption::NONE, "none",
304 "disable attributor runs")));
305
307 "enable-sampled-instrumentation", cl::init(false), cl::Hidden,
308 cl::desc("Enable profile instrumentation sampling (default = off)"));
310 "enable-loop-versioning-licm", cl::init(false), cl::Hidden,
311 cl::desc("Enable the experimental Loop Versioning LICM pass"));
312
314 "instrument-cold-function-only-path", cl::init(""),
315 cl::desc("File path for cold function only instrumentation(requires use "
316 "with --pgo-instrument-cold-function-only)"),
317 cl::Hidden);
318
319// TODO: There is a similar flag in WPD pass, we should consolidate them by
320// parsing the option only once in PassBuilder and share it across both places.
322 "enable-devirtualize-speculatively",
323 cl::desc("Enable speculative devirtualization optimization"),
324 cl::init(false));
325
328
330} // namespace llvm
331
349
350namespace llvm {
352} // namespace llvm
353
355 OptimizationLevel Level) {
356 for (auto &C : PeepholeEPCallbacks)
357 C(FPM, Level);
358}
361 for (auto &C : LateLoopOptimizationsEPCallbacks)
362 C(LPM, Level);
363}
365 OptimizationLevel Level) {
366 for (auto &C : LoopOptimizerEndEPCallbacks)
367 C(LPM, Level);
368}
371 for (auto &C : ScalarOptimizerLateEPCallbacks)
372 C(FPM, Level);
373}
375 OptimizationLevel Level) {
376 for (auto &C : CGSCCOptimizerLateEPCallbacks)
377 C(CGPM, Level);
378}
380 OptimizationLevel Level) {
381 for (auto &C : VectorizerStartEPCallbacks)
382 C(FPM, Level);
383}
385 OptimizationLevel Level) {
386 for (auto &C : VectorizerEndEPCallbacks)
387 C(FPM, Level);
388}
390 OptimizationLevel Level,
392 for (auto &C : OptimizerEarlyEPCallbacks)
393 C(MPM, Level, Phase);
394}
396 OptimizationLevel Level,
398 for (auto &C : OptimizerLastEPCallbacks)
399 C(MPM, Level, Phase);
400}
403 for (auto &C : FullLinkTimeOptimizationEarlyEPCallbacks)
404 C(MPM, Level);
405}
408 for (auto &C : FullLinkTimeOptimizationLastEPCallbacks)
409 C(MPM, Level);
410}
412 OptimizationLevel Level) {
413 for (auto &C : PipelineStartEPCallbacks)
414 C(MPM, Level);
415}
418 for (auto &C : PipelineEarlySimplificationEPCallbacks)
419 C(MPM, Level, Phase);
420}
421
422// Get IR stats with InstCount before/after the optimization pipeline
424 bool IsPreOptimization) {
425 if (AreStatisticsEnabled()) {
426 MPM.addPass(
429 FunctionPropertiesStatisticsPass(IsPreOptimization)));
430 }
431}
432
433// Helper to add AnnotationRemarksPass.
437
438// Helper to check if the current compilation phase is preparing for LTO
443
444// Helper to check if the current compilation phase is preparing for FullLTO
445[[maybe_unused]] static bool isFullLTOPreLink(ThinOrFullLTOPhase Phase) {
447}
448
449// Helper to check if the current compilation phase is preparing for ThinLTO
453
454// Helper to check if the current compilation phase is LTO backend
459
460// Helper to check if the current compilation phase is FullLTO backend
464
465// Helper to check if the current compilation phase is ThinLTO backend
469
470// Helper to wrap conditionally Coro passes.
472 // TODO: Skip passes according to Phase.
473 ModulePassManager CoroPM;
474 CoroPM.addPass(CoroEarlyPass());
475 CGSCCPassManager CGPM;
476 CGPM.addPass(CoroSplitPass());
477 CoroPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
478 CoroPM.addPass(CoroCleanupPass());
479 CoroPM.addPass(GlobalDCEPass());
480 return CoroConditionalWrapper(std::move(CoroPM));
481}
482
483// TODO: Investigate the cost/benefit of tail call elimination on debugging.
485PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
487
489
491 FPM.addPass(CountVisitsPass());
492
493 // Form SSA out of local memory accesses after breaking apart aggregates into
494 // scalars.
495 FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
496
497 // Catch trivial redundancies
498 FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
499
500 // Hoisting of scalars and load expressions.
501 FPM.addPass(
502 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
503 FPM.addPass(InstCombinePass());
504
505 FPM.addPass(LibCallsShrinkWrapPass());
506
507 invokePeepholeEPCallbacks(FPM, Level);
508
509 FPM.addPass(
510 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
511
512 // Form canonically associated expression trees, and simplify the trees using
513 // basic mathematical properties. For example, this will form (nearly)
514 // minimal multiplication trees.
515 FPM.addPass(ReassociatePass());
516
517 // Add the primary loop simplification pipeline.
518 // FIXME: Currently this is split into two loop pass pipelines because we run
519 // some function passes in between them. These can and should be removed
520 // and/or replaced by scheduling the loop pass equivalents in the correct
521 // positions. But those equivalent passes aren't powerful enough yet.
522 // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
523 // used. We have `LoopSimplifyCFGPass` which isn't powerful enough yet to
524 // fully replace `SimplifyCFGPass`, and the closest to the other we have is
525 // `LoopInstSimplify`.
526 LoopPassManager LPM1, LPM2;
527
528 // Simplify the loop body. We do this initially to clean up after other loop
529 // passes run, either when iterating on a loop or on inner loops with
530 // implications on the outer loop.
531 LPM1.addPass(LoopInstSimplifyPass());
532 LPM1.addPass(LoopSimplifyCFGPass());
533
534 // Try to remove as much code from the loop header as possible,
535 // to reduce amount of IR that will have to be duplicated. However,
536 // do not perform speculative hoisting the first time as LICM
537 // will destroy metadata that may not need to be destroyed if run
538 // after loop rotation.
539 // TODO: Investigate promotion cap for O1.
540 LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
541 /*AllowSpeculation=*/false));
542
543 LPM1.addPass(
544 LoopRotatePass(/*EnableHeaderDuplication=*/true, isLTOPreLink(Phase)));
545 // TODO: Investigate promotion cap for O1.
546 LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
547 /*AllowSpeculation=*/true));
548 LPM1.addPass(SimpleLoopUnswitchPass());
550 LPM1.addPass(LoopFlattenPass());
551
552 LPM2.addPass(LoopIdiomRecognizePass());
553 LPM2.addPass(IndVarSimplifyPass());
554
556
557 LPM2.addPass(LoopDeletionPass());
558
559 // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
560 // because it changes IR and makes profile annotation in back compile
561 // inaccurate. The normal unroller doesn't pay attention to forced full unroll
562 // attributes so we need to make sure and allow the full unroll pass to pay
563 // attention to it.
564 if (!isThinLTOPreLink(Phase) || !PGOOpt ||
565 PGOOpt->Action != PGOOptions::SampleUse)
566 LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
567 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
568 PTO.ForgetAllSCEVInLoopUnroll));
569
571
572 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
573 /*UseMemorySSA=*/true));
574 FPM.addPass(
575 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
576 FPM.addPass(InstCombinePass());
577 // The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA.
578 // *All* loop passes must preserve it, in order to be able to use it.
579 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
580 /*UseMemorySSA=*/false));
581
582 // Delete small array after loop unroll.
583 FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
584
585 // Specially optimize memory movement as it doesn't look like dataflow in SSA.
586 FPM.addPass(MemCpyOptPass());
587
588 // Sparse conditional constant propagation.
589 // FIXME: It isn't clear why we do this *after* loop passes rather than
590 // before...
591 FPM.addPass(SCCPPass());
592
593 // Delete dead bit computations (instcombine runs after to fold away the dead
594 // computations, and then ADCE will run later to exploit any new DCE
595 // opportunities that creates).
596 FPM.addPass(BDCEPass());
597
598 // Run instcombine after redundancy and dead bit elimination to exploit
599 // opportunities opened up by them.
600 FPM.addPass(InstCombinePass());
601 invokePeepholeEPCallbacks(FPM, Level);
602
603 FPM.addPass(CoroElidePass());
604
606
607 // Finally, do an expensive DCE pass to catch all the dead code exposed by
608 // the simplifications and basic cleanup after all the simplifications.
609 // TODO: Investigate if this is too expensive.
610 FPM.addPass(ADCEPass());
611 FPM.addPass(
612 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
613 FPM.addPass(InstCombinePass());
614 invokePeepholeEPCallbacks(FPM, Level);
615
616 return FPM;
617}
618
622 assert(Level != OptimizationLevel::O0 && "Must request optimizations!");
623
624 // The O1 pipeline has a separate pipeline creation function to simplify
625 // construction readability.
626 if (Level.getSpeedupLevel() == 1)
627 return buildO1FunctionSimplificationPipeline(Level, Phase);
628
630
633
634 // Form SSA out of local memory accesses after breaking apart aggregates into
635 // scalars.
637
638 // Catch trivial redundancies
639 FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
642
643 // Hoisting of scalars and load expressions.
644 if (EnableGVNHoist)
645 FPM.addPass(GVNHoistPass());
646
647 // Global value numbering based sinking.
648 if (EnableGVNSink) {
649 FPM.addPass(GVNSinkPass());
650 FPM.addPass(
651 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
652 }
653
654 // Speculative execution if the target has divergent branches; otherwise nop.
655 FPM.addPass(SpeculativeExecutionPass(/* OnlyIfDivergentTarget =*/true));
656
657 // Optimize based on known information about branches, and cleanup afterward.
660
661 // Jump table to switch conversion.
664
665 FPM.addPass(
666 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
670
671 invokePeepholeEPCallbacks(FPM, Level);
672
673 // For PGO use pipeline, try to optimize memory intrinsics such as memcpy
674 // using the size value profile. Don't perform this when optimizing for size.
675 if (PGOOpt && PGOOpt->Action == PGOOptions::IRUse)
677
678 FPM.addPass(TailCallElimPass(/*UpdateFunctionEntryCount=*/
679 isInstrumentedPGOUse()));
680 FPM.addPass(
681 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
682
683 // Form canonically associated expression trees, and simplify the trees using
684 // basic mathematical properties. For example, this will form (nearly)
685 // minimal multiplication trees.
687
690
691 // Add the primary loop simplification pipeline.
692 // FIXME: Currently this is split into two loop pass pipelines because we run
693 // some function passes in between them. These can and should be removed
694 // and/or replaced by scheduling the loop pass equivalents in the correct
695 // positions. But those equivalent passes aren't powerful enough yet.
696 // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
697 // used. We have `LoopSimplifyCFGPass` which isn't powerful enough yet to
698 // fully replace `SimplifyCFGPass`, and the closest to the other we have is
699 // `LoopInstSimplify`.
700 LoopPassManager LPM1, LPM2;
701
702 // Simplify the loop body. We do this initially to clean up after other loop
703 // passes run, either when iterating on a loop or on inner loops with
704 // implications on the outer loop.
705 LPM1.addPass(LoopInstSimplifyPass());
706 LPM1.addPass(LoopSimplifyCFGPass());
707
708 // Try to remove as much code from the loop header as possible,
709 // to reduce amount of IR that will have to be duplicated. However,
710 // do not perform speculative hoisting the first time as LICM
711 // will destroy metadata that may not need to be destroyed if run
712 // after loop rotation.
713 // TODO: Investigate promotion cap for O1.
714 LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
715 /*AllowSpeculation=*/false));
716
717 LPM1.addPass(
718 LoopRotatePass(/*EnableHeaderDuplication=*/true, isLTOPreLink(Phase)));
719 // TODO: Investigate promotion cap for O1.
720 LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
721 /*AllowSpeculation=*/true));
722 LPM1.addPass(
723 SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3));
725 LPM1.addPass(LoopFlattenPass());
726
727 LPM2.addPass(LoopIdiomRecognizePass());
728 LPM2.addPass(IndVarSimplifyPass());
729
730 {
732 ExtraPasses.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
734 LPM2.addPass(std::move(ExtraPasses));
735 }
736
738
739 LPM2.addPass(LoopDeletionPass());
740
741 // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
742 // because it changes IR and makes profile annotation in back compile
743 // inaccurate. The normal unroller doesn't pay attention to forced full unroll
744 // attributes so we need to make sure and allow the full unroll pass to pay
745 // attention to it.
746 if (!isThinLTOPreLink(Phase) || !PGOOpt ||
747 PGOOpt->Action != PGOOptions::SampleUse)
748 LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
749 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
750 PTO.ForgetAllSCEVInLoopUnroll));
751
753
754 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
755 /*UseMemorySSA=*/true));
756 FPM.addPass(
757 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
759 // The loop passes in LPM2 (LoopIdiomRecognizePass, IndVarSimplifyPass,
760 // LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA.
761 // *All* loop passes must preserve it, in order to be able to use it.
762 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
763 /*UseMemorySSA=*/false));
764
765 // Delete small array after loop unroll.
767
768 // Try vectorization/scalarization transforms that are both improvements
769 // themselves and can allow further folds with GVN and InstCombine.
770 FPM.addPass(VectorCombinePass(/*TryEarlyFoldsOnly=*/true));
771
772 // Eliminate redundancies.
774 if (RunNewGVN)
775 FPM.addPass(NewGVNPass());
776 else
777 FPM.addPass(GVNPass());
778
779 // Sparse conditional constant propagation.
780 // FIXME: It isn't clear why we do this *after* loop passes rather than
781 // before...
782 FPM.addPass(SCCPPass());
783
784 // Delete dead bit computations (instcombine runs after to fold away the dead
785 // computations, and then ADCE will run later to exploit any new DCE
786 // opportunities that creates).
787 FPM.addPass(BDCEPass());
788
789 // Run instcombine after redundancy and dead bit elimination to exploit
790 // opportunities opened up by them.
792 invokePeepholeEPCallbacks(FPM, Level);
793
794 // Re-consider control flow based optimizations after redundancy elimination,
795 // redo DCE, etc.
798
801
802 // Finally, do an expensive DCE pass to catch all the dead code exposed by
803 // the simplifications and basic cleanup after all the simplifications.
804 // TODO: Investigate if this is too expensive.
805 FPM.addPass(ADCEPass());
806
807 // Specially optimize memory movement as it doesn't look like dataflow in SSA.
808 FPM.addPass(MemCpyOptPass());
809
810 FPM.addPass(DSEPass());
812
814 LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
815 /*AllowSpeculation=*/true),
816 /*UseMemorySSA=*/true));
817
818 FPM.addPass(CoroElidePass());
819
821
823 .convertSwitchRangeToICmp(true)
824 .convertSwitchToArithmetic(true)
825 .hoistCommonInsts(true)
826 .sinkCommonInsts(true)));
828 invokePeepholeEPCallbacks(FPM, Level);
829
830 return FPM;
831}
832
// Appends to MPM the passes that must run in an LTO pre-link pipeline.
// NOTE(review): part of the body is absent from this listing; the only
// registration visible here is AssignGUIDPass.
833void PassBuilder::addRequiredLTOPreLinkPasses(ModulePassManager &MPM) {
836 MPM.addPass(AssignGUIDPass());
837}
838
// Appends the pre-instrumentation inliner to MPM, together with a short
// function cleanup pipeline nested in the inliner's CGSCC pipeline, and then
// a GlobalDCE run.
//
// MPM:      module pass manager that receives the passes.
// Level:    optimization level; must not be O0 (asserted). It is forwarded to
//           the peephole extension-point callbacks.
// LTOPhase: NOTE(review): its use is not visible in this listing.
//
// NOTE(review): several statements of this function are absent from this
// listing (the condition guarding the early return, and the construction of
// MIWP and FPM among them); the comments below describe only what is visible.
839void PassBuilder::addPreInlinerPasses(ModulePassManager &MPM,
840 OptimizationLevel Level,
841 ThinOrFullLTOPhase LTOPhase) {
842 assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
844 return;
845 InlineParams IP;
846
848
849 // FIXME: The hint threshold has the same value used by the regular inliner
850 // when not optimizing for size. This should probably be lowered after
851 // performance testing.
852 // FIXME: this comment is cargo culted from the old pass manager, revisit.
853 IP.HintThreshold = 325;
856 IP, /* MandatoryFirst */ true,
// The function passes below are attached to the inliner's own CGSCC pipeline.
858 CGSCCPassManager &CGPipeline = MIWP.getPM();
859
861 FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
862 FPM.addPass(EarlyCSEPass()); // Catch trivial redundancies.
863 FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(
864 true))); // Merge & remove basic blocks.
865 FPM.addPass(InstCombinePass()); // Combine silly sequences.
866 invokePeepholeEPCallbacks(FPM, Level);
867
868 CGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
869 std::move(FPM), PTO.EagerlyInvalidateAnalyses));
870
871 MPM.addPass(std::move(MIWP));
872
873 // Delete anything that is now dead to make sure that we don't instrument
874 // dead code. Instrumentation can end up keeping dead code around and
875 // dramatically increase code size.
876 MPM.addPass(GlobalDCEPass());
877}
878
// Appends a LoopRotatePass, wrapped in a function-to-loop adaptor that does
// not use MemorySSA, to MPM.
// NOTE(review): the guarding condition and the outer adaptor call are absent
// from this listing, so when the pass is actually added cannot be stated here.
879void PassBuilder::addPostPGOLoopRotation(ModulePassManager &MPM,
880 OptimizationLevel Level) {
882 // Disable header duplication in loop rotation at -Oz.
// NOTE(review): LoopRotatePass is default-constructed below, so nothing
// visible here disables header duplication; the comment above may be stale
// -- confirm.
884 createFunctionToLoopPassAdaptor(LoopRotatePass(),
885 /*UseMemorySSA=*/false),
886 PTO.EagerlyInvalidateAnalyses));
887 }
888}
889
// Appends either the PGO profile-use passes or the PGO instrumentation
// passes to MPM, depending on RunProfileGen.
//
// MPM:                  module pass manager that receives the passes.
// Level:                optimization level; must not be O0 (asserted).
// RunProfileGen:        false selects the profile-use path, true selects the
//                       instrumentation path.
// IsCS:                 forwarded to the use/gen/lowering passes and also used
//                       as the value of Options.UseBFIInPromotion.
// AtomicCounterUpdate:  copied into Options.Atomic.
// ProfileFile:          profile to read on the use path (must be non-empty,
//                       asserted); on the instrumentation path, if non-empty,
//                       it becomes Options.InstrProfileOutput.
// ProfileRemappingFile: forwarded to PGOInstrumentationUse.
890void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM,
891 OptimizationLevel Level, bool RunProfileGen,
892 bool IsCS, bool AtomicCounterUpdate,
893 std::string ProfileFile,
894 std::string ProfileRemappingFile) {
895 assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
896
// Profile-use path: annotate from the profile, cache the summary, and stop.
897 if (!RunProfileGen) {
898 assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
899 MPM.addPass(
900 PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS));
901 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
902 // RequireAnalysisPass for PSI before subsequent non-module passes.
903 MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
904 return;
905 }
906
907 // Perform PGO instrumentation.
// NOTE(review): the remainder of this statement is absent from this listing.
908 MPM.addPass(PGOInstrumentationGen(IsCS ? PGOInstrumentationType::CSFDO
910
911 addPostPGOLoopRotation(MPM, Level);
912 // Add the profile lowering pass.
913 InstrProfOptions Options;
914 if (!ProfileFile.empty())
915 Options.InstrProfileOutput = ProfileFile;
916 // Do counter promotion at Level greater than O0.
917 Options.DoCounterPromotion = true;
918 Options.UseBFIInPromotion = IsCS;
919 if (EnableSampledInstr) {
920 Options.Sampling = true;
921 // With sampling, there is little benefit to enable counter promotion.
922 // But note that sampling does work with counter promotion.
923 Options.DoCounterPromotion = false;
924 }
925 Options.Atomic = AtomicCounterUpdate;
926 MPM.addPass(InstrProfilingLoweringPass(Options, IsCS));
927}
928
930 bool RunProfileGen, bool IsCS,
931 bool AtomicCounterUpdate,
932 std::string ProfileFile,
933 std::string ProfileRemappingFile) {
934 if (!RunProfileGen) {
935 assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
936 MPM.addPass(
937 PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS));
938 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
939 // RequireAnalysisPass for PSI before subsequent non-module passes.
941 return;
942 }
943
944 // Perform PGO instrumentation.
947 // Add the profile lowering pass.
949 if (!ProfileFile.empty())
950 Options.InstrProfileOutput = ProfileFile;
951 // Do not do counter promotion at O0.
952 Options.DoCounterPromotion = false;
953 Options.UseBFIInPromotion = IsCS;
954 Options.Atomic = AtomicCounterUpdate;
956}
957
959 return getInlineParamsFromOptLevel(Level.getSpeedupLevel());
960}
961
965 InlineParams IP;
966 if (PTO.InlinerThreshold == -1)
968 else
969 IP = getInlineParams(PTO.InlinerThreshold);
970 // For PreLinkThinLTO + SamplePGO or PreLinkFullLTO + SamplePGO,
971 // set hot-caller threshold to 0 to disable hot
972 // callsite inline (as much as possible [1]) because it makes
973 // profile annotation in the backend inaccurate.
974 //
975 // [1] Note the cost of a function could be below zero due to erased
976 // prologue / epilogue.
977 if (isLTOPreLink(Phase) && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)
979
980 if (PGOOpt)
982
986
987 // Require the GlobalsAA analysis for the module so we can query it within
988 // the CGSCC pipeline.
990 MIWP.addModulePass(RequireAnalysisPass<GlobalsAA, Module>());
991 // Invalidate AAManager so it can be recreated and pick up the newly
992 // available GlobalsAA.
993 MIWP.addModulePass(
995 }
996
997 // Require the ProfileSummaryAnalysis for the module so we can query it within
998 // the inliner pass.
1000
1001 // Now begin the main postorder CGSCC pipeline.
1002 // FIXME: The current CGSCC pipeline has its origins in the legacy pass
1003 // manager and trying to emulate its precise behavior. Much of this doesn't
1004 // make a lot of sense and we should revisit the core CGSCC structure.
1005 CGSCCPassManager &MainCGPipeline = MIWP.getPM();
1006
1007 // Note: historically, the PruneEH pass was run first to deduce nounwind and
1008 // generally clean up exception handling overhead. It isn't clear this is
1009 // valuable as the inliner doesn't currently care whether it is inlining an
1010 // invoke or a call.
1011
1013 MainCGPipeline.addPass(AttributorCGSCCPass());
1015 MainCGPipeline.addPass(AttributorLightCGSCCPass());
1016
1017 // Deduce function attributes. We do another run of this after the function
1018 // simplification pipeline, so this only needs to run when it could affect the
1019 // function simplification pipeline, which is only the case with recursive
1020 // functions.
1021 MainCGPipeline.addPass(PostOrderFunctionAttrsPass(/*SkipNonRecursive*/ true));
1022
1023 // When at O3 add argument promotion to the pass pipeline.
1024 // FIXME: It isn't at all clear why this should be limited to O3.
1025 if (Level == OptimizationLevel::O3)
1026 MainCGPipeline.addPass(ArgumentPromotionPass());
1027
1028 // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if
1029 // there are no OpenMP runtime calls present in the module.
1030 if (Level == OptimizationLevel::O2 || Level == OptimizationLevel::O3)
1031 MainCGPipeline.addPass(OpenMPOptCGSCCPass(Phase));
1032
1033 invokeCGSCCOptimizerLateEPCallbacks(MainCGPipeline, Level);
1034
1035 // Add the core function simplification pipeline nested inside the
1036 // CGSCC walk.
1039 PTO.EagerlyInvalidateAnalyses, /*NoRerun=*/true));
1040
1041 // Finally, deduce any function attributes based on the fully simplified
1042 // function.
1043 MainCGPipeline.addPass(PostOrderFunctionAttrsPass());
1044
1045 // Mark that the function is fully simplified and that it shouldn't be
1046 // simplified again if we somehow revisit it due to CGSCC mutations unless
1047 // it's been modified since.
1050
1051 if (!isThinLTOPreLink(Phase)) {
1052 MainCGPipeline.addPass(CoroSplitPass(Level != OptimizationLevel::O0));
1053 MainCGPipeline.addPass(CoroAnnotationElidePass());
1054 }
1055
1056 // Make sure we don't affect potential future NoRerun CGSCC adaptors.
1057 MIWP.addLateModulePass(createModuleToFunctionPassAdaptor(
1059
1060 return MIWP;
1061}
1062
1067
1069 // For PreLinkThinLTO + SamplePGO or PreLinkFullLTO + SamplePGO,
1070 // set hot-caller threshold to 0 to disable hot
1071 // callsite inline (as much as possible [1]) because it makes
1072 // profile annotation in the backend inaccurate.
1073 //
1074 // [1] Note the cost of a function could be below zero due to erased
1075 // prologue / epilogue.
1076 if (isLTOPreLink(Phase) && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)
1077 IP.HotCallSiteThreshold = 0;
1078
1079 if (PGOOpt)
1081
1082 // The inline deferral logic is used to avoid losing some
1083 // inlining chance in future. It is helpful in SCC inliner, in which
1084 // inlining is processed in bottom-up order.
1085 // While in module inliner, the inlining order is a priority-based order
1086 // by default. The inline deferral is unnecessary there. So we disable the
1087 // inline deferral logic in module inliner.
1088 IP.EnableDeferral = false;
1089
1092 MPM.addPass(GlobalOptPass());
1093 MPM.addPass(GlobalDCEPass());
1094 MPM.addPass(AssignGUIDPass());
1095 MPM.addPass(PGOCtxProfFlatteningPass(/*IsPreThinlink=*/false));
1096 }
1097
1100 PTO.EagerlyInvalidateAnalyses));
1101
1102 if (!isThinLTOPreLink(Phase)) {
1105 MPM.addPass(
1107 }
1108
1109 return MPM;
1110}
1111
1115 assert(Level != OptimizationLevel::O0 &&
1116 "Should not be used for O0 pipeline");
1117
1119 "FullLTOPostLink shouldn't call buildModuleSimplificationPipeline!");
1120
1122
1123 // Place pseudo probe instrumentation as the first pass of the pipeline to
1124 // minimize the impact of optimization changes.
1125 if (PGOOpt && PGOOpt->PseudoProbeForProfiling && !isThinLTOPostLink(Phase))
1127
1128 bool HasSampleProfile = PGOOpt && (PGOOpt->Action == PGOOptions::SampleUse);
1129
1130 // In ThinLTO mode, when flattened profile is used, all the available
1131 // profile information will be annotated in PreLink phase so there is
1132 // no need to load the profile again in PostLink.
1133 bool LoadSampleProfile =
1134 HasSampleProfile && !(FlattenedProfileUsed && isThinLTOPostLink(Phase));
1135
1136 // During the ThinLTO backend phase we perform early indirect call promotion
1137 // here, before globalopt. Otherwise imported available_externally functions
1138 // look unreferenced and are removed. If we are going to load the sample
1139 // profile then defer until later.
1140 // TODO: See if we can move later and consolidate with the location where
1141 // we perform ICP when we are loading a sample profile.
1142 // TODO: We pass HasSampleProfile (whether there was a sample profile file
1143 // passed to the compile) to the SamplePGO flag of ICP. This is used to
1144 // determine whether the new direct calls are annotated with prof metadata.
1145 // Ideally this should be determined from whether the IR is annotated with
1146 // sample profile, and not whether a sample profile was provided on the
1147 // command line. E.g. for flattened profiles where we will not be reloading
1148 // the sample profile in the ThinLTO backend, we ideally shouldn't have to
1149 // provide the sample profile file.
1150 if (isThinLTOPostLink(Phase) && !LoadSampleProfile)
1151 MPM.addPass(PGOIndirectCallPromotion(true /* InLTO */, HasSampleProfile));
1152
1153 // Create an early function pass manager to cleanup the output of the
1154 // frontend. Not necessary with LTO post link pipelines since the pre link
1155 // pipeline already cleaned up the frontend output.
1156 if (!isThinLTOPostLink(Phase)) {
1157 // Do basic inference of function attributes from known properties of system
1158 // libraries and other oracles.
1160 MPM.addPass(CoroEarlyPass());
1161
1162 FunctionPassManager EarlyFPM;
1163 EarlyFPM.addPass(EntryExitInstrumenterPass(/*PostInlining=*/false));
1164 // Lower llvm.expect to metadata before attempting transforms.
1165 // Compare/branch metadata may alter the behavior of passes like
1166 // SimplifyCFG.
1168 EarlyFPM.addPass(SimplifyCFGPass());
1170 EarlyFPM.addPass(EarlyCSEPass());
1171 if (Level == OptimizationLevel::O3)
1172 EarlyFPM.addPass(CallSiteSplittingPass());
1174 std::move(EarlyFPM), PTO.EagerlyInvalidateAnalyses));
1175 }
1176
1177 if (LoadSampleProfile) {
1178 // Annotate sample profile right after early FPM to ensure freshness of
1179 // the debug info.
1181 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile, Phase, FS));
1182 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
1183 // RequireAnalysisPass for PSI before subsequent non-module passes.
1185 // Do not invoke ICP in the LTOPrelink phase as it makes it hard
1186 // for the profile annotation to be accurate in the LTO backend.
1187 if (!isLTOPreLink(Phase))
1188 // We perform early indirect call promotion here, before globalopt.
1189 // This is important for the ThinLTO backend phase because otherwise
1190 // imported available_externally functions look unreferenced and are
1191 // removed.
1192 MPM.addPass(
1193 PGOIndirectCallPromotion(true /* IsInLTO */, true /* SamplePGO */));
1194 }
1195
1196 // Try to perform OpenMP specific optimizations on the module. This is a
1197 // (quick!) no-op if there are no OpenMP runtime calls present in the module.
1199
1201 MPM.addPass(AttributorPass());
1204
1205 // Lower type metadata and the type.test intrinsic in the ThinLTO
1206 // post link pipeline after ICP. This is to enable usage of the type
1207 // tests in ICP sequences.
1210
1212
1213 // Interprocedural constant propagation now that basic cleanup has occurred
1214 // and prior to optimizing globals.
1215 // FIXME: This position in the pipeline hasn't been carefully considered in
1216 // years, it should be re-analyzed.
1217 MPM.addPass(
1218 IPSCCPPass(IPSCCPOptions(/*AllowFuncSpec=*/!isLTOPreLink(Phase))));
1219
1220 // Attach metadata to indirect call sites indicating the set of functions
1221 // they may target at run-time. This should follow IPSCCP.
1223
1224 // Optimize globals to try and fold them into constants.
1225 MPM.addPass(GlobalOptPass());
1226
1227 // Create a small function pass pipeline to cleanup after all the global
1228 // optimizations.
1229 FunctionPassManager GlobalCleanupPM;
1230 // FIXME: Should this instead by a run of SROA?
1231 GlobalCleanupPM.addPass(PromotePass());
1232 GlobalCleanupPM.addPass(InstCombinePass());
1233 invokePeepholeEPCallbacks(GlobalCleanupPM, Level);
1234 GlobalCleanupPM.addPass(
1235 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
1236 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(GlobalCleanupPM),
1237 PTO.EagerlyInvalidateAnalyses));
1238
1239 // We already asserted this happens in non-FullLTOPostLink earlier.
1240 const bool IsPreLink = !isThinLTOPostLink(Phase);
1241 // Enable contextual profiling instrumentation.
1242 const bool IsCtxProfGen =
1244 const bool IsPGOPreLink = !IsCtxProfGen && PGOOpt && IsPreLink;
1245 const bool IsPGOInstrGen =
1246 IsPGOPreLink && PGOOpt->Action == PGOOptions::IRInstr;
1247 const bool IsPGOInstrUse =
1248 IsPGOPreLink && PGOOpt->Action == PGOOptions::IRUse;
1249 const bool IsMemprofUse = IsPGOPreLink && !PGOOpt->MemoryProfile.empty();
1250 // We don't want to mix pgo ctx gen and pgo gen; we also don't currently
1251 // enable ctx profiling from the frontend.
1253 "Enabling both instrumented PGO and contextual instrumentation is not "
1254 "supported.");
1255 const bool IsCtxProfUse = !UseCtxProfile.empty() && isThinLTOPreLink(Phase);
1256
1257 assert(
1259 "--instrument-cold-function-only-path is provided but "
1260 "--pgo-instrument-cold-function-only is not enabled");
1261 const bool IsColdFuncOnlyInstrGen = PGOInstrumentColdFunctionOnly &&
1262 IsPGOPreLink &&
1264
1265 if (IsPGOInstrGen || IsPGOInstrUse || IsMemprofUse || IsCtxProfGen ||
1266 IsCtxProfUse || IsColdFuncOnlyInstrGen)
1267 addPreInlinerPasses(MPM, Level, Phase);
1268
1269 // Add all the requested passes for instrumentation PGO, if requested.
1270 if (IsPGOInstrGen || IsPGOInstrUse) {
1271 addPGOInstrPasses(MPM, Level,
1272 /*RunProfileGen=*/IsPGOInstrGen,
1273 /*IsCS=*/false, PGOOpt->AtomicCounterUpdate,
1274 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile);
1275 } else if (IsCtxProfGen || IsCtxProfUse) {
1277 // In pre-link, we just want the instrumented IR. We use the contextual
1278 // profile in the post-thinlink phase.
1279 // The instrumentation will be removed in post-thinlink after IPO.
1280 if (IsCtxProfUse) {
1281 MPM.addPass(AssignGUIDPass());
1282 MPM.addPass(PGOCtxProfFlatteningPass(/*IsPreThinlink=*/true));
1283 return MPM;
1284 }
1285 // Block further inlining in the instrumented ctxprof case. This avoids
1286 // confusingly collecting profiles for the same GUID corresponding to
1287 // different variants of the function. We could do like PGO and identify
1288 // functions by a (GUID, Hash) tuple, but since the ctxprof "use" waits for
1289 // thinlto to happen before performing any further optimizations, it's
1290 // unnecessary to collect profiles for non-prevailing copies.
1292 addPostPGOLoopRotation(MPM, Level);
1293 MPM.addPass(AssignGUIDPass());
1295 } else if (IsColdFuncOnlyInstrGen) {
1296 addPGOInstrPasses(MPM, Level, /* RunProfileGen */ true, /* IsCS */ false,
1297 /* AtomicCounterUpdate */ false,
1299 /* ProfileRemappingFile */ "");
1300 }
1301
1302 if (IsPGOInstrGen || IsPGOInstrUse || IsCtxProfGen)
1303 MPM.addPass(PGOIndirectCallPromotion(false, false));
1304
1305 if (IsPGOPreLink && PGOOpt->CSAction == PGOOptions::CSIRInstr)
1306 MPM.addPass(PGOInstrumentationGenCreateVar(PGOOpt->CSProfileGenFile,
1308
1309 if (IsMemprofUse)
1310 MPM.addPass(MemProfUsePass(PGOOpt->MemoryProfile, FS));
1311
1312 if (PGOOpt && (PGOOpt->Action == PGOOptions::IRUse ||
1313 PGOOpt->Action == PGOOptions::SampleUse))
1314 MPM.addPass(PGOForceFunctionAttrsPass(PGOOpt->ColdOptType));
1315
1316 MPM.addPass(AlwaysInlinerPass(/*InsertLifetimeIntrinsics=*/true));
1317
1320 else
1321 MPM.addPass(buildInlinerPipeline(Level, Phase));
1322
1323 // Remove any dead arguments exposed by cleanups, constant folding globals,
1324 // and argument promotion.
1326
1329
1330 if (!isThinLTOPreLink(Phase))
1331 MPM.addPass(CoroCleanupPass());
1332
1333 // Optimize globals now that functions are fully simplified.
1334 MPM.addPass(GlobalOptPass());
1335 MPM.addPass(GlobalDCEPass());
1336
1337 return MPM;
1338}
1339
1340/// TODO: Should LTO cause any differences to this set of passes?
///
/// Appends the vectorization-related function passes and their follow-up
/// cleanup passes to FPM. The exact sequence depends on \p Level, on
/// \p LTOPhase (full LTO post-link uses a different ordering of the unrolling
/// and SROA steps), and on the PTO tuning options consulted below.
1341void PassBuilder::addVectorPasses(OptimizationLevel Level,
1343 ThinOrFullLTOPhase LTOPhase) {
1346
1347 // Drop dereferenceable assumes after vectorization, as they are no longer
1348 // needed and can inhibit further optimization.
1349 if (!isLTOPreLink(LTOPhase))
1350 FPM.addPass(DropUnnecessaryAssumesPass(/*DropDereferenceable=*/true));
1351
1353 if (isFullLTOPostLink(LTOPhase)) {
1354 // The vectorizer may have significantly shortened a loop body; unroll
1355 // again. Unroll small loops to hide loop backedge latency and saturate any
1356 // parallel execution resources of an out-of-order processor. We also then
1357 // need to clean up redundancies and loop invariant code.
1358 // FIXME: It would be really good to use a loop-integrated instruction
1359 // combiner for cleanup here so that the unrolling and LICM can be pipelined
1360 // across the loop nests.
1361 // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
1364 LoopUnrollAndJamPass(Level.getSpeedupLevel())));
1366 Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
1369 // Now that we are done with loop unrolling, be it either by LoopVectorizer,
1370 // or LoopUnroll passes, some variable-offset GEP's into alloca's could have
1371 // become constant-offset, thus enabling SROA and alloca promotion. Do so.
1372 // NOTE: we are very late in the pipeline, and we don't have any LICM
1373 // or SimplifyCFG passes scheduled after us, that would cleanup
1374 // the CFG mess this may create if allowed to modify CFG, so forbid that.
1376 }
1377
1378 if (!isFullLTOPostLink(LTOPhase)) {
1379 // Eliminate loads by forwarding stores from the previous iteration to loads
1380 // of the current iteration.
1382 }
1383 // Cleanup after the loop optimization passes.
1384 FPM.addPass(InstCombinePass());
1385
1386 if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
1387 ExtraFunctionPassManager<ShouldRunExtraVectorPasses> ExtraPasses;
1388 // At higher optimization levels, try to clean up any runtime overlap and
1389 // alignment checks inserted by the vectorizer. We want to track correlated
1390 // runtime checks for two inner loops in the same outer loop, fold any
1391 // common computations, hoist loop-invariant aspects out of any outer loop,
1392 // and unswitch the runtime checks if possible. Once hoisted, we may have
1393 // dead (or speculatable) control flows or more combining opportunities.
1394 ExtraPasses.addPass(EarlyCSEPass());
1395 ExtraPasses.addPass(CorrelatedValuePropagationPass());
1396 ExtraPasses.addPass(InstCombinePass());
1397 LoopPassManager LPM;
1398 LPM.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
1399 /*AllowSpeculation=*/true));
1400 LPM.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
1402 ExtraPasses.addPass(
1403 createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/true));
1404 ExtraPasses.addPass(
1405 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
1406 ExtraPasses.addPass(InstCombinePass());
1407 FPM.addPass(std::move(ExtraPasses));
1408 }
1409
1410 // Now that we've formed fast to execute loop structures, we do further
1411 // optimizations. These are run afterward as they might block doing complex
1412 // analyses and transforms such as what are needed for loop vectorization.
1413
1414 // Cleanup after loop vectorization, etc. Simplification passes like CVP and
1415 // GVN, loop transforms, and others have already run, so it's now better to
1416 // convert to more optimized IR using more aggressive simplify CFG options.
1417 // The extra sinking transform can create larger basic blocks, so do this
1418 // before SLP vectorization.
1419 FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
1420 .forwardSwitchCondToPhi(true)
1421 .convertSwitchRangeToICmp(true)
1422 .convertSwitchToArithmetic(true)
1423 .convertSwitchToLookupTable(true)
1424 .needCanonicalLoops(false)
1425 .hoistCommonInsts(true)
1426 .sinkCommonInsts(true)));
1427
// Full LTO post-link only: extra SCCP / InstCombine / BDCE round before SLP.
1428 if (isFullLTOPostLink(LTOPhase)) {
1429 FPM.addPass(SCCPPass());
1430 FPM.addPass(InstCombinePass());
1431 FPM.addPass(BDCEPass());
1432 }
1433
1434 // Optimize parallel scalar instruction chains into SIMD instructions.
1435 if (PTO.SLPVectorization) {
1436 FPM.addPass(SLPVectorizerPass());
1437 if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
1438 FPM.addPass(EarlyCSEPass());
1439 }
1440 }
1441 // Enhance/cleanup vector code.
1442 FPM.addPass(VectorCombinePass());
1443
1444 if (!isFullLTOPostLink(LTOPhase)) {
1445 FPM.addPass(InstCombinePass());
1446 // Unroll small loops to hide loop backedge latency and saturate any
1447 // parallel execution resources of an out-of-order processor. We also then
1448 // need to clean up redundancies and loop invariant code.
1449 // FIXME: It would be really good to use a loop-integrated instruction
1450 // combiner for cleanup here so that the unrolling and LICM can be pipelined
1451 // across the loop nests.
1452 // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
1453 if (EnableUnrollAndJam && PTO.LoopUnrolling) {
1455 LoopUnrollAndJamPass(Level.getSpeedupLevel())));
1456 }
1457 FPM.addPass(LoopUnrollPass(LoopUnrollOptions(
1458 Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
1459 PTO.ForgetAllSCEVInLoopUnroll)));
1460 FPM.addPass(WarnMissedTransformationsPass());
1461 // Now that we are done with loop unrolling, be it either by LoopVectorizer,
1462 // or LoopUnroll passes, some variable-offset GEP's into alloca's could have
1463 // become constant-offset, thus enabling SROA and alloca promotion. Do so.
1464 // NOTE: we are very late in the pipeline, and we don't have any LICM
1465 // or SimplifyCFG passes scheduled after us, that would cleanup
1466 // the CFG mess this may create if allowed to modify CFG, so forbid that.
1467 FPM.addPass(SROAPass(SROAOptions::PreserveCFG));
1468 }
1469
1470 FPM.addPass(InferAlignmentPass());
1471 FPM.addPass(InstCombinePass());
1472
1473 // This is needed for two reasons:
1474 // 1. It works around problems that instcombine introduces, such as sinking
1475 // expensive FP divides into loops containing multiplications using the
1476 // divide result.
1477 // 2. It helps to clean up some loop-invariant code created by the loop
1478 // unroll pass when not in the full LTO post-link phase.
1480 LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
1481 /*AllowSpeculation=*/true),
1482 /*UseMemorySSA=*/true));
1483
1484 // Now that we've vectorized and unrolled loops, we may have more refined
1485 // alignment information, try to re-derive it here.
1486 FPM.addPass(AlignmentFromAssumptionsPass());
1487}
1488
1491 ThinOrFullLTOPhase LTOPhase) {
// Module optimization pipeline body: module-level passes are appended to MPM,
// the function-level "OptimizePM" pipeline (loop passes, vectorization, late
// cleanup) is built and wrapped into MPM, and MPM is returned. Several steps
// are skipped or altered when LTOPhase is an LTO pre-link phase.
1493
1494 // Run partial inlining pass to partially inline functions that have
1495 // large bodies.
1498
1499 // Remove avail extern fns and globals definitions since we aren't compiling
1500 // an object file for later LTO. For LTO we want to preserve these so they
1501 // are eligible for inlining at link-time. Note if they are unreferenced they
1502 // will be removed by GlobalDCE later, so this only impacts referenced
1503 // available externally globals. Eventually they will be suppressed during
1504 // codegen, but eliminating here enables more opportunity for GlobalDCE as it
1505 // may make globals referenced by available external functions dead and saves
1506 // running remaining passes on the eliminated functions. These should be
1507 // preserved during prelinking for link-time inlining decisions.
1508 if (!isLTOPreLink(LTOPhase))
1510
1511 // Do RPO function attribute inference across the module to forward-propagate
1512 // attributes where applicable.
1513 // FIXME: Is this really an optimization rather than a canonicalization?
1515
1516 // Do a post inline PGO instrumentation and use pass. This is a context
1517 // sensitive PGO pass. We don't want to do this in LTOPreLink phase as
1518 // cross-module inline has not been done yet. The context sensitive
1519 // instrumentation is after all the inlines are done.
1520 if (!isLTOPreLink(LTOPhase) && PGOOpt) {
1521 if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
1522 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true,
1523 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
1524 PGOOpt->CSProfileGenFile, PGOOpt->ProfileRemappingFile);
1525 else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
1526 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false,
1527 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
1528 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile);
1529 }
1530
1531 // Re-compute GlobalsAA here prior to function passes. This is particularly
1532 // useful as the above will have inlined, DCE'ed, and function-attr
1533 // propagated everything. We should at this point have a reasonably minimal
1534 // and richly annotated call graph. By computing aliasing and mod/ref
1535 // information for all local globals here, the late loop passes and notably
1536 // the vectorizer will be able to use them to help recognize vectorizable
1537 // memory operations.
1540
1541 invokeOptimizerEarlyEPCallbacks(MPM, Level, LTOPhase);
1542
1543 FunctionPassManager OptimizePM;
1544
1545 // Only drop unnecessary assumes post-inline and post-link, as otherwise
1546 // additional uses of the affected value may be introduced through inlining
1547 // and CSE.
1548 if (!isLTOPreLink(LTOPhase))
1549 OptimizePM.addPass(DropUnnecessaryAssumesPass());
1550
1551 // Scheduling LoopVersioningLICM when inlining is over, because after that
1552 // we may see more accurate aliasing. Reason to run this late is that too
1553 // early versioning may prevent further inlining due to increase of code
1554 // size. Other optimizations which run later might benefit from the no-alias
1555 // assumption in the cloned loop.
1557 OptimizePM.addPass(
1559 // LoopVersioningLICM pass might increase new LICM opportunities.
1561 LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
1562 /*AllowSpeculation=*/true),
1563 /*UseMemorySSA=*/true));
1564 }
1565
1566 OptimizePM.addPass(Float2IntPass());
1568
1569 if (EnableMatrix) {
1570 OptimizePM.addPass(LowerMatrixIntrinsicsPass());
1571 OptimizePM.addPass(EarlyCSEPass());
1572 }
1573
1574 // CHR pass should only be applied with the profile information.
1575 // The check is to check the profile summary information in CHR.
1576 if (EnableCHR && Level == OptimizationLevel::O3)
1577 OptimizePM.addPass(ControlHeightReductionPass());
1578
1579 // FIXME: We need to run some loop optimizations to re-rotate loops after
1580 // simplifycfg and others undo their rotation.
1581
1582 // Optimize the loop execution. These passes operate on entire loop nests
1583 // rather than on each loop in an inside-out manner, and so they are actually
1584 // function passes.
1585
1586 invokeVectorizerStartEPCallbacks(OptimizePM, Level);
1587
1588 LoopPassManager LPM;
1589 // First rotate loops that may have been un-rotated by prior passes.
1590 // NOTE(review): header duplication is passed as `true` unconditionally
// below; the former "disable header duplication at -Oz" remark looks stale --
// confirm. The second argument is whether this is an LTO pre-link phase.
1591 LPM.addPass(LoopRotatePass(/*EnableLoopHeaderDuplication=*/true,
1592 isLTOPreLink(LTOPhase),
1593 /*CheckExitCount=*/true));
1594 // Some loops may have become dead by now. Try to delete them.
1595 // FIXME: see discussion in https://reviews.llvm.org/D112851,
1596 // this may need to be revisited once we run GVN before loop deletion
1597 // in the simplification pipeline.
1598 LPM.addPass(LoopDeletionPass());
1599
1600 if (PTO.LoopInterchange)
1601 LPM.addPass(LoopInterchangePass());
1602
1603 OptimizePM.addPass(
1604 createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/false));
1605
1606 // FIXME: This may not be the right place in the pipeline.
1607 // We need to have the data to support the right place.
1608 if (PTO.LoopFusion)
1609 OptimizePM.addPass(LoopFusePass());
1610
1611 // Distribute loops to allow partial vectorization. I.e. isolate dependences
1612 // into separate loop that would otherwise inhibit vectorization. This is
1613 // currently only performed for loops marked with the metadata
1614 // llvm.loop.distribute=true or when -enable-loop-distribute is specified.
1615 OptimizePM.addPass(LoopDistributePass());
1616
1617 // Populates the VFABI attribute with the scalar-to-vector mappings
1618 // from the TargetLibraryInfo.
1619 OptimizePM.addPass(InjectTLIMappings());
1620
// The vectorization passes are added between the VectorizerStart and
// VectorizerEnd extension-point callbacks.
1621 addVectorPasses(Level, OptimizePM, LTOPhase);
1622
1623 invokeVectorizerEndEPCallbacks(OptimizePM, Level);
1624
1625 // LoopSink pass sinks instructions hoisted by LICM, which serves as a
1626 // canonicalization pass that enables other optimizations. As a result,
1627 // LoopSink pass needs to be a very late IR pass to avoid undoing LICM
1628 // result too early.
1629 OptimizePM.addPass(LoopSinkPass());
1630
1631 // And finally clean up LCSSA form before generating code.
1632 OptimizePM.addPass(InstSimplifyPass());
1633
1634 // This hoists/decomposes div/rem ops. It should run after other sink/hoist
1635 // passes to avoid re-sinking, but before SimplifyCFG because it can allow
1636 // flattening of blocks.
1637 OptimizePM.addPass(DivRemPairsPass());
1638
1639 // Merge adjacent icmps into memcmp, then expand memcmp to loads/compares.
1640 // TODO: move this further up so that it can be optimized by GVN, etc.
1641 if (EnableMergeICmps)
1642 OptimizePM.addPass(MergeICmpsPass());
1643 OptimizePM.addPass(ExpandMemCmpPass());
1644
1645 // Try to annotate calls that were created during optimization.
1646 OptimizePM.addPass(
1647 TailCallElimPass(/*UpdateFunctionEntryCount=*/isInstrumentedPGOUse()));
1648
1649 // LoopSink (and other loop passes since the last simplifyCFG) might have
1650 // resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
1651 OptimizePM.addPass(
1653 .convertSwitchRangeToICmp(true)
1654 .convertSwitchToArithmetic(true)
1655 .speculateUnpredictables(true)
1656 .hoistLoadsStoresWithCondFaulting(true)));
1657
1658 // Add the core optimizing pipeline.
1659 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM),
1660 PTO.EagerlyInvalidateAnalyses));
1661
1662 // AllocToken transforms heap allocation calls; this needs to run late after
1663 // other allocation call transformations (such as those in InstCombine).
1664 if (!isLTOPreLink(LTOPhase))
1665 MPM.addPass(AllocTokenPass());
1666
1667 invokeOptimizerLastEPCallbacks(MPM, Level, LTOPhase);
1668
1669 // Run the Instrumentor pass late.
1671 MPM.addPass(InstrumentorPass(FS));
1672
1673 // Split out cold code. Splitting is done late to avoid hiding context from
1674 // other optimizations and inadvertently regressing performance. The tradeoff
1675 // is that this has a higher code size cost than splitting early.
1676 if (EnableHotColdSplit && !isLTOPreLink(LTOPhase))
1678
1679 // Search the code for similar regions of code. If enough similar regions can
1680 // be found where extracting the regions into their own function will decrease
1681 // the size of the program, we extract the regions, and deduplicate the
1682 // structurally similar regions.
1683 if (EnableIROutliner)
1684 MPM.addPass(IROutlinerPass());
1685
1686 // Now we need to do some global optimization transforms.
1687 // FIXME: It would seem like these should come first in the optimization
1688 // pipeline and maybe be the bottom of the canonicalization pipeline? Weird
1689 // ordering here.
1690 MPM.addPass(GlobalDCEPass());
1692
1693 // Merge functions if requested. It has a better chance to merge functions
1694 // after ConstantMerge folded jump tables.
1695 if (PTO.MergeFunctions)
1697
// CGProfilePass is skipped in LTO pre-link; it is told whether this is an LTO
// post-link phase.
1698 if (PTO.CallGraphProfile && !isLTOPreLink(LTOPhase))
1699 MPM.addPass(CGProfilePass(isLTOPostLink(LTOPhase)));
1700
1701 // RelLookupTableConverterPass runs later in LTO post-link pipeline.
1702 if (!isLTOPreLink(LTOPhase))
1704
1705 // Add devirtualization pass only when LTO is not enabled, as otherwise
1706 // the pass is already enabled in the LTO pipeline.
1707 if (PTO.DevirtualizeSpeculatively && LTOPhase == ThinOrFullLTOPhase::None) {
1708 // TODO: explore a better pipeline configuration that can improve
1709 // compilation time overhead.
1710 // FIXME: move this earlier (lots of pass ordering tests will need fixing)
1711 MPM.addPass(AssignGUIDPass());
1713 /*ExportSummary*/ nullptr,
1714 /*ImportSummary*/ nullptr,
1715 /*DevirtSpeculatively*/ PTO.DevirtualizeSpeculatively));
1717 // Given that the devirtualization creates more opportunities for inlining,
1718 // we run the Inliner again here to maximize the optimization gain we
1719 // get from devirtualization.
1720 // Also, we can't run devirtualization before inlining because the
1721 // devirtualization depends on the passes optimizing/eliminating vtable GVs
1722 // and those passes are only effective after inlining.
1723 if (EnableModuleInliner) {
1727 } else {
1730 /* MandatoryFirst */ true,
1732 }
1733 }
1734 return MPM;
1735}
1736
// O0 is handled entirely by the dedicated O0 pipeline builder; everything
// below applies only to the optimizing levels.
1740 if (Level == OptimizationLevel::O0)
1741 return buildO0DefaultPipeline(Level, Phase);
1742
// instructionCountersPass is invoked once here with IsPreOptimization=true
// and once more, with false, just before returning.
1744 instructionCountersPass(MPM, /* IsPreOptimization */ true);
1745 // Currently this pipeline is only invoked in an LTO pre link pass or when we
1746 // are not running LTO. If that changes the below checks may need updating.
1748
1749 // If we are invoking this in non-LTO mode, remove any MemProf related
1750 // attributes and metadata, as we don't know whether we are linking with
1751 // a library containing the necessary interfaces.
1754
1755 // Convert @llvm.global.annotations to !annotation metadata.
1757
1758 // Force any function attributes we want the rest of the pipeline to observe.
1760
1761 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
1763
1764 // Apply module pipeline start EP callback.
1766
1767 // Add the core simplification pipeline.
1769
1770 // Now add the optimization pipeline.
1772
1773 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1774 PGOOpt->Action == PGOOptions::SampleUse)
1776
1777 // Emit annotation remarks.
1779
// LTO pre-link compiles additionally get the required LTO pre-link passes.
1780 if (isLTOPreLink(Phase))
1781 addRequiredLTOPreLinkPasses(MPM);
1782
1783 instructionCountersPass(MPM, /* IsPreOptimization */ false);
1784 return MPM;
1785}
1786
1789 bool EmitSummary) {
// FatLTO pipeline body: bitcode is embedded via EmbedBitcodePass (configured
// by ThinLTO and EmitSummary), FatLTO-specific cleanup runs, and then the
// module is optimized further before MPM is returned.
1791
1792 instructionCountersPass(MPM, /* IsPreOptimization */ true);
1793
1794 if (ThinLTO)
1796 else
1798 MPM.addPass(EmbedBitcodePass(ThinLTO, EmitSummary));
1799
1800 // Perform any cleanups to the IR that aren't suitable for per TU compilation,
1801 // like removing CFI/WPD related instructions. Note, we reuse
1802 // DropTypeTestsPass to clean up type tests rather than duplicate that logic
1803 // in FatLtoCleanup.
1804 MPM.addPass(FatLtoCleanup());
1805
1806 // If we're doing FatLTO w/ CFI enabled, we don't want the type tests in the
1807 // object code, only in the bitcode section, so drop it before we run
1808 // module optimization and generate machine code. If llvm.type.test() isn't in
1809 // the IR, this won't do anything.
1811
1812 // Use the ThinLTO post-link pipeline with sample profiling. No import
1813 // summary is available here, so nullptr is passed.
1813 if (ThinLTO && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)
1814 MPM.addPass(buildThinLTODefaultPipeline(Level, /*ImportSummary=*/nullptr));
1815 else {
1816 // ModuleSimplification does not run the coroutine passes for
1817 // ThinLTOPreLink, so we need the coroutine passes to run for ThinLTO
1818 // builds, otherwise they will miscompile.
1819 if (ThinLTO) {
1820 // TODO: replace w/ buildCoroWrapper() when it takes phase and level into
1821 // consideration.
1822 CGSCCPassManager CGPM;
1826 MPM.addPass(CoroCleanupPass());
1827 }
1828
1829 // Otherwise, just use module optimization.
1830 MPM.addPass(
1832 // Emit annotation remarks.
1834 }
1835
1836 instructionCountersPass(MPM, /* IsPreOptimization */ false);
1837
1838 return MPM;
1839}
1840
// ThinLTO pre-link pipeline body: the module is simplified here, while the
// heavier optimizations are left to the post-link phase (see the comments
// below). O0 is special-cased first.
1843 if (Level == OptimizationLevel::O0)
1845
1847
1848 instructionCountersPass(MPM, /* IsPreOptimization */ true);
1849
1850 // Convert @llvm.global.annotations to !annotation metadata.
1852
1853 // Force any function attributes we want the rest of the pipeline to observe.
1855
1856 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
1858
1859 // Apply module pipeline start EP callback.
1861
1862 // If we are planning to perform ThinLTO later, we don't bloat the code with
1863 // unrolling/vectorization/... now. Just simplify the module as much as we
1864 // can.
1867 // In pre-link, for ctx prof use, we stop here with an instrumented IR. We let
1868 // thinlto use the contextual info to perform imports; then use the contextual
1869 // profile in the post-thinlink phase.
1870 if (!UseCtxProfile.empty()) {
// Early exit: only the required LTO pre-link passes are added on this path.
1871 addRequiredLTOPreLinkPasses(MPM);
1872 return MPM;
1873 }
1874
1875 // Run partial inlining pass to partially inline functions that have
1876 // large bodies.
1877 // FIXME: It isn't clear whether this is really the right place to run this
1878 // in ThinLTO. Because there is another canonicalization and simplification
1879 // phase that will run after the thin link, running this here ends up with
1880 // less information than will be available later and it may grow functions in
1881 // ways that aren't beneficial.
1884
1885 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1886 PGOOpt->Action == PGOOptions::SampleUse)
1888
1889 // Handle Optimizer{Early,Last}EPCallbacks added by clang on PreLink. Actual
1890 // optimization is going to be done in PostLink stage, but clang can't add
1891 // callbacks there in case of in-process ThinLTO called by linker.
1896
1897 // Emit annotation remarks.
1899
1900 addRequiredLTOPreLinkPasses(MPM);
1901
1902 instructionCountersPass(MPM, /* IsPreOptimization */ false);
1903
1904 return MPM;
1905}
1906
1908 OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary) {
// ThinLTO post-link pipeline body. ImportSummary may be null (a caller in
// this file passes nullptr); the summary-dependent passes below are then
// skipped. O0 takes a short path that returns before the simplification and
// optimization pipelines are added.
1910
1911 instructionCountersPass(MPM, /* IsPreOptimization */ true);
1912
1913 // If we are invoking this without a summary index noting that we are linking
1914 // with a library containing the necessary APIs, remove any MemProf related
1915 // attributes and metadata.
1916 if (!ImportSummary || !ImportSummary->withSupportsHotColdNew())
1918
1919 if (ImportSummary) {
1920 // For ThinLTO we must apply the context disambiguation decisions early, to
1921 // ensure we can correctly match the callsites to summary data.
1924 ImportSummary, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
1925
1926 // These passes import type identifier resolutions for whole-program
1927 // devirtualization and CFI. They must run early because other passes may
1928 // disturb the specific instruction patterns that these passes look for,
1929 // creating dependencies on resolutions that may not appear in the summary.
1930 //
1931 // For example, GVN may transform the pattern assume(type.test) appearing in
1932 // two basic blocks into assume(phi(type.test, type.test)), which would
1933 // transform a dependency on a WPD resolution into a dependency on a type
1934 // identifier resolution for CFI.
1935 //
1936 // Also, WPD has access to more precise information than ICP and can
1937 // devirtualize more effectively, so it should operate on the IR first.
1938 //
1939 // The WPD and LowerTypeTest passes need to run at -O0 to lower type
1940 // metadata and intrinsics.
1941 MPM.addPass(WholeProgramDevirtPass(nullptr, ImportSummary));
1942 MPM.addPass(LowerTypeTestsPass(nullptr, ImportSummary));
1943 }
1944
1945 if (Level == OptimizationLevel::O0) {
1946 // Run a second time to clean up any type tests left behind by WPD for use
1947 // in ICP.
1950
1951 // AllocToken transforms heap allocation calls; this needs to run late after
1952 // other allocation call transformations (such as those in InstCombine).
1953 MPM.addPass(AllocTokenPass());
1954
1955 // Drop available_externally and unreferenced globals. This is necessary
1956 // with ThinLTO in order to avoid leaving undefined references to dead
1957 // globals in the object file.
1959 MPM.addPass(GlobalDCEPass());
1960 return MPM;
1961 }
// A non-empty UseCtxProfile selects a different pass than the core
// simplification pipeline.
1962 if (!UseCtxProfile.empty()) {
1963 MPM.addPass(
1965 } else {
1966 // Add the core simplification pipeline.
1969 }
1970 // Now add the optimization pipeline.
1973
1974 // Emit annotation remarks.
1976
1977 instructionCountersPass(MPM, /* IsPreOptimization */ false);
1978
1979 return MPM;
1980}
1981
1984 // FIXME: We should use a customized pre-link pipeline!
// Until then, this simply delegates to the per-module default pipeline.
1985 return buildPerModuleDefaultPipeline(Level,
1987}
1988
1991 ModuleSummaryIndex *ExportSummary) {
1993
1994 instructionCountersPass(MPM, /* IsPreOptimization */ true);
1995
1997
1998 // If we are invoking this without a summary index noting that we are linking
1999 // with a library containing the necessary APIs, remove any MemProf related
2000 // attributes and metadata.
2001 if (!ExportSummary || !ExportSummary->withSupportsHotColdNew())
2003
2004 // Create a function that performs CFI checks for cross-DSO calls with targets
2005 // in the current module.
2006 MPM.addPass(CrossDSOCFIPass());
2007
2008 if (Level == OptimizationLevel::O0) {
2009 // The WPD and LowerTypeTest passes need to run at -O0 to lower type
2010 // metadata and intrinsics.
2011 MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
2012 MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
2013 // Run a second time to clean up any type tests left behind by WPD for use
2014 // in ICP.
2016
2018
2019 // AllocToken transforms heap allocation calls; this needs to run late after
2020 // other allocation call transformations (such as those in InstCombine).
2021 MPM.addPass(AllocTokenPass());
2022
2024
2025 // Emit annotation remarks.
2027
2028 return MPM;
2029 }
2030
2031 if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) {
2032 // Load sample profile before running the LTO optimization pipeline.
2033 MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
2034 PGOOpt->ProfileRemappingFile,
2036 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
2037 // RequireAnalysisPass for PSI before subsequent non-module passes.
2039 }
2040
2041 // Try to run OpenMP optimizations, quick no-op if no OpenMP metadata present.
2043
2044 // Remove unused virtual tables to improve the quality of code generated by
2045 // whole-program devirtualization and bitset lowering.
2046 MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));
2047
2048 // Do basic inference of function attributes from known properties of system
2049 // libraries and other oracles.
2051
2052 if (Level.getSpeedupLevel() > 1) {
2054 CallSiteSplittingPass(), PTO.EagerlyInvalidateAnalyses));
2055
2056 // Indirect call promotion. This should promote all the targets that are
2057 // left by the earlier promotion pass that promotes intra-module targets.
2058 // This two-step promotion is to save the compile time. For LTO, it should
2059 // produce the same result as if we only do promotion here.
2061 true /* InLTO */, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
2062
2063 // Promoting by-reference arguments to by-value exposes more constants to
2064 // IPSCCP.
2065 CGSCCPassManager CGPM;
2068 CGPM.addPass(
2071
2072 // Propagate constants at call sites into the functions they call. This
2073 // opens opportunities for globalopt (and inlining) by substituting function
2074 // pointers passed as arguments to direct uses of functions.
2075 MPM.addPass(IPSCCPPass(IPSCCPOptions(/*AllowFuncSpec=*/true)));
2076
2077 // Attach metadata to indirect call sites indicating the set of functions
2078 // they may target at run-time. This should follow IPSCCP.
2080 }
2081
2082 // Do RPO function attribute inference across the module to forward-propagate
2083 // attributes where applicable.
2084 // FIXME: Is this really an optimization rather than a canonicalization?
2086
2087 // Use in-range annotations on GEP indices to split globals where beneficial.
2088 MPM.addPass(GlobalSplitPass());
2089
2090 // Run whole program optimization of virtual call when the list of callees
2091 // is fixed.
2092 MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
2093
2095 // Stop here at -O1.
2096 if (Level == OptimizationLevel::O1) {
2097 // The LowerTypeTestsPass needs to run to lower type metadata and the
2098 // type.test intrinsics. The pass does nothing if CFI is disabled.
2099 MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
2100 // Run a second time to clean up any type tests left behind by WPD for use
2101 // in ICP (which is performed earlier than this in the regular LTO
2102 // pipeline).
2104
2106
2107 // AllocToken transforms heap allocation calls; this needs to run late after
2108 // other allocation call transformations (such as those in InstCombine).
2109 MPM.addPass(AllocTokenPass());
2110
2112
2113 // Emit annotation remarks.
2115
2116 instructionCountersPass(MPM, /* IsPreOptimization */ false);
2117
2118 return MPM;
2119 }
2120
2121 // TODO: Skip to match buildCoroWrapper.
2122 MPM.addPass(CoroEarlyPass());
2123
2124 // Optimize globals to try and fold them into constants.
2125 MPM.addPass(GlobalOptPass());
2126
2127 // Promote any localized globals to SSA registers.
2129
2130 // Linking modules together can lead to duplicate global constants; keep
2131 // only one copy of each constant.
2133
2134 // Remove unused arguments from functions.
2136
2137 // Reduce the code after globalopt and ipsccp. Both can open up significant
2138 // simplification opportunities, and both can propagate functions through
2139 // function pointers. When this happens, we often have to resolve varargs
2140 // calls, etc, so let instcombine do this.
2141 FunctionPassManager PeepholeFPM;
2142 PeepholeFPM.addPass(InstCombinePass());
2143 if (Level.getSpeedupLevel() > 1)
2144 PeepholeFPM.addPass(AggressiveInstCombinePass());
2145 invokePeepholeEPCallbacks(PeepholeFPM, Level);
2146
2147 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(PeepholeFPM),
2148 PTO.EagerlyInvalidateAnalyses));
2149
2150 // Lower variadic functions for supported targets prior to inlining.
2152
2153 // Note: historically, the PruneEH pass was run first to deduce nounwind and
2154 // generally clean up exception handling overhead. It isn't clear this is
2155 // valuable as the inliner doesn't currently care whether it is inlining an
2156 // invoke or a call.
2157 // Run the inliner now.
2158 if (EnableModuleInliner) {
2162 } else {
2165 /* MandatoryFirst */ true,
2168 }
2169
2170 // Perform context disambiguation after inlining, since that would reduce the
2171 // amount of additional cloning required to distinguish the allocation
2172 // contexts.
2175 /*Summary=*/nullptr,
2176 PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
2177
2178 // Optimize globals again after we ran the inliner.
2179 MPM.addPass(GlobalOptPass());
2180
2181 // Run the OpenMPOpt pass again after global optimizations.
2183
2184 // Garbage collect dead functions.
2185 MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));
2186
2187 // If we didn't decide to inline a function, check to see if we can
2188 // transform it to pass arguments by value instead of by reference.
2189 CGSCCPassManager CGPM;
2194
2196 // The IPO Passes may leave cruft around. Clean up after them.
2197 FPM.addPass(InstCombinePass());
2198 invokePeepholeEPCallbacks(FPM, Level);
2199
2202
2204
2205 // Do a post inline PGO instrumentation and use pass. This is a context
2206 // sensitive PGO pass.
2207 if (PGOOpt) {
2208 if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
2209 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true,
2210 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
2211 PGOOpt->CSProfileGenFile, PGOOpt->ProfileRemappingFile);
2212 else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
2213 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false,
2214 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
2215 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile);
2216 }
2217
2218 // Break up allocas
2220
2221 // LTO provides additional opportunities for tailcall elimination due to
2222 // link-time inlining, and visibility of nocapture attribute.
2223 FPM.addPass(
2224 TailCallElimPass(/*UpdateFunctionEntryCount=*/isInstrumentedPGOUse()));
2225
2226 // Run a few AA driver optimizations here and now to clean up the code.
2227 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM),
2228 PTO.EagerlyInvalidateAnalyses));
2229
2230 MPM.addPass(
2232
2233 // Require the GlobalsAA analysis for the module so we can query it within
2234 // MainFPM.
2237 // Invalidate AAManager so it can be recreated and pick up the newly
2238 // available GlobalsAA.
2239 MPM.addPass(
2241 }
2242
2243 FunctionPassManager MainFPM;
2245 LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
2246 /*AllowSpeculation=*/true),
2247 /*USeMemorySSA=*/true));
2248
2249 if (RunNewGVN)
2250 MainFPM.addPass(NewGVNPass());
2251 else
2252 MainFPM.addPass(GVNPass());
2253
2254 // Remove dead memcpy()'s.
2255 MainFPM.addPass(MemCpyOptPass());
2256
2257 // Nuke dead stores.
2258 MainFPM.addPass(DSEPass());
2259 MainFPM.addPass(MoveAutoInitPass());
2261
2262 invokeVectorizerStartEPCallbacks(MainFPM, Level);
2263
2264 LoopPassManager LPM;
2265 if (EnableLoopFlatten && Level.getSpeedupLevel() > 1)
2266 LPM.addPass(LoopFlattenPass());
2267 LPM.addPass(IndVarSimplifyPass());
2268 LPM.addPass(LoopDeletionPass());
2269 // FIXME: Add loop interchange.
2270
2271 // Unroll small loops and perform peeling.
2272 LPM.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
2273 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
2274 PTO.ForgetAllSCEVInLoopUnroll));
2275 // The loop passes in LPM (LoopFullUnrollPass) do not preserve MemorySSA.
2276 // *All* loop passes must preserve it, in order to be able to use it.
2277 MainFPM.addPass(
2278 createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/false));
2279
2280 MainFPM.addPass(LoopDistributePass());
2281
2282 addVectorPasses(Level, MainFPM, ThinOrFullLTOPhase::FullLTOPostLink);
2283
2284 invokeVectorizerEndEPCallbacks(MainFPM, Level);
2285
2286 // Run the OpenMPOpt CGSCC pass again late.
2289
2290 invokePeepholeEPCallbacks(MainFPM, Level);
2291 MainFPM.addPass(JumpThreadingPass());
2292 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(MainFPM),
2293 PTO.EagerlyInvalidateAnalyses));
2294
2295 // Lower type metadata and the type.test intrinsic. This pass supports
2296 // clang's control flow integrity mechanisms (-fsanitize=cfi*) and needs
2297 // to be run at link time if CFI is enabled. This pass does nothing if
2298 // CFI is disabled.
2299 MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
2300 // Run a second time to clean up any type tests left behind by WPD for use
2301 // in ICP (which is performed earlier than this in the regular LTO pipeline).
2303
2304 // Enable splitting late in the FullLTO post-link pipeline.
2307
2308 // Add late LTO optimization passes.
2309 FunctionPassManager LateFPM;
2310
2311 // LoopSink pass sinks instructions hoisted by LICM, which serves as a
2312 // canonicalization pass that enables other optimizations. As a result,
2313 // LoopSink pass needs to be a very late IR pass to avoid undoing LICM
2314 // result too early.
2315 LateFPM.addPass(LoopSinkPass());
2316
2317 // This hoists/decomposes div/rem ops. It should run after other sink/hoist
2318 // passes to avoid re-sinking, but before SimplifyCFG because it can allow
2319 // flattening of blocks.
2320 LateFPM.addPass(DivRemPairsPass());
2321
2322 // Delete basic blocks, which optimization passes may have killed.
2324 .convertSwitchRangeToICmp(true)
2325 .convertSwitchToArithmetic(true)
2326 .hoistCommonInsts(true)
2327 .speculateUnpredictables(true)));
2328 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(LateFPM)));
2329
2330 // Drop bodies of available externally objects to improve GlobalDCE.
2332
2333 // Now that we have optimized the program, discard unreachable functions.
2334 MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));
2335
2336 if (PTO.MergeFunctions)
2338
2340
2341 if (PTO.CallGraphProfile)
2342 MPM.addPass(CGProfilePass(/*InLTOPostLink=*/true));
2343
2344 MPM.addPass(CoroCleanupPass());
2345
2346 // AllocToken transforms heap allocation calls; this needs to run late after
2347 // other allocation call transformations (such as those in InstCombine).
2348 MPM.addPass(AllocTokenPass());
2349
2351
2352 // Emit annotation remarks.
2354
2355 instructionCountersPass(MPM, /* IsPreOptimization */ false);
2356
2357 return MPM;
2358}
2359
2363 assert(Level == OptimizationLevel::O0 &&
2364 "buildO0DefaultPipeline should only be used with O0");
2365
2367
2368 instructionCountersPass(MPM, /* IsPreOptimization */ true);
2369
2370 // Perform pseudo probe instrumentation in O0 mode. This is for the
2371 // consistency between different build modes. For example, an LTO build can be
2372 // mixed with an O0 prelink and an O2 postlink. Loading a sample profile in
2373 // the postlink will require pseudo probe instrumentation in the prelink.
2374 if (PGOOpt && PGOOpt->PseudoProbeForProfiling)
2376
2377 if (PGOOpt && (PGOOpt->Action == PGOOptions::IRInstr ||
2378 PGOOpt->Action == PGOOptions::IRUse))
2380 MPM,
2381 /*RunProfileGen=*/(PGOOpt->Action == PGOOptions::IRInstr),
2382 /*IsCS=*/false, PGOOpt->AtomicCounterUpdate, PGOOpt->ProfileFile,
2383 PGOOpt->ProfileRemappingFile);
2384
2385 // Instrument function entry and exit before all inlining.
2387 EntryExitInstrumenterPass(/*PostInlining=*/false)));
2388
2390
2391 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
2393
2394 if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) {
2395 // Explicitly disable sample loader inlining and use flattened profile in O0
2396 // pipeline.
2397 MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
2398 PGOOpt->ProfileRemappingFile,
2400 /*DisableSampleProfileInlining=*/true,
2401 /*UseFlattenedProfile=*/true));
2402 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
2403 // RequireAnalysisPass for PSI before subsequent non-module passes.
2405 }
2406
2408
2409 // Build a minimal pipeline based on the semantics required by LLVM,
2410 // which is just that always inlining occurs. Further, disable generating
2411 // lifetime intrinsics to avoid enabling further optimizations during
2412 // code generation.
2414 /*InsertLifetimeIntrinsics=*/false));
2415
2416 if (PTO.MergeFunctions)
2418
2419 if (EnableMatrix)
2420 MPM.addPass(
2422
2423 if (!CGSCCOptimizerLateEPCallbacks.empty()) {
2424 CGSCCPassManager CGPM;
2426 if (!CGPM.isEmpty())
2428 }
2429 if (!LateLoopOptimizationsEPCallbacks.empty()) {
2430 LoopPassManager LPM;
2432 if (!LPM.isEmpty()) {
2434 createFunctionToLoopPassAdaptor(std::move(LPM))));
2435 }
2436 }
2437 if (!LoopOptimizerEndEPCallbacks.empty()) {
2438 LoopPassManager LPM;
2440 if (!LPM.isEmpty()) {
2442 createFunctionToLoopPassAdaptor(std::move(LPM))));
2443 }
2444 }
2445 if (!ScalarOptimizerLateEPCallbacks.empty()) {
2448 if (!FPM.isEmpty())
2449 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
2450 }
2451
2453
2454 if (!VectorizerStartEPCallbacks.empty()) {
2457 if (!FPM.isEmpty())
2458 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
2459 }
2460
2461 if (!VectorizerEndEPCallbacks.empty()) {
2464 if (!FPM.isEmpty())
2465 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
2466 }
2467
2469
2470 // AllocToken transforms heap allocation calls; this needs to run late after
2471 // other allocation call transformations (such as those in InstCombine).
2472 if (!isLTOPreLink(Phase))
2473 MPM.addPass(AllocTokenPass());
2474
2476
2478 MPM.addPass(InstrumentorPass(FS));
2479
2480 if (isLTOPreLink(Phase))
2481 addRequiredLTOPreLinkPasses(MPM);
2482
2483 // Emit annotation remarks.
2485
2486 instructionCountersPass(MPM, /* IsPreOptimization */ false);
2487
2488 return MPM;
2489}
2490
2492 AAManager AA;
2493
2494 // The order in which these are registered determines their priority when
2495 // being queried.
2496
2497 // Add any target-specific alias analyses that should be run early.
2498 if (TM)
2499 TM->registerEarlyDefaultAliasAnalyses(AA);
2500
2501 // First we register the basic alias analysis that provides the majority of
2502 // per-function local AA logic. This is a stateless, on-demand local set of
2503 // AA techniques.
2504 AA.registerFunctionAnalysis<BasicAA>();
2505
2506 // Next we query fast, specialized alias analyses that wrap IR-embedded
2507 // information about aliasing.
2508 AA.registerFunctionAnalysis<ScopedNoAliasAA>();
2509 AA.registerFunctionAnalysis<TypeBasedAA>();
2510
2511 // Add support for querying global aliasing information when available.
2512 // Because the `AAManager` is a function analysis and `GlobalsAA` is a module
2513 // analysis, all that the `AAManager` can do is query for any *cached*
2514 // results from `GlobalsAA` through a readonly proxy.
2516 AA.registerModuleAnalysis<GlobalsAA>();
2517
2518 // Add target-specific alias analyses.
2519 if (TM)
2520 TM->registerDefaultAliasAnalyses(AA);
2521
2522 return AA;
2523}
2524
/// Returns true if PGOOpt is present with Action == PGOOptions::IRUse, or if
/// the UseCtxProfile option (the "use-ctx-profile" string flag) is non-empty;
/// returns false otherwise.
2525bool PassBuilder::isInstrumentedPGOUse() const {
2526 return (PGOOpt && PGOOpt->Action == PGOOptions::IRUse) ||
2527 !UseCtxProfile.empty();
2528}
aarch64 falkor hwpf fix Falkor HW Prefetch Fix Late Phase
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AggressiveInstCombiner - Combine expression patterns to form expressions with fewer,...
Provides passes to inlining "always_inline" functions.
This is the interface for LLVM's primary stateless and local alias analysis.
This file provides the interface for LLVM's Call Graph Profile pass.
This header provides classes for managing passes over SCCs of the call graph.
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
This file provides the interface for a simple, fast CSE pass.
This file provides a pass which clones the current module and runs the provided pass pipeline on the ...
This file provides a pass manager that only runs its passes if the provided marker analysis has been ...
Super simple passes to force specific function attrs from the commandline into the IR for debugging p...
Provides passes for computing function attributes based on interprocedural analyses.
This file provides the interface for LLVM's Global Value Numbering pass which eliminates fully redund...
This is the interface for a simple mod/ref and alias analysis over globals.
AcceleratorCodeSelection - Identify all functions reachable from a kernel, removing those that are un...
This header defines various interfaces for pass management in LLVM.
Interfaces for passes which infer implicit function attributes from the name and signature of functio...
This file provides the primary interface to the instcombine pass.
Defines passes for running instruction simplification across chunks of IR.
This file provides the interface for LLVM's PGO Instrumentation lowering pass.
See the comments on JumpThreadingPass.
static LVOptions Options
Definition LVOptions.cpp:25
This file implements the Loop Fusion pass.
This header defines the LoopLoadEliminationPass object.
This header provides classes for managing a pipeline of passes over loops in LLVM IR.
The header file for the LowerConstantIntrinsics pass as used by the new pass manager.
The header file for the LowerExpectIntrinsic pass as used by the new pass manager.
This pass performs merges of loads and stores on both sides of a.
This file provides the interface for LLVM's Global Value Numbering pass.
This header enumerates the LLVM-provided high-level optimization levels.
This file provides the interface for IR based instrumentation passes ( (profile-gen,...
Define option tunables for PGO.
static bool isThinLTOPostLink(ThinOrFullLTOPhase Phase)
static void addAnnotationRemarksPass(ModulePassManager &MPM)
static CoroConditionalWrapper buildCoroWrapper(ThinOrFullLTOPhase Phase)
static bool isFullLTOPostLink(ThinOrFullLTOPhase Phase)
static bool isThinLTOPreLink(ThinOrFullLTOPhase Phase)
static bool isLTOPreLink(ThinOrFullLTOPhase Phase)
static void instructionCountersPass(ModulePassManager &MPM, bool IsPreOptimization)
static bool isFullLTOPreLink(ThinOrFullLTOPhase Phase)
static bool isLTOPostLink(ThinOrFullLTOPhase Phase)
This file implements relative lookup table converter that converts lookup tables to relative lookup t...
This file provides the interface for LLVM's Scalar Replacement of Aggregates pass.
This file provides the interface for the pseudo probe implementation for AutoFDO.
This file provides the interface for the sampled PGO loader pass.
This is the interface for a metadata-based scoped no-alias analysis.
This file provides the interface for the pass responsible for both simplifying and canonicalizing the...
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
This is the interface for a metadata-based TBAA.
Defines the virtual file system interface vfs::FileSystem.
A manager for alias analyses.
A module pass that rewrites heap allocations to use token-enabled allocation functions based on vario...
Definition AllocToken.h:36
Inlines functions marked as "always_inline".
Argument promotion pass.
Analysis pass providing a never-invalidated alias analysis result.
Simple pass that canonicalizes aliases.
A pass that merges duplicate global constants into a single constant.
This class implements a trivial dead store elimination.
Eliminate dead arguments (and return values) from functions.
A pass that transforms external global definitions into declarations.
Pass embeds a copy of the module optimized with the provided pass pipeline into a global variable.
A pass manager to run a set of extra loop passes if the MarkerTy analysis is present.
Statistics pass for the FunctionPropertiesAnalysis results.
The core GVN pass object.
Definition GVN.h:128
Pass to remove unused function declarations.
Definition GlobalDCE.h:38
Optimize globals that never have their address taken.
Definition GlobalOpt.h:25
Pass to perform split of global variables.
Definition GlobalSplit.h:26
Analysis pass providing a never-invalidated alias analysis result.
Pass to outline cold regions.
Pass to perform interprocedural constant propagation.
Definition SCCP.h:48
Pass to outline similar regions.
Definition IROutliner.h:469
Run instruction simplification across each instruction in the function.
Instrumentation based profiling lowering pass.
The Instrumentor pass.
This pass performs 'jump threading', which looks at blocks that have multiple predecessors and multip...
Performs Loop Invariant Code Motion Pass.
Definition LICM.h:66
Loop unroll pass that only does full loop unrolling and peeling.
Performs Loop Idiom Recognize Pass.
Performs Loop Inst Simplify Pass.
A simple loop rotation transformation.
Performs basic CFG simplifications to assist other loop passes.
A pass that does profile-guided sinking of instructions into loops.
Definition LoopSink.h:33
A simple loop rotation transformation.
Loop unroll pass that will support both full and partial unrolling.
Strips MemProf attributes and metadata.
Merge identical functions.
The module inliner pass for the new pass manager.
Module pass, wrapping the inliner pass.
Definition Inliner.h:65
void addModulePass(T Pass)
Add a module pass that runs before the CGSCC passes.
Definition Inliner.h:81
Class to hold module path string table and global value map, and encapsulate methods for operating on...
Simple pass that provides a name to every anonymous global.
Additional 'norecurse' attribute deduction during postlink LTO phase.
OpenMP optimizations pass.
Definition OpenMPOpt.h:42
static LLVM_ABI const OptimizationLevel O3
Optimize for fast execution as much as possible.
static LLVM_ABI const OptimizationLevel O0
Disable as many optimizations as possible.
static LLVM_ABI const OptimizationLevel O2
Optimize for fast execution as much as possible without triggering significant incremental compile ti...
static LLVM_ABI const OptimizationLevel O1
Optimize quickly without destroying debuggability.
static LLVM_ABI bool isCtxIRPGOInstrEnabled()
The indirect function call promotion pass.
The instrumentation (profile-instr-gen) pass for IR based PGO.
The instrumentation (profile-instr-gen) pass for IR based PGO.
The profile annotation (profile-instr-use) pass for IR based PGO.
The profile size based optimization pass for memory intrinsics.
Pass to remove unused function declarations.
LLVM_ABI void invokeFullLinkTimeOptimizationLastEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level)
LLVM_ABI ModuleInlinerWrapperPass buildInlinerPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the module pipeline that performs inlining as well as the inlining-driven cleanups.
LLVM_ABI void invokeOptimizerEarlyEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level, ThinOrFullLTOPhase Phase)
LLVM_ABI void invokeVectorizerStartEPCallbacks(FunctionPassManager &FPM, OptimizationLevel Level)
LLVM_ABI AAManager buildDefaultAAPipeline()
Build the default AAManager with the default alias analysis pipeline registered.
LLVM_ABI void invokeCGSCCOptimizerLateEPCallbacks(CGSCCPassManager &CGPM, OptimizationLevel Level)
LLVM_ABI ModulePassManager buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level)
Build a pre-link, ThinLTO-targeting default optimization pipeline to a pass manager.
LLVM_ABI void addPGOInstrPassesForO0(ModulePassManager &MPM, bool RunProfileGen, bool IsCS, bool AtomicCounterUpdate, std::string ProfileFile, std::string ProfileRemappingFile)
Add PGOInstrumentation passes for O0 only.
LLVM_ABI void invokeScalarOptimizerLateEPCallbacks(FunctionPassManager &FPM, OptimizationLevel Level)
LLVM_ABI ModulePassManager buildPerModuleDefaultPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase=ThinOrFullLTOPhase::None)
Build a per-module default optimization pipeline.
LLVM_ABI void invokePipelineStartEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level)
LLVM_ABI void invokeVectorizerEndEPCallbacks(FunctionPassManager &FPM, OptimizationLevel Level)
LLVM_ABI ModulePassManager buildO0DefaultPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase=ThinOrFullLTOPhase::None)
Build an O0 pipeline with the minimal semantically required passes.
LLVM_ABI FunctionPassManager buildFunctionSimplificationPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the core LLVM function canonicalization and simplification pipeline.
LLVM_ABI void invokePeepholeEPCallbacks(FunctionPassManager &FPM, OptimizationLevel Level)
LLVM_ABI void invokePipelineEarlySimplificationEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level, ThinOrFullLTOPhase Phase)
LLVM_ABI void invokeLoopOptimizerEndEPCallbacks(LoopPassManager &LPM, OptimizationLevel Level)
LLVM_ABI ModulePassManager buildLTODefaultPipeline(OptimizationLevel Level, ModuleSummaryIndex *ExportSummary)
Build an LTO default optimization pipeline to a pass manager.
LLVM_ABI ModulePassManager buildModuleInlinerPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the module pipeline that performs inlining with module inliner pass.
LLVM_ABI ModulePassManager buildThinLTODefaultPipeline(OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary)
Build a ThinLTO default optimization pipeline to a pass manager.
LLVM_ABI void invokeLateLoopOptimizationsEPCallbacks(LoopPassManager &LPM, OptimizationLevel Level)
LLVM_ABI void invokeFullLinkTimeOptimizationEarlyEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level)
LLVM_ABI ModulePassManager buildFatLTODefaultPipeline(OptimizationLevel Level, bool ThinLTO, bool EmitSummary)
Build a fat object default optimization pipeline.
LLVM_ABI ModulePassManager buildModuleSimplificationPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the core LLVM module canonicalization and simplification pipeline.
LLVM_ABI ModulePassManager buildModuleOptimizationPipeline(OptimizationLevel Level, ThinOrFullLTOPhase LTOPhase)
Construct the core LLVM module optimization pipeline.
LLVM_ABI void invokeOptimizerLastEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level, ThinOrFullLTOPhase Phase)
LLVM_ABI ModulePassManager buildLTOPreLinkDefaultPipeline(OptimizationLevel Level)
Build a pre-link, LTO-targeting default optimization pipeline to a pass manager.
LLVM_ATTRIBUTE_MINSIZE std::enable_if_t<!std::is_same_v< PassT, PassManager > > addPass(PassT &&Pass)
bool isEmpty() const
Returns if the pass manager contains any passes.
unsigned LicmMssaNoAccForPromotionCap
Tuning option to disable promotion to scalars in LICM with MemorySSA, if the number of access is too ...
Definition PassBuilder.h:78
bool SLPVectorization
Tuning option to enable/disable slp loop vectorization, set based on opt level.
Definition PassBuilder.h:56
int InlinerThreshold
Tuning option to override the default inliner threshold.
Definition PassBuilder.h:92
bool LoopFusion
Tuning option to enable/disable loop fusion. Its default value is false.
Definition PassBuilder.h:66
bool CallGraphProfile
Tuning option to enable/disable call graph profile.
Definition PassBuilder.h:82
bool MergeFunctions
Tuning option to enable/disable function merging.
Definition PassBuilder.h:89
bool ForgetAllSCEVInLoopUnroll
Tuning option to forget all SCEV loops in LoopUnroll.
Definition PassBuilder.h:70
unsigned LicmMssaOptCap
Tuning option to cap the number of calls to retrieve clobbering accesses in MemorySSA,...
Definition PassBuilder.h:74
bool LoopInterleaving
Tuning option to set loop interleaving on/off, set based on opt level.
Definition PassBuilder.h:48
LLVM_ABI PipelineTuningOptions()
Constructor sets pipeline tuning defaults based on cl::opts.
bool LoopUnrolling
Tuning option to enable/disable loop unrolling. Its default value is true.
Definition PassBuilder.h:59
bool LoopInterchange
Tuning option to enable/disable loop interchange.
Definition PassBuilder.h:63
bool LoopVectorization
Tuning option to enable/disable loop vectorization, set based on opt level.
Definition PassBuilder.h:52
Reassociate commutative expressions.
Definition Reassociate.h:74
A pass to do RPO deduction and propagation of function attributes.
This pass performs function-level constant propagation and merging.
Definition SCCP.h:30
The sample profiler data loader pass.
Analysis pass providing a never-invalidated alias analysis result.
This pass transforms loops that contain branches or switches on loop- invariant conditions to have mu...
A pass to simplify and canonicalize the CFG of a function.
Definition SimplifyCFG.h:30
Analysis pass providing a never-invalidated alias analysis result.
Optimize scalar/vector interactions in IR using target cost models.
Interfaces for registering analysis passes, producing common pass manager configurations,...
Abstract Attribute helper functions.
Definition Attributor.h:165
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
@ All
Drop only llvm.assumes using type test value.
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI cl::opt< bool > EnableKnowledgeRetention
static cl::opt< bool > RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden, cl::desc("Run the NewGVN pass"))
static cl::opt< bool > DisablePreInliner("disable-preinline", cl::init(false), cl::Hidden, cl::desc("Disable pre-instrumentation inliner"))
static cl::opt< bool > PerformMandatoryInliningsFirst("mandatory-inlining-first", cl::init(false), cl::Hidden, cl::desc("Perform mandatory inlinings module-wide, before performing " "inlining"))
static cl::opt< bool > RunPartialInlining("enable-partial-inlining", cl::init(false), cl::Hidden, cl::desc("Run Partial inlining pass"))
static cl::opt< bool > EnableGVNSink("enable-gvn-sink", cl::desc("Enable the GVN sinking pass (default = off)"))
static cl::opt< bool > EnableModuleInliner("enable-module-inliner", cl::init(false), cl::Hidden, cl::desc("Enable module inliner"))
static cl::opt< bool > EnableEagerlyInvalidateAnalyses("eagerly-invalidate-analyses", cl::init(true), cl::Hidden, cl::desc("Eagerly invalidate more analyses in default pipelines"))
static cl::opt< bool > EnableMatrix("enable-matrix", cl::init(false), cl::Hidden, cl::desc("Enable lowering of the matrix intrinsics"))
ModuleToFunctionPassAdaptor createModuleToFunctionPassAdaptor(FunctionPassT &&Pass, bool EagerlyInvalidate=false)
A function to deduce a function pass type and wrap it in the templated adaptor.
cl::opt< std::string > UseCtxProfile("use-ctx-profile", cl::init(""), cl::Hidden, cl::desc("Use the specified contextual profile file"))
static cl::opt< bool > EnableSampledInstr("enable-sampled-instrumentation", cl::init(false), cl::Hidden, cl::desc("Enable profile instrumentation sampling (default = off)"))
static cl::opt< bool > EnableLoopFlatten("enable-loop-flatten", cl::init(false), cl::Hidden, cl::desc("Enable the LoopFlatten Pass"))
static cl::opt< InliningAdvisorMode > UseInlineAdvisor("enable-ml-inliner", cl::init(InliningAdvisorMode::Default), cl::Hidden, cl::desc("Enable ML policy for inliner. Currently trained for -Oz only"), cl::values(clEnumValN(InliningAdvisorMode::Default, "default", "Heuristics-based inliner version"), clEnumValN(InliningAdvisorMode::Development, "development", "Use development mode (runtime-loadable model)"), clEnumValN(InliningAdvisorMode::Release, "release", "Use release mode (AOT-compiled model)")))
static cl::opt< bool > EnableJumpTableToSwitch("enable-jump-table-to-switch", cl::init(true), cl::desc("Enable JumpTableToSwitch pass (default = true)"))
PassManager< LazyCallGraph::SCC, CGSCCAnalysisManager, LazyCallGraph &, CGSCCUpdateResult & > CGSCCPassManager
The CGSCC pass manager.
static cl::opt< bool > EnableUnrollAndJam("enable-unroll-and-jam", cl::init(false), cl::Hidden, cl::desc("Enable Unroll And Jam Pass"))
@ CGSCC_LIGHT
@ MODULE_LIGHT
ThinOrFullLTOPhase
This enumerates the LLVM full LTO or ThinLTO optimization phases.
Definition Pass.h:77
@ FullLTOPreLink
Full LTO prelink phase.
Definition Pass.h:85
@ ThinLTOPostLink
ThinLTO postlink (backend compile) phase.
Definition Pass.h:83
@ None
No LTO/ThinLTO behavior needed.
Definition Pass.h:79
@ FullLTOPostLink
Full LTO postlink (backend compile) phase.
Definition Pass.h:87
@ ThinLTOPreLink
ThinLTO prelink (summary) phase.
Definition Pass.h:81
PassManager< Loop, LoopAnalysisManager, LoopStandardAnalysisResults &, LPMUpdater & > LoopPassManager
The Loop pass manager.
static cl::opt< bool > EnableConstraintElimination("enable-constraint-elimination", cl::init(true), cl::Hidden, cl::desc("Enable pass to eliminate conditions based on linear constraints"))
ModuleToPostOrderCGSCCPassAdaptor createModuleToPostOrderCGSCCPassAdaptor(CGSCCPassT &&Pass)
A function to deduce a CGSCC pass type and wrap it in the templated adaptor.
static cl::opt< bool > EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true), cl::Hidden, cl::desc("Enable inline deferral during PGO"))
Flag to enable inline deferral during PGO.
FunctionToLoopPassAdaptor createFunctionToLoopPassAdaptor(LoopPassT &&Pass, bool UseMemorySSA=false)
A function to deduce a loop pass type and wrap it in the templated adaptor.
CGSCCToFunctionPassAdaptor createCGSCCToFunctionPassAdaptor(FunctionPassT &&Pass, bool EagerlyInvalidate=false, bool NoRerun=false)
A function to deduce a function pass type and wrap it in the templated adaptor.
LLVM_ABI cl::opt< bool > ForgetSCEVInLoopUnroll
PassManager< Module > ModulePassManager
Convenience typedef for a pass manager over modules.
static cl::opt< bool > EnablePostPGOLoopRotation("enable-post-pgo-loop-rotation", cl::init(true), cl::Hidden, cl::desc("Run the loop rotation transformation after PGO instrumentation"))
LLVM_ABI bool AreStatisticsEnabled()
Check if statistics are enabled.
static cl::opt< std::string > InstrumentColdFuncOnlyPath("instrument-cold-function-only-path", cl::init(""), cl::desc("File path for cold function only instrumentation(requires use " "with --pgo-instrument-cold-function-only)"), cl::Hidden)
static cl::opt< bool > EnableGlobalAnalyses("enable-global-analyses", cl::init(true), cl::Hidden, cl::desc("Enable inter-procedural analyses"))
static cl::opt< bool > EnableDFAJumpThreading("enable-dfa-jump-thread", cl::desc("Enable DFA jump threading"), cl::init(false), cl::Hidden)
static cl::opt< bool > FlattenedProfileUsed("flattened-profile-used", cl::init(false), cl::Hidden, cl::desc("Indicate the sample profile being used is flattened, i.e., " "no inline hierarchy exists in the profile"))
static cl::opt< AttributorRunOption > AttributorRun("attributor-enable", cl::Hidden, cl::init(AttributorRunOption::NONE), cl::desc("Enable the attributor inter-procedural deduction pass"), cl::values(clEnumValN(AttributorRunOption::FULL, "full", "enable all full attributor runs"), clEnumValN(AttributorRunOption::LIGHT, "light", "enable all attributor-light runs"), clEnumValN(AttributorRunOption::MODULE, "module", "enable module-wide attributor runs"), clEnumValN(AttributorRunOption::MODULE_LIGHT, "module-light", "enable module-wide attributor-light runs"), clEnumValN(AttributorRunOption::CGSCC, "cgscc", "enable call graph SCC attributor runs"), clEnumValN(AttributorRunOption::CGSCC_LIGHT, "cgscc-light", "enable call graph SCC attributor-light runs"), clEnumValN(AttributorRunOption::NONE, "none", "disable attributor runs")))
static cl::opt< bool > ExtraVectorizerPasses("extra-vectorizer-passes", cl::init(false), cl::Hidden, cl::desc("Run cleanup optimization passes after vectorization"))
static cl::opt< bool > EnableHotColdSplit("hot-cold-split", cl::desc("Enable hot-cold splitting pass"))
cl::opt< bool > EnableMemProfContextDisambiguation
Enable MemProf context disambiguation for thin link.
PassManager< Function > FunctionPassManager
Convenience typedef for a pass manager over functions.
LLVM_ABI InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options.
cl::opt< bool > PGOInstrumentColdFunctionOnly
static cl::opt< bool > EnableLoopInterchange("enable-loopinterchange", cl::init(false), cl::Hidden, cl::desc("Enable the LoopInterchange Pass"))
static cl::opt< bool > EnableCHR("enable-chr", cl::init(true), cl::Hidden, cl::desc("Enable control height reduction optimization (CHR)"))
static cl::opt< bool > EnableMergeFunctions("enable-merge-functions", cl::init(false), cl::Hidden, cl::desc("Enable function merging as part of the optimization pipeline"))
static cl::opt< bool > EnableDevirtualizeSpeculatively("enable-devirtualize-speculatively", cl::desc("Enable speculative devirtualization optimization"), cl::init(false))
static cl::opt< bool > EnableGVNHoist("enable-gvn-hoist", cl::desc("Enable the GVN hoisting pass (default = off)"))
LLVM_ABI cl::opt< unsigned > SetLicmMssaNoAccForPromotionCap
LLVM_ABI InlineParams getInlineParamsFromOptLevel(unsigned OptLevel)
Generate the parameters to tune the inline cost analysis based on command line options.
static cl::opt< bool > EnableIROutliner("ir-outliner", cl::init(false), cl::Hidden, cl::desc("Enable ir outliner pass"))
static cl::opt< int > PreInlineThreshold("preinline-threshold", cl::Hidden, cl::init(75), cl::desc("Control the amount of inlining in pre-instrumentation inliner " "(default = 75)"))
static cl::opt< bool > UseLoopVersioningLICM("enable-loop-versioning-licm", cl::init(false), cl::Hidden, cl::desc("Enable the experimental Loop Versioning LICM pass"))
cl::opt< unsigned > MaxDevirtIterations("max-devirt-iterations", cl::ReallyHidden, cl::init(4))
LLVM_ABI cl::opt< unsigned > SetLicmMssaOptCap
static cl::opt< bool > EnableInstrumentor("enable-instrumentor", cl::init(false), cl::Hidden, cl::desc("Enable the Instrumentor Pass"))
static cl::opt< bool > EnableMergeICmps("enable-mergeicmps", cl::init(true), cl::Hidden, cl::desc("Enable MergeICmps pass in the optimization pipeline"))
A DCE pass that assumes instructions are dead until proven otherwise.
Definition ADCE.h:31
Pass to convert @llvm.global.annotations to !annotation metadata.
This pass attempts to minimize the number of assumes without losing any information.
A more lightweight version of the Attributor which only runs attribute inference but no simplificatio...
A more lightweight version of the Attributor which only runs attribute inference but no simplificatio...
Hoist/decompose integer division and remainder instructions to enable CFG improvements and better cod...
Definition DivRemPairs.h:23
A simple and fast domtree-based CSE pass.
Definition EarlyCSE.h:31
Pass which forces specific function attributes into the IR, primarily as a debugging tool.
A simple and fast domtree-based GVN pass to hoist common expressions from sibling branches.
Definition GVN.h:432
Uses an "inverted" value numbering to decide the similarity of expressions and sinks similar expressi...
Definition GVN.h:439
A set of parameters to control various transforms performed by IPSCCP pass.
Definition SCCP.h:35
A pass which infers function attributes from the names and signatures of function declarations in a m...
Provides context on when an inline advisor is constructed in the pipeline (e.g., link phase,...
Thresholds to tune inline cost analysis.
Definition InlineCost.h:207
std::optional< int > OptSizeHintThreshold
Threshold to use for callees with inline hint, when the caller is optimized for size.
Definition InlineCost.h:216
std::optional< int > HotCallSiteThreshold
Threshold to use when the callsite is considered hot.
Definition InlineCost.h:228
int DefaultThreshold
The default threshold to start with for a callee.
Definition InlineCost.h:209
std::optional< bool > EnableDeferral
Indicate whether we should allow inline deferral.
Definition InlineCost.h:241
std::optional< int > HintThreshold
Threshold to use for callees with inline hint.
Definition InlineCost.h:212
Options for the frontend instrumentation based profiling pass.
A no-op pass template which simply forces a specific analysis result to be invalidated.
Pass to forward loads in a loop around the backedge to subsequent iterations.
A set of parameters used to control various transforms performed by the LoopUnroll pass.
The LoopVectorize Pass.
Computes function attributes in post-order over the call graph.
A utility pass template to force an analysis result to be available.