LLVM 23.0.0git
PassBuilderPipelines.cpp
Go to the documentation of this file.
1//===- Construction of pass pipelines -------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9///
10/// This file provides the implementation of the PassBuilder based on our
11/// static pass registry as well as related functionality. It also provides
12/// helpers to aid in analyzing, debugging, and testing passes and pass
13/// pipelines.
14///
15//===----------------------------------------------------------------------===//
16
17#include "llvm/ADT/Statistic.h"
29#include "llvm/IR/PassManager.h"
30#include "llvm/Pass.h"
158
159using namespace llvm;
160
161namespace llvm {
162
164 "enable-ml-inliner", cl::init(InliningAdvisorMode::Default), cl::Hidden,
165 cl::desc("Enable ML policy for inliner. Currently trained for -Oz only"),
167 "Heuristics-based inliner version"),
169 "Use development mode (runtime-loadable model)"),
171 "Use release mode (AOT-compiled model)")));
172
173/// Flag to enable inline deferral during PGO.
174static cl::opt<bool>
175 EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true),
177 cl::desc("Enable inline deferral during PGO"));
178
179static cl::opt<bool> EnableModuleInliner("enable-module-inliner",
180 cl::init(false), cl::Hidden,
181 cl::desc("Enable module inliner"));
182
184 "mandatory-inlining-first", cl::init(false), cl::Hidden,
185 cl::desc("Perform mandatory inlinings module-wide, before performing "
186 "inlining"));
187
189 "eagerly-invalidate-analyses", cl::init(true), cl::Hidden,
190 cl::desc("Eagerly invalidate more analyses in default pipelines"));
191
193 "enable-merge-functions", cl::init(false), cl::Hidden,
194 cl::desc("Enable function merging as part of the optimization pipeline"));
195
197 "enable-post-pgo-loop-rotation", cl::init(true), cl::Hidden,
198 cl::desc("Run the loop rotation transformation after PGO instrumentation"));
199
200static cl::opt<bool>
201 TriggerCrash("opt-pipeline-trigger-crash", cl::init(false), cl::Hidden,
202 cl::desc("Trigger crash in optimization pipeline"));
203
205 "enable-global-analyses", cl::init(true), cl::Hidden,
206 cl::desc("Enable inter-procedural analyses"));
207
208static cl::opt<bool> RunPartialInlining("enable-partial-inlining",
209 cl::init(false), cl::Hidden,
210 cl::desc("Run Partial inlining pass"));
211
213 "extra-vectorizer-passes", cl::init(false), cl::Hidden,
214 cl::desc("Run cleanup optimization passes after vectorization"));
215
216static cl::opt<bool> RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden,
217 cl::desc("Run the NewGVN pass"));
218
219static cl::opt<bool>
220 EnableLoopInterchange("enable-loopinterchange", cl::init(true), cl::Hidden,
221 cl::desc("Enable the LoopInterchange Pass"));
222
223static cl::opt<bool> EnableUnrollAndJam("enable-unroll-and-jam",
224 cl::init(false), cl::Hidden,
225 cl::desc("Enable Unroll And Jam Pass"));
226
227static cl::opt<bool> EnableLoopFlatten("enable-loop-flatten", cl::init(false),
229 cl::desc("Enable the LoopFlatten Pass"));
230
231static cl::opt<bool>
232 EnableInstrumentor("enable-instrumentor", cl::init(false), cl::Hidden,
233 cl::desc("Enable the Instrumentor Pass"));
234
235static cl::opt<bool>
236 EnableDFAJumpThreading("enable-dfa-jump-thread",
237 cl::desc("Enable DFA jump threading"),
238 cl::init(true), cl::Hidden);
239
240static cl::opt<bool>
241 EnableHotColdSplit("hot-cold-split",
242 cl::desc("Enable hot-cold splitting pass"));
243
244static cl::opt<bool> EnableIROutliner("ir-outliner", cl::init(false),
246 cl::desc("Enable ir outliner pass"));
247
248static cl::opt<bool>
249 DisablePreInliner("disable-preinline", cl::init(false), cl::Hidden,
250 cl::desc("Disable pre-instrumentation inliner"));
251
253 "preinline-threshold", cl::Hidden, cl::init(75),
254 cl::desc("Control the amount of inlining in pre-instrumentation inliner "
255 "(default = 75)"));
256
257static cl::opt<bool>
258 EnableGVNHoist("enable-gvn-hoist",
259 cl::desc("Enable the GVN hoisting pass (default = off)"));
260
261static cl::opt<bool>
262 EnableGVNSink("enable-gvn-sink",
263 cl::desc("Enable the GVN sinking pass (default = off)"));
264
266 "enable-jump-table-to-switch", cl::init(true),
267 cl::desc("Enable JumpTableToSwitch pass (default = true)"));
268
269// This option is used in simplifying testing SampleFDO optimizations for
270// profile loading.
// NOTE(review): the declaration that follows controls control height reduction
// (see its cl::desc), not profile loading, so the comment above may belong to
// a different option — confirm against upstream.
271static cl::opt<bool>
272 EnableCHR("enable-chr", cl::init(true), cl::Hidden,
273 cl::desc("Enable control height reduction optimization (CHR)"));
274
276 "flattened-profile-used", cl::init(false), cl::Hidden,
277 cl::desc("Indicate the sample profile being used is flattened, i.e., "
278 "no inline hierarchy exists in the profile"));
279
280static cl::opt<bool>
281 EnableMatrix("enable-matrix", cl::init(false), cl::Hidden,
282 cl::desc("Enable lowering of the matrix intrinsics"));
283
285 "enable-mergeicmps", cl::init(true), cl::Hidden,
286 cl::desc("Enable MergeICmps pass in the optimization pipeline"));
287
289 "enable-constraint-elimination", cl::init(true), cl::Hidden,
290 cl::desc(
291 "Enable pass to eliminate conditions based on linear constraints"));
292
294 "attributor-enable", cl::Hidden, cl::init(AttributorRunOption::NONE),
295 cl::desc("Enable the attributor inter-procedural deduction pass"),
297 "enable all full attributor runs"),
299 "enable all attributor-light runs"),
301 "enable module-wide attributor runs"),
303 "enable module-wide attributor-light runs"),
305 "enable call graph SCC attributor runs"),
307 "enable call graph SCC attributor-light runs"),
308 clEnumValN(AttributorRunOption::NONE, "none",
309 "disable attributor runs")));
310
312 "enable-sampled-instrumentation", cl::init(false), cl::Hidden,
313 cl::desc("Enable profile instrumentation sampling (default = off)"));
315 "enable-loop-versioning-licm", cl::init(false), cl::Hidden,
316 cl::desc("Enable the experimental Loop Versioning LICM pass"));
317
319 "instrument-cold-function-only-path", cl::init(""),
320 cl::desc("File path for cold function only instrumentation(requires use "
321 "with --pgo-instrument-cold-function-only)"),
322 cl::Hidden);
323
324// TODO: There is a similar flag in WPD pass, we should consolidate them by
325// parsing the option only once in PassBuilder and share it across both places.
327 "enable-devirtualize-speculatively",
328 cl::desc("Enable speculative devirtualization optimization"),
329 cl::init(false));
330
333
335} // namespace llvm
336
354
355namespace llvm {
357} // namespace llvm
358
360 OptimizationLevel Level) {
361 for (auto &C : PeepholeEPCallbacks)
362 C(FPM, Level);
363}
366 for (auto &C : LateLoopOptimizationsEPCallbacks)
367 C(LPM, Level);
368}
370 OptimizationLevel Level) {
371 for (auto &C : LoopOptimizerEndEPCallbacks)
372 C(LPM, Level);
373}
376 for (auto &C : ScalarOptimizerLateEPCallbacks)
377 C(FPM, Level);
378}
380 OptimizationLevel Level) {
381 for (auto &C : CGSCCOptimizerLateEPCallbacks)
382 C(CGPM, Level);
383}
385 OptimizationLevel Level) {
386 for (auto &C : VectorizerStartEPCallbacks)
387 C(FPM, Level);
388}
390 OptimizationLevel Level) {
391 for (auto &C : VectorizerEndEPCallbacks)
392 C(FPM, Level);
393}
395 OptimizationLevel Level,
397 for (auto &C : OptimizerEarlyEPCallbacks)
398 C(MPM, Level, Phase);
399}
401 OptimizationLevel Level,
403 for (auto &C : OptimizerLastEPCallbacks)
404 C(MPM, Level, Phase);
405}
408 for (auto &C : FullLinkTimeOptimizationEarlyEPCallbacks)
409 C(MPM, Level);
410}
413 for (auto &C : FullLinkTimeOptimizationLastEPCallbacks)
414 C(MPM, Level);
415}
417 OptimizationLevel Level) {
418 for (auto &C : PipelineStartEPCallbacks)
419 C(MPM, Level);
420}
423 for (auto &C : PipelineEarlySimplificationEPCallbacks)
424 C(MPM, Level, Phase);
425}
426
427// Get IR stats with InstCount before/after the optimization pipeline
429 bool IsPreOptimization) {
430 if (AreStatisticsEnabled()) {
431 MPM.addPass(
434 FunctionPropertiesStatisticsPass(IsPreOptimization)));
435 }
436}
437
438// Helper to add AnnotationRemarksPass.
442
443// Helper to check if the current compilation phase is preparing for LTO
448
449// Helper to check if the current compilation phase is preparing for FullLTO
450[[maybe_unused]] static bool isFullLTOPreLink(ThinOrFullLTOPhase Phase) {
452}
453
454// Helper to check if the current compilation phase is preparing for ThinLTO
458
459// Helper to check if the current compilation phase is LTO backend
464
465// Helper to check if the current compilation phase is FullLTO backend
469
470// Helper to check if the current compilation phase is ThinLTO backend
474
475// Helper to wrap conditionally Coro passes.
477 // TODO: Skip passes according to Phase.
478 ModulePassManager CoroPM;
479 CoroPM.addPass(CoroEarlyPass());
480 CGSCCPassManager CGPM;
481 CGPM.addPass(CoroSplitPass());
482 CoroPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
483 CoroPM.addPass(CoroCleanupPass());
484 CoroPM.addPass(GlobalDCEPass());
485 return CoroConditionalWrapper(std::move(CoroPM));
486}
487
488// TODO: Investigate the cost/benefit of tail call elimination on debugging.
490PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
492
494
496 FPM.addPass(CountVisitsPass());
497
498 // Form SSA out of local memory accesses after breaking apart aggregates into
499 // scalars.
500 FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
501
502 // Catch trivial redundancies
503 FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
504
505 // Hoisting of scalars and load expressions.
506 FPM.addPass(
507 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
508 FPM.addPass(InstCombinePass());
509
510 FPM.addPass(LibCallsShrinkWrapPass());
511
512 invokePeepholeEPCallbacks(FPM, Level);
513
514 FPM.addPass(
515 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
516
517 // Form canonically associated expression trees, and simplify the trees using
518 // basic mathematical properties. For example, this will form (nearly)
519 // minimal multiplication trees.
520 FPM.addPass(ReassociatePass());
521
522 // Add the primary loop simplification pipeline.
523 // FIXME: Currently this is split into two loop pass pipelines because we run
524 // some function passes in between them. These can and should be removed
525 // and/or replaced by scheduling the loop pass equivalents in the correct
526 // positions. But those equivalent passes aren't powerful enough yet.
527 // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
528 // used. We have `LoopSimplifyCFGPass` which isn't powerful enough yet to
529 // fully replace `SimplifyCFGPass`, and the closest to the other we have is
530 // `LoopInstSimplify`.
531 LoopPassManager LPM1, LPM2;
532
533 // Simplify the loop body. We do this initially to clean up after other loop
534 // passes run, either when iterating on a loop or on inner loops with
535 // implications on the outer loop.
536 LPM1.addPass(LoopInstSimplifyPass());
537 LPM1.addPass(LoopSimplifyCFGPass());
538
539 // Try to remove as much code from the loop header as possible,
540 // to reduce amount of IR that will have to be duplicated. However,
541 // do not perform speculative hoisting the first time as LICM
542 // will destroy metadata that may not need to be destroyed if run
543 // after loop rotation.
544 // TODO: Investigate promotion cap for O1.
545 LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
546 /*AllowSpeculation=*/false));
547
548 LPM1.addPass(
549 LoopRotatePass(/*EnableHeaderDuplication=*/true, isLTOPreLink(Phase)));
550 // TODO: Investigate promotion cap for O1.
551 LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
552 /*AllowSpeculation=*/true));
553 LPM1.addPass(SimpleLoopUnswitchPass());
555 LPM1.addPass(LoopFlattenPass());
556
557 LPM2.addPass(LoopIdiomRecognizePass());
558 LPM2.addPass(IndVarSimplifyPass());
559
561
562 LPM2.addPass(LoopDeletionPass());
563
564 // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
565 // because it changes the IR, making profile annotation in the backend compile
566 // inaccurate. The normal unroller doesn't pay attention to forced full unroll
567 // attributes so we need to make sure and allow the full unroll pass to pay
568 // attention to it.
569 if (!isThinLTOPreLink(Phase) || !PGOOpt ||
570 PGOOpt->Action != PGOOptions::SampleUse)
571 LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
572 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
573 PTO.ForgetAllSCEVInLoopUnroll));
574
576
577 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
578 /*UseMemorySSA=*/true));
579 FPM.addPass(
580 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
581 FPM.addPass(InstCombinePass());
582 // The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA.
583 // *All* loop passes must preserve it, in order to be able to use it.
584 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
585 /*UseMemorySSA=*/false));
586
587 // Delete small array after loop unroll.
588 FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
589
590 // Specially optimize memory movement as it doesn't look like dataflow in SSA.
591 FPM.addPass(MemCpyOptPass());
592
593 // Sparse conditional constant propagation.
594 // FIXME: It isn't clear why we do this *after* loop passes rather than
595 // before...
596 FPM.addPass(SCCPPass());
597
598 // Delete dead bit computations (instcombine runs after to fold away the dead
599 // computations, and then ADCE will run later to exploit any new DCE
600 // opportunities that creates).
601 FPM.addPass(BDCEPass());
602
603 // Run instcombine after redundancy and dead bit elimination to exploit
604 // opportunities opened up by them.
605 FPM.addPass(InstCombinePass());
606 invokePeepholeEPCallbacks(FPM, Level);
607
608 FPM.addPass(CoroElidePass());
609
611
612 // Finally, do an expensive DCE pass to catch all the dead code exposed by
613 // the simplifications and basic cleanup after all the simplifications.
614 // TODO: Investigate if this is too expensive.
615 FPM.addPass(ADCEPass());
616 FPM.addPass(
617 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
618 FPM.addPass(InstCombinePass());
619 invokePeepholeEPCallbacks(FPM, Level);
620
621 return FPM;
622}
623
627 assert(Level != OptimizationLevel::O0 && "Must request optimizations!");
628
629 // The O1 pipeline has a separate pipeline creation function to simplify
630 // construction readability.
631 if (Level.getSpeedupLevel() == 1)
632 return buildO1FunctionSimplificationPipeline(Level, Phase);
633
635
638
639 // Form SSA out of local memory accesses after breaking apart aggregates into
640 // scalars.
642
643 // Catch trivial redundancies
644 FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
647
648 // Hoisting of scalars and load expressions.
649 if (EnableGVNHoist)
650 FPM.addPass(GVNHoistPass());
651
652 // Global value numbering based sinking.
653 if (EnableGVNSink) {
654 FPM.addPass(GVNSinkPass());
655 FPM.addPass(
656 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
657 }
658
659 // Speculative execution if the target has divergent branches; otherwise nop.
660 FPM.addPass(SpeculativeExecutionPass(/* OnlyIfDivergentTarget =*/true));
661
662 // Optimize based on known information about branches, and cleanup afterward.
665
666 // Jump table to switch conversion.
669
670 FPM.addPass(
671 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
675
676 invokePeepholeEPCallbacks(FPM, Level);
677
678 // For PGO use pipeline, try to optimize memory intrinsics such as memcpy
679 // using the size value profile. Don't perform this when optimizing for size.
680 if (PGOOpt && PGOOpt->Action == PGOOptions::IRUse)
682
683 FPM.addPass(TailCallElimPass(/*UpdateFunctionEntryCount=*/
684 isInstrumentedPGOUse()));
685 FPM.addPass(
686 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
687
688 // Form canonically associated expression trees, and simplify the trees using
689 // basic mathematical properties. For example, this will form (nearly)
690 // minimal multiplication trees.
692
695
696 // Add the primary loop simplification pipeline.
697 // FIXME: Currently this is split into two loop pass pipelines because we run
698 // some function passes in between them. These can and should be removed
699 // and/or replaced by scheduling the loop pass equivalents in the correct
700 // positions. But those equivalent passes aren't powerful enough yet.
701 // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
702 // used. We have `LoopSimplifyCFGPass` which isn't powerful enough yet to
703 // fully replace `SimplifyCFGPass`, and the closest to the other we have is
704 // `LoopInstSimplify`.
705 LoopPassManager LPM1, LPM2;
706
707 // Simplify the loop body. We do this initially to clean up after other loop
708 // passes run, either when iterating on a loop or on inner loops with
709 // implications on the outer loop.
710 LPM1.addPass(LoopInstSimplifyPass());
711 LPM1.addPass(LoopSimplifyCFGPass());
712
713 // Try to remove as much code from the loop header as possible,
714 // to reduce amount of IR that will have to be duplicated. However,
715 // do not perform speculative hoisting the first time as LICM
716 // will destroy metadata that may not need to be destroyed if run
717 // after loop rotation.
718 // TODO: Investigate promotion cap for O1.
719 LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
720 /*AllowSpeculation=*/false));
721
722 LPM1.addPass(
723 LoopRotatePass(/*EnableHeaderDuplication=*/true, isLTOPreLink(Phase)));
724 // TODO: Investigate promotion cap for O1.
725 LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
726 /*AllowSpeculation=*/true));
727 LPM1.addPass(
728 SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3));
730 LPM1.addPass(LoopFlattenPass());
731
732 LPM2.addPass(LoopIdiomRecognizePass());
733 LPM2.addPass(IndVarSimplifyPass());
734
735 {
737 ExtraPasses.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
739 LPM2.addPass(std::move(ExtraPasses));
740 }
741
743
744 LPM2.addPass(LoopDeletionPass());
745
746 // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
747 // because it changes the IR, making profile annotation in the backend compile
748 // inaccurate. The normal unroller doesn't pay attention to forced full unroll
749 // attributes so we need to make sure and allow the full unroll pass to pay
750 // attention to it.
751 if (!isThinLTOPreLink(Phase) || !PGOOpt ||
752 PGOOpt->Action != PGOOptions::SampleUse)
753 LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
754 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
755 PTO.ForgetAllSCEVInLoopUnroll));
756
758
759 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
760 /*UseMemorySSA=*/true));
761 FPM.addPass(
762 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
764 // The loop passes in LPM2 (LoopIdiomRecognizePass, IndVarSimplifyPass,
765 // LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA.
766 // *All* loop passes must preserve it, in order to be able to use it.
767 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
768 /*UseMemorySSA=*/false));
769
770 // Delete small array after loop unroll.
772
773 // Try vectorization/scalarization transforms that are both improvements
774 // themselves and can allow further folds with GVN and InstCombine.
775 FPM.addPass(VectorCombinePass(/*TryEarlyFoldsOnly=*/true));
776
777 // Eliminate redundancies.
779 if (RunNewGVN)
780 FPM.addPass(NewGVNPass());
781 else
782 FPM.addPass(GVNPass());
783
784 // Sparse conditional constant propagation.
785 // FIXME: It isn't clear why we do this *after* loop passes rather than
786 // before...
787 FPM.addPass(SCCPPass());
788
789 // Delete dead bit computations (instcombine runs after to fold away the dead
790 // computations, and then ADCE will run later to exploit any new DCE
791 // opportunities that creates).
792 FPM.addPass(BDCEPass());
793
794 // Run instcombine after redundancy and dead bit elimination to exploit
795 // opportunities opened up by them.
797 invokePeepholeEPCallbacks(FPM, Level);
798
799 // Re-consider control flow based optimizations after redundancy elimination,
800 // redo DCE, etc.
803
806
807 // Finally, do an expensive DCE pass to catch all the dead code exposed by
808 // the simplifications and basic cleanup after all the simplifications.
809 // TODO: Investigate if this is too expensive.
810 FPM.addPass(ADCEPass());
811
812 // Specially optimize memory movement as it doesn't look like dataflow in SSA.
813 FPM.addPass(MemCpyOptPass());
814
815 FPM.addPass(DSEPass());
817
819 LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
820 /*AllowSpeculation=*/true),
821 /*UseMemorySSA=*/true));
822
823 FPM.addPass(CoroElidePass());
824
826
828 .convertSwitchRangeToICmp(true)
829 .convertSwitchToArithmetic(true)
830 .hoistCommonInsts(true)
831 .sinkCommonInsts(true)));
833 invokePeepholeEPCallbacks(FPM, Level);
834
835 return FPM;
836}
837
838void PassBuilder::addRequiredLTOPreLinkPasses(ModulePassManager &MPM) {
841}
842
/// Appends the pre-instrumentation inliner to \p MPM.
///
/// From the lines visible here: an inliner wrapper (MIWP) is set up with
/// InlineParams whose HintThreshold is 325, its CGSCC pipeline runs a small
/// function cleanup pipeline (SROA, EarlyCSE, SimplifyCFG, InstCombine, then
/// the peephole extension-point callbacks), the wrapper is added to \p MPM, and
/// GlobalDCE runs afterwards.
///
/// \param MPM      Module pass manager that receives the passes.
/// \param Level    Optimization level; must not be O0 (asserted).
/// \param LTOPhase Current LTO phase. NOTE(review): it is not referenced on any
///                 line visible in this listing — presumably it is used on one
///                 of the dropped lines; confirm.
843void PassBuilder::addPreInlinerPasses(ModulePassManager &MPM,
844 OptimizationLevel Level,
845 ThinOrFullLTOPhase LTOPhase) {
846 assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
// NOTE(review): the condition guarding this early return is on a line that is
// absent from this listing.
848 return;
849 InlineParams IP;
850
852
853 // FIXME: The hint threshold has the same value used by the regular inliner
854 // when not optimizing for size. This should probably be lowered after
855 // performance testing.
856 // FIXME: this comment is cargo-culted from the old pass manager; revisit.
857 IP.HintThreshold = 325;
860 IP, /* MandatoryFirst */ true,
// Function-level cleanups below are nested into the inliner's CGSCC pipeline.
862 CGSCCPassManager &CGPipeline = MIWP.getPM();
863
865 FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
866 FPM.addPass(EarlyCSEPass()); // Catch trivial redundancies.
867 FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(
868 true))); // Merge & remove basic blocks.
869 FPM.addPass(InstCombinePass()); // Combine silly sequences.
870 invokePeepholeEPCallbacks(FPM, Level);
871
872 CGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
873 std::move(FPM), PTO.EagerlyInvalidateAnalyses));
874
875 MPM.addPass(std::move(MIWP));
876
877 // Delete anything that is now dead to make sure that we don't instrument
878 // dead code. Instrumentation can end up keeping dead code around and
879 // dramatically increase code size.
880 MPM.addPass(GlobalDCEPass());
881}
882
/// Schedules a LoopRotatePass, wrapped in a function-to-loop adaptor that does
/// not use MemorySSA, honouring PTO.EagerlyInvalidateAnalyses.
///
/// NOTE(review): the guarding condition and the enclosing addPass call are on
/// lines absent from this listing; presumably the adaptor is added to \p MPM —
/// confirm against upstream.
883void PassBuilder::addPostPGOLoopRotation(ModulePassManager &MPM,
884 OptimizationLevel Level) {
886 // Disable header duplication in loop rotation at -Oz.
// NOTE(review): on the lines visible here LoopRotatePass() is constructed with
// default arguments and Level is not consulted, so the -Oz remark above may be
// stale — confirm.
888 createFunctionToLoopPassAdaptor(LoopRotatePass(),
889 /*UseMemorySSA=*/false),
890 PTO.EagerlyInvalidateAnalyses));
891 }
892}
893
/// Adds the IR-level PGO passes to \p MPM.
///
/// When \p RunProfileGen is false this is the profile-use path: it adds
/// PGOInstrumentationUse and a RequireAnalysisPass for ProfileSummaryAnalysis,
/// then returns. Otherwise it adds PGOInstrumentationGen, the post-PGO loop
/// rotation, and InstrProfilingLoweringPass configured through
/// InstrProfOptions.
///
/// \param MPM                  Module pass manager that receives the passes.
/// \param Level                Optimization level; must not be O0 (asserted).
/// \param RunProfileGen        True to instrument, false to consume a profile.
/// \param IsCS                 Forwarded to the use/gen/lowering passes and
///                             used for Options.UseBFIInPromotion.
/// \param AtomicCounterUpdate  Copied into Options.Atomic.
/// \param ProfileFile          Profile to read on the use path (must be
///                             non-empty there); on the gen path, if non-empty,
///                             it becomes Options.InstrProfileOutput.
/// \param ProfileRemappingFile Forwarded to PGOInstrumentationUse.
894void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM,
895 OptimizationLevel Level, bool RunProfileGen,
896 bool IsCS, bool AtomicCounterUpdate,
897 std::string ProfileFile,
898 std::string ProfileRemappingFile) {
899 assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
900
901 if (!RunProfileGen) {
902 assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
903 MPM.addPass(
904 PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS));
905 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
906 // RequireAnalysisPass for PSI before subsequent non-module passes.
907 MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
908 return;
909 }
910
911 // Perform PGO instrumentation.
912 MPM.addPass(PGOInstrumentationGen(IsCS ? PGOInstrumentationType::CSFDO
914
915 addPostPGOLoopRotation(MPM, Level);
916 // Add the profile lowering pass.
917 InstrProfOptions Options;
918 if (!ProfileFile.empty())
919 Options.InstrProfileOutput = ProfileFile;
920 // Do counter promotion at Level greater than O0.
921 Options.DoCounterPromotion = true;
922 Options.UseBFIInPromotion = IsCS;
923 if (EnableSampledInstr) {
924 Options.Sampling = true;
925 // With sampling, there is little benefit to enabling counter promotion.
926 // But note that sampling does work with counter promotion.
927 Options.DoCounterPromotion = false;
928 }
929 Options.Atomic = AtomicCounterUpdate;
930 MPM.addPass(InstrProfilingLoweringPass(Options, IsCS));
931}
932
934 bool RunProfileGen, bool IsCS,
935 bool AtomicCounterUpdate,
936 std::string ProfileFile,
937 std::string ProfileRemappingFile) {
938 if (!RunProfileGen) {
939 assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
940 MPM.addPass(
941 PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS));
942 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
943 // RequireAnalysisPass for PSI before subsequent non-module passes.
945 return;
946 }
947
948 // Perform PGO instrumentation.
951 // Add the profile lowering pass.
953 if (!ProfileFile.empty())
954 Options.InstrProfileOutput = ProfileFile;
955 // Do not do counter promotion at O0.
956 Options.DoCounterPromotion = false;
957 Options.UseBFIInPromotion = IsCS;
958 Options.Atomic = AtomicCounterUpdate;
960}
961
963 return getInlineParamsFromOptLevel(Level.getSpeedupLevel());
964}
965
969 InlineParams IP;
970 if (PTO.InlinerThreshold == -1)
972 else
973 IP = getInlineParams(PTO.InlinerThreshold);
974 // For PreLinkThinLTO + SamplePGO or PreLinkFullLTO + SamplePGO,
975 // set hot-caller threshold to 0 to disable hot
976 // callsite inline (as much as possible [1]) because it makes
977 // profile annotation in the backend inaccurate.
978 //
979 // [1] Note the cost of a function could be below zero due to erased
980 // prologue / epilogue.
981 if (isLTOPreLink(Phase) && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)
983
984 if (PGOOpt)
986
990
991 // Require the GlobalsAA analysis for the module so we can query it within
992 // the CGSCC pipeline.
994 MIWP.addModulePass(RequireAnalysisPass<GlobalsAA, Module>());
995 // Invalidate AAManager so it can be recreated and pick up the newly
996 // available GlobalsAA.
997 MIWP.addModulePass(
999 }
1000
1001 // Require the ProfileSummaryAnalysis for the module so we can query it within
1002 // the inliner pass.
1004
1005 // Now begin the main postorder CGSCC pipeline.
1006 // FIXME: The current CGSCC pipeline has its origins in the legacy pass
1007 // manager and trying to emulate its precise behavior. Much of this doesn't
1008 // make a lot of sense and we should revisit the core CGSCC structure.
1009 CGSCCPassManager &MainCGPipeline = MIWP.getPM();
1010
1011 // Note: historically, the PruneEH pass was run first to deduce nounwind and
1012 // generally clean up exception handling overhead. It isn't clear this is
1013 // valuable as the inliner doesn't currently care whether it is inlining an
1014 // invoke or a call.
1015
1017 MainCGPipeline.addPass(AttributorCGSCCPass());
1019 MainCGPipeline.addPass(AttributorLightCGSCCPass());
1020
1021 // Deduce function attributes. We do another run of this after the function
1022 // simplification pipeline, so this only needs to run when it could affect the
1023 // function simplification pipeline, which is only the case with recursive
1024 // functions.
1025 MainCGPipeline.addPass(PostOrderFunctionAttrsPass(/*SkipNonRecursive*/ true));
1026
1027 // When at O3 add argument promotion to the pass pipeline.
1028 // FIXME: It isn't at all clear why this should be limited to O3.
1029 if (Level == OptimizationLevel::O3)
1030 MainCGPipeline.addPass(ArgumentPromotionPass());
1031
1032 // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if
1033 // there are no OpenMP runtime calls present in the module.
1034 if (Level == OptimizationLevel::O2 || Level == OptimizationLevel::O3)
1035 MainCGPipeline.addPass(OpenMPOptCGSCCPass(Phase));
1036
1037 invokeCGSCCOptimizerLateEPCallbacks(MainCGPipeline, Level);
1038
1039 // Add the core function simplification pipeline nested inside the
1040 // CGSCC walk.
1043 PTO.EagerlyInvalidateAnalyses, /*NoRerun=*/true));
1044
1045 // Finally, deduce any function attributes based on the fully simplified
1046 // function.
1047 MainCGPipeline.addPass(PostOrderFunctionAttrsPass());
1048
1049 // Mark that the function is fully simplified and that it shouldn't be
1050 // simplified again if we somehow revisit it due to CGSCC mutations unless
1051 // it's been modified since.
1054
1055 if (!isThinLTOPreLink(Phase)) {
1056 MainCGPipeline.addPass(CoroSplitPass(Level != OptimizationLevel::O0));
1057 MainCGPipeline.addPass(CoroAnnotationElidePass());
1058 }
1059
1060 // Make sure we don't affect potential future NoRerun CGSCC adaptors.
1061 MIWP.addLateModulePass(createModuleToFunctionPassAdaptor(
1063
1064 return MIWP;
1065}
1066
1071
1073 // For PreLinkThinLTO + SamplePGO or PreLinkFullLTO + SamplePGO,
1074 // set hot-caller threshold to 0 to disable hot
1075 // callsite inline (as much as possible [1]) because it makes
1076 // profile annotation in the backend inaccurate.
1077 //
1078 // [1] Note the cost of a function could be below zero due to erased
1079 // prologue / epilogue.
1080 if (isLTOPreLink(Phase) && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)
1081 IP.HotCallSiteThreshold = 0;
1082
1083 if (PGOOpt)
1085
1086 // The inline deferral logic is used to avoid losing some
1087 // inlining chance in future. It is helpful in SCC inliner, in which
1088 // inlining is processed in bottom-up order.
1089 // While in module inliner, the inlining order is a priority-based order
1090 // by default. The inline deferral is unnecessary there. So we disable the
1091 // inline deferral logic in module inliner.
1092 IP.EnableDeferral = false;
1093
1096 MPM.addPass(GlobalOptPass());
1097 MPM.addPass(GlobalDCEPass());
1098 MPM.addPass(PGOCtxProfFlatteningPass(/*IsPreThinlink=*/false));
1099 }
1100
1103 PTO.EagerlyInvalidateAnalyses));
1104
1105 if (!isThinLTOPreLink(Phase)) {
1108 MPM.addPass(
1110 }
1111
1112 return MPM;
1113}
1114
1118 assert(Level != OptimizationLevel::O0 &&
1119 "Should not be used for O0 pipeline");
1120
1122 "FullLTOPostLink shouldn't call buildModuleSimplificationPipeline!");
1123
1125
1126 // Place pseudo probe instrumentation as the first pass of the pipeline to
1127 // minimize the impact of optimization changes.
1128 if (PGOOpt && PGOOpt->PseudoProbeForProfiling && !isThinLTOPostLink(Phase))
1130
1131 bool HasSampleProfile = PGOOpt && (PGOOpt->Action == PGOOptions::SampleUse);
1132
1133 // In ThinLTO mode, when flattened profile is used, all the available
1134 // profile information will be annotated in PreLink phase so there is
1135 // no need to load the profile again in PostLink.
1136 bool LoadSampleProfile =
1137 HasSampleProfile && !(FlattenedProfileUsed && isThinLTOPostLink(Phase));
1138
1139 // During the ThinLTO backend phase we perform early indirect call promotion
1140 // here, before globalopt. Otherwise imported available_externally functions
1141 // look unreferenced and are removed. If we are going to load the sample
1142 // profile then defer until later.
1143 // TODO: See if we can move later and consolidate with the location where
1144 // we perform ICP when we are loading a sample profile.
1145 // TODO: We pass HasSampleProfile (whether there was a sample profile file
1146 // passed to the compile) to the SamplePGO flag of ICP. This is used to
1147 // determine whether the new direct calls are annotated with prof metadata.
1148 // Ideally this should be determined from whether the IR is annotated with
1149 // sample profile, and not whether a sample profile was provided on the
1150 // command line. E.g. for flattened profiles where we will not be reloading
1151 // the sample profile in the ThinLTO backend, we ideally shouldn't have to
1152 // provide the sample profile file.
1153 if (isThinLTOPostLink(Phase) && !LoadSampleProfile)
1154 MPM.addPass(PGOIndirectCallPromotion(true /* InLTO */, HasSampleProfile));
1155
1156 // Create an early function pass manager to cleanup the output of the
1157 // frontend. Not necessary with LTO post link pipelines since the pre link
1158 // pipeline already cleaned up the frontend output.
1159 if (!isThinLTOPostLink(Phase)) {
1160 // Do basic inference of function attributes from known properties of system
1161 // libraries and other oracles.
1163 MPM.addPass(CoroEarlyPass());
1164
1165 FunctionPassManager EarlyFPM;
1166 EarlyFPM.addPass(EntryExitInstrumenterPass(/*PostInlining=*/false));
1167 // Lower llvm.expect to metadata before attempting transforms.
1168 // Compare/branch metadata may alter the behavior of passes like
1169 // SimplifyCFG.
1171 EarlyFPM.addPass(SimplifyCFGPass());
1173 EarlyFPM.addPass(EarlyCSEPass());
1174 if (Level == OptimizationLevel::O3)
1175 EarlyFPM.addPass(CallSiteSplittingPass());
1177 std::move(EarlyFPM), PTO.EagerlyInvalidateAnalyses));
1178 }
1179
1180 if (LoadSampleProfile) {
1181 // Annotate sample profile right after early FPM to ensure freshness of
1182 // the debug info.
1184 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile, Phase, FS));
1185 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
1186 // RequireAnalysisPass for PSI before subsequent non-module passes.
1188 // Do not invoke ICP in the LTOPrelink phase as it makes it hard
1189 // for the profile annotation to be accurate in the LTO backend.
1190 if (!isLTOPreLink(Phase))
1191 // We perform early indirect call promotion here, before globalopt.
1192 // This is important for the ThinLTO backend phase because otherwise
1193 // imported available_externally functions look unreferenced and are
1194 // removed.
1195 MPM.addPass(
1196 PGOIndirectCallPromotion(true /* IsInLTO */, true /* SamplePGO */));
1197 }
1198
1199 // Try to perform OpenMP specific optimizations on the module. This is a
1200 // (quick!) no-op if there are no OpenMP runtime calls present in the module.
1202
1204 MPM.addPass(AttributorPass());
1207
1208 // Lower type metadata and the type.test intrinsic in the ThinLTO
1209 // post link pipeline after ICP. This is to enable usage of the type
1210 // tests in ICP sequences.
1213
1215
1216 // Interprocedural constant propagation now that basic cleanup has occurred
1217 // and prior to optimizing globals.
1218 // FIXME: This position in the pipeline hasn't been carefully considered in
1219 // years, it should be re-analyzed.
1220 MPM.addPass(
1221 IPSCCPPass(IPSCCPOptions(/*AllowFuncSpec=*/!isLTOPreLink(Phase))));
1222
1223 // Attach metadata to indirect call sites indicating the set of functions
1224 // they may target at run-time. This should follow IPSCCP.
1226
1227 // Optimize globals to try and fold them into constants.
1228 MPM.addPass(GlobalOptPass());
1229
1230 // Create a small function pass pipeline to cleanup after all the global
1231 // optimizations.
1232 FunctionPassManager GlobalCleanupPM;
1233 // FIXME: Should this instead by a run of SROA?
1234 GlobalCleanupPM.addPass(PromotePass());
1235 GlobalCleanupPM.addPass(InstCombinePass());
1236 invokePeepholeEPCallbacks(GlobalCleanupPM, Level);
1237 GlobalCleanupPM.addPass(
1238 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
1239 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(GlobalCleanupPM),
1240 PTO.EagerlyInvalidateAnalyses));
1241
1242 // We already asserted this happens in non-FullLTOPostLink earlier.
1243 const bool IsPreLink = !isThinLTOPostLink(Phase);
1244 // Enable contextual profiling instrumentation.
1245 const bool IsCtxProfGen =
1247 const bool IsPGOPreLink = !IsCtxProfGen && PGOOpt && IsPreLink;
1248 const bool IsPGOInstrGen =
1249 IsPGOPreLink && PGOOpt->Action == PGOOptions::IRInstr;
1250 const bool IsPGOInstrUse =
1251 IsPGOPreLink && PGOOpt->Action == PGOOptions::IRUse;
1252 const bool IsMemprofUse = IsPGOPreLink && !PGOOpt->MemoryProfile.empty();
1253 // We don't want to mix pgo ctx gen and pgo gen; we also don't currently
1254 // enable ctx profiling from the frontend.
1256 "Enabling both instrumented PGO and contextual instrumentation is not "
1257 "supported.");
1258 const bool IsCtxProfUse = !UseCtxProfile.empty() && isThinLTOPreLink(Phase);
1259
1260 assert(
1262 "--instrument-cold-function-only-path is provided but "
1263 "--pgo-instrument-cold-function-only is not enabled");
1264 const bool IsColdFuncOnlyInstrGen = PGOInstrumentColdFunctionOnly &&
1265 IsPGOPreLink &&
1267
1268 if (IsPGOInstrGen || IsPGOInstrUse || IsMemprofUse || IsCtxProfGen ||
1269 IsCtxProfUse || IsColdFuncOnlyInstrGen)
1270 addPreInlinerPasses(MPM, Level, Phase);
1271
1272 // Add all the requested passes for instrumentation PGO, if requested.
1273 if (IsPGOInstrGen || IsPGOInstrUse) {
1274 addPGOInstrPasses(MPM, Level,
1275 /*RunProfileGen=*/IsPGOInstrGen,
1276 /*IsCS=*/false, PGOOpt->AtomicCounterUpdate,
1277 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile);
1278 } else if (IsCtxProfGen || IsCtxProfUse) {
1280 // In pre-link, we just want the instrumented IR. We use the contextual
1281 // profile in the post-thinlink phase.
1282 // The instrumentation will be removed in post-thinlink after IPO.
1283 // FIXME(mtrofin): move AssignGUIDPass if there is agreement to use this
1284 // mechanism for GUIDs.
1285 MPM.addPass(AssignGUIDPass());
1286 if (IsCtxProfUse) {
1287 MPM.addPass(PGOCtxProfFlatteningPass(/*IsPreThinlink=*/true));
1288 return MPM;
1289 }
1290 // Block further inlining in the instrumented ctxprof case. This avoids
1291 // confusingly collecting profiles for the same GUID corresponding to
1292 // different variants of the function. We could do like PGO and identify
1293 // functions by a (GUID, Hash) tuple, but since the ctxprof "use" waits for
1294 // thinlto to happen before performing any further optimizations, it's
1295 // unnecessary to collect profiles for non-prevailing copies.
1297 addPostPGOLoopRotation(MPM, Level);
1299 } else if (IsColdFuncOnlyInstrGen) {
1300 addPGOInstrPasses(MPM, Level, /* RunProfileGen */ true, /* IsCS */ false,
1301 /* AtomicCounterUpdate */ false,
1303 /* ProfileRemappingFile */ "");
1304 }
1305
1306 if (IsPGOInstrGen || IsPGOInstrUse || IsCtxProfGen)
1307 MPM.addPass(PGOIndirectCallPromotion(false, false));
1308
1309 if (IsPGOPreLink && PGOOpt->CSAction == PGOOptions::CSIRInstr)
1310 MPM.addPass(PGOInstrumentationGenCreateVar(PGOOpt->CSProfileGenFile,
1312
1313 if (IsMemprofUse)
1314 MPM.addPass(MemProfUsePass(PGOOpt->MemoryProfile, FS));
1315
1316 if (PGOOpt && (PGOOpt->Action == PGOOptions::IRUse ||
1317 PGOOpt->Action == PGOOptions::SampleUse))
1318 MPM.addPass(PGOForceFunctionAttrsPass(PGOOpt->ColdOptType));
1319
1320 MPM.addPass(AlwaysInlinerPass(/*InsertLifetimeIntrinsics=*/true));
1321
1324 else
1325 MPM.addPass(buildInlinerPipeline(Level, Phase));
1326
1327 // Remove any dead arguments exposed by cleanups, constant folding globals,
1328 // and argument promotion.
1330
1333
1334 if (!isThinLTOPreLink(Phase))
1335 MPM.addPass(CoroCleanupPass());
1336
1337 // Optimize globals now that functions are fully simplified.
1338 MPM.addPass(GlobalOptPass());
1339 MPM.addPass(GlobalDCEPass());
1340
1341 return MPM;
1342}
1343
/// Appends the vectorization-related function passes (and the cleanup,
/// unrolling and SROA passes that accompany them) to FPM. Which passes are
/// added depends on Level, PTO and LTOPhase.
/// NOTE(review): some statements and one signature line are absent from this
/// listing (the embedded line numbers skip) -- consult the upstream file
/// before editing any code here.
1344/// TODO: Should LTO cause any differences to this set of passes?
1345void PassBuilder::addVectorPasses(OptimizationLevel Level,
1347 ThinOrFullLTOPhase LTOPhase) {
1350
1351 // Drop dereferenceable assumes after vectorization, as they are no longer
1352 // needed and can inhibit further optimization.
1353 if (!isLTOPreLink(LTOPhase))
1354 FPM.addPass(DropUnnecessaryAssumesPass(/*DropDereferenceable=*/true));
1355
1357 if (isFullLTOPostLink(LTOPhase)) {
1358 // The vectorizer may have significantly shortened a loop body; unroll
1359 // again. Unroll small loops to hide loop backedge latency and saturate any
1360 // parallel execution resources of an out-of-order processor. We also then
1361 // need to clean up redundancies and loop invariant code.
1362 // FIXME: It would be really good to use a loop-integrated instruction
1363 // combiner for cleanup here so that the unrolling and LICM can be pipelined
1364 // across the loop nests.
1365 // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
1368 LoopUnrollAndJamPass(Level.getSpeedupLevel())));
1370 Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
1373 // Now that we are done with loop unrolling, be it either by LoopVectorizer,
1374 // or LoopUnroll passes, some variable-offset GEP's into alloca's could have
1375 // become constant-offset, thus enabling SROA and alloca promotion. Do so.
1376 // NOTE: we are very late in the pipeline, and we don't have any LICM
1377 // or SimplifyCFG passes scheduled after us, that would cleanup
1378 // the CFG mess this may create if allowed to modify CFG, so forbid that.
1379
1380 // We also turn on struct to vector canonicalization here, which allows
1381 // converting allocas of homogeneous structs into vector allocas when the
1382 // allocas' users are all memory intrinsics. This allows promotion in some
1383 // cases because structs cannot promote to SSA values, but vectors can. We
1384 // only turn this on after memcpyopt runs because this might hinder
1385 // memcpyopt's optimizations if done before. Look at the documentation for
1386 // `tryCanonicalizeStructToVector` in SROA.cpp to see why.
1388 /*AggregateToVector=*/true)));
1389 }
1390
1391 if (!isFullLTOPostLink(LTOPhase)) {
1392 // Eliminate loads by forwarding stores from the previous iteration to loads
1393 // of the current iteration.
1395 }
1396 // Cleanup after the loop optimization passes.
1397 FPM.addPass(InstCombinePass());
1398
1399 if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
1400 ExtraFunctionPassManager<ShouldRunExtraVectorPasses> ExtraPasses;
1401 // At higher optimization levels, try to clean up any runtime overlap and
1402 // alignment checks inserted by the vectorizer. We want to track correlated
1403 // runtime checks for two inner loops in the same outer loop, fold any
1404 // common computations, hoist loop-invariant aspects out of any outer loop,
1405 // and unswitch the runtime checks if possible. Once hoisted, we may have
1406 // dead (or speculatable) control flows or more combining opportunities.
1407 ExtraPasses.addPass(EarlyCSEPass());
1408 ExtraPasses.addPass(CorrelatedValuePropagationPass());
1409 ExtraPasses.addPass(InstCombinePass());
1410 LoopPassManager LPM;
1411 LPM.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
1412 /*AllowSpeculation=*/true));
1413 LPM.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
1415 ExtraPasses.addPass(
1416 createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/true));
1417 ExtraPasses.addPass(
1418 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
1419 ExtraPasses.addPass(InstCombinePass());
1420 FPM.addPass(std::move(ExtraPasses));
1421 }
1422
1423 // Now that we've formed fast to execute loop structures, we do further
1424 // optimizations. These are run afterward as they might block doing complex
1425 // analyses and transforms such as what are needed for loop vectorization.
1426
1427 // Cleanup after loop vectorization, etc. Simplification passes like CVP and
1428 // GVN, loop transforms, and others have already run, so it's now better to
1429 // convert to more optimized IR using more aggressive simplify CFG options.
1430 // The extra sinking transform can create larger basic blocks, so do this
1431 // before SLP vectorization.
1432 FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
1433 .forwardSwitchCondToPhi(true)
1434 .convertSwitchRangeToICmp(true)
1435 .convertSwitchToArithmetic(true)
1436 .convertSwitchToLookupTable(true)
1437 .needCanonicalLoops(false)
1438 .hoistCommonInsts(true)
1439 .sinkCommonInsts(true)));
1440
1441 if (isFullLTOPostLink(LTOPhase)) {
1442 FPM.addPass(SCCPPass());
1443 FPM.addPass(InstCombinePass());
1444 FPM.addPass(BDCEPass());
1445 }
1446
1447 // Optimize parallel scalar instruction chains into SIMD instructions.
1448 if (PTO.SLPVectorization) {
1449 FPM.addPass(SLPVectorizerPass());
1450 if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
1451 FPM.addPass(EarlyCSEPass());
1452 }
1453 }
1454 // Enhance/cleanup vector code.
1455 FPM.addPass(VectorCombinePass());
1456
1457 if (!isFullLTOPostLink(LTOPhase)) {
1458 FPM.addPass(InstCombinePass());
1459 // Unroll small loops to hide loop backedge latency and saturate any
1460 // parallel execution resources of an out-of-order processor. We also then
1461 // need to clean up redundancies and loop invariant code.
1462 // FIXME: It would be really good to use a loop-integrated instruction
1463 // combiner for cleanup here so that the unrolling and LICM can be pipelined
1464 // across the loop nests.
1465 // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
1466 if (EnableUnrollAndJam && PTO.LoopUnrolling) {
1468 LoopUnrollAndJamPass(Level.getSpeedupLevel())));
1469 }
1470 FPM.addPass(LoopUnrollPass(LoopUnrollOptions(
1471 Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
1472 PTO.ForgetAllSCEVInLoopUnroll)));
1473 FPM.addPass(WarnMissedTransformationsPass());
1474 // Now that we are done with loop unrolling, be it either by LoopVectorizer,
1475 // or LoopUnroll passes, some variable-offset GEP's into alloca's could have
1476 // become constant-offset, thus enabling SROA and alloca promotion. Do so.
1477 // NOTE: we are very late in the pipeline, and we don't have any LICM
1478 // or SimplifyCFG passes scheduled after us, that would cleanup
1479 // the CFG mess this may create if allowed to modify CFG, so forbid that.
1480
1481 // We also turn on struct to vector canonicalization here, which allows
1482 // converting allocas of homogeneous structs into vector allocas when the
1483 // allocas' users are all memory intrinsics. This allows promotion in some
1484 // cases because structs cannot promote to SSA values, but vectors can. We
1485 // only turn this on after memcpyopt runs because this might hinder
1486 // memcpyopt's optimizations if done before. Look at the documentation for
1487 // `tryCanonicalizeStructToVector` in SROA.cpp to see why.
1488 FPM.addPass(SROAPass(SROAOptions(SROAOptions::PreserveCFG,
1489 /*AggregateToVector=*/true)));
1490 }
1491
1492 FPM.addPass(InferAlignmentPass());
1493 FPM.addPass(InstCombinePass());
1494
1495 // This is needed for two reasons:
1496 // 1. It works around problems that instcombine introduces, such as sinking
1497 // expensive FP divides into loops containing multiplications using the
1498 // divide result.
1499 // 2. It helps to clean up some loop-invariant code created by the loop
1500 // unroll pass when IsFullLTO=false.
// NOTE(review): no IsFullLTO variable is visible in this function;
// presumably this refers to the !isFullLTOPostLink(LTOPhase) branch
// above, where LoopUnrollPass is added -- confirm.
1502 LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
1503 /*AllowSpeculation=*/true),
1504 /*UseMemorySSA=*/true));
1505
1506 // Now that we've vectorized and unrolled loops, we may have more refined
1507 // alignment information, try to re-derive it here.
1508 FPM.addPass(AlignmentFromAssumptionsPass());
1509}
1510
// Module optimization pipeline builder: assembles the late function-level
// optimization pipeline (OptimizePM, including addVectorPasses) plus the
// surrounding module passes into MPM and returns it.
// NOTE(review): the function name and first signature lines are absent from
// this listing (presumably PassBuilder::buildModuleOptimizationPipeline), as
// are a number of statements -- consult the upstream file before editing.
1513 ThinOrFullLTOPhase LTOPhase) {
1515
1516 // Run partial inlining pass to partially inline functions that have
1517 // large bodies.
1520
1521 // Remove avail extern fns and globals definitions since we aren't compiling
1522 // an object file for later LTO. For LTO we want to preserve these so they
1523 // are eligible for inlining at link-time. Note if they are unreferenced they
1524 // will be removed by GlobalDCE later, so this only impacts referenced
1525 // available externally globals. Eventually they will be suppressed during
1526 // codegen, but eliminating here enables more opportunity for GlobalDCE as it
1527 // may make globals referenced by available external functions dead and saves
1528 // running remaining passes on the eliminated functions. These should be
1529 // preserved during prelinking for link-time inlining decisions.
1530 if (!isLTOPreLink(LTOPhase))
1532
1533 // Do RPO function attribute inference across the module to forward-propagate
1534 // attributes where applicable.
1535 // FIXME: Is this really an optimization rather than a canonicalization?
1537
1538 // Do a post inline PGO instrumentation and use pass. This is a context
1539 // sensitive PGO pass. We don't want to do this in LTOPreLink phase as
1540 // cross-module inline has not been done yet. The context sensitive
1541 // instrumentation is after all the inlines are done.
1542 if (!isLTOPreLink(LTOPhase) && PGOOpt) {
1543 if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
1544 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true,
1545 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
1546 PGOOpt->CSProfileGenFile, PGOOpt->ProfileRemappingFile);
1547 else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
1548 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false,
1549 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
1550 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile);
1551 }
1552
1553 // Re-compute GlobalsAA here prior to function passes. This is particularly
1554 // useful as the above will have inlined, DCE'ed, and function-attr
1555 // propagated everything. We should at this point have a reasonably minimal
1556 // and richly annotated call graph. By computing aliasing and mod/ref
1557 // information for all local globals here, the late loop passes and notably
1558 // the vectorizer will be able to use them to help recognize vectorizable
1559 // memory operations.
1562
1563 invokeOptimizerEarlyEPCallbacks(MPM, Level, LTOPhase);
1564
1565 FunctionPassManager OptimizePM;
1566
1567 // Only drop unnecessary assumes post-inline and post-link, as otherwise
1568 // additional uses of the affected value may be introduced through inlining
1569 // and CSE.
1570 if (!isLTOPreLink(LTOPhase))
1571 OptimizePM.addPass(DropUnnecessaryAssumesPass());
1572
1573 // Scheduling LoopVersioningLICM when inlining is over, because after that
1574 // we may see more accurate aliasing. Reason to run this late is that too
1575 // early versioning may prevent further inlining due to increase of code
1576 // size. Other optimizations which run later might benefit from the no-alias
1577 // assumption in the cloned loop.
1579 OptimizePM.addPass(
1581 // LoopVersioningLICM pass might increase new LICM opportunities.
1583 LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
1584 /*AllowSpeculation=*/true),
1585 /*UseMemorySSA=*/true));
1586 }
1587
1588 OptimizePM.addPass(Float2IntPass());
1590
1591 if (EnableMatrix) {
1592 OptimizePM.addPass(LowerMatrixIntrinsicsPass());
1593 OptimizePM.addPass(EarlyCSEPass());
1594 }
1595
1596 // CHR pass should only be applied with the profile information.
1597 // The check is to check the profile summary information in CHR.
1598 if (EnableCHR && Level == OptimizationLevel::O3)
1599 OptimizePM.addPass(ControlHeightReductionPass());
1600
1601 // FIXME: We need to run some loop optimizations to re-rotate loops after
1602 // simplifycfg and others undo their rotation.
1603
1604 // Optimize the loop execution. These passes operate on entire loop nests
1605 // rather than on each loop in an inside-out manner, and so they are actually
1606 // function passes.
1607
1608 invokeVectorizerStartEPCallbacks(OptimizePM, Level);
1609
1610 LoopPassManager LPM;
1611 // First rotate loops that may have been un-rotated by prior passes.
1612 // Disable header duplication at -Oz.
// NOTE(review): header duplication is passed as `true` unconditionally
// below, so the -Oz remark above looks stale -- confirm.
1613 LPM.addPass(LoopRotatePass(/*EnableLoopHeaderDuplication=*/true,
1614 isLTOPreLink(LTOPhase),
1615 /*CheckExitCount=*/true));
1616 // Some loops may have become dead by now. Try to delete them.
1617 // FIXME: see discussion in https://reviews.llvm.org/D112851,
1618 // this may need to be revisited once we run GVN before loop deletion
1619 // in the simplification pipeline.
1620 LPM.addPass(LoopDeletionPass());
1621
1622 if (PTO.LoopInterchange)
1623 LPM.addPass(LoopInterchangePass());
1624
1625 OptimizePM.addPass(
1626 createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/false));
1627
1628 // FIXME: This may not be the right place in the pipeline.
1629 // We need to have the data to support the right place.
1630 if (PTO.LoopFusion)
1631 OptimizePM.addPass(LoopFusePass());
1632
1633 // Distribute loops to allow partial vectorization. I.e. isolate dependences
1634 // into separate loop that would otherwise inhibit vectorization. This is
1635 // currently only performed for loops marked with the metadata
1636 // llvm.loop.distribute=true or when -enable-loop-distribute is specified.
1637 OptimizePM.addPass(LoopDistributePass());
1638
1639 // Populates the VFABI attribute with the scalar-to-vector mappings
1640 // from the TargetLibraryInfo.
1641 OptimizePM.addPass(InjectTLIMappings());
1642
1643 addVectorPasses(Level, OptimizePM, LTOPhase);
1644
1645 invokeVectorizerEndEPCallbacks(OptimizePM, Level);
1646
1647 // LoopSink pass sinks instructions hoisted by LICM, which serves as a
1648 // canonicalization pass that enables other optimizations. As a result,
1649 // LoopSink pass needs to be a very late IR pass to avoid undoing LICM
1650 // result too early.
1651 OptimizePM.addPass(LoopSinkPass());
1652
1653 // And finally clean up LCSSA form before generating code.
1654 OptimizePM.addPass(InstSimplifyPass());
1655
1656 // This hoists/decomposes div/rem ops. It should run after other sink/hoist
1657 // passes to avoid re-sinking, but before SimplifyCFG because it can allow
1658 // flattening of blocks.
1659 OptimizePM.addPass(DivRemPairsPass());
1660
1661 // Merge adjacent icmps into memcmp, then expand memcmp to loads/compares.
1662 // TODO: move this further up so that it can be optimized by GVN, etc.
1663 if (EnableMergeICmps)
1664 OptimizePM.addPass(MergeICmpsPass());
1665 OptimizePM.addPass(ExpandMemCmpPass());
1666
1667 // Try to annotate calls that were created during optimization.
1668 OptimizePM.addPass(
1669 TailCallElimPass(/*UpdateFunctionEntryCount=*/isInstrumentedPGOUse()));
1670
1671 // LoopSink (and other loop passes since the last simplifyCFG) might have
1672 // resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
1673 OptimizePM.addPass(
1675 .convertSwitchRangeToICmp(true)
1676 .convertSwitchToArithmetic(true)
1677 .speculateUnpredictables(true)
1678 .hoistLoadsStoresWithCondFaulting(true)));
1679
1680 // Add the core optimizing pipeline.
1681 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM),
1682 PTO.EagerlyInvalidateAnalyses));
1683
1684 // AllocToken transforms heap allocation calls; this needs to run late after
1685 // other allocation call transformations (such as those in InstCombine).
1686 if (!isLTOPreLink(LTOPhase))
1687 MPM.addPass(AllocTokenPass());
1688
1689 invokeOptimizerLastEPCallbacks(MPM, Level, LTOPhase);
1690
1691 // Run the Instrumentor pass late.
1693 MPM.addPass(InstrumentorPass(FS));
1694
1695 // Split out cold code. Splitting is done late to avoid hiding context from
1696 // other optimizations and inadvertently regressing performance. The tradeoff
1697 // is that this has a higher code size cost than splitting early.
1698 if (EnableHotColdSplit && !isLTOPreLink(LTOPhase))
1700
1701 // Search the code for similar regions of code. If enough similar regions can
1702 // be found where extracting the regions into their own function will decrease
1703 // the size of the program, we extract the regions, and deduplicate the
1704 // structurally similar regions.
1705 if (EnableIROutliner)
1706 MPM.addPass(IROutlinerPass());
1707
1708 // Now we need to do some global optimization transforms.
1709 // FIXME: It would seem like these should come first in the optimization
1710 // pipeline and maybe be the bottom of the canonicalization pipeline? Weird
1711 // ordering here.
1712 MPM.addPass(GlobalDCEPass());
1714
1715 // Merge functions if requested. It has a better chance to merge functions
1716 // after ConstantMerge folded jump tables.
1717 if (PTO.MergeFunctions)
1719
1720 if (PTO.CallGraphProfile && !isLTOPreLink(LTOPhase))
1721 MPM.addPass(CGProfilePass(isLTOPostLink(LTOPhase)));
1722
1723 // RelLookupTableConverterPass runs later in LTO post-link pipeline.
1724 if (!isLTOPreLink(LTOPhase))
1726
1727 // Add devirtualization pass only when LTO is not enabled, as otherwise
1728 // the pass is already enabled in the LTO pipeline.
1729 if (PTO.DevirtualizeSpeculatively && LTOPhase == ThinOrFullLTOPhase::None) {
1730 // TODO: explore a better pipeline configuration that can improve
1731 // compilation time overhead.
1733 /*ExportSummary*/ nullptr,
1734 /*ImportSummary*/ nullptr,
1735 /*DevirtSpeculatively*/ PTO.DevirtualizeSpeculatively));
1737 // Given that the devirtualization creates more opportunities for inlining,
1738 // we run the Inliner again here to maximize the optimization gain we
1739 // get from devirtualization.
1740 // Also, we can't run devirtualization before inlining because the
1741 // devirtualization depends on the passes optimizing/eliminating vtable GVs
1742 // and those passes are only effective after inlining.
1743 if (EnableModuleInliner) {
1747 } else {
1750 /* MandatoryFirst */ true,
1752 }
1753 }
1754
1755 // Attach !implicit.ref metadata from all functions to copyright strings.
1757
1758 return MPM;
1759}
1760
// Default per-module pipeline builder: delegates to buildO0DefaultPipeline at
// O0, otherwise populates MPM and returns it; adds the required LTO pre-link
// passes when Phase is an LTO pre-link phase.
// NOTE(review): the signature and most pass-adding statements are absent from
// this listing (presumably PassBuilder::buildPerModuleDefaultPipeline) --
// consult the upstream file before editing.
1764 if (Level == OptimizationLevel::O0)
1765 return buildO0DefaultPipeline(Level, Phase);
1766
1768 instructionCountersPass(MPM, /* IsPreOptimization */ true);
1769 // Currently this pipeline is only invoked in an LTO pre link pass or when we
1770 // are not running LTO. If that changes the below checks may need updating.
1772
1773 // If we are invoking this in non-LTO mode, remove any MemProf related
1774 // attributes and metadata, as we don't know whether we are linking with
1775 // a library containing the necessary interfaces.
1778
1779 // Convert @llvm.global.annotations to !annotation metadata.
1781
1782 // Force any function attributes we want the rest of the pipeline to observe.
1784
1785 if (TriggerCrash)
1787
1788 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
1790
1791 // Apply module pipeline start EP callback.
1793
1794 // Add the core simplification pipeline.
1796
1797 // Now add the optimization pipeline.
1799
1800 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1801 PGOOpt->Action == PGOOptions::SampleUse)
1803
1804 // Emit annotation remarks.
1806
1807 if (isLTOPreLink(Phase))
1808 addRequiredLTOPreLinkPasses(MPM);
1809
1810 instructionCountersPass(MPM, /* IsPreOptimization */ false);
1811 return MPM;
1812}
1813
// FatLTO default pipeline builder: embeds bitcode (EmbedBitcodePass), runs
// FatLtoCleanup, then adds either the ThinLTO default pipeline (ThinLTO with
// a sample profile) or the module optimization path, and returns MPM.
// NOTE(review): the function name, leading signature lines and several
// statements are absent from this listing (presumably
// PassBuilder::buildFatLTODefaultPipeline) -- consult the upstream file
// before editing.
1816 bool EmitSummary) {
1818
1819 instructionCountersPass(MPM, /* IsPreOptimization */ true);
1820
1821 if (ThinLTO)
1823 else
1825 MPM.addPass(EmbedBitcodePass(ThinLTO, EmitSummary));
1826
1827 // Perform any cleanups to the IR that aren't suitable for per TU compilation,
1828 // like removing CFI/WPD related instructions. Note, we reuse
1829 // DropTypeTestsPass to clean up type tests rather than duplicate that logic
1830 // in FatLtoCleanup.
1831 MPM.addPass(FatLtoCleanup());
1832
1833 // If we're doing FatLTO w/ CFI enabled, we don't want the type tests in the
1834 // object code, only in the bitcode section, so drop it before we run
1835 // module optimization and generate machine code. If llvm.type.test() isn't in
1836 // the IR, this won't do anything.
1838
1839 // Use the ThinLTO post-link pipeline with sample profiling
1840 if (ThinLTO && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)
1841 MPM.addPass(buildThinLTODefaultPipeline(Level, /*ImportSummary=*/nullptr));
1842 else {
1843 // ModuleSimplification does not run the coroutine passes for
1844 // ThinLTOPreLink, so we need the coroutine passes to run for ThinLTO
1845 // builds, otherwise they will miscompile.
1846 if (ThinLTO) {
1847 // TODO: replace w/ buildCoroWrapper() when it takes phase and level into
1848 // consideration.
1849 CGSCCPassManager CGPM;
1853 MPM.addPass(CoroCleanupPass());
1854 }
1855
1856 // Otherwise, just use module optimization.
1857 MPM.addPass(
1859 // Emit annotation remarks.
1861 }
1862
1863 instructionCountersPass(MPM, /* IsPreOptimization */ false);
1864
1865 return MPM;
1866}
1867
// ThinLTO pre-link default pipeline builder: populates MPM, returning early
// (after addRequiredLTOPreLinkPasses) when UseCtxProfile is non-empty, and
// otherwise finishing with addRequiredLTOPreLinkPasses before returning MPM.
// NOTE(review): the signature and most pass-adding statements are absent from
// this listing (presumably PassBuilder::buildThinLTOPreLinkDefaultPipeline)
// -- consult the upstream file before editing.
1870 if (Level == OptimizationLevel::O0)
1872
1874
1875 instructionCountersPass(MPM, /* IsPreOptimization */ true);
1876
1877 // Convert @llvm.global.annotations to !annotation metadata.
1879
1880 // Force any function attributes we want the rest of the pipeline to observe.
1882
1883 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
1885
1886 // Apply module pipeline start EP callback.
1888
1889 // If we are planning to perform ThinLTO later, we don't bloat the code with
1890 // unrolling/vectorization/... now. Just simplify the module as much as we
1891 // can.
1894 // In pre-link, for ctx prof use, we stop here with an instrumented IR. We let
1895 // thinlto use the contextual info to perform imports; then use the contextual
1896 // profile in the post-thinlink phase.
1897 if (!UseCtxProfile.empty()) {
1898 addRequiredLTOPreLinkPasses(MPM);
1899 return MPM;
1900 }
1901
1902 // Run partial inlining pass to partially inline functions that have
1903 // large bodies.
1904 // FIXME: It isn't clear whether this is really the right place to run this
1905 // in ThinLTO. Because there is another canonicalization and simplification
1906 // phase that will run after the thin link, running this here ends up with
1907 // less information than will be available later and it may grow functions in
1908 // ways that aren't beneficial.
1911
1912 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1913 PGOOpt->Action == PGOOptions::SampleUse)
1915
1916 // Handle Optimizer{Early,Last}EPCallbacks added by clang on PreLink. Actual
1917 // optimization is going to be done in PostLink stage, but clang can't add
1918 // callbacks there in case of in-process ThinLTO called by linker.
1923
1924 // Emit annotation remarks.
1926
1927 // Attach !implicit.ref metadata from all functions to copyright strings.
1929
1930 addRequiredLTOPreLinkPasses(MPM);
1931
1932 instructionCountersPass(MPM, /* IsPreOptimization */ false);
1933
1934 return MPM;
1935}
1936
// ThinLTO post-link default pipeline builder: when ImportSummary is given,
// runs WholeProgramDevirtPass and LowerTypeTestsPass in import mode first;
// at O0 it returns after a short cleanup sequence, otherwise it continues
// with the simplification and optimization pipelines and returns MPM.
// NOTE(review): the function name line and several statements are absent from
// this listing (presumably PassBuilder::buildThinLTODefaultPipeline) --
// consult the upstream file before editing.
1938 OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary) {
1940
1941 instructionCountersPass(MPM, /* IsPreOptimization */ true);
1942
1943 // If we are invoking this without a summary index noting that we are linking
1944 // with a library containing the necessary APIs, remove any MemProf related
1945 // attributes and metadata.
1946 if (!ImportSummary || !ImportSummary->withSupportsHotColdNew())
1948
1949 if (ImportSummary) {
1950 // For ThinLTO we must apply the context disambiguation decisions early, to
1951 // ensure we can correctly match the callsites to summary data.
1954 ImportSummary, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
1955
1956 // These passes import type identifier resolutions for whole-program
1957 // devirtualization and CFI. They must run early because other passes may
1958 // disturb the specific instruction patterns that these passes look for,
1959 // creating dependencies on resolutions that may not appear in the summary.
1960 //
1961 // For example, GVN may transform the pattern assume(type.test) appearing in
1962 // two basic blocks into assume(phi(type.test, type.test)), which would
1963 // transform a dependency on a WPD resolution into a dependency on a type
1964 // identifier resolution for CFI.
1965 //
1966 // Also, WPD has access to more precise information than ICP and can
1967 // devirtualize more effectively, so it should operate on the IR first.
1968 //
1969 // The WPD and LowerTypeTest passes need to run at -O0 to lower type
1970 // metadata and intrinsics.
1971 MPM.addPass(WholeProgramDevirtPass(nullptr, ImportSummary));
1972 MPM.addPass(LowerTypeTestsPass(nullptr, ImportSummary));
1973 }
1974
1975 if (Level == OptimizationLevel::O0) {
1976 // Run a second time to clean up any type tests left behind by WPD for use
1977 // in ICP.
1980
1981 // AllocToken transforms heap allocation calls; this needs to run late after
1982 // other allocation call transformations (such as those in InstCombine).
1983 MPM.addPass(AllocTokenPass());
1984
1985 // Drop available_externally and unreferenced globals. This is necessary
1986 // with ThinLTO in order to avoid leaving undefined references to dead
1987 // globals in the object file.
1989 MPM.addPass(GlobalDCEPass());
1990 return MPM;
1991 }
1992 if (!UseCtxProfile.empty()) {
1993 MPM.addPass(
1995 } else {
1996 // Add the core simplification pipeline.
1999 }
2000 // Now add the optimization pipeline.
2003
2004 // Emit annotation remarks.
2006
2007 instructionCountersPass(MPM, /* IsPreOptimization */ false);
2008
2009 return MPM;
2010}
2011
2014 // FIXME: We should use a customized pre-link pipeline!
2015 return buildPerModuleDefaultPipeline(Level,
2017}
2018
2021 ModuleSummaryIndex *ExportSummary) {
2023
2024 instructionCountersPass(MPM, /* IsPreOptimization */ true);
2025
2027
2028 // If we are invoking this without a summary index noting that we are linking
2029 // with a library containing the necessary APIs, remove any MemProf related
2030 // attributes and metadata.
2031 if (!ExportSummary || !ExportSummary->withSupportsHotColdNew())
2033
2034 // Create a function that performs CFI checks for cross-DSO calls with targets
2035 // in the current module.
2036 MPM.addPass(CrossDSOCFIPass());
2037
2038 if (Level == OptimizationLevel::O0) {
2039 // The WPD and LowerTypeTest passes need to run at -O0 to lower type
2040 // metadata and intrinsics.
2041 MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
2042 MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
2043 // Run a second time to clean up any type tests left behind by WPD for use
2044 // in ICP.
2046
2048
2049 // AllocToken transforms heap allocation calls; this needs to run late after
2050 // other allocation call transformations (such as those in InstCombine).
2051 MPM.addPass(AllocTokenPass());
2052
2054
2055 // Emit annotation remarks.
2057
2058 return MPM;
2059 }
2060
2061 if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) {
2062 // Load sample profile before running the LTO optimization pipeline.
2063 MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
2064 PGOOpt->ProfileRemappingFile,
2066 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
2067 // RequireAnalysisPass for PSI before subsequent non-module passes.
2069 }
2070
2071 // Try to run OpenMP optimizations, quick no-op if no OpenMP metadata present.
2073
2074 // Remove unused virtual tables to improve the quality of code generated by
2075 // whole-program devirtualization and bitset lowering.
2076 MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));
2077
2078 // Do basic inference of function attributes from known properties of system
2079 // libraries and other oracles.
2081
2082 if (Level.getSpeedupLevel() > 1) {
2084 CallSiteSplittingPass(), PTO.EagerlyInvalidateAnalyses));
2085
2086 // Indirect call promotion. This should promote all the targets that are
2087 // left by the earlier promotion pass that promotes intra-module targets.
2088 // This two-step promotion is to save the compile time. For LTO, it should
2089 // produce the same result as if we only do promotion here.
2091 true /* InLTO */, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
2092
2093 // Promoting by-reference arguments to by-value exposes more constants to
2094 // IPSCCP.
2095 CGSCCPassManager CGPM;
2098 CGPM.addPass(
2101
2102 // Propagate constants at call sites into the functions they call. This
2103 // opens opportunities for globalopt (and inlining) by substituting function
2104 // pointers passed as arguments to direct uses of functions.
2105 MPM.addPass(IPSCCPPass(IPSCCPOptions(/*AllowFuncSpec=*/true)));
2106
2107 // Attach metadata to indirect call sites indicating the set of functions
2108 // they may target at run-time. This should follow IPSCCP.
2110 }
2111
2112 // Do RPO function attribute inference across the module to forward-propagate
2113 // attributes where applicable.
2114 // FIXME: Is this really an optimization rather than a canonicalization?
2116
2117 // Use in-range annotations on GEP indices to split globals where beneficial.
2118 MPM.addPass(GlobalSplitPass());
2119
2120 // Run whole program optimization of virtual call when the list of callees
2121 // is fixed.
2122 MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
2123
2125 // Stop here at -O1.
2126 if (Level == OptimizationLevel::O1) {
2127 // The LowerTypeTestsPass needs to run to lower type metadata and the
2128 // type.test intrinsics. The pass does nothing if CFI is disabled.
2129 MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
2130 // Run a second time to clean up any type tests left behind by WPD for use
2131 // in ICP (which is performed earlier than this in the regular LTO
2132 // pipeline).
2134
2136
2137 // AllocToken transforms heap allocation calls; this needs to run late after
2138 // other allocation call transformations (such as those in InstCombine).
2139 MPM.addPass(AllocTokenPass());
2140
2142
2143 // Emit annotation remarks.
2145
2146 instructionCountersPass(MPM, /* IsPreOptimization */ false);
2147
2148 return MPM;
2149 }
2150
2151 // TODO: Skip to match buildCoroWrapper.
2152 MPM.addPass(CoroEarlyPass());
2153
2154 // Optimize globals to try and fold them into constants.
2155 MPM.addPass(GlobalOptPass());
2156
2157 // Promote any localized globals to SSA registers.
2159
2160 // Linking modules together can lead to duplicate global constants; only
2161 // keep one copy of each constant.
2163
2164 // Remove unused arguments from functions.
2166
2167 // Reduce the code after globalopt and ipsccp. Both can open up significant
2168 // simplification opportunities, and both can propagate functions through
2169 // function pointers. When this happens, we often have to resolve varargs
2170 // calls, etc, so let instcombine do this.
2171 FunctionPassManager PeepholeFPM;
2172 PeepholeFPM.addPass(InstCombinePass());
2173 if (Level.getSpeedupLevel() > 1)
2174 PeepholeFPM.addPass(AggressiveInstCombinePass());
2175 invokePeepholeEPCallbacks(PeepholeFPM, Level);
2176
2177 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(PeepholeFPM),
2178 PTO.EagerlyInvalidateAnalyses));
2179
2180 // Lower variadic functions for supported targets prior to inlining.
2182
2183 // Note: historically, the PruneEH pass was run first to deduce nounwind and
2184 // generally clean up exception handling overhead. It isn't clear this is
2185 // valuable as the inliner doesn't currently care whether it is inlining an
2186 // invoke or a call.
2187 // Run the inliner now.
2188 if (EnableModuleInliner) {
2192 } else {
2195 /* MandatoryFirst */ true,
2198 }
2199
2200 // Perform context disambiguation after inlining, since that would reduce the
2201 // amount of additional cloning required to distinguish the allocation
2202 // contexts.
2205 /*Summary=*/nullptr,
2206 PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
2207
2208 // Optimize globals again after we ran the inliner.
2209 MPM.addPass(GlobalOptPass());
2210
2211 // Run the OpenMPOpt pass again after global optimizations.
2213
2214 // Garbage collect dead functions.
2215 MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));
2216
2217 // If we didn't decide to inline a function, check to see if we can
2218 // transform it to pass arguments by value instead of by reference.
2219 CGSCCPassManager CGPM;
2225
2227 // The IPO Passes may leave cruft around. Clean up after them.
2228 FPM.addPass(InstCombinePass());
2229 invokePeepholeEPCallbacks(FPM, Level);
2230
2233
2235
2236 // Do a post inline PGO instrumentation and use pass. This is a context
2237 // sensitive PGO pass.
2238 if (PGOOpt) {
2239 if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
2240 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true,
2241 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
2242 PGOOpt->CSProfileGenFile, PGOOpt->ProfileRemappingFile);
2243 else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
2244 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false,
2245 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
2246 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile);
2247 }
2248
2249 // Break up allocas
2251
2252 // LTO provides additional opportunities for tailcall elimination due to
2253 // link-time inlining, and visibility of nocapture attribute.
2254 FPM.addPass(
2255 TailCallElimPass(/*UpdateFunctionEntryCount=*/isInstrumentedPGOUse()));
2256
2257 // Run a few AA driver optimizations here and now to cleanup the code.
2258 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM),
2259 PTO.EagerlyInvalidateAnalyses));
2260
2261 MPM.addPass(
2263
2264 // Require the GlobalsAA analysis for the module so we can query it within
2265 // MainFPM.
2268 // Invalidate AAManager so it can be recreated and pick up the newly
2269 // available GlobalsAA.
2270 MPM.addPass(
2272 }
2273
2274 FunctionPassManager MainFPM;
2276 LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
2277 /*AllowSpeculation=*/true),
2278 /*USeMemorySSA=*/true));
2279
2280 if (RunNewGVN)
2281 MainFPM.addPass(NewGVNPass());
2282 else
2283 MainFPM.addPass(GVNPass());
2284
2285 // Remove dead memcpy()'s.
2286 MainFPM.addPass(MemCpyOptPass());
2287
2288 // Nuke dead stores.
2289 MainFPM.addPass(DSEPass());
2290 MainFPM.addPass(MoveAutoInitPass());
2292
2293 invokeVectorizerStartEPCallbacks(MainFPM, Level);
2294
2295 LoopPassManager LPM;
2296 if (EnableLoopFlatten && Level.getSpeedupLevel() > 1)
2297 LPM.addPass(LoopFlattenPass());
2298 LPM.addPass(IndVarSimplifyPass());
2299 LPM.addPass(LoopDeletionPass());
2300 // FIXME: Add loop interchange.
2301
2302 // Unroll small loops and perform peeling.
2303 LPM.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
2304 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
2305 PTO.ForgetAllSCEVInLoopUnroll));
2306 // The loop passes in LPM (LoopFullUnrollPass) do not preserve MemorySSA.
2307 // *All* loop passes must preserve it, in order to be able to use it.
2308 MainFPM.addPass(
2309 createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/false));
2310
2311 MainFPM.addPass(LoopDistributePass());
2312
2313 addVectorPasses(Level, MainFPM, ThinOrFullLTOPhase::FullLTOPostLink);
2314
2315 invokeVectorizerEndEPCallbacks(MainFPM, Level);
2316
2317 // Run the OpenMPOpt CGSCC pass again late.
2320
2321 invokePeepholeEPCallbacks(MainFPM, Level);
2322 MainFPM.addPass(JumpThreadingPass());
2323 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(MainFPM),
2324 PTO.EagerlyInvalidateAnalyses));
2325
2326 // Lower type metadata and the type.test intrinsic. This pass supports
2327 // clang's control flow integrity mechanisms (-fsanitize=cfi*) and needs
2328 // to be run at link time if CFI is enabled. This pass does nothing if
2329 // CFI is disabled.
2330 MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
2331 // Run a second time to clean up any type tests left behind by WPD for use
2332 // in ICP (which is performed earlier than this in the regular LTO pipeline).
2334
2335 // Enable splitting late in the FullLTO post-link pipeline.
2338
2339 // Add late LTO optimization passes.
2340 FunctionPassManager LateFPM;
2341
2342 // LoopSink pass sinks instructions hoisted by LICM, which serves as a
2343 // canonicalization pass that enables other optimizations. As a result,
2344 // LoopSink pass needs to be a very late IR pass to avoid undoing LICM
2345 // result too early.
2346 LateFPM.addPass(LoopSinkPass());
2347
2348 // This hoists/decomposes div/rem ops. It should run after other sink/hoist
2349 // passes to avoid re-sinking, but before SimplifyCFG because it can allow
2350 // flattening of blocks.
2351 LateFPM.addPass(DivRemPairsPass());
2352
2353 // Delete basic blocks, which optimization passes may have killed.
2355 .convertSwitchRangeToICmp(true)
2356 .convertSwitchToArithmetic(true)
2357 .hoistCommonInsts(true)
2358 .speculateUnpredictables(true)));
2359 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(LateFPM)));
2360
2361 // Drop bodies of available externally objects to improve GlobalDCE.
2363
2364 // Now that we have optimized the program, discard unreachable functions.
2365 MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));
2366
2367 if (PTO.MergeFunctions)
2369
2371
2372 if (PTO.CallGraphProfile)
2373 MPM.addPass(CGProfilePass(/*InLTOPostLink=*/true));
2374
2375 MPM.addPass(CoroCleanupPass());
2376
2377 // AllocToken transforms heap allocation calls; this needs to run late after
2378 // other allocation call transformations (such as those in InstCombine).
2379 MPM.addPass(AllocTokenPass());
2380
2382
2383 // Emit annotation remarks.
2385
2386 instructionCountersPass(MPM, /* IsPreOptimization */ false);
2387
2388 return MPM;
2389}
2390
2394 assert(Level == OptimizationLevel::O0 &&
2395 "buildO0DefaultPipeline should only be used with O0");
2396
2398
2399 instructionCountersPass(MPM, /* IsPreOptimization */ true);
2400
2401 // Perform pseudo probe instrumentation in O0 mode. This is for the
2402 // consistency between different build modes. For example, a LTO build can be
2403 // mixed with an O0 prelink and an O2 postlink. Loading a sample profile in
2404 // the postlink will require pseudo probe instrumentation in the prelink.
2405 if (PGOOpt && PGOOpt->PseudoProbeForProfiling)
2407
2408 if (PGOOpt && (PGOOpt->Action == PGOOptions::IRInstr ||
2409 PGOOpt->Action == PGOOptions::IRUse))
2411 MPM,
2412 /*RunProfileGen=*/(PGOOpt->Action == PGOOptions::IRInstr),
2413 /*IsCS=*/false, PGOOpt->AtomicCounterUpdate, PGOOpt->ProfileFile,
2414 PGOOpt->ProfileRemappingFile);
2415
2416 // Instrument function entry and exit before all inlining.
2418 EntryExitInstrumenterPass(/*PostInlining=*/false)));
2419
2421
2422 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
2424
2425 if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) {
2426 // Explicitly disable sample loader inlining and use flattened profile in O0
2427 // pipeline.
2428 MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
2429 PGOOpt->ProfileRemappingFile,
2431 /*DisableSampleProfileInlining=*/true,
2432 /*UseFlattenedProfile=*/true));
2433 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
2434 // RequireAnalysisPass for PSI before subsequent non-module passes.
2436 }
2437
2439
2440 // Build a minimal pipeline based on the semantics required by LLVM,
2441 // which is just that always inlining occurs. Further, disable generating
2442 // lifetime intrinsics to avoid enabling further optimizations during
2443 // code generation.
2445 /*InsertLifetimeIntrinsics=*/false));
2446
2447 if (PTO.MergeFunctions)
2449
2450 if (EnableMatrix)
2451 MPM.addPass(
2453
2454 if (!CGSCCOptimizerLateEPCallbacks.empty()) {
2455 CGSCCPassManager CGPM;
2457 if (!CGPM.isEmpty())
2459 }
2460 if (!LateLoopOptimizationsEPCallbacks.empty()) {
2461 LoopPassManager LPM;
2463 if (!LPM.isEmpty()) {
2465 createFunctionToLoopPassAdaptor(std::move(LPM))));
2466 }
2467 }
2468 if (!LoopOptimizerEndEPCallbacks.empty()) {
2469 LoopPassManager LPM;
2471 if (!LPM.isEmpty()) {
2473 createFunctionToLoopPassAdaptor(std::move(LPM))));
2474 }
2475 }
2476 if (!ScalarOptimizerLateEPCallbacks.empty()) {
2479 if (!FPM.isEmpty())
2480 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
2481 }
2482
2484
2485 if (!VectorizerStartEPCallbacks.empty()) {
2488 if (!FPM.isEmpty())
2489 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
2490 }
2491
2492 if (!VectorizerEndEPCallbacks.empty()) {
2495 if (!FPM.isEmpty())
2496 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
2497 }
2498
2500
2501 // AllocToken transforms heap allocation calls; this needs to run late after
2502 // other allocation call transformations (such as those in InstCombine).
2503 if (!isLTOPreLink(Phase))
2504 MPM.addPass(AllocTokenPass());
2505
2507
2509 MPM.addPass(InstrumentorPass(FS));
2510
2511 // Attach !implicit.ref metadata from all functions to copyright strings.
2513
2514 if (isLTOPreLink(Phase))
2515 addRequiredLTOPreLinkPasses(MPM);
2516
2517 // Emit annotation remarks.
2519
2520 instructionCountersPass(MPM, /* IsPreOptimization */ false);
2521
2522 return MPM;
2523}
2524
2526 AAManager AA;
2527
2528 // The order in which these are registered determines their priority when
2529 // being queried.
2530
2531 // Add any target-specific alias analyses that should be run early.
2532 if (TM)
2533 TM->registerEarlyDefaultAliasAnalyses(AA);
2534
2535 // First we register the basic alias analysis that provides the majority of
2536 // per-function local AA logic. This is a stateless, on-demand local set of
2537 // AA techniques.
2538 AA.registerFunctionAnalysis<BasicAA>();
2539
2540 // Next we query fast, specialized alias analyses that wrap IR-embedded
2541 // information about aliasing.
2542 AA.registerFunctionAnalysis<ScopedNoAliasAA>();
2543 AA.registerFunctionAnalysis<TypeBasedAA>();
2544
2545 // Add support for querying global aliasing information when available.
2546 // Because the `AAManager` is a function analysis and `GlobalsAA` is a module
2547 // analysis, all that the `AAManager` can do is query for any *cached*
2548 // results from `GlobalsAA` through a readonly proxy.
2550 AA.registerModuleAnalysis<GlobalsAA>();
2551
2552 // Add target-specific alias analyses.
2553 if (TM)
2554 TM->registerDefaultAliasAnalyses(AA);
2555
2556 return AA;
2557}
2558
/// Reports whether either of two conditions holds: PGOOpt is set and its
/// Action is PGOOptions::IRUse, or the UseCtxProfile option is non-empty.
2559bool PassBuilder::isInstrumentedPGOUse() const {
2560 return (PGOOpt && PGOOpt->Action == PGOOptions::IRUse) ||
2561 !UseCtxProfile.empty();
2562}
aarch64 falkor hwpf fix Falkor HW Prefetch Fix Late Phase
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AggressiveInstCombiner - Combine expression patterns to form expressions with fewer,...
Provides passes to inlining "always_inline" functions.
This is the interface for LLVM's primary stateless and local alias analysis.
This file provides the interface for LLVM's Call Graph Profile pass.
This header provides classes for managing passes over SCCs of the call graph.
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
This file provides the interface for a simple, fast CSE pass.
This file provides a pass which clones the current module and runs the provided pass pipeline on the ...
This file provides a pass manager that only runs its passes if the provided marker analysis has been ...
Super simple passes to force specific function attrs from the commandline into the IR for debugging p...
Provides passes for computing function attributes based on interprocedural analyses.
This file provides the interface for LLVM's Global Value Numbering pass which eliminates fully redund...
This is the interface for a simple mod/ref and alias analysis over globals.
AcceleratorCodeSelection - Identify all functions reachable from a kernel, removing those that are un...
This header defines various interfaces for pass management in LLVM.
Interfaces for passes which infer implicit function attributes from the name and signature of functio...
This file provides the primary interface to the instcombine pass.
Defines passes for running instruction simplification across chunks of IR.
This file provides the interface for LLVM's PGO Instrumentation lowering pass.
See the comments on JumpThreadingPass.
static LVOptions Options
Definition LVOptions.cpp:25
This file implements the Loop Fusion pass.
This header defines the LoopLoadEliminationPass object.
This header provides classes for managing a pipeline of passes over loops in LLVM IR.
The header file for the LowerConstantIntrinsics pass as used by the new pass manager.
The header file for the LowerExpectIntrinsic pass as used by the new pass manager.
This pass performs merges of loads and stores on both sides of a.
This file provides the interface for LLVM's Global Value Numbering pass.
This header enumerates the LLVM-provided high-level optimization levels.
This file provides the interface for IR based instrumentation passes ( (profile-gen,...
Define option tunables for PGO.
static bool isThinLTOPostLink(ThinOrFullLTOPhase Phase)
static void addAnnotationRemarksPass(ModulePassManager &MPM)
static CoroConditionalWrapper buildCoroWrapper(ThinOrFullLTOPhase Phase)
static bool isFullLTOPostLink(ThinOrFullLTOPhase Phase)
static bool isThinLTOPreLink(ThinOrFullLTOPhase Phase)
static bool isLTOPreLink(ThinOrFullLTOPhase Phase)
static void instructionCountersPass(ModulePassManager &MPM, bool IsPreOptimization)
static bool isFullLTOPreLink(ThinOrFullLTOPhase Phase)
static bool isLTOPostLink(ThinOrFullLTOPhase Phase)
This file implements relative lookup table converter that converts lookup tables to relative lookup t...
This file provides the interface for LLVM's Scalar Replacement of Aggregates pass.
This file provides the interface for the pseudo probe implementation for AutoFDO.
This file provides the interface for the sampled PGO loader pass.
This is the interface for a metadata-based scoped no-alias analysis.
This file provides the interface for the pass responsible for both simplifying and canonicalizing the...
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
This is the interface for a metadata-based TBAA.
Defines the virtual file system interface vfs::FileSystem.
A manager for alias analyses.
A module pass that rewrites heap allocations to use token-enabled allocation functions based on vario...
Definition AllocToken.h:36
Inlines functions marked as "always_inline".
Argument promotion pass.
Assign a GUID to functions as metadata.
Analysis pass providing a never-invalidated alias analysis result.
Simple pass that canonicalizes aliases.
A pass that merges duplicate global constants into a single constant.
This class implements a trivial dead store elimination.
Eliminate dead arguments (and return values) from functions.
A pass that transforms external global definitions into declarations.
Pass embeds a copy of the module optimized with the provided pass pipeline into a global variable.
A pass manager to run a set of extra loop passes if the MarkerTy analysis is present.
Statistics pass for the FunctionPropertiesAnalysis results.
The core GVN pass object.
Definition GVN.h:131
Pass to remove unused function declarations.
Definition GlobalDCE.h:38
Optimize globals that never have their address taken.
Definition GlobalOpt.h:25
Pass to perform split of global variables.
Definition GlobalSplit.h:26
Analysis pass providing a never-invalidated alias analysis result.
Pass to outline cold regions.
Pass to perform interprocedural constant propagation.
Definition SCCP.h:48
Pass to outline similar regions.
Definition IROutliner.h:462
Run instruction simplification across each instruction in the function.
Instrumentation based profiling lowering pass.
The Instrumentor pass.
This pass performs 'jump threading', which looks at blocks that have multiple predecessors and multip...
Performs Loop Invariant Code Motion Pass.
Definition LICM.h:66
Loop unroll pass that only does full loop unrolling and peeling.
Performs Loop Idiom Recognize Pass.
Performs Loop Inst Simplify Pass.
A simple loop rotation transformation.
Performs basic CFG simplifications to assist other loop passes.
A pass that does profile-guided sinking of instructions into loops.
Definition LoopSink.h:33
A simple loop rotation transformation.
Loop unroll pass that will support both full and partial unrolling.
Strips MemProf attributes and metadata.
Merge identical functions.
The module inliner pass for the new pass manager.
Module pass, wrapping the inliner pass.
Definition Inliner.h:65
void addModulePass(T Pass)
Add a module pass that runs before the CGSCC passes.
Definition Inliner.h:81
Class to hold module path string table and global value map, and encapsulate methods for operating on...
Simple pass that provides a name to every anonymous globals.
Additional 'norecurse' attribute deduction during postlink LTO phase.
OpenMP optimizations pass.
Definition OpenMPOpt.h:42
static LLVM_ABI const OptimizationLevel O3
Optimize for fast execution as much as possible.
static LLVM_ABI const OptimizationLevel O0
Disable as many optimizations as possible.
static LLVM_ABI const OptimizationLevel O2
Optimize for fast execution as much as possible without triggering significant incremental compile ti...
static LLVM_ABI const OptimizationLevel O1
Optimize quickly without destroying debuggability.
static LLVM_ABI bool isCtxIRPGOInstrEnabled()
The indirect function call promotion pass.
The instrumentation (profile-instr-gen) pass for IR based PGO.
The instrumentation (profile-instr-gen) pass for IR based PGO.
The profile annotation (profile-instr-use) pass for IR based PGO.
The profile size based optimization pass for memory intrinsics.
Pass to remove unused function declarations.
LLVM_ABI void invokeFullLinkTimeOptimizationLastEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level)
LLVM_ABI ModuleInlinerWrapperPass buildInlinerPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the module pipeline that performs inlining as well as the inlining-driven cleanups.
LLVM_ABI void invokeOptimizerEarlyEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level, ThinOrFullLTOPhase Phase)
LLVM_ABI void invokeVectorizerStartEPCallbacks(FunctionPassManager &FPM, OptimizationLevel Level)
LLVM_ABI AAManager buildDefaultAAPipeline()
Build the default AAManager with the default alias analysis pipeline registered.
LLVM_ABI void invokeCGSCCOptimizerLateEPCallbacks(CGSCCPassManager &CGPM, OptimizationLevel Level)
LLVM_ABI ModulePassManager buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level)
Build a pre-link, ThinLTO-targeting default optimization pipeline to a pass manager.
LLVM_ABI void addPGOInstrPassesForO0(ModulePassManager &MPM, bool RunProfileGen, bool IsCS, bool AtomicCounterUpdate, std::string ProfileFile, std::string ProfileRemappingFile)
Add PGOInstrumentation passes for O0 only.
LLVM_ABI void invokeScalarOptimizerLateEPCallbacks(FunctionPassManager &FPM, OptimizationLevel Level)
LLVM_ABI ModulePassManager buildPerModuleDefaultPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase=ThinOrFullLTOPhase::None)
Build a per-module default optimization pipeline.
LLVM_ABI void invokePipelineStartEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level)
LLVM_ABI void invokeVectorizerEndEPCallbacks(FunctionPassManager &FPM, OptimizationLevel Level)
LLVM_ABI ModulePassManager buildO0DefaultPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase=ThinOrFullLTOPhase::None)
Build an O0 pipeline with the minimal semantically required passes.
LLVM_ABI FunctionPassManager buildFunctionSimplificationPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the core LLVM function canonicalization and simplification pipeline.
LLVM_ABI void invokePeepholeEPCallbacks(FunctionPassManager &FPM, OptimizationLevel Level)
LLVM_ABI void invokePipelineEarlySimplificationEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level, ThinOrFullLTOPhase Phase)
LLVM_ABI void invokeLoopOptimizerEndEPCallbacks(LoopPassManager &LPM, OptimizationLevel Level)
LLVM_ABI ModulePassManager buildLTODefaultPipeline(OptimizationLevel Level, ModuleSummaryIndex *ExportSummary)
Build an LTO default optimization pipeline to a pass manager.
LLVM_ABI ModulePassManager buildModuleInlinerPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the module pipeline that performs inlining with module inliner pass.
LLVM_ABI ModulePassManager buildThinLTODefaultPipeline(OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary)
Build a ThinLTO default optimization pipeline to a pass manager.
LLVM_ABI void invokeLateLoopOptimizationsEPCallbacks(LoopPassManager &LPM, OptimizationLevel Level)
LLVM_ABI void invokeFullLinkTimeOptimizationEarlyEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level)
LLVM_ABI ModulePassManager buildFatLTODefaultPipeline(OptimizationLevel Level, bool ThinLTO, bool EmitSummary)
Build a fat object default optimization pipeline.
LLVM_ABI ModulePassManager buildModuleSimplificationPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the core LLVM module canonicalization and simplification pipeline.
LLVM_ABI ModulePassManager buildModuleOptimizationPipeline(OptimizationLevel Level, ThinOrFullLTOPhase LTOPhase)
Construct the core LLVM module optimization pipeline.
LLVM_ABI void invokeOptimizerLastEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level, ThinOrFullLTOPhase Phase)
LLVM_ABI ModulePassManager buildLTOPreLinkDefaultPipeline(OptimizationLevel Level)
Build a pre-link, LTO-targeting default optimization pipeline to a pass manager.
LLVM_ATTRIBUTE_MINSIZE std::enable_if_t<!std::is_same_v< PassT, PassManager > > addPass(PassT &&Pass)
bool isEmpty() const
Returns if the pass manager contains any passes.
unsigned LicmMssaNoAccForPromotionCap
Tuning option to disable promotion to scalars in LICM with MemorySSA, if the number of access is too ...
Definition PassBuilder.h:78
bool SLPVectorization
Tuning option to enable/disable slp loop vectorization, set based on opt level.
Definition PassBuilder.h:56
int InlinerThreshold
Tuning option to override the default inliner threshold.
Definition PassBuilder.h:92
bool LoopFusion
Tuning option to enable/disable loop fusion. Its default value is false.
Definition PassBuilder.h:66
bool CallGraphProfile
Tuning option to enable/disable call graph profile.
Definition PassBuilder.h:82
bool MergeFunctions
Tuning option to enable/disable function merging.
Definition PassBuilder.h:89
bool ForgetAllSCEVInLoopUnroll
Tuning option to forget all SCEV loops in LoopUnroll.
Definition PassBuilder.h:70
unsigned LicmMssaOptCap
Tuning option to cap the number of calls to retrieve clobbering accesses in MemorySSA,...
Definition PassBuilder.h:74
bool LoopInterleaving
Tuning option to set loop interleaving on/off, set based on opt level.
Definition PassBuilder.h:48
LLVM_ABI PipelineTuningOptions()
Constructor sets pipeline tuning defaults based on cl::opts.
bool LoopUnrolling
Tuning option to enable/disable loop unrolling. Its default value is true.
Definition PassBuilder.h:59
bool LoopInterchange
Tuning option to enable/disable loop interchange.
Definition PassBuilder.h:63
bool LoopVectorization
Tuning option to enable/disable loop vectorization, set based on opt level.
Definition PassBuilder.h:52
Reassociate commutative expressions.
Definition Reassociate.h:74
A pass to do RPO deduction and propagation of function attributes.
This pass performs function-level constant propagation and merging.
Definition SCCP.h:30
The sample profiler data loader pass.
Analysis pass providing a never-invalidated alias analysis result.
This pass transforms loops that contain branches or switches on loop-invariant conditions to have mu...
A pass to simplify and canonicalize the CFG of a function.
Definition SimplifyCFG.h:30
Analysis pass providing a never-invalidated alias analysis result.
Optimize scalar/vector interactions in IR using target cost models.
Interfaces for registering analysis passes, producing common pass manager configurations,...
Abstract Attribute helper functions.
Definition Attributor.h:165
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
@ All
Drop only llvm.assumes using type test value.
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI cl::opt< bool > EnableKnowledgeRetention
static cl::opt< bool > RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden, cl::desc("Run the NewGVN pass"))
static cl::opt< bool > DisablePreInliner("disable-preinline", cl::init(false), cl::Hidden, cl::desc("Disable pre-instrumentation inliner"))
static cl::opt< bool > EnableDFAJumpThreading("enable-dfa-jump-thread", cl::desc("Enable DFA jump threading"), cl::init(true), cl::Hidden)
static cl::opt< bool > PerformMandatoryInliningsFirst("mandatory-inlining-first", cl::init(false), cl::Hidden, cl::desc("Perform mandatory inlinings module-wide, before performing " "inlining"))
static cl::opt< bool > RunPartialInlining("enable-partial-inlining", cl::init(false), cl::Hidden, cl::desc("Run Partial inlining pass"))
static cl::opt< bool > EnableGVNSink("enable-gvn-sink", cl::desc("Enable the GVN sinking pass (default = off)"))
static cl::opt< bool > EnableModuleInliner("enable-module-inliner", cl::init(false), cl::Hidden, cl::desc("Enable module inliner"))
static cl::opt< bool > EnableEagerlyInvalidateAnalyses("eagerly-invalidate-analyses", cl::init(true), cl::Hidden, cl::desc("Eagerly invalidate more analyses in default pipelines"))
static cl::opt< bool > EnableMatrix("enable-matrix", cl::init(false), cl::Hidden, cl::desc("Enable lowering of the matrix intrinsics"))
ModuleToFunctionPassAdaptor createModuleToFunctionPassAdaptor(FunctionPassT &&Pass, bool EagerlyInvalidate=false)
A function to deduce a function pass type and wrap it in the templated adaptor.
cl::opt< std::string > UseCtxProfile("use-ctx-profile", cl::init(""), cl::Hidden, cl::desc("Use the specified contextual profile file"))
static cl::opt< bool > EnableSampledInstr("enable-sampled-instrumentation", cl::init(false), cl::Hidden, cl::desc("Enable profile instrumentation sampling (default = off)"))
static cl::opt< bool > EnableLoopFlatten("enable-loop-flatten", cl::init(false), cl::Hidden, cl::desc("Enable the LoopFlatten Pass"))
static cl::opt< InliningAdvisorMode > UseInlineAdvisor("enable-ml-inliner", cl::init(InliningAdvisorMode::Default), cl::Hidden, cl::desc("Enable ML policy for inliner. Currently trained for -Oz only"), cl::values(clEnumValN(InliningAdvisorMode::Default, "default", "Heuristics-based inliner version"), clEnumValN(InliningAdvisorMode::Development, "development", "Use development mode (runtime-loadable model)"), clEnumValN(InliningAdvisorMode::Release, "release", "Use release mode (AOT-compiled model)")))
static cl::opt< bool > EnableJumpTableToSwitch("enable-jump-table-to-switch", cl::init(true), cl::desc("Enable JumpTableToSwitch pass (default = true)"))
PassManager< LazyCallGraph::SCC, CGSCCAnalysisManager, LazyCallGraph &, CGSCCUpdateResult & > CGSCCPassManager
The CGSCC pass manager.
static cl::opt< bool > EnableUnrollAndJam("enable-unroll-and-jam", cl::init(false), cl::Hidden, cl::desc("Enable Unroll And Jam Pass"))
@ CGSCC_LIGHT
@ MODULE_LIGHT
ThinOrFullLTOPhase
This enumerates the LLVM full LTO or ThinLTO optimization phases.
Definition Pass.h:77
@ FullLTOPreLink
Full LTO prelink phase.
Definition Pass.h:85
@ ThinLTOPostLink
ThinLTO postlink (backend compile) phase.
Definition Pass.h:83
@ None
No LTO/ThinLTO behavior needed.
Definition Pass.h:79
@ FullLTOPostLink
Full LTO postlink (backend compile) phase.
Definition Pass.h:87
@ ThinLTOPreLink
ThinLTO prelink (summary) phase.
Definition Pass.h:81
PassManager< Loop, LoopAnalysisManager, LoopStandardAnalysisResults &, LPMUpdater & > LoopPassManager
The Loop pass manager.
static cl::opt< bool > EnableConstraintElimination("enable-constraint-elimination", cl::init(true), cl::Hidden, cl::desc("Enable pass to eliminate conditions based on linear constraints"))
ModuleToPostOrderCGSCCPassAdaptor createModuleToPostOrderCGSCCPassAdaptor(CGSCCPassT &&Pass)
A function to deduce a CGSCC pass type and wrap it in the templated adaptor.
static cl::opt< bool > EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true), cl::Hidden, cl::desc("Enable inline deferral during PGO"))
Flag to enable inline deferral during PGO.
FunctionToLoopPassAdaptor createFunctionToLoopPassAdaptor(LoopPassT &&Pass, bool UseMemorySSA=false)
A function to deduce a loop pass type and wrap it in the templated adaptor.
CGSCCToFunctionPassAdaptor createCGSCCToFunctionPassAdaptor(FunctionPassT &&Pass, bool EagerlyInvalidate=false, bool NoRerun=false)
A function to deduce a function pass type and wrap it in the templated adaptor.
LLVM_ABI cl::opt< bool > ForgetSCEVInLoopUnroll
PassManager< Module > ModulePassManager
Convenience typedef for a pass manager over modules.
static cl::opt< bool > EnablePostPGOLoopRotation("enable-post-pgo-loop-rotation", cl::init(true), cl::Hidden, cl::desc("Run the loop rotation transformation after PGO instrumentation"))
LLVM_ABI bool AreStatisticsEnabled()
Check if statistics are enabled.
static cl::opt< std::string > InstrumentColdFuncOnlyPath("instrument-cold-function-only-path", cl::init(""), cl::desc("File path for cold function only instrumentation(requires use " "with --pgo-instrument-cold-function-only)"), cl::Hidden)
static cl::opt< bool > EnableGlobalAnalyses("enable-global-analyses", cl::init(true), cl::Hidden, cl::desc("Enable inter-procedural analyses"))
static cl::opt< bool > FlattenedProfileUsed("flattened-profile-used", cl::init(false), cl::Hidden, cl::desc("Indicate the sample profile being used is flattened, i.e., " "no inline hierarchy exists in the profile"))
static cl::opt< AttributorRunOption > AttributorRun("attributor-enable", cl::Hidden, cl::init(AttributorRunOption::NONE), cl::desc("Enable the attributor inter-procedural deduction pass"), cl::values(clEnumValN(AttributorRunOption::FULL, "full", "enable all full attributor runs"), clEnumValN(AttributorRunOption::LIGHT, "light", "enable all attributor-light runs"), clEnumValN(AttributorRunOption::MODULE, "module", "enable module-wide attributor runs"), clEnumValN(AttributorRunOption::MODULE_LIGHT, "module-light", "enable module-wide attributor-light runs"), clEnumValN(AttributorRunOption::CGSCC, "cgscc", "enable call graph SCC attributor runs"), clEnumValN(AttributorRunOption::CGSCC_LIGHT, "cgscc-light", "enable call graph SCC attributor-light runs"), clEnumValN(AttributorRunOption::NONE, "none", "disable attributor runs")))
static cl::opt< bool > EnableLoopInterchange("enable-loopinterchange", cl::init(true), cl::Hidden, cl::desc("Enable the LoopInterchange Pass"))
static cl::opt< bool > ExtraVectorizerPasses("extra-vectorizer-passes", cl::init(false), cl::Hidden, cl::desc("Run cleanup optimization passes after vectorization"))
static cl::opt< bool > EnableHotColdSplit("hot-cold-split", cl::desc("Enable hot-cold splitting pass"))
cl::opt< bool > EnableMemProfContextDisambiguation
Enable MemProf context disambiguation for thin link.
static cl::opt< bool > TriggerCrash("opt-pipeline-trigger-crash", cl::init(false), cl::Hidden, cl::desc("Trigger crash in optimization pipeline"))
PassManager< Function > FunctionPassManager
Convenience typedef for a pass manager over functions.
LLVM_ABI InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options.
cl::opt< bool > PGOInstrumentColdFunctionOnly
static cl::opt< bool > EnableCHR("enable-chr", cl::init(true), cl::Hidden, cl::desc("Enable control height reduction optimization (CHR)"))
static cl::opt< bool > EnableMergeFunctions("enable-merge-functions", cl::init(false), cl::Hidden, cl::desc("Enable function merging as part of the optimization pipeline"))
static cl::opt< bool > EnableDevirtualizeSpeculatively("enable-devirtualize-speculatively", cl::desc("Enable speculative devirtualization optimization"), cl::init(false))
static cl::opt< bool > EnableGVNHoist("enable-gvn-hoist", cl::desc("Enable the GVN hoisting pass (default = off)"))
LLVM_ABI cl::opt< unsigned > SetLicmMssaNoAccForPromotionCap
LLVM_ABI InlineParams getInlineParamsFromOptLevel(unsigned OptLevel)
Generate the parameters to tune the inline cost analysis based on command line options.
static cl::opt< bool > EnableIROutliner("ir-outliner", cl::init(false), cl::Hidden, cl::desc("Enable ir outliner pass"))
static cl::opt< int > PreInlineThreshold("preinline-threshold", cl::Hidden, cl::init(75), cl::desc("Control the amount of inlining in pre-instrumentation inliner " "(default = 75)"))
static cl::opt< bool > UseLoopVersioningLICM("enable-loop-versioning-licm", cl::init(false), cl::Hidden, cl::desc("Enable the experimental Loop Versioning LICM pass"))
cl::opt< unsigned > MaxDevirtIterations("max-devirt-iterations", cl::ReallyHidden, cl::init(4))
LLVM_ABI cl::opt< unsigned > SetLicmMssaOptCap
static cl::opt< bool > EnableInstrumentor("enable-instrumentor", cl::init(false), cl::Hidden, cl::desc("Enable the Instrumentor Pass"))
static cl::opt< bool > EnableMergeICmps("enable-mergeicmps", cl::init(true), cl::Hidden, cl::desc("Enable MergeICmps pass in the optimization pipeline"))
A DCE pass that assumes instructions are dead until proven otherwise.
Definition ADCE.h:31
Pass to convert @llvm.global.annotations to !annotation metadata.
This pass attempts to minimize the number of assumes without losing any information.
A more lightweight version of the Attributor which only runs attribute inference but no simplificatio...
A more lightweight version of the Attributor which only runs attribute inference but no simplificatio...
Hoist/decompose integer division and remainder instructions to enable CFG improvements and better cod...
Definition DivRemPairs.h:23
A simple and fast domtree-based CSE pass.
Definition EarlyCSE.h:31
Pass which forces specific function attributes into the IR, primarily as a debugging tool.
A simple and fast domtree-based GVN pass to hoist common expressions from sibling branches.
Definition GVN.h:506
Uses an "inverted" value numbering to decide the similarity of expressions and sinks similar expressi...
Definition GVN.h:513
A set of parameters to control various transforms performed by IPSCCP pass.
Definition SCCP.h:35
A pass which infers function attributes from the names and signatures of function declarations in a m...
Provides context on when an inline advisor is constructed in the pipeline (e.g., link phase,...
Thresholds to tune inline cost analysis.
Definition InlineCost.h:207
std::optional< int > OptSizeHintThreshold
Threshold to use for callees with inline hint, when the caller is optimized for size.
Definition InlineCost.h:216
std::optional< int > HotCallSiteThreshold
Threshold to use when the callsite is considered hot.
Definition InlineCost.h:228
int DefaultThreshold
The default threshold to start with for a callee.
Definition InlineCost.h:209
std::optional< bool > EnableDeferral
Indicate whether we should allow inline deferral.
Definition InlineCost.h:241
std::optional< int > HintThreshold
Threshold to use for callees with inline hint.
Definition InlineCost.h:212
Options for the frontend instrumentation based profiling pass.
A no-op pass template which simply forces a specific analysis result to be invalidated.
Pass to forward loads in a loop around the backedge to subsequent iterations.
A set of parameters used to control various transforms performed by the LoopUnroll pass.
The LoopVectorize Pass.
Computes function attributes in post-order over the call graph.
A utility pass template to force an analysis result to be available.