LLVM 23.0.0git
PassBuilderPipelines.cpp
Go to the documentation of this file.
1//===- Construction of pass pipelines -------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9///
10/// This file provides the implementation of the PassBuilder based on our
11/// static pass registry as well as related functionality. It also provides
12/// helpers to aid in analyzing, debugging, and testing passes and pass
13/// pipelines.
14///
15//===----------------------------------------------------------------------===//
16
17#include "llvm/ADT/Statistic.h"
29#include "llvm/IR/PassManager.h"
30#include "llvm/Pass.h"
155
156using namespace llvm;
157
158namespace llvm {
159
161 "enable-ml-inliner", cl::init(InliningAdvisorMode::Default), cl::Hidden,
162 cl::desc("Enable ML policy for inliner. Currently trained for -Oz only"),
164 "Heuristics-based inliner version"),
166 "Use development mode (runtime-loadable model)"),
168 "Use release mode (AOT-compiled model)")));
169
170/// Flag to enable inline deferral during PGO.
171static cl::opt<bool>
172 EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true),
174 cl::desc("Enable inline deferral during PGO"));
175
176static cl::opt<bool> EnableModuleInliner("enable-module-inliner",
177 cl::init(false), cl::Hidden,
178 cl::desc("Enable module inliner"));
179
181 "mandatory-inlining-first", cl::init(false), cl::Hidden,
182 cl::desc("Perform mandatory inlinings module-wide, before performing "
183 "inlining"));
184
186 "eagerly-invalidate-analyses", cl::init(true), cl::Hidden,
187 cl::desc("Eagerly invalidate more analyses in default pipelines"));
188
190 "enable-merge-functions", cl::init(false), cl::Hidden,
191 cl::desc("Enable function merging as part of the optimization pipeline"));
192
194 "enable-post-pgo-loop-rotation", cl::init(true), cl::Hidden,
195 cl::desc("Run the loop rotation transformation after PGO instrumentation"));
196
198 "enable-global-analyses", cl::init(true), cl::Hidden,
199 cl::desc("Enable inter-procedural analyses"));
200
201static cl::opt<bool> RunPartialInlining("enable-partial-inlining",
202 cl::init(false), cl::Hidden,
203 cl::desc("Run Partial inlining pass"));
204
206 "extra-vectorizer-passes", cl::init(false), cl::Hidden,
207 cl::desc("Run cleanup optimization passes after vectorization"));
208
209static cl::opt<bool> RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden,
210 cl::desc("Run the NewGVN pass"));
211
212static cl::opt<bool>
213 EnableLoopInterchange("enable-loopinterchange", cl::init(false), cl::Hidden,
214 cl::desc("Enable the LoopInterchange Pass"));
215
216static cl::opt<bool> EnableUnrollAndJam("enable-unroll-and-jam",
217 cl::init(false), cl::Hidden,
218 cl::desc("Enable Unroll And Jam Pass"));
219
220static cl::opt<bool> EnableLoopFlatten("enable-loop-flatten", cl::init(false),
222 cl::desc("Enable the LoopFlatten Pass"));
223
224static cl::opt<bool>
225 EnableDFAJumpThreading("enable-dfa-jump-thread",
226 cl::desc("Enable DFA jump threading"),
227 cl::init(false), cl::Hidden);
228
229static cl::opt<bool>
230 EnableHotColdSplit("hot-cold-split",
231 cl::desc("Enable hot-cold splitting pass"));
232
233static cl::opt<bool> EnableIROutliner("ir-outliner", cl::init(false),
235 cl::desc("Enable ir outliner pass"));
236
237static cl::opt<bool>
238 DisablePreInliner("disable-preinline", cl::init(false), cl::Hidden,
239 cl::desc("Disable pre-instrumentation inliner"));
240
242 "preinline-threshold", cl::Hidden, cl::init(75),
243 cl::desc("Control the amount of inlining in pre-instrumentation inliner "
244 "(default = 75)"));
245
246static cl::opt<bool>
247 EnableGVNHoist("enable-gvn-hoist",
248 cl::desc("Enable the GVN hoisting pass (default = off)"));
249
250static cl::opt<bool>
251 EnableGVNSink("enable-gvn-sink",
252 cl::desc("Enable the GVN sinking pass (default = off)"));
253
255 "enable-jump-table-to-switch",
256 cl::desc("Enable JumpTableToSwitch pass (default = off)"));
257
258// This option is used in simplifying testing SampleFDO optimizations for
259// profile loading.
260static cl::opt<bool>
261 EnableCHR("enable-chr", cl::init(true), cl::Hidden,
262 cl::desc("Enable control height reduction optimization (CHR)"));
263
265 "flattened-profile-used", cl::init(false), cl::Hidden,
266 cl::desc("Indicate the sample profile being used is flattened, i.e., "
267 "no inline hierarchy exists in the profile"));
268
269static cl::opt<bool>
270 EnableMatrix("enable-matrix", cl::init(false), cl::Hidden,
271 cl::desc("Enable lowering of the matrix intrinsics"));
272
274 "enable-mergeicmps", cl::init(true), cl::Hidden,
275 cl::desc("Enable MergeICmps pass in the optimization pipeline"));
276
278 "enable-constraint-elimination", cl::init(true), cl::Hidden,
279 cl::desc(
280 "Enable pass to eliminate conditions based on linear constraints"));
281
283 "attributor-enable", cl::Hidden, cl::init(AttributorRunOption::NONE),
284 cl::desc("Enable the attributor inter-procedural deduction pass"),
286 "enable all full attributor runs"),
288 "enable all attributor-light runs"),
290 "enable module-wide attributor runs"),
292 "enable module-wide attributor-light runs"),
294 "enable call graph SCC attributor runs"),
296 "enable call graph SCC attributor-light runs"),
297 clEnumValN(AttributorRunOption::NONE, "none",
298 "disable attributor runs")));
299
301 "enable-sampled-instrumentation", cl::init(false), cl::Hidden,
302 cl::desc("Enable profile instrumentation sampling (default = off)"));
304 "enable-loop-versioning-licm", cl::init(false), cl::Hidden,
305 cl::desc("Enable the experimental Loop Versioning LICM pass"));
306
308 "instrument-cold-function-only-path", cl::init(""),
309 cl::desc("File path for cold function only instrumentation(requires use "
310 "with --pgo-instrument-cold-function-only)"),
311 cl::Hidden);
312
313// TODO: There is a similar flag in the WPD pass; we should consolidate them by
314// parsing the option only once in PassBuilder and sharing it across both places.
316 "enable-devirtualize-speculatively",
317 cl::desc("Enable speculative devirtualization optimization"),
318 cl::init(false));
319
322
324} // namespace llvm
325
343
344namespace llvm {
346} // namespace llvm
347
349 OptimizationLevel Level) {
350 for (auto &C : PeepholeEPCallbacks)
351 C(FPM, Level);
352}
355 for (auto &C : LateLoopOptimizationsEPCallbacks)
356 C(LPM, Level);
357}
359 OptimizationLevel Level) {
360 for (auto &C : LoopOptimizerEndEPCallbacks)
361 C(LPM, Level);
362}
365 for (auto &C : ScalarOptimizerLateEPCallbacks)
366 C(FPM, Level);
367}
369 OptimizationLevel Level) {
370 for (auto &C : CGSCCOptimizerLateEPCallbacks)
371 C(CGPM, Level);
372}
374 OptimizationLevel Level) {
375 for (auto &C : VectorizerStartEPCallbacks)
376 C(FPM, Level);
377}
379 OptimizationLevel Level) {
380 for (auto &C : VectorizerEndEPCallbacks)
381 C(FPM, Level);
382}
384 OptimizationLevel Level,
386 for (auto &C : OptimizerEarlyEPCallbacks)
387 C(MPM, Level, Phase);
388}
390 OptimizationLevel Level,
392 for (auto &C : OptimizerLastEPCallbacks)
393 C(MPM, Level, Phase);
394}
397 for (auto &C : FullLinkTimeOptimizationEarlyEPCallbacks)
398 C(MPM, Level);
399}
402 for (auto &C : FullLinkTimeOptimizationLastEPCallbacks)
403 C(MPM, Level);
404}
406 OptimizationLevel Level) {
407 for (auto &C : PipelineStartEPCallbacks)
408 C(MPM, Level);
409}
412 for (auto &C : PipelineEarlySimplificationEPCallbacks)
413 C(MPM, Level, Phase);
414}
415
416// Get IR stats with InstCount and FunctionPropertiesAnalysis.
418 bool IsPreOptimization) {
419 if (AreStatisticsEnabled()) {
420 MPM.addPass(
423 FunctionPropertiesStatisticsPass(IsPreOptimization)));
424 }
425}
426// Helper to add AnnotationRemarksPass.
430
431// Helper to check if the current compilation phase is preparing for LTO
436
437// Helper to check if the current compilation phase is LTO backend
442
443// Helper to conditionally wrap Coro passes.
445 // TODO: Skip passes according to Phase.
446 ModulePassManager CoroPM;
447 CoroPM.addPass(CoroEarlyPass());
448 CGSCCPassManager CGPM;
449 CGPM.addPass(CoroSplitPass());
450 CoroPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
451 CoroPM.addPass(CoroCleanupPass());
452 CoroPM.addPass(GlobalDCEPass());
453 return CoroConditionalWrapper(std::move(CoroPM));
454}
455
456// TODO: Investigate the cost/benefit of tail call elimination on debugging.
458PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
460
462
464 FPM.addPass(CountVisitsPass());
465
466 // Form SSA out of local memory accesses after breaking apart aggregates into
467 // scalars.
468 FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
469
470 // Catch trivial redundancies
471 FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
472
473 // Clean up the CFG and combine instructions.
474 FPM.addPass(
475 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
476 FPM.addPass(InstCombinePass());
477
478 FPM.addPass(LibCallsShrinkWrapPass());
479
480 invokePeepholeEPCallbacks(FPM, Level);
481
482 FPM.addPass(
483 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
484
485 // Form canonically associated expression trees, and simplify the trees using
486 // basic mathematical properties. For example, this will form (nearly)
487 // minimal multiplication trees.
488 FPM.addPass(ReassociatePass());
489
490 // Add the primary loop simplification pipeline.
491 // FIXME: Currently this is split into two loop pass pipelines because we run
492 // some function passes in between them. These can and should be removed
493 // and/or replaced by scheduling the loop pass equivalents in the correct
494 // positions. But those equivalent passes aren't powerful enough yet.
495 // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
496 // used. We have `LoopSimplifyCFGPass`, which isn't yet powerful enough to
497 // fully replace `SimplifyCFGPass`, and the closest to the other we have is
498 // `LoopInstSimplify`.
499 LoopPassManager LPM1, LPM2;
500
501 // Simplify the loop body. We do this initially to clean up after other loop
502 // passes run, either when iterating on a loop or on inner loops with
503 // implications on the outer loop.
504 LPM1.addPass(LoopInstSimplifyPass());
505 LPM1.addPass(LoopSimplifyCFGPass());
506
507 // Try to remove as much code from the loop header as possible,
508 // to reduce amount of IR that will have to be duplicated. However,
509 // do not perform speculative hoisting the first time as LICM
510 // will destroy metadata that may not need to be destroyed if run
511 // after loop rotation.
512 // TODO: Investigate promotion cap for O1.
513 LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
514 /*AllowSpeculation=*/false));
515
516 LPM1.addPass(
517 LoopRotatePass(/*EnableHeaderDuplication=*/true, isLTOPreLink(Phase)));
518 // TODO: Investigate promotion cap for O1.
519 LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
520 /*AllowSpeculation=*/true));
521 LPM1.addPass(SimpleLoopUnswitchPass());
523 LPM1.addPass(LoopFlattenPass());
524
525 LPM2.addPass(LoopIdiomRecognizePass());
526 LPM2.addPass(IndVarSimplifyPass());
527
529
530 LPM2.addPass(LoopDeletionPass());
531
532 // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
533 // because it changes the IR and makes profile annotation in the backend compile
534 // inaccurate. The normal unroller doesn't pay attention to forced full unroll
535 // attributes so we need to make sure and allow the full unroll pass to pay
536 // attention to it.
537 if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
538 PGOOpt->Action != PGOOptions::SampleUse)
539 LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
540 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
541 PTO.ForgetAllSCEVInLoopUnroll));
542
544
545 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
546 /*UseMemorySSA=*/true));
547 FPM.addPass(
548 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
549 FPM.addPass(InstCombinePass());
550 // The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA.
551 // *All* loop passes must preserve it, in order to be able to use it.
552 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
553 /*UseMemorySSA=*/false));
554
555 // Delete small array after loop unroll.
556 FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
557
558 // Specially optimize memory movement as it doesn't look like dataflow in SSA.
559 FPM.addPass(MemCpyOptPass());
560
561 // Sparse conditional constant propagation.
562 // FIXME: It isn't clear why we do this *after* loop passes rather than
563 // before...
564 FPM.addPass(SCCPPass());
565
566 // Delete dead bit computations (instcombine runs after to fold away the dead
567 // computations, and then ADCE will run later to exploit any new DCE
568 // opportunities that creates).
569 FPM.addPass(BDCEPass());
570
571 // Run instcombine after redundancy and dead bit elimination to exploit
572 // opportunities opened up by them.
573 FPM.addPass(InstCombinePass());
574 invokePeepholeEPCallbacks(FPM, Level);
575
576 FPM.addPass(CoroElidePass());
577
579
580 // Finally, do an expensive DCE pass to catch all the dead code exposed by
581 // the simplifications and basic cleanup after all the simplifications.
582 // TODO: Investigate if this is too expensive.
583 FPM.addPass(ADCEPass());
584 FPM.addPass(
585 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
586 FPM.addPass(InstCombinePass());
587 invokePeepholeEPCallbacks(FPM, Level);
588
589 return FPM;
590}
591
595 assert(Level != OptimizationLevel::O0 && "Must request optimizations!");
596
597 // The O1 pipeline has a separate pipeline creation function to simplify
598 // construction readability.
599 if (Level.getSpeedupLevel() == 1)
600 return buildO1FunctionSimplificationPipeline(Level, Phase);
601
603
606
607 // Form SSA out of local memory accesses after breaking apart aggregates into
608 // scalars.
610
611 // Catch trivial redundancies
612 FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
615
616 // Hoisting of scalars and load expressions.
617 if (EnableGVNHoist)
618 FPM.addPass(GVNHoistPass());
619
620 // Global value numbering based sinking.
621 if (EnableGVNSink) {
622 FPM.addPass(GVNSinkPass());
623 FPM.addPass(
624 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
625 }
626
627 // Speculative execution if the target has divergent branches; otherwise nop.
628 FPM.addPass(SpeculativeExecutionPass(/* OnlyIfDivergentTarget =*/true));
629
630 // Optimize based on known information about branches, and cleanup afterward.
633
634 // Jump table to switch conversion.
639
640 FPM.addPass(
641 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
645
646 invokePeepholeEPCallbacks(FPM, Level);
647
648 // For PGO use pipeline, try to optimize memory intrinsics such as memcpy
649 // using the size value profile. Don't perform this when optimizing for size.
650 if (PGOOpt && PGOOpt->Action == PGOOptions::IRUse)
652
653 FPM.addPass(TailCallElimPass(/*UpdateFunctionEntryCount=*/
654 isInstrumentedPGOUse()));
655 FPM.addPass(
656 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
657
658 // Form canonically associated expression trees, and simplify the trees using
659 // basic mathematical properties. For example, this will form (nearly)
660 // minimal multiplication trees.
662
665
666 // Add the primary loop simplification pipeline.
667 // FIXME: Currently this is split into two loop pass pipelines because we run
668 // some function passes in between them. These can and should be removed
669 // and/or replaced by scheduling the loop pass equivalents in the correct
670 // positions. But those equivalent passes aren't powerful enough yet.
671 // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
672 // used. We have `LoopSimplifyCFGPass`, which isn't yet powerful enough to
673 // fully replace `SimplifyCFGPass`, and the closest to the other we have is
674 // `LoopInstSimplify`.
675 LoopPassManager LPM1, LPM2;
676
677 // Simplify the loop body. We do this initially to clean up after other loop
678 // passes run, either when iterating on a loop or on inner loops with
679 // implications on the outer loop.
680 LPM1.addPass(LoopInstSimplifyPass());
681 LPM1.addPass(LoopSimplifyCFGPass());
682
683 // Try to remove as much code from the loop header as possible,
684 // to reduce amount of IR that will have to be duplicated. However,
685 // do not perform speculative hoisting the first time as LICM
686 // will destroy metadata that may not need to be destroyed if run
687 // after loop rotation.
688 // TODO: Investigate promotion cap for O1.
689 LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
690 /*AllowSpeculation=*/false));
691
692 LPM1.addPass(
693 LoopRotatePass(/*EnableHeaderDuplication=*/true, isLTOPreLink(Phase)));
694 // TODO: Investigate promotion cap for O1.
695 LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
696 /*AllowSpeculation=*/true));
697 LPM1.addPass(
698 SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3));
700 LPM1.addPass(LoopFlattenPass());
701
702 LPM2.addPass(LoopIdiomRecognizePass());
703 LPM2.addPass(IndVarSimplifyPass());
704
705 {
707 ExtraPasses.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
709 LPM2.addPass(std::move(ExtraPasses));
710 }
711
713
714 LPM2.addPass(LoopDeletionPass());
715
716 // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
717 // because it changes the IR and makes profile annotation in the backend compile
718 // inaccurate. The normal unroller doesn't pay attention to forced full unroll
719 // attributes so we need to make sure and allow the full unroll pass to pay
720 // attention to it.
721 if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
722 PGOOpt->Action != PGOOptions::SampleUse)
723 LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
724 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
725 PTO.ForgetAllSCEVInLoopUnroll));
726
728
729 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
730 /*UseMemorySSA=*/true));
731 FPM.addPass(
732 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
734 // The loop passes in LPM2 (LoopIdiomRecognizePass, IndVarSimplifyPass,
735 // LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA.
736 // *All* loop passes must preserve it, in order to be able to use it.
737 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
738 /*UseMemorySSA=*/false));
739
740 // Delete small array after loop unroll.
742
743 // Try vectorization/scalarization transforms that are both improvements
744 // themselves and can allow further folds with GVN and InstCombine.
745 FPM.addPass(VectorCombinePass(/*TryEarlyFoldsOnly=*/true));
746
747 // Eliminate redundancies.
749 if (RunNewGVN)
750 FPM.addPass(NewGVNPass());
751 else
752 FPM.addPass(GVNPass());
753
754 // Sparse conditional constant propagation.
755 // FIXME: It isn't clear why we do this *after* loop passes rather than
756 // before...
757 FPM.addPass(SCCPPass());
758
759 // Delete dead bit computations (instcombine runs after to fold away the dead
760 // computations, and then ADCE will run later to exploit any new DCE
761 // opportunities that creates).
762 FPM.addPass(BDCEPass());
763
764 // Run instcombine after redundancy and dead bit elimination to exploit
765 // opportunities opened up by them.
767 invokePeepholeEPCallbacks(FPM, Level);
768
769 // Re-consider control flow based optimizations after redundancy elimination,
770 // redo DCE, etc.
773
776
777 // Finally, do an expensive DCE pass to catch all the dead code exposed by
778 // the simplifications and basic cleanup after all the simplifications.
779 // TODO: Investigate if this is too expensive.
780 FPM.addPass(ADCEPass());
781
782 // Specially optimize memory movement as it doesn't look like dataflow in SSA.
783 FPM.addPass(MemCpyOptPass());
784
785 FPM.addPass(DSEPass());
787
789 LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
790 /*AllowSpeculation=*/true),
791 /*UseMemorySSA=*/true));
792
793 FPM.addPass(CoroElidePass());
794
796
798 .convertSwitchRangeToICmp(true)
799 .convertSwitchToArithmetic(true)
800 .hoistCommonInsts(true)
801 .sinkCommonInsts(true)));
803 invokePeepholeEPCallbacks(FPM, Level);
804
805 return FPM;
806}
807
808void PassBuilder::addRequiredLTOPreLinkPasses(ModulePassManager &MPM) {
811}
812
813void PassBuilder::addPreInlinerPasses(ModulePassManager &MPM,
814 OptimizationLevel Level,
815 ThinOrFullLTOPhase LTOPhase) {
816 assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
818 return;
819 InlineParams IP;
820
822
823 // FIXME: The hint threshold has the same value used by the regular inliner
824 // when not optimizing for size. This should probably be lowered after
825 // performance testing.
826 // FIXME: This comment is cargo-culted from the old pass manager; revisit.
827 IP.HintThreshold = 325;
830 IP, /* MandatoryFirst */ true,
832 CGSCCPassManager &CGPipeline = MIWP.getPM();
833
835 FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
836 FPM.addPass(EarlyCSEPass()); // Catch trivial redundancies.
837 FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(
838 true))); // Merge & remove basic blocks.
839 FPM.addPass(InstCombinePass()); // Combine silly sequences.
840 invokePeepholeEPCallbacks(FPM, Level);
841
842 CGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
843 std::move(FPM), PTO.EagerlyInvalidateAnalyses));
844
845 MPM.addPass(std::move(MIWP));
846
847 // Delete anything that is now dead to make sure that we don't instrument
848 // dead code. Instrumentation can end up keeping dead code around and
849 // dramatically increase code size.
850 MPM.addPass(GlobalDCEPass());
851}
852
853void PassBuilder::addPostPGOLoopRotation(ModulePassManager &MPM,
854 OptimizationLevel Level) {
856 // Disable header duplication in loop rotation at -Oz.
858 createFunctionToLoopPassAdaptor(LoopRotatePass(),
859 /*UseMemorySSA=*/false),
860 PTO.EagerlyInvalidateAnalyses));
861 }
862}
863
864void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM,
865 OptimizationLevel Level, bool RunProfileGen,
866 bool IsCS, bool AtomicCounterUpdate,
867 std::string ProfileFile,
868 std::string ProfileRemappingFile) {
869 assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
870
871 if (!RunProfileGen) {
872 assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
873 MPM.addPass(
874 PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS));
875 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
876 // RequireAnalysisPass for PSI before subsequent non-module passes.
877 MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
878 return;
879 }
880
881 // Perform PGO instrumentation.
882 MPM.addPass(PGOInstrumentationGen(IsCS ? PGOInstrumentationType::CSFDO
884
885 addPostPGOLoopRotation(MPM, Level);
886 // Add the profile lowering pass.
887 InstrProfOptions Options;
888 if (!ProfileFile.empty())
889 Options.InstrProfileOutput = ProfileFile;
890 // Do counter promotion at Level greater than O0.
891 Options.DoCounterPromotion = true;
892 Options.UseBFIInPromotion = IsCS;
893 if (EnableSampledInstr) {
894 Options.Sampling = true;
895 // With sampling, there is little benefit to enabling counter promotion.
896 // But note that sampling does work with counter promotion.
897 Options.DoCounterPromotion = false;
898 }
899 Options.Atomic = AtomicCounterUpdate;
900 MPM.addPass(InstrProfilingLoweringPass(Options, IsCS));
901}
902
904 bool RunProfileGen, bool IsCS,
905 bool AtomicCounterUpdate,
906 std::string ProfileFile,
907 std::string ProfileRemappingFile) {
908 if (!RunProfileGen) {
909 assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
910 MPM.addPass(
911 PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS));
912 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
913 // RequireAnalysisPass for PSI before subsequent non-module passes.
915 return;
916 }
917
918 // Perform PGO instrumentation.
921 // Add the profile lowering pass.
923 if (!ProfileFile.empty())
924 Options.InstrProfileOutput = ProfileFile;
925 // Do not do counter promotion at O0.
926 Options.DoCounterPromotion = false;
927 Options.UseBFIInPromotion = IsCS;
928 Options.Atomic = AtomicCounterUpdate;
930}
931
933 return getInlineParamsFromOptLevel(Level.getSpeedupLevel());
934}
935
939 InlineParams IP;
940 if (PTO.InlinerThreshold == -1)
942 else
943 IP = getInlineParams(PTO.InlinerThreshold);
944 // For PreLinkThinLTO + SamplePGO or PreLinkFullLTO + SamplePGO,
945 // set hot-caller threshold to 0 to disable hot
946 // callsite inline (as much as possible [1]) because it makes
947 // profile annotation in the backend inaccurate.
948 //
949 // [1] Note the cost of a function could be below zero due to erased
950 // prologue / epilogue.
951 if (isLTOPreLink(Phase) && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)
953
954 if (PGOOpt)
956
960
961 // Require the GlobalsAA analysis for the module so we can query it within
962 // the CGSCC pipeline.
964 MIWP.addModulePass(RequireAnalysisPass<GlobalsAA, Module>());
965 // Invalidate AAManager so it can be recreated and pick up the newly
966 // available GlobalsAA.
967 MIWP.addModulePass(
969 }
970
971 // Require the ProfileSummaryAnalysis for the module so we can query it within
972 // the inliner pass.
974
975 // Now begin the main postorder CGSCC pipeline.
976 // FIXME: The current CGSCC pipeline has its origins in the legacy pass
977 // manager and trying to emulate its precise behavior. Much of this doesn't
978 // make a lot of sense and we should revisit the core CGSCC structure.
979 CGSCCPassManager &MainCGPipeline = MIWP.getPM();
980
981 // Note: historically, the PruneEH pass was run first to deduce nounwind and
982 // generally clean up exception handling overhead. It isn't clear this is
983 // valuable as the inliner doesn't currently care whether it is inlining an
984 // invoke or a call.
985
987 MainCGPipeline.addPass(AttributorCGSCCPass());
989 MainCGPipeline.addPass(AttributorLightCGSCCPass());
990
991 // Deduce function attributes. We do another run of this after the function
992 // simplification pipeline, so this only needs to run when it could affect the
993 // function simplification pipeline, which is only the case with recursive
994 // functions.
995 MainCGPipeline.addPass(PostOrderFunctionAttrsPass(/*SkipNonRecursive*/ true));
996
997 // When at O3 add argument promotion to the pass pipeline.
998 // FIXME: It isn't at all clear why this should be limited to O3.
999 if (Level == OptimizationLevel::O3)
1000 MainCGPipeline.addPass(ArgumentPromotionPass());
1001
1002 // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if
1003 // there are no OpenMP runtime calls present in the module.
1004 if (Level == OptimizationLevel::O2 || Level == OptimizationLevel::O3)
1005 MainCGPipeline.addPass(OpenMPOptCGSCCPass(Phase));
1006
1007 invokeCGSCCOptimizerLateEPCallbacks(MainCGPipeline, Level);
1008
1009 // Add the core function simplification pipeline nested inside the
1010 // CGSCC walk.
1013 PTO.EagerlyInvalidateAnalyses, /*NoRerun=*/true));
1014
1015 // Finally, deduce any function attributes based on the fully simplified
1016 // function.
1017 MainCGPipeline.addPass(PostOrderFunctionAttrsPass());
1018
1019 // Mark that the function is fully simplified and that it shouldn't be
1020 // simplified again if we somehow revisit it due to CGSCC mutations unless
1021 // it's been modified since.
1024
1026 MainCGPipeline.addPass(CoroSplitPass(Level != OptimizationLevel::O0));
1027 MainCGPipeline.addPass(CoroAnnotationElidePass());
1028 }
1029
1030 // Make sure we don't affect potential future NoRerun CGSCC adaptors.
1031 MIWP.addLateModulePass(createModuleToFunctionPassAdaptor(
1033
1034 return MIWP;
1035}
1036
1041
1043 // For PreLinkThinLTO + SamplePGO or PreLinkFullLTO + SamplePGO,
1044 // set hot-caller threshold to 0 to disable hot
1045 // callsite inline (as much as possible [1]) because it makes
1046 // profile annotation in the backend inaccurate.
1047 //
1048 // [1] Note the cost of a function could be below zero due to erased
1049 // prologue / epilogue.
1050 if (isLTOPreLink(Phase) && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)
1051 IP.HotCallSiteThreshold = 0;
1052
1053 if (PGOOpt)
1055
1056 // The inline deferral logic is used to avoid losing some
1057 // inlining chance in future. It is helpful in SCC inliner, in which
1058 // inlining is processed in bottom-up order.
1059 // While in module inliner, the inlining order is a priority-based order
1060 // by default. The inline deferral is unnecessary there. So we disable the
1061 // inline deferral logic in module inliner.
1062 IP.EnableDeferral = false;
1063
1066 MPM.addPass(GlobalOptPass());
1067 MPM.addPass(GlobalDCEPass());
1068 MPM.addPass(PGOCtxProfFlatteningPass(/*IsPreThinlink=*/false));
1069 }
1070
1073 PTO.EagerlyInvalidateAnalyses));
1074
1078 MPM.addPass(
1080 }
1081
1082 return MPM;
1083}
1084
1088 assert(Level != OptimizationLevel::O0 &&
1089 "Should not be used for O0 pipeline");
1090
1092 "FullLTOPostLink shouldn't call buildModuleSimplificationPipeline!");
1093
1095
1096 // Place pseudo probe instrumentation as the first pass of the pipeline to
1097 // minimize the impact of optimization changes.
1098 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1101
1102 bool HasSampleProfile = PGOOpt && (PGOOpt->Action == PGOOptions::SampleUse);
1103
1104 // In ThinLTO mode, when flattened profile is used, all the available
1105 // profile information will be annotated in PreLink phase so there is
1106 // no need to load the profile again in PostLink.
1107 bool LoadSampleProfile =
1108 HasSampleProfile &&
1110
1111 // During the ThinLTO backend phase we perform early indirect call promotion
1112 // here, before globalopt. Otherwise imported available_externally functions
1113 // look unreferenced and are removed. If we are going to load the sample
1114 // profile then defer until later.
1115 // TODO: See if we can move later and consolidate with the location where
1116 // we perform ICP when we are loading a sample profile.
1117 // TODO: We pass HasSampleProfile (whether there was a sample profile file
1118 // passed to the compile) to the SamplePGO flag of ICP. This is used to
1119 // determine whether the new direct calls are annotated with prof metadata.
1120 // Ideally this should be determined from whether the IR is annotated with
1121 // sample profile, and not whether a sample profile was provided on the
1122 // command line. E.g. for flattened profiles where we will not be reloading
1123 // the sample profile in the ThinLTO backend, we ideally shouldn't have to
1124 // provide the sample profile file.
1125 if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink && !LoadSampleProfile)
1126 MPM.addPass(PGOIndirectCallPromotion(true /* InLTO */, HasSampleProfile));
1127
1128 // Create an early function pass manager to cleanup the output of the
1129 // frontend. Not necessary with LTO post link pipelines since the pre link
1130 // pipeline already cleaned up the frontend output.
1132 // Do basic inference of function attributes from known properties of system
1133 // libraries and other oracles.
1135 MPM.addPass(CoroEarlyPass());
1136
1137 FunctionPassManager EarlyFPM;
1138 EarlyFPM.addPass(EntryExitInstrumenterPass(/*PostInlining=*/false));
1139 // Lower llvm.expect to metadata before attempting transforms.
1140 // Compare/branch metadata may alter the behavior of passes like
1141 // SimplifyCFG.
1143 EarlyFPM.addPass(SimplifyCFGPass());
1145 EarlyFPM.addPass(EarlyCSEPass());
1146 if (Level == OptimizationLevel::O3)
1147 EarlyFPM.addPass(CallSiteSplittingPass());
1149 std::move(EarlyFPM), PTO.EagerlyInvalidateAnalyses));
1150 }
1151
1152 if (LoadSampleProfile) {
1153 // Annotate sample profile right after early FPM to ensure freshness of
1154 // the debug info.
1156 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile, Phase, FS));
1157 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
1158 // RequireAnalysisPass for PSI before subsequent non-module passes.
1160 // Do not invoke ICP in the LTOPrelink phase as it makes it hard
1161 // for the profile annotation to be accurate in the LTO backend.
1162 if (!isLTOPreLink(Phase))
1163 // We perform early indirect call promotion here, before globalopt.
1164 // This is important for the ThinLTO backend phase because otherwise
1165 // imported available_externally functions look unreferenced and are
1166 // removed.
1167 MPM.addPass(
1168 PGOIndirectCallPromotion(true /* IsInLTO */, true /* SamplePGO */));
1169 }
1170
1171 // Try to perform OpenMP specific optimizations on the module. This is a
1172 // (quick!) no-op if there are no OpenMP runtime calls present in the module.
1174
1176 MPM.addPass(AttributorPass());
1179
1180 // Lower type metadata and the type.test intrinsic in the ThinLTO
1181 // post link pipeline after ICP. This is to enable usage of the type
1182 // tests in ICP sequences.
1185
1187
1188 // Interprocedural constant propagation now that basic cleanup has occurred
1189 // and prior to optimizing globals.
1190 // FIXME: This position in the pipeline hasn't been carefully considered in
1191 // years, it should be re-analyzed.
1192 MPM.addPass(
1193 IPSCCPPass(IPSCCPOptions(/*AllowFuncSpec=*/!isLTOPreLink(Phase))));
1194
1195 // Attach metadata to indirect call sites indicating the set of functions
1196 // they may target at run-time. This should follow IPSCCP.
1198
1199 // Optimize globals to try and fold them into constants.
1200 MPM.addPass(GlobalOptPass());
1201
1202 // Create a small function pass pipeline to cleanup after all the global
1203 // optimizations.
1204 FunctionPassManager GlobalCleanupPM;
1205 // FIXME: Should this instead be a run of SROA?
1206 GlobalCleanupPM.addPass(PromotePass());
1207 GlobalCleanupPM.addPass(InstCombinePass());
1208 invokePeepholeEPCallbacks(GlobalCleanupPM, Level);
1209 GlobalCleanupPM.addPass(
1210 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
1211 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(GlobalCleanupPM),
1212 PTO.EagerlyInvalidateAnalyses));
1213
1214 // We already asserted this happens in non-FullLTOPostLink earlier.
1215 const bool IsPreLink = Phase != ThinOrFullLTOPhase::ThinLTOPostLink;
1216 // Enable contextual profiling instrumentation.
1217 const bool IsCtxProfGen =
1219 const bool IsPGOPreLink = !IsCtxProfGen && PGOOpt && IsPreLink;
1220 const bool IsPGOInstrGen =
1221 IsPGOPreLink && PGOOpt->Action == PGOOptions::IRInstr;
1222 const bool IsPGOInstrUse =
1223 IsPGOPreLink && PGOOpt->Action == PGOOptions::IRUse;
1224 const bool IsMemprofUse = IsPGOPreLink && !PGOOpt->MemoryProfile.empty();
1225 // We don't want to mix pgo ctx gen and pgo gen; we also don't currently
1226 // enable ctx profiling from the frontend.
1228 "Enabling both instrumented PGO and contextual instrumentation is not "
1229 "supported.");
1230 const bool IsCtxProfUse =
1232
1233 assert(
1235 "--instrument-cold-function-only-path is provided but "
1236 "--pgo-instrument-cold-function-only is not enabled");
1237 const bool IsColdFuncOnlyInstrGen = PGOInstrumentColdFunctionOnly &&
1238 IsPGOPreLink &&
1240
1241 if (IsPGOInstrGen || IsPGOInstrUse || IsMemprofUse || IsCtxProfGen ||
1242 IsCtxProfUse || IsColdFuncOnlyInstrGen)
1243 addPreInlinerPasses(MPM, Level, Phase);
1244
1245 // Add all the requested passes for instrumentation PGO, if requested.
1246 if (IsPGOInstrGen || IsPGOInstrUse) {
1247 addPGOInstrPasses(MPM, Level,
1248 /*RunProfileGen=*/IsPGOInstrGen,
1249 /*IsCS=*/false, PGOOpt->AtomicCounterUpdate,
1250 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile);
1251 } else if (IsCtxProfGen || IsCtxProfUse) {
1253 // In pre-link, we just want the instrumented IR. We use the contextual
1254 // profile in the post-thinlink phase.
1255 // The instrumentation will be removed in post-thinlink after IPO.
1256 // FIXME(mtrofin): move AssignGUIDPass if there is agreement to use this
1257 // mechanism for GUIDs.
1258 MPM.addPass(AssignGUIDPass());
1259 if (IsCtxProfUse) {
1260 MPM.addPass(PGOCtxProfFlatteningPass(/*IsPreThinlink=*/true));
1261 return MPM;
1262 }
1263 // Block further inlining in the instrumented ctxprof case. This avoids
1264 // confusingly collecting profiles for the same GUID corresponding to
1265 // different variants of the function. We could do like PGO and identify
1266 // functions by a (GUID, Hash) tuple, but since the ctxprof "use" waits for
1267 // thinlto to happen before performing any further optimizations, it's
1268 // unnecessary to collect profiles for non-prevailing copies.
1270 addPostPGOLoopRotation(MPM, Level);
1272 } else if (IsColdFuncOnlyInstrGen) {
1273 addPGOInstrPasses(MPM, Level, /* RunProfileGen */ true, /* IsCS */ false,
1274 /* AtomicCounterUpdate */ false,
1276 /* ProfileRemappingFile */ "");
1277 }
1278
1279 if (IsPGOInstrGen || IsPGOInstrUse || IsCtxProfGen)
1280 MPM.addPass(PGOIndirectCallPromotion(false, false));
1281
1282 if (IsPGOPreLink && PGOOpt->CSAction == PGOOptions::CSIRInstr)
1283 MPM.addPass(PGOInstrumentationGenCreateVar(PGOOpt->CSProfileGenFile,
1285
1286 if (IsMemprofUse)
1287 MPM.addPass(MemProfUsePass(PGOOpt->MemoryProfile, FS));
1288
1289 if (PGOOpt && (PGOOpt->Action == PGOOptions::IRUse ||
1290 PGOOpt->Action == PGOOptions::SampleUse))
1291 MPM.addPass(PGOForceFunctionAttrsPass(PGOOpt->ColdOptType));
1292
1293 MPM.addPass(AlwaysInlinerPass(/*InsertLifetimeIntrinsics=*/true));
1294
1297 else
1298 MPM.addPass(buildInlinerPipeline(Level, Phase));
1299
1300 // Remove any dead arguments exposed by cleanups, constant folding globals,
1301 // and argument promotion.
1303
1306
1308 MPM.addPass(CoroCleanupPass());
1309
1310 // Optimize globals now that functions are fully simplified.
1311 MPM.addPass(GlobalOptPass());
1312 MPM.addPass(GlobalDCEPass());
1313
1314 return MPM;
1315}
1316
/// Add the vectorization-related pass sequence to the function pass manager
/// FPM (every pass below is registered through FPM.addPass). The sequence
/// depends on whether LTOPhase is FullLTOPostLink (IsFullLTO): in the visible
/// code the full-LTO path runs SCCP/InstCombine/BDCE before SLP vectorization,
/// while the other phases run InstCombine, loop unrolling and SROA after
/// VectorCombine.
/// NOTE(review): the numeric prefixes in this listing skip values (e.g. 1319,
/// 1323-1324, 1331), so some lines of this function, including the FPM
/// parameter declaration, are not shown here.
1317/// TODO: Should LTO cause any differences to this set of passes?
1318void PassBuilder::addVectorPasses(OptimizationLevel Level,
1320 ThinOrFullLTOPhase LTOPhase) {
1321 const bool IsFullLTO = LTOPhase == ThinOrFullLTOPhase::FullLTOPostLink;
1322
1325
1326 // Drop dereferenceable assumes after vectorization, as they are no longer
1327 // needed and can inhibit further optimization.
1328 if (!isLTOPreLink(LTOPhase))
1329 FPM.addPass(DropUnnecessaryAssumesPass(/*DropDereferenceable=*/true));
1330
1332 if (IsFullLTO) {
1333 // The vectorizer may have significantly shortened a loop body; unroll
1334 // again. Unroll small loops to hide loop backedge latency and saturate any
1335 // parallel execution resources of an out-of-order processor. We also then
1336 // need to clean up redundancies and loop invariant code.
1337 // FIXME: It would be really good to use a loop-integrated instruction
1338 // combiner for cleanup here so that the unrolling and LICM can be pipelined
1339 // across the loop nests.
1340 // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
1343 LoopUnrollAndJamPass(Level.getSpeedupLevel())));
1345 Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
1348 // Now that we are done with loop unrolling, be it either by LoopVectorizer,
1349 // or LoopUnroll passes, some variable-offset GEP's into alloca's could have
1350 // become constant-offset, thus enabling SROA and alloca promotion. Do so.
1351 // NOTE: we are very late in the pipeline, and we don't have any LICM
1352 // or SimplifyCFG passes scheduled after us, that would cleanup
1353 // the CFG mess this may create if allowed to modify CFG, so forbid that.
1355 }
1356
1357 if (!IsFullLTO) {
1358 // Eliminate loads by forwarding stores from the previous iteration to loads
1359 // of the current iteration.
1361 }
1362 // Cleanup after the loop optimization passes.
1363 FPM.addPass(InstCombinePass());
1364
1365 if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
1366 ExtraFunctionPassManager<ShouldRunExtraVectorPasses> ExtraPasses;
1367 // At higher optimization levels, try to clean up any runtime overlap and
1368 // alignment checks inserted by the vectorizer. We want to track correlated
1369 // runtime checks for two inner loops in the same outer loop, fold any
1370 // common computations, hoist loop-invariant aspects out of any outer loop,
1371 // and unswitch the runtime checks if possible. Once hoisted, we may have
1372 // dead (or speculatable) control flows or more combining opportunities.
1373 ExtraPasses.addPass(EarlyCSEPass());
1374 ExtraPasses.addPass(CorrelatedValuePropagationPass());
1375 ExtraPasses.addPass(InstCombinePass());
1376 LoopPassManager LPM;
1377 LPM.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
1378 /*AllowSpeculation=*/true));
1379 LPM.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
1381 ExtraPasses.addPass(
1382 createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/true));
1383 ExtraPasses.addPass(
1384 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
1385 ExtraPasses.addPass(InstCombinePass());
1386 FPM.addPass(std::move(ExtraPasses));
1387 }
1388
1389 // Now that we've formed fast to execute loop structures, we do further
1390 // optimizations. These are run afterward as they might block doing complex
1391 // analyses and transforms such as what are needed for loop vectorization.
1392
1393 // Cleanup after loop vectorization, etc. Simplification passes like CVP and
1394 // GVN, loop transforms, and others have already run, so it's now better to
1395 // convert to more optimized IR using more aggressive simplify CFG options.
1396 // The extra sinking transform can create larger basic blocks, so do this
1397 // before SLP vectorization.
1398 FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
1399 .forwardSwitchCondToPhi(true)
1400 .convertSwitchRangeToICmp(true)
1401 .convertSwitchToArithmetic(true)
1402 .convertSwitchToLookupTable(true)
1403 .needCanonicalLoops(false)
1404 .hoistCommonInsts(true)
1405 .sinkCommonInsts(true)));
1406
1407 if (IsFullLTO) {
1408 FPM.addPass(SCCPPass());
1409 FPM.addPass(InstCombinePass());
1410 FPM.addPass(BDCEPass());
1411 }
1412
1413 // Optimize parallel scalar instruction chains into SIMD instructions.
1414 if (PTO.SLPVectorization) {
1415 FPM.addPass(SLPVectorizerPass());
1416 if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
1417 FPM.addPass(EarlyCSEPass());
1418 }
1419 }
1420 // Enhance/cleanup vector code.
1421 FPM.addPass(VectorCombinePass());
1422
1423 if (!IsFullLTO) {
1424 FPM.addPass(InstCombinePass());
1425 // Unroll small loops to hide loop backedge latency and saturate any
1426 // parallel execution resources of an out-of-order processor. We also then
1427 // need to clean up redundancies and loop invariant code.
1428 // FIXME: It would be really good to use a loop-integrated instruction
1429 // combiner for cleanup here so that the unrolling and LICM can be pipelined
1430 // across the loop nests.
1431 // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
1432 if (EnableUnrollAndJam && PTO.LoopUnrolling) {
1434 LoopUnrollAndJamPass(Level.getSpeedupLevel())));
1435 }
1436 FPM.addPass(LoopUnrollPass(LoopUnrollOptions(
1437 Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
1438 PTO.ForgetAllSCEVInLoopUnroll)));
1439 FPM.addPass(WarnMissedTransformationsPass());
1440 // Now that we are done with loop unrolling, be it either by LoopVectorizer,
1441 // or LoopUnroll passes, some variable-offset GEP's into alloca's could have
1442 // become constant-offset, thus enabling SROA and alloca promotion. Do so.
1443 // NOTE: we are very late in the pipeline, and we don't have any LICM
1444 // or SimplifyCFG passes scheduled after us, that would cleanup
1445 // the CFG mess this may create if allowed to modify CFG, so forbid that.
1446 FPM.addPass(SROAPass(SROAOptions::PreserveCFG));
1447 }
1448
1449 FPM.addPass(InferAlignmentPass());
1450 FPM.addPass(InstCombinePass());
1451
1452 // This is needed for two reasons:
1453 // 1. It works around problems that instcombine introduces, such as sinking
1454 // expensive FP divides into loops containing multiplications using the
1455 // divide result.
1456 // 2. It helps to clean up some loop-invariant code created by the loop
1457 // unroll pass when IsFullLTO=false.
1459 LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
1460 /*AllowSpeculation=*/true),
1461 /*UseMemorySSA=*/true));
1462
1463 // Now that we've vectorized and unrolled loops, we may have more refined
1464 // alignment information, try to re-derive it here.
1465 FPM.addPass(AlignmentFromAssumptionsPass());
1466}
1467
1470 ThinOrFullLTOPhase LTOPhase) {
     // Module-level optimization pipeline: collects the late loop,
     // vectorization and cleanup function passes in OptimizePM, adapts that
     // into MPM, appends the late module passes and returns MPM.
     // NOTE(review): the start of the signature and the MPM declaration are
     // missing from this listing (numbering jumps 1467 -> 1470, 1471 -> 1473);
     // presumably this is PassBuilder::buildModuleOptimizationPipeline -
     // confirm against the upstream file.
1471 const bool LTOPreLink = isLTOPreLink(LTOPhase);
1473
1474 // Run partial inlining pass to partially inline functions that have
1475 // large bodies.
1478
1479 // Remove avail extern fns and globals definitions since we aren't compiling
1480 // an object file for later LTO. For LTO we want to preserve these so they
1481 // are eligible for inlining at link-time. Note if they are unreferenced they
1482 // will be removed by GlobalDCE later, so this only impacts referenced
1483 // available externally globals. Eventually they will be suppressed during
1484 // codegen, but eliminating here enables more opportunity for GlobalDCE as it
1485 // may make globals referenced by available external functions dead and saves
1486 // running remaining passes on the eliminated functions. These should be
1487 // preserved during prelinking for link-time inlining decisions.
1488 if (!LTOPreLink)
1490
1491 // Do RPO function attribute inference across the module to forward-propagate
1492 // attributes where applicable.
1493 // FIXME: Is this really an optimization rather than a canonicalization?
1495
1496 // Do a post inline PGO instrumentation and use pass. This is a context
1497 // sensitive PGO pass. We don't want to do this in LTOPreLink phase as
1498 // cross-module inline has not been done yet. The context sensitive
1499 // instrumentation is after all the inlines are done.
1500 if (!LTOPreLink && PGOOpt) {
1501 if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
1502 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true,
1503 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
1504 PGOOpt->CSProfileGenFile, PGOOpt->ProfileRemappingFile);
1505 else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
1506 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false,
1507 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
1508 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile);
1509 }
1510
1511 // Re-compute GlobalsAA here prior to function passes. This is particularly
1512 // useful as the above will have inlined, DCE'ed, and function-attr
1513 // propagated everything. We should at this point have a reasonably minimal
1514 // and richly annotated call graph. By computing aliasing and mod/ref
1515 // information for all local globals here, the late loop passes and notably
1516 // the vectorizer will be able to use them to help recognize vectorizable
1517 // memory operations.
1520
1521 invokeOptimizerEarlyEPCallbacks(MPM, Level, LTOPhase);
1522
1523 FunctionPassManager OptimizePM;
1524
1525 // Only drop unnecessary assumes post-inline and post-link, as otherwise
1526 // additional uses of the affected value may be introduced through inlining
1527 // and CSE.
1528 if (!isLTOPreLink(LTOPhase))
1529 OptimizePM.addPass(DropUnnecessaryAssumesPass());
1530
1531 // Scheduling LoopVersioningLICM when inlining is over, because after that
1532 // we may see more accurate aliasing. Reason to run this late is that too
1533 // early versioning may prevent further inlining due to increase of code
1534 // size. Other optimizations which run later might benefit from the no-alias
1535 // assumption in the cloned loop.
1537 OptimizePM.addPass(
1539 // LoopVersioningLICM pass might increase new LICM opportunities.
1541 LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
1542 /*AllowSpeculation=*/true),
1543 /*UseMemorySSA=*/true));
1544 }
1545
1546 OptimizePM.addPass(Float2IntPass());
1548
1549 if (EnableMatrix) {
1550 OptimizePM.addPass(LowerMatrixIntrinsicsPass());
1551 OptimizePM.addPass(EarlyCSEPass());
1552 }
1553
1554 // CHR pass should only be applied with the profile information.
1555 // The check is to check the profile summary information in CHR.
1556 if (EnableCHR && Level == OptimizationLevel::O3)
1557 OptimizePM.addPass(ControlHeightReductionPass());
1558
1559 // FIXME: We need to run some loop optimizations to re-rotate loops after
1560 // simplifycfg and others undo their rotation.
1561
1562 // Optimize the loop execution. These passes operate on entire loop nests
1563 // rather than on each loop in an inside-out manner, and so they are actually
1564 // function passes.
1565
1566 invokeVectorizerStartEPCallbacks(OptimizePM, Level);
1567
1568 LoopPassManager LPM;
1569 // First rotate loops that may have been un-rotated by prior passes.
1570 // Disable header duplication at -Oz.
     // NOTE(review): header duplication is passed as `true` unconditionally
     // below, so the -Oz remark above looks stale - confirm.
1571 LPM.addPass(LoopRotatePass(/*EnableLoopHeaderDuplication=*/true, LTOPreLink,
1572 /*CheckExitCount=*/true));
1573 // Some loops may have become dead by now. Try to delete them.
1574 // FIXME: see discussion in https://reviews.llvm.org/D112851,
1575 // this may need to be revisited once we run GVN before loop deletion
1576 // in the simplification pipeline.
1577 LPM.addPass(LoopDeletionPass());
1578
1579 if (PTO.LoopInterchange)
1580 LPM.addPass(LoopInterchangePass());
1581
1582 OptimizePM.addPass(
1583 createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/false));
1584
1585 // FIXME: This may not be the right place in the pipeline.
1586 // We need to have the data to support the right place.
1587 if (PTO.LoopFusion)
1588 OptimizePM.addPass(LoopFusePass());
1589
1590 // Distribute loops to allow partial vectorization. I.e. isolate dependences
1591 // into separate loop that would otherwise inhibit vectorization. This is
1592 // currently only performed for loops marked with the metadata
1593 // llvm.loop.distribute=true or when -enable-loop-distribute is specified.
1594 OptimizePM.addPass(LoopDistributePass());
1595
1596 // Populates the VFABI attribute with the scalar-to-vector mappings
1597 // from the TargetLibraryInfo.
1598 OptimizePM.addPass(InjectTLIMappings());
1599
1600 addVectorPasses(Level, OptimizePM, LTOPhase);
1601
1602 invokeVectorizerEndEPCallbacks(OptimizePM, Level);
1603
1604 // LoopSink pass sinks instructions hoisted by LICM, which serves as a
1605 // canonicalization pass that enables other optimizations. As a result,
1606 // LoopSink pass needs to be a very late IR pass to avoid undoing LICM
1607 // result too early.
1608 OptimizePM.addPass(LoopSinkPass());
1609
1610 // And finally clean up LCSSA form before generating code.
1611 OptimizePM.addPass(InstSimplifyPass());
1612
1613 // This hoists/decomposes div/rem ops. It should run after other sink/hoist
1614 // passes to avoid re-sinking, but before SimplifyCFG because it can allow
1615 // flattening of blocks.
1616 OptimizePM.addPass(DivRemPairsPass());
1617
1618 // Merge adjacent icmps into memcmp, then expand memcmp to loads/compares.
1619 // TODO: move this further up so that it can be optimized by GVN, etc.
1620 if (EnableMergeICmps)
1621 OptimizePM.addPass(MergeICmpsPass());
1622 OptimizePM.addPass(ExpandMemCmpPass());
1623
1624 // Try to annotate calls that were created during optimization.
1625 OptimizePM.addPass(
1626 TailCallElimPass(/*UpdateFunctionEntryCount=*/isInstrumentedPGOUse()));
1627
1628 // LoopSink (and other loop passes since the last simplifyCFG) might have
1629 // resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
1630 OptimizePM.addPass(
1632 .convertSwitchRangeToICmp(true)
1633 .convertSwitchToArithmetic(true)
1634 .speculateUnpredictables(true)
1635 .hoistLoadsStoresWithCondFaulting(true)));
1636
1637 // Add the core optimizing pipeline.
1638 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM),
1639 PTO.EagerlyInvalidateAnalyses));
1640
1641 // AllocToken transforms heap allocation calls; this needs to run late after
1642 // other allocation call transformations (such as those in InstCombine).
1643 if (!LTOPreLink)
1644 MPM.addPass(AllocTokenPass());
1645
1646 invokeOptimizerLastEPCallbacks(MPM, Level, LTOPhase);
1647
1648 // Split out cold code. Splitting is done late to avoid hiding context from
1649 // other optimizations and inadvertently regressing performance. The tradeoff
1650 // is that this has a higher code size cost than splitting early.
1651 if (EnableHotColdSplit && !LTOPreLink)
1653
1654 // Search the code for similar regions of code. If enough similar regions can
1655 // be found where extracting the regions into their own function will decrease
1656 // the size of the program, we extract the regions, and deduplicate the
1657 // structurally similar regions.
1658 if (EnableIROutliner)
1659 MPM.addPass(IROutlinerPass());
1660
1661 // Now we need to do some global optimization transforms.
1662 // FIXME: It would seem like these should come first in the optimization
1663 // pipeline and maybe be the bottom of the canonicalization pipeline? Weird
1664 // ordering here.
1665 MPM.addPass(GlobalDCEPass());
1667
1668 // Merge functions if requested. It has a better chance to merge functions
1669 // after ConstantMerge folded jump tables.
1670 if (PTO.MergeFunctions)
1672
1673 if (PTO.CallGraphProfile && !LTOPreLink)
1674 MPM.addPass(CGProfilePass(isLTOPostLink(LTOPhase)));
1675
1676 // RelLookupTableConverterPass runs later in LTO post-link pipeline.
1677 if (!LTOPreLink)
1679
1680 // Add devirtualization pass only when LTO is not enabled, as otherwise
1681 // the pass is already enabled in the LTO pipeline.
1682 if (PTO.DevirtualizeSpeculatively && LTOPhase == ThinOrFullLTOPhase::None) {
1683 // TODO: explore a better pipeline configuration that can improve
1684 // compilation time overhead.
1686 /*ExportSummary*/ nullptr,
1687 /*ImportSummary*/ nullptr,
1688 /*DevirtSpeculatively*/ PTO.DevirtualizeSpeculatively));
1690 // Given that the devirtualization creates more opportunities for inlining,
1691 // we run the Inliner again here to maximize the optimization gain we
1692 // get from devirtualization.
1693 // Also, we can't run devirtualization before inlining because the
1694 // devirtualization depends on the passes optimizing/eliminating vtable GVs
1695 // and those passes are only effective after inlining.
1696 if (EnableModuleInliner) {
1700 } else {
1703 /* MandatoryFirst */ true,
1705 }
1706 }
1707 return MPM;
1708}
1709
// Default per-module pipeline: delegates to buildO0DefaultPipeline for
// OptimizationLevel::O0; otherwise fills MPM, bracketed by
// instructionCountersPass calls before and after, appends the required LTO
// pre-link passes when Phase is a pre-link phase, and returns MPM.
// NOTE(review): the signature, the MPM declaration and most MPM.addPass
// statements are missing from this listing (gaps in the numeric prefixes);
// presumably this is PassBuilder::buildPerModuleDefaultPipeline - confirm.
1713 if (Level == OptimizationLevel::O0)
1714 return buildO0DefaultPipeline(Level, Phase);
1715
1717 instructionCountersPass(MPM, /*IsPreOptimization=*/true);
1718
1719 // Currently this pipeline is only invoked in an LTO pre link pass or when we
1720 // are not running LTO. If that changes the below checks may need updating.
1722
1723 // If we are invoking this in non-LTO mode, remove any MemProf related
1724 // attributes and metadata, as we don't know whether we are linking with
1725 // a library containing the necessary interfaces.
1728
1729 // Convert @llvm.global.annotations to !annotation metadata.
1731
1732 // Force any function attributes we want the rest of the pipeline to observe.
1734
1735 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
1737
1738 // Apply module pipeline start EP callback.
1740
1741 // Add the core simplification pipeline.
1743
1744 // Now add the optimization pipeline.
1746
1747 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1748 PGOOpt->Action == PGOOptions::SampleUse)
1750
1751 // Emit annotation remarks.
1753
1754 if (isLTOPreLink(Phase))
1755 addRequiredLTOPreLinkPasses(MPM);
1756
1757 instructionCountersPass(MPM, /*IsPreOptimization=*/false);
1758
1759 return MPM;
1760}
1761
1764 bool EmitSummary) {
     // FatLTO pipeline: embeds bitcode via EmbedBitcodePass(ThinLTO,
     // EmitSummary), runs FatLtoCleanup, then either reuses the ThinLTO
     // default pipeline (ThinLTO with sample-use PGO) or falls back to the
     // module optimization path, and returns MPM.
     // NOTE(review): the start of the signature, the MPM declaration and
     // several addPass statements are missing from this listing; presumably
     // this is PassBuilder::buildFatLTODefaultPipeline - confirm.
1766
1767 instructionCountersPass(MPM, /*IsPreOptimization=*/true);
1768
1769 if (ThinLTO)
1771 else
1773 MPM.addPass(EmbedBitcodePass(ThinLTO, EmitSummary));
1774
1775 // Perform any cleanups to the IR that aren't suitable for per TU compilation,
1776 // like removing CFI/WPD related instructions. Note, we reuse
1777 // DropTypeTestsPass to clean up type tests rather than duplicate that logic
1778 // in FatLtoCleanup.
1779 MPM.addPass(FatLtoCleanup());
1780
1781 // If we're doing FatLTO w/ CFI enabled, we don't want the type tests in the
1782 // object code, only in the bitcode section, so drop it before we run
1783 // module optimization and generate machine code. If llvm.type.test() isn't in
1784 // the IR, this won't do anything.
1786
1787 // Use the ThinLTO post-link pipeline with sample profiling
1788 if (ThinLTO && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)
1789 MPM.addPass(buildThinLTODefaultPipeline(Level, /*ImportSummary=*/nullptr));
1790 else {
1791 // ModuleSimplification does not run the coroutine passes for
1792 // ThinLTOPreLink, so we need the coroutine passes to run for ThinLTO
1793 // builds, otherwise they will miscompile.
1794 if (ThinLTO) {
1795 // TODO: replace w/ buildCoroWrapper() when it takes phase and level into
1796 // consideration.
1797 CGSCCPassManager CGPM;
1801 MPM.addPass(CoroCleanupPass());
1802 }
1803
1804 // otherwise, just use module optimization
1805 MPM.addPass(
1807 // Emit annotation remarks.
1809 }
1810
1811 instructionCountersPass(MPM, /*IsPreOptimization=*/false);
1812
1813 return MPM;
1814}
1815
// ThinLTO pre-link pipeline: simplifies the module without unrolling or
// vectorizing (per the comments below), returns early after
// addRequiredLTOPreLinkPasses when UseCtxProfile is non-empty, and otherwise
// finishes with the required LTO pre-link passes before returning MPM.
// NOTE(review): the signature, the O0 return statement, the MPM declaration
// and most addPass statements are missing from this listing; presumably this
// is PassBuilder::buildThinLTOPreLinkDefaultPipeline - confirm.
1818 if (Level == OptimizationLevel::O0)
1820
1822
1823 instructionCountersPass(MPM, /*IsPreOptimization=*/true);
1824
1825 // Convert @llvm.global.annotations to !annotation metadata.
1827
1828 // Force any function attributes we want the rest of the pipeline to observe.
1830
1831 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
1833
1834 // Apply module pipeline start EP callback.
1836
1837 // If we are planning to perform ThinLTO later, we don't bloat the code with
1838 // unrolling/vectorization/... now. Just simplify the module as much as we
1839 // can.
1842 // In pre-link, for ctx prof use, we stop here with an instrumented IR. We let
1843 // thinlto use the contextual info to perform imports; then use the contextual
1844 // profile in the post-thinlink phase.
1845 if (!UseCtxProfile.empty()) {
1846 addRequiredLTOPreLinkPasses(MPM);
1847 return MPM;
1848 }
1849
1850 // Run partial inlining pass to partially inline functions that have
1851 // large bodies.
1852 // FIXME: It isn't clear whether this is really the right place to run this
1853 // in ThinLTO. Because there is another canonicalization and simplification
1854 // phase that will run after the thin link, running this here ends up with
1855 // less information than will be available later and it may grow functions in
1856 // ways that aren't beneficial.
1859
1860 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1861 PGOOpt->Action == PGOOptions::SampleUse)
1863
1864 // Handle Optimizer{Early,Last}EPCallbacks added by clang on PreLink. Actual
1865 // optimization is going to be done in PostLink stage, but clang can't add
1866 // callbacks there in case of in-process ThinLTO called by linker.
1871
1872 // Emit annotation remarks.
1874
1875 addRequiredLTOPreLinkPasses(MPM);
1876
1877 instructionCountersPass(MPM, /*IsPreOptimization=*/false);
1878
1879 return MPM;
1880}
1881
1883 OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary) {
     // ThinLTO post-link pipeline. When ImportSummary is non-null, the
     // WholeProgramDevirt and LowerTypeTests passes are run first in import
     // mode. At O0 only a short lowering/cleanup sequence is added before
     // returning; at other levels the simplification and optimization
     // pipelines follow.
     // NOTE(review): the first signature line, the MPM declaration and
     // several addPass statements are missing from this listing (gaps in the
     // numeric prefixes).
1885
1886 instructionCountersPass(MPM, /*IsPreOptimization=*/true);
1887
1888 // If we are invoking this without a summary index noting that we are linking
1889 // with a library containing the necessary APIs, remove any MemProf related
1890 // attributes and metadata.
1891 if (!ImportSummary || !ImportSummary->withSupportsHotColdNew())
1893
1894 if (ImportSummary) {
1895 // For ThinLTO we must apply the context disambiguation decisions early, to
1896 // ensure we can correctly match the callsites to summary data.
1899 ImportSummary, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
1900
1901 // These passes import type identifier resolutions for whole-program
1902 // devirtualization and CFI. They must run early because other passes may
1903 // disturb the specific instruction patterns that these passes look for,
1904 // creating dependencies on resolutions that may not appear in the summary.
1905 //
1906 // For example, GVN may transform the pattern assume(type.test) appearing in
1907 // two basic blocks into assume(phi(type.test, type.test)), which would
1908 // transform a dependency on a WPD resolution into a dependency on a type
1909 // identifier resolution for CFI.
1910 //
1911 // Also, WPD has access to more precise information than ICP and can
1912 // devirtualize more effectively, so it should operate on the IR first.
1913 //
1914 // The WPD and LowerTypeTest passes need to run at -O0 to lower type
1915 // metadata and intrinsics.
1916 MPM.addPass(WholeProgramDevirtPass(nullptr, ImportSummary));
1917 MPM.addPass(LowerTypeTestsPass(nullptr, ImportSummary));
1918 }
1919
1920 if (Level == OptimizationLevel::O0) {
1921 // Run a second time to clean up any type tests left behind by WPD for use
1922 // in ICP.
1925
1926 // AllocToken transforms heap allocation calls; this needs to run late after
1927 // other allocation call transformations (such as those in InstCombine).
1928 MPM.addPass(AllocTokenPass());
1929
1930 // Drop available_externally and unreferenced globals. This is necessary
1931 // with ThinLTO in order to avoid leaving undefined references to dead
1932 // globals in the object file.
1934 MPM.addPass(GlobalDCEPass());
1935
1936 instructionCountersPass(MPM, /*IsPreOptimization=*/false);
1937 return MPM;
1938 }
1939 if (!UseCtxProfile.empty()) {
1940 MPM.addPass(
1942 } else {
1943 // Add the core simplification pipeline.
1946 }
1947 // Now add the optimization pipeline.
1950
1951 // Emit annotation remarks.
1953
1954 instructionCountersPass(MPM, /*IsPreOptimization=*/false);
1955
1956 return MPM;
1957}
1958
// Full-LTO pre-link currently just reuses the per-module default pipeline.
// NOTE(review): the signature and the second call argument are missing from
// this listing; presumably this is PassBuilder::buildLTOPreLinkDefaultPipeline
// - confirm.
1961 // FIXME: We should use a customized pre-link pipeline!
1962 return buildPerModuleDefaultPipeline(Level,
1964}
1965
1968 ModuleSummaryIndex *ExportSummary) {
1970
1971 instructionCountersPass(MPM, /*IsPreOptimization=*/true);
1972
1974
1975 // If we are invoking this without a summary index noting that we are linking
1976 // with a library containing the necessary APIs, remove any MemProf related
1977 // attributes and metadata.
1978 if (!ExportSummary || !ExportSummary->withSupportsHotColdNew())
1980
1981 // Create a function that performs CFI checks for cross-DSO calls with targets
1982 // in the current module.
1983 MPM.addPass(CrossDSOCFIPass());
1984
1985 if (Level == OptimizationLevel::O0) {
1986 // The WPD and LowerTypeTest passes need to run at -O0 to lower type
1987 // metadata and intrinsics.
1988 MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
1989 MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
1990 // Run a second time to clean up any type tests left behind by WPD for use
1991 // in ICP.
1993
1995
1996 // AllocToken transforms heap allocation calls; this needs to run late after
1997 // other allocation call transformations (such as those in InstCombine).
1998 MPM.addPass(AllocTokenPass());
1999
2001
2002 // Emit annotation remarks.
2004
2005 return MPM;
2006 }
2007
2008 if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) {
2009 // Load sample profile before running the LTO optimization pipeline.
2010 MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
2011 PGOOpt->ProfileRemappingFile,
2013 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
2014 // RequireAnalysisPass for PSI before subsequent non-module passes.
2016 }
2017
2018 // Try to run OpenMP optimizations, quick no-op if no OpenMP metadata present.
2020
2021 // Remove unused virtual tables to improve the quality of code generated by
2022 // whole-program devirtualization and bitset lowering.
2023 MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));
2024
2025 // Do basic inference of function attributes from known properties of system
2026 // libraries and other oracles.
2028
2029 if (Level.getSpeedupLevel() > 1) {
2031 CallSiteSplittingPass(), PTO.EagerlyInvalidateAnalyses));
2032
2033 // Indirect call promotion. This should promote all the targets that are
2034 // left by the earlier promotion pass that promotes intra-module targets.
2035 // This two-step promotion is to save the compile time. For LTO, it should
2036 // produce the same result as if we only do promotion here.
2038 true /* InLTO */, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
2039
2040 // Promoting by-reference arguments to by-value exposes more constants to
2041 // IPSCCP.
2042 CGSCCPassManager CGPM;
2045 CGPM.addPass(
2048
2049 // Propagate constants at call sites into the functions they call. This
2050 // opens opportunities for globalopt (and inlining) by substituting function
2051 // pointers passed as arguments to direct uses of functions.
2052 MPM.addPass(IPSCCPPass(IPSCCPOptions(/*AllowFuncSpec=*/true)));
2053
2054 // Attach metadata to indirect call sites indicating the set of functions
2055 // they may target at run-time. This should follow IPSCCP.
2057 }
2058
2059 // Do RPO function attribute inference across the module to forward-propagate
2060 // attributes where applicable.
2061 // FIXME: Is this really an optimization rather than a canonicalization?
2063
2064 // Use in-range annotations on GEP indices to split globals where beneficial.
2065 MPM.addPass(GlobalSplitPass());
2066
2067 // Run whole program optimization of virtual call when the list of callees
2068 // is fixed.
2069 MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
2070
2072 // Stop here at -O1.
2073 if (Level == OptimizationLevel::O1) {
2074 // The LowerTypeTestsPass needs to run to lower type metadata and the
2075 // type.test intrinsics. The pass does nothing if CFI is disabled.
2076 MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
2077 // Run a second time to clean up any type tests left behind by WPD for use
2078 // in ICP (which is performed earlier than this in the regular LTO
2079 // pipeline).
2081
2083
2084 // AllocToken transforms heap allocation calls; this needs to run late after
2085 // other allocation call transformations (such as those in InstCombine).
2086 MPM.addPass(AllocTokenPass());
2087
2089
2090 // Emit annotation remarks.
2092
2093 return MPM;
2094 }
2095
2096 // TODO: Skip to match buildCoroWrapper.
2097 MPM.addPass(CoroEarlyPass());
2098
2099 // Optimize globals to try and fold them into constants.
2100 MPM.addPass(GlobalOptPass());
2101
2102 // Promote any localized globals to SSA registers.
2104
2105 // Linking modules together can lead to duplicate global constants; only
2106 // keep one copy of each constant.
2108
2109 // Remove unused arguments from functions.
2111
2112 // Reduce the code after globalopt and ipsccp. Both can open up significant
2113 // simplification opportunities, and both can propagate functions through
2114 // function pointers. When this happens, we often have to resolve varargs
2115 // calls, etc, so let instcombine do this.
2116 FunctionPassManager PeepholeFPM;
2117 PeepholeFPM.addPass(InstCombinePass());
2118 if (Level.getSpeedupLevel() > 1)
2119 PeepholeFPM.addPass(AggressiveInstCombinePass());
2120 invokePeepholeEPCallbacks(PeepholeFPM, Level);
2121
2122 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(PeepholeFPM),
2123 PTO.EagerlyInvalidateAnalyses));
2124
2125 // Lower variadic functions for supported targets prior to inlining.
2127
2128 // Note: historically, the PruneEH pass was run first to deduce nounwind and
2129 // generally clean up exception handling overhead. It isn't clear this is
2130 // valuable as the inliner doesn't currently care whether it is inlining an
2131 // invoke or a call.
2132 // Run the inliner now.
2133 if (EnableModuleInliner) {
2137 } else {
2140 /* MandatoryFirst */ true,
2143 }
2144
2145 // Perform context disambiguation after inlining, since that would reduce the
2146 // amount of additional cloning required to distinguish the allocation
2147 // contexts.
2150 /*Summary=*/nullptr,
2151 PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
2152
2153 // Optimize globals again after we ran the inliner.
2154 MPM.addPass(GlobalOptPass());
2155
2156 // Run the OpenMPOpt pass again after global optimizations.
2158
2159 // Garbage collect dead functions.
2160 MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));
2161
2162 // If we didn't decide to inline a function, check to see if we can
2163 // transform it to pass arguments by value instead of by reference.
2164 CGSCCPassManager CGPM;
2169
2171 // The IPO Passes may leave cruft around. Clean up after them.
2172 FPM.addPass(InstCombinePass());
2173 invokePeepholeEPCallbacks(FPM, Level);
2174
2177
2179
2180 // Do a post inline PGO instrumentation and use pass. This is a context
2181 // sensitive PGO pass.
2182 if (PGOOpt) {
2183 if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
2184 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true,
2185 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
2186 PGOOpt->CSProfileGenFile, PGOOpt->ProfileRemappingFile);
2187 else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
2188 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false,
2189 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
2190 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile);
2191 }
2192
2193 // Break up allocas
2195
2196 // LTO provides additional opportunities for tailcall elimination due to
2197 // link-time inlining, and visibility of nocapture attribute.
2198 FPM.addPass(
2199 TailCallElimPass(/*UpdateFunctionEntryCount=*/isInstrumentedPGOUse()));
2200
2201 // Run a few AA driver optimizations here and now to clean up the code.
2202 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM),
2203 PTO.EagerlyInvalidateAnalyses));
2204
2205 MPM.addPass(
2207
2208 // Require the GlobalsAA analysis for the module so we can query it within
2209 // MainFPM.
2212 // Invalidate AAManager so it can be recreated and pick up the newly
2213 // available GlobalsAA.
2214 MPM.addPass(
2216 }
2217
2218 FunctionPassManager MainFPM;
2220 LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
2221 /*AllowSpeculation=*/true),
2222 /*USeMemorySSA=*/true));
2223
2224 if (RunNewGVN)
2225 MainFPM.addPass(NewGVNPass());
2226 else
2227 MainFPM.addPass(GVNPass());
2228
2229 // Remove dead memcpy()'s.
2230 MainFPM.addPass(MemCpyOptPass());
2231
2232 // Nuke dead stores.
2233 MainFPM.addPass(DSEPass());
2234 MainFPM.addPass(MoveAutoInitPass());
2236
2237 invokeVectorizerStartEPCallbacks(MainFPM, Level);
2238
2239 LoopPassManager LPM;
2240 if (EnableLoopFlatten && Level.getSpeedupLevel() > 1)
2241 LPM.addPass(LoopFlattenPass());
2242 LPM.addPass(IndVarSimplifyPass());
2243 LPM.addPass(LoopDeletionPass());
2244 // FIXME: Add loop interchange.
2245
2246 // Unroll small loops and perform peeling.
2247 LPM.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
2248 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
2249 PTO.ForgetAllSCEVInLoopUnroll));
2250 // The loop passes in LPM (LoopFullUnrollPass) do not preserve MemorySSA.
2251 // *All* loop passes must preserve it, in order to be able to use it.
2252 MainFPM.addPass(
2253 createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/false));
2254
2255 MainFPM.addPass(LoopDistributePass());
2256
2257 addVectorPasses(Level, MainFPM, ThinOrFullLTOPhase::FullLTOPostLink);
2258
2259 invokeVectorizerEndEPCallbacks(MainFPM, Level);
2260
2261 // Run the OpenMPOpt CGSCC pass again late.
2264
2265 invokePeepholeEPCallbacks(MainFPM, Level);
2266 MainFPM.addPass(JumpThreadingPass());
2267 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(MainFPM),
2268 PTO.EagerlyInvalidateAnalyses));
2269
2270 // Lower type metadata and the type.test intrinsic. This pass supports
2271 // clang's control flow integrity mechanisms (-fsanitize=cfi*) and needs
2272 // to be run at link time if CFI is enabled. This pass does nothing if
2273 // CFI is disabled.
2274 MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
2275 // Run a second time to clean up any type tests left behind by WPD for use
2276 // in ICP (which is performed earlier than this in the regular LTO pipeline).
2278
2279 // Enable splitting late in the FullLTO post-link pipeline.
2282
2283 // Add late LTO optimization passes.
2284 FunctionPassManager LateFPM;
2285
2286 // LoopSink pass sinks instructions hoisted by LICM, which serves as a
2287 // canonicalization pass that enables other optimizations. As a result,
2288 // LoopSink pass needs to be a very late IR pass to avoid undoing LICM
2289 // result too early.
2290 LateFPM.addPass(LoopSinkPass());
2291
2292 // This hoists/decomposes div/rem ops. It should run after other sink/hoist
2293 // passes to avoid re-sinking, but before SimplifyCFG because it can allow
2294 // flattening of blocks.
2295 LateFPM.addPass(DivRemPairsPass());
2296
2297 // Delete basic blocks, which optimization passes may have killed.
2299 .convertSwitchRangeToICmp(true)
2300 .convertSwitchToArithmetic(true)
2301 .hoistCommonInsts(true)
2302 .speculateUnpredictables(true)));
2303 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(LateFPM)));
2304
2305 // Drop bodies of available externally objects to improve GlobalDCE.
2307
2308 // Now that we have optimized the program, discard unreachable functions.
2309 MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));
2310
2311 if (PTO.MergeFunctions)
2313
2315
2316 if (PTO.CallGraphProfile)
2317 MPM.addPass(CGProfilePass(/*InLTOPostLink=*/true));
2318
2319 MPM.addPass(CoroCleanupPass());
2320
2321 // AllocToken transforms heap allocation calls; this needs to run late after
2322 // other allocation call transformations (such as those in InstCombine).
2323 MPM.addPass(AllocTokenPass());
2324
2326
2327 // Emit annotation remarks.
2329
2330 instructionCountersPass(MPM, /*IsPreOptimization=*/false);
2331
2332 return MPM;
2333}
2334
2338 assert(Level == OptimizationLevel::O0 &&
2339 "buildO0DefaultPipeline should only be used with O0");
2340
2342
2343 instructionCountersPass(MPM, /*IsPreOptimization=*/true);
2344
2345 // Perform pseudo probe instrumentation in O0 mode. This is for the
2346 // consistency between different build modes. For example, a LTO build can be
2347 // mixed with an O0 prelink and an O2 postlink. Loading a sample profile in
2348 // the postlink will require pseudo probe instrumentation in the prelink.
2349 if (PGOOpt && PGOOpt->PseudoProbeForProfiling)
2351
2352 if (PGOOpt && (PGOOpt->Action == PGOOptions::IRInstr ||
2353 PGOOpt->Action == PGOOptions::IRUse))
2355 MPM,
2356 /*RunProfileGen=*/(PGOOpt->Action == PGOOptions::IRInstr),
2357 /*IsCS=*/false, PGOOpt->AtomicCounterUpdate, PGOOpt->ProfileFile,
2358 PGOOpt->ProfileRemappingFile);
2359
2360 // Instrument function entry and exit before all inlining.
2362 EntryExitInstrumenterPass(/*PostInlining=*/false)));
2363
2365
2366 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
2368
2369 if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) {
2370 // Explicitly disable sample loader inlining and use flattened profile in O0
2371 // pipeline.
2372 MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
2373 PGOOpt->ProfileRemappingFile,
2375 /*DisableSampleProfileInlining=*/true,
2376 /*UseFlattenedProfile=*/true));
2377 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
2378 // RequireAnalysisPass for PSI before subsequent non-module passes.
2380 }
2381
2383
2384 // Build a minimal pipeline based on the semantics required by LLVM,
2385 // which is just that always inlining occurs. Further, disable generating
2386 // lifetime intrinsics to avoid enabling further optimizations during
2387 // code generation.
2389 /*InsertLifetimeIntrinsics=*/false));
2390
2391 if (PTO.MergeFunctions)
2393
2394 if (EnableMatrix)
2395 MPM.addPass(
2397
2398 if (!CGSCCOptimizerLateEPCallbacks.empty()) {
2399 CGSCCPassManager CGPM;
2401 if (!CGPM.isEmpty())
2403 }
2404 if (!LateLoopOptimizationsEPCallbacks.empty()) {
2405 LoopPassManager LPM;
2407 if (!LPM.isEmpty()) {
2409 createFunctionToLoopPassAdaptor(std::move(LPM))));
2410 }
2411 }
2412 if (!LoopOptimizerEndEPCallbacks.empty()) {
2413 LoopPassManager LPM;
2415 if (!LPM.isEmpty()) {
2417 createFunctionToLoopPassAdaptor(std::move(LPM))));
2418 }
2419 }
2420 if (!ScalarOptimizerLateEPCallbacks.empty()) {
2423 if (!FPM.isEmpty())
2424 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
2425 }
2426
2428
2429 if (!VectorizerStartEPCallbacks.empty()) {
2432 if (!FPM.isEmpty())
2433 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
2434 }
2435
2436 if (!VectorizerEndEPCallbacks.empty()) {
2439 if (!FPM.isEmpty())
2440 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
2441 }
2442
2444
2445 // AllocToken transforms heap allocation calls; this needs to run late after
2446 // other allocation call transformations (such as those in InstCombine).
2447 if (!isLTOPreLink(Phase))
2448 MPM.addPass(AllocTokenPass());
2449
2451
2452 if (isLTOPreLink(Phase))
2453 addRequiredLTOPreLinkPasses(MPM);
2454
2455 // Emit annotation remarks.
2457
2458 instructionCountersPass(MPM, /*IsPreOptimization=*/false);
2459
2460 return MPM;
2461}
2462
2464 AAManager AA;
2465
2466 // The order in which these are registered determines their priority when
2467 // being queried.
2468
2469 // Add any target-specific alias analyses that should be run early.
2470 if (TM)
2471 TM->registerEarlyDefaultAliasAnalyses(AA);
2472
2473 // First we register the basic alias analysis that provides the majority of
2474 // per-function local AA logic. This is a stateless, on-demand local set of
2475 // AA techniques.
2476 AA.registerFunctionAnalysis<BasicAA>();
2477
2478 // Next we query fast, specialized alias analyses that wrap IR-embedded
2479 // information about aliasing.
2480 AA.registerFunctionAnalysis<ScopedNoAliasAA>();
2481 AA.registerFunctionAnalysis<TypeBasedAA>();
2482
2483 // Add support for querying global aliasing information when available.
2484 // Because the `AAManager` is a function analysis and `GlobalsAA` is a module
2485 // analysis, all that the `AAManager` can do is query for any *cached*
2486 // results from `GlobalsAA` through a readonly proxy.
2488 AA.registerModuleAnalysis<GlobalsAA>();
2489
2490 // Add target-specific alias analyses.
2491 if (TM)
2492 TM->registerDefaultAliasAnalyses(AA);
2493
2494 return AA;
2495}
2496
/// Reports whether an instrumentation-based profile is being consumed.
///
/// \returns true when PGOOpt is set and its Action is PGOOptions::IRUse, or
/// when the UseCtxProfile option holds a non-empty string; false otherwise.
2497bool PassBuilder::isInstrumentedPGOUse() const {
2498 return (PGOOpt && PGOOpt->Action == PGOOptions::IRUse) ||
2499 !UseCtxProfile.empty();
2500}
aarch64 falkor hwpf fix Falkor HW Prefetch Fix Late Phase
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AggressiveInstCombiner - Combine expression patterns to form expressions with fewer,...
Provides passes to inlining "always_inline" functions.
This is the interface for LLVM's primary stateless and local alias analysis.
This file provides the interface for LLVM's Call Graph Profile pass.
This header provides classes for managing passes over SCCs of the call graph.
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
This file provides the interface for a simple, fast CSE pass.
This file provides a pass which clones the current module and runs the provided pass pipeline on the ...
This file provides a pass manager that only runs its passes if the provided marker analysis has been ...
Super simple passes to force specific function attrs from the commandline into the IR for debugging p...
Provides passes for computing function attributes based on interprocedural analyses.
This file provides the interface for LLVM's Global Value Numbering pass which eliminates fully redund...
This is the interface for a simple mod/ref and alias analysis over globals.
AcceleratorCodeSelection - Identify all functions reachable from a kernel, removing those that are un...
This header defines various interfaces for pass management in LLVM.
Interfaces for passes which infer implicit function attributes from the name and signature of functio...
This file provides the primary interface to the instcombine pass.
Defines passes for running instruction simplification across chunks of IR.
This file provides the interface for LLVM's PGO Instrumentation lowering pass.
See the comments on JumpThreadingPass.
static LVOptions Options
Definition LVOptions.cpp:25
This file implements the Loop Fusion pass.
This header defines the LoopLoadEliminationPass object.
This header provides classes for managing a pipeline of passes over loops in LLVM IR.
The header file for the LowerConstantIntrinsics pass as used by the new pass manager.
The header file for the LowerExpectIntrinsic pass as used by the new pass manager.
This pass performs merges of loads and stores on both sides of a diamond (hammock).
This file provides the interface for LLVM's Global Value Numbering pass.
This header enumerates the LLVM-provided high-level optimization levels.
This file provides the interface for IR based instrumentation passes ( (profile-gen,...
Define option tunables for PGO.
static void addAnnotationRemarksPass(ModulePassManager &MPM)
static CoroConditionalWrapper buildCoroWrapper(ThinOrFullLTOPhase Phase)
static bool isLTOPreLink(ThinOrFullLTOPhase Phase)
static void instructionCountersPass(ModulePassManager &MPM, bool IsPreOptimization)
static bool isLTOPostLink(ThinOrFullLTOPhase Phase)
This file implements relative lookup table converter that converts lookup tables to relative lookup t...
This file provides the interface for LLVM's Scalar Replacement of Aggregates pass.
This file provides the interface for the pseudo probe implementation for AutoFDO.
This file provides the interface for the sampled PGO loader pass.
This is the interface for a metadata-based scoped no-alias analysis.
This file provides the interface for the pass responsible for both simplifying and canonicalizing the...
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
This is the interface for a metadata-based TBAA.
Defines the virtual file system interface vfs::FileSystem.
A manager for alias analyses.
A module pass that rewrites heap allocations to use token-enabled allocation functions based on vario...
Definition AllocToken.h:36
Inlines functions marked as "always_inline".
Argument promotion pass.
Assign a GUID to functions as metadata.
Analysis pass providing a never-invalidated alias analysis result.
Simple pass that canonicalizes aliases.
A pass that merges duplicate global constants into a single constant.
This class implements a trivial dead store elimination.
Eliminate dead arguments (and return values) from functions.
A pass that transforms external global definitions into declarations.
Pass embeds a copy of the module optimized with the provided pass pipeline into a global variable.
A pass manager to run a set of extra loop passes if the MarkerTy analysis is present.
Statistics pass for the FunctionPropertiesAnalysis results.
The core GVN pass object.
Definition GVN.h:128
Pass to remove unused function declarations.
Definition GlobalDCE.h:38
Optimize globals that never have their address taken.
Definition GlobalOpt.h:25
Pass to perform split of global variables.
Definition GlobalSplit.h:26
Analysis pass providing a never-invalidated alias analysis result.
Pass to outline cold regions.
Pass to perform interprocedural constant propagation.
Definition SCCP.h:48
Pass to outline similar regions.
Definition IROutliner.h:468
Run instruction simplification across each instruction in the function.
Instrumentation based profiling lowering pass.
This pass performs 'jump threading', which looks at blocks that have multiple predecessors and multip...
Performs Loop Invariant Code Motion Pass.
Definition LICM.h:66
Loop unroll pass that only does full loop unrolling and peeling.
Performs Loop Idiom Recognize Pass.
Performs Loop Inst Simplify Pass.
A simple loop rotation transformation.
Performs basic CFG simplifications to assist other loop passes.
A pass that does profile-guided sinking of instructions into loops.
Definition LoopSink.h:33
A simple loop rotation transformation.
Loop unroll pass that will support both full and partial unrolling.
Strips MemProf attributes and metadata.
Merge identical functions.
The module inliner pass for the new pass manager.
Module pass, wrapping the inliner pass.
Definition Inliner.h:65
void addModulePass(T Pass)
Add a module pass that runs before the CGSCC passes.
Definition Inliner.h:81
Class to hold module path string table and global value map, and encapsulate methods for operating on...
Simple pass that provides a name to every anonymous globals.
Additional 'norecurse' attribute deduction during postlink LTO phase.
OpenMP optimizations pass.
Definition OpenMPOpt.h:42
static LLVM_ABI const OptimizationLevel O3
Optimize for fast execution as much as possible.
static LLVM_ABI const OptimizationLevel O0
Disable as many optimizations as possible.
static LLVM_ABI const OptimizationLevel O2
Optimize for fast execution as much as possible without triggering significant incremental compile ti...
static LLVM_ABI const OptimizationLevel O1
Optimize quickly without destroying debuggability.
The indirect function call promotion pass.
The instrumentation (profile-instr-gen) pass for IR based PGO.
The instrumentation (profile-instr-gen) pass for IR based PGO.
The profile annotation (profile-instr-use) pass for IR based PGO.
The profile size based optimization pass for memory intrinsics.
Pass to remove unused function declarations.
LLVM_ABI void invokeFullLinkTimeOptimizationLastEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level)
LLVM_ABI ModuleInlinerWrapperPass buildInlinerPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the module pipeline that performs inlining as well as the inlining-driven cleanups.
LLVM_ABI void invokeOptimizerEarlyEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level, ThinOrFullLTOPhase Phase)
LLVM_ABI void invokeVectorizerStartEPCallbacks(FunctionPassManager &FPM, OptimizationLevel Level)
LLVM_ABI AAManager buildDefaultAAPipeline()
Build the default AAManager with the default alias analysis pipeline registered.
LLVM_ABI void invokeCGSCCOptimizerLateEPCallbacks(CGSCCPassManager &CGPM, OptimizationLevel Level)
LLVM_ABI ModulePassManager buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level)
Build a pre-link, ThinLTO-targeting default optimization pipeline to a pass manager.
LLVM_ABI void addPGOInstrPassesForO0(ModulePassManager &MPM, bool RunProfileGen, bool IsCS, bool AtomicCounterUpdate, std::string ProfileFile, std::string ProfileRemappingFile)
Add PGO instrumentation passes for O0 only.
LLVM_ABI void invokeScalarOptimizerLateEPCallbacks(FunctionPassManager &FPM, OptimizationLevel Level)
LLVM_ABI ModulePassManager buildPerModuleDefaultPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase=ThinOrFullLTOPhase::None)
Build a per-module default optimization pipeline.
LLVM_ABI void invokePipelineStartEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level)
LLVM_ABI void invokeVectorizerEndEPCallbacks(FunctionPassManager &FPM, OptimizationLevel Level)
LLVM_ABI ModulePassManager buildO0DefaultPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase=ThinOrFullLTOPhase::None)
Build an O0 pipeline with the minimal semantically required passes.
LLVM_ABI FunctionPassManager buildFunctionSimplificationPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the core LLVM function canonicalization and simplification pipeline.
LLVM_ABI void invokePeepholeEPCallbacks(FunctionPassManager &FPM, OptimizationLevel Level)
LLVM_ABI void invokePipelineEarlySimplificationEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level, ThinOrFullLTOPhase Phase)
LLVM_ABI void invokeLoopOptimizerEndEPCallbacks(LoopPassManager &LPM, OptimizationLevel Level)
LLVM_ABI ModulePassManager buildLTODefaultPipeline(OptimizationLevel Level, ModuleSummaryIndex *ExportSummary)
Build an LTO default optimization pipeline to a pass manager.
LLVM_ABI ModulePassManager buildModuleInlinerPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the module pipeline that performs inlining with module inliner pass.
LLVM_ABI ModulePassManager buildThinLTODefaultPipeline(OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary)
Build a ThinLTO default optimization pipeline to a pass manager.
LLVM_ABI void invokeLateLoopOptimizationsEPCallbacks(LoopPassManager &LPM, OptimizationLevel Level)
LLVM_ABI void invokeFullLinkTimeOptimizationEarlyEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level)
LLVM_ABI ModulePassManager buildFatLTODefaultPipeline(OptimizationLevel Level, bool ThinLTO, bool EmitSummary)
Build a fat object default optimization pipeline.
LLVM_ABI ModulePassManager buildModuleSimplificationPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the core LLVM module canonicalization and simplification pipeline.
LLVM_ABI ModulePassManager buildModuleOptimizationPipeline(OptimizationLevel Level, ThinOrFullLTOPhase LTOPhase)
Construct the core LLVM module optimization pipeline.
LLVM_ABI void invokeOptimizerLastEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level, ThinOrFullLTOPhase Phase)
LLVM_ABI ModulePassManager buildLTOPreLinkDefaultPipeline(OptimizationLevel Level)
Build a pre-link, LTO-targeting default optimization pipeline to a pass manager.
LLVM_ATTRIBUTE_MINSIZE std::enable_if_t<!std::is_same_v< PassT, PassManager > > addPass(PassT &&Pass)
bool isEmpty() const
Returns if the pass manager contains any passes.
unsigned LicmMssaNoAccForPromotionCap
Tuning option to disable promotion to scalars in LICM with MemorySSA, if the number of access is too ...
Definition PassBuilder.h:78
bool SLPVectorization
Tuning option to enable/disable slp loop vectorization, set based on opt level.
Definition PassBuilder.h:56
int InlinerThreshold
Tuning option to override the default inliner threshold.
Definition PassBuilder.h:92
bool LoopFusion
Tuning option to enable/disable loop fusion. Its default value is false.
Definition PassBuilder.h:66
bool CallGraphProfile
Tuning option to enable/disable call graph profile.
Definition PassBuilder.h:82
bool MergeFunctions
Tuning option to enable/disable function merging.
Definition PassBuilder.h:89
bool ForgetAllSCEVInLoopUnroll
Tuning option to forget all SCEV loops in LoopUnroll.
Definition PassBuilder.h:70
unsigned LicmMssaOptCap
Tuning option to cap the number of calls to retrieve clobbering accesses in MemorySSA,...
Definition PassBuilder.h:74
bool LoopInterleaving
Tuning option to set loop interleaving on/off, set based on opt level.
Definition PassBuilder.h:48
LLVM_ABI PipelineTuningOptions()
Constructor sets pipeline tuning defaults based on cl::opts.
bool LoopUnrolling
Tuning option to enable/disable loop unrolling. Its default value is true.
Definition PassBuilder.h:59
bool LoopInterchange
Tuning option to enable/disable loop interchange.
Definition PassBuilder.h:63
bool LoopVectorization
Tuning option to enable/disable loop vectorization, set based on opt level.
Definition PassBuilder.h:52
Reassociate commutative expressions.
Definition Reassociate.h:74
A pass to do RPO deduction and propagation of function attributes.
This pass performs function-level constant propagation and merging.
Definition SCCP.h:30
The sample profiler data loader pass.
Analysis pass providing a never-invalidated alias analysis result.
This pass transforms loops that contain branches or switches on loop- invariant conditions to have mu...
A pass to simplify and canonicalize the CFG of a function.
Definition SimplifyCFG.h:30
Analysis pass providing a never-invalidated alias analysis result.
Optimize scalar/vector interactions in IR using target cost models.
Interfaces for registering analysis passes, producing common pass manager configurations,...
Abstract Attribute helper functions.
Definition Attributor.h:165
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
@ All
Drop only llvm.assumes using type test value.
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI cl::opt< bool > EnableKnowledgeRetention
static cl::opt< bool > RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden, cl::desc("Run the NewGVN pass"))
static cl::opt< bool > DisablePreInliner("disable-preinline", cl::init(false), cl::Hidden, cl::desc("Disable pre-instrumentation inliner"))
static cl::opt< bool > EnableJumpTableToSwitch("enable-jump-table-to-switch", cl::desc("Enable JumpTableToSwitch pass (default = off)"))
static cl::opt< bool > PerformMandatoryInliningsFirst("mandatory-inlining-first", cl::init(false), cl::Hidden, cl::desc("Perform mandatory inlinings module-wide, before performing " "inlining"))
static cl::opt< bool > RunPartialInlining("enable-partial-inlining", cl::init(false), cl::Hidden, cl::desc("Run Partial inlining pass"))
static cl::opt< bool > EnableGVNSink("enable-gvn-sink", cl::desc("Enable the GVN sinking pass (default = off)"))
static cl::opt< bool > EnableModuleInliner("enable-module-inliner", cl::init(false), cl::Hidden, cl::desc("Enable module inliner"))
static cl::opt< bool > EnableEagerlyInvalidateAnalyses("eagerly-invalidate-analyses", cl::init(true), cl::Hidden, cl::desc("Eagerly invalidate more analyses in default pipelines"))
static cl::opt< bool > EnableMatrix("enable-matrix", cl::init(false), cl::Hidden, cl::desc("Enable lowering of the matrix intrinsics"))
ModuleToFunctionPassAdaptor createModuleToFunctionPassAdaptor(FunctionPassT &&Pass, bool EagerlyInvalidate=false)
A function to deduce a function pass type and wrap it in the templated adaptor.
cl::opt< std::string > UseCtxProfile("use-ctx-profile", cl::init(""), cl::Hidden, cl::desc("Use the specified contextual profile file"))
static cl::opt< bool > EnableSampledInstr("enable-sampled-instrumentation", cl::init(false), cl::Hidden, cl::desc("Enable profile instrumentation sampling (default = off)"))
static cl::opt< bool > EnableLoopFlatten("enable-loop-flatten", cl::init(false), cl::Hidden, cl::desc("Enable the LoopFlatten Pass"))
static cl::opt< InliningAdvisorMode > UseInlineAdvisor("enable-ml-inliner", cl::init(InliningAdvisorMode::Default), cl::Hidden, cl::desc("Enable ML policy for inliner. Currently trained for -Oz only"), cl::values(clEnumValN(InliningAdvisorMode::Default, "default", "Heuristics-based inliner version"), clEnumValN(InliningAdvisorMode::Development, "development", "Use development mode (runtime-loadable model)"), clEnumValN(InliningAdvisorMode::Release, "release", "Use release mode (AOT-compiled model)")))
PassManager< LazyCallGraph::SCC, CGSCCAnalysisManager, LazyCallGraph &, CGSCCUpdateResult & > CGSCCPassManager
The CGSCC pass manager.
static cl::opt< bool > EnableUnrollAndJam("enable-unroll-and-jam", cl::init(false), cl::Hidden, cl::desc("Enable Unroll And Jam Pass"))
@ CGSCC_LIGHT
@ MODULE_LIGHT
ThinOrFullLTOPhase
This enumerates the LLVM full LTO or ThinLTO optimization phases.
Definition Pass.h:77
@ FullLTOPreLink
Full LTO prelink phase.
Definition Pass.h:85
@ ThinLTOPostLink
ThinLTO postlink (backend compile) phase.
Definition Pass.h:83
@ None
No LTO/ThinLTO behavior needed.
Definition Pass.h:79
@ FullLTOPostLink
Full LTO postlink (backend compile) phase.
Definition Pass.h:87
@ ThinLTOPreLink
ThinLTO prelink (summary) phase.
Definition Pass.h:81
PassManager< Loop, LoopAnalysisManager, LoopStandardAnalysisResults &, LPMUpdater & > LoopPassManager
The Loop pass manager.
static cl::opt< bool > EnableConstraintElimination("enable-constraint-elimination", cl::init(true), cl::Hidden, cl::desc("Enable pass to eliminate conditions based on linear constraints"))
ModuleToPostOrderCGSCCPassAdaptor createModuleToPostOrderCGSCCPassAdaptor(CGSCCPassT &&Pass)
A function to deduce a CGSCC pass type and wrap it in the templated adaptor.
static cl::opt< bool > EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true), cl::Hidden, cl::desc("Enable inline deferral during PGO"))
Flag to enable inline deferral during PGO.
FunctionToLoopPassAdaptor createFunctionToLoopPassAdaptor(LoopPassT &&Pass, bool UseMemorySSA=false)
A function to deduce a loop pass type and wrap it in the templated adaptor.
CGSCCToFunctionPassAdaptor createCGSCCToFunctionPassAdaptor(FunctionPassT &&Pass, bool EagerlyInvalidate=false, bool NoRerun=false)
A function to deduce a function pass type and wrap it in the templated adaptor.
cl::opt< bool > ForgetSCEVInLoopUnroll
PassManager< Module > ModulePassManager
Convenience typedef for a pass manager over modules.
static cl::opt< bool > EnablePostPGOLoopRotation("enable-post-pgo-loop-rotation", cl::init(true), cl::Hidden, cl::desc("Run the loop rotation transformation after PGO instrumentation"))
LLVM_ABI bool AreStatisticsEnabled()
Check if statistics are enabled.
static cl::opt< std::string > InstrumentColdFuncOnlyPath("instrument-cold-function-only-path", cl::init(""), cl::desc("File path for cold function only instrumentation(requires use " "with --pgo-instrument-cold-function-only)"), cl::Hidden)
static cl::opt< bool > EnableGlobalAnalyses("enable-global-analyses", cl::init(true), cl::Hidden, cl::desc("Enable inter-procedural analyses"))
static cl::opt< bool > EnableDFAJumpThreading("enable-dfa-jump-thread", cl::desc("Enable DFA jump threading"), cl::init(false), cl::Hidden)
static cl::opt< bool > FlattenedProfileUsed("flattened-profile-used", cl::init(false), cl::Hidden, cl::desc("Indicate the sample profile being used is flattened, i.e., " "no inline hierarchy exists in the profile"))
static cl::opt< AttributorRunOption > AttributorRun("attributor-enable", cl::Hidden, cl::init(AttributorRunOption::NONE), cl::desc("Enable the attributor inter-procedural deduction pass"), cl::values(clEnumValN(AttributorRunOption::FULL, "full", "enable all full attributor runs"), clEnumValN(AttributorRunOption::LIGHT, "light", "enable all attributor-light runs"), clEnumValN(AttributorRunOption::MODULE, "module", "enable module-wide attributor runs"), clEnumValN(AttributorRunOption::MODULE_LIGHT, "module-light", "enable module-wide attributor-light runs"), clEnumValN(AttributorRunOption::CGSCC, "cgscc", "enable call graph SCC attributor runs"), clEnumValN(AttributorRunOption::CGSCC_LIGHT, "cgscc-light", "enable call graph SCC attributor-light runs"), clEnumValN(AttributorRunOption::NONE, "none", "disable attributor runs")))
static cl::opt< bool > ExtraVectorizerPasses("extra-vectorizer-passes", cl::init(false), cl::Hidden, cl::desc("Run cleanup optimization passes after vectorization"))
static cl::opt< bool > EnableHotColdSplit("hot-cold-split", cl::desc("Enable hot-cold splitting pass"))
cl::opt< bool > EnableMemProfContextDisambiguation
Enable MemProf context disambiguation for thin link.
PassManager< Function > FunctionPassManager
Convenience typedef for a pass manager over functions.
LLVM_ABI InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options.
cl::opt< bool > PGOInstrumentColdFunctionOnly
static cl::opt< bool > EnableLoopInterchange("enable-loopinterchange", cl::init(false), cl::Hidden, cl::desc("Enable the LoopInterchange Pass"))
static cl::opt< bool > EnableCHR("enable-chr", cl::init(true), cl::Hidden, cl::desc("Enable control height reduction optimization (CHR)"))
static cl::opt< bool > EnableMergeFunctions("enable-merge-functions", cl::init(false), cl::Hidden, cl::desc("Enable function merging as part of the optimization pipeline"))
static cl::opt< bool > EnableDevirtualizeSpeculatively("enable-devirtualize-speculatively", cl::desc("Enable speculative devirtualization optimization"), cl::init(false))
static cl::opt< bool > EnableGVNHoist("enable-gvn-hoist", cl::desc("Enable the GVN hoisting pass (default = off)"))
cl::opt< unsigned > SetLicmMssaNoAccForPromotionCap
LLVM_ABI InlineParams getInlineParamsFromOptLevel(unsigned OptLevel)
Generate the parameters to tune the inline cost analysis based on command line options.
static cl::opt< bool > EnableIROutliner("ir-outliner", cl::init(false), cl::Hidden, cl::desc("Enable ir outliner pass"))
static cl::opt< int > PreInlineThreshold("preinline-threshold", cl::Hidden, cl::init(75), cl::desc("Control the amount of inlining in pre-instrumentation inliner " "(default = 75)"))
static cl::opt< bool > UseLoopVersioningLICM("enable-loop-versioning-licm", cl::init(false), cl::Hidden, cl::desc("Enable the experimental Loop Versioning LICM pass"))
cl::opt< unsigned > MaxDevirtIterations("max-devirt-iterations", cl::ReallyHidden, cl::init(4))
cl::opt< unsigned > SetLicmMssaOptCap
static cl::opt< bool > EnableMergeICmps("enable-mergeicmps", cl::init(true), cl::Hidden, cl::desc("Enable MergeICmps pass in the optimization pipeline"))
A DCE pass that assumes instructions are dead until proven otherwise.
Definition ADCE.h:31
Pass to convert @llvm.global.annotations to !annotation metadata.
This pass attempts to minimize the number of assumes without losing any information.
A more lightweight version of the Attributor which only runs attribute inference but no simplifications.
A more lightweight version of the Attributor which only runs attribute inference but no simplifications.
Hoist/decompose integer division and remainder instructions to enable CFG improvements and better cod...
Definition DivRemPairs.h:23
A simple and fast domtree-based CSE pass.
Definition EarlyCSE.h:31
Pass which forces specific function attributes into the IR, primarily as a debugging tool.
A simple and fast domtree-based GVN pass to hoist common expressions from sibling branches.
Definition GVN.h:431
Uses an "inverted" value numbering to decide the similarity of expressions and sinks similar expressi...
Definition GVN.h:438
A set of parameters to control various transforms performed by IPSCCP pass.
Definition SCCP.h:35
A pass which infers function attributes from the names and signatures of function declarations in a m...
Provides context on when an inline advisor is constructed in the pipeline (e.g., link phase,...
Thresholds to tune inline cost analysis.
Definition InlineCost.h:207
std::optional< int > OptSizeHintThreshold
Threshold to use for callees with inline hint, when the caller is optimized for size.
Definition InlineCost.h:216
std::optional< int > HotCallSiteThreshold
Threshold to use when the callsite is considered hot.
Definition InlineCost.h:228
int DefaultThreshold
The default threshold to start with for a callee.
Definition InlineCost.h:209
std::optional< bool > EnableDeferral
Indicate whether we should allow inline deferral.
Definition InlineCost.h:241
std::optional< int > HintThreshold
Threshold to use for callees with inline hint.
Definition InlineCost.h:212
Options for the frontend instrumentation based profiling pass.
A no-op pass template which simply forces a specific analysis result to be invalidated.
Pass to forward loads in a loop around the backedge to subsequent iterations.
A set of parameters used to control various transforms performed by the LoopUnroll pass.
The LoopVectorize Pass.
Computes function attributes in post-order over the call graph.
A utility pass template to force an analysis result to be available.