LLVM 19.0.0git
PassBuilderPipelines.cpp
Go to the documentation of this file.
//===- Construction of pass pipelines -------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
///
/// This file provides the implementation of the PassBuilder based on our
/// static pass registry as well as related functionality. It also provides
/// helpers to aid in analyzing, debugging, and testing passes and pass
/// pipelines.
///
//===----------------------------------------------------------------------===//
16
17#include "llvm/ADT/Statistic.h"
26#include "llvm/IR/PassManager.h"
141
142using namespace llvm;
143
145 "enable-ml-inliner", cl::init(InliningAdvisorMode::Default), cl::Hidden,
146 cl::desc("Enable ML policy for inliner. Currently trained for -Oz only"),
147 cl::values(clEnumValN(InliningAdvisorMode::Default, "default",
148 "Heuristics-based inliner version"),
149 clEnumValN(InliningAdvisorMode::Development, "development",
150 "Use development mode (runtime-loadable model)"),
151 clEnumValN(InliningAdvisorMode::Release, "release",
152 "Use release mode (AOT-compiled model)")));
153
155 "enable-npm-synthetic-counts", cl::Hidden,
156 cl::desc("Run synthetic function entry count generation "
157 "pass"));
158
159/// Flag to enable inline deferral during PGO.
160static cl::opt<bool>
161 EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true),
163 cl::desc("Enable inline deferral during PGO"));
164
165static cl::opt<bool> EnableModuleInliner("enable-module-inliner",
166 cl::init(false), cl::Hidden,
167 cl::desc("Enable module inliner"));
168
170 "mandatory-inlining-first", cl::init(false), cl::Hidden,
171 cl::desc("Perform mandatory inlinings module-wide, before performing "
172 "inlining"));
173
175 "eagerly-invalidate-analyses", cl::init(true), cl::Hidden,
176 cl::desc("Eagerly invalidate more analyses in default pipelines"));
177
179 "enable-merge-functions", cl::init(false), cl::Hidden,
180 cl::desc("Enable function merging as part of the optimization pipeline"));
181
183 "enable-post-pgo-loop-rotation", cl::init(true), cl::Hidden,
184 cl::desc("Run the loop rotation transformation after PGO instrumentation"));
185
187 "enable-global-analyses", cl::init(true), cl::Hidden,
188 cl::desc("Enable inter-procedural analyses"));
189
190static cl::opt<bool>
191 RunPartialInlining("enable-partial-inlining", cl::init(false), cl::Hidden,
192 cl::desc("Run Partial inlinining pass"));
193
195 "extra-vectorizer-passes", cl::init(false), cl::Hidden,
196 cl::desc("Run cleanup optimization passes after vectorization"));
197
198static cl::opt<bool> RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden,
199 cl::desc("Run the NewGVN pass"));
200
202 "enable-loopinterchange", cl::init(false), cl::Hidden,
203 cl::desc("Enable the experimental LoopInterchange Pass"));
204
205static cl::opt<bool> EnableUnrollAndJam("enable-unroll-and-jam",
206 cl::init(false), cl::Hidden,
207 cl::desc("Enable Unroll And Jam Pass"));
208
209static cl::opt<bool> EnableLoopFlatten("enable-loop-flatten", cl::init(false),
211 cl::desc("Enable the LoopFlatten Pass"));
212
213// Experimentally allow loop header duplication. This should allow for better
214// optimization at Oz, since loop-idiom recognition can then recognize things
215// like memcpy. If this ends up being useful for many targets, we should drop
216// this flag and make a code generation option that can be controlled
217// independent of the opt level and exposed through the frontend.
219 "enable-loop-header-duplication", cl::init(false), cl::Hidden,
220 cl::desc("Enable loop header duplication at any optimization level"));
221
222static cl::opt<bool>
223 EnableDFAJumpThreading("enable-dfa-jump-thread",
224 cl::desc("Enable DFA jump threading"),
225 cl::init(false), cl::Hidden);
226
227// TODO: turn on and remove flag
229 "enable-pgo-force-function-attrs",
230 cl::desc("Enable pass to set function attributes based on PGO profiles"),
231 cl::init(false));
232
233static cl::opt<bool>
234 EnableHotColdSplit("hot-cold-split",
235 cl::desc("Enable hot-cold splitting pass"));
236
237static cl::opt<bool> EnableIROutliner("ir-outliner", cl::init(false),
239 cl::desc("Enable ir outliner pass"));
240
241static cl::opt<bool>
242 DisablePreInliner("disable-preinline", cl::init(false), cl::Hidden,
243 cl::desc("Disable pre-instrumentation inliner"));
244
246 "preinline-threshold", cl::Hidden, cl::init(75),
247 cl::desc("Control the amount of inlining in pre-instrumentation inliner "
248 "(default = 75)"));
249
250static cl::opt<bool>
251 EnableGVNHoist("enable-gvn-hoist",
252 cl::desc("Enable the GVN hoisting pass (default = off)"));
253
254static cl::opt<bool>
255 EnableGVNSink("enable-gvn-sink",
256 cl::desc("Enable the GVN sinking pass (default = off)"));
257
259 "enable-jump-table-to-switch",
260 cl::desc("Enable JumpTableToSwitch pass (default = off)"));
261
262// This option is used in simplifying testing SampleFDO optimizations for
263// profile loading.
264static cl::opt<bool>
265 EnableCHR("enable-chr", cl::init(true), cl::Hidden,
266 cl::desc("Enable control height reduction optimization (CHR)"));
267
269 "flattened-profile-used", cl::init(false), cl::Hidden,
270 cl::desc("Indicate the sample profile being used is flattened, i.e., "
271 "no inline hierachy exists in the profile"));
272
274 "enable-order-file-instrumentation", cl::init(false), cl::Hidden,
275 cl::desc("Enable order file instrumentation (default = off)"));
276
277static cl::opt<bool>
278 EnableMatrix("enable-matrix", cl::init(false), cl::Hidden,
279 cl::desc("Enable lowering of the matrix intrinsics"));
280
282 "enable-constraint-elimination", cl::init(true), cl::Hidden,
283 cl::desc(
284 "Enable pass to eliminate conditions based on linear constraints"));
285
287 "attributor-enable", cl::Hidden, cl::init(AttributorRunOption::NONE),
288 cl::desc("Enable the attributor inter-procedural deduction pass"),
289 cl::values(clEnumValN(AttributorRunOption::ALL, "all",
290 "enable all attributor runs"),
291 clEnumValN(AttributorRunOption::MODULE, "module",
292 "enable module-wide attributor runs"),
293 clEnumValN(AttributorRunOption::CGSCC, "cgscc",
294 "enable call graph SCC attributor runs"),
295 clEnumValN(AttributorRunOption::NONE, "none",
296 "disable attributor runs")));
297
299 "enable-loop-versioning-licm", cl::init(false), cl::Hidden,
300 cl::desc("Enable the experimental Loop Versioning LICM pass"));
301
302namespace llvm {
304
306} // namespace llvm
307
309 LoopInterleaving = true;
310 LoopVectorization = true;
311 SLPVectorization = false;
312 LoopUnrolling = true;
316 CallGraphProfile = true;
317 UnifiedLTO = false;
319 InlinerThreshold = -1;
321}
322
323namespace llvm {
325} // namespace llvm
326
328 OptimizationLevel Level) {
329 for (auto &C : PeepholeEPCallbacks)
330 C(FPM, Level);
331}
334 for (auto &C : LateLoopOptimizationsEPCallbacks)
335 C(LPM, Level);
336}
338 OptimizationLevel Level) {
339 for (auto &C : LoopOptimizerEndEPCallbacks)
340 C(LPM, Level);
341}
344 for (auto &C : ScalarOptimizerLateEPCallbacks)
345 C(FPM, Level);
346}
348 OptimizationLevel Level) {
349 for (auto &C : CGSCCOptimizerLateEPCallbacks)
350 C(CGPM, Level);
351}
353 OptimizationLevel Level) {
354 for (auto &C : VectorizerStartEPCallbacks)
355 C(FPM, Level);
356}
358 OptimizationLevel Level) {
359 for (auto &C : OptimizerEarlyEPCallbacks)
360 C(MPM, Level);
361}
363 OptimizationLevel Level) {
364 for (auto &C : OptimizerLastEPCallbacks)
365 C(MPM, Level);
366}
369 for (auto &C : FullLinkTimeOptimizationEarlyEPCallbacks)
370 C(MPM, Level);
371}
374 for (auto &C : FullLinkTimeOptimizationLastEPCallbacks)
375 C(MPM, Level);
376}
378 OptimizationLevel Level) {
379 for (auto &C : PipelineStartEPCallbacks)
380 C(MPM, Level);
381}
384 for (auto &C : PipelineEarlySimplificationEPCallbacks)
385 C(MPM, Level);
386}
387
388// Helper to add AnnotationRemarksPass.
391}
392
393// Helper to check if the current compilation phase is preparing for LTO
397}
398
399// TODO: Investigate the cost/benefit of tail call elimination on debugging.
401PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
403
405
408
409 // Form SSA out of local memory accesses after breaking apart aggregates into
410 // scalars.
412
413 // Catch trivial redundancies
414 FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
415
416 // Hoisting of scalars and load expressions.
417 FPM.addPass(
418 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
420
422
423 invokePeepholeEPCallbacks(FPM, Level);
424
425 FPM.addPass(
426 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
427
428 // Form canonically associated expression trees, and simplify the trees using
429 // basic mathematical properties. For example, this will form (nearly)
430 // minimal multiplication trees.
432
433 // Add the primary loop simplification pipeline.
434 // FIXME: Currently this is split into two loop pass pipelines because we run
435 // some function passes in between them. These can and should be removed
436 // and/or replaced by scheduling the loop pass equivalents in the correct
437 // positions. But those equivalent passes aren't powerful enough yet.
438 // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
439 // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough yet to
440 // fully replace `SimplifyCFGPass`, and the closest to the other we have is
441 // `LoopInstSimplify`.
442 LoopPassManager LPM1, LPM2;
443
444 // Simplify the loop body. We do this initially to clean up after other loop
445 // passes run, either when iterating on a loop or on inner loops with
446 // implications on the outer loop.
449
450 // Try to remove as much code from the loop header as possible,
451 // to reduce amount of IR that will have to be duplicated. However,
452 // do not perform speculative hoisting the first time as LICM
453 // will destroy metadata that may not need to be destroyed if run
454 // after loop rotation.
455 // TODO: Investigate promotion cap for O1.
457 /*AllowSpeculation=*/false));
458
459 LPM1.addPass(LoopRotatePass(/* Disable header duplication */ true,
461 // TODO: Investigate promotion cap for O1.
463 /*AllowSpeculation=*/true));
466 LPM1.addPass(LoopFlattenPass());
467
470
472
474
477
478 // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
479 // because it changes IR to makes profile annotation in back compile
480 // inaccurate. The normal unroller doesn't pay attention to forced full unroll
481 // attributes so we need to make sure and allow the full unroll pass to pay
482 // attention to it.
483 if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
484 PGOOpt->Action != PGOOptions::SampleUse)
485 LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
486 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
488
490
491 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
492 /*UseMemorySSA=*/true,
493 /*UseBlockFrequencyInfo=*/true));
494 FPM.addPass(
495 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
497 // The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA.
498 // *All* loop passes must preserve it, in order to be able to use it.
499 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
500 /*UseMemorySSA=*/false,
501 /*UseBlockFrequencyInfo=*/false));
502
503 // Delete small array after loop unroll.
505
506 // Specially optimize memory movement as it doesn't look like dataflow in SSA.
507 FPM.addPass(MemCpyOptPass());
508
509 // Sparse conditional constant propagation.
510 // FIXME: It isn't clear why we do this *after* loop passes rather than
511 // before...
512 FPM.addPass(SCCPPass());
513
514 // Delete dead bit computations (instcombine runs after to fold away the dead
515 // computations, and then ADCE will run later to exploit any new DCE
516 // opportunities that creates).
517 FPM.addPass(BDCEPass());
518
519 // Run instcombine after redundancy and dead bit elimination to exploit
520 // opportunities opened up by them.
522 invokePeepholeEPCallbacks(FPM, Level);
523
524 FPM.addPass(CoroElidePass());
525
527
528 // Finally, do an expensive DCE pass to catch all the dead code exposed by
529 // the simplifications and basic cleanup after all the simplifications.
530 // TODO: Investigate if this is too expensive.
531 FPM.addPass(ADCEPass());
532 FPM.addPass(
533 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
535 invokePeepholeEPCallbacks(FPM, Level);
536
537 return FPM;
538}
539
543 assert(Level != OptimizationLevel::O0 && "Must request optimizations!");
544
545 // The O1 pipeline has a separate pipeline creation function to simplify
546 // construction readability.
547 if (Level.getSpeedupLevel() == 1)
548 return buildO1FunctionSimplificationPipeline(Level, Phase);
549
551
554
555 // Form SSA out of local memory accesses after breaking apart aggregates into
556 // scalars.
558
559 // Catch trivial redundancies
560 FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
563
564 // Hoisting of scalars and load expressions.
565 if (EnableGVNHoist)
566 FPM.addPass(GVNHoistPass());
567
568 // Global value numbering based sinking.
569 if (EnableGVNSink) {
570 FPM.addPass(GVNSinkPass());
571 FPM.addPass(
572 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
573 }
574
575 // Speculative execution if the target has divergent branches; otherwise nop.
576 FPM.addPass(SpeculativeExecutionPass(/* OnlyIfDivergentTarget =*/true));
577
578 // Optimize based on known information about branches, and cleanup afterward.
581
582 // Jump table to switch conversion.
585
586 FPM.addPass(
587 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
590
591 if (!Level.isOptimizingForSize())
593
594 invokePeepholeEPCallbacks(FPM, Level);
595
596 // For PGO use pipeline, try to optimize memory intrinsics such as memcpy
597 // using the size value profile. Don't perform this when optimizing for size.
598 if (PGOOpt && PGOOpt->Action == PGOOptions::IRUse &&
599 !Level.isOptimizingForSize())
601
603 FPM.addPass(
604 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
605
606 // Form canonically associated expression trees, and simplify the trees using
607 // basic mathematical properties. For example, this will form (nearly)
608 // minimal multiplication trees.
610
613
614 // Add the primary loop simplification pipeline.
615 // FIXME: Currently this is split into two loop pass pipelines because we run
616 // some function passes in between them. These can and should be removed
617 // and/or replaced by scheduling the loop pass equivalents in the correct
618 // positions. But those equivalent passes aren't powerful enough yet.
619 // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
620 // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough yet to
621 // fully replace `SimplifyCFGPass`, and the closest to the other we have is
622 // `LoopInstSimplify`.
623 LoopPassManager LPM1, LPM2;
624
625 // Simplify the loop body. We do this initially to clean up after other loop
626 // passes run, either when iterating on a loop or on inner loops with
627 // implications on the outer loop.
630
631 // Try to remove as much code from the loop header as possible,
632 // to reduce amount of IR that will have to be duplicated. However,
633 // do not perform speculative hoisting the first time as LICM
634 // will destroy metadata that may not need to be destroyed if run
635 // after loop rotation.
636 // TODO: Investigate promotion cap for O1.
638 /*AllowSpeculation=*/false));
639
640 // Disable header duplication in loop rotation at -Oz.
642 Level != OptimizationLevel::Oz,
644 // TODO: Investigate promotion cap for O1.
646 /*AllowSpeculation=*/true));
647 LPM1.addPass(
648 SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3));
650 LPM1.addPass(LoopFlattenPass());
651
654
655 {
657 ExtraPasses.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
659 LPM2.addPass(std::move(ExtraPasses));
660 }
661
663
665
668
669 // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
670 // because it changes IR to makes profile annotation in back compile
671 // inaccurate. The normal unroller doesn't pay attention to forced full unroll
672 // attributes so we need to make sure and allow the full unroll pass to pay
673 // attention to it.
674 if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
675 PGOOpt->Action != PGOOptions::SampleUse)
676 LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
677 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
679
681
682 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
683 /*UseMemorySSA=*/true,
684 /*UseBlockFrequencyInfo=*/true));
685 FPM.addPass(
686 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
688 // The loop passes in LPM2 (LoopIdiomRecognizePass, IndVarSimplifyPass,
689 // LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA.
690 // *All* loop passes must preserve it, in order to be able to use it.
691 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
692 /*UseMemorySSA=*/false,
693 /*UseBlockFrequencyInfo=*/false));
694
695 // Delete small array after loop unroll.
697
698 // Try vectorization/scalarization transforms that are both improvements
699 // themselves and can allow further folds with GVN and InstCombine.
700 FPM.addPass(VectorCombinePass(/*TryEarlyFoldsOnly=*/true));
701
702 // Eliminate redundancies.
704 if (RunNewGVN)
705 FPM.addPass(NewGVNPass());
706 else
707 FPM.addPass(GVNPass());
708
709 // Sparse conditional constant propagation.
710 // FIXME: It isn't clear why we do this *after* loop passes rather than
711 // before...
712 FPM.addPass(SCCPPass());
713
714 // Delete dead bit computations (instcombine runs after to fold away the dead
715 // computations, and then ADCE will run later to exploit any new DCE
716 // opportunities that creates).
717 FPM.addPass(BDCEPass());
718
719 // Run instcombine after redundancy and dead bit elimination to exploit
720 // opportunities opened up by them.
722 invokePeepholeEPCallbacks(FPM, Level);
723
724 // Re-consider control flow based optimizations after redundancy elimination,
725 // redo DCE, etc.
728
731
732 // Finally, do an expensive DCE pass to catch all the dead code exposed by
733 // the simplifications and basic cleanup after all the simplifications.
734 // TODO: Investigate if this is too expensive.
735 FPM.addPass(ADCEPass());
736
737 // Specially optimize memory movement as it doesn't look like dataflow in SSA.
738 FPM.addPass(MemCpyOptPass());
739
740 FPM.addPass(DSEPass());
742
745 /*AllowSpeculation=*/true),
746 /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
747
748 FPM.addPass(CoroElidePass());
749
751
753 .convertSwitchRangeToICmp(true)
754 .hoistCommonInsts(true)
755 .sinkCommonInsts(true)));
757 invokePeepholeEPCallbacks(FPM, Level);
758
759 return FPM;
760}
761
762void PassBuilder::addRequiredLTOPreLinkPasses(ModulePassManager &MPM) {
765}
766
767void PassBuilder::addPreInlinerPasses(ModulePassManager &MPM,
768 OptimizationLevel Level,
769 ThinOrFullLTOPhase LTOPhase) {
770 assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
772 return;
773 InlineParams IP;
774
776
777 // FIXME: The hint threshold has the same value used by the regular inliner
778 // when not optimzing for size. This should probably be lowered after
779 // performance testing.
780 // FIXME: this comment is cargo culted from the old pass manager, revisit).
781 IP.HintThreshold = Level.isOptimizingForSize() ? PreInlineThreshold : 325;
783 IP, /* MandatoryFirst */ true,
785 CGSCCPassManager &CGPipeline = MIWP.getPM();
786
789 FPM.addPass(EarlyCSEPass()); // Catch trivial redundancies.
790 FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(
791 true))); // Merge & remove basic blocks.
792 FPM.addPass(InstCombinePass()); // Combine silly sequences.
793 invokePeepholeEPCallbacks(FPM, Level);
794
795 CGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
796 std::move(FPM), PTO.EagerlyInvalidateAnalyses));
797
798 MPM.addPass(std::move(MIWP));
799
800 // Delete anything that is now dead to make sure that we don't instrument
801 // dead code. Instrumentation can end up keeping dead code around and
802 // dramatically increase code size.
804}
805
806void PassBuilder::addPostPGOLoopRotation(ModulePassManager &MPM,
807 OptimizationLevel Level) {
809 // Disable header duplication in loop rotation at -Oz.
813 Level != OptimizationLevel::Oz),
814 /*UseMemorySSA=*/false,
815 /*UseBlockFrequencyInfo=*/false),
817 }
818}
819
820void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM,
821 OptimizationLevel Level, bool RunProfileGen,
822 bool IsCS, bool AtomicCounterUpdate,
823 std::string ProfileFile,
824 std::string ProfileRemappingFile,
826 assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
827
828 if (!RunProfileGen) {
829 assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
830 MPM.addPass(
831 PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS));
832 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
833 // RequireAnalysisPass for PSI before subsequent non-module passes.
835 return;
836 }
837
838 // Perform PGO instrumentation.
840
841 addPostPGOLoopRotation(MPM, Level);
842 // Add the profile lowering pass.
844 if (!ProfileFile.empty())
845 Options.InstrProfileOutput = ProfileFile;
846 // Do counter promotion at Level greater than O0.
847 Options.DoCounterPromotion = true;
848 Options.UseBFIInPromotion = IsCS;
849 Options.Atomic = AtomicCounterUpdate;
851}
852
854 ModulePassManager &MPM, bool RunProfileGen, bool IsCS,
855 bool AtomicCounterUpdate, std::string ProfileFile,
856 std::string ProfileRemappingFile, IntrusiveRefCntPtr<vfs::FileSystem> FS) {
857 if (!RunProfileGen) {
858 assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
859 MPM.addPass(
860 PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS));
861 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
862 // RequireAnalysisPass for PSI before subsequent non-module passes.
864 return;
865 }
866
867 // Perform PGO instrumentation.
869 // Add the profile lowering pass.
871 if (!ProfileFile.empty())
872 Options.InstrProfileOutput = ProfileFile;
873 // Do not do counter promotion at O0.
874 Options.DoCounterPromotion = false;
875 Options.UseBFIInPromotion = IsCS;
876 Options.Atomic = AtomicCounterUpdate;
878}
879
881 return getInlineParams(Level.getSpeedupLevel(), Level.getSizeLevel());
882}
883
887 InlineParams IP;
888 if (PTO.InlinerThreshold == -1)
889 IP = getInlineParamsFromOptLevel(Level);
890 else
892 // For PreLinkThinLTO + SamplePGO, set hot-caller threshold to 0 to
893 // disable hot callsite inline (as much as possible [1]) because it makes
894 // profile annotation in the backend inaccurate.
895 //
896 // [1] Note the cost of a function could be below zero due to erased
897 // prologue / epilogue.
898 if (Phase == ThinOrFullLTOPhase::ThinLTOPreLink && PGOOpt &&
899 PGOOpt->Action == PGOOptions::SampleUse)
901
902 if (PGOOpt)
904
908
909 // Require the GlobalsAA analysis for the module so we can query it within
910 // the CGSCC pipeline.
913 // Invalidate AAManager so it can be recreated and pick up the newly
914 // available GlobalsAA.
915 MIWP.addModulePass(
917 }
918
919 // Require the ProfileSummaryAnalysis for the module so we can query it within
920 // the inliner pass.
922
923 // Now begin the main postorder CGSCC pipeline.
924 // FIXME: The current CGSCC pipeline has its origins in the legacy pass
925 // manager and trying to emulate its precise behavior. Much of this doesn't
926 // make a lot of sense and we should revisit the core CGSCC structure.
927 CGSCCPassManager &MainCGPipeline = MIWP.getPM();
928
929 // Note: historically, the PruneEH pass was run first to deduce nounwind and
930 // generally clean up exception handling overhead. It isn't clear this is
931 // valuable as the inliner doesn't currently care whether it is inlining an
932 // invoke or a call.
933
935 MainCGPipeline.addPass(AttributorCGSCCPass());
936
937 // Deduce function attributes. We do another run of this after the function
938 // simplification pipeline, so this only needs to run when it could affect the
939 // function simplification pipeline, which is only the case with recursive
940 // functions.
941 MainCGPipeline.addPass(PostOrderFunctionAttrsPass(/*SkipNonRecursive*/ true));
942
943 // When at O3 add argument promotion to the pass pipeline.
944 // FIXME: It isn't at all clear why this should be limited to O3.
945 if (Level == OptimizationLevel::O3)
946 MainCGPipeline.addPass(ArgumentPromotionPass());
947
948 // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if
949 // there are no OpenMP runtime calls present in the module.
950 if (Level == OptimizationLevel::O2 || Level == OptimizationLevel::O3)
951 MainCGPipeline.addPass(OpenMPOptCGSCCPass());
952
953 invokeCGSCCOptimizerLateEPCallbacks(MainCGPipeline, Level);
954
955 // Add the core function simplification pipeline nested inside the
956 // CGSCC walk.
959 PTO.EagerlyInvalidateAnalyses, /*NoRerun=*/true));
960
961 // Finally, deduce any function attributes based on the fully simplified
962 // function.
963 MainCGPipeline.addPass(PostOrderFunctionAttrsPass());
964
965 // Mark that the function is fully simplified and that it shouldn't be
966 // simplified again if we somehow revisit it due to CGSCC mutations unless
967 // it's been modified since.
970
971 MainCGPipeline.addPass(CoroSplitPass(Level != OptimizationLevel::O0));
972
973 // Make sure we don't affect potential future NoRerun CGSCC adaptors.
974 MIWP.addLateModulePass(createModuleToFunctionPassAdaptor(
976
977 return MIWP;
978}
979
984
986 // For PreLinkThinLTO + SamplePGO, set hot-caller threshold to 0 to
987 // disable hot callsite inline (as much as possible [1]) because it makes
988 // profile annotation in the backend inaccurate.
989 //
990 // [1] Note the cost of a function could be below zero due to erased
991 // prologue / epilogue.
992 if (Phase == ThinOrFullLTOPhase::ThinLTOPreLink && PGOOpt &&
993 PGOOpt->Action == PGOOptions::SampleUse)
995
996 if (PGOOpt)
998
999 // The inline deferral logic is used to avoid losing some
1000 // inlining chance in future. It is helpful in SCC inliner, in which
1001 // inlining is processed in bottom-up order.
1002 // While in module inliner, the inlining order is a priority-based order
1003 // by default. The inline deferral is unnecessary there. So we disable the
1004 // inline deferral logic in module inliner.
1005 IP.EnableDeferral = false;
1006
1008
1012
1015
1016 return MPM;
1017}
1018
1022 assert(Level != OptimizationLevel::O0 &&
1023 "Should not be used for O0 pipeline");
1024
1026 "FullLTOPostLink shouldn't call buildModuleSimplificationPipeline!");
1027
1029
1030 // Place pseudo probe instrumentation as the first pass of the pipeline to
1031 // minimize the impact of optimization changes.
1032 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1035
1036 bool HasSampleProfile = PGOOpt && (PGOOpt->Action == PGOOptions::SampleUse);
1037
1038 // In ThinLTO mode, when flattened profile is used, all the available
1039 // profile information will be annotated in PreLink phase so there is
1040 // no need to load the profile again in PostLink.
1041 bool LoadSampleProfile =
1042 HasSampleProfile &&
1044
1045 // During the ThinLTO backend phase we perform early indirect call promotion
1046 // here, before globalopt. Otherwise imported available_externally functions
1047 // look unreferenced and are removed. If we are going to load the sample
1048 // profile then defer until later.
1049 // TODO: See if we can move later and consolidate with the location where
1050 // we perform ICP when we are loading a sample profile.
1051 // TODO: We pass HasSampleProfile (whether there was a sample profile file
1052 // passed to the compile) to the SamplePGO flag of ICP. This is used to
1053 // determine whether the new direct calls are annotated with prof metadata.
1054 // Ideally this should be determined from whether the IR is annotated with
1055 // sample profile, and not whether the a sample profile was provided on the
1056 // command line. E.g. for flattened profiles where we will not be reloading
1057 // the sample profile in the ThinLTO backend, we ideally shouldn't have to
1058 // provide the sample profile file.
1059 if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink && !LoadSampleProfile)
1060 MPM.addPass(PGOIndirectCallPromotion(true /* InLTO */, HasSampleProfile));
1061
1062 // Create an early function pass manager to cleanup the output of the
1063 // frontend. Not necessary with LTO post link pipelines since the pre link
1064 // pipeline already cleaned up the frontend output.
1066 // Do basic inference of function attributes from known properties of system
1067 // libraries and other oracles.
1070
1071 FunctionPassManager EarlyFPM;
1072 // Lower llvm.expect to metadata before attempting transforms.
1073 // Compare/branch metadata may alter the behavior of passes like
1074 // SimplifyCFG.
1076 EarlyFPM.addPass(SimplifyCFGPass());
1078 EarlyFPM.addPass(EarlyCSEPass());
1079 if (Level == OptimizationLevel::O3)
1080 EarlyFPM.addPass(CallSiteSplittingPass());
1082 std::move(EarlyFPM), PTO.EagerlyInvalidateAnalyses));
1083 }
1084
1085 if (LoadSampleProfile) {
1086 // Annotate sample profile right after early FPM to ensure freshness of
1087 // the debug info.
1088 MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
1089 PGOOpt->ProfileRemappingFile, Phase));
1090 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
1091 // RequireAnalysisPass for PSI before subsequent non-module passes.
1093 // Do not invoke ICP in the LTOPrelink phase as it makes it hard
1094 // for the profile annotation to be accurate in the LTO backend.
1095 if (!isLTOPreLink(Phase))
1096 // We perform early indirect call promotion here, before globalopt.
1097 // This is important for the ThinLTO backend phase because otherwise
1098 // imported available_externally functions look unreferenced and are
1099 // removed.
1100 MPM.addPass(
1101 PGOIndirectCallPromotion(true /* IsInLTO */, true /* SamplePGO */));
1102 }
1103
1104 // Try to perform OpenMP specific optimizations on the module. This is a
1105 // (quick!) no-op if there are no OpenMP runtime calls present in the module.
1107
1110
1111 // Lower type metadata and the type.test intrinsic in the ThinLTO
1112 // post link pipeline after ICP. This is to enable usage of the type
1113 // tests in ICP sequences.
1115 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
1116
1118
1119 // Interprocedural constant propagation now that basic cleanup has occurred
1120 // and prior to optimizing globals.
1121 // FIXME: This position in the pipeline hasn't been carefully considered in
1122 // years, it should be re-analyzed.
1124 IPSCCPOptions(/*AllowFuncSpec=*/
1125 Level != OptimizationLevel::Os &&
1126 Level != OptimizationLevel::Oz &&
1127 !isLTOPreLink(Phase))));
1128
1129 // Attach metadata to indirect call sites indicating the set of functions
1130 // they may target at run-time. This should follow IPSCCP.
1132
1133 // Optimize globals to try and fold them into constants.
1135
1136 // Create a small function pass pipeline to cleanup after all the global
1137 // optimizations.
1138 FunctionPassManager GlobalCleanupPM;
1139 // FIXME: Should this instead by a run of SROA?
1140 GlobalCleanupPM.addPass(PromotePass());
1141 GlobalCleanupPM.addPass(InstCombinePass());
1142 invokePeepholeEPCallbacks(GlobalCleanupPM, Level);
1143 GlobalCleanupPM.addPass(
1144 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
1145 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(GlobalCleanupPM),
1147
1148 // We already asserted this happens in non-FullLTOPostLink earlier.
1149 const bool IsPreLink = Phase != ThinOrFullLTOPhase::ThinLTOPostLink;
1150 const bool IsPGOPreLink = PGOOpt && IsPreLink;
1151 const bool IsPGOInstrGen =
1152 IsPGOPreLink && PGOOpt->Action == PGOOptions::IRInstr;
1153 const bool IsPGOInstrUse =
1154 IsPGOPreLink && PGOOpt->Action == PGOOptions::IRUse;
1155 const bool IsMemprofUse = IsPGOPreLink && !PGOOpt->MemoryProfile.empty();
1156 // We don't want to mix pgo ctx gen and pgo gen; we also don't currently
1157 // enable ctx profiling from the frontend.
1158 assert(
1160 "Enabling both instrumented FDO and contextual instrumentation is not "
1161 "supported.");
1162 // Enable contextual profiling instrumentation.
1163 const bool IsCtxProfGen = !IsPGOInstrGen && IsPreLink &&
1165
1166 if (IsPGOInstrGen || IsPGOInstrUse || IsMemprofUse || IsCtxProfGen)
1167 addPreInlinerPasses(MPM, Level, Phase);
1168
1169 // Add all the requested passes for instrumentation PGO, if requested.
1170 if (IsPGOInstrGen || IsPGOInstrUse) {
1171 addPGOInstrPasses(MPM, Level,
1172 /*RunProfileGen=*/IsPGOInstrGen,
1173 /*IsCS=*/false, PGOOpt->AtomicCounterUpdate,
1174 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile,
1175 PGOOpt->FS);
1176 } else if (IsCtxProfGen) {
1178 addPostPGOLoopRotation(MPM, Level);
1180 }
1181
1182 if (IsPGOInstrGen || IsPGOInstrUse || IsCtxProfGen)
1183 MPM.addPass(PGOIndirectCallPromotion(false, false));
1184
1185 if (IsPGOPreLink && PGOOpt->CSAction == PGOOptions::CSIRInstr)
1186 MPM.addPass(PGOInstrumentationGenCreateVar(PGOOpt->CSProfileGenFile));
1187
1188 if (IsMemprofUse)
1189 MPM.addPass(MemProfUsePass(PGOOpt->MemoryProfile, PGOOpt->FS));
1190
1191 // Synthesize function entry counts for non-PGO compilation.
1192 if (EnableSyntheticCounts && !PGOOpt)
1194
1195 if (EnablePGOForceFunctionAttrs && PGOOpt)
1196 MPM.addPass(PGOForceFunctionAttrsPass(PGOOpt->ColdOptType));
1197
1198 MPM.addPass(AlwaysInlinerPass(/*InsertLifetimeIntrinsics=*/true));
1199
1202 else
1204
1205 // Remove any dead arguments exposed by cleanups, constant folding globals,
1206 // and argument promotion.
1208
1210
1211 // Optimize globals now that functions are fully simplified.
1214
1215 return MPM;
1216}
1217
1218/// TODO: Should LTO cause any differences to this set of passes?
1219void PassBuilder::addVectorPasses(OptimizationLevel Level,
1220 FunctionPassManager &FPM, bool IsFullLTO) {
1223
1226 if (IsFullLTO) {
1227 // The vectorizer may have significantly shortened a loop body; unroll
1228 // again. Unroll small loops to hide loop backedge latency and saturate any
1229 // parallel execution resources of an out-of-order processor. We also then
1230 // need to clean up redundancies and loop invariant code.
1231 // FIXME: It would be really good to use a loop-integrated instruction
1232 // combiner for cleanup here so that the unrolling and LICM can be pipelined
1233 // across the loop nests.
1234 // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
1237 LoopUnrollAndJamPass(Level.getSpeedupLevel())));
1239 Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
1242 // Now that we are done with loop unrolling, be it either by LoopVectorizer,
1243 // or LoopUnroll passes, some variable-offset GEP's into alloca's could have
1244 // become constant-offset, thus enabling SROA and alloca promotion. Do so.
1245 // NOTE: we are very late in the pipeline, and we don't have any LICM
1246 // or SimplifyCFG passes scheduled after us, that would cleanup
1247 // the CFG mess this may created if allowed to modify CFG, so forbid that.
1249 }
1250
1251 if (!IsFullLTO) {
1252 // Eliminate loads by forwarding stores from the previous iteration to loads
1253 // of the current iteration.
1255 }
1256 // Cleanup after the loop optimization passes.
1257 FPM.addPass(InstCombinePass());
1258
1259 if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
1260 ExtraVectorPassManager ExtraPasses;
1261 // At higher optimization levels, try to clean up any runtime overlap and
1262 // alignment checks inserted by the vectorizer. We want to track correlated
1263 // runtime checks for two inner loops in the same outer loop, fold any
1264 // common computations, hoist loop-invariant aspects out of any outer loop,
1265 // and unswitch the runtime checks if possible. Once hoisted, we may have
1266 // dead (or speculatable) control flows or more combining opportunities.
1267 ExtraPasses.addPass(EarlyCSEPass());
1269 ExtraPasses.addPass(InstCombinePass());
1270 LoopPassManager LPM;
1272 /*AllowSpeculation=*/true));
1273 LPM.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
1275 ExtraPasses.addPass(
1276 createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/true,
1277 /*UseBlockFrequencyInfo=*/true));
1278 ExtraPasses.addPass(
1279 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
1280 ExtraPasses.addPass(InstCombinePass());
1281 FPM.addPass(std::move(ExtraPasses));
1282 }
1283
1284 // Now that we've formed fast to execute loop structures, we do further
1285 // optimizations. These are run afterward as they might block doing complex
1286 // analyses and transforms such as what are needed for loop vectorization.
1287
1288 // Cleanup after loop vectorization, etc. Simplification passes like CVP and
1289 // GVN, loop transforms, and others have already run, so it's now better to
1290 // convert to more optimized IR using more aggressive simplify CFG options.
1291 // The extra sinking transform can create larger basic blocks, so do this
1292 // before SLP vectorization.
1294 .forwardSwitchCondToPhi(true)
1295 .convertSwitchRangeToICmp(true)
1296 .convertSwitchToLookupTable(true)
1297 .needCanonicalLoops(false)
1298 .hoistCommonInsts(true)
1299 .sinkCommonInsts(true)));
1300
1301 if (IsFullLTO) {
1302 FPM.addPass(SCCPPass());
1303 FPM.addPass(InstCombinePass());
1304 FPM.addPass(BDCEPass());
1305 }
1306
1307 // Optimize parallel scalar instruction chains into SIMD instructions.
1308 if (PTO.SLPVectorization) {
1310 if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
1311 FPM.addPass(EarlyCSEPass());
1312 }
1313 }
1314 // Enhance/cleanup vector code.
1316
1317 if (!IsFullLTO) {
1318 FPM.addPass(InstCombinePass());
1319 // Unroll small loops to hide loop backedge latency and saturate any
1320 // parallel execution resources of an out-of-order processor. We also then
1321 // need to clean up redundancies and loop invariant code.
1322 // FIXME: It would be really good to use a loop-integrated instruction
1323 // combiner for cleanup here so that the unrolling and LICM can be pipelined
1324 // across the loop nests.
1325 // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
1326 if (EnableUnrollAndJam && PTO.LoopUnrolling) {
1328 LoopUnrollAndJamPass(Level.getSpeedupLevel())));
1329 }
1331 Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
1334 // Now that we are done with loop unrolling, be it either by LoopVectorizer,
1335 // or LoopUnroll passes, some variable-offset GEP's into alloca's could have
1336 // become constant-offset, thus enabling SROA and alloca promotion. Do so.
1337 // NOTE: we are very late in the pipeline, and we don't have any LICM
1338 // or SimplifyCFG passes scheduled after us, that would cleanup
1339 // the CFG mess this may created if allowed to modify CFG, so forbid that.
1341 }
1342
1345 FPM.addPass(InstCombinePass());
1346
1347 // This is needed for two reasons:
1348 // 1. It works around problems that instcombine introduces, such as sinking
1349 // expensive FP divides into loops containing multiplications using the
1350 // divide result.
1351 // 2. It helps to clean up some loop-invariant code created by the loop
1352 // unroll pass when IsFullLTO=false.
1355 /*AllowSpeculation=*/true),
1356 /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
1357
1358 // Now that we've vectorized and unrolled loops, we may have more refined
1359 // alignment information, try to re-derive it here.
1361}
1362
// Builds the module-level optimization (post-simplification) pipeline:
// late IPO cleanups, CSPGO, loop/vector passes, and final CFG cleanup.
// NOTE(review): the opening line(s) of this signature are missing from this
// listing (source line numbers jump 1361->1365); presumably this is
// PassBuilder::buildModuleOptimizationPipeline(MPM, Level, LTOPhase) — TODO
// confirm against upstream. Several pass-add statements inside the body are
// likewise missing where the embedded numbering skips.
1365 ThinOrFullLTOPhase LTOPhase) {
1366 const bool LTOPreLink = isLTOPreLink(LTOPhase);
1368
1369 // Run partial inlining pass to partially inline functions that have
1370 // large bodies.
1373
1374 // Remove avail extern fns and globals definitions since we aren't compiling
1375 // an object file for later LTO. For LTO we want to preserve these so they
1376 // are eligible for inlining at link-time. Note if they are unreferenced they
1377 // will be removed by GlobalDCE later, so this only impacts referenced
1378 // available externally globals. Eventually they will be suppressed during
1379 // codegen, but eliminating here enables more opportunity for GlobalDCE as it
1380 // may make globals referenced by available external functions dead and saves
1381 // running remaining passes on the eliminated functions. These should be
1382 // preserved during prelinking for link-time inlining decisions.
1383 if (!LTOPreLink)
1385
1388
1389 // Do RPO function attribute inference across the module to forward-propagate
1390 // attributes where applicable.
1391 // FIXME: Is this really an optimization rather than a canonicalization?
1393
1394 // Do a post inline PGO instrumentation and use pass. This is a context
1395 // sensitive PGO pass. We don't want to do this in LTOPreLink phase as
1396 // cross-module inline has not been done yet. The context sensitive
1397 // instrumentation is after all the inlines are done.
1398 if (!LTOPreLink && PGOOpt) {
1399 if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
1400 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true,
1401 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
1402 PGOOpt->CSProfileGenFile, PGOOpt->ProfileRemappingFile,
1403 PGOOpt->FS);
1404 else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
1405 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false,
1406 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
1407 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile,
1408 PGOOpt->FS);
1409 }
1410
1411 // Re-compute GlobalsAA here prior to function passes. This is particularly
1412 // useful as the above will have inlined, DCE'ed, and function-attr
1413 // propagated everything. We should at this point have a reasonably minimal
1414 // and richly annotated call graph. By computing aliasing and mod/ref
1415 // information for all local globals here, the late loop passes and notably
1416 // the vectorizer will be able to use them to help recognize vectorizable
1417 // memory operations.
1420
1422
1423 FunctionPassManager OptimizePM;
1424 // Scheduling LoopVersioningLICM when inlining is over, because after that
1425 // we may see more accurate aliasing. Reason to run this late is that too
1426 // early versioning may prevent further inlining due to increase of code
1427 // size. Other optimizations which runs later might get benefit of no-alias
1428 // assumption in clone loop.
1430 OptimizePM.addPass(
1432 // LoopVersioningLICM pass might increase new LICM opportunities.
1435 /*AllowSpeculation=*/true),
1436 /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
1437 }
1438
1439 OptimizePM.addPass(Float2IntPass());
1441
1442 if (EnableMatrix) {
1443 OptimizePM.addPass(LowerMatrixIntrinsicsPass());
1444 OptimizePM.addPass(EarlyCSEPass());
1445 }
1446
1447 // CHR pass should only be applied with the profile information.
1448 // The check is to check the profile summary information in CHR.
1449 if (EnableCHR && Level == OptimizationLevel::O3)
1450 OptimizePM.addPass(ControlHeightReductionPass());
1451
1452 // FIXME: We need to run some loop optimizations to re-rotate loops after
1453 // simplifycfg and others undo their rotation.
1454
1455 // Optimize the loop execution. These passes operate on entire loop nests
1456 // rather than on each loop in an inside-out manner, and so they are actually
1457 // function passes.
1458
1459 invokeVectorizerStartEPCallbacks(OptimizePM, Level);
1460
1461 LoopPassManager LPM;
1462 // First rotate loops that may have been un-rotated by prior passes.
1463 // Disable header duplication at -Oz.
1465 Level != OptimizationLevel::Oz,
1466 LTOPreLink));
1467 // Some loops may have become dead by now. Try to delete them.
1468 // FIXME: see discussion in https://reviews.llvm.org/D112851,
1469 // this may need to be revisited once we run GVN before loop deletion
1470 // in the simplification pipeline.
1473 std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/false));
1474
1475 // Distribute loops to allow partial vectorization. I.e. isolate dependences
1476 // into separate loop that would otherwise inhibit vectorization. This is
1477 // currently only performed for loops marked with the metadata
1478 // llvm.loop.distribute=true or when -enable-loop-distribute is specified.
1479 OptimizePM.addPass(LoopDistributePass());
1480
1481 // Populates the VFABI attribute with the scalar-to-vector mappings
1482 // from the TargetLibraryInfo.
1483 OptimizePM.addPass(InjectTLIMappings());
1484
1485 addVectorPasses(Level, OptimizePM, /* IsFullLTO */ false);
1486
1487 // LoopSink pass sinks instructions hoisted by LICM, which serves as a
1488 // canonicalization pass that enables other optimizations. As a result,
1489 // LoopSink pass needs to be a very late IR pass to avoid undoing LICM
1490 // result too early.
1491 OptimizePM.addPass(LoopSinkPass());
1492
1493 // And finally clean up LCSSA form before generating code.
1494 OptimizePM.addPass(InstSimplifyPass());
1495
1496 // This hoists/decomposes div/rem ops. It should run after other sink/hoist
1497 // passes to avoid re-sinking, but before SimplifyCFG because it can allow
1498 // flattening of blocks.
1499 OptimizePM.addPass(DivRemPairsPass());
1500
1501 // Try to annotate calls that were created during optimization.
1502 OptimizePM.addPass(TailCallElimPass());
1503
1504 // LoopSink (and other loop passes since the last simplifyCFG) might have
1505 // resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
1506 OptimizePM.addPass(
1507 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
1508
1509 // Add the core optimizing pipeline.
1510 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM),
1512
1514
1515 // Split out cold code. Splitting is done late to avoid hiding context from
1516 // other optimizations and inadvertently regressing performance. The tradeoff
1517 // is that this has a higher code size cost than splitting early.
1518 if (EnableHotColdSplit && !LTOPreLink)
1520
1521 // Search the code for similar regions of code. If enough similar regions can
1522 // be found where extracting the regions into their own function will decrease
1523 // the size of the program, we extract the regions, and deduplicate the
1524 // structurally similar regions.
1525 if (EnableIROutliner)
1527
1528 // Merge functions if requested.
1529 if (PTO.MergeFunctions)
1531
1532 // Now we need to do some global optimization transforms.
1533 // FIXME: It would seem like these should come first in the optimization
1534 // pipeline and maybe be the bottom of the canonicalization pipeline? Weird
1535 // ordering here.
1538
1539 if (PTO.CallGraphProfile && !LTOPreLink)
1542
1543 // TODO: Relative lookup table converter pass caused an issue when full lto is
1544 // enabled. See https://reviews.llvm.org/D94355 for more details.
1545 // Until the issue fixed, disable this pass during pre-linking phase.
1546 if (!LTOPreLink)
1548
1549 return MPM;
1550}
1551
// Builds the default -O[123sz] per-module pipeline: O0 short-circuit,
// front-matter passes (annotation lowering, forced attrs, EP callbacks),
// then the simplification and optimization pipelines.
// NOTE(review): the signature head is missing from this listing (numbering
// jumps 1551->1554); presumably PassBuilder::buildPerModuleDefaultPipeline(
// OptimizationLevel Level, bool LTOPreLink) — confirm against upstream.
// Blank numbered lines below mark pass statements dropped by the extraction.
1554 bool LTOPreLink) {
1555 if (Level == OptimizationLevel::O0)
1556 return buildO0DefaultPipeline(Level, LTOPreLink);
1557
1559
1560 // Convert @llvm.global.annotations to !annotation metadata.
1562
1563 // Force any function attributes we want the rest of the pipeline to observe.
1565
1566 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
1568
1569 // Apply module pipeline start EP callback.
1571
1572 const ThinOrFullLTOPhase LTOPhase = LTOPreLink
1575 // Add the core simplification pipeline.
1577
1578 // Now add the optimization pipeline.
1580
1581 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1582 PGOOpt->Action == PGOOptions::SampleUse)
1584
1585 // Emit annotation remarks.
1587
1588 if (LTOPreLink)
1589 addRequiredLTOPreLinkPasses(MPM);
1590 return MPM;
1591}
1592
// Builds the FatLTO pipeline: run a (Thin)LTO pre-link pipeline, embed the
// bitcode (with optional summary), then finish with either the ThinLTO
// post-link pipeline (SamplePGO case) or the module optimization pipeline.
// NOTE(review): signature head missing from this listing; presumably
// PassBuilder::buildFatLTODefaultPipeline(Level, ThinLTO, EmitSummary) —
// confirm against upstream. Gaps in the numbering are dropped statements.
1595 bool EmitSummary) {
1597 if (ThinLTO)
1599 else
1601 MPM.addPass(EmbedBitcodePass(ThinLTO, EmitSummary));
1602
1603 // Use the ThinLTO post-link pipeline with sample profiling
1604 if (ThinLTO && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)
1605 MPM.addPass(buildThinLTODefaultPipeline(Level, /*ImportSummary=*/nullptr));
1606 else {
1607 // otherwise, just use module optimization
1608 MPM.addPass(
1610 // Emit annotation remarks.
1612 }
1613 return MPM;
1614}
1615
// Builds the ThinLTO pre-link (compile-step) pipeline: simplification only —
// no unrolling/vectorization — since the heavy optimization happens in the
// ThinLTO post-link backend after cross-module importing.
// NOTE(review): the function signature is missing from this listing
// (numbering jumps 1614->1618); presumably
// PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level).
// Blank numbered lines are statements dropped by the extraction.
1618 if (Level == OptimizationLevel::O0)
1619 return buildO0DefaultPipeline(Level, /*LTOPreLink*/true);
1620
1622
1623 // Convert @llvm.global.annotations to !annotation metadata.
1625
1626 // Force any function attributes we want the rest of the pipeline to observe.
1628
1629 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
1631
1632 // Apply module pipeline start EP callback.
1634
1635 // If we are planning to perform ThinLTO later, we don't bloat the code with
1636 // unrolling/vectorization/... now. Just simplify the module as much as we
1637 // can.
1640
1641 // Run partial inlining pass to partially inline functions that have
1642 // large bodies.
1643 // FIXME: It isn't clear whether this is really the right place to run this
1644 // in ThinLTO. Because there is another canonicalization and simplification
1645 // phase that will run after the thin link, running this here ends up with
1646 // less information than will be available later and it may grow functions in
1647 // ways that aren't beneficial.
1650
1651 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1652 PGOOpt->Action == PGOOptions::SampleUse)
1654
1655 // Handle Optimizer{Early,Last}EPCallbacks added by clang on PreLink. Actual
1656 // optimization is going to be done in PostLink stage, but clang can't add
1657 // callbacks there in case of in-process ThinLTO called by linker.
1660
1661 // Emit annotation remarks.
1663
1664 addRequiredLTOPreLinkPasses(MPM);
1665
1666 return MPM;
1667}
1668
// Builds the ThinLTO post-link (backend) pipeline. When an import summary is
// available, applies memprof context disambiguation and imports WPD/CFI type
// resolutions first; at -O0 only the minimal type-test cleanup runs.
// NOTE(review): the opening signature line is missing from this listing and
// blank numbered lines mark statements dropped by the extraction; verify
// against upstream before relying on the exact pass sequence.
1670 OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary) {
1672
1673 if (ImportSummary) {
1674 // For ThinLTO we must apply the context disambiguation decisions early, to
1675 // ensure we can correctly match the callsites to summary data.
1678
1679 // These passes import type identifier resolutions for whole-program
1680 // devirtualization and CFI. They must run early because other passes may
1681 // disturb the specific instruction patterns that these passes look for,
1682 // creating dependencies on resolutions that may not appear in the summary.
1683 //
1684 // For example, GVN may transform the pattern assume(type.test) appearing in
1685 // two basic blocks into assume(phi(type.test, type.test)), which would
1686 // transform a dependency on a WPD resolution into a dependency on a type
1687 // identifier resolution for CFI.
1688 //
1689 // Also, WPD has access to more precise information than ICP and can
1690 // devirtualize more effectively, so it should operate on the IR first.
1691 //
1692 // The WPD and LowerTypeTest passes need to run at -O0 to lower type
1693 // metadata and intrinsics.
1694 MPM.addPass(WholeProgramDevirtPass(nullptr, ImportSummary));
1695 MPM.addPass(LowerTypeTestsPass(nullptr, ImportSummary));
1696 }
1697
1698 if (Level == OptimizationLevel::O0) {
1699 // Run a second time to clean up any type tests left behind by WPD for use
1700 // in ICP.
1701 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
1702 // Drop available_externally and unreferenced globals. This is necessary
1703 // with ThinLTO in order to avoid leaving undefined references to dead
1704 // globals in the object file.
1707 return MPM;
1708 }
1709
1710 // Add the core simplification pipeline.
1713
1714 // Now add the optimization pipeline.
1717
1718 // Emit annotation remarks.
1720
1721 return MPM;
1722}
1723
// Full-LTO pre-link pipeline: currently just delegates to the per-module
// default pipeline with LTOPreLink=true.
// NOTE(review): the function signature is missing from this listing
// (numbering jumps 1722->1726); presumably
// PassBuilder::buildLTOPreLinkDefaultPipeline(OptimizationLevel Level).
1726 // FIXME: We should use a customized pre-link pipeline!
1727 return buildPerModuleDefaultPipeline(Level,
1728 /* LTOPreLink */ true);
1729}
1730
// Builds the full-LTO post-link pipeline: cross-DSO CFI setup, sample-profile
// load, whole-program IPO (ICP, IPSCCP, WPD, inlining), the late scalar/loop/
// vector function pipeline, type-test lowering, and final module cleanup.
// NOTE(review): this listing has gaps (the embedded numbering skips), so many
// pass-add statements are not visible; verify against upstream LLVM before
// relying on the exact sequence.
1733 ModuleSummaryIndex *ExportSummary) {
1735
1737
1738 // Create a function that performs CFI checks for cross-DSO calls with targets
1739 // in the current module.
1741
1742 if (Level == OptimizationLevel::O0) {
1743 // The WPD and LowerTypeTest passes need to run at -O0 to lower type
1744 // metadata and intrinsics.
1745 MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
1746 MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
1747 // Run a second time to clean up any type tests left behind by WPD for use
1748 // in ICP.
1749 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
1750
1752
1753 // Emit annotation remarks.
1755
1756 return MPM;
1757 }
1758
1759 if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) {
1760 // Load sample profile before running the LTO optimization pipeline.
1761 MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
1762 PGOOpt->ProfileRemappingFile,
1764 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
1765 // RequireAnalysisPass for PSI before subsequent non-module passes.
1767 }
1768
1769 // Try to run OpenMP optimizations, quick no-op if no OpenMP metadata present.
1771
1772 // Remove unused virtual tables to improve the quality of code generated by
1773 // whole-program devirtualization and bitset lowering.
1774 MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));
1775
1776 // Do basic inference of function attributes from known properties of system
1777 // libraries and other oracles.
1779
1780 if (Level.getSpeedupLevel() > 1) {
1783
1784 // Indirect call promotion. This should promote all the targets that are
1785 // left by the earlier promotion pass that promotes intra-module targets.
1786 // This two-step promotion is to save the compile time. For LTO, it should
1787 // produce the same result as if we only do promotion here.
1789 true /* InLTO */, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
1790
1791 // Propagate constants at call sites into the functions they call. This
1792 // opens opportunities for globalopt (and inlining) by substituting function
1793 // pointers passed as arguments to direct uses of functions.
1794 MPM.addPass(IPSCCPPass(IPSCCPOptions(/*AllowFuncSpec=*/
1795 Level != OptimizationLevel::Os &&
1796 Level != OptimizationLevel::Oz)));
1797
1798 // Attach metadata to indirect call sites indicating the set of functions
1799 // they may target at run-time. This should follow IPSCCP.
1801 }
1802
1803 // Now deduce any function attributes based on the current code.
1804 MPM.addPass(
1806
1807 // Do RPO function attribute inference across the module to forward-propagate
1808 // attributes where applicable.
1809 // FIXME: Is this really an optimization rather than a canonicalization?
1811
1812 // Use in-range annotations on GEP indices to split globals where beneficial.
1814
1815 // Run whole program optimization of virtual call when the list of callees
1816 // is fixed.
1817 MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
1818
1819 // Stop here at -O1.
1820 if (Level == OptimizationLevel::O1) {
1821 // The LowerTypeTestsPass needs to run to lower type metadata and the
1822 // type.test intrinsics. The pass does nothing if CFI is disabled.
1823 MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
1824 // Run a second time to clean up any type tests left behind by WPD for use
1825 // in ICP (which is performed earlier than this in the regular LTO
1826 // pipeline).
1827 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
1828
1830
1831 // Emit annotation remarks.
1833
1834 return MPM;
1835 }
1836
1837 // Optimize globals to try and fold them into constants.
1839
1840 // Promote any localized globals to SSA registers.
1842
1843 // Linking modules together can lead to duplicate global constants; only
1844 // keep one copy of each constant.
1846
1847 // Remove unused arguments from functions.
1849
1850 // Reduce the code after globalopt and ipsccp. Both can open up significant
1851 // simplification opportunities, and both can propagate functions through
1852 // function pointers. When this happens, we often have to resolve varargs
1853 // calls, etc, so let instcombine do this.
1854 FunctionPassManager PeepholeFPM;
1855 PeepholeFPM.addPass(InstCombinePass());
1856 if (Level.getSpeedupLevel() > 1)
1857 PeepholeFPM.addPass(AggressiveInstCombinePass());
1858 invokePeepholeEPCallbacks(PeepholeFPM, Level);
1859
1860 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(PeepholeFPM),
1862
1863 // Note: historically, the PruneEH pass was run first to deduce nounwind and
1864 // generally clean up exception handling overhead. It isn't clear this is
1865 // valuable as the inliner doesn't currently care whether it is inlining an
1866 // invoke or a call.
1867 // Run the inliner now.
1868 if (EnableModuleInliner) {
1872 } else {
1875 /* MandatoryFirst */ true,
1878 }
1879
1880 // Perform context disambiguation after inlining, since that would reduce the
1881 // amount of additional cloning required to distinguish the allocation
1882 // contexts.
1885
1886 // Optimize globals again after we ran the inliner.
1888
1889 // Run the OpenMPOpt pass again after global optimizations.
1891
1892 // Garbage collect dead functions.
1893 MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));
1894
1895 // If we didn't decide to inline a function, check to see if we can
1896 // transform it to pass arguments by value instead of by reference.
1898
1900 // The IPO Passes may leave cruft around. Clean up after them.
1901 FPM.addPass(InstCombinePass());
1902 invokePeepholeEPCallbacks(FPM, Level);
1903
1906
1908
1909 // Do a post inline PGO instrumentation and use pass. This is a context
1910 // sensitive PGO pass.
1911 if (PGOOpt) {
1912 if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
1913 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true,
1914 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
1915 PGOOpt->CSProfileGenFile, PGOOpt->ProfileRemappingFile,
1916 PGOOpt->FS);
1917 else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
1918 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false,
1919 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
1920 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile,
1921 PGOOpt->FS);
1922 }
1923
1924 // Break up allocas
1926
1927 // LTO provides additional opportunities for tailcall elimination due to
1928 // link-time inlining, and visibility of nocapture attribute.
1930
1931 // Run a few AA driver optimizations here and now to cleanup the code.
1934
1935 MPM.addPass(
1937
1938 // Require the GlobalsAA analysis for the module so we can query it within
1939 // MainFPM.
1942 // Invalidate AAManager so it can be recreated and pick up the newly
1943 // available GlobalsAA.
1944 MPM.addPass(
1946 }
1947
1948 FunctionPassManager MainFPM;
1951 /*AllowSpeculation=*/true),
1952 /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
1953
1954 if (RunNewGVN)
1955 MainFPM.addPass(NewGVNPass());
1956 else
1957 MainFPM.addPass(GVNPass());
1958
1959 // Remove dead memcpy()'s.
1960 MainFPM.addPass(MemCpyOptPass());
1961
1962 // Nuke dead stores.
1963 MainFPM.addPass(DSEPass());
1964 MainFPM.addPass(MoveAutoInitPass());
1966
1967 LoopPassManager LPM;
1968 if (EnableLoopFlatten && Level.getSpeedupLevel() > 1)
1969 LPM.addPass(LoopFlattenPass());
1972 // FIXME: Add loop interchange.
1973
1974 // Unroll small loops and perform peeling.
1975 LPM.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
1976 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
1978 // The loop passes in LPM (LoopFullUnrollPass) do not preserve MemorySSA.
1979 // *All* loop passes must preserve it, in order to be able to use it.
1981 std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/true));
1982
1983 MainFPM.addPass(LoopDistributePass());
1984
1985 addVectorPasses(Level, MainFPM, /* IsFullLTO */ true);
1986
1987 // Run the OpenMPOpt CGSCC pass again late.
1990
1991 invokePeepholeEPCallbacks(MainFPM, Level);
1992 MainFPM.addPass(JumpThreadingPass());
1995
1996 // Lower type metadata and the type.test intrinsic. This pass supports
1997 // clang's control flow integrity mechanisms (-fsanitize=cfi*) and needs
1998 // to be run at link time if CFI is enabled. This pass does nothing if
1999 // CFI is disabled.
2000 MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
2001 // Run a second time to clean up any type tests left behind by WPD for use
2002 // in ICP (which is performed earlier than this in the regular LTO pipeline).
2003 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
2004
2005 // Enable splitting late in the FullLTO post-link pipeline.
2008
2009 // Add late LTO optimization passes.
2010 FunctionPassManager LateFPM;
2011
2012 // LoopSink pass sinks instructions hoisted by LICM, which serves as a
2013 // canonicalization pass that enables other optimizations. As a result,
2014 // LoopSink pass needs to be a very late IR pass to avoid undoing LICM
2015 // result too early.
2016 LateFPM.addPass(LoopSinkPass());
2017
2018 // This hoists/decomposes div/rem ops. It should run after other sink/hoist
2019 // passes to avoid re-sinking, but before SimplifyCFG because it can allow
2020 // flattening of blocks.
2021 LateFPM.addPass(DivRemPairsPass());
2022
2023 // Delete basic blocks, which optimization passes may have killed.
2024 LateFPM.addPass(SimplifyCFGPass(
2025 SimplifyCFGOptions().convertSwitchRangeToICmp(true).hoistCommonInsts(
2026 true)));
2027 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(LateFPM)));
2028
2029 // Drop bodies of available_externally objects to improve GlobalDCE.
2031
2032 // Now that we have optimized the program, discard unreachable functions.
2033 MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));
2034
2035 if (PTO.MergeFunctions)
2037
2038 if (PTO.CallGraphProfile)
2039 MPM.addPass(CGProfilePass(/*InLTOPostLink=*/true));
2040
2042
2043 // Emit annotation remarks.
2045
2046 return MPM;
2047}
2048
// Builds the -O0 pipeline: only the passes LLVM semantically requires
// (always-inlining, coroutine lowering) plus any user-registered extension
// point callbacks and optional PGO/pseudo-probe instrumentation.
// NOTE(review): the signature head is missing from this listing; the assert
// below confirms the function is buildO0DefaultPipeline(Level, LTOPreLink).
// Blank numbered lines mark statements dropped by the extraction.
2050 bool LTOPreLink) {
2051 assert(Level == OptimizationLevel::O0 &&
2052 "buildO0DefaultPipeline should only be used with O0");
2053
2055
2056 // Perform pseudo probe instrumentation in O0 mode. This is for the
2057 // consistency between different build modes. For example, a LTO build can be
2058 // mixed with an O0 prelink and an O2 postlink. Loading a sample profile in
2059 // the postlink will require pseudo probe instrumentation in the prelink.
2060 if (PGOOpt && PGOOpt->PseudoProbeForProfiling)
2062
2063 if (PGOOpt && (PGOOpt->Action == PGOOptions::IRInstr ||
2064 PGOOpt->Action == PGOOptions::IRUse))
2066 MPM,
2067 /*RunProfileGen=*/(PGOOpt->Action == PGOOptions::IRInstr),
2068 /*IsCS=*/false, PGOOpt->AtomicCounterUpdate, PGOOpt->ProfileFile,
2069 PGOOpt->ProfileRemappingFile, PGOOpt->FS);
2070
2072
2073 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
2075
2077
2078 // Build a minimal pipeline based on the semantics required by LLVM,
2079 // which is just that always inlining occurs. Further, disable generating
2080 // lifetime intrinsics to avoid enabling further optimizations during
2081 // code generation.
2083 /*InsertLifetimeIntrinsics=*/false));
2084
2085 if (PTO.MergeFunctions)
2087
2088 if (EnableMatrix)
2089 MPM.addPass(
2091
2092 if (!CGSCCOptimizerLateEPCallbacks.empty()) {
2093 CGSCCPassManager CGPM;
2095 if (!CGPM.isEmpty())
2097 }
2098 if (!LateLoopOptimizationsEPCallbacks.empty()) {
2099 LoopPassManager LPM;
2101 if (!LPM.isEmpty()) {
2103 createFunctionToLoopPassAdaptor(std::move(LPM))));
2104 }
2105 }
2106 if (!LoopOptimizerEndEPCallbacks.empty()) {
2107 LoopPassManager LPM;
2109 if (!LPM.isEmpty()) {
2111 createFunctionToLoopPassAdaptor(std::move(LPM))));
2112 }
2113 }
2114 if (!ScalarOptimizerLateEPCallbacks.empty()) {
2117 if (!FPM.isEmpty())
2119 }
2120
2122
2123 if (!VectorizerStartEPCallbacks.empty()) {
2126 if (!FPM.isEmpty())
2128 }
2129
2130 ModulePassManager CoroPM;
2131 CoroPM.addPass(CoroEarlyPass());
2132 CGSCCPassManager CGPM;
2133 CGPM.addPass(CoroSplitPass());
2134 CoroPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
2135 CoroPM.addPass(CoroCleanupPass());
2136 CoroPM.addPass(GlobalDCEPass());
2137 MPM.addPass(CoroConditionalWrapper(std::move(CoroPM)));
2138
2140
2141 if (LTOPreLink)
2142 addRequiredLTOPreLinkPasses(MPM);
2143
2145
2146 return MPM;
2147}
2148
// Builds the default alias-analysis stack for the AAManager. Registration
// order determines query priority: BasicAA first, then IR-embedded AAs,
// cached GlobalsAA results, and finally target-specific AAs when a
// TargetMachine is available.
// NOTE(review): the signature line is missing from this listing; presumably
// AAManager PassBuilder::buildDefaultAAPipeline() — confirm upstream. The
// registration calls themselves were dropped by the extraction (blank
// numbered lines); only the local AA object and the return survive.
2150 AAManager AA;
2151
2152 // The order in which these are registered determines their priority when
2153 // being queried.
2154
2155 // First we register the basic alias analysis that provides the majority of
2156 // per-function local AA logic. This is a stateless, on-demand local set of
2157 // AA techniques.
2159
2160 // Next we query fast, specialized alias analyses that wrap IR-embedded
2161 // information about aliasing.
2164
2165 // Add support for querying global aliasing information when available.
2166 // Because the `AAManager` is a function analysis and `GlobalsAA` is a module
2167 // analysis, all that the `AAManager` can do is query for any *cached*
2168 // results from `GlobalsAA` through a readonly proxy.
2171
2172 // Add target-specific alias analyses.
2173 if (TM)
2175
2176 return AA;
2177}
aarch64 falkor hwpf fix Falkor HW Prefetch Fix Late Phase
AggressiveInstCombiner - Combine expression patterns to form expressions with fewer,...
Provides passes to inlining "always_inline" functions.
This is the interface for LLVM's primary stateless and local alias analysis.
This file provides the interface for LLVM's Call Graph Profile pass.
This header provides classes for managing passes over SCCs of the call graph.
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:693
This file provides the interface for a simple, fast CSE pass.
This file provides a pass which clones the current module and runs the provided pass pipeline on the ...
Super simple passes to force specific function attrs from the commandline into the IR for debugging p...
Provides passes for computing function attributes based on interprocedural analyses.
This file provides the interface for LLVM's Global Value Numbering pass which eliminates fully redund...
This is the interface for a simple mod/ref and alias analysis over globals.
AcceleratorCodeSelection - Identify all functions reachable from a kernel, removing those that are un...
Interfaces for passes which infer implicit function attributes from the name and signature of functio...
This file provides the primary interface to the instcombine pass.
Defines passes for running instruction simplification across chunks of IR.
This file provides the interface for LLVM's PGO Instrumentation lowering pass.
See the comments on JumpThreadingPass.
static LVOptions Options
Definition: LVOptions.cpp:25
This header defines the LoopLoadEliminationPass object.
This header provides classes for managing a pipeline of passes over loops in LLVM IR.
The header file for the LowerConstantIntrinsics pass as used by the new pass manager.
The header file for the LowerExpectIntrinsic pass as used by the new pass manager.
This pass performs merges of loads and stores on both sides of a diamond (hammock).
This file provides the interface for LLVM's Global Value Numbering pass.
This header enumerates the LLVM-provided high-level optimization levels.
This file provides the interface for IR based instrumentation passes (profile-gen,...
Define option tunables for PGO.
ModulePassManager MPM
static cl::opt< bool > EnableMergeFunctions("enable-merge-functions", cl::init(false), cl::Hidden, cl::desc("Enable function merging as part of the optimization pipeline"))
static cl::opt< bool > EnableGlobalAnalyses("enable-global-analyses", cl::init(true), cl::Hidden, cl::desc("Enable inter-procedural analyses"))
static cl::opt< bool > EnableIROutliner("ir-outliner", cl::init(false), cl::Hidden, cl::desc("Enable ir outliner pass"))
static cl::opt< bool > RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden, cl::desc("Run the NewGVN pass"))
static cl::opt< bool > DisablePreInliner("disable-preinline", cl::init(false), cl::Hidden, cl::desc("Disable pre-instrumentation inliner"))
static cl::opt< bool > EnableEagerlyInvalidateAnalyses("eagerly-invalidate-analyses", cl::init(true), cl::Hidden, cl::desc("Eagerly invalidate more analyses in default pipelines"))
static cl::opt< bool > ExtraVectorizerPasses("extra-vectorizer-passes", cl::init(false), cl::Hidden, cl::desc("Run cleanup optimization passes after vectorization"))
static void addAnnotationRemarksPass(ModulePassManager &MPM)
static cl::opt< bool > EnablePostPGOLoopRotation("enable-post-pgo-loop-rotation", cl::init(true), cl::Hidden, cl::desc("Run the loop rotation transformation after PGO instrumentation"))
static InlineParams getInlineParamsFromOptLevel(OptimizationLevel Level)
static cl::opt< bool > EnableGVNSink("enable-gvn-sink", cl::desc("Enable the GVN sinking pass (default = off)"))
static cl::opt< bool > PerformMandatoryInliningsFirst("mandatory-inlining-first", cl::init(false), cl::Hidden, cl::desc("Perform mandatory inlinings module-wide, before performing " "inlining"))
static cl::opt< bool > RunPartialInlining("enable-partial-inlining", cl::init(false), cl::Hidden, cl::desc("Run Partial inlinining pass"))
static cl::opt< bool > EnableGVNHoist("enable-gvn-hoist", cl::desc("Enable the GVN hoisting pass (default = off)"))
static cl::opt< bool > EnableDFAJumpThreading("enable-dfa-jump-thread", cl::desc("Enable DFA jump threading"), cl::init(false), cl::Hidden)
static cl::opt< bool > EnableCHR("enable-chr", cl::init(true), cl::Hidden, cl::desc("Enable control height reduction optimization (CHR)"))
static cl::opt< bool > EnableHotColdSplit("hot-cold-split", cl::desc("Enable hot-cold splitting pass"))
static cl::opt< bool > EnableLoopInterchange("enable-loopinterchange", cl::init(false), cl::Hidden, cl::desc("Enable the experimental LoopInterchange Pass"))
static cl::opt< int > PreInlineThreshold("preinline-threshold", cl::Hidden, cl::init(75), cl::desc("Control the amount of inlining in pre-instrumentation inliner " "(default = 75)"))
static cl::opt< bool > EnableLoopHeaderDuplication("enable-loop-header-duplication", cl::init(false), cl::Hidden, cl::desc("Enable loop header duplication at any optimization level"))
static cl::opt< bool > EnablePGOForceFunctionAttrs("enable-pgo-force-function-attrs", cl::desc("Enable pass to set function attributes based on PGO profiles"), cl::init(false))
static cl::opt< bool > EnableUnrollAndJam("enable-unroll-and-jam", cl::init(false), cl::Hidden, cl::desc("Enable Unroll And Jam Pass"))
static cl::opt< bool > EnableModuleInliner("enable-module-inliner", cl::init(false), cl::Hidden, cl::desc("Enable module inliner"))
static cl::opt< bool > EnableMatrix("enable-matrix", cl::init(false), cl::Hidden, cl::desc("Enable lowering of the matrix intrinsics"))
static cl::opt< AttributorRunOption > AttributorRun("attributor-enable", cl::Hidden, cl::init(AttributorRunOption::NONE), cl::desc("Enable the attributor inter-procedural deduction pass"), cl::values(clEnumValN(AttributorRunOption::ALL, "all", "enable all attributor runs"), clEnumValN(AttributorRunOption::MODULE, "module", "enable module-wide attributor runs"), clEnumValN(AttributorRunOption::CGSCC, "cgscc", "enable call graph SCC attributor runs"), clEnumValN(AttributorRunOption::NONE, "none", "disable attributor runs")))
static cl::opt< bool > EnableOrderFileInstrumentation("enable-order-file-instrumentation", cl::init(false), cl::Hidden, cl::desc("Enable order file instrumentation (default = off)"))
static cl::opt< bool > UseLoopVersioningLICM("enable-loop-versioning-licm", cl::init(false), cl::Hidden, cl::desc("Enable the experimental Loop Versioning LICM pass"))
static cl::opt< bool > EnableSyntheticCounts("enable-npm-synthetic-counts", cl::Hidden, cl::desc("Run synthetic function entry count generation " "pass"))
static bool isLTOPreLink(ThinOrFullLTOPhase Phase)
static cl::opt< bool > EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true), cl::Hidden, cl::desc("Enable inline deferral during PGO"))
Flag to enable inline deferral during PGO.
static cl::opt< bool > EnableJumpTableToSwitch("enable-jump-table-to-switch", cl::desc("Enable JumpTableToSwitch pass (default = off)"))
static cl::opt< InliningAdvisorMode > UseInlineAdvisor("enable-ml-inliner", cl::init(InliningAdvisorMode::Default), cl::Hidden, cl::desc("Enable ML policy for inliner. Currently trained for -Oz only"), cl::values(clEnumValN(InliningAdvisorMode::Default, "default", "Heuristics-based inliner version"), clEnumValN(InliningAdvisorMode::Development, "development", "Use development mode (runtime-loadable model)"), clEnumValN(InliningAdvisorMode::Release, "release", "Use release mode (AOT-compiled model)")))
static cl::opt< bool > FlattenedProfileUsed("flattened-profile-used", cl::init(false), cl::Hidden, cl::desc("Indicate the sample profile being used is flattened, i.e., " "no inline hierachy exists in the profile"))
static cl::opt< bool > EnableConstraintElimination("enable-constraint-elimination", cl::init(true), cl::Hidden, cl::desc("Enable pass to eliminate conditions based on linear constraints"))
static cl::opt< bool > EnableLoopFlatten("enable-loop-flatten", cl::init(false), cl::Hidden, cl::desc("Enable the LoopFlatten Pass"))
This header defines various interfaces for pass management in LLVM.
This file implements relative lookup table converter that converts lookup tables to relative lookup t...
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file provides the interface for LLVM's Scalar Replacement of Aggregates pass.
This file provides the interface for the pseudo probe implementation for AutoFDO.
This file provides the interface for the sampled PGO loader pass.
This is the interface for a metadata-based scoped no-alias analysis.
This file provides the interface for the pass responsible for both simplifying and canonicalizing the...
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
This is the interface for a metadata-based TBAA.
Defines the virtual file system interface vfs::FileSystem.
A manager for alias analyses.
void registerFunctionAnalysis()
Register a specific AA result.
void registerModuleAnalysis()
Register a specific AA result.
Inlines functions marked as "always_inline".
Definition: AlwaysInliner.h:32
Argument promotion pass.
Analysis pass providing a never-invalidated alias analysis result.
Simple pass that canonicalizes aliases.
A pass that merges duplicate global constants into a single constant.
Definition: ConstantMerge.h:29
This class implements a trivial dead store elimination.
Eliminate dead arguments (and return values) from functions.
A pass that transforms external global definitions into declarations.
Pass embeds a copy of the module optimized with the provided pass pipeline into a global variable.
The core GVN pass object.
Definition: GVN.h:117
Pass to remove unused function declarations.
Definition: GlobalDCE.h:36
Optimize globals that never have their address taken.
Definition: GlobalOpt.h:25
Pass to perform split of global variables.
Definition: GlobalSplit.h:26
Analysis pass providing a never-invalidated alias analysis result.
Pass to outline cold regions.
Pass to perform interprocedural constant propagation.
Definition: SCCP.h:48
Pass to outline similar regions.
Definition: IROutliner.h:444
Run instruction simplification across each instruction in the function.
The instrumentation pass for recording function order.
Instrumentation based profiling lowering pass.
A smart pointer to a reference-counted object that inherits from RefCountedBase or ThreadSafeRefCount...
This pass performs 'jump threading', which looks at blocks that have multiple predecessors and multip...
Definition: JumpThreading.h:79
Performs Loop Invariant Code Motion Pass.
Definition: LICM.h:66
Loop unroll pass that only does full loop unrolling and peeling.
Performs Loop Idiom Recognize Pass.
Performs Loop Inst Simplify Pass.
A simple loop rotation transformation.
Definition: LoopRotation.h:24
Performs basic CFG simplifications to assist other loop passes.
A pass that does profile-guided sinking of instructions into loops.
Definition: LoopSink.h:33
A simple loop rotation transformation.
Loop unroll pass that will support both full and partial unrolling.
Merge identical functions.
The module inliner pass for the new pass manager.
Definition: ModuleInliner.h:27
Module pass, wrapping the inliner pass.
Definition: Inliner.h:62
void addModulePass(T Pass)
Add a module pass that runs before the CGSCC passes.
Definition: Inliner.h:78
Class to hold module path string table and global value map, and encapsulate methods for operating on...
Simple pass that provides a name to every anonymous globals.
OpenMP optimizations pass.
Definition: OpenMPOpt.h:42
static const OptimizationLevel O3
Optimize for fast execution as much as possible.
static const OptimizationLevel Oz
A very specialized mode that will optimize for code size at any and all costs.
static const OptimizationLevel O0
Disable as many optimizations as possible.
static const OptimizationLevel Os
Similar to O2 but tries to optimize for small code size instead of fast execution without triggering ...
static const OptimizationLevel O2
Optimize for fast execution as much as possible without triggering significant incremental compile ti...
static const OptimizationLevel O1
Optimize quickly without destroying debuggability.
The indirect function call promotion pass.
The instrumentation (profile-instr-gen) pass for IR based PGO.
The instrumentation (profile-instr-gen) pass for IR based PGO.
The profile annotation (profile-instr-use) pass for IR based PGO.
The profile size based optimization pass for memory intrinsics.
Pass to remove unused function declarations.
ModulePassManager buildO0DefaultPipeline(OptimizationLevel Level, bool LTOPreLink=false)
Build an O0 pipeline with the minimal semantically required passes.
void invokeFullLinkTimeOptimizationLastEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level)
ModuleInlinerWrapperPass buildInlinerPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the module pipeline that performs inlining as well as the inlining-driven cleanups.
void invokeOptimizerLastEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level)
void invokeVectorizerStartEPCallbacks(FunctionPassManager &FPM, OptimizationLevel Level)
AAManager buildDefaultAAPipeline()
Build the default AAManager with the default alias analysis pipeline registered.
void invokeCGSCCOptimizerLateEPCallbacks(CGSCCPassManager &CGPM, OptimizationLevel Level)
ModulePassManager buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level)
Build a pre-link, ThinLTO-targeting default optimization pipeline to a pass manager.
void invokeScalarOptimizerLateEPCallbacks(FunctionPassManager &FPM, OptimizationLevel Level)
ModulePassManager buildPerModuleDefaultPipeline(OptimizationLevel Level, bool LTOPreLink=false)
Build a per-module default optimization pipeline.
void invokePipelineStartEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level)
FunctionPassManager buildFunctionSimplificationPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the core LLVM function canonicalization and simplification pipeline.
void invokePeepholeEPCallbacks(FunctionPassManager &FPM, OptimizationLevel Level)
void invokeLoopOptimizerEndEPCallbacks(LoopPassManager &LPM, OptimizationLevel Level)
ModulePassManager buildLTODefaultPipeline(OptimizationLevel Level, ModuleSummaryIndex *ExportSummary)
Build an LTO default optimization pipeline to a pass manager.
ModulePassManager buildModuleInlinerPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the module pipeline that performs inlining with module inliner pass.
ModulePassManager buildThinLTODefaultPipeline(OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary)
Build a ThinLTO default optimization pipeline to a pass manager.
void invokeLateLoopOptimizationsEPCallbacks(LoopPassManager &LPM, OptimizationLevel Level)
void invokeOptimizerEarlyEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level)
void invokePipelineEarlySimplificationEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level)
void invokeFullLinkTimeOptimizationEarlyEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level)
ModulePassManager buildFatLTODefaultPipeline(OptimizationLevel Level, bool ThinLTO, bool EmitSummary)
Build a fat object default optimization pipeline.
ModulePassManager buildModuleSimplificationPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the core LLVM module canonicalization and simplification pipeline.
ModulePassManager buildModuleOptimizationPipeline(OptimizationLevel Level, ThinOrFullLTOPhase LTOPhase)
Construct the core LLVM module optimization pipeline.
void addPGOInstrPassesForO0(ModulePassManager &MPM, bool RunProfileGen, bool IsCS, bool AtomicCounterUpdate, std::string ProfileFile, std::string ProfileRemappingFile, IntrusiveRefCntPtr< vfs::FileSystem > FS)
Add PGOInstrumenation passes for O0 only.
ModulePassManager buildLTOPreLinkDefaultPipeline(OptimizationLevel Level)
Build a pre-link, LTO-targeting default optimization pipeline to a pass manager.
LLVM_ATTRIBUTE_MINSIZE std::enable_if_t< is_detected< HasRunOnLoopT, PassT >::value > addPass(PassT &&Pass)
LLVM_ATTRIBUTE_MINSIZE void addPass(PassT &&Pass)
Definition: PassManager.h:249
bool isEmpty() const
Returns if the pass manager contains any passes.
Definition: PassManager.h:269
unsigned LicmMssaNoAccForPromotionCap
Tuning option to disable promotion to scalars in LICM with MemorySSA, if the number of access is too ...
Definition: PassBuilder.h:72
bool SLPVectorization
Tuning option to enable/disable slp loop vectorization, set based on opt level.
Definition: PassBuilder.h:57
int InlinerThreshold
Tuning option to override the default inliner threshold.
Definition: PassBuilder.h:86
bool CallGraphProfile
Tuning option to enable/disable call graph profile.
Definition: PassBuilder.h:76
bool MergeFunctions
Tuning option to enable/disable function merging.
Definition: PassBuilder.h:83
bool ForgetAllSCEVInLoopUnroll
Tuning option to forget all SCEV loops in LoopUnroll.
Definition: PassBuilder.h:64
unsigned LicmMssaOptCap
Tuning option to cap the number of calls to retrieve clobbering accesses in MemorySSA,...
Definition: PassBuilder.h:68
bool LoopInterleaving
Tuning option to set loop interleaving on/off, set based on opt level.
Definition: PassBuilder.h:49
PipelineTuningOptions()
Constructor sets pipeline tuning defaults based on cl::opts.
bool LoopUnrolling
Tuning option to enable/disable loop unrolling. Its default value is true.
Definition: PassBuilder.h:60
bool LoopVectorization
Tuning option to enable/disable loop vectorization, set based on opt level.
Definition: PassBuilder.h:53
Reassociate commutative expressions.
Definition: Reassociate.h:71
A pass to do RPO deduction and propagation of function attributes.
Definition: FunctionAttrs.h:73
This pass performs function-level constant propagation and merging.
Definition: SCCP.h:29
The sample profiler data loader pass.
Definition: SampleProfile.h:39
Analysis pass providing a never-invalidated alias analysis result.
This pass transforms loops that contain branches or switches on loop- invariant conditions to have mu...
A pass to simplify and canonicalize the CFG of a function.
Definition: SimplifyCFG.h:29
virtual void registerDefaultAliasAnalyses(AAManager &)
Allow the target to register alias analyses with the AAManager for use with the new pass manager.
Analysis pass providing a never-invalidated alias analysis result.
Optimize scalar/vector interactions in IR using target cost models.
Definition: VectorCombine.h:23
Interfaces for registering analysis passes, producing common pass manager configurations,...
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:718
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
cl::opt< bool > EnableKnowledgeRetention
ModuleToFunctionPassAdaptor createModuleToFunctionPassAdaptor(FunctionPassT &&Pass, bool EagerlyInvalidate=false)
A function to deduce a function pass type and wrap it in the templated adaptor.
Definition: PassManager.h:916
@ MODULE
Definition: Attributor.h:6427
@ CGSCC
Definition: Attributor.h:6428
ThinOrFullLTOPhase
This enumerates the LLVM full LTO or ThinLTO optimization phases.
Definition: Pass.h:76
@ FullLTOPreLink
Full LTO prelink phase.
@ ThinLTOPostLink
ThinLTO postlink (backend compile) phase.
@ None
No LTO/ThinLTO behavior needed.
@ FullLTOPostLink
Full LTO postlink (backend compile) phase.
@ ThinLTOPreLink
ThinLTO prelink (summary) phase.
ModuleToPostOrderCGSCCPassAdaptor createModuleToPostOrderCGSCCPassAdaptor(CGSCCPassT &&Pass)
A function to deduce a function pass type and wrap it in the templated adaptor.
CGSCCToFunctionPassAdaptor createCGSCCToFunctionPassAdaptor(FunctionPassT &&Pass, bool EagerlyInvalidate=false, bool NoRerun=false)
A function to deduce a function pass type and wrap it in the templated adaptor.
cl::opt< bool > ForgetSCEVInLoopUnroll
bool AreStatisticsEnabled()
Check if statistics are enabled.
Definition: Statistic.cpp:139
cl::opt< bool > EnableInferAlignmentPass
cl::opt< bool > EnableMemProfContextDisambiguation
Enable MemProf context disambiguation for thin link.
InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options.
cl::opt< unsigned > SetLicmMssaNoAccForPromotionCap
std::enable_if_t< is_detected< HasRunOnLoopT, LoopPassT >::value, FunctionToLoopPassAdaptor > createFunctionToLoopPassAdaptor(LoopPassT &&Pass, bool UseMemorySSA=false, bool UseBlockFrequencyInfo=false, bool UseBranchProbabilityInfo=false)
A function to deduce a loop pass type and wrap it in the templated adaptor.
cl::opt< unsigned > MaxDevirtIterations("max-devirt-iterations", cl::ReallyHidden, cl::init(4))
cl::opt< unsigned > SetLicmMssaOptCap
A DCE pass that assumes instructions are dead until proven otherwise.
Definition: ADCE.h:31
Pass to convert @llvm.global.annotations to !annotation metadata.
This pass attempts to minimize the number of assumes without losing any information.
Hoist/decompose integer division and remainder instructions to enable CFG improvements and better cod...
Definition: DivRemPairs.h:23
A simple and fast domtree-based CSE pass.
Definition: EarlyCSE.h:30
A pass manager to run a set of extra function simplification passes after vectorization,...
Pass which forces specific function attributes into the IR, primarily as a debugging tool.
A simple and fast domtree-based GVN pass to hoist common expressions from sibling branches.
Definition: GVN.h:392
Uses an "inverted" value numbering to decide the similarity of expressions and sinks similar expressi...
Definition: GVN.h:399
A set of parameters to control various transforms performed by IPSCCP pass.
Definition: SCCP.h:35
A pass which infers function attributes from the names and signatures of function declarations in a m...
Provides context on when an inline advisor is constructed in the pipeline (e.g., link phase,...
Definition: InlineAdvisor.h:59
Thresholds to tune inline cost analysis.
Definition: InlineCost.h:206
std::optional< int > HotCallSiteThreshold
Threshold to use when the callsite is considered hot.
Definition: InlineCost.h:223
int DefaultThreshold
The default threshold to start with for a callee.
Definition: InlineCost.h:208
std::optional< bool > EnableDeferral
Indicate whether we should allow inline deferral.
Definition: InlineCost.h:236
std::optional< int > HintThreshold
Threshold to use for callees with inline hint.
Definition: InlineCost.h:211
Options for the frontend instrumentation based profiling pass.
A no-op pass template which simply forces a specific analysis result to be invalidated.
Definition: PassManager.h:969
Pass to forward loads in a loop around the backedge to subsequent iterations.
A set of parameters used to control various transforms performed by the LoopUnroll pass.
The LoopVectorize Pass.
Computes function attributes in post-order over the call graph.
Definition: FunctionAttrs.h:49
A utility pass template to force an analysis result to be available.
Definition: PassManager.h:942