LLVM 18.0.0git
PassBuilderPipelines.cpp
1//===- Construction of pass pipelines -------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9///
10/// This file provides the implementation of the PassBuilder based on our
11/// static pass registry as well as related functionality. It also provides
12/// helpers to aid in analyzing, debugging, and testing passes and pass
13/// pipelines.
14///
15//===----------------------------------------------------------------------===//
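//
// NOTE (editorial, illustrative only -- not part of the original file): the
// pipelines built here are normally driven through the public PassBuilder API
// declared in llvm/Passes/PassBuilder.h. A minimal sketch of typical embedder
// usage, assuming a Module `M` and the usual analysis-manager boilerplate,
// looks roughly like:
//
//   LoopAnalysisManager LAM;
//   FunctionAnalysisManager FAM;
//   CGSCCAnalysisManager CGAM;
//   ModuleAnalysisManager MAM;
//   PassBuilder PB;
//   PB.registerModuleAnalyses(MAM);
//   PB.registerCGSCCAnalyses(CGAM);
//   PB.registerFunctionAnalyses(FAM);
//   PB.registerLoopAnalyses(LAM);
//   PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);
//   ModulePassManager MPM =
//       PB.buildPerModuleDefaultPipeline(OptimizationLevel::O2);
//   MPM.run(M, MAM);
//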
16
17#include "llvm/ADT/Statistic.h"
26#include "llvm/IR/PassManager.h"
136
137using namespace llvm;
138
140 "enable-ml-inliner", cl::init(InliningAdvisorMode::Default), cl::Hidden,
141 cl::desc("Enable ML policy for inliner. Currently trained for -Oz only"),
142 cl::values(clEnumValN(InliningAdvisorMode::Default, "default",
143 "Heuristics-based inliner version"),
144 clEnumValN(InliningAdvisorMode::Development, "development",
145 "Use development mode (runtime-loadable model)"),
146 clEnumValN(InliningAdvisorMode::Release, "release",
147 "Use release mode (AOT-compiled model)")));
148
150 "enable-npm-synthetic-counts", cl::Hidden,
151 cl::desc("Run synthetic function entry count generation "
152 "pass"));
153
154/// Flag to enable inline deferral during PGO.
155static cl::opt<bool>
156 EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true),
158 cl::desc("Enable inline deferral during PGO"));
159
160static cl::opt<bool> EnableModuleInliner("enable-module-inliner",
161 cl::init(false), cl::Hidden,
162 cl::desc("Enable module inliner"));
163
165 "mandatory-inlining-first", cl::init(true), cl::Hidden,
166 cl::desc("Perform mandatory inlinings module-wide, before performing "
167 "inlining"));
168
170 "eagerly-invalidate-analyses", cl::init(true), cl::Hidden,
171 cl::desc("Eagerly invalidate more analyses in default pipelines"));
172
174 "enable-merge-functions", cl::init(false), cl::Hidden,
175 cl::desc("Enable function merging as part of the optimization pipeline"));
176
178 "enable-post-pgo-loop-rotation", cl::init(true), cl::Hidden,
179 cl::desc("Run the loop rotation transformation after PGO instrumentation"));
180
182 "enable-global-analyses", cl::init(true), cl::Hidden,
183 cl::desc("Enable inter-procedural analyses"));
184
185static cl::opt<bool>
186 RunPartialInlining("enable-partial-inlining", cl::init(false), cl::Hidden,
187 cl::desc("Run Partial inlinining pass"));
188
190 "extra-vectorizer-passes", cl::init(false), cl::Hidden,
191 cl::desc("Run cleanup optimization passes after vectorization"));
192
193static cl::opt<bool> RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden,
194 cl::desc("Run the NewGVN pass"));
195
197 "enable-loopinterchange", cl::init(false), cl::Hidden,
198 cl::desc("Enable the experimental LoopInterchange Pass"));
199
200static cl::opt<bool> EnableUnrollAndJam("enable-unroll-and-jam",
201 cl::init(false), cl::Hidden,
202 cl::desc("Enable Unroll And Jam Pass"));
203
204static cl::opt<bool> EnableLoopFlatten("enable-loop-flatten", cl::init(false),
206 cl::desc("Enable the LoopFlatten Pass"));
207
208static cl::opt<bool>
209 EnableDFAJumpThreading("enable-dfa-jump-thread",
210 cl::desc("Enable DFA jump threading"),
211 cl::init(false), cl::Hidden);
212
213static cl::opt<bool>
214 EnableHotColdSplit("hot-cold-split",
215 cl::desc("Enable hot-cold splitting pass"));
216
217static cl::opt<bool> EnableIROutliner("ir-outliner", cl::init(false),
219 cl::desc("Enable ir outliner pass"));
220
221static cl::opt<bool>
222 DisablePreInliner("disable-preinline", cl::init(false), cl::Hidden,
223 cl::desc("Disable pre-instrumentation inliner"));
224
226 "preinline-threshold", cl::Hidden, cl::init(75),
227 cl::desc("Control the amount of inlining in pre-instrumentation inliner "
228 "(default = 75)"));
229
230static cl::opt<bool>
231 EnableGVNHoist("enable-gvn-hoist",
232 cl::desc("Enable the GVN hoisting pass (default = off)"));
233
234static cl::opt<bool>
235 EnableGVNSink("enable-gvn-sink",
236 cl::desc("Enable the GVN sinking pass (default = off)"));
237
238// This option is used to simplify testing of SampleFDO optimizations for
239// profile loading.
240static cl::opt<bool>
241 EnableCHR("enable-chr", cl::init(true), cl::Hidden,
242 cl::desc("Enable control height reduction optimization (CHR)"));
243
245 "flattened-profile-used", cl::init(false), cl::Hidden,
246 cl::desc("Indicate the sample profile being used is flattened, i.e., "
247 "no inline hierachy exists in the profile"));
248
250 "enable-order-file-instrumentation", cl::init(false), cl::Hidden,
251 cl::desc("Enable order file instrumentation (default = off)"));
252
253static cl::opt<bool>
254 EnableMatrix("enable-matrix", cl::init(false), cl::Hidden,
255 cl::desc("Enable lowering of the matrix intrinsics"));
256
258 "enable-constraint-elimination", cl::init(true), cl::Hidden,
259 cl::desc(
260 "Enable pass to eliminate conditions based on linear constraints"));
261
263 "attributor-enable", cl::Hidden, cl::init(AttributorRunOption::NONE),
264 cl::desc("Enable the attributor inter-procedural deduction pass"),
265 cl::values(clEnumValN(AttributorRunOption::ALL, "all",
266 "enable all attributor runs"),
267 clEnumValN(AttributorRunOption::MODULE, "module",
268 "enable module-wide attributor runs"),
269 clEnumValN(AttributorRunOption::CGSCC, "cgscc",
270 "enable call graph SCC attributor runs"),
271 clEnumValN(AttributorRunOption::NONE, "none",
272 "disable attributor runs")));
273
275 "enable-memprof-context-disambiguation", cl::init(false), cl::Hidden,
276 cl::ZeroOrMore, cl::desc("Enable MemProf context disambiguation"));
277
279
281 LoopInterleaving = true;
282 LoopVectorization = true;
283 SLPVectorization = false;
284 LoopUnrolling = true;
288 CallGraphProfile = true;
289 UnifiedLTO = false;
291 InlinerThreshold = -1;
293}
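// Illustrative sketch (editorial addition, not part of the original file):
// embedders can override these defaults before constructing a PassBuilder;
// the particular values below are arbitrary examples, not recommendations:
//
//   PipelineTuningOptions PTO;
//   PTO.SLPVectorization = true;
//   PTO.LoopUnrolling = false;
//   PassBuilder PB(/*TM=*/nullptr, PTO);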
294
295namespace llvm {
298} // namespace llvm
299
301 OptimizationLevel Level) {
302 for (auto &C : PeepholeEPCallbacks)
303 C(FPM, Level);
304}
307 for (auto &C : LateLoopOptimizationsEPCallbacks)
308 C(LPM, Level);
309}
311 OptimizationLevel Level) {
312 for (auto &C : LoopOptimizerEndEPCallbacks)
313 C(LPM, Level);
314}
317 for (auto &C : ScalarOptimizerLateEPCallbacks)
318 C(FPM, Level);
319}
321 OptimizationLevel Level) {
322 for (auto &C : CGSCCOptimizerLateEPCallbacks)
323 C(CGPM, Level);
324}
326 OptimizationLevel Level) {
327 for (auto &C : VectorizerStartEPCallbacks)
328 C(FPM, Level);
329}
331 OptimizationLevel Level) {
332 for (auto &C : OptimizerEarlyEPCallbacks)
333 C(MPM, Level);
334}
336 OptimizationLevel Level) {
337 for (auto &C : OptimizerLastEPCallbacks)
338 C(MPM, Level);
339}
342 for (auto &C : FullLinkTimeOptimizationEarlyEPCallbacks)
343 C(MPM, Level);
344}
347 for (auto &C : FullLinkTimeOptimizationLastEPCallbacks)
348 C(MPM, Level);
349}
351 OptimizationLevel Level) {
352 for (auto &C : PipelineStartEPCallbacks)
353 C(MPM, Level);
354}
357 for (auto &C : PipelineEarlySimplificationEPCallbacks)
358 C(MPM, Level);
359}
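// Illustrative sketch (editorial addition, not part of the original file):
// the callback lists walked above are filled in through the corresponding
// PassBuilder registration hooks. For example, a frontend or plugin might
// add a pass at the pipeline-start extension point roughly like this (the
// choice of GlobalDCEPass is arbitrary):
//
//   PB.registerPipelineStartEPCallback(
//       [](ModulePassManager &MPM, OptimizationLevel Level) {
//         MPM.addPass(GlobalDCEPass());
//       });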
360
361// Helper to add AnnotationRemarksPass.
364}
365
366// Helper to check if the current compilation phase is preparing for LTO
370}
371
372// TODO: Investigate the cost/benefit of tail call elimination on debugging.
374PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
376
378
381
382 // Form SSA out of local memory accesses after breaking apart aggregates into
383 // scalars.
385
386 // Catch trivial redundancies
387 FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
388
389 // Hoisting of scalars and load expressions.
390 FPM.addPass(
391 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
393
395
396 invokePeepholeEPCallbacks(FPM, Level);
397
398 FPM.addPass(
399 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
400
401 // Form canonically associated expression trees, and simplify the trees using
402 // basic mathematical properties. For example, this will form (nearly)
403 // minimal multiplication trees.
405
406 // Add the primary loop simplification pipeline.
407 // FIXME: Currently this is split into two loop pass pipelines because we run
408 // some function passes in between them. These can and should be removed
409 // and/or replaced by scheduling the loop pass equivalents in the correct
410 // positions. But those equivalent passes aren't powerful enough yet.
411 // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
412 // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough to
413 // fully replace `SimplifyCFGPass`, and the closest to the other we have is
414 // `LoopInstSimplify`.
415 LoopPassManager LPM1, LPM2;
416
417 // Simplify the loop body. We do this initially to clean up after other loop
418 // passes run, either when iterating on a loop or on inner loops with
419 // implications on the outer loop.
422
423 // Try to remove as much code from the loop header as possible,
424 // to reduce the amount of IR that will have to be duplicated. However,
425 // do not perform speculative hoisting the first time as LICM
426 // will destroy metadata that may not need to be destroyed if run
427 // after loop rotation.
428 // TODO: Investigate promotion cap for O1.
430 /*AllowSpeculation=*/false));
431
432 LPM1.addPass(LoopRotatePass(/* Disable header duplication */ true,
434 // TODO: Investigate promotion cap for O1.
436 /*AllowSpeculation=*/true));
439 LPM1.addPass(LoopFlattenPass());
440
443
445
447
450
451 // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
452 // because it changes the IR in a way that makes profile annotation in the
453 // backend compile inaccurate. The normal unroller doesn't pay attention to
454 // forced full unroll attributes, so we need to make sure to allow the full
455 // unroll pass to pay attention to it.
456 if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
457 PGOOpt->Action != PGOOptions::SampleUse)
458 LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
459 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
461
463
464 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
465 /*UseMemorySSA=*/true,
466 /*UseBlockFrequencyInfo=*/true));
467 FPM.addPass(
468 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
470 // The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA.
471 // *All* loop passes must preserve it, in order to be able to use it.
472 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
473 /*UseMemorySSA=*/false,
474 /*UseBlockFrequencyInfo=*/false));
475
476 // Delete small arrays after loop unrolling.
478
479 // Specially optimize memory movement as it doesn't look like dataflow in SSA.
480 FPM.addPass(MemCpyOptPass());
481
482 // Sparse conditional constant propagation.
483 // FIXME: It isn't clear why we do this *after* loop passes rather than
484 // before...
485 FPM.addPass(SCCPPass());
486
487 // Delete dead bit computations (instcombine runs after to fold away the dead
488 // computations, and then ADCE will run later to exploit any new DCE
489 // opportunities that this creates).
490 FPM.addPass(BDCEPass());
491
492 // Run instcombine after redundancy and dead bit elimination to exploit
493 // opportunities opened up by them.
495 invokePeepholeEPCallbacks(FPM, Level);
496
497 FPM.addPass(CoroElidePass());
498
500
501 // Finally, do an expensive DCE pass to catch all the dead code exposed by
502 // the simplifications and basic cleanup after all the simplifications.
503 // TODO: Investigate if this is too expensive.
504 FPM.addPass(ADCEPass());
505 FPM.addPass(
506 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
508 invokePeepholeEPCallbacks(FPM, Level);
509
510 return FPM;
511}
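// Illustrative sketch (editorial addition, not part of the original file):
// this O1 function-simplification pipeline is normally nested inside the
// CGSCC inliner walk, but it can also be requested directly, e.g.:
//
//   FunctionPassManager FPM = PB.buildFunctionSimplificationPipeline(
//       OptimizationLevel::O1, ThinOrFullLTOPhase::None);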
512
516 assert(Level != OptimizationLevel::O0 && "Must request optimizations!");
517
518 // The O1 pipeline has a separate pipeline creation function to simplify
519 // construction readability.
520 if (Level.getSpeedupLevel() == 1)
521 return buildO1FunctionSimplificationPipeline(Level, Phase);
522
524
527
528 // Form SSA out of local memory accesses after breaking apart aggregates into
529 // scalars.
531
532 // Catch trivial redundancies
533 FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
536
537 // Hoisting of scalars and load expressions.
538 if (EnableGVNHoist)
539 FPM.addPass(GVNHoistPass());
540
541 // Global value numbering based sinking.
542 if (EnableGVNSink) {
543 FPM.addPass(GVNSinkPass());
544 FPM.addPass(
545 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
546 }
547
548 // Speculative execution if the target has divergent branches; otherwise nop.
549 FPM.addPass(SpeculativeExecutionPass(/* OnlyIfDivergentTarget =*/true));
550
551 // Optimize based on known information about branches, and clean up afterward.
554
555 FPM.addPass(
556 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
559
560 if (!Level.isOptimizingForSize())
562
563 invokePeepholeEPCallbacks(FPM, Level);
564
565 // For PGO use pipeline, try to optimize memory intrinsics such as memcpy
566 // using the size value profile. Don't perform this when optimizing for size.
567 if (PGOOpt && PGOOpt->Action == PGOOptions::IRUse &&
568 !Level.isOptimizingForSize())
570
572 FPM.addPass(
573 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
574
575 // Form canonically associated expression trees, and simplify the trees using
576 // basic mathematical properties. For example, this will form (nearly)
577 // minimal multiplication trees.
579
582
583 // Add the primary loop simplification pipeline.
584 // FIXME: Currently this is split into two loop pass pipelines because we run
585 // some function passes in between them. These can and should be removed
586 // and/or replaced by scheduling the loop pass equivalents in the correct
587 // positions. But those equivalent passes aren't powerful enough yet.
588 // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
589 // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough to
590 // fully replace `SimplifyCFGPass`, and the closest to the other we have is
591 // `LoopInstSimplify`.
592 LoopPassManager LPM1, LPM2;
593
594 // Simplify the loop body. We do this initially to clean up after other loop
595 // passes run, either when iterating on a loop or on inner loops with
596 // implications on the outer loop.
599
600 // Try to remove as much code from the loop header as possible,
601 // to reduce the amount of IR that will have to be duplicated. However,
602 // do not perform speculative hoisting the first time as LICM
603 // will destroy metadata that may not need to be destroyed if run
604 // after loop rotation.
605 // TODO: Investigate promotion cap for O1.
607 /*AllowSpeculation=*/false));
608
609 // Disable header duplication in loop rotation at -Oz.
610 LPM1.addPass(
612 // TODO: Investigate promotion cap for O1.
614 /*AllowSpeculation=*/true));
615 LPM1.addPass(
616 SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3));
618 LPM1.addPass(LoopFlattenPass());
619
622
624
626
629
630 // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
631 // because it changes the IR in a way that makes profile annotation in the
632 // backend compile inaccurate. The normal unroller doesn't pay attention to
633 // forced full unroll attributes, so we need to make sure to allow the full
634 // unroll pass to pay attention to it.
635 if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
636 PGOOpt->Action != PGOOptions::SampleUse)
637 LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
638 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
640
642
643 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
644 /*UseMemorySSA=*/true,
645 /*UseBlockFrequencyInfo=*/true));
646 FPM.addPass(
647 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
649 // The loop passes in LPM2 (LoopIdiomRecognizePass, IndVarSimplifyPass,
650 // LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA.
651 // *All* loop passes must preserve it, in order to be able to use it.
652 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
653 /*UseMemorySSA=*/false,
654 /*UseBlockFrequencyInfo=*/false));
655
656 // Delete small arrays after loop unrolling.
658
659 // Try vectorization/scalarization transforms that are both improvements
660 // themselves and can allow further folds with GVN and InstCombine.
661 FPM.addPass(VectorCombinePass(/*TryEarlyFoldsOnly=*/true));
662
663 // Eliminate redundancies.
665 if (RunNewGVN)
666 FPM.addPass(NewGVNPass());
667 else
668 FPM.addPass(GVNPass());
669
670 // Sparse conditional constant propagation.
671 // FIXME: It isn't clear why we do this *after* loop passes rather than
672 // before...
673 FPM.addPass(SCCPPass());
674
675 // Delete dead bit computations (instcombine runs after to fold away the dead
676 // computations, and then ADCE will run later to exploit any new DCE
677 // opportunities that this creates).
678 FPM.addPass(BDCEPass());
679
680 // Run instcombine after redundancy and dead bit elimination to exploit
681 // opportunities opened up by them.
683 invokePeepholeEPCallbacks(FPM, Level);
684
685 // Re-consider control flow based optimizations after redundancy elimination,
686 // redo DCE, etc.
687 if (EnableDFAJumpThreading && Level.getSizeLevel() == 0)
689
692
693 // Finally, do an expensive DCE pass to catch all the dead code exposed by
694 // the simplifications and basic cleanup after all the simplifications.
695 // TODO: Investigate if this is too expensive.
696 FPM.addPass(ADCEPass());
697
698 // Specially optimize memory movement as it doesn't look like dataflow in SSA.
699 FPM.addPass(MemCpyOptPass());
700
701 FPM.addPass(DSEPass());
703
706 /*AllowSpeculation=*/true),
707 /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
708
709 FPM.addPass(CoroElidePass());
710
712
714 .convertSwitchRangeToICmp(true)
715 .hoistCommonInsts(true)
716 .sinkCommonInsts(true)));
718 invokePeepholeEPCallbacks(FPM, Level);
719
720 return FPM;
721}
722
723void PassBuilder::addRequiredLTOPreLinkPasses(ModulePassManager &MPM) {
726}
727
728void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM,
729 OptimizationLevel Level, bool RunProfileGen,
730 bool IsCS, bool AtomicCounterUpdate,
731 std::string ProfileFile,
732 std::string ProfileRemappingFile,
733 ThinOrFullLTOPhase LTOPhase,
735 assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
736 if (!IsCS && !DisablePreInliner) {
737 InlineParams IP;
738
740
741 // FIXME: The hint threshold has the same value used by the regular inliner
742 // when not optimizing for size. This should probably be lowered after
743 // performance testing.
744 // FIXME: this comment is cargo-culted from the old pass manager; revisit.
745 IP.HintThreshold = Level.isOptimizingForSize() ? PreInlineThreshold : 325;
747 IP, /* MandatoryFirst */ true,
749 CGSCCPassManager &CGPipeline = MIWP.getPM();
750
753 FPM.addPass(EarlyCSEPass()); // Catch trivial redundancies.
754 FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(
755 true))); // Merge & remove basic blocks.
756 FPM.addPass(InstCombinePass()); // Combine silly sequences.
757 invokePeepholeEPCallbacks(FPM, Level);
758
759 CGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
760 std::move(FPM), PTO.EagerlyInvalidateAnalyses));
761
762 MPM.addPass(std::move(MIWP));
763
764 // Delete anything that is now dead to make sure that we don't instrument
765 // dead code. Instrumentation can end up keeping dead code around and
766 // dramatically increase code size.
768 }
769
770 if (!RunProfileGen) {
771 assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
772 MPM.addPass(
773 PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS));
774 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
775 // RequireAnalysisPass for PSI before subsequent non-module passes.
777 return;
778 }
779
780 // Perform PGO instrumentation.
782
784 // Disable header duplication in loop rotation at -Oz.
788 /*UseMemorySSA=*/false,
789 /*UseBlockFrequencyInfo=*/false),
791 }
792
793 // Add the profile lowering pass.
795 if (!ProfileFile.empty())
796 Options.InstrProfileOutput = ProfileFile;
797 // Do counter promotion at Level greater than O0.
798 Options.DoCounterPromotion = true;
799 Options.UseBFIInPromotion = IsCS;
800 Options.Atomic = AtomicCounterUpdate;
802}
803
805 ModulePassManager &MPM, bool RunProfileGen, bool IsCS,
806 bool AtomicCounterUpdate, std::string ProfileFile,
807 std::string ProfileRemappingFile, IntrusiveRefCntPtr<vfs::FileSystem> FS) {
808 if (!RunProfileGen) {
809 assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
810 MPM.addPass(
811 PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS));
812 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
813 // RequireAnalysisPass for PSI before subsequent non-module passes.
815 return;
816 }
817
818 // Perform PGO instrumentation.
820 // Add the profile lowering pass.
822 if (!ProfileFile.empty())
823 Options.InstrProfileOutput = ProfileFile;
824 // Do not do counter promotion at O0.
825 Options.DoCounterPromotion = false;
826 Options.UseBFIInPromotion = IsCS;
827 Options.Atomic = AtomicCounterUpdate;
829}
830
832 return getInlineParams(Level.getSpeedupLevel(), Level.getSizeLevel());
833}
834
838 InlineParams IP;
839 if (PTO.InlinerThreshold == -1)
840 IP = getInlineParamsFromOptLevel(Level);
841 else
843 // For PreLinkThinLTO + SamplePGO, set hot-caller threshold to 0 to
844 // disable hot callsite inline (as much as possible [1]) because it makes
845 // profile annotation in the backend inaccurate.
846 //
847 // [1] Note the cost of a function could be below zero due to erased
848 // prologue / epilogue.
849 if (Phase == ThinOrFullLTOPhase::ThinLTOPreLink && PGOOpt &&
850 PGOOpt->Action == PGOOptions::SampleUse)
852
853 if (PGOOpt)
855
859
860 // Require the GlobalsAA analysis for the module so we can query it within
861 // the CGSCC pipeline.
864 // Invalidate AAManager so it can be recreated and pick up the newly
865 // available GlobalsAA.
866 MIWP.addModulePass(
868 }
869
870 // Require the ProfileSummaryAnalysis for the module so we can query it within
871 // the inliner pass.
873
874 // Now begin the main postorder CGSCC pipeline.
875 // FIXME: The current CGSCC pipeline has its origins in the legacy pass
876 // manager and trying to emulate its precise behavior. Much of this doesn't
877 // make a lot of sense and we should revisit the core CGSCC structure.
878 CGSCCPassManager &MainCGPipeline = MIWP.getPM();
879
880 // Note: historically, the PruneEH pass was run first to deduce nounwind and
881 // generally clean up exception handling overhead. It isn't clear this is
882 // valuable as the inliner doesn't currently care whether it is inlining an
883 // invoke or a call.
884
886 MainCGPipeline.addPass(AttributorCGSCCPass());
887
888 // Deduce function attributes. We do another run of this after the function
889 // simplification pipeline, so this only needs to run when it could affect the
890 // function simplification pipeline, which is only the case with recursive
891 // functions.
892 MainCGPipeline.addPass(PostOrderFunctionAttrsPass(/*SkipNonRecursive*/ true));
893
894 // When at O3 add argument promotion to the pass pipeline.
895 // FIXME: It isn't at all clear why this should be limited to O3.
896 if (Level == OptimizationLevel::O3)
897 MainCGPipeline.addPass(ArgumentPromotionPass());
898
899 // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if
900 // there are no OpenMP runtime calls present in the module.
901 if (Level == OptimizationLevel::O2 || Level == OptimizationLevel::O3)
902 MainCGPipeline.addPass(OpenMPOptCGSCCPass());
903
904 invokeCGSCCOptimizerLateEPCallbacks(MainCGPipeline, Level);
905
906 // Add the core function simplification pipeline nested inside the
907 // CGSCC walk.
910 PTO.EagerlyInvalidateAnalyses, /*NoRerun=*/true));
911
912 // Finally, deduce any function attributes based on the fully simplified
913 // function.
914 MainCGPipeline.addPass(PostOrderFunctionAttrsPass());
915
916 // Mark that the function is fully simplified and that it shouldn't be
917 // simplified again if we somehow revisit it due to CGSCC mutations unless
918 // it's been modified since.
921
922 MainCGPipeline.addPass(CoroSplitPass(Level != OptimizationLevel::O0));
923
924 // Make sure we don't affect potential future NoRerun CGSCC adaptors.
925 MIWP.addLateModulePass(createModuleToFunctionPassAdaptor(
927
928 return MIWP;
929}
930
935
937 // For PreLinkThinLTO + SamplePGO, set hot-caller threshold to 0 to
938 // disable hot callsite inline (as much as possible [1]) because it makes
939 // profile annotation in the backend inaccurate.
940 //
941 // [1] Note the cost of a function could be below zero due to erased
942 // prologue / epilogue.
943 if (Phase == ThinOrFullLTOPhase::ThinLTOPreLink && PGOOpt &&
944 PGOOpt->Action == PGOOptions::SampleUse)
946
947 if (PGOOpt)
949
950 // The inline deferral logic is used to avoid losing some
951 // inlining chances in the future. It is helpful in the SCC inliner, in
952 // which inlining is processed in bottom-up order.
953 // In the module inliner, the inlining order is priority-based by
954 // default, so the inline deferral is unnecessary there and we disable
955 // the inline deferral logic in the module inliner.
956 IP.EnableDeferral = false;
957
959
963
966
967 return MPM;
968}
969
973 assert(Level != OptimizationLevel::O0 &&
974 "Should not be used for O0 pipeline");
975
977 "FullLTOPostLink shouldn't call buildModuleSimplificationPipeline!");
978
980
981 // Place pseudo probe instrumentation as the first pass of the pipeline to
982 // minimize the impact of optimization changes.
983 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
986
987 bool HasSampleProfile = PGOOpt && (PGOOpt->Action == PGOOptions::SampleUse);
988
989 // In ThinLTO mode, when flattened profile is used, all the available
990 // profile information will be annotated in PreLink phase so there is
991 // no need to load the profile again in PostLink.
992 bool LoadSampleProfile =
993 HasSampleProfile &&
995
996 // During the ThinLTO backend phase we perform early indirect call promotion
997 // here, before globalopt. Otherwise imported available_externally functions
998 // look unreferenced and are removed. If we are going to load the sample
999 // profile then defer until later.
1000 // TODO: See if we can move later and consolidate with the location where
1001 // we perform ICP when we are loading a sample profile.
1002 // TODO: We pass HasSampleProfile (whether there was a sample profile file
1003 // passed to the compile) to the SamplePGO flag of ICP. This is used to
1004 // determine whether the new direct calls are annotated with prof metadata.
1005 // Ideally this should be determined from whether the IR is annotated with
1006 // sample profile, and not whether a sample profile was provided on the
1007 // command line. E.g. for flattened profiles where we will not be reloading
1008 // the sample profile in the ThinLTO backend, we ideally shouldn't have to
1009 // provide the sample profile file.
1010 if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink && !LoadSampleProfile)
1011 MPM.addPass(PGOIndirectCallPromotion(true /* InLTO */, HasSampleProfile));
1012
1013 // Create an early function pass manager to clean up the output of the
1014 // frontend. Not necessary with LTO post link pipelines since the pre link
1015 // pipeline already cleaned up the frontend output.
1017 // Do basic inference of function attributes from known properties of system
1018 // libraries and other oracles.
1021
1022 FunctionPassManager EarlyFPM;
1023 // Lower llvm.expect to metadata before attempting transforms.
1024 // Compare/branch metadata may alter the behavior of passes like
1025 // SimplifyCFG.
1027 EarlyFPM.addPass(SimplifyCFGPass());
1029 EarlyFPM.addPass(EarlyCSEPass());
1030 if (Level == OptimizationLevel::O3)
1031 EarlyFPM.addPass(CallSiteSplittingPass());
1033 std::move(EarlyFPM), PTO.EagerlyInvalidateAnalyses));
1034 }
1035
1036 if (LoadSampleProfile) {
1037 // Annotate sample profile right after early FPM to ensure freshness of
1038 // the debug info.
1039 MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
1040 PGOOpt->ProfileRemappingFile, Phase));
1041 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
1042 // RequireAnalysisPass for PSI before subsequent non-module passes.
1044 // Do not invoke ICP in the LTOPrelink phase as it makes it hard
1045 // for the profile annotation to be accurate in the LTO backend.
1046 if (!isLTOPreLink(Phase))
1047 // We perform early indirect call promotion here, before globalopt.
1048 // This is important for the ThinLTO backend phase because otherwise
1049 // imported available_externally functions look unreferenced and are
1050 // removed.
1051 MPM.addPass(
1052 PGOIndirectCallPromotion(true /* IsInLTO */, true /* SamplePGO */));
1053 }
1054
1055 // Try to perform OpenMP specific optimizations on the module. This is a
1056 // (quick!) no-op if there are no OpenMP runtime calls present in the module.
1058
1061
1062 // Lower type metadata and the type.test intrinsic in the ThinLTO
1063 // post link pipeline after ICP. This is to enable usage of the type
1064 // tests in ICP sequences.
1066 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
1067
1069
1070 // Interprocedural constant propagation now that basic cleanup has occurred
1071 // and prior to optimizing globals.
1072 // FIXME: This position in the pipeline hasn't been carefully considered in
1073 // years, it should be re-analyzed.
1075 IPSCCPOptions(/*AllowFuncSpec=*/
1076 Level != OptimizationLevel::Os &&
1077 Level != OptimizationLevel::Oz &&
1078 !isLTOPreLink(Phase))));
1079
1080 // Attach metadata to indirect call sites indicating the set of functions
1081 // they may target at run-time. This should follow IPSCCP.
1083
1084 // Optimize globals to try and fold them into constants.
1086
1087 // Create a small function pass pipeline to cleanup after all the global
1088 // optimizations.
1089 FunctionPassManager GlobalCleanupPM;
1090 // FIXME: Should this instead be a run of SROA?
1091 GlobalCleanupPM.addPass(PromotePass());
1092 GlobalCleanupPM.addPass(InstCombinePass());
1093 invokePeepholeEPCallbacks(GlobalCleanupPM, Level);
1094 GlobalCleanupPM.addPass(
1095 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
1096 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(GlobalCleanupPM),
1098
1099 // Add all the requested passes for instrumentation PGO, if requested.
1100 if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink &&
1101 (PGOOpt->Action == PGOOptions::IRInstr ||
1102 PGOOpt->Action == PGOOptions::IRUse)) {
1103 addPGOInstrPasses(MPM, Level,
1104 /*RunProfileGen=*/PGOOpt->Action == PGOOptions::IRInstr,
1105 /*IsCS=*/false, PGOOpt->AtomicCounterUpdate,
1106 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile, Phase,
1107 PGOOpt->FS);
1108 MPM.addPass(PGOIndirectCallPromotion(false, false));
1109 }
1110 if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink &&
1111 PGOOpt->CSAction == PGOOptions::CSIRInstr)
1112 MPM.addPass(PGOInstrumentationGenCreateVar(PGOOpt->CSProfileGenFile));
1113
1114 if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink &&
1115 !PGOOpt->MemoryProfile.empty())
1116 MPM.addPass(MemProfUsePass(PGOOpt->MemoryProfile, PGOOpt->FS));
1117
1118 // Synthesize function entry counts for non-PGO compilation.
1119 if (EnableSyntheticCounts && !PGOOpt)
1121
1124 else
1126
1127 // Remove any dead arguments exposed by cleanups, constant folding globals,
1128 // and argument promotion.
1130
1132
1133 // Optimize globals now that functions are fully simplified.
1136
1137 return MPM;
1138}
1139
1140/// TODO: Should LTO cause any differences to this set of passes?
1141void PassBuilder::addVectorPasses(OptimizationLevel Level,
1142 FunctionPassManager &FPM, bool IsFullLTO) {
1145
1148 if (IsFullLTO) {
1149 // The vectorizer may have significantly shortened a loop body; unroll
1150 // again. Unroll small loops to hide loop backedge latency and saturate any
1151 // parallel execution resources of an out-of-order processor. We also then
1152 // need to clean up redundancies and loop invariant code.
1153 // FIXME: It would be really good to use a loop-integrated instruction
1154 // combiner for cleanup here so that the unrolling and LICM can be pipelined
1155 // across the loop nests.
1156 // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
1159 LoopUnrollAndJamPass(Level.getSpeedupLevel())));
1161 Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
1164 // Now that we are done with loop unrolling, be it either by LoopVectorizer,
1165 // or LoopUnroll passes, some variable-offset GEP's into alloca's could have
1166 // become constant-offset, thus enabling SROA and alloca promotion. Do so.
1167 // NOTE: we are very late in the pipeline, and we don't have any LICM
1168 // or SimplifyCFG passes scheduled after us that would clean up
1169 // the CFG mess this may have created if allowed to modify the CFG, so forbid that.
1171 }
1172
1173 if (!IsFullLTO) {
1174 // Eliminate loads by forwarding stores from the previous iteration to loads
1175 // of the current iteration.
1177 }
1178 // Cleanup after the loop optimization passes.
1179 FPM.addPass(InstCombinePass());
1180
1181 if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
1182 ExtraVectorPassManager ExtraPasses;
1183 // At higher optimization levels, try to clean up any runtime overlap and
1184 // alignment checks inserted by the vectorizer. We want to track correlated
1185 // runtime checks for two inner loops in the same outer loop, fold any
1186 // common computations, hoist loop-invariant aspects out of any outer loop,
1187 // and unswitch the runtime checks if possible. Once hoisted, we may have
1188 // dead (or speculatable) control flows or more combining opportunities.
1189 ExtraPasses.addPass(EarlyCSEPass());
1191 ExtraPasses.addPass(InstCombinePass());
1192 LoopPassManager LPM;
1194 /*AllowSpeculation=*/true));
1195 LPM.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
1197 ExtraPasses.addPass(
1198 createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/true,
1199 /*UseBlockFrequencyInfo=*/true));
1200 ExtraPasses.addPass(
1201 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
1202 ExtraPasses.addPass(InstCombinePass());
1203 FPM.addPass(std::move(ExtraPasses));
1204 }
1205
1206 // Now that we've formed fast-to-execute loop structures, we do further
1207 // optimizations. These are run afterward as they might block doing complex
1208 // analyses and transforms such as what are needed for loop vectorization.
1209
1210 // Cleanup after loop vectorization, etc. Simplification passes like CVP and
1211 // GVN, loop transforms, and others have already run, so it's now better to
1212 // convert to more optimized IR using more aggressive simplify CFG options.
1213 // The extra sinking transform can create larger basic blocks, so do this
1214 // before SLP vectorization.
1216 .forwardSwitchCondToPhi(true)
1217 .convertSwitchRangeToICmp(true)
1218 .convertSwitchToLookupTable(true)
1219 .needCanonicalLoops(false)
1220 .hoistCommonInsts(true)
1221 .sinkCommonInsts(true)));
1222
1223 if (IsFullLTO) {
1224 FPM.addPass(SCCPPass());
1225 FPM.addPass(InstCombinePass());
1226 FPM.addPass(BDCEPass());
1227 }
1228
1229 // Optimize parallel scalar instruction chains into SIMD instructions.
1230 if (PTO.SLPVectorization) {
1232 if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
1233 FPM.addPass(EarlyCSEPass());
1234 }
1235 }
1236 // Enhance/cleanup vector code.
1238
1239 if (!IsFullLTO) {
1240 FPM.addPass(InstCombinePass());
1241 // Unroll small loops to hide loop backedge latency and saturate any
1242 // parallel execution resources of an out-of-order processor. We also then
1243 // need to clean up redundancies and loop invariant code.
1244 // FIXME: It would be really good to use a loop-integrated instruction
1245 // combiner for cleanup here so that the unrolling and LICM can be pipelined
1246 // across the loop nests.
1247 // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
1248 if (EnableUnrollAndJam && PTO.LoopUnrolling) {
1250 LoopUnrollAndJamPass(Level.getSpeedupLevel())));
1251 }
1253 Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
1256 // Now that we are done with loop unrolling, be it either by LoopVectorizer,
1257 // or LoopUnroll passes, some variable-offset GEP's into alloca's could have
1258 // become constant-offset, thus enabling SROA and alloca promotion. Do so.
1259 // NOTE: we are very late in the pipeline, and we don't have any LICM
1260 // or SimplifyCFG passes scheduled after us that would clean up
1261 // the CFG mess this may have created if allowed to modify the CFG, so forbid that.
1263 }
1264
1267 FPM.addPass(InstCombinePass());
1268
1269 // This is needed for two reasons:
1270 // 1. It works around problems that instcombine introduces, such as sinking
1271 // expensive FP divides into loops containing multiplications using the
1272 // divide result.
1273 // 2. It helps to clean up some loop-invariant code created by the loop
1274 // unroll pass when IsFullLTO=false.
1277 /*AllowSpeculation=*/true),
1278 /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
1279
1280 // Now that we've vectorized and unrolled loops, we may have more refined
1281 // alignment information, try to re-derive it here.
1283}
1284
1287 ThinOrFullLTOPhase LTOPhase) {
1288 const bool LTOPreLink = isLTOPreLink(LTOPhase);
1290
1291 // Run partial inlining pass to partially inline functions that have
1292 // large bodies.
1295
1296 // Remove avail extern fns and global definitions since we aren't compiling
1297 // an object file for later LTO. For LTO we want to preserve these so they
1298 // are eligible for inlining at link-time. Note if they are unreferenced they
1299 // will be removed by GlobalDCE later, so this only impacts referenced
1300 // available externally globals. Eventually they will be suppressed during
1301 // codegen, but eliminating here enables more opportunity for GlobalDCE as it
1302 // may make globals referenced by available external functions dead and saves
1303 // running remaining passes on the eliminated functions. These should be
1304 // preserved during prelinking for link-time inlining decisions.
1305 if (!LTOPreLink)
1307
1310
1311 // Do RPO function attribute inference across the module to forward-propagate
1312 // attributes where applicable.
1313 // FIXME: Is this really an optimization rather than a canonicalization?
1315
1316 // Do a post inline PGO instrumentation and use pass. This is a context
1317 // sensitive PGO pass. We don't want to do this in the LTOPreLink phase as
1318 // cross-module inlining has not been done yet. The context sensitive
1319 // instrumentation is after all the inlines are done.
1320 if (!LTOPreLink && PGOOpt) {
1321 if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
1322 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true,
1323 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
1324 PGOOpt->CSProfileGenFile, PGOOpt->ProfileRemappingFile,
1325 LTOPhase, PGOOpt->FS);
1326 else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
1327 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false,
1328 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
1329 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile,
1330 LTOPhase, PGOOpt->FS);
1331 }
1332
1333 // Re-compute GlobalsAA here prior to function passes. This is particularly
1334 // useful as the above will have inlined, DCE'ed, and function-attr
1335 // propagated everything. We should at this point have a reasonably minimal
1336 // and richly annotated call graph. By computing aliasing and mod/ref
1337 // information for all local globals here, the late loop passes and notably
1338 // the vectorizer will be able to use them to help recognize vectorizable
1339 // memory operations.
1342
1344
1345 FunctionPassManager OptimizePM;
1346 OptimizePM.addPass(Float2IntPass());
1348
1349 if (EnableMatrix) {
1350 OptimizePM.addPass(LowerMatrixIntrinsicsPass());
1351 OptimizePM.addPass(EarlyCSEPass());
1352 }
1353
1354 // The CHR pass should only be applied with profile information;
1355 // the check of the profile summary information is done inside CHR.
1356 if (EnableCHR && Level == OptimizationLevel::O3)
1357 OptimizePM.addPass(ControlHeightReductionPass());
1358
1359 // FIXME: We need to run some loop optimizations to re-rotate loops after
1360 // simplifycfg and others undo their rotation.
1361
1362 // Optimize the loop execution. These passes operate on entire loop nests
1363 // rather than on each loop in an inside-out manner, and so they are actually
1364 // function passes.
1365
1366 invokeVectorizerStartEPCallbacks(OptimizePM, Level);
1367
1368 LoopPassManager LPM;
1369 // First rotate loops that may have been un-rotated by prior passes.
1370 // Disable header duplication at -Oz.
1371 LPM.addPass(LoopRotatePass(Level != OptimizationLevel::Oz, LTOPreLink));
1372 // Some loops may have become dead by now. Try to delete them.
1373 // FIXME: see discussion in https://reviews.llvm.org/D112851,
1374 // this may need to be revisited once we run GVN before loop deletion
1375 // in the simplification pipeline.
1378 std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/false));
1379
1380 // Distribute loops to allow partial vectorization. I.e. isolate dependences
1381 // into a separate loop that would otherwise inhibit vectorization. This is
1382 // currently only performed for loops marked with the metadata
1383 // llvm.loop.distribute=true or when -enable-loop-distribute is specified.
1384 OptimizePM.addPass(LoopDistributePass());
1385
1386 // Populates the VFABI attribute with the scalar-to-vector mappings
1387 // from the TargetLibraryInfo.
1388 OptimizePM.addPass(InjectTLIMappings());
1389
1390 addVectorPasses(Level, OptimizePM, /* IsFullLTO */ false);
1391
1392 // LoopSink pass sinks instructions hoisted by LICM, which serves as a
1393 // canonicalization pass that enables other optimizations. As a result,
1394 // LoopSink pass needs to be a very late IR pass to avoid undoing LICM
1395 // result too early.
1396 OptimizePM.addPass(LoopSinkPass());
1397
1398 // And finally clean up LCSSA form before generating code.
1399 OptimizePM.addPass(InstSimplifyPass());
1400
1401 // This hoists/decomposes div/rem ops. It should run after other sink/hoist
1402 // passes to avoid re-sinking, but before SimplifyCFG because it can allow
1403 // flattening of blocks.
1404 OptimizePM.addPass(DivRemPairsPass());
1405
1406 // Try to annotate calls that were created during optimization.
1407 OptimizePM.addPass(TailCallElimPass());
1408
1409 // LoopSink (and other loop passes since the last simplifyCFG) might have
1410 // resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
1411 OptimizePM.addPass(
1412 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
1413
1414 // Add the core optimizing pipeline.
1415 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM),
1417
1419
1420 // Split out cold code. Splitting is done late to avoid hiding context from
1421 // other optimizations and inadvertently regressing performance. The tradeoff
1422 // is that this has a higher code size cost than splitting early.
1423 if (EnableHotColdSplit && !LTOPreLink)
1425
1426 // Search the code for similar regions of code. If enough similar regions can
1427 // be found where extracting the regions into their own function will decrease
1428 // the size of the program, we extract the regions and deduplicate the
1429 // structurally similar regions.
1430 if (EnableIROutliner)
1432
1433 // Merge functions if requested.
1434 if (PTO.MergeFunctions)
1436
1437 // Now we need to do some global optimization transforms.
1438 // FIXME: It would seem like these should come first in the optimization
1439 // pipeline and maybe be the bottom of the canonicalization pipeline? Weird
1440 // ordering here.
1443
1444 if (PTO.CallGraphProfile && !LTOPreLink)
1446
1447 // TODO: The relative lookup table converter pass caused an issue when full LTO
1448 // is enabled. See https://reviews.llvm.org/D94355 for more details.
1449 // Until the issue is fixed, disable this pass during the pre-linking phase.
1450 if (!LTOPreLink)
1452
1453 return MPM;
1454}
1455
1458 bool LTOPreLink) {
1459 if (Level == OptimizationLevel::O0)
1460 return buildO0DefaultPipeline(Level, LTOPreLink);
1461
1463
1464 // Convert @llvm.global.annotations to !annotation metadata.
1466
1467 // Force any function attributes we want the rest of the pipeline to observe.
1469
1470 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
1472
1473 // Apply module pipeline start EP callback.
1475
1476 const ThinOrFullLTOPhase LTOPhase = LTOPreLink
1479 // Add the core simplification pipeline.
1481
1482 // Now add the optimization pipeline.
1484
1485 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1486 PGOOpt->Action == PGOOptions::SampleUse)
1488
1489 // Emit annotation remarks.
1491
1492 if (LTOPreLink)
1493 addRequiredLTOPreLinkPasses(MPM);
1494 return MPM;
1495}
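// Illustrative sketch (editorial addition, not part of the original file):
// the same default pipelines are reachable via the textual pipeline syntax
// used by `opt`, e.g. "default<O2>" or "thinlto-pre-link<O3>":
//
//   ModulePassManager MPM;
//   if (Error Err = PB.parsePassPipeline(MPM, "default<O2>"))
//     report_fatal_error(std::move(Err), /*gen_crash_diag=*/false);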
1496
1499 bool EmitSummary) {
1501 MPM.addPass(EmbedBitcodePass(ThinLTO, EmitSummary,
1502 ThinLTO
1506 return MPM;
1507}
1508
1511 if (Level == OptimizationLevel::O0)
1512 return buildO0DefaultPipeline(Level, /*LTOPreLink*/true);
1513
1515
1516 // Convert @llvm.global.annotations to !annotation metadata.
1518
1519 // Force any function attributes we want the rest of the pipeline to observe.
1521
1522 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
1524
1525 // Apply module pipeline start EP callback.
1527
1528 // If we are planning to perform ThinLTO later, we don't bloat the code with
1529 // unrolling/vectorization/... now. Just simplify the module as much as we
1530 // can.
1533
1534 // Run partial inlining pass to partially inline functions that have
1535 // large bodies.
1536 // FIXME: It isn't clear whether this is really the right place to run this
1537 // in ThinLTO. Because there is another canonicalization and simplification
1538 // phase that will run after the thin link, running this here ends up with
1539 // less information than will be available later and it may grow functions in
1540 // ways that aren't beneficial.
1543
1544 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1545 PGOOpt->Action == PGOOptions::SampleUse)
1547
1548 // Handle Optimizer{Early,Last}EPCallbacks added by clang on PreLink. Actual
1549 // optimization is going to be done in PostLink stage, but clang can't add
1550 // callbacks there in case of in-process ThinLTO called by linker.
1553
1554 // Emit annotation remarks.
1556
1557 addRequiredLTOPreLinkPasses(MPM);
1558
1559 return MPM;
1560}
1561
1563 OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary) {
1565
1566 if (ImportSummary) {
1567 // For ThinLTO we must apply the context disambiguation decisions early, to
1568 // ensure we can correctly match the callsites to summary data.
1571
1572 // These passes import type identifier resolutions for whole-program
1573 // devirtualization and CFI. They must run early because other passes may
1574 // disturb the specific instruction patterns that these passes look for,
1575 // creating dependencies on resolutions that may not appear in the summary.
1576 //
1577 // For example, GVN may transform the pattern assume(type.test) appearing in
1578 // two basic blocks into assume(phi(type.test, type.test)), which would
1579 // transform a dependency on a WPD resolution into a dependency on a type
1580 // identifier resolution for CFI.
1581 //
1582 // Also, WPD has access to more precise information than ICP and can
1583 // devirtualize more effectively, so it should operate on the IR first.
1584 //
1585 // The WPD and LowerTypeTest passes need to run at -O0 to lower type
1586 // metadata and intrinsics.
1587 MPM.addPass(WholeProgramDevirtPass(nullptr, ImportSummary));
1588 MPM.addPass(LowerTypeTestsPass(nullptr, ImportSummary));
1589 }
1590
1591 if (Level == OptimizationLevel::O0) {
1592 // Run a second time to clean up any type tests left behind by WPD for use
1593 // in ICP.
1594 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
1595 // Drop available_externally and unreferenced globals. This is necessary
1596 // with ThinLTO in order to avoid leaving undefined references to dead
1597 // globals in the object file.
1600 return MPM;
1601 }
1602
1603 // Add the core simplification pipeline.
1606
1607 // Now add the optimization pipeline.
1610
1611 // Emit annotation remarks.
1613
1614 return MPM;
1615}
1616
1619 // FIXME: We should use a customized pre-link pipeline!
1620 return buildPerModuleDefaultPipeline(Level,
1621 /* LTOPreLink */ true);
1622}
1623
1626 ModuleSummaryIndex *ExportSummary) {
1628
1630
1631 // Create a function that performs CFI checks for cross-DSO calls with targets
1632 // in the current module.
1634
1635 if (Level == OptimizationLevel::O0) {
1636 // The WPD and LowerTypeTest passes need to run at -O0 to lower type
1637 // metadata and intrinsics.
1638 MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
1639 MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
1640 // Run a second time to clean up any type tests left behind by WPD for use
1641 // in ICP.
1642 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
1643
1645
1646 // Emit annotation remarks.
1648
1649 return MPM;
1650 }
1651
1652 if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) {
1653 // Load sample profile before running the LTO optimization pipeline.
1654 MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
1655 PGOOpt->ProfileRemappingFile,
1657 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
1658 // RequireAnalysisPass for PSI before subsequent non-module passes.
1660 }
1661
1662 // Try to run OpenMP optimizations, quick no-op if no OpenMP metadata present.
1664
1665 // Remove unused virtual tables to improve the quality of code generated by
1666 // whole-program devirtualization and bitset lowering.
1667 MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));
1668
1669 // Do basic inference of function attributes from known properties of system
1670 // libraries and other oracles.
1672
1673 if (Level.getSpeedupLevel() > 1) {
1676
1677 // Indirect call promotion. This should promote all the targets that are
1678 // left by the earlier promotion pass that promotes intra-module targets.
1679 // This two-step promotion is to save the compile time. For LTO, it should
1680 // produce the same result as if we only do promotion here.
1682 true /* InLTO */, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
1683
1684 // Propagate constants at call sites into the functions they call. This
1685 // opens opportunities for globalopt (and inlining) by substituting function
1686 // pointers passed as arguments to direct uses of functions.
1687 MPM.addPass(IPSCCPPass(IPSCCPOptions(/*AllowFuncSpec=*/
1688 Level != OptimizationLevel::Os &&
1689 Level != OptimizationLevel::Oz)));
1690
1691 // Attach metadata to indirect call sites indicating the set of functions
1692 // they may target at run-time. This should follow IPSCCP.
1694 }
1695
1696 // Now deduce any function attributes based on the current code.
1697 MPM.addPass(
1699
1700 // Do RPO function attribute inference across the module to forward-propagate
1701 // attributes where applicable.
1702 // FIXME: Is this really an optimization rather than a canonicalization?
1704
1705 // Use in-range annotations on GEP indices to split globals where beneficial.
1707
1708 // Run whole program optimization of virtual calls when the list of callees
1709 // is fixed.
1710 MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
1711
1712 // Stop here at -O1.
1713 if (Level == OptimizationLevel::O1) {
1714 // The LowerTypeTestsPass needs to run to lower type metadata and the
1715 // type.test intrinsics. The pass does nothing if CFI is disabled.
1716 MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
1717 // Run a second time to clean up any type tests left behind by WPD for use
1718 // in ICP (which is performed earlier than this in the regular LTO
1719 // pipeline).
1720 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
1721
1723
1724 // Emit annotation remarks.
1726
1727 return MPM;
1728 }
1729
1730 // Optimize globals to try and fold them into constants.
1732
1733 // Promote any localized globals to SSA registers.
1735
1736 // Linking modules together can lead to duplicate global constants; only
1737 // keep one copy of each constant.
1739
1740 // Remove unused arguments from functions.
1742
1743 // Reduce the code after globalopt and ipsccp. Both can open up significant
1744 // simplification opportunities, and both can propagate functions through
1745 // function pointers. When this happens, we often have to resolve varargs
1746 // calls, etc, so let instcombine do this.
1747 FunctionPassManager PeepholeFPM;
1748 PeepholeFPM.addPass(InstCombinePass());
1749 if (Level.getSpeedupLevel() > 1)
1750 PeepholeFPM.addPass(AggressiveInstCombinePass());
1751 invokePeepholeEPCallbacks(PeepholeFPM, Level);
1752
1753 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(PeepholeFPM),
1755
1756 // Note: historically, the PruneEH pass was run first to deduce nounwind and
1757 // generally clean up exception handling overhead. It isn't clear this is
1758 // valuable as the inliner doesn't currently care whether it is inlining an
1759 // invoke or a call.
1760 // Run the inliner now.
1761 if (EnableModuleInliner) {
1765 } else {
1768 /* MandatoryFirst */ true,
1771 }
1772
1773 // Perform context disambiguation after inlining, since that would reduce the
1774 // amount of additional cloning required to distinguish the allocation
1775 // contexts.
1778
1779 // Optimize globals again after we ran the inliner.
1781
1782 // Run the OpenMPOpt pass again after global optimizations.
1784
1785 // Garbage collect dead functions.
1786 MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));
1787
1788 // If we didn't decide to inline a function, check to see if we can
1789 // transform it to pass arguments by value instead of by reference.
1791
1793 // The IPO passes may leave cruft around. Clean up after them.
1794 FPM.addPass(InstCombinePass());
1795 invokePeepholeEPCallbacks(FPM, Level);
1796
1799
1801
1802 // Do a post inline PGO instrumentation and use pass. This is a context
1803 // sensitive PGO pass.
1804 if (PGOOpt) {
1805 if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
1806 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true,
1807 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
1808 PGOOpt->CSProfileGenFile, PGOOpt->ProfileRemappingFile,
1810 else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
1811 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false,
1812 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
1813 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile,
1815 }
1816
1817 // Break up allocas
1819
1820 // LTO provides additional opportunities for tailcall elimination due to
1821 // link-time inlining, and visibility of the nocapture attribute.
1823
1824 // Run a few AA-driven optimizations here and now to clean up the code.
1827
1828 MPM.addPass(
1830
1831 // Require the GlobalsAA analysis for the module so we can query it within
1832 // MainFPM.
1835 // Invalidate AAManager so it can be recreated and pick up the newly
1836 // available GlobalsAA.
1837 MPM.addPass(
1839 }
1840
1841 FunctionPassManager MainFPM;
1844 /*AllowSpeculation=*/true),
1845 /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
1846
1847 if (RunNewGVN)
1848 MainFPM.addPass(NewGVNPass());
1849 else
1850 MainFPM.addPass(GVNPass());
1851
1852 // Remove dead memcpy()'s.
1853 MainFPM.addPass(MemCpyOptPass());
1854
1855 // Nuke dead stores.
1856 MainFPM.addPass(DSEPass());
1857 MainFPM.addPass(MoveAutoInitPass());
1858 MainFPM.addPass(MergedLoadStoreMotionPass());
1859
1860 LoopPassManager LPM;
1861 if (EnableLoopFlatten && Level.getSpeedupLevel() > 1)
1862 LPM.addPass(LoopFlattenPass());
1863 LPM.addPass(IndVarSimplifyPass());
1864 LPM.addPass(LoopDeletionPass());
1865 // FIXME: Add loop interchange.
1866
1867 // Unroll small loops and perform peeling.
1868 LPM.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
1869 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
1870 PTO.ForgetAllSCEVInLoopUnroll));
1871 // The loop passes in LPM (LoopFullUnrollPass) do not preserve MemorySSA.
1872 // *All* loop passes must preserve it, in order to be able to use it.
1873 MainFPM.addPass(createFunctionToLoopPassAdaptor(
1874 std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/true));
1875
1876 MainFPM.addPass(LoopDistributePass());
1877
1878 addVectorPasses(Level, MainFPM, /* IsFullLTO */ true);
1879
1880 // Run the OpenMPOpt CGSCC pass again late.
1881 MPM.addPass(
1882 createModuleToPostOrderCGSCCPassAdaptor(OpenMPOptCGSCCPass()));
1883
1884 invokePeepholeEPCallbacks(MainFPM, Level);
1885 MainFPM.addPass(JumpThreadingPass());
1886 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(MainFPM),
1887 PTO.EagerlyInvalidateAnalyses));
1888
1889 // Lower type metadata and the type.test intrinsic. This pass supports
1890 // clang's control flow integrity mechanisms (-fsanitize=cfi*) and needs
1891 // to be run at link time if CFI is enabled. This pass does nothing if
1892 // CFI is disabled.
1893 MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
1894 // Run a second time to clean up any type tests left behind by WPD for use
1895 // in ICP (which is performed earlier than this in the regular LTO pipeline).
1896 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
1897
1898 // Enable splitting late in the FullLTO post-link pipeline.
1899 if (EnableHotColdSplit)
1900 MPM.addPass(HotColdSplittingPass());
1901
1902 // Add late LTO optimization passes.
1903 FunctionPassManager LateFPM;
1904
1905 // LoopSink pass sinks instructions hoisted by LICM, which serves as a
1906 // canonicalization pass that enables other optimizations. As a result,
1907 // LoopSink pass needs to be a very late IR pass to avoid undoing LICM
1908 // result too early.
1909 LateFPM.addPass(LoopSinkPass());
1910
1911 // This hoists/decomposes div/rem ops. It should run after other sink/hoist
1912 // passes to avoid re-sinking, but before SimplifyCFG because it can allow
1913 // flattening of blocks.
1914 LateFPM.addPass(DivRemPairsPass());
1915
1916 // Delete basic blocks, which optimization passes may have killed.
1917 LateFPM.addPass(SimplifyCFGPass(
1918 SimplifyCFGOptions().convertSwitchRangeToICmp(true).hoistCommonInsts(
1919 true)));
1920 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(LateFPM)));
1921
1922 // Drop bodies of available-externally objects to improve GlobalDCE.
1923 MPM.addPass(EliminateAvailableExternallyPass());
1924
1925 // Now that we have optimized the program, discard unreachable functions.
1926 MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));
1927
1928 if (PTO.MergeFunctions)
1929 MPM.addPass(MergeFunctionsPass());
1930
1931 if (PTO.CallGraphProfile)
1932 MPM.addPass(CGProfilePass(/*InLTOPostLink=*/true));
1933
1934 invokeFullLinkTimeOptimizationLastEPCallbacks(MPM, Level);
1935
1936 // Emit annotation remarks.
1937 addAnnotationRemarksPass(MPM);
1938
1939 return MPM;
1940}
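The LTO post-link pipeline built above is normally driven through a PassBuilder that owns the analysis managers. The following is an illustrative sketch only, not part of PassBuilderPipelines.cpp; the helper name runFullLTOPostLink and the way the Module, TargetMachine, and summary index are obtained are assumptions made for the example. It mirrors the standard new-pass-manager setup described in the LLVM documentation.

#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Passes/PassBuilder.h"

using namespace llvm;

// Hypothetical driver: build and run the full-LTO post-link pipeline on a
// merged module. M, TM, and ExportSummary are assumed to come from the linker.
static void runFullLTOPostLink(Module &M, TargetMachine *TM,
                               ModuleSummaryIndex *ExportSummary) {
  LoopAnalysisManager LAM;
  FunctionAnalysisManager FAM;
  CGSCCAnalysisManager CGAM;
  ModuleAnalysisManager MAM;

  PassBuilder PB(TM);

  // Register all LLVM analyses and wire the proxies between the managers.
  PB.registerModuleAnalyses(MAM);
  PB.registerCGSCCAnalyses(CGAM);
  PB.registerFunctionAnalyses(FAM);
  PB.registerLoopAnalyses(LAM);
  PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);

  // Build the pipeline defined above and run it over the module.
  ModulePassManager MPM =
      PB.buildLTODefaultPipeline(OptimizationLevel::O2, ExportSummary);
  MPM.run(M, MAM);
}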
1941
1942 ModulePassManager PassBuilder::buildO0DefaultPipeline(OptimizationLevel Level,
1943 bool LTOPreLink) {
1944 assert(Level == OptimizationLevel::O0 &&
1945 "buildO0DefaultPipeline should only be used with O0");
1946
1947 ModulePassManager MPM;
1948
1949 // Perform pseudo probe instrumentation in O0 mode. This is for
1950 // consistency between different build modes. For example, an LTO build can be
1951 // mixed with an O0 prelink and an O2 postlink. Loading a sample profile in
1952 // the postlink will require pseudo probe instrumentation in the prelink.
1953 if (PGOOpt && PGOOpt->PseudoProbeForProfiling)
1954 MPM.addPass(SampleProfileProbePass(TM));
1955
1956 if (PGOOpt && (PGOOpt->Action == PGOOptions::IRInstr ||
1957 PGOOpt->Action == PGOOptions::IRUse))
1958 addPGOInstrPassesForO0(
1959 MPM,
1960 /*RunProfileGen=*/(PGOOpt->Action == PGOOptions::IRInstr),
1961 /*IsCS=*/false, PGOOpt->AtomicCounterUpdate, PGOOpt->ProfileFile,
1962 PGOOpt->ProfileRemappingFile, PGOOpt->FS);
1963
1964 invokePipelineStartEPCallbacks(MPM, Level);
1965
1966 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
1967 MPM.addPass(AddDiscriminatorsPass());
1968
1969 invokePipelineEarlySimplificationEPCallbacks(MPM, Level);
1970
1971 // Build a minimal pipeline based on the semantics required by LLVM,
1972 // which is just that always inlining occurs. Further, disable generating
1973 // lifetime intrinsics to avoid enabling further optimizations during
1974 // code generation.
1975 MPM.addPass(AlwaysInlinerPass(
1976 /*InsertLifetimeIntrinsics=*/false));
1977
1978 if (PTO.MergeFunctions)
1979 MPM.addPass(MergeFunctionsPass());
1980
1981 if (EnableMatrix)
1982 MPM.addPass(
1983 createModuleToFunctionPassAdaptor(LowerMatrixIntrinsicsPass(true)));
1984
1985 if (!CGSCCOptimizerLateEPCallbacks.empty()) {
1986 CGSCCPassManager CGPM;
1987 invokeCGSCCOptimizerLateEPCallbacks(CGPM, Level);
1988 if (!CGPM.isEmpty())
1989 MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
1990 }
1991 if (!LateLoopOptimizationsEPCallbacks.empty()) {
1992 LoopPassManager LPM;
1993 invokeLateLoopOptimizationsEPCallbacks(LPM, Level);
1994 if (!LPM.isEmpty()) {
1995 MPM.addPass(createModuleToFunctionPassAdaptor(
1996 createFunctionToLoopPassAdaptor(std::move(LPM))));
1997 }
1998 }
1999 if (!LoopOptimizerEndEPCallbacks.empty()) {
2000 LoopPassManager LPM;
2001 invokeLoopOptimizerEndEPCallbacks(LPM, Level);
2002 if (!LPM.isEmpty()) {
2003 MPM.addPass(createModuleToFunctionPassAdaptor(
2004 createFunctionToLoopPassAdaptor(std::move(LPM))));
2005 }
2006 }
2007 if (!ScalarOptimizerLateEPCallbacks.empty()) {
2008 FunctionPassManager FPM;
2009 invokeScalarOptimizerLateEPCallbacks(FPM, Level);
2010 if (!FPM.isEmpty())
2011 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
2012 }
2013
2014 invokeOptimizerEarlyEPCallbacks(MPM, Level);
2015
2016 if (!VectorizerStartEPCallbacks.empty()) {
2017 FunctionPassManager FPM;
2018 invokeVectorizerStartEPCallbacks(FPM, Level);
2019 if (!FPM.isEmpty())
2020 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
2021 }
2022
2023 ModulePassManager CoroPM;
2024 CoroPM.addPass(CoroEarlyPass());
2025 CGSCCPassManager CGPM;
2026 CGPM.addPass(CoroSplitPass());
2027 CoroPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
2028 CoroPM.addPass(CoroCleanupPass());
2029 CoroPM.addPass(GlobalDCEPass());
2030 MPM.addPass(CoroConditionalWrapper(std::move(CoroPM)));
2031
2032 invokeOptimizerLastEPCallbacks(MPM, Level);
2033
2034 if (LTOPreLink)
2035 addRequiredLTOPreLinkPasses(MPM);
2036
2037 MPM.addPass(createModuleToFunctionPassAdaptor(AnnotationRemarksPass()));
2038
2039 return MPM;
2040}
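As a usage note, not part of this file: once a PassBuilder and its analysis managers are configured as in the earlier sketch, the O0 pipeline can be requested directly, and it is also what tools reach through the textual pipeline name default<O0>. The helper below is hypothetical.

#include "llvm/IR/Module.h"
#include "llvm/Passes/PassBuilder.h"

using namespace llvm;

// Hypothetical helper: run the minimal O0 pipeline built above. PB and MAM are
// assumed to be set up exactly as in the previous sketch.
static void runO0Pipeline(Module &M, PassBuilder &PB,
                          ModuleAnalysisManager &MAM, bool LTOPreLink) {
  ModulePassManager MPM =
      PB.buildO0DefaultPipeline(OptimizationLevel::O0, LTOPreLink);
  MPM.run(M, MAM);
}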
2041
2042 AAManager PassBuilder::buildDefaultAAPipeline() {
2043 AAManager AA;
2044
2045 // The order in which these are registered determines their priority when
2046 // being queried.
2047
2048 // First we register the basic alias analysis that provides the majority of
2049 // per-function local AA logic. This is a stateless, on-demand local set of
2050 // AA techniques.
2051 AA.registerFunctionAnalysis<BasicAA>();
2052
2053 // Next we query fast, specialized alias analyses that wrap IR-embedded
2054 // information about aliasing.
2055 AA.registerFunctionAnalysis<ScopedNoAliasAA>();
2056 AA.registerFunctionAnalysis<TypeBasedAA>();
2057
2058 // Add support for querying global aliasing information when available.
2059 // Because the `AAManager` is a function analysis and `GlobalsAA` is a module
2060 // analysis, all that the `AAManager` can do is query for any *cached*
2061 // results from `GlobalsAA` through a readonly proxy.
2062 if (EnableGlobalAnalyses)
2063 AA.registerModuleAnalysis<GlobalsAA>();
2064
2065 // Add target-specific alias analyses.
2066 if (TM)
2067 TM->registerDefaultAliasAnalyses(AA);
2068
2069 return AA;
2070}
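A usage note, not part of this file: clients pick up this default AA stack through the AAManager function analysis. The sketch below uses a hypothetical helper name; registering an AAManager before calling registerFunctionAnalyses() keeps the explicit registration, because an analysis that is already registered is not replaced.

#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Passes/PassBuilder.h"

using namespace llvm;

// Hypothetical helper: install the default AA pipeline (or a customized copy)
// before the remaining function analyses are registered.
static void registerDefaultAA(PassBuilder &PB, FunctionAnalysisManager &FAM) {
  FAM.registerPass([&] { return PB.buildDefaultAAPipeline(); });
  // registerFunctionAnalyses() would install the same default AAManager if one
  // had not been registered above; the existing registration is kept.
  PB.registerFunctionAnalyses(FAM);
}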