LLVM  16.0.0git
PassBuilderPipelines.cpp
Go to the documentation of this file.
1 //===- Construction of pass pipelines -------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 ///
10 /// This file provides the implementation of the PassBuilder based on our
11 /// static pass registry as well as related functionality. It also provides
12 /// helpers to aid in analyzing, debugging, and testing passes and pass
13 /// pipelines.
14 ///
15 //===----------------------------------------------------------------------===//
16 
26 #include "llvm/IR/PassManager.h"
130 
131 using namespace llvm;
132 
134  "enable-ml-inliner", cl::init(InliningAdvisorMode::Default), cl::Hidden,
135  cl::desc("Enable ML policy for inliner. Currently trained for -Oz only"),
137  "Heuristics-based inliner version"),
139  "Use development mode (runtime-loadable model)"),
141  "Use release mode (AOT-compiled model)")));
142 
144  "enable-npm-synthetic-counts", cl::Hidden,
145  cl::desc("Run synthetic function entry count generation "
146  "pass"));
147 
148 /// Flag to enable inline deferral during PGO.
/// Hidden option "enable-npm-pgo-inline-deferral", initialised to true. The
/// inliner pipeline builders further down copy it into IP.EnableDeferral when
/// PGOOpt is set.
149 static cl::opt<bool>
150  EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true),
151  cl::Hidden,
152  cl::desc("Enable inline deferral during PGO"));
153 
/// Hidden boolean option "enable-mem-prof" for the memory profiler. No
/// cl::init is supplied in this declaration.
154 static cl::opt<bool> EnableMemProfiler("enable-mem-prof", cl::Hidden,
155  cl::desc("Enable memory profiler"));
156 
/// Hidden boolean option "enable-module-inliner", initialised to false. It is
/// tested by an if/else in the module pipeline code further down.
157 static cl::opt<bool> EnableModuleInliner("enable-module-inliner",
158  cl::init(false), cl::Hidden,
159  cl::desc("Enable module inliner"));
160 
162  "mandatory-inlining-first", cl::init(true), cl::Hidden,
163  cl::desc("Perform mandatory inlinings module-wide, before performing "
164  "inlining"));
165 
167  "enable-npm-O3-nontrivial-unswitch", cl::init(true), cl::Hidden,
168  cl::desc("Enable non-trivial loop unswitching for -O3"));
169 
171  "eagerly-invalidate-analyses", cl::init(true), cl::Hidden,
172  cl::desc("Eagerly invalidate more analyses in default pipelines"));
173 
175  "enable-no-rerun-simplification-pipeline", cl::init(true), cl::Hidden,
176  cl::desc(
177  "Prevent running the simplification pipeline on a function more "
178  "than once in the case that SCC mutations cause a function to be "
179  "visited multiple times as long as the function has not been changed"));
180 
182  "enable-merge-functions", cl::init(false), cl::Hidden,
183  cl::desc("Enable function merging as part of the optimization pipeline"));
184 
186  "enable-post-pgo-loop-rotation", cl::init(true), cl::Hidden,
187  cl::desc("Run the loop rotation transformation after PGO instrumentation"));
188 
190  "enable-global-analyses", cl::init(true), cl::Hidden,
191  cl::desc("Enable inter-procedural analyses"));
192 
/// Hidden boolean option "enable-partial-inlining", initialised to false.
/// The help text previously misspelled "inlining"; only that word is changed.
193 static cl::opt<bool>
194  RunPartialInlining("enable-partial-inlining", cl::init(false), cl::Hidden,
195  cl::desc("Run Partial inlining pass"));
196 
198  "extra-vectorizer-passes", cl::init(false), cl::Hidden,
199  cl::desc("Run cleanup optimization passes after vectorization"));
200 
/// Hidden boolean option "enable-newgvn", initialised to false. When set, the
/// function simplification pipeline below adds NewGVNPass instead of GVNPass.
201 static cl::opt<bool> RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden,
202  cl::desc("Run the NewGVN pass"));
203 
205  "enable-loopinterchange", cl::init(false), cl::Hidden,
206  cl::desc("Enable the experimental LoopInterchange Pass"));
207 
/// Hidden boolean option "enable-unroll-and-jam", initialised to false.
/// addVectorPasses checks it together with PTO.LoopUnrolling.
208 static cl::opt<bool> EnableUnrollAndJam("enable-unroll-and-jam",
209  cl::init(false), cl::Hidden,
210  cl::desc("Enable Unroll And Jam Pass"));
211 
/// Hidden boolean option "enable-loop-flatten", initialised to true. When set,
/// both function simplification pipelines below add LoopFlattenPass to their
/// first loop pass manager (LPM1).
212 static cl::opt<bool> EnableLoopFlatten("enable-loop-flatten", cl::init(true),
213  cl::Hidden,
214  cl::desc("Enable the LoopFlatten Pass"));
215 
/// Hidden boolean option "enable-dfa-jump-thread", initialised to false. The
/// function simplification pipeline below tests it together with
/// Level.getSizeLevel() == 0.
216 static cl::opt<bool>
217  EnableDFAJumpThreading("enable-dfa-jump-thread",
218  cl::desc("Enable DFA jump threading"),
219  cl::init(false), cl::Hidden);
220 
/// Boolean option "hot-cold-split" for the hot-cold splitting pass. Unlike most
/// of the neighbouring flags, this declaration passes neither cl::init nor
/// cl::Hidden.
221 static cl::opt<bool>
222  EnableHotColdSplit("hot-cold-split",
223  cl::desc("Enable hot-cold splitting pass"));
224 
/// Hidden boolean option "ir-outliner", initialised to false, for the IR
/// outliner pass.
225 static cl::opt<bool> EnableIROutliner("ir-outliner", cl::init(false),
226  cl::Hidden,
227  cl::desc("Enable ir outliner pass"));
228 
/// Hidden boolean option "disable-preinline", initialised to false.
/// addPGOInstrPasses only enters its pre-instrumentation inlining block when
/// this flag is false and IsCS is false.
229 static cl::opt<bool>
230  DisablePreInliner("disable-preinline", cl::init(false), cl::Hidden,
231  cl::desc("Disable pre-instrumentation inliner"));
232 
234  "preinline-threshold", cl::Hidden, cl::init(75),
235  cl::desc("Control the amount of inlining in pre-instrumentation inliner "
236  "(default = 75)"));
237 
/// Boolean option "enable-gvn-hoist"; no cl::init or cl::Hidden is passed
/// here. When set, the function simplification pipeline below adds
/// GVNHoistPass.
238 static cl::opt<bool>
239  EnableGVNHoist("enable-gvn-hoist",
240  cl::desc("Enable the GVN hoisting pass (default = off)"));
241 
/// Boolean option "enable-gvn-sink"; no cl::init or cl::Hidden is passed here.
/// When set, the function simplification pipeline below adds GVNSinkPass
/// followed by a SimplifyCFGPass.
242 static cl::opt<bool>
243  EnableGVNSink("enable-gvn-sink",
244  cl::desc("Enable the GVN sinking pass (default = off)"));
245 
246 // This option is used in simplifying testing SampleFDO optimizations for
247 // profile loading.
// NOTE(review): the two lines above describe SampleFDO profile loading, while
// the option declared here toggles control height reduction (CHR) -- confirm
// that the comment is attached to the intended flag.
// Hidden option "enable-chr", initialised to true. The function simplification
// pipeline below tests it together with an O3 level check and PGOOpt.
248 static cl::opt<bool>
249  EnableCHR("enable-chr", cl::init(true), cl::Hidden,
250  cl::desc("Enable control height reduction optimization (CHR)"));
251 
253  "flattened-profile-used", cl::init(false), cl::Hidden,
254  cl::desc("Indicate the sample profile being used is flattened, i.e., "
255  "no inline hierachy exists in the profile"));
256 
258  "enable-order-file-instrumentation", cl::init(false), cl::Hidden,
259  cl::desc("Enable order file instrumentation (default = off)"));
260 
/// Hidden boolean option "enable-matrix", initialised to false, for lowering
/// of the matrix intrinsics.
261 static cl::opt<bool>
262  EnableMatrix("enable-matrix", cl::init(false), cl::Hidden,
263  cl::desc("Enable lowering of the matrix intrinsics"));
264 
266  "enable-constraint-elimination", cl::init(false), cl::Hidden,
267  cl::desc(
268  "Enable pass to eliminate conditions based on linear constraints"));
269 
271  "enable-function-specialization", cl::init(false), cl::Hidden,
272  cl::desc("Enable Function Specialization pass"));
273 
275  "attributor-enable", cl::Hidden, cl::init(AttributorRunOption::NONE),
276  cl::desc("Enable the attributor inter-procedural deduction pass"),
278  "enable all attributor runs"),
280  "enable module-wide attributor runs"),
282  "enable call graph SCC attributor runs"),
284  "disable attributor runs")));
285 
287  LoopInterleaving = true;
288  LoopVectorization = true;
289  SLPVectorization = false;
290  LoopUnrolling = true;
294  CallGraphProfile = true;
296  InlinerThreshold = -1;
298 }
299 
300 namespace llvm {
303 } // namespace llvm
304 
/// Invoke each callback stored in PeepholeEPCallbacks, in iteration order,
/// passing it \p FPM and \p Level.
305 void PassBuilder::invokePeepholeEPCallbacks(
306  FunctionPassManager &FPM, OptimizationLevel Level) {
307  for (auto &PeepholeCallback : PeepholeEPCallbacks)
308  PeepholeCallback(FPM, Level);
309 }
310 
311 // Helper to add AnnotationRemarksPass.
314 }
315 
316 // Helper to check if the current compilation phase is preparing for LTO
320 }
321 
322 // TODO: Investigate the cost/benefit of tail call elimination on debugging.
324 PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
326 
328 
329  // Form SSA out of local memory accesses after breaking apart aggregates into
330  // scalars.
331  FPM.addPass(SROAPass());
332 
333  // Catch trivial redundancies
334  FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
335 
336  // Hoisting of scalars and load expressions.
337  FPM.addPass(
338  SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
339  FPM.addPass(InstCombinePass());
340 
342 
343  invokePeepholeEPCallbacks(FPM, Level);
344 
345  FPM.addPass(
346  SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
347 
348  // Form canonically associated expression trees, and simplify the trees using
349  // basic mathematical properties. For example, this will form (nearly)
350  // minimal multiplication trees.
351  FPM.addPass(ReassociatePass());
352 
353  // Add the primary loop simplification pipeline.
354  // FIXME: Currently this is split into two loop pass pipelines because we run
355  // some function passes in between them. These can and should be removed
356  // and/or replaced by scheduling the loop pass equivalents in the correct
357  // positions. But those equivalent passes aren't powerful enough yet.
358  // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
359 // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough to
360  // fully replace `SimplifyCFGPass`, and the closest to the other we have is
361  // `LoopInstSimplify`.
362  LoopPassManager LPM1, LPM2;
363 
364  // Simplify the loop body. We do this initially to clean up after other loop
365  // passes run, either when iterating on a loop or on inner loops with
366  // implications on the outer loop.
369 
370  // Try to remove as much code from the loop header as possible,
371  // to reduce amount of IR that will have to be duplicated. However,
372  // do not perform speculative hoisting the first time as LICM
373  // will destroy metadata that may not need to be destroyed if run
374  // after loop rotation.
375  // TODO: Investigate promotion cap for O1.
377  /*AllowSpeculation=*/false));
378 
379  LPM1.addPass(LoopRotatePass(/* Disable header duplication */ true,
380  isLTOPreLink(Phase)));
381  // TODO: Investigate promotion cap for O1.
383  /*AllowSpeculation=*/true));
385  if (EnableLoopFlatten)
386  LPM1.addPass(LoopFlattenPass());
387 
389  LPM2.addPass(IndVarSimplifyPass());
390 
391  for (auto &C : LateLoopOptimizationsEPCallbacks)
392  C(LPM2, Level);
393 
394  LPM2.addPass(LoopDeletionPass());
395 
398 
399  // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
400 // because it changes IR, which makes profile annotation in back compile
401  // inaccurate. The normal unroller doesn't pay attention to forced full unroll
402  // attributes so we need to make sure and allow the full unroll pass to pay
403  // attention to it.
404  if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
405  PGOOpt->Action != PGOOptions::SampleUse)
406  LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
407  /* OnlyWhenForced= */ !PTO.LoopUnrolling,
409 
410  for (auto &C : LoopOptimizerEndEPCallbacks)
411  C(LPM2, Level);
412 
413  // We provide the opt remark emitter pass for LICM to use. We only need to do
414  // this once as it is immutable.
415  FPM.addPass(
418  /*UseMemorySSA=*/true,
419  /*UseBlockFrequencyInfo=*/true));
420  FPM.addPass(
421  SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
422  FPM.addPass(InstCombinePass());
423  // The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA.
424  // *All* loop passes must preserve it, in order to be able to use it.
426  /*UseMemorySSA=*/false,
427  /*UseBlockFrequencyInfo=*/false));
428 
429  // Delete small array after loop unroll.
430  FPM.addPass(SROAPass());
431 
432  // Specially optimize memory movement as it doesn't look like dataflow in SSA.
433  FPM.addPass(MemCpyOptPass());
434 
435  // Sparse conditional constant propagation.
436  // FIXME: It isn't clear why we do this *after* loop passes rather than
437  // before...
438  FPM.addPass(SCCPPass());
439 
440  // Delete dead bit computations (instcombine runs after to fold away the dead
441  // computations, and then ADCE will run later to exploit any new DCE
442  // opportunities that creates).
443  FPM.addPass(BDCEPass());
444 
445  // Run instcombine after redundancy and dead bit elimination to exploit
446  // opportunities opened up by them.
447  FPM.addPass(InstCombinePass());
448  invokePeepholeEPCallbacks(FPM, Level);
449 
450  FPM.addPass(CoroElidePass());
451 
452  for (auto &C : ScalarOptimizerLateEPCallbacks)
453  C(FPM, Level);
454 
455  // Finally, do an expensive DCE pass to catch all the dead code exposed by
456  // the simplifications and basic cleanup after all the simplifications.
457  // TODO: Investigate if this is too expensive.
458  FPM.addPass(ADCEPass());
459  FPM.addPass(
460  SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
461  FPM.addPass(InstCombinePass());
462  invokePeepholeEPCallbacks(FPM, Level);
463 
464  return FPM;
465 }
466 
470  assert(Level != OptimizationLevel::O0 && "Must request optimizations!");
471 
472  // The O1 pipeline has a separate pipeline creation function to simplify
473  // construction readability.
474  if (Level.getSpeedupLevel() == 1)
475  return buildO1FunctionSimplificationPipeline(Level, Phase);
476 
478 
479  // Form SSA out of local memory accesses after breaking apart aggregates into
480  // scalars.
481  FPM.addPass(SROAPass());
482 
483  // Catch trivial redundancies
484  FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
487 
488  // Hoisting of scalars and load expressions.
489  if (EnableGVNHoist)
490  FPM.addPass(GVNHoistPass());
491 
492  // Global value numbering based sinking.
493  if (EnableGVNSink) {
494  FPM.addPass(GVNSinkPass());
495  FPM.addPass(
496  SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
497  }
498 
501 
502  // Speculative execution if the target has divergent branches; otherwise nop.
503  FPM.addPass(SpeculativeExecutionPass(/* OnlyIfDivergentTarget =*/true));
504 
505  // Optimize based on known information about branches, and cleanup afterward.
506  FPM.addPass(JumpThreadingPass());
508 
509  FPM.addPass(
510  SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
511  FPM.addPass(InstCombinePass());
512  if (Level == OptimizationLevel::O3)
514 
515  if (!Level.isOptimizingForSize())
517 
518  invokePeepholeEPCallbacks(FPM, Level);
519 
520  // For PGO use pipeline, try to optimize memory intrinsics such as memcpy
521  // using the size value profile. Don't perform this when optimizing for size.
522  if (PGOOpt && PGOOpt->Action == PGOOptions::IRUse &&
523  !Level.isOptimizingForSize())
524  FPM.addPass(PGOMemOPSizeOpt());
525 
526  FPM.addPass(TailCallElimPass());
527  FPM.addPass(
528  SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
529 
530  // Form canonically associated expression trees, and simplify the trees using
531  // basic mathematical properties. For example, this will form (nearly)
532  // minimal multiplication trees.
533  FPM.addPass(ReassociatePass());
534 
535  // Add the primary loop simplification pipeline.
536  // FIXME: Currently this is split into two loop pass pipelines because we run
537  // some function passes in between them. These can and should be removed
538  // and/or replaced by scheduling the loop pass equivalents in the correct
539  // positions. But those equivalent passes aren't powerful enough yet.
540  // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
541 // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough to
542  // fully replace `SimplifyCFGPass`, and the closest to the other we have is
543  // `LoopInstSimplify`.
544  LoopPassManager LPM1, LPM2;
545 
546  // Simplify the loop body. We do this initially to clean up after other loop
547  // passes run, either when iterating on a loop or on inner loops with
548  // implications on the outer loop.
551 
552  // Try to remove as much code from the loop header as possible,
553  // to reduce amount of IR that will have to be duplicated. However,
554  // do not perform speculative hoisting the first time as LICM
555  // will destroy metadata that may not need to be destroyed if run
556  // after loop rotation.
557  // TODO: Investigate promotion cap for O1.
559  /*AllowSpeculation=*/false));
560 
561  // Disable header duplication in loop rotation at -Oz.
562  LPM1.addPass(
564  // TODO: Investigate promotion cap for O1.
566  /*AllowSpeculation=*/true));
567  LPM1.addPass(
568  SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3 &&
570  if (EnableLoopFlatten)
571  LPM1.addPass(LoopFlattenPass());
572 
574  LPM2.addPass(IndVarSimplifyPass());
575 
576  for (auto &C : LateLoopOptimizationsEPCallbacks)
577  C(LPM2, Level);
578 
579  LPM2.addPass(LoopDeletionPass());
580 
583 
584  // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
585 // because it changes IR, which makes profile annotation in back compile
586  // inaccurate. The normal unroller doesn't pay attention to forced full unroll
587  // attributes so we need to make sure and allow the full unroll pass to pay
588  // attention to it.
589  if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
590  PGOOpt->Action != PGOOptions::SampleUse)
591  LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
592  /* OnlyWhenForced= */ !PTO.LoopUnrolling,
594 
595  for (auto &C : LoopOptimizerEndEPCallbacks)
596  C(LPM2, Level);
597 
598  // We provide the opt remark emitter pass for LICM to use. We only need to do
599  // this once as it is immutable.
600  FPM.addPass(
603  /*UseMemorySSA=*/true,
604  /*UseBlockFrequencyInfo=*/true));
605  FPM.addPass(
606  SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
607  FPM.addPass(InstCombinePass());
608  // The loop passes in LPM2 (LoopIdiomRecognizePass, IndVarSimplifyPass,
609  // LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA.
610  // *All* loop passes must preserve it, in order to be able to use it.
612  /*UseMemorySSA=*/false,
613  /*UseBlockFrequencyInfo=*/false));
614 
615  // Delete small array after loop unroll.
616  FPM.addPass(SROAPass());
617 
618  // Try vectorization/scalarization transforms that are both improvements
619  // themselves and can allow further folds with GVN and InstCombine.
620  FPM.addPass(VectorCombinePass(/*TryEarlyFoldsOnly=*/true));
621 
622  // Eliminate redundancies.
624  if (RunNewGVN)
625  FPM.addPass(NewGVNPass());
626  else
627  FPM.addPass(GVNPass());
628 
629  // Sparse conditional constant propagation.
630  // FIXME: It isn't clear why we do this *after* loop passes rather than
631  // before...
632  FPM.addPass(SCCPPass());
633 
634  // Delete dead bit computations (instcombine runs after to fold away the dead
635  // computations, and then ADCE will run later to exploit any new DCE
636  // opportunities that creates).
637  FPM.addPass(BDCEPass());
638 
639  // Run instcombine after redundancy and dead bit elimination to exploit
640  // opportunities opened up by them.
641  FPM.addPass(InstCombinePass());
642  invokePeepholeEPCallbacks(FPM, Level);
643 
644  // Re-consider control flow based optimizations after redundancy elimination,
645  // redo DCE, etc.
646  if (EnableDFAJumpThreading && Level.getSizeLevel() == 0)
648 
649  FPM.addPass(JumpThreadingPass());
651 
652  // Finally, do an expensive DCE pass to catch all the dead code exposed by
653  // the simplifications and basic cleanup after all the simplifications.
654  // TODO: Investigate if this is too expensive.
655  FPM.addPass(ADCEPass());
656 
657  // Specially optimize memory movement as it doesn't look like dataflow in SSA.
658  FPM.addPass(MemCpyOptPass());
659 
660  FPM.addPass(DSEPass());
663  /*AllowSpeculation=*/true),
664  /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true));
665 
666  FPM.addPass(CoroElidePass());
667 
668  for (auto &C : ScalarOptimizerLateEPCallbacks)
669  C(FPM, Level);
670 
672  .convertSwitchRangeToICmp(true)
673  .hoistCommonInsts(true)
674  .sinkCommonInsts(true)));
675  FPM.addPass(InstCombinePass());
676  invokePeepholeEPCallbacks(FPM, Level);
677 
678  // Don't add CHR pass for CSIRInstr build in PostLink as the profile
679  // is still the same as the PreLink compilation.
680  if (EnableCHR && Level == OptimizationLevel::O3 && PGOOpt &&
681  ((PGOOpt->Action == PGOOptions::IRUse &&
683  PGOOpt->CSAction != PGOOptions::CSIRInstr)) ||
684  PGOOpt->Action == PGOOptions::SampleUse))
686 
687  return FPM;
688 }
689 
690 void PassBuilder::addRequiredLTOPreLinkPasses(ModulePassManager &MPM) {
693 }
694 
695 void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM,
696  OptimizationLevel Level, bool RunProfileGen,
697  bool IsCS, std::string ProfileFile,
698  std::string ProfileRemappingFile,
699  ThinOrFullLTOPhase LTOPhase) {
700  assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
701  if (!IsCS && !DisablePreInliner) {
703 
704  IP.DefaultThreshold = PreInlineThreshold;
705 
706  // FIXME: The hint threshold has the same value used by the regular inliner
707 // when not optimizing for size. This should probably be lowered after
708  // performance testing.
709 // FIXME: this comment is cargo culted from the old pass manager, revisit.
710  IP.HintThreshold = Level.isOptimizingForSize() ? PreInlineThreshold : 325;
712  IP, /* MandatoryFirst */ true,
714  CGSCCPassManager &CGPipeline = MIWP.getPM();
715 
717  FPM.addPass(SROAPass());
718  FPM.addPass(EarlyCSEPass()); // Catch trivial redundancies.
719  FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(
720  true))); // Merge & remove basic blocks.
721  FPM.addPass(InstCombinePass()); // Combine silly sequences.
722  invokePeepholeEPCallbacks(FPM, Level);
723 
724  CGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
726 
727  MPM.addPass(std::move(MIWP));
728 
729  // Delete anything that is now dead to make sure that we don't instrument
730  // dead code. Instrumentation can end up keeping dead code around and
731  // dramatically increase code size.
733  }
734 
735  if (!RunProfileGen) {
736  assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
737  MPM.addPass(PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS));
738  // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
739  // RequireAnalysisPass for PSI before subsequent non-module passes.
741  return;
742  }
743 
744  // Perform PGO instrumentation.
746 
748  // Disable header duplication in loop rotation at -Oz.
752  /*UseMemorySSA=*/false,
753  /*UseBlockFrequencyInfo=*/false),
755  }
756 
757  // Add the profile lowering pass.
759  if (!ProfileFile.empty())
760  Options.InstrProfileOutput = ProfileFile;
761  // Do counter promotion at Level greater than O0.
762  Options.DoCounterPromotion = true;
763  Options.UseBFIInPromotion = IsCS;
765 }
766 
768  bool RunProfileGen, bool IsCS,
769  std::string ProfileFile,
770  std::string ProfileRemappingFile) {
771  if (!RunProfileGen) {
772  assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
773  MPM.addPass(PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS));
774  // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
775  // RequireAnalysisPass for PSI before subsequent non-module passes.
777  return;
778  }
779 
780  // Perform PGO instrumentation.
782  // Add the profile lowering pass.
784  if (!ProfileFile.empty())
785  Options.InstrProfileOutput = ProfileFile;
786  // Do not do counter promotion at O0.
787  Options.DoCounterPromotion = false;
788  Options.UseBFIInPromotion = IsCS;
790 }
791 
793  return getInlineParams(Level.getSpeedupLevel(), Level.getSizeLevel());
794 }
795 
800  if (PTO.InlinerThreshold == -1)
802  else
804  // For PreLinkThinLTO + SamplePGO, set hot-caller threshold to 0 to
805  // disable hot callsite inline (as much as possible [1]) because it makes
806  // profile annotation in the backend inaccurate.
807  //
808  // [1] Note the cost of a function could be below zero due to erased
809  // prologue / epilogue.
810  if (Phase == ThinOrFullLTOPhase::ThinLTOPreLink && PGOOpt &&
811  PGOOpt->Action == PGOOptions::SampleUse)
812  IP.HotCallSiteThreshold = 0;
813 
814  if (PGOOpt)
815  IP.EnableDeferral = EnablePGOInlineDeferral;
816 
820 
821  // Require the GlobalsAA analysis for the module so we can query it within
822  // the CGSCC pipeline.
824  // Invalidate AAManager so it can be recreated and pick up the newly available
825  // GlobalsAA.
826  MIWP.addModulePass(
828 
829  // Require the ProfileSummaryAnalysis for the module so we can query it within
830  // the inliner pass.
832 
833  // Now begin the main postorder CGSCC pipeline.
834  // FIXME: The current CGSCC pipeline has its origins in the legacy pass
835  // manager and trying to emulate its precise behavior. Much of this doesn't
836  // make a lot of sense and we should revisit the core CGSCC structure.
837  CGSCCPassManager &MainCGPipeline = MIWP.getPM();
838 
839  // Note: historically, the PruneEH pass was run first to deduce nounwind and
840  // generally clean up exception handling overhead. It isn't clear this is
841  // valuable as the inliner doesn't currently care whether it is inlining an
842  // invoke or a call.
843 
845  MainCGPipeline.addPass(AttributorCGSCCPass());
846 
847  // Now deduce any function attributes based in the current code.
848  MainCGPipeline.addPass(PostOrderFunctionAttrsPass());
849 
850  // When at O3 add argument promotion to the pass pipeline.
851  // FIXME: It isn't at all clear why this should be limited to O3.
852  if (Level == OptimizationLevel::O3)
853  MainCGPipeline.addPass(ArgumentPromotionPass());
854 
855  // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if
856  // there are no OpenMP runtime calls present in the module.
857  if (Level == OptimizationLevel::O2 || Level == OptimizationLevel::O3)
858  MainCGPipeline.addPass(OpenMPOptCGSCCPass());
859 
860  for (auto &C : CGSCCOptimizerLateEPCallbacks)
861  C(MainCGPipeline, Level);
862 
863  // Lastly, add the core function simplification pipeline nested inside the
864  // CGSCC walk.
868 
869  MainCGPipeline.addPass(CoroSplitPass(Level != OptimizationLevel::O0));
870 
872  MIWP.addLateModulePass(createModuleToFunctionPassAdaptor(
874 
875  return MIWP;
876 }
877 
882 
884  // For PreLinkThinLTO + SamplePGO, set hot-caller threshold to 0 to
885  // disable hot callsite inline (as much as possible [1]) because it makes
886  // profile annotation in the backend inaccurate.
887  //
888  // [1] Note the cost of a function could be below zero due to erased
889  // prologue / epilogue.
890  if (Phase == ThinOrFullLTOPhase::ThinLTOPreLink && PGOOpt &&
891  PGOOpt->Action == PGOOptions::SampleUse)
892  IP.HotCallSiteThreshold = 0;
893 
894  if (PGOOpt)
895  IP.EnableDeferral = EnablePGOInlineDeferral;
896 
897  // The inline deferral logic is used to avoid losing some
898  // inlining chance in future. It is helpful in SCC inliner, in which
899  // inlining is processed in bottom-up order.
900  // While in module inliner, the inlining order is a priority-based order
901  // by default. The inline deferral is unnecessary there. So we disable the
902  // inline deferral logic in module inliner.
903  IP.EnableDeferral = false;
904 
906 
910 
913 
914  return MPM;
915 }
916 
921 
922  // Place pseudo probe instrumentation as the first pass of the pipeline to
923  // minimize the impact of optimization changes.
924  if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
927 
928  bool HasSampleProfile = PGOOpt && (PGOOpt->Action == PGOOptions::SampleUse);
929 
930  // In ThinLTO mode, when flattened profile is used, all the available
931  // profile information will be annotated in PreLink phase so there is
932  // no need to load the profile again in PostLink.
933  bool LoadSampleProfile =
934  HasSampleProfile &&
936 
937  // During the ThinLTO backend phase we perform early indirect call promotion
938  // here, before globalopt. Otherwise imported available_externally functions
939  // look unreferenced and are removed. If we are going to load the sample
940  // profile then defer until later.
941  // TODO: See if we can move later and consolidate with the location where
942  // we perform ICP when we are loading a sample profile.
943  // TODO: We pass HasSampleProfile (whether there was a sample profile file
944  // passed to the compile) to the SamplePGO flag of ICP. This is used to
945  // determine whether the new direct calls are annotated with prof metadata.
946  // Ideally this should be determined from whether the IR is annotated with
947 // sample profile, and not whether a sample profile was provided on the
948  // command line. E.g. for flattened profiles where we will not be reloading
949  // the sample profile in the ThinLTO backend, we ideally shouldn't have to
950  // provide the sample profile file.
951  if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink && !LoadSampleProfile)
952  MPM.addPass(PGOIndirectCallPromotion(true /* InLTO */, HasSampleProfile));
953 
954  // Do basic inference of function attributes from known properties of system
955  // libraries and other oracles.
958 
959  // Create an early function pass manager to cleanup the output of the
960  // frontend.
961  FunctionPassManager EarlyFPM;
962  // Lower llvm.expect to metadata before attempting transforms.
963  // Compare/branch metadata may alter the behavior of passes like SimplifyCFG.
964  EarlyFPM.addPass(LowerExpectIntrinsicPass());
965  EarlyFPM.addPass(SimplifyCFGPass());
966  EarlyFPM.addPass(SROAPass());
967  EarlyFPM.addPass(EarlyCSEPass());
968  if (Level == OptimizationLevel::O3)
969  EarlyFPM.addPass(CallSiteSplittingPass());
970 
971  // In SamplePGO ThinLTO backend, we need instcombine before profile annotation
972  // to convert bitcast to direct calls so that they can be inlined during the
973 // profile annotation preparation step.
974  // More details about SamplePGO design can be found in:
975  // https://research.google.com/pubs/pub45290.html
976  // FIXME: revisit how SampleProfileLoad/Inliner/ICP is structured.
977  if (LoadSampleProfile)
978  EarlyFPM.addPass(InstCombinePass());
981 
982  if (LoadSampleProfile) {
983  // Annotate sample profile right after early FPM to ensure freshness of
984  // the debug info.
985  MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
986  PGOOpt->ProfileRemappingFile, Phase));
987  // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
988  // RequireAnalysisPass for PSI before subsequent non-module passes.
990  // Do not invoke ICP in the LTOPrelink phase as it makes it hard
991  // for the profile annotation to be accurate in the LTO backend.
994  // We perform early indirect call promotion here, before globalopt.
995  // This is important for the ThinLTO backend phase because otherwise
996  // imported available_externally functions look unreferenced and are
997  // removed.
998  MPM.addPass(
999  PGOIndirectCallPromotion(true /* IsInLTO */, true /* SamplePGO */));
1000  }
1001 
1002  // Try to perform OpenMP specific optimizations on the module. This is a
1003  // (quick!) no-op if there are no OpenMP runtime calls present in the module.
1004  if (Level != OptimizationLevel::O0)
1006 
1009 
1010  // Lower type metadata and the type.test intrinsic in the ThinLTO
1011  // post link pipeline after ICP. This is to enable usage of the type
1012  // tests in ICP sequences.
1014  MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
1015 
1016  for (auto &C : PipelineEarlySimplificationEPCallbacks)
1017  C(MPM, Level);
1018 
1019  // Specialize functions with IPSCCP.
1022 
1023  // Interprocedural constant propagation now that basic cleanup has occurred
1024  // and prior to optimizing globals.
1025  // FIXME: This position in the pipeline hasn't been carefully considered in
1026  // years, it should be re-analyzed.
1027  MPM.addPass(IPSCCPPass());
1028 
1029  // Attach metadata to indirect call sites indicating the set of functions
1030  // they may target at run-time. This should follow IPSCCP.
1032 
1033  // Optimize globals to try and fold them into constants.
1035 
1036  // Promote any localized globals to SSA registers.
1037 // FIXME: Should this instead be a run of SROA?
1038  // FIXME: We should probably run instcombine and simplifycfg afterward to
1039  // delete control flows that are dead once globals have been folded to
1040  // constants.
1042 
1043  // Create a small function pass pipeline to cleanup after all the global
1044  // optimizations.
1045  FunctionPassManager GlobalCleanupPM;
1046  GlobalCleanupPM.addPass(InstCombinePass());
1047  invokePeepholeEPCallbacks(GlobalCleanupPM, Level);
1048 
1049  GlobalCleanupPM.addPass(
1050  SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
1053 
1054  // Add all the requested passes for instrumentation PGO, if requested.
1055  if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink &&
1056  (PGOOpt->Action == PGOOptions::IRInstr ||
1057  PGOOpt->Action == PGOOptions::IRUse)) {
1058  addPGOInstrPasses(MPM, Level,
1059  /* RunProfileGen */ PGOOpt->Action == PGOOptions::IRInstr,
1060  /* IsCS */ false, PGOOpt->ProfileFile,
1061  PGOOpt->ProfileRemappingFile, Phase);
1062  MPM.addPass(PGOIndirectCallPromotion(false, false));
1063  }
1064  if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink &&
1065  PGOOpt->CSAction == PGOOptions::CSIRInstr)
1066  MPM.addPass(PGOInstrumentationGenCreateVar(PGOOpt->CSProfileGenFile));
1067 
1068  // Synthesize function entry counts for non-PGO compilation.
1069  if (EnableSyntheticCounts && !PGOOpt)
1071 
1072  if (EnableModuleInliner)
1074  else
1076 
1077  // Remove any dead arguments exposed by cleanups, constant folding globals,
1078  // and argument promotion.
1080 
1082 
1086  }
1087 
1088  return MPM;
1089 }
1090 
1091 /// TODO: Should LTO cause any differences to this set of passes?
1092 void PassBuilder::addVectorPasses(OptimizationLevel Level,
1093  FunctionPassManager &FPM, bool IsFullLTO) {
1096 
1097  if (IsFullLTO) {
1098  // The vectorizer may have significantly shortened a loop body; unroll
1099  // again. Unroll small loops to hide loop backedge latency and saturate any
1100  // parallel execution resources of an out-of-order processor. We also then
1101  // need to clean up redundancies and loop invariant code.
1102  // FIXME: It would be really good to use a loop-integrated instruction
1103  // combiner for cleanup here so that the unrolling and LICM can be pipelined
1104  // across the loop nests.
1105  // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
1106  if (EnableUnrollAndJam && PTO.LoopUnrolling)
1108  LoopUnrollAndJamPass(Level.getSpeedupLevel())));
1110  Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
1113  // Now that we are done with loop unrolling, be it either by LoopVectorizer,
1114  // or LoopUnroll passes, some variable-offset GEP's into alloca's could have
1115  // become constant-offset, thus enabling SROA and alloca promotion. Do so.
1116  FPM.addPass(SROAPass());
1117  }
1118 
1119  if (!IsFullLTO) {
1120  // Eliminate loads by forwarding stores from the previous iteration to loads
1121  // of the current iteration.
1123  }
1124  // Cleanup after the loop optimization passes.
1125  FPM.addPass(InstCombinePass());
1126 
1127  if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
1128  ExtraVectorPassManager ExtraPasses;
1129  // At higher optimization levels, try to clean up any runtime overlap and
1130  // alignment checks inserted by the vectorizer. We want to track correlated
1131  // runtime checks for two inner loops in the same outer loop, fold any
1132  // common computations, hoist loop-invariant aspects out of any outer loop,
1133  // and unswitch the runtime checks if possible. Once hoisted, we may have
1134  // dead (or speculatable) control flows or more combining opportunities.
1135  ExtraPasses.addPass(EarlyCSEPass());
1136  ExtraPasses.addPass(CorrelatedValuePropagationPass());
1137  ExtraPasses.addPass(InstCombinePass());
1138  LoopPassManager LPM;
1140  /*AllowSpeculation=*/true));
1141  LPM.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
1143  ExtraPasses.addPass(
1145  ExtraPasses.addPass(
1146  createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/true,
1147  /*UseBlockFrequencyInfo=*/true));
1148  ExtraPasses.addPass(
1149  SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
1150  ExtraPasses.addPass(InstCombinePass());
1151  FPM.addPass(std::move(ExtraPasses));
1152  }
1153 
1154  // Now that we've formed fast to execute loop structures, we do further
1155  // optimizations. These are run afterward as they might block doing complex
1156  // analyses and transforms such as what are needed for loop vectorization.
1157 
1158  // Cleanup after loop vectorization, etc. Simplification passes like CVP and
1159  // GVN, loop transforms, and others have already run, so it's now better to
1160  // convert to more optimized IR using more aggressive simplify CFG options.
1161  // The extra sinking transform can create larger basic blocks, so do this
1162  // before SLP vectorization.
1164  .forwardSwitchCondToPhi(true)
1165  .convertSwitchRangeToICmp(true)
1166  .convertSwitchToLookupTable(true)
1167  .needCanonicalLoops(false)
1168  .hoistCommonInsts(true)
1169  .sinkCommonInsts(true)));
1170 
1171  if (IsFullLTO) {
1172  FPM.addPass(SCCPPass());
1173  FPM.addPass(InstCombinePass());
1174  FPM.addPass(BDCEPass());
1175  }
1176 
1177  // Optimize parallel scalar instruction chains into SIMD instructions.
1178  if (PTO.SLPVectorization) {
1179  FPM.addPass(SLPVectorizerPass());
1180  if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
1181  FPM.addPass(EarlyCSEPass());
1182  }
1183  }
1184  // Enhance/cleanup vector code.
1185  FPM.addPass(VectorCombinePass());
1186 
1187  if (!IsFullLTO) {
1188  FPM.addPass(InstCombinePass());
1189  // Unroll small loops to hide loop backedge latency and saturate any
1190  // parallel execution resources of an out-of-order processor. We also then
1191  // need to clean up redundancies and loop invariant code.
1192  // FIXME: It would be really good to use a loop-integrated instruction
1193  // combiner for cleanup here so that the unrolling and LICM can be pipelined
1194  // across the loop nests.
1195  // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
1196  if (EnableUnrollAndJam && PTO.LoopUnrolling) {
1198  LoopUnrollAndJamPass(Level.getSpeedupLevel())));
1199  }
1201  Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
1204  // Now that we are done with loop unrolling, be it either by LoopVectorizer,
1205  // or LoopUnroll passes, some variable-offset GEP's into alloca's could have
1206  // become constant-offset, thus enabling SROA and alloca promotion. Do so.
1207  FPM.addPass(SROAPass());
1208  FPM.addPass(InstCombinePass());
1209  FPM.addPass(
1213  /*AllowSpeculation=*/true),
1214  /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true));
1215  }
1216 
1217  // Now that we've vectorized and unrolled loops, we may have more refined
1218  // alignment information, try to re-derive it here.
1220 
1221  if (IsFullLTO)
1222  FPM.addPass(InstCombinePass());
1223 }
1224 
1227  ThinOrFullLTOPhase LTOPhase) {
1228  const bool LTOPreLink = (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink ||
1231 
1232  // Optimize globals now that the module is fully simplified.
1235 
1236  // Run partial inlining pass to partially inline functions that have
1237  // large bodies.
1238  if (RunPartialInlining)
1240 
1241  // Remove avail extern fns and globals definitions since we aren't compiling
1242  // an object file for later LTO. For LTO we want to preserve these so they
1243  // are eligible for inlining at link-time. Note if they are unreferenced they
1244  // will be removed by GlobalDCE later, so this only impacts referenced
1245  // available externally globals. Eventually they will be suppressed during
1246  // codegen, but eliminating here enables more opportunity for GlobalDCE as it
1247  // may make globals referenced by available external functions dead and saves
1248  // running remaining passes on the eliminated functions. These should be
1249  // preserved during prelinking for link-time inlining decisions.
1250  if (!LTOPreLink)
1252 
1255 
1256  // Do RPO function attribute inference across the module to forward-propagate
1257  // attributes where applicable.
1258  // FIXME: Is this really an optimization rather than a canonicalization?
1260 
1261  // Do a post inline PGO instrumentation and use pass. This is a context
1262  // sensitive PGO pass. We don't want to do this in LTOPreLink phase as
1263  // cross-module inline has not been done yet. The context sensitive
1264  // instrumentation is after all the inlines are done.
1265  if (!LTOPreLink && PGOOpt) {
1266  if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
1267  addPGOInstrPasses(MPM, Level, /* RunProfileGen */ true,
1268  /* IsCS */ true, PGOOpt->CSProfileGenFile,
1269  PGOOpt->ProfileRemappingFile, LTOPhase);
1270  else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
1271  addPGOInstrPasses(MPM, Level, /* RunProfileGen */ false,
1272  /* IsCS */ true, PGOOpt->ProfileFile,
1273  PGOOpt->ProfileRemappingFile, LTOPhase);
1274  }
1275 
1276  // Re-compute GlobalsAA here prior to function passes. This is particularly
1277  // useful as the above will have inlined, DCE'ed, and function-attr
1278  // propagated everything. We should at this point have a reasonably minimal
1279  // and richly annotated call graph. By computing aliasing and mod/ref
1280  // information for all local globals here, the late loop passes and notably
1281  // the vectorizer will be able to use them to help recognize vectorizable
1282  // memory operations.
1284 
1285  for (auto &C : OptimizerEarlyEPCallbacks)
1286  C(MPM, Level);
1287 
1288  FunctionPassManager OptimizePM;
1289  OptimizePM.addPass(Float2IntPass());
1290  OptimizePM.addPass(LowerConstantIntrinsicsPass());
1291 
1292  if (EnableMatrix) {
1293  OptimizePM.addPass(LowerMatrixIntrinsicsPass());
1294  OptimizePM.addPass(EarlyCSEPass());
1295  }
1296 
1297  // FIXME: We need to run some loop optimizations to re-rotate loops after
1298  // simplifycfg and others undo their rotation.
1299 
1300  // Optimize the loop execution. These passes operate on entire loop nests
1301  // rather than on each loop in an inside-out manner, and so they are actually
1302  // function passes.
1303 
1304  for (auto &C : VectorizerStartEPCallbacks)
1305  C(OptimizePM, Level);
1306 
1307  LoopPassManager LPM;
1308  // First rotate loops that may have been un-rotated by prior passes.
1309  // Disable header duplication at -Oz.
1310  LPM.addPass(LoopRotatePass(Level != OptimizationLevel::Oz, LTOPreLink));
1311  // Some loops may have become dead by now. Try to delete them.
1312  // FIXME: see discussion in https://reviews.llvm.org/D112851,
1313  // this may need to be revisited once we run GVN before loop deletion
1314  // in the simplification pipeline.
1315  LPM.addPass(LoopDeletionPass());
1317  std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/false));
1318 
1319  // Distribute loops to allow partial vectorization. I.e. isolate dependences
1320  // into separate loop that would otherwise inhibit vectorization. This is
1321  // currently only performed for loops marked with the metadata
1322  // llvm.loop.distribute=true or when -enable-loop-distribute is specified.
1323  OptimizePM.addPass(LoopDistributePass());
1324 
1325  // Populates the VFABI attribute with the scalar-to-vector mappings
1326  // from the TargetLibraryInfo.
1327  OptimizePM.addPass(InjectTLIMappings());
1328 
1329  addVectorPasses(Level, OptimizePM, /* IsFullLTO */ false);
1330 
1331  // LoopSink pass sinks instructions hoisted by LICM, which serves as a
1332  // canonicalization pass that enables other optimizations. As a result,
1333  // LoopSink pass needs to be a very late IR pass to avoid undoing LICM
1334  // result too early.
1335  OptimizePM.addPass(LoopSinkPass());
1336 
1337  // And finally clean up LCSSA form before generating code.
1338  OptimizePM.addPass(InstSimplifyPass());
1339 
1340  // This hoists/decomposes div/rem ops. It should run after other sink/hoist
1341  // passes to avoid re-sinking, but before SimplifyCFG because it can allow
1342  // flattening of blocks.
1343  OptimizePM.addPass(DivRemPairsPass());
1344 
1345  // Try to annotate calls that were created during optimization.
1346  OptimizePM.addPass(TailCallElimPass());
1347 
1348  // LoopSink (and other loop passes since the last simplifyCFG) might have
1349  // resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
1350  OptimizePM.addPass(
1351  SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
1352 
1353  // Add the core optimizing pipeline.
1356 
1357  for (auto &C : OptimizerLastEPCallbacks)
1358  C(MPM, Level);
1359 
1360  // Split out cold code. Splitting is done late to avoid hiding context from
1361  // other optimizations and inadvertently regressing performance. The tradeoff
1362  // is that this has a higher code size cost than splitting early.
1363  if (EnableHotColdSplit && !LTOPreLink)
1365 
1366  // Search the code for similar regions of code. If enough similar regions can
1367  // be found where extracting the regions into their own function will decrease
1368  // the size of the program, we extract the regions, and deduplicate the
1369  // structurally similar regions.
1370  if (EnableIROutliner)
1372 
1373  // Merge functions if requested.
1374  if (PTO.MergeFunctions)
1376 
1377  // Now we need to do some global optimization transforms.
1378  // FIXME: It would seem like these should come first in the optimization
1379  // pipeline and maybe be the bottom of the canonicalization pipeline? Weird
1380  // ordering here.
1383 
1384  if (PTO.CallGraphProfile && !LTOPreLink)
1386 
1387  // TODO: Relative lookup table converter pass caused an issue when full LTO is
1388  // enabled. See https://reviews.llvm.org/D94355 for more details.
1389  // Until the issue is fixed, disable this pass during the pre-linking phase.
1390  if (!LTOPreLink)
1392 
1393  return MPM;
1394 }
1395 
1398  bool LTOPreLink) {
1399  assert(Level != OptimizationLevel::O0 &&
1400  "Must request optimizations for the default pipeline!");
1401 
1403 
1404  // Convert @llvm.global.annotations to !annotation metadata.
1406 
1407  // Force any function attributes we want the rest of the pipeline to observe.
1409 
1410  // Apply module pipeline start EP callback.
1411  for (auto &C : PipelineStartEPCallbacks)
1412  C(MPM, Level);
1413 
1414  if (PGOOpt && PGOOpt->DebugInfoForProfiling)
1416 
1417  const ThinOrFullLTOPhase LTOPhase = LTOPreLink
1420  // Add the core simplification pipeline.
1421  MPM.addPass(buildModuleSimplificationPipeline(Level, LTOPhase));
1422 
1423  // Now add the optimization pipeline.
1424  MPM.addPass(buildModuleOptimizationPipeline(Level, LTOPhase));
1425 
1426  if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1427  PGOOpt->Action == PGOOptions::SampleUse)
1429 
1430  // Emit annotation remarks.
1432 
1433  if (LTOPreLink)
1434  addRequiredLTOPreLinkPasses(MPM);
1435 
1436  return MPM;
1437 }
1438 
1441  assert(Level != OptimizationLevel::O0 &&
1442  "Must request optimizations for the default pipeline!");
1443 
1445 
1446  // Convert @llvm.global.annotations to !annotation metadata.
1448 
1449  // Force any function attributes we want the rest of the pipeline to observe.
1451 
1452  if (PGOOpt && PGOOpt->DebugInfoForProfiling)
1454 
1455  // Apply module pipeline start EP callback.
1456  for (auto &C : PipelineStartEPCallbacks)
1457  C(MPM, Level);
1458 
1459  // If we are planning to perform ThinLTO later, we don't bloat the code with
1460  // unrolling/vectorization/... now. Just simplify the module as much as we
1461  // can.
1464 
1465  // Run partial inlining pass to partially inline functions that have
1466  // large bodies.
1467  // FIXME: It isn't clear whether this is really the right place to run this
1468  // in ThinLTO. Because there is another canonicalization and simplification
1469  // phase that will run after the thin link, running this here ends up with
1470  // less information than will be available later and it may grow functions in
1471  // ways that aren't beneficial.
1472  if (RunPartialInlining)
1474 
1475  // Reduce the size of the IR as much as possible.
1477 
1478  if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1479  PGOOpt->Action == PGOOptions::SampleUse)
1481 
1482  // Handle Optimizer{Early,Last}EPCallbacks added by clang on PreLink. Actual
1483  // optimization is going to be done in PostLink stage, but clang can't add
1484  // callbacks there in case of in-process ThinLTO called by linker.
1485  for (auto &C : OptimizerEarlyEPCallbacks)
1486  C(MPM, Level);
1487  for (auto &C : OptimizerLastEPCallbacks)
1488  C(MPM, Level);
1489 
1490  // Emit annotation remarks.
1492 
1493  addRequiredLTOPreLinkPasses(MPM);
1494 
1495  return MPM;
1496 }
1497 
1499  OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary) {
1501 
1502  // Convert @llvm.global.annotations to !annotation metadata.
1504 
1505  if (ImportSummary) {
1506  // These passes import type identifier resolutions for whole-program
1507  // devirtualization and CFI. They must run early because other passes may
1508  // disturb the specific instruction patterns that these passes look for,
1509  // creating dependencies on resolutions that may not appear in the summary.
1510  //
1511  // For example, GVN may transform the pattern assume(type.test) appearing in
1512  // two basic blocks into assume(phi(type.test, type.test)), which would
1513  // transform a dependency on a WPD resolution into a dependency on a type
1514  // identifier resolution for CFI.
1515  //
1516  // Also, WPD has access to more precise information than ICP and can
1517  // devirtualize more effectively, so it should operate on the IR first.
1518  //
1519  // The WPD and LowerTypeTest passes need to run at -O0 to lower type
1520  // metadata and intrinsics.
1521  MPM.addPass(WholeProgramDevirtPass(nullptr, ImportSummary));
1522  MPM.addPass(LowerTypeTestsPass(nullptr, ImportSummary));
1523  }
1524 
1525  if (Level == OptimizationLevel::O0) {
1526  // Run a second time to clean up any type tests left behind by WPD for use
1527  // in ICP.
1528  MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
1529  // Drop available_externally and unreferenced globals. This is necessary
1530  // with ThinLTO in order to avoid leaving undefined references to dead
1531  // globals in the object file.
1534  return MPM;
1535  }
1536 
1537  // Force any function attributes we want the rest of the pipeline to observe.
1539 
1540  // Add the core simplification pipeline.
1543 
1544  // Now add the optimization pipeline.
1547 
1548  // Emit annotation remarks.
1550 
1551  return MPM;
1552 }
1553 
1556  assert(Level != OptimizationLevel::O0 &&
1557  "Must request optimizations for the default pipeline!");
1558  // FIXME: We should use a customized pre-link pipeline!
1559  return buildPerModuleDefaultPipeline(Level,
1560  /* LTOPreLink */ true);
1561 }
1562 
1565  ModuleSummaryIndex *ExportSummary) {
1567 
1568  // Convert @llvm.global.annotations to !annotation metadata.
1570 
1571  for (auto &C : FullLinkTimeOptimizationEarlyEPCallbacks)
1572  C(MPM, Level);
1573 
1574  // Create a function that performs CFI checks for cross-DSO calls with targets
1575  // in the current module.
1577 
1578  if (Level == OptimizationLevel::O0) {
1579  // The WPD and LowerTypeTest passes need to run at -O0 to lower type
1580  // metadata and intrinsics.
1581  MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
1582  MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
1583  // Run a second time to clean up any type tests left behind by WPD for use
1584  // in ICP.
1585  MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
1586 
1587  for (auto &C : FullLinkTimeOptimizationLastEPCallbacks)
1588  C(MPM, Level);
1589 
1590  // Emit annotation remarks.
1592 
1593  return MPM;
1594  }
1595 
1596  if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) {
1597  // Load sample profile before running the LTO optimization pipeline.
1598  MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
1599  PGOOpt->ProfileRemappingFile,
1601  // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
1602  // RequireAnalysisPass for PSI before subsequent non-module passes.
1604  }
1605 
1606  // Try to run OpenMP optimizations, quick no-op if no OpenMP metadata present.
1608 
1609  // Remove unused virtual tables to improve the quality of code generated by
1610  // whole-program devirtualization and bitset lowering.
1612 
1613  // Force any function attributes we want the rest of the pipeline to observe.
1615 
1616  // Do basic inference of function attributes from known properties of system
1617  // libraries and other oracles.
1619 
1620  if (Level.getSpeedupLevel() > 1) {
1623 
1624  // Indirect call promotion. This should promote all the targets that are
1625  // left by the earlier promotion pass that promotes intra-module targets.
1626  // This two-step promotion is to save the compile time. For LTO, it should
1627  // produce the same result as if we only do promotion here.
1629  true /* InLTO */, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
1630 
1633  // Propagate constants at call sites into the functions they call. This
1634  // opens opportunities for globalopt (and inlining) by substituting function
1635  // pointers passed as arguments to direct uses of functions.
1636  MPM.addPass(IPSCCPPass());
1637 
1638  // Attach metadata to indirect call sites indicating the set of functions
1639  // they may target at run-time. This should follow IPSCCP.
1641  }
1642 
1643  // Now deduce any function attributes based on the current code.
1644  MPM.addPass(
1646 
1647  // Do RPO function attribute inference across the module to forward-propagate
1648  // attributes where applicable.
1649  // FIXME: Is this really an optimization rather than a canonicalization?
1651 
1652  // Use in-range annotations on GEP indices to split globals where beneficial.
1654 
1655  // Run whole program optimization of virtual call when the list of callees
1656  // is fixed.
1657  MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
1658 
1659  // Stop here at -O1.
1660  if (Level == OptimizationLevel::O1) {
1661  // The LowerTypeTestsPass needs to run to lower type metadata and the
1662  // type.test intrinsics. The pass does nothing if CFI is disabled.
1663  MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
1664  // Run a second time to clean up any type tests left behind by WPD for use
1665  // in ICP (which is performed earlier than this in the regular LTO
1666  // pipeline).
1667  MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
1668 
1669  for (auto &C : FullLinkTimeOptimizationLastEPCallbacks)
1670  C(MPM, Level);
1671 
1672  // Emit annotation remarks.
1674 
1675  return MPM;
1676  }
1677 
1678  // Optimize globals to try and fold them into constants.
1680 
1681  // Promote any localized globals to SSA registers.
1683 
1684  // Linking modules together can lead to duplicate global constants; only
1685  // keep one copy of each constant.
1687 
1688  // Reduce the code after globalopt and ipsccp. Both can open up significant
1689  // simplification opportunities, and both can propagate functions through
1690  // function pointers. When this happens, we often have to resolve varargs
1691  // calls, etc, so let instcombine do this.
1692  FunctionPassManager PeepholeFPM;
1693  PeepholeFPM.addPass(InstCombinePass());
1694  if (Level == OptimizationLevel::O3)
1695  PeepholeFPM.addPass(AggressiveInstCombinePass());
1696  invokePeepholeEPCallbacks(PeepholeFPM, Level);
1697 
1700 
1701  // Note: historically, the PruneEH pass was run first to deduce nounwind and
1702  // generally clean up exception handling overhead. It isn't clear this is
1703  // valuable as the inliner doesn't currently care whether it is inlining an
1704  // invoke or a call.
1705  // Run the inliner now.
1708  /* MandatoryFirst */ true,
1711 
1712  // Optimize globals again after we ran the inliner.
1714 
1715  // Garbage collect dead functions.
1717 
1718  // If we didn't decide to inline a function, check to see if we can
1719  // transform it to pass arguments by value instead of by reference.
1721 
1722  // Remove unused arguments from functions.
1724 
1725  FunctionPassManager FPM;
1726  // The IPO Passes may leave cruft around. Clean up after them.
1727  FPM.addPass(InstCombinePass());
1728  invokePeepholeEPCallbacks(FPM, Level);
1729 
1730  FPM.addPass(JumpThreadingPass());
1731 
1732  // Do a post inline PGO instrumentation and use pass. This is a context
1733  // sensitive PGO pass.
1734  if (PGOOpt) {
1735  if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
1736  addPGOInstrPasses(MPM, Level, /* RunProfileGen */ true,
1737  /* IsCS */ true, PGOOpt->CSProfileGenFile,
1738  PGOOpt->ProfileRemappingFile,
1740  else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
1741  addPGOInstrPasses(MPM, Level, /* RunProfileGen */ false,
1742  /* IsCS */ true, PGOOpt->ProfileFile,
1743  PGOOpt->ProfileRemappingFile,
1745  }
1746 
1747  // Break up allocas
1748  FPM.addPass(SROAPass());
1749 
1750  // LTO provides additional opportunities for tailcall elimination due to
1751  // link-time inlining, and visibility of nocapture attribute.
1752  FPM.addPass(TailCallElimPass());
1753 
1754  // Run a few AA driver optimizations here and now to cleanup the code.
1757 
1758  MPM.addPass(
1760 
1761  // Require the GlobalsAA analysis for the module so we can query it within
1762  // MainFPM.
1764  // Invalidate AAManager so it can be recreated and pick up the newly available
1765  // GlobalsAA.
1766  MPM.addPass(
1768 
1769  FunctionPassManager MainFPM;
1772  /*AllowSpeculation=*/true),
1773  /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true));
1774 
1775  if (RunNewGVN)
1776  MainFPM.addPass(NewGVNPass());
1777  else
1778  MainFPM.addPass(GVNPass());
1779 
1780  // Remove dead memcpy()'s.
1781  MainFPM.addPass(MemCpyOptPass());
1782 
1783  // Nuke dead stores.
1784  MainFPM.addPass(DSEPass());
1786 
1789 
1790  LoopPassManager LPM;
1791  if (EnableLoopFlatten && Level.getSpeedupLevel() > 1)
1792  LPM.addPass(LoopFlattenPass());
1793  LPM.addPass(IndVarSimplifyPass());
1794  LPM.addPass(LoopDeletionPass());
1795  // FIXME: Add loop interchange.
1796 
1797  // Unroll small loops and perform peeling.
1798  LPM.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
1799  /* OnlyWhenForced= */ !PTO.LoopUnrolling,
1801  // The loop passes in LPM (LoopFullUnrollPass) do not preserve MemorySSA.
1802  // *All* loop passes must preserve it, in order to be able to use it.
1804  std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/true));
1805 
1806  MainFPM.addPass(LoopDistributePass());
1807 
1808  addVectorPasses(Level, MainFPM, /* IsFullLTO */ true);
1809 
1810  // Run the OpenMPOpt CGSCC pass again late.
1812 
1813  invokePeepholeEPCallbacks(MainFPM, Level);
1814  MainFPM.addPass(JumpThreadingPass());
1817 
1818  // Lower type metadata and the type.test intrinsic. This pass supports
1819  // clang's control flow integrity mechanisms (-fsanitize=cfi*) and needs
1820  // to be run at link time if CFI is enabled. This pass does nothing if
1821  // CFI is disabled.
1822  MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
1823  // Run a second time to clean up any type tests left behind by WPD for use
1824  // in ICP (which is performed earlier than this in the regular LTO pipeline).
1825  MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
1826 
1827  // Enable splitting late in the FullLTO post-link pipeline.
1828  if (EnableHotColdSplit)
1830 
1831  // Add late LTO optimization passes.
1832  // Delete basic blocks, which optimization passes may have killed.
1834  SimplifyCFGOptions().convertSwitchRangeToICmp(true).hoistCommonInsts(
1835  true))));
1836 
1837  // Drop bodies of available externally objects to improve GlobalDCE.
1839 
1840  // Now that we have optimized the program, discard unreachable functions.
1842 
1843  if (PTO.MergeFunctions)
1845 
1846  if (PTO.CallGraphProfile)
1848 
1849  for (auto &C : FullLinkTimeOptimizationLastEPCallbacks)
1850  C(MPM, Level);
1851 
1852  // Emit annotation remarks.
1854 
1855  return MPM;
1856 }
1857 
1859  bool LTOPreLink) {
1860  assert(Level == OptimizationLevel::O0 &&
1861  "buildO0DefaultPipeline should only be used with O0");
1862 
1864 
1865  // Perform pseudo probe instrumentation in O0 mode. This is for the
1866  // consistency between different build modes. For example, a LTO build can be
1867  // mixed with an O0 prelink and an O2 postlink. Loading a sample profile in
1868  // the postlink will require pseudo probe instrumentation in the prelink.
1869  if (PGOOpt && PGOOpt->PseudoProbeForProfiling)
1871 
1872  if (PGOOpt && (PGOOpt->Action == PGOOptions::IRInstr ||
1873  PGOOpt->Action == PGOOptions::IRUse))
1875  MPM,
1876  /* RunProfileGen */ (PGOOpt->Action == PGOOptions::IRInstr),
1877  /* IsCS */ false, PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile);
1878 
1879  for (auto &C : PipelineStartEPCallbacks)
1880  C(MPM, Level);
1881 
1882  if (PGOOpt && PGOOpt->DebugInfoForProfiling)
1884 
1885  for (auto &C : PipelineEarlySimplificationEPCallbacks)
1886  C(MPM, Level);
1887 
1888  // Build a minimal pipeline based on the semantics required by LLVM,
1889  // which is just that always inlining occurs. Further, disable generating
1890  // lifetime intrinsics to avoid enabling further optimizations during
1891  // code generation.
1893  /*InsertLifetimeIntrinsics=*/false));
1894 
1895  if (PTO.MergeFunctions)
1897 
1898  if (EnableMatrix)
1899  MPM.addPass(
1901 
1902  if (!CGSCCOptimizerLateEPCallbacks.empty()) {
1903  CGSCCPassManager CGPM;
1904  for (auto &C : CGSCCOptimizerLateEPCallbacks)
1905  C(CGPM, Level);
1906  if (!CGPM.isEmpty())
1908  }
1909  if (!LateLoopOptimizationsEPCallbacks.empty()) {
1910  LoopPassManager LPM;
1911  for (auto &C : LateLoopOptimizationsEPCallbacks)
1912  C(LPM, Level);
1913  if (!LPM.isEmpty()) {
1916  }
1917  }
1918  if (!LoopOptimizerEndEPCallbacks.empty()) {
1919  LoopPassManager LPM;
1920  for (auto &C : LoopOptimizerEndEPCallbacks)
1921  C(LPM, Level);
1922  if (!LPM.isEmpty()) {
1925  }
1926  }
1927  if (!ScalarOptimizerLateEPCallbacks.empty()) {
1928  FunctionPassManager FPM;
1929  for (auto &C : ScalarOptimizerLateEPCallbacks)
1930  C(FPM, Level);
1931  if (!FPM.isEmpty())
1933  }
1934 
1935  for (auto &C : OptimizerEarlyEPCallbacks)
1936  C(MPM, Level);
1937 
1938  if (!VectorizerStartEPCallbacks.empty()) {
1939  FunctionPassManager FPM;
1940  for (auto &C : VectorizerStartEPCallbacks)
1941  C(FPM, Level);
1942  if (!FPM.isEmpty())
1944  }
1945 
1946  ModulePassManager CoroPM;
1947  CoroPM.addPass(CoroEarlyPass());
1948  CGSCCPassManager CGPM;
1949  CGPM.addPass(CoroSplitPass());
1951  CoroPM.addPass(CoroCleanupPass());
1952  CoroPM.addPass(GlobalDCEPass());
1954 
1955  for (auto &C : OptimizerLastEPCallbacks)
1956  C(MPM, Level);
1957 
1958  if (LTOPreLink)
1959  addRequiredLTOPreLinkPasses(MPM);
1960 
1962 
1963  return MPM;
1964 }
1965 
1967  AAManager AA;
1968 
1969  // The order in which these are registered determines their priority when
1970  // being queried.
1971 
1972  // First we register the basic alias analysis that provides the majority of
1973  // per-function local AA logic. This is a stateless, on-demand local set of
1974  // AA techniques.
1976 
1977  // Next we query fast, specialized alias analyses that wrap IR-embedded
1978  // information about aliasing.
1981 
1982  // Add support for querying global aliasing information when available.
1983  // Because the `AAManager` is a function analysis and `GlobalsAA` is a module
1984  // analysis, all that the `AAManager` can do is query for any *cached*
1985  // results from `GlobalsAA` through a readonly proxy.
1988 
1989  // Add target-specific alias analyses.
1990  if (TM)
1992 
1993  return AA;
1994 }
llvm::GlobalsAA
Analysis pass providing a never-invalidated alias analysis result.
Definition: GlobalsModRef.h:127
TypeBasedAliasAnalysis.h
llvm::PassManager< Loop, LoopAnalysisManager, LoopStandardAnalysisResults &, LPMUpdater & >
Definition: LoopPassManager.h:69
llvm::RecomputeGlobalsAAPass
Definition: GlobalsModRef.h:137
llvm::BasicAA
Analysis pass providing a never-invalidated alias analysis result.
Definition: BasicAliasAnalysis.h:150
llvm::IPSCCPPass
Pass to perform interprocedural constant propagation.
Definition: SCCP.h:30
llvm::ScopedNoAliasAA
Analysis pass providing a never-invalidated alias analysis result.
Definition: ScopedNoAliasAA.h:51
llvm::FunctionSpecializationPass
Pass to perform interprocedural constant propagation by specializing functions.
Definition: SCCP.h:37
EnableDFAJumpThreading
static cl::opt< bool > EnableDFAJumpThreading("enable-dfa-jump-thread", cl::desc("Enable DFA jump threading"), cl::init(false), cl::Hidden)
llvm::AAManager
A manager for alias analyses.
Definition: AliasAnalysis.h:876
IROutliner.h
getInlineParamsFromOptLevel
static InlineParams getInlineParamsFromOptLevel(OptimizationLevel Level)
Definition: PassBuilderPipelines.cpp:792
DeadArgumentElimination.h
RunNewGVN
static cl::opt< bool > RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden, cl::desc("Run the NewGVN pass"))
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
PassBuilder.h
Phase
aarch64 falkor hwpf fix Falkor HW Prefetch Fix Late Phase
Definition: AArch64FalkorHWPFFix.cpp:231
llvm::SampleProfileProbePass
Definition: SampleProfileProbe.h:133
Mem2Reg.h
llvm::OptimizationLevel::O1
static const OptimizationLevel O1
Optimize quickly without destroying debuggability.
Definition: OptimizationLevel.h:57
llvm::LoopSinkPass
A pass that does profile-guided sinking of instructions into loops.
Definition: LoopSink.h:33
llvm::PassManager::isEmpty
bool isEmpty() const
Returns if the pass manager contains any passes.
Definition: PassManager.h:568
OpenMPOpt.h
llvm::InferFunctionAttrsPass
A pass which infers function attributes from the names and signatures of function declarations in a m...
Definition: InferFunctionAttrs.h:26
llvm::SampleProfileLoaderPass
The sample profiler data loader pass.
Definition: SampleProfile.h:26
CalledValuePropagation.h
Annotation2Metadata.h
llvm::LoopInterchangePass
Definition: LoopInterchange.h:20
llvm::AlignmentFromAssumptionsPass
Definition: AlignmentFromAssumptions.h:29
PartialInlining.h
Inliner.h
llvm::Annotation2MetadataPass
Pass to convert @llvm.global.annotations to !annotation metadata.
Definition: Annotation2Metadata.h:24
llvm::ThinOrFullLTOPhase::ThinLTOPostLink
@ ThinLTOPostLink
ThinLTO postlink (backend compile) phase.
GlobalSplit.h
llvm::GVNHoistPass
A simple and fast domtree-based GVN pass to hoist common expressions from sibling branches.
Definition: GVN.h:377
CorrelatedValuePropagation.h
llvm::LoopIdiomRecognizePass
Performs Loop Idiom Recognize Pass.
Definition: LoopIdiomRecognize.h:40
llvm::PassManager::addPass
LLVM_ATTRIBUTE_MINSIZE std::enable_if_t<!std::is_same< PassT, PassManager >::value > addPass(PassT &&Pass)
Definition: PassManager.h:544
llvm::OptimizationLevel::O2
static const OptimizationLevel O2
Optimize for fast execution as much as possible without triggering significant incremental compile ti...
Definition: OptimizationLevel.h:74
llvm::CrossDSOCFIPass
Definition: CrossDSOCFI.h:20
llvm::ALL
@ ALL
Definition: Attributor.h:5208
llvm::ReassociatePass
Reassociate commutative expressions.
Definition: Reassociate.h:71
llvm::InlinePass::EarlyInliner
@ EarlyInliner
ConstantMerge.h
llvm::SetLicmMssaNoAccForPromotionCap
cl::opt< unsigned > SetLicmMssaNoAccForPromotionCap
AlignmentFromAssumptions.h
EnableIROutliner
static cl::opt< bool > EnableIROutliner("ir-outliner", cl::init(false), cl::Hidden, cl::desc("Enable ir outliner pass"))
llvm::PassBuilder::buildModuleOptimizationPipeline
ModulePassManager buildModuleOptimizationPipeline(OptimizationLevel Level, ThinOrFullLTOPhase LTOPhase)
Construct the core LLVM module optimization pipeline.
Definition: PassBuilderPipelines.cpp:1226
IndVarSimplify.h
ErrorHandling.h
SCCP.h
TailRecursionElimination.h
DivRemPairs.h
llvm::LoopRotatePass
A simple loop rotation transformation.
Definition: LoopRotation.h:24
llvm::BDCEPass
Definition: BDCE.h:26
DeadStoreElimination.h
OptimizationRemarkEmitter.h
CoroCleanup.h
GlobalsModRef.h
VectorCombine.h
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:140
MergeFunctions.h
llvm::PseudoProbeUpdatePass
Definition: SampleProfileProbe.h:153
LoopFlatten.h
EnableModuleInliner
static cl::opt< bool > EnableModuleInliner("enable-module-inliner", cl::init(false), cl::Hidden, cl::desc("Enable module inliner"))
ElimAvailExtern.h
llvm::PipelineTuningOptions::ForgetAllSCEVInLoopUnroll
bool ForgetAllSCEVInLoopUnroll
Tuning option to forget all SCEV loops in LoopUnroll.
Definition: PassBuilder.h:60
EnableNoRerunSimplificationPipeline
static cl::opt< bool > EnableNoRerunSimplificationPipeline("enable-no-rerun-simplification-pipeline", cl::init(true), cl::Hidden, cl::desc("Prevent running the simplification pipeline on a function more " "than once in the case that SCC mutations cause a function to be " "visited multiple times as long as the function has not been changed"))
FlattenedProfileUsed
static cl::opt< bool > FlattenedProfileUsed("flattened-profile-used", cl::init(false), cl::Hidden, cl::desc("Indicate the sample profile being used is flattened, i.e., " "no inline hierachy exists in the profile"))
llvm::LoopUnrollOptions
A set of parameters used to control various transforms performed by the LoopUnroll pass.
Definition: LoopUnrollPass.h:61
InjectTLIMappings.h
ConstraintElimination.h
Float2Int.h
CGProfile.h
llvm::LoopDistributePass
Definition: LoopDistribute.h:25
JumpThreading.h
HotColdSplitting.h
llvm::EnableKnowledgeRetention
cl::opt< bool > EnableKnowledgeRetention
enable preservation of attributes in assume like: call void @llvm.assume(i1 true) [ "nonnull"(i32* PT...
Definition: InstCombineCalls.cpp:95
llvm::PGOInstrumentationUse
The profile annotation (profile-instr-use) pass for IR based PGO.
Definition: PGOInstrumentation.h:58
llvm::InlineParams
Thresholds to tune inline cost analysis.
Definition: InlineCost.h:204
LoopUnrollAndJamPass.h
llvm::LoopLoadEliminationPass
Pass to forward loads in a loop around the backedge to subsequent iterations.
Definition: LoopLoadElimination.h:27
llvm::ThinOrFullLTOPhase::ThinLTOPreLink
@ ThinLTOPreLink
ThinLTO prelink (summary) phase.
llvm::TailCallElimPass
Definition: TailRecursionElimination.h:61
llvm::CoroCleanupPass
Definition: CoroCleanup.h:23
llvm::createCGSCCToFunctionPassAdaptor
CGSCCToFunctionPassAdaptor createCGSCCToFunctionPassAdaptor(FunctionPassT &&Pass, bool EagerlyInvalidate=false, bool NoRerun=false)
A function to deduce a function pass type and wrap it in the templated adaptor.
Definition: CGSCCPassManager.h:509
LoopInstSimplify.h
llvm::SLPVectorizerPass
Definition: SLPVectorizer.h:58
EnableGVNSink
static cl::opt< bool > EnableGVNSink("enable-gvn-sink", cl::desc("Enable the GVN sinking pass (default = off)"))
llvm::ReversePostOrderFunctionAttrsPass
A pass to do RPO deduction and propagation of function attributes.
Definition: FunctionAttrs.h:69
llvm::PartialInlinerPass
Pass to remove unused function declarations.
Definition: PartialInlining.h:24
llvm::PGOOptions::SampleUse
@ SampleUse
Definition: PGOOptions.h:23
llvm::DSEPass
This class implements a trivial dead store elimination.
Definition: DeadStoreElimination.h:28
llvm::OptimizationLevel::O0
static const OptimizationLevel O0
Disable as many optimizations as possible.
Definition: OptimizationLevel.h:41
llvm::PassBuilder::addPGOInstrPassesForO0
void addPGOInstrPassesForO0(ModulePassManager &MPM, bool RunProfileGen, bool IsCS, std::string ProfileFile, std::string ProfileRemappingFile)
Add PGOInstrumentation passes for O0 only.
Definition: PassBuilderPipelines.cpp:767
BasicAliasAnalysis.h
CoroElide.h
PerformMandatoryInliningsFirst
static cl::opt< bool > PerformMandatoryInliningsFirst("mandatory-inlining-first", cl::init(true), cl::Hidden, cl::desc("Perform mandatory inlinings module-wide, before performing " "inlining"))
llvm::PipelineTuningOptions::LicmMssaNoAccForPromotionCap
unsigned LicmMssaNoAccForPromotionCap
Tuning option to disable promotion to scalars in LICM with MemorySSA, if the number of access is too ...
Definition: PassBuilder.h:68
MergedLoadStoreMotion.h
llvm::AggressiveInstCombinePass
Definition: AggressiveInstCombine.h:22
llvm::InstrProfiling
Instrumentation based profiling lowering pass.
Definition: InstrProfiling.h:34
llvm::ConstraintEliminationPass
Definition: ConstraintElimination.h:16
llvm::HotColdSplittingPass
Pass to outline cold regions.
Definition: HotColdSplitting.h:61
llvm::TypeBasedAA
Analysis pass providing a never-invalidated alias analysis result.
Definition: TypeBasedAliasAnalysis.h:57
AliasAnalysis.h
llvm::ThinOrFullLTOPhase
ThinOrFullLTOPhase
This enumerates the LLVM full LTO or ThinLTO optimization phases.
Definition: Pass.h:73
CommandLine.h
LoopRotation.h
llvm::MODULE
@ MODULE
Definition: Attributor.h:5206
llvm::ModuleInlinerWrapperPass
Module pass, wrapping the inliner pass.
Definition: Inliner.h:122
llvm::CoroElidePass
Definition: CoroElide.h:25
llvm::ControlHeightReductionPass
Definition: ControlHeightReduction.h:21
llvm::CorrelatedValuePropagationPass
Definition: CorrelatedValuePropagation.h:18
TargetMachine.h
llvm::AttributorPass
}
Definition: Attributor.h:3115
AlwaysInliner.h
InstrProfiling.h
LoopIdiomRecognize.h
EnableConstraintElimination
static cl::opt< bool > EnableConstraintElimination("enable-constraint-elimination", cl::init(false), cl::Hidden, cl::desc("Enable pass to eliminate conditions based on linear constraints"))
llvm::LICMPass
Performs Loop Invariant Code Motion Pass.
Definition: LICM.h:66
ArgumentPromotion.h
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
EnableOrderFileInstrumentation
static cl::opt< bool > EnableOrderFileInstrumentation("enable-order-file-instrumentation", cl::init(false), cl::Hidden, cl::desc("Enable order file instrumentation (default = off)"))
llvm::PostOrderFunctionAttrsPass
Computes function attributes in post-order over the call graph.
Definition: FunctionAttrs.h:50
AggressiveInstCombine.h
llvm::PassBuilder::buildFunctionSimplificationPipeline
FunctionPassManager buildFunctionSimplificationPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the core LLVM function canonicalization and simplification pipeline.
Definition: PassBuilderPipelines.cpp:468
llvm::PGOOptions::IRUse
@ IRUse
Definition: PGOOptions.h:23
llvm::CoroEarlyPass
Definition: CoroEarly.h:26
IP
Definition: NVPTXLowerArgs.cpp:168
AssumeBundleBuilder.h
llvm::GVNSinkPass
Uses an "inverted" value numbering to decide the similarity of expressions and sinks similar expressi...
Definition: GVN.h:384
EnableLoopInterchange
static cl::opt< bool > EnableLoopInterchange("enable-loopinterchange", cl::init(false), cl::Hidden, cl::desc("Enable the experimental LoopInterchange Pass"))
InlineAdvisor.h
llvm::OpenMPOptCGSCCPass
Definition: OpenMPOpt.h:43
llvm::PGOIndirectCallPromotion
The indirect function call promotion pass.
Definition: PGOInstrumentation.h:73
EnableGVNHoist
static cl::opt< bool > EnableGVNHoist("enable-gvn-hoist", cl::desc("Enable the GVN hoisting pass (default = off)"))
Options
const char LLVMTargetMachineRef LLVMPassBuilderOptionsRef Options
Definition: PassBuilderBindings.cpp:48
MemCpyOptimizer.h
llvm::CanonicalizeAliasesPass
Simple pass that canonicalizes aliases.
Definition: CanonicalizeAliases.h:23
GVN.h
llvm::LoopDeletionPass
Definition: LoopDeletion.h:24
llvm::PromotePass
Definition: Mem2Reg.h:23
llvm::LoopFlattenPass
Definition: LoopFlatten.h:23
EnableMergeFunctions
static cl::opt< bool > EnableMergeFunctions("enable-merge-functions", cl::init(false), cl::Hidden, cl::desc("Enable function merging as part of the optimization pipeline"))
llvm::getInlineParams
InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options.
Definition: InlineCost.cpp:3101
llvm::PGOOptions::CSIRInstr
@ CSIRInstr
Definition: PGOOptions.h:24
llvm::ModuleInlinerPass
The module inliner pass for the new pass manager.
Definition: ModuleInliner.h:27
llvm::PassBuilder::buildInlinerPipeline
ModuleInlinerWrapperPass buildInlinerPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the module pipeline that performs inlining as well as the inlining-driven cleanups.
Definition: PassBuilderPipelines.cpp:797
llvm::GlobalOptPass
Optimize globals that never have their address taken.
Definition: GlobalOpt.h:25
llvm::SyntheticCountsPropagation
Definition: SyntheticCountsPropagation.h:17
llvm::InlineContext
Provides context on when an inline advisor is constructed in the pipeline (e.g., link phase,...
Definition: InlineAdvisor.h:58
llvm::PGOMemOPSizeOpt
The profile size based optimization pass for memory intrinsics.
Definition: PGOInstrumentation.h:86
llvm::PassBuilder::buildThinLTOPreLinkDefaultPipeline
ModulePassManager buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level)
Build a pre-link, ThinLTO-targeting default optimization pipeline to a pass manager.
Definition: PassBuilderPipelines.cpp:1440
ScopedNoAliasAA.h
addAnnotationRemarksPass
static void addAnnotationRemarksPass(ModulePassManager &MPM)
Definition: PassBuilderPipelines.cpp:312
EnableGlobalAnalyses
static cl::opt< bool > EnableGlobalAnalyses("enable-global-analyses", cl::init(true), cl::Hidden, cl::desc("Enable inter-procedural analyses"))
EnablePGOInlineDeferral
static cl::opt< bool > EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true), cl::Hidden, cl::desc("Enable inline deferral during PGO"))
Flag to enable inline deferral during PGO.
llvm::MemProfilerPass
Public interface to the memory profiler pass for instrumenting code to profile memory accesses.
Definition: MemProfiler.h:30
llvm::LoopSimplifyCFGPass
Performs basic CFG simplifications to assist other loop passes.
Definition: LoopSimplifyCFG.h:28
llvm::PGOInstrumentationGenCreateVar
The instrumentation (profile-instr-gen) pass for IR based PGO.
Definition: PGOInstrumentation.h:35
llvm::MergedLoadStoreMotionPass
Definition: MergedLoadStoreMotion.h:42
ControlHeightReduction.h
InstSimplifyPass.h
llvm::LowerTypeTestsPass
Definition: LowerTypeTests.h:200
llvm::AnnotationRemarksPass
Definition: AnnotationRemarks.h:23
llvm::PassBuilder::buildModuleInlinerPipeline
ModulePassManager buildModuleInlinerPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the module pipeline that performs inlining with module inliner pass.
Definition: PassBuilderPipelines.cpp:879
llvm::SimpleLoopUnswitchPass
This pass transforms loops that contain branches or switches on loop- invariant conditions to have mu...
Definition: SimpleLoopUnswitch.h:67
EnablePostPGOLoopRotation
static cl::opt< bool > EnablePostPGOLoopRotation("enable-post-pgo-loop-rotation", cl::init(true), cl::Hidden, cl::desc("Run the loop rotation transformation after PGO instrumentation"))
SROA.h
SampleProfile.h
llvm::PipelineTuningOptions::EagerlyInvalidateAnalyses
bool EagerlyInvalidateAnalyses
Definition: PassBuilder.h:88
llvm::GlobalSplitPass
Pass to perform split of global variables.
Definition: GlobalSplit.h:26
ForceFunctionAttrs.h
llvm::cl::opt
Definition: CommandLine.h:1412
Attributor.h
llvm::ModuleMemProfilerPass
Public interface to the memory profiler module pass for instrumenting code to profile memory allocati...
Definition: MemProfiler.h:39
EnableEagerlyInvalidateAnalyses
static cl::opt< bool > EnableEagerlyInvalidateAnalyses("eagerly-invalidate-analyses", cl::init(true), cl::Hidden, cl::desc("Eagerly invalidate more analyses in default pipelines"))
llvm::VectorCombinePass
Optimize scalar/vector interactions in IR using target cost models.
Definition: VectorCombine.h:23
llvm::createModuleToFunctionPassAdaptor
ModuleToFunctionPassAdaptor createModuleToFunctionPassAdaptor(FunctionPassT &&Pass, bool EagerlyInvalidate=false)
A function to deduce a function pass type and wrap it in the templated adaptor.
Definition: PassManager.h:1218
SpeculativeExecution.h
llvm::cl::values
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:705
EarlyCSE.h
CrossDSOCFI.h
ProfileSummaryInfo.h
llvm::WholeProgramDevirtPass
Definition: WholeProgramDevirt.h:224
ModuleInliner.h
DisablePreInliner
static cl::opt< bool > DisablePreInliner("disable-preinline", cl::init(false), cl::Hidden, cl::desc("Disable pre-instrumentation inliner"))
RunPartialInlining
static cl::opt< bool > RunPartialInlining("enable-partial-inlining", cl::init(false), cl::Hidden, cl::desc("Run Partial inlinining pass"))
CoroSplit.h
llvm::AssumeSimplifyPass
This pass attempts to minimize the number of assumes without losing any information.
Definition: AssumeBundleBuilder.h:55
llvm::CSKYAttrs::NONE
@ NONE
Definition: CSKYAttributes.h:76
llvm::PassBuilder::buildLTOPreLinkDefaultPipeline
ModulePassManager buildLTOPreLinkDefaultPipeline(OptimizationLevel Level)
Build a pre-link, LTO-targeting default optimization pipeline to a pass manager.
Definition: PassBuilderPipelines.cpp:1555
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
EnableHotColdSplit
static cl::opt< bool > EnableHotColdSplit("hot-cold-split", cl::desc("Enable hot-cold splitting pass"))
llvm::AttributorCGSCCPass
Definition: Attributor.h:3118
PGOInstrumentation.h
CGSCCPassManager.h
llvm::PassBuilder::buildLTODefaultPipeline
ModulePassManager buildLTODefaultPipeline(OptimizationLevel Level, ModuleSummaryIndex *ExportSummary)
Build an LTO default optimization pipeline to a pass manager.
Definition: PassBuilderPipelines.cpp:1564
llvm::ForceFunctionAttrsPass
Pass which forces specific function attributes into the IR, primarily as a debugging tool.
Definition: ForceFunctionAttrs.h:24
llvm::ExtraVectorPassManager
A pass manager to run a set of extra function simplification passes after vectorization,...
Definition: LoopVectorize.h:105
llvm::InvalidateAnalysisPass
A no-op pass template which simply forces a specific analysis result to be invalidated.
Definition: PassManager.h:1271
LowerExpectIntrinsic.h
llvm::PassBuilder::buildThinLTODefaultPipeline
ModulePassManager buildThinLTODefaultPipeline(OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary)
Build a ThinLTO default optimization pipeline to a pass manager.
Definition: PassBuilderPipelines.cpp:1498
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:447
EnableLoopFlatten
static cl::opt< bool > EnableLoopFlatten("enable-loop-flatten", cl::init(true), cl::Hidden, cl::desc("Enable the LoopFlatten Pass"))
llvm::DeadArgumentEliminationPass
Eliminate dead arguments (and return values) from functions.
Definition: DeadArgumentElimination.h:38
ADCE.h
llvm::OptimizationLevel::Oz
static const OptimizationLevel Oz
A very specialized mode that will optimize for code size at any and all costs.
Definition: OptimizationLevel.h:108
llvm::PassBuilder::buildDefaultAAPipeline
AAManager buildDefaultAAPipeline()
Build the default AAManager with the default alias analysis pipeline registered.
Definition: PassBuilderPipelines.cpp:1966
InferFunctionAttrs.h
SimpleLoopUnswitch.h
llvm::PipelineTuningOptions::PipelineTuningOptions
PipelineTuningOptions()
Constructor sets pipeline tuning defaults based on cl::opts.
Definition: PassBuilderPipelines.cpp:286
llvm::InstrProfOptions
Options for the frontend instrumentation based profiling pass.
Definition: Instrumentation.h:106
llvm::SpeculativeExecutionPass
Definition: SpeculativeExecution.h:69
llvm::createFunctionToLoopPassAdaptor
std::enable_if_t< is_detected< HasRunOnLoopT, LoopPassT >::value, FunctionToLoopPassAdaptor > createFunctionToLoopPassAdaptor(LoopPassT &&Pass, bool UseMemorySSA=false, bool UseBlockFrequencyInfo=false, bool UseBranchProbabilityInfo=false)
A function to deduce a loop pass type and wrap it in the templated adaptor.
Definition: LoopPassManager.h:481
llvm::TargetMachine::registerDefaultAliasAnalyses
virtual void registerDefaultAliasAnalyses(AAManager &)
Allow the target to register alias analyses with the AAManager for use with the new pass manager.
Definition: TargetMachine.h:354
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::EliminateAvailableExternallyPass
A pass that transforms external global definitions into declarations.
Definition: ElimAvailExtern.h:24
llvm::ADCEPass
A DCE pass that assumes instructions are dead until proven otherwise.
Definition: ADCE.h:31
llvm::PipelineTuningOptions::SLPVectorization
bool SLPVectorization
Tuning option to enable/disable slp loop vectorization, set based on opt level.
Definition: PassBuilder.h:53
llvm::RelLookupTableConverterPass
Definition: RelLookupTableConverter.h:61
Reassociate.h
llvm::JumpThreadingPass
This pass performs 'jump threading', which looks at blocks that have multiple predecessors and multip...
Definition: JumpThreading.h:77
llvm::LoopFullUnrollPass
Loop unroll pass that only does full loop unrolling and peeling.
Definition: LoopUnrollPass.h:26
llvm::LowerExpectIntrinsicPass
Definition: LowerExpectIntrinsic.h:24
llvm::InlinePass::CGSCCInliner
@ CGSCCInliner
llvm::InliningAdvisorMode::Development
@ Development
SampleProfileProbe.h
LoopPassManager.h
llvm::Float2IntPass
Definition: Float2Int.h:32
llvm::LibCallsShrinkWrapPass
Definition: LibCallsShrinkWrap.h:18
NameAnonGlobals.h
llvm::CalledValuePropagationPass
Definition: CalledValuePropagation.h:26
llvm::WarnMissedTransformationsPass
Definition: WarnMissedTransforms.h:24
llvm::IndVarSimplifyPass
Definition: IndVarSimplify.h:25
LowerMatrixIntrinsics.h
llvm::GlobalDCEPass
Pass to remove unused function declarations.
Definition: GlobalDCE.h:36
LoopInterchange.h
llvm::PipelineTuningOptions::CallGraphProfile
bool CallGraphProfile
Tuning option to enable/disable call graph profile.
Definition: PassBuilder.h:72
llvm::ThinOrFullLTOPhase::FullLTOPostLink
@ FullLTOPostLink
Full LTO postlink (backend compile) phase.
llvm::LowerMatrixIntrinsicsPass
Definition: LowerMatrixIntrinsics.h:19
llvm::NewGVNPass
Definition: NewGVN.h:23
SyntheticCountsPropagation.h
llvm::AddDiscriminatorsPass
Definition: AddDiscriminators.h:24
CanonicalizeAliases.h
llvm::CGSCC
@ CGSCC
Definition: Attributor.h:5207
LibCallsShrinkWrap.h
llvm::AAManager::registerModuleAnalysis
void registerModuleAnalysis()
Register a specific AA result.
Definition: AliasAnalysis.h:886
llvm::CoroConditionalWrapper
Definition: CoroConditionalWrapper.h:20
llvm::InstCombinePass
Definition: InstCombine.h:28
PreInlineThreshold
static cl::opt< int > PreInlineThreshold("preinline-threshold", cl::Hidden, cl::init(75), cl::desc("Control the amount of inlining in pre-instrumentation inliner " "(default = 75)"))
GlobalDCE.h
llvm::PassManager< Loop, LoopAnalysisManager, LoopStandardAnalysisResults &, LPMUpdater & >::addPass
LLVM_ATTRIBUTE_MINSIZE std::enable_if_t< is_detected< HasRunOnLoopT, PassT >::value > addPass(PassT &&Pass)
Definition: LoopPassManager.h:107
llvm::OptimizationLevel::O3
static const OptimizationLevel O3
Optimize for fast execution as much as possible.
Definition: OptimizationLevel.h:89
clEnumValN
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:680
llvm::PipelineTuningOptions::LoopVectorization
bool LoopVectorization
Tuning option to enable/disable loop vectorization, set based on opt level.
Definition: PassBuilder.h:49
NewGVN.h
llvm::PICLevel::Level
Level
Definition: CodeGen.h:33
LowerTypeTests.h
CoroConditionalWrapper.h
CallSiteSplitting.h
llvm::LoopVectorizeOptions
Definition: LoopVectorize.h:115
llvm::InstrOrderFilePass
The instrumentation pass for recording function order.
Definition: InstrOrderFile.h:20
LoopSimplifyCFG.h
llvm::CGProfilePass
Definition: CGProfile.h:19
AnnotationRemarks.h
LoopVectorize.h
SCCP.h
llvm::ModuleInlinerWrapperPass::addModulePass
void addModulePass(T Pass)
Add a module pass that runs before the CGSCC passes.
Definition: Inliner.h:139
llvm::PassManager
Manages a sequence of passes over a particular unit of IR.
Definition: PassManager.h:469
llvm::LoopUnrollAndJamPass
A simple loop rotation transformation.
Definition: LoopUnrollAndJamPass.h:20
llvm::IROutlinerPass
Pass to outline similar regions.
Definition: IROutliner.h:444
llvm::ForgetSCEVInLoopUnroll
cl::opt< bool > ForgetSCEVInLoopUnroll
SLPVectorizer.h
EnableO3NonTrivialUnswitching
static cl::opt< bool > EnableO3NonTrivialUnswitching("enable-npm-O3-nontrivial-unswitch", cl::init(true), cl::Hidden, cl::desc("Enable non-trivial loop unswitching for -O3"))
LoopLoadElimination.h
llvm::PassBuilder::buildPerModuleDefaultPipeline
ModulePassManager buildPerModuleDefaultPipeline(OptimizationLevel Level, bool LTOPreLink=false)
Build a per-module default optimization pipeline.
Definition: PassBuilderPipelines.cpp:1397
LoopUnrollPass.h
llvm::SCCPPass
This pass performs function-level constant propagation and merging.
Definition: SCCP.h:38
EnableSyntheticCounts
static cl::opt< bool > EnableSyntheticCounts("enable-npm-synthetic-counts", cl::Hidden, cl::desc("Run synthetic function entry count generation " "pass"))
llvm::PipelineTuningOptions::MergeFunctions
bool MergeFunctions
Tuning option to enable/disable function merging.
Definition: PassBuilder.h:76
PGOOptions.h
llvm::OptimizationLevel
Definition: OptimizationLevel.h:22
llvm::NameAnonGlobalPass
Simple pass that provides a name to every anonymous globals.
Definition: NameAnonGlobals.h:22
PassManager.h
LowerConstantIntrinsics.h
llvm::ThinOrFullLTOPhase::None
@ None
No LTO/ThinLTO behavior needed.
EnableFunctionSpecialization
static cl::opt< bool > EnableFunctionSpecialization("enable-function-specialization", cl::init(false), cl::Hidden, cl::desc("Enable Function Specialization pass"))
llvm::PGOOptions::CSIRUse
@ CSIRUse
Definition: PGOOptions.h:24
isLTOPreLink
static bool isLTOPreLink(ThinOrFullLTOPhase Phase)
Definition: PassBuilderPipelines.cpp:317
llvm::CoroSplitPass
Definition: CoroSplit.h:24
UseInlineAdvisor
static cl::opt< InliningAdvisorMode > UseInlineAdvisor("enable-ml-inliner", cl::init(InliningAdvisorMode::Default), cl::Hidden, cl::desc("Enable ML policy for inliner. Currently trained for -Oz only"), cl::values(clEnumValN(InliningAdvisorMode::Default, "default", "Heuristics-based inliner version"), clEnumValN(InliningAdvisorMode::Development, "development", "Use development mode (runtime-loadable model)"), clEnumValN(InliningAdvisorMode::Release, "release", "Use release mode (AOT-compiled model)")))
llvm::PassBuilder::buildO0DefaultPipeline
ModulePassManager buildO0DefaultPipeline(OptimizationLevel Level, bool LTOPreLink=false)
Build an O0 pipeline with the minimal semantically required passes.
Definition: PassBuilderPipelines.cpp:1858
AttributorRun
static cl::opt< AttributorRunOption > AttributorRun("attributor-enable", cl::Hidden, cl::init(AttributorRunOption::NONE), cl::desc("Enable the attributor inter-procedural deduction pass"), cl::values(clEnumValN(AttributorRunOption::ALL, "all", "enable all attributor runs"), clEnumValN(AttributorRunOption::MODULE, "module", "enable module-wide attributor runs"), clEnumValN(AttributorRunOption::CGSCC, "cgscc", "enable call graph SCC attributor runs"), clEnumValN(AttributorRunOption::NONE, "none", "disable attributor runs")))
llvm::SROAPass
An optimization pass providing Scalar Replacement of Aggregates.
Definition: SROA.h:64
WholeProgramDevirt.h
llvm::MemCpyOptPass
Definition: MemCpyOptimizer.h:39
llvm::AAManager::registerFunctionAnalysis
void registerFunctionAnalysis()
Register a specific AA result.
Definition: AliasAnalysis.h:881
llvm::ArgumentPromotionPass
Argument promotion pass.
Definition: ArgumentPromotion.h:23
llvm::PipelineTuningOptions::InlinerThreshold
int InlinerThreshold
Tuning option to override the default inliner threshold.
Definition: PassBuilder.h:79
llvm::DivRemPairsPass
Hoist/decompose integer division and remainder instructions to enable CFG improvements and better cod...
Definition: DivRemPairs.h:23
SimplifyCFGOptions.h
LoopDistribute.h
llvm::AlwaysInlinerPass
Inlines functions marked as "always_inline".
Definition: AlwaysInliner.h:32
WarnMissedTransforms.h
EnableUnrollAndJam
static cl::opt< bool > EnableUnrollAndJam("enable-unroll-and-jam", cl::init(false), cl::Hidden, cl::desc("Enable Unroll And Jam Pass"))
llvm::ThinOrFullLTOPhase::FullLTOPreLink
@ FullLTOPreLink
Full LTO prelink phase.
llvm::SimplifyCFGOptions
Definition: SimplifyCFGOptions.h:23
GlobalOpt.h
llvm::InjectTLIMappings
Definition: InjectTLIMappings.h:22
LoopSink.h
EnableMatrix
static cl::opt< bool > EnableMatrix("enable-matrix", cl::init(false), cl::Hidden, cl::desc("Enable lowering of the matrix intrinsics"))
OptimizationLevel.h
llvm::LowerConstantIntrinsicsPass
Definition: LowerConstantIntrinsics.h:24
llvm::OpenMPOptPass
OpenMP optimizations pass.
Definition: OpenMPOpt.h:38
llvm::ModuleSummaryIndex
Class to hold module path string table and global value map, and encapsulate methods for operating on...
Definition: ModuleSummaryIndex.h:1199
llvm::InliningAdvisorMode::Default
@ Default
llvm::PGOInstrumentationGen
The instrumentation (profile-instr-gen) pass for IR based PGO.
Definition: PGOInstrumentation.h:47
llvm::CallSiteSplittingPass
Definition: CallSiteSplitting.h:18
LoopDeletion.h
llvm::LoopInstSimplifyPass
Performs Loop Inst Simplify Pass.
Definition: LoopInstSimplify.h:25
MemProfiler.h
LICM.h
CoroEarly.h
llvm::PGOOptions::IRInstr
@ IRInstr
Definition: PGOOptions.h:23
EnableCHR
static cl::opt< bool > EnableCHR("enable-chr", cl::init(true), cl::Hidden, cl::desc("Enable control height reduction optimization (CHR)"))
llvm::InliningAdvisorMode::Release
@ Release
BDCE.h
InstrOrderFile.h
ExtraVectorizerPasses
static cl::opt< bool > ExtraVectorizerPasses("extra-vectorizer-passes", cl::init(false), cl::Hidden, cl::desc("Run cleanup optimization passes after vectorization"))
InstCombine.h
llvm::GVNPass
The core GVN pass object.
Definition: GVN.h:117
llvm::InstSimplifyPass
Run instruction simplification across each instruction in the function.
Definition: InstSimplifyPass.h:32
llvm::RequireAnalysisPass
A utility pass template to force an analysis result to be available.
Definition: PassManager.h:1243
llvm::PassBuilder::buildModuleSimplificationPipeline
ModulePassManager buildModuleSimplificationPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the core LLVM module canonicalization and simplification pipeline.
Definition: PassBuilderPipelines.cpp:918
llvm::PipelineTuningOptions::LoopUnrolling
bool LoopUnrolling
Tuning option to enable/disable loop unrolling. Its default value is true.
Definition: PassBuilder.h:56
llvm::cl::desc
Definition: CommandLine.h:413
llvm::PipelineTuningOptions::LicmMssaOptCap
unsigned LicmMssaOptCap
Tuning option to cap the number of calls to retrieve clobbering accesses in MemorySSA,...
Definition: PassBuilder.h:64
llvm::PipelineTuningOptions::LoopInterleaving
bool LoopInterleaving
Tuning option to set loop interleaving on/off, set based on opt level.
Definition: PassBuilder.h:45
llvm::MaxDevirtIterations
cl::opt< unsigned > MaxDevirtIterations("max-devirt-iterations", cl::ReallyHidden, cl::init(4))
Definition: PassBuilderPipelines.cpp:301
SimplifyCFG.h
llvm::SetLicmMssaOptCap
cl::opt< unsigned > SetLicmMssaOptCap
llvm::MergeFunctionsPass
Merge identical functions.
Definition: MergeFunctions.h:25
MPM
ModulePassManager MPM
Definition: PassBuilderBindings.cpp:70
llvm::createModuleToPostOrderCGSCCPassAdaptor
ModuleToPostOrderCGSCCPassAdaptor createModuleToPostOrderCGSCCPassAdaptor(CGSCCPassT &&Pass)
A function to deduce a CGSCC pass type and wrap it in the templated adaptor.
Definition: CGSCCPassManager.h:373
llvm::PassManager< Loop, LoopAnalysisManager, LoopStandardAnalysisResults &, LPMUpdater & >::isEmpty
bool isEmpty() const
Definition: LoopPassManager.h:166
llvm::LoopUnrollPass
Loop unroll pass that will support both full and partial unrolling.
Definition: LoopUnrollPass.h:133
FunctionAttrs.h
llvm::SimplifyCFGPass
A pass to simplify and canonicalize the CFG of a function.
Definition: SimplifyCFG.h:29
llvm::LoopVectorizePass
The LoopVectorize Pass.
Definition: LoopVectorize.h:161
RelLookupTableConverter.h
EnableMemProfiler
static cl::opt< bool > EnableMemProfiler("enable-mem-prof", cl::Hidden, cl::desc("Enable memory profiler"))
llvm::EarlyCSEPass
A simple and fast domtree-based CSE pass.
Definition: EarlyCSE.h:30
llvm::ConstantMergePass
A pass that merges duplicate global constants into a single constant.
Definition: ConstantMerge.h:29
llvm::DFAJumpThreadingPass
Definition: DFAJumpThreading.h:22
AddDiscriminators.h
DFAJumpThreading.h