LLVM  15.0.0git
PassBuilderPipelines.cpp
Go to the documentation of this file.
1 //===- Construction of pass pipelines -------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 ///
10 /// This file provides the implementation of the PassBuilder based on our
11 /// static pass registry as well as related functionality. It also provides
12 /// helpers to aid in analyzing, debugging, and testing passes and pass
13 /// pipelines.
14 ///
15 //===----------------------------------------------------------------------===//
16 
26 #include "llvm/IR/PassManager.h"
130 
131 using namespace llvm;
132 
134  "enable-ml-inliner", cl::init(InliningAdvisorMode::Default), cl::Hidden,
135  cl::desc("Enable ML policy for inliner. Currently trained for -Oz only"),
137  "Heuristics-based inliner version."),
139  "Use development mode (runtime-loadable model)."),
141  "Use release mode (AOT-compiled model).")));
142 
144  "enable-npm-synthetic-counts", cl::Hidden,
145  cl::desc("Run synthetic function entry count generation "
146  "pass"));
147 
148 /// Flag to enable inline deferral during PGO (hidden option
/// `-enable-npm-pgo-inline-deferral`, initialised to true). Its value is copied
/// into `InlineParams::EnableDeferral` when PGO options are present.
149 static cl::opt<bool>
150  EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true),
151  cl::Hidden,
152  cl::desc("Enable inline deferral during PGO"));
153 
/// Flag to enable the memory profiler (hidden option `-enable-mem-prof`).
154 static cl::opt<bool> EnableMemProfiler("enable-mem-prof", cl::Hidden,
155  cl::desc("Enable memory profiler"));
156 
/// Flag to enable the module inliner (hidden option `-enable-module-inliner`,
/// initialised to false).
157 static cl::opt<bool> EnableModuleInliner("enable-module-inliner",
158  cl::init(false), cl::Hidden,
159  cl::desc("Enable module inliner"));
160 
162  "mandatory-inlining-first", cl::init(true), cl::Hidden,
163  cl::desc("Perform mandatory inlinings module-wide, before performing "
164  "inlining."));
165 
167  "enable-npm-O3-nontrivial-unswitch", cl::init(true), cl::Hidden,
168  cl::desc("Enable non-trivial loop unswitching for -O3"));
169 
171  "eagerly-invalidate-analyses", cl::init(true), cl::Hidden,
172  cl::desc("Eagerly invalidate more analyses in default pipelines"));
173 
175  "enable-no-rerun-simplification-pipeline", cl::init(false), cl::Hidden,
176  cl::desc(
177  "Prevent running the simplification pipeline on a function more "
178  "than once in the case that SCC mutations cause a function to be "
179  "visited multiple times as long as the function has not been changed"));
180 
182  "enable-merge-functions", cl::init(false), cl::Hidden,
183  cl::desc("Enable function merging as part of the optimization pipeline"));
184 
186  LoopInterleaving = true;
187  LoopVectorization = true;
188  SLPVectorization = false;
189  LoopUnrolling = true;
193  CallGraphProfile = true;
196 }
197 
198 namespace llvm {
199 
208 extern cl::opt<bool> EnableCHR;
213 extern cl::opt<bool> RunNewGVN;
216 
218 
221 
223 
226 } // namespace llvm
227 
// Invokes every callback registered in PeepholeEPCallbacks, in order, passing
// each one the function pass manager FPM and the optimization level Level.
// NOTE(review): the second line of the signature (which declares Level) is
// missing from this listing.
228 void PassBuilder::invokePeepholeEPCallbacks(FunctionPassManager &FPM,
230  for (auto &C : PeepholeEPCallbacks)
231  C(FPM, Level);
232 }
233 
234 // Helper to add AnnotationRemarksPass.
237 }
238 
239 // Helper to check if the current compilation phase is preparing for LTO
243 }
244 
// Builds and returns the function simplification pipeline used at O1.
//
// The visible steps are, in order:
//  1. scalar cleanup: SROA, EarlyCSE (with MemorySSA), SimplifyCFG,
//     InstCombine, the peephole extension-point callbacks, SimplifyCFG again,
//     and Reassociate;
//  2. two loop pass managers: LPM1 (loop rotation, optional loop flattening)
//     and LPM2 (IndVarSimplify, LoopDeletion, optional full unrolling), with
//     the LateLoopOptimizations / LoopOptimizerEnd callbacks applied to LPM2;
//  3. post-loop cleanup: SROA, MemCpyOpt, SCCP, BDCE, InstCombine, CoroElide,
//     the ScalarOptimizerLate callbacks, and a final ADCE / SimplifyCFG /
//     InstCombine round.
//
// NOTE(review): this listing omits several source lines of this function (the
// return type, the Phase parameter, the declaration of FPM and some pass
// constructors), so the list above is not exhaustive.
245 // TODO: Investigate the cost/benefit of tail call elimination on debugging.
247 PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
249 
251 
252  // Form SSA out of local memory accesses after breaking apart aggregates into
253  // scalars.
254  FPM.addPass(SROAPass());
255 
256  // Catch trivial redundancies
257  FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
258 
259  // Hoisting of scalars and load expressions.
 // NOTE(review): the passes scheduled below are SimplifyCFG and InstCombine;
 // the heading above also appears before the GVNHoist step of the general
 // pipeline and looks copied from there - confirm and reword.
260  FPM.addPass(
261  SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
262  FPM.addPass(InstCombinePass());
263 
265 
266  invokePeepholeEPCallbacks(FPM, Level);
267 
268  FPM.addPass(
269  SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
270 
271  // Form canonically associated expression trees, and simplify the trees using
272  // basic mathematical properties. For example, this will form (nearly)
273  // minimal multiplication trees.
274  FPM.addPass(ReassociatePass());
275 
276  // Add the primary loop simplification pipeline.
277  // FIXME: Currently this is split into two loop pass pipelines because we run
278  // some function passes in between them. These can and should be removed
279  // and/or replaced by scheduling the loop pass equivalents in the correct
280  // positions. But those equivalent passes aren't powerful enough yet.
281  // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
282  // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough to
283  // fully replace `SimplifyCFGPass`, and the closest to the other we have is
284  // `LoopInstSimplify`.
285  LoopPassManager LPM1, LPM2;
286 
287  // Simplify the loop body. We do this initially to clean up after other loop
288  // passes run, either when iterating on a loop or on inner loops with
289  // implications on the outer loop.
292 
293  // Try to remove as much code from the loop header as possible,
294  // to reduce amount of IR that will have to be duplicated. However,
295  // do not perform speculative hoisting the first time as LICM
296  // will destroy metadata that may not need to be destroyed if run
297  // after loop rotation.
298  // TODO: Investigate promotion cap for O1.
300  /*AllowSpeculation=*/false));
301 
302  LPM1.addPass(LoopRotatePass(/* Disable header duplication */ true,
303  isLTOPreLink(Phase)));
304  // TODO: Investigate promotion cap for O1.
306  /*AllowSpeculation=*/true));
308  if (EnableLoopFlatten)
309  LPM1.addPass(LoopFlattenPass());
310 
312  LPM2.addPass(IndVarSimplifyPass());
313 
314  for (auto &C : LateLoopOptimizationsEPCallbacks)
315  C(LPM2, Level);
316 
317  LPM2.addPass(LoopDeletionPass());
318 
321 
322  // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
323  // because it changes IR, making profile annotation in the backend compile
324  // inaccurate. The normal unroller doesn't pay attention to forced full unroll
325  // attributes so we need to make sure and allow the full unroll pass to pay
326  // attention to it.
327  if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
328  PGOOpt->Action != PGOOptions::SampleUse)
329  LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
330  /* OnlyWhenForced= */ !PTO.LoopUnrolling,
332 
333  for (auto &C : LoopOptimizerEndEPCallbacks)
334  C(LPM2, Level);
335 
336  // We provide the opt remark emitter pass for LICM to use. We only need to do
337  // this once as it is immutable.
338  FPM.addPass(
341  /*UseMemorySSA=*/true,
342  /*UseBlockFrequencyInfo=*/true));
343  FPM.addPass(
344  SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
345  FPM.addPass(InstCombinePass());
346  // The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA.
347  // *All* loop passes must preserve it, in order to be able to use it.
349  /*UseMemorySSA=*/false,
350  /*UseBlockFrequencyInfo=*/false));
351 
352  // Delete small array after loop unroll.
353  FPM.addPass(SROAPass());
354 
355  // Specially optimize memory movement as it doesn't look like dataflow in SSA.
356  FPM.addPass(MemCpyOptPass());
357 
358  // Sparse conditional constant propagation.
359  // FIXME: It isn't clear why we do this *after* loop passes rather than
360  // before...
361  FPM.addPass(SCCPPass());
362 
363  // Delete dead bit computations (instcombine runs after to fold away the dead
364  // computations, and then ADCE will run later to exploit any new DCE
365  // opportunities that creates).
366  FPM.addPass(BDCEPass());
367 
368  // Run instcombine after redundancy and dead bit elimination to exploit
369  // opportunities opened up by them.
370  FPM.addPass(InstCombinePass());
371  invokePeepholeEPCallbacks(FPM, Level);
372 
373  FPM.addPass(CoroElidePass());
374 
375  for (auto &C : ScalarOptimizerLateEPCallbacks)
376  C(FPM, Level);
377 
378  // Finally, do an expensive DCE pass to catch all the dead code exposed by
379  // the simplifications and basic cleanup after all the simplifications.
380  // TODO: Investigate if this is too expensive.
381  FPM.addPass(ADCEPass());
382  FPM.addPass(
383  SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
384  FPM.addPass(InstCombinePass());
385  invokePeepholeEPCallbacks(FPM, Level);
386 
387  return FPM;
388 }
389 
393  assert(Level != OptimizationLevel::O0 && "Must request optimizations!");
394 
395  // The O1 pipeline has a separate pipeline creation function to simplify
396  // construction readability.
397  if (Level.getSpeedupLevel() == 1)
398  return buildO1FunctionSimplificationPipeline(Level, Phase);
399 
401 
402  // Form SSA out of local memory accesses after breaking apart aggregates into
403  // scalars.
404  FPM.addPass(SROAPass());
405 
406  // Catch trivial redundancies
407  FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
410 
411  // Hoisting of scalars and load expressions.
412  if (EnableGVNHoist)
413  FPM.addPass(GVNHoistPass());
414 
415  // Global value numbering based sinking.
416  if (EnableGVNSink) {
417  FPM.addPass(GVNSinkPass());
418  FPM.addPass(
419  SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
420  }
421 
424 
425  // Speculative execution if the target has divergent branches; otherwise nop.
426  FPM.addPass(SpeculativeExecutionPass(/* OnlyIfDivergentTarget =*/true));
427 
428  // Optimize based on known information about branches, and cleanup afterward.
429  FPM.addPass(JumpThreadingPass());
431 
432  FPM.addPass(
433  SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
434  FPM.addPass(InstCombinePass());
437 
438  if (!Level.isOptimizingForSize())
440 
441  invokePeepholeEPCallbacks(FPM, Level);
442 
443  // For PGO use pipeline, try to optimize memory intrinsics such as memcpy
444  // using the size value profile. Don't perform this when optimizing for size.
445  if (PGOOpt && PGOOpt->Action == PGOOptions::IRUse &&
446  !Level.isOptimizingForSize())
447  FPM.addPass(PGOMemOPSizeOpt());
448 
449  FPM.addPass(TailCallElimPass());
450  FPM.addPass(
451  SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
452 
453  // Form canonically associated expression trees, and simplify the trees using
454  // basic mathematical properties. For example, this will form (nearly)
455  // minimal multiplication trees.
456  FPM.addPass(ReassociatePass());
457 
458  // Add the primary loop simplification pipeline.
459  // FIXME: Currently this is split into two loop pass pipelines because we run
460  // some function passes in between them. These can and should be removed
461  // and/or replaced by scheduling the loop pass equivalents in the correct
462  // positions. But those equivalent passes aren't powerful enough yet.
463  // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
464  // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough to
465  // fully replace `SimplifyCFGPass`, and the closest to the other we have is
466  // `LoopInstSimplify`.
467  LoopPassManager LPM1, LPM2;
468 
469  // Simplify the loop body. We do this initially to clean up after other loop
470  // passes run, either when iterating on a loop or on inner loops with
471  // implications on the outer loop.
474 
475  // Try to remove as much code from the loop header as possible,
476  // to reduce amount of IR that will have to be duplicated. However,
477  // do not perform speculative hoisting the first time as LICM
478  // will destroy metadata that may not need to be destroyed if run
479  // after loop rotation.
480  // TODO: Investigate promotion cap for O1.
482  /*AllowSpeculation=*/false));
483 
484  // Disable header duplication in loop rotation at -Oz.
485  LPM1.addPass(
487  // TODO: Investigate promotion cap for O1.
489  /*AllowSpeculation=*/true));
490  LPM1.addPass(
491  SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3 &&
493  if (EnableLoopFlatten)
494  LPM1.addPass(LoopFlattenPass());
495 
497  LPM2.addPass(IndVarSimplifyPass());
498 
499  for (auto &C : LateLoopOptimizationsEPCallbacks)
500  C(LPM2, Level);
501 
502  LPM2.addPass(LoopDeletionPass());
503 
506 
507  // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
508  // because it changes IR, making profile annotation in the backend compile
509  // inaccurate. The normal unroller doesn't pay attention to forced full unroll
510  // attributes so we need to make sure and allow the full unroll pass to pay
511  // attention to it.
512  if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
513  PGOOpt->Action != PGOOptions::SampleUse)
514  LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
515  /* OnlyWhenForced= */ !PTO.LoopUnrolling,
517 
518  for (auto &C : LoopOptimizerEndEPCallbacks)
519  C(LPM2, Level);
520 
521  // We provide the opt remark emitter pass for LICM to use. We only need to do
522  // this once as it is immutable.
523  FPM.addPass(
526  /*UseMemorySSA=*/true,
527  /*UseBlockFrequencyInfo=*/true));
528  FPM.addPass(
529  SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
530  FPM.addPass(InstCombinePass());
531  // The loop passes in LPM2 (LoopIdiomRecognizePass, IndVarSimplifyPass,
532  // LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA.
533  // *All* loop passes must preserve it, in order to be able to use it.
535  /*UseMemorySSA=*/false,
536  /*UseBlockFrequencyInfo=*/false));
537 
538  // Delete small array after loop unroll.
539  FPM.addPass(SROAPass());
540 
541  // The matrix extension can introduce large vector operations early, which can
542  // benefit from running vector-combine early on.
543  if (EnableMatrix)
544  FPM.addPass(VectorCombinePass(/*ScalarizationOnly=*/true));
545 
546  // Eliminate redundancies.
548  if (RunNewGVN)
549  FPM.addPass(NewGVNPass());
550  else
551  FPM.addPass(GVNPass());
552 
553  // Sparse conditional constant propagation.
554  // FIXME: It isn't clear why we do this *after* loop passes rather than
555  // before...
556  FPM.addPass(SCCPPass());
557 
558  // Delete dead bit computations (instcombine runs after to fold away the dead
559  // computations, and then ADCE will run later to exploit any new DCE
560  // opportunities that creates).
561  FPM.addPass(BDCEPass());
562 
563  // Run instcombine after redundancy and dead bit elimination to exploit
564  // opportunities opened up by them.
565  FPM.addPass(InstCombinePass());
566  invokePeepholeEPCallbacks(FPM, Level);
567 
568  // Re-consider control flow based optimizations after redundancy elimination,
569  // redo DCE, etc.
570  if (EnableDFAJumpThreading && Level.getSizeLevel() == 0)
572 
573  FPM.addPass(JumpThreadingPass());
575 
576  // Finally, do an expensive DCE pass to catch all the dead code exposed by
577  // the simplifications and basic cleanup after all the simplifications.
578  // TODO: Investigate if this is too expensive.
579  FPM.addPass(ADCEPass());
580 
581  // Specially optimize memory movement as it doesn't look like dataflow in SSA.
582  FPM.addPass(MemCpyOptPass());
583 
584  FPM.addPass(DSEPass());
587  /*AllowSpeculation=*/true),
588  /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true));
589 
590  FPM.addPass(CoroElidePass());
591 
592  for (auto &C : ScalarOptimizerLateEPCallbacks)
593  C(FPM, Level);
594 
596  .convertSwitchRangeToICmp(true)
597  .hoistCommonInsts(true)
598  .sinkCommonInsts(true)));
599  FPM.addPass(InstCombinePass());
600  invokePeepholeEPCallbacks(FPM, Level);
601 
602  if (EnableCHR && Level == OptimizationLevel::O3 && PGOOpt &&
603  (PGOOpt->Action == PGOOptions::IRUse ||
604  PGOOpt->Action == PGOOptions::SampleUse))
606 
607  return FPM;
608 }
609 
610 void PassBuilder::addRequiredLTOPreLinkPasses(ModulePassManager &MPM) {
613 }
614 
// Adds the instrumentation-PGO passes to MPM.
//
// Level must not be O0 (asserted). Unless this is the context-sensitive run
// (IsCS) or the pre-inliner is disabled (DisablePreInliner), an inliner
// configured with PreInlineThreshold is scheduled first, followed by a small
// function cleanup pipeline (SROA, EarlyCSE, SimplifyCFG, InstCombine and the
// peephole extension-point callbacks).
//
// After that:
//  - RunProfileGen == false: ProfileFile must be non-empty (asserted); the
//    profile is applied with PGOInstrumentationUse and the function returns.
//  - RunProfileGen == true: the instrumentation options are filled in, using
//    ProfileFile (when non-empty) as the profile output path, enabling counter
//    promotion, and using BFI in promotion only for the IsCS run.
//
// NOTE(review): this listing omits several source lines of this function
// (e.g. the declarations of IP, MIWP, FPM and Options).
615 void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM,
616  OptimizationLevel Level, bool RunProfileGen,
617  bool IsCS, std::string ProfileFile,
618  std::string ProfileRemappingFile,
619  ThinOrFullLTOPhase LTOPhase) {
620  assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
621  if (!IsCS && !DisablePreInliner) {
623 
624  IP.DefaultThreshold = PreInlineThreshold;
625 
626  // FIXME: The hint threshold has the same value used by the regular inliner
627  // when not optimizing for size. This should probably be lowered after
628  // performance testing.
629  // FIXME: this comment is cargo culted from the old pass manager; revisit.
630  IP.HintThreshold = Level.isOptimizingForSize() ? PreInlineThreshold : 325;
632  IP, /* MandatoryFirst */ true,
634  CGSCCPassManager &CGPipeline = MIWP.getPM();
635 
637  FPM.addPass(SROAPass());
638  FPM.addPass(EarlyCSEPass()); // Catch trivial redundancies.
639  FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(
640  true))); // Merge & remove basic blocks.
641  FPM.addPass(InstCombinePass()); // Combine silly sequences.
642  invokePeepholeEPCallbacks(FPM, Level);
643 
644  CGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
646 
647  MPM.addPass(std::move(MIWP));
648 
649  // Delete anything that is now dead to make sure that we don't instrument
650  // dead code. Instrumentation can end up keeping dead code around and
651  // dramatically increase code size.
653  }
654 
655  if (!RunProfileGen) {
656  assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
657  MPM.addPass(PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS));
658  // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
659  // RequireAnalysisPass for PSI before subsequent non-module passes.
661  return;
662  }
663 
664  // Perform PGO instrumentation.
666 
667  // Disable header duplication in loop rotation at -Oz.
671  /*UseMemorySSA=*/false,
672  /*UseBlockFrequencyInfo=*/false),
674 
675  // Add the profile lowering pass.
677  if (!ProfileFile.empty())
678  Options.InstrProfileOutput = ProfileFile;
679  // Do counter promotion at Level greater than O0.
680  Options.DoCounterPromotion = true;
681  Options.UseBFIInPromotion = IsCS;
683 }
684 
686  bool RunProfileGen, bool IsCS,
687  std::string ProfileFile,
688  std::string ProfileRemappingFile) {
689  if (!RunProfileGen) {
690  assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
691  MPM.addPass(PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS));
692  // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
693  // RequireAnalysisPass for PSI before subsequent non-module passes.
695  return;
696  }
697 
698  // Perform PGO instrumentation.
700  // Add the profile lowering pass.
702  if (!ProfileFile.empty())
703  Options.InstrProfileOutput = ProfileFile;
704  // Do not do counter promotion at O0.
705  Options.DoCounterPromotion = false;
706  Options.UseBFIInPromotion = IsCS;
708 }
709 
711  return getInlineParams(Level.getSpeedupLevel(), Level.getSizeLevel());
712 }
713 
718  // For PreLinkThinLTO + SamplePGO, set hot-caller threshold to 0 to
719  // disable hot callsite inline (as much as possible [1]) because it makes
720  // profile annotation in the backend inaccurate.
721  //
722  // [1] Note the cost of a function could be below zero due to erased
723  // prologue / epilogue.
724  if (Phase == ThinOrFullLTOPhase::ThinLTOPreLink && PGOOpt &&
725  PGOOpt->Action == PGOOptions::SampleUse)
726  IP.HotCallSiteThreshold = 0;
727 
728  if (PGOOpt)
729  IP.EnableDeferral = EnablePGOInlineDeferral;
730 
735 
736  // Require the GlobalsAA analysis for the module so we can query it within
737  // the CGSCC pipeline.
739  // Invalidate AAManager so it can be recreated and pick up the newly available
740  // GlobalsAA.
741  MIWP.addModulePass(
743 
744  // Require the ProfileSummaryAnalysis for the module so we can query it within
745  // the inliner pass.
747 
748  // Now begin the main postorder CGSCC pipeline.
749  // FIXME: The current CGSCC pipeline has its origins in the legacy pass
750  // manager and trying to emulate its precise behavior. Much of this doesn't
751  // make a lot of sense and we should revisit the core CGSCC structure.
752  CGSCCPassManager &MainCGPipeline = MIWP.getPM();
753 
754  // Note: historically, the PruneEH pass was run first to deduce nounwind and
755  // generally clean up exception handling overhead. It isn't clear this is
756  // valuable as the inliner doesn't currently care whether it is inlining an
757  // invoke or a call.
758 
760  MainCGPipeline.addPass(AttributorCGSCCPass());
761 
762  // Now deduce any function attributes based on the current code.
763  MainCGPipeline.addPass(PostOrderFunctionAttrsPass());
764 
765  // When at O3 add argument promotion to the pass pipeline.
766  // FIXME: It isn't at all clear why this should be limited to O3.
768  MainCGPipeline.addPass(ArgumentPromotionPass());
769 
770  // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if
771  // there are no OpenMP runtime calls present in the module.
773  MainCGPipeline.addPass(OpenMPOptCGSCCPass());
774 
775  for (auto &C : CGSCCOptimizerLateEPCallbacks)
776  C(MainCGPipeline, Level);
777 
778  // Lastly, add the core function simplification pipeline nested inside the
779  // CGSCC walk.
783 
784  MainCGPipeline.addPass(CoroSplitPass(Level != OptimizationLevel::O0));
785 
787  MIWP.addLateModulePass(createModuleToFunctionPassAdaptor(
789 
790  return MIWP;
791 }
792 
797 
799  // For PreLinkThinLTO + SamplePGO, set hot-caller threshold to 0 to
800  // disable hot callsite inline (as much as possible [1]) because it makes
801  // profile annotation in the backend inaccurate.
802  //
803  // [1] Note the cost of a function could be below zero due to erased
804  // prologue / epilogue.
805  if (Phase == ThinOrFullLTOPhase::ThinLTOPreLink && PGOOpt &&
806  PGOOpt->Action == PGOOptions::SampleUse)
807  IP.HotCallSiteThreshold = 0;
808 
809  if (PGOOpt)
810  IP.EnableDeferral = EnablePGOInlineDeferral;
811 
812  // The inline deferral logic is used to avoid losing some
813  // inlining chance in future. It is helpful in SCC inliner, in which
814  // inlining is processed in bottom-up order.
815  // While in module inliner, the inlining order is a priority-based order
816  // by default. The inline deferral is unnecessary there. So we disable the
817  // inline deferral logic in module inliner.
818  IP.EnableDeferral = false;
819 
821 
825 
828 
829  return MPM;
830 }
831 
836 
837  // Place pseudo probe instrumentation as the first pass of the pipeline to
838  // minimize the impact of optimization changes.
839  if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
842 
843  bool HasSampleProfile = PGOOpt && (PGOOpt->Action == PGOOptions::SampleUse);
844 
845  // In ThinLTO mode, when flattened profile is used, all the available
846  // profile information will be annotated in PreLink phase so there is
847  // no need to load the profile again in PostLink.
848  bool LoadSampleProfile =
849  HasSampleProfile &&
851 
852  // During the ThinLTO backend phase we perform early indirect call promotion
853  // here, before globalopt. Otherwise imported available_externally functions
854  // look unreferenced and are removed. If we are going to load the sample
855  // profile then defer until later.
856  // TODO: See if we can move later and consolidate with the location where
857  // we perform ICP when we are loading a sample profile.
858  // TODO: We pass HasSampleProfile (whether there was a sample profile file
859  // passed to the compile) to the SamplePGO flag of ICP. This is used to
860  // determine whether the new direct calls are annotated with prof metadata.
861  // Ideally this should be determined from whether the IR is annotated with
862  // sample profile, and not whether a sample profile was provided on the
863  // command line. E.g. for flattened profiles where we will not be reloading
864  // the sample profile in the ThinLTO backend, we ideally shouldn't have to
865  // provide the sample profile file.
866  if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink && !LoadSampleProfile)
867  MPM.addPass(PGOIndirectCallPromotion(true /* InLTO */, HasSampleProfile));
868 
869  // Do basic inference of function attributes from known properties of system
870  // libraries and other oracles.
873 
874  // Create an early function pass manager to cleanup the output of the
875  // frontend.
876  FunctionPassManager EarlyFPM;
877  // Lower llvm.expect to metadata before attempting transforms.
878  // Compare/branch metadata may alter the behavior of passes like SimplifyCFG.
879  EarlyFPM.addPass(LowerExpectIntrinsicPass());
880  EarlyFPM.addPass(SimplifyCFGPass());
881  EarlyFPM.addPass(SROAPass());
882  EarlyFPM.addPass(EarlyCSEPass());
884  EarlyFPM.addPass(CallSiteSplittingPass());
885 
886  // In SamplePGO ThinLTO backend, we need instcombine before profile annotation
887  // to convert bitcast to direct calls so that they can be inlined during the
888  // profile annotation preparation step.
889  // More details about SamplePGO design can be found in:
890  // https://research.google.com/pubs/pub45290.html
891  // FIXME: revisit how SampleProfileLoad/Inliner/ICP is structured.
892  if (LoadSampleProfile)
893  EarlyFPM.addPass(InstCombinePass());
896 
897  if (LoadSampleProfile) {
898  // Annotate sample profile right after early FPM to ensure freshness of
899  // the debug info.
900  MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
901  PGOOpt->ProfileRemappingFile, Phase));
902  // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
903  // RequireAnalysisPass for PSI before subsequent non-module passes.
905  // Do not invoke ICP in the LTOPrelink phase as it makes it hard
906  // for the profile annotation to be accurate in the LTO backend.
909  // We perform early indirect call promotion here, before globalopt.
910  // This is important for the ThinLTO backend phase because otherwise
911  // imported available_externally functions look unreferenced and are
912  // removed.
913  MPM.addPass(
914  PGOIndirectCallPromotion(true /* IsInLTO */, true /* SamplePGO */));
915  }
916 
917  // Try to perform OpenMP specific optimizations on the module. This is a
918  // (quick!) no-op if there are no OpenMP runtime calls present in the module.
921 
924 
925  // Lower type metadata and the type.test intrinsic in the ThinLTO
926  // post link pipeline after ICP. This is to enable usage of the type
927  // tests in ICP sequences.
929  MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
930 
931  for (auto &C : PipelineEarlySimplificationEPCallbacks)
932  C(MPM, Level);
933 
934  // Specialize functions with IPSCCP.
937 
938  // Interprocedural constant propagation now that basic cleanup has occurred
939  // and prior to optimizing globals.
940  // FIXME: This position in the pipeline hasn't been carefully considered in
941  // years, it should be re-analyzed.
943 
944  // Attach metadata to indirect call sites indicating the set of functions
945  // they may target at run-time. This should follow IPSCCP.
947 
948  // Optimize globals to try and fold them into constants.
950 
951  // Promote any localized globals to SSA registers.
952  // FIXME: Should this instead be a run of SROA?
953  // FIXME: We should probably run instcombine and simplifycfg afterward to
954  // delete control flows that are dead once globals have been folded to
955  // constants.
957 
958  // Remove any dead arguments exposed by cleanups and constant folding
959  // globals.
961 
962  // Create a small function pass pipeline to cleanup after all the global
963  // optimizations.
964  FunctionPassManager GlobalCleanupPM;
965  GlobalCleanupPM.addPass(InstCombinePass());
966  invokePeepholeEPCallbacks(GlobalCleanupPM, Level);
967 
968  GlobalCleanupPM.addPass(
969  SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
972 
973  // Add all the requested passes for instrumentation PGO, if requested.
974  if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink &&
975  (PGOOpt->Action == PGOOptions::IRInstr ||
976  PGOOpt->Action == PGOOptions::IRUse)) {
977  addPGOInstrPasses(MPM, Level,
978  /* RunProfileGen */ PGOOpt->Action == PGOOptions::IRInstr,
979  /* IsCS */ false, PGOOpt->ProfileFile,
980  PGOOpt->ProfileRemappingFile, Phase);
981  MPM.addPass(PGOIndirectCallPromotion(false, false));
982  }
983  if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink &&
984  PGOOpt->CSAction == PGOOptions::CSIRInstr)
985  MPM.addPass(PGOInstrumentationGenCreateVar(PGOOpt->CSProfileGenFile));
986 
987  // Synthesize function entry counts for non-PGO compilation.
988  if (EnableSyntheticCounts && !PGOOpt)
990 
993  else
995 
997 
1001  }
1002 
1003  return MPM;
1004 }
1005 
/// Adds the vectorization-related passes, and the cleanups around them, to
/// \p FPM.
///
/// \param Level     optimization level. Its speedup level is forwarded to the
///                  unroll passes and, together with ExtraVectorizerPasses,
///                  gates the extra cleanup passes (speedup level > 1).
/// \param FPM       function pass manager that receives the passes.
/// \param IsFullLTO selects the full-LTO ordering: the unrolling step is
///                  scheduled up front, SCCP / InstCombine / BDCE run before
///                  SLP vectorization, and a final InstCombine is appended.
///                  Otherwise the unrolling step and its cleanup come after
///                  VectorCombine.
///
/// SLP vectorization is only added when PTO.SLPVectorization is set.
/// NOTE(review): this listing omits several source lines of this function
/// (including the first statements of the body), so the description above
/// covers only the visible steps.
1006 /// TODO: Should LTO cause any differences to this set of passes?
1007 void PassBuilder::addVectorPasses(OptimizationLevel Level,
1008  FunctionPassManager &FPM, bool IsFullLTO) {
1011 
1012  if (IsFullLTO) {
1013  // The vectorizer may have significantly shortened a loop body; unroll
1014  // again. Unroll small loops to hide loop backedge latency and saturate any
1015  // parallel execution resources of an out-of-order processor. We also then
1016  // need to clean up redundancies and loop invariant code.
1017  // FIXME: It would be really good to use a loop-integrated instruction
1018  // combiner for cleanup here so that the unrolling and LICM can be pipelined
1019  // across the loop nests.
1020  // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
1021  if (EnableUnrollAndJam && PTO.LoopUnrolling)
1023  LoopUnrollAndJamPass(Level.getSpeedupLevel())));
1025  Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
1028  }
1029 
1030  if (!IsFullLTO) {
1031  // Eliminate loads by forwarding stores from the previous iteration to loads
1032  // of the current iteration.
1034  }
1035  // Cleanup after the loop optimization passes.
1036  FPM.addPass(InstCombinePass());
1037 
1038  if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
1039  ExtraVectorPassManager ExtraPasses;
1040  // At higher optimization levels, try to clean up any runtime overlap and
1041  // alignment checks inserted by the vectorizer. We want to track correlated
1042  // runtime checks for two inner loops in the same outer loop, fold any
1043  // common computations, hoist loop-invariant aspects out of any outer loop,
1044  // and unswitch the runtime checks if possible. Once hoisted, we may have
1045  // dead (or speculatable) control flows or more combining opportunities.
1046  ExtraPasses.addPass(EarlyCSEPass());
1047  ExtraPasses.addPass(CorrelatedValuePropagationPass());
1048  ExtraPasses.addPass(InstCombinePass());
1049  LoopPassManager LPM;
1051  /*AllowSpeculation=*/true));
1052  LPM.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
1054  ExtraPasses.addPass(
1056  ExtraPasses.addPass(
1057  createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/true,
1058  /*UseBlockFrequencyInfo=*/true));
1059  ExtraPasses.addPass(
1060  SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
1061  ExtraPasses.addPass(InstCombinePass());
1062  FPM.addPass(std::move(ExtraPasses));
1063  }
1064 
1065  // Now that we've formed fast to execute loop structures, we do further
1066  // optimizations. These are run afterward as they might block doing complex
1067  // analyses and transforms such as what are needed for loop vectorization.
1068 
1069  // Cleanup after loop vectorization, etc. Simplification passes like CVP and
1070  // GVN, loop transforms, and others have already run, so it's now better to
1071  // convert to more optimized IR using more aggressive simplify CFG options.
1072  // The extra sinking transform can create larger basic blocks, so do this
1073  // before SLP vectorization.
1075  .forwardSwitchCondToPhi(true)
1076  .convertSwitchRangeToICmp(true)
1077  .convertSwitchToLookupTable(true)
1078  .needCanonicalLoops(false)
1079  .hoistCommonInsts(true)
1080  .sinkCommonInsts(true)));
1081 
1082  if (IsFullLTO) {
1083  FPM.addPass(SCCPPass());
1084  FPM.addPass(InstCombinePass());
1085  FPM.addPass(BDCEPass());
1086  }
1087 
1088  // Optimize parallel scalar instruction chains into SIMD instructions.
1089  if (PTO.SLPVectorization) {
1090  FPM.addPass(SLPVectorizerPass());
1091  if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
1092  FPM.addPass(EarlyCSEPass());
1093  }
1094  }
1095  // Enhance/cleanup vector code.
1096  FPM.addPass(VectorCombinePass());
1097 
1098  if (!IsFullLTO) {
1099  FPM.addPass(InstCombinePass());
1100  // Unroll small loops to hide loop backedge latency and saturate any
1101  // parallel execution resources of an out-of-order processor. We also then
1102  // need to clean up redundancies and loop invariant code.
1103  // FIXME: It would be really good to use a loop-integrated instruction
1104  // combiner for cleanup here so that the unrolling and LICM can be pipelined
1105  // across the loop nests.
1106  // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
1107  if (EnableUnrollAndJam && PTO.LoopUnrolling) {
1109  LoopUnrollAndJamPass(Level.getSpeedupLevel())));
1110  }
1112  Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
1115  FPM.addPass(InstCombinePass());
1116  FPM.addPass(
1120  /*AllowSpeculation=*/true),
1121  /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true));
1122  }
1123 
1124  // Now that we've vectorized and unrolled loops, we may have more refined
1125  // alignment information, try to re-derive it here.
1127 
1128  if (IsFullLTO)
1129  FPM.addPass(InstCombinePass());
1130 }
1131 
1134  ThinOrFullLTOPhase LTOPhase) {
1135  const bool LTOPreLink = (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink ||
1138 
1139  // Optimize globals now that the module is fully simplified.
1142 
1143  // Run partial inlining pass to partially inline functions that have
1144  // large bodies.
1145  if (RunPartialInlining)
1147 
1148  // Remove avail extern fns and globals definitions since we aren't compiling
1149  // an object file for later LTO. For LTO we want to preserve these so they
1150  // are eligible for inlining at link-time. Note if they are unreferenced they
1151  // will be removed by GlobalDCE later, so this only impacts referenced
1152  // available externally globals. Eventually they will be suppressed during
1153  // codegen, but eliminating here enables more opportunity for GlobalDCE as it
1154  // may make globals referenced by available external functions dead and saves
1155  // running remaining passes on the eliminated functions. These should be
1156  // preserved during prelinking for link-time inlining decisions.
1157  if (!LTOPreLink)
1159 
1162 
1163  // Do RPO function attribute inference across the module to forward-propagate
1164  // attributes where applicable.
1165  // FIXME: Is this really an optimization rather than a canonicalization?
1167 
1168  // Do a post inline PGO instrumentation and use pass. This is a context
1169  // sensitive PGO pass. We don't want to do this in LTOPreLink phase as
1170  // cross-module inline has not been done yet. The context sensitive
1171  // instrumentation is after all the inlines are done.
1172  if (!LTOPreLink && PGOOpt) {
1173  if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
1174  addPGOInstrPasses(MPM, Level, /* RunProfileGen */ true,
1175  /* IsCS */ true, PGOOpt->CSProfileGenFile,
1176  PGOOpt->ProfileRemappingFile, LTOPhase);
1177  else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
1178  addPGOInstrPasses(MPM, Level, /* RunProfileGen */ false,
1179  /* IsCS */ true, PGOOpt->ProfileFile,
1180  PGOOpt->ProfileRemappingFile, LTOPhase);
1181  }
1182 
1183  // Re-compute GlobalsAA here prior to function passes. This is particularly
1184  // useful as the above will have inlined, DCE'ed, and function-attr
1185  // propagated everything. We should at this point have a reasonably minimal
1186  // and richly annotated call graph. By computing aliasing and mod/ref
1187  // information for all local globals here, the late loop passes and notably
1188  // the vectorizer will be able to use them to help recognize vectorizable
1189  // memory operations.
1191 
1192  for (auto &C : OptimizerEarlyEPCallbacks)
1193  C(MPM, Level);
1194 
1195  FunctionPassManager OptimizePM;
1196  OptimizePM.addPass(Float2IntPass());
1197  OptimizePM.addPass(LowerConstantIntrinsicsPass());
1198 
1199  if (EnableMatrix) {
1200  OptimizePM.addPass(LowerMatrixIntrinsicsPass());
1201  OptimizePM.addPass(EarlyCSEPass());
1202  }
1203 
1204  // FIXME: We need to run some loop optimizations to re-rotate loops after
1205  // simplifycfg and others undo their rotation.
1206 
1207  // Optimize the loop execution. These passes operate on entire loop nests
1208  // rather than on each loop in an inside-out manner, and so they are actually
1209  // function passes.
1210 
1211  for (auto &C : VectorizerStartEPCallbacks)
1212  C(OptimizePM, Level);
1213 
1214  LoopPassManager LPM;
1215  // First rotate loops that may have been un-rotated by prior passes.
1216  // Disable header duplication at -Oz.
1217  LPM.addPass(LoopRotatePass(Level != OptimizationLevel::Oz, LTOPreLink));
1218  // Some loops may have become dead by now. Try to delete them.
1219  // FIXME: see discussion in https://reviews.llvm.org/D112851,
1220  // this may need to be revisited once we run GVN before loop deletion
1221  // in the simplification pipeline.
1222  LPM.addPass(LoopDeletionPass());
1224  std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/false));
1225 
1226  // Distribute loops to allow partial vectorization. I.e. isolate dependences
1227  // into separate loop that would otherwise inhibit vectorization. This is
1228  // currently only performed for loops marked with the metadata
1229  // llvm.loop.distribute=true or when -enable-loop-distribute is specified.
1230  OptimizePM.addPass(LoopDistributePass());
1231 
1232  // Populates the VFABI attribute with the scalar-to-vector mappings
1233  // from the TargetLibraryInfo.
1234  OptimizePM.addPass(InjectTLIMappings());
1235 
1236  addVectorPasses(Level, OptimizePM, /* IsFullLTO */ false);
1237 
1238  // LoopSink pass sinks instructions hoisted by LICM, which serves as a
1239  // canonicalization pass that enables other optimizations. As a result,
1240  // LoopSink pass needs to be a very late IR pass to avoid undoing LICM
1241  // result too early.
1242  OptimizePM.addPass(LoopSinkPass());
1243 
1244  // And finally clean up LCSSA form before generating code.
1245  OptimizePM.addPass(InstSimplifyPass());
1246 
1247  // This hoists/decomposes div/rem ops. It should run after other sink/hoist
1248  // passes to avoid re-sinking, but before SimplifyCFG because it can allow
1249  // flattening of blocks.
1250  OptimizePM.addPass(DivRemPairsPass());
1251 
1252  // LoopSink (and other loop passes since the last simplifyCFG) might have
1253  // resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
1254  OptimizePM.addPass(
1255  SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
1256 
1257  // Add the core optimizing pipeline.
1260 
1261  for (auto &C : OptimizerLastEPCallbacks)
1262  C(MPM, Level);
1263 
1264  // Split out cold code. Splitting is done late to avoid hiding context from
1265  // other optimizations and inadvertently regressing performance. The tradeoff
1266  // is that this has a higher code size cost than splitting early.
1267  if (EnableHotColdSplit && !LTOPreLink)
1269 
1270  // Search the code for similar regions of code. If enough similar regions can
1271  // be found where extracting the regions into their own function will decrease
1272  // the size of the program, we extract the regions, and deduplicate the
1273  // structurally similar regions.
1274  if (EnableIROutliner)
1276 
1277  // Merge functions if requested.
1278  if (PTO.MergeFunctions)
1280 
1281  if (PTO.CallGraphProfile)
1283 
1284  // Now we need to do some global optimization transforms.
1285  // FIXME: It would seem like these should come first in the optimization
1286  // pipeline and maybe be the bottom of the canonicalization pipeline? Weird
1287  // ordering here.
1290 
1291  // TODO: Relative lookup table converter pass caused an issue when full lto is
1292  // enabled. See https://reviews.llvm.org/D94355 for more details.
1293  // Until the issue is fixed, disable this pass during pre-linking phase.
1294  if (!LTOPreLink)
1296 
1297  return MPM;
1298 }
1299 
1302  bool LTOPreLink) {
1304  "Must request optimizations for the default pipeline!");
1305 
1307 
1308  // Convert @llvm.global.annotations to !annotation metadata.
1310 
1311  // Force any function attributes we want the rest of the pipeline to observe.
1313 
1314  // Apply module pipeline start EP callback.
1315  for (auto &C : PipelineStartEPCallbacks)
1316  C(MPM, Level);
1317 
1318  if (PGOOpt && PGOOpt->DebugInfoForProfiling)
1320 
1321  const ThinOrFullLTOPhase LTOPhase = LTOPreLink
1324  // Add the core simplification pipeline.
1326 
1327  // Now add the optimization pipeline.
1329 
1330  if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1331  PGOOpt->Action == PGOOptions::SampleUse)
1333 
1334  // Emit annotation remarks.
1336 
1337  if (LTOPreLink)
1338  addRequiredLTOPreLinkPasses(MPM);
1339 
1340  return MPM;
1341 }
1342 
1346  "Must request optimizations for the default pipeline!");
1347 
1349 
1350  // Convert @llvm.global.annotations to !annotation metadata.
1352 
1353  // Force any function attributes we want the rest of the pipeline to observe.
1355 
1356  if (PGOOpt && PGOOpt->DebugInfoForProfiling)
1358 
1359  // Apply module pipeline start EP callback.
1360  for (auto &C : PipelineStartEPCallbacks)
1361  C(MPM, Level);
1362 
1363  // If we are planning to perform ThinLTO later, we don't bloat the code with
1364  // unrolling/vectorization/... now. Just simplify the module as much as we
1365  // can.
1368 
1369  // Run partial inlining pass to partially inline functions that have
1370  // large bodies.
1371  // FIXME: It isn't clear whether this is really the right place to run this
1372  // in ThinLTO. Because there is another canonicalization and simplification
1373  // phase that will run after the thin link, running this here ends up with
1374  // less information than will be available later and it may grow functions in
1375  // ways that aren't beneficial.
1376  if (RunPartialInlining)
1378 
1379  // Reduce the size of the IR as much as possible.
1381 
1382  if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1383  PGOOpt->Action == PGOOptions::SampleUse)
1385 
1386  // Handle OptimizerLastEPCallbacks added by clang on PreLink. Actual
1387  // optimization is going to be done in PostLink stage, but clang can't
1388  // add callbacks there in case of in-process ThinLTO called by linker.
1389  for (auto &C : OptimizerLastEPCallbacks)
1390  C(MPM, Level);
1391 
1392  // Emit annotation remarks.
1394 
1395  addRequiredLTOPreLinkPasses(MPM);
1396 
1397  return MPM;
1398 }
1399 
1401  OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary) {
1403 
1404  // Convert @llvm.global.annotations to !annotation metadata.
1406 
1407  if (ImportSummary) {
1408  // These passes import type identifier resolutions for whole-program
1409  // devirtualization and CFI. They must run early because other passes may
1410  // disturb the specific instruction patterns that these passes look for,
1411  // creating dependencies on resolutions that may not appear in the summary.
1412  //
1413  // For example, GVN may transform the pattern assume(type.test) appearing in
1414  // two basic blocks into assume(phi(type.test, type.test)), which would
1415  // transform a dependency on a WPD resolution into a dependency on a type
1416  // identifier resolution for CFI.
1417  //
1418  // Also, WPD has access to more precise information than ICP and can
1419  // devirtualize more effectively, so it should operate on the IR first.
1420  //
1421  // The WPD and LowerTypeTest passes need to run at -O0 to lower type
1422  // metadata and intrinsics.
1423  MPM.addPass(WholeProgramDevirtPass(nullptr, ImportSummary));
1424  MPM.addPass(LowerTypeTestsPass(nullptr, ImportSummary));
1425  }
1426 
1427  if (Level == OptimizationLevel::O0) {
1428  // Run a second time to clean up any type tests left behind by WPD for use
1429  // in ICP.
1430  MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
1431  // Drop available_externally and unreferenced globals. This is necessary
1432  // with ThinLTO in order to avoid leaving undefined references to dead
1433  // globals in the object file.
1436  return MPM;
1437  }
1438 
1439  // Force any function attributes we want the rest of the pipeline to observe.
1441 
1442  // Add the core simplification pipeline.
1445 
1446  // Now add the optimization pipeline.
1449 
1450  // Emit annotation remarks.
1452 
1453  return MPM;
1454 }
1455 
1459  "Must request optimizations for the default pipeline!");
1460  // FIXME: We should use a customized pre-link pipeline!
1462  /* LTOPreLink */ true);
1463 }
1464 
1467  ModuleSummaryIndex *ExportSummary) {
1469 
1470  // Convert @llvm.global.annotations to !annotation metadata.
1472 
1473  for (auto &C : FullLinkTimeOptimizationEarlyEPCallbacks)
1474  C(MPM, Level);
1475 
1476  // Create a function that performs CFI checks for cross-DSO calls with targets
1477  // in the current module.
1479 
1480  if (Level == OptimizationLevel::O0) {
1481  // The WPD and LowerTypeTest passes need to run at -O0 to lower type
1482  // metadata and intrinsics.
1483  MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
1484  MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
1485  // Run a second time to clean up any type tests left behind by WPD for use
1486  // in ICP.
1487  MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
1488 
1489  for (auto &C : FullLinkTimeOptimizationLastEPCallbacks)
1490  C(MPM, Level);
1491 
1492  // Emit annotation remarks.
1494 
1495  return MPM;
1496  }
1497 
1498  if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) {
1499  // Load sample profile before running the LTO optimization pipeline.
1500  MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
1501  PGOOpt->ProfileRemappingFile,
1503  // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
1504  // RequireAnalysisPass for PSI before subsequent non-module passes.
1506  }
1507 
1508  // Try to run OpenMP optimizations, quick no-op if no OpenMP metadata present.
1510 
1511  // Remove unused virtual tables to improve the quality of code generated by
1512  // whole-program devirtualization and bitset lowering.
1514 
1515  // Force any function attributes we want the rest of the pipeline to observe.
1517 
1518  // Do basic inference of function attributes from known properties of system
1519  // libraries and other oracles.
1521 
1522  if (Level.getSpeedupLevel() > 1) {
1525 
1526  // Indirect call promotion. This should promote all the targets that are
1527  // left by the earlier promotion pass that promotes intra-module targets.
1528  // This two-step promotion is to save the compile time. For LTO, it should
1529  // produce the same result as if we only do promotion here.
1531  true /* InLTO */, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
1532 
1535  // Propagate constants at call sites into the functions they call. This
1536  // opens opportunities for globalopt (and inlining) by substituting function
1537  // pointers passed as arguments to direct uses of functions.
1538  MPM.addPass(IPSCCPPass());
1539 
1540  // Attach metadata to indirect call sites indicating the set of functions
1541  // they may target at run-time. This should follow IPSCCP.
1543  }
1544 
1545  // Now deduce any function attributes based on the current code.
1546  MPM.addPass(
1548 
1549  // Do RPO function attribute inference across the module to forward-propagate
1550  // attributes where applicable.
1551  // FIXME: Is this really an optimization rather than a canonicalization?
1553 
1554  // Use in-range annotations on GEP indices to split globals where beneficial.
1556 
1557  // Run whole program optimization of virtual call when the list of callees
1558  // is fixed.
1559  MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
1560 
1561  // Stop here at -O1.
1562  if (Level == OptimizationLevel::O1) {
1563  // The LowerTypeTestsPass needs to run to lower type metadata and the
1564  // type.test intrinsics. The pass does nothing if CFI is disabled.
1565  MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
1566  // Run a second time to clean up any type tests left behind by WPD for use
1567  // in ICP (which is performed earlier than this in the regular LTO
1568  // pipeline).
1569  MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
1570 
1571  for (auto &C : FullLinkTimeOptimizationLastEPCallbacks)
1572  C(MPM, Level);
1573 
1574  // Emit annotation remarks.
1576 
1577  return MPM;
1578  }
1579 
1580  // Optimize globals to try and fold them into constants.
1582 
1583  // Promote any localized globals to SSA registers.
1585 
1586  // Linking modules together can lead to duplicate global constants; only
1587  // keep one copy of each constant.
1589 
1590  // Remove unused arguments from functions.
1592 
1593  // Reduce the code after globalopt and ipsccp. Both can open up significant
1594  // simplification opportunities, and both can propagate functions through
1595  // function pointers. When this happens, we often have to resolve varargs
1596  // calls, etc, so let instcombine do this.
1597  FunctionPassManager PeepholeFPM;
1598  PeepholeFPM.addPass(InstCombinePass());
1600  PeepholeFPM.addPass(AggressiveInstCombinePass());
1601  invokePeepholeEPCallbacks(PeepholeFPM, Level);
1602 
1605 
1606  // Note: historically, the PruneEH pass was run first to deduce nounwind and
1607  // generally clean up exception handling overhead. It isn't clear this is
1608  // valuable as the inliner doesn't currently care whether it is inlining an
1609  // invoke or a call.
1610  // Run the inliner now.
1613  /* MandatoryFirst */ true,
1616 
1617  // Optimize globals again after we ran the inliner.
1619 
1620  // Garbage collect dead functions.
1622 
1623  // If we didn't decide to inline a function, check to see if we can
1624  // transform it to pass arguments by value instead of by reference.
1626 
1627  FunctionPassManager FPM;
1628  // The IPO Passes may leave cruft around. Clean up after them.
1629  FPM.addPass(InstCombinePass());
1630  invokePeepholeEPCallbacks(FPM, Level);
1631 
1632  FPM.addPass(JumpThreadingPass());
1633 
1634  // Do a post inline PGO instrumentation and use pass. This is a context
1635  // sensitive PGO pass.
1636  if (PGOOpt) {
1637  if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
1638  addPGOInstrPasses(MPM, Level, /* RunProfileGen */ true,
1639  /* IsCS */ true, PGOOpt->CSProfileGenFile,
1640  PGOOpt->ProfileRemappingFile,
1642  else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
1643  addPGOInstrPasses(MPM, Level, /* RunProfileGen */ false,
1644  /* IsCS */ true, PGOOpt->ProfileFile,
1645  PGOOpt->ProfileRemappingFile,
1647  }
1648 
1649  // Break up allocas
1650  FPM.addPass(SROAPass());
1651 
1652  // LTO provides additional opportunities for tailcall elimination due to
1653  // link-time inlining, and visibility of nocapture attribute.
1654  FPM.addPass(TailCallElimPass());
1655 
1656  // Run a few AA driver optimizations here and now to cleanup the code.
1659 
1660  MPM.addPass(
1662 
1663  // Require the GlobalsAA analysis for the module so we can query it within
1664  // MainFPM.
1666  // Invalidate AAManager so it can be recreated and pick up the newly available
1667  // GlobalsAA.
1668  MPM.addPass(
1670 
1671  FunctionPassManager MainFPM;
1674  /*AllowSpeculation=*/true),
1675  /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true));
1676 
1677  if (RunNewGVN)
1678  MainFPM.addPass(NewGVNPass());
1679  else
1680  MainFPM.addPass(GVNPass());
1681 
1682  // Remove dead memcpy()'s.
1683  MainFPM.addPass(MemCpyOptPass());
1684 
1685  // Nuke dead stores.
1686  MainFPM.addPass(DSEPass());
1688 
1689 
1692 
1693  LoopPassManager LPM;
1694  if (EnableLoopFlatten && Level.getSpeedupLevel() > 1)
1695  LPM.addPass(LoopFlattenPass());
1696  LPM.addPass(IndVarSimplifyPass());
1697  LPM.addPass(LoopDeletionPass());
1698  // FIXME: Add loop interchange.
1699 
1700  // Unroll small loops and perform peeling.
1701  LPM.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
1702  /* OnlyWhenForced= */ !PTO.LoopUnrolling,
1704  // The loop passes in LPM (LoopFullUnrollPass) do not preserve MemorySSA.
1705  // *All* loop passes must preserve it, in order to be able to use it.
1707  std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/true));
1708 
1709  MainFPM.addPass(LoopDistributePass());
1710 
1711  addVectorPasses(Level, MainFPM, /* IsFullLTO */ true);
1712 
1713  // Run the OpenMPOpt CGSCC pass again late.
1714  MPM.addPass(
1716 
1717  invokePeepholeEPCallbacks(MainFPM, Level);
1718  MainFPM.addPass(JumpThreadingPass());
1721 
1722  // Lower type metadata and the type.test intrinsic. This pass supports
1723  // clang's control flow integrity mechanisms (-fsanitize=cfi*) and needs
1724  // to be run at link time if CFI is enabled. This pass does nothing if
1725  // CFI is disabled.
1726  MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
1727  // Run a second time to clean up any type tests left behind by WPD for use
1728  // in ICP (which is performed earlier than this in the regular LTO pipeline).
1729  MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
1730 
1731  // Enable splitting late in the FullLTO post-link pipeline. This is done in
1732  // the same stage in the old pass manager (\ref addLateLTOOptimizationPasses).
1733  if (EnableHotColdSplit)
1735 
1736  // Add late LTO optimization passes.
1737  // Delete basic blocks, which optimization passes may have killed.
1739  SimplifyCFGOptions().convertSwitchRangeToICmp(true).hoistCommonInsts(
1740  true))));
1741 
1742  // Drop bodies of available externally objects to improve GlobalDCE.
1744 
1745  // Now that we have optimized the program, discard unreachable functions.
1747 
1748  if (PTO.MergeFunctions)
1750 
1751  for (auto &C : FullLinkTimeOptimizationLastEPCallbacks)
1752  C(MPM, Level);
1753 
1754  // Emit annotation remarks.
1756 
1757  return MPM;
1758 }
1759 
1761  bool LTOPreLink) {
1763  "buildO0DefaultPipeline should only be used with O0");
1764 
1766 
1767  // Perform pseudo probe instrumentation in O0 mode. This is for the
1768  // consistency between different build modes. For example, a LTO build can be
1769  // mixed with an O0 prelink and an O2 postlink. Loading a sample profile in
1770  // the postlink will require pseudo probe instrumentation in the prelink.
1771  if (PGOOpt && PGOOpt->PseudoProbeForProfiling)
1773 
1774  if (PGOOpt && (PGOOpt->Action == PGOOptions::IRInstr ||
1775  PGOOpt->Action == PGOOptions::IRUse))
1777  MPM,
1778  /* RunProfileGen */ (PGOOpt->Action == PGOOptions::IRInstr),
1779  /* IsCS */ false, PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile);
1780 
1781  for (auto &C : PipelineStartEPCallbacks)
1782  C(MPM, Level);
1783 
1784  if (PGOOpt && PGOOpt->DebugInfoForProfiling)
1786 
1787  for (auto &C : PipelineEarlySimplificationEPCallbacks)
1788  C(MPM, Level);
1789 
1790  // Build a minimal pipeline based on the semantics required by LLVM,
1791  // which is just that always inlining occurs. Further, disable generating
1792  // lifetime intrinsics to avoid enabling further optimizations during
1793  // code generation.
1795  /*InsertLifetimeIntrinsics=*/false));
1796 
1797  if (PTO.MergeFunctions)
1799 
1800  if (EnableMatrix)
1801  MPM.addPass(
1803 
1804  if (!CGSCCOptimizerLateEPCallbacks.empty()) {
1805  CGSCCPassManager CGPM;
1806  for (auto &C : CGSCCOptimizerLateEPCallbacks)
1807  C(CGPM, Level);
1808  if (!CGPM.isEmpty())
1810  }
1811  if (!LateLoopOptimizationsEPCallbacks.empty()) {
1812  LoopPassManager LPM;
1813  for (auto &C : LateLoopOptimizationsEPCallbacks)
1814  C(LPM, Level);
1815  if (!LPM.isEmpty()) {
1818  }
1819  }
1820  if (!LoopOptimizerEndEPCallbacks.empty()) {
1821  LoopPassManager LPM;
1822  for (auto &C : LoopOptimizerEndEPCallbacks)
1823  C(LPM, Level);
1824  if (!LPM.isEmpty()) {
1827  }
1828  }
1829  if (!ScalarOptimizerLateEPCallbacks.empty()) {
1830  FunctionPassManager FPM;
1831  for (auto &C : ScalarOptimizerLateEPCallbacks)
1832  C(FPM, Level);
1833  if (!FPM.isEmpty())
1835  }
1836 
1837  for (auto &C : OptimizerEarlyEPCallbacks)
1838  C(MPM, Level);
1839 
1840  if (!VectorizerStartEPCallbacks.empty()) {
1841  FunctionPassManager FPM;
1842  for (auto &C : VectorizerStartEPCallbacks)
1843  C(FPM, Level);
1844  if (!FPM.isEmpty())
1846  }
1847 
1848  ModulePassManager CoroPM;
1849  CoroPM.addPass(CoroEarlyPass());
1850  CGSCCPassManager CGPM;
1851  CGPM.addPass(CoroSplitPass());
1853  CoroPM.addPass(CoroCleanupPass());
1854  CoroPM.addPass(GlobalDCEPass());
1856 
1857  for (auto &C : OptimizerLastEPCallbacks)
1858  C(MPM, Level);
1859 
1860  if (LTOPreLink)
1861  addRequiredLTOPreLinkPasses(MPM);
1862 
1864 
1865  return MPM;
1866 }
1867 
1869  AAManager AA;
1870 
1871  // The order in which these are registered determines their priority when
1872  // being queried.
1873 
1874  // First we register the basic alias analysis that provides the majority of
1875  // per-function local AA logic. This is a stateless, on-demand local set of
1876  // AA techniques.
1877  AA.registerFunctionAnalysis<BasicAA>();
1878 
1879  // Next we query fast, specialized alias analyses that wrap IR-embedded
1880  // information about aliasing.
1881  AA.registerFunctionAnalysis<ScopedNoAliasAA>();
1882  AA.registerFunctionAnalysis<TypeBasedAA>();
1883 
1884  // Add support for querying global aliasing information when available.
1885  // Because the `AAManager` is a function analysis and `GlobalsAA` is a module
1886  // analysis, all that the `AAManager` can do is query for any *cached*
1887  // results from `GlobalsAA` through a readonly proxy.
1888  AA.registerModuleAnalysis<GlobalsAA>();
1889 
1890  // Add target-specific alias analyses.
1891  if (TM)
1893 
1894  return AA;
1895 }
llvm::GlobalsAA
Analysis pass providing a never-invalidated alias analysis result.
Definition: GlobalsModRef.h:133
TypeBasedAliasAnalysis.h
llvm::PassManager< Loop, LoopAnalysisManager, LoopStandardAnalysisResults &, LPMUpdater & >
Definition: LoopPassManager.h:69
llvm::RecomputeGlobalsAAPass
Definition: GlobalsModRef.h:143
llvm::BasicAA
Analysis pass providing a never-invalidated alias analysis result.
Definition: BasicAliasAnalysis.h:161
llvm::IPSCCPPass
Pass to perform interprocedural constant propagation.
Definition: SCCP.h:30
llvm::ScopedNoAliasAA
Analysis pass providing a never-invalidated alias analysis result.
Definition: ScopedNoAliasAA.h:53
llvm::FunctionSpecializationPass
Pass to perform interprocedural constant propagation by specializing functions.
Definition: SCCP.h:37
llvm::AAManager
A manager for alias analyses.
Definition: AliasAnalysis.h:1303
IROutliner.h
llvm::RunNewGVN
cl::opt< bool > RunNewGVN
getInlineParamsFromOptLevel
static InlineParams getInlineParamsFromOptLevel(OptimizationLevel Level)
Definition: PassBuilderPipelines.cpp:710
DeadArgumentElimination.h
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:17
PassBuilder.h
Phase
aarch64 falkor hwpf fix Falkor HW Prefetch Fix Late Phase
Definition: AArch64FalkorHWPFFix.cpp:231
llvm::SampleProfileProbePass
Definition: SampleProfileProbe.h:135
Mem2Reg.h
llvm::OptimizationLevel::O1
static const OptimizationLevel O1
Optimize quickly without destroying debuggability.
Definition: OptimizationLevel.h:57
llvm::LoopSinkPass
A pass that does profile-guided sinking of instructions into loops.
Definition: LoopSink.h:33
llvm::PassManager::isEmpty
bool isEmpty() const
Returns if the pass manager contains any passes.
Definition: PassManager.h:574
OpenMPOpt.h
llvm::InferFunctionAttrsPass
A pass which infers function attributes from the names and signatures of function declarations in a m...
Definition: InferFunctionAttrs.h:26
llvm::SampleProfileLoaderPass
The sample profiler data loader pass.
Definition: SampleProfile.h:26
CalledValuePropagation.h
Annotation2Metadata.h
llvm::LoopInterchangePass
Definition: LoopInterchange.h:20
llvm::AlignmentFromAssumptionsPass
Definition: AlignmentFromAssumptions.h:29
PartialInlining.h
Inliner.h
llvm::Annotation2MetadataPass
Pass to convert @llvm.global.annotations to !annotation metadata.
Definition: Annotation2Metadata.h:24
llvm::ThinOrFullLTOPhase::ThinLTOPostLink
@ ThinLTOPostLink
ThinLTO postlink (backend compile) phase.
GlobalSplit.h
llvm::GVNHoistPass
A simple and fast domtree-based GVN pass to hoist common expressions from sibling branches.
Definition: GVN.h:376
CorrelatedValuePropagation.h
llvm::LoopIdiomRecognizePass
Performs Loop Idiom Recognize Pass.
Definition: LoopIdiomRecognize.h:40
llvm::ExtraVectorizerPasses
cl::opt< bool > ExtraVectorizerPasses
llvm::EnableConstraintElimination
cl::opt< bool > EnableConstraintElimination
llvm::PassManager::addPass
LLVM_ATTRIBUTE_MINSIZE std::enable_if_t<!std::is_same< PassT, PassManager >::value > addPass(PassT &&Pass)
Definition: PassManager.h:550
llvm::OptimizationLevel::O2
static const OptimizationLevel O2
Optimize for fast execution as much as possible without triggering significant incremental compile ti...
Definition: OptimizationLevel.h:74
llvm::CrossDSOCFIPass
Definition: CrossDSOCFI.h:20
llvm::ReassociatePass
Reassociate commutative expressions.
Definition: Reassociate.h:71
llvm::InlinePass::EarlyInliner
@ EarlyInliner
ConstantMerge.h
llvm::SetLicmMssaNoAccForPromotionCap
cl::opt< unsigned > SetLicmMssaNoAccForPromotionCap
AlignmentFromAssumptions.h
llvm::PassBuilder::buildModuleOptimizationPipeline
ModulePassManager buildModuleOptimizationPipeline(OptimizationLevel Level, ThinOrFullLTOPhase LTOPhase)
Construct the core LLVM module optimization pipeline.
Definition: PassBuilderPipelines.cpp:1133
IndVarSimplify.h
ErrorHandling.h
SCCP.h
TailRecursionElimination.h
DivRemPairs.h
llvm::LoopRotatePass
A simple loop rotation transformation.
Definition: LoopRotation.h:24
llvm::BDCEPass
Definition: BDCE.h:26
DeadStoreElimination.h
OptimizationRemarkEmitter.h
CoroCleanup.h
GlobalsModRef.h
VectorCombine.h
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:139
MergeFunctions.h
llvm::PseudoProbeUpdatePass
Definition: SampleProfileProbe.h:155
LoopFlatten.h
EnableModuleInliner
static cl::opt< bool > EnableModuleInliner("enable-module-inliner", cl::init(false), cl::Hidden, cl::desc("Enable module inliner"))
ElimAvailExtern.h
llvm::PipelineTuningOptions::ForgetAllSCEVInLoopUnroll
bool ForgetAllSCEVInLoopUnroll
Tuning option to forget all SCEV loops in LoopUnroll.
Definition: PassBuilder.h:60
llvm::LoopUnrollOptions
A set of parameters used to control various transforms performed by the LoopUnroll pass.
Definition: LoopUnrollPass.h:61
InjectTLIMappings.h
ConstraintElimination.h
Float2Int.h
CGProfile.h
llvm::LoopDistributePass
Definition: LoopDistribute.h:25
JumpThreading.h
HotColdSplitting.h
llvm::EnableKnowledgeRetention
cl::opt< bool > EnableKnowledgeRetention
enable preservation of attributes in assume like: call void @llvm.assume(i1 true) [ "nonnull"(i32* PT...
Definition: InstCombineCalls.cpp:94
llvm::PGOInstrumentationUse
The profile annotation (profile-instr-use) pass for IR based PGO.
Definition: PGOInstrumentation.h:58
llvm::InlineParams
Thresholds to tune inline cost analysis.
Definition: InlineCost.h:190
LoopUnrollAndJamPass.h
llvm::LoopLoadEliminationPass
Pass to forward loads in a loop around the backedge to subsequent iterations.
Definition: LoopLoadElimination.h:27
llvm::EnableOrderFileInstrumentation
cl::opt< bool > EnableOrderFileInstrumentation
llvm::ThinOrFullLTOPhase::ThinLTOPreLink
@ ThinLTOPreLink
ThinLTO prelink (summary) phase.
llvm::TailCallElimPass
Definition: TailRecursionElimination.h:61
llvm::EnableGVNSink
cl::opt< bool > EnableGVNSink
llvm::CoroCleanupPass
Definition: CoroCleanup.h:23
llvm::EnableMatrix
cl::opt< bool > EnableMatrix
llvm::createCGSCCToFunctionPassAdaptor
CGSCCToFunctionPassAdaptor createCGSCCToFunctionPassAdaptor(FunctionPassT &&Pass, bool EagerlyInvalidate=false, bool NoRerun=false)
A function to deduce a function pass type and wrap it in the templated adaptor.
Definition: CGSCCPassManager.h:509
LoopInstSimplify.h
llvm::SLPVectorizerPass
Definition: SLPVectorizer.h:56
llvm::ReversePostOrderFunctionAttrsPass
A pass to do RPO deduction and propagation of function attributes.
Definition: FunctionAttrs.h:70
llvm::PartialInlinerPass
Pass to perform partial inlining.
Definition: PartialInlining.h:24
llvm::PGOOptions::SampleUse
@ SampleUse
Definition: PGOOptions.h:23
llvm::DSEPass
This class implements a trivial dead store elimination.
Definition: DeadStoreElimination.h:28
llvm::OptimizationLevel::O0
static const OptimizationLevel O0
Disable as many optimizations as possible.
Definition: OptimizationLevel.h:41
llvm::PassBuilder::addPGOInstrPassesForO0
void addPGOInstrPassesForO0(ModulePassManager &MPM, bool RunProfileGen, bool IsCS, std::string ProfileFile, std::string ProfileRemappingFile)
Add PGOInstrumentation passes for O0 only.
Definition: PassBuilderPipelines.cpp:685
BasicAliasAnalysis.h
CoroElide.h
llvm::PipelineTuningOptions::LicmMssaNoAccForPromotionCap
unsigned LicmMssaNoAccForPromotionCap
Tuning option to disable promotion to scalars in LICM with MemorySSA, if the number of access is too ...
Definition: PassBuilder.h:68
MergedLoadStoreMotion.h
llvm::AggressiveInstCombinePass
Definition: AggressiveInstCombine.h:27
llvm::InstrProfiling
Instrumentation based profiling lowering pass.
Definition: InstrProfiling.h:34
llvm::ConstraintEliminationPass
Definition: ConstraintElimination.h:16
llvm::HotColdSplittingPass
Pass to outline cold regions.
Definition: HotColdSplitting.h:61
llvm::TypeBasedAA
Analysis pass providing a never-invalidated alias analysis result.
Definition: TypeBasedAliasAnalysis.h:59
AliasAnalysis.h
llvm::ThinOrFullLTOPhase
ThinOrFullLTOPhase
This enumerates the LLVM full LTO or ThinLTO optimization phases.
Definition: Pass.h:73
CommandLine.h
LoopRotation.h
llvm::MODULE
@ MODULE
Definition: Attributor.h:5032
llvm::ModuleInlinerWrapperPass
Module pass, wrapping the inliner pass.
Definition: Inliner.h:122
UseInlineAdvisor
static cl::opt< InliningAdvisorMode > UseInlineAdvisor("enable-ml-inliner", cl::init(InliningAdvisorMode::Default), cl::Hidden, cl::desc("Enable ML policy for inliner. Currently trained for -Oz only"), cl::values(clEnumValN(InliningAdvisorMode::Default, "default", "Heuristics-based inliner version."), clEnumValN(InliningAdvisorMode::Development, "development", "Use development mode (runtime-loadable model)."), clEnumValN(InliningAdvisorMode::Release, "release", "Use release mode (AOT-compiled model).")))
llvm::CoroElidePass
Definition: CoroElide.h:25
llvm::ControlHeightReductionPass
Definition: ControlHeightReduction.h:21
llvm::CorrelatedValuePropagationPass
Definition: CorrelatedValuePropagation.h:18
llvm::FlattenedProfileUsed
cl::opt< bool > FlattenedProfileUsed
TargetMachine.h
llvm::AttributorPass
}
Definition: Attributor.h:3008
PerformMandatoryInliningsFirst
static cl::opt< bool > PerformMandatoryInliningsFirst("mandatory-inlining-first", cl::init(true), cl::Hidden, cl::desc("Perform mandatory inlinings module-wide, before performing " "inlining."))
AlwaysInliner.h
InstrProfiling.h
LoopIdiomRecognize.h
llvm::LICMPass
Performs Loop Invariant Code Motion Pass.
Definition: LICM.h:66
ArgumentPromotion.h
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::PostOrderFunctionAttrsPass
Computes function attributes in post-order over the call graph.
Definition: FunctionAttrs.h:51
EnableNoRerunSimplificationPipeline
static cl::opt< bool > EnableNoRerunSimplificationPipeline("enable-no-rerun-simplification-pipeline", cl::init(false), cl::Hidden, cl::desc("Prevent running the simplification pipeline on a function more " "than once in the case that SCC mutations cause a function to be " "visited multiple times as long as the function has not been changed"))
llvm::PassBuilder::buildFunctionSimplificationPipeline
FunctionPassManager buildFunctionSimplificationPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the core LLVM function canonicalization and simplification pipeline.
Definition: PassBuilderPipelines.cpp:391
llvm::PGOOptions::IRUse
@ IRUse
Definition: PGOOptions.h:23
llvm::CoroEarlyPass
Definition: CoroEarly.h:26
IP
Definition: NVPTXLowerArgs.cpp:167
AssumeBundleBuilder.h
llvm::DisablePreInliner
cl::opt< bool > DisablePreInliner
llvm::GVNSinkPass
Uses an "inverted" value numbering to decide the similarity of expressions and sinks similar expressi...
Definition: GVN.h:383
InlineAdvisor.h
llvm::OpenMPOptCGSCCPass
Definition: OpenMPOpt.h:43
llvm::PGOIndirectCallPromotion
The indirect function call promotion pass.
Definition: PGOInstrumentation.h:73
Options
const char LLVMTargetMachineRef LLVMPassBuilderOptionsRef Options
Definition: PassBuilderBindings.cpp:48
MemCpyOptimizer.h
llvm::CanonicalizeAliasesPass
Simple pass that canonicalizes aliases.
Definition: CanonicalizeAliases.h:23
GVN.h
llvm::LoopDeletionPass
Definition: LoopDeletion.h:24
llvm::PromotePass
Definition: Mem2Reg.h:23
llvm::LoopFlattenPass
Definition: LoopFlatten.h:23
EnableMergeFunctions
static cl::opt< bool > EnableMergeFunctions("enable-merge-functions", cl::init(false), cl::Hidden, cl::desc("Enable function merging as part of the optimization pipeline"))
llvm::getInlineParams
InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options.
Definition: InlineCost.cpp:3081
llvm::PGOOptions::CSIRInstr
@ CSIRInstr
Definition: PGOOptions.h:24
llvm::ModuleInlinerPass
The module inliner pass for the new pass manager.
Definition: ModuleInliner.h:27
llvm::PassBuilder::buildInlinerPipeline
ModuleInlinerWrapperPass buildInlinerPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the module pipeline that performs inlining as well as the inlining-driven cleanups.
Definition: PassBuilderPipelines.cpp:715
llvm::GlobalOptPass
Optimize globals that never have their address taken.
Definition: GlobalOpt.h:25
llvm::SyntheticCountsPropagation
Definition: SyntheticCountsPropagation.h:17
llvm::InlineContext
Provides context on when an inline advisor is constructed in the pipeline (e.g., link phase,...
Definition: InlineAdvisor.h:58
llvm::PGOMemOPSizeOpt
The profile size based optimization pass for memory intrinsics.
Definition: PGOInstrumentation.h:86
llvm::PassBuilder::buildThinLTOPreLinkDefaultPipeline
ModulePassManager buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level)
Build a pre-link, ThinLTO-targeting default optimization pipeline to a pass manager.
Definition: PassBuilderPipelines.cpp:1344
ScopedNoAliasAA.h
addAnnotationRemarksPass
static void addAnnotationRemarksPass(ModulePassManager &MPM)
Definition: PassBuilderPipelines.cpp:235
llvm::EnableDFAJumpThreading
cl::opt< bool > EnableDFAJumpThreading
EnablePGOInlineDeferral
static cl::opt< bool > EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true), cl::Hidden, cl::desc("Enable inline deferral during PGO"))
Flag to enable inline deferral during PGO.
llvm::MemProfilerPass
Public interface to the memory profiler pass for instrumenting code to profile memory accesses.
Definition: MemProfiler.h:30
llvm::LoopSimplifyCFGPass
Performs basic CFG simplifications to assist other loop passes.
Definition: LoopSimplifyCFG.h:28
llvm::PGOInstrumentationGenCreateVar
The instrumentation (profile-instr-gen) pass for IR based PGO.
Definition: PGOInstrumentation.h:35
llvm::MergedLoadStoreMotionPass
Definition: MergedLoadStoreMotion.h:42
ControlHeightReduction.h
InstSimplifyPass.h
llvm::LowerTypeTestsPass
Definition: LowerTypeTests.h:200
llvm::AnnotationRemarksPass
Definition: AnnotationRemarks.h:23
llvm::PassBuilder::buildModuleInlinerPipeline
ModulePassManager buildModuleInlinerPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the module pipeline that performs inlining with module inliner pass.
Definition: PassBuilderPipelines.cpp:794
llvm::SimpleLoopUnswitchPass
This pass transforms loops that contain branches or switches on loop- invariant conditions to have mu...
Definition: SimpleLoopUnswitch.h:67
SROA.h
SampleProfile.h
llvm::PipelineTuningOptions::EagerlyInvalidateAnalyses
bool EagerlyInvalidateAnalyses
Definition: PassBuilder.h:85
llvm::GlobalSplitPass
Pass to perform split of global variables.
Definition: GlobalSplit.h:26
ForceFunctionAttrs.h
llvm::cl::opt
Definition: CommandLine.h:1392
Attributor.h
llvm::ModuleMemProfilerPass
Public interface to the memory profiler module pass for instrumenting code to profile memory allocati...
Definition: MemProfiler.h:39
EnableEagerlyInvalidateAnalyses
static cl::opt< bool > EnableEagerlyInvalidateAnalyses("eagerly-invalidate-analyses", cl::init(true), cl::Hidden, cl::desc("Eagerly invalidate more analyses in default pipelines"))
llvm::VectorCombinePass
Optimize scalar/vector interactions in IR using target cost models.
Definition: VectorCombine.h:23
llvm::createModuleToFunctionPassAdaptor
ModuleToFunctionPassAdaptor createModuleToFunctionPassAdaptor(FunctionPassT &&Pass, bool EagerlyInvalidate=false)
A function to deduce a function pass type and wrap it in the templated adaptor.
Definition: PassManager.h:1224
SpeculativeExecution.h
llvm::cl::values
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:685
EarlyCSE.h
CrossDSOCFI.h
ProfileSummaryInfo.h
llvm::WholeProgramDevirtPass
Definition: WholeProgramDevirt.h:224
ModuleInliner.h
CoroSplit.h
llvm::AssumeSimplifyPass
This pass attempts to minimize the number of assumes without losing any information.
Definition: AssumeBundleBuilder.h:55
llvm::PassBuilder::buildLTOPreLinkDefaultPipeline
ModulePassManager buildLTOPreLinkDefaultPipeline(OptimizationLevel Level)
Build a pre-link, LTO-targeting default optimization pipeline to a pass manager.
Definition: PassBuilderPipelines.cpp:1457
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::AttributorCGSCCPass
Definition: Attributor.h:3011
PGOInstrumentation.h
CGSCCPassManager.h
llvm::PassBuilder::buildLTODefaultPipeline
ModulePassManager buildLTODefaultPipeline(OptimizationLevel Level, ModuleSummaryIndex *ExportSummary)
Build an LTO default optimization pipeline to a pass manager.
Definition: PassBuilderPipelines.cpp:1466
llvm::ForceFunctionAttrsPass
Pass which forces specific function attributes into the IR, primarily as a debugging tool.
Definition: ForceFunctionAttrs.h:24
llvm::ExtraVectorPassManager
A pass manager to run a set of extra function simplification passes after vectorization,...
Definition: LoopVectorize.h:105
AggressiveInstCombine.h
llvm::InvalidateAnalysisPass
A no-op pass template which simply forces a specific analysis result to be invalidated.
Definition: PassManager.h:1277
LowerExpectIntrinsic.h
llvm::PassBuilder::buildThinLTODefaultPipeline
ModulePassManager buildThinLTODefaultPipeline(OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary)
Build a ThinLTO default optimization pipeline to a pass manager.
Definition: PassBuilderPipelines.cpp:1400
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
llvm::DeadArgumentEliminationPass
Eliminate dead arguments (and return values) from functions.
Definition: DeadArgumentElimination.h:38
ADCE.h
llvm::OptimizationLevel::Oz
static const OptimizationLevel Oz
A very specialized mode that will optimize for code size at any and all costs.
Definition: OptimizationLevel.h:108
llvm::PassBuilder::buildDefaultAAPipeline
AAManager buildDefaultAAPipeline()
Build the default AAManager with the default alias analysis pipeline registered.
Definition: PassBuilderPipelines.cpp:1868
InferFunctionAttrs.h
SimpleLoopUnswitch.h
llvm::PipelineTuningOptions::PipelineTuningOptions
PipelineTuningOptions()
Constructor sets pipeline tuning defaults based on cl::opts.
Definition: PassBuilderPipelines.cpp:185
llvm::InstrProfOptions
Options for the frontend instrumentation based profiling pass.
Definition: Instrumentation.h:110
llvm::SpeculativeExecutionPass
Definition: SpeculativeExecution.h:69
llvm::createFunctionToLoopPassAdaptor
std::enable_if_t< is_detected< HasRunOnLoopT, LoopPassT >::value, FunctionToLoopPassAdaptor > createFunctionToLoopPassAdaptor(LoopPassT &&Pass, bool UseMemorySSA=false, bool UseBlockFrequencyInfo=false, bool UseBranchProbabilityInfo=false)
A function to deduce a loop pass type and wrap it in the templated adaptor.
Definition: LoopPassManager.h:472
llvm::PreInlineThreshold
cl::opt< int > PreInlineThreshold
llvm::TargetMachine::registerDefaultAliasAnalyses
virtual void registerDefaultAliasAnalyses(AAManager &)
Allow the target to register alias analyses with the AAManager for use with the new pass manager.
Definition: TargetMachine.h:357
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::EnableHotColdSplit
cl::opt< bool > EnableHotColdSplit
llvm::EliminateAvailableExternallyPass
A pass that transforms external global definitions into declarations.
Definition: ElimAvailExtern.h:24
llvm::ADCEPass
A DCE pass that assumes instructions are dead until proven otherwise.
Definition: ADCE.h:31
llvm::PipelineTuningOptions::SLPVectorization
bool SLPVectorization
Tuning option to enable/disable slp loop vectorization, set based on opt level.
Definition: PassBuilder.h:53
llvm::RelLookupTableConverterPass
Definition: RelLookupTableConverter.h:61
Reassociate.h
llvm::JumpThreadingPass
This pass performs 'jump threading', which looks at blocks that have multiple predecessors and multip...
Definition: JumpThreading.h:77
llvm::LoopFullUnrollPass
Loop unroll pass that only does full loop unrolling and peeling.
Definition: LoopUnrollPass.h:26
llvm::LowerExpectIntrinsicPass
Definition: LowerExpectIntrinsic.h:24
llvm::InlinePass::CGSCCInliner
@ CGSCCInliner
llvm::EnableIROutliner
cl::opt< bool > EnableIROutliner
llvm::InliningAdvisorMode::Development
@ Development
SampleProfileProbe.h
LoopPassManager.h
llvm::Float2IntPass
Definition: Float2Int.h:32
llvm::LibCallsShrinkWrapPass
Definition: LibCallsShrinkWrap.h:18
NameAnonGlobals.h
llvm::CalledValuePropagationPass
Definition: CalledValuePropagation.h:26
llvm::WarnMissedTransformationsPass
Definition: WarnMissedTransforms.h:24
llvm::IndVarSimplifyPass
Definition: IndVarSimplify.h:25
LowerMatrixIntrinsics.h
llvm::GlobalDCEPass
Pass to remove unused function declarations.
Definition: GlobalDCE.h:36
LoopInterchange.h
llvm::PipelineTuningOptions::CallGraphProfile
bool CallGraphProfile
Tuning option to enable/disable call graph profile.
Definition: PassBuilder.h:72
llvm::ThinOrFullLTOPhase::FullLTOPostLink
@ FullLTOPostLink
Full LTO postlink (backend compile) phase.
llvm::LowerMatrixIntrinsicsPass
Definition: LowerMatrixIntrinsics.h:19
llvm::NewGVNPass
Definition: NewGVN.h:23
SyntheticCountsPropagation.h
llvm::RunPartialInlining
cl::opt< bool > RunPartialInlining
llvm::AddDiscriminatorsPass
Definition: AddDiscriminators.h:24
CanonicalizeAliases.h
llvm::CGSCC
@ CGSCC
Definition: Attributor.h:5033
LibCallsShrinkWrap.h
llvm::CoroConditionalWrapper
Definition: CoroConditionalWrapper.h:20
llvm::InstCombinePass
Definition: InstCombine.h:28
GlobalDCE.h
llvm::PassManager< Loop, LoopAnalysisManager, LoopStandardAnalysisResults &, LPMUpdater & >::addPass
LLVM_ATTRIBUTE_MINSIZE std::enable_if_t< is_detected< HasRunOnLoopT, PassT >::value > addPass(PassT &&Pass)
Definition: LoopPassManager.h:107
llvm::OptimizationLevel::O3
static const OptimizationLevel O3
Optimize for fast execution as much as possible.
Definition: OptimizationLevel.h:89
llvm::EnableLoopFlatten
cl::opt< bool > EnableLoopFlatten
clEnumValN
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:660
llvm::PipelineTuningOptions::LoopVectorization
bool LoopVectorization
Tuning option to enable/disable loop vectorization, set based on opt level.
Definition: PassBuilder.h:49
NewGVN.h
llvm::PICLevel::Level
Level
Definition: CodeGen.h:33
LowerTypeTests.h
CoroConditionalWrapper.h
CallSiteSplitting.h
llvm::LoopVectorizeOptions
Definition: LoopVectorize.h:115
llvm::InstrOrderFilePass
The instrumentation pass for recording function order.
Definition: InstrOrderFile.h:20
LoopSimplifyCFG.h
llvm::CGProfilePass
Definition: CGProfile.h:19
AnnotationRemarks.h
LoopVectorize.h
SCCP.h
llvm::ModuleInlinerWrapperPass::addModulePass
void addModulePass(T Pass)
Add a module pass that runs before the CGSCC passes.
Definition: Inliner.h:139
llvm::PassManager< Function >
llvm::LoopUnrollAndJamPass
A simple loop unroll-and-jam transformation.
Definition: LoopUnrollAndJamPass.h:20
llvm::IROutlinerPass
Pass to outline similar regions.
Definition: IROutliner.h:444
llvm::ForgetSCEVInLoopUnroll
cl::opt< bool > ForgetSCEVInLoopUnroll
SLPVectorizer.h
EnableO3NonTrivialUnswitching
static cl::opt< bool > EnableO3NonTrivialUnswitching("enable-npm-O3-nontrivial-unswitch", cl::init(true), cl::Hidden, cl::desc("Enable non-trivial loop unswitching for -O3"))
LoopLoadElimination.h
llvm::PassBuilder::buildPerModuleDefaultPipeline
ModulePassManager buildPerModuleDefaultPipeline(OptimizationLevel Level, bool LTOPreLink=false)
Build a per-module default optimization pipeline.
Definition: PassBuilderPipelines.cpp:1301
llvm::EnableCHR
cl::opt< bool > EnableCHR
LoopUnrollPass.h
llvm::SCCPPass
This pass performs function-level constant propagation and merging.
Definition: SCCP.h:38
EnableSyntheticCounts
static cl::opt< bool > EnableSyntheticCounts("enable-npm-synthetic-counts", cl::Hidden, cl::desc("Run synthetic function entry count generation " "pass"))
llvm::PipelineTuningOptions::MergeFunctions
bool MergeFunctions
Tuning option to enable/disable function merging.
Definition: PassBuilder.h:76
PGOOptions.h
llvm::OptimizationLevel
Definition: OptimizationLevel.h:22
llvm::NameAnonGlobalPass
Simple pass that provides a name to every anonymous globals.
Definition: NameAnonGlobals.h:22
PassManager.h
LowerConstantIntrinsics.h
llvm::ThinOrFullLTOPhase::None
@ None
No LTO/ThinLTO behavior needed.
llvm::PGOOptions::CSIRUse
@ CSIRUse
Definition: PGOOptions.h:24
isLTOPreLink
static bool isLTOPreLink(ThinOrFullLTOPhase Phase)
Definition: PassBuilderPipelines.cpp:240
llvm::CoroSplitPass
Definition: CoroSplit.h:24
llvm::PassBuilder::buildO0DefaultPipeline
ModulePassManager buildO0DefaultPipeline(OptimizationLevel Level, bool LTOPreLink=false)
Build an O0 pipeline with the minimal semantically required passes.
Definition: PassBuilderPipelines.cpp:1760
llvm::SROAPass
An optimization pass providing Scalar Replacement of Aggregates.
Definition: SROA.h:64
WholeProgramDevirt.h
llvm::MemCpyOptPass
Definition: MemCpyOptimizer.h:39
llvm::ArgumentPromotionPass
Argument promotion pass.
Definition: ArgumentPromotion.h:24
llvm::DivRemPairsPass
Hoist/decompose integer division and remainder instructions to enable CFG improvements and better cod...
Definition: DivRemPairs.h:23
SimplifyCFGOptions.h
LoopDistribute.h
AA
llvm::AlwaysInlinerPass
Inlines functions marked as "always_inline".
Definition: AlwaysInliner.h:32
WarnMissedTransforms.h
llvm::ThinOrFullLTOPhase::FullLTOPreLink
@ FullLTOPreLink
Full LTO prelink phase.
llvm::SimplifyCFGOptions
Definition: SimplifyCFGOptions.h:23
GlobalOpt.h
llvm::InjectTLIMappings
Definition: InjectTLIMappings.h:22
LoopSink.h
OptimizationLevel.h
llvm::LowerConstantIntrinsicsPass
Definition: LowerConstantIntrinsics.h:24
llvm::OpenMPOptPass
OpenMP optimizations pass.
Definition: OpenMPOpt.h:38
llvm::ModuleSummaryIndex
Class to hold module path string table and global value map, and encapsulate methods for operating on...
Definition: ModuleSummaryIndex.h:1087
llvm::InliningAdvisorMode::Default
@ Default
llvm::PGOInstrumentationGen
The instrumentation (profile-instr-gen) pass for IR based PGO.
Definition: PGOInstrumentation.h:47
llvm::CallSiteSplittingPass
Definition: CallSiteSplitting.h:18
LoopDeletion.h
llvm::LoopInstSimplifyPass
Performs Loop Inst Simplify Pass.
Definition: LoopInstSimplify.h:25
MemProfiler.h
LICM.h
llvm::EnableLoopInterchange
cl::opt< bool > EnableLoopInterchange
CoroEarly.h
llvm::EnableUnrollAndJam
cl::opt< bool > EnableUnrollAndJam
llvm::PGOOptions::IRInstr
@ IRInstr
Definition: PGOOptions.h:23
llvm::InliningAdvisorMode::Release
@ Release
llvm::AttributorRun
cl::opt< AttributorRunOption > AttributorRun
BDCE.h
InstrOrderFile.h
InstCombine.h
llvm::GVNPass
The core GVN pass object.
Definition: GVN.h:116
llvm::InstSimplifyPass
Run instruction simplification across each instruction in the function.
Definition: InstSimplifyPass.h:32
llvm::RequireAnalysisPass
A utility pass template to force an analysis result to be available.
Definition: PassManager.h:1249
llvm::PassBuilder::buildModuleSimplificationPipeline
ModulePassManager buildModuleSimplificationPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the core LLVM module canonicalization and simplification pipeline.
Definition: PassBuilderPipelines.cpp:833
llvm::PipelineTuningOptions::LoopUnrolling
bool LoopUnrolling
Tuning option to enable/disable loop unrolling. Its default value is true.
Definition: PassBuilder.h:56
llvm::cl::desc
Definition: CommandLine.h:405
llvm::PipelineTuningOptions::LicmMssaOptCap
unsigned LicmMssaOptCap
Tuning option to cap the number of calls to retrieve clobbering accesses in MemorySSA,...
Definition: PassBuilder.h:64
llvm::PipelineTuningOptions::LoopInterleaving
bool LoopInterleaving
Tuning option to set loop interleaving on/off, set based on opt level.
Definition: PassBuilder.h:45
llvm::MaxDevirtIterations
cl::opt< unsigned > MaxDevirtIterations("max-devirt-iterations", cl::ReallyHidden, cl::init(4))
Definition: PassBuilderPipelines.cpp:200
SimplifyCFG.h
llvm::SetLicmMssaOptCap
cl::opt< unsigned > SetLicmMssaOptCap
llvm::MergeFunctionsPass
Merge identical functions.
Definition: MergeFunctions.h:25
llvm::EnableGVNHoist
cl::opt< bool > EnableGVNHoist
MPM
ModulePassManager MPM
Definition: PassBuilderBindings.cpp:70
llvm::createModuleToPostOrderCGSCCPassAdaptor
ModuleToPostOrderCGSCCPassAdaptor createModuleToPostOrderCGSCCPassAdaptor(CGSCCPassT &&Pass)
A function to deduce a function pass type and wrap it in the templated adaptor.
Definition: CGSCCPassManager.h:373
llvm::PassManager< Loop, LoopAnalysisManager, LoopStandardAnalysisResults &, LPMUpdater & >::isEmpty
bool isEmpty() const
Definition: LoopPassManager.h:166
llvm::LoopUnrollPass
Loop unroll pass that will support both full and partial unrolling.
Definition: LoopUnrollPass.h:133
FunctionAttrs.h
llvm::SimplifyCFGPass
A pass to simplify and canonicalize the CFG of a function.
Definition: SimplifyCFG.h:29
llvm::EnableFunctionSpecialization
cl::opt< bool > EnableFunctionSpecialization
llvm::LoopVectorizePass
The LoopVectorize Pass.
Definition: LoopVectorize.h:161
RelLookupTableConverter.h
EnableMemProfiler
static cl::opt< bool > EnableMemProfiler("enable-mem-prof", cl::Hidden, cl::desc("Enable memory profiler"))
llvm::EarlyCSEPass
A simple and fast domtree-based CSE pass.
Definition: EarlyCSE.h:30
llvm::ConstantMergePass
A pass that merges duplicate global constants into a single constant.
Definition: ConstantMerge.h:29
llvm::DFAJumpThreadingPass
Definition: DFAJumpThreading.h:22
AddDiscriminators.h
DFAJumpThreading.h