LLVM  14.0.0git
PassBuilderPipelines.cpp
Go to the documentation of this file.
1 //===- Construction of pass pipelines -------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 ///
10 /// This file provides the implementation of the PassBuilder based on our
11 /// static pass registry as well as related functionality. It also provides
12 /// helpers to aid in analyzing, debugging, and testing passes and pass
13 /// pipelines.
14 ///
15 //===----------------------------------------------------------------------===//
16 
26 #include "llvm/IR/PassManager.h"
129 
130 using namespace llvm;
131 
133  "enable-ml-inliner", cl::init(InliningAdvisorMode::Default), cl::Hidden,
134  cl::desc("Enable ML policy for inliner. Currently trained for -Oz only"),
136  "Heuristics-based inliner version."),
138  "Use development mode (runtime-loadable model)."),
140  "Use release mode (AOT-compiled model).")));
141 
143  "enable-npm-synthetic-counts", cl::init(false), cl::Hidden, cl::ZeroOrMore,
144  cl::desc("Run synthetic function entry count generation "
145  "pass"));
146 
147 /// Hidden flag (on by default) to enable inline deferral during PGO; copied into IP.EnableDeferral when PGOOpt is set.
148 static cl::opt<bool>
149  EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true),
150  cl::Hidden,
151  cl::desc("Enable inline deferral during PGO"));
152 
153 static cl::opt<bool> EnableMemProfiler("enable-mem-prof", cl::init(false),
155  cl::desc("Enable memory profiler"));
156 
/// Hidden flag, off by default, registered on the command line as "enable-module-inliner".
157 static cl::opt<bool> EnableModuleInliner("enable-module-inliner",
158  cl::init(false), cl::Hidden,
159  cl::desc("Enable module inliner"));
160 
162  "mandatory-inlining-first", cl::init(true), cl::Hidden, cl::ZeroOrMore,
163  cl::desc("Perform mandatory inlinings module-wide, before performing "
164  "inlining."));
165 
167  "enable-npm-O3-nontrivial-unswitch", cl::init(true), cl::Hidden,
168  cl::ZeroOrMore, cl::desc("Enable non-trivial loop unswitching for -O3"));
169 
171  "eagerly-invalidate-analyses", cl::init(true), cl::Hidden,
172  cl::desc("Eagerly invalidate more analyses in default pipelines"));
173 
175  "enable-no-rerun-simplification-pipeline", cl::init(false), cl::Hidden,
176  cl::desc(
177  "Prevent running the simplification pipeline on a function more "
178  "than once in the case that SCC mutations cause a function to be "
179  "visited multiple times as long as the function has not been changed"));
180 
182  "enable-merge-functions", cl::init(false), cl::Hidden,
183  cl::desc("Enable function merging as part of the optimization pipeline"));
184 
186  LoopInterleaving = true;
187  LoopVectorization = true;
188  SLPVectorization = false;
189  LoopUnrolling = true;
193  CallGraphProfile = true;
196 }
197 
198 namespace llvm {
199 
208 extern cl::opt<bool> EnableCHR;
213 extern cl::opt<bool> RunNewGVN;
216 
218 
221 
223 
226 } // namespace llvm
227 
228 void PassBuilder::invokePeepholeEPCallbacks(FunctionPassManager &FPM,
230  for (auto &C : PeepholeEPCallbacks)
231  C(FPM, Level);
232 }
233 
234 // Helper to add AnnotationRemarksPass.
239 }
240 
241 // Helper to check if the current compilation phase is preparing for LTO
245 }
246 
247 // TODO: Investigate the cost/benefit of tail call elimination on debugging.
249 PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
251 
253 
254  // Form SSA out of local memory accesses after breaking apart aggregates into
255  // scalars.
256  FPM.addPass(SROAPass());
257 
258  // Catch trivial redundancies
259  FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
260 
261  // Hoisting of scalars and load expressions.
262  FPM.addPass(SimplifyCFGPass());
263  FPM.addPass(InstCombinePass());
264 
266 
267  invokePeepholeEPCallbacks(FPM, Level);
268 
269  FPM.addPass(SimplifyCFGPass());
270 
271  // Form canonically associated expression trees, and simplify the trees using
272  // basic mathematical properties. For example, this will form (nearly)
273  // minimal multiplication trees.
274  FPM.addPass(ReassociatePass());
275 
276  // Add the primary loop simplification pipeline.
277  // FIXME: Currently this is split into two loop pass pipelines because we run
278  // some function passes in between them. These can and should be removed
279  // and/or replaced by scheduling the loop pass equivalents in the correct
280  // positions. But those equivalent passes aren't powerful enough yet.
281  // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
282  // used. We have `LoopSimplifyCFGPass` which isn't powerful enough yet to
283  // fully replace `SimplifyCFGPass`, and the closest to the other we have is
284  // `LoopInstSimplify`.
285  LoopPassManager LPM1, LPM2;
286 
287  // Simplify the loop body. We do this initially to clean up after other loop
288  // passes run, either when iterating on a loop or on inner loops with
289  // implications on the outer loop.
292 
293  // Try to remove as much code from the loop header as possible,
294  // to reduce amount of IR that will have to be duplicated.
295  // TODO: Investigate promotion cap for O1.
297 
298  LPM1.addPass(LoopRotatePass(/* Disable header duplication */ true,
299  isLTOPreLink(Phase)));
300  // TODO: Investigate promotion cap for O1.
303  if (EnableLoopFlatten)
304  LPM1.addPass(LoopFlattenPass());
305 
307  LPM2.addPass(IndVarSimplifyPass());
308 
309  for (auto &C : LateLoopOptimizationsEPCallbacks)
310  C(LPM2, Level);
311 
312  LPM2.addPass(LoopDeletionPass());
313 
316 
317  // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
318  // because it changes IR to make profile annotation in back compile
319  // inaccurate. The normal unroller doesn't pay attention to forced full unroll
320  // attributes so we need to make sure and allow the full unroll pass to pay
321  // attention to it.
322  if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
323  PGOOpt->Action != PGOOptions::SampleUse)
324  LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
325  /* OnlyWhenForced= */ !PTO.LoopUnrolling,
327 
328  for (auto &C : LoopOptimizerEndEPCallbacks)
329  C(LPM2, Level);
330 
331  // We provide the opt remark emitter pass for LICM to use. We only need to do
332  // this once as it is immutable.
333  FPM.addPass(
336  /*UseMemorySSA=*/true,
337  /*UseBlockFrequencyInfo=*/true));
338  FPM.addPass(SimplifyCFGPass());
339  FPM.addPass(InstCombinePass());
340  // The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA.
341  // *All* loop passes must preserve it, in order to be able to use it.
343  /*UseMemorySSA=*/false,
344  /*UseBlockFrequencyInfo=*/false));
345 
346  // Delete small array after loop unroll.
347  FPM.addPass(SROAPass());
348 
349  // Specially optimize memory movement as it doesn't look like dataflow in SSA.
350  FPM.addPass(MemCpyOptPass());
351 
352  // Sparse conditional constant propagation.
353  // FIXME: It isn't clear why we do this *after* loop passes rather than
354  // before...
355  FPM.addPass(SCCPPass());
356 
357  // Delete dead bit computations (instcombine runs after to fold away the dead
358  // computations, and then ADCE will run later to exploit any new DCE
359  // opportunities that creates).
360  FPM.addPass(BDCEPass());
361 
362  // Run instcombine after redundancy and dead bit elimination to exploit
363  // opportunities opened up by them.
364  FPM.addPass(InstCombinePass());
365  invokePeepholeEPCallbacks(FPM, Level);
366 
367  FPM.addPass(CoroElidePass());
368 
369  for (auto &C : ScalarOptimizerLateEPCallbacks)
370  C(FPM, Level);
371 
372  // Finally, do an expensive DCE pass to catch all the dead code exposed by
373  // the simplifications and basic cleanup after all the simplifications.
374  // TODO: Investigate if this is too expensive.
375  FPM.addPass(ADCEPass());
376  FPM.addPass(SimplifyCFGPass());
377  FPM.addPass(InstCombinePass());
378  invokePeepholeEPCallbacks(FPM, Level);
379 
380  return FPM;
381 }
382 
386  assert(Level != OptimizationLevel::O0 && "Must request optimizations!");
387 
388  // The O1 pipeline has a separate pipeline creation function to simplify
389  // construction readability.
390  if (Level.getSpeedupLevel() == 1)
391  return buildO1FunctionSimplificationPipeline(Level, Phase);
392 
394 
395  // Form SSA out of local memory accesses after breaking apart aggregates into
396  // scalars.
397  FPM.addPass(SROAPass());
398 
399  // Catch trivial redundancies
400  FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
403 
404  // Hoisting of scalars and load expressions.
405  if (EnableGVNHoist)
406  FPM.addPass(GVNHoistPass());
407 
408  // Global value numbering based sinking.
409  if (EnableGVNSink) {
410  FPM.addPass(GVNSinkPass());
411  FPM.addPass(SimplifyCFGPass());
412  }
413 
416 
417  // Speculative execution if the target has divergent branches; otherwise nop.
418  FPM.addPass(SpeculativeExecutionPass(/* OnlyIfDivergentTarget =*/true));
419 
420  // Optimize based on known information about branches, and cleanup afterward.
421  FPM.addPass(JumpThreadingPass());
423 
424  FPM.addPass(SimplifyCFGPass());
425  FPM.addPass(InstCombinePass());
428 
429  if (!Level.isOptimizingForSize())
431 
432  invokePeepholeEPCallbacks(FPM, Level);
433 
434  // For PGO use pipeline, try to optimize memory intrinsics such as memcpy
435  // using the size value profile. Don't perform this when optimizing for size.
436  if (PGOOpt && PGOOpt->Action == PGOOptions::IRUse &&
437  !Level.isOptimizingForSize())
438  FPM.addPass(PGOMemOPSizeOpt());
439 
440  FPM.addPass(TailCallElimPass());
441  FPM.addPass(SimplifyCFGPass());
442 
443  // Form canonically associated expression trees, and simplify the trees using
444  // basic mathematical properties. For example, this will form (nearly)
445  // minimal multiplication trees.
446  FPM.addPass(ReassociatePass());
447 
448  // Add the primary loop simplification pipeline.
449  // FIXME: Currently this is split into two loop pass pipelines because we run
450  // some function passes in between them. These can and should be removed
451  // and/or replaced by scheduling the loop pass equivalents in the correct
452  // positions. But those equivalent passes aren't powerful enough yet.
453  // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
454  // used. We have `LoopSimplifyCFGPass` which isn't powerful enough yet to
455  // fully replace `SimplifyCFGPass`, and the closest to the other we have is
456  // `LoopInstSimplify`.
457  LoopPassManager LPM1, LPM2;
458 
459  // Simplify the loop body. We do this initially to clean up after other loop
460  // passes run, either when iterating on a loop or on inner loops with
461  // implications on the outer loop.
464 
465  // Try to remove as much code from the loop header as possible,
466  // to reduce amount of IR that will have to be duplicated.
467  // TODO: Investigate promotion cap for O1.
469 
470  // Disable header duplication in loop rotation at -Oz.
471  LPM1.addPass(
473  // TODO: Investigate promotion cap for O1.
475  LPM1.addPass(
476  SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3 &&
478  if (EnableLoopFlatten)
479  LPM1.addPass(LoopFlattenPass());
480 
482  LPM2.addPass(IndVarSimplifyPass());
483 
484  for (auto &C : LateLoopOptimizationsEPCallbacks)
485  C(LPM2, Level);
486 
487  LPM2.addPass(LoopDeletionPass());
488 
491 
492  // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
493  // because it changes IR to make profile annotation in back compile
494  // inaccurate. The normal unroller doesn't pay attention to forced full unroll
495  // attributes so we need to make sure and allow the full unroll pass to pay
496  // attention to it.
497  if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
498  PGOOpt->Action != PGOOptions::SampleUse)
499  LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
500  /* OnlyWhenForced= */ !PTO.LoopUnrolling,
502 
503  for (auto &C : LoopOptimizerEndEPCallbacks)
504  C(LPM2, Level);
505 
506  // We provide the opt remark emitter pass for LICM to use. We only need to do
507  // this once as it is immutable.
508  FPM.addPass(
511  /*UseMemorySSA=*/true,
512  /*UseBlockFrequencyInfo=*/true));
513  FPM.addPass(SimplifyCFGPass());
514  FPM.addPass(InstCombinePass());
515  // The loop passes in LPM2 (LoopIdiomRecognizePass, IndVarSimplifyPass,
516  // LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA.
517  // *All* loop passes must preserve it, in order to be able to use it.
519  /*UseMemorySSA=*/false,
520  /*UseBlockFrequencyInfo=*/false));
521 
522  // Delete small array after loop unroll.
523  FPM.addPass(SROAPass());
524 
525  // The matrix extension can introduce large vector operations early, which can
526  // benefit from running vector-combine early on.
527  if (EnableMatrix)
528  FPM.addPass(VectorCombinePass(/*ScalarizationOnly=*/true));
529 
530  // Eliminate redundancies.
532  if (RunNewGVN)
533  FPM.addPass(NewGVNPass());
534  else
535  FPM.addPass(GVNPass());
536 
537  // Sparse conditional constant propagation.
538  // FIXME: It isn't clear why we do this *after* loop passes rather than
539  // before...
540  FPM.addPass(SCCPPass());
541 
542  // Delete dead bit computations (instcombine runs after to fold away the dead
543  // computations, and then ADCE will run later to exploit any new DCE
544  // opportunities that creates).
545  FPM.addPass(BDCEPass());
546 
547  // Run instcombine after redundancy and dead bit elimination to exploit
548  // opportunities opened up by them.
549  FPM.addPass(InstCombinePass());
550  invokePeepholeEPCallbacks(FPM, Level);
551 
552  // Re-consider control flow based optimizations after redundancy elimination,
553  // redo DCE, etc.
554  if (EnableDFAJumpThreading && Level.getSizeLevel() == 0)
556 
557  FPM.addPass(JumpThreadingPass());
559 
560  // Finally, do an expensive DCE pass to catch all the dead code exposed by
561  // the simplifications and basic cleanup after all the simplifications.
562  // TODO: Investigate if this is too expensive.
563  FPM.addPass(ADCEPass());
564 
565  // Specially optimize memory movement as it doesn't look like dataflow in SSA.
566  FPM.addPass(MemCpyOptPass());
567 
568  FPM.addPass(DSEPass());
571  /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true));
572 
573  FPM.addPass(CoroElidePass());
574 
575  for (auto &C : ScalarOptimizerLateEPCallbacks)
576  C(FPM, Level);
577 
579  SimplifyCFGOptions().hoistCommonInsts(true).sinkCommonInsts(true)));
580  FPM.addPass(InstCombinePass());
581  invokePeepholeEPCallbacks(FPM, Level);
582 
583  if (EnableCHR && Level == OptimizationLevel::O3 && PGOOpt &&
584  (PGOOpt->Action == PGOOptions::IRUse ||
585  PGOOpt->Action == PGOOptions::SampleUse))
587 
588  return FPM;
589 }
590 
591 void PassBuilder::addRequiredLTOPreLinkPasses(ModulePassManager &MPM) {
594 }
595 
596 void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM,
597  OptimizationLevel Level, bool RunProfileGen,
598  bool IsCS, std::string ProfileFile,
599  std::string ProfileRemappingFile) {
600  assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
601  if (!IsCS && !DisablePreInliner) {
603 
604  IP.DefaultThreshold = PreInlineThreshold;
605 
606  // FIXME: The hint threshold has the same value used by the regular inliner
607  // when not optimizing for size. This should probably be lowered after
608  // performance testing.
609  // FIXME: this comment is cargo culted from the old pass manager, revisit.
610  IP.HintThreshold = Level.isOptimizingForSize() ? PreInlineThreshold : 325;
612  CGSCCPassManager &CGPipeline = MIWP.getPM();
613 
615  FPM.addPass(SROAPass());
616  FPM.addPass(EarlyCSEPass()); // Catch trivial redundancies.
617  FPM.addPass(SimplifyCFGPass()); // Merge & remove basic blocks.
618  FPM.addPass(InstCombinePass()); // Combine silly sequences.
619  invokePeepholeEPCallbacks(FPM, Level);
620 
623 
624  MPM.addPass(std::move(MIWP));
625 
626  // Delete anything that is now dead to make sure that we don't instrument
627  // dead code. Instrumentation can end up keeping dead code around and
628  // dramatically increase code size.
630  }
631 
632  if (!RunProfileGen) {
633  assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
634  MPM.addPass(PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS));
635  // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
636  // RequireAnalysisPass for PSI before subsequent non-module passes.
638  return;
639  }
640 
641  // Perform PGO instrumentation.
643 
645  // Disable header duplication in loop rotation at -Oz.
647  LoopRotatePass(Level != OptimizationLevel::Oz), /*UseMemorySSA=*/false,
648  /*UseBlockFrequencyInfo=*/false));
651 
652  // Add the profile lowering pass.
654  if (!ProfileFile.empty())
655  Options.InstrProfileOutput = ProfileFile;
656  // Do counter promotion at Level greater than O0.
657  Options.DoCounterPromotion = true;
658  Options.UseBFIInPromotion = IsCS;
660 }
661 
663  bool RunProfileGen, bool IsCS,
664  std::string ProfileFile,
665  std::string ProfileRemappingFile) {
666  if (!RunProfileGen) {
667  assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
668  MPM.addPass(PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS));
669  // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
670  // RequireAnalysisPass for PSI before subsequent non-module passes.
672  return;
673  }
674 
675  // Perform PGO instrumentation.
677  // Add the profile lowering pass.
679  if (!ProfileFile.empty())
680  Options.InstrProfileOutput = ProfileFile;
681  // Do not do counter promotion at O0.
682  Options.DoCounterPromotion = false;
683  Options.UseBFIInPromotion = IsCS;
685 }
686 
688  return getInlineParams(Level.getSpeedupLevel(), Level.getSizeLevel());
689 }
690 
695  if (Phase == ThinOrFullLTOPhase::ThinLTOPreLink && PGOOpt &&
696  PGOOpt->Action == PGOOptions::SampleUse)
697  IP.HotCallSiteThreshold = 0;
698 
699  if (PGOOpt)
700  IP.EnableDeferral = EnablePGOInlineDeferral;
701 
704 
705  // Require the GlobalsAA analysis for the module so we can query it within
706  // the CGSCC pipeline.
708  // Invalidate AAManager so it can be recreated and pick up the newly available
709  // GlobalsAA.
710  MIWP.addModulePass(
712 
713  // Require the ProfileSummaryAnalysis for the module so we can query it within
714  // the inliner pass.
716 
717  // Now begin the main postorder CGSCC pipeline.
718  // FIXME: The current CGSCC pipeline has its origins in the legacy pass
719  // manager and trying to emulate its precise behavior. Much of this doesn't
720  // make a lot of sense and we should revisit the core CGSCC structure.
721  CGSCCPassManager &MainCGPipeline = MIWP.getPM();
722 
723  // Note: historically, the PruneEH pass was run first to deduce nounwind and
724  // generally clean up exception handling overhead. It isn't clear this is
725  // valuable as the inliner doesn't currently care whether it is inlining an
726  // invoke or a call.
727 
729  MainCGPipeline.addPass(AttributorCGSCCPass());
730 
731  // Now deduce any function attributes based in the current code.
732  MainCGPipeline.addPass(PostOrderFunctionAttrsPass());
733 
734  // When at O3 add argument promotion to the pass pipeline.
735  // FIXME: It isn't at all clear why this should be limited to O3.
737  MainCGPipeline.addPass(ArgumentPromotionPass());
738 
739  // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if
740  // there are no OpenMP runtime calls present in the module.
742  MainCGPipeline.addPass(OpenMPOptCGSCCPass());
743 
744  for (auto &C : CGSCCOptimizerLateEPCallbacks)
745  C(MainCGPipeline, Level);
746 
747  // Lastly, add the core function simplification pipeline nested inside the
748  // CGSCC walk.
752 
753  MainCGPipeline.addPass(CoroSplitPass(Level != OptimizationLevel::O0));
754 
758 
759  return MIWP;
760 }
761 
766 
768  if (Phase == ThinOrFullLTOPhase::ThinLTOPreLink && PGOOpt &&
769  PGOOpt->Action == PGOOptions::SampleUse)
770  IP.HotCallSiteThreshold = 0;
771 
772  if (PGOOpt)
773  IP.EnableDeferral = EnablePGOInlineDeferral;
774 
775  // The inline deferral logic is used to avoid losing some
776  // inlining chance in future. It is helpful in SCC inliner, in which
777  // inlining is processed in bottom-up order.
778  // While in module inliner, the inlining order is a priority-based order
779  // by default. The inline deferral is unnecessary there. So we disable the
780  // inline deferral logic in module inliner.
781  IP.EnableDeferral = false;
782 
784 
788 
791 
792  return MPM;
793 }
794 
799 
800  // Place pseudo probe instrumentation as the first pass of the pipeline to
801  // minimize the impact of optimization changes.
802  if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
805 
806  bool HasSampleProfile = PGOOpt && (PGOOpt->Action == PGOOptions::SampleUse);
807 
808  // In ThinLTO mode, when flattened profile is used, all the available
809  // profile information will be annotated in PreLink phase so there is
810  // no need to load the profile again in PostLink.
811  bool LoadSampleProfile =
812  HasSampleProfile &&
814 
815  // During the ThinLTO backend phase we perform early indirect call promotion
816  // here, before globalopt. Otherwise imported available_externally functions
817  // look unreferenced and are removed. If we are going to load the sample
818  // profile then defer until later.
819  // TODO: See if we can move later and consolidate with the location where
820  // we perform ICP when we are loading a sample profile.
821  // TODO: We pass HasSampleProfile (whether there was a sample profile file
822  // passed to the compile) to the SamplePGO flag of ICP. This is used to
823  // determine whether the new direct calls are annotated with prof metadata.
824  // Ideally this should be determined from whether the IR is annotated with
825  // sample profile, and not whether a sample profile was provided on the
826  // command line. E.g. for flattened profiles where we will not be reloading
827  // the sample profile in the ThinLTO backend, we ideally shouldn't have to
828  // provide the sample profile file.
829  if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink && !LoadSampleProfile)
830  MPM.addPass(PGOIndirectCallPromotion(true /* InLTO */, HasSampleProfile));
831 
832  // Do basic inference of function attributes from known properties of system
833  // libraries and other oracles.
835 
836  // Create an early function pass manager to cleanup the output of the
837  // frontend.
838  FunctionPassManager EarlyFPM;
839  // Lower llvm.expect to metadata before attempting transforms.
840  // Compare/branch metadata may alter the behavior of passes like SimplifyCFG.
841  EarlyFPM.addPass(LowerExpectIntrinsicPass());
842  EarlyFPM.addPass(SimplifyCFGPass());
843  EarlyFPM.addPass(SROAPass());
844  EarlyFPM.addPass(EarlyCSEPass());
845  EarlyFPM.addPass(CoroEarlyPass());
847  EarlyFPM.addPass(CallSiteSplittingPass());
848 
849  // In SamplePGO ThinLTO backend, we need instcombine before profile annotation
850  // to convert bitcast to direct calls so that they can be inlined during the
851  // profile annotation preparation step.
852  // More details about SamplePGO design can be found in:
853  // https://research.google.com/pubs/pub45290.html
854  // FIXME: revisit how SampleProfileLoad/Inliner/ICP is structured.
855  if (LoadSampleProfile)
856  EarlyFPM.addPass(InstCombinePass());
859 
860  if (LoadSampleProfile) {
861  // Annotate sample profile right after early FPM to ensure freshness of
862  // the debug info.
863  MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
864  PGOOpt->ProfileRemappingFile, Phase));
865  // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
866  // RequireAnalysisPass for PSI before subsequent non-module passes.
868  // Do not invoke ICP in the LTOPrelink phase as it makes it hard
869  // for the profile annotation to be accurate in the LTO backend.
872  // We perform early indirect call promotion here, before globalopt.
873  // This is important for the ThinLTO backend phase because otherwise
874  // imported available_externally functions look unreferenced and are
875  // removed.
876  MPM.addPass(
877  PGOIndirectCallPromotion(true /* IsInLTO */, true /* SamplePGO */));
878  }
879 
880  // Try to perform OpenMP specific optimizations on the module. This is a
881  // (quick!) no-op if there are no OpenMP runtime calls present in the module.
884 
887 
888  // Lower type metadata and the type.test intrinsic in the ThinLTO
889  // post link pipeline after ICP. This is to enable usage of the type
890  // tests in ICP sequences.
892  MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
893 
894  for (auto &C : PipelineEarlySimplificationEPCallbacks)
895  C(MPM, Level);
896 
897  // Specialize functions with IPSCCP.
900 
901  // Interprocedural constant propagation now that basic cleanup has occurred
902  // and prior to optimizing globals.
903  // FIXME: This position in the pipeline hasn't been carefully considered in
904  // years, it should be re-analyzed.
906 
907  // Attach metadata to indirect call sites indicating the set of functions
908  // they may target at run-time. This should follow IPSCCP.
910 
911  // Optimize globals to try and fold them into constants.
913 
914  // Promote any localized globals to SSA registers.
915  // FIXME: Should this instead be a run of SROA?
916  // FIXME: We should probably run instcombine and simplifycfg afterward to
917  // delete control flows that are dead once globals have been folded to
918  // constants.
920 
921  // Remove any dead arguments exposed by cleanups and constant folding
922  // globals.
924 
925  // Create a small function pass pipeline to cleanup after all the global
926  // optimizations.
927  FunctionPassManager GlobalCleanupPM;
928  GlobalCleanupPM.addPass(InstCombinePass());
929  invokePeepholeEPCallbacks(GlobalCleanupPM, Level);
930 
931  GlobalCleanupPM.addPass(SimplifyCFGPass());
934 
935  // Add all the requested passes for instrumentation PGO, if requested.
936  if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink &&
937  (PGOOpt->Action == PGOOptions::IRInstr ||
938  PGOOpt->Action == PGOOptions::IRUse)) {
939  addPGOInstrPasses(MPM, Level,
940  /* RunProfileGen */ PGOOpt->Action == PGOOptions::IRInstr,
941  /* IsCS */ false, PGOOpt->ProfileFile,
942  PGOOpt->ProfileRemappingFile);
943  MPM.addPass(PGOIndirectCallPromotion(false, false));
944  }
945  if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink &&
946  PGOOpt->CSAction == PGOOptions::CSIRInstr)
947  MPM.addPass(PGOInstrumentationGenCreateVar(PGOOpt->CSProfileGenFile));
948 
949  // Synthesize function entry counts for non-PGO compilation.
950  if (EnableSyntheticCounts && !PGOOpt)
952 
955  else
957 
961  }
962 
963  return MPM;
964 }
965 
966 /// TODO: Should LTO cause any differences to this set of passes?
967 void PassBuilder::addVectorPasses(OptimizationLevel Level,
968  FunctionPassManager &FPM, bool IsFullLTO) {
971 
972  if (IsFullLTO) {
973  // The vectorizer may have significantly shortened a loop body; unroll
974  // again. Unroll small loops to hide loop backedge latency and saturate any
975  // parallel execution resources of an out-of-order processor. We also then
976  // need to clean up redundancies and loop invariant code.
977  // FIXME: It would be really good to use a loop-integrated instruction
978  // combiner for cleanup here so that the unrolling and LICM can be pipelined
979  // across the loop nests.
980  // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
983  LoopUnrollAndJamPass(Level.getSpeedupLevel())));
985  Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
988  }
989 
990  if (!IsFullLTO) {
991  // Eliminate loads by forwarding stores from the previous iteration to loads
992  // of the current iteration.
994  }
995  // Cleanup after the loop optimization passes.
996  FPM.addPass(InstCombinePass());
997 
998  if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
999  ExtraVectorPassManager ExtraPasses;
1000  // At higher optimization levels, try to clean up any runtime overlap and
1001  // alignment checks inserted by the vectorizer. We want to track correlated
1002  // runtime checks for two inner loops in the same outer loop, fold any
1003  // common computations, hoist loop-invariant aspects out of any outer loop,
1004  // and unswitch the runtime checks if possible. Once hoisted, we may have
1005  // dead (or speculatable) control flows or more combining opportunities.
1006  ExtraPasses.addPass(EarlyCSEPass());
1007  ExtraPasses.addPass(CorrelatedValuePropagationPass());
1008  ExtraPasses.addPass(InstCombinePass());
1009  LoopPassManager LPM;
1011  LPM.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
1013  ExtraPasses.addPass(
1015  ExtraPasses.addPass(
1016  createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/true,
1017  /*UseBlockFrequencyInfo=*/true));
1018  ExtraPasses.addPass(SimplifyCFGPass());
1019  ExtraPasses.addPass(InstCombinePass());
1020  FPM.addPass(std::move(ExtraPasses));
1021  }
1022 
1023  // Now that we've formed fast to execute loop structures, we do further
1024  // optimizations. These are run afterward as they might block doing complex
1025  // analyses and transforms such as what are needed for loop vectorization.
1026 
1027  // Cleanup after loop vectorization, etc. Simplification passes like CVP and
1028  // GVN, loop transforms, and others have already run, so it's now better to
1029  // convert to more optimized IR using more aggressive simplify CFG options.
1030  // The extra sinking transform can create larger basic blocks, so do this
1031  // before SLP vectorization.
1033  .forwardSwitchCondToPhi(true)
1034  .convertSwitchToLookupTable(true)
1035  .needCanonicalLoops(false)
1036  .hoistCommonInsts(true)
1037  .sinkCommonInsts(true)));
1038 
1039  if (IsFullLTO) {
1040  FPM.addPass(SCCPPass());
1041  FPM.addPass(InstCombinePass());
1042  FPM.addPass(BDCEPass());
1043  }
1044 
1045  // Optimize parallel scalar instruction chains into SIMD instructions.
1046  if (PTO.SLPVectorization) {
1047  FPM.addPass(SLPVectorizerPass());
1048  if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
1049  FPM.addPass(EarlyCSEPass());
1050  }
1051  }
1052  // Enhance/cleanup vector code.
1053  FPM.addPass(VectorCombinePass());
1054 
1055  if (!IsFullLTO) {
1056  FPM.addPass(InstCombinePass());
1057  // Unroll small loops to hide loop backedge latency and saturate any
1058  // parallel execution resources of an out-of-order processor. We also then
1059  // need to clean up redundancies and loop invariant code.
1060  // FIXME: It would be really good to use a loop-integrated instruction
1061  // combiner for cleanup here so that the unrolling and LICM can be pipelined
1062  // across the loop nests.
1063  // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
1064  if (EnableUnrollAndJam && PTO.LoopUnrolling) {
1066  LoopUnrollAndJamPass(Level.getSpeedupLevel())));
1067  }
1069  Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
1072  FPM.addPass(InstCombinePass());
1073  FPM.addPass(
1077  /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true));
1078  }
1079 
1080  // Now that we've vectorized and unrolled loops, we may have more refined
1081  // alignment information, try to re-derive it here.
1083 
1084  if (IsFullLTO)
1085  FPM.addPass(InstCombinePass());
1086 }
1087 
1090  bool LTOPreLink) {
1092 
1093  // Optimize globals now that the module is fully simplified.
1096 
1097  // Run partial inlining pass to partially inline functions that have
1098  // large bodies.
1099  if (RunPartialInlining)
1101 
1102  // Remove avail extern fns and globals definitions since we aren't compiling
1103  // an object file for later LTO. For LTO we want to preserve these so they
1104  // are eligible for inlining at link-time. Note if they are unreferenced they
1105  // will be removed by GlobalDCE later, so this only impacts referenced
1106  // available externally globals. Eventually they will be suppressed during
1107  // codegen, but eliminating here enables more opportunity for GlobalDCE as it
1108  // may make globals referenced by available external functions dead and saves
1109  // running remaining passes on the eliminated functions. These should be
1110  // preserved during prelinking for link-time inlining decisions.
1111  if (!LTOPreLink)
1113 
1116 
1117  // Do RPO function attribute inference across the module to forward-propagate
1118  // attributes where applicable.
1119  // FIXME: Is this really an optimization rather than a canonicalization?
1121 
1122  // Do a post inline PGO instrumentation and use pass. This is a context
1123  // sensitive PGO pass. We don't want to do this in LTOPreLink phase as
1124  // cross-module inline has not been done yet. The context sensitive
1125  // instrumentation is after all the inlines are done.
1126  if (!LTOPreLink && PGOOpt) {
1127  if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
1128  addPGOInstrPasses(MPM, Level, /* RunProfileGen */ true,
1129  /* IsCS */ true, PGOOpt->CSProfileGenFile,
1130  PGOOpt->ProfileRemappingFile);
1131  else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
1132  addPGOInstrPasses(MPM, Level, /* RunProfileGen */ false,
1133  /* IsCS */ true, PGOOpt->ProfileFile,
1134  PGOOpt->ProfileRemappingFile);
1135  }
1136 
1137  // Re-require GlobalsAA here prior to function passes. This is particularly
1138  // useful as the above will have inlined, DCE'ed, and function-attr
1139  // propagated everything. We should at this point have a reasonably minimal
1140  // and richly annotated call graph. By computing aliasing and mod/ref
1141  // information for all local globals here, the late loop passes and notably
1142  // the vectorizer will be able to use them to help recognize vectorizable
1143  // memory operations.
1145 
1146  FunctionPassManager OptimizePM;
1147  OptimizePM.addPass(Float2IntPass());
1148  OptimizePM.addPass(LowerConstantIntrinsicsPass());
1149 
1150  if (EnableMatrix) {
1151  OptimizePM.addPass(LowerMatrixIntrinsicsPass());
1152  OptimizePM.addPass(EarlyCSEPass());
1153  }
1154 
1155  // FIXME: We need to run some loop optimizations to re-rotate loops after
1156  // simplifycfg and others undo their rotation.
1157 
1158  // Optimize the loop execution. These passes operate on entire loop nests
1159  // rather than on each loop in an inside-out manner, and so they are actually
1160  // function passes.
1161 
1162  for (auto &C : VectorizerStartEPCallbacks)
1163  C(OptimizePM, Level);
1164 
1165  LoopPassManager LPM;
1166  // First rotate loops that may have been un-rotated by prior passes.
1167  // Disable header duplication at -Oz.
1168  LPM.addPass(LoopRotatePass(Level != OptimizationLevel::Oz, LTOPreLink));
1169  // Some loops may have become dead by now. Try to delete them.
1170  // FIXME: see discussion in https://reviews.llvm.org/D112851,
1171  // this may need to be revisited once we run GVN before loop deletion
1172  // in the simplification pipeline.
1173  LPM.addPass(LoopDeletionPass());
1175  std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/false));
1176 
1177  // Distribute loops to allow partial vectorization. I.e. isolate dependences
1178  // into a separate loop that would otherwise inhibit vectorization. This is
1179  // currently only performed for loops marked with the metadata
1180  // llvm.loop.distribute=true or when -enable-loop-distribute is specified.
1181  OptimizePM.addPass(LoopDistributePass());
1182 
1183  // Populates the VFABI attribute with the scalar-to-vector mappings
1184  // from the TargetLibraryInfo.
1185  OptimizePM.addPass(InjectTLIMappings());
1186 
1187  addVectorPasses(Level, OptimizePM, /* IsFullLTO */ false);
1188 
1189  // LoopSink pass sinks instructions hoisted by LICM, which serves as a
1190  // canonicalization pass that enables other optimizations. As a result,
1191  // LoopSink pass needs to be a very late IR pass to avoid undoing LICM
1192  // result too early.
1193  OptimizePM.addPass(LoopSinkPass());
1194 
1195  // And finally clean up LCSSA form before generating code.
1196  OptimizePM.addPass(InstSimplifyPass());
1197 
1198  // This hoists/decomposes div/rem ops. It should run after other sink/hoist
1199  // passes to avoid re-sinking, but before SimplifyCFG because it can allow
1200  // flattening of blocks.
1201  OptimizePM.addPass(DivRemPairsPass());
1202 
1203  // LoopSink (and other loop passes since the last simplifyCFG) might have
1204  // resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
1205  OptimizePM.addPass(SimplifyCFGPass());
1206 
1207  OptimizePM.addPass(CoroCleanupPass());
1208 
1209  // Add the core optimizing pipeline.
1212 
1213  for (auto &C : OptimizerLastEPCallbacks)
1214  C(MPM, Level);
1215 
1216  // Split out cold code. Splitting is done late to avoid hiding context from
1217  // other optimizations and inadvertently regressing performance. The tradeoff
1218  // is that this has a higher code size cost than splitting early.
1219  if (EnableHotColdSplit && !LTOPreLink)
1221 
1222  // Search the code for similar regions of code. If enough similar regions can
1223  // be found where extracting the regions into their own function will decrease
1224  // the size of the program, we extract the regions, and deduplicate the
1225  // structurally similar regions.
1226  if (EnableIROutliner)
1228 
1229  // Merge functions if requested.
1230  if (PTO.MergeFunctions)
1232 
1233  if (PTO.CallGraphProfile)
1235 
1236  // Now we need to do some global optimization transforms.
1237  // FIXME: It would seem like these should come first in the optimization
1238  // pipeline and maybe be the bottom of the canonicalization pipeline? Weird
1239  // ordering here.
1242 
1243  // TODO: Relative lookup table converter pass caused an issue when full LTO is
1244  // enabled. See https://reviews.llvm.org/D94355 for more details.
1245  // Until the issue is fixed, disable this pass during pre-linking phase.
1246  if (!LTOPreLink)
1248 
1249  return MPM;
1250 }
1251 
1254  bool LTOPreLink) {
1256  "Must request optimizations for the default pipeline!");
1257 
1259 
1260  // Convert @llvm.global.annotations to !annotation metadata.
1262 
1263  // Force any function attributes we want the rest of the pipeline to observe.
1265 
1266  // Apply module pipeline start EP callback.
1267  for (auto &C : PipelineStartEPCallbacks)
1268  C(MPM, Level);
1269 
1270  if (PGOOpt && PGOOpt->DebugInfoForProfiling)
1272 
1273  // Add the core simplification pipeline.
1277 
1278  // Now add the optimization pipeline.
1280 
1281  if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1282  PGOOpt->Action == PGOOptions::SampleUse)
1284 
1285  // Emit annotation remarks.
1287 
1288  if (LTOPreLink)
1289  addRequiredLTOPreLinkPasses(MPM);
1290 
1291  return MPM;
1292 }
1293 
1297  "Must request optimizations for the default pipeline!");
1298 
1300 
1301  // Convert @llvm.global.annotations to !annotation metadata.
1303 
1304  // Force any function attributes we want the rest of the pipeline to observe.
1306 
1307  if (PGOOpt && PGOOpt->DebugInfoForProfiling)
1309 
1310  // Apply module pipeline start EP callback.
1311  for (auto &C : PipelineStartEPCallbacks)
1312  C(MPM, Level);
1313 
1314  // If we are planning to perform ThinLTO later, we don't bloat the code with
1315  // unrolling/vectorization/... now. Just simplify the module as much as we
1316  // can.
1319 
1320  // Run partial inlining pass to partially inline functions that have
1321  // large bodies.
1322  // FIXME: It isn't clear whether this is really the right place to run this
1323  // in ThinLTO. Because there is another canonicalization and simplification
1324  // phase that will run after the thin link, running this here ends up with
1325  // less information than will be available later and it may grow functions in
1326  // ways that aren't beneficial.
1327  if (RunPartialInlining)
1329 
1330  // Reduce the size of the IR as much as possible.
1332 
1333  // Module simplification splits coroutines, but does not fully clean up
1334  // coroutine intrinsics. To ensure ThinLTO optimization passes don't trip up
1335  // on these, we schedule the cleanup here.
1337 
1338  if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1339  PGOOpt->Action == PGOOptions::SampleUse)
1341 
1342  // Handle OptimizerLastEPCallbacks added by clang on PreLink. Actual
1343  // optimization is going to be done in PostLink stage, but clang can't
1344  // add callbacks there in case of in-process ThinLTO called by linker.
1345  for (auto &C : OptimizerLastEPCallbacks)
1346  C(MPM, Level);
1347 
1348  // Emit annotation remarks.
1350 
1351  addRequiredLTOPreLinkPasses(MPM);
1352 
1353  return MPM;
1354 }
1355 
1357  OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary) {
1359 
1360  // Convert @llvm.global.annotations to !annotation metadata.
1362 
1363  if (ImportSummary) {
1364  // These passes import type identifier resolutions for whole-program
1365  // devirtualization and CFI. They must run early because other passes may
1366  // disturb the specific instruction patterns that these passes look for,
1367  // creating dependencies on resolutions that may not appear in the summary.
1368  //
1369  // For example, GVN may transform the pattern assume(type.test) appearing in
1370  // two basic blocks into assume(phi(type.test, type.test)), which would
1371  // transform a dependency on a WPD resolution into a dependency on a type
1372  // identifier resolution for CFI.
1373  //
1374  // Also, WPD has access to more precise information than ICP and can
1375  // devirtualize more effectively, so it should operate on the IR first.
1376  //
1377  // The WPD and LowerTypeTest passes need to run at -O0 to lower type
1378  // metadata and intrinsics.
1379  MPM.addPass(WholeProgramDevirtPass(nullptr, ImportSummary));
1380  MPM.addPass(LowerTypeTestsPass(nullptr, ImportSummary));
1381  }
1382 
1383  if (Level == OptimizationLevel::O0) {
1384  // Run a second time to clean up any type tests left behind by WPD for use
1385  // in ICP.
1386  MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
1387  // Drop available_externally and unreferenced globals. This is necessary
1388  // with ThinLTO in order to avoid leaving undefined references to dead
1389  // globals in the object file.
1392  return MPM;
1393  }
1394 
1395  // Force any function attributes we want the rest of the pipeline to observe.
1397 
1398  // Add the core simplification pipeline.
1401 
1402  // Now add the optimization pipeline.
1404 
1405  // Emit annotation remarks.
1407 
1408  return MPM;
1409 }
1410 
1414  "Must request optimizations for the default pipeline!");
1415  // FIXME: We should use a customized pre-link pipeline!
1417  /* LTOPreLink */ true);
1418 }
1419 
1422  ModuleSummaryIndex *ExportSummary) {
1424 
1425  // Convert @llvm.global.annotations to !annotation metadata.
1427 
1428  // Create a function that performs CFI checks for cross-DSO calls with targets
1429  // in the current module.
1431 
1432  if (Level == OptimizationLevel::O0) {
1433  // The WPD and LowerTypeTest passes need to run at -O0 to lower type
1434  // metadata and intrinsics.
1435  MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
1436  MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
1437  // Run a second time to clean up any type tests left behind by WPD for use
1438  // in ICP.
1439  MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
1440 
1441  // Emit annotation remarks.
1443 
1444  return MPM;
1445  }
1446 
1447  if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) {
1448  // Load sample profile before running the LTO optimization pipeline.
1449  MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
1450  PGOOpt->ProfileRemappingFile,
1452  // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
1453  // RequireAnalysisPass for PSI before subsequent non-module passes.
1455  }
1456 
1457  // Remove unused virtual tables to improve the quality of code generated by
1458  // whole-program devirtualization and bitset lowering.
1460 
1461  // Force any function attributes we want the rest of the pipeline to observe.
1463 
1464  // Do basic inference of function attributes from known properties of system
1465  // libraries and other oracles.
1467 
1468  if (Level.getSpeedupLevel() > 1) {
1469  FunctionPassManager EarlyFPM;
1470  EarlyFPM.addPass(CallSiteSplittingPass());
1472  std::move(EarlyFPM), PTO.EagerlyInvalidateAnalyses));
1473 
1474  // Indirect call promotion. This should promote all the targets that are
1475  // left by the earlier promotion pass that promotes intra-module targets.
1476  // This two-step promotion is to save the compile time. For LTO, it should
1477  // produce the same result as if we only do promotion here.
1479  true /* InLTO */, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
1480 
1483  // Propagate constants at call sites into the functions they call. This
1484  // opens opportunities for globalopt (and inlining) by substituting function
1485  // pointers passed as arguments to direct uses of functions.
1486  MPM.addPass(IPSCCPPass());
1487 
1488  // Attach metadata to indirect call sites indicating the set of functions
1489  // they may target at run-time. This should follow IPSCCP.
1491  }
1492 
1493  // Now deduce any function attributes based on the current code.
1494  MPM.addPass(
1496 
1497  // Do RPO function attribute inference across the module to forward-propagate
1498  // attributes where applicable.
1499  // FIXME: Is this really an optimization rather than a canonicalization?
1501 
1502  // Use in-range annotations on GEP indices to split globals where beneficial.
1504 
1505  // Run whole program optimization of virtual call when the list of callees
1506  // is fixed.
1507  MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
1508 
1509  // Stop here at -O1.
1510  if (Level == OptimizationLevel::O1) {
1511  // The LowerTypeTestsPass needs to run to lower type metadata and the
1512  // type.test intrinsics. The pass does nothing if CFI is disabled.
1513  MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
1514  // Run a second time to clean up any type tests left behind by WPD for use
1515  // in ICP (which is performed earlier than this in the regular LTO
1516  // pipeline).
1517  MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
1518 
1519  // Emit annotation remarks.
1521 
1522  return MPM;
1523  }
1524 
1525  // Optimize globals to try and fold them into constants.
1527 
1528  // Promote any localized globals to SSA registers.
1530 
1531  // Linking modules together can lead to duplicate global constants; only
1532  // keep one copy of each constant.
1534 
1535  // Remove unused arguments from functions.
1537 
1538  // Reduce the code after globalopt and ipsccp. Both can open up significant
1539  // simplification opportunities, and both can propagate functions through
1540  // function pointers. When this happens, we often have to resolve varargs
1541  // calls, etc, so let instcombine do this.
1542  FunctionPassManager PeepholeFPM;
1543  PeepholeFPM.addPass(InstCombinePass());
1545  PeepholeFPM.addPass(AggressiveInstCombinePass());
1546  invokePeepholeEPCallbacks(PeepholeFPM, Level);
1547 
1550 
1551  // Note: historically, the PruneEH pass was run first to deduce nounwind and
1552  // generally clean up exception handling overhead. It isn't clear this is
1553  // valuable as the inliner doesn't currently care whether it is inlining an
1554  // invoke or a call.
1555  // Run the inliner now.
1557 
1558  // Optimize globals again after we ran the inliner.
1560 
1561  // Garbage collect dead functions.
1563 
1564  // If we didn't decide to inline a function, check to see if we can
1565  // transform it to pass arguments by value instead of by reference.
1567 
1568  FunctionPassManager FPM;
1569  // The IPO Passes may leave cruft around. Clean up after them.
1570  FPM.addPass(InstCombinePass());
1571  invokePeepholeEPCallbacks(FPM, Level);
1572 
1573  FPM.addPass(JumpThreadingPass(/*InsertFreezeWhenUnfoldingSelect*/ true));
1574 
1575  // Do a post inline PGO instrumentation and use pass. This is a context
1576  // sensitive PGO pass.
1577  if (PGOOpt) {
1578  if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
1579  addPGOInstrPasses(MPM, Level, /* RunProfileGen */ true,
1580  /* IsCS */ true, PGOOpt->CSProfileGenFile,
1581  PGOOpt->ProfileRemappingFile);
1582  else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
1583  addPGOInstrPasses(MPM, Level, /* RunProfileGen */ false,
1584  /* IsCS */ true, PGOOpt->ProfileFile,
1585  PGOOpt->ProfileRemappingFile);
1586  }
1587 
1588  // Break up allocas
1589  FPM.addPass(SROAPass());
1590 
1591  // LTO provides additional opportunities for tailcall elimination due to
1592  // link-time inlining, and visibility of nocapture attribute.
1593  FPM.addPass(TailCallElimPass());
1594 
1595  // Run a few AA driver optimizations here and now to cleanup the code.
1598 
1599  MPM.addPass(
1601 
1602  // Require the GlobalsAA analysis for the module so we can query it within
1603  // MainFPM.
1605  // Invalidate AAManager so it can be recreated and pick up the newly available
1606  // GlobalsAA.
1607  MPM.addPass(
1609 
1610  FunctionPassManager MainFPM;
1613  /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true));
1614 
1615  if (RunNewGVN)
1616  MainFPM.addPass(NewGVNPass());
1617  else
1618  MainFPM.addPass(GVNPass());
1619 
1620  // Remove dead memcpy()'s.
1621  MainFPM.addPass(MemCpyOptPass());
1622 
1623  // Nuke dead stores.
1624  MainFPM.addPass(DSEPass());
1626 
1627 
1630 
1631  LoopPassManager LPM;
1632  if (EnableLoopFlatten && Level.getSpeedupLevel() > 1)
1633  LPM.addPass(LoopFlattenPass());
1634  LPM.addPass(IndVarSimplifyPass());
1635  LPM.addPass(LoopDeletionPass());
1636  // FIXME: Add loop interchange.
1637 
1638  // Unroll small loops and perform peeling.
1639  LPM.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
1640  /* OnlyWhenForced= */ !PTO.LoopUnrolling,
1642  // The loop passes in LPM (LoopFullUnrollPass) do not preserve MemorySSA.
1643  // *All* loop passes must preserve it, in order to be able to use it.
1645  std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/true));
1646 
1647  MainFPM.addPass(LoopDistributePass());
1648 
1649  addVectorPasses(Level, MainFPM, /* IsFullLTO */ true);
1650 
1651  invokePeepholeEPCallbacks(MainFPM, Level);
1652  MainFPM.addPass(JumpThreadingPass(/*InsertFreezeWhenUnfoldingSelect*/ true));
1655 
1656  // Lower type metadata and the type.test intrinsic. This pass supports
1657  // clang's control flow integrity mechanisms (-fsanitize=cfi*) and needs
1658  // to be run at link time if CFI is enabled. This pass does nothing if
1659  // CFI is disabled.
1660  MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
1661  // Run a second time to clean up any type tests left behind by WPD for use
1662  // in ICP (which is performed earlier than this in the regular LTO pipeline).
1663  MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
1664 
1665  // Enable splitting late in the FullLTO post-link pipeline. This is done in
1666  // the same stage in the old pass manager (\ref addLateLTOOptimizationPasses).
1667  if (EnableHotColdSplit)
1669 
1670  // Add late LTO optimization passes.
1671  // Delete basic blocks, which optimization passes may have killed.
1673  SimplifyCFGPass(SimplifyCFGOptions().hoistCommonInsts(true))));
1674 
1675  // Drop bodies of available externally objects to improve GlobalDCE.
1677 
1678  // Now that we have optimized the program, discard unreachable functions.
1680 
1681  if (PTO.MergeFunctions)
1683 
1684  // Emit annotation remarks.
1686 
1687  return MPM;
1688 }
1689 
1691  bool LTOPreLink) {
1693  "buildO0DefaultPipeline should only be used with O0");
1694 
1696 
1697  // Perform pseudo probe instrumentation in O0 mode. This is for the
1698  // consistency between different build modes. For example, a LTO build can be
1699  // mixed with an O0 prelink and an O2 postlink. Loading a sample profile in
1700  // the postlink will require pseudo probe instrumentation in the prelink.
1701  if (PGOOpt && PGOOpt->PseudoProbeForProfiling)
1703 
1704  if (PGOOpt && (PGOOpt->Action == PGOOptions::IRInstr ||
1705  PGOOpt->Action == PGOOptions::IRUse))
1707  MPM,
1708  /* RunProfileGen */ (PGOOpt->Action == PGOOptions::IRInstr),
1709  /* IsCS */ false, PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile);
1710 
1711  for (auto &C : PipelineStartEPCallbacks)
1712  C(MPM, Level);
1713 
1714  if (PGOOpt && PGOOpt->DebugInfoForProfiling)
1716 
1717  for (auto &C : PipelineEarlySimplificationEPCallbacks)
1718  C(MPM, Level);
1719 
1720  // Build a minimal pipeline based on the semantics required by LLVM,
1721  // which is just that always inlining occurs. Further, disable generating
1722  // lifetime intrinsics to avoid enabling further optimizations during
1723  // code generation.
1725  /*InsertLifetimeIntrinsics=*/false));
1726 
1727  if (PTO.MergeFunctions)
1729 
1730  if (EnableMatrix)
1731  MPM.addPass(
1733 
1734  if (!CGSCCOptimizerLateEPCallbacks.empty()) {
1735  CGSCCPassManager CGPM;
1736  for (auto &C : CGSCCOptimizerLateEPCallbacks)
1737  C(CGPM, Level);
1738  if (!CGPM.isEmpty())
1740  }
1741  if (!LateLoopOptimizationsEPCallbacks.empty()) {
1742  LoopPassManager LPM;
1743  for (auto &C : LateLoopOptimizationsEPCallbacks)
1744  C(LPM, Level);
1745  if (!LPM.isEmpty()) {
1748  }
1749  }
1750  if (!LoopOptimizerEndEPCallbacks.empty()) {
1751  LoopPassManager LPM;
1752  for (auto &C : LoopOptimizerEndEPCallbacks)
1753  C(LPM, Level);
1754  if (!LPM.isEmpty()) {
1757  }
1758  }
1759  if (!ScalarOptimizerLateEPCallbacks.empty()) {
1760  FunctionPassManager FPM;
1761  for (auto &C : ScalarOptimizerLateEPCallbacks)
1762  C(FPM, Level);
1763  if (!FPM.isEmpty())
1765  }
1766  if (!VectorizerStartEPCallbacks.empty()) {
1767  FunctionPassManager FPM;
1768  for (auto &C : VectorizerStartEPCallbacks)
1769  C(FPM, Level);
1770  if (!FPM.isEmpty())
1772  }
1773 
1775  CGSCCPassManager CGPM;
1776  CGPM.addPass(CoroSplitPass());
1779 
1780  for (auto &C : OptimizerLastEPCallbacks)
1781  C(MPM, Level);
1782 
1783  if (LTOPreLink)
1784  addRequiredLTOPreLinkPasses(MPM);
1785 
1787 
1788  return MPM;
1789 }
1790 
1792  AAManager AA;
1793 
1794  // The order in which these are registered determines their priority when
1795  // being queried.
1796 
1797  // First we register the basic alias analysis that provides the majority of
1798  // per-function local AA logic. This is a stateless, on-demand local set of
1799  // AA techniques.
1801 
1802  // Next we query fast, specialized alias analyses that wrap IR-embedded
1803  // information about aliasing.
1806 
1807  // Add support for querying global aliasing information when available.
1808  // Because the `AAManager` is a function analysis and `GlobalsAA` is a module
1809  // analysis, all that the `AAManager` can do is query for any *cached*
1810  // results from `GlobalsAA` through a readonly proxy.
1812 
1813  // Add target-specific alias analyses.
1814  if (TM)
1816 
1817  return AA;
1818 }
llvm::GlobalsAA
Analysis pass providing a never-invalidated alias analysis result.
Definition: GlobalsModRef.h:132
TypeBasedAliasAnalysis.h
llvm::PassManager< Loop, LoopAnalysisManager, LoopStandardAnalysisResults &, LPMUpdater & >
Definition: LoopPassManager.h:70
llvm::BasicAA
Analysis pass providing a never-invalidated alias analysis result.
Definition: BasicAliasAnalysis.h:163
llvm::IPSCCPPass
Pass to perform interprocedural constant propagation.
Definition: SCCP.h:30
llvm::ScopedNoAliasAA
Analysis pass providing a never-invalidated alias analysis result.
Definition: ScopedNoAliasAA.h:53
llvm::FunctionSpecializationPass
Pass to perform interprocedural constant propagation by specializing functions.
Definition: SCCP.h:37
llvm::AAManager
A manager for alias analyses.
Definition: AliasAnalysis.h:1287
IROutliner.h
llvm::RunNewGVN
cl::opt< bool > RunNewGVN
getInlineParamsFromOptLevel
static InlineParams getInlineParamsFromOptLevel(OptimizationLevel Level)
Definition: PassBuilderPipelines.cpp:687
DeadArgumentElimination.h
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AllocatorList.h:23
PassBuilder.h
Phase
aarch64 falkor hwpf fix Falkor HW Prefetch Fix Late Phase
Definition: AArch64FalkorHWPFFix.cpp:231
llvm::SampleProfileProbePass
Definition: SampleProfileProbe.h:133
Mem2Reg.h
llvm::OptimizationLevel::O1
static const OptimizationLevel O1
Optimize quickly without destroying debuggability.
Definition: OptimizationLevel.h:57
llvm::LoopSinkPass
A pass that does profile-guided sinking of instructions into loops.
Definition: LoopSink.h:33
llvm::PassManager::isEmpty
bool isEmpty() const
Returns true if the pass manager contains no passes.
Definition: PassManager.h:577
OpenMPOpt.h
llvm::InferFunctionAttrsPass
A pass which infers function attributes from the names and signatures of function declarations in a m...
Definition: InferFunctionAttrs.h:25
llvm::SampleProfileLoaderPass
The sample profiler data loader pass.
Definition: SampleProfile.h:25
CalledValuePropagation.h
Annotation2Metadata.h
llvm::LoopInterchangePass
Definition: LoopInterchange.h:17
llvm::AlignmentFromAssumptionsPass
Definition: AlignmentFromAssumptions.h:29
llvm::EnableHotColdSplit
cl::opt< bool > EnableHotColdSplit
PartialInlining.h
Inliner.h
llvm::Annotation2MetadataPass
Pass to convert @llvm.global.annotations to !annotation metadata.
Definition: Annotation2Metadata.h:24
llvm::ThinOrFullLTOPhase::ThinLTOPostLink
@ ThinLTOPostLink
ThinLTO postlink (backend compile) phase.
GlobalSplit.h
llvm::GVNHoistPass
A simple and fast domtree-based GVN pass to hoist common expressions from sibling branches.
Definition: GVN.h:375
CorrelatedValuePropagation.h
llvm::LoopIdiomRecognizePass
Performs Loop Idiom Recognize Pass.
Definition: LoopIdiomRecognize.h:40
llvm::ExtraVectorizerPasses
cl::opt< bool > ExtraVectorizerPasses
llvm::EnableConstraintElimination
cl::opt< bool > EnableConstraintElimination
llvm::PassManager::addPass
LLVM_ATTRIBUTE_MINSIZE std::enable_if_t<!std::is_same< PassT, PassManager >::value > addPass(PassT &&Pass)
Definition: PassManager.h:553
llvm::OptimizationLevel::O2
static const OptimizationLevel O2
Optimize for fast execution as much as possible without triggering significant incremental compile ti...
Definition: OptimizationLevel.h:74
llvm::CrossDSOCFIPass
Definition: CrossDSOCFI.h:20
llvm::ReassociatePass
Reassociate commutative expressions.
Definition: Reassociate.h:71
ConstantMerge.h
llvm::SetLicmMssaNoAccForPromotionCap
cl::opt< unsigned > SetLicmMssaNoAccForPromotionCap
AlignmentFromAssumptions.h
IndVarSimplify.h
ErrorHandling.h
llvm::RunPartialInlining
cl::opt< bool > RunPartialInlining
SCCP.h
llvm::PassBuilder::buildModuleOptimizationPipeline
ModulePassManager buildModuleOptimizationPipeline(OptimizationLevel Level, bool LTOPreLink=false)
Construct the core LLVM module optimization pipeline.
Definition: PassBuilderPipelines.cpp:1089
TailRecursionElimination.h
DivRemPairs.h
llvm::LoopRotatePass
A simple loop rotation transformation.
Definition: LoopRotation.h:23
llvm::BDCEPass
Definition: BDCE.h:25
DeadStoreElimination.h
OptimizationRemarkEmitter.h
CoroCleanup.h
GlobalsModRef.h
VectorCombine.h
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
MergeFunctions.h
llvm::PseudoProbeUpdatePass
Definition: SampleProfileProbe.h:153
LoopFlatten.h
EnableModuleInliner
static cl::opt< bool > EnableModuleInliner("enable-module-inliner", cl::init(false), cl::Hidden, cl::desc("Enable module inliner"))
ElimAvailExtern.h
llvm::PipelineTuningOptions::ForgetAllSCEVInLoopUnroll
bool ForgetAllSCEVInLoopUnroll
Tuning option to forget all SCEV loops in LoopUnroll.
Definition: PassBuilder.h:60
llvm::LoopUnrollOptions
A set of parameters used to control various transforms performed by the LoopUnroll pass.
Definition: LoopUnrollPass.h:61
InjectTLIMappings.h
ConstraintElimination.h
Float2Int.h
CGProfile.h
llvm::LoopDistributePass
Definition: LoopDistribute.h:25
JumpThreading.h
HotColdSplitting.h
llvm::EnableKnowledgeRetention
cl::opt< bool > EnableKnowledgeRetention
enable preservation of attributes in assume like: call void @llvm.assume(i1 true) [ "nonnull"(i32* PT...
Definition: InstCombineCalls.cpp:98
llvm::PGOInstrumentationUse
The profile annotation (profile-instr-use) pass for IR based PGO.
Definition: PGOInstrumentation.h:58
llvm::InlineParams
Thresholds to tune inline cost analysis.
Definition: InlineCost.h:184
LoopUnrollAndJamPass.h
llvm::LoopLoadEliminationPass
Pass to forward loads in a loop around the backedge to subsequent iterations.
Definition: LoopLoadElimination.h:27
llvm::EnableOrderFileInstrumentation
cl::opt< bool > EnableOrderFileInstrumentation
llvm::ThinOrFullLTOPhase::ThinLTOPreLink
@ ThinLTOPreLink
ThinLTO prelink (summary) phase.
llvm::TailCallElimPass
Definition: TailRecursionElimination.h:60
llvm::CoroCleanupPass
Definition: CoroCleanup.h:23
llvm::EnableMatrix
cl::opt< bool > EnableMatrix
llvm::createCGSCCToFunctionPassAdaptor
CGSCCToFunctionPassAdaptor createCGSCCToFunctionPassAdaptor(FunctionPassT &&Pass, bool EagerlyInvalidate=false, bool NoRerun=false)
A function to deduce a function pass type and wrap it in the templated adaptor.
Definition: CGSCCPassManager.h:525
LoopInstSimplify.h
llvm::SLPVectorizerPass
Definition: SLPVectorizer.h:58
llvm::ReversePostOrderFunctionAttrsPass
A pass to do RPO deduction and propagation of function attributes.
Definition: FunctionAttrs.h:77
llvm::PartialInlinerPass
Pass to perform partial inlining.
Definition: PartialInlining.h:24
llvm::PGOOptions::SampleUse
@ SampleUse
Definition: PGOOptions.h:23
llvm::DSEPass
This class implements a trivial dead store elimination.
Definition: DeadStoreElimination.h:28
llvm::OptimizationLevel::O0
static const OptimizationLevel O0
Disable as many optimizations as possible.
Definition: OptimizationLevel.h:41
llvm::PassBuilder::addPGOInstrPassesForO0
void addPGOInstrPassesForO0(ModulePassManager &MPM, bool RunProfileGen, bool IsCS, std::string ProfileFile, std::string ProfileRemappingFile)
Add PGOInstrumentation passes for O0 only.
Definition: PassBuilderPipelines.cpp:662
BasicAliasAnalysis.h
CoroElide.h
llvm::PipelineTuningOptions::LicmMssaNoAccForPromotionCap
unsigned LicmMssaNoAccForPromotionCap
Tuning option to disable promotion to scalars in LICM with MemorySSA, if the number of access is too ...
Definition: PassBuilder.h:68
MergedLoadStoreMotion.h
llvm::AggressiveInstCombinePass
Definition: AggressiveInstCombine.h:24
llvm::InstrProfiling
Instrumentation based profiling lowering pass.
Definition: InstrProfiling.h:35
llvm::ConstraintEliminationPass
Definition: ConstraintElimination.h:16
llvm::HotColdSplittingPass
Pass to outline cold regions.
Definition: HotColdSplitting.h:61
llvm::TypeBasedAA
Analysis pass providing a never-invalidated alias analysis result.
Definition: TypeBasedAliasAnalysis.h:59
AliasAnalysis.h
llvm::ThinOrFullLTOPhase
ThinOrFullLTOPhase
This enumerates the LLVM full LTO or ThinLTO optimization phases.
Definition: Pass.h:73
CommandLine.h
LoopRotation.h
llvm::MODULE
@ MODULE
Definition: Attributor.h:4792
llvm::ModuleInlinerWrapperPass
Module pass, wrapping the inliner pass.
Definition: Inliner.h:120
UseInlineAdvisor
static cl::opt< InliningAdvisorMode > UseInlineAdvisor("enable-ml-inliner", cl::init(InliningAdvisorMode::Default), cl::Hidden, cl::desc("Enable ML policy for inliner. Currently trained for -Oz only"), cl::values(clEnumValN(InliningAdvisorMode::Default, "default", "Heuristics-based inliner version."), clEnumValN(InliningAdvisorMode::Development, "development", "Use development mode (runtime-loadable model)."), clEnumValN(InliningAdvisorMode::Release, "release", "Use release mode (AOT-compiled model).")))
llvm::CoroElidePass
Definition: CoroElide.h:25
llvm::ControlHeightReductionPass
Definition: ControlHeightReduction.h:22
llvm::CorrelatedValuePropagationPass
Definition: CorrelatedValuePropagation.h:18
llvm::FlattenedProfileUsed
cl::opt< bool > FlattenedProfileUsed
TargetMachine.h
llvm::AttributorPass
}
Definition: Attributor.h:2875
AlwaysInliner.h
InstrProfiling.h
LoopIdiomRecognize.h
llvm::LICMPass
Performs Loop Invariant Code Motion Pass.
Definition: LICM.h:46
ArgumentPromotion.h
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::PostOrderFunctionAttrsPass
Computes function attributes in post-order over the call graph.
Definition: FunctionAttrs.h:58
llvm::ModuleInlinerWrapperPass::getPM
CGSCCPassManager & getPM()
Allow adding more CGSCC passes, besides inlining.
Definition: Inliner.h:133
EnableNoRerunSimplificationPipeline
static cl::opt< bool > EnableNoRerunSimplificationPipeline("enable-no-rerun-simplification-pipeline", cl::init(false), cl::Hidden, cl::desc("Prevent running the simplification pipeline on a function more " "than once in the case that SCC mutations cause a function to be " "visited multiple times as long as the function has not been changed"))
llvm::PassBuilder::buildFunctionSimplificationPipeline
FunctionPassManager buildFunctionSimplificationPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the core LLVM function canonicalization and simplification pipeline.
Definition: PassBuilderPipelines.cpp:384
llvm::PGOOptions::IRUse
@ IRUse
Definition: PGOOptions.h:23
llvm::CoroEarlyPass
Definition: CoroEarly.h:26
IP
Definition: NVPTXLowerArgs.cpp:166
AssumeBundleBuilder.h
llvm::DisablePreInliner
cl::opt< bool > DisablePreInliner
llvm::GVNSinkPass
Uses an "inverted" value numbering to decide the similarity of expressions and sinks similar expressi...
Definition: GVN.h:382
InlineAdvisor.h
llvm::OpenMPOptCGSCCPass
Definition: OpenMPOpt.h:43
llvm::PGOIndirectCallPromotion
The indirect function call promotion pass.
Definition: PGOInstrumentation.h:73
Options
const char LLVMTargetMachineRef LLVMPassBuilderOptionsRef Options
Definition: PassBuilderBindings.cpp:48
MemCpyOptimizer.h
llvm::CanonicalizeAliasesPass
Simple pass that canonicalizes aliases.
Definition: CanonicalizeAliases.h:22
GVN.h
llvm::LoopDeletionPass
Definition: LoopDeletion.h:24
llvm::PromotePass
Definition: Mem2Reg.h:23
llvm::LoopFlattenPass
Definition: LoopFlatten.h:23
EnableMergeFunctions
static cl::opt< bool > EnableMergeFunctions("enable-merge-functions", cl::init(false), cl::Hidden, cl::desc("Enable function merging as part of the optimization pipeline"))
llvm::getInlineParams
InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options.
Definition: InlineCost.cpp:3063
llvm::PGOOptions::CSIRInstr
@ CSIRInstr
Definition: PGOOptions.h:24
llvm::PreInlineThreshold
cl::opt< int > PreInlineThreshold
llvm::ModuleInlinerPass
The module inliner pass for the new pass manager.
Definition: ModuleInliner.h:30
llvm::PassBuilder::buildInlinerPipeline
ModuleInlinerWrapperPass buildInlinerPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the module pipeline that performs inlining as well as the inlining-driven cleanups.
Definition: PassBuilderPipelines.cpp:692
llvm::GlobalOptPass
Optimize globals that never have their address taken.
Definition: GlobalOpt.h:25
llvm::SyntheticCountsPropagation
Definition: SyntheticCountsPropagation.h:17
llvm::PGOMemOPSizeOpt
The profile size based optimization pass for memory intrinsics.
Definition: PGOInstrumentation.h:86
llvm::PassBuilder::buildThinLTOPreLinkDefaultPipeline
ModulePassManager buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level)
Build a pre-link, ThinLTO-targeting default optimization pipeline to a pass manager.
Definition: PassBuilderPipelines.cpp:1295
ScopedNoAliasAA.h
addAnnotationRemarksPass
static void addAnnotationRemarksPass(ModulePassManager &MPM)
Definition: PassBuilderPipelines.cpp:235
llvm::EnableDFAJumpThreading
cl::opt< bool > EnableDFAJumpThreading
EnablePGOInlineDeferral
static cl::opt< bool > EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true), cl::Hidden, cl::desc("Enable inline deferral during PGO"))
Flag to enable inline deferral during PGO.
llvm::MemProfilerPass
Public interface to the memory profiler pass for instrumenting code to profile memory accesses.
Definition: MemProfiler.h:28
llvm::LoopSimplifyCFGPass
Performs basic CFG simplifications to assist other loop passes.
Definition: LoopSimplifyCFG.h:26
llvm::PGOInstrumentationGenCreateVar
The instrumentation (profile-instr-gen) pass for IR based PGO.
Definition: PGOInstrumentation.h:35
llvm::MergedLoadStoreMotionPass
Definition: MergedLoadStoreMotion.h:41
ControlHeightReduction.h
EnableO3NonTrivialUnswitching
static cl::opt< bool > EnableO3NonTrivialUnswitching("enable-npm-O3-nontrivial-unswitch", cl::init(true), cl::Hidden, cl::ZeroOrMore, cl::desc("Enable non-trivial loop unswitching for -O3"))
InstSimplifyPass.h
llvm::LowerTypeTestsPass
Definition: LowerTypeTests.h:200
llvm::AnnotationRemarksPass
Definition: AnnotationRemarks.h:23
llvm::PassBuilder::buildModuleInlinerPipeline
ModulePassManager buildModuleInlinerPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the module pipeline that performs inlining with module inliner pass.
Definition: PassBuilderPipelines.cpp:763
llvm::SimpleLoopUnswitchPass
This pass transforms loops that contain branches or switches on loop- invariant conditions to have mu...
Definition: SimpleLoopUnswitch.h:62
llvm::cl::ZeroOrMore
@ ZeroOrMore
Definition: CommandLine.h:120
SROA.h
SampleProfile.h
llvm::PipelineTuningOptions::EagerlyInvalidateAnalyses
bool EagerlyInvalidateAnalyses
Definition: PassBuilder.h:85
llvm::GlobalSplitPass
Pass to perform split of global variables.
Definition: GlobalSplit.h:26
ForceFunctionAttrs.h
llvm::cl::opt
Definition: CommandLine.h:1432
Attributor.h
llvm::ModuleMemProfilerPass
Public interface to the memory profiler module pass for instrumenting code to profile memory allocati...
Definition: MemProfiler.h:37
EnableEagerlyInvalidateAnalyses
static cl::opt< bool > EnableEagerlyInvalidateAnalyses("eagerly-invalidate-analyses", cl::init(true), cl::Hidden, cl::desc("Eagerly invalidate more analyses in default pipelines"))
llvm::VectorCombinePass
Optimize scalar/vector interactions in IR using target cost models.
Definition: VectorCombine.h:23
llvm::createModuleToFunctionPassAdaptor
ModuleToFunctionPassAdaptor createModuleToFunctionPassAdaptor(FunctionPassT &&Pass, bool EagerlyInvalidate=false)
A function to deduce a function pass type and wrap it in the templated adaptor.
Definition: PassManager.h:1227
SpeculativeExecution.h
llvm::cl::values
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:697
EarlyCSE.h
CrossDSOCFI.h
ProfileSummaryInfo.h
llvm::WholeProgramDevirtPass
Definition: WholeProgramDevirt.h:223
ModuleInliner.h
CoroSplit.h
llvm::AssumeSimplifyPass
This pass attempts to minimize the number of assume without losing any information.
Definition: AssumeBundleBuilder.h:53
llvm::PassBuilder::buildLTOPreLinkDefaultPipeline
ModulePassManager buildLTOPreLinkDefaultPipeline(OptimizationLevel Level)
Build a pre-link, LTO-targeting default optimization pipeline to a pass manager.
Definition: PassBuilderPipelines.cpp:1412
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::AttributorCGSCCPass
Definition: Attributor.h:2878
PGOInstrumentation.h
CGSCCPassManager.h
llvm::PassBuilder::buildLTODefaultPipeline
ModulePassManager buildLTODefaultPipeline(OptimizationLevel Level, ModuleSummaryIndex *ExportSummary)
Build an LTO default optimization pipeline to a pass manager.
Definition: PassBuilderPipelines.cpp:1421
llvm::ForceFunctionAttrsPass
Pass which forces specific function attributes into the IR, primarily as a debugging tool.
Definition: ForceFunctionAttrs.h:22
llvm::ExtraVectorPassManager
A pass manager to run a set of extra function simplification passes after vectorization,...
Definition: LoopVectorize.h:105
EnableSyntheticCounts
static cl::opt< bool > EnableSyntheticCounts("enable-npm-synthetic-counts", cl::init(false), cl::Hidden, cl::ZeroOrMore, cl::desc("Run synthetic function entry count generation " "pass"))
AggressiveInstCombine.h
llvm::InvalidateAnalysisPass
A no-op pass template which simply forces a specific analysis result to be invalidated.
Definition: PassManager.h:1280
LowerExpectIntrinsic.h
llvm::PassBuilder::buildThinLTODefaultPipeline
ModulePassManager buildThinLTODefaultPipeline(OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary)
Build a ThinLTO default optimization pipeline to a pass manager.
Definition: PassBuilderPipelines.cpp:1356
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:441
llvm::DeadArgumentEliminationPass
Eliminate dead arguments (and return values) from functions.
Definition: DeadArgumentElimination.h:38
ADCE.h
llvm::EnableGVNSink
cl::opt< bool > EnableGVNSink
llvm::OptimizationLevel::Oz
static const OptimizationLevel Oz
A very specialized mode that will optimize for code size at any and all costs.
Definition: OptimizationLevel.h:108
llvm::PassBuilder::buildDefaultAAPipeline
AAManager buildDefaultAAPipeline()
Build the default AAManager with the default alias analysis pipeline registered.
Definition: PassBuilderPipelines.cpp:1791
InferFunctionAttrs.h
SimpleLoopUnswitch.h
llvm::PipelineTuningOptions::PipelineTuningOptions
PipelineTuningOptions()
Constructor sets pipeline tuning defaults based on cl::opts.
Definition: PassBuilderPipelines.cpp:185
llvm::InstrProfOptions
Options for the frontend instrumentation based profiling pass.
Definition: Instrumentation.h:121
llvm::SpeculativeExecutionPass
Definition: SpeculativeExecution.h:69
llvm::createFunctionToLoopPassAdaptor
std::enable_if_t< is_detected< HasRunOnLoopT, LoopPassT >::value, FunctionToLoopPassAdaptor > createFunctionToLoopPassAdaptor(LoopPassT &&Pass, bool UseMemorySSA=false, bool UseBlockFrequencyInfo=false, bool UseBranchProbabilityInfo=false)
A function to deduce a loop pass type and wrap it in the templated adaptor.
Definition: LoopPassManager.h:473
llvm::TargetMachine::registerDefaultAliasAnalyses
virtual void registerDefaultAliasAnalyses(AAManager &)
Allow the target to register alias analyses with the AAManager for use with the new pass manager.
Definition: TargetMachine.h:358
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::EliminateAvailableExternallyPass
A pass that transforms external global definitions into declarations.
Definition: ElimAvailExtern.h:24
llvm::ADCEPass
A DCE pass that assumes instructions are dead until proven otherwise.
Definition: ADCE.h:31
llvm::PipelineTuningOptions::SLPVectorization
bool SLPVectorization
Tuning option to enable/disable slp loop vectorization, set based on opt level.
Definition: PassBuilder.h:53
llvm::RelLookupTableConverterPass
Definition: RelLookupTableConverter.h:60
Reassociate.h
llvm::JumpThreadingPass
This pass performs 'jump threading', which looks at blocks that have multiple predecessors and multip...
Definition: JumpThreading.h:80
llvm::LoopFullUnrollPass
Loop unroll pass that only does full loop unrolling and peeling.
Definition: LoopUnrollPass.h:26
llvm::LowerExpectIntrinsicPass
Definition: LowerExpectIntrinsic.h:23
llvm::EnableIROutliner
cl::opt< bool > EnableIROutliner
llvm::InliningAdvisorMode::Development
@ Development
SampleProfileProbe.h
LoopPassManager.h
llvm::Float2IntPass
Definition: Float2Int.h:26
llvm::LibCallsShrinkWrapPass
Definition: LibCallsShrinkWrap.h:18
NameAnonGlobals.h
llvm::CalledValuePropagationPass
Definition: CalledValuePropagation.h:26
llvm::WarnMissedTransformationsPass
Definition: WarnMissedTransforms.h:22
llvm::IndVarSimplifyPass
Definition: IndVarSimplify.h:25
LowerMatrixIntrinsics.h
llvm::GlobalDCEPass
Pass to remove unused function declarations.
Definition: GlobalDCE.h:29
LoopInterchange.h
llvm::PipelineTuningOptions::CallGraphProfile
bool CallGraphProfile
Tuning option to enable/disable call graph profile.
Definition: PassBuilder.h:72
llvm::ThinOrFullLTOPhase::FullLTOPostLink
@ FullLTOPostLink
Full LTO postlink (backend compile) phase.
llvm::LowerMatrixIntrinsicsPass
Definition: LowerMatrixIntrinsics.h:19
llvm::NewGVNPass
Definition: NewGVN.h:23
SyntheticCountsPropagation.h
llvm::AddDiscriminatorsPass
Definition: AddDiscriminators.h:24
CanonicalizeAliases.h
llvm::CGSCC
@ CGSCC
Definition: Attributor.h:4793
LibCallsShrinkWrap.h
llvm::AAManager::registerModuleAnalysis
void registerModuleAnalysis()
Register a specific AA result.
Definition: AliasAnalysis.h:1297
llvm::InstCombinePass
Definition: InstCombine.h:27
GlobalDCE.h
llvm::PassManager< Loop, LoopAnalysisManager, LoopStandardAnalysisResults &, LPMUpdater & >::addPass
LLVM_ATTRIBUTE_MINSIZE std::enable_if_t< is_detected< HasRunOnLoopT, PassT >::value > addPass(PassT &&Pass)
Definition: LoopPassManager.h:108
llvm::OptimizationLevel::O3
static const OptimizationLevel O3
Optimize for fast execution as much as possible.
Definition: OptimizationLevel.h:89
llvm::EnableLoopFlatten
cl::opt< bool > EnableLoopFlatten
clEnumValN
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:672
llvm::PipelineTuningOptions::LoopVectorization
bool LoopVectorization
Tuning option to enable/disable loop vectorization, set based on opt level.
Definition: PassBuilder.h:49
NewGVN.h
llvm::PICLevel::Level
Level
Definition: CodeGen.h:33
LowerTypeTests.h
CallSiteSplitting.h
llvm::LoopVectorizeOptions
Definition: LoopVectorize.h:115
llvm::InstrOrderFilePass
The instrumentation pass for recording function order.
Definition: InstrOrderFile.h:20
LoopSimplifyCFG.h
llvm::CGProfilePass
Definition: CGProfile.h:19
AnnotationRemarks.h
LoopVectorize.h
SCCP.h
llvm::ModuleInlinerWrapperPass::addModulePass
void addModulePass(T Pass)
Add a module pass that runs before the CGSCC passes.
Definition: Inliner.h:136
EnableMemProfiler
static cl::opt< bool > EnableMemProfiler("enable-mem-prof", cl::init(false), cl::Hidden, cl::ZeroOrMore, cl::desc("Enable memory profiler"))
llvm::PassManager< Function >
llvm::LoopUnrollAndJamPass
A simple loop unroll-and-jam transformation.
Definition: LoopUnrollAndJamPass.h:18
llvm::IROutlinerPass
Pass to outline similar regions.
Definition: IROutliner.h:383
llvm::ForgetSCEVInLoopUnroll
cl::opt< bool > ForgetSCEVInLoopUnroll
SLPVectorizer.h
LoopLoadElimination.h
llvm::PassBuilder::buildPerModuleDefaultPipeline
ModulePassManager buildPerModuleDefaultPipeline(OptimizationLevel Level, bool LTOPreLink=false)
Build a per-module default optimization pipeline.
Definition: PassBuilderPipelines.cpp:1253
llvm::EnableCHR
cl::opt< bool > EnableCHR
LoopUnrollPass.h
llvm::SCCPPass
This pass performs function-level constant propagation and merging.
Definition: SCCP.h:36
llvm::PipelineTuningOptions::MergeFunctions
bool MergeFunctions
Tuning option to enable/disable function merging.
Definition: PassBuilder.h:76
PGOOptions.h
llvm::OptimizationLevel
Definition: OptimizationLevel.h:22
llvm::NameAnonGlobalPass
Simple pass that provides a name to every anonymous global.
Definition: NameAnonGlobals.h:23
PassManager.h
LowerConstantIntrinsics.h
llvm::ThinOrFullLTOPhase::None
@ None
No LTO/ThinLTO behavior needed.
llvm::PGOOptions::CSIRUse
@ CSIRUse
Definition: PGOOptions.h:24
isLTOPreLink
static bool isLTOPreLink(ThinOrFullLTOPhase Phase)
Definition: PassBuilderPipelines.cpp:242
llvm::CoroSplitPass
Definition: CoroSplit.h:24
llvm::PassBuilder::buildO0DefaultPipeline
ModulePassManager buildO0DefaultPipeline(OptimizationLevel Level, bool LTOPreLink=false)
Build an O0 pipeline with the minimal semantically required passes.
Definition: PassBuilderPipelines.cpp:1690
llvm::SROAPass
An optimization pass providing Scalar Replacement of Aggregates.
Definition: SROA.h:64
WholeProgramDevirt.h
llvm::ModuleInlinerWrapperPass::addLateModulePass
void addLateModulePass(T Pass)
Add a module pass that runs after the CGSCC passes.
Definition: Inliner.h:141
llvm::MemCpyOptPass
Definition: MemCpyOptimizer.h:41
llvm::AAManager::registerFunctionAnalysis
void registerFunctionAnalysis()
Register a specific AA result.
Definition: AliasAnalysis.h:1292
llvm::ArgumentPromotionPass
Argument promotion pass.
Definition: ArgumentPromotion.h:24
PerformMandatoryInliningsFirst
static cl::opt< bool > PerformMandatoryInliningsFirst("mandatory-inlining-first", cl::init(true), cl::Hidden, cl::ZeroOrMore, cl::desc("Perform mandatory inlinings module-wide, before performing " "inlining."))
llvm::DivRemPairsPass
Hoist/decompose integer division and remainder instructions to enable CFG improvements and better cod...
Definition: DivRemPairs.h:23
SimplifyCFGOptions.h
LoopDistribute.h
llvm::AlwaysInlinerPass
Inlines functions marked as "always_inline".
Definition: AlwaysInliner.h:29
WarnMissedTransforms.h
llvm::ThinOrFullLTOPhase::FullLTOPreLink
@ FullLTOPreLink
Full LTO prelink phase.
llvm::SimplifyCFGOptions
Definition: SimplifyCFGOptions.h:23
GlobalOpt.h
llvm::InjectTLIMappings
Definition: InjectTLIMappings.h:20
LoopSink.h
OptimizationLevel.h
llvm::LowerConstantIntrinsicsPass
Definition: LowerConstantIntrinsics.h:23
llvm::OpenMPOptPass
OpenMP optimizations pass.
Definition: OpenMPOpt.h:38
llvm::ModuleSummaryIndex
Class to hold module path string table and global value map, and encapsulate methods for operating on...
Definition: ModuleSummaryIndex.h:1088
llvm::InliningAdvisorMode::Default
@ Default
llvm::PGOInstrumentationGen
The instrumentation (profile-instr-gen) pass for IR based PGO.
Definition: PGOInstrumentation.h:47
llvm::CallSiteSplittingPass
Definition: CallSiteSplitting.h:17
LoopDeletion.h
llvm::LoopInstSimplifyPass
Performs Loop Inst Simplify Pass.
Definition: LoopInstSimplify.h:25
MemProfiler.h
LICM.h
llvm::EnableLoopInterchange
cl::opt< bool > EnableLoopInterchange
CoroEarly.h
llvm::EnableUnrollAndJam
cl::opt< bool > EnableUnrollAndJam
llvm::PGOOptions::IRInstr
@ IRInstr
Definition: PGOOptions.h:23
llvm::InliningAdvisorMode::Release
@ Release
llvm::AttributorRun
cl::opt< AttributorRunOption > AttributorRun
BDCE.h
InstrOrderFile.h
InstCombine.h
llvm::GVNPass
The core GVN pass object.
Definition: GVN.h:116
llvm::InstSimplifyPass
Run instruction simplification across each instruction in the function.
Definition: InstSimplifyPass.h:32
llvm::RequireAnalysisPass
A utility pass template to force an analysis result to be available.
Definition: PassManager.h:1252
llvm::PassBuilder::buildModuleSimplificationPipeline
ModulePassManager buildModuleSimplificationPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the core LLVM module canonicalization and simplification pipeline.
Definition: PassBuilderPipelines.cpp:796
llvm::PipelineTuningOptions::LoopUnrolling
bool LoopUnrolling
Tuning option to enable/disable loop unrolling. Its default value is true.
Definition: PassBuilder.h:56
llvm::cl::desc
Definition: CommandLine.h:412
llvm::PipelineTuningOptions::LicmMssaOptCap
unsigned LicmMssaOptCap
Tuning option to cap the number of calls to retrieve clobbering accesses in MemorySSA,...
Definition: PassBuilder.h:64
llvm::PipelineTuningOptions::LoopInterleaving
bool LoopInterleaving
Tuning option to set loop interleaving on/off, set based on opt level.
Definition: PassBuilder.h:45
llvm::MaxDevirtIterations
cl::opt< unsigned > MaxDevirtIterations("max-devirt-iterations", cl::ReallyHidden, cl::init(4))
Definition: PassBuilderPipelines.cpp:200
SimplifyCFG.h
llvm::SetLicmMssaOptCap
cl::opt< unsigned > SetLicmMssaOptCap
llvm::MergeFunctionsPass
Merge identical functions.
Definition: MergeFunctions.h:25
MPM
ModulePassManager MPM
Definition: PassBuilderBindings.cpp:70
llvm::createModuleToPostOrderCGSCCPassAdaptor
ModuleToPostOrderCGSCCPassAdaptor createModuleToPostOrderCGSCCPassAdaptor(CGSCCPassT &&Pass)
A function to deduce a CGSCC pass type and wrap it in the templated adaptor.
Definition: CGSCCPassManager.h:389
llvm::PassManager< Loop, LoopAnalysisManager, LoopStandardAnalysisResults &, LPMUpdater & >::isEmpty
bool isEmpty() const
Definition: LoopPassManager.h:167
llvm::LoopUnrollPass
Loop unroll pass that will support both full and partial unrolling.
Definition: LoopUnrollPass.h:133
FunctionAttrs.h
llvm::SimplifyCFGPass
A pass to simplify and canonicalize the CFG of a function.
Definition: SimplifyCFG.h:29
llvm::EnableFunctionSpecialization
cl::opt< bool > EnableFunctionSpecialization
llvm::LoopVectorizePass
The LoopVectorize Pass.
Definition: LoopVectorize.h:161
RelLookupTableConverter.h
llvm::EarlyCSEPass
A simple and fast domtree-based CSE pass.
Definition: EarlyCSE.h:30
llvm::ConstantMergePass
A pass that merges duplicate global constants into a single constant.
Definition: ConstantMerge.h:29
llvm::DFAJumpThreadingPass
Definition: DFAJumpThreading.h:21
AddDiscriminators.h
DFAJumpThreading.h
llvm::EnableGVNHoist
cl::opt< bool > EnableGVNHoist