LLVM  14.0.0git
PassBuilderPipelines.cpp
Go to the documentation of this file.
1 //===- Construction of pass pipelines -------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 ///
10 /// This file provides the implementation of the PassBuilder based on our
11 /// static pass registry as well as related functionality. It also provides
12 /// helpers to aid in analyzing, debugging, and testing passes and pass
13 /// pipelines.
14 ///
15 //===----------------------------------------------------------------------===//
16 
26 #include "llvm/IR/PassManager.h"
128 
129 using namespace llvm;
130 
132  "enable-ml-inliner", cl::init(InliningAdvisorMode::Default), cl::Hidden,
133  cl::desc("Enable ML policy for inliner. Currently trained for -Oz only"),
135  "Heuristics-based inliner version."),
137  "Use development mode (runtime-loadable model)."),
139  "Use release mode (AOT-compiled model).")));
140 
142  "enable-npm-synthetic-counts", cl::init(false), cl::Hidden, cl::ZeroOrMore,
143  cl::desc("Run synthetic function entry count generation "
144  "pass"));
145 
146 /// Flag to enable inline deferral during PGO.
/// Command-line name: -enable-npm-pgo-inline-deferral. Initialized to true and
/// registered as cl::Hidden. Its value is assigned to `IP.EnableDeferral` when
/// `PGOOpt` is set.
147 static cl::opt<bool>
148  EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true),
149  cl::Hidden,
150  cl::desc("Enable inline deferral during PGO"));
151 
152 static cl::opt<bool> EnableMemProfiler("enable-mem-prof", cl::init(false),
154  cl::desc("Enable memory profiler"));
155 
157  "mandatory-inlining-first", cl::init(true), cl::Hidden, cl::ZeroOrMore,
158  cl::desc("Perform mandatory inlinings module-wide, before performing "
159  "inlining."));
160 
162  "enable-npm-O3-nontrivial-unswitch", cl::init(true), cl::Hidden,
163  cl::ZeroOrMore, cl::desc("Enable non-trivial loop unswitching for -O3"));
164 
166  LoopInterleaving = true;
167  LoopVectorization = true;
168  SLPVectorization = false;
169  LoopUnrolling = true;
173  CallGraphProfile = true;
174  MergeFunctions = false;
175 }
176 
177 namespace llvm {
178 
187 extern cl::opt<bool> EnableCHR;
192 extern cl::opt<bool> RunNewGVN;
195 
197 
200 
202 
205 } // namespace llvm
206 
207 void PassBuilder::invokePeepholeEPCallbacks(FunctionPassManager &FPM,
209  for (auto &C : PeepholeEPCallbacks)
210  C(FPM, Level);
211 }
212 
213 // Helper to add AnnotationRemarksPass.
218 }
219 
220 // Helper to check if the current compilation phase is preparing for LTO
224 }
225 
226 // TODO: Investigate the cost/benefit of tail call elimination on debugging.
228 PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
230 
232 
233  // Form SSA out of local memory accesses after breaking apart aggregates into
234  // scalars.
235  FPM.addPass(SROA());
236 
237  // Catch trivial redundancies
238  FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
239 
240  // Hoisting of scalars and load expressions.
241  FPM.addPass(SimplifyCFGPass());
242  FPM.addPass(InstCombinePass());
243 
245 
246  invokePeepholeEPCallbacks(FPM, Level);
247 
248  FPM.addPass(SimplifyCFGPass());
249 
250  // Form canonically associated expression trees, and simplify the trees using
251  // basic mathematical properties. For example, this will form (nearly)
252  // minimal multiplication trees.
253  FPM.addPass(ReassociatePass());
254 
255  // Add the primary loop simplification pipeline.
256  // FIXME: Currently this is split into two loop pass pipelines because we run
257  // some function passes in between them. These can and should be removed
258  // and/or replaced by scheduling the loop pass equivalents in the correct
259  // positions. But those equivalent passes aren't powerful enough yet.
260  // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
261  // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough to
262  // fully replace `SimplifyCFGPass`, and the closest to the other we have is
263  // `LoopInstSimplify`.
264  LoopPassManager LPM1, LPM2;
265 
266  // Simplify the loop body. We do this initially to clean up after other loop
267  // passes run, either when iterating on a loop or on inner loops with
268  // implications on the outer loop.
271 
272  // Try to remove as much code from the loop header as possible,
273  // to reduce amount of IR that will have to be duplicated.
274  // TODO: Investigate promotion cap for O1.
276 
277  LPM1.addPass(LoopRotatePass(/* Disable header duplication */ true,
278  isLTOPreLink(Phase)));
279  // TODO: Investigate promotion cap for O1.
282 
284  LPM2.addPass(IndVarSimplifyPass());
285 
286  for (auto &C : LateLoopOptimizationsEPCallbacks)
287  C(LPM2, Level);
288 
289  LPM2.addPass(LoopDeletionPass());
290 
293 
294  // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
295  // because it changes the IR, making profile annotation in the backend compile
296  // inaccurate. The normal unroller doesn't pay attention to forced full unroll
297  // attributes, so we need to make sure to allow the full unroll pass to pay
298  // attention to them.
299  if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
300  PGOOpt->Action != PGOOptions::SampleUse)
301  LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
302  /* OnlyWhenForced= */ !PTO.LoopUnrolling,
304 
305  for (auto &C : LoopOptimizerEndEPCallbacks)
306  C(LPM2, Level);
307 
308  // We provide the opt remark emitter pass for LICM to use. We only need to do
309  // this once as it is immutable.
310  FPM.addPass(
313  /*UseMemorySSA=*/true,
314  /*UseBlockFrequencyInfo=*/true));
315  FPM.addPass(SimplifyCFGPass());
316  FPM.addPass(InstCombinePass());
317  if (EnableLoopFlatten)
319  // The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA.
320  // *All* loop passes must preserve it, in order to be able to use it.
322  /*UseMemorySSA=*/false,
323  /*UseBlockFrequencyInfo=*/false));
324 
325  // Delete small array after loop unroll.
326  FPM.addPass(SROA());
327 
328  // Specially optimize memory movement as it doesn't look like dataflow in SSA.
329  FPM.addPass(MemCpyOptPass());
330 
331  // Sparse conditional constant propagation.
332  // FIXME: It isn't clear why we do this *after* loop passes rather than
333  // before...
334  FPM.addPass(SCCPPass());
335 
336  // Delete dead bit computations (instcombine runs after to fold away the dead
337  // computations, and then ADCE will run later to exploit any new DCE
338  // opportunities that creates).
339  FPM.addPass(BDCEPass());
340 
341  // Run instcombine after redundancy and dead bit elimination to exploit
342  // opportunities opened up by them.
343  FPM.addPass(InstCombinePass());
344  invokePeepholeEPCallbacks(FPM, Level);
345 
346  FPM.addPass(CoroElidePass());
347 
348  for (auto &C : ScalarOptimizerLateEPCallbacks)
349  C(FPM, Level);
350 
351  // Finally, do an expensive DCE pass to catch all the dead code exposed by
352  // the simplifications and basic cleanup after all the simplifications.
353  // TODO: Investigate if this is too expensive.
354  FPM.addPass(ADCEPass());
355  FPM.addPass(SimplifyCFGPass());
356  FPM.addPass(InstCombinePass());
357  invokePeepholeEPCallbacks(FPM, Level);
358 
359  return FPM;
360 }
361 
365  assert(Level != OptimizationLevel::O0 && "Must request optimizations!");
366 
367  // The O1 pipeline has a separate pipeline creation function to simplify
368  // construction readability.
369  if (Level.getSpeedupLevel() == 1)
370  return buildO1FunctionSimplificationPipeline(Level, Phase);
371 
373 
374  // Form SSA out of local memory accesses after breaking apart aggregates into
375  // scalars.
376  FPM.addPass(SROA());
377 
378  // Catch trivial redundancies
379  FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
382 
383  // Hoisting of scalars and load expressions.
384  if (EnableGVNHoist)
385  FPM.addPass(GVNHoistPass());
386 
387  // Global value numbering based sinking.
388  if (EnableGVNSink) {
389  FPM.addPass(GVNSinkPass());
390  FPM.addPass(SimplifyCFGPass());
391  }
392 
395 
396  // Speculative execution if the target has divergent branches; otherwise nop.
397  FPM.addPass(SpeculativeExecutionPass(/* OnlyIfDivergentTarget =*/true));
398 
399  // Optimize based on known information about branches, and cleanup afterward.
400  FPM.addPass(JumpThreadingPass());
402 
403  FPM.addPass(SimplifyCFGPass());
406  FPM.addPass(InstCombinePass());
407 
408  if (!Level.isOptimizingForSize())
410 
411  invokePeepholeEPCallbacks(FPM, Level);
412 
413  // For PGO use pipeline, try to optimize memory intrinsics such as memcpy
414  // using the size value profile. Don't perform this when optimizing for size.
415  if (PGOOpt && PGOOpt->Action == PGOOptions::IRUse &&
416  !Level.isOptimizingForSize())
417  FPM.addPass(PGOMemOPSizeOpt());
418 
419  FPM.addPass(TailCallElimPass());
420  FPM.addPass(SimplifyCFGPass());
421 
422  // Form canonically associated expression trees, and simplify the trees using
423  // basic mathematical properties. For example, this will form (nearly)
424  // minimal multiplication trees.
425  FPM.addPass(ReassociatePass());
426 
427  // Add the primary loop simplification pipeline.
428  // FIXME: Currently this is split into two loop pass pipelines because we run
429  // some function passes in between them. These can and should be removed
430  // and/or replaced by scheduling the loop pass equivalents in the correct
431  // positions. But those equivalent passes aren't powerful enough yet.
432  // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
433  // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough to
434  // fully replace `SimplifyCFGPass`, and the closest to the other we have is
435  // `LoopInstSimplify`.
436  LoopPassManager LPM1, LPM2;
437 
438  // Simplify the loop body. We do this initially to clean up after other loop
439  // passes run, either when iterating on a loop or on inner loops with
440  // implications on the outer loop.
443 
444  // Try to remove as much code from the loop header as possible,
445  // to reduce amount of IR that will have to be duplicated.
446  // TODO: Investigate promotion cap for O1.
448 
449  // Disable header duplication in loop rotation at -Oz.
450  LPM1.addPass(
452  // TODO: Investigate promotion cap for O1.
454  LPM1.addPass(
455  SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3 &&
458  LPM2.addPass(IndVarSimplifyPass());
459 
460  for (auto &C : LateLoopOptimizationsEPCallbacks)
461  C(LPM2, Level);
462 
463  LPM2.addPass(LoopDeletionPass());
464 
467 
468  // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
469  // because it changes the IR, making profile annotation in the backend compile
470  // inaccurate. The normal unroller doesn't pay attention to forced full unroll
471  // attributes, so we need to make sure to allow the full unroll pass to pay
472  // attention to them.
473  if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
474  PGOOpt->Action != PGOOptions::SampleUse)
475  LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
476  /* OnlyWhenForced= */ !PTO.LoopUnrolling,
478 
479  for (auto &C : LoopOptimizerEndEPCallbacks)
480  C(LPM2, Level);
481 
482  // We provide the opt remark emitter pass for LICM to use. We only need to do
483  // this once as it is immutable.
484  FPM.addPass(
487  /*UseMemorySSA=*/true,
488  /*UseBlockFrequencyInfo=*/true));
489  FPM.addPass(SimplifyCFGPass());
490  FPM.addPass(InstCombinePass());
491  if (EnableLoopFlatten)
493  // The loop passes in LPM2 (LoopIdiomRecognizePass, IndVarSimplifyPass,
494  // LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA.
495  // *All* loop passes must preserve it, in order to be able to use it.
497  /*UseMemorySSA=*/false,
498  /*UseBlockFrequencyInfo=*/false));
499 
500  // Delete small array after loop unroll.
501  FPM.addPass(SROA());
502 
503  // The matrix extension can introduce large vector operations early, which can
504  // benefit from running vector-combine early on.
505  if (EnableMatrix)
506  FPM.addPass(VectorCombinePass(/*ScalarizationOnly=*/true));
507 
508  // Eliminate redundancies.
510  if (RunNewGVN)
511  FPM.addPass(NewGVNPass());
512  else
513  FPM.addPass(GVN());
514 
515  // Sparse conditional constant propagation.
516  // FIXME: It isn't clear why we do this *after* loop passes rather than
517  // before...
518  FPM.addPass(SCCPPass());
519 
520  // Delete dead bit computations (instcombine runs after to fold away the dead
521  // computations, and then ADCE will run later to exploit any new DCE
522  // opportunities that creates).
523  FPM.addPass(BDCEPass());
524 
525  // Run instcombine after redundancy and dead bit elimination to exploit
526  // opportunities opened up by them.
527  FPM.addPass(InstCombinePass());
528  invokePeepholeEPCallbacks(FPM, Level);
529 
530  // Re-consider control flow based optimizations after redundancy elimination,
531  // redo DCE, etc.
532  if (EnableDFAJumpThreading && Level.getSizeLevel() == 0)
534 
535  FPM.addPass(JumpThreadingPass());
537 
538  // Finally, do an expensive DCE pass to catch all the dead code exposed by
539  // the simplifications and basic cleanup after all the simplifications.
540  // TODO: Investigate if this is too expensive.
541  FPM.addPass(ADCEPass());
542 
543  // Specially optimize memory movement as it doesn't look like dataflow in SSA.
544  FPM.addPass(MemCpyOptPass());
545 
546  FPM.addPass(DSEPass());
549  /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true));
550 
551  FPM.addPass(CoroElidePass());
552 
553  for (auto &C : ScalarOptimizerLateEPCallbacks)
554  C(FPM, Level);
555 
557  SimplifyCFGOptions().hoistCommonInsts(true).sinkCommonInsts(true)));
558  FPM.addPass(InstCombinePass());
559  invokePeepholeEPCallbacks(FPM, Level);
560 
561  if (EnableCHR && Level == OptimizationLevel::O3 && PGOOpt &&
562  (PGOOpt->Action == PGOOptions::IRUse ||
563  PGOOpt->Action == PGOOptions::SampleUse))
565 
566  return FPM;
567 }
568 
569 void PassBuilder::addRequiredLTOPreLinkPasses(ModulePassManager &MPM) {
572 }
573 
574 void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM,
575  OptimizationLevel Level, bool RunProfileGen,
576  bool IsCS, std::string ProfileFile,
577  std::string ProfileRemappingFile) {
578  assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
579  if (!IsCS && !DisablePreInliner) {
581 
582  IP.DefaultThreshold = PreInlineThreshold;
583 
584  // FIXME: The hint threshold has the same value used by the regular inliner
585  // when not optimizing for size. This should probably be lowered after
586  // performance testing.
587  // FIXME: This comment is cargo culted from the old pass manager; revisit.
588  IP.HintThreshold = Level.isOptimizingForSize() ? PreInlineThreshold : 325;
590  CGSCCPassManager &CGPipeline = MIWP.getPM();
591 
593  FPM.addPass(SROA());
594  FPM.addPass(EarlyCSEPass()); // Catch trivial redundancies.
595  FPM.addPass(SimplifyCFGPass()); // Merge & remove basic blocks.
596  FPM.addPass(InstCombinePass()); // Combine silly sequences.
597  invokePeepholeEPCallbacks(FPM, Level);
598 
600 
601  MPM.addPass(std::move(MIWP));
602 
603  // Delete anything that is now dead to make sure that we don't instrument
604  // dead code. Instrumentation can end up keeping dead code around and
605  // dramatically increase code size.
607  }
608 
609  if (!RunProfileGen) {
610  assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
611  MPM.addPass(PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS));
612  // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
613  // RequireAnalysisPass for PSI before subsequent non-module passes.
615  return;
616  }
617 
618  // Perform PGO instrumentation.
620 
622  // Disable header duplication in loop rotation at -Oz.
624  LoopRotatePass(Level != OptimizationLevel::Oz), /*UseMemorySSA=*/false,
625  /*UseBlockFrequencyInfo=*/false));
627 
628  // Add the profile lowering pass.
630  if (!ProfileFile.empty())
631  Options.InstrProfileOutput = ProfileFile;
632  // Do counter promotion at Level greater than O0.
633  Options.DoCounterPromotion = true;
634  Options.UseBFIInPromotion = IsCS;
636 }
637 
639  bool RunProfileGen, bool IsCS,
640  std::string ProfileFile,
641  std::string ProfileRemappingFile) {
642  if (!RunProfileGen) {
643  assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
644  MPM.addPass(PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS));
645  // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
646  // RequireAnalysisPass for PSI before subsequent non-module passes.
648  return;
649  }
650 
651  // Perform PGO instrumentation.
653  // Add the profile lowering pass.
655  if (!ProfileFile.empty())
656  Options.InstrProfileOutput = ProfileFile;
657  // Do not do counter promotion at O0.
658  Options.DoCounterPromotion = false;
659  Options.UseBFIInPromotion = IsCS;
661 }
662 
664  return getInlineParams(Level.getSpeedupLevel(), Level.getSizeLevel());
665 }
666 
671  if (Phase == ThinOrFullLTOPhase::ThinLTOPreLink && PGOOpt &&
672  PGOOpt->Action == PGOOptions::SampleUse)
673  IP.HotCallSiteThreshold = 0;
674 
675  if (PGOOpt)
676  IP.EnableDeferral = EnablePGOInlineDeferral;
677 
680 
681  // Require the GlobalsAA analysis for the module so we can query it within
682  // the CGSCC pipeline.
684  // Invalidate AAManager so it can be recreated and pick up the newly available
685  // GlobalsAA.
686  MIWP.addModulePass(
688 
689  // Require the ProfileSummaryAnalysis for the module so we can query it within
690  // the inliner pass.
692 
693  // Now begin the main postorder CGSCC pipeline.
694  // FIXME: The current CGSCC pipeline has its origins in the legacy pass
695  // manager and trying to emulate its precise behavior. Much of this doesn't
696  // make a lot of sense and we should revisit the core CGSCC structure.
697  CGSCCPassManager &MainCGPipeline = MIWP.getPM();
698 
699  // Note: historically, the PruneEH pass was run first to deduce nounwind and
700  // generally clean up exception handling overhead. It isn't clear this is
701  // valuable as the inliner doesn't currently care whether it is inlining an
702  // invoke or a call.
703 
705  MainCGPipeline.addPass(AttributorCGSCCPass());
706 
707  // Now deduce any function attributes based on the current code.
708  MainCGPipeline.addPass(PostOrderFunctionAttrsPass());
709 
710  // When at O3 add argument promotion to the pass pipeline.
711  // FIXME: It isn't at all clear why this should be limited to O3.
713  MainCGPipeline.addPass(ArgumentPromotionPass());
714 
715  // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if
716  // there are no OpenMP runtime calls present in the module.
718  MainCGPipeline.addPass(OpenMPOptCGSCCPass());
719 
720  for (auto &C : CGSCCOptimizerLateEPCallbacks)
721  C(MainCGPipeline, Level);
722 
723  // Lastly, add the core function simplification pipeline nested inside the
724  // CGSCC walk.
727 
728  MainCGPipeline.addPass(CoroSplitPass(Level != OptimizationLevel::O0));
729 
730  return MIWP;
731 }
732 
737 
738  // Place pseudo probe instrumentation as the first pass of the pipeline to
739  // minimize the impact of optimization changes.
740  if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
743 
744  bool HasSampleProfile = PGOOpt && (PGOOpt->Action == PGOOptions::SampleUse);
745 
746  // In ThinLTO mode, when flattened profile is used, all the available
747  // profile information will be annotated in PreLink phase so there is
748  // no need to load the profile again in PostLink.
749  bool LoadSampleProfile =
750  HasSampleProfile &&
752 
753  // During the ThinLTO backend phase we perform early indirect call promotion
754  // here, before globalopt. Otherwise imported available_externally functions
755  // look unreferenced and are removed. If we are going to load the sample
756  // profile then defer until later.
757  // TODO: See if we can move later and consolidate with the location where
758  // we perform ICP when we are loading a sample profile.
759  // TODO: We pass HasSampleProfile (whether there was a sample profile file
760  // passed to the compile) to the SamplePGO flag of ICP. This is used to
761  // determine whether the new direct calls are annotated with prof metadata.
762  // Ideally this should be determined from whether the IR is annotated with
763  // sample profile, and not whether a sample profile was provided on the
764  // command line. E.g. for flattened profiles where we will not be reloading
765  // the sample profile in the ThinLTO backend, we ideally shouldn't have to
766  // provide the sample profile file.
767  if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink && !LoadSampleProfile)
768  MPM.addPass(PGOIndirectCallPromotion(true /* InLTO */, HasSampleProfile));
769 
770  // Do basic inference of function attributes from known properties of system
771  // libraries and other oracles.
773 
774  // Create an early function pass manager to cleanup the output of the
775  // frontend.
776  FunctionPassManager EarlyFPM;
777  // Lower llvm.expect to metadata before attempting transforms.
778  // Compare/branch metadata may alter the behavior of passes like SimplifyCFG.
779  EarlyFPM.addPass(LowerExpectIntrinsicPass());
780  EarlyFPM.addPass(SimplifyCFGPass());
781  EarlyFPM.addPass(SROA());
782  EarlyFPM.addPass(EarlyCSEPass());
783  EarlyFPM.addPass(CoroEarlyPass());
785  EarlyFPM.addPass(CallSiteSplittingPass());
786 
787  // In SamplePGO ThinLTO backend, we need instcombine before profile annotation
788  // to convert bitcast to direct calls so that they can be inlined during the
789  // profile annotation preparation step.
790  // More details about SamplePGO design can be found in:
791  // https://research.google.com/pubs/pub45290.html
792  // FIXME: revisit how SampleProfileLoad/Inliner/ICP is structured.
793  if (LoadSampleProfile)
794  EarlyFPM.addPass(InstCombinePass());
796 
797  if (LoadSampleProfile) {
798  // Annotate sample profile right after early FPM to ensure freshness of
799  // the debug info.
800  MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
801  PGOOpt->ProfileRemappingFile, Phase));
802  // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
803  // RequireAnalysisPass for PSI before subsequent non-module passes.
805  // Do not invoke ICP in the LTOPrelink phase as it makes it hard
806  // for the profile annotation to be accurate in the LTO backend.
809  // We perform early indirect call promotion here, before globalopt.
810  // This is important for the ThinLTO backend phase because otherwise
811  // imported available_externally functions look unreferenced and are
812  // removed.
813  MPM.addPass(
814  PGOIndirectCallPromotion(true /* IsInLTO */, true /* SamplePGO */));
815  }
816 
817  // Try to perform OpenMP specific optimizations on the module. This is a
818  // (quick!) no-op if there are no OpenMP runtime calls present in the module.
821 
824 
825  // Lower type metadata and the type.test intrinsic in the ThinLTO
826  // post link pipeline after ICP. This is to enable usage of the type
827  // tests in ICP sequences.
829  MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
830 
831  for (auto &C : PipelineEarlySimplificationEPCallbacks)
832  C(MPM, Level);
833 
834  // Specialize functions with IPSCCP.
837 
838  // Interprocedural constant propagation now that basic cleanup has occurred
839  // and prior to optimizing globals.
840  // FIXME: This position in the pipeline hasn't been carefully considered in
841  // years, it should be re-analyzed.
843 
844  // Attach metadata to indirect call sites indicating the set of functions
845  // they may target at run-time. This should follow IPSCCP.
847 
848  // Optimize globals to try and fold them into constants.
850 
851  // Promote any localized globals to SSA registers.
852  // FIXME: Should this instead be a run of SROA?
853  // FIXME: We should probably run instcombine and simplifycfg afterward to
854  // delete control flows that are dead once globals have been folded to
855  // constants.
857 
858  // Remove any dead arguments exposed by cleanups and constant folding
859  // globals.
861 
862  // Create a small function pass pipeline to cleanup after all the global
863  // optimizations.
864  FunctionPassManager GlobalCleanupPM;
865  GlobalCleanupPM.addPass(InstCombinePass());
866  invokePeepholeEPCallbacks(GlobalCleanupPM, Level);
867 
868  GlobalCleanupPM.addPass(SimplifyCFGPass());
870 
871  // Add all the requested passes for instrumentation PGO, if requested.
872  if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink &&
873  (PGOOpt->Action == PGOOptions::IRInstr ||
874  PGOOpt->Action == PGOOptions::IRUse)) {
875  addPGOInstrPasses(MPM, Level,
876  /* RunProfileGen */ PGOOpt->Action == PGOOptions::IRInstr,
877  /* IsCS */ false, PGOOpt->ProfileFile,
878  PGOOpt->ProfileRemappingFile);
879  MPM.addPass(PGOIndirectCallPromotion(false, false));
880  }
881  if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink &&
882  PGOOpt->CSAction == PGOOptions::CSIRInstr)
883  MPM.addPass(PGOInstrumentationGenCreateVar(PGOOpt->CSProfileGenFile));
884 
885  // Synthesize function entry counts for non-PGO compilation.
886  if (EnableSyntheticCounts && !PGOOpt)
888 
890 
894  }
895 
896  return MPM;
897 }
898 
899 /// TODO: Should LTO cause any differences to this set of passes?
900 void PassBuilder::addVectorPasses(OptimizationLevel Level,
901  FunctionPassManager &FPM, bool IsFullLTO) {
904 
905  if (IsFullLTO) {
906  // The vectorizer may have significantly shortened a loop body; unroll
907  // again. Unroll small loops to hide loop backedge latency and saturate any
908  // parallel execution resources of an out-of-order processor. We also then
909  // need to clean up redundancies and loop invariant code.
910  // FIXME: It would be really good to use a loop-integrated instruction
911  // combiner for cleanup here so that the unrolling and LICM can be pipelined
912  // across the loop nests.
913  // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
916  LoopUnrollAndJamPass(Level.getSpeedupLevel())));
918  Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
921  }
922 
923  if (!IsFullLTO) {
924  // Eliminate loads by forwarding stores from the previous iteration to loads
925  // of the current iteration.
927  }
928  // Cleanup after the loop optimization passes.
929  FPM.addPass(InstCombinePass());
930 
931  if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
932  // At higher optimization levels, try to clean up any runtime overlap and
933  // alignment checks inserted by the vectorizer. We want to track correlated
934  // runtime checks for two inner loops in the same outer loop, fold any
935  // common computations, hoist loop-invariant aspects out of any outer loop,
936  // and unswitch the runtime checks if possible. Once hoisted, we may have
937  // dead (or speculatable) control flows or more combining opportunities.
938  FPM.addPass(EarlyCSEPass());
940  FPM.addPass(InstCombinePass());
941  LoopPassManager LPM;
943  LPM.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
945  FPM.addPass(
947  FPM.addPass(
948  createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/true,
949  /*UseBlockFrequencyInfo=*/true));
950  FPM.addPass(SimplifyCFGPass());
951  FPM.addPass(InstCombinePass());
952  }
953 
954  // Now that we've formed fast to execute loop structures, we do further
955  // optimizations. These are run afterward as they might block doing complex
956  // analyses and transforms such as what are needed for loop vectorization.
957 
958  // Cleanup after loop vectorization, etc. Simplification passes like CVP and
959  // GVN, loop transforms, and others have already run, so it's now better to
960  // convert to more optimized IR using more aggressive simplify CFG options.
961  // The extra sinking transform can create larger basic blocks, so do this
962  // before SLP vectorization.
964  .forwardSwitchCondToPhi(true)
965  .convertSwitchToLookupTable(true)
966  .needCanonicalLoops(false)
967  .hoistCommonInsts(true)
968  .sinkCommonInsts(true)));
969 
970  if (IsFullLTO) {
971  FPM.addPass(SCCPPass());
972  FPM.addPass(InstCombinePass());
973  FPM.addPass(BDCEPass());
974  }
975 
976  // Optimize parallel scalar instruction chains into SIMD instructions.
977  if (PTO.SLPVectorization) {
978  FPM.addPass(SLPVectorizerPass());
979  if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
980  FPM.addPass(EarlyCSEPass());
981  }
982  }
983  // Enhance/cleanup vector code.
984  FPM.addPass(VectorCombinePass());
985 
986  if (!IsFullLTO) {
987  FPM.addPass(InstCombinePass());
988  // Unroll small loops to hide loop backedge latency and saturate any
989  // parallel execution resources of an out-of-order processor. We also then
990  // need to clean up redundancies and loop invariant code.
991  // FIXME: It would be really good to use a loop-integrated instruction
992  // combiner for cleanup here so that the unrolling and LICM can be pipelined
993  // across the loop nests.
994  // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
995  if (EnableUnrollAndJam && PTO.LoopUnrolling) {
997  LoopUnrollAndJamPass(Level.getSpeedupLevel())));
998  }
1000  Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
1003  FPM.addPass(InstCombinePass());
1004  FPM.addPass(
1008  /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true));
1009  }
1010 
1011  // Now that we've vectorized and unrolled loops, we may have more refined
1012  // alignment information, try to re-derive it here.
1014 
1015  if (IsFullLTO)
1016  FPM.addPass(InstCombinePass());
1017 }
1018 
1021  bool LTOPreLink) {
1023 
1024  // Optimize globals now that the module is fully simplified.
1027 
1028  // Run partial inlining pass to partially inline functions that have
1029  // large bodies.
1030  if (RunPartialInlining)
1032 
1033  // Remove avail extern fns and globals definitions since we aren't compiling
1034  // an object file for later LTO. For LTO we want to preserve these so they
1035  // are eligible for inlining at link-time. Note if they are unreferenced they
1036  // will be removed by GlobalDCE later, so this only impacts referenced
1037  // available externally globals. Eventually they will be suppressed during
1038  // codegen, but eliminating here enables more opportunity for GlobalDCE as it
1039  // may make globals referenced by available external functions dead and saves
1040  // running remaining passes on the eliminated functions. These should be
1041  // preserved during prelinking for link-time inlining decisions.
1042  if (!LTOPreLink)
1044 
1047 
1048  // Do RPO function attribute inference across the module to forward-propagate
1049  // attributes where applicable.
1050  // FIXME: Is this really an optimization rather than a canonicalization?
1052 
1053  // Do a post inline PGO instrumentation and use pass. This is a context
1054  // sensitive PGO pass. We don't want to do this in LTOPreLink phase as
1055  // cross-module inline has not been done yet. The context sensitive
1056  // instrumentation is after all the inlines are done.
1057  if (!LTOPreLink && PGOOpt) {
1058  if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
1059  addPGOInstrPasses(MPM, Level, /* RunProfileGen */ true,
1060  /* IsCS */ true, PGOOpt->CSProfileGenFile,
1061  PGOOpt->ProfileRemappingFile);
1062  else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
1063  addPGOInstrPasses(MPM, Level, /* RunProfileGen */ false,
1064  /* IsCS */ true, PGOOpt->ProfileFile,
1065  PGOOpt->ProfileRemappingFile);
1066  }
1067 
1068  // Re-require GlobalsAA here prior to function passes. This is particularly
1069  // useful as the above will have inlined, DCE'ed, and function-attr
1070  // propagated everything. We should at this point have a reasonably minimal
1071  // and richly annotated call graph. By computing aliasing and mod/ref
1072  // information for all local globals here, the late loop passes and notably
1073  // the vectorizer will be able to use them to help recognize vectorizable
1074  // memory operations.
1076 
1077  FunctionPassManager OptimizePM;
1078  OptimizePM.addPass(Float2IntPass());
1079  OptimizePM.addPass(LowerConstantIntrinsicsPass());
1080 
1081  if (EnableMatrix) {
1082  OptimizePM.addPass(LowerMatrixIntrinsicsPass());
1083  OptimizePM.addPass(EarlyCSEPass());
1084  }
1085 
1086  // FIXME: We need to run some loop optimizations to re-rotate loops after
1087  // simplifycfg and others undo their rotation.
1088 
1089  // Optimize the loop execution. These passes operate on entire loop nests
1090  // rather than on each loop in an inside-out manner, and so they are actually
1091  // function passes.
1092 
1093  for (auto &C : VectorizerStartEPCallbacks)
1094  C(OptimizePM, Level);
1095 
1096  // First rotate loops that may have been un-rotated by prior passes.
1097  // Disable header duplication at -Oz.
1099  LoopRotatePass(Level != OptimizationLevel::Oz, LTOPreLink),
1100  /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/false));
1101 
1102  // Distribute loops to allow partial vectorization. I.e. isolate dependences
1103  // into separate loops that would otherwise inhibit vectorization. This is
1104  // currently only performed for loops marked with the metadata
1105  // llvm.loop.distribute=true or when -enable-loop-distribute is specified.
1106  OptimizePM.addPass(LoopDistributePass());
1107 
1108  // Populates the VFABI attribute with the scalar-to-vector mappings
1109  // from the TargetLibraryInfo.
1110  OptimizePM.addPass(InjectTLIMappings());
1111 
1112  addVectorPasses(Level, OptimizePM, /* IsFullLTO */ false);
1113 
1114  // Split out cold code. Splitting is done late to avoid hiding context from
1115  // other optimizations and inadvertently regressing performance. The tradeoff
1116  // is that this has a higher code size cost than splitting early.
1117  if (EnableHotColdSplit && !LTOPreLink)
1119 
1120  // Search the code for similar regions of code. If enough similar regions can
1121  // be found where extracting the regions into their own function will decrease
1122  // the size of the program, we extract the regions, and deduplicate the
1123  // structurally similar regions.
1124  if (EnableIROutliner)
1126 
1127  // Merge functions if requested.
1128  if (PTO.MergeFunctions)
1130 
1131  // LoopSink pass sinks instructions hoisted by LICM, which serves as a
1132  // canonicalization pass that enables other optimizations. As a result,
1133  // LoopSink pass needs to be a very late IR pass to avoid undoing LICM
1134  // result too early.
1135  OptimizePM.addPass(LoopSinkPass());
1136 
1137  // And finally clean up LCSSA form before generating code.
1138  OptimizePM.addPass(InstSimplifyPass());
1139 
1140  // This hoists/decomposes div/rem ops. It should run after other sink/hoist
1141  // passes to avoid re-sinking, but before SimplifyCFG because it can allow
1142  // flattening of blocks.
1143  OptimizePM.addPass(DivRemPairsPass());
1144 
1145  // LoopSink (and other loop passes since the last simplifyCFG) might have
1146  // resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
1147  OptimizePM.addPass(SimplifyCFGPass());
1148 
1149  OptimizePM.addPass(CoroCleanupPass());
1150 
1151  // Add the core optimizing pipeline.
1153 
1154  for (auto &C : OptimizerLastEPCallbacks)
1155  C(MPM, Level);
1156 
1157  if (PTO.CallGraphProfile)
1159 
1160  // Now we need to do some global optimization transforms.
1161  // FIXME: It would seem like these should come first in the optimization
1162  // pipeline and maybe be the bottom of the canonicalization pipeline? Weird
1163  // ordering here.
1166 
1167  // TODO: Relative lookup table converter pass caused an issue when full LTO is
1168  // enabled. See https://reviews.llvm.org/D94355 for more details.
1169  // Until the issue is fixed, disable this pass during the pre-linking phase.
1170  if (!LTOPreLink)
1172 
1173  return MPM;
1174 }
1175 
1178  bool LTOPreLink) {
1180  "Must request optimizations for the default pipeline!");
1181 
1183 
1184  // Convert @llvm.global.annotations to !annotation metadata.
1186 
1187  // Force any function attributes we want the rest of the pipeline to observe.
1189 
1190  // Apply module pipeline start EP callback.
1191  for (auto &C : PipelineStartEPCallbacks)
1192  C(MPM, Level);
1193 
1194  if (PGOOpt && PGOOpt->DebugInfoForProfiling)
1196 
1197  // Add the core simplification pipeline.
1201 
1202  // Now add the optimization pipeline.
1204 
1205  if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1206  PGOOpt->Action == PGOOptions::SampleUse)
1208 
1209  // Emit annotation remarks.
1211 
1212  if (LTOPreLink)
1213  addRequiredLTOPreLinkPasses(MPM);
1214 
1215  return MPM;
1216 }
1217 
1221  "Must request optimizations for the default pipeline!");
1222 
1224 
1225  // Convert @llvm.global.annotations to !annotation metadata.
1227 
1228  // Force any function attributes we want the rest of the pipeline to observe.
1230 
1231  if (PGOOpt && PGOOpt->DebugInfoForProfiling)
1233 
1234  // Apply module pipeline start EP callback.
1235  for (auto &C : PipelineStartEPCallbacks)
1236  C(MPM, Level);
1237 
1238  // If we are planning to perform ThinLTO later, we don't bloat the code with
1239  // unrolling/vectorization/... now. Just simplify the module as much as we
1240  // can.
1243 
1244  // Run partial inlining pass to partially inline functions that have
1245  // large bodies.
1246  // FIXME: It isn't clear whether this is really the right place to run this
1247  // in ThinLTO. Because there is another canonicalization and simplification
1248  // phase that will run after the thin link, running this here ends up with
1249  // less information than will be available later and it may grow functions in
1250  // ways that aren't beneficial.
1251  if (RunPartialInlining)
1253 
1254  // Reduce the size of the IR as much as possible.
1256 
1257  // Module simplification splits coroutines, but does not fully clean up
1258  // coroutine intrinsics. To ensure ThinLTO optimization passes don't trip up
1259  // on these, we schedule the cleanup here.
1261 
1262  if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1263  PGOOpt->Action == PGOOptions::SampleUse)
1265 
1266  // Handle OptimizerLastEPCallbacks added by clang on PreLink. Actual
1267  // optimization is going to be done in PostLink stage, but clang can't
1268  // add callbacks there in case of in-process ThinLTO called by linker.
1269  for (auto &C : OptimizerLastEPCallbacks)
1270  C(MPM, Level);
1271 
1272  // Emit annotation remarks.
1274 
1275  addRequiredLTOPreLinkPasses(MPM);
1276 
1277  return MPM;
1278 }
1279 
1281  OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary) {
1283 
1284  // Convert @llvm.global.annotations to !annotation metadata.
1286 
1287  if (ImportSummary) {
1288  // These passes import type identifier resolutions for whole-program
1289  // devirtualization and CFI. They must run early because other passes may
1290  // disturb the specific instruction patterns that these passes look for,
1291  // creating dependencies on resolutions that may not appear in the summary.
1292  //
1293  // For example, GVN may transform the pattern assume(type.test) appearing in
1294  // two basic blocks into assume(phi(type.test, type.test)), which would
1295  // transform a dependency on a WPD resolution into a dependency on a type
1296  // identifier resolution for CFI.
1297  //
1298  // Also, WPD has access to more precise information than ICP and can
1299  // devirtualize more effectively, so it should operate on the IR first.
1300  //
1301  // The WPD and LowerTypeTest passes need to run at -O0 to lower type
1302  // metadata and intrinsics.
1303  MPM.addPass(WholeProgramDevirtPass(nullptr, ImportSummary));
1304  MPM.addPass(LowerTypeTestsPass(nullptr, ImportSummary));
1305  }
1306 
1307  if (Level == OptimizationLevel::O0) {
1308  // Run a second time to clean up any type tests left behind by WPD for use
1309  // in ICP.
1310  MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
1311  // Drop available_externally and unreferenced globals. This is necessary
1312  // with ThinLTO in order to avoid leaving undefined references to dead
1313  // globals in the object file.
1316  return MPM;
1317  }
1318 
1319  // Force any function attributes we want the rest of the pipeline to observe.
1321 
1322  // Add the core simplification pipeline.
1325 
1326  // Now add the optimization pipeline.
1328 
1329  // Emit annotation remarks.
1331 
1332  return MPM;
1333 }
1334 
1338  "Must request optimizations for the default pipeline!");
1339  // FIXME: We should use a customized pre-link pipeline!
1341  /* LTOPreLink */ true);
1342 }
1343 
1346  ModuleSummaryIndex *ExportSummary) {
1348 
1349  // Convert @llvm.global.annotations to !annotation metadata.
1351 
1352  // Create a function that performs CFI checks for cross-DSO calls with targets
1353  // in the current module.
1355 
1356  if (Level == OptimizationLevel::O0) {
1357  // The WPD and LowerTypeTest passes need to run at -O0 to lower type
1358  // metadata and intrinsics.
1359  MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
1360  MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
1361  // Run a second time to clean up any type tests left behind by WPD for use
1362  // in ICP.
1363  MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
1364 
1365  // Emit annotation remarks.
1367 
1368  return MPM;
1369  }
1370 
1371  if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) {
1372  // Load sample profile before running the LTO optimization pipeline.
1373  MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
1374  PGOOpt->ProfileRemappingFile,
1376  // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
1377  // RequireAnalysisPass for PSI before subsequent non-module passes.
1379  }
1380 
1381  // Remove unused virtual tables to improve the quality of code generated by
1382  // whole-program devirtualization and bitset lowering.
1384 
1385  // Force any function attributes we want the rest of the pipeline to observe.
1387 
1388  // Do basic inference of function attributes from known properties of system
1389  // libraries and other oracles.
1391 
1392  if (Level.getSpeedupLevel() > 1) {
1393  FunctionPassManager EarlyFPM;
1394  EarlyFPM.addPass(CallSiteSplittingPass());
1396 
1397  // Indirect call promotion. This should promote all the targets that are
1398  // left by the earlier promotion pass that promotes intra-module targets.
1399  // This two-step promotion is to save the compile time. For LTO, it should
1400  // produce the same result as if we only do promotion here.
1402  true /* InLTO */, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
1403 
1406  // Propagate constants at call sites into the functions they call. This
1407  // opens opportunities for globalopt (and inlining) by substituting function
1408  // pointers passed as arguments to direct uses of functions.
1409  MPM.addPass(IPSCCPPass());
1410 
1411  // Attach metadata to indirect call sites indicating the set of functions
1412  // they may target at run-time. This should follow IPSCCP.
1414  }
1415 
1416  // Now deduce any function attributes based on the current code.
1417  MPM.addPass(
1419 
1420  // Do RPO function attribute inference across the module to forward-propagate
1421  // attributes where applicable.
1422  // FIXME: Is this really an optimization rather than a canonicalization?
1424 
1425  // Use in-range annotations on GEP indices to split globals where beneficial.
1427 
1428  // Run whole program optimization of virtual call when the list of callees
1429  // is fixed.
1430  MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
1431 
1432  // Stop here at -O1.
1433  if (Level == OptimizationLevel::O1) {
1434  // The LowerTypeTestsPass needs to run to lower type metadata and the
1435  // type.test intrinsics. The pass does nothing if CFI is disabled.
1436  MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
1437  // Run a second time to clean up any type tests left behind by WPD for use
1438  // in ICP (which is performed earlier than this in the regular LTO
1439  // pipeline).
1440  MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
1441 
1442  // Emit annotation remarks.
1444 
1445  return MPM;
1446  }
1447 
1448  // Optimize globals to try and fold them into constants.
1450 
1451  // Promote any localized globals to SSA registers.
1453 
1454  // Linking modules together can lead to duplicate global constants; only
1455  // keep one copy of each constant.
1457 
1458  // Remove unused arguments from functions.
1460 
1461  // Reduce the code after globalopt and ipsccp. Both can open up significant
1462  // simplification opportunities, and both can propagate functions through
1463  // function pointers. When this happens, we often have to resolve varargs
1464  // calls, etc, so let instcombine do this.
1465  FunctionPassManager PeepholeFPM;
1467  PeepholeFPM.addPass(AggressiveInstCombinePass());
1468  PeepholeFPM.addPass(InstCombinePass());
1469  invokePeepholeEPCallbacks(PeepholeFPM, Level);
1470 
1472 
1473  // Note: historically, the PruneEH pass was run first to deduce nounwind and
1474  // generally clean up exception handling overhead. It isn't clear this is
1475  // valuable as the inliner doesn't currently care whether it is inlining an
1476  // invoke or a call.
1477  // Run the inliner now.
1479 
1480  // Optimize globals again after we ran the inliner.
1482 
1483  // Garbage collect dead functions.
1485 
1486  // If we didn't decide to inline a function, check to see if we can
1487  // transform it to pass arguments by value instead of by reference.
1489 
1490  FunctionPassManager FPM;
1491  // The IPO Passes may leave cruft around. Clean up after them.
1492  FPM.addPass(InstCombinePass());
1493  invokePeepholeEPCallbacks(FPM, Level);
1494 
1495  FPM.addPass(JumpThreadingPass(/*InsertFreezeWhenUnfoldingSelect*/ true));
1496 
1497  // Do a post inline PGO instrumentation and use pass. This is a context
1498  // sensitive PGO pass.
1499  if (PGOOpt) {
1500  if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
1501  addPGOInstrPasses(MPM, Level, /* RunProfileGen */ true,
1502  /* IsCS */ true, PGOOpt->CSProfileGenFile,
1503  PGOOpt->ProfileRemappingFile);
1504  else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
1505  addPGOInstrPasses(MPM, Level, /* RunProfileGen */ false,
1506  /* IsCS */ true, PGOOpt->ProfileFile,
1507  PGOOpt->ProfileRemappingFile);
1508  }
1509 
1510  // Break up allocas
1511  FPM.addPass(SROA());
1512 
1513  // LTO provides additional opportunities for tailcall elimination due to
1514  // link-time inlining, and visibility of nocapture attribute.
1515  FPM.addPass(TailCallElimPass());
1516 
1517  // Run a few AA-driven optimizations here and now to clean up the code.
1519 
1520  MPM.addPass(
1522 
1523  // Require the GlobalsAA analysis for the module so we can query it within
1524  // MainFPM.
1526  // Invalidate AAManager so it can be recreated and pick up the newly available
1527  // GlobalsAA.
1528  MPM.addPass(
1530 
1531  FunctionPassManager MainFPM;
1534  /*USeMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true));
1535 
1536  if (RunNewGVN)
1537  MainFPM.addPass(NewGVNPass());
1538  else
1539  MainFPM.addPass(GVN());
1540 
1541  // Remove dead memcpy()'s.
1542  MainFPM.addPass(MemCpyOptPass());
1543 
1544  // Nuke dead stores.
1545  MainFPM.addPass(DSEPass());
1547 
1548  // More loops are countable; try to optimize them.
1549  if (EnableLoopFlatten && Level.getSpeedupLevel() > 1)
1551 
1554 
1555  LoopPassManager LPM;
1556  LPM.addPass(IndVarSimplifyPass());
1557  LPM.addPass(LoopDeletionPass());
1558  // FIXME: Add loop interchange.
1559 
1560  // Unroll small loops and perform peeling.
1561  LPM.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
1562  /* OnlyWhenForced= */ !PTO.LoopUnrolling,
1564  // The loop passes in LPM (LoopFullUnrollPass) do not preserve MemorySSA.
1565  // *All* loop passes must preserve it, in order to be able to use it.
1567  std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/true));
1568 
1569  MainFPM.addPass(LoopDistributePass());
1570 
1571  addVectorPasses(Level, MainFPM, /* IsFullLTO */ true);
1572 
1573  invokePeepholeEPCallbacks(MainFPM, Level);
1574  MainFPM.addPass(JumpThreadingPass(/*InsertFreezeWhenUnfoldingSelect*/ true));
1576 
1577  // Lower type metadata and the type.test intrinsic. This pass supports
1578  // clang's control flow integrity mechanisms (-fsanitize=cfi*) and needs
1579  // to be run at link time if CFI is enabled. This pass does nothing if
1580  // CFI is disabled.
1581  MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
1582  // Run a second time to clean up any type tests left behind by WPD for use
1583  // in ICP (which is performed earlier than this in the regular LTO pipeline).
1584  MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
1585 
1586  // Enable splitting late in the FullLTO post-link pipeline. This is done in
1587  // the same stage in the old pass manager (\ref addLateLTOOptimizationPasses).
1588  if (EnableHotColdSplit)
1590 
1591  // Add late LTO optimization passes.
1592  // Delete basic blocks, which optimization passes may have killed.
1594  SimplifyCFGPass(SimplifyCFGOptions().hoistCommonInsts(true))));
1595 
1596  // Drop bodies of available externally objects to improve GlobalDCE.
1598 
1599  // Now that we have optimized the program, discard unreachable functions.
1601 
1602  if (PTO.MergeFunctions)
1604 
1605  // Emit annotation remarks.
1607 
1608  return MPM;
1609 }
1610 
1612  bool LTOPreLink) {
1614  "buildO0DefaultPipeline should only be used with O0");
1615 
1617 
1618  // Perform pseudo probe instrumentation in O0 mode. This is for the
1619  // consistency between different build modes. For example, an LTO build can be
1620  // mixed with an O0 prelink and an O2 postlink. Loading a sample profile in
1621  // the postlink will require pseudo probe instrumentation in the prelink.
1622  if (PGOOpt && PGOOpt->PseudoProbeForProfiling)
1624 
1625  if (PGOOpt && (PGOOpt->Action == PGOOptions::IRInstr ||
1626  PGOOpt->Action == PGOOptions::IRUse))
1628  MPM,
1629  /* RunProfileGen */ (PGOOpt->Action == PGOOptions::IRInstr),
1630  /* IsCS */ false, PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile);
1631 
1632  for (auto &C : PipelineStartEPCallbacks)
1633  C(MPM, Level);
1634 
1635  if (PGOOpt && PGOOpt->DebugInfoForProfiling)
1637 
1638  for (auto &C : PipelineEarlySimplificationEPCallbacks)
1639  C(MPM, Level);
1640 
1641  // Build a minimal pipeline based on the semantics required by LLVM,
1642  // which is just that always inlining occurs. Further, disable generating
1643  // lifetime intrinsics to avoid enabling further optimizations during
1644  // code generation.
1646  /*InsertLifetimeIntrinsics=*/false));
1647 
1648  if (PTO.MergeFunctions)
1650 
1651  if (EnableMatrix)
1652  MPM.addPass(
1654 
1655  if (!CGSCCOptimizerLateEPCallbacks.empty()) {
1656  CGSCCPassManager CGPM;
1657  for (auto &C : CGSCCOptimizerLateEPCallbacks)
1658  C(CGPM, Level);
1659  if (!CGPM.isEmpty())
1661  }
1662  if (!LateLoopOptimizationsEPCallbacks.empty()) {
1663  LoopPassManager LPM;
1664  for (auto &C : LateLoopOptimizationsEPCallbacks)
1665  C(LPM, Level);
1666  if (!LPM.isEmpty()) {
1669  }
1670  }
1671  if (!LoopOptimizerEndEPCallbacks.empty()) {
1672  LoopPassManager LPM;
1673  for (auto &C : LoopOptimizerEndEPCallbacks)
1674  C(LPM, Level);
1675  if (!LPM.isEmpty()) {
1678  }
1679  }
1680  if (!ScalarOptimizerLateEPCallbacks.empty()) {
1681  FunctionPassManager FPM;
1682  for (auto &C : ScalarOptimizerLateEPCallbacks)
1683  C(FPM, Level);
1684  if (!FPM.isEmpty())
1686  }
1687  if (!VectorizerStartEPCallbacks.empty()) {
1688  FunctionPassManager FPM;
1689  for (auto &C : VectorizerStartEPCallbacks)
1690  C(FPM, Level);
1691  if (!FPM.isEmpty())
1693  }
1694 
1696  CGSCCPassManager CGPM;
1697  CGPM.addPass(CoroSplitPass());
1700 
1701  for (auto &C : OptimizerLastEPCallbacks)
1702  C(MPM, Level);
1703 
1704  if (LTOPreLink)
1705  addRequiredLTOPreLinkPasses(MPM);
1706 
1707  return MPM;
1708 }
1709 
1711  AAManager AA;
1712 
1713  // The order in which these are registered determines their priority when
1714  // being queried.
1715 
1716  // First we register the basic alias analysis that provides the majority of
1717  // per-function local AA logic. This is a stateless, on-demand local set of
1718  // AA techniques.
1720 
1721  // Next we query fast, specialized alias analyses that wrap IR-embedded
1722  // information about aliasing.
1725 
1726  // Add support for querying global aliasing information when available.
1727  // Because the `AAManager` is a function analysis and `GlobalsAA` is a module
1728  // analysis, all that the `AAManager` can do is query for any *cached*
1729  // results from `GlobalsAA` through a readonly proxy.
1731 
1732  // Add target-specific alias analyses.
1733  if (TM)
1735 
1736  return AA;
1737 }
llvm::GlobalsAA
Analysis pass providing a never-invalidated alias analysis result.
Definition: GlobalsModRef.h:132
TypeBasedAliasAnalysis.h
llvm::PassManager< Loop, LoopAnalysisManager, LoopStandardAnalysisResults &, LPMUpdater & >
Definition: LoopPassManager.h:70
llvm::BasicAA
Analysis pass providing a never-invalidated alias analysis result.
Definition: BasicAliasAnalysis.h:164
llvm::IPSCCPPass
Pass to perform interprocedural constant propagation.
Definition: SCCP.h:30
llvm::ScopedNoAliasAA
Analysis pass providing a never-invalidated alias analysis result.
Definition: ScopedNoAliasAA.h:53
llvm::FunctionSpecializationPass
Pass to perform interprocedural constant propagation by specializing functions.
Definition: SCCP.h:37
llvm::AAManager
A manager for alias analyses.
Definition: AliasAnalysis.h:1288
IROutliner.h
llvm::RunNewGVN
cl::opt< bool > RunNewGVN
llvm::createCGSCCToFunctionPassAdaptor
CGSCCToFunctionPassAdaptor createCGSCCToFunctionPassAdaptor(FunctionPassT &&Pass)
A function to deduce a function pass type and wrap it in the templated adaptor.
Definition: CGSCCPassManager.h:517
getInlineParamsFromOptLevel
static InlineParams getInlineParamsFromOptLevel(OptimizationLevel Level)
Definition: PassBuilderPipelines.cpp:663
DeadArgumentElimination.h
llvm
This file implements support for optimizing divisions by a constant.
Definition: AllocatorList.h:23
PassBuilder.h
Phase
aarch64 falkor hwpf fix Falkor HW Prefetch Fix Late Phase
Definition: AArch64FalkorHWPFFix.cpp:231
llvm::SampleProfileProbePass
Definition: SampleProfileProbe.h:133
Mem2Reg.h
llvm::OptimizationLevel::O1
static const OptimizationLevel O1
Optimize quickly without destroying debuggability.
Definition: OptimizationLevel.h:57
llvm::LoopSinkPass
A pass that does profile-guided sinking of instructions into loops.
Definition: LoopSink.h:33
llvm::PassManager::isEmpty
bool isEmpty() const
Returns if the pass manager contains any passes.
Definition: PassManager.h:577
OpenMPOpt.h
llvm::InferFunctionAttrsPass
A pass which infers function attributes from the names and signatures of function declarations in a m...
Definition: InferFunctionAttrs.h:25
llvm::SampleProfileLoaderPass
The sample profiler data loader pass.
Definition: SampleProfile.h:25
CalledValuePropagation.h
Annotation2Metadata.h
llvm::LoopInterchangePass
Definition: LoopInterchange.h:17
llvm::AlignmentFromAssumptionsPass
Definition: AlignmentFromAssumptions.h:29
llvm::EnableHotColdSplit
cl::opt< bool > EnableHotColdSplit
PartialInlining.h
Inliner.h
llvm::Annotation2MetadataPass
Pass to convert @llvm.global.annotations to !annotation metadata.
Definition: Annotation2Metadata.h:24
llvm::ThinOrFullLTOPhase::ThinLTOPostLink
@ ThinLTOPostLink
ThinLTO postlink (backend compile) phase.
GlobalSplit.h
llvm::GVNHoistPass
A simple and fast domtree-based GVN pass to hoist common expressions from sibling branches.
Definition: GVN.h:377
CorrelatedValuePropagation.h
llvm::LoopIdiomRecognizePass
Performs Loop Idiom Recognize Pass.
Definition: LoopIdiomRecognize.h:40
llvm::ExtraVectorizerPasses
cl::opt< bool > ExtraVectorizerPasses
llvm::EnableConstraintElimination
cl::opt< bool > EnableConstraintElimination
llvm::PassManager::addPass
LLVM_ATTRIBUTE_MINSIZE std::enable_if_t<!std::is_same< PassT, PassManager >::value > addPass(PassT &&Pass)
Definition: PassManager.h:553
llvm::OptimizationLevel::O2
static const OptimizationLevel O2
Optimize for fast execution as much as possible without triggering significant incremental compile ti...
Definition: OptimizationLevel.h:74
llvm::CrossDSOCFIPass
Definition: CrossDSOCFI.h:20
llvm::ReassociatePass
Reassociate commutative expressions.
Definition: Reassociate.h:71
ConstantMerge.h
llvm::SetLicmMssaNoAccForPromotionCap
cl::opt< unsigned > SetLicmMssaNoAccForPromotionCap
AlignmentFromAssumptions.h
IndVarSimplify.h
ErrorHandling.h
llvm::RunPartialInlining
cl::opt< bool > RunPartialInlining
SCCP.h
llvm::PassBuilder::buildModuleOptimizationPipeline
ModulePassManager buildModuleOptimizationPipeline(OptimizationLevel Level, bool LTOPreLink=false)
Construct the core LLVM module optimization pipeline.
Definition: PassBuilderPipelines.cpp:1020
TailRecursionElimination.h
DivRemPairs.h
llvm::LoopRotatePass
A simple loop rotation transformation.
Definition: LoopRotation.h:23
llvm::BDCEPass
Definition: BDCE.h:25
DeadStoreElimination.h
OptimizationRemarkEmitter.h
CoroCleanup.h
GlobalsModRef.h
VectorCombine.h
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
MergeFunctions.h
llvm::PseudoProbeUpdatePass
Definition: SampleProfileProbe.h:153
LoopFlatten.h
ElimAvailExtern.h
llvm::PipelineTuningOptions::ForgetAllSCEVInLoopUnroll
bool ForgetAllSCEVInLoopUnroll
Tuning option to forget all SCEV loops in LoopUnroll.
Definition: PassBuilder.h:59
llvm::LoopUnrollOptions
A set of parameters used to control various transforms performed by the LoopUnroll pass.
Definition: LoopUnrollPass.h:61
InjectTLIMappings.h
ConstraintElimination.h
Float2Int.h
CGProfile.h
llvm::LoopDistributePass
Definition: LoopDistribute.h:25
JumpThreading.h
HotColdSplitting.h
llvm::EnableKnowledgeRetention
cl::opt< bool > EnableKnowledgeRetention
enable preservation of attributes in assume like: call void @llvm.assume(i1 true) [ "nonnull"(i32* PT...
Definition: InstCombineCalls.cpp:98
llvm::PGOInstrumentationUse
The profile annotation (profile-instr-use) pass for IR based PGO.
Definition: PGOInstrumentation.h:58
llvm::InlineParams
Thresholds to tune inline cost analysis.
Definition: InlineCost.h:185
LoopUnrollAndJamPass.h
llvm::LoopLoadEliminationPass
Pass to forward loads in a loop around the backedge to subsequent iterations.
Definition: LoopLoadElimination.h:27
llvm::EnableOrderFileInstrumentation
cl::opt< bool > EnableOrderFileInstrumentation
llvm::ThinOrFullLTOPhase::ThinLTOPreLink
@ ThinLTOPreLink
ThinLTO prelink (summary) phase.
llvm::TailCallElimPass
Definition: TailRecursionElimination.h:60
llvm::CoroCleanupPass
Definition: CoroCleanup.h:23
llvm::EnableMatrix
cl::opt< bool > EnableMatrix
LoopInstSimplify.h
llvm::SLPVectorizerPass
Definition: SLPVectorizer.h:58
llvm::ReversePostOrderFunctionAttrsPass
A pass to do RPO deduction and propagation of function attributes.
Definition: FunctionAttrs.h:77
llvm::PartialInlinerPass
Pass to partially inline functions.
Definition: PartialInlining.h:24
llvm::PGOOptions::SampleUse
@ SampleUse
Definition: PGOOptions.h:23
llvm::DSEPass
This class implements a trivial dead store elimination.
Definition: DeadStoreElimination.h:28
llvm::OptimizationLevel::O0
static const OptimizationLevel O0
Disable as many optimizations as possible.
Definition: OptimizationLevel.h:41
llvm::PassBuilder::addPGOInstrPassesForO0
void addPGOInstrPassesForO0(ModulePassManager &MPM, bool RunProfileGen, bool IsCS, std::string ProfileFile, std::string ProfileRemappingFile)
Add PGOInstrumentation passes for O0 only.
Definition: PassBuilderPipelines.cpp:638
BasicAliasAnalysis.h
CoroElide.h
llvm::PipelineTuningOptions::LicmMssaNoAccForPromotionCap
unsigned LicmMssaNoAccForPromotionCap
Tuning option to disable promotion to scalars in LICM with MemorySSA, if the number of access is too ...
Definition: PassBuilder.h:67
MergedLoadStoreMotion.h
llvm::AggressiveInstCombinePass
Definition: AggressiveInstCombine.h:24
llvm::InstrProfiling
Instrumentation based profiling lowering pass.
Definition: InstrProfiling.h:35
llvm::ConstraintEliminationPass
Definition: ConstraintElimination.h:16
llvm::HotColdSplittingPass
Pass to outline cold regions.
Definition: HotColdSplitting.h:61
llvm::TypeBasedAA
Analysis pass providing a never-invalidated alias analysis result.
Definition: TypeBasedAliasAnalysis.h:59
AliasAnalysis.h
llvm::ThinOrFullLTOPhase
ThinOrFullLTOPhase
This enumerates the LLVM full LTO or ThinLTO optimization phases.
Definition: Pass.h:73
CommandLine.h
LoopRotation.h
llvm::MODULE
@ MODULE
Definition: Attributor.h:4617
llvm::ModuleInlinerWrapperPass
Module pass, wrapping the inliner pass.
Definition: Inliner.h:121
UseInlineAdvisor
static cl::opt< InliningAdvisorMode > UseInlineAdvisor("enable-ml-inliner", cl::init(InliningAdvisorMode::Default), cl::Hidden, cl::desc("Enable ML policy for inliner. Currently trained for -Oz only"), cl::values(clEnumValN(InliningAdvisorMode::Default, "default", "Heuristics-based inliner version."), clEnumValN(InliningAdvisorMode::Development, "development", "Use development mode (runtime-loadable model)."), clEnumValN(InliningAdvisorMode::Release, "release", "Use release mode (AOT-compiled model).")))
llvm::CoroElidePass
Definition: CoroElide.h:25
llvm::ControlHeightReductionPass
Definition: ControlHeightReduction.h:22
llvm::CorrelatedValuePropagationPass
Definition: CorrelatedValuePropagation.h:18
llvm::FlattenedProfileUsed
cl::opt< bool > FlattenedProfileUsed
TargetMachine.h
llvm::AttributorPass
}
Definition: Attributor.h:2734
AlwaysInliner.h
InstrProfiling.h
LoopIdiomRecognize.h
llvm::LICMPass
Performs Loop Invariant Code Motion Pass.
Definition: LICM.h:46
ArgumentPromotion.h
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::PostOrderFunctionAttrsPass
Computes function attributes in post-order over the call graph.
Definition: FunctionAttrs.h:58
llvm::ModuleInlinerWrapperPass::getPM
CGSCCPassManager & getPM()
Allow adding more CGSCC passes, besides inlining.
Definition: Inliner.h:134
llvm::PassBuilder::buildFunctionSimplificationPipeline
FunctionPassManager buildFunctionSimplificationPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the core LLVM function canonicalization and simplification pipeline.
Definition: PassBuilderPipelines.cpp:363
llvm::PGOOptions::IRUse
@ IRUse
Definition: PGOOptions.h:23
llvm::CoroEarlyPass
Definition: CoroEarly.h:26
IP
Definition: NVPTXLowerArgs.cpp:166
AssumeBundleBuilder.h
llvm::DisablePreInliner
cl::opt< bool > DisablePreInliner
llvm::GVNSinkPass
Uses an "inverted" value numbering to decide the similarity of expressions and sinks similar expressi...
Definition: GVN.h:384
InlineAdvisor.h
llvm::createModuleToFunctionPassAdaptor
ModuleToFunctionPassAdaptor createModuleToFunctionPassAdaptor(FunctionPassT &&Pass)
A function to deduce a function pass type and wrap it in the templated adaptor.
Definition: PassManager.h:1225
llvm::OpenMPOptCGSCCPass
Definition: OpenMPOpt.h:43
llvm::PGOIndirectCallPromotion
The indirect function call promotion pass.
Definition: PGOInstrumentation.h:73
Options
const char LLVMTargetMachineRef LLVMPassBuilderOptionsRef Options
Definition: PassBuilderBindings.cpp:48
MemCpyOptimizer.h
llvm::CanonicalizeAliasesPass
Simple pass that canonicalizes aliases.
Definition: CanonicalizeAliases.h:22
GVN.h
llvm::LoopDeletionPass
Definition: LoopDeletion.h:24
llvm::PromotePass
Definition: Mem2Reg.h:23
llvm::LoopFlattenPass
Definition: LoopFlatten.h:23
llvm::getInlineParams
InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options.
Definition: InlineCost.cpp:3072
llvm::PGOOptions::CSIRInstr
@ CSIRInstr
Definition: PGOOptions.h:24
llvm::PreInlineThreshold
cl::opt< int > PreInlineThreshold
llvm::PassBuilder::buildInlinerPipeline
ModuleInlinerWrapperPass buildInlinerPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the module pipeline that performs inlining as well as the inlining-driven cleanups.
Definition: PassBuilderPipelines.cpp:668
llvm::GlobalOptPass
Optimize globals that never have their address taken.
Definition: GlobalOpt.h:25
llvm::SyntheticCountsPropagation
Definition: SyntheticCountsPropagation.h:17
llvm::PGOMemOPSizeOpt
The profile size based optimization pass for memory intrinsics.
Definition: PGOInstrumentation.h:86
llvm::PassBuilder::buildThinLTOPreLinkDefaultPipeline
ModulePassManager buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level)
Build a pre-link, ThinLTO-targeting default optimization pipeline to a pass manager.
Definition: PassBuilderPipelines.cpp:1219
ScopedNoAliasAA.h
addAnnotationRemarksPass
static void addAnnotationRemarksPass(ModulePassManager &MPM)
Definition: PassBuilderPipelines.cpp:214
llvm::EnableDFAJumpThreading
cl::opt< bool > EnableDFAJumpThreading
EnablePGOInlineDeferral
static cl::opt< bool > EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true), cl::Hidden, cl::desc("Enable inline deferral during PGO"))
Flag to enable inline deferral during PGO.
llvm::MemProfilerPass
Public interface to the memory profiler pass for instrumenting code to profile memory accesses.
Definition: MemProfiler.h:28
llvm::LoopSimplifyCFGPass
Performs basic CFG simplifications to assist other loop passes.
Definition: LoopSimplifyCFG.h:26
llvm::PGOInstrumentationGenCreateVar
The instrumentation (profile-instr-gen) pass for IR based PGO.
Definition: PGOInstrumentation.h:35
llvm::MergedLoadStoreMotionPass
Definition: MergedLoadStoreMotion.h:41
ControlHeightReduction.h
EnableO3NonTrivialUnswitching
static cl::opt< bool > EnableO3NonTrivialUnswitching("enable-npm-O3-nontrivial-unswitch", cl::init(true), cl::Hidden, cl::ZeroOrMore, cl::desc("Enable non-trivial loop unswitching for -O3"))
InstSimplifyPass.h
llvm::LowerTypeTestsPass
Definition: LowerTypeTests.h:200
llvm::AnnotationRemarksPass
Definition: AnnotationRemarks.h:23
llvm::SimpleLoopUnswitchPass
This pass transforms loops that contain branches or switches on loop- invariant conditions to have mu...
Definition: SimpleLoopUnswitch.h:62
llvm::cl::ZeroOrMore
@ ZeroOrMore
Definition: CommandLine.h:120
SROA.h
SampleProfile.h
llvm::GlobalSplitPass
Pass to perform split of global variables.
Definition: GlobalSplit.h:26
ForceFunctionAttrs.h
llvm::cl::opt
Definition: CommandLine.h:1432
Attributor.h
llvm::ModuleMemProfilerPass
Public interface to the memory profiler module pass for instrumenting code to profile memory allocati...
Definition: MemProfiler.h:37
llvm::VectorCombinePass
Optimize scalar/vector interactions in IR using target cost models.
Definition: VectorCombine.h:23
SpeculativeExecution.h
llvm::cl::values
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:697
llvm::GVN
The core GVN pass object.
Definition: GVN.h:118
EarlyCSE.h
CrossDSOCFI.h
ProfileSummaryInfo.h
llvm::WholeProgramDevirtPass
Definition: WholeProgramDevirt.h:223
CoroSplit.h
llvm::AssumeSimplifyPass
This pass attempts to minimize the number of assumes without losing any information.
Definition: AssumeBundleBuilder.h:54
llvm::PassBuilder::buildLTOPreLinkDefaultPipeline
ModulePassManager buildLTOPreLinkDefaultPipeline(OptimizationLevel Level)
Build a pre-link, LTO-targeting default optimization pipeline to a pass manager.
Definition: PassBuilderPipelines.cpp:1336
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::AttributorCGSCCPass
Definition: Attributor.h:2737
PGOInstrumentation.h
CGSCCPassManager.h
llvm::PassBuilder::buildLTODefaultPipeline
ModulePassManager buildLTODefaultPipeline(OptimizationLevel Level, ModuleSummaryIndex *ExportSummary)
Build an LTO default optimization pipeline to a pass manager.
Definition: PassBuilderPipelines.cpp:1345
llvm::ForceFunctionAttrsPass
Pass which forces specific function attributes into the IR, primarily as a debugging tool.
Definition: ForceFunctionAttrs.h:22
EnableSyntheticCounts
static cl::opt< bool > EnableSyntheticCounts("enable-npm-synthetic-counts", cl::init(false), cl::Hidden, cl::ZeroOrMore, cl::desc("Run synthetic function entry count generation " "pass"))
AggressiveInstCombine.h
llvm::InvalidateAnalysisPass
A no-op pass template which simply forces a specific analysis result to be invalidated.
Definition: PassManager.h:1276
LowerExpectIntrinsic.h
llvm::PassBuilder::buildThinLTODefaultPipeline
ModulePassManager buildThinLTODefaultPipeline(OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary)
Build a ThinLTO default optimization pipeline to a pass manager.
Definition: PassBuilderPipelines.cpp:1280
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:441
llvm::DeadArgumentEliminationPass
Eliminate dead arguments (and return values) from functions.
Definition: DeadArgumentElimination.h:38
ADCE.h
llvm::EnableGVNSink
cl::opt< bool > EnableGVNSink
llvm::OptimizationLevel::Oz
static const OptimizationLevel Oz
A very specialized mode that will optimize for code size at any and all costs.
Definition: OptimizationLevel.h:108
llvm::PassBuilder::buildDefaultAAPipeline
AAManager buildDefaultAAPipeline()
Build the default AAManager with the default alias analysis pipeline registered.
Definition: PassBuilderPipelines.cpp:1710
InferFunctionAttrs.h
SimpleLoopUnswitch.h
llvm::PipelineTuningOptions::PipelineTuningOptions
PipelineTuningOptions()
Constructor sets pipeline tuning defaults based on cl::opts.
Definition: PassBuilderPipelines.cpp:165
llvm::InstrProfOptions
Options for the frontend instrumentation based profiling pass.
Definition: Instrumentation.h:121
llvm::SpeculativeExecutionPass
Definition: SpeculativeExecution.h:69
llvm::createFunctionToLoopPassAdaptor
std::enable_if_t< is_detected< HasRunOnLoopT, LoopPassT >::value, FunctionToLoopPassAdaptor > createFunctionToLoopPassAdaptor(LoopPassT &&Pass, bool UseMemorySSA=false, bool UseBlockFrequencyInfo=false, bool UseBranchProbabilityInfo=false)
A function to deduce a loop pass type and wrap it in the templated adaptor.
Definition: LoopPassManager.h:474
llvm::TargetMachine::registerDefaultAliasAnalyses
virtual void registerDefaultAliasAnalyses(AAManager &)
Allow the target to register alias analyses with the AAManager for use with the new pass manager.
Definition: TargetMachine.h:345
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::EliminateAvailableExternallyPass
A pass that transforms external global definitions into declarations.
Definition: ElimAvailExtern.h:24
llvm::ADCEPass
A DCE pass that assumes instructions are dead until proven otherwise.
Definition: ADCE.h:31
llvm::PipelineTuningOptions::SLPVectorization
bool SLPVectorization
Tuning option to enable/disable slp loop vectorization, set based on opt level.
Definition: PassBuilder.h:52
llvm::RelLookupTableConverterPass
Definition: RelLookupTableConverter.h:60
Reassociate.h
llvm::JumpThreadingPass
This pass performs 'jump threading', which looks at blocks that have multiple predecessors and multip...
Definition: JumpThreading.h:80
llvm::LoopFullUnrollPass
Loop unroll pass that only does full loop unrolling and peeling.
Definition: LoopUnrollPass.h:26
llvm::LowerExpectIntrinsicPass
Definition: LowerExpectIntrinsic.h:23
llvm::EnableIROutliner
cl::opt< bool > EnableIROutliner
llvm::InliningAdvisorMode::Development
@ Development
SampleProfileProbe.h
LoopPassManager.h
llvm::Float2IntPass
Definition: Float2Int.h:26
llvm::LibCallsShrinkWrapPass
Definition: LibCallsShrinkWrap.h:18
NameAnonGlobals.h
llvm::CalledValuePropagationPass
Definition: CalledValuePropagation.h:26
llvm::WarnMissedTransformationsPass
Definition: WarnMissedTransforms.h:24
llvm::IndVarSimplifyPass
Definition: IndVarSimplify.h:25
LowerMatrixIntrinsics.h
llvm::GlobalDCEPass
Pass to remove unused function declarations.
Definition: GlobalDCE.h:29
LoopInterchange.h
llvm::PipelineTuningOptions::CallGraphProfile
bool CallGraphProfile
Tuning option to enable/disable call graph profile.
Definition: PassBuilder.h:71
llvm::ThinOrFullLTOPhase::FullLTOPostLink
@ FullLTOPostLink
Full LTO postlink (backend compile) phase.
llvm::LowerMatrixIntrinsicsPass
Definition: LowerMatrixIntrinsics.h:19
llvm::NewGVNPass
Definition: NewGVN.h:23
SyntheticCountsPropagation.h
llvm::AddDiscriminatorsPass
Definition: AddDiscriminators.h:24
CanonicalizeAliases.h
llvm::CGSCC
@ CGSCC
Definition: Attributor.h:4618
LibCallsShrinkWrap.h
llvm::AAManager::registerModuleAnalysis
void registerModuleAnalysis()
Register a specific AA result.
Definition: AliasAnalysis.h:1298
llvm::InstCombinePass
Definition: InstCombine.h:27
GlobalDCE.h
llvm::PassManager< Loop, LoopAnalysisManager, LoopStandardAnalysisResults &, LPMUpdater & >::addPass
LLVM_ATTRIBUTE_MINSIZE std::enable_if_t< is_detected< HasRunOnLoopT, PassT >::value > addPass(PassT &&Pass)
Definition: LoopPassManager.h:108
llvm::OptimizationLevel::O3
static const OptimizationLevel O3
Optimize for fast execution as much as possible.
Definition: OptimizationLevel.h:89
llvm::EnableLoopFlatten
cl::opt< bool > EnableLoopFlatten
clEnumValN
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:672
llvm::PipelineTuningOptions::LoopVectorization
bool LoopVectorization
Tuning option to enable/disable loop vectorization, set based on opt level.
Definition: PassBuilder.h:48
NewGVN.h
llvm::PICLevel::Level
Level
Definition: CodeGen.h:33
LowerTypeTests.h
CallSiteSplitting.h
llvm::LoopVectorizeOptions
Definition: LoopVectorize.h:83
llvm::InstrOrderFilePass
The instrumentation pass for recording function order.
Definition: InstrOrderFile.h:20
LoopSimplifyCFG.h
llvm::CGProfilePass
Definition: CGProfile.h:19
AnnotationRemarks.h
LoopVectorize.h
SCCP.h
llvm::ModuleInlinerWrapperPass::addModulePass
void addModulePass(T Pass)
Allow adding module-level passes benefiting the contained CGSCC passes.
Definition: Inliner.h:137
EnableMemProfiler
static cl::opt< bool > EnableMemProfiler("enable-mem-prof", cl::init(false), cl::Hidden, cl::ZeroOrMore, cl::desc("Enable memory profiler"))
llvm::PassManager
Manages a sequence of passes over a particular unit of IR.
Definition: PassManager.h:472
llvm::LoopUnrollAndJamPass
A simple loop unroll-and-jam transformation.
Definition: LoopUnrollAndJamPass.h:19
llvm::IROutlinerPass
Pass to outline similar regions.
Definition: IROutliner.h:383
llvm::ForgetSCEVInLoopUnroll
cl::opt< bool > ForgetSCEVInLoopUnroll
SLPVectorizer.h
LoopLoadElimination.h
llvm::PassBuilder::buildPerModuleDefaultPipeline
ModulePassManager buildPerModuleDefaultPipeline(OptimizationLevel Level, bool LTOPreLink=false)
Build a per-module default optimization pipeline.
Definition: PassBuilderPipelines.cpp:1177
llvm::EnableCHR
cl::opt< bool > EnableCHR
LoopUnrollPass.h
llvm::SCCPPass
This pass performs function-level constant propagation and merging.
Definition: SCCP.h:38
llvm::PipelineTuningOptions::MergeFunctions
bool MergeFunctions
Tuning option to enable/disable function merging.
Definition: PassBuilder.h:75
PGOOptions.h
llvm::OptimizationLevel
Definition: OptimizationLevel.h:22
llvm::NameAnonGlobalPass
Simple pass that provides a name to every anonymous globals.
Definition: NameAnonGlobals.h:23
PassManager.h
LowerConstantIntrinsics.h
llvm::ThinOrFullLTOPhase::None
@ None
No LTO/ThinLTO behavior needed.
llvm::PGOOptions::CSIRUse
@ CSIRUse
Definition: PGOOptions.h:24
isLTOPreLink
static bool isLTOPreLink(ThinOrFullLTOPhase Phase)
Definition: PassBuilderPipelines.cpp:221
llvm::CoroSplitPass
Definition: CoroSplit.h:24
llvm::PassBuilder::buildO0DefaultPipeline
ModulePassManager buildO0DefaultPipeline(OptimizationLevel Level, bool LTOPreLink=false)
Build an O0 pipeline with the minimal semantically required passes.
Definition: PassBuilderPipelines.cpp:1611
WholeProgramDevirt.h
llvm::MemCpyOptPass
Definition: MemCpyOptimizer.h:41
llvm::AAManager::registerFunctionAnalysis
void registerFunctionAnalysis()
Register a specific AA result.
Definition: AliasAnalysis.h:1293
llvm::ArgumentPromotionPass
Argument promotion pass.
Definition: ArgumentPromotion.h:24
PerformMandatoryInliningsFirst
static cl::opt< bool > PerformMandatoryInliningsFirst("mandatory-inlining-first", cl::init(true), cl::Hidden, cl::ZeroOrMore, cl::desc("Perform mandatory inlinings module-wide, before performing " "inlining."))
llvm::DivRemPairsPass
Hoist/decompose integer division and remainder instructions to enable CFG improvements and better cod...
Definition: DivRemPairs.h:23
SimplifyCFGOptions.h
LoopDistribute.h
llvm::AlwaysInlinerPass
Inlines functions marked as "always_inline".
Definition: AlwaysInliner.h:29
WarnMissedTransforms.h
llvm::ThinOrFullLTOPhase::FullLTOPreLink
@ FullLTOPreLink
Full LTO prelink phase.
llvm::SimplifyCFGOptions
Definition: SimplifyCFGOptions.h:23
GlobalOpt.h
llvm::InjectTLIMappings
Definition: InjectTLIMappings.h:20
LoopSink.h
OptimizationLevel.h
llvm::LowerConstantIntrinsicsPass
Definition: LowerConstantIntrinsics.h:23
llvm::OpenMPOptPass
OpenMP optimizations pass.
Definition: OpenMPOpt.h:38
llvm::ModuleSummaryIndex
Class to hold module path string table and global value map, and encapsulate methods for operating on...
Definition: ModuleSummaryIndex.h:1078
llvm::InliningAdvisorMode::Default
@ Default
llvm::PGOInstrumentationGen
The instrumentation (profile-instr-gen) pass for IR based PGO.
Definition: PGOInstrumentation.h:47
llvm::CallSiteSplittingPass
Definition: CallSiteSplitting.h:17
LoopDeletion.h
llvm::LoopInstSimplifyPass
Performs Loop Inst Simplify Pass.
Definition: LoopInstSimplify.h:25
MemProfiler.h
LICM.h
llvm::EnableLoopInterchange
cl::opt< bool > EnableLoopInterchange
CoroEarly.h
llvm::EnableUnrollAndJam
cl::opt< bool > EnableUnrollAndJam
llvm::PGOOptions::IRInstr
@ IRInstr
Definition: PGOOptions.h:23
llvm::InliningAdvisorMode::Release
@ Release
llvm::AttributorRun
cl::opt< AttributorRunOption > AttributorRun
BDCE.h
InstrOrderFile.h
InstCombine.h
llvm::InstSimplifyPass
Run instruction simplification across each instruction in the function.
Definition: InstSimplifyPass.h:34
llvm::RequireAnalysisPass
A utility pass template to force an analysis result to be available.
Definition: PassManager.h:1248
llvm::PassBuilder::buildModuleSimplificationPipeline
ModulePassManager buildModuleSimplificationPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the core LLVM module canonicalization and simplification pipeline.
Definition: PassBuilderPipelines.cpp:734
llvm::PipelineTuningOptions::LoopUnrolling
bool LoopUnrolling
Tuning option to enable/disable loop unrolling. Its default value is true.
Definition: PassBuilder.h:55
llvm::cl::desc
Definition: CommandLine.h:412
llvm::PipelineTuningOptions::LicmMssaOptCap
unsigned LicmMssaOptCap
Tuning option to cap the number of calls to retrieve clobbering accesses in MemorySSA,...
Definition: PassBuilder.h:63
llvm::PipelineTuningOptions::LoopInterleaving
bool LoopInterleaving
Tuning option to set loop interleaving on/off, set based on opt level.
Definition: PassBuilder.h:44
llvm::MaxDevirtIterations
cl::opt< unsigned > MaxDevirtIterations("max-devirt-iterations", cl::ReallyHidden, cl::init(4))
Definition: PassBuilderPipelines.cpp:179
SimplifyCFG.h
llvm::SetLicmMssaOptCap
cl::opt< unsigned > SetLicmMssaOptCap
llvm::MergeFunctionsPass
Merge identical functions.
Definition: MergeFunctions.h:25
MPM
ModulePassManager MPM
Definition: PassBuilderBindings.cpp:70
llvm::createModuleToPostOrderCGSCCPassAdaptor
ModuleToPostOrderCGSCCPassAdaptor createModuleToPostOrderCGSCCPassAdaptor(CGSCCPassT &&Pass)
A function to deduce a CGSCC pass type and wrap it in the templated adaptor.
Definition: CGSCCPassManager.h:389
llvm::PassManager< Loop, LoopAnalysisManager, LoopStandardAnalysisResults &, LPMUpdater & >::isEmpty
bool isEmpty() const
Definition: LoopPassManager.h:167
llvm::LoopUnrollPass
Loop unroll pass that will support both full and partial unrolling.
Definition: LoopUnrollPass.h:133
FunctionAttrs.h
llvm::SimplifyCFGPass
A pass to simplify and canonicalize the CFG of a function.
Definition: SimplifyCFG.h:29
llvm::EnableFunctionSpecialization
cl::opt< bool > EnableFunctionSpecialization
llvm::LoopVectorizePass
The LoopVectorize Pass.
Definition: LoopVectorize.h:129
RelLookupTableConverter.h
llvm::SROA
An optimization pass providing Scalar Replacement of Aggregates.
Definition: SROA.h:65
llvm::EarlyCSEPass
A simple and fast domtree-based CSE pass.
Definition: EarlyCSE.h:30
llvm::ConstantMergePass
A pass that merges duplicate global constants into a single constant.
Definition: ConstantMerge.h:29
llvm::DFAJumpThreadingPass
Definition: DFAJumpThreading.h:21
AddDiscriminators.h
DFAJumpThreading.h
llvm::EnableGVNHoist
cl::opt< bool > EnableGVNHoist