doxygen/LoopVectorize_8h_source.html

//===- LoopVectorize.h ------------------------------------------*- C++ -*-===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//

//

// This is the LLVM loop vectorizer. This pass modifies 'vectorizable' loops

// and generates target-independent LLVM-IR.

// The vectorizer uses the TargetTransformInfo analysis to estimate the costs

// of instructions in order to estimate the profitability of vectorization.

//

// The loop vectorizer combines consecutive loop iterations into a single

// 'wide' iteration. After this transformation the index is incremented

// by the SIMD vector width, and not by one.

//

// This pass has four parts:

// 1. The main loop pass that drives the different parts.

// 2. LoopVectorizationLegality - A unit that checks for the legality

//    of the vectorization.

// 3. InnerLoopVectorizer - A unit that performs the actual

//    widening of instructions.

// 4. LoopVectorizationCostModel - A unit that checks for the profitability

//    of vectorization. It decides on the optimal vector width, which

//    can be one, if vectorization is not profitable.

//

// There is a development effort going on to migrate loop vectorizer to the

// VPlan infrastructure and to introduce outer loop vectorization support (see

// docs/VectorizationPlan.rst and

// http://lists.llvm.org/pipermail/llvm-dev/2017-December/119523.html). For this

// purpose, we temporarily introduced the VPlan-native vectorization path: an

// alternative vectorization path that is natively implemented on top of the

// VPlan infrastructure. See EnableVPlanNativePath for enabling.

//

//===----------------------------------------------------------------------===//

//

// The reduction-variable vectorization is based on the paper:

//  D. Nuzman and R. Henderson. Multi-platform Auto-vectorization.

//

// Variable uniformity checks are inspired by:

//  Karrenberg, R. and Hack, S. Whole Function Vectorization.

//

// The interleaved access vectorization is based on the paper:

//  Dorit Nuzman, Ira Rosen and Ayal Zaks.  Auto-Vectorization of Interleaved

//  Data for SIMD

//

// Other ideas/concepts are from:

//  A. Zaks and D. Nuzman. Autovectorization in GCC-two years later.

//

//  S. Maleki, Y. Gao, M. Garzaran, T. Wong and D. Padua.  An Evaluation of

//  Vectorizing Compilers.

//

//===----------------------------------------------------------------------===//


#ifndef LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZE_H

#define LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZE_H


#include "llvm/IR/PassManager.h"

#include "llvm/Support/CommandLine.h"

#include <functional>


namespace llvm {


// Forward declarations. Each of these types is only named through pointers or
// references in this header, so the full definitions are not required here.
class AssumptionCache;

class BlockFrequencyInfo;

class DemandedBits;

class DominatorTree;

class Function;

class Loop;

class LoopAccessInfoManager;

class LoopInfo;

class OptimizationRemarkEmitter;

class ProfileSummaryInfo;

class ScalarEvolution;

class TargetLibraryInfo;

class TargetTransformInfo;


// Boolean command-line options that are only declared here; their definitions
// live in another translation unit.
extern cl::opt<bool> EnableLoopInterleaving;

extern cl::opt<bool> EnableLoopVectorization;


/// Marker analysis: a cached result for a function signals that the extra
/// passes scheduled after loop vectorization should be run on it.
struct ShouldRunExtraVectorPasses
    : public AnalysisInfoMixin<ShouldRunExtraVectorPasses> {
  static AnalysisKey Key;

  /// Stateless result type; it carries no data of its own.
  struct Result {
    /// Decides whether the cached result must be dropped.
    ///
    /// \returns true (invalidate) unless the checker built from \p PA reports
    /// this analysis as preserved-when-stateless.
    bool invalidate(Function &F, const PreservedAnalyses &PA,
                    FunctionAnalysisManager::Invalidator &) {
      // The result stays cached unless it was explicitly invalidated.
      return !PA.getChecker<ShouldRunExtraVectorPasses>()
                  .preservedWhenStateless();
    }
  };

  /// Produces an empty Result; neither \p F nor \p FAM is inspected.
  Result run(Function &F, FunctionAnalysisManager &FAM) { return {}; }
};


/// Function pass manager holding extra simplification passes that run after
/// vectorization only on request. The request is expressed by a cached
/// ShouldRunExtraVectorPasses result for the function, which LoopVectorize
/// caches when the extra simplifications could be beneficial.
struct ExtraVectorPassManager : public FunctionPassManager {
  /// Runs the nested passes on \p F only if a ShouldRunExtraVectorPasses result
  /// is cached in \p AM, and always abandons that marker before returning.
  ///
  /// \returns all analyses preserved, intersected with whatever the nested
  /// pipeline preserved when it ran, with the marker analysis abandoned.
  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM) {
    PreservedAnalyses Preserved = PreservedAnalyses::all();

    const bool ExtraPassesRequested =
        AM.getCachedResult<ShouldRunExtraVectorPasses>(F) != nullptr;
    if (ExtraPassesRequested) {
      Preserved.intersect(FunctionPassManager::run(F, AM));
    }

    // Abandon the marker whether or not the nested passes ran.
    Preserved.abandon<ShouldRunExtraVectorPasses>();
    return Preserved;
  }
};


/// Options controlling which loops the loop vectorizer considers.
struct LoopVectorizeOptions {
  /// When false, every loop is a candidate for interleaving; when true, only
  /// loops that explicitly request interleaving are considered.
  bool InterleaveOnlyWhenForced;

  /// When false, every loop is a candidate for vectorization; when true, only
  /// loops that explicitly request vectorization are considered.
  bool VectorizeOnlyWhenForced;

  /// Default construction leaves both "only when forced" flags false, which
  /// corresponds to EnableLoopInterleaving = true and
  /// EnableLoopVectorization = true. The interleaving default therefore
  /// agrees with the cl::opt flag, while the vectorization default does not.
  /// FIXME: The default for EnableLoopVectorization in the cl::opt should be
  /// set to true, and the corresponding change to account for this be made in
  /// opt.cpp. The initializations below will then become:
  /// InterleaveOnlyWhenForced(!EnableLoopInterleaving)
  /// VectorizeOnlyWhenForced(!EnableLoopVectorization).
  LoopVectorizeOptions() : LoopVectorizeOptions(false, false) {}

  /// Builds options with both flags given explicitly.
  LoopVectorizeOptions(bool InterleaveOnlyWhenForced,
                       bool VectorizeOnlyWhenForced)
      : InterleaveOnlyWhenForced{InterleaveOnlyWhenForced},
        VectorizeOnlyWhenForced{VectorizeOnlyWhenForced} {}

  /// Sets InterleaveOnlyWhenForced to \p Value.
  /// \returns *this, so that setters can be chained.
  LoopVectorizeOptions &setInterleaveOnlyWhenForced(bool Value) {
    this->InterleaveOnlyWhenForced = Value;
    return *this;
  }

  /// Sets VectorizeOnlyWhenForced to \p Value.
  /// \returns *this, so that setters can be chained.
  LoopVectorizeOptions &setVectorizeOnlyWhenForced(bool Value) {
    this->VectorizeOnlyWhenForced = Value;
    return *this;
  }
};


/// Records which kinds of change a vectorizer run made.
struct LoopVectorizeResult {
  /// Set when any change at all was made.
  bool MadeAnyChange;

  /// Set when the control-flow graph was changed.
  bool MadeCFGChange;

  /// Stores both flags exactly as given.
  LoopVectorizeResult(bool MadeAnyChange, bool MadeCFGChange)
      : MadeAnyChange{MadeAnyChange}, MadeCFGChange{MadeCFGChange} {}
};


/// The LoopVectorize Pass.
///
/// Function pass for the new pass manager. Only declarations are visible in
/// this header; the constructor, run(), runImpl(), processLoop() and
/// printPipeline() are defined out of line.
struct LoopVectorizePass : public PassInfoMixin<LoopVectorizePass> {
private:
  /// If false, consider all loops for interleaving.
  /// If true, only loops that explicitly request interleaving are considered.
  bool InterleaveOnlyWhenForced;

  /// If false, consider all loops for vectorization.
  /// If true, only loops that explicitly request vectorization are considered.
  bool VectorizeOnlyWhenForced;

public:
  /// Creates the pass from \p Opts; a default-constructed LoopVectorizeOptions
  /// is used when no argument is given.
  LoopVectorizePass(LoopVectorizeOptions Opts = {});

  // Non-owning pointers to the analyses used by the pass. They default to
  // nullptr so that a freshly constructed pass never holds indeterminate
  // pointer values.
  // NOTE(review): presumably assigned from the runImpl() arguments of the
  // same name -- the definition is not in this header, confirm there.
  ScalarEvolution *SE = nullptr;
  LoopInfo *LI = nullptr;
  TargetTransformInfo *TTI = nullptr;
  DominatorTree *DT = nullptr;
  BlockFrequencyInfo *BFI = nullptr;
  TargetLibraryInfo *TLI = nullptr;
  DemandedBits *DB = nullptr;
  AssumptionCache *AC = nullptr;
  LoopAccessInfoManager *LAIs = nullptr;
  OptimizationRemarkEmitter *ORE = nullptr;
  ProfileSummaryInfo *PSI = nullptr;

  /// New pass manager entry point for function \p F.
  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);

  /// Prints this pass's pipeline text to \p OS.
  void printPipeline(raw_ostream &OS,
                     function_ref<StringRef(StringRef)> MapClassName2PassName);

  // Shim for old PM.
  // BFI_, TLI_ and PSI_ are taken by pointer while the other analyses are
  // taken by reference.
  // NOTE(review): this suggests the three pointer arguments may be null --
  // confirm against the definition.
  LoopVectorizeResult runImpl(Function &F, ScalarEvolution &SE_, LoopInfo &LI_,
                              TargetTransformInfo &TTI_, DominatorTree &DT_,
                              BlockFrequencyInfo *BFI_, TargetLibraryInfo *TLI_,
                              DemandedBits &DB_, AssumptionCache &AC_,
                              LoopAccessInfoManager &LAIs_,
                              OptimizationRemarkEmitter &ORE_,
                              ProfileSummaryInfo *PSI_);

  /// Processes the single loop \p L.
  /// NOTE(review): the meaning of the returned bool is not visible in this
  /// header -- confirm against the definition.
  bool processLoop(Loop *L);
};


/// Reports a vectorization failure: print \p DebugMsg for debugging
/// purposes along with the corresponding optimization remark, which is
/// described by \p OREMsg and \p ORETag and reported through \p ORE.
/// NOTE(review): that \p OREMsg is the remark text and \p ORETag the remark
/// name is inferred from the parameter names only; the definition is not in
/// this header -- confirm there.
/// If \p I is passed, it is an instruction that prevents vectorization.
/// Otherwise, the loop \p TheLoop is used for the location of the remark.
void reportVectorizationFailure(const StringRef DebugMsg,

    const StringRef OREMsg, const StringRef ORETag,

    OptimizationRemarkEmitter *ORE, Loop *TheLoop, Instruction *I = nullptr);


/// Reports an informative message: print \p OREMsg for debugging purposes as
/// well as an optimization remark. Uses either \p I as location of the remark,
/// or otherwise \p TheLoop.
/// NOTE(review): \p ORETag is presumably the remark name and \p ORE the
/// emitter the remark goes through; the definition is not in this header --
/// confirm there.
void reportVectorizationInfo(const StringRef OREMsg, const StringRef ORETag,

                             OptimizationRemarkEmitter *ORE, Loop *TheLoop,

                             Instruction *I = nullptr);


} // end namespace llvm


#endif // LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZE_H

CommandLine.h

F
#define F(x, y, z)
Definition: MD5.cpp:55

I
#define I(x, y, z)
Definition: MD5.cpp:58

FAM
FunctionAnalysisManager FAM
Definition: PassBuilderBindings.cpp:59

PassManager.h
This header defines various interfaces for pass management in LLVM.

OS
raw_pwrite_stream & OS
Definition: SampleProfWriter.cpp:53

llvm::AnalysisManager::Invalidator
API to communicate dependencies between analyses during invalidation.
Definition: PassManager.h:360

llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:321

llvm::AnalysisManager::getCachedResult
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
Definition: PassManager.h:492

llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:42

llvm::BlockFrequencyInfo
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Definition: BlockFrequencyInfo.h:38

llvm::DemandedBits
Definition: DemandedBits.h:38

llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162

llvm::Function
Definition: Function.h:62

llvm::Instruction
Definition: Instruction.h:49

llvm::LoopAccessInfoManager
Definition: LoopAccessAnalysis.h:779

llvm::LoopInfo
Definition: LoopInfo.h:407

llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:44

llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: OptimizationRemarkEmitter.h:33

llvm::PassManager< Function >

llvm::PassManager< Function >::run
PreservedAnalyses run(Function &IR, AnalysisManager< Function > &AM, ExtraArgTs... ExtraArgs)
Run all of the passes in this manager over the given unit of IR.
Definition: PassManager.h:201

llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:109

llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:115

llvm::PreservedAnalyses::getChecker
PreservedAnalysisChecker getChecker() const
Build a checker for this PreservedAnalyses and the specified analysis type.
Definition: Analysis.h:264

llvm::ProfileSummaryInfo
Analysis providing profile information.
Definition: ProfileSummaryInfo.h:42

llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:452

llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50

llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:281

llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:213

llvm::Value
LLVM Value Representation.
Definition: Value.h:74

llvm::function_ref
An efficient, type-erasing, non-owning reference to a callable.
Definition: STLFunctionalExtras.h:36

llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52

false
Definition: StackSlotColoring.cpp:184

llvm::codeview::PublicSymFlags::Function
@ Function

llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18

llvm::EnableLoopVectorization
cl::opt< bool > EnableLoopVectorization

llvm::reportVectorizationFailure
void reportVectorizationFailure(const StringRef DebugMsg, const StringRef OREMsg, const StringRef ORETag, OptimizationRemarkEmitter *ORE, Loop *TheLoop, Instruction *I=nullptr)
Reports a vectorization failure: print DebugMsg for debugging purposes along with the corresponding o...
Definition: LoopVectorize.cpp:990

llvm::reportVectorizationInfo
void reportVectorizationInfo(const StringRef OREMsg, const StringRef ORETag, OptimizationRemarkEmitter *ORE, Loop *TheLoop, Instruction *I=nullptr)
Reports an informative message: print Msg for debugging purposes as well as an optimization remark.
Definition: LoopVectorize.cpp:1001

llvm::EnableLoopInterleaving
cl::opt< bool > EnableLoopInterleaving

llvm::AnalysisInfoMixin
A CRTP mix-in that provides informational APIs needed for analysis passes.
Definition: PassManager.h:97

llvm::AnalysisKey
A special type used by analysis passes to provide an address that identifies that particular analysis...
Definition: Analysis.h:26

llvm::ExtraVectorPassManager
A pass manager to run a set of extra function simplification passes after vectorization,...
Definition: LoopVectorize.h:104

llvm::ExtraVectorPassManager::run
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Definition: LoopVectorize.h:105

llvm::LoopVectorizeOptions
Definition: LoopVectorize.h:114

llvm::LoopVectorizeOptions::LoopVectorizeOptions
LoopVectorizeOptions(bool InterleaveOnlyWhenForced, bool VectorizeOnlyWhenForced)
Definition: LoopVectorize.h:134

llvm::LoopVectorizeOptions::setVectorizeOnlyWhenForced
LoopVectorizeOptions & setVectorizeOnlyWhenForced(bool Value)
Definition: LoopVectorize.h:144

llvm::LoopVectorizeOptions::setInterleaveOnlyWhenForced
LoopVectorizeOptions & setInterleaveOnlyWhenForced(bool Value)
Definition: LoopVectorize.h:139

llvm::LoopVectorizeOptions::LoopVectorizeOptions
LoopVectorizeOptions()
The current defaults when creating the pass with no arguments are: EnableLoopInterleaving = true and ...
Definition: LoopVectorize.h:132

llvm::LoopVectorizeOptions::InterleaveOnlyWhenForced
bool InterleaveOnlyWhenForced
If false, consider all loops for interleaving.
Definition: LoopVectorize.h:117

llvm::LoopVectorizeOptions::VectorizeOnlyWhenForced
bool VectorizeOnlyWhenForced
If false, consider all loops for vectorization.
Definition: LoopVectorize.h:121

llvm::LoopVectorizePass
The LoopVectorize Pass.
Definition: LoopVectorize.h:160

llvm::LoopVectorizePass::TLI
TargetLibraryInfo * TLI
Definition: LoopVectorize.h:178

llvm::LoopVectorizePass::processLoop
bool processLoop(Loop *L)
Definition: LoopVectorize.cpp:9752

llvm::LoopVectorizePass::PSI
ProfileSummaryInfo * PSI
Definition: LoopVectorize.h:183

llvm::LoopVectorizePass::LI
LoopInfo * LI
Definition: LoopVectorize.h:174

llvm::LoopVectorizePass::LAIs
LoopAccessInfoManager * LAIs
Definition: LoopVectorize.h:181

llvm::LoopVectorizePass::DB
DemandedBits * DB
Definition: LoopVectorize.h:179

llvm::LoopVectorizePass::printPipeline
void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
Definition: LoopVectorize.cpp:10327

llvm::LoopVectorizePass::BFI
BlockFrequencyInfo * BFI
Definition: LoopVectorize.h:177

llvm::LoopVectorizePass::SE
ScalarEvolution * SE
Definition: LoopVectorize.h:173

llvm::LoopVectorizePass::AC
AssumptionCache * AC
Definition: LoopVectorize.h:180

llvm::LoopVectorizePass::runImpl
LoopVectorizeResult runImpl(Function &F, ScalarEvolution &SE_, LoopInfo &LI_, TargetTransformInfo &TTI_, DominatorTree &DT_, BlockFrequencyInfo *BFI_, TargetLibraryInfo *TLI_, DemandedBits &DB_, AssumptionCache &AC_, LoopAccessInfoManager &LAIs_, OptimizationRemarkEmitter &ORE_, ProfileSummaryInfo *PSI_)
Definition: LoopVectorize.cpp:10198

llvm::LoopVectorizePass::run
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Definition: LoopVectorize.cpp:10271

llvm::LoopVectorizePass::ORE
OptimizationRemarkEmitter * ORE
Definition: LoopVectorize.h:182

llvm::LoopVectorizePass::DT
DominatorTree * DT
Definition: LoopVectorize.h:176

llvm::LoopVectorizePass::TTI
TargetTransformInfo * TTI
Definition: LoopVectorize.h:175

llvm::LoopVectorizeResult
Storage for information about made changes.
Definition: LoopVectorize.h:151

llvm::LoopVectorizeResult::LoopVectorizeResult
LoopVectorizeResult(bool MadeAnyChange, bool MadeCFGChange)
Definition: LoopVectorize.h:155

llvm::LoopVectorizeResult::MadeAnyChange
bool MadeAnyChange
Definition: LoopVectorize.h:152

llvm::LoopVectorizeResult::MadeCFGChange
bool MadeCFGChange
Definition: LoopVectorize.h:153

llvm::PassInfoMixin
A CRTP mix-in to automatically provide informational APIs needed for passes.
Definition: PassManager.h:74

llvm::ShouldRunExtraVectorPasses::Result
Definition: LoopVectorize.h:87

llvm::ShouldRunExtraVectorPasses::Result::invalidate
bool invalidate(Function &F, const PreservedAnalyses &PA, FunctionAnalysisManager::Invalidator &)
Definition: LoopVectorize.h:88

llvm::ShouldRunExtraVectorPasses
A marker to determine if extra passes after loop vectorization should be run.
Definition: LoopVectorize.h:85

llvm::ShouldRunExtraVectorPasses::run
Result run(Function &F, FunctionAnalysisManager &FAM)
Definition: LoopVectorize.h:97

llvm::ShouldRunExtraVectorPasses::Key
static AnalysisKey Key
Definition: LoopVectorize.h:86