LLVM
17.0.0git
|
#include "llvm/Transforms/Vectorize/LoopVectorize.h"
#include "LoopVectorizationPlanner.h"
#include "VPRecipeBuilder.h"
#include "VPlan.h"
#include "VPlanHCFGBuilder.h"
#include "VPlanTransforms.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/DemandedBits.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/IR/Verifier.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/InstructionCost.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/InjectTLIMappings.h"
#include "llvm/Transforms/Utils/LoopSimplify.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/LoopVersioning.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include "llvm/Transforms/Utils/SizeOpts.h"
#include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstdint>
#include <functional>
#include <iterator>
#include <limits>
#include <map>
#include <memory>
#include <string>
#include <tuple>
#include <utility>
Go to the source code of this file.
Classes | |
class | llvm::InnerLoopVectorizer |
InnerLoopVectorizer vectorizes loops which contain only one basic block to a specified vectorization factor (VF). More... | |
class | llvm::InnerLoopUnroller |
struct | llvm::EpilogueLoopVectorizationInfo |
Encapsulate information regarding vectorization of a loop and its epilogue. More... | |
class | llvm::InnerLoopAndEpilogueVectorizer |
An extension of the inner loop vectorizer that creates a skeleton for a vectorized loop that has its epilogue (residual) also vectorized. More... | |
class | llvm::EpilogueVectorizerMainLoop |
A specialized derived class of inner loop vectorizer that performs vectorization of main loops in the process of vectorizing loops and their epilogues. More... | |
class | llvm::EpilogueVectorizerEpilogueLoop |
struct | llvm::ElementCountComparator |
ElementCountComparator creates a total ordering for ElementCount for the purposes of using it in a set structure. More... | |
class | llvm::LoopVectorizationCostModel |
LoopVectorizationCostModel - estimates the expected speedups due to vectorization. More... | |
struct | llvm::LoopVectorizationCostModel::RegisterUsage |
A struct that represents some properties of the register usage of a loop. More... | |
Namespaces | |
PreferPredicateTy | |
llvm | |
This is an optimization pass for GlobalISel generic memory operations. | |
Macros | |
#define | LV_NAME "loop-vectorize" |
#define | DEBUG_TYPE LV_NAME |
Typedefs | |
using | llvm::ElementCountSet = SmallSet< ElementCount, 16, ElementCountComparator > |
Functions | |
STATISTIC (LoopsVectorized, "Number of loops vectorized") | |
STATISTIC (LoopsAnalyzed, "Number of loops analyzed for vectorization") | |
STATISTIC (LoopsEpilogueVectorized, "Number of epilogues vectorized") | |
static bool | hasIrregularType (Type *Ty, const DataLayout &DL) |
A helper function that returns true if the given type is irregular. More... | |
static unsigned | getReciprocalPredBlockProb () |
A helper function that returns the reciprocal of the block probability of predicated blocks. More... | |
static Constant * | getSignedIntOrFpConstant (Type *Ty, int64_t C) |
A helper function that returns an integer or floating-point constant with value C. More... | |
static std::optional< unsigned > | getSmallBestKnownTC (ScalarEvolution &SE, Loop *L) |
Returns "best known" trip count for the specified loop L as defined by the following procedure: 1) Returns exact trip count if it is known. More... | |
static Instruction * | getDebugLocFromInstOrOperands (Instruction *I) |
Look for a meaningful debug location on the instruction or its operands. More... | |
static void | debugVectorizationMessage (const StringRef Prefix, const StringRef DebugMsg, Instruction *I) |
Write a DebugMsg about vectorization to the debug output stream. More... | |
static OptimizationRemarkAnalysis | createLVAnalysis (const char *PassName, StringRef RemarkName, Loop *TheLoop, Instruction *I) |
Create an analysis remark that explains why vectorization failed. More... | |
Value * | llvm::createStepForVF (IRBuilderBase &B, Type *Ty, ElementCount VF, int64_t Step) |
Return a value for Step multiplied by VF. More... | |
Value * | llvm::getRuntimeVF (IRBuilderBase &B, Type *Ty, ElementCount VF) |
Return the runtime value for VF. More... | |
const SCEV * | llvm::createTripCountSCEV (Type *IdxTy, PredicatedScalarEvolution &PSE) |
static Value * | llvm::getRuntimeVFAsFloat (IRBuilderBase &B, Type *FTy, ElementCount VF) |
void | llvm::reportVectorizationFailure (const StringRef DebugMsg, const StringRef OREMsg, const StringRef ORETag, OptimizationRemarkEmitter *ORE, Loop *TheLoop, Instruction *I=nullptr) |
Reports a vectorization failure: print DebugMsg for debugging purposes along with the corresponding optimization remark RemarkName. More... | |
void | llvm::reportVectorizationInfo (const StringRef OREMsg, const StringRef ORETag, OptimizationRemarkEmitter *ORE, Loop *TheLoop, Instruction *I=nullptr) |
Reports an informative message: print Msg for debugging purposes as well as an optimization remark. More... | |
static std::string | getDebugLocString (const Loop *L) |
static bool | isExplicitVecOuterLoop (Loop *OuterLp, OptimizationRemarkEmitter *ORE) |
static void | collectSupportedLoops (Loop &L, LoopInfo *LI, OptimizationRemarkEmitter *ORE, SmallVectorImpl< Loop * > &V) |
static Value * | getStepVector (Value *Val, Value *StartIdx, Value *Step, Instruction::BinaryOps BinOp, ElementCount VF, IRBuilderBase &Builder) |
This function adds (StartIdx * Step, (StartIdx + 1) * Step, (StartIdx + 2) * Step, ...) to each vector element of Val. More... | |
static void | buildScalarSteps (Value *ScalarIV, Value *Step, const InductionDescriptor &ID, VPValue *Def, VPTransformState &State) |
Compute scalar induction steps. More... | |
static Value * | CreateStepValue (const SCEV *Step, ScalarEvolution &SE, Instruction *InsertBefore, Loop *OrigLoop=nullptr) |
static Value * | emitTransformedIndex (IRBuilderBase &B, Value *Index, Value *StartValue, Value *Step, const InductionDescriptor &ID) |
Compute the transformed value of Index at offset StartValue using step StepValue. More... | |
static bool | useMaskedInterleavedAccesses (const TargetTransformInfo &TTI) |
static void | cse (BasicBlock *BB) |
Perform cse of induction variable instructions. More... | |
static Type * | MaybeVectorizeType (Type *Elt, ElementCount VF) |
static Type * | smallestIntegerVectorType (Type *T1, Type *T2) |
static Type * | largestIntegerVectorType (Type *T1, Type *T2) |
static const SCEV * | getAddressAccessSCEV (Value *Ptr, LoopVectorizationLegality *Legal, PredicatedScalarEvolution &PSE, const Loop *TheLoop) |
Gets Address Access SCEV after verifying that the access pattern is loop invariant except the induction variable dependence. More... | |
static bool | isStrideMul (Instruction *I, LoopVectorizationLegality *Legal) |
Pass * | llvm::createLoopVectorizePass () |
Pass * | llvm::createLoopVectorizePass (bool InterleaveOnlyWhenForced, bool VectorizeOnlyWhenForced) |
static unsigned | determineVPlanVF (const unsigned WidestVectorRegBits, LoopVectorizationCostModel &CM) |
static void | AddRuntimeUnrollDisableMetaData (Loop *L) |
static VPWidenIntOrFpInductionRecipe * | createWidenInductionRecipes (PHINode *Phi, Instruction *PhiOrTrunc, VPValue *Start, const InductionDescriptor &IndDesc, LoopVectorizationCostModel &CM, VPlan &Plan, ScalarEvolution &SE, Loop &OrigLoop, VFRange &Range) |
Creates a VPWidenIntOrFpInductionRecipe for Phi. More... | |
static void | addCanonicalIVRecipes (VPlan &Plan, Type *IdxTy, DebugLoc DL, bool HasNUW, bool UseLaneMaskForLoopControlFlow) |
static void | addUsersInExitBlock (VPBasicBlock *HeaderVPBB, VPBasicBlock *MiddleVPBB, Loop *OrigLoop, VPlan &Plan) |
static ScalarEpilogueLowering | getScalarEpilogueLowering (Function *F, Loop *L, LoopVectorizeHints &Hints, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI, TargetTransformInfo *TTI, TargetLibraryInfo *TLI, AssumptionCache *AC, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, LoopVectorizationLegality &LVL, InterleavedAccessInfo *IAI) |
static bool | processLoopInVPlanNativePath (Loop *L, PredicatedScalarEvolution &PSE, LoopInfo *LI, DominatorTree *DT, LoopVectorizationLegality *LVL, TargetTransformInfo *TTI, TargetLibraryInfo *TLI, DemandedBits *DB, AssumptionCache *AC, OptimizationRemarkEmitter *ORE, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, LoopVectorizeHints &Hints, LoopVectorizationRequirements &Requirements) |
static void | checkMixedPrecision (Loop *L, OptimizationRemarkEmitter *ORE) |
static bool | areRuntimeChecksProfitable (GeneratedRTChecks &Checks, VectorizationFactor &VF, std::optional< unsigned > VScale, Loop *L, ScalarEvolution &SE) |
Variables | |
const char | VerboseDebug [] = DEBUG_TYPE "-verbose" |
static cl::opt< bool > | EnableEpilogueVectorization ("enable-epilogue-vectorization", cl::init(true), cl::Hidden, cl::desc("Enable vectorization of epilogue loops.")) |
static cl::opt< unsigned > | EpilogueVectorizationForceVF ("epilogue-vectorization-force-VF", cl::init(1), cl::Hidden, cl::desc("When epilogue vectorization is enabled, and a value greater than " "1 is specified, forces the given VF for all applicable epilogue " "loops.")) |
static cl::opt< unsigned > | EpilogueVectorizationMinVF ("epilogue-vectorization-minimum-VF", cl::init(16), cl::Hidden, cl::desc("Only loops with vectorization factor equal to or larger than " "the specified value are considered for epilogue vectorization.")) |
static cl::opt< unsigned > | TinyTripCountVectorThreshold ("vectorizer-min-trip-count", cl::init(16), cl::Hidden, cl::desc("Loops with a constant trip count that is smaller than this " "value are vectorized only if no scalar iteration overheads " "are incurred.")) |
Loops with a known constant trip count below this number are vectorized only if no scalar iteration overheads are incurred. More... | |
static cl::opt< unsigned > | VectorizeMemoryCheckThreshold ("vectorize-memory-check-threshold", cl::init(128), cl::Hidden, cl::desc("The maximum allowed number of runtime memory checks")) |
static cl::opt< PreferPredicateTy::Option > | PreferPredicateOverEpilogue ("prefer-predicate-over-epilogue", cl::init(PreferPredicateTy::ScalarEpilogue), cl::Hidden, cl::desc("Tail-folding and predication preferences over creating a scalar " "epilogue loop."), cl::values(clEnumValN(PreferPredicateTy::ScalarEpilogue, "scalar-epilogue", "Don't tail-predicate loops, create scalar epilogue"), clEnumValN(PreferPredicateTy::PredicateElseScalarEpilogue, "predicate-else-scalar-epilogue", "prefer tail-folding, create scalar epilogue if tail " "folding fails."), clEnumValN(PreferPredicateTy::PredicateOrDontVectorize, "predicate-dont-vectorize", "prefers tail-folding, don't attempt vectorization if " "tail-folding fails."))) |
static cl::opt< bool > | MaximizeBandwidth ("vectorizer-maximize-bandwidth", cl::init(false), cl::Hidden, cl::desc("Maximize bandwidth when selecting vectorization factor which " "will be determined by the smallest type in loop.")) |
static cl::opt< bool > | EnableInterleavedMemAccesses ("enable-interleaved-mem-accesses", cl::init(false), cl::Hidden, cl::desc("Enable vectorization on interleaved memory accesses in a loop")) |
static cl::opt< bool > | EnableMaskedInterleavedMemAccesses ("enable-masked-interleaved-mem-accesses", cl::init(false), cl::Hidden, cl::desc("Enable vectorization on masked interleaved memory accesses in a loop")) |
An interleave-group may need masking if it resides in a block that needs predication, or in order to mask away gaps. More... | |
static cl::opt< unsigned > | TinyTripCountInterleaveThreshold ("tiny-trip-count-interleave-threshold", cl::init(128), cl::Hidden, cl::desc("We don't interleave loops with a estimated constant trip count " "below this number")) |
static cl::opt< unsigned > | ForceTargetNumScalarRegs ("force-target-num-scalar-regs", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's number of scalar registers.")) |
static cl::opt< unsigned > | ForceTargetNumVectorRegs ("force-target-num-vector-regs", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's number of vector registers.")) |
static cl::opt< unsigned > | ForceTargetMaxScalarInterleaveFactor ("force-target-max-scalar-interleave", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's max interleave factor for " "scalar loops.")) |
static cl::opt< unsigned > | ForceTargetMaxVectorInterleaveFactor ("force-target-max-vector-interleave", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's max interleave factor for " "vectorized loops.")) |
static cl::opt< unsigned > | ForceTargetInstructionCost ("force-target-instruction-cost", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's expected cost for " "an instruction to a single constant value. Mostly " "useful for getting consistent testing.")) |
static cl::opt< bool > | ForceTargetSupportsScalableVectors ("force-target-supports-scalable-vectors", cl::init(false), cl::Hidden, cl::desc("Pretend that scalable vectors are supported, even if the target does " "not support them. This flag should only be used for testing.")) |
static cl::opt< unsigned > | SmallLoopCost ("small-loop-cost", cl::init(20), cl::Hidden, cl::desc("The cost of a loop that is considered 'small' by the interleaver.")) |
static cl::opt< bool > | LoopVectorizeWithBlockFrequency ("loop-vectorize-with-block-frequency", cl::init(true), cl::Hidden, cl::desc("Enable the use of the block frequency analysis to access PGO " "heuristics minimizing code growth in cold regions and being more " "aggressive in hot regions.")) |
static cl::opt< bool > | EnableLoadStoreRuntimeInterleave ("enable-loadstore-runtime-interleave", cl::init(true), cl::Hidden, cl::desc("Enable runtime interleaving until load/store ports are saturated")) |
static cl::opt< bool > | InterleaveSmallLoopScalarReduction ("interleave-small-loop-scalar-reduction", cl::init(false), cl::Hidden, cl::desc("Enable interleaving for loops with small iteration counts that " "contain scalar reductions to expose ILP.")) |
Interleave small loops with scalar reductions. More... | |
static cl::opt< unsigned > | NumberOfStoresToPredicate ("vectorize-num-stores-pred", cl::init(1), cl::Hidden, cl::desc("Max number of stores to be predicated behind an if.")) |
The number of stores in a loop that are allowed to need predication. More... | |
static cl::opt< bool > | EnableIndVarRegisterHeur ("enable-ind-var-reg-heur", cl::init(true), cl::Hidden, cl::desc("Count the induction variable only once when interleaving")) |
static cl::opt< bool > | EnableCondStoresVectorization ("enable-cond-stores-vec", cl::init(true), cl::Hidden, cl::desc("Enable if predication of stores during vectorization.")) |
static cl::opt< unsigned > | MaxNestedScalarReductionIC ("max-nested-scalar-reduction-interleave", cl::init(2), cl::Hidden, cl::desc("The maximum interleave count to use when interleaving a scalar " "reduction in a nested loop.")) |
static cl::opt< bool > | PreferInLoopReductions ("prefer-inloop-reductions", cl::init(false), cl::Hidden, cl::desc("Prefer in-loop vector reductions, " "overriding the targets preference.")) |
static cl::opt< bool > | ForceOrderedReductions ("force-ordered-reductions", cl::init(false), cl::Hidden, cl::desc("Enable the vectorisation of loops with in-order (strict) " "FP reductions")) |
static cl::opt< bool > | PreferPredicatedReductionSelect ("prefer-predicated-reduction-select", cl::init(false), cl::Hidden, cl::desc("Prefer predicating a reduction operation over an after loop select.")) |
cl::opt< bool > | EnableVPlanNativePath ("enable-vplan-native-path", cl::init(false), cl::Hidden, cl::desc("Enable VPlan-native vectorization path with " "support for outer loop vectorization.")) |
static cl::opt< bool > | VPlanBuildStressTest ("vplan-build-stress-test", cl::init(false), cl::Hidden, cl::desc("Build VPlan for every supported loop nest in the function and bail " "out right after the build (stress test the VPlan H-CFG construction " "in the VPlan-native vectorization path).")) |
static cl::opt< bool > | PrintVPlansInDotFormat ("vplan-print-in-dot-format", cl::Hidden, cl::desc("Use dot format instead of plain text when dumping VPlans")) |
static cl::opt< cl::boolOrDefault > | ForceSafeDivisor ("force-widen-divrem-via-safe-divisor", cl::Hidden, cl::desc("Override cost based safe divisor widening for div/rem instructions")) |
static const char | lv_name [] = "Loop Vectorization" |
const char | LLVMLoopVectorizeFollowupAll [] = "llvm.loop.vectorize.followup_all" |
const char | LLVMLoopVectorizeFollowupVectorized [] |
const char | LLVMLoopVectorizeFollowupEpilogue [] |
#define DEBUG_TYPE LV_NAME |
Definition at line 157 of file LoopVectorize.cpp.
#define LV_NAME "loop-vectorize" |
Definition at line 156 of file LoopVectorize.cpp.
|
static |
Definition at line 8714 of file LoopVectorize.cpp.
References llvm::VPInstruction::ActiveLaneMask, llvm::VPUser::addOperand(), llvm::VPBasicBlock::appendRecipe(), llvm::VPInstruction::BranchOnCond, llvm::VPInstruction::BranchOnCount, llvm::VPInstruction::CanonicalIVIncrement, llvm::VPInstruction::CanonicalIVIncrementForPart, llvm::VPInstruction::CanonicalIVIncrementForPartNUW, llvm::VPInstruction::CanonicalIVIncrementNUW, DL, llvm::ConstantInt::get(), llvm::VPlan::getEntry(), llvm::VPBlockBase::getEntryBasicBlock(), llvm::VPBlockBase::getExitingBasicBlock(), llvm::VPlan::getOrAddVPValue(), llvm::VPlan::getOrCreateTripCount(), llvm::VPlan::getVectorLoopRegion(), llvm::VPlan::getVectorTripCount(), and llvm::VPInstruction::Not.
|
static |
Definition at line 7583 of file LoopVectorize.cpp.
References Context, llvm::MDString::get(), llvm::MDNode::get(), llvm::BasicBlock::getContext(), llvm::LoopBase< BlockT, LoopT >::getHeader(), llvm::Loop::getLoopID(), llvm::MDNode::getNumOperands(), llvm::MDNode::getOperand(), i, llvm::MDNode::replaceOperandWith(), S, and llvm::Loop::setLoopID().
Referenced by llvm::LoopVectorizationPlanner::executePlan(), and llvm::LoopVectorizePass::processLoop().
|
static |
Definition at line 8794 of file LoopVectorize.cpp.
References llvm::VPlan::addLiveOut(), llvm::LoopBase< BlockT, LoopT >::getExitingBlock(), llvm::VPlan::getOrAddVPValue(), llvm::BasicBlock::getSinglePredecessor(), llvm::LoopBase< BlockT, LoopT >::getUniqueExitBlock(), llvm::InnerLoopVectorizer::OrigLoop, and llvm::BasicBlock::phis().
|
static |
Definition at line 10052 of file LoopVectorize.cpp.
References llvm::alignTo(), ceil, llvm::dbgs(), double, llvm::ElementCount::getFixed(), llvm::details::FixedOrScalableQuantity< LeafTy, ValueTy >::getKnownMinValue(), getSmallBestKnownTC(), llvm::InstructionCost::getValue(), llvm::details::FixedOrScalableQuantity< ElementCount, unsigned >::isKnownLT(), llvm::details::FixedOrScalableQuantity< LeafTy, ValueTy >::isScalable(), llvm::ElementCount::isScalar(), llvm::InstructionCost::isValid(), LLVM_DEBUG, llvm::max(), VectorizeMemoryCheckThreshold, and llvm::InnerLoopVectorizer::VF.
Referenced by llvm::LoopVectorizePass::processLoop().
|
static |
Compute scalar induction steps.
ScalarIV is the scalar induction variable on which to base the steps, Step is the size of the step.
Definition at line 2351 of file LoopVectorize.cpp.
References llvm::MCID::Add, assert(), llvm::VPTransformState::Builder, llvm::InnerLoopVectorizer::Builder, llvm::IRBuilderBase::CreateAdd(), llvm::IRBuilderBase::CreateBinOp(), llvm::IRBuilderBase::CreateSIToFP(), llvm::createStepForVF(), llvm::IRBuilderBase::CreateStepVector(), llvm::IRBuilderBase::CreateTrunc(), llvm::IRBuilderBase::CreateVectorSplat(), llvm::tgtok::Def, llvm::IntegerType::get(), llvm::VectorType::get(), llvm::Type::getContext(), llvm::details::FixedOrScalableQuantity< LeafTy, ValueTy >::getKnownMinValue(), llvm::Type::getScalarSizeInBits(), llvm::Type::getScalarType(), getSignedIntOrFpConstant(), llvm::Value::getType(), llvm::VPTransformState::Instance, llvm::Type::isFloatingPointTy(), llvm::Type::isIntegerTy(), llvm::details::FixedOrScalableQuantity< LeafTy, ValueTy >::isScalable(), Mul, llvm::vputils::onlyFirstLaneUsed(), llvm::VPTransformState::set(), llvm::VPTransformState::UF, and llvm::VPTransformState::VF.
Referenced by llvm::VPScalarIVStepsRecipe::execute().
|
static |
Definition at line 10011 of file LoopVectorize.cpp.
References BB, llvm::LoopBase< BlockT, LoopT >::contains(), llvm::OptimizationRemarkEmitter::emit(), llvm::LoopBase< BlockT, LoopT >::getBlocks(), llvm::LoopBase< BlockT, LoopT >::getHeader(), I, llvm::SmallPtrSetImpl< PtrType >::insert(), LV_NAME, llvm::InnerLoopVectorizer::ORE, llvm::SmallVectorImpl< T >::pop_back_val(), and S.
Referenced by llvm::LoopVectorizePass::processLoop().
|
static |
Definition at line 2180 of file LoopVectorize.cpp.
References EnableVPlanNativePath, isExplicitVecOuterLoop(), llvm::LoopBase< BlockT, LoopT >::isInnermost(), llvm::InnerLoopVectorizer::LI, llvm::InnerLoopVectorizer::ORE, llvm::LoopBlocksRPO::perform(), and VPlanBuildStressTest.
Referenced by llvm::LoopVectorizePass::runImpl().
|
static |
Create an analysis remark that explains why vectorization failed.
PassName is the name of the pass (e.g. can be AlwaysPrint). RemarkName is the identifier for the remark. If I is passed it is an instruction that prevents vectorization. Otherwise TheLoop is used for the location of the remark.
Definition at line 934 of file LoopVectorize.cpp.
References DL, llvm::LoopBase< BlockT, LoopT >::getHeader(), llvm::Loop::getStartLoc(), I, and PassName.
Referenced by llvm::reportVectorizationFailure(), and llvm::reportVectorizationInfo().
|
static |
Definition at line 2440 of file LoopVectorize.cpp.
References assert(), DL, E, llvm::ScalarEvolution::getDataLayout(), llvm::SCEV::getType(), llvm::ScalarEvolution::isLoopInvariant(), and llvm::InnerLoopVectorizer::OrigLoop.
Referenced by llvm::InnerLoopVectorizer::createInductionResumeValue(), and llvm::InnerLoopVectorizer::fixupIVUsers().
|
static |
Creates a VPWidenIntOrFpInductionRecipe for Phi.
If needed, it will also insert a recipe to expand the step for the induction recipe.
Definition at line 8192 of file LoopVectorize.cpp.
References assert(), llvm::LoopVectorizationPlanner::getDecisionAndClampRange(), llvm::PHINode::getIncomingValueForBlock(), llvm::LoopBase< BlockT, LoopT >::getLoopPreheader(), llvm::vputils::getOrCreateVPValueForSCEVExpr(), llvm::InductionDescriptor::getStartValue(), llvm::InductionDescriptor::getStep(), I, llvm::ScalarEvolution::isLoopInvariant(), llvm::LoopVectorizationCostModel::isProfitableToScalarize(), llvm::LoopVectorizationCostModel::isScalarAfterVectorization(), llvm::InnerLoopVectorizer::OrigLoop, and llvm::InnerLoopVectorizer::VF.
|
static |
Perform cse of induction variable instructions.
Definition at line 3414 of file LoopVectorize.cpp.
References BB, llvm::tgtok::In, llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, 4, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::lookup(), and llvm::make_early_inc_range().
|
static |
Write a DebugMsg about vectorization to the debug output stream.
If I is passed, the message relates to that particular instruction.
Definition at line 915 of file LoopVectorize.cpp.
References llvm::dbgs(), I, and llvm::cl::Prefix.
Referenced by llvm::reportVectorizationFailure(), and llvm::reportVectorizationInfo().
|
static |
Definition at line 7445 of file LoopVectorize.cpp.
References llvm::LoopVectorizationCostModel::getSmallestAndWidestTypes().
Referenced by llvm::LoopVectorizationPlanner::planInVPlanNativePath().
|
static |
Compute the transformed value of Index at offset StartValue using step StepValue.
For integer induction, returns StartValue + Index * StepValue. For pointer induction, returns StartValue[Index * StepValue]. FIXME: The newly created binary instructions should contain nsw/nuw flags, which can be found from the original scalar operations.
Definition at line 2459 of file LoopVectorize.cpp.
References assert(), B, CreateAdd(), CreateMul(), llvm::VectorType::getElementCount(), llvm::Value::getName(), llvm::Value::getType(), llvm::InductionDescriptor::IK_FpInduction, llvm::InductionDescriptor::IK_IntInduction, llvm::InductionDescriptor::IK_NoInduction, llvm::InductionDescriptor::IK_PtrInduction, llvm::Type::isFloatingPointTy(), llvm::Type::isIntegerTy(), llvm_unreachable, llvm::Offset, llvm::Value::setName(), X, and Y.
Referenced by llvm::InnerLoopVectorizer::createInductionResumeValue(), llvm::VPWidenPointerInductionRecipe::execute(), llvm::VPDerivedIVRecipe::execute(), and llvm::InnerLoopVectorizer::fixupIVUsers().
|
static |
Gets Address Access SCEV after verifying that the access pattern is loop invariant except the induction variable dependence.
This SCEV can be sent to the Target in order to estimate the address calculation cost.
Definition at line 6340 of file LoopVectorize.cpp.
References llvm::ScalarEvolution::getSCEV(), llvm::PredicatedScalarEvolution::getSCEV(), llvm::PredicatedScalarEvolution::getSE(), i, llvm::LoopVectorizationLegality::isInductionVariable(), llvm::ScalarEvolution::isLoopInvariant(), llvm::InnerLoopVectorizer::Legal, llvm::InnerLoopVectorizer::PSE, and Ptr.
|
static |
Look for a meaningful debug location on the instruction or its operands.
Definition at line 895 of file LoopVectorize.cpp.
References I.
Definition at line 1020 of file LoopVectorize.cpp.
References llvm::raw_ostream::flush(), llvm::LoopBase< BlockT, LoopT >::getHeader(), llvm::Module::getModuleIdentifier(), llvm::BasicBlock::getParent(), llvm::GlobalValue::getParent(), and llvm::Loop::getStartLoc().
Referenced by llvm::LoopVectorizePass::processLoop().
|
static |
A helper function that returns the reciprocal of the block probability of predicated blocks.
If we return X, we are assuming the predicated block will execute once for every X iterations of the loop header.
TODO: We should use actual block probability here, if available. Currently, we always assume predicated blocks have a 50% chance of executing.
Definition at line 389 of file LoopVectorize.cpp.
Referenced by llvm::LoopVectorizationCostModel::getDivRemSpeculationCost().
|
static |
Definition at line 9833 of file LoopVectorize.cpp.
References llvm::InnerLoopVectorizer::AC, llvm::InnerLoopVectorizer::BFI, llvm::CM_ScalarEpilogueAllowed, llvm::CM_ScalarEpilogueNotAllowedOptSize, llvm::CM_ScalarEpilogueNotAllowedUsePredicate, llvm::CM_ScalarEpilogueNotNeededUsePredicate, llvm::InnerLoopVectorizer::DT, F, llvm::LoopVectorizeHints::FK_Disabled, llvm::LoopVectorizeHints::FK_Enabled, llvm::LoopVectorizeHints::getForce(), llvm::LoopBase< BlockT, LoopT >::getHeader(), llvm::LoopVectorizeHints::getPredicate(), llvm::IRPass, llvm::InnerLoopVectorizer::LI, PreferPredicateTy::PredicateElseScalarEpilogue, PreferPredicateTy::PredicateOrDontVectorize, PreferPredicateOverEpilogue, llvm::TargetTransformInfo::preferPredicateOverEpilogue(), llvm::InnerLoopVectorizer::PSI, PreferPredicateTy::ScalarEpilogue, llvm::shouldOptimizeForSize(), and llvm::InnerLoopVectorizer::TLI.
Referenced by llvm::LoopVectorizePass::processLoop(), and processLoopInVPlanNativePath().
A helper function that returns an integer or floating-point constant with value C.
Definition at line 393 of file LoopVectorize.cpp.
References llvm::ConstantFP::get(), llvm::ConstantInt::getSigned(), and llvm::Type::isIntegerTy().
Referenced by buildScalarSteps(), and llvm::VPWidenIntOrFpInductionRecipe::execute().
|
static |
Returns "best known" trip count for the specified loop L as defined by the following procedure: 1) Returns exact trip count if it is known.
2) Returns expected trip count according to profile data if any. 3) Returns upper bound estimate if it is known. 4) Returns std::nullopt if all of the above failed.
Definition at line 404 of file LoopVectorize.cpp.
References llvm::getLoopEstimatedTripCount(), llvm::ScalarEvolution::getSmallConstantMaxTripCount(), llvm::ScalarEvolution::getSmallConstantTripCount(), and LoopVectorizeWithBlockFrequency.
Referenced by areRuntimeChecksProfitable(), llvm::LoopVectorizePass::processLoop(), and llvm::LoopVectorizationCostModel::selectInterleaveCount().
|
static |
This function adds (StartIdx * Step, (StartIdx + 1) * Step, (StartIdx + 2) * Step, ...) to each vector element of Val.
The sequence starts at StartIndex. Opcode is relevant for FP induction variables.
Definition at line 2300 of file LoopVectorize.cpp.
References assert(), llvm::InnerLoopVectorizer::Builder, llvm::IRBuilderBase::CreateAdd(), llvm::IRBuilderBase::CreateBinOp(), llvm::IRBuilderBase::CreateFAdd(), llvm::IRBuilderBase::CreateFMul(), llvm::IRBuilderBase::CreateMul(), llvm::IRBuilderBase::CreateStepVector(), llvm::IRBuilderBase::CreateUIToFP(), llvm::IRBuilderBase::CreateVectorSplat(), llvm::IntegerType::get(), llvm::VectorType::get(), llvm::Type::getContext(), llvm::Type::getScalarSizeInBits(), llvm::Type::getScalarType(), llvm::Value::getType(), llvm::Type::isFloatingPointTy(), llvm::Type::isIntegerTy(), llvm::ElementCount::isVector(), and llvm::InnerLoopVectorizer::VF.
Referenced by llvm::VPWidenIntOrFpInductionRecipe::execute().
|
static |
A helper function that returns true if the given type is irregular.
The type is irregular if its allocated size doesn't equal the store size of an element of the corresponding vector type.
Definition at line 376 of file LoopVectorize.cpp.
References DL.
Referenced by llvm::LoopVectorizationCostModel::interleavedAccessCanBeWidened(), and llvm::LoopVectorizationCostModel::memoryInstructionCanBeWidened().
|
static |
Definition at line 2152 of file LoopVectorize.cpp.
References llvm::LoopVectorizeHints::allowVectorization(), assert(), llvm::dbgs(), llvm::LoopVectorizeHints::emitRemarkWithHints(), llvm::LoopVectorizeHints::FK_Undefined, llvm::LoopVectorizeHints::getForce(), llvm::LoopBase< BlockT, LoopT >::getHeader(), llvm::LoopVectorizeHints::getInterleave(), llvm::BasicBlock::getParent(), llvm::LoopBase< BlockT, LoopT >::isInnermost(), LLVM_DEBUG, and llvm::InnerLoopVectorizer::ORE.
Referenced by collectSupportedLoops().
|
static |
Definition at line 6365 of file LoopVectorize.cpp.
References llvm::LoopVectorizationLegality::hasStride(), I, and llvm::InnerLoopVectorizer::Legal.
Definition at line 3519 of file LoopVectorize.cpp.
References T1.
|
static |
Definition at line 3484 of file LoopVectorize.cpp.
References llvm::VectorType::get(), llvm::Type::isFloatingPointTy(), llvm::Type::isIntOrPtrTy(), llvm::ElementCount::isScalar(), and llvm::InnerLoopVectorizer::VF.
Referenced by llvm::LoopVectorizationCostModel::getVectorIntrinsicCost().
|
static |
Definition at line 9949 of file LoopVectorize.cpp.
References llvm::InnerLoopVectorizer::AC, assert(), llvm::InnerLoopVectorizer::BFI, llvm::LoopVectorizationCostModel::collectElementTypesForWidening(), llvm::AArch64PACKey::DB, llvm::dbgs(), llvm::VectorizationFactor::Disabled(), llvm::InnerLoopVectorizer::DT, EnableVPlanNativePath, llvm::LoopVectorizationPlanner::executePlan(), F, llvm::PredicatedScalarEvolution::getBackedgeTakenCount(), llvm::LoopVectorizationPlanner::getBestPlanFor(), llvm::LoopBase< BlockT, LoopT >::getHeader(), llvm::LoopVectorizationLegality::getLAI(), llvm::Value::getName(), llvm::BasicBlock::getParent(), getScalarEpilogueLowering(), llvm::PredicatedScalarEvolution::getSE(), llvm::LoopVectorizeHints::getWidth(), llvm::InnerLoopVectorizer::LI, LLVM_DEBUG, llvm::InnerLoopVectorizer::ORE, llvm::LoopVectorizationPlanner::planInVPlanNativePath(), llvm::InnerLoopVectorizer::PSE, llvm::InnerLoopVectorizer::PSI, llvm::LoopVectorizeHints::setAlreadyVectorized(), llvm::InnerLoopVectorizer::TLI, llvm::verifyFunction(), llvm::InnerLoopVectorizer::VF, and VPlanBuildStressTest.
Referenced by llvm::LoopVectorizePass::processLoop().
Definition at line 3513 of file LoopVectorize.cpp.
References T1.
Referenced by llvm::InnerLoopVectorizer::truncateToMinimalBitwidths().
STATISTIC(LoopsEpilogueVectorized, "Number of epilogues vectorized")
|
static |
Definition at line 2555 of file LoopVectorize.cpp.
References llvm::TargetTransformInfo::enableMaskedInterleavedAccessVectorization(), EnableMaskedInterleavedMemAccesses, and llvm::cl::Option::getNumOccurrences().
Referenced by llvm::LoopVectorizationCostModel::computeMaxVF(), llvm::LoopVectorizationCostModel::interleavedAccessCanBeWidened(), llvm::LoopVectorizationPlanner::plan(), llvm::LoopVectorizePass::processLoop(), and llvm::InnerLoopVectorizer::vectorizeInterleaveGroup().
|
static |
Referenced by llvm::LoopVectorizationCostModel::selectInterleaveCount().
|
static |
Referenced by llvm::LoopVectorizePass::processLoop().
|
static |
Referenced by llvm::LoopVectorizationCostModel::selectInterleaveCount().
|
static |
An interleave-group may need masking if it resides in a block that needs predication, or in order to mask away gaps.
Referenced by useMaskedInterleavedAccesses().
cl::opt<bool> EnableVPlanNativePath("enable-vplan-native-path", cl::init(false), cl::Hidden, cl::desc("Enable VPlan-native vectorization path with " "support for outer loop vectorization.")) |
Referenced by collectSupportedLoops(), llvm::InnerLoopVectorizer::emitMemRuntimeChecks(), llvm::VPWidenPHIRecipe::execute(), llvm::InnerLoopVectorizer::fixVectorizedLoop(), llvm::LoopVectorizationCostModel::getWideningDecision(), llvm::LoopVectorizationCostModel::isProfitableToScalarize(), llvm::LoopVectorizationCostModel::isScalarAfterVectorization(), llvm::LoopVectorizationCostModel::isUniformAfterVectorization(), llvm::LoopVectorizationPlanner::planInVPlanNativePath(), llvm::LoopVectorizePass::processLoop(), processLoopInVPlanNativePath(), and llvm::LoopVectorizePass::run().
|
static |
|
static |
Referenced by llvm::LoopVectorizePass::processLoop().
|
static |
|
static |
|
static |
Referenced by llvm::LoopVectorizationCostModel::selectInterleaveCount().
|
static |
Referenced by llvm::LoopVectorizationCostModel::selectInterleaveCount().
|
static |
Referenced by llvm::LoopVectorizationCostModel::selectInterleaveCount().
|
static |
Referenced by llvm::LoopVectorizationCostModel::selectInterleaveCount().
|
static |
|
static |
Interleave small loops with scalar reductions.
Referenced by llvm::LoopVectorizationCostModel::selectInterleaveCount().
const char LLVMLoopVectorizeFollowupAll[] = "llvm.loop.vectorize.followup_all" |
Metadata attribute names
Definition at line 165 of file LoopVectorize.cpp.
Referenced by llvm::LoopVectorizationPlanner::executePlan(), and llvm::LoopVectorizePass::processLoop().
const char LLVMLoopVectorizeFollowupEpilogue[] |
Definition at line 168 of file LoopVectorize.cpp.
Referenced by llvm::LoopVectorizePass::processLoop().
const char LLVMLoopVectorizeFollowupVectorized[] |
Definition at line 166 of file LoopVectorize.cpp.
Referenced by llvm::LoopVectorizationPlanner::executePlan().
|
static |
Referenced by getSmallBestKnownTC().
|
static |
Definition at line 7344 of file LoopVectorize.cpp.
|
static |
|
static |
Referenced by llvm::LoopVectorizationCostModel::selectInterleaveCount().
|
static |
The number of stores in a loop that are allowed to need predication.
|
static |
|
static |
Referenced by llvm::InnerLoopVectorizer::fixReduction().
|
static |
Referenced by getScalarEpilogueLowering().
|
static |
Referenced by llvm::LoopVectorizationPlanner::printPlans().
|
static |
Referenced by llvm::LoopVectorizationCostModel::selectInterleaveCount().
|
static |
Referenced by llvm::LoopVectorizationCostModel::selectInterleaveCount().
|
static |
Loops with a known constant trip count below this number are vectorized only if no scalar iteration overheads are incurred.
Referenced by llvm::LoopVectorizePass::processLoop().
|
static |
Referenced by areRuntimeChecksProfitable().
const char VerboseDebug[] = DEBUG_TYPE "-verbose" |
Definition at line 160 of file LoopVectorize.cpp.
Referenced by llvm::EpilogueVectorizerMainLoop::printDebugTracesAtEnd(), and llvm::EpilogueVectorizerEpilogueLoop::printDebugTracesAtEnd().