#include "llvm/Transforms/Vectorize/LoopVectorize.h"
#include "LoopVectorizationPlanner.h"
#include "VPRecipeBuilder.h"
#include "VPlan.h"
#include "VPlanAnalysis.h"
#include "VPlanHCFGBuilder.h"
#include "VPlanPatternMatch.h"
#include "VPlanTransforms.h"
#include "VPlanVerifier.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/DemandedBits.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/ProfDataUtils.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/IR/VectorBuilder.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/InstructionCost.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/InjectTLIMappings.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopSimplify.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/LoopVersioning.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include "llvm/Transforms/Utils/SizeOpts.h"
#include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstdint>
#include <functional>
#include <iterator>
#include <limits>
#include <map>
#include <memory>
#include <string>
#include <tuple>
#include <utility>

Classes
class	llvm::InnerLoopVectorizer
	InnerLoopVectorizer vectorizes loops which contain only one basic block to a specified vectorization factor (VF). More...

class	llvm::InnerLoopUnroller

struct	llvm::EpilogueLoopVectorizationInfo
	Encapsulate information regarding vectorization of a loop and its epilogue. More...

class	llvm::InnerLoopAndEpilogueVectorizer
	An extension of the inner loop vectorizer that creates a skeleton for a vectorized loop that has its epilogue (residual) also vectorized. More...

class	llvm::EpilogueVectorizerMainLoop
	A specialized derived class of inner loop vectorizer that performs vectorization of main loops in the process of vectorizing loops and their epilogues. More...

class	llvm::EpilogueVectorizerEpilogueLoop

class	llvm::LoopVectorizationCostModel
	LoopVectorizationCostModel - estimates the expected speedups due to vectorization. More...

struct	llvm::LoopVectorizationCostModel::RegisterUsage
	A struct that represents some properties of the register usage of a loop. More...

struct	llvm::LoopVectorizationCostModel::CallWideningDecision

Namespaces
namespace	PreferPredicateTy

namespace	llvm
	This is an optimization pass for GlobalISel generic memory operations.

Macros
#define	LV_NAME "loop-vectorize"

#define	DEBUG_TYPE LV_NAME

Typedefs
using	llvm::InstructionVFPair = std::pair< Instruction *, ElementCount >

Enumerations
enum	PreferPredicateTy::Option { PreferPredicateTy::ScalarEpilogue = 0 , PreferPredicateTy::PredicateElseScalarEpilogue , PreferPredicateTy::PredicateOrDontVectorize }

enum	llvm::ScalarEpilogueLowering { llvm::CM_ScalarEpilogueAllowed , llvm::CM_ScalarEpilogueNotAllowedOptSize , llvm::CM_ScalarEpilogueNotAllowedLowTripLoop , llvm::CM_ScalarEpilogueNotNeededUsePredicate , llvm::CM_ScalarEpilogueNotAllowedUsePredicate }

Functions
	STATISTIC (LoopsVectorized, "Number of loops vectorized")

	STATISTIC (LoopsAnalyzed, "Number of loops analyzed for vectorization")

	STATISTIC (LoopsEpilogueVectorized, "Number of epilogues vectorized")

static bool	hasIrregularType (Type *Ty, const DataLayout &DL)
	A helper function that returns true if the given type is irregular.

static std::optional< unsigned >	getSmallBestKnownTC (ScalarEvolution &SE, Loop *L)
	Returns "best known" trip count for the specified loop `L` as defined by the following procedure: 1) Returns exact trip count if it is known.

static DebugLoc	getDebugLocFromInstOrOperands (Instruction *I)
	Look for a meaningful debug location on the instruction or its operands.

static void	debugVectorizationMessage (const StringRef Prefix, const StringRef DebugMsg, Instruction *I)
	Write a `DebugMsg` about vectorization to the debug output stream.

static OptimizationRemarkAnalysis	createLVAnalysis (const char *PassName, StringRef RemarkName, Loop *TheLoop, Instruction *I)
	Create an analysis remark that explains why vectorization failed.

Value *	llvm::createStepForVF (IRBuilderBase &B, Type *Ty, ElementCount VF, int64_t Step)
	Return a value for Step multiplied by VF.

Value *	llvm::getRuntimeVF (IRBuilderBase &B, Type *Ty, ElementCount VF)
	Return the runtime value for VF.

const SCEV *	llvm::createTripCountSCEV (Type *IdxTy, PredicatedScalarEvolution &PSE, Loop *OrigLoop)

void	llvm::reportVectorizationFailure (const StringRef DebugMsg, const StringRef OREMsg, const StringRef ORETag, OptimizationRemarkEmitter *ORE, Loop *TheLoop, Instruction *I=nullptr)
	Reports a vectorization failure: print `DebugMsg` for debugging purposes along with the corresponding optimization remark `RemarkName`.

static void	llvm::reportVectorizationInfo (const StringRef Msg, const StringRef ORETag, OptimizationRemarkEmitter *ORE, Loop *TheLoop, Instruction *I=nullptr)
	Reports an informative message: print `Msg` for debugging purposes as well as an optimization remark.

static void	llvm::reportVectorization (OptimizationRemarkEmitter *ORE, Loop *TheLoop, VectorizationFactor VF, unsigned IC)
	Report successful vectorization of the loop.

static bool	useActiveLaneMask (TailFoldingStyle Style)

static bool	useActiveLaneMaskForControlFlow (TailFoldingStyle Style)

static bool	isExplicitVecOuterLoop (Loop *OuterLp, OptimizationRemarkEmitter *ORE)

static void	collectSupportedLoops (Loop &L, LoopInfo *LI, OptimizationRemarkEmitter *ORE, SmallVectorImpl< Loop * > &V)

static Value *	emitTransformedIndex (IRBuilderBase &B, Value *Index, Value *StartValue, Value *Step, InductionDescriptor::InductionKind InductionKind, const BinaryOperator *InductionBinOp)
	Compute the transformed value of Index at offset StartValue using step StepValue.

std::optional< unsigned >	getMaxVScale (const Function &F, const TargetTransformInfo &TTI)

static bool	isIndvarOverflowCheckKnownFalse (const LoopVectorizationCostModel *Cost, ElementCount VF, std::optional< unsigned > UF=std::nullopt)
	For the given VF and UF and maximum trip count computed for the loop, return whether the induction variable might overflow in the vectorized loop.

static bool	useMaskedInterleavedAccesses (const TargetTransformInfo &TTI)

static Value *	getExpandedStep (const InductionDescriptor &ID, const SCEV2ValueTy &ExpandedSCEVs)
	Return the expanded step for `ID` using `ExpandedSCEVs` to look up SCEV expansion results.

static void	cse (BasicBlock *BB)
	Perform cse of induction variable instructions.

static Type *	MaybeVectorizeType (Type *Elt, ElementCount VF)

static std::optional< unsigned >	getVScaleForTuning (const Loop *L, const TargetTransformInfo &TTI)
	Convenience function that returns the value of vscale_range iff vscale_range.min == vscale_range.max or otherwise returns the value returned by the corresponding TTI method.

static void	emitInvalidCostRemarks (SmallVector< InstructionVFPair > InvalidCosts, OptimizationRemarkEmitter *ORE, Loop *TheLoop)

static bool	willGenerateVectors (VPlan &Plan, ElementCount VF, const TargetTransformInfo &TTI)
	Check if any recipe of `Plan` will generate a vector value, which will be assigned a vector register.

static const SCEV *	getAddressAccessSCEV (Value *Ptr, LoopVectorizationLegality *Legal, PredicatedScalarEvolution &PSE, const Loop *TheLoop)
	Gets Address Access SCEV after verifying that the access pattern is loop invariant except the induction variable dependence.

static ElementCount	determineVPlanVF (const TargetTransformInfo &TTI, LoopVectorizationCostModel &CM)

static void	AddRuntimeUnrollDisableMetaData (Loop *L)

static void	createAndCollectMergePhiForReduction (VPInstruction *RedResult, DenseMap< const RecurrenceDescriptor *, Value * > &ReductionResumeValues, VPTransformState &State, Loop *OrigLoop, BasicBlock *LoopMiddleBlock, bool VectorizingEpilogue)

static VPWidenIntOrFpInductionRecipe *	createWidenInductionRecipes (PHINode *Phi, Instruction *PhiOrTrunc, VPValue *Start, const InductionDescriptor &IndDesc, VPlan &Plan, ScalarEvolution &SE, Loop &OrigLoop)
	Creates a VPWidenIntOrFpInductionRecipe for `Phi`.

static void	addCanonicalIVRecipes (VPlan &Plan, Type *IdxTy, bool HasNUW, DebugLoc DL)

static void	addUsersInExitBlock (Loop *OrigLoop, VPRecipeBuilder &Builder, VPlan &Plan)

static void	addLiveOutsForFirstOrderRecurrences (VPlan &Plan)
	Feed a resume value for every FOR from the vector loop to the scalar loop, if middle block branches to scalar preheader, by introducing ExtractFromEnd and ResumePhi recipes in each, respectively, and a VPLiveOut which uses the latter and corresponds to the scalar header.

static Instruction *	createReverseEVL (IRBuilderBase &Builder, Value *Operand, Value *EVL, const Twine &Name)
	Use all-true mask for reverse rather than actual mask, as it avoids a dependence w/o affecting the result.

static ScalarEpilogueLowering	getScalarEpilogueLowering (Function *F, Loop *L, LoopVectorizeHints &Hints, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI, TargetTransformInfo *TTI, TargetLibraryInfo *TLI, LoopVectorizationLegality &LVL, InterleavedAccessInfo *IAI)

static bool	processLoopInVPlanNativePath (Loop *L, PredicatedScalarEvolution &PSE, LoopInfo *LI, DominatorTree *DT, LoopVectorizationLegality *LVL, TargetTransformInfo *TTI, TargetLibraryInfo *TLI, DemandedBits *DB, AssumptionCache *AC, OptimizationRemarkEmitter *ORE, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, LoopVectorizeHints &Hints, LoopVectorizationRequirements &Requirements)

static void	checkMixedPrecision (Loop *L, OptimizationRemarkEmitter *ORE)

static bool	areRuntimeChecksProfitable (GeneratedRTChecks &Checks, VectorizationFactor &VF, std::optional< unsigned > VScale, Loop *L, ScalarEvolution &SE, ScalarEpilogueLowering SEL)

Variables
const char	VerboseDebug [] = DEBUG_TYPE "-verbose"

static cl::opt< bool >	EnableEpilogueVectorization ("enable-epilogue-vectorization", cl::init(true), cl::Hidden, cl::desc("Enable vectorization of epilogue loops."))

static cl::opt< unsigned >	EpilogueVectorizationForceVF ("epilogue-vectorization-force-VF", cl::init(1), cl::Hidden, cl::desc("When epilogue vectorization is enabled, and a value greater than " "1 is specified, forces the given VF for all applicable epilogue " "loops."))

static cl::opt< unsigned >	EpilogueVectorizationMinVF ("epilogue-vectorization-minimum-VF", cl::init(16), cl::Hidden, cl::desc("Only loops with vectorization factor equal to or larger than " "the specified value are considered for epilogue vectorization."))

static cl::opt< unsigned >	TinyTripCountVectorThreshold ("vectorizer-min-trip-count", cl::init(16), cl::Hidden, cl::desc("Loops with a constant trip count that is smaller than this " "value are vectorized only if no scalar iteration overheads " "are incurred."))
	Loops with a known constant trip count below this number are vectorized only if no scalar iteration overheads are incurred.

static cl::opt< unsigned >	VectorizeMemoryCheckThreshold ("vectorize-memory-check-threshold", cl::init(128), cl::Hidden, cl::desc("The maximum allowed number of runtime memory checks"))

static cl::opt< PreferPredicateTy::Option >	PreferPredicateOverEpilogue ("prefer-predicate-over-epilogue", cl::init(PreferPredicateTy::ScalarEpilogue), cl::Hidden, cl::desc("Tail-folding and predication preferences over creating a scalar " "epilogue loop."), cl::values(clEnumValN(PreferPredicateTy::ScalarEpilogue, "scalar-epilogue", "Don't tail-predicate loops, create scalar epilogue"), clEnumValN(PreferPredicateTy::PredicateElseScalarEpilogue, "predicate-else-scalar-epilogue", "prefer tail-folding, create scalar epilogue if tail " "folding fails."), clEnumValN(PreferPredicateTy::PredicateOrDontVectorize, "predicate-dont-vectorize", "prefers tail-folding, don't attempt vectorization if " "tail-folding fails.")))

static cl::opt< TailFoldingStyle >	ForceTailFoldingStyle ("force-tail-folding-style", cl::desc("Force the tail folding style"), cl::init(TailFoldingStyle::None), cl::values(clEnumValN(TailFoldingStyle::None, "none", "Disable tail folding"), clEnumValN(TailFoldingStyle::Data, "data", "Create lane mask for data only, using active.lane.mask intrinsic"), clEnumValN(TailFoldingStyle::DataWithoutLaneMask, "data-without-lane-mask", "Create lane mask with compare/stepvector"), clEnumValN(TailFoldingStyle::DataAndControlFlow, "data-and-control", "Create lane mask using active.lane.mask intrinsic, and use " "it for both data and control flow"), clEnumValN(TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck, "data-and-control-without-rt-check", "Similar to data-and-control, but remove the runtime check"), clEnumValN(TailFoldingStyle::DataWithEVL, "data-with-evl", "Use predicated EVL instructions for tail folding. If EVL " "is unsupported, fallback to data-without-lane-mask.")))

static cl::opt< bool >	MaximizeBandwidth ("vectorizer-maximize-bandwidth", cl::init(false), cl::Hidden, cl::desc("Maximize bandwidth when selecting vectorization factor which " "will be determined by the smallest type in loop."))

static cl::opt< bool >	EnableInterleavedMemAccesses ("enable-interleaved-mem-accesses", cl::init(false), cl::Hidden, cl::desc("Enable vectorization on interleaved memory accesses in a loop"))

static cl::opt< bool >	EnableMaskedInterleavedMemAccesses ("enable-masked-interleaved-mem-accesses", cl::init(false), cl::Hidden, cl::desc("Enable vectorization on masked interleaved memory accesses in a loop"))
	An interleave-group may need masking if it resides in a block that needs predication, or in order to mask away gaps.

static cl::opt< unsigned >	ForceTargetNumScalarRegs ("force-target-num-scalar-regs", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's number of scalar registers."))

static cl::opt< unsigned >	ForceTargetNumVectorRegs ("force-target-num-vector-regs", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's number of vector registers."))

static cl::opt< unsigned >	ForceTargetMaxScalarInterleaveFactor ("force-target-max-scalar-interleave", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's max interleave factor for " "scalar loops."))

static cl::opt< unsigned >	ForceTargetMaxVectorInterleaveFactor ("force-target-max-vector-interleave", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's max interleave factor for " "vectorized loops."))

cl::opt< unsigned >	ForceTargetInstructionCost ("force-target-instruction-cost", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's expected cost for " "an instruction to a single constant value. Mostly " "useful for getting consistent testing."))

static cl::opt< bool >	ForceTargetSupportsScalableVectors ("force-target-supports-scalable-vectors", cl::init(false), cl::Hidden, cl::desc("Pretend that scalable vectors are supported, even if the target does " "not support them. This flag should only be used for testing."))

static cl::opt< unsigned >	SmallLoopCost ("small-loop-cost", cl::init(20), cl::Hidden, cl::desc("The cost of a loop that is considered 'small' by the interleaver."))

static cl::opt< bool >	LoopVectorizeWithBlockFrequency ("loop-vectorize-with-block-frequency", cl::init(true), cl::Hidden, cl::desc("Enable the use of the block frequency analysis to access PGO " "heuristics minimizing code growth in cold regions and being more " "aggressive in hot regions."))

static cl::opt< bool >	EnableLoadStoreRuntimeInterleave ("enable-loadstore-runtime-interleave", cl::init(true), cl::Hidden, cl::desc("Enable runtime interleaving until load/store ports are saturated"))

static cl::opt< unsigned >	NumberOfStoresToPredicate ("vectorize-num-stores-pred", cl::init(1), cl::Hidden, cl::desc("Max number of stores to be predicated behind an if."))
	The number of stores in a loop that are allowed to need predication.

static cl::opt< bool >	EnableIndVarRegisterHeur ("enable-ind-var-reg-heur", cl::init(true), cl::Hidden, cl::desc("Count the induction variable only once when interleaving"))

static cl::opt< bool >	EnableCondStoresVectorization ("enable-cond-stores-vec", cl::init(true), cl::Hidden, cl::desc("Enable if predication of stores during vectorization."))

static cl::opt< unsigned >	MaxNestedScalarReductionIC ("max-nested-scalar-reduction-interleave", cl::init(2), cl::Hidden, cl::desc("The maximum interleave count to use when interleaving a scalar " "reduction in a nested loop."))

static cl::opt< bool >	PreferInLoopReductions ("prefer-inloop-reductions", cl::init(false), cl::Hidden, cl::desc("Prefer in-loop vector reductions, " "overriding the targets preference."))

static cl::opt< bool >	ForceOrderedReductions ("force-ordered-reductions", cl::init(false), cl::Hidden, cl::desc("Enable the vectorisation of loops with in-order (strict) " "FP reductions"))

static cl::opt< bool >	PreferPredicatedReductionSelect ("prefer-predicated-reduction-select", cl::init(false), cl::Hidden, cl::desc("Prefer predicating a reduction operation over an after loop select."))

cl::opt< bool >	llvm::EnableVPlanNativePath ("enable-vplan-native-path", cl::Hidden, cl::desc("Enable VPlan-native vectorization path with " "support for outer loop vectorization."))

static cl::opt< bool >	VPlanBuildStressTest ("vplan-build-stress-test", cl::init(false), cl::Hidden, cl::desc("Build VPlan for every supported loop nest in the function and bail " "out right after the build (stress test the VPlan H-CFG construction " "in the VPlan-native vectorization path)."))

static cl::opt< bool >	PrintVPlansInDotFormat ("vplan-print-in-dot-format", cl::Hidden, cl::desc("Use dot format instead of plain text when dumping VPlans"))

static cl::opt< cl::boolOrDefault >	ForceSafeDivisor ("force-widen-divrem-via-safe-divisor", cl::Hidden, cl::desc("Override cost based safe divisor widening for div/rem instructions"))

static cl::opt< bool >	UseWiderVFIfCallVariantsPresent ("vectorizer-maximize-bandwidth-for-vector-calls", cl::init(true), cl::Hidden, cl::desc("Try wider VFs if they enable the use of vector variants"))

static constexpr uint32_t	SCEVCheckBypassWeights [] = {1, 127}

static constexpr uint32_t	MemCheckBypassWeights [] = {1, 127}

static constexpr uint32_t	MinItersBypassWeights [] = {1, 127}


const char	LLVMLoopVectorizeFollowupAll [] = "llvm.loop.vectorize.followup_all"

const char	LLVMLoopVectorizeFollowupVectorized []

const char	LLVMLoopVectorizeFollowupEpilogue []

Macro Definition Documentation

◆ DEBUG_TYPE

#define DEBUG_TYPE LV_NAME

Definition at line 163 of file LoopVectorize.cpp.

◆ LV_NAME

#define LV_NAME "loop-vectorize"

Definition at line 162 of file LoopVectorize.cpp.

Function Documentation

◆ addCanonicalIVRecipes()

static void addCanonicalIVRecipes	(	VPlan &	Plan,
		Type *	IdxTy,
		bool	HasNUW,
		DebugLoc	DL
	)

static

Definition at line 8380 of file LoopVectorize.cpp.

References llvm::VPUser::addOperand(), llvm::VPInstruction::BranchOnCount, llvm::VPBuilder::createNaryOp(), llvm::VPBuilder::createOverflowingOp(), DL, llvm::VPBlockBase::getEntryBasicBlock(), llvm::VPBlockBase::getExitingBasicBlock(), llvm::VPlan::getOrAddLiveIn(), llvm::VPlan::getVectorLoopRegion(), llvm::VPlan::getVectorTripCount(), and llvm::VPlan::getVFxUF().

◆ addLiveOutsForFirstOrderRecurrences()

static void addLiveOutsForFirstOrderRecurrences ( VPlan & Plan )

static

Feed a resume value for every FOR from the vector loop to the scalar loop, if middle block branches to scalar preheader, by introducing ExtractFromEnd and ResumePhi recipes in each, respectively, and a VPLiveOut which uses the latter and corresponds to the scalar header.

Definition at line 8434 of file LoopVectorize.cpp.

References llvm::VPlan::addLiveOut(), assert(), llvm::VPBuilder::createNaryOp(), llvm::VPInstruction::ExtractFromEnd, llvm::VPlan::getCanonicalIV(), llvm::VPBlockBase::getEntryBasicBlock(), llvm::VPlan::getOrAddLiveIn(), llvm::VPCanonicalIVPHIRecipe::getScalarType(), llvm::VPBlockBase::getSingleSuccessor(), llvm::VPBlockBase::getSuccessors(), llvm::VPlan::getVectorLoopRegion(), llvm::VPBasicBlock::phis(), llvm::VPInstruction::ResumePhi, and llvm::VPBuilder::setInsertPoint().

◆ AddRuntimeUnrollDisableMetaData()

static void AddRuntimeUnrollDisableMetaData ( Loop * L )

static

Definition at line 7202 of file LoopVectorize.cpp.

References llvm::MDNode::get(), llvm::MDString::get(), llvm::MDNode::getNumOperands(), llvm::MDNode::getOperand(), llvm::SmallVectorTemplateBase< T, bool >::push_back(), and llvm::MDNode::replaceOperandWith().

Referenced by llvm::LoopVectorizationPlanner::executePlan(), and llvm::LoopVectorizePass::processLoop().

◆ addUsersInExitBlock()

static void addUsersInExitBlock	(	Loop *	OrigLoop,
		VPRecipeBuilder &	Builder,
		VPlan &	Plan
	)

static

Definition at line 8405 of file LoopVectorize.cpp.

References llvm::VPlan::addLiveOut(), llvm::LoopBase< BlockT, LoopT >::getExitingBlock(), llvm::BasicBlock::getSinglePredecessor(), llvm::LoopBase< BlockT, LoopT >::getUniqueExitBlock(), llvm::VPRecipeBuilder::getVPValueOrAddLiveIn(), and llvm::BasicBlock::phis().

◆ areRuntimeChecksProfitable()

static bool areRuntimeChecksProfitable	(	GeneratedRTChecks &	Checks,
		VectorizationFactor &	VF,
		std::optional< unsigned >	VScale,
		Loop *	L,
		ScalarEvolution &	SE,
		ScalarEpilogueLowering	SEL
	)

static

◆ checkMixedPrecision()

static void checkMixedPrecision	(	Loop *	L,
		OptimizationRemarkEmitter *	ORE
	)

static

Definition at line 9526 of file LoopVectorize.cpp.

References llvm::OptimizationRemarkEmitter::emit(), llvm::SmallVectorBase< Size_T >::empty(), I, llvm::SmallPtrSetImpl< PtrType >::insert(), LV_NAME, llvm::SmallVectorImpl< T >::pop_back_val(), and llvm::SmallVectorTemplateBase< T, bool >::push_back().

Referenced by llvm::LoopVectorizePass::processLoop().

◆ collectSupportedLoops()

static void collectSupportedLoops	(	Loop &	L,
		LoopInfo *	LI,
		OptimizationRemarkEmitter *	ORE,
		SmallVectorImpl< Loop * > &	V
	)

static

Definition at line 2175 of file LoopVectorize.cpp.

References collectSupportedLoops(), llvm::EnableVPlanNativePath, isExplicitVecOuterLoop(), llvm::LoopBlocksRPO::perform(), and VPlanBuildStressTest.

Referenced by collectSupportedLoops(), and llvm::LoopVectorizePass::runImpl().

◆ createAndCollectMergePhiForReduction()

static void createAndCollectMergePhiForReduction	(	VPInstruction *	RedResult,
		DenseMap< const RecurrenceDescriptor *, Value * > &	ReductionResumeValues,
		VPTransformState &	State,
		Loop *	OrigLoop,
		BasicBlock *	LoopMiddleBlock,
		bool	VectorizingEpilogue
	)

static

Definition at line 7238 of file LoopVectorize.cpp.

References assert(), llvm::VPInstruction::ComputeReductionResult, llvm::PHINode::Create(), llvm::VPTransformState::get(), llvm::VPLane::getFirstLane(), llvm::ilist_node_impl< OptionsT >::getIterator(), llvm::RecurrenceDescriptor::getLoopExitInstr(), llvm::LoopBase< BlockT, LoopT >::getLoopLatch(), llvm::LoopBase< BlockT, LoopT >::getLoopPreheader(), llvm::VPInstruction::getOpcode(), llvm::VPUser::getOperand(), llvm::RecurrenceDescriptor::getRecurrenceKind(), llvm::RecurrenceDescriptor::getRecurrenceStartValue(), llvm::BasicBlock::getTerminator(), llvm::Value::getType(), llvm::CmpInst::ICMP_NE, llvm::is_contained(), llvm::RecurrenceDescriptor::isAnyOfRecurrenceKind(), llvm::predecessors(), and llvm::VPTransformState::UF.

Referenced by llvm::LoopVectorizationPlanner::executePlan().

◆ createLVAnalysis()

static OptimizationRemarkAnalysis createLVAnalysis	(	const char *	PassName,
		StringRef	RemarkName,
		Loop *	TheLoop,
		Instruction *	I
	)

static

Create an analysis remark that explains why vectorization failed.

PassName is the name of the pass (e.g. can be AlwaysPrint). RemarkName is the identifier for the remark. If I is passed it is an instruction that prevents vectorization. Otherwise TheLoop is used for the location of the remark.

Returns: the remark object that can be streamed to.

Definition at line 894 of file LoopVectorize.cpp.

References DL, llvm::LoopBase< BlockT, LoopT >::getHeader(), llvm::Loop::getStartLoc(), I, and PassName.

Referenced by llvm::reportVectorizationFailure(), and llvm::reportVectorizationInfo().

◆ createReverseEVL()

static Instruction * createReverseEVL	(	IRBuilderBase &	Builder,
		Value *	Operand,
		Value *	EVL,
		const Twine &	Name
	)

static

Use all-true mask for reverse rather than actual mask, as it avoids a dependence w/o affecting the result.

Definition at line 9272 of file LoopVectorize.cpp.

References llvm::IRBuilderBase::CreateIntrinsic(), llvm::IRBuilderBase::CreateVectorSplat(), llvm::IRBuilderBase::getTrue(), llvm::Value::getType(), and Name.

Referenced by llvm::VPWidenLoadEVLRecipe::execute(), and llvm::VPWidenStoreEVLRecipe::execute().

◆ createWidenInductionRecipes()

static VPWidenIntOrFpInductionRecipe * createWidenInductionRecipes	(	PHINode *	Phi,
		Instruction *	PhiOrTrunc,
		VPValue *	Start,
		const InductionDescriptor &	IndDesc,
		VPlan &	Plan,
		ScalarEvolution &	SE,
		Loop &	OrigLoop
	)

static

Creates a VPWidenIntOrFpInductionRecipe for Phi.

If needed, it will also insert a recipe to expand the step for the induction recipe.

Definition at line 7956 of file LoopVectorize.cpp.

References assert(), llvm::LoopBase< BlockT, LoopT >::getLoopPreheader(), llvm::vputils::getOrCreateVPValueForSCEVExpr(), llvm::InductionDescriptor::getStartValue(), llvm::InductionDescriptor::getStep(), and llvm::ScalarEvolution::isLoopInvariant().

◆ cse()

static void cse ( BasicBlock * BB )

static

Perform cse of induction variable instructions.

Definition at line 2865 of file LoopVectorize.cpp.

References llvm::DenseMapBase< DerivedT, KeyT, ValueT, KeyInfoT, BucketT >::lookup(), and llvm::make_early_inc_range().

Referenced by llvm::InnerLoopVectorizer::fixVectorizedLoop().

◆ debugVectorizationMessage()

static void debugVectorizationMessage	(	const StringRef	Prefix,
		const StringRef	DebugMsg,
		Instruction *	I
	)

static

Write a DebugMsg about vectorization to the debug output stream.

If I is passed, the message relates to that particular instruction.

Definition at line 875 of file LoopVectorize.cpp.

References llvm::dbgs(), and I.

Referenced by llvm::reportVectorization(), llvm::reportVectorizationFailure(), and llvm::reportVectorizationInfo().

◆ determineVPlanVF()

static ElementCount determineVPlanVF	(	const TargetTransformInfo &	TTI,
		LoopVectorizationCostModel &	CM
	)

static

Definition at line 6820 of file LoopVectorize.cpp.

References llvm::TargetTransformInfo::enableScalableVectorization(), llvm::ElementCount::get(), llvm::TargetTransformInfo::getRegisterBitWidth(), llvm::LoopVectorizationCostModel::getSmallestAndWidestTypes(), N, RegSize, llvm::TargetTransformInfo::RGK_FixedWidthVector, and llvm::TargetTransformInfo::RGK_ScalableVector.

Referenced by llvm::LoopVectorizationPlanner::planInVPlanNativePath().

◆ emitInvalidCostRemarks()

static void emitInvalidCostRemarks	(	SmallVector< InstructionVFPair >	InvalidCosts,
		OptimizationRemarkEmitter *	ORE,
		Loop *	TheLoop
	)

static

Definition at line 4346 of file LoopVectorize.cpp.

References A, assert(), B, llvm::SmallVectorBase< Size_T >::empty(), llvm::raw_ostream::flush(), I, if(), LHS, OS, llvm::reportVectorizationInfo(), RHS, llvm::sort(), and llvm::CallingConv::Tail.

◆ emitTransformedIndex()

static Value * emitTransformedIndex	(	IRBuilderBase &	B,
		Value *	Index,
		Value *	StartValue,
		Value *	Step,
		InductionDescriptor::InductionKind	InductionKind,
		const BinaryOperator *	InductionBinOp
	)

static

Compute the transformed value of Index at offset StartValue using step StepValue.

For integer induction, returns StartValue + Index * StepValue. For pointer induction, returns StartValue[Index * StepValue]. FIXME: The newly created binary instructions should contain nsw/nuw flags, which can be found from the original scalar operations.

Definition at line 2212 of file LoopVectorize.cpp.

References assert(), B, CreateAdd(), CreateMul(), llvm::Value::getName(), llvm::BinaryOperator::getOpcode(), llvm::Value::getType(), llvm::InductionDescriptor::IK_FpInduction, llvm::InductionDescriptor::IK_IntInduction, llvm::InductionDescriptor::IK_NoInduction, llvm::InductionDescriptor::IK_PtrInduction, llvm::Type::isFloatingPointTy(), llvm::Type::isIntegerTy(), llvm_unreachable, llvm::Offset, llvm::Value::setName(), X, and Y.

Referenced by llvm::InnerLoopVectorizer::createInductionResumeValue(), llvm::VPDerivedIVRecipe::execute(), and llvm::InnerLoopVectorizer::fixupIVUsers().

◆ getAddressAccessSCEV()

static const SCEV * getAddressAccessSCEV	(	Value *	Ptr,
		LoopVectorizationLegality *	Legal,
		PredicatedScalarEvolution &	PSE,
		const Loop *	TheLoop
	)

static

Gets Address Access SCEV after verifying that the access pattern is loop invariant except the induction variable dependence.

This SCEV can be sent to the Target in order to estimate the address calculation cost.

Definition at line 5531 of file LoopVectorize.cpp.

References llvm::ScalarEvolution::getSCEV(), llvm::PredicatedScalarEvolution::getSCEV(), llvm::PredicatedScalarEvolution::getSE(), llvm::ScalarEvolution::isLoopInvariant(), llvm::IRSimilarity::Legal, and Ptr.

◆ getDebugLocFromInstOrOperands()

static DebugLoc getDebugLocFromInstOrOperands ( Instruction * I )

static

Look for a meaningful debug location on the instruction or its operands.

Definition at line 855 of file LoopVectorize.cpp.

References llvm::Empty, and I.

◆ getExpandedStep()

static Value * getExpandedStep	(	const InductionDescriptor &	ID,
		const SCEV2ValueTy &	ExpandedSCEVs
	)

static

Return the expanded step for ID using ExpandedSCEVs to look up SCEV expansion results.

Definition at line 2661 of file LoopVectorize.cpp.

References assert(), llvm::CallingConv::C, and I.

Referenced by llvm::InnerLoopVectorizer::createInductionResumeValues(), and llvm::LoopVectorizePass::processLoop().

◆ getMaxVScale()

std::optional< unsigned > getMaxVScale	(	const Function &	F,
		const TargetTransformInfo &	TTI
	)

Definition at line 2291 of file LoopVectorize.cpp.

References F, and llvm::TargetTransformInfo::getMaxVScale().

Referenced by llvm::LoopVectorizationCostModel::computeMaxVF(), and isIndvarOverflowCheckKnownFalse().

◆ getScalarEpilogueLowering()

static ScalarEpilogueLowering getScalarEpilogueLowering	(	Function *	F,
		Loop *	L,
		LoopVectorizeHints &	Hints,
		ProfileSummaryInfo *	PSI,
		BlockFrequencyInfo *	BFI,
		TargetTransformInfo *	TTI,
		TargetLibraryInfo *	TLI,
		LoopVectorizationLegality &	LVL,
		InterleavedAccessInfo *	IAI
	)

static

◆ getSmallBestKnownTC()

static std::optional< unsigned > getSmallBestKnownTC	(	ScalarEvolution &	SE,
		Loop *	L
	)

static

Returns "best known" trip count for the specified loop L as defined by the following procedure: 1) Returns exact trip count if it is known.

2) Returns expected trip count according to profile data if any. 3) Returns upper bound estimate if it is known. 4) Returns std::nullopt if all of the above failed.

Definition at line 422 of file LoopVectorize.cpp.

References llvm::getLoopEstimatedTripCount(), llvm::ScalarEvolution::getSmallConstantMaxTripCount(), llvm::ScalarEvolution::getSmallConstantTripCount(), and LoopVectorizeWithBlockFrequency.

Referenced by areRuntimeChecksProfitable(), llvm::LoopVectorizePass::processLoop(), and llvm::LoopVectorizationCostModel::selectInterleaveCount().

◆ getVScaleForTuning()

static std::optional< unsigned > getVScaleForTuning	(	const Loop *	L,
		const TargetTransformInfo &	TTI
	)

static

Convenience function that returns the value of vscale_range iff vscale_range.min == vscale_range.max or otherwise returns the value returned by the corresponding TTI method.

Definition at line 4278 of file LoopVectorize.cpp.

References llvm::Function::getFnAttribute(), llvm::TargetTransformInfo::getVScaleForTuning(), llvm::Attribute::getVScaleRangeMin(), and llvm::Function::hasFnAttribute().

Referenced by llvm::LoopVectorizationCostModel::isEpilogueVectorizationProfitable(), llvm::LoopVectorizePass::processLoop(), llvm::LoopVectorizationPlanner::selectEpilogueVectorizationFactor(), and llvm::LoopVectorizationCostModel::selectInterleaveCount().

◆ hasIrregularType()

static bool hasIrregularType	(	Type *	Ty,
		const DataLayout &	DL
	)

static

A helper function that returns true if the given type is irregular.

The type is irregular if its allocated size doesn't equal the store size of an element of the corresponding vector type.

Definition at line 409 of file LoopVectorize.cpp.

References DL.

Referenced by llvm::LoopVectorizationCostModel::interleavedAccessCanBeWidened(), and llvm::LoopVectorizationCostModel::memoryInstructionCanBeWidened().

◆ isExplicitVecOuterLoop()

static bool isExplicitVecOuterLoop	(	Loop *	OuterLp,
		OptimizationRemarkEmitter *	ORE
	)

static

Definition at line 2147 of file LoopVectorize.cpp.

References llvm::LoopVectorizeHints::allowVectorization(), assert(), llvm::dbgs(), llvm::LoopVectorizeHints::emitRemarkWithHints(), llvm::LoopVectorizeHints::FK_Undefined, llvm::LoopVectorizeHints::getForce(), llvm::LoopBase< BlockT, LoopT >::getHeader(), llvm::LoopVectorizeHints::getInterleave(), llvm::BasicBlock::getParent(), llvm::LoopBase< BlockT, LoopT >::isInnermost(), and LLVM_DEBUG.

Referenced by collectSupportedLoops().

◆ isIndvarOverflowCheckKnownFalse()

static bool isIndvarOverflowCheckKnownFalse	(	const LoopVectorizationCostModel *	Cost,
		ElementCount	VF,
		std::optional< unsigned >	UF = `std::nullopt`
	)

static

For the given VF and UF and maximum trip count computed for the loop, return whether the induction variable might overflow in the vectorized loop.

If not, then we know a runtime overflow check always evaluates to false and can be removed.

Definition at line 2306 of file LoopVectorize.cpp.

References llvm::details::FixedOrScalableQuantity< LeafTy, ValueTy >::getKnownMinValue(), getMaxVScale(), and llvm::details::FixedOrScalableQuantity< LeafTy, ValueTy >::isScalable().

Referenced by llvm::InnerLoopVectorizer::emitIterationCountCheck().

◆ MaybeVectorizeType()

static Type * MaybeVectorizeType	(	Type *	Elt,
		ElementCount	VF
	)

static

Definition at line 2913 of file LoopVectorize.cpp.

References llvm::VectorType::get(), llvm::Type::isFloatingPointTy(), llvm::Type::isIntOrPtrTy(), and llvm::ElementCount::isScalar().

Referenced by llvm::LoopVectorizationCostModel::getVectorIntrinsicCost().

◆ processLoopInVPlanNativePath()

static bool processLoopInVPlanNativePath	(	Loop *	L,
		PredicatedScalarEvolution &	PSE,
		LoopInfo *	LI,
		DominatorTree *	DT,
		LoopVectorizationLegality *	LVL,
		TargetTransformInfo *	TTI,
		TargetLibraryInfo *	TLI,
		DemandedBits *	DB,
		AssumptionCache *	AC,
		OptimizationRemarkEmitter *	ORE,
		BlockFrequencyInfo *	BFI,
		ProfileSummaryInfo *	PSI,
		LoopVectorizeHints &	Hints,
		LoopVectorizationRequirements &	Requirements
	)

static

Definition at line 9459 of file LoopVectorize.cpp.

References assert(), llvm::LoopVectorizationCostModel::collectElementTypesForWidening(), llvm::dbgs(), llvm::VectorizationFactor::Disabled(), llvm::EnableVPlanNativePath, llvm::LoopVectorizationPlanner::executePlan(), F, llvm::PredicatedScalarEvolution::getBackedgeTakenCount(), llvm::LoopVectorizationPlanner::getBestPlanFor(), llvm::LoopVectorizationLegality::getLAI(), getScalarEpilogueLowering(), llvm::PredicatedScalarEvolution::getSE(), llvm::LoopVectorizeHints::getWidth(), llvm::hasBranchWeightMD(), LLVM_DEBUG, llvm::LoopVectorizationPlanner::planInVPlanNativePath(), llvm::reportVectorization(), llvm::LoopVectorizeHints::setAlreadyVectorized(), llvm::verifyFunction(), VPlanBuildStressTest, and llvm::VectorizationFactor::Width.

Referenced by llvm::LoopVectorizePass::processLoop().

◆ STATISTIC() [1/3]

STATISTIC	(	LoopsAnalyzed	,
		"Number of loops analyzed for vectorization"
	)

◆ STATISTIC() [2/3]

STATISTIC	(	LoopsEpilogueVectorized	,
		"Number of epilogues vectorized"
	)

◆ STATISTIC() [3/3]

STATISTIC	(	LoopsVectorized	,
		"Number of loops vectorized"
	)

◆ useActiveLaneMask()

static bool useActiveLaneMask ( TailFoldingStyle Style )

static

Definition at line 2122 of file LoopVectorize.cpp.

◆ useActiveLaneMaskForControlFlow()

static bool useActiveLaneMaskForControlFlow ( TailFoldingStyle Style )

static

Definition at line 2128 of file LoopVectorize.cpp.

◆ useMaskedInterleavedAccesses()

static bool useMaskedInterleavedAccesses ( const TargetTransformInfo & TTI )

static

Definition at line 2338 of file LoopVectorize.cpp.

References llvm::TargetTransformInfo::enableMaskedInterleavedAccessVectorization(), and EnableMaskedInterleavedMemAccesses.

Referenced by llvm::LoopVectorizationCostModel::computeMaxVF(), llvm::LoopVectorizationCostModel::interleavedAccessCanBeWidened(), llvm::LoopVectorizationPlanner::plan(), and llvm::LoopVectorizePass::processLoop().

◆ willGenerateVectors()

static bool willGenerateVectors	(	VPlan &	Plan,
		ElementCount	VF,
		const TargetTransformInfo &	TTI
	)

static

Check if any recipe of Plan will generate a vector value, which will be assigned a vector register.

Definition at line 4414 of file LoopVectorize.cpp.

References assert(), llvm::collectEphemeralRecipesForVPlan(), llvm::detail::DenseSetImpl< ValueT, MapTy, ValueInfoT >::contains(), llvm::VPlan::getCanonicalIV(), llvm::Type::getContext(), llvm::VPRegionBlock::getEntry(), llvm::details::FixedOrScalableQuantity< LeafTy, ValueTy >::getKnownMinValue(), llvm::TargetTransformInfo::getNumberOfParts(), llvm::VPCanonicalIVPHIRecipe::getScalarType(), llvm::VPlan::getVectorLoopRegion(), llvm::VPTypeAnalysis::inferScalarType(), llvm::detail::DenseSetImpl< ValueT, MapTy, ValueInfoT >::insert(), llvm::details::FixedOrScalableQuantity< LeafTy, ValueTy >::isScalable(), llvm::ElementCount::isVector(), llvm_unreachable, llvm::ToVectorTy(), and llvm::vp_depth_first_shallow().

Referenced by llvm::LoopVectorizationPlanner::getBestPlan().

Variable Documentation

◆ EnableCondStoresVectorization

cl::opt< bool > EnableCondStoresVectorization("enable-cond-stores-vec", cl::init(true), cl::Hidden, cl::desc("Enable if predication of stores during vectorization."))	(	"enable-cond-stores-vec"	,
		cl::init(true)	,
		cl::Hidden	,
		cl::desc("Enable if predication of stores during vectorization.")
	)

static

◆ EnableEpilogueVectorization

cl::opt< bool > EnableEpilogueVectorization("enable-epilogue-vectorization", cl::init(true), cl::Hidden, cl::desc("Enable vectorization of epilogue loops."))	(	"enable-epilogue-vectorization"	,
		cl::init(true)	,
		cl::Hidden	,
		cl::desc("Enable vectorization of epilogue loops.")
	)

static

Referenced by llvm::LoopVectorizationPlanner::selectEpilogueVectorizationFactor().

◆ EnableIndVarRegisterHeur

cl::opt< bool > EnableIndVarRegisterHeur("enable-ind-var-reg-heur", cl::init(true), cl::Hidden, cl::desc("Count the induction variable only once when interleaving"))	(	"enable-ind-var-reg-heur"	,
		cl::init(true)	,
		cl::Hidden	,
		cl::desc("Count the induction variable only once when interleaving")
	)

static

Referenced by llvm::LoopVectorizationCostModel::selectInterleaveCount().

◆ EnableInterleavedMemAccesses

cl::opt< bool > EnableInterleavedMemAccesses("enable-interleaved-mem-accesses", cl::init(false), cl::Hidden, cl::desc("Enable vectorization on interleaved memory accesses in a loop"))	(	"enable-interleaved-mem-accesses"	,
		cl::init(false)	,
		cl::Hidden	,
		cl::desc("Enable vectorization on interleaved memory accesses in a loop")
	)

static

Referenced by llvm::LoopVectorizePass::processLoop().

◆ EnableLoadStoreRuntimeInterleave

cl::opt< bool > EnableLoadStoreRuntimeInterleave("enable-loadstore-runtime-interleave", cl::init(true), cl::Hidden, cl::desc( "Enable runtime interleaving until load/store ports are saturated"))	(	"enable-loadstore-runtime-interleave"	,
		cl::init(true)	,
		cl::Hidden	,
		cl::desc( "Enable runtime interleaving until load/store ports are saturated")
	)

static

Referenced by llvm::LoopVectorizationCostModel::selectInterleaveCount().

◆ EnableMaskedInterleavedMemAccesses

cl::opt< bool > EnableMaskedInterleavedMemAccesses("enable-masked-interleaved-mem-accesses", cl::init(false), cl::Hidden, cl::desc("Enable vectorization on masked interleaved memory accesses in a loop"))	(	"enable-masked-interleaved-mem-accesses"	,
		cl::init(false)	,
		cl::Hidden	,
		cl::desc("Enable vectorization on masked interleaved memory accesses in a loop")
	)

static

An interleave-group may need masking if it resides in a block that needs predication, or in order to mask away gaps.

Referenced by useMaskedInterleavedAccesses().

◆ EpilogueVectorizationForceVF

cl::opt< unsigned > EpilogueVectorizationForceVF("epilogue-vectorization-force-VF", cl::init(1), cl::Hidden, cl::desc("When epilogue vectorization is enabled, and a value greater than " "1 is specified, forces the given VF for all applicable epilogue " "loops."))	(	"epilogue-vectorization-force-VF"	,
		cl::init(1)	,
		cl::Hidden	,
		cl::desc("When epilogue vectorization is enabled, and a value greater than " "1 is specified, forces the given VF for all applicable epilogue " "loops.")
	)

static

Referenced by llvm::LoopVectorizationPlanner::selectEpilogueVectorizationFactor().

◆ EpilogueVectorizationMinVF

cl::opt< unsigned > EpilogueVectorizationMinVF("epilogue-vectorization-minimum-VF", cl::init(16), cl::Hidden, cl::desc("Only loops with vectorization factor equal to or larger than " "the specified value are considered for epilogue vectorization."))	(	"epilogue-vectorization-minimum-VF"	,
		cl::init(16)	,
		cl::Hidden	,
		cl::desc("Only loops with vectorization factor equal to or larger than " "the specified value are considered for epilogue vectorization.")
	)

static

Referenced by llvm::LoopVectorizationCostModel::isEpilogueVectorizationProfitable().

◆ ForceOrderedReductions

cl::opt< bool > ForceOrderedReductions("force-ordered-reductions", cl::init(false), cl::Hidden, cl::desc("Enable the vectorisation of loops with in-order (strict) " "FP reductions"))	(	"force-ordered-reductions"	,
		cl::init(false)	,
		cl::Hidden	,
		cl::desc("Enable the vectorisation of loops with in-order (strict) " "FP reductions")
	)

static

Referenced by llvm::LoopVectorizePass::processLoop().

◆ ForceSafeDivisor

cl::opt< cl::boolOrDefault > ForceSafeDivisor("force-widen-divrem-via-safe-divisor", cl::Hidden, cl::desc( "Override cost based safe divisor widening for div/rem instructions"))	(	"force-widen-divrem-via-safe-divisor"	,
		cl::Hidden	,
		cl::desc( "Override cost based safe divisor widening for div/rem instructions")
	)

static

Referenced by llvm::LoopVectorizationCostModel::isDivRemScalarWithPredication().

◆ ForceTailFoldingStyle

cl::opt< TailFoldingStyle > ForceTailFoldingStyle("force-tail-folding-style", cl::desc("Force the tail folding style"), cl::init(TailFoldingStyle::None), cl::values( clEnumValN(TailFoldingStyle::None, "none", "Disable tail folding"), clEnumValN( TailFoldingStyle::Data, "data", "Create lane mask for data only, using active.lane.mask intrinsic"), clEnumValN(TailFoldingStyle::DataWithoutLaneMask, "data-without-lane-mask", "Create lane mask with compare/stepvector"), clEnumValN(TailFoldingStyle::DataAndControlFlow, "data-and-control", "Create lane mask using active.lane.mask intrinsic, and use " "it for both data and control flow"), clEnumValN(TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck, "data-and-control-without-rt-check", "Similar to data-and-control, but remove the runtime check"), clEnumValN(TailFoldingStyle::DataWithEVL, "data-with-evl", "Use predicated EVL instructions for tail folding. If EVL " "is unsupported, fallback to data-without-lane-mask.")))	(	"force-tail-folding-style"	,
		cl::desc("Force the tail folding style")	,
		cl::init(TailFoldingStyle::None)	,
		cl::values( clEnumValN(TailFoldingStyle::None, "none", "Disable tail folding"), clEnumValN( TailFoldingStyle::Data, "data", "Create lane mask for data only, using active.lane.mask intrinsic"), clEnumValN(TailFoldingStyle::DataWithoutLaneMask, "data-without-lane-mask", "Create lane mask with compare/stepvector"), clEnumValN(TailFoldingStyle::DataAndControlFlow, "data-and-control", "Create lane mask using active.lane.mask intrinsic, and use " "it for both data and control flow"), clEnumValN(TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck, "data-and-control-without-rt-check", "Similar to data-and-control, but remove the runtime check"), clEnumValN(TailFoldingStyle::DataWithEVL, "data-with-evl", "Use predicated EVL instructions for tail folding. If EVL " "is unsupported, fallback to data-without-lane-mask."))
	)

static

Referenced by llvm::LoopVectorizationCostModel::setTailFoldingStyles().

◆ ForceTargetInstructionCost

cl::opt< unsigned > ForceTargetInstructionCost("force-target-instruction-cost", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's expected cost for " "an instruction to a single constant value. Mostly " "useful for getting consistent testing."))	(	"force-target-instruction-cost"	,
		cl::init(0)	,
		cl::Hidden	,
		cl::desc("A flag that overrides the target's expected cost for " "an instruction to a single constant value. Mostly " "useful for getting consistent testing.")
	)

Referenced by llvm::VPRecipeBase::cost(), and llvm::LoopVectorizationCostModel::expectedCost().

◆ ForceTargetMaxScalarInterleaveFactor

cl::opt< unsigned > ForceTargetMaxScalarInterleaveFactor("force-target-max-scalar-interleave", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's max interleave factor for " "scalar loops."))	(	"force-target-max-scalar-interleave"	,
		cl::init(0)	,
		cl::Hidden	,
		cl::desc("A flag that overrides the target's max interleave factor for " "scalar loops.")
	)

static

Referenced by llvm::LoopVectorizationCostModel::selectInterleaveCount().

◆ ForceTargetMaxVectorInterleaveFactor

cl::opt< unsigned > ForceTargetMaxVectorInterleaveFactor("force-target-max-vector-interleave", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's max interleave factor for " "vectorized loops."))	(	"force-target-max-vector-interleave"	,
		cl::init(0)	,
		cl::Hidden	,
		cl::desc("A flag that overrides the target's max interleave factor for " "vectorized loops.")
	)

static

Referenced by llvm::LoopVectorizationCostModel::selectInterleaveCount().

◆ ForceTargetNumScalarRegs

cl::opt< unsigned > ForceTargetNumScalarRegs("force-target-num-scalar-regs", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's number of scalar registers."))	(	"force-target-num-scalar-regs"	,
		cl::init(0)	,
		cl::Hidden	,
		cl::desc("A flag that overrides the target's number of scalar registers.")
	)

static

Referenced by llvm::LoopVectorizationCostModel::selectInterleaveCount().

◆ ForceTargetNumVectorRegs

cl::opt< unsigned > ForceTargetNumVectorRegs("force-target-num-vector-regs", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's number of vector registers."))	(	"force-target-num-vector-regs"	,
		cl::init(0)	,
		cl::Hidden	,
		cl::desc("A flag that overrides the target's number of vector registers.")
	)

static

Referenced by llvm::LoopVectorizationCostModel::selectInterleaveCount().

◆ ForceTargetSupportsScalableVectors

cl::opt< bool > ForceTargetSupportsScalableVectors("force-target-supports-scalable-vectors", cl::init(false), cl::Hidden, cl::desc( "Pretend that scalable vectors are supported, even if the target does " "not support them. This flag should only be used for testing."))	(	"force-target-supports-scalable-vectors"	,
		cl::init(false)	,
		cl::Hidden	,
		cl::desc( "Pretend that scalable vectors are supported, even if the target does " "not support them. This flag should only be used for testing.")
	)

static

Referenced by llvm::LoopVectorizationPlanner::planInVPlanNativePath().

◆ LLVMLoopVectorizeFollowupAll

const char LLVMLoopVectorizeFollowupAll[] = "llvm.loop.vectorize.followup_all"

Metadata attribute names

Definition at line 171 of file LoopVectorize.cpp.

Referenced by llvm::LoopVectorizationPlanner::executePlan(), and llvm::LoopVectorizePass::processLoop().

◆ LLVMLoopVectorizeFollowupEpilogue

const char LLVMLoopVectorizeFollowupEpilogue[]

Initial value:

=

"llvm.loop.vectorize.followup_epilogue"

Definition at line 174 of file LoopVectorize.cpp.

Referenced by llvm::LoopVectorizePass::processLoop().

◆ LLVMLoopVectorizeFollowupVectorized

const char LLVMLoopVectorizeFollowupVectorized[]

Initial value:

=

"llvm.loop.vectorize.followup_vectorized"

Definition at line 172 of file LoopVectorize.cpp.

Referenced by llvm::LoopVectorizationPlanner::executePlan().

◆ LoopVectorizeWithBlockFrequency

cl::opt< bool > LoopVectorizeWithBlockFrequency("loop-vectorize-with-block-frequency", cl::init(true), cl::Hidden, cl::desc("Enable the use of the block frequency analysis to access PGO " "heuristics minimizing code growth in cold regions and being more " "aggressive in hot regions."))	(	"loop-vectorize-with-block-frequency"	,
		cl::init(true)	,
		cl::Hidden	,
		cl::desc("Enable the use of the block frequency analysis to access PGO " "heuristics minimizing code growth in cold regions and being more " "aggressive in hot regions.")
	)

static

Referenced by getSmallBestKnownTC().

◆ MaximizeBandwidth

cl::opt< bool > MaximizeBandwidth("vectorizer-maximize-bandwidth", cl::init(false), cl::Hidden, cl::desc("Maximize bandwidth when selecting vectorization factor which " "will be determined by the smallest type in loop."))	(	"vectorizer-maximize-bandwidth"	,
		cl::init(false)	,
		cl::Hidden	,
		cl::desc("Maximize bandwidth when selecting vectorization factor which " "will be determined by the smallest type in loop.")
	)

static

◆ MaxNestedScalarReductionIC

cl::opt< unsigned > MaxNestedScalarReductionIC("max-nested-scalar-reduction-interleave", cl::init(2), cl::Hidden, cl::desc("The maximum interleave count to use when interleaving a scalar " "reduction in a nested loop."))	(	"max-nested-scalar-reduction-interleave"	,
		cl::init(2)	,
		cl::Hidden	,
		cl::desc("The maximum interleave count to use when interleaving a scalar " "reduction in a nested loop.")
	)

static

Referenced by llvm::LoopVectorizationCostModel::selectInterleaveCount().

◆ MemCheckBypassWeights

constexpr uint32_t MemCheckBypassWeights[] = {1, 127}

static constexpr

Definition at line 401 of file LoopVectorize.cpp.

◆ MinItersBypassWeights

constexpr uint32_t MinItersBypassWeights[] = {1, 127}

static constexpr

Definition at line 404 of file LoopVectorize.cpp.

Referenced by llvm::InnerLoopVectorizer::emitIterationCountCheck(), and llvm::EpilogueVectorizerMainLoop::emitIterationCountCheck().

◆ NumberOfStoresToPredicate

cl::opt< unsigned > NumberOfStoresToPredicate("vectorize-num-stores-pred", cl::init(1), cl::Hidden, cl::desc("Max number of stores to be predicated behind an if."))	(	"vectorize-num-stores-pred"	,
		cl::init(1)	,
		cl::Hidden	,
		cl::desc("Max number of stores to be predicated behind an if.")
	)

static

The number of stores in a loop that are allowed to need predication.

◆ PreferInLoopReductions

cl::opt< bool > PreferInLoopReductions("prefer-inloop-reductions", cl::init(false), cl::Hidden, cl::desc("Prefer in-loop vector reductions, " "overriding the targets preference."))	(	"prefer-inloop-reductions"	,
		cl::init(false)	,
		cl::Hidden	,
		cl::desc("Prefer in-loop vector reductions, " "overriding the targets preference.")
	)

static

Referenced by llvm::LoopVectorizationCostModel::collectElementTypesForWidening(), and llvm::LoopVectorizationCostModel::collectInLoopReductions().

◆ PreferPredicatedReductionSelect

cl::opt< bool > PreferPredicatedReductionSelect("prefer-predicated-reduction-select", cl::init(false), cl::Hidden, cl::desc( "Prefer predicating a reduction operation over an after loop select."))	(	"prefer-predicated-reduction-select"	,
		cl::init(false)	,
		cl::Hidden	,
		cl::desc( "Prefer predicating a reduction operation over an after loop select.")
	)

static

◆ PreferPredicateOverEpilogue

cl::opt< PreferPredicateTy::Option > PreferPredicateOverEpilogue("prefer-predicate-over-epilogue", cl::init(PreferPredicateTy::ScalarEpilogue), cl::Hidden, cl::desc("Tail-folding and predication preferences over creating a scalar " "epilogue loop."), cl::values(clEnumValN(PreferPredicateTy::ScalarEpilogue, "scalar-epilogue", "Don't tail-predicate loops, create scalar epilogue"), clEnumValN(PreferPredicateTy::PredicateElseScalarEpilogue, "predicate-else-scalar-epilogue", "prefer tail-folding, create scalar epilogue if tail " "folding fails."), clEnumValN(PreferPredicateTy::PredicateOrDontVectorize, "predicate-dont-vectorize", "prefers tail-folding, don't attempt vectorization if " "tail-folding fails.")))	(	"prefer-predicate-over-epilogue"	,
		cl::init(PreferPredicateTy::ScalarEpilogue)	,
		cl::Hidden	,
		cl::desc("Tail-folding and predication preferences over creating a scalar " "epilogue loop.")	,
		cl::values(clEnumValN(PreferPredicateTy::ScalarEpilogue, "scalar-epilogue", "Don't tail-predicate loops, create scalar epilogue"), clEnumValN(PreferPredicateTy::PredicateElseScalarEpilogue, "predicate-else-scalar-epilogue", "prefer tail-folding, create scalar epilogue if tail " "folding fails."), clEnumValN(PreferPredicateTy::PredicateOrDontVectorize, "predicate-dont-vectorize", "prefers tail-folding, don't attempt vectorization if " "tail-folding fails."))
	)

static

Referenced by getScalarEpilogueLowering().

◆ PrintVPlansInDotFormat

cl::opt< bool > PrintVPlansInDotFormat("vplan-print-in-dot-format", cl::Hidden, cl::desc("Use dot format instead of plain text when dumping VPlans"))	(	"vplan-print-in-dot-format"	,
		cl::Hidden	,
		cl::desc("Use dot format instead of plain text when dumping VPlans")
	)

static

Referenced by llvm::LoopVectorizationPlanner::printPlans().

◆ SCEVCheckBypassWeights

constexpr uint32_t SCEVCheckBypassWeights[] = {1, 127}

static constexpr

Definition at line 398 of file LoopVectorize.cpp.

◆ SmallLoopCost

cl::opt< unsigned > SmallLoopCost("small-loop-cost", cl::init(20), cl::Hidden, cl::desc( "The cost of a loop that is considered 'small' by the interleaver."))	(	"small-loop-cost"	,
		cl::init(20)	,
		cl::Hidden	,
		cl::desc( "The cost of a loop that is considered 'small' by the interleaver.")
	)

static

Referenced by llvm::LoopVectorizationCostModel::selectInterleaveCount().

◆ TinyTripCountVectorThreshold

cl::opt< unsigned > TinyTripCountVectorThreshold("vectorizer-min-trip-count", cl::init(16), cl::Hidden, cl::desc("Loops with a constant trip count that is smaller than this " "value are vectorized only if no scalar iteration overheads " "are incurred."))	(	"vectorizer-min-trip-count"	,
		cl::init(16)	,
		cl::Hidden	,
		cl::desc("Loops with a constant trip count that is smaller than this " "value are vectorized only if no scalar iteration overheads " "are incurred.")
	)

static

Loops with a known constant trip count below this number are vectorized only if no scalar iteration overheads are incurred.

Referenced by llvm::LoopVectorizePass::processLoop().

◆ UseWiderVFIfCallVariantsPresent

cl::opt< bool > UseWiderVFIfCallVariantsPresent("vectorizer-maximize-bandwidth-for-vector-calls", cl::init(true), cl::Hidden, cl::desc("Try wider VFs if they enable the use of vector variants"))	(	"vectorizer-maximize-bandwidth-for-vector-calls"	,
		cl::init(true)	,
		cl::Hidden	,
		cl::desc("Try wider VFs if they enable the use of vector variants")
	)

static

◆ VectorizeMemoryCheckThreshold

cl::opt< unsigned > VectorizeMemoryCheckThreshold("vectorize-memory-check-threshold", cl::init(128), cl::Hidden, cl::desc("The maximum allowed number of runtime memory checks"))	(	"vectorize-memory-check-threshold"	,
		cl::init(128)	,
		cl::Hidden	,
		cl::desc("The maximum allowed number of runtime memory checks")
	)

static

Referenced by areRuntimeChecksProfitable().

◆ VerboseDebug

const char VerboseDebug[] = DEBUG_TYPE "-verbose"

Definition at line 166 of file LoopVectorize.cpp.

◆ VPlanBuildStressTest

cl::opt< bool > VPlanBuildStressTest("vplan-build-stress-test", cl::init(false), cl::Hidden, cl::desc( "Build VPlan for every supported loop nest in the function and bail " "out right after the build (stress test the VPlan H-CFG construction " "in the VPlan-native vectorization path)."))	(	"vplan-build-stress-test"	,
		cl::init(false)	,
		cl::Hidden	,
		cl::desc( "Build VPlan for every supported loop nest in the function and bail " "out right after the build (stress test the VPlan H-CFG construction " "in the VPlan-native vectorization path).")
	)

static

Referenced by collectSupportedLoops(), llvm::LoopVectorizationPlanner::planInVPlanNativePath(), and processLoopInVPlanNativePath().

Classes

Namespaces

Macros

Typedefs

Enumerations

Functions

Variables

Macro Definition Documentation

◆ DEBUG_TYPE

◆ LV_NAME

Function Documentation

◆ addCanonicalIVRecipes()

◆ addLiveOutsForFirstOrderRecurrences()

◆ AddRuntimeUnrollDisableMetaData()

◆ addUsersInExitBlock()

◆ areRuntimeChecksProfitable()

◆ checkMixedPrecision()

◆ collectSupportedLoops()

◆ createAndCollectMergePhiForReduction()

◆ createLVAnalysis()

◆ createReverseEVL()

◆ createWidenInductionRecipes()

◆ cse()

◆ debugVectorizationMessage()

◆ determineVPlanVF()

◆ emitInvalidCostRemarks()

◆ emitTransformedIndex()

◆ getAddressAccessSCEV()

◆ getDebugLocFromInstOrOperands()

◆ getExpandedStep()

◆ getMaxVScale()

◆ getScalarEpilogueLowering()

◆ getSmallBestKnownTC()

◆ getVScaleForTuning()

◆ hasIrregularType()

◆ isExplicitVecOuterLoop()

◆ isIndvarOverflowCheckKnownFalse()

◆ MaybeVectorizeType()

◆ processLoopInVPlanNativePath()

◆ STATISTIC() [1/3]

◆ STATISTIC() [2/3]

◆ STATISTIC() [3/3]

◆ useActiveLaneMask()

◆ useActiveLaneMaskForControlFlow()

◆ useMaskedInterleavedAccesses()

◆ willGenerateVectors()

Variable Documentation

◆ EnableCondStoresVectorization

◆ EnableEpilogueVectorization

◆ EnableIndVarRegisterHeur

◆ EnableInterleavedMemAccesses

◆ EnableLoadStoreRuntimeInterleave

◆ EnableMaskedInterleavedMemAccesses

◆ EpilogueVectorizationForceVF

◆ EpilogueVectorizationMinVF

◆ ForceOrderedReductions

◆ ForceSafeDivisor

◆ ForceTailFoldingStyle

◆ ForceTargetInstructionCost

◆ ForceTargetMaxScalarInterleaveFactor

◆ ForceTargetMaxVectorInterleaveFactor

◆ ForceTargetNumScalarRegs

◆ ForceTargetNumVectorRegs

◆ ForceTargetSupportsScalableVectors

◆ LLVMLoopVectorizeFollowupAll

◆ LLVMLoopVectorizeFollowupEpilogue

◆ LLVMLoopVectorizeFollowupVectorized

◆ LoopVectorizeWithBlockFrequency

◆ MaximizeBandwidth

◆ MaxNestedScalarReductionIC

◆ MemCheckBypassWeights

◆ MinItersBypassWeights

◆ NumberOfStoresToPredicate

◆ PreferInLoopReductions

◆ PreferPredicatedReductionSelect

◆ PreferPredicateOverEpilogue

◆ PrintVPlansInDotFormat

◆ SCEVCheckBypassWeights

◆ SmallLoopCost

◆ TinyTripCountVectorThreshold