29#define DEBUG_TYPE "loop-vectorize"
35 cl::desc(
"Maximize bandwidth when selecting vectorization factor which "
36 "will be determined by the smallest type in loop."));
39 "vectorizer-maximize-bandwidth-for-vector-calls",
cl::init(
true),
41 cl::desc(
"Try wider VFs if they enable the use of vector variants"));
45 cl::desc(
"Discard VFs if their register pressure is too high."));
50 "Pretend that scalable vectors are supported, even if the target does "
51 "not support them. This flag should only be used for testing."));
55 cl::desc(
"Prefer in-loop vector reductions, "
56 "overriding the targets preference."));
62 cl::desc(
"Assume the target supports masked memory operations (used for "
67 cl::desc(
"Assume the target supports gather/scatter operations (used for "
76 dbgs() <<
"LV: " << Prefix << DebugMsg;
97 if (
I &&
I->getDebugLoc())
98 DL =
I->getDebugLoc();
110 <<
"loop not vectorized: " << OREMsg);
125 "Vectorizing: ", TheLoop->
isInnermost() ?
"innermost loop" :
"outer loop",
131 <<
"vectorized " << LoopType <<
"loop (vectorization width: "
132 <<
ore::NV(
"VectorizationFactor", VFWidth)
133 <<
", interleaved count: " <<
ore::NV(
"InterleaveCount", IC) <<
")";
146 : TTI.isLegalMaskedStore(Ty, Alignment, AS));
160 (LI && TTI.isLegalMaskedGather(Ty,
Align)) ||
161 (
SI && TTI.isLegalMaskedScatter(Ty,
Align));
169bool VFSelectionContext::useMaxBandwidth(
bool IsScalable)
const {
174 (
TTI.shouldMaximizeVectorBandwidth(RegKind) ||
176 Legal->hasVectorCallVariants())));
185 if (TTI.shouldConsiderVectorizationRegPressure())
192 VF, VF.
isScalable() ? MaxPermissibleVFWithoutMaxBW.ScalableVF
193 : MaxPermissibleVFWithoutMaxBW.FixedVF);
197 ElementCount VF,
unsigned MaxTripCount,
unsigned UserIC,
198 bool FoldTailByMasking,
bool RequiresScalarEpilogue)
const {
200 if (VF.
isScalable() &&
F.hasFnAttribute(Attribute::VScaleRange)) {
201 auto Attr =
F.getFnAttribute(Attribute::VScaleRange);
202 auto Min = Attr.getVScaleRangeMin();
209 if (MaxTripCount > 0 && RequiresScalarEpilogue)
214 unsigned IC = UserIC > 0 ? UserIC : 1;
215 unsigned EstimatedVFTimesIC = EstimatedVF * IC;
217 if (MaxTripCount && MaxTripCount <= EstimatedVFTimesIC &&
225 if (ClampedUpperTripCount == 0)
226 ClampedUpperTripCount = 1;
227 LLVM_DEBUG(
dbgs() <<
"LV: Clamping the MaxVF to maximum power of two not "
228 "exceeding the constant trip count"
229 << (UserIC > 0 ?
" divided by UserIC" :
"") <<
": "
230 << ClampedUpperTripCount <<
"\n");
237ElementCount VFSelectionContext::getMaximizedVFForTarget(
238 unsigned MaxTripCount,
unsigned SmallestType,
unsigned WidestType,
239 ElementCount MaxSafeVF,
unsigned UserIC,
bool FoldTailByMasking,
240 bool RequiresScalarEpilogue) {
241 bool ComputeScalableMaxVF = MaxSafeVF.
isScalable();
242 const TypeSize WidestRegister = TTI.getRegisterBitWidth(
247 auto MinVF = [](
const ElementCount &
LHS,
const ElementCount &
RHS) {
249 "Scalable flags must match");
257 ComputeScalableMaxVF);
258 MaxVectorElementCount = MinVF(MaxVectorElementCount, MaxSafeVF);
260 << (MaxVectorElementCount * WidestType) <<
" bits.\n");
262 if (!MaxVectorElementCount) {
264 << (ComputeScalableMaxVF ?
"scalable" :
"fixed")
265 <<
" vector registers.\n");
270 clampVFByMaxTripCount(MaxVectorElementCount, MaxTripCount, UserIC,
271 FoldTailByMasking, RequiresScalarEpilogue);
274 if (MaxVF != MaxVectorElementCount)
278 MaxPermissibleVFWithoutMaxBW.ScalableVF = MaxVF;
280 MaxPermissibleVFWithoutMaxBW.FixedVF = MaxVF;
282 if (useMaxBandwidth(ComputeScalableMaxVF)) {
285 ComputeScalableMaxVF);
286 MaxVF = MinVF(MaxVectorElementCountMaxBW, MaxSafeVF);
288 if (ElementCount MinVF =
289 TTI.getMinimumVF(SmallestType, ComputeScalableMaxVF)) {
292 <<
") with target's minimum: " << MinVF <<
'\n');
297 MaxVF = clampVFByMaxTripCount(MaxVF, MaxTripCount, UserIC,
298 FoldTailByMasking, RequiresScalarEpilogue);
305 if (std::optional<unsigned> MaxVScale =
TTI.getMaxVScale())
308 if (
F.hasFnAttribute(Attribute::VScaleRange))
309 return F.getFnAttribute(Attribute::VScaleRange).getVScaleRangeMax();
314bool VFSelectionContext::isScalableVectorizationAllowed() {
315 if (IsScalableVectorizationAllowed)
316 return *IsScalableVectorizationAllowed;
318 IsScalableVectorizationAllowed =
false;
324 "ScalableVectorizationDisabled", ORE, TheLoop);
328 LLVM_DEBUG(
dbgs() <<
"LV: Scalable vectorization is available\n");
331 std::numeric_limits<ElementCount::ScalarTy>::max());
340 if (!
all_of(Legal->getReductionVars(), [&](
const auto &
Reduction) ->
bool {
341 return TTI.isLegalToVectorizeReduction(Reduction.second, MaxScalableVF);
344 "Scalable vectorization not supported for the reduction "
345 "operations found in this loop.",
346 "ScalableVFUnfeasible", ORE, TheLoop);
352 if (
any_of(ElementTypesInLoop, [&](
Type *Ty) {
353 return !Ty->
isVoidTy() && !TTI.isElementTypeLegalForScalableVector(Ty);
356 "for all element types found in this loop.",
357 "ScalableVFUnfeasible", ORE, TheLoop);
361 if (!Legal->isSafeForAnyVectorWidth() && !
getMaxVScale(F, TTI)) {
363 "for safe distance analysis.",
364 "ScalableVFUnfeasible", ORE, TheLoop);
368 IsScalableVectorizationAllowed =
true;
373VFSelectionContext::getMaxLegalScalableVF(
unsigned MaxSafeElements) {
374 if (!isScalableVectorizationAllowed())
378 std::numeric_limits<ElementCount::ScalarTy>::max());
379 if (Legal->isSafeForAnyVectorWidth())
380 return MaxScalableVF;
382 std::optional<unsigned> MaxVScale =
getMaxVScale(F, TTI);
388 "Max legal vector width too small, scalable vectorization "
390 "ScalableVFUnfeasible", ORE, TheLoop);
392 return MaxScalableVF;
396 unsigned MaxTripCount,
ElementCount UserVF,
unsigned UserIC,
397 bool FoldTailByMasking,
bool RequiresScalarEpilogue) {
404 unsigned MaxSafeElementsPowerOf2 =
406 if (!Legal->isSafeForAnyStoreLoadForwardDistances()) {
407 unsigned SLDist = Legal->getMaxStoreLoadForwardSafeDistanceInBits();
408 MaxSafeElementsPowerOf2 =
409 std::min(MaxSafeElementsPowerOf2, SLDist / WidestType);
413 auto MaxSafeScalableVF = getMaxLegalScalableVF(MaxSafeElementsPowerOf2);
415 if (!Legal->isSafeForAnyVectorWidth())
416 MaxSafeElements = MaxSafeElementsPowerOf2;
418 LLVM_DEBUG(
dbgs() <<
"LV: The max safe fixed VF is: " << MaxSafeFixedVF
420 LLVM_DEBUG(
dbgs() <<
"LV: The max safe scalable VF is: " << MaxSafeScalableVF
426 UserVF.
isScalable() ? MaxSafeScalableVF : MaxSafeFixedVF;
443 <<
" is unsafe, clamping to max safe VF="
444 << MaxSafeFixedVF <<
".\n");
447 TheLoop->getStartLoc(),
448 TheLoop->getHeader())
449 <<
"User-specified vectorization factor "
450 <<
ore::NV(
"UserVectorizationFactor", UserVF)
451 <<
" is unsafe, clamping to maximum safe vectorization factor "
452 <<
ore::NV(
"VectorizationFactor", MaxSafeFixedVF);
454 return MaxSafeFixedVF;
459 <<
" is ignored because scalable vectors are not "
463 TheLoop->getStartLoc(),
464 TheLoop->getHeader())
465 <<
"User-specified vectorization factor "
466 <<
ore::NV(
"UserVectorizationFactor", UserVF)
467 <<
" is ignored because the target does not support scalable "
468 "vectors. The compiler will pick a more suitable value.";
472 <<
" is unsafe. Ignoring scalable UserVF.\n");
475 TheLoop->getStartLoc(),
476 TheLoop->getHeader())
477 <<
"User-specified vectorization factor "
478 <<
ore::NV(
"UserVectorizationFactor", UserVF)
479 <<
" is unsafe. Ignoring the hint to let the compiler pick a "
480 "more suitable value.";
485 LLVM_DEBUG(
dbgs() <<
"LV: The Smallest and Widest types: " << SmallestType
486 <<
" / " << WidestType <<
" bits.\n");
490 if (
auto MaxVF = getMaximizedVFForTarget(
491 MaxTripCount, SmallestType, WidestType, MaxSafeFixedVF, UserIC,
492 FoldTailByMasking, RequiresScalarEpilogue))
493 Result.FixedVF = MaxVF;
495 if (
auto MaxVF = getMaximizedVFForTarget(
496 MaxTripCount, SmallestType, WidestType, MaxSafeScalableVF, UserIC,
497 FoldTailByMasking, RequiresScalarEpilogue))
499 Result.ScalableVF = MaxVF;
507std::pair<unsigned, unsigned>
509 unsigned MinWidth = -1U;
510 unsigned MaxWidth = 8;
515 if (ElementTypesInLoop.empty() && !Legal->getReductionVars().empty()) {
516 for (
const auto &[
_, RdxDesc] : Legal->getReductionVars()) {
521 std::min(RdxDesc.getMinWidthCastToRecurrenceTypeInBits(),
522 RdxDesc.getRecurrenceType()->getScalarSizeInBits()));
523 MaxWidth = std::max(MaxWidth,
524 RdxDesc.getRecurrenceType()->getScalarSizeInBits());
527 for (
Type *
T : ElementTypesInLoop) {
528 MinWidth = std::min<unsigned>(
529 MinWidth,
DL.getTypeSizeInBits(
T->getScalarType()).getFixedValue());
530 MaxWidth = std::max<unsigned>(
531 MaxWidth,
DL.getTypeSizeInBits(
T->getScalarType()).getFixedValue());
534 return {MinWidth, MaxWidth};
539 ElementTypesInLoop.clear();
547 if (ValuesToIgnore && ValuesToIgnore->
contains(&
I))
557 if (!Legal->isReductionVariable(PN))
560 Legal->getRecurrenceDescriptor(PN);
570 T = ST->getValueOperand()->getType();
573 "Expected the load/store/recurrence type to be sized");
575 ElementTypesInLoop.insert(
T);
580void VFSelectionContext::initializeVScaleForTuning() {
584 if (
F.hasFnAttribute(Attribute::VScaleRange)) {
585 auto Attr =
F.getFnAttribute(Attribute::VScaleRange);
586 auto Min = Attr.getVScaleRangeMin();
587 auto Max = Attr.getVScaleRangeMax();
588 if (Max && Min == Max) {
589 VScaleForTuning = Max;
594 VScaleForTuning = TTI.getVScaleForTuning();
599 return !Hints->allowReordering() && RdxDesc.
isOrdered();
605 Loop *L =
const_cast<Loop *
>(TheLoop);
606 if (Legal->getRuntimePointerChecking()->Need) {
608 "Runtime ptr check is required with -Os/-Oz",
609 "runtime pointer checks needed. Enable vectorization of this "
610 "loop with '#pragma clang loop vectorize(enable)' when "
611 "compiling with -Os/-Oz",
612 "CantVersionLoopWithOptForSize", ORE, L);
616 if (!PSE.getPredicate().isAlwaysTrue()) {
618 "Runtime SCEV check is required with -Os/-Oz",
619 "runtime SCEV checks needed. Enable vectorization of this "
620 "loop with '#pragma clang loop vectorize(enable)' when "
621 "compiling with -Os/-Oz",
622 "CantVersionLoopWithOptForSize", ORE, L);
627 if (!Legal->getLAI()->getSymbolicStrides().empty()) {
629 "Runtime stride check for small trip count",
630 "runtime stride == 1 checks needed. Enable vectorization of "
631 "this loop without such check by compiling with -Os/-Oz",
632 "CantVersionLoopWithOptForSize", ORE, L);
645 if (!InLoopReductions.empty())
648 for (
const auto &Reduction : Legal->getReductionVars()) {
649 PHINode *Phi = Reduction.first;
671 !TTI.preferInLoopReduction(Kind, Phi->getType()))
679 bool InLoop = !ReductionOperations.
empty();
682 InLoopReductions.insert(Phi);
685 for (
auto *
I : ReductionOperations) {
686 InLoopReductionImmediateChains[
I] = LastChain;
690 LLVM_DEBUG(
dbgs() <<
"LV: Using " << (InLoop ?
"inloop" :
"out of loop")
691 <<
" reduction for phi: " << *Phi <<
"\n");
697 const unsigned MaxTripCount,
699 bool IsEpilogue)
const {
705 if (
A.Width.isScalable() && CostA.
isValid() && !
B.Width.isScalable() &&
710 unsigned EstimatedWidthA =
A.Width.getKnownMinValue();
711 unsigned EstimatedWidthB =
B.Width.getKnownMinValue();
713 if (
A.Width.isScalable())
714 EstimatedWidthA *= *VScale;
715 if (
B.Width.isScalable())
716 EstimatedWidthB *= *VScale;
723 return CostA < CostB ||
724 (CostA == CostB && EstimatedWidthA > EstimatedWidthB);
729 bool PreferScalable = !
TTI.preferFixedOverScalableIfEqualCost(IsEpilogue) &&
730 A.Width.isScalable() && !
B.Width.isScalable();
740 bool LowerCostWithoutTC =
741 CmpFn(CostA * EstimatedWidthB, CostB * EstimatedWidthA);
743 return LowerCostWithoutTC;
745 auto GetCostForTC = [MaxTripCount, HasTail](
unsigned VF,
757 return VectorCost * (MaxTripCount / VF) +
758 ScalarCost * (MaxTripCount % VF);
759 return VectorCost *
divideCeil(MaxTripCount, VF);
762 auto RTCostA = GetCostForTC(EstimatedWidthA, CostA,
A.ScalarCost);
763 auto RTCostB = GetCostForTC(EstimatedWidthB, CostB,
B.ScalarCost);
764 bool LowerCostWithTC = CmpFn(RTCostA, RTCostB);
765 LLVM_DEBUG(
if (LowerCostWithTC != LowerCostWithoutTC) {
766 dbgs() <<
"LV: VF " << (LowerCostWithTC ?
A.Width :
B.Width)
767 <<
" has lower cost than VF "
768 << (LowerCostWithTC ?
B.Width :
A.Width)
769 <<
" when taking the cost of the remaining scalar loop iterations "
770 "into consideration for a maximum trip count of "
771 << MaxTripCount <<
".\n";
773 return LowerCostWithTC;
779 bool IsEpilogue)
const {
780 const unsigned MaxTripCount = PSE.getSmallConstantMaxTripCount();
781 return LoopVectorizationPlanner::isMoreProfitable(
A,
B, MaxTripCount, HasTail,
794 "Scalable vectorization requested but not supported by the target",
795 "the scalable user-specified vectorization width for outer-loop "
796 "vectorization cannot be used because the target does not support "
798 "ScalableVFUnfeasible", ORE, TheLoop);
806 auto RegKind = TTI.enableScalableVectorization()
811 unsigned N = std::max<uint64_t>(1,
RegSize.getKnownMinValue() / WidestType);
818 <<
"overriding computed VF.\n");
823 "VF needs to be a power of two");
827 <<
"VF " << VF <<
" to build VPlans.\n");
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
loop Loop Strength Reduction
This file defines the LoopVectorizationLegality class.
static void debugVectorizationMessage(const StringRef Prefix, const StringRef DebugMsg, Instruction *I)
Write a DebugMsg about vectorization to the debug output stream.
static cl::opt< bool > ForceTargetSupportsGatherScatterOps("force-target-supports-gather-scatter-ops", cl::init(false), cl::Hidden, cl::desc("Assume the target supports gather/scatter operations (used for " "testing)."))
cl::opt< bool > VPlanBuildOuterloopStressTest
static cl::opt< bool > ForceTargetSupportsScalableVectors("force-target-supports-scalable-vectors", cl::init(false), cl::Hidden, cl::desc("Pretend that scalable vectors are supported, even if the target does " "not support them. This flag should only be used for testing."))
static cl::opt< bool > ConsiderRegPressure("vectorizer-consider-reg-pressure", cl::init(false), cl::Hidden, cl::desc("Discard VFs if their register pressure is too high."))
static cl::opt< bool > UseWiderVFIfCallVariantsPresent("vectorizer-maximize-bandwidth-for-vector-calls", cl::init(true), cl::Hidden, cl::desc("Try wider VFs if they enable the use of vector variants"))
static OptimizationRemarkAnalysis createLVAnalysis(StringRef RemarkName, const Loop *TheLoop, Instruction *I, DebugLoc DL={})
Create an analysis remark that explains why vectorization failed RemarkName is the identifier for the...
static cl::opt< bool > ForceTargetSupportsMaskedMemoryOps("force-target-supports-masked-memory-ops", cl::init(false), cl::Hidden, cl::desc("Assume the target supports masked memory operations (used for " "testing)."))
Note: This currently only applies to llvm.masked.load and llvm.masked.store.
static cl::opt< bool > MaximizeBandwidth("vectorizer-maximize-bandwidth", cl::init(false), cl::Hidden, cl::desc("Maximize bandwidth when selecting vectorization factor which " "will be determined by the smallest type in loop."))
This file provides a LoopVectorizationPlanner class.
LLVM Basic Block Representation.
A parsed version of the target data layout string in and methods for querying it.
constexpr bool isVector() const
One or more elements.
static constexpr ElementCount getScalable(ScalarTy MinVal)
static constexpr ElementCount getFixed(ScalarTy MinVal)
static constexpr ElementCount get(ScalarTy MinVal, bool Scalable)
constexpr bool isScalar() const
Exactly one element.
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
BlockT * getHeader() const
bool isScalableVectorizationDisabled() const
bool isScalableVectorizationAlwaysPreferred() const
Represents a single loop in the control flow graph.
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Type * getRecurrenceType() const
Returns the type of the recurrence.
bool hasUsesOutsideReductionChain() const
Returns true if the reduction PHI has any uses outside the reduction chain.
static bool isFindLastRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
LLVM_ABI SmallVector< Instruction *, 4 > getReductionOpChain(PHINode *Phi, Loop *L) const
Attempts to find a chain of operations from Phi to LoopExitInst that can be treated as a set of reduc...
static bool isAnyOfRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
RecurKind getRecurrenceKind() const
bool isOrdered() const
Expose an ordered FP reduction to the instance users.
static bool isFindIVRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
bool contains(ConstPtrType Ptr) const
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Represent a constant reference to a string, i.e.
The instances of the Type class are immutable: once they are created, they are never changed.
bool isVoidTy() const
Return true if this is 'void'.
FixedScalableVFPair computeVPlanOuterloopVF(ElementCount UserVF)
Returns a scalable VF to use for outer-loop vectorization if the target supports it and a fixed VF ot...
std::pair< unsigned, unsigned > getSmallestAndWidestTypes() const
const TTI::TargetCostKind CostKind
The kind of cost that we are calculating.
bool supportsScalableVectors() const
bool runtimeChecksRequired()
Check whether vectorization would require runtime checks.
bool isLegalGatherOrScatter(Value *V, ElementCount VF) const
Returns true if the target machine can represent V as a masked gather or scatter operation.
void collectInLoopReductions()
Split reductions into those that happen in the loop, and those that happen outside.
FixedScalableVFPair computeFeasibleMaxVF(unsigned MaxTripCount, ElementCount UserVF, unsigned UserIC, bool FoldTailByMasking, bool RequiresScalarEpilogue)
bool useOrderedReductions(const RecurrenceDescriptor &RdxDesc) const
Returns true if we should use strict in-order reductions for the given RdxDesc.
bool shouldConsiderRegPressureForVF(ElementCount VF) const
void collectElementTypesForWidening(const SmallPtrSetImpl< const Value * > *ValuesToIgnore=nullptr)
Collect element types in the loop that need widening.
bool isLegalMaskedLoadOrStore(Instruction *I, ElementCount VF) const
Returns true if the target machine supports masked loads or stores for I's data type and alignment.
std::optional< unsigned > getVScaleForTuning() const
void computeMinimalBitwidths()
Compute smallest bitwidth each instruction can be represented with.
LLVM Value Representation.
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
constexpr bool isZero() const
static constexpr bool isKnownGT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
void reportVectorizationFailure(const StringRef DebugMsg, const StringRef OREMsg, const StringRef ORETag, OptimizationRemarkEmitter *ORE, const Loop *TheLoop, Instruction *I=nullptr)
Reports a vectorization failure: print DebugMsg for debugging purposes along with the corresponding o...
void reportVectorizationInfo(const StringRef Msg, const StringRef ORETag, OptimizationRemarkEmitter *ORE, const Loop *TheLoop, Instruction *I=nullptr, DebugLoc DL={})
Reports an informative message: print Msg for debugging purposes as well as an optimization remark.
void reportVectorization(OptimizationRemarkEmitter *ORE, Loop *TheLoop, ElementCount VFWidth, unsigned IC)
Report successful vectorization of the loop.
initializer< Ty > init(const Ty &Val)
DiagnosticInfoOptimizationBase::Argument NV
This is an optimization pass for GlobalISel generic memory operations.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
unsigned getLoadStoreAddressSpace(const Value *I)
A helper function that returns the address space of the pointer operand of load or store instruction.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
std::optional< unsigned > getMaxVScale(const Function &F, const TargetTransformInfo &TTI)
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
RecurKind
These are the kinds of recurrences that we support.
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
LLVM_ABI MapVector< Instruction *, uint64_t > computeMinimumValueSizes(ArrayRef< BasicBlock * > Blocks, DemandedBits &DB, const TargetTransformInfo *TTI=nullptr)
Compute a map of integer instructions to their minimum legal type size.
cl::opt< bool > PreferInLoopReductions
This struct is a compact representation of a valid (non-zero power of two) alignment.
A class that represents two vectorization factors (initialized with 0 by default).
static FixedScalableVFPair getNone()
TODO: The following VectorizationFactor was pulled out of LoopVectorizationCostModel class.
static LLVM_ABI ElementCount VectorizationFactor
VF as overridden by the user.