29#define DEBUG_TYPE "loop-vectorize"
35 cl::desc(
"Maximize bandwidth when selecting vectorization factor which "
36 "will be determined by the smallest type in loop."));
39 "vectorizer-maximize-bandwidth-for-vector-calls",
cl::init(
true),
41 cl::desc(
"Try wider VFs if they enable the use of vector variants"));
45 cl::desc(
"Discard VFs if their register pressure is too high."));
50 "Pretend that scalable vectors are supported, even if the target does "
51 "not support them. This flag should only be used for testing."));
55 cl::desc(
"Prefer in-loop vector reductions, "
56 "overriding the targets preference."));
62 cl::desc(
"Assume the target supports masked memory operations (used for "
67 cl::desc(
"Assume the target supports gather/scatter operations (used for "
76 dbgs() <<
"LV: " << Prefix << DebugMsg;
97 if (
I &&
I->getDebugLoc())
98 DL =
I->getDebugLoc();
110 <<
"loop not vectorized: " << OREMsg);
125 "Vectorizing: ", TheLoop->
isInnermost() ?
"innermost loop" :
"outer loop",
131 <<
"vectorized " << LoopType <<
"loop (vectorization width: "
132 <<
ore::NV(
"VectorizationFactor", VFWidth)
133 <<
", interleaved count: " <<
ore::NV(
"InterleaveCount", IC) <<
")";
146 : TTI.isLegalMaskedStore(Ty, Alignment, AS));
160 (LI && TTI.isLegalMaskedGather(Ty,
Align)) ||
161 (
SI && TTI.isLegalMaskedScatter(Ty,
Align));
168bool VFSelectionContext::useMaxBandwidth(
bool IsScalable)
const {
173 (
TTI.shouldMaximizeVectorBandwidth(RegKind) ||
175 Legal->hasVectorCallVariants())));
184 if (TTI.shouldConsiderVectorizationRegPressure())
191 VF, VF.
isScalable() ? MaxPermissibleVFWithoutMaxBW.ScalableVF
192 : MaxPermissibleVFWithoutMaxBW.FixedVF);
196 ElementCount VF,
unsigned MaxTripCount,
unsigned UserIC,
197 bool FoldTailByMasking,
bool RequiresScalarEpilogue)
const {
199 if (VF.
isScalable() &&
F.hasFnAttribute(Attribute::VScaleRange)) {
200 auto Attr =
F.getFnAttribute(Attribute::VScaleRange);
201 auto Min = Attr.getVScaleRangeMin();
208 if (MaxTripCount > 0 && RequiresScalarEpilogue)
213 unsigned IC = UserIC > 0 ? UserIC : 1;
214 unsigned EstimatedVFTimesIC = EstimatedVF * IC;
216 if (MaxTripCount && MaxTripCount <= EstimatedVFTimesIC &&
224 if (ClampedUpperTripCount == 0)
225 ClampedUpperTripCount = 1;
226 LLVM_DEBUG(
dbgs() <<
"LV: Clamping the MaxVF to maximum power of two not "
227 "exceeding the constant trip count"
228 << (UserIC > 0 ?
" divided by UserIC" :
"") <<
": "
229 << ClampedUpperTripCount <<
"\n");
236ElementCount VFSelectionContext::getMaximizedVFForTarget(
237 unsigned MaxTripCount,
unsigned SmallestType,
unsigned WidestType,
238 ElementCount MaxSafeVF,
unsigned UserIC,
bool FoldTailByMasking,
239 bool RequiresScalarEpilogue) {
240 bool ComputeScalableMaxVF = MaxSafeVF.
isScalable();
241 const TypeSize WidestRegister = TTI.getRegisterBitWidth(
246 auto MinVF = [](
const ElementCount &
LHS,
const ElementCount &
RHS) {
248 "Scalable flags must match");
256 ComputeScalableMaxVF);
257 MaxVectorElementCount = MinVF(MaxVectorElementCount, MaxSafeVF);
259 << (MaxVectorElementCount * WidestType) <<
" bits.\n");
261 if (!MaxVectorElementCount) {
263 << (ComputeScalableMaxVF ?
"scalable" :
"fixed")
264 <<
" vector registers.\n");
269 clampVFByMaxTripCount(MaxVectorElementCount, MaxTripCount, UserIC,
270 FoldTailByMasking, RequiresScalarEpilogue);
273 if (MaxVF != MaxVectorElementCount)
277 MaxPermissibleVFWithoutMaxBW.ScalableVF = MaxVF;
279 MaxPermissibleVFWithoutMaxBW.FixedVF = MaxVF;
281 if (useMaxBandwidth(ComputeScalableMaxVF)) {
284 ComputeScalableMaxVF);
285 MaxVF = MinVF(MaxVectorElementCountMaxBW, MaxSafeVF);
287 if (ElementCount MinVF =
288 TTI.getMinimumVF(SmallestType, ComputeScalableMaxVF)) {
291 <<
") with target's minimum: " << MinVF <<
'\n');
296 MaxVF = clampVFByMaxTripCount(MaxVF, MaxTripCount, UserIC,
297 FoldTailByMasking, RequiresScalarEpilogue);
304 if (std::optional<unsigned> MaxVScale =
TTI.getMaxVScale())
307 if (
F.hasFnAttribute(Attribute::VScaleRange))
308 return F.getFnAttribute(Attribute::VScaleRange).getVScaleRangeMax();
313bool VFSelectionContext::isScalableVectorizationAllowed() {
314 if (IsScalableVectorizationAllowed)
315 return *IsScalableVectorizationAllowed;
317 IsScalableVectorizationAllowed =
false;
323 "ScalableVectorizationDisabled", ORE, TheLoop);
327 LLVM_DEBUG(
dbgs() <<
"LV: Scalable vectorization is available\n");
330 std::numeric_limits<ElementCount::ScalarTy>::max());
339 if (!
all_of(Legal->getReductionVars(), [&](
const auto &
Reduction) ->
bool {
340 return TTI.isLegalToVectorizeReduction(Reduction.second, MaxScalableVF);
343 "Scalable vectorization not supported for the reduction "
344 "operations found in this loop.",
345 "ScalableVFUnfeasible", ORE, TheLoop);
351 if (
any_of(ElementTypesInLoop, [&](
Type *Ty) {
352 return !Ty->
isVoidTy() && !TTI.isElementTypeLegalForScalableVector(Ty);
355 "for all element types found in this loop.",
356 "ScalableVFUnfeasible", ORE, TheLoop);
360 if (!Legal->isSafeForAnyVectorWidth() && !
getMaxVScale(F, TTI)) {
362 "for safe distance analysis.",
363 "ScalableVFUnfeasible", ORE, TheLoop);
367 IsScalableVectorizationAllowed =
true;
372VFSelectionContext::getMaxLegalScalableVF(
unsigned MaxSafeElements) {
373 if (!isScalableVectorizationAllowed())
377 std::numeric_limits<ElementCount::ScalarTy>::max());
378 if (Legal->isSafeForAnyVectorWidth())
379 return MaxScalableVF;
381 std::optional<unsigned> MaxVScale =
getMaxVScale(F, TTI);
387 "Max legal vector width too small, scalable vectorization "
389 "ScalableVFUnfeasible", ORE, TheLoop);
391 return MaxScalableVF;
395 unsigned MaxTripCount,
ElementCount UserVF,
unsigned UserIC,
396 bool FoldTailByMasking,
bool RequiresScalarEpilogue) {
403 unsigned MaxSafeElementsPowerOf2 =
405 if (!Legal->isSafeForAnyStoreLoadForwardDistances()) {
406 unsigned SLDist = Legal->getMaxStoreLoadForwardSafeDistanceInBits();
407 MaxSafeElementsPowerOf2 =
408 std::min(MaxSafeElementsPowerOf2, SLDist / WidestType);
412 auto MaxSafeScalableVF = getMaxLegalScalableVF(MaxSafeElementsPowerOf2);
414 if (!Legal->isSafeForAnyVectorWidth())
415 MaxSafeElements = MaxSafeElementsPowerOf2;
417 LLVM_DEBUG(
dbgs() <<
"LV: The max safe fixed VF is: " << MaxSafeFixedVF
419 LLVM_DEBUG(
dbgs() <<
"LV: The max safe scalable VF is: " << MaxSafeScalableVF
425 UserVF.
isScalable() ? MaxSafeScalableVF : MaxSafeFixedVF;
442 <<
" is unsafe, clamping to max safe VF="
443 << MaxSafeFixedVF <<
".\n");
446 TheLoop->getStartLoc(),
447 TheLoop->getHeader())
448 <<
"User-specified vectorization factor "
449 <<
ore::NV(
"UserVectorizationFactor", UserVF)
450 <<
" is unsafe, clamping to maximum safe vectorization factor "
451 <<
ore::NV(
"VectorizationFactor", MaxSafeFixedVF);
453 return MaxSafeFixedVF;
458 <<
" is ignored because scalable vectors are not "
462 TheLoop->getStartLoc(),
463 TheLoop->getHeader())
464 <<
"User-specified vectorization factor "
465 <<
ore::NV(
"UserVectorizationFactor", UserVF)
466 <<
" is ignored because the target does not support scalable "
467 "vectors. The compiler will pick a more suitable value.";
471 <<
" is unsafe. Ignoring scalable UserVF.\n");
474 TheLoop->getStartLoc(),
475 TheLoop->getHeader())
476 <<
"User-specified vectorization factor "
477 <<
ore::NV(
"UserVectorizationFactor", UserVF)
478 <<
" is unsafe. Ignoring the hint to let the compiler pick a "
479 "more suitable value.";
484 LLVM_DEBUG(
dbgs() <<
"LV: The Smallest and Widest types: " << SmallestType
485 <<
" / " << WidestType <<
" bits.\n");
489 if (
auto MaxVF = getMaximizedVFForTarget(
490 MaxTripCount, SmallestType, WidestType, MaxSafeFixedVF, UserIC,
491 FoldTailByMasking, RequiresScalarEpilogue))
492 Result.FixedVF = MaxVF;
494 if (
auto MaxVF = getMaximizedVFForTarget(
495 MaxTripCount, SmallestType, WidestType, MaxSafeScalableVF, UserIC,
496 FoldTailByMasking, RequiresScalarEpilogue))
498 Result.ScalableVF = MaxVF;
506std::pair<unsigned, unsigned>
508 unsigned MinWidth = -1U;
509 unsigned MaxWidth = 8;
514 if (ElementTypesInLoop.empty() && !Legal->getReductionVars().empty()) {
515 for (
const auto &[
_, RdxDesc] : Legal->getReductionVars()) {
520 std::min(RdxDesc.getMinWidthCastToRecurrenceTypeInBits(),
521 RdxDesc.getRecurrenceType()->getScalarSizeInBits()));
522 MaxWidth = std::max(MaxWidth,
523 RdxDesc.getRecurrenceType()->getScalarSizeInBits());
526 for (
Type *
T : ElementTypesInLoop) {
527 MinWidth = std::min<unsigned>(
528 MinWidth,
DL.getTypeSizeInBits(
T->getScalarType()).getFixedValue());
529 MaxWidth = std::max<unsigned>(
530 MaxWidth,
DL.getTypeSizeInBits(
T->getScalarType()).getFixedValue());
533 return {MinWidth, MaxWidth};
538 ElementTypesInLoop.clear();
546 if (ValuesToIgnore && ValuesToIgnore->
contains(&
I))
556 if (!Legal->isReductionVariable(PN))
559 Legal->getRecurrenceDescriptor(PN);
569 T = ST->getValueOperand()->getType();
572 "Expected the load/store/recurrence type to be sized");
574 ElementTypesInLoop.insert(
T);
579void VFSelectionContext::initializeVScaleForTuning() {
583 if (
F.hasFnAttribute(Attribute::VScaleRange)) {
584 auto Attr =
F.getFnAttribute(Attribute::VScaleRange);
585 auto Min = Attr.getVScaleRangeMin();
586 auto Max = Attr.getVScaleRangeMax();
587 if (Max && Min == Max) {
588 VScaleForTuning = Max;
593 VScaleForTuning = TTI.getVScaleForTuning();
598 return !Hints->allowReordering() && RdxDesc.
isOrdered();
604 Loop *L =
const_cast<Loop *
>(TheLoop);
605 if (Legal->getRuntimePointerChecking()->Need) {
607 "Runtime ptr check is required with -Os/-Oz",
608 "runtime pointer checks needed. Enable vectorization of this "
609 "loop with '#pragma clang loop vectorize(enable)' when "
610 "compiling with -Os/-Oz",
611 "CantVersionLoopWithOptForSize", ORE, L);
615 if (!PSE.getPredicate().isAlwaysTrue()) {
617 "Runtime SCEV check is required with -Os/-Oz",
618 "runtime SCEV checks needed. Enable vectorization of this "
619 "loop with '#pragma clang loop vectorize(enable)' when "
620 "compiling with -Os/-Oz",
621 "CantVersionLoopWithOptForSize", ORE, L);
626 if (!Legal->getLAI()->getSymbolicStrides().empty()) {
628 "Runtime stride check for small trip count",
629 "runtime stride == 1 checks needed. Enable vectorization of "
630 "this loop without such check by compiling with -Os/-Oz",
631 "CantVersionLoopWithOptForSize", ORE, L);
644 if (!InLoopReductions.empty())
647 for (
const auto &Reduction : Legal->getReductionVars()) {
648 PHINode *Phi = Reduction.first;
670 !TTI.preferInLoopReduction(Kind, Phi->getType()))
678 bool InLoop = !ReductionOperations.
empty();
681 InLoopReductions.insert(Phi);
684 for (
auto *
I : ReductionOperations) {
685 InLoopReductionImmediateChains[
I] = LastChain;
689 LLVM_DEBUG(
dbgs() <<
"LV: Using " << (InLoop ?
"inloop" :
"out of loop")
690 <<
" reduction for phi: " << *Phi <<
"\n");
696 const unsigned MaxTripCount,
698 bool IsEpilogue)
const {
704 if (
A.Width.isScalable() && CostA.
isValid() && !
B.Width.isScalable() &&
709 unsigned EstimatedWidthA =
A.Width.getKnownMinValue();
710 unsigned EstimatedWidthB =
B.Width.getKnownMinValue();
712 if (
A.Width.isScalable())
713 EstimatedWidthA *= *VScale;
714 if (
B.Width.isScalable())
715 EstimatedWidthB *= *VScale;
722 return CostA < CostB ||
723 (CostA == CostB && EstimatedWidthA > EstimatedWidthB);
728 bool PreferScalable = !
TTI.preferFixedOverScalableIfEqualCost(IsEpilogue) &&
729 A.Width.isScalable() && !
B.Width.isScalable();
739 bool LowerCostWithoutTC =
740 CmpFn(CostA * EstimatedWidthB, CostB * EstimatedWidthA);
742 return LowerCostWithoutTC;
744 auto GetCostForTC = [MaxTripCount, HasTail](
unsigned VF,
756 return VectorCost * (MaxTripCount / VF) +
757 ScalarCost * (MaxTripCount % VF);
758 return VectorCost *
divideCeil(MaxTripCount, VF);
761 auto RTCostA = GetCostForTC(EstimatedWidthA, CostA,
A.ScalarCost);
762 auto RTCostB = GetCostForTC(EstimatedWidthB, CostB,
B.ScalarCost);
763 bool LowerCostWithTC = CmpFn(RTCostA, RTCostB);
764 LLVM_DEBUG(
if (LowerCostWithTC != LowerCostWithoutTC) {
765 dbgs() <<
"LV: VF " << (LowerCostWithTC ?
A.Width :
B.Width)
766 <<
" has lower cost than VF "
767 << (LowerCostWithTC ?
B.Width :
A.Width)
768 <<
" when taking the cost of the remaining scalar loop iterations "
769 "into consideration for a maximum trip count of "
770 << MaxTripCount <<
".\n";
772 return LowerCostWithTC;
778 bool IsEpilogue)
const {
779 const unsigned MaxTripCount = PSE.getSmallConstantMaxTripCount();
780 return LoopVectorizationPlanner::isMoreProfitable(
A,
B, MaxTripCount, HasTail,
793 "Scalable vectorization requested but not supported by the target",
794 "the scalable user-specified vectorization width for outer-loop "
795 "vectorization cannot be used because the target does not support "
797 "ScalableVFUnfeasible", ORE, TheLoop);
805 auto RegKind = TTI.enableScalableVectorization()
810 unsigned N =
RegSize.getKnownMinValue() / WidestType;
817 <<
"overriding computed VF.\n");
822 "VF needs to be a power of two");
826 <<
"VF " << VF <<
" to build VPlans.\n");
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
loop Loop Strength Reduction
This file defines the LoopVectorizationLegality class.
static void debugVectorizationMessage(const StringRef Prefix, const StringRef DebugMsg, Instruction *I)
Write a DebugMsg about vectorization to the debug output stream.
static cl::opt< bool > ForceTargetSupportsGatherScatterOps("force-target-supports-gather-scatter-ops", cl::init(false), cl::Hidden, cl::desc("Assume the target supports gather/scatter operations (used for " "testing)."))
cl::opt< bool > VPlanBuildOuterloopStressTest
static cl::opt< bool > ForceTargetSupportsScalableVectors("force-target-supports-scalable-vectors", cl::init(false), cl::Hidden, cl::desc("Pretend that scalable vectors are supported, even if the target does " "not support them. This flag should only be used for testing."))
static cl::opt< bool > ConsiderRegPressure("vectorizer-consider-reg-pressure", cl::init(false), cl::Hidden, cl::desc("Discard VFs if their register pressure is too high."))
static cl::opt< bool > UseWiderVFIfCallVariantsPresent("vectorizer-maximize-bandwidth-for-vector-calls", cl::init(true), cl::Hidden, cl::desc("Try wider VFs if they enable the use of vector variants"))
static OptimizationRemarkAnalysis createLVAnalysis(StringRef RemarkName, const Loop *TheLoop, Instruction *I, DebugLoc DL={})
Create an analysis remark that explains why vectorization failed. RemarkName is the identifier for the...
static cl::opt< bool > ForceTargetSupportsMaskedMemoryOps("force-target-supports-masked-memory-ops", cl::init(false), cl::Hidden, cl::desc("Assume the target supports masked memory operations (used for " "testing)."))
Note: This currently only applies to llvm.masked.load and llvm.masked.store.
static cl::opt< bool > MaximizeBandwidth("vectorizer-maximize-bandwidth", cl::init(false), cl::Hidden, cl::desc("Maximize bandwidth when selecting vectorization factor which " "will be determined by the smallest type in loop."))
This file provides a LoopVectorizationPlanner class.
LLVM Basic Block Representation.
A parsed version of the target data layout string, and methods for querying it.
constexpr bool isVector() const
One or more elements.
static constexpr ElementCount getScalable(ScalarTy MinVal)
static constexpr ElementCount getFixed(ScalarTy MinVal)
static constexpr ElementCount get(ScalarTy MinVal, bool Scalable)
constexpr bool isScalar() const
Exactly one element.
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
BlockT * getHeader() const
bool isScalableVectorizationDisabled() const
bool isScalableVectorizationAlwaysPreferred() const
Represents a single loop in the control flow graph.
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
The RecurrenceDescriptor is used to identify recurrence variables in a loop.
Type * getRecurrenceType() const
Returns the type of the recurrence.
bool hasUsesOutsideReductionChain() const
Returns true if the reduction PHI has any uses outside the reduction chain.
static bool isFindLastRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
LLVM_ABI SmallVector< Instruction *, 4 > getReductionOpChain(PHINode *Phi, Loop *L) const
Attempts to find a chain of operations from Phi to LoopExitInst that can be treated as a set of reduc...
static bool isAnyOfRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
RecurKind getRecurrenceKind() const
bool isOrdered() const
Expose an ordered FP reduction to the instance users.
static bool isFindIVRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
bool contains(ConstPtrType Ptr) const
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Represent a constant reference to a string, i.e.
The instances of the Type class are immutable: once they are created, they are never changed.
bool isVoidTy() const
Return true if this is 'void'.
FixedScalableVFPair computeVPlanOuterloopVF(ElementCount UserVF)
Returns a scalable VF to use for outer-loop vectorization if the target supports it and a fixed VF ot...
std::pair< unsigned, unsigned > getSmallestAndWidestTypes() const
const TTI::TargetCostKind CostKind
The kind of cost that we are calculating.
bool supportsScalableVectors() const
bool runtimeChecksRequired()
Check whether vectorization would require runtime checks.
bool isLegalGatherOrScatter(Value *V, ElementCount VF) const
Returns true if the target machine can represent V as a masked gather or scatter operation.
void collectInLoopReductions()
Split reductions into those that happen in the loop, and those that happen outside.
FixedScalableVFPair computeFeasibleMaxVF(unsigned MaxTripCount, ElementCount UserVF, unsigned UserIC, bool FoldTailByMasking, bool RequiresScalarEpilogue)
bool useOrderedReductions(const RecurrenceDescriptor &RdxDesc) const
Returns true if we should use strict in-order reductions for the given RdxDesc.
bool shouldConsiderRegPressureForVF(ElementCount VF) const
void collectElementTypesForWidening(const SmallPtrSetImpl< const Value * > *ValuesToIgnore=nullptr)
Collect element types in the loop that need widening.
bool isLegalMaskedLoadOrStore(Instruction *I, ElementCount VF) const
Returns true if the target machine supports masked loads or stores for I's data type and alignment.
std::optional< unsigned > getVScaleForTuning() const
void computeMinimalBitwidths()
Compute smallest bitwidth each instruction can be represented with.
LLVM Value Representation.
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
constexpr bool isZero() const
static constexpr bool isKnownGT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
void reportVectorizationFailure(const StringRef DebugMsg, const StringRef OREMsg, const StringRef ORETag, OptimizationRemarkEmitter *ORE, const Loop *TheLoop, Instruction *I=nullptr)
Reports a vectorization failure: print DebugMsg for debugging purposes along with the corresponding o...
void reportVectorizationInfo(const StringRef Msg, const StringRef ORETag, OptimizationRemarkEmitter *ORE, const Loop *TheLoop, Instruction *I=nullptr, DebugLoc DL={})
Reports an informative message: print Msg for debugging purposes as well as an optimization remark.
void reportVectorization(OptimizationRemarkEmitter *ORE, Loop *TheLoop, ElementCount VFWidth, unsigned IC)
Report successful vectorization of the loop.
initializer< Ty > init(const Ty &Val)
DiagnosticInfoOptimizationBase::Argument NV
This is an optimization pass for GlobalISel generic memory operations.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
unsigned getLoadStoreAddressSpace(const Value *I)
A helper function that returns the address space of the pointer operand of load or store instruction.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
std::optional< unsigned > getMaxVScale(const Function &F, const TargetTransformInfo &TTI)
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
RecurKind
These are the kinds of recurrences that we support.
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
LLVM_ABI MapVector< Instruction *, uint64_t > computeMinimumValueSizes(ArrayRef< BasicBlock * > Blocks, DemandedBits &DB, const TargetTransformInfo *TTI=nullptr)
Compute a map of integer instructions to their minimum legal type size.
cl::opt< bool > PreferInLoopReductions
This struct is a compact representation of a valid (non-zero power of two) alignment.
A class that represents two vectorization factors (initialized with 0 by default).
static FixedScalableVFPair getNone()
TODO: The following VectorizationFactor was pulled out of LoopVectorizationCostModel class.