LLVM 23.0.0git
LoopVectorizationPlanner.cpp
Go to the documentation of this file.
1//===- LoopVectorizationPlanner.cpp - VF selection and planning -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements VFSelectionContext methods for loop vectorization
11/// VF selection, independent of cost-modeling decisions.
12///
13//===----------------------------------------------------------------------===//
14
21#include "llvm/Support/Debug.h"
25
26using namespace llvm;
27
28#define DEBUG_TYPE "loop-vectorize"
29
31
33 "vectorizer-maximize-bandwidth", cl::init(false), cl::Hidden,
34 cl::desc("Maximize bandwidth when selecting vectorization factor which "
35 "will be determined by the smallest type in loop."));
36
38 "vectorizer-maximize-bandwidth-for-vector-calls", cl::init(true),
40 cl::desc("Try wider VFs if they enable the use of vector variants"));
41
43 "vectorizer-consider-reg-pressure", cl::init(false), cl::Hidden,
44 cl::desc("Discard VFs if their register pressure is too high."));
45
47 "force-target-supports-scalable-vectors", cl::init(false), cl::Hidden,
49 "Pretend that scalable vectors are supported, even if the target does "
50 "not support them. This flag should only be used for testing."));
51
53 "prefer-inloop-reductions", cl::init(false), cl::Hidden,
54 cl::desc("Prefer in-loop vector reductions, "
55 "overriding the targets preference."));
56
57/// Note: This currently only applies to `llvm.masked.load` and
58/// `llvm.masked.store`. TODO: Extend this to cover other operations as needed.
60 "force-target-supports-masked-memory-ops", cl::init(false), cl::Hidden,
61 cl::desc("Assume the target supports masked memory operations (used for "
62 "testing)."));
63
65 ElementCount VF) const {
67 auto *Ty = getLoadStoreType(I);
68 const unsigned AS = getLoadStoreAddressSpace(I);
69 const Align Alignment = getLoadStoreAlignment(I);
70
72 (isa<LoadInst>(I) ? TTI.isLegalMaskedLoad(Ty, Alignment, AS)
73 : TTI.isLegalMaskedStore(Ty, Alignment, AS));
74}
75
77 ElementCount VF) const {
78 bool LI = isa<LoadInst>(V);
79 bool SI = isa<StoreInst>(V);
80 if (!LI && !SI)
81 return false;
82 auto *Ty = getLoadStoreType(V);
84 if (VF.isVector())
85 Ty = VectorType::get(Ty, VF);
86 return (LI && TTI.isLegalMaskedGather(Ty, Align)) ||
87 (SI && TTI.isLegalMaskedScatter(Ty, Align));
88}
89
91 return TTI.supportsScalableVectors() || ForceTargetSupportsScalableVectors;
92}
93
94bool VFSelectionContext::useMaxBandwidth(bool IsScalable) const {
98 return MaximizeBandwidth || (MaximizeBandwidth.getNumOccurrences() == 0 &&
99 (TTI.shouldMaximizeVectorBandwidth(RegKind) ||
101 Legal->hasVectorCallVariants())));
102}
103
105 if (ConsiderRegPressure.getNumOccurrences())
106 return ConsiderRegPressure;
107
108 // TODO: We should eventually consider register pressure for all targets. The
109 // TTI hook is temporary whilst target-specific issues are being fixed.
110 if (TTI.shouldConsiderVectorizationRegPressure())
111 return true;
112
113 if (!useMaxBandwidth(VF.isScalable()))
114 return false;
115 // Only calculate register pressure for VFs enabled by MaxBandwidth.
117 VF, VF.isScalable() ? MaxPermissibleVFWithoutMaxBW.ScalableVF
118 : MaxPermissibleVFWithoutMaxBW.FixedVF);
119}
120
121ElementCount VFSelectionContext::clampVFByMaxTripCount(
122 ElementCount VF, unsigned MaxTripCount, unsigned UserIC,
123 bool FoldTailByMasking, bool RequiresScalarEpilogue) const {
124 unsigned EstimatedVF = VF.getKnownMinValue();
125 if (VF.isScalable() && F.hasFnAttribute(Attribute::VScaleRange)) {
126 auto Attr = F.getFnAttribute(Attribute::VScaleRange);
127 auto Min = Attr.getVScaleRangeMin();
128 EstimatedVF *= Min;
129 }
130
131 // When a scalar epilogue is required, at least one iteration of the scalar
132 // loop has to execute. Adjust MaxTripCount accordingly to avoid picking a
133 // max VF that results in a dead vector loop.
134 if (MaxTripCount > 0 && RequiresScalarEpilogue)
135 MaxTripCount -= 1;
136
137 // When the user specifies an interleave count, we need to ensure that
138 // VF * UserIC <= MaxTripCount to avoid a dead vector loop.
139 unsigned IC = UserIC > 0 ? UserIC : 1;
140 unsigned EstimatedVFTimesIC = EstimatedVF * IC;
141
142 if (MaxTripCount && MaxTripCount <= EstimatedVFTimesIC &&
143 (!FoldTailByMasking || isPowerOf2_32(MaxTripCount))) {
144 // If upper bound loop trip count (TC) is known at compile time there is no
145 // point in choosing VF greater than TC / IC (as done in the loop below).
146 // Select maximum power of two which doesn't exceed TC / IC. If VF is
147 // scalable, we only fall back on a fixed VF when the TC is less than or
148 // equal to the known number of lanes.
149 auto ClampedUpperTripCount = llvm::bit_floor(MaxTripCount / IC);
150 if (ClampedUpperTripCount == 0)
151 ClampedUpperTripCount = 1;
152 LLVM_DEBUG(dbgs() << "LV: Clamping the MaxVF to maximum power of two not "
153 "exceeding the constant trip count"
154 << (UserIC > 0 ? " divided by UserIC" : "") << ": "
155 << ClampedUpperTripCount << "\n");
156 return ElementCount::get(ClampedUpperTripCount,
157 FoldTailByMasking ? VF.isScalable() : false);
158 }
159 return VF;
160}
161
162ElementCount VFSelectionContext::getMaximizedVFForTarget(
163 unsigned MaxTripCount, unsigned SmallestType, unsigned WidestType,
164 ElementCount MaxSafeVF, unsigned UserIC, bool FoldTailByMasking,
165 bool RequiresScalarEpilogue) {
166 bool ComputeScalableMaxVF = MaxSafeVF.isScalable();
167 const TypeSize WidestRegister = TTI.getRegisterBitWidth(
168 ComputeScalableMaxVF ? TargetTransformInfo::RGK_ScalableVector
170
171 // Convenience function to return the minimum of two ElementCounts.
172 auto MinVF = [](const ElementCount &LHS, const ElementCount &RHS) {
173 assert((LHS.isScalable() == RHS.isScalable()) &&
174 "Scalable flags must match");
176 };
177
178 // Ensure MaxVF is a power of 2; the dependence distance bound may not be.
179 // Note that both WidestRegister and WidestType may not be a powers of 2.
180 auto MaxVectorElementCount = ElementCount::get(
181 llvm::bit_floor(WidestRegister.getKnownMinValue() / WidestType),
182 ComputeScalableMaxVF);
183 MaxVectorElementCount = MinVF(MaxVectorElementCount, MaxSafeVF);
184 LLVM_DEBUG(dbgs() << "LV: The Widest register safe to use is: "
185 << (MaxVectorElementCount * WidestType) << " bits.\n");
186
187 if (!MaxVectorElementCount) {
188 LLVM_DEBUG(dbgs() << "LV: The target has no "
189 << (ComputeScalableMaxVF ? "scalable" : "fixed")
190 << " vector registers.\n");
191 return ElementCount::getFixed(1);
192 }
193
194 ElementCount MaxVF =
195 clampVFByMaxTripCount(MaxVectorElementCount, MaxTripCount, UserIC,
196 FoldTailByMasking, RequiresScalarEpilogue);
197 // If the MaxVF was already clamped, there's no point in trying to pick a
198 // larger one.
199 if (MaxVF != MaxVectorElementCount)
200 return MaxVF;
201
202 if (MaxVF.isScalable())
203 MaxPermissibleVFWithoutMaxBW.ScalableVF = MaxVF;
204 else
205 MaxPermissibleVFWithoutMaxBW.FixedVF = MaxVF;
206
207 if (useMaxBandwidth(ComputeScalableMaxVF)) {
208 auto MaxVectorElementCountMaxBW = ElementCount::get(
209 llvm::bit_floor(WidestRegister.getKnownMinValue() / SmallestType),
210 ComputeScalableMaxVF);
211 MaxVF = MinVF(MaxVectorElementCountMaxBW, MaxSafeVF);
212
213 if (ElementCount MinVF =
214 TTI.getMinimumVF(SmallestType, ComputeScalableMaxVF)) {
215 if (ElementCount::isKnownLT(MaxVF, MinVF)) {
216 LLVM_DEBUG(dbgs() << "LV: Overriding calculated MaxVF(" << MaxVF
217 << ") with target's minimum: " << MinVF << '\n');
218 MaxVF = MinVF;
219 }
220 }
221
222 MaxVF = clampVFByMaxTripCount(MaxVF, MaxTripCount, UserIC,
223 FoldTailByMasking, RequiresScalarEpilogue);
224 }
225 return MaxVF;
226}
227
228std::optional<unsigned> llvm::getMaxVScale(const Function &F,
229 const TargetTransformInfo &TTI) {
230 if (std::optional<unsigned> MaxVScale = TTI.getMaxVScale())
231 return MaxVScale;
232
233 if (F.hasFnAttribute(Attribute::VScaleRange))
234 return F.getFnAttribute(Attribute::VScaleRange).getVScaleRangeMax();
235
236 return std::nullopt;
237}
238
239bool VFSelectionContext::isScalableVectorizationAllowed() {
240 if (IsScalableVectorizationAllowed)
241 return *IsScalableVectorizationAllowed;
242
243 IsScalableVectorizationAllowed = false;
245 return false;
246
247 if (Hints->isScalableVectorizationDisabled()) {
248 reportVectorizationInfo("Scalable vectorization is explicitly disabled",
249 "ScalableVectorizationDisabled", ORE, TheLoop);
250 return false;
251 }
252
253 LLVM_DEBUG(dbgs() << "LV: Scalable vectorization is available\n");
254
255 auto MaxScalableVF = ElementCount::getScalable(
256 std::numeric_limits<ElementCount::ScalarTy>::max());
257
258 // Test that the loop-vectorizer can legalize all operations for this MaxVF.
259 // FIXME: While for scalable vectors this is currently sufficient, this should
260 // be replaced by a more detailed mechanism that filters out specific VFs,
261 // instead of invalidating vectorization for a whole set of VFs based on the
262 // MaxVF.
263
264 // Disable scalable vectorization if the loop contains unsupported reductions.
265 if (!all_of(Legal->getReductionVars(), [&](const auto &Reduction) -> bool {
266 return TTI.isLegalToVectorizeReduction(Reduction.second, MaxScalableVF);
267 })) {
269 "Scalable vectorization not supported for the reduction "
270 "operations found in this loop.",
271 "ScalableVFUnfeasible", ORE, TheLoop);
272 return false;
273 }
274
275 // Disable scalable vectorization if the loop contains any instructions
276 // with element types not supported for scalable vectors.
277 if (any_of(ElementTypesInLoop, [&](Type *Ty) {
278 return !Ty->isVoidTy() && !TTI.isElementTypeLegalForScalableVector(Ty);
279 })) {
280 reportVectorizationInfo("Scalable vectorization is not supported "
281 "for all element types found in this loop.",
282 "ScalableVFUnfeasible", ORE, TheLoop);
283 return false;
284 }
285
286 if (!Legal->isSafeForAnyVectorWidth() && !getMaxVScale(F, TTI)) {
287 reportVectorizationInfo("The target does not provide maximum vscale value "
288 "for safe distance analysis.",
289 "ScalableVFUnfeasible", ORE, TheLoop);
290 return false;
291 }
292
293 IsScalableVectorizationAllowed = true;
294 return true;
295}
296
298VFSelectionContext::getMaxLegalScalableVF(unsigned MaxSafeElements) {
299 if (!isScalableVectorizationAllowed())
301
302 auto MaxScalableVF = ElementCount::getScalable(
303 std::numeric_limits<ElementCount::ScalarTy>::max());
304 if (Legal->isSafeForAnyVectorWidth())
305 return MaxScalableVF;
306
307 std::optional<unsigned> MaxVScale = getMaxVScale(F, TTI);
308 // Limit MaxScalableVF by the maximum safe dependence distance.
309 MaxScalableVF = ElementCount::getScalable(MaxSafeElements / *MaxVScale);
310
311 if (!MaxScalableVF)
313 "Max legal vector width too small, scalable vectorization "
314 "unfeasible.",
315 "ScalableVFUnfeasible", ORE, TheLoop);
316
317 return MaxScalableVF;
318}
319
321 unsigned MaxTripCount, ElementCount UserVF, unsigned UserIC,
322 bool FoldTailByMasking, bool RequiresScalarEpilogue) {
323 auto [SmallestType, WidestType] = getSmallestAndWidestTypes();
324
325 // Get the maximum safe dependence distance in bits computed by LAA.
326 // It is computed by MaxVF * sizeOf(type) * 8, where type is taken from
327 // the memory accesses that is most restrictive (involved in the smallest
328 // dependence distance).
329 unsigned MaxSafeElementsPowerOf2 =
330 llvm::bit_floor(Legal->getMaxSafeVectorWidthInBits() / WidestType);
331 if (!Legal->isSafeForAnyStoreLoadForwardDistances()) {
332 unsigned SLDist = Legal->getMaxStoreLoadForwardSafeDistanceInBits();
333 MaxSafeElementsPowerOf2 =
334 std::min(MaxSafeElementsPowerOf2, SLDist / WidestType);
335 }
336
337 auto MaxSafeFixedVF = ElementCount::getFixed(MaxSafeElementsPowerOf2);
338 auto MaxSafeScalableVF = getMaxLegalScalableVF(MaxSafeElementsPowerOf2);
339
340 if (!Legal->isSafeForAnyVectorWidth())
341 MaxSafeElements = MaxSafeElementsPowerOf2;
342
343 LLVM_DEBUG(dbgs() << "LV: The max safe fixed VF is: " << MaxSafeFixedVF
344 << ".\n");
345 LLVM_DEBUG(dbgs() << "LV: The max safe scalable VF is: " << MaxSafeScalableVF
346 << ".\n");
347
348 // First analyze the UserVF, fall back if the UserVF should be ignored.
349 if (UserVF) {
350 auto MaxSafeUserVF =
351 UserVF.isScalable() ? MaxSafeScalableVF : MaxSafeFixedVF;
352
353 if (ElementCount::isKnownLE(UserVF, MaxSafeUserVF)) {
354 // If `VF=vscale x N` is safe, then so is `VF=N`
355 if (UserVF.isScalable())
356 return FixedScalableVFPair(
357 ElementCount::getFixed(UserVF.getKnownMinValue()), UserVF);
358
359 return UserVF;
360 }
361
362 assert(ElementCount::isKnownGT(UserVF, MaxSafeUserVF));
363
364 // Only clamp if the UserVF is not scalable. If the UserVF is scalable, it
365 // is better to ignore the hint and let the compiler choose a suitable VF.
366 if (!UserVF.isScalable()) {
367 LLVM_DEBUG(dbgs() << "LV: User VF=" << UserVF
368 << " is unsafe, clamping to max safe VF="
369 << MaxSafeFixedVF << ".\n");
370 ORE->emit([&]() {
371 return OptimizationRemarkAnalysis(DEBUG_TYPE, "VectorizationFactor",
372 TheLoop->getStartLoc(),
373 TheLoop->getHeader())
374 << "User-specified vectorization factor "
375 << ore::NV("UserVectorizationFactor", UserVF)
376 << " is unsafe, clamping to maximum safe vectorization factor "
377 << ore::NV("VectorizationFactor", MaxSafeFixedVF);
378 });
379 return MaxSafeFixedVF;
380 }
381
383 LLVM_DEBUG(dbgs() << "LV: User VF=" << UserVF
384 << " is ignored because scalable vectors are not "
385 "available.\n");
386 ORE->emit([&]() {
387 return OptimizationRemarkAnalysis(DEBUG_TYPE, "VectorizationFactor",
388 TheLoop->getStartLoc(),
389 TheLoop->getHeader())
390 << "User-specified vectorization factor "
391 << ore::NV("UserVectorizationFactor", UserVF)
392 << " is ignored because the target does not support scalable "
393 "vectors. The compiler will pick a more suitable value.";
394 });
395 } else {
396 LLVM_DEBUG(dbgs() << "LV: User VF=" << UserVF
397 << " is unsafe. Ignoring scalable UserVF.\n");
398 ORE->emit([&]() {
399 return OptimizationRemarkAnalysis(DEBUG_TYPE, "VectorizationFactor",
400 TheLoop->getStartLoc(),
401 TheLoop->getHeader())
402 << "User-specified vectorization factor "
403 << ore::NV("UserVectorizationFactor", UserVF)
404 << " is unsafe. Ignoring the hint to let the compiler pick a "
405 "more suitable value.";
406 });
407 }
408 }
409
410 LLVM_DEBUG(dbgs() << "LV: The Smallest and Widest types: " << SmallestType
411 << " / " << WidestType << " bits.\n");
412
415 if (auto MaxVF = getMaximizedVFForTarget(
416 MaxTripCount, SmallestType, WidestType, MaxSafeFixedVF, UserIC,
417 FoldTailByMasking, RequiresScalarEpilogue))
418 Result.FixedVF = MaxVF;
419
420 if (auto MaxVF = getMaximizedVFForTarget(
421 MaxTripCount, SmallestType, WidestType, MaxSafeScalableVF, UserIC,
422 FoldTailByMasking, RequiresScalarEpilogue))
423 if (MaxVF.isScalable()) {
424 Result.ScalableVF = MaxVF;
425 LLVM_DEBUG(dbgs() << "LV: Found feasible scalable VF = " << MaxVF
426 << "\n");
427 }
428
429 return Result;
430}
431
432std::pair<unsigned, unsigned>
434 unsigned MinWidth = -1U;
435 unsigned MaxWidth = 8;
436 const DataLayout &DL = F.getDataLayout();
437 // For in-loop reductions, no element types are added to ElementTypesInLoop
438 // if there are no loads/stores in the loop. In this case, check through the
439 // reduction variables to determine the maximum width.
440 if (ElementTypesInLoop.empty() && !Legal->getReductionVars().empty()) {
441 for (const auto &[_, RdxDesc] : Legal->getReductionVars()) {
442 // When finding the min width used by the recurrence we need to account
443 // for casts on the input operands of the recurrence.
444 MinWidth = std::min(
445 MinWidth,
446 std::min(RdxDesc.getMinWidthCastToRecurrenceTypeInBits(),
447 RdxDesc.getRecurrenceType()->getScalarSizeInBits()));
448 MaxWidth = std::max(MaxWidth,
449 RdxDesc.getRecurrenceType()->getScalarSizeInBits());
450 }
451 } else {
452 for (Type *T : ElementTypesInLoop) {
453 MinWidth = std::min<unsigned>(
454 MinWidth, DL.getTypeSizeInBits(T->getScalarType()).getFixedValue());
455 MaxWidth = std::max<unsigned>(
456 MaxWidth, DL.getTypeSizeInBits(T->getScalarType()).getFixedValue());
457 }
458 }
459 return {MinWidth, MaxWidth};
460}
461
463 const SmallPtrSetImpl<const Value *> *ValuesToIgnore) {
464 ElementTypesInLoop.clear();
465 // For each block.
466 for (BasicBlock *BB : TheLoop->blocks()) {
467 // For each instruction in the loop.
468 for (Instruction &I : *BB) {
469 Type *T = I.getType();
470
471 // Skip ignored values.
472 if (ValuesToIgnore && ValuesToIgnore->contains(&I))
473 continue;
474
475 // Only examine Loads, Stores and PHINodes.
477 continue;
478
479 // Examine PHI nodes that are reduction variables. Update the type to
480 // account for the recurrence type.
481 if (auto *PN = dyn_cast<PHINode>(&I)) {
482 if (!Legal->isReductionVariable(PN))
483 continue;
484 const RecurrenceDescriptor &RdxDesc =
485 Legal->getRecurrenceDescriptor(PN);
487 TTI.preferInLoopReduction(RdxDesc.getRecurrenceKind(),
488 RdxDesc.getRecurrenceType()))
489 continue;
490 T = RdxDesc.getRecurrenceType();
491 }
492
493 // Examine the stored values.
494 if (auto *ST = dyn_cast<StoreInst>(&I))
495 T = ST->getValueOperand()->getType();
496
497 assert(T->isSized() &&
498 "Expected the load/store/recurrence type to be sized");
499
500 ElementTypesInLoop.insert(T);
501 }
502 }
503}
504
505void VFSelectionContext::initializeVScaleForTuning() {
507 return;
508
509 if (F.hasFnAttribute(Attribute::VScaleRange)) {
510 auto Attr = F.getFnAttribute(Attribute::VScaleRange);
511 auto Min = Attr.getVScaleRangeMin();
512 auto Max = Attr.getVScaleRangeMax();
513 if (Max && Min == Max) {
514 VScaleForTuning = Max;
515 return;
516 }
517 }
518
519 VScaleForTuning = TTI.getVScaleForTuning();
520}
521
523 const RecurrenceDescriptor &RdxDesc) const {
524 return !Hints->allowReordering() && RdxDesc.isOrdered();
525}
526
528 LLVM_DEBUG(dbgs() << "LV: Performing code size checks.\n");
529
530 Loop *L = const_cast<Loop *>(TheLoop);
531 if (Legal->getRuntimePointerChecking()->Need) {
533 "Runtime ptr check is required with -Os/-Oz",
534 "runtime pointer checks needed. Enable vectorization of this "
535 "loop with '#pragma clang loop vectorize(enable)' when "
536 "compiling with -Os/-Oz",
537 "CantVersionLoopWithOptForSize", ORE, L);
538 return true;
539 }
540
541 if (!PSE.getPredicate().isAlwaysTrue()) {
543 "Runtime SCEV check is required with -Os/-Oz",
544 "runtime SCEV checks needed. Enable vectorization of this "
545 "loop with '#pragma clang loop vectorize(enable)' when "
546 "compiling with -Os/-Oz",
547 "CantVersionLoopWithOptForSize", ORE, L);
548 return true;
549 }
550
551 // FIXME: Avoid specializing for stride==1 instead of bailing out.
552 if (!Legal->getLAI()->getSymbolicStrides().empty()) {
554 "Runtime stride check for small trip count",
555 "runtime stride == 1 checks needed. Enable vectorization of "
556 "this loop without such check by compiling with -Os/-Oz",
557 "CantVersionLoopWithOptForSize", ORE, L);
558 return true;
559 }
560
561 return false;
562}
563
565 MinBWs = computeMinimumValueSizes(TheLoop->getBlocks(), *DB, &TTI);
566}
567
569 // Avoid duplicating work finding in-loop reductions.
570 if (!InLoopReductions.empty())
571 return;
572
573 for (const auto &Reduction : Legal->getReductionVars()) {
574 PHINode *Phi = Reduction.first;
575 const RecurrenceDescriptor &RdxDesc = Reduction.second;
576
577 // Multi-use reductions (e.g., used in FindLastIV patterns) are handled
578 // separately and should not be considered for in-loop reductions.
579 if (RdxDesc.hasUsesOutsideReductionChain())
580 continue;
581
582 // We don't collect reductions that are type promoted (yet).
583 if (RdxDesc.getRecurrenceType() != Phi->getType())
584 continue;
585
586 // In-loop AnyOf and FindIV reductions are not yet supported.
587 RecurKind Kind = RdxDesc.getRecurrenceKind();
591 continue;
592
593 // If the target would prefer this reduction to happen "in-loop", then we
594 // want to record it as such.
596 !TTI.preferInLoopReduction(Kind, Phi->getType()))
597 continue;
598
599 // Check that we can correctly put the reductions into the loop, by
600 // finding the chain of operations that leads from the phi to the loop
601 // exit value.
602 SmallVector<Instruction *, 4> ReductionOperations =
603 RdxDesc.getReductionOpChain(Phi, const_cast<Loop *>(TheLoop));
604 bool InLoop = !ReductionOperations.empty();
605
606 if (InLoop) {
607 InLoopReductions.insert(Phi);
608 // Add the elements to InLoopReductionImmediateChains for cost modelling.
609 Instruction *LastChain = Phi;
610 for (auto *I : ReductionOperations) {
611 InLoopReductionImmediateChains[I] = LastChain;
612 LastChain = I;
613 }
614 }
615 LLVM_DEBUG(dbgs() << "LV: Using " << (InLoop ? "inloop" : "out of loop")
616 << " reduction for phi: " << *Phi << "\n");
617 }
618}
619
620// TODO: we could return a pair of values that specify the max VF and
621// min VF, to be used in `buildVPlans(MinVF, MaxVF)` instead of
622// `buildVPlans(VF, VF)`. We cannot do it because VPLAN at the moment
623// doesn't have a cost model that can choose which plan to execute if
624// more than one is generated.
627 if (UserVF.isScalable() && !supportsScalableVectors()) {
629 "Scalable vectorization requested but not supported by the target",
630 "the scalable user-specified vectorization width for outer-loop "
631 "vectorization cannot be used because the target does not support "
632 "scalable vectors.",
633 "ScalableVFUnfeasible", ORE, TheLoop);
635 }
636
637 ElementCount VF = UserVF;
638 if (VF.isZero()) {
639 auto [_, WidestType] = getSmallestAndWidestTypes();
640
641 auto RegKind = TTI.enableScalableVectorization()
644
645 TypeSize RegSize = TTI.getRegisterBitWidth(RegKind);
646 unsigned N = RegSize.getKnownMinValue() / WidestType;
647 VF = ElementCount::get(N, RegSize.isScalable());
648 LLVM_DEBUG(dbgs() << "LV: VPlan computed VF " << VF << ".\n");
649
650 // Make sure we have a VF > 1 for stress testing.
652 LLVM_DEBUG(dbgs() << "LV: VPlan stress testing: "
653 << "overriding computed VF.\n");
655 }
656 }
658 "VF needs to be a power of two");
659 if (VF.isScalar())
661 LLVM_DEBUG(dbgs() << "LV: Using " << (!UserVF.isZero() ? "user " : "")
662 << "VF " << VF << " to build VPlans.\n");
663 return FixedScalableVFPair(VF);
664}
unsigned RegSize
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
#define DEBUG_TYPE
#define _
loop Loop Strength Reduction
This file defines the LoopVectorizationLegality class.
cl::opt< bool > VPlanBuildOuterloopStressTest
static cl::opt< bool > ForceTargetSupportsScalableVectors("force-target-supports-scalable-vectors", cl::init(false), cl::Hidden, cl::desc("Pretend that scalable vectors are supported, even if the target does " "not support them. This flag should only be used for testing."))
static cl::opt< bool > ConsiderRegPressure("vectorizer-consider-reg-pressure", cl::init(false), cl::Hidden, cl::desc("Discard VFs if their register pressure is too high."))
static cl::opt< bool > UseWiderVFIfCallVariantsPresent("vectorizer-maximize-bandwidth-for-vector-calls", cl::init(true), cl::Hidden, cl::desc("Try wider VFs if they enable the use of vector variants"))
static cl::opt< bool > ForceTargetSupportsMaskedMemoryOps("force-target-supports-masked-memory-ops", cl::init(false), cl::Hidden, cl::desc("Assume the target supports masked memory operations (used for " "testing)."))
Note: This currently only applies to llvm.masked.load and llvm.masked.store.
static cl::opt< bool > MaximizeBandwidth("vectorizer-maximize-bandwidth", cl::init(false), cl::Hidden, cl::desc("Maximize bandwidth when selecting vectorization factor which " "will be determined by the smallest type in loop."))
This file provides a LoopVectorizationPlanner class.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define T
#define LLVM_DEBUG(...)
Definition Debug.h:119
Value * RHS
Value * LHS
LLVM Basic Block Representation.
Definition BasicBlock.h:62
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
constexpr bool isVector() const
One or more elements.
Definition TypeSize.h:324
static constexpr ElementCount getScalable(ScalarTy MinVal)
Definition TypeSize.h:312
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:309
static constexpr ElementCount get(ScalarTy MinVal, bool Scalable)
Definition TypeSize.h:315
constexpr bool isScalar() const
Exactly one element.
Definition TypeSize.h:320
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
Diagnostic information for optimization analysis remarks.
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Type * getRecurrenceType() const
Returns the type of the recurrence.
bool hasUsesOutsideReductionChain() const
Returns true if the reduction PHI has any uses outside the reduction chain.
static bool isFindLastRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
LLVM_ABI SmallVector< Instruction *, 4 > getReductionOpChain(PHINode *Phi, Loop *L) const
Attempts to find a chain of operations from Phi to LoopExitInst that can be treated as a set of reduc...
static bool isAnyOfRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
RecurKind getRecurrenceKind() const
bool isOrdered() const
Expose an ordered FP reduction to the instance users.
static bool isFindIVRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
bool contains(ConstPtrType Ptr) const
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
bool isVoidTy() const
Return true if this is 'void'.
Definition Type.h:141
FixedScalableVFPair computeVPlanOuterloopVF(ElementCount UserVF)
Returns a scalable VF to use for outer-loop vectorization if the target supports it and a fixed VF ot...
std::pair< unsigned, unsigned > getSmallestAndWidestTypes() const
bool runtimeChecksRequired()
Check whether vectorization would require runtime checks.
bool isLegalGatherOrScatter(Value *V, ElementCount VF) const
Returns true if the target machine can represent V as a masked gather or scatter operation.
void collectInLoopReductions()
Split reductions into those that happen in the loop, and those that happen outside.
FixedScalableVFPair computeFeasibleMaxVF(unsigned MaxTripCount, ElementCount UserVF, unsigned UserIC, bool FoldTailByMasking, bool RequiresScalarEpilogue)
bool useOrderedReductions(const RecurrenceDescriptor &RdxDesc) const
Returns true if we should use strict in-order reductions for the given RdxDesc.
bool shouldConsiderRegPressureForVF(ElementCount VF) const
void collectElementTypesForWidening(const SmallPtrSetImpl< const Value * > *ValuesToIgnore=nullptr)
Collect element types in the loop that need widening.
bool isLegalMaskedLoadOrStore(Instruction *I, ElementCount VF) const
Returns true if the target machine supports masked loads or stores for I's data type and alignment.
void computeMinimalBitwidths()
Compute smallest bitwidth each instruction can be represented with.
LLVM Value Representation.
Definition Value.h:75
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:230
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:216
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:168
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
constexpr bool isZero() const
Definition TypeSize.h:153
static constexpr bool isKnownGT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:223
initializer< Ty > init(const Ty &Val)
DiagnosticInfoOptimizationBase::Argument NV
This is an optimization pass for GlobalISel generic memory operations.
void reportVectorizationInfo(const StringRef Msg, const StringRef ORETag, OptimizationRemarkEmitter *ORE, const Loop *TheLoop, Instruction *I=nullptr, DebugLoc DL={})
Reports an informative message: print Msg for debugging purposes as well as an optimization remark.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
unsigned getLoadStoreAddressSpace(const Value *I)
A helper function that returns the address space of the pointer operand of load or store instruction.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
std::optional< unsigned > getMaxVScale(const Function &F, const TargetTransformInfo &TTI)
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI void reportVectorizationFailure(const StringRef DebugMsg, const StringRef OREMsg, const StringRef ORETag, OptimizationRemarkEmitter *ORE, const Loop *TheLoop, Instruction *I=nullptr)
Reports a vectorization failure: print DebugMsg for debugging purposes along with the corresponding o...
TargetTransformInfo TTI
RecurKind
These are the kinds of recurrences that we support.
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition bit.h:347
LLVM_ABI MapVector< Instruction *, uint64_t > computeMinimumValueSizes(ArrayRef< BasicBlock * > Blocks, DemandedBits &DB, const TargetTransformInfo *TTI=nullptr)
Compute a map of integer instructions to their minimum legal type size.
cl::opt< bool > PreferInLoopReductions
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
A class that represents two vectorization factors (initialized with 0 by default).
static FixedScalableVFPair getNone()