// LLVM 18.0.0git — LoopVectorizationLegality.cpp
// (Doxygen source-listing navigation text removed; see the documentation page
// for the rendered version of this file.)
1//===- LoopVectorizationLegality.cpp --------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file provides loop vectorization legality analysis. Original code
10// resided in LoopVectorize.cpp for a long time.
11//
12// At this point, it is implemented as a utility class, not as an analysis
13// pass. It should be easy to create an analysis pass around it if there
14// is a need (but D45420 needs to happen first).
15//
16
18#include "llvm/Analysis/Loads.h"
29
30using namespace llvm;
31using namespace PatternMatch;
32
33#define LV_NAME "loop-vectorize"
34#define DEBUG_TYPE LV_NAME
35
36static cl::opt<bool>
37 EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
38 cl::desc("Enable if-conversion during vectorization."));
39
40static cl::opt<bool>
41AllowStridedPointerIVs("lv-strided-pointer-ivs", cl::init(false), cl::Hidden,
42 cl::desc("Enable recognition of non-constant strided "
43 "pointer induction variables."));
44
45namespace llvm {
47 HintsAllowReordering("hints-allow-reordering", cl::init(true), cl::Hidden,
48 cl::desc("Allow enabling loop hints to reorder "
49 "FP operations during vectorization."));
50}
51
52// TODO: Move size-based thresholds out of legality checking, make cost based
53// decisions instead of hard thresholds.
55 "vectorize-scev-check-threshold", cl::init(16), cl::Hidden,
56 cl::desc("The maximum number of SCEV checks allowed."));
57
59 "pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden,
60 cl::desc("The maximum number of SCEV checks allowed with a "
61 "vectorize(enable) pragma"));
62
65 "scalable-vectorization", cl::init(LoopVectorizeHints::SK_Unspecified),
67 cl::desc("Control whether the compiler can use scalable vectors to "
68 "vectorize a loop"),
71 "Scalable vectorization is disabled."),
74 "Scalable vectorization is available and favored when the "
75 "cost is inconclusive."),
78 "Scalable vectorization is available and favored when the "
79 "cost is inconclusive.")));
80
81/// Maximum vectorization interleave count.
82static const unsigned MaxInterleaveFactor = 16;
83
84namespace llvm {
85
86bool LoopVectorizeHints::Hint::validate(unsigned Val) {
87 switch (Kind) {
88 case HK_WIDTH:
90 case HK_INTERLEAVE:
91 return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor;
92 case HK_FORCE:
93 return (Val <= 1);
94 case HK_ISVECTORIZED:
95 case HK_PREDICATE:
96 case HK_SCALABLE:
97 return (Val == 0 || Val == 1);
98 }
99 return false;
100}
101
103 bool InterleaveOnlyWhenForced,
106 : Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH),
107 Interleave("interleave.count", InterleaveOnlyWhenForced, HK_INTERLEAVE),
108 Force("vectorize.enable", FK_Undefined, HK_FORCE),
109 IsVectorized("isvectorized", 0, HK_ISVECTORIZED),
110 Predicate("vectorize.predicate.enable", FK_Undefined, HK_PREDICATE),
111 Scalable("vectorize.scalable.enable", SK_Unspecified, HK_SCALABLE),
112 TheLoop(L), ORE(ORE) {
113 // Populate values with existing loop metadata.
114 getHintsFromMetadata();
115
116 // force-vector-interleave overrides DisableInterleaving.
119
120 // If the metadata doesn't explicitly specify whether to enable scalable
121 // vectorization, then decide based on the following criteria (increasing
122 // level of priority):
123 // - Target default
124 // - Metadata width
125 // - Force option (always overrides)
127 if (TTI)
130
131 if (Width.Value)
132 // If the width is set, but the metadata says nothing about the scalable
133 // property, then assume it concerns only a fixed-width UserVF.
134 // If width is not set, the flag takes precedence.
135 Scalable.Value = SK_FixedWidthOnly;
136 }
137
138 // If the flag is set to force any use of scalable vectors, override the loop
139 // hints.
140 if (ForceScalableVectorization.getValue() !=
142 Scalable.Value = ForceScalableVectorization.getValue();
143
144 // Scalable vectorization is disabled if no preference is specified.
146 Scalable.Value = SK_FixedWidthOnly;
147
148 if (IsVectorized.Value != 1)
149 // If the vectorization width and interleaving count are both 1 then
150 // consider the loop to have been already vectorized because there's
151 // nothing more that we can do.
152 IsVectorized.Value =
154 LLVM_DEBUG(if (InterleaveOnlyWhenForced && getInterleave() == 1) dbgs()
155 << "LV: Interleaving disabled by the pass manager\n");
156}
157
159 LLVMContext &Context = TheLoop->getHeader()->getContext();
160
161 MDNode *IsVectorizedMD = MDNode::get(
162 Context,
163 {MDString::get(Context, "llvm.loop.isvectorized"),
165 MDNode *LoopID = TheLoop->getLoopID();
166 MDNode *NewLoopID =
168 {Twine(Prefix(), "vectorize.").str(),
169 Twine(Prefix(), "interleave.").str()},
170 {IsVectorizedMD});
171 TheLoop->setLoopID(NewLoopID);
172
173 // Update internal cache.
174 IsVectorized.Value = 1;
175}
176
178 Function *F, Loop *L, bool VectorizeOnlyWhenForced) const {
180 LLVM_DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n");
182 return false;
183 }
184
185 if (VectorizeOnlyWhenForced && getForce() != LoopVectorizeHints::FK_Enabled) {
186 LLVM_DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n");
188 return false;
189 }
190
191 if (getIsVectorized() == 1) {
192 LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n");
193 // FIXME: Add interleave.disable metadata. This will allow
194 // vectorize.disable to be used without disabling the pass and errors
195 // to differentiate between disabled vectorization and a width of 1.
196 ORE.emit([&]() {
198 "AllDisabled", L->getStartLoc(),
199 L->getHeader())
200 << "loop not vectorized: vectorization and interleaving are "
201 "explicitly disabled, or the loop has already been "
202 "vectorized";
203 });
204 return false;
205 }
206
207 return true;
208}
209
211 using namespace ore;
212
213 ORE.emit([&]() {
214 if (Force.Value == LoopVectorizeHints::FK_Disabled)
215 return OptimizationRemarkMissed(LV_NAME, "MissedExplicitlyDisabled",
216 TheLoop->getStartLoc(),
217 TheLoop->getHeader())
218 << "loop not vectorized: vectorization is explicitly disabled";
219 else {
220 OptimizationRemarkMissed R(LV_NAME, "MissedDetails",
221 TheLoop->getStartLoc(), TheLoop->getHeader());
222 R << "loop not vectorized";
223 if (Force.Value == LoopVectorizeHints::FK_Enabled) {
224 R << " (Force=" << NV("Force", true);
225 if (Width.Value != 0)
226 R << ", Vector Width=" << NV("VectorWidth", getWidth());
227 if (getInterleave() != 0)
228 R << ", Interleave Count=" << NV("InterleaveCount", getInterleave());
229 R << ")";
230 }
231 return R;
232 }
233 });
234}
235
238 return LV_NAME;
240 return LV_NAME;
242 return LV_NAME;
244}
245
247 // Allow the vectorizer to change the order of operations if enabling
248 // loop hints are provided
249 ElementCount EC = getWidth();
250 return HintsAllowReordering &&
252 EC.getKnownMinValue() > 1);
253}
254
255void LoopVectorizeHints::getHintsFromMetadata() {
256 MDNode *LoopID = TheLoop->getLoopID();
257 if (!LoopID)
258 return;
259
260 // First operand should refer to the loop id itself.
261 assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
262 assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
263
264 for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
265 const MDString *S = nullptr;
267
268 // The expected hint is either a MDString or a MDNode with the first
269 // operand a MDString.
270 if (const MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i))) {
271 if (!MD || MD->getNumOperands() == 0)
272 continue;
273 S = dyn_cast<MDString>(MD->getOperand(0));
274 for (unsigned i = 1, ie = MD->getNumOperands(); i < ie; ++i)
275 Args.push_back(MD->getOperand(i));
276 } else {
277 S = dyn_cast<MDString>(LoopID->getOperand(i));
278 assert(Args.size() == 0 && "too many arguments for MDString");
279 }
280
281 if (!S)
282 continue;
283
284 // Check if the hint starts with the loop metadata prefix.
285 StringRef Name = S->getString();
286 if (Args.size() == 1)
287 setHint(Name, Args[0]);
288 }
289}
290
291void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) {
292 if (!Name.startswith(Prefix()))
293 return;
294 Name = Name.substr(Prefix().size(), StringRef::npos);
295
296 const ConstantInt *C = mdconst::dyn_extract<ConstantInt>(Arg);
297 if (!C)
298 return;
299 unsigned Val = C->getZExtValue();
300
301 Hint *Hints[] = {&Width, &Interleave, &Force,
302 &IsVectorized, &Predicate, &Scalable};
303 for (auto *H : Hints) {
304 if (Name == H->Name) {
305 if (H->validate(Val))
306 H->Value = Val;
307 else
308 LLVM_DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n");
309 break;
310 }
311 }
312}
313
314// Return true if the inner loop \p Lp is uniform with regard to the outer loop
315// \p OuterLp (i.e., if the outer loop is vectorized, all the vector lanes
316// executing the inner loop will execute the same iterations). This check is
317// very constrained for now but it will be relaxed in the future. \p Lp is
318// considered uniform if it meets all the following conditions:
319// 1) it has a canonical IV (starting from 0 and with stride 1),
320// 2) its latch terminator is a conditional branch and,
321// 3) its latch condition is a compare instruction whose operands are the
322// canonical IV and an OuterLp invariant.
323// This check doesn't take into account the uniformity of other conditions not
324// related to the loop latch because they don't affect the loop uniformity.
325//
326// NOTE: We decided to keep all these checks and its associated documentation
327// together so that we can easily have a picture of the current supported loop
328// nests. However, some of the current checks don't depend on \p OuterLp and
329// would be redundantly executed for each \p Lp if we invoked this function for
330// different candidate outer loops. This is not the case for now because we
331// don't currently have the infrastructure to evaluate multiple candidate outer
332// loops and \p OuterLp will be a fixed parameter while we only support explicit
333// outer loop vectorization. It's also very likely that these checks go away
334// before introducing the aforementioned infrastructure. However, if this is not
335// the case, we should move the \p OuterLp independent checks to a separate
336// function that is only executed once for each \p Lp.
337static bool isUniformLoop(Loop *Lp, Loop *OuterLp) {
338 assert(Lp->getLoopLatch() && "Expected loop with a single latch.");
339
340 // If Lp is the outer loop, it's uniform by definition.
341 if (Lp == OuterLp)
342 return true;
343 assert(OuterLp->contains(Lp) && "OuterLp must contain Lp.");
344
345 // 1.
347 if (!IV) {
348 LLVM_DEBUG(dbgs() << "LV: Canonical IV not found.\n");
349 return false;
350 }
351
352 // 2.
353 BasicBlock *Latch = Lp->getLoopLatch();
354 auto *LatchBr = dyn_cast<BranchInst>(Latch->getTerminator());
355 if (!LatchBr || LatchBr->isUnconditional()) {
356 LLVM_DEBUG(dbgs() << "LV: Unsupported loop latch branch.\n");
357 return false;
358 }
359
360 // 3.
361 auto *LatchCmp = dyn_cast<CmpInst>(LatchBr->getCondition());
362 if (!LatchCmp) {
364 dbgs() << "LV: Loop latch condition is not a compare instruction.\n");
365 return false;
366 }
367
368 Value *CondOp0 = LatchCmp->getOperand(0);
369 Value *CondOp1 = LatchCmp->getOperand(1);
370 Value *IVUpdate = IV->getIncomingValueForBlock(Latch);
371 if (!(CondOp0 == IVUpdate && OuterLp->isLoopInvariant(CondOp1)) &&
372 !(CondOp1 == IVUpdate && OuterLp->isLoopInvariant(CondOp0))) {
373 LLVM_DEBUG(dbgs() << "LV: Loop latch condition is not uniform.\n");
374 return false;
375 }
376
377 return true;
378}
379
380// Return true if \p Lp and all its nested loops are uniform with regard to \p
381// OuterLp.
382static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp) {
383 if (!isUniformLoop(Lp, OuterLp))
384 return false;
385
386 // Check if nested loops are uniform.
387 for (Loop *SubLp : *Lp)
388 if (!isUniformLoopNest(SubLp, OuterLp))
389 return false;
390
391 return true;
392}
393
395 if (Ty->isPointerTy())
396 return DL.getIntPtrType(Ty);
397
398 // It is possible that char's or short's overflow when we ask for the loop's
399 // trip count, work around this by changing the type size.
400 if (Ty->getScalarSizeInBits() < 32)
401 return Type::getInt32Ty(Ty->getContext());
402
403 return Ty;
404}
405
406static Type *getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1) {
409 if (Ty0->getScalarSizeInBits() > Ty1->getScalarSizeInBits())
410 return Ty0;
411 return Ty1;
412}
413
414/// Check that the instruction has outside loop users and is not an
415/// identified reduction variable.
416static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
417 SmallPtrSetImpl<Value *> &AllowedExit) {
418 // Reductions, Inductions and non-header phis are allowed to have exit users. All
419 // other instructions must not have external users.
420 if (!AllowedExit.count(Inst))
421 // Check that all of the users of the loop are inside the BB.
422 for (User *U : Inst->users()) {
423 Instruction *UI = cast<Instruction>(U);
424 // This user may be a reduction exit value.
425 if (!TheLoop->contains(UI)) {
426 LLVM_DEBUG(dbgs() << "LV: Found an outside user for : " << *UI << '\n');
427 return true;
428 }
429 }
430 return false;
431}
432
433/// Returns true if A and B have same pointer operands or same SCEVs addresses
435 StoreInst *B) {
436 // Compare store
437 if (A == B)
438 return true;
439
440 // Otherwise Compare pointers
441 Value *APtr = A->getPointerOperand();
442 Value *BPtr = B->getPointerOperand();
443 if (APtr == BPtr)
444 return true;
445
446 // Otherwise compare address SCEVs
447 if (SE->getSCEV(APtr) == SE->getSCEV(BPtr))
448 return true;
449
450 return false;
451}
452
454 Value *Ptr) const {
455 // FIXME: Currently, the set of symbolic strides is sometimes queried before
456 // it's collected. This happens from canVectorizeWithIfConvert, when the
457 // pointer is checked to reference consecutive elements suitable for a
458 // masked access.
459 const auto &Strides =
461
462 Function *F = TheLoop->getHeader()->getParent();
463 bool OptForSize = F->hasOptSize() ||
464 llvm::shouldOptimizeForSize(TheLoop->getHeader(), PSI, BFI,
466 bool CanAddPredicate = !OptForSize;
467 int Stride = getPtrStride(PSE, AccessTy, Ptr, TheLoop, Strides,
468 CanAddPredicate, false).value_or(0);
469 if (Stride == 1 || Stride == -1)
470 return Stride;
471 return 0;
472}
473
475 return LAI->isInvariant(V);
476}
477
478namespace {
479/// A rewriter to build the SCEVs for each of the VF lanes in the expected
480/// vectorized loop, which can then be compared to detect their uniformity. This
481/// is done by replacing the AddRec SCEVs of the original scalar loop (TheLoop)
482/// with new AddRecs where the step is multiplied by StepMultiplier and Offset *
483/// Step is added. Also checks if all sub-expressions are analyzable w.r.t.
484/// uniformity.
485class SCEVAddRecForUniformityRewriter
486 : public SCEVRewriteVisitor<SCEVAddRecForUniformityRewriter> {
487 /// Multiplier to be applied to the step of AddRecs in TheLoop.
488 unsigned StepMultiplier;
489
490 /// Offset to be added to the AddRecs in TheLoop.
491 unsigned Offset;
492
493 /// Loop for which to rewrite AddRecsFor.
494 Loop *TheLoop;
495
496 /// Is any sub-expressions not analyzable w.r.t. uniformity?
497 bool CannotAnalyze = false;
498
499 bool canAnalyze() const { return !CannotAnalyze; }
500
501public:
502 SCEVAddRecForUniformityRewriter(ScalarEvolution &SE, unsigned StepMultiplier,
503 unsigned Offset, Loop *TheLoop)
504 : SCEVRewriteVisitor(SE), StepMultiplier(StepMultiplier), Offset(Offset),
505 TheLoop(TheLoop) {}
506
507 const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
508 assert(Expr->getLoop() == TheLoop &&
509 "addrec outside of TheLoop must be invariant and should have been "
510 "handled earlier");
511 // Build a new AddRec by multiplying the step by StepMultiplier and
512 // incrementing the start by Offset * step.
513 Type *Ty = Expr->getType();
514 auto *Step = Expr->getStepRecurrence(SE);
515 if (!SE.isLoopInvariant(Step, TheLoop)) {
516 CannotAnalyze = true;
517 return Expr;
518 }
519 auto *NewStep = SE.getMulExpr(Step, SE.getConstant(Ty, StepMultiplier));
520 auto *ScaledOffset = SE.getMulExpr(Step, SE.getConstant(Ty, Offset));
521 auto *NewStart = SE.getAddExpr(Expr->getStart(), ScaledOffset);
522 return SE.getAddRecExpr(NewStart, NewStep, TheLoop, SCEV::FlagAnyWrap);
523 }
524
525 const SCEV *visit(const SCEV *S) {
526 if (CannotAnalyze || SE.isLoopInvariant(S, TheLoop))
527 return S;
529 }
530
531 const SCEV *visitUnknown(const SCEVUnknown *S) {
532 if (SE.isLoopInvariant(S, TheLoop))
533 return S;
534 // The value could vary across iterations.
535 CannotAnalyze = true;
536 return S;
537 }
538
539 const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *S) {
540 // Could not analyze the expression.
541 CannotAnalyze = true;
542 return S;
543 }
544
545 static const SCEV *rewrite(const SCEV *S, ScalarEvolution &SE,
546 unsigned StepMultiplier, unsigned Offset,
547 Loop *TheLoop) {
548 /// Bail out if the expression does not contain an UDiv expression.
549 /// Uniform values which are not loop invariant require operations to strip
550 /// out the lowest bits. For now just look for UDivs and use it to avoid
551 /// re-writing UDIV-free expressions for other lanes to limit compile time.
552 if (!SCEVExprContains(S,
553 [](const SCEV *S) { return isa<SCEVUDivExpr>(S); }))
554 return SE.getCouldNotCompute();
555
556 SCEVAddRecForUniformityRewriter Rewriter(SE, StepMultiplier, Offset,
557 TheLoop);
558 const SCEV *Result = Rewriter.visit(S);
559
560 if (Rewriter.canAnalyze())
561 return Result;
562 return SE.getCouldNotCompute();
563 }
564};
565
566} // namespace
567
569 if (isInvariant(V))
570 return true;
571 if (VF.isScalable())
572 return false;
573 if (VF.isScalar())
574 return true;
575
576 // Since we rely on SCEV for uniformity, if the type is not SCEVable, it is
577 // never considered uniform.
578 auto *SE = PSE.getSE();
579 if (!SE->isSCEVable(V->getType()))
580 return false;
581 const SCEV *S = SE->getSCEV(V);
582
583 // Rewrite AddRecs in TheLoop to step by VF and check if the expression for
584 // lane 0 matches the expressions for all other lanes.
585 unsigned FixedVF = VF.getKnownMinValue();
586 const SCEV *FirstLaneExpr =
587 SCEVAddRecForUniformityRewriter::rewrite(S, *SE, FixedVF, 0, TheLoop);
588 if (isa<SCEVCouldNotCompute>(FirstLaneExpr))
589 return false;
590
591 // Make sure the expressions for lanes FixedVF-1..1 match the expression for
592 // lane 0. We check lanes in reverse order for compile-time, as frequently
593 // checking the last lane is sufficient to rule out uniformity.
594 return all_of(reverse(seq<unsigned>(1, FixedVF)), [&](unsigned I) {
595 const SCEV *IthLaneExpr =
596 SCEVAddRecForUniformityRewriter::rewrite(S, *SE, FixedVF, I, TheLoop);
597 return FirstLaneExpr == IthLaneExpr;
598 });
599}
600
602 ElementCount VF) const {
604 if (!Ptr)
605 return false;
606 // Note: There's nothing inherent which prevents predicated loads and
607 // stores from being uniform. The current lowering simply doesn't handle
608 // it; in particular, the cost model distinguishes scatter/gather from
609 // scalar w/predication, and we currently rely on the scalar path.
610 return isUniform(Ptr, VF) && !blockNeedsPredication(I.getParent());
611}
612
613bool LoopVectorizationLegality::canVectorizeOuterLoop() {
614 assert(!TheLoop->isInnermost() && "We are not vectorizing an outer loop.");
615 // Store the result and return it at the end instead of exiting early, in case
616 // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
617 bool Result = true;
618 bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
619
620 for (BasicBlock *BB : TheLoop->blocks()) {
621 // Check whether the BB terminator is a BranchInst. Any other terminator is
622 // not supported yet.
623 auto *Br = dyn_cast<BranchInst>(BB->getTerminator());
624 if (!Br) {
625 reportVectorizationFailure("Unsupported basic block terminator",
626 "loop control flow is not understood by vectorizer",
627 "CFGNotUnderstood", ORE, TheLoop);
628 if (DoExtraAnalysis)
629 Result = false;
630 else
631 return false;
632 }
633
634 // Check whether the BranchInst is a supported one. Only unconditional
635 // branches, conditional branches with an outer loop invariant condition or
636 // backedges are supported.
637 // FIXME: We skip these checks when VPlan predication is enabled as we
638 // want to allow divergent branches. This whole check will be removed
639 // once VPlan predication is on by default.
640 if (Br && Br->isConditional() &&
641 !TheLoop->isLoopInvariant(Br->getCondition()) &&
642 !LI->isLoopHeader(Br->getSuccessor(0)) &&
643 !LI->isLoopHeader(Br->getSuccessor(1))) {
644 reportVectorizationFailure("Unsupported conditional branch",
645 "loop control flow is not understood by vectorizer",
646 "CFGNotUnderstood", ORE, TheLoop);
647 if (DoExtraAnalysis)
648 Result = false;
649 else
650 return false;
651 }
652 }
653
654 // Check whether inner loops are uniform. At this point, we only support
655 // simple outer loops scenarios with uniform nested loops.
656 if (!isUniformLoopNest(TheLoop /*loop nest*/,
657 TheLoop /*context outer loop*/)) {
658 reportVectorizationFailure("Outer loop contains divergent loops",
659 "loop control flow is not understood by vectorizer",
660 "CFGNotUnderstood", ORE, TheLoop);
661 if (DoExtraAnalysis)
662 Result = false;
663 else
664 return false;
665 }
666
667 // Check whether we are able to set up outer loop induction.
668 if (!setupOuterLoopInductions()) {
669 reportVectorizationFailure("Unsupported outer loop Phi(s)",
670 "Unsupported outer loop Phi(s)",
671 "UnsupportedPhi", ORE, TheLoop);
672 if (DoExtraAnalysis)
673 Result = false;
674 else
675 return false;
676 }
677
678 return Result;
679}
680
681void LoopVectorizationLegality::addInductionPhi(
682 PHINode *Phi, const InductionDescriptor &ID,
683 SmallPtrSetImpl<Value *> &AllowedExit) {
684 Inductions[Phi] = ID;
685
686 // In case this induction also comes with casts that we know we can ignore
687 // in the vectorized loop body, record them here. All casts could be recorded
688 // here for ignoring, but suffices to record only the first (as it is the
689 // only one that may bw used outside the cast sequence).
690 const SmallVectorImpl<Instruction *> &Casts = ID.getCastInsts();
691 if (!Casts.empty())
692 InductionCastsToIgnore.insert(*Casts.begin());
693
694 Type *PhiTy = Phi->getType();
695 const DataLayout &DL = Phi->getModule()->getDataLayout();
696
697 // Get the widest type.
698 if (!PhiTy->isFloatingPointTy()) {
699 if (!WidestIndTy)
700 WidestIndTy = convertPointerToIntegerType(DL, PhiTy);
701 else
702 WidestIndTy = getWiderType(DL, PhiTy, WidestIndTy);
703 }
704
705 // Int inductions are special because we only allow one IV.
706 if (ID.getKind() == InductionDescriptor::IK_IntInduction &&
707 ID.getConstIntStepValue() && ID.getConstIntStepValue()->isOne() &&
708 isa<Constant>(ID.getStartValue()) &&
709 cast<Constant>(ID.getStartValue())->isNullValue()) {
710
711 // Use the phi node with the widest type as induction. Use the last
712 // one if there are multiple (no good reason for doing this other
713 // than it is expedient). We've checked that it begins at zero and
714 // steps by one, so this is a canonical induction variable.
715 if (!PrimaryInduction || PhiTy == WidestIndTy)
716 PrimaryInduction = Phi;
717 }
718
719 // Both the PHI node itself, and the "post-increment" value feeding
720 // back into the PHI node may have external users.
721 // We can allow those uses, except if the SCEVs we have for them rely
722 // on predicates that only hold within the loop, since allowing the exit
723 // currently means re-using this SCEV outside the loop (see PR33706 for more
724 // details).
725 if (PSE.getPredicate().isAlwaysTrue()) {
726 AllowedExit.insert(Phi);
727 AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch()));
728 }
729
730 LLVM_DEBUG(dbgs() << "LV: Found an induction variable.\n");
731}
732
733bool LoopVectorizationLegality::setupOuterLoopInductions() {
734 BasicBlock *Header = TheLoop->getHeader();
735
736 // Returns true if a given Phi is a supported induction.
737 auto isSupportedPhi = [&](PHINode &Phi) -> bool {
739 if (InductionDescriptor::isInductionPHI(&Phi, TheLoop, PSE, ID) &&
741 addInductionPhi(&Phi, ID, AllowedExit);
742 return true;
743 } else {
744 // Bail out for any Phi in the outer loop header that is not a supported
745 // induction.
747 dbgs()
748 << "LV: Found unsupported PHI for outer loop vectorization.\n");
749 return false;
750 }
751 };
752
753 if (llvm::all_of(Header->phis(), isSupportedPhi))
754 return true;
755 else
756 return false;
757}
758
759/// Checks if a function is scalarizable according to the TLI, in
760/// the sense that it should be vectorized and then expanded in
761/// multiple scalar calls. This is represented in the
762/// TLI via mappings that do not specify a vector name, as in the
763/// following example:
764///
765/// const VecDesc VecIntrinsics[] = {
766/// {"llvm.phx.abs.i32", "", 4}
767/// };
768static bool isTLIScalarize(const TargetLibraryInfo &TLI, const CallInst &CI) {
769 const StringRef ScalarName = CI.getCalledFunction()->getName();
770 bool Scalarize = TLI.isFunctionVectorizable(ScalarName);
771 // Check that all known VFs are not associated to a vector
772 // function, i.e. the vector name is emty.
773 if (Scalarize) {
774 ElementCount WidestFixedVF, WidestScalableVF;
775 TLI.getWidestVF(ScalarName, WidestFixedVF, WidestScalableVF);
777 ElementCount::isKnownLE(VF, WidestFixedVF); VF *= 2)
778 Scalarize &= !TLI.isFunctionVectorizable(ScalarName, VF);
780 ElementCount::isKnownLE(VF, WidestScalableVF); VF *= 2)
781 Scalarize &= !TLI.isFunctionVectorizable(ScalarName, VF);
782 assert((WidestScalableVF.isZero() || !Scalarize) &&
783 "Caller may decide to scalarize a variant using a scalable VF");
784 }
785 return Scalarize;
786}
787
788bool LoopVectorizationLegality::canVectorizeInstrs() {
789 BasicBlock *Header = TheLoop->getHeader();
790
791 // For each block in the loop.
792 for (BasicBlock *BB : TheLoop->blocks()) {
793 // Scan the instructions in the block and look for hazards.
794 for (Instruction &I : *BB) {
795 if (auto *Phi = dyn_cast<PHINode>(&I)) {
796 Type *PhiTy = Phi->getType();
797 // Check that this PHI type is allowed.
798 if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() &&
799 !PhiTy->isPointerTy()) {
800 reportVectorizationFailure("Found a non-int non-pointer PHI",
801 "loop control flow is not understood by vectorizer",
802 "CFGNotUnderstood", ORE, TheLoop);
803 return false;
804 }
805
806 // If this PHINode is not in the header block, then we know that we
807 // can convert it to select during if-conversion. No need to check if
808 // the PHIs in this block are induction or reduction variables.
809 if (BB != Header) {
810 // Non-header phi nodes that have outside uses can be vectorized. Add
811 // them to the list of allowed exits.
812 // Unsafe cyclic dependencies with header phis are identified during
813 // legalization for reduction, induction and fixed order
814 // recurrences.
815 AllowedExit.insert(&I);
816 continue;
817 }
818
819 // We only allow if-converted PHIs with exactly two incoming values.
820 if (Phi->getNumIncomingValues() != 2) {
821 reportVectorizationFailure("Found an invalid PHI",
822 "loop control flow is not understood by vectorizer",
823 "CFGNotUnderstood", ORE, TheLoop, Phi);
824 return false;
825 }
826
828 if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes, DB, AC,
829 DT, PSE.getSE())) {
830 Requirements->addExactFPMathInst(RedDes.getExactFPMathInst());
831 AllowedExit.insert(RedDes.getLoopExitInstr());
832 Reductions[Phi] = RedDes;
833 continue;
834 }
835
836 // We prevent matching non-constant strided pointer IVS to preserve
837 // historical vectorizer behavior after a generalization of the
838 // IVDescriptor code. The intent is to remove this check, but we
839 // have to fix issues around code quality for such loops first.
840 auto isDisallowedStridedPointerInduction =
841 [](const InductionDescriptor &ID) {
843 return false;
844 return ID.getKind() == InductionDescriptor::IK_PtrInduction &&
845 ID.getConstIntStepValue() == nullptr;
846 };
847
848 // TODO: Instead of recording the AllowedExit, it would be good to
849 // record the complementary set: NotAllowedExit. These include (but may
850 // not be limited to):
851 // 1. Reduction phis as they represent the one-before-last value, which
852 // is not available when vectorized
853 // 2. Induction phis and increment when SCEV predicates cannot be used
854 // outside the loop - see addInductionPhi
855 // 3. Non-Phis with outside uses when SCEV predicates cannot be used
856 // outside the loop - see call to hasOutsideLoopUser in the non-phi
857 // handling below
858 // 4. FixedOrderRecurrence phis that can possibly be handled by
859 // extraction.
860 // By recording these, we can then reason about ways to vectorize each
861 // of these NotAllowedExit.
863 if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID) &&
864 !isDisallowedStridedPointerInduction(ID)) {
865 addInductionPhi(Phi, ID, AllowedExit);
866 Requirements->addExactFPMathInst(ID.getExactFPMathInst());
867 continue;
868 }
869
870 if (RecurrenceDescriptor::isFixedOrderRecurrence(Phi, TheLoop, DT)) {
871 AllowedExit.insert(Phi);
872 FixedOrderRecurrences.insert(Phi);
873 continue;
874 }
875
876 // As a last resort, coerce the PHI to a AddRec expression
877 // and re-try classifying it a an induction PHI.
878 if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID, true) &&
879 !isDisallowedStridedPointerInduction(ID)) {
880 addInductionPhi(Phi, ID, AllowedExit);
881 continue;
882 }
883
884 reportVectorizationFailure("Found an unidentified PHI",
885 "value that could not be identified as "
886 "reduction is used outside the loop",
887 "NonReductionValueUsedOutsideLoop", ORE, TheLoop, Phi);
888 return false;
889 } // end of PHI handling
890
891 // We handle calls that:
892 // * Are debug info intrinsics.
893 // * Have a mapping to an IR intrinsic.
894 // * Have a vector version available.
895 auto *CI = dyn_cast<CallInst>(&I);
896
897 if (CI && !getVectorIntrinsicIDForCall(CI, TLI) &&
898 !isa<DbgInfoIntrinsic>(CI) &&
899 !(CI->getCalledFunction() && TLI &&
900 (!VFDatabase::getMappings(*CI).empty() ||
901 isTLIScalarize(*TLI, *CI)))) {
902 // If the call is a recognized math libary call, it is likely that
903 // we can vectorize it given loosened floating-point constraints.
905 bool IsMathLibCall =
906 TLI && CI->getCalledFunction() &&
907 CI->getType()->isFloatingPointTy() &&
908 TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) &&
909 TLI->hasOptimizedCodeGen(Func);
910
911 if (IsMathLibCall) {
912 // TODO: Ideally, we should not use clang-specific language here,
913 // but it's hard to provide meaningful yet generic advice.
914 // Also, should this be guarded by allowExtraAnalysis() and/or be part
915 // of the returned info from isFunctionVectorizable()?
917 "Found a non-intrinsic callsite",
918 "library call cannot be vectorized. "
919 "Try compiling with -fno-math-errno, -ffast-math, "
920 "or similar flags",
921 "CantVectorizeLibcall", ORE, TheLoop, CI);
922 } else {
923 reportVectorizationFailure("Found a non-intrinsic callsite",
924 "call instruction cannot be vectorized",
925 "CantVectorizeLibcall", ORE, TheLoop, CI);
926 }
927 return false;
928 }
929
930 // Some intrinsics have scalar arguments and should be same in order for
931 // them to be vectorized (i.e. loop invariant).
932 if (CI) {
933 auto *SE = PSE.getSE();
934 Intrinsic::ID IntrinID = getVectorIntrinsicIDForCall(CI, TLI);
935 for (unsigned i = 0, e = CI->arg_size(); i != e; ++i)
936 if (isVectorIntrinsicWithScalarOpAtArg(IntrinID, i)) {
937 if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(i)), TheLoop)) {
938 reportVectorizationFailure("Found unvectorizable intrinsic",
939 "intrinsic instruction cannot be vectorized",
940 "CantVectorizeIntrinsic", ORE, TheLoop, CI);
941 return false;
942 }
943 }
944 }
945
946 // Check that the instruction return type is vectorizable.
947 // Also, we can't vectorize extractelement instructions.
948 if ((!VectorType::isValidElementType(I.getType()) &&
949 !I.getType()->isVoidTy()) ||
950 isa<ExtractElementInst>(I)) {
951 reportVectorizationFailure("Found unvectorizable type",
952 "instruction return type cannot be vectorized",
953 "CantVectorizeInstructionReturnType", ORE, TheLoop, &I);
954 return false;
955 }
956
957 // Check that the stored type is vectorizable.
958 if (auto *ST = dyn_cast<StoreInst>(&I)) {
959 Type *T = ST->getValueOperand()->getType();
961 reportVectorizationFailure("Store instruction cannot be vectorized",
962 "store instruction cannot be vectorized",
963 "CantVectorizeStore", ORE, TheLoop, ST);
964 return false;
965 }
966
967 // For nontemporal stores, check that a nontemporal vector version is
968 // supported on the target.
969 if (ST->getMetadata(LLVMContext::MD_nontemporal)) {
970 // Arbitrarily try a vector of 2 elements.
971 auto *VecTy = FixedVectorType::get(T, /*NumElts=*/2);
972 assert(VecTy && "did not find vectorized version of stored type");
973 if (!TTI->isLegalNTStore(VecTy, ST->getAlign())) {
975 "nontemporal store instruction cannot be vectorized",
976 "nontemporal store instruction cannot be vectorized",
977 "CantVectorizeNontemporalStore", ORE, TheLoop, ST);
978 return false;
979 }
980 }
981
982 } else if (auto *LD = dyn_cast<LoadInst>(&I)) {
983 if (LD->getMetadata(LLVMContext::MD_nontemporal)) {
984 // For nontemporal loads, check that a nontemporal vector version is
985 // supported on the target (arbitrarily try a vector of 2 elements).
986 auto *VecTy = FixedVectorType::get(I.getType(), /*NumElts=*/2);
987 assert(VecTy && "did not find vectorized version of load type");
988 if (!TTI->isLegalNTLoad(VecTy, LD->getAlign())) {
990 "nontemporal load instruction cannot be vectorized",
991 "nontemporal load instruction cannot be vectorized",
992 "CantVectorizeNontemporalLoad", ORE, TheLoop, LD);
993 return false;
994 }
995 }
996
997 // FP instructions can allow unsafe algebra, thus vectorizable by
998 // non-IEEE-754 compliant SIMD units.
999 // This applies to floating-point math operations and calls, not memory
1000 // operations, shuffles, or casts, as they don't change precision or
1001 // semantics.
1002 } else if (I.getType()->isFloatingPointTy() && (CI || I.isBinaryOp()) &&
1003 !I.isFast()) {
1004 LLVM_DEBUG(dbgs() << "LV: Found FP op with unsafe algebra.\n");
1005 Hints->setPotentiallyUnsafe();
1006 }
1007
1008 // Reduction instructions are allowed to have exit users.
1009 // All other instructions must not have external users.
1010 if (hasOutsideLoopUser(TheLoop, &I, AllowedExit)) {
1011 // We can safely vectorize loops where instructions within the loop are
1012 // used outside the loop only if the SCEV predicates within the loop is
1013 // same as outside the loop. Allowing the exit means reusing the SCEV
1014 // outside the loop.
1015 if (PSE.getPredicate().isAlwaysTrue()) {
1016 AllowedExit.insert(&I);
1017 continue;
1018 }
1019 reportVectorizationFailure("Value cannot be used outside the loop",
1020 "value cannot be used outside the loop",
1021 "ValueUsedOutsideLoop", ORE, TheLoop, &I);
1022 return false;
1023 }
1024 } // next instr.
1025 }
1026
1027 if (!PrimaryInduction) {
1028 if (Inductions.empty()) {
1029 reportVectorizationFailure("Did not find one integer induction var",
1030 "loop induction variable could not be identified",
1031 "NoInductionVariable", ORE, TheLoop);
1032 return false;
1033 } else if (!WidestIndTy) {
1034 reportVectorizationFailure("Did not find one integer induction var",
1035 "integer loop induction variable could not be identified",
1036 "NoIntegerInductionVariable", ORE, TheLoop);
1037 return false;
1038 } else {
1039 LLVM_DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
1040 }
1041 }
1042
1043 // Now we know the widest induction type, check if our found induction
1044 // is the same size. If it's not, unset it here and InnerLoopVectorizer
1045 // will create another.
1046 if (PrimaryInduction && WidestIndTy != PrimaryInduction->getType())
1047 PrimaryInduction = nullptr;
1048
1049 return true;
1050}
1051
1052bool LoopVectorizationLegality::canVectorizeMemory() {
1053 LAI = &LAIs.getInfo(*TheLoop);
1054 const OptimizationRemarkAnalysis *LAR = LAI->getReport();
1055 if (LAR) {
1056 ORE->emit([&]() {
1057 return OptimizationRemarkAnalysis(Hints->vectorizeAnalysisPassName(),
1058 "loop not vectorized: ", *LAR);
1059 });
1060 }
1061
1062 if (!LAI->canVectorizeMemory())
1063 return false;
1064
1065 // We can vectorize stores to invariant address when final reduction value is
1066 // guaranteed to be stored at the end of the loop. Also, if decision to
1067 // vectorize loop is made, runtime checks are added so as to make sure that
1068 // invariant address won't alias with any other objects.
1069 if (!LAI->getStoresToInvariantAddresses().empty()) {
1070 // For each invariant address, check if last stored value is unconditional
1071 // and the address is not calculated inside the loop.
1072 for (StoreInst *SI : LAI->getStoresToInvariantAddresses()) {
1074 continue;
1075
1076 if (blockNeedsPredication(SI->getParent())) {
1078 "We don't allow storing to uniform addresses",
1079 "write of conditional recurring variant value to a loop "
1080 "invariant address could not be vectorized",
1081 "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
1082 return false;
1083 }
1084
1085 // Invariant address should be defined outside of loop. LICM pass usually
1086 // makes sure it happens, but in rare cases it does not, we do not want
1087 // to overcomplicate vectorization to support this case.
1088 if (Instruction *Ptr = dyn_cast<Instruction>(SI->getPointerOperand())) {
1089 if (TheLoop->contains(Ptr)) {
1091 "Invariant address is calculated inside the loop",
1092 "write to a loop invariant address could not "
1093 "be vectorized",
1094 "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
1095 return false;
1096 }
1097 }
1098 }
1099
1101 // For each invariant address, check its last stored value is the result
1102 // of one of our reductions.
1103 //
1104 // We do not check if dependence with loads exists because they are
1105 // currently rejected earlier in LoopAccessInfo::analyzeLoop. In case this
1106 // behaviour changes we have to modify this code.
1107 ScalarEvolution *SE = PSE.getSE();
1108 SmallVector<StoreInst *, 4> UnhandledStores;
1109 for (StoreInst *SI : LAI->getStoresToInvariantAddresses()) {
1111 // Earlier stores to this address are effectively deadcode.
1112 // With opaque pointers it is possible for one pointer to be used with
1113 // different sizes of stored values:
1114 // store i32 0, ptr %x
1115 // store i8 0, ptr %x
1116 // The latest store doesn't complitely overwrite the first one in the
1117 // example. That is why we have to make sure that types of stored
1118 // values are same.
1119 // TODO: Check that bitwidth of unhandled store is smaller then the
1120 // one that overwrites it and add a test.
1121 erase_if(UnhandledStores, [SE, SI](StoreInst *I) {
1122 return storeToSameAddress(SE, SI, I) &&
1123 I->getValueOperand()->getType() ==
1124 SI->getValueOperand()->getType();
1125 });
1126 continue;
1127 }
1128 UnhandledStores.push_back(SI);
1129 }
1130
1131 bool IsOK = UnhandledStores.empty();
1132 // TODO: we should also validate against InvariantMemSets.
1133 if (!IsOK) {
1135 "We don't allow storing to uniform addresses",
1136 "write to a loop invariant address could not "
1137 "be vectorized",
1138 "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
1139 return false;
1140 }
1141 }
1142 }
1143
1144 PSE.addPredicate(LAI->getPSE().getPredicate());
1145 return true;
1146}
1147
1149 bool EnableStrictReductions) {
1150
1151 // First check if there is any ExactFP math or if we allow reassociations
1152 if (!Requirements->getExactFPInst() || Hints->allowReordering())
1153 return true;
1154
1155 // If the above is false, we have ExactFPMath & do not allow reordering.
1156 // If the EnableStrictReductions flag is set, first check if we have any
1157 // Exact FP induction vars, which we cannot vectorize.
1158 if (!EnableStrictReductions ||
1159 any_of(getInductionVars(), [&](auto &Induction) -> bool {
1160 InductionDescriptor IndDesc = Induction.second;
1161 return IndDesc.getExactFPMathInst();
1162 }))
1163 return false;
1164
1165 // We can now only vectorize if all reductions with Exact FP math also
1166 // have the isOrdered flag set, which indicates that we can move the
1167 // reduction operations in-loop.
1168 return (all_of(getReductionVars(), [&](auto &Reduction) -> bool {
1169 const RecurrenceDescriptor &RdxDesc = Reduction.second;
1170 return !RdxDesc.hasExactFPMath() || RdxDesc.isOrdered();
1171 }));
1172}
1173
1175 return any_of(getReductionVars(), [&](auto &Reduction) -> bool {
1176 const RecurrenceDescriptor &RdxDesc = Reduction.second;
1177 return RdxDesc.IntermediateStore == SI;
1178 });
1179}
1180
1182 return any_of(getReductionVars(), [&](auto &Reduction) -> bool {
1183 const RecurrenceDescriptor &RdxDesc = Reduction.second;
1184 if (!RdxDesc.IntermediateStore)
1185 return false;
1186
1187 ScalarEvolution *SE = PSE.getSE();
1188 Value *InvariantAddress = RdxDesc.IntermediateStore->getPointerOperand();
1189 return V == InvariantAddress ||
1190 SE->getSCEV(V) == SE->getSCEV(InvariantAddress);
1191 });
1192}
1193
1195 Value *In0 = const_cast<Value *>(V);
1196 PHINode *PN = dyn_cast_or_null<PHINode>(In0);
1197 if (!PN)
1198 return false;
1199
1200 return Inductions.count(PN);
1201}
1202
1203const InductionDescriptor *
1205 if (!isInductionPhi(Phi))
1206 return nullptr;
1207 auto &ID = getInductionVars().find(Phi)->second;
1208 if (ID.getKind() == InductionDescriptor::IK_IntInduction ||
1210 return &ID;
1211 return nullptr;
1212}
1213
1214const InductionDescriptor *
1216 if (!isInductionPhi(Phi))
1217 return nullptr;
1218 auto &ID = getInductionVars().find(Phi)->second;
1220 return &ID;
1221 return nullptr;
1222}
1223
1225 const Value *V) const {
1226 auto *Inst = dyn_cast<Instruction>(V);
1227 return (Inst && InductionCastsToIgnore.count(Inst));
1228}
1229
1232}
1233
1235 const PHINode *Phi) const {
1236 return FixedOrderRecurrences.count(Phi);
1237}
1238
1240 return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
1241}
1242
1243bool LoopVectorizationLegality::blockCanBePredicated(
1244 BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs,
1245 SmallPtrSetImpl<const Instruction *> &MaskedOp) const {
1246 for (Instruction &I : *BB) {
1247 // We can predicate blocks with calls to assume, as long as we drop them in
1248 // case we flatten the CFG via predication.
1249 if (match(&I, m_Intrinsic<Intrinsic::assume>())) {
1250 MaskedOp.insert(&I);
1251 continue;
1252 }
1253
1254 // Do not let llvm.experimental.noalias.scope.decl block the vectorization.
1255 // TODO: there might be cases that it should block the vectorization. Let's
1256 // ignore those for now.
1257 if (isa<NoAliasScopeDeclInst>(&I))
1258 continue;
1259
1260 // We can allow masked calls if there's at least one vector variant, even
1261 // if we end up scalarizing due to the cost model calculations.
1262 // TODO: Allow other calls if they have appropriate attributes... readonly
1263 // and argmemonly?
1264 if (CallInst *CI = dyn_cast<CallInst>(&I))
1266 MaskedOp.insert(CI);
1267 continue;
1268 }
1269
1270 // Loads are handled via masking (or speculated if safe to do so.)
1271 if (auto *LI = dyn_cast<LoadInst>(&I)) {
1272 if (!SafePtrs.count(LI->getPointerOperand()))
1273 MaskedOp.insert(LI);
1274 continue;
1275 }
1276
1277 // Predicated store requires some form of masking:
1278 // 1) masked store HW instruction,
1279 // 2) emulation via load-blend-store (only if safe and legal to do so,
1280 // be aware on the race conditions), or
1281 // 3) element-by-element predicate check and scalar store.
1282 if (auto *SI = dyn_cast<StoreInst>(&I)) {
1283 MaskedOp.insert(SI);
1284 continue;
1285 }
1286
1287 if (I.mayReadFromMemory() || I.mayWriteToMemory() || I.mayThrow())
1288 return false;
1289 }
1290
1291 return true;
1292}
1293
1294bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
1295 if (!EnableIfConversion) {
1296 reportVectorizationFailure("If-conversion is disabled",
1297 "if-conversion is disabled",
1298 "IfConversionDisabled",
1299 ORE, TheLoop);
1300 return false;
1301 }
1302
1303 assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable");
1304
1305 // A list of pointers which are known to be dereferenceable within scope of
1306 // the loop body for each iteration of the loop which executes. That is,
1307 // the memory pointed to can be dereferenced (with the access size implied by
1308 // the value's type) unconditionally within the loop header without
1309 // introducing a new fault.
1310 SmallPtrSet<Value *, 8> SafePointers;
1311
1312 // Collect safe addresses.
1313 for (BasicBlock *BB : TheLoop->blocks()) {
1314 if (!blockNeedsPredication(BB)) {
1315 for (Instruction &I : *BB)
1316 if (auto *Ptr = getLoadStorePointerOperand(&I))
1317 SafePointers.insert(Ptr);
1318 continue;
1319 }
1320
1321 // For a block which requires predication, a address may be safe to access
1322 // in the loop w/o predication if we can prove dereferenceability facts
1323 // sufficient to ensure it'll never fault within the loop. For the moment,
1324 // we restrict this to loads; stores are more complicated due to
1325 // concurrency restrictions.
1326 ScalarEvolution &SE = *PSE.getSE();
1327 for (Instruction &I : *BB) {
1328 LoadInst *LI = dyn_cast<LoadInst>(&I);
1329 if (LI && !LI->getType()->isVectorTy() && !mustSuppressSpeculation(*LI) &&
1330 isDereferenceableAndAlignedInLoop(LI, TheLoop, SE, *DT, AC))
1331 SafePointers.insert(LI->getPointerOperand());
1332 }
1333 }
1334
1335 // Collect the blocks that need predication.
1336 for (BasicBlock *BB : TheLoop->blocks()) {
1337 // We don't support switch statements inside loops.
1338 if (!isa<BranchInst>(BB->getTerminator())) {
1339 reportVectorizationFailure("Loop contains a switch statement",
1340 "loop contains a switch statement",
1341 "LoopContainsSwitch", ORE, TheLoop,
1342 BB->getTerminator());
1343 return false;
1344 }
1345
1346 // We must be able to predicate all blocks that need to be predicated.
1347 if (blockNeedsPredication(BB) &&
1348 !blockCanBePredicated(BB, SafePointers, MaskedOp)) {
1350 "Control flow cannot be substituted for a select",
1351 "control flow cannot be substituted for a select", "NoCFGForSelect",
1352 ORE, TheLoop, BB->getTerminator());
1353 return false;
1354 }
1355 }
1356
1357 // We can if-convert this loop.
1358 return true;
1359}
1360
1361// Helper function to canVectorizeLoopNestCFG.
1362bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
1363 bool UseVPlanNativePath) {
1364 assert((UseVPlanNativePath || Lp->isInnermost()) &&
1365 "VPlan-native path is not enabled.");
1366
1367 // TODO: ORE should be improved to show more accurate information when an
1368 // outer loop can't be vectorized because a nested loop is not understood or
1369 // legal. Something like: "outer_loop_location: loop not vectorized:
1370 // (inner_loop_location) loop control flow is not understood by vectorizer".
1371
1372 // Store the result and return it at the end instead of exiting early, in case
1373 // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1374 bool Result = true;
1375 bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1376
1377 // We must have a loop in canonical form. Loops with indirectbr in them cannot
1378 // be canonicalized.
1379 if (!Lp->getLoopPreheader()) {
1380 reportVectorizationFailure("Loop doesn't have a legal pre-header",
1381 "loop control flow is not understood by vectorizer",
1382 "CFGNotUnderstood", ORE, TheLoop);
1383 if (DoExtraAnalysis)
1384 Result = false;
1385 else
1386 return false;
1387 }
1388
1389 // We must have a single backedge.
1390 if (Lp->getNumBackEdges() != 1) {
1391 reportVectorizationFailure("The loop must have a single backedge",
1392 "loop control flow is not understood by vectorizer",
1393 "CFGNotUnderstood", ORE, TheLoop);
1394 if (DoExtraAnalysis)
1395 Result = false;
1396 else
1397 return false;
1398 }
1399
1400 return Result;
1401}
1402
1403bool LoopVectorizationLegality::canVectorizeLoopNestCFG(
1404 Loop *Lp, bool UseVPlanNativePath) {
1405 // Store the result and return it at the end instead of exiting early, in case
1406 // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1407 bool Result = true;
1408 bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1409 if (!canVectorizeLoopCFG(Lp, UseVPlanNativePath)) {
1410 if (DoExtraAnalysis)
1411 Result = false;
1412 else
1413 return false;
1414 }
1415
1416 // Recursively check whether the loop control flow of nested loops is
1417 // understood.
1418 for (Loop *SubLp : *Lp)
1419 if (!canVectorizeLoopNestCFG(SubLp, UseVPlanNativePath)) {
1420 if (DoExtraAnalysis)
1421 Result = false;
1422 else
1423 return false;
1424 }
1425
1426 return Result;
1427}
1428
1429bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
1430 // Store the result and return it at the end instead of exiting early, in case
1431 // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1432 bool Result = true;
1433
1434 bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1435 // Check whether the loop-related control flow in the loop nest is expected by
1436 // vectorizer.
1437 if (!canVectorizeLoopNestCFG(TheLoop, UseVPlanNativePath)) {
1438 if (DoExtraAnalysis)
1439 Result = false;
1440 else
1441 return false;
1442 }
1443
1444 // We need to have a loop header.
1445 LLVM_DEBUG(dbgs() << "LV: Found a loop: " << TheLoop->getHeader()->getName()
1446 << '\n');
1447
1448 // Specific checks for outer loops. We skip the remaining legal checks at this
1449 // point because they don't support outer loops.
1450 if (!TheLoop->isInnermost()) {
1451 assert(UseVPlanNativePath && "VPlan-native path is not enabled.");
1452
1453 if (!canVectorizeOuterLoop()) {
1454 reportVectorizationFailure("Unsupported outer loop",
1455 "unsupported outer loop",
1456 "UnsupportedOuterLoop",
1457 ORE, TheLoop);
1458 // TODO: Implement DoExtraAnalysis when subsequent legal checks support
1459 // outer loops.
1460 return false;
1461 }
1462
1463 LLVM_DEBUG(dbgs() << "LV: We can vectorize this outer loop!\n");
1464 return Result;
1465 }
1466
1467 assert(TheLoop->isInnermost() && "Inner loop expected.");
1468 // Check if we can if-convert non-single-bb loops.
1469 unsigned NumBlocks = TheLoop->getNumBlocks();
1470 if (NumBlocks != 1 && !canVectorizeWithIfConvert()) {
1471 LLVM_DEBUG(dbgs() << "LV: Can't if-convert the loop.\n");
1472 if (DoExtraAnalysis)
1473 Result = false;
1474 else
1475 return false;
1476 }
1477
1478 // Check if we can vectorize the instructions and CFG in this loop.
1479 if (!canVectorizeInstrs()) {
1480 LLVM_DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n");
1481 if (DoExtraAnalysis)
1482 Result = false;
1483 else
1484 return false;
1485 }
1486
1487 // Go over each instruction and look at memory deps.
1488 if (!canVectorizeMemory()) {
1489 LLVM_DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n");
1490 if (DoExtraAnalysis)
1491 Result = false;
1492 else
1493 return false;
1494 }
1495
1496 LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop"
1498 ? " (with a runtime bound check)"
1499 : "")
1500 << "!\n");
1501
1502 unsigned SCEVThreshold = VectorizeSCEVCheckThreshold;
1503 if (Hints->getForce() == LoopVectorizeHints::FK_Enabled)
1504 SCEVThreshold = PragmaVectorizeSCEVCheckThreshold;
1505
1506 if (PSE.getPredicate().getComplexity() > SCEVThreshold) {
1507 reportVectorizationFailure("Too many SCEV checks needed",
1508 "Too many SCEV assumptions need to be made and checked at runtime",
1509 "TooManySCEVRunTimeChecks", ORE, TheLoop);
1510 if (DoExtraAnalysis)
1511 Result = false;
1512 else
1513 return false;
1514 }
1515
1516 // Okay! We've done all the tests. If any have failed, return false. Otherwise
1517 // we can vectorize, and at this point we don't have any other mem analysis
1518 // which may limit our maximum vectorization factor, so just return true with
1519 // no restrictions.
1520 return Result;
1521}
1522
1524
1525 LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n");
1526
1527 SmallPtrSet<const Value *, 8> ReductionLiveOuts;
1528
1529 for (const auto &Reduction : getReductionVars())
1530 ReductionLiveOuts.insert(Reduction.second.getLoopExitInstr());
1531
1532 // TODO: handle non-reduction outside users when tail is folded by masking.
1533 for (auto *AE : AllowedExit) {
1534 // Check that all users of allowed exit values are inside the loop or
1535 // are the live-out of a reduction.
1536 if (ReductionLiveOuts.count(AE))
1537 continue;
1538 for (User *U : AE->users()) {
1539 Instruction *UI = cast<Instruction>(U);
1540 if (TheLoop->contains(UI))
1541 continue;
1542 LLVM_DEBUG(
1543 dbgs()
1544 << "LV: Cannot fold tail by masking, loop has an outside user for "
1545 << *UI << "\n");
1546 return false;
1547 }
1548 }
1549
1550 // The list of pointers that we can safely read and write to remains empty.
1551 SmallPtrSet<Value *, 8> SafePointers;
1552
1553 // Collect masked ops in temporary set first to avoid partially populating
1554 // MaskedOp if a block cannot be predicated.
1556
1557 // Check and mark all blocks for predication, including those that ordinarily
1558 // do not need predication such as the header block.
1559 for (BasicBlock *BB : TheLoop->blocks()) {
1560 if (!blockCanBePredicated(BB, SafePointers, TmpMaskedOp)) {
1561 LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking as requested.\n");
1562 return false;
1563 }
1564 }
1565
1566 LLVM_DEBUG(dbgs() << "LV: can fold tail by masking.\n");
1567
1568 MaskedOp.insert(TmpMaskedOp.begin(), TmpMaskedOp.end());
1569 return true;
1570}
1571
1572} // namespace llvm
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:680
#define LLVM_DEBUG(X)
Definition: Debug.h:101
std::string Name
#define DEBUG_TYPE
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition: Lint.cpp:526
loop Loop Strength Reduction
static cl::opt< LoopVectorizeHints::ScalableForceKind > ForceScalableVectorization("scalable-vectorization", cl::init(LoopVectorizeHints::SK_Unspecified), cl::Hidden, cl::desc("Control whether the compiler can use scalable vectors to " "vectorize a loop"), cl::values(clEnumValN(LoopVectorizeHints::SK_FixedWidthOnly, "off", "Scalable vectorization is disabled."), clEnumValN(LoopVectorizeHints::SK_PreferScalable, "preferred", "Scalable vectorization is available and favored when the " "cost is inconclusive."), clEnumValN(LoopVectorizeHints::SK_PreferScalable, "on", "Scalable vectorization is available and favored when the " "cost is inconclusive.")))
#define LV_NAME
static cl::opt< unsigned > PragmaVectorizeSCEVCheckThreshold("pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed with a " "vectorize(enable) pragma"))
static const unsigned MaxInterleaveFactor
Maximum vectorization interleave count.
static cl::opt< bool > AllowStridedPointerIVs("lv-strided-pointer-ivs", cl::init(false), cl::Hidden, cl::desc("Enable recognition of non-constant strided " "pointer induction variables."))
static cl::opt< unsigned > VectorizeSCEVCheckThreshold("vectorize-scev-check-threshold", cl::init(16), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed."))
static cl::opt< bool > EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden, cl::desc("Enable if-conversion during vectorization."))
This file defines the LoopVectorizationLegality class.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define H(x, y, z)
Definition: MD5.cpp:57
LLVMContext & Context
static bool rewrite(Function &F)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This pass exposes codegen information to IR-level passes.
Virtual Register Rewriter
Definition: VirtRegMap.cpp:237
static const uint32_t IV[8]
Definition: blake3_impl.h:78
Class for arbitrary precision integers.
Definition: APInt.h:76
LLVM Basic Block Representation.
Definition: BasicBlock.h:56
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:112
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:35
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:127
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Definition: InstrTypes.h:1412
This class represents a function call, abstracting a target machine's calling convention.
static ConstantAsMetadata * get(Constant *C)
Definition: Metadata.h:419
This is the shared class of boolean and integer constants.
Definition: Constants.h:78
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:888
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
static constexpr ElementCount getScalable(ScalarTy MinVal)
Definition: TypeSize.h:294
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition: TypeSize.h:291
constexpr bool isScalar() const
Exactly one element.
Definition: TypeSize.h:302
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:693
A struct for saving information about induction variables.
@ IK_FpInduction
Floating point induction variable.
@ IK_PtrInduction
Pointer induction var. Step = C.
@ IK_IntInduction
Integer induction variable. Step = C.
static bool isInductionPHI(PHINode *Phi, const Loop *L, ScalarEvolution *SE, InductionDescriptor &D, const SCEV *Expr=nullptr, SmallVectorImpl< Instruction * > *CastsToIgnore=nullptr)
Returns true if Phi is an induction in the loop L.
Instruction * getExactFPMathInst()
Returns floating-point induction operator that does not allow reassociation (transforming the inducti...
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
An instruction for reading from memory.
Definition: Instructions.h:177
Value * getPointerOperand()
Definition: Instructions.h:264
const LoopAccessInfo & getInfo(Loop &L)
bool hasDependenceInvolvingLoopInvariantAddress() const
If the loop has memory dependence involving an invariant address, i.e.
ArrayRef< StoreInst * > getStoresToInvariantAddresses() const
Return the list of stores to invariant addresses.
const OptimizationRemarkAnalysis * getReport() const
The diagnostics report generated for the analysis.
const RuntimePointerChecking * getRuntimePointerChecking() const
bool canVectorizeMemory() const
Return true we can analyze the memory accesses in the loop and there are no memory dependence cycles.
bool isInvariant(Value *V) const
Returns true if value V is loop invariant.
const PredicatedScalarEvolution & getPSE() const
Used to add runtime SCEV checks.
static bool blockNeedsPredication(BasicBlock *BB, Loop *TheLoop, DominatorTree *DT)
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
const DenseMap< Value *, const SCEV * > & getSymbolicStrides() const
If an access has a symbolic strides, this maps the pointer value to the stride symbol.
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
unsigned getNumBlocks() const
Get the number of blocks in this loop in constant time.
unsigned getNumBackEdges() const
Calculate the number of back edges to the loop header.
BlockT * getHeader() const
iterator_range< block_iterator > blocks() const
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
bool isLoopHeader(const BlockT *BB) const
bool isInvariantStoreOfReduction(StoreInst *SI)
Returns True if given store is a final invariant store of one of the reductions found in the loop.
bool isInvariantAddressOfReduction(Value *V)
Returns True if given address is invariant and is used to store recurrent expression.
bool blockNeedsPredication(BasicBlock *BB) const
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
bool canVectorize(bool UseVPlanNativePath)
Returns true if it is legal to vectorize this loop.
int isConsecutivePtr(Type *AccessTy, Value *Ptr) const
Check if this pointer is consecutive when vectorizing.
bool canVectorizeFPMath(bool EnableStrictReductions)
Returns true if it is legal to vectorize the FP math operations in this loop.
bool isFixedOrderRecurrence(const PHINode *Phi) const
Returns True if Phi is a fixed-order recurrence in this loop.
const InductionDescriptor * getPointerInductionDescriptor(PHINode *Phi) const
Returns a pointer to the induction descriptor, if Phi is pointer induction.
const InductionDescriptor * getIntOrFpInductionDescriptor(PHINode *Phi) const
Returns a pointer to the induction descriptor, if Phi is an integer or floating point induction.
bool isInductionPhi(const Value *V) const
Returns True if V is a Phi node of an induction variable in this loop.
bool isUniform(Value *V, ElementCount VF) const
Returns true if value V is uniform across VF lanes, when VF is provided, and otherwise if V is invari...
const InductionList & getInductionVars() const
Returns the induction variables found in the loop.
bool isInvariant(Value *V) const
Returns true if value V is uniform across VF lanes, when VF is provided, and otherwise if V is invari...
const ReductionList & getReductionVars() const
Returns the reduction variables found in the loop.
bool prepareToFoldTailByMasking()
Return true if we can vectorize this loop while folding its tail by masking, and mark all respective ...
bool isUniformMemOp(Instruction &I, ElementCount VF) const
A uniform memory op is a load or store which accesses the same memory location on all VF lanes,...
bool isInductionVariable(const Value *V) const
Returns True if V can be considered as an induction variable in this loop.
bool isCastedInductionVariable(const Value *V) const
Returns True if V is a cast that is part of an induction def-use chain, and had been proven to be red...
void addExactFPMathInst(Instruction *I)
Track the 1st floating-point instruction that can not be reassociated.
@ SK_PreferScalable
Vectorize loops using scalable vectors or fixed-width vectors, but favor scalable vectors when the co...
@ SK_FixedWidthOnly
Disables vectorization with scalable vectors.
bool allowVectorization(Function *F, Loop *L, bool VectorizeOnlyWhenForced) const
bool allowReordering() const
When enabling loop hints are provided we allow the vectorizer to change the order of operations that ...
void emitRemarkWithHints() const
Dumps all the hint information.
void setAlreadyVectorized()
Mark the loop L as already vectorized by setting the width to 1.
LoopVectorizeHints(const Loop *L, bool InterleaveOnlyWhenForced, OptimizationRemarkEmitter &ORE, const TargetTransformInfo *TTI=nullptr)
const char * vectorizeAnalysisPassName() const
If hints are provided that force vectorization, use the AlwaysPrint pass name to force the frontend t...
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:47
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition: LoopInfo.cpp:631
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:60
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
Definition: LoopInfo.cpp:525
PHINode * getCanonicalInductionVariable() const
Check to see if the loop has a canonical induction variable: an integer recurrence that starts at 0 a...
Definition: LoopInfo.cpp:150
MDNode * getLoopID() const
Return the llvm.loop loop id metadata node for this loop if it is present.
Definition: LoopInfo.cpp:501
Metadata node.
Definition: Metadata.h:950
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1303
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1416
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1309
A single uniqued string.
Definition: Metadata.h:611
StringRef getString() const
Definition: Metadata.cpp:509
static MDString * get(LLVMContext &Context, StringRef Str)
Definition: Metadata.cpp:499
size_type count(const KeyT &Key) const
Definition: MapVector.h:144
iterator find(const KeyT &Key)
Definition: MapVector.h:146
bool empty() const
Definition: MapVector.h:79
Root of the metadata hierarchy.
Definition: Metadata.h:61
Diagnostic information for optimization analysis remarks.
The optimization diagnostic interface.
bool allowExtraAnalysis(StringRef PassName) const
Whether we allow for extra compile-time budget to perform more analysis to produce fewer false positi...
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Diagnostic information for missed-optimization remarks.
void addPredicate(const SCEVPredicate &Pred)
Adds a new predicate.
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
const SCEVPredicate & getPredicate() const
const SCEV * getSCEV(Value *V)
Returns the SCEV expression of V, in the context of the current SCEV predicate.
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Definition: IVDescriptors.h:72
Instruction * getExactFPMathInst() const
Returns 1st non-reassociative FP instruction in the PHI node's use-chain.
static bool isFixedOrderRecurrence(PHINode *Phi, Loop *TheLoop, DominatorTree *DT)
Returns true if Phi is a fixed-order recurrence.
bool hasExactFPMath() const
Returns true if the recurrence has floating-point math that requires precise (ordered) operations.
Instruction * getLoopExitInstr() const
static bool isReductionPHI(PHINode *Phi, Loop *TheLoop, RecurrenceDescriptor &RedDes, DemandedBits *DB=nullptr, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr, ScalarEvolution *SE=nullptr)
Returns true if Phi is a reduction in TheLoop.
bool isOrdered() const
Expose an ordered FP reduction to the instance users.
StoreInst * IntermediateStore
Reductions may store temporary or final result to an invariant address.
bool Need
This flag indicates if we need to add the runtime check.
This node represents a polynomial recurrence on the trip count of the specified loop.
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
virtual unsigned getComplexity() const
Returns the estimated complexity of this predicate.
virtual bool isAlwaysTrue() const =0
Returns true if the predicate is always true.
This visitor recursively visits a SCEV expression and re-writes it.
const SCEV * visit(const SCEV *S)
This means that we are dealing with an entirely unknown SCEV value, and only represent it as its LLVM...
This class represents an analyzed expression in the program.
The main scalar evolution driver.
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
const SCEV * getCouldNotCompute()
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:345
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:384
iterator end() const
Definition: SmallPtrSet.h:409
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:366
iterator begin() const
Definition: SmallPtrSet.h:404
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:451
bool empty() const
Definition: SmallVector.h:94
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:577
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
An instruction for storing to memory.
Definition: Instructions.h:301
Value * getPointerOperand()
Definition: Instructions.h:393
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
static constexpr size_t npos
Definition: StringRef.h:52
Provides information about what library functions are available for the current target.
bool hasOptimizedCodeGen(LibFunc F) const
Tests if the function is both available and a candidate for optimized code generation.
void getWidestVF(StringRef ScalarF, ElementCount &FixedVF, ElementCount &ScalableVF) const
Returns the largest vectorization factor used in the list of vector functions.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
bool isFunctionVectorizable(StringRef F, const ElementCount &VF) const
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
bool isLegalNTLoad(Type *DataType, Align Alignment) const
Return true if the target supports nontemporal load.
bool isLegalNTStore(Type *DataType, Align Alignment) const
Return true if the target supports nontemporal store.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
std::string str() const
Return the twine contents as a std::string.
Definition: Twine.cpp:17
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:265
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:255
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:129
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition: Type.h:185
static IntegerType * getInt32Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
static bool hasMaskedVariant(const CallInst &CI, std::optional< ElementCount > VF=std::nullopt)
Definition: VectorUtils.h:277
static SmallVector< VFInfo, 8 > getMappings(const CallInst &CI)
Retrieve all the VFInfo instances associated to the CallInst CI.
Definition: VectorUtils.h:266
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
iterator_range< user_iterator > users()
Definition: Value.h:421
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
static bool isValidElementType(Type *ElemTy)
Return true if the specified type is valid as an element type.
Definition: Type.cpp:684
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition: TypeSize.h:212
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:166
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:163
constexpr bool isZero() const
Definition: TypeSize.h:151
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:705
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
NodeAddr< PhiNode * > Phi
Definition: RDFGraph.h:390
NodeAddr< FuncNode * > Func
Definition: RDFGraph.h:393
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1727
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1685
Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
cl::opt< bool > HintsAllowReordering("hints-allow-reordering", cl::init(true), cl::Hidden, cl::desc("Allow enabling loop hints to reorder " "FP operations during vectorization."))
static Type * getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1)
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
static Type * convertPointerToIntegerType(const DataLayout &DL, Type *Ty)
static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp)
bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
static bool isUniformLoop(Loop *Lp, Loop *OuterLp)
bool mustSuppressSpeculation(const LoadInst &LI)
Return true if speculation of the given load must be suppressed to avoid ordering or interfering with...
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1734
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:429
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:264
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
std::optional< int64_t > getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const DenseMap< Value *, const SCEV * > &StridesMap=DenseMap< Value *, const SCEV * >(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of the access type size.
bool isDereferenceableAndAlignedInLoop(LoadInst *LI, Loop *L, ScalarEvolution &SE, DominatorTree &DT, AssumptionCache *AC=nullptr)
Return true if we can prove that the given load (which is assumed to be within the specified loop) wo...
Definition: Loads.cpp:260
static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst, SmallPtrSetImpl< Value * > &AllowedExit)
Check that the instruction has outside loop users and is not an identified reduction variable.
static bool storeToSameAddress(ScalarEvolution *SE, StoreInst *A, StoreInst *B)
Returns true if A and B have same pointer operands or same SCEVs addresses.
void reportVectorizationFailure(const StringRef DebugMsg, const StringRef OREMsg, const StringRef ORETag, OptimizationRemarkEmitter *ORE, Loop *TheLoop, Instruction *I=nullptr)
Reports a vectorization failure: print DebugMsg for debugging purposes along with the corresponding o...
llvm::MDNode * makePostTransformationMetadata(llvm::LLVMContext &Context, MDNode *OrigLoopID, llvm::ArrayRef< llvm::StringRef > RemovePrefixes, llvm::ArrayRef< llvm::MDNode * > AddAttrs)
Create a new LoopID after the loop has been transformed.
Definition: LoopInfo.cpp:1125
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:2021
bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx)
Identifies if the vector form of the intrinsic has a scalar operand.
static bool isTLIScalarize(const TargetLibraryInfo &TLI, const CallInst &CI)
Checks if a function is scalarizable according to the TLI, in the sense that it should be vectorized ...
bool SCEVExprContains(const SCEV *Root, PredTy Pred)
Return true if any node in Root satisfies the predicate Pred.
An object of this class is returned by queries that could not be answered.
TODO: The following VectorizationFactor was pulled out of LoopVectorizationCostModel class.
Collection of parameters shared between the Loop Vectorizer and the Loop Access Analysis.
static const unsigned MaxVectorWidth
Maximum SIMD width.
static bool isInterleaveForced()
True if force-vector-interleave was specified by the user.
static unsigned VectorizationInterleave
Interleave factor as overridden by the user.