LLVM 23.0.0git
LoopVectorizationLegality.cpp
Go to the documentation of this file.
1//===- LoopVectorizationLegality.cpp --------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file provides loop vectorization legality analysis. Original code
10// resided in LoopVectorize.cpp for a long time.
11//
12// At this point, it is implemented as a utility class, not as an analysis
13// pass. It should be easy to create an analysis pass around it if there
14// is a need (but D45420 needs to happen first).
15//
16
20#include "llvm/Analysis/Loads.h"
29#include "llvm/IR/Dominators.h"
34
35using namespace llvm;
36using namespace PatternMatch;
37using namespace LoopVectorizationUtils;
38
39#define LV_NAME "loop-vectorize"
40#define DEBUG_TYPE LV_NAME
41
42static cl::opt<bool>
43 EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
44 cl::desc("Enable if-conversion during vectorization."));
45
46static cl::opt<bool>
47AllowStridedPointerIVs("lv-strided-pointer-ivs", cl::init(false), cl::Hidden,
48 cl::desc("Enable recognition of non-constant strided "
49 "pointer induction variables."));
50
51static cl::opt<bool>
52 HintsAllowReordering("hints-allow-reordering", cl::init(true), cl::Hidden,
53 cl::desc("Allow enabling loop hints to reorder "
54 "FP operations during vectorization."));
55
58 "scalable-vectorization", cl::init(LoopVectorizeHints::SK_Unspecified),
60 cl::desc("Control whether the compiler can use scalable vectors to "
61 "vectorize a loop"),
64 "Scalable vectorization is disabled."),
67 "Scalable vectorization is available and favored when the "
68 "cost is inconclusive."),
71 "Scalable vectorization is available and favored when the "
72 "cost is inconclusive."),
75 "Scalable vectorization is available and always favored when "
76 "feasible")));
77
79 "enable-histogram-loop-vectorization", cl::init(false), cl::Hidden,
80 cl::desc("Enables autovectorization of some loops containing histograms"));
81
82/// Maximum vectorization interleave count.
83static const unsigned MaxInterleaveFactor = 16;
84
85namespace llvm {
86
87bool LoopVectorizeHints::Hint::validate(unsigned Val) {
88 switch (Kind) {
89 case HK_WIDTH:
91 case HK_INTERLEAVE:
92 return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor;
93 case HK_FORCE:
94 return (Val <= 1);
95 case HK_ISVECTORIZED:
96 case HK_PREDICATE:
97 case HK_SCALABLE:
98 return (Val == 0 || Val == 1);
99 }
100 return false;
101}
102
104 bool InterleaveOnlyWhenForced,
107 : Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH),
108 Interleave("interleave.count", InterleaveOnlyWhenForced, HK_INTERLEAVE),
109 Force("vectorize.enable", FK_Undefined, HK_FORCE),
110 IsVectorized("isvectorized", 0, HK_ISVECTORIZED),
111 Predicate("vectorize.predicate.enable", FK_Undefined, HK_PREDICATE),
112 Scalable("vectorize.scalable.enable", SK_Unspecified, HK_SCALABLE),
113 TheLoop(L), ORE(ORE) {
114 // Populate values with existing loop metadata.
115 getHintsFromMetadata();
116
117 // force-vector-interleave overrides DisableInterleaving.
120
121 // If the metadata doesn't explicitly specify whether to enable scalable
122 // vectorization, then decide based on the following criteria (increasing
123 // level of priority):
124 // - Target default
125 // - Metadata width
126 // - Force option (always overrides)
128 if (TTI)
129 Scalable.Value = TTI->enableScalableVectorization() ? SK_PreferScalable
131
132 if (Width.Value)
133 // If the width is set, but the metadata says nothing about the scalable
134 // property, then assume it concerns only a fixed-width UserVF.
135 // If width is not set, the flag takes precedence.
136 Scalable.Value = SK_FixedWidthOnly;
137 }
138
139 // If the flag is set to force any use of scalable vectors, override the loop
140 // hints.
141 if (ForceScalableVectorization.getValue() !=
143 Scalable.Value = ForceScalableVectorization.getValue();
144
145 // Scalable vectorization is disabled if no preference is specified.
147 Scalable.Value = SK_FixedWidthOnly;
148
149 if (IsVectorized.Value != 1)
150 // If the vectorization width and interleaving count are both 1 then
151 // consider the loop to have been already vectorized because there's
152 // nothing more that we can do.
153 IsVectorized.Value =
155 LLVM_DEBUG(if (InterleaveOnlyWhenForced && getInterleave() == 1) dbgs()
156 << "LV: Interleaving disabled by the pass manager\n");
157}
158
160 TheLoop->addIntLoopAttribute("llvm.loop.isvectorized", 1,
161 {Twine(Prefix(), "vectorize.").str(),
162 Twine(Prefix(), "interleave.").str()});
163
164 // Update internal cache.
165 IsVectorized.Value = 1;
166}
167
168void LoopVectorizeHints::reportDisallowedVectorization(
169 const StringRef DebugMsg, const StringRef RemarkName,
170 const StringRef RemarkMsg, const Loop *L) const {
171 LLVM_DEBUG(dbgs() << "LV: Not vectorizing: " << DebugMsg << ".\n");
172 ORE.emit(OptimizationRemarkMissed(LV_NAME, RemarkName, L->getStartLoc(),
173 L->getHeader())
174 << "loop not vectorized: " << RemarkMsg);
175}
176
178 Function *F, Loop *L, bool VectorizeOnlyWhenForced) const {
180 if (Force.Value == LoopVectorizeHints::FK_Disabled) {
181 reportDisallowedVectorization("#pragma vectorize disable",
182 "MissedExplicitlyDisabled",
183 "vectorization is explicitly disabled", L);
184 } else if (hasDisableAllTransformsHint(L)) {
185 reportDisallowedVectorization("loop hasDisableAllTransformsHint",
186 "MissedTransformsDisabled",
187 "loop transformations are disabled", L);
188 } else {
189 llvm_unreachable("loop vect disabled for an unknown reason");
190 }
191 return false;
192 }
193
194 if (VectorizeOnlyWhenForced && getForce() != LoopVectorizeHints::FK_Enabled) {
195 reportDisallowedVectorization(
196 "VectorizeOnlyWhenForced is set, and no #pragma vectorize enable",
197 "MissedForceOnly", "only vectorizing loops that explicitly request it",
198 L);
199 return false;
200 }
201
202 if (getIsVectorized() == 1) {
203 LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n");
204 // FIXME: Add interleave.disable metadata. This will allow
205 // vectorize.disable to be used without disabling the pass and errors
206 // to differentiate between disabled vectorization and a width of 1.
207 ORE.emit([&]() {
208 return OptimizationRemarkAnalysis(LV_NAME, "AllDisabled",
209 L->getStartLoc(), L->getHeader())
210 << "loop not vectorized: vectorization and interleaving are "
211 "explicitly disabled, or the loop has already been "
212 "vectorized";
213 });
214 return false;
215 }
216
217 return true;
218}
219
221 using namespace ore;
222
223 ORE.emit([&]() {
224 if (Force.Value == LoopVectorizeHints::FK_Disabled)
225 return OptimizationRemarkMissed(LV_NAME, "MissedExplicitlyDisabled",
226 TheLoop->getStartLoc(),
227 TheLoop->getHeader())
228 << "loop not vectorized: vectorization is explicitly disabled";
229
230 OptimizationRemarkMissed R(LV_NAME, "MissedDetails", TheLoop->getStartLoc(),
231 TheLoop->getHeader());
232 R << "loop not vectorized";
233 if (Force.Value == LoopVectorizeHints::FK_Enabled) {
234 R << " (Force=" << NV("Force", true);
235 if (Width.Value != 0)
236 R << ", Vector Width=" << NV("VectorWidth", getWidth());
237 if (getInterleave() != 0)
238 R << ", Interleave Count=" << NV("InterleaveCount", getInterleave());
239 R << ")";
240 }
241 return R;
242 });
243}
244
246 // Allow the vectorizer to change the order of operations if enabling
247 // loop hints are provided
248 ElementCount EC = getWidth();
249 return HintsAllowReordering &&
251 EC.getKnownMinValue() > 1);
252}
253
254void LoopVectorizeHints::getHintsFromMetadata() {
255 MDNode *LoopID = TheLoop->getLoopID();
256 if (!LoopID)
257 return;
258
259 // First operand should refer to the loop id itself.
260 assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
261 assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
262
263 for (const MDOperand &MDO : llvm::drop_begin(LoopID->operands())) {
264 const MDString *S = nullptr;
266
267 // The expected hint is either a MDString or a MDNode with the first
268 // operand a MDString.
269 if (const MDNode *MD = dyn_cast<MDNode>(MDO)) {
270 if (!MD || MD->getNumOperands() == 0)
271 continue;
272 S = dyn_cast<MDString>(MD->getOperand(0));
273 for (unsigned Idx = 1; Idx < MD->getNumOperands(); ++Idx)
274 Args.push_back(MD->getOperand(Idx));
275 } else {
276 S = dyn_cast<MDString>(MDO);
277 assert(Args.size() == 0 && "too many arguments for MDString");
278 }
279
280 if (!S)
281 continue;
282
283 // Check if the hint starts with the loop metadata prefix.
284 StringRef Name = S->getString();
285 if (Args.size() == 1)
286 setHint(Name, Args[0]);
287 }
288}
289
290void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) {
291 if (!Name.consume_front(Prefix()))
292 return;
293
294 const ConstantInt *C = mdconst::dyn_extract<ConstantInt>(Arg);
295 if (!C)
296 return;
297 unsigned Val = C->getZExtValue();
298
299 Hint *Hints[] = {&Width, &Interleave, &Force,
300 &IsVectorized, &Predicate, &Scalable};
301 for (auto *H : Hints) {
302 if (Name == H->Name) {
303 if (H->validate(Val))
304 H->Value = Val;
305 else
306 LLVM_DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n");
307 break;
308 }
309 }
310}
311
312// Return true if the inner loop \p Lp is uniform with regard to the outer loop
313// \p OuterLp (i.e., if the outer loop is vectorized, all the vector lanes
314// executing the inner loop will execute the same iterations). This check is
315// very constrained for now but it will be relaxed in the future. \p Lp is
316// considered uniform if it meets all the following conditions:
317// 1) it has a canonical IV (starting from 0 and with stride 1),
318// 2) its latch terminator is a conditional branch and,
319// 3) its latch condition is a compare instruction whose operands are the
320// canonical IV and an OuterLp invariant.
321// This check doesn't take into account the uniformity of other conditions not
322// related to the loop latch because they don't affect the loop uniformity.
323//
324// NOTE: We decided to keep all these checks and its associated documentation
325// together so that we can easily have a picture of the current supported loop
326// nests. However, some of the current checks don't depend on \p OuterLp and
327// would be redundantly executed for each \p Lp if we invoked this function for
328// different candidate outer loops. This is not the case for now because we
329// don't currently have the infrastructure to evaluate multiple candidate outer
330// loops and \p OuterLp will be a fixed parameter while we only support explicit
331// outer loop vectorization. It's also very likely that these checks go away
332// before introducing the aforementioned infrastructure. However, if this is not
333// the case, we should move the \p OuterLp independent checks to a separate
334// function that is only executed once for each \p Lp.
335static bool isUniformLoop(Loop *Lp, Loop *OuterLp) {
336 assert(Lp->getLoopLatch() && "Expected loop with a single latch.");
337
338 // If Lp is the outer loop, it's uniform by definition.
339 if (Lp == OuterLp)
340 return true;
341 assert(OuterLp->contains(Lp) && "OuterLp must contain Lp.");
342
343 // 1.
345 if (!IV) {
346 LLVM_DEBUG(dbgs() << "LV: Canonical IV not found.\n");
347 return false;
348 }
349
350 // 2.
351 BasicBlock *Latch = Lp->getLoopLatch();
352 auto *LatchBr = dyn_cast<CondBrInst>(Latch->getTerminator());
353 if (!LatchBr) {
354 LLVM_DEBUG(dbgs() << "LV: Unsupported loop latch branch.\n");
355 return false;
356 }
357
358 // 3.
359 auto *LatchCmp = dyn_cast<CmpInst>(LatchBr->getCondition());
360 if (!LatchCmp) {
362 dbgs() << "LV: Loop latch condition is not a compare instruction.\n");
363 return false;
364 }
365
366 Value *CondOp0 = LatchCmp->getOperand(0);
367 Value *CondOp1 = LatchCmp->getOperand(1);
368 Value *IVUpdate = IV->getIncomingValueForBlock(Latch);
369 if (!(CondOp0 == IVUpdate && OuterLp->isLoopInvariant(CondOp1)) &&
370 !(CondOp1 == IVUpdate && OuterLp->isLoopInvariant(CondOp0))) {
371 LLVM_DEBUG(dbgs() << "LV: Loop latch condition is not uniform.\n");
372 return false;
373 }
374
375 return true;
376}
377
378// Return true if \p Lp and all its nested loops are uniform with regard to \p
379// OuterLp.
380static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp) {
381 if (!isUniformLoop(Lp, OuterLp))
382 return false;
383
384 // Check if nested loops are uniform.
385 for (Loop *SubLp : *Lp)
386 if (!isUniformLoopNest(SubLp, OuterLp))
387 return false;
388
389 return true;
390}
391
393 assert(Ty->isIntOrPtrTy() && "Expected integer or pointer type");
394
395 if (Ty->isPointerTy())
396 return DL.getIntPtrType(Ty->getContext(), Ty->getPointerAddressSpace());
397
398 // It is possible that char's or short's overflow when we ask for the loop's
399 // trip count, work around this by changing the type size.
400 if (Ty->getScalarSizeInBits() < 32)
401 return Type::getInt32Ty(Ty->getContext());
402
403 return cast<IntegerType>(Ty);
404}
405
407 Type *Ty1) {
410 return TyA->getScalarSizeInBits() > TyB->getScalarSizeInBits() ? TyA : TyB;
411}
412
413/// Returns true if A and B have same pointer operands or same SCEVs addresses
415 StoreInst *B) {
416 // Compare store
417 if (A == B)
418 return true;
419
420 // Otherwise Compare pointers
421 Value *APtr = A->getPointerOperand();
422 Value *BPtr = B->getPointerOperand();
423 if (APtr == BPtr)
424 return true;
425
426 // Otherwise compare address SCEVs
427 return SE->getSCEV(APtr) == SE->getSCEV(BPtr);
428}
429
431 if (!AllowRuntimeSCEVChecks || !TheLoop->isInnermost())
432 return;
433
434 for (BasicBlock *BB : TheLoop->blocks())
435 for (Instruction &I : *BB)
438}
439
441 Value *Ptr) const {
442 // FIXME: Currently, the set of symbolic strides is sometimes queried before
443 // it's collected. This happens from canVectorizeWithIfConvert, when the
444 // pointer is checked to reference consecutive elements suitable for a
445 // masked access.
446 // Stride versioning requires adding a SCEV equality predicate; only consult
447 // the symbolic strides when runtime SCEV checks are permitted.
448 const auto &Strides = LAI && AllowRuntimeSCEVChecks
449 ? LAI->getSymbolicStrides()
451 int Stride = getPtrStride(PSE, AccessTy, Ptr, TheLoop, *DT, Strides,
452 AllowRuntimeSCEVChecks, false)
453 .value_or(0);
454 if (Stride == 1 || Stride == -1)
455 return Stride;
456 return 0;
457}
458
460 return LAI->isInvariant(V);
461}
462
463namespace {
464/// A rewriter to build the SCEVs for each of the VF lanes in the expected
465/// vectorized loop, which can then be compared to detect their uniformity. This
466/// is done by replacing the AddRec SCEVs of the original scalar loop (TheLoop)
467/// with new AddRecs where the step is multiplied by StepMultiplier and Offset *
468/// Step is added. Also checks if all sub-expressions are analyzable w.r.t.
469/// uniformity.
470class SCEVAddRecForUniformityRewriter
471 : public SCEVRewriteVisitor<SCEVAddRecForUniformityRewriter> {
472 /// Multiplier to be applied to the step of AddRecs in TheLoop.
473 unsigned StepMultiplier;
474
475 /// Offset to be added to the AddRecs in TheLoop.
476 unsigned Offset;
477
478 /// Loop for which to rewrite AddRecsFor.
479 Loop *TheLoop;
480
481 /// Is any sub-expressions not analyzable w.r.t. uniformity?
482 bool CannotAnalyze = false;
483
484 bool canAnalyze() const { return !CannotAnalyze; }
485
486public:
487 SCEVAddRecForUniformityRewriter(ScalarEvolution &SE, unsigned StepMultiplier,
488 unsigned Offset, Loop *TheLoop)
489 : SCEVRewriteVisitor(SE), StepMultiplier(StepMultiplier), Offset(Offset),
490 TheLoop(TheLoop) {}
491
492 const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
493 assert(Expr->getLoop() == TheLoop &&
494 "addrec outside of TheLoop must be invariant and should have been "
495 "handled earlier");
496 // Build a new AddRec by multiplying the step by StepMultiplier and
497 // incrementing the start by Offset * step.
498 Type *Ty = Expr->getType();
499 const SCEV *Step = Expr->getStepRecurrence(SE);
500 if (!SE.isLoopInvariant(Step, TheLoop)) {
501 CannotAnalyze = true;
502 return Expr;
503 }
504 const SCEV *NewStep =
505 SE.getMulExpr(Step, SE.getConstant(Ty, StepMultiplier));
506 const SCEV *ScaledOffset = SE.getMulExpr(Step, SE.getConstant(Ty, Offset));
507 const SCEV *NewStart =
508 SE.getAddExpr(Expr->getStart(), SCEVUse(ScaledOffset));
509 return SE.getAddRecExpr(NewStart, NewStep, TheLoop, SCEV::FlagAnyWrap);
510 }
511
512 const SCEV *visit(const SCEV *S) {
513 if (CannotAnalyze || SE.isLoopInvariant(S, TheLoop))
514 return S;
516 }
517
518 const SCEV *visitUnknown(const SCEVUnknown *S) {
519 if (SE.isLoopInvariant(S, TheLoop))
520 return S;
521 // The value could vary across iterations.
522 CannotAnalyze = true;
523 return S;
524 }
525
526 const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *S) {
527 // Could not analyze the expression.
528 CannotAnalyze = true;
529 return S;
530 }
531
532 static const SCEV *rewrite(const SCEV *S, ScalarEvolution &SE,
533 unsigned StepMultiplier, unsigned Offset,
534 Loop *TheLoop) {
535 /// Bail out if the expression does not contain an UDiv expression.
536 /// Uniform values which are not loop invariant require operations to strip
537 /// out the lowest bits. For now just look for UDivs and use it to avoid
538 /// re-writing UDIV-free expressions for other lanes to limit compile time.
539 if (!SCEVExprContains(S,
540 [](const SCEV *S) { return isa<SCEVUDivExpr>(S); }))
541 return SE.getCouldNotCompute();
542
543 SCEVAddRecForUniformityRewriter Rewriter(SE, StepMultiplier, Offset,
544 TheLoop);
545 const SCEV *Result = Rewriter.visit(S);
546
547 if (Rewriter.canAnalyze())
548 return Result;
549 return SE.getCouldNotCompute();
550 }
551};
552
553} // namespace
554
556 Value *V, std::optional<ElementCount> VF) const {
557 if (isInvariant(V))
558 return true;
559 if (!VF || VF->isScalable())
560 return false;
561 if (VF->isScalar())
562 return true;
563
564 // Since we rely on SCEV for uniformity, if the type is not SCEVable, it is
565 // never considered uniform.
566 auto *SE = PSE.getSE();
567 if (!SE->isSCEVable(V->getType()))
568 return false;
569 const SCEV *S = SE->getSCEV(V);
570
571 // Rewrite AddRecs in TheLoop to step by VF and check if the expression for
572 // lane 0 matches the expressions for all other lanes.
573 unsigned FixedVF = VF->getKnownMinValue();
574 const SCEV *FirstLaneExpr =
575 SCEVAddRecForUniformityRewriter::rewrite(S, *SE, FixedVF, 0, TheLoop);
576 if (isa<SCEVCouldNotCompute>(FirstLaneExpr))
577 return false;
578
579 // Make sure the expressions for lanes FixedVF-1..1 match the expression for
580 // lane 0. We check lanes in reverse order for compile-time, as frequently
581 // checking the last lane is sufficient to rule out uniformity.
582 return all_of(reverse(seq<unsigned>(1, FixedVF)), [&](unsigned I) {
583 const SCEV *IthLaneExpr =
584 SCEVAddRecForUniformityRewriter::rewrite(S, *SE, FixedVF, I, TheLoop);
585 return FirstLaneExpr == IthLaneExpr;
586 });
587}
588
590 Instruction &I, std::optional<ElementCount> VF) const {
592 if (!Ptr)
593 return false;
594 // Note: There's nothing inherent which prevents predicated loads and
595 // stores from being uniform. The current lowering simply doesn't handle
596 // it; in particular, the cost model distinguishes scatter/gather from
597 // scalar w/predication, and we currently rely on the scalar path.
598 return isUniform(Ptr, VF) && !blockNeedsPredication(I.getParent());
599}
600
601bool LoopVectorizationLegality::canVectorizeOuterLoop() {
602 assert(!TheLoop->isInnermost() && "We are not vectorizing an outer loop.");
603 // Store the result and return it at the end instead of exiting early, in case
604 // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
605 bool Result = true;
606 bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
607
608 for (BasicBlock *BB : TheLoop->blocks()) {
609 // Check whether the BB terminator is a branch. Any other terminator is
610 // not supported yet.
611 Instruction *Term = BB->getTerminator();
614 "Unsupported basic block terminator",
615 "loop control flow is not understood by vectorizer",
616 "CFGNotUnderstood", ORE, TheLoop);
617 if (DoExtraAnalysis)
618 Result = false;
619 else
620 return false;
621 }
622
623 // Check whether the branch is a supported one. Only unconditional
624 // branches, conditional branches with an outer loop invariant condition or
625 // backedges are supported.
626 // FIXME: We skip these checks when VPlan predication is enabled as we
627 // want to allow divergent branches. This whole check will be removed
628 // once VPlan predication is on by default.
629 auto *Br = dyn_cast<CondBrInst>(Term);
630 if (Br && !TheLoop->isLoopInvariant(Br->getCondition()) &&
631 !LI->isLoopHeader(Br->getSuccessor(0)) &&
632 !LI->isLoopHeader(Br->getSuccessor(1))) {
634 "Unsupported conditional branch",
635 "loop control flow is not understood by vectorizer",
636 "CFGNotUnderstood", ORE, TheLoop);
637 if (DoExtraAnalysis)
638 Result = false;
639 else
640 return false;
641 }
642 }
643
644 // Check whether inner loops are uniform. At this point, we only support
645 // simple outer loops scenarios with uniform nested loops.
646 if (!isUniformLoopNest(TheLoop /*loop nest*/,
647 TheLoop /*context outer loop*/)) {
649 "Outer loop contains divergent loops",
650 "loop control flow is not understood by vectorizer", "CFGNotUnderstood",
651 ORE, TheLoop);
652 if (DoExtraAnalysis)
653 Result = false;
654 else
655 return false;
656 }
657
658 // Check whether we are able to set up outer loop induction.
659 if (!setupOuterLoopInductions()) {
660 reportVectorizationFailure("Unsupported outer loop Phi(s)",
661 "UnsupportedPhi", ORE, TheLoop);
662 if (DoExtraAnalysis)
663 Result = false;
664 else
665 return false;
666 }
667
668 return Result;
669}
670
671void LoopVectorizationLegality::addInductionPhi(PHINode *Phi,
672 const InductionDescriptor &ID) {
673 Inductions[Phi] = ID;
674
675 // In case this induction also comes with casts that we know we can ignore
676 // in the vectorized loop body, record them here. All casts could be recorded
677 // here for ignoring, but suffices to record only the first (as it is the
678 // only one that may bw used outside the cast sequence).
679 ArrayRef<Instruction *> Casts = ID.getCastInsts();
680 if (!Casts.empty())
681 InductionCastsToIgnore.insert(*Casts.begin());
682
683 Type *PhiTy = Phi->getType();
684 const DataLayout &DL = Phi->getDataLayout();
685
686 assert((PhiTy->isIntOrPtrTy() || PhiTy->isFloatingPointTy()) &&
687 "Expected int, ptr, or FP induction phi type");
688
689 // Get the widest type.
690 if (PhiTy->isIntOrPtrTy()) {
691 if (!WidestIndTy)
692 WidestIndTy = getInductionIntegerTy(DL, PhiTy);
693 else
694 WidestIndTy = getWiderInductionTy(DL, PhiTy, WidestIndTy);
695 }
696
697 // Int inductions are special because we only allow one IV.
698 if (ID.getKind() == InductionDescriptor::IK_IntInduction &&
699 ID.getConstIntStepValue() && ID.getConstIntStepValue()->isOne() &&
700 isa<Constant>(ID.getStartValue()) &&
701 cast<Constant>(ID.getStartValue())->isNullValue()) {
702
703 // Use the phi node with the widest type as induction. Use the last
704 // one if there are multiple (no good reason for doing this other
705 // than it is expedient). We've checked that it begins at zero and
706 // steps by one, so this is a canonical induction variable.
707 if (!PrimaryInduction || PhiTy == WidestIndTy)
708 PrimaryInduction = Phi;
709 }
710
711 LLVM_DEBUG(dbgs() << "LV: Found an induction variable.\n");
712}
713
714bool LoopVectorizationLegality::setupOuterLoopInductions() {
715 BasicBlock *Header = TheLoop->getHeader();
716
717 // Returns true if a given Phi is a supported induction.
718 auto IsSupportedPhi = [&](PHINode &Phi) -> bool {
719 InductionDescriptor ID;
720 if (InductionDescriptor::isInductionPHI(&Phi, TheLoop, PSE, ID) &&
722 addInductionPhi(&Phi, ID);
723 return true;
724 }
725 // Bail out for any Phi in the outer loop header that is not a supported
726 // induction.
728 dbgs() << "LV: Found unsupported PHI for outer loop vectorization.\n");
729 return false;
730 };
731
732 return llvm::all_of(Header->phis(), IsSupportedPhi);
733}
734
735/// Checks if a function is scalarizable according to the TLI, in
736/// the sense that it should be vectorized and then expanded in
737/// multiple scalar calls. This is represented in the
738/// TLI via mappings that do not specify a vector name, as in the
739/// following example:
740///
741/// const VecDesc VecIntrinsics[] = {
742/// {"llvm.phx.abs.i32", "", 4}
743/// };
744static bool isTLIScalarize(const TargetLibraryInfo &TLI, const CallInst &CI) {
745 const StringRef ScalarName = CI.getCalledFunction()->getName();
746 bool Scalarize = TLI.isFunctionVectorizable(ScalarName);
747 // Check that all known VFs are not associated to a vector
748 // function, i.e. the vector name is emty.
749 if (Scalarize) {
750 ElementCount WidestFixedVF, WidestScalableVF;
751 TLI.getWidestVF(ScalarName, WidestFixedVF, WidestScalableVF);
753 ElementCount::isKnownLE(VF, WidestFixedVF); VF *= 2)
754 Scalarize &= !TLI.isFunctionVectorizable(ScalarName, VF);
756 ElementCount::isKnownLE(VF, WidestScalableVF); VF *= 2)
757 Scalarize &= !TLI.isFunctionVectorizable(ScalarName, VF);
758 assert((WidestScalableVF.isZero() || !Scalarize) &&
759 "Caller may decide to scalarize a variant using a scalable VF");
760 }
761 return Scalarize;
762}
763
764bool LoopVectorizationLegality::canVectorizeInstrs() {
765 bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
766 bool Result = true;
767
768 // For each block in the loop.
769 for (BasicBlock *BB : TheLoop->blocks()) {
770 // Scan the instructions in the block and look for hazards.
771 for (Instruction &I : *BB) {
772 Result &= canVectorizeInstr(I);
773 if (!DoExtraAnalysis && !Result)
774 return false;
775 }
776 }
777
778 if (!PrimaryInduction) {
779 if (Inductions.empty()) {
781 "Did not find one integer induction var",
782 "loop induction variable could not be identified",
783 "NoInductionVariable", ORE, TheLoop);
784 return false;
785 }
786 if (!WidestIndTy) {
788 "Did not find one integer induction var",
789 "integer loop induction variable could not be identified",
790 "NoIntegerInductionVariable", ORE, TheLoop);
791 return false;
792 }
793 LLVM_DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
794 }
795
796 // Now we know the widest induction type, check if our found induction
797 // is the same size. If it's not, unset it here and InnerLoopVectorizer
798 // will create another.
799 if (PrimaryInduction && WidestIndTy != PrimaryInduction->getType())
800 PrimaryInduction = nullptr;
801
802 return Result;
803}
804
805bool LoopVectorizationLegality::canVectorizeInstr(Instruction &I) {
806 BasicBlock *BB = I.getParent();
807 BasicBlock *Header = TheLoop->getHeader();
808
809 if (auto *Phi = dyn_cast<PHINode>(&I)) {
810 Type *PhiTy = Phi->getType();
811 // Check that this PHI type is allowed.
812 if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() &&
813 !PhiTy->isPointerTy()) {
815 "Found a non-int non-pointer PHI",
816 "loop control flow is not understood by vectorizer",
817 "CFGNotUnderstood", ORE, TheLoop);
818 return false;
819 }
820
821 // If this PHINode is not in the header block, then we know that we
822 // can convert it to select during if-conversion. No need to check if
823 // the PHIs in this block are induction or reduction variables.
824 if (BB != Header) {
825 // Non-header phi nodes that have outside uses can be vectorized. Unsafe
826 // cyclic dependencies with header phis are identified during legalization
827 // for reduction, induction and fixed order recurrences.
828 return true;
829 }
830
831 // We only allow if-converted PHIs with exactly two incoming values.
832 if (Phi->getNumIncomingValues() != 2) {
834 "Found an invalid PHI",
835 "loop control flow is not understood by vectorizer",
836 "CFGNotUnderstood", ORE, TheLoop, Phi);
837 return false;
838 }
839
840 RecurrenceDescriptor RedDes;
841 if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes, DB, AC, DT,
842 PSE.getSE())) {
843 Requirements->addExactFPMathInst(RedDes.getExactFPMathInst());
844 Reductions[Phi] = std::move(RedDes);
847 RedDes.getRecurrenceKind())) &&
848 "Only min/max recurrences are allowed to have multiple uses "
849 "currently");
850 return true;
851 }
852
853 // We prevent matching non-constant strided pointer IVS to preserve
854 // historical vectorizer behavior after a generalization of the
855 // IVDescriptor code. The intent is to remove this check, but we
856 // have to fix issues around code quality for such loops first.
857 auto IsDisallowedStridedPointerInduction =
858 [](const InductionDescriptor &ID) {
860 return false;
861 return ID.getKind() == InductionDescriptor::IK_PtrInduction &&
862 ID.getConstIntStepValue() == nullptr;
863 };
864
865 InductionDescriptor ID;
866 if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID) &&
867 !IsDisallowedStridedPointerInduction(ID)) {
868 addInductionPhi(Phi, ID);
869 Requirements->addExactFPMathInst(ID.getExactFPMathInst());
870 return true;
871 }
872
873 if (RecurrenceDescriptor::isFixedOrderRecurrence(Phi, TheLoop, DT)) {
874 FixedOrderRecurrences.insert(Phi);
875 return true;
876 }
877
878 // As a last resort, coerce the PHI to a AddRec expression
879 // and re-try classifying it a an induction PHI.
880 if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID, true) &&
881 !IsDisallowedStridedPointerInduction(ID)) {
882 addInductionPhi(Phi, ID);
883 return true;
884 }
885
886 reportVectorizationFailure("Found an unidentified PHI",
887 "value that could not be identified as "
888 "reduction is used outside the loop",
889 "NonReductionValueUsedOutsideLoop", ORE, TheLoop,
890 Phi);
891 return false;
892 } // end of PHI handling
893
894 // We handle calls that:
895 // * Have a mapping to an IR intrinsic.
896 // * Have a vector version available.
897 auto *CI = dyn_cast<CallInst>(&I);
898
899 if (CI && !getVectorIntrinsicIDForCall(CI, TLI) &&
900 !(CI->getCalledFunction() && TLI &&
901 (!VFDatabase::getMappings(*CI).empty() || isTLIScalarize(*TLI, *CI)))) {
902 // If the call is a recognized math libary call, it is likely that
903 // we can vectorize it given loosened floating-point constraints.
904 LibFunc Func;
905 bool IsMathLibCall =
906 TLI && CI->getCalledFunction() && CI->getType()->isFloatingPointTy() &&
907 TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) &&
908 TLI->hasOptimizedCodeGen(Func);
909
910 if (IsMathLibCall) {
911 // TODO: Ideally, we should not use clang-specific language here,
912 // but it's hard to provide meaningful yet generic advice.
913 // Also, should this be guarded by allowExtraAnalysis() and/or be part
914 // of the returned info from isFunctionVectorizable()?
916 "Found a non-intrinsic callsite",
917 "library call cannot be vectorized. "
918 "Try compiling with -fno-math-errno, -ffast-math, "
919 "or similar flags",
920 "CantVectorizeLibcall", ORE, TheLoop, CI);
921 } else {
922 reportVectorizationFailure("Found a non-intrinsic callsite",
923 "call instruction cannot be vectorized",
924 "CantVectorizeLibcall", ORE, TheLoop, CI);
925 }
926 return false;
927 }
928
929 // Some intrinsics have scalar arguments and should be same in order for
930 // them to be vectorized (i.e. loop invariant).
931 if (CI) {
932 auto *SE = PSE.getSE();
933 Intrinsic::ID IntrinID = getVectorIntrinsicIDForCall(CI, TLI);
934 for (unsigned Idx = 0; Idx < CI->arg_size(); ++Idx)
935 if (isVectorIntrinsicWithScalarOpAtArg(IntrinID, Idx, TTI)) {
936 if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(Idx)), TheLoop)) {
938 "Found unvectorizable intrinsic",
939 "intrinsic instruction cannot be vectorized",
940 "CantVectorizeIntrinsic", ORE, TheLoop, CI);
941 return false;
942 }
943 }
944 }
945
946 // If we found a vectorized variant of a function, note that so LV can
947 // make better decisions about maximum VF.
948 if (CI && !VFDatabase::getMappings(*CI).empty())
949 VecCallVariantsFound = true;
950
951 auto CanWidenInstructionTy = [](Instruction const &Inst) {
952 Type *InstTy = Inst.getType();
953 if (!isa<StructType>(InstTy))
954 return canVectorizeTy(InstTy);
955
956 // For now, we only recognize struct values returned from calls where
957 // all users are extractvalue as vectorizable. All element types of the
958 // struct must be types that can be widened.
959 return isa<CallInst>(Inst) && canVectorizeTy(InstTy) &&
960 all_of(Inst.users(), IsaPred<ExtractValueInst>);
961 };
962
963 // Check that the instruction return type is vectorizable.
964 // We can't vectorize casts from vector type to scalar type.
965 // Also, we can't vectorize extractelement instructions.
966 if (!CanWidenInstructionTy(I) ||
967 (isa<CastInst>(I) &&
968 !VectorType::isValidElementType(I.getOperand(0)->getType())) ||
970 reportVectorizationFailure("Found unvectorizable type",
971 "instruction return type cannot be vectorized",
972 "CantVectorizeInstructionReturnType", ORE,
973 TheLoop, &I);
974 return false;
975 }
976
977 // Check that the stored type is vectorizable.
978 if (auto *ST = dyn_cast<StoreInst>(&I)) {
979 Type *T = ST->getValueOperand()->getType();
981 reportVectorizationFailure("Store instruction cannot be vectorized",
982 "CantVectorizeStore", ORE, TheLoop, ST);
983 return false;
984 }
985
986 // For nontemporal stores, check that a nontemporal vector version is
987 // supported on the target.
988 if (ST->getMetadata(LLVMContext::MD_nontemporal)) {
989 // Arbitrarily try a vector of 2 elements.
990 auto *VecTy = FixedVectorType::get(T, /*NumElts=*/2);
991 assert(VecTy && "did not find vectorized version of stored type");
992 if (!TTI->isLegalNTStore(VecTy, ST->getAlign())) {
994 "nontemporal store instruction cannot be vectorized",
995 "CantVectorizeNontemporalStore", ORE, TheLoop, ST);
996 return false;
997 }
998 }
999
1000 } else if (auto *LD = dyn_cast<LoadInst>(&I)) {
1001 if (LD->getMetadata(LLVMContext::MD_nontemporal)) {
1002 // For nontemporal loads, check that a nontemporal vector version is
1003 // supported on the target (arbitrarily try a vector of 2 elements).
1004 auto *VecTy = FixedVectorType::get(I.getType(), /*NumElts=*/2);
1005 assert(VecTy && "did not find vectorized version of load type");
1006 if (!TTI->isLegalNTLoad(VecTy, LD->getAlign())) {
1008 "nontemporal load instruction cannot be vectorized",
1009 "CantVectorizeNontemporalLoad", ORE, TheLoop, LD);
1010 return false;
1011 }
1012 }
1013
1014 // FP instructions can allow unsafe algebra, thus vectorizable by
1015 // non-IEEE-754 compliant SIMD units.
1016 // This applies to floating-point math operations and calls, not memory
1017 // operations, shuffles, or casts, as they don't change precision or
1018 // semantics.
1019 } else if (I.getType()->isFloatingPointTy() && (CI || I.isBinaryOp()) &&
1020 !I.isFast()) {
1021 LLVM_DEBUG(dbgs() << "LV: Found FP op with unsafe algebra.\n");
1022 Hints->setPotentiallyUnsafe();
1023 }
1024
1025 return true;
1026}
1027
1028/// Find histogram operations that match high-level code in loops:
1029/// \code
1030/// buckets[indices[i]]+=step;
1031/// \endcode
1032///
1033/// It matches a pattern starting from \p HSt, which Stores to the 'buckets'
1034/// array the computed histogram. It uses a BinOp to sum all counts, storing
1035/// them using a loop-variant index Load from the 'indices' input array.
1036///
1037/// On successful matches it updates the STATISTIC 'HistogramsDetected',
1038/// regardless of hardware support. When there is support, it additionally
1039/// stores the BinOp/Load pairs in \p HistogramCounts, as well the pointers
1040/// used to update histogram in \p HistogramPtrs.
1041static bool findHistogram(LoadInst *LI, StoreInst *HSt, Loop *TheLoop,
1042 const PredicatedScalarEvolution &PSE,
1043 SmallVectorImpl<HistogramInfo> &Histograms) {
1044
1045 // Store value must come from a Binary Operation.
1046 Instruction *HPtrInstr = nullptr;
1047 BinaryOperator *HBinOp = nullptr;
1048 if (!match(HSt, m_Store(m_BinOp(HBinOp), m_Instruction(HPtrInstr))))
1049 return false;
1050
1051 // BinOp must be an Add or a Sub modifying the bucket value by a
1052 // loop invariant amount.
1053 // FIXME: We assume the loop invariant term is on the RHS.
1054 // Fine for an immediate/constant, but maybe not a generic value?
1055 Value *HIncVal = nullptr;
1056 if (!match(HBinOp, m_Add(m_Load(m_Specific(HPtrInstr)), m_Value(HIncVal))) &&
1057 !match(HBinOp, m_Sub(m_Load(m_Specific(HPtrInstr)), m_Value(HIncVal))))
1058 return false;
1059
1060 // Make sure the increment value is loop invariant.
1061 if (!TheLoop->isLoopInvariant(HIncVal))
1062 return false;
1063
1064 // The address to store is calculated through a GEP Instruction.
1066 if (!GEP)
1067 return false;
1068
1069 // Restrict address calculation to constant indices except for the last term.
1070 Value *HIdx = nullptr;
1071 for (Value *Index : GEP->indices()) {
1072 if (HIdx)
1073 return false;
1074 if (!isa<ConstantInt>(Index))
1075 HIdx = Index;
1076 }
1077
1078 if (!HIdx)
1079 return false;
1080
1081 // Check that the index is calculated by loading from another array. Ignore
1082 // any extensions.
1083 // FIXME: Support indices from other sources than a linear load from memory?
1084 // We're currently trying to match an operation looping over an array
1085 // of indices, but there could be additional levels of indirection
1086 // in place, or possibly some additional calculation to form the index
1087 // from the loaded data.
1088 Value *VPtrVal;
1089 if (!match(HIdx, m_ZExtOrSExtOrSelf(m_Load(m_Value(VPtrVal)))))
1090 return false;
1091
1092 // Make sure the index address varies in this loop, not an outer loop.
1093 const auto *AR = dyn_cast<SCEVAddRecExpr>(PSE.getSE()->getSCEV(VPtrVal));
1094 if (!AR || AR->getLoop() != TheLoop)
1095 return false;
1096
1097 // Ensure we'll have the same mask by checking that all parts of the histogram
1098 // (gather load, update, scatter store) are in the same block.
1099 LoadInst *IndexedLoad = cast<LoadInst>(HBinOp->getOperand(0));
1100 BasicBlock *LdBB = IndexedLoad->getParent();
1101 if (LdBB != HBinOp->getParent() || LdBB != HSt->getParent())
1102 return false;
1103
1104 LLVM_DEBUG(dbgs() << "LV: Found histogram for: " << *HSt << "\n");
1105
1106 // Store the operations that make up the histogram.
1107 Histograms.emplace_back(IndexedLoad, HBinOp, HSt);
1108 return true;
1109}
1110
1111bool LoopVectorizationLegality::canVectorizeIndirectUnsafeDependences() {
1112 // For now, we only support an IndirectUnsafe dependency that calculates
1113 // a histogram
1115 return false;
1116
1117 // Find a single IndirectUnsafe dependency.
1118 const MemoryDepChecker::Dependence *IUDep = nullptr;
1119 const MemoryDepChecker &DepChecker = LAI->getDepChecker();
1120 const auto *Deps = DepChecker.getDependences();
1121 // If there were too many dependences, LAA abandons recording them. We can't
1122 // proceed safely if we don't know what the dependences are.
1123 if (!Deps)
1124 return false;
1125
1126 for (const MemoryDepChecker::Dependence &Dep : *Deps) {
1127 // Ignore dependencies that are either known to be safe or can be
1128 // checked at runtime.
1131 continue;
1132
1133 // We're only interested in IndirectUnsafe dependencies here, where the
1134 // address might come from a load from memory. We also only want to handle
1135 // one such dependency, at least for now.
1136 if (Dep.Type != MemoryDepChecker::Dependence::IndirectUnsafe || IUDep)
1137 return false;
1138
1139 IUDep = &Dep;
1140 }
1141 if (!IUDep)
1142 return false;
1143
1144 // For now only normal loads and stores are supported.
1145 LoadInst *LI = dyn_cast<LoadInst>(IUDep->getSource(DepChecker));
1146 StoreInst *SI = dyn_cast<StoreInst>(IUDep->getDestination(DepChecker));
1147
1148 if (!LI || !SI)
1149 return false;
1150
1151 LLVM_DEBUG(dbgs() << "LV: Checking for a histogram on: " << *SI << "\n");
1152 return findHistogram(LI, SI, TheLoop, LAI->getPSE(), Histograms);
1153}
1154
1155bool LoopVectorizationLegality::canVectorizeMemory() {
1156 LAI = &LAIs.getInfo(*TheLoop);
1157 const OptimizationRemarkAnalysis *LAR = LAI->getReport();
1158 if (LAR) {
1159 ORE->emit([&]() {
1160 return OptimizationRemarkAnalysis(LV_NAME, "loop not vectorized: ", *LAR);
1161 });
1162 }
1163
1164 if (!LAI->canVectorizeMemory()) {
1167 "Cannot vectorize unsafe dependencies in uncountable exit loop with "
1168 "side effects",
1169 "CantVectorizeUnsafeDependencyForEELoopWithSideEffects", ORE,
1170 TheLoop);
1171 return false;
1172 }
1173
1174 return canVectorizeIndirectUnsafeDependences();
1175 }
1176
1177 if (LAI->hasLoadStoreDependenceInvolvingLoopInvariantAddress()) {
1178 reportVectorizationFailure("We don't allow storing to uniform addresses",
1179 "write to a loop invariant address could not "
1180 "be vectorized",
1181 "CantVectorizeStoreToLoopInvariantAddress", ORE,
1182 TheLoop);
1183 return false;
1184 }
1185
1186 // We can vectorize stores to invariant address when final reduction value is
1187 // guaranteed to be stored at the end of the loop. Also, if decision to
1188 // vectorize loop is made, runtime checks are added so as to make sure that
1189 // invariant address won't alias with any other objects.
1190 if (!LAI->getStoresToInvariantAddresses().empty()) {
1191 // For each invariant address, check if last stored value is unconditional
1192 // and the address is not calculated inside the loop.
1193 for (StoreInst *SI : LAI->getStoresToInvariantAddresses()) {
1195 continue;
1196
1197 if (blockNeedsPredication(SI->getParent())) {
1199 "We don't allow storing to uniform addresses",
1200 "write of conditional recurring variant value to a loop "
1201 "invariant address could not be vectorized",
1202 "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
1203 return false;
1204 }
1205
1206 // Invariant address should be defined outside of loop. LICM pass usually
1207 // makes sure it happens, but in rare cases it does not, we do not want
1208 // to overcomplicate vectorization to support this case.
1209 if (Instruction *Ptr = dyn_cast<Instruction>(SI->getPointerOperand())) {
1210 if (TheLoop->contains(Ptr)) {
1212 "Invariant address is calculated inside the loop",
1213 "write to a loop invariant address could not "
1214 "be vectorized",
1215 "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
1216 return false;
1217 }
1218 }
1219 }
1220
1221 if (LAI->hasStoreStoreDependenceInvolvingLoopInvariantAddress()) {
1222 // For each invariant address, check its last stored value is the result
1223 // of one of our reductions.
1224 //
1225 // We do not check if dependence with loads exists because that is already
1226 // checked via hasLoadStoreDependenceInvolvingLoopInvariantAddress.
1227 ScalarEvolution *SE = PSE.getSE();
1228 SmallVector<StoreInst *, 4> UnhandledStores;
1229 for (StoreInst *SI : LAI->getStoresToInvariantAddresses()) {
1231 // Earlier stores to this address are effectively deadcode.
1232 // With opaque pointers it is possible for one pointer to be used with
1233 // different sizes of stored values:
1234 // store i32 0, ptr %x
1235 // store i8 0, ptr %x
1236 // The latest store doesn't complitely overwrite the first one in the
1237 // example. That is why we have to make sure that types of stored
1238 // values are same.
1239 // TODO: Check that bitwidth of unhandled store is smaller then the
1240 // one that overwrites it and add a test.
1241 erase_if(UnhandledStores, [SE, SI](StoreInst *I) {
1242 return storeToSameAddress(SE, SI, I) &&
1243 I->getValueOperand()->getType() ==
1244 SI->getValueOperand()->getType();
1245 });
1246 continue;
1247 }
1248 UnhandledStores.push_back(SI);
1249 }
1250
1251 bool IsOK = UnhandledStores.empty();
1252 // TODO: we should also validate against InvariantMemSets.
1253 if (!IsOK) {
1255 "We don't allow storing to uniform addresses",
1256 "write to a loop invariant address could not "
1257 "be vectorized",
1258 "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
1259 return false;
1260 }
1261 }
1262 }
1263
1264 PSE.addPredicate(LAI->getPSE().getPredicate());
1265 return true;
1266}
1267
1269 bool EnableStrictReductions) {
1270
1271 // First check if there is any ExactFP math or if we allow reassociations
1272 if (!Requirements->getExactFPInst() || Hints->allowReordering())
1273 return true;
1274
1275 // If the above is false, we have ExactFPMath & do not allow reordering.
1276 // If the EnableStrictReductions flag is set, first check if we have any
1277 // Exact FP induction vars, which we cannot vectorize.
1278 if (!EnableStrictReductions ||
1279 any_of(getInductionVars(), [&](auto &Induction) -> bool {
1280 InductionDescriptor IndDesc = Induction.second;
1281 return IndDesc.getExactFPMathInst();
1282 }))
1283 return false;
1284
1285 // We can now only vectorize if all reductions with Exact FP math also
1286 // have the isOrdered flag set, which indicates that we can move the
1287 // reduction operations in-loop.
1288 return (all_of(getReductionVars(), [&](auto &Reduction) -> bool {
1289 const RecurrenceDescriptor &RdxDesc = Reduction.second;
1290 return !RdxDesc.hasExactFPMath() || RdxDesc.isOrdered();
1291 }));
1292}
1293
1295 return any_of(getReductionVars(), [&](auto &Reduction) -> bool {
1296 const RecurrenceDescriptor &RdxDesc = Reduction.second;
1297 return RdxDesc.IntermediateStore == SI;
1298 });
1299}
1300
1302 return any_of(getReductionVars(), [&](auto &Reduction) -> bool {
1303 const RecurrenceDescriptor &RdxDesc = Reduction.second;
1304 if (!RdxDesc.IntermediateStore)
1305 return false;
1306
1307 ScalarEvolution *SE = PSE.getSE();
1308 Value *InvariantAddress = RdxDesc.IntermediateStore->getPointerOperand();
1309 return V == InvariantAddress ||
1310 SE->getSCEV(V) == SE->getSCEV(InvariantAddress);
1311 });
1312}
1313
1315 Value *In0 = const_cast<Value *>(V);
1317 if (!PN)
1318 return false;
1319
1320 return Inductions.count(PN);
1321}
1322
1323const InductionDescriptor *
1325 if (!isInductionPhi(Phi))
1326 return nullptr;
1327 auto &ID = getInductionVars().find(Phi)->second;
1328 if (ID.getKind() == InductionDescriptor::IK_IntInduction ||
1330 return &ID;
1331 return nullptr;
1332}
1333
1334const InductionDescriptor *
1336 if (!isInductionPhi(Phi))
1337 return nullptr;
1338 auto &ID = getInductionVars().find(Phi)->second;
1340 return &ID;
1341 return nullptr;
1342}
1343
1345 const Value *V) const {
1346 auto *Inst = dyn_cast<Instruction>(V);
1347 return (Inst && InductionCastsToIgnore.count(Inst));
1348}
1349
1353
1355 const PHINode *Phi) const {
1356 return FixedOrderRecurrences.count(Phi);
1357}
1358
1360 const BasicBlock *BB) const {
1361 // When vectorizing early exits, create predicates for the latch block only.
1362 // For a single early exit, it must be a direct predecessor of the latch.
1363 // For multiple early exits, they form a chain where each exiting block
1364 // dominates all subsequent blocks up to the latch.
1365 BasicBlock *Latch = TheLoop->getLoopLatch();
1367 return BB == Latch;
1368 return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
1369}
1370
1371bool LoopVectorizationLegality::blockCanBePredicated(
1372 BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs,
1373 SmallPtrSetImpl<const Instruction *> &MaskedOp) const {
1374 for (Instruction &I : *BB) {
1375 // We can predicate blocks with calls to assume, as long as we drop them in
1376 // case we flatten the CFG via predication.
1378 MaskedOp.insert(&I);
1379 continue;
1380 }
1381
1382 // Do not let llvm.experimental.noalias.scope.decl block the vectorization.
1383 // TODO: there might be cases that it should block the vectorization. Let's
1384 // ignore those for now.
1386 continue;
1387
1388 // We can allow masked calls if there's at least one vector variant, even
1389 // if we end up scalarizing due to the cost model calculations.
1390 // TODO: Allow other calls if they have appropriate attributes... readonly
1391 // and argmemonly?
1392 if (CallInst *CI = dyn_cast<CallInst>(&I))
1394 MaskedOp.insert(CI);
1395 continue;
1396 }
1397
1398 // Loads are handled via masking (or speculated if safe to do so.)
1399 if (auto *LI = dyn_cast<LoadInst>(&I)) {
1400 if (!SafePtrs.count(LI->getPointerOperand()))
1401 MaskedOp.insert(LI);
1402 continue;
1403 }
1404
1405 // Predicated store requires some form of masking:
1406 // 1) masked store HW instruction,
1407 // 2) emulation via load-blend-store (only if safe and legal to do so,
1408 // be aware on the race conditions), or
1409 // 3) element-by-element predicate check and scalar store.
1410 if (auto *SI = dyn_cast<StoreInst>(&I)) {
1411 MaskedOp.insert(SI);
1412 continue;
1413 }
1414
1415 if (I.mayReadFromMemory() || I.mayWriteToMemory() || I.mayThrow())
1416 return false;
1417 }
1418
1419 return true;
1420}
1421
1422bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
1423 if (!EnableIfConversion) {
1424 reportVectorizationFailure("If-conversion is disabled",
1425 "IfConversionDisabled", ORE, TheLoop);
1426 return false;
1427 }
1428
1429 assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable");
1430
1431 // A list of pointers which are known to be dereferenceable within scope of
1432 // the loop body for each iteration of the loop which executes. That is,
1433 // the memory pointed to can be dereferenced (with the access size implied by
1434 // the value's type) unconditionally within the loop header without
1435 // introducing a new fault.
1436 SmallPtrSet<Value *, 8> SafePointers;
1437
1438 // Collect safe addresses.
1439 for (BasicBlock *BB : TheLoop->blocks()) {
1440 if (!blockNeedsPredication(BB)) {
1441 for (Instruction &I : *BB)
1442 if (auto *Ptr = getLoadStorePointerOperand(&I))
1443 SafePointers.insert(Ptr);
1444 continue;
1445 }
1446
1447 // For a block which requires predication, a address may be safe to access
1448 // in the loop w/o predication if we can prove dereferenceability facts
1449 // sufficient to ensure it'll never fault within the loop. For the moment,
1450 // we restrict this to loads; stores are more complicated due to
1451 // concurrency restrictions.
1452 ScalarEvolution &SE = *PSE.getSE();
1454 for (Instruction &I : *BB) {
1455 LoadInst *LI = dyn_cast<LoadInst>(&I);
1456
1457 // Make sure we can execute all computations feeding into Ptr in the loop
1458 // w/o triggering UB and that none of the out-of-loop operands are poison.
1459 // We do not need to check if operations inside the loop can produce
1460 // poison due to flags (e.g. due to an inbounds GEP going out of bounds),
1461 // because flags will be dropped when executing them unconditionally.
1462 // TODO: Results could be improved by considering poison-propagation
1463 // properties of visited ops.
1464 auto CanSpeculatePointerOp = [this](Value *Ptr) {
1465 SmallVector<Value *> Worklist = {Ptr};
1466 SmallPtrSet<Value *, 4> Visited;
1467 while (!Worklist.empty()) {
1468 Value *CurrV = Worklist.pop_back_val();
1469 if (!Visited.insert(CurrV).second)
1470 continue;
1471
1472 auto *CurrI = dyn_cast<Instruction>(CurrV);
1473 if (!CurrI || !TheLoop->contains(CurrI)) {
1474 // If operands from outside the loop may be poison then Ptr may also
1475 // be poison.
1476 if (!isGuaranteedNotToBePoison(CurrV, AC,
1477 TheLoop->getLoopPredecessor()
1478 ->getTerminator()
1479 ->getIterator(),
1480 DT))
1481 return false;
1482 continue;
1483 }
1484
1485 // A loaded value may be poison, independent of any flags.
1486 if (isa<LoadInst>(CurrI) && !isGuaranteedNotToBePoison(CurrV, AC))
1487 return false;
1488
1489 // For other ops, assume poison can only be introduced via flags,
1490 // which can be dropped.
1491 if (!isa<PHINode>(CurrI) && !isSafeToSpeculativelyExecute(CurrI))
1492 return false;
1493 append_range(Worklist, CurrI->operands());
1494 }
1495 return true;
1496 };
1497 // Pass the Predicates pointer to isDereferenceableAndAlignedInLoop so
1498 // that it will consider loops that need guarding by SCEV checks. The
1499 // vectoriser will generate these checks if we decide to vectorise.
1500 if (LI && !LI->getType()->isVectorTy() && !mustSuppressSpeculation(*LI) &&
1501 CanSpeculatePointerOp(LI->getPointerOperand()) &&
1502 isDereferenceableAndAlignedInLoop(LI, TheLoop, SE, *DT, AC,
1503 &Predicates))
1504 SafePointers.insert(LI->getPointerOperand());
1505 Predicates.clear();
1506 }
1507 }
1508
1509 // Collect the blocks that need predication.
1510 for (BasicBlock *BB : TheLoop->blocks()) {
1511 // We support only branches and switch statements as terminators inside the
1512 // loop.
1513 if (isa<SwitchInst>(BB->getTerminator())) {
1514 if (TheLoop->isLoopExiting(BB)) {
1515 reportVectorizationFailure("Loop contains an unsupported switch",
1516 "LoopContainsUnsupportedSwitch", ORE,
1517 TheLoop, BB->getTerminator());
1518 return false;
1519 }
1520 } else if (!isa<UncondBrInst, CondBrInst>(BB->getTerminator())) {
1521 reportVectorizationFailure("Loop contains an unsupported terminator",
1522 "LoopContainsUnsupportedTerminator", ORE,
1523 TheLoop, BB->getTerminator());
1524 return false;
1525 }
1526
1527 // We must be able to predicate all blocks that need to be predicated.
1528 if (blockNeedsPredication(BB) &&
1529 !blockCanBePredicated(BB, SafePointers, ConditionallyExecutedOps)) {
1531 "Control flow cannot be substituted for a select", "NoCFGForSelect",
1532 ORE, TheLoop, BB->getTerminator());
1533 return false;
1534 }
1535 }
1536
1537 // We can if-convert this loop.
1538 return true;
1539}
1540
1541// Helper function to canVectorizeLoopNestCFG.
1542bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
1543 bool UseVPlanNativePath) {
1544 assert((UseVPlanNativePath || Lp->isInnermost()) &&
1545 "VPlan-native path is not enabled.");
1546
1547 // TODO: ORE should be improved to show more accurate information when an
1548 // outer loop can't be vectorized because a nested loop is not understood or
1549 // legal. Something like: "outer_loop_location: loop not vectorized:
1550 // (inner_loop_location) loop control flow is not understood by vectorizer".
1551
1552 // Store the result and return it at the end instead of exiting early, in case
1553 // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1554 bool Result = true;
1555 bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1556
1557 // We must have a loop in canonical form. Loops with indirectbr in them cannot
1558 // be canonicalized.
1559 if (!Lp->getLoopPreheader()) {
1561 "Loop doesn't have a legal pre-header",
1562 "loop control flow is not understood by vectorizer", "CFGNotUnderstood",
1563 ORE, TheLoop);
1564 if (DoExtraAnalysis)
1565 Result = false;
1566 else
1567 return false;
1568 }
1569
1570 // We must have a single backedge.
1571 if (Lp->getNumBackEdges() != 1) {
1573 "The loop must have a single backedge",
1574 "loop control flow is not understood by vectorizer", "CFGNotUnderstood",
1575 ORE, TheLoop);
1576 if (DoExtraAnalysis)
1577 Result = false;
1578 else
1579 return false;
1580 }
1581
1582 // The latch must be terminated by a branch.
1583 BasicBlock *Latch = Lp->getLoopLatch();
1584 if (Latch && !isa<UncondBrInst, CondBrInst>(Latch->getTerminator())) {
1586 "The loop latch terminator is not a UncondBrInst/CondBrInst",
1587 "loop control flow is not understood by vectorizer", "CFGNotUnderstood",
1588 ORE, TheLoop);
1589 if (DoExtraAnalysis)
1590 Result = false;
1591 else
1592 return false;
1593 }
1594
1595 return Result;
1596}
1597
1598bool LoopVectorizationLegality::canVectorizeLoopNestCFG(
1599 Loop *Lp, bool UseVPlanNativePath) {
1600 // Store the result and return it at the end instead of exiting early, in case
1601 // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1602 bool Result = true;
1603 bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1604 if (!canVectorizeLoopCFG(Lp, UseVPlanNativePath)) {
1605 if (DoExtraAnalysis)
1606 Result = false;
1607 else
1608 return false;
1609 }
1610
1611 // Recursively check whether the loop control flow of nested loops is
1612 // understood.
1613 for (Loop *SubLp : *Lp)
1614 if (!canVectorizeLoopNestCFG(SubLp, UseVPlanNativePath)) {
1615 if (DoExtraAnalysis)
1616 Result = false;
1617 else
1618 return false;
1619 }
1620
1621 return Result;
1622}
1623
1624bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
1625 BasicBlock *LatchBB = TheLoop->getLoopLatch();
1626 if (!LatchBB) {
1627 reportVectorizationFailure("Loop does not have a latch",
1628 "Cannot vectorize early exit loop",
1629 "NoLatchEarlyExit", ORE, TheLoop);
1630 return false;
1631 }
1632
1633 if (Reductions.size() || FixedOrderRecurrences.size()) {
1635 "Found reductions or recurrences in early-exit loop",
1636 "Cannot vectorize early exit loop with reductions or recurrences",
1637 "RecurrencesInEarlyExitLoop", ORE, TheLoop);
1638 return false;
1639 }
1640
1641 SmallVector<BasicBlock *, 8> ExitingBlocks;
1642 TheLoop->getExitingBlocks(ExitingBlocks);
1643
1644 // Keep a record of all the exiting blocks.
1646 SmallVector<BasicBlock *> UncountableExitingBlocks;
1647 for (BasicBlock *BB : ExitingBlocks) {
1648 const SCEV *EC =
1649 PSE.getSE()->getPredicatedExitCount(TheLoop, BB, &Predicates);
1650 if (isa<SCEVCouldNotCompute>(EC)) {
1651 if (size(successors(BB)) != 2) {
1653 "Early exiting block does not have exactly two successors",
1654 "Incorrect number of successors from early exiting block",
1655 "EarlyExitTooManySuccessors", ORE, TheLoop);
1656 return false;
1657 }
1658
1659 UncountableExitingBlocks.push_back(BB);
1660 } else
1661 CountableExitingBlocks.push_back(BB);
1662 }
1663 // We can safely ignore the predicates here because when vectorizing the loop
1664 // the PredicatatedScalarEvolution class will keep track of all predicates
1665 // for each exiting block anyway. This happens when calling
1666 // PSE.getSymbolicMaxBackedgeTakenCount() below.
1667 Predicates.clear();
1668
1669 if (UncountableExitingBlocks.empty()) {
1670 LLVM_DEBUG(dbgs() << "LV: Could not find any uncountable exits");
1671 return false;
1672 }
1673
1674 // The latch block must have a countable exit.
1676 PSE.getSE()->getPredicatedExitCount(TheLoop, LatchBB, &Predicates))) {
1678 "Cannot determine exact exit count for latch block",
1679 "Cannot vectorize early exit loop",
1680 "UnknownLatchExitCountEarlyExitLoop", ORE, TheLoop);
1681 return false;
1682 }
1683 assert(llvm::is_contained(CountableExitingBlocks, LatchBB) &&
1684 "Latch block not found in list of countable exits!");
1685
1686 // Check to see if there are instructions that could potentially generate
1687 // exceptions or have side-effects.
1688 auto IsSafeOperation = [](Instruction *I) -> bool {
1689 switch (I->getOpcode()) {
1690 case Instruction::Load:
1691 case Instruction::Store:
1692 case Instruction::PHI:
1693 case Instruction::UncondBr:
1694 case Instruction::CondBr:
1695 // These are checked separately.
1696 return true;
1697 default:
1699 }
1700 };
1701
1702 bool HasSideEffects = false;
1703 for (auto *BB : TheLoop->blocks())
1704 for (auto &I : *BB) {
1705 if (I.mayWriteToMemory()) {
1706 if (isa<StoreInst>(&I) && cast<StoreInst>(&I)->isSimple()) {
1707 HasSideEffects = true;
1708 continue;
1709 }
1710
1711 // We don't support complex writes to memory.
1713 "Complex writes to memory unsupported in early exit loops",
1714 "Cannot vectorize early exit loop with complex writes to memory",
1715 "WritesInEarlyExitLoop", ORE, TheLoop);
1716 return false;
1717 }
1718
1719 if (!IsSafeOperation(&I)) {
1720 reportVectorizationFailure("Early exit loop contains operations that "
1721 "cannot be speculatively executed",
1722 "UnsafeOperationsEarlyExitLoop", ORE,
1723 TheLoop);
1724 return false;
1725 }
1726 }
1727
1728 SmallVector<LoadInst *, 4> NonDerefLoads;
1729 // TODO: Handle loops that may fault.
1730 if (!HasSideEffects) {
1731 // Read-only loop.
1732 Predicates.clear();
1733 if (!isReadOnlyLoop(TheLoop, PSE.getSE(), DT, AC, NonDerefLoads,
1734 &Predicates)) {
1736 "Loop may fault", "Cannot vectorize non-read-only early exit loop",
1737 "NonReadOnlyEarlyExitLoop", ORE, TheLoop);
1738 return false;
1739 }
1740 } else {
1741 // Check all uncountable exiting blocks for movable loads.
1742 for (BasicBlock *ExitingBB : UncountableExitingBlocks) {
1743 if (!canUncountableExitConditionLoadBeMoved(ExitingBB))
1744 return false;
1745 }
1746 }
1747
1748 // Check non-dereferenceable loads if any.
1749 for (LoadInst *LI : NonDerefLoads) {
1750 // Only support unit-stride access for now.
1751 int Stride = isConsecutivePtr(LI->getType(), LI->getPointerOperand());
1752 if (Stride != 1) {
1754 "Loop contains potentially faulting strided load",
1755 "Cannot vectorize early exit loop with "
1756 "strided fault-only-first load",
1757 "EarlyExitLoopWithStridedFaultOnlyFirstLoad", ORE, TheLoop);
1758 return false;
1759 }
1760 }
1761
1762 [[maybe_unused]] const SCEV *SymbolicMaxBTC =
1763 PSE.getSymbolicMaxBackedgeTakenCount();
1764 // Since we have an exact exit count for the latch and the early exit
1765 // dominates the latch, then this should guarantee a computed SCEV value.
1766 assert(!isa<SCEVCouldNotCompute>(SymbolicMaxBTC) &&
1767 "Failed to get symbolic expression for backedge taken count");
1768 LLVM_DEBUG(dbgs() << "LV: Found an early exit loop with symbolic max "
1769 "backedge taken count: "
1770 << *SymbolicMaxBTC << '\n');
1771 UncountableExitType = HasSideEffects ? UncountableExitTrait::ReadWrite
1773 return true;
1774}
1775
1776bool LoopVectorizationLegality::canUncountableExitConditionLoadBeMoved(
1777 BasicBlock *ExitingBlock) {
1778 // Try to find a load in the critical path for the uncountable exit condition.
1779 // This is currently matching about the simplest form we can, expecting
1780 // only one in-loop load, the result of which is directly compared against
1781 // a loop-invariant value.
1782 // FIXME: We're insisting on a single use for now, because otherwise we will
1783 // need to make PHI nodes for other users. That can be done once the initial
1784 // transform code lands.
1785 auto *Br = cast<CondBrInst>(ExitingBlock->getTerminator());
1786
1787 using namespace llvm::PatternMatch;
1788 Instruction *L = nullptr;
1789 Value *Ptr = nullptr;
1790 Value *R = nullptr;
1791 if (!match(Br->getCondition(),
1793 m_Value(R))))) {
1795 "Early exit loop with store but no supported condition load",
1796 "NoConditionLoadForEarlyExitLoop", ORE, TheLoop);
1797 return false;
1798 }
1799
1800 // FIXME: Don't rely on operand ordering for the comparison.
1801 if (!TheLoop->isLoopInvariant(R)) {
1803 "Early exit loop with store but no supported condition load",
1804 "NoConditionLoadForEarlyExitLoop", ORE, TheLoop);
1805 return false;
1806 }
1807
1808 // Make sure that the load address is not loop invariant; we want an
1809 // address calculation that we can rotate to the next vector iteration.
1810 const auto *AR = dyn_cast<SCEVAddRecExpr>(PSE.getSE()->getSCEV(Ptr));
1811 if (!AR || AR->getLoop() != TheLoop || !AR->isAffine()) {
1813 "Uncountable exit condition depends on load with an address that is "
1814 "not an add recurrence in the loop",
1815 "EarlyExitLoadInvariantAddress", ORE, TheLoop);
1816 return false;
1817 }
1818
1819 ICFLoopSafetyInfo SafetyInfo;
1820 SafetyInfo.computeLoopSafetyInfo(TheLoop);
1821 LoadInst *Load = cast<LoadInst>(L);
1822 // We need to know that load will be executed before we can hoist a
1823 // copy out to run just before the first iteration.
1824 if (!SafetyInfo.isGuaranteedToExecute(*Load, DT, TheLoop)) {
1826 "Load for uncountable exit not guaranteed to execute",
1827 "ConditionalUncountableExitLoad", ORE, TheLoop);
1828 return false;
1829 }
1830
1831 // Prohibit any potential aliasing with any instruction in the loop which
1832 // might store to memory.
1833 // FIXME: Relax this constraint where possible.
1834 for (auto *BB : TheLoop->blocks()) {
1835 for (auto &I : *BB) {
1836 if (&I == Load)
1837 continue;
1838
1839 if (I.mayWriteToMemory()) {
1840 if (auto *SI = dyn_cast<StoreInst>(&I)) {
1841 AliasResult AR = AA->alias(Ptr, SI->getPointerOperand());
1842 if (AR == AliasResult::NoAlias)
1843 continue;
1844 }
1845
1847 "Cannot determine whether critical uncountable exit load address "
1848 "does not alias with a memory write",
1849 "CantVectorizeAliasWithCriticalUncountableExitLoad", ORE, TheLoop);
1850 return false;
1851 }
1852 }
1853 }
1854
1855 return true;
1856}
1857
1858bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
1859 // Store the result and return it at the end instead of exiting early, in case
1860 // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1861 bool Result = true;
1862
1863 bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1864 // Check whether the loop-related control flow in the loop nest is expected by
1865 // vectorizer.
1866 if (!canVectorizeLoopNestCFG(TheLoop, UseVPlanNativePath)) {
1867 if (DoExtraAnalysis) {
1868 LLVM_DEBUG(dbgs() << "LV: legality check failed: loop nest");
1869 Result = false;
1870 } else {
1871 return false;
1872 }
1873 }
1874
1875 // We need to have a loop header.
1876 LLVM_DEBUG(dbgs() << "LV: Found a loop: " << TheLoop->getHeader()->getName()
1877 << '\n');
1878
1879 // Specific checks for outer loops. We skip the remaining legal checks at this
1880 // point because they don't support outer loops.
1881 if (!TheLoop->isInnermost()) {
1882 assert(UseVPlanNativePath && "VPlan-native path is not enabled.");
1883
1884 if (!canVectorizeOuterLoop()) {
1885 reportVectorizationFailure("Unsupported outer loop",
1886 "UnsupportedOuterLoop", ORE, TheLoop);
1887 // TODO: Implement DoExtraAnalysis when subsequent legal checks support
1888 // outer loops.
1889 return false;
1890 }
1891
1892 LLVM_DEBUG(dbgs() << "LV: We can vectorize this outer loop!\n");
1893 return Result;
1894 }
1895
1896 assert(TheLoop->isInnermost() && "Inner loop expected.");
1897 // Check if we can if-convert non-single-bb loops.
1898 unsigned NumBlocks = TheLoop->getNumBlocks();
1899 if (NumBlocks != 1 && !canVectorizeWithIfConvert()) {
1900 LLVM_DEBUG(dbgs() << "LV: Can't if-convert the loop.\n");
1901 if (DoExtraAnalysis)
1902 Result = false;
1903 else
1904 return false;
1905 }
1906
1907 // Check if we can vectorize the instructions and CFG in this loop.
1908 if (!canVectorizeInstrs()) {
1909 LLVM_DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n");
1910 if (DoExtraAnalysis)
1911 Result = false;
1912 else
1913 return false;
1914 }
1915
1916 if (isa<SCEVCouldNotCompute>(PSE.getBackedgeTakenCount())) {
1917 if (TheLoop->getExitingBlock()) {
1918 reportVectorizationFailure("Cannot vectorize uncountable loop",
1919 "UnsupportedUncountableLoop", ORE, TheLoop);
1920 if (DoExtraAnalysis)
1921 Result = false;
1922 else
1923 return false;
1924 } else {
1925 if (!isVectorizableEarlyExitLoop()) {
1926 assert(UncountableExitType == UncountableExitTrait::None &&
1927 "Must be false without vectorizable early-exit loop");
1928 if (DoExtraAnalysis)
1929 Result = false;
1930 else
1931 return false;
1932 }
1933 }
1934 }
1935
1936 // Go over each instruction and look at memory deps.
1937 if (!canVectorizeMemory()) {
1938 LLVM_DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n");
1939 if (DoExtraAnalysis)
1940 Result = false;
1941 else
1942 return false;
1943 }
1944
1945 // Bail out for ReadWrite loops with uncountable exits for now.
1946 if (UncountableExitType == UncountableExitTrait::ReadWrite) {
1948 "Writes to memory unsupported in early exit loops",
1949 "Cannot vectorize early exit loop with writes to memory",
1950 "WritesInEarlyExitLoop", ORE, TheLoop);
1951 return false;
1952 }
1953
1954 if (Result) {
1955 LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop"
1956 << (LAI->getRuntimePointerChecking()->Need
1957 ? " (with a runtime bound check)"
1958 : "")
1959 << "!\n");
1960 }
1961
1962 // Okay! We've done all the tests. If any have failed, return false. Otherwise
1963 // we can vectorize, and at this point we don't have any other mem analysis
1964 // which may limit our maximum vectorization factor, so just return true with
1965 // no restrictions.
1966 return Result;
1967}
1968
1970 // The only loops we can vectorize without a scalar epilogue, are loops with
1971 // a bottom-test and a single exiting block. We'd have to handle the fact
1972 // that not every instruction executes on the last iteration. This will
1973 // require a lane mask which varies through the vector loop body. (TODO)
1974 if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) {
1975 LLVM_DEBUG(
1976 dbgs()
1977 << "LV: Cannot fold tail by masking. Requires a singe latch exit\n");
1978 return false;
1979 }
1980
1981 LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n");
1982
1983 // The list of pointers that we can safely read and write to remains empty.
1984 SmallPtrSet<Value *, 8> SafePointers;
1985
1986 // Check all blocks for predication, including those that ordinarily do not
1987 // need predication such as the header block.
1989 for (BasicBlock *BB : TheLoop->blocks()) {
1990 if (!blockCanBePredicated(BB, SafePointers, TmpMaskedOp)) {
1991 LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking.\n");
1992 return false;
1993 }
1994 }
1995
1996 LLVM_DEBUG(dbgs() << "LV: can fold tail by masking.\n");
1997
1998 return true;
1999}
2000
2002 // The list of pointers that we can safely read and write to remains empty.
2003 SmallPtrSet<Value *, 8> SafePointers;
2004
2005 // Mark all blocks for predication, including those that ordinarily do not
2006 // need predication such as the header block, and collect instructions needing
2007 // predication in TailFoldedMaskedOp.
2008 for (BasicBlock *BB : TheLoop->blocks()) {
2009 [[maybe_unused]] bool R =
2010 blockCanBePredicated(BB, SafePointers, TailFoldedMaskedOp);
2011 assert(R && "Must be able to predicate block when tail-folding.");
2012 }
2013}
2014
2015} // namespace llvm
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
#define DEBUG_TYPE
Hexagon Common GEP
#define LV_NAME
static cl::opt< bool > HintsAllowReordering("hints-allow-reordering", cl::init(true), cl::Hidden, cl::desc("Allow enabling loop hints to reorder " "FP operations during vectorization."))
static const unsigned MaxInterleaveFactor
Maximum vectorization interleave count.
static cl::opt< bool > AllowStridedPointerIVs("lv-strided-pointer-ivs", cl::init(false), cl::Hidden, cl::desc("Enable recognition of non-constant strided " "pointer induction variables."))
static cl::opt< LoopVectorizeHints::ScalableForceKind > ForceScalableVectorization("scalable-vectorization", cl::init(LoopVectorizeHints::SK_Unspecified), cl::Hidden, cl::desc("Control whether the compiler can use scalable vectors to " "vectorize a loop"), cl::values(clEnumValN(LoopVectorizeHints::SK_FixedWidthOnly, "off", "Scalable vectorization is disabled."), clEnumValN(LoopVectorizeHints::SK_PreferScalable, "preferred", "Scalable vectorization is available and favored when the " "cost is inconclusive."), clEnumValN(LoopVectorizeHints::SK_PreferScalable, "on", "Scalable vectorization is available and favored when the " "cost is inconclusive."), clEnumValN(LoopVectorizeHints::SK_AlwaysScalable, "always", "Scalable vectorization is available and always favored when " "feasible")))
static cl::opt< bool > EnableHistogramVectorization("enable-histogram-loop-vectorization", cl::init(false), cl::Hidden, cl::desc("Enables autovectorization of some loops containing histograms"))
static cl::opt< bool > EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden, cl::desc("Enable if-conversion during vectorization."))
This file defines the LoopVectorizationLegality class.
This file provides a LoopVectorizationPlanner class.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define H(x, y, z)
Definition MD5.cpp:56
#define T
Contains a collection of routines for determining if a given instruction is guaranteed to execute if ...
static bool isSimple(Instruction *I)
static void visit(BasicBlock &Start, std::function< bool(BasicBlock *)> op)
#define LLVM_DEBUG(...)
Definition Debug.h:119
This pass exposes codegen information to IR-level passes.
Virtual Register Rewriter
static const uint32_t IV[8]
Definition blake3_impl.h:83
@ NoAlias
The two locations do not alias at all.
iterator begin() const
Definition ArrayRef.h:129
bool empty() const
Check if the array is empty.
Definition ArrayRef.h:136
LLVM Basic Block Representation.
Definition BasicBlock.h:62
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
Definition BasicBlock.h:237
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
This class represents a function call, abstracting a target machine's calling convention.
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
static constexpr ElementCount getScalable(ScalarTy MinVal)
Definition TypeSize.h:312
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:309
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition Type.cpp:869
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
bool isGuaranteedToExecute(const Instruction &Inst, const DominatorTree *DT, const Loop *CurLoop) const override
Returns true if the instruction in a loop is guaranteed to execute at least once (under the assumptio...
void computeLoopSafetyInfo(const Loop *CurLoop) override
Computes safety information for a loop checks loop body & header for the possibility of may throw exc...
A struct for saving information about induction variables.
static LLVM_ABI bool isInductionPHI(PHINode *Phi, const Loop *L, ScalarEvolution *SE, InductionDescriptor &D, ArrayRef< const SCEVPredicate * > NoWrapPreds={}, const SCEV *Expr=nullptr, SmallVectorImpl< Instruction * > *CastsToIgnore=nullptr)
Returns true if Phi is an induction in the loop L.
@ IK_FpInduction
Floating point induction variable.
@ IK_PtrInduction
Pointer induction var. Step = C.
@ IK_IntInduction
Integer induction variable. Step = C.
Instruction * getExactFPMathInst()
Returns floating-point induction operator that does not allow reassociation (transforming the inducti...
Class to represent integer types.
An instruction for reading from memory.
const MemoryDepChecker & getDepChecker() const
the Memory Dependence Checker which can determine the loop-independent and loop-carried dependences b...
static LLVM_ABI bool blockNeedsPredication(const BasicBlock *BB, const Loop *TheLoop, const DominatorTree *DT)
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
unsigned getNumBackEdges() const
Calculate the number of back edges to the loop header.
iterator_range< block_iterator > blocks() const
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
bool isLoopHeader(const BlockT *BB) const
LLVM_ABI bool isInvariantStoreOfReduction(StoreInst *SI)
Returns True if given store is a final invariant store of one of the reductions found in the loop.
void collectUnitStridePredicates() const
Add unit stride predicates for memory accesses to PSE, if runtime checks are allowed and an inner loo...
LLVM_ABI bool isInvariantAddressOfReduction(Value *V)
Returns True if given address is invariant and is used to store recurrent expression.
LLVM_ABI bool canVectorize(bool UseVPlanNativePath)
Returns true if it is legal to vectorize this loop.
LLVM_ABI bool blockNeedsPredication(const BasicBlock *BB) const
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
LLVM_ABI int isConsecutivePtr(Type *AccessTy, Value *Ptr) const
Check if this pointer is consecutive when vectorizing.
bool hasUncountableExitWithSideEffects() const
Returns true if this is an early exit loop with state-changing or potentially-faulting operations and...
LLVM_ABI bool canVectorizeFPMath(bool EnableStrictReductions)
Returns true if it is legal to vectorize the FP math operations in this loop.
LLVM_ABI bool isFixedOrderRecurrence(const PHINode *Phi) const
Returns True if Phi is a fixed-order recurrence in this loop.
LLVM_ABI const InductionDescriptor * getPointerInductionDescriptor(PHINode *Phi) const
Returns a pointer to the induction descriptor, if Phi is pointer induction.
LLVM_ABI const InductionDescriptor * getIntOrFpInductionDescriptor(PHINode *Phi) const
Returns a pointer to the induction descriptor, if Phi is an integer or floating point induction.
LLVM_ABI bool isInductionPhi(const Value *V) const
Returns True if V is a Phi node of an induction variable in this loop.
const InductionList & getInductionVars() const
Returns the induction variables found in the loop.
LLVM_ABI bool isInvariant(Value *V) const
Returns true if V is invariant across all loop iterations according to SCEV.
const ReductionList & getReductionVars() const
Returns the reduction variables found in the loop.
LLVM_ABI bool canFoldTailByMasking() const
Return true if we can vectorize this loop while folding its tail by masking.
LLVM_ABI void prepareToFoldTailByMasking()
Mark all respective loads/stores for masking.
bool hasUncountableEarlyExit() const
Returns true if the loop has uncountable early exits, i.e.
LLVM_ABI bool isUniformMemOp(Instruction &I, std::optional< ElementCount > VF) const
A uniform memory op is a load or store which accesses the same memory location on all VF lanes,...
LLVM_ABI bool isUniform(Value *V, std::optional< ElementCount > VF) const
Returns true if value V is uniform across VF lanes, when VF is provided, and otherwise if V is invari...
LLVM_ABI bool isInductionVariable(const Value *V) const
Returns True if V can be considered as an induction variable in this loop.
LLVM_ABI bool isCastedInductionVariable(const Value *V) const
Returns True if V is a cast that is part of an induction def-use chain, and had been proven to be red...
@ SK_PreferScalable
Vectorize loops using scalable vectors or fixed-width vectors, but favor scalable vectors when the co...
@ SK_AlwaysScalable
Always vectorize loops using scalable vectors if feasible (i.e.
@ SK_FixedWidthOnly
Disables vectorization with scalable vectors.
LLVM_ABI bool allowVectorization(Function *F, Loop *L, bool VectorizeOnlyWhenForced) const
LLVM_ABI bool allowReordering() const
When enabling loop hints are provided we allow the vectorizer to change the order of operations that ...
LLVM_ABI void emitRemarkWithHints() const
Dumps all the hint information.
LLVM_ABI void setAlreadyVectorized()
Mark the loop L as already vectorized by setting the width to 1.
LLVM_ABI LoopVectorizeHints(const Loop *L, bool InterleaveOnlyWhenForced, OptimizationRemarkEmitter &ORE, const TargetTransformInfo *TTI=nullptr)
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition LoopInfo.cpp:67
PHINode * getCanonicalInductionVariable() const
Check to see if the loop has a canonical induction variable: an integer recurrence that starts at 0 a...
Definition LoopInfo.cpp:174
MDNode * getLoopID() const
Return the llvm.loop loop id metadata node for this loop if it is present.
Definition LoopInfo.cpp:523
Metadata node.
Definition Metadata.h:1075
const MDOperand & getOperand(unsigned I) const
Definition Metadata.h:1439
ArrayRef< MDOperand > operands() const
Definition Metadata.h:1437
unsigned getNumOperands() const
Return number of MDNode operands.
Definition Metadata.h:1445
Tracking metadata reference owned by Metadata.
Definition Metadata.h:897
A single uniqued string.
Definition Metadata.h:722
LLVM_ABI StringRef getString() const
Definition Metadata.cpp:632
iterator find(const KeyT &Key)
Definition MapVector.h:156
Checks memory dependences among accesses to the same underlying object to determine whether there vec...
const SmallVectorImpl< Dependence > * getDependences() const
Returns the memory dependences.
Root of the metadata hierarchy.
Definition Metadata.h:64
Diagnostic information for optimization analysis remarks.
The optimization diagnostic interface.
bool allowExtraAnalysis(StringRef PassName) const
Whether we allow for extra compile-time budget to perform more analysis to produce fewer false positi...
LLVM_ABI void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Diagnostic information for missed-optimization remarks.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Instruction * getExactFPMathInst() const
Returns 1st non-reassociative FP instruction in the PHI node's use-chain.
static LLVM_ABI bool isFixedOrderRecurrence(PHINode *Phi, Loop *TheLoop, DominatorTree *DT)
Returns true if Phi is a fixed-order recurrence.
bool hasExactFPMath() const
Returns true if the recurrence has floating-point math that requires precise (ordered) operations.
static LLVM_ABI bool isReductionPHI(PHINode *Phi, Loop *TheLoop, RecurrenceDescriptor &RedDes, DemandedBits *DB=nullptr, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr, ScalarEvolution *SE=nullptr)
Returns true if Phi is a reduction in TheLoop.
bool hasUsesOutsideReductionChain() const
Returns true if the reduction PHI has any uses outside the reduction chain.
RecurKind getRecurrenceKind() const
bool isOrdered() const
Expose an ordered FP reduction to the instance users.
StoreInst * IntermediateStore
Reductions may store temporary or final result to an invariant address.
static bool isMinMaxRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is any min/max kind.
SCEVUse getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
This visitor recursively visits a SCEV expression and re-writes it.
const SCEV * visit(const SCEV *S)
This class represents an analyzed expression in the program.
static constexpr auto FlagAnyWrap
The main scalar evolution driver.
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
LLVM_ABI bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
LLVM_ABI const SCEV * getCouldNotCompute()
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Value * getPointerOperand()
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
Provides information about what library functions are available for the current target.
void getWidestVF(StringRef ScalarF, ElementCount &FixedVF, ElementCount &ScalableVF) const
Returns the largest vectorization factor used in the list of vector functions.
bool isFunctionVectorizable(StringRef F, const ElementCount &VF) const
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
Definition Twine.cpp:17
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:309
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:282
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:232
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition Type.h:186
bool isIntOrPtrTy() const
Return true if this is an integer type or a pointer type.
Definition Type.h:270
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
Value * getOperand(unsigned i) const
Definition User.h:207
static bool hasMaskedVariant(const CallInst &CI, std::optional< ElementCount > VF=std::nullopt)
Definition VectorUtils.h:87
static SmallVector< VFInfo, 8 > getMappings(const CallInst &CI)
Retrieve all the VFInfo instances associated to the CallInst CI.
Definition VectorUtils.h:76
LLVM Value Representation.
Definition Value.h:75
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:318
static LLVM_ABI bool isValidElementType(Type *ElemTy)
Return true if the specified type is valid as a element type.
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:230
constexpr bool isZero() const
Definition TypeSize.h:153
const ParentTy * getParent() const
Definition ilist_node.h:34
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
void reportVectorizationFailure(const StringRef DebugMsg, const StringRef OREMsg, const StringRef ORETag, OptimizationRemarkEmitter *ORE, const Loop *TheLoop, Instruction *I=nullptr)
Reports a vectorization failure: print DebugMsg for debugging purposes along with the corresponding o...
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
TwoOps_match< ValueOpTy, PointerOpTy, Instruction::Store > m_Store(const ValueOpTy &ValueOp, const PointerOpTy &PointerOp)
Matches StoreInst.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
bool match(Val *V, const Pattern &P)
match_bind< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
auto m_BinOp()
Match an arbitrary binary operation and ignore it.
auto m_Value()
Match an arbitrary value and ignore it.
match_combine_or< match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > >, OpTy > m_ZExtOrSExtOrSelf(const OpTy &Op)
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract(Y &&MD)
Extract a Value from Metadata, if any.
Definition Metadata.h:696
Add a small namespace to avoid name clashes with the classes used in the streaming interface.
NodeAddr< PhiNode * > Phi
Definition RDFGraph.h:390
NodeAddr< FuncNode * > Func
Definition RDFGraph.h:393
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:315
@ Offset
Definition DWP.cpp:558
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1668
LLVM_ABI Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
auto successors(const MachineBasicBlock *BB)
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2207
static bool isUniformLoop(Loop *Lp, Loop *OuterLp)
LLVM_ABI bool mustSuppressSpeculation(const LoadInst &LI)
Return true if speculation of the given load must be suppressed to avoid ordering or interfering with...
Definition Loads.cpp:432
LLVM_ABI bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
auto reverse(ContainerTy &&C)
Definition STLExtras.h:407
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
static IntegerType * getWiderInductionTy(const DataLayout &DL, Type *Ty0, Type *Ty1)
static IntegerType * getInductionIntegerTy(const DataLayout &DL, Type *Ty)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
LLVM_ABI bool hasDisableAllTransformsHint(const Loop *L)
Look for the loop attribute that disables all transformation heuristic.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
static bool storeToSameAddress(ScalarEvolution *SE, StoreInst *A, StoreInst *B)
Returns true if A and B have same pointer operands or same SCEVs addresses.
bool canVectorizeTy(Type *Ty)
Returns true if Ty is a valid vector element type, void, or an unpacked literal struct where all elem...
TargetTransformInfo TTI
LLVM_ABI bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic has a scalar operand.
ArrayRef(const T &OneElt) -> ArrayRef< T >
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI bool isReadOnlyLoop(Loop *L, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, SmallVectorImpl< LoadInst * > &NonDereferenceableAndAlignedLoads, SmallVectorImpl< const SCEVPredicate * > *Predicates=nullptr)
Returns true if the loop contains read-only memory accesses and doesn't throw.
Definition Loads.cpp:892
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2191
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1946
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
static bool findHistogram(LoadInst *LI, StoreInst *HSt, Loop *TheLoop, const PredicatedScalarEvolution &PSE, SmallVectorImpl< HistogramInfo > &Histograms)
Find histogram operations that match high-level code in loops:
LLVM_ABI bool isGuaranteedNotToBePoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Returns true if V cannot be poison, but may be undef.
static bool isTLIScalarize(const TargetLibraryInfo &TLI, const CallInst &CI)
Checks if a function is scalarizable according to the TLI, in the sense that it should be vectorized ...
LLVM_ABI bool isDereferenceableAndAlignedInLoop(LoadInst *LI, Loop *L, ScalarEvolution &SE, DominatorTree &DT, AssumptionCache *AC=nullptr, SmallVectorImpl< const SCEVPredicate * > *Predicates=nullptr)
Return true if we can prove that the given load (which is assumed to be within the specified loop) wo...
Definition Loads.cpp:290
constexpr detail::IsaCheckPredicate< Types... > IsaPred
Function object wrapper for the llvm::isa type check.
Definition Casting.h:866
LLVM_ABI std::optional< int64_t > getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const DominatorTree &DT, const DenseMap< Value *, const SCEV * > &StridesMap=DenseMap< Value *, const SCEV * >(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of the access type size.
SCEVUseT< const SCEV * > SCEVUse
bool SCEVExprContains(const SCEV *Root, PredTy Pred)
Return true if any node in Root satisfies the predicate Pred.
Dependece between memory access instructions.
Instruction * getDestination(const MemoryDepChecker &DepChecker) const
Return the destination instruction of the dependence.
Instruction * getSource(const MemoryDepChecker &DepChecker) const
Return the source instruction of the dependence.
static LLVM_ABI VectorizationSafetyStatus isSafeForVectorization(DepType Type)
Dependence types that don't prevent vectorization.
TODO: The following VectorizationFactor was pulled out of LoopVectorizationCostModel class.
Collection of parameters shared beetween the Loop Vectorizer and the Loop Access Analysis.
static LLVM_ABI const unsigned MaxVectorWidth
Maximum SIMD width.
static LLVM_ABI bool isInterleaveForced()
True if force-vector-interleave was specified by the user.
static LLVM_ABI unsigned VectorizationInterleave
Interleave factor as overridden by the user.