LLVM 23.0.0git
LoopVectorizationLegality.cpp
Go to the documentation of this file.
1//===- LoopVectorizationLegality.cpp --------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file provides loop vectorization legality analysis. Original code
10// resided in LoopVectorize.cpp for a long time.
11//
12// At this point, it is implemented as a utility class, not as an analysis
13// pass. It should be easy to create an analysis pass around it if there
14// is a need (but D45420 needs to happen first).
15//
16
19#include "llvm/Analysis/Loads.h"
28#include "llvm/IR/Dominators.h"
33
34using namespace llvm;
35using namespace PatternMatch;
36
37#define LV_NAME "loop-vectorize"
38#define DEBUG_TYPE LV_NAME
39
40static cl::opt<bool>
41 EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
42 cl::desc("Enable if-conversion during vectorization."));
43
44static cl::opt<bool>
45AllowStridedPointerIVs("lv-strided-pointer-ivs", cl::init(false), cl::Hidden,
46 cl::desc("Enable recognition of non-constant strided "
47 "pointer induction variables."));
48
49static cl::opt<bool>
50 HintsAllowReordering("hints-allow-reordering", cl::init(true), cl::Hidden,
51 cl::desc("Allow enabling loop hints to reorder "
52 "FP operations during vectorization."));
53
54// TODO: Move size-based thresholds out of legality checking, make cost based
55// decisions instead of hard thresholds.
57 "vectorize-scev-check-threshold", cl::init(16), cl::Hidden,
58 cl::desc("The maximum number of SCEV checks allowed."));
59
61 "pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden,
62 cl::desc("The maximum number of SCEV checks allowed with a "
63 "vectorize(enable) pragma"));
64
67 "scalable-vectorization", cl::init(LoopVectorizeHints::SK_Unspecified),
69 cl::desc("Control whether the compiler can use scalable vectors to "
70 "vectorize a loop"),
73 "Scalable vectorization is disabled."),
76 "Scalable vectorization is available and favored when the "
77 "cost is inconclusive."),
80 "Scalable vectorization is available and favored when the "
81 "cost is inconclusive."),
84 "Scalable vectorization is available and always favored when "
85 "feasible")));
86
88 "enable-histogram-loop-vectorization", cl::init(false), cl::Hidden,
89 cl::desc("Enables autovectorization of some loops containing histograms"));
90
91/// Maximum vectorization interleave count.
92static const unsigned MaxInterleaveFactor = 16;
93
94namespace llvm {
95
96bool LoopVectorizeHints::Hint::validate(unsigned Val) {
97 switch (Kind) {
98 case HK_WIDTH:
100 case HK_INTERLEAVE:
101 return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor;
102 case HK_FORCE:
103 return (Val <= 1);
104 case HK_ISVECTORIZED:
105 case HK_PREDICATE:
106 case HK_SCALABLE:
107 return (Val == 0 || Val == 1);
108 }
109 return false;
110}
111
113 bool InterleaveOnlyWhenForced,
116 : Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH),
117 Interleave("interleave.count", InterleaveOnlyWhenForced, HK_INTERLEAVE),
118 Force("vectorize.enable", FK_Undefined, HK_FORCE),
119 IsVectorized("isvectorized", 0, HK_ISVECTORIZED),
120 Predicate("vectorize.predicate.enable", FK_Undefined, HK_PREDICATE),
121 Scalable("vectorize.scalable.enable", SK_Unspecified, HK_SCALABLE),
122 TheLoop(L), ORE(ORE) {
123 // Populate values with existing loop metadata.
124 getHintsFromMetadata();
125
126 // force-vector-interleave overrides DisableInterleaving.
129
130 // If the metadata doesn't explicitly specify whether to enable scalable
131 // vectorization, then decide based on the following criteria (increasing
132 // level of priority):
133 // - Target default
134 // - Metadata width
135 // - Force option (always overrides)
137 if (TTI)
138 Scalable.Value = TTI->enableScalableVectorization() ? SK_PreferScalable
140
141 if (Width.Value)
142 // If the width is set, but the metadata says nothing about the scalable
143 // property, then assume it concerns only a fixed-width UserVF.
144 // If width is not set, the flag takes precedence.
145 Scalable.Value = SK_FixedWidthOnly;
146 }
147
148 // If the flag is set to force any use of scalable vectors, override the loop
149 // hints.
150 if (ForceScalableVectorization.getValue() !=
152 Scalable.Value = ForceScalableVectorization.getValue();
153
154 // Scalable vectorization is disabled if no preference is specified.
156 Scalable.Value = SK_FixedWidthOnly;
157
158 if (IsVectorized.Value != 1)
159 // If the vectorization width and interleaving count are both 1 then
160 // consider the loop to have been already vectorized because there's
161 // nothing more that we can do.
162 IsVectorized.Value =
164 LLVM_DEBUG(if (InterleaveOnlyWhenForced && getInterleave() == 1) dbgs()
165 << "LV: Interleaving disabled by the pass manager\n");
166}
167
169 TheLoop->addIntLoopAttribute("llvm.loop.isvectorized", 1,
170 {Twine(Prefix(), "vectorize.").str(),
171 Twine(Prefix(), "interleave.").str()});
172
173 // Update internal cache.
174 IsVectorized.Value = 1;
175}
176
177void LoopVectorizeHints::reportDisallowedVectorization(
178 const StringRef DebugMsg, const StringRef RemarkName,
179 const StringRef RemarkMsg, const Loop *L) const {
180 LLVM_DEBUG(dbgs() << "LV: Not vectorizing: " << DebugMsg << ".\n");
181 ORE.emit(OptimizationRemarkMissed(LV_NAME, RemarkName, L->getStartLoc(),
182 L->getHeader())
183 << "loop not vectorized: " << RemarkMsg);
184}
185
187 Function *F, Loop *L, bool VectorizeOnlyWhenForced) const {
189 if (Force.Value == LoopVectorizeHints::FK_Disabled) {
190 reportDisallowedVectorization("#pragma vectorize disable",
191 "MissedExplicitlyDisabled",
192 "vectorization is explicitly disabled", L);
193 } else if (hasDisableAllTransformsHint(L)) {
194 reportDisallowedVectorization("loop hasDisableAllTransformsHint",
195 "MissedTransformsDisabled",
196 "loop transformations are disabled", L);
197 } else {
198 llvm_unreachable("loop vect disabled for an unknown reason");
199 }
200 return false;
201 }
202
203 if (VectorizeOnlyWhenForced && getForce() != LoopVectorizeHints::FK_Enabled) {
204 reportDisallowedVectorization(
205 "VectorizeOnlyWhenForced is set, and no #pragma vectorize enable",
206 "MissedForceOnly", "only vectorizing loops that explicitly request it",
207 L);
208 return false;
209 }
210
211 if (getIsVectorized() == 1) {
212 LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n");
213 // FIXME: Add interleave.disable metadata. This will allow
214 // vectorize.disable to be used without disabling the pass and errors
215 // to differentiate between disabled vectorization and a width of 1.
216 ORE.emit([&]() {
217 return OptimizationRemarkAnalysis(LV_NAME, "AllDisabled",
218 L->getStartLoc(), L->getHeader())
219 << "loop not vectorized: vectorization and interleaving are "
220 "explicitly disabled, or the loop has already been "
221 "vectorized";
222 });
223 return false;
224 }
225
226 return true;
227}
228
230 using namespace ore;
231
232 ORE.emit([&]() {
233 if (Force.Value == LoopVectorizeHints::FK_Disabled)
234 return OptimizationRemarkMissed(LV_NAME, "MissedExplicitlyDisabled",
235 TheLoop->getStartLoc(),
236 TheLoop->getHeader())
237 << "loop not vectorized: vectorization is explicitly disabled";
238
239 OptimizationRemarkMissed R(LV_NAME, "MissedDetails", TheLoop->getStartLoc(),
240 TheLoop->getHeader());
241 R << "loop not vectorized";
242 if (Force.Value == LoopVectorizeHints::FK_Enabled) {
243 R << " (Force=" << NV("Force", true);
244 if (Width.Value != 0)
245 R << ", Vector Width=" << NV("VectorWidth", getWidth());
246 if (getInterleave() != 0)
247 R << ", Interleave Count=" << NV("InterleaveCount", getInterleave());
248 R << ")";
249 }
250 return R;
251 });
252}
253
255 // Allow the vectorizer to change the order of operations if enabling
256 // loop hints are provided
257 ElementCount EC = getWidth();
258 return HintsAllowReordering &&
260 EC.getKnownMinValue() > 1);
261}
262
263void LoopVectorizeHints::getHintsFromMetadata() {
264 MDNode *LoopID = TheLoop->getLoopID();
265 if (!LoopID)
266 return;
267
268 // First operand should refer to the loop id itself.
269 assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
270 assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
271
272 for (const MDOperand &MDO : llvm::drop_begin(LoopID->operands())) {
273 const MDString *S = nullptr;
275
276 // The expected hint is either a MDString or a MDNode with the first
277 // operand a MDString.
278 if (const MDNode *MD = dyn_cast<MDNode>(MDO)) {
279 if (!MD || MD->getNumOperands() == 0)
280 continue;
281 S = dyn_cast<MDString>(MD->getOperand(0));
282 for (unsigned Idx = 1; Idx < MD->getNumOperands(); ++Idx)
283 Args.push_back(MD->getOperand(Idx));
284 } else {
285 S = dyn_cast<MDString>(MDO);
286 assert(Args.size() == 0 && "too many arguments for MDString");
287 }
288
289 if (!S)
290 continue;
291
292 // Check if the hint starts with the loop metadata prefix.
293 StringRef Name = S->getString();
294 if (Args.size() == 1)
295 setHint(Name, Args[0]);
296 }
297}
298
299void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) {
300 if (!Name.consume_front(Prefix()))
301 return;
302
303 const ConstantInt *C = mdconst::dyn_extract<ConstantInt>(Arg);
304 if (!C)
305 return;
306 unsigned Val = C->getZExtValue();
307
308 Hint *Hints[] = {&Width, &Interleave, &Force,
309 &IsVectorized, &Predicate, &Scalable};
310 for (auto *H : Hints) {
311 if (Name == H->Name) {
312 if (H->validate(Val))
313 H->Value = Val;
314 else
315 LLVM_DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n");
316 break;
317 }
318 }
319}
320
321// Return true if the inner loop \p Lp is uniform with regard to the outer loop
322// \p OuterLp (i.e., if the outer loop is vectorized, all the vector lanes
323// executing the inner loop will execute the same iterations). This check is
324// very constrained for now but it will be relaxed in the future. \p Lp is
325// considered uniform if it meets all the following conditions:
326// 1) it has a canonical IV (starting from 0 and with stride 1),
327// 2) its latch terminator is a conditional branch and,
328// 3) its latch condition is a compare instruction whose operands are the
329// canonical IV and an OuterLp invariant.
330// This check doesn't take into account the uniformity of other conditions not
331// related to the loop latch because they don't affect the loop uniformity.
332//
333// NOTE: We decided to keep all these checks and its associated documentation
334// together so that we can easily have a picture of the current supported loop
335// nests. However, some of the current checks don't depend on \p OuterLp and
336// would be redundantly executed for each \p Lp if we invoked this function for
337// different candidate outer loops. This is not the case for now because we
338// don't currently have the infrastructure to evaluate multiple candidate outer
339// loops and \p OuterLp will be a fixed parameter while we only support explicit
340// outer loop vectorization. It's also very likely that these checks go away
341// before introducing the aforementioned infrastructure. However, if this is not
342// the case, we should move the \p OuterLp independent checks to a separate
343// function that is only executed once for each \p Lp.
344static bool isUniformLoop(Loop *Lp, Loop *OuterLp) {
345 assert(Lp->getLoopLatch() && "Expected loop with a single latch.");
346
347 // If Lp is the outer loop, it's uniform by definition.
348 if (Lp == OuterLp)
349 return true;
350 assert(OuterLp->contains(Lp) && "OuterLp must contain Lp.");
351
352 // 1.
354 if (!IV) {
355 LLVM_DEBUG(dbgs() << "LV: Canonical IV not found.\n");
356 return false;
357 }
358
359 // 2.
360 BasicBlock *Latch = Lp->getLoopLatch();
361 auto *LatchBr = dyn_cast<CondBrInst>(Latch->getTerminator());
362 if (!LatchBr) {
363 LLVM_DEBUG(dbgs() << "LV: Unsupported loop latch branch.\n");
364 return false;
365 }
366
367 // 3.
368 auto *LatchCmp = dyn_cast<CmpInst>(LatchBr->getCondition());
369 if (!LatchCmp) {
371 dbgs() << "LV: Loop latch condition is not a compare instruction.\n");
372 return false;
373 }
374
375 Value *CondOp0 = LatchCmp->getOperand(0);
376 Value *CondOp1 = LatchCmp->getOperand(1);
377 Value *IVUpdate = IV->getIncomingValueForBlock(Latch);
378 if (!(CondOp0 == IVUpdate && OuterLp->isLoopInvariant(CondOp1)) &&
379 !(CondOp1 == IVUpdate && OuterLp->isLoopInvariant(CondOp0))) {
380 LLVM_DEBUG(dbgs() << "LV: Loop latch condition is not uniform.\n");
381 return false;
382 }
383
384 return true;
385}
386
387// Return true if \p Lp and all its nested loops are uniform with regard to \p
388// OuterLp.
389static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp) {
390 if (!isUniformLoop(Lp, OuterLp))
391 return false;
392
393 // Check if nested loops are uniform.
394 for (Loop *SubLp : *Lp)
395 if (!isUniformLoopNest(SubLp, OuterLp))
396 return false;
397
398 return true;
399}
400
402 assert(Ty->isIntOrPtrTy() && "Expected integer or pointer type");
403
404 if (Ty->isPointerTy())
405 return DL.getIntPtrType(Ty->getContext(), Ty->getPointerAddressSpace());
406
407 // It is possible that char's or short's overflow when we ask for the loop's
408 // trip count, work around this by changing the type size.
409 if (Ty->getScalarSizeInBits() < 32)
410 return Type::getInt32Ty(Ty->getContext());
411
412 return cast<IntegerType>(Ty);
413}
414
416 Type *Ty1) {
419 return TyA->getScalarSizeInBits() > TyB->getScalarSizeInBits() ? TyA : TyB;
420}
421
422/// Check that the instruction has outside loop users and is not an
423/// identified reduction variable.
424static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
425 SmallPtrSetImpl<Value *> &AllowedExit) {
426 // Reductions, Inductions and non-header phis are allowed to have exit users. All
427 // other instructions must not have external users.
428 if (!AllowedExit.count(Inst))
429 // Check that all of the users of the loop are inside the BB.
430 for (User *U : Inst->users()) {
432 // This user may be a reduction exit value.
433 if (!TheLoop->contains(UI)) {
434 LLVM_DEBUG(dbgs() << "LV: Found an outside user for : " << *UI << '\n');
435 return true;
436 }
437 }
438 return false;
439}
440
441/// Returns true if A and B have same pointer operands or same SCEVs addresses
443 StoreInst *B) {
444 // Compare store
445 if (A == B)
446 return true;
447
448 // Otherwise Compare pointers
449 Value *APtr = A->getPointerOperand();
450 Value *BPtr = B->getPointerOperand();
451 if (APtr == BPtr)
452 return true;
453
454 // Otherwise compare address SCEVs
455 return SE->getSCEV(APtr) == SE->getSCEV(BPtr);
456}
457
459 Value *Ptr) const {
460 // FIXME: Currently, the set of symbolic strides is sometimes queried before
461 // it's collected. This happens from canVectorizeWithIfConvert, when the
462 // pointer is checked to reference consecutive elements suitable for a
463 // masked access.
464 // Stride versioning requires adding a SCEV equality predicate; only consult
465 // the symbolic strides when runtime SCEV checks are permitted.
466 const auto &Strides = LAI && AllowRuntimeSCEVChecks
467 ? LAI->getSymbolicStrides()
469 int Stride = getPtrStride(PSE, AccessTy, Ptr, TheLoop, *DT, Strides,
470 AllowRuntimeSCEVChecks, false)
471 .value_or(0);
472 if (Stride == 1 || Stride == -1)
473 return Stride;
474 return 0;
475}
476
478 return LAI->isInvariant(V);
479}
480
481namespace {
482/// A rewriter to build the SCEVs for each of the VF lanes in the expected
483/// vectorized loop, which can then be compared to detect their uniformity. This
484/// is done by replacing the AddRec SCEVs of the original scalar loop (TheLoop)
485/// with new AddRecs where the step is multiplied by StepMultiplier and Offset *
486/// Step is added. Also checks if all sub-expressions are analyzable w.r.t.
487/// uniformity.
488class SCEVAddRecForUniformityRewriter
489 : public SCEVRewriteVisitor<SCEVAddRecForUniformityRewriter> {
490 /// Multiplier to be applied to the step of AddRecs in TheLoop.
491 unsigned StepMultiplier;
492
493 /// Offset to be added to the AddRecs in TheLoop.
494 unsigned Offset;
495
496 /// Loop for which to rewrite AddRecsFor.
497 Loop *TheLoop;
498
499 /// Is any sub-expressions not analyzable w.r.t. uniformity?
500 bool CannotAnalyze = false;
501
502 bool canAnalyze() const { return !CannotAnalyze; }
503
504public:
505 SCEVAddRecForUniformityRewriter(ScalarEvolution &SE, unsigned StepMultiplier,
506 unsigned Offset, Loop *TheLoop)
507 : SCEVRewriteVisitor(SE), StepMultiplier(StepMultiplier), Offset(Offset),
508 TheLoop(TheLoop) {}
509
510 const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
511 assert(Expr->getLoop() == TheLoop &&
512 "addrec outside of TheLoop must be invariant and should have been "
513 "handled earlier");
514 // Build a new AddRec by multiplying the step by StepMultiplier and
515 // incrementing the start by Offset * step.
516 Type *Ty = Expr->getType();
517 const SCEV *Step = Expr->getStepRecurrence(SE);
518 if (!SE.isLoopInvariant(Step, TheLoop)) {
519 CannotAnalyze = true;
520 return Expr;
521 }
522 const SCEV *NewStep =
523 SE.getMulExpr(Step, SE.getConstant(Ty, StepMultiplier));
524 const SCEV *ScaledOffset = SE.getMulExpr(Step, SE.getConstant(Ty, Offset));
525 const SCEV *NewStart =
526 SE.getAddExpr(Expr->getStart(), SCEVUse(ScaledOffset));
527 return SE.getAddRecExpr(NewStart, NewStep, TheLoop, SCEV::FlagAnyWrap);
528 }
529
530 const SCEV *visit(const SCEV *S) {
531 if (CannotAnalyze || SE.isLoopInvariant(S, TheLoop))
532 return S;
534 }
535
536 const SCEV *visitUnknown(const SCEVUnknown *S) {
537 if (SE.isLoopInvariant(S, TheLoop))
538 return S;
539 // The value could vary across iterations.
540 CannotAnalyze = true;
541 return S;
542 }
543
544 const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *S) {
545 // Could not analyze the expression.
546 CannotAnalyze = true;
547 return S;
548 }
549
550 static const SCEV *rewrite(const SCEV *S, ScalarEvolution &SE,
551 unsigned StepMultiplier, unsigned Offset,
552 Loop *TheLoop) {
553 /// Bail out if the expression does not contain an UDiv expression.
554 /// Uniform values which are not loop invariant require operations to strip
555 /// out the lowest bits. For now just look for UDivs and use it to avoid
556 /// re-writing UDIV-free expressions for other lanes to limit compile time.
557 if (!SCEVExprContains(S,
558 [](const SCEV *S) { return isa<SCEVUDivExpr>(S); }))
559 return SE.getCouldNotCompute();
560
561 SCEVAddRecForUniformityRewriter Rewriter(SE, StepMultiplier, Offset,
562 TheLoop);
563 const SCEV *Result = Rewriter.visit(S);
564
565 if (Rewriter.canAnalyze())
566 return Result;
567 return SE.getCouldNotCompute();
568 }
569};
570
571} // namespace
572
574 if (isInvariant(V))
575 return true;
576 if (VF.isScalable())
577 return false;
578 if (VF.isScalar())
579 return true;
580
581 // Since we rely on SCEV for uniformity, if the type is not SCEVable, it is
582 // never considered uniform.
583 auto *SE = PSE.getSE();
584 if (!SE->isSCEVable(V->getType()))
585 return false;
586 const SCEV *S = SE->getSCEV(V);
587
588 // Rewrite AddRecs in TheLoop to step by VF and check if the expression for
589 // lane 0 matches the expressions for all other lanes.
590 unsigned FixedVF = VF.getKnownMinValue();
591 const SCEV *FirstLaneExpr =
592 SCEVAddRecForUniformityRewriter::rewrite(S, *SE, FixedVF, 0, TheLoop);
593 if (isa<SCEVCouldNotCompute>(FirstLaneExpr))
594 return false;
595
596 // Make sure the expressions for lanes FixedVF-1..1 match the expression for
597 // lane 0. We check lanes in reverse order for compile-time, as frequently
598 // checking the last lane is sufficient to rule out uniformity.
599 return all_of(reverse(seq<unsigned>(1, FixedVF)), [&](unsigned I) {
600 const SCEV *IthLaneExpr =
601 SCEVAddRecForUniformityRewriter::rewrite(S, *SE, FixedVF, I, TheLoop);
602 return FirstLaneExpr == IthLaneExpr;
603 });
604}
605
607 ElementCount VF) const {
609 if (!Ptr)
610 return false;
611 // Note: There's nothing inherent which prevents predicated loads and
612 // stores from being uniform. The current lowering simply doesn't handle
613 // it; in particular, the cost model distinguishes scatter/gather from
614 // scalar w/predication, and we currently rely on the scalar path.
615 return isUniform(Ptr, VF) && !blockNeedsPredication(I.getParent());
616}
617
618bool LoopVectorizationLegality::canVectorizeOuterLoop() {
619 assert(!TheLoop->isInnermost() && "We are not vectorizing an outer loop.");
620 // Store the result and return it at the end instead of exiting early, in case
621 // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
622 bool Result = true;
623 bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
624
625 for (BasicBlock *BB : TheLoop->blocks()) {
626 // Check whether the BB terminator is a branch. Any other terminator is
627 // not supported yet.
628 Instruction *Term = BB->getTerminator();
630 reportVectorizationFailure("Unsupported basic block terminator",
631 "loop control flow is not understood by vectorizer",
632 "CFGNotUnderstood", ORE, TheLoop);
633 if (DoExtraAnalysis)
634 Result = false;
635 else
636 return false;
637 }
638
639 // Check whether the branch is a supported one. Only unconditional
640 // branches, conditional branches with an outer loop invariant condition or
641 // backedges are supported.
642 // FIXME: We skip these checks when VPlan predication is enabled as we
643 // want to allow divergent branches. This whole check will be removed
644 // once VPlan predication is on by default.
645 auto *Br = dyn_cast<CondBrInst>(Term);
646 if (Br && !TheLoop->isLoopInvariant(Br->getCondition()) &&
647 !LI->isLoopHeader(Br->getSuccessor(0)) &&
648 !LI->isLoopHeader(Br->getSuccessor(1))) {
649 reportVectorizationFailure("Unsupported conditional branch",
650 "loop control flow is not understood by vectorizer",
651 "CFGNotUnderstood", ORE, TheLoop);
652 if (DoExtraAnalysis)
653 Result = false;
654 else
655 return false;
656 }
657 }
658
659 // Check whether inner loops are uniform. At this point, we only support
660 // simple outer loops scenarios with uniform nested loops.
661 if (!isUniformLoopNest(TheLoop /*loop nest*/,
662 TheLoop /*context outer loop*/)) {
663 reportVectorizationFailure("Outer loop contains divergent loops",
664 "loop control flow is not understood by vectorizer",
665 "CFGNotUnderstood", ORE, TheLoop);
666 if (DoExtraAnalysis)
667 Result = false;
668 else
669 return false;
670 }
671
672 // Check whether we are able to set up outer loop induction.
673 if (!setupOuterLoopInductions()) {
674 reportVectorizationFailure("Unsupported outer loop Phi(s)",
675 "UnsupportedPhi", ORE, TheLoop);
676 if (DoExtraAnalysis)
677 Result = false;
678 else
679 return false;
680 }
681
682 return Result;
683}
684
685void LoopVectorizationLegality::addInductionPhi(
686 PHINode *Phi, const InductionDescriptor &ID,
687 SmallPtrSetImpl<Value *> &AllowedExit) {
688 Inductions[Phi] = ID;
689
690 // In case this induction also comes with casts that we know we can ignore
691 // in the vectorized loop body, record them here. All casts could be recorded
692 // here for ignoring, but suffices to record only the first (as it is the
693 // only one that may bw used outside the cast sequence).
694 ArrayRef<Instruction *> Casts = ID.getCastInsts();
695 if (!Casts.empty())
696 InductionCastsToIgnore.insert(*Casts.begin());
697
698 Type *PhiTy = Phi->getType();
699 const DataLayout &DL = Phi->getDataLayout();
700
701 assert((PhiTy->isIntOrPtrTy() || PhiTy->isFloatingPointTy()) &&
702 "Expected int, ptr, or FP induction phi type");
703
704 // Get the widest type.
705 if (PhiTy->isIntOrPtrTy()) {
706 if (!WidestIndTy)
707 WidestIndTy = getInductionIntegerTy(DL, PhiTy);
708 else
709 WidestIndTy = getWiderInductionTy(DL, PhiTy, WidestIndTy);
710 }
711
712 // Int inductions are special because we only allow one IV.
713 if (ID.getKind() == InductionDescriptor::IK_IntInduction &&
714 ID.getConstIntStepValue() && ID.getConstIntStepValue()->isOne() &&
715 isa<Constant>(ID.getStartValue()) &&
716 cast<Constant>(ID.getStartValue())->isNullValue()) {
717
718 // Use the phi node with the widest type as induction. Use the last
719 // one if there are multiple (no good reason for doing this other
720 // than it is expedient). We've checked that it begins at zero and
721 // steps by one, so this is a canonical induction variable.
722 if (!PrimaryInduction || PhiTy == WidestIndTy)
723 PrimaryInduction = Phi;
724 }
725
726 // Both the PHI node itself, and the "post-increment" value feeding
727 // back into the PHI node may have external users.
728 // We can allow those uses, except if the SCEVs we have for them rely
729 // on predicates that only hold within the loop, since allowing the exit
730 // currently means re-using this SCEV outside the loop (see PR33706 for more
731 // details).
732 if (PSE.getPredicate().isAlwaysTrue()) {
733 AllowedExit.insert(Phi);
734 AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch()));
735 }
736
737 LLVM_DEBUG(dbgs() << "LV: Found an induction variable.\n");
738}
739
740bool LoopVectorizationLegality::setupOuterLoopInductions() {
741 BasicBlock *Header = TheLoop->getHeader();
742
743 // Returns true if a given Phi is a supported induction.
744 auto IsSupportedPhi = [&](PHINode &Phi) -> bool {
745 InductionDescriptor ID;
746 if (InductionDescriptor::isInductionPHI(&Phi, TheLoop, PSE, ID) &&
748 addInductionPhi(&Phi, ID, AllowedExit);
749 return true;
750 }
751 // Bail out for any Phi in the outer loop header that is not a supported
752 // induction.
754 dbgs() << "LV: Found unsupported PHI for outer loop vectorization.\n");
755 return false;
756 };
757
758 return llvm::all_of(Header->phis(), IsSupportedPhi);
759}
760
761/// Checks if a function is scalarizable according to the TLI, in
762/// the sense that it should be vectorized and then expanded in
763/// multiple scalar calls. This is represented in the
764/// TLI via mappings that do not specify a vector name, as in the
765/// following example:
766///
767/// const VecDesc VecIntrinsics[] = {
768/// {"llvm.phx.abs.i32", "", 4}
769/// };
770static bool isTLIScalarize(const TargetLibraryInfo &TLI, const CallInst &CI) {
771 const StringRef ScalarName = CI.getCalledFunction()->getName();
772 bool Scalarize = TLI.isFunctionVectorizable(ScalarName);
773 // Check that all known VFs are not associated to a vector
774 // function, i.e. the vector name is emty.
775 if (Scalarize) {
776 ElementCount WidestFixedVF, WidestScalableVF;
777 TLI.getWidestVF(ScalarName, WidestFixedVF, WidestScalableVF);
779 ElementCount::isKnownLE(VF, WidestFixedVF); VF *= 2)
780 Scalarize &= !TLI.isFunctionVectorizable(ScalarName, VF);
782 ElementCount::isKnownLE(VF, WidestScalableVF); VF *= 2)
783 Scalarize &= !TLI.isFunctionVectorizable(ScalarName, VF);
784 assert((WidestScalableVF.isZero() || !Scalarize) &&
785 "Caller may decide to scalarize a variant using a scalable VF");
786 }
787 return Scalarize;
788}
789
790bool LoopVectorizationLegality::canVectorizeInstrs() {
791 bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
792 bool Result = true;
793
794 // For each block in the loop.
795 for (BasicBlock *BB : TheLoop->blocks()) {
796 // Scan the instructions in the block and look for hazards.
797 for (Instruction &I : *BB) {
798 Result &= canVectorizeInstr(I);
799 if (!DoExtraAnalysis && !Result)
800 return false;
801 }
802 }
803
804 if (!PrimaryInduction) {
805 if (Inductions.empty()) {
807 "Did not find one integer induction var",
808 "loop induction variable could not be identified",
809 "NoInductionVariable", ORE, TheLoop);
810 return false;
811 }
812 if (!WidestIndTy) {
814 "Did not find one integer induction var",
815 "integer loop induction variable could not be identified",
816 "NoIntegerInductionVariable", ORE, TheLoop);
817 return false;
818 }
819 LLVM_DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
820 }
821
822 // Now we know the widest induction type, check if our found induction
823 // is the same size. If it's not, unset it here and InnerLoopVectorizer
824 // will create another.
825 if (PrimaryInduction && WidestIndTy != PrimaryInduction->getType())
826 PrimaryInduction = nullptr;
827
828 return Result;
829}
830
831bool LoopVectorizationLegality::canVectorizeInstr(Instruction &I) {
832 BasicBlock *BB = I.getParent();
833 BasicBlock *Header = TheLoop->getHeader();
834
835 if (auto *Phi = dyn_cast<PHINode>(&I)) {
836 Type *PhiTy = Phi->getType();
837 // Check that this PHI type is allowed.
838 if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() &&
839 !PhiTy->isPointerTy()) {
841 "Found a non-int non-pointer PHI",
842 "loop control flow is not understood by vectorizer",
843 "CFGNotUnderstood", ORE, TheLoop);
844 return false;
845 }
846
847 // If this PHINode is not in the header block, then we know that we
848 // can convert it to select during if-conversion. No need to check if
849 // the PHIs in this block are induction or reduction variables.
850 if (BB != Header) {
851 // Non-header phi nodes that have outside uses can be vectorized. Add
852 // them to the list of allowed exits.
853 // Unsafe cyclic dependencies with header phis are identified during
854 // legalization for reduction, induction and fixed order
855 // recurrences.
856 AllowedExit.insert(&I);
857 return true;
858 }
859
860 // We only allow if-converted PHIs with exactly two incoming values.
861 if (Phi->getNumIncomingValues() != 2) {
863 "Found an invalid PHI",
864 "loop control flow is not understood by vectorizer",
865 "CFGNotUnderstood", ORE, TheLoop, Phi);
866 return false;
867 }
868
869 RecurrenceDescriptor RedDes;
870 if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes, DB, AC, DT,
871 PSE.getSE())) {
872 Requirements->addExactFPMathInst(RedDes.getExactFPMathInst());
873 AllowedExit.insert(RedDes.getLoopExitInstr());
874 Reductions[Phi] = std::move(RedDes);
877 RedDes.getRecurrenceKind())) &&
878 "Only min/max recurrences are allowed to have multiple uses "
879 "currently");
880 return true;
881 }
882
883 // We prevent matching non-constant strided pointer IVS to preserve
884 // historical vectorizer behavior after a generalization of the
885 // IVDescriptor code. The intent is to remove this check, but we
886 // have to fix issues around code quality for such loops first.
887 auto IsDisallowedStridedPointerInduction =
888 [](const InductionDescriptor &ID) {
890 return false;
891 return ID.getKind() == InductionDescriptor::IK_PtrInduction &&
892 ID.getConstIntStepValue() == nullptr;
893 };
894
895 // TODO: Instead of recording the AllowedExit, it would be good to
896 // record the complementary set: NotAllowedExit. These include (but may
897 // not be limited to):
898 // 1. Reduction phis as they represent the one-before-last value, which
899 // is not available when vectorized
900 // 2. Induction phis and increment when SCEV predicates cannot be used
901 // outside the loop - see addInductionPhi
902 // 3. Non-Phis with outside uses when SCEV predicates cannot be used
903 // outside the loop - see call to hasOutsideLoopUser in the non-phi
904 // handling below
905 // 4. FixedOrderRecurrence phis that can possibly be handled by
906 // extraction.
907 // By recording these, we can then reason about ways to vectorize each
908 // of these NotAllowedExit.
909 InductionDescriptor ID;
910 if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID) &&
911 !IsDisallowedStridedPointerInduction(ID)) {
912 addInductionPhi(Phi, ID, AllowedExit);
913 Requirements->addExactFPMathInst(ID.getExactFPMathInst());
914 return true;
915 }
916
917 if (RecurrenceDescriptor::isFixedOrderRecurrence(Phi, TheLoop, DT)) {
918 AllowedExit.insert(Phi);
919 FixedOrderRecurrences.insert(Phi);
920 return true;
921 }
922
923 // As a last resort, coerce the PHI to a AddRec expression
924 // and re-try classifying it a an induction PHI.
925 if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID, true) &&
926 !IsDisallowedStridedPointerInduction(ID)) {
927 addInductionPhi(Phi, ID, AllowedExit);
928 return true;
929 }
930
931 reportVectorizationFailure("Found an unidentified PHI",
932 "value that could not be identified as "
933 "reduction is used outside the loop",
934 "NonReductionValueUsedOutsideLoop", ORE, TheLoop,
935 Phi);
936 return false;
937 } // end of PHI handling
938
939 // We handle calls that:
940 // * Have a mapping to an IR intrinsic.
941 // * Have a vector version available.
942 auto *CI = dyn_cast<CallInst>(&I);
943
944 if (CI && !getVectorIntrinsicIDForCall(CI, TLI) &&
945 !(CI->getCalledFunction() && TLI &&
946 (!VFDatabase::getMappings(*CI).empty() || isTLIScalarize(*TLI, *CI)))) {
947 // If the call is a recognized math libary call, it is likely that
948 // we can vectorize it given loosened floating-point constraints.
949 LibFunc Func;
950 bool IsMathLibCall =
951 TLI && CI->getCalledFunction() && CI->getType()->isFloatingPointTy() &&
952 TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) &&
953 TLI->hasOptimizedCodeGen(Func);
954
955 if (IsMathLibCall) {
956 // TODO: Ideally, we should not use clang-specific language here,
957 // but it's hard to provide meaningful yet generic advice.
958 // Also, should this be guarded by allowExtraAnalysis() and/or be part
959 // of the returned info from isFunctionVectorizable()?
961 "Found a non-intrinsic callsite",
962 "library call cannot be vectorized. "
963 "Try compiling with -fno-math-errno, -ffast-math, "
964 "or similar flags",
965 "CantVectorizeLibcall", ORE, TheLoop, CI);
966 } else {
967 reportVectorizationFailure("Found a non-intrinsic callsite",
968 "call instruction cannot be vectorized",
969 "CantVectorizeLibcall", ORE, TheLoop, CI);
970 }
971 return false;
972 }
973
974 // Some intrinsics have scalar arguments and should be same in order for
975 // them to be vectorized (i.e. loop invariant).
976 if (CI) {
977 auto *SE = PSE.getSE();
978 Intrinsic::ID IntrinID = getVectorIntrinsicIDForCall(CI, TLI);
979 for (unsigned Idx = 0; Idx < CI->arg_size(); ++Idx)
980 if (isVectorIntrinsicWithScalarOpAtArg(IntrinID, Idx, TTI)) {
981 if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(Idx)), TheLoop)) {
983 "Found unvectorizable intrinsic",
984 "intrinsic instruction cannot be vectorized",
985 "CantVectorizeIntrinsic", ORE, TheLoop, CI);
986 return false;
987 }
988 }
989 }
990
991 // If we found a vectorized variant of a function, note that so LV can
992 // make better decisions about maximum VF.
993 if (CI && !VFDatabase::getMappings(*CI).empty())
994 VecCallVariantsFound = true;
995
996 auto CanWidenInstructionTy = [](Instruction const &Inst) {
997 Type *InstTy = Inst.getType();
998 if (!isa<StructType>(InstTy))
999 return canVectorizeTy(InstTy);
1000
1001 // For now, we only recognize struct values returned from calls where
1002 // all users are extractvalue as vectorizable. All element types of the
1003 // struct must be types that can be widened.
1004 return isa<CallInst>(Inst) && canVectorizeTy(InstTy) &&
1005 all_of(Inst.users(), IsaPred<ExtractValueInst>);
1006 };
1007
1008 // Check that the instruction return type is vectorizable.
1009 // We can't vectorize casts from vector type to scalar type.
1010 // Also, we can't vectorize extractelement instructions.
1011 if (!CanWidenInstructionTy(I) ||
1012 (isa<CastInst>(I) &&
1013 !VectorType::isValidElementType(I.getOperand(0)->getType())) ||
1015 reportVectorizationFailure("Found unvectorizable type",
1016 "instruction return type cannot be vectorized",
1017 "CantVectorizeInstructionReturnType", ORE,
1018 TheLoop, &I);
1019 return false;
1020 }
1021
1022 // Check that the stored type is vectorizable.
1023 if (auto *ST = dyn_cast<StoreInst>(&I)) {
1024 Type *T = ST->getValueOperand()->getType();
1026 reportVectorizationFailure("Store instruction cannot be vectorized",
1027 "CantVectorizeStore", ORE, TheLoop, ST);
1028 return false;
1029 }
1030
1031 // For nontemporal stores, check that a nontemporal vector version is
1032 // supported on the target.
1033 if (ST->getMetadata(LLVMContext::MD_nontemporal)) {
1034 // Arbitrarily try a vector of 2 elements.
1035 auto *VecTy = FixedVectorType::get(T, /*NumElts=*/2);
1036 assert(VecTy && "did not find vectorized version of stored type");
1037 if (!TTI->isLegalNTStore(VecTy, ST->getAlign())) {
1039 "nontemporal store instruction cannot be vectorized",
1040 "CantVectorizeNontemporalStore", ORE, TheLoop, ST);
1041 return false;
1042 }
1043 }
1044
1045 } else if (auto *LD = dyn_cast<LoadInst>(&I)) {
1046 if (LD->getMetadata(LLVMContext::MD_nontemporal)) {
1047 // For nontemporal loads, check that a nontemporal vector version is
1048 // supported on the target (arbitrarily try a vector of 2 elements).
1049 auto *VecTy = FixedVectorType::get(I.getType(), /*NumElts=*/2);
1050 assert(VecTy && "did not find vectorized version of load type");
1051 if (!TTI->isLegalNTLoad(VecTy, LD->getAlign())) {
1053 "nontemporal load instruction cannot be vectorized",
1054 "CantVectorizeNontemporalLoad", ORE, TheLoop, LD);
1055 return false;
1056 }
1057 }
1058
1059 // FP instructions can allow unsafe algebra, thus vectorizable by
1060 // non-IEEE-754 compliant SIMD units.
1061 // This applies to floating-point math operations and calls, not memory
1062 // operations, shuffles, or casts, as they don't change precision or
1063 // semantics.
1064 } else if (I.getType()->isFloatingPointTy() && (CI || I.isBinaryOp()) &&
1065 !I.isFast()) {
1066 LLVM_DEBUG(dbgs() << "LV: Found FP op with unsafe algebra.\n");
1067 Hints->setPotentiallyUnsafe();
1068 }
1069
1070 // Reduction instructions are allowed to have exit users.
1071 // All other instructions must not have external users.
1072 if (hasOutsideLoopUser(TheLoop, &I, AllowedExit)) {
1073 // We can safely vectorize loops where instructions within the loop are
1074 // used outside the loop only if the SCEV predicates within the loop is
1075 // same as outside the loop. Allowing the exit means reusing the SCEV
1076 // outside the loop.
1077 if (PSE.getPredicate().isAlwaysTrue()) {
1078 AllowedExit.insert(&I);
1079 return true;
1080 }
1081 reportVectorizationFailure("Value cannot be used outside the loop",
1082 "ValueUsedOutsideLoop", ORE, TheLoop, &I);
1083 return false;
1084 }
1085
1086 return true;
1087}
1088
1089/// Find histogram operations that match high-level code in loops:
1090/// \code
1091/// buckets[indices[i]]+=step;
1092/// \endcode
1093///
1094/// It matches a pattern starting from \p HSt, which Stores to the 'buckets'
1095/// array the computed histogram. It uses a BinOp to sum all counts, storing
1096/// them using a loop-variant index Load from the 'indices' input array.
1097///
1098/// On successful matches it updates the STATISTIC 'HistogramsDetected',
1099/// regardless of hardware support. When there is support, it additionally
1100/// stores the BinOp/Load pairs in \p HistogramCounts, as well the pointers
1101/// used to update histogram in \p HistogramPtrs.
1102static bool findHistogram(LoadInst *LI, StoreInst *HSt, Loop *TheLoop,
1103 const PredicatedScalarEvolution &PSE,
1104 SmallVectorImpl<HistogramInfo> &Histograms) {
1105
1106 // Store value must come from a Binary Operation.
1107 Instruction *HPtrInstr = nullptr;
1108 BinaryOperator *HBinOp = nullptr;
1109 if (!match(HSt, m_Store(m_BinOp(HBinOp), m_Instruction(HPtrInstr))))
1110 return false;
1111
1112 // BinOp must be an Add or a Sub modifying the bucket value by a
1113 // loop invariant amount.
1114 // FIXME: We assume the loop invariant term is on the RHS.
1115 // Fine for an immediate/constant, but maybe not a generic value?
1116 Value *HIncVal = nullptr;
1117 if (!match(HBinOp, m_Add(m_Load(m_Specific(HPtrInstr)), m_Value(HIncVal))) &&
1118 !match(HBinOp, m_Sub(m_Load(m_Specific(HPtrInstr)), m_Value(HIncVal))))
1119 return false;
1120
1121 // Make sure the increment value is loop invariant.
1122 if (!TheLoop->isLoopInvariant(HIncVal))
1123 return false;
1124
1125 // The address to store is calculated through a GEP Instruction.
1127 if (!GEP)
1128 return false;
1129
1130 // Restrict address calculation to constant indices except for the last term.
1131 Value *HIdx = nullptr;
1132 for (Value *Index : GEP->indices()) {
1133 if (HIdx)
1134 return false;
1135 if (!isa<ConstantInt>(Index))
1136 HIdx = Index;
1137 }
1138
1139 if (!HIdx)
1140 return false;
1141
1142 // Check that the index is calculated by loading from another array. Ignore
1143 // any extensions.
1144 // FIXME: Support indices from other sources than a linear load from memory?
1145 // We're currently trying to match an operation looping over an array
1146 // of indices, but there could be additional levels of indirection
1147 // in place, or possibly some additional calculation to form the index
1148 // from the loaded data.
1149 Value *VPtrVal;
1150 if (!match(HIdx, m_ZExtOrSExtOrSelf(m_Load(m_Value(VPtrVal)))))
1151 return false;
1152
1153 // Make sure the index address varies in this loop, not an outer loop.
1154 const auto *AR = dyn_cast<SCEVAddRecExpr>(PSE.getSE()->getSCEV(VPtrVal));
1155 if (!AR || AR->getLoop() != TheLoop)
1156 return false;
1157
1158 // Ensure we'll have the same mask by checking that all parts of the histogram
1159 // (gather load, update, scatter store) are in the same block.
1160 LoadInst *IndexedLoad = cast<LoadInst>(HBinOp->getOperand(0));
1161 BasicBlock *LdBB = IndexedLoad->getParent();
1162 if (LdBB != HBinOp->getParent() || LdBB != HSt->getParent())
1163 return false;
1164
1165 LLVM_DEBUG(dbgs() << "LV: Found histogram for: " << *HSt << "\n");
1166
1167 // Store the operations that make up the histogram.
1168 Histograms.emplace_back(IndexedLoad, HBinOp, HSt);
1169 return true;
1170}
1171
1172bool LoopVectorizationLegality::canVectorizeIndirectUnsafeDependences() {
1173 // For now, we only support an IndirectUnsafe dependency that calculates
1174 // a histogram
1176 return false;
1177
1178 // Find a single IndirectUnsafe dependency.
1179 const MemoryDepChecker::Dependence *IUDep = nullptr;
1180 const MemoryDepChecker &DepChecker = LAI->getDepChecker();
1181 const auto *Deps = DepChecker.getDependences();
1182 // If there were too many dependences, LAA abandons recording them. We can't
1183 // proceed safely if we don't know what the dependences are.
1184 if (!Deps)
1185 return false;
1186
1187 for (const MemoryDepChecker::Dependence &Dep : *Deps) {
1188 // Ignore dependencies that are either known to be safe or can be
1189 // checked at runtime.
1192 continue;
1193
1194 // We're only interested in IndirectUnsafe dependencies here, where the
1195 // address might come from a load from memory. We also only want to handle
1196 // one such dependency, at least for now.
1197 if (Dep.Type != MemoryDepChecker::Dependence::IndirectUnsafe || IUDep)
1198 return false;
1199
1200 IUDep = &Dep;
1201 }
1202 if (!IUDep)
1203 return false;
1204
1205 // For now only normal loads and stores are supported.
1206 LoadInst *LI = dyn_cast<LoadInst>(IUDep->getSource(DepChecker));
1207 StoreInst *SI = dyn_cast<StoreInst>(IUDep->getDestination(DepChecker));
1208
1209 if (!LI || !SI)
1210 return false;
1211
1212 LLVM_DEBUG(dbgs() << "LV: Checking for a histogram on: " << *SI << "\n");
1213 return findHistogram(LI, SI, TheLoop, LAI->getPSE(), Histograms);
1214}
1215
1216bool LoopVectorizationLegality::canVectorizeMemory() {
1217 LAI = &LAIs.getInfo(*TheLoop);
1218 const OptimizationRemarkAnalysis *LAR = LAI->getReport();
1219 if (LAR) {
1220 ORE->emit([&]() {
1221 return OptimizationRemarkAnalysis(LV_NAME, "loop not vectorized: ", *LAR);
1222 });
1223 }
1224
1225 if (!LAI->canVectorizeMemory()) {
1228 "Cannot vectorize unsafe dependencies in uncountable exit loop with "
1229 "side effects",
1230 "CantVectorizeUnsafeDependencyForEELoopWithSideEffects", ORE,
1231 TheLoop);
1232 return false;
1233 }
1234
1235 return canVectorizeIndirectUnsafeDependences();
1236 }
1237
1238 if (LAI->hasLoadStoreDependenceInvolvingLoopInvariantAddress()) {
1239 reportVectorizationFailure("We don't allow storing to uniform addresses",
1240 "write to a loop invariant address could not "
1241 "be vectorized",
1242 "CantVectorizeStoreToLoopInvariantAddress", ORE,
1243 TheLoop);
1244 return false;
1245 }
1246
1247 // We can vectorize stores to invariant address when final reduction value is
1248 // guaranteed to be stored at the end of the loop. Also, if decision to
1249 // vectorize loop is made, runtime checks are added so as to make sure that
1250 // invariant address won't alias with any other objects.
1251 if (!LAI->getStoresToInvariantAddresses().empty()) {
1252 // For each invariant address, check if last stored value is unconditional
1253 // and the address is not calculated inside the loop.
1254 for (StoreInst *SI : LAI->getStoresToInvariantAddresses()) {
1256 continue;
1257
1258 if (blockNeedsPredication(SI->getParent())) {
1260 "We don't allow storing to uniform addresses",
1261 "write of conditional recurring variant value to a loop "
1262 "invariant address could not be vectorized",
1263 "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
1264 return false;
1265 }
1266
1267 // Invariant address should be defined outside of loop. LICM pass usually
1268 // makes sure it happens, but in rare cases it does not, we do not want
1269 // to overcomplicate vectorization to support this case.
1270 if (Instruction *Ptr = dyn_cast<Instruction>(SI->getPointerOperand())) {
1271 if (TheLoop->contains(Ptr)) {
1273 "Invariant address is calculated inside the loop",
1274 "write to a loop invariant address could not "
1275 "be vectorized",
1276 "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
1277 return false;
1278 }
1279 }
1280 }
1281
1282 if (LAI->hasStoreStoreDependenceInvolvingLoopInvariantAddress()) {
1283 // For each invariant address, check its last stored value is the result
1284 // of one of our reductions.
1285 //
1286 // We do not check if dependence with loads exists because that is already
1287 // checked via hasLoadStoreDependenceInvolvingLoopInvariantAddress.
1288 ScalarEvolution *SE = PSE.getSE();
1289 SmallVector<StoreInst *, 4> UnhandledStores;
1290 for (StoreInst *SI : LAI->getStoresToInvariantAddresses()) {
1292 // Earlier stores to this address are effectively deadcode.
1293 // With opaque pointers it is possible for one pointer to be used with
1294 // different sizes of stored values:
1295 // store i32 0, ptr %x
1296 // store i8 0, ptr %x
1297 // The latest store doesn't complitely overwrite the first one in the
1298 // example. That is why we have to make sure that types of stored
1299 // values are same.
1300 // TODO: Check that bitwidth of unhandled store is smaller then the
1301 // one that overwrites it and add a test.
1302 erase_if(UnhandledStores, [SE, SI](StoreInst *I) {
1303 return storeToSameAddress(SE, SI, I) &&
1304 I->getValueOperand()->getType() ==
1305 SI->getValueOperand()->getType();
1306 });
1307 continue;
1308 }
1309 UnhandledStores.push_back(SI);
1310 }
1311
1312 bool IsOK = UnhandledStores.empty();
1313 // TODO: we should also validate against InvariantMemSets.
1314 if (!IsOK) {
1316 "We don't allow storing to uniform addresses",
1317 "write to a loop invariant address could not "
1318 "be vectorized",
1319 "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
1320 return false;
1321 }
1322 }
1323 }
1324
1325 PSE.addPredicate(LAI->getPSE().getPredicate());
1326 return true;
1327}
1328
1330 bool EnableStrictReductions) {
1331
1332 // First check if there is any ExactFP math or if we allow reassociations
1333 if (!Requirements->getExactFPInst() || Hints->allowReordering())
1334 return true;
1335
1336 // If the above is false, we have ExactFPMath & do not allow reordering.
1337 // If the EnableStrictReductions flag is set, first check if we have any
1338 // Exact FP induction vars, which we cannot vectorize.
1339 if (!EnableStrictReductions ||
1340 any_of(getInductionVars(), [&](auto &Induction) -> bool {
1341 InductionDescriptor IndDesc = Induction.second;
1342 return IndDesc.getExactFPMathInst();
1343 }))
1344 return false;
1345
1346 // We can now only vectorize if all reductions with Exact FP math also
1347 // have the isOrdered flag set, which indicates that we can move the
1348 // reduction operations in-loop.
1349 return (all_of(getReductionVars(), [&](auto &Reduction) -> bool {
1350 const RecurrenceDescriptor &RdxDesc = Reduction.second;
1351 return !RdxDesc.hasExactFPMath() || RdxDesc.isOrdered();
1352 }));
1353}
1354
1356 return any_of(getReductionVars(), [&](auto &Reduction) -> bool {
1357 const RecurrenceDescriptor &RdxDesc = Reduction.second;
1358 return RdxDesc.IntermediateStore == SI;
1359 });
1360}
1361
1363 return any_of(getReductionVars(), [&](auto &Reduction) -> bool {
1364 const RecurrenceDescriptor &RdxDesc = Reduction.second;
1365 if (!RdxDesc.IntermediateStore)
1366 return false;
1367
1368 ScalarEvolution *SE = PSE.getSE();
1369 Value *InvariantAddress = RdxDesc.IntermediateStore->getPointerOperand();
1370 return V == InvariantAddress ||
1371 SE->getSCEV(V) == SE->getSCEV(InvariantAddress);
1372 });
1373}
1374
1376 Value *In0 = const_cast<Value *>(V);
1378 if (!PN)
1379 return false;
1380
1381 return Inductions.count(PN);
1382}
1383
1384const InductionDescriptor *
1386 if (!isInductionPhi(Phi))
1387 return nullptr;
1388 auto &ID = getInductionVars().find(Phi)->second;
1389 if (ID.getKind() == InductionDescriptor::IK_IntInduction ||
1391 return &ID;
1392 return nullptr;
1393}
1394
1395const InductionDescriptor *
1397 if (!isInductionPhi(Phi))
1398 return nullptr;
1399 auto &ID = getInductionVars().find(Phi)->second;
1401 return &ID;
1402 return nullptr;
1403}
1404
1406 const Value *V) const {
1407 auto *Inst = dyn_cast<Instruction>(V);
1408 return (Inst && InductionCastsToIgnore.count(Inst));
1409}
1410
1414
1416 const PHINode *Phi) const {
1417 return FixedOrderRecurrences.count(Phi);
1418}
1419
1421 const BasicBlock *BB) const {
1422 // When vectorizing early exits, create predicates for the latch block only.
1423 // For a single early exit, it must be a direct predecessor of the latch.
1424 // For multiple early exits, they form a chain where each exiting block
1425 // dominates all subsequent blocks up to the latch.
1426 BasicBlock *Latch = TheLoop->getLoopLatch();
1428 return BB == Latch;
1429 return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
1430}
1431
1432bool LoopVectorizationLegality::blockCanBePredicated(
1433 BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs,
1434 SmallPtrSetImpl<const Instruction *> &MaskedOp) const {
1435 for (Instruction &I : *BB) {
1436 // We can predicate blocks with calls to assume, as long as we drop them in
1437 // case we flatten the CFG via predication.
1439 MaskedOp.insert(&I);
1440 continue;
1441 }
1442
1443 // Do not let llvm.experimental.noalias.scope.decl block the vectorization.
1444 // TODO: there might be cases that it should block the vectorization. Let's
1445 // ignore those for now.
1447 continue;
1448
1449 // We can allow masked calls if there's at least one vector variant, even
1450 // if we end up scalarizing due to the cost model calculations.
1451 // TODO: Allow other calls if they have appropriate attributes... readonly
1452 // and argmemonly?
1453 if (CallInst *CI = dyn_cast<CallInst>(&I))
1455 MaskedOp.insert(CI);
1456 continue;
1457 }
1458
1459 // Loads are handled via masking (or speculated if safe to do so.)
1460 if (auto *LI = dyn_cast<LoadInst>(&I)) {
1461 if (!SafePtrs.count(LI->getPointerOperand()))
1462 MaskedOp.insert(LI);
1463 continue;
1464 }
1465
1466 // Predicated store requires some form of masking:
1467 // 1) masked store HW instruction,
1468 // 2) emulation via load-blend-store (only if safe and legal to do so,
1469 // be aware on the race conditions), or
1470 // 3) element-by-element predicate check and scalar store.
1471 if (auto *SI = dyn_cast<StoreInst>(&I)) {
1472 MaskedOp.insert(SI);
1473 continue;
1474 }
1475
1476 if (I.mayReadFromMemory() || I.mayWriteToMemory() || I.mayThrow())
1477 return false;
1478 }
1479
1480 return true;
1481}
1482
1483bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
1484 if (!EnableIfConversion) {
1485 reportVectorizationFailure("If-conversion is disabled",
1486 "IfConversionDisabled", ORE, TheLoop);
1487 return false;
1488 }
1489
1490 assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable");
1491
1492 // A list of pointers which are known to be dereferenceable within scope of
1493 // the loop body for each iteration of the loop which executes. That is,
1494 // the memory pointed to can be dereferenced (with the access size implied by
1495 // the value's type) unconditionally within the loop header without
1496 // introducing a new fault.
1497 SmallPtrSet<Value *, 8> SafePointers;
1498
1499 // Collect safe addresses.
1500 for (BasicBlock *BB : TheLoop->blocks()) {
1501 if (!blockNeedsPredication(BB)) {
1502 for (Instruction &I : *BB)
1503 if (auto *Ptr = getLoadStorePointerOperand(&I))
1504 SafePointers.insert(Ptr);
1505 continue;
1506 }
1507
1508 // For a block which requires predication, a address may be safe to access
1509 // in the loop w/o predication if we can prove dereferenceability facts
1510 // sufficient to ensure it'll never fault within the loop. For the moment,
1511 // we restrict this to loads; stores are more complicated due to
1512 // concurrency restrictions.
1513 ScalarEvolution &SE = *PSE.getSE();
1515 for (Instruction &I : *BB) {
1516 LoadInst *LI = dyn_cast<LoadInst>(&I);
1517
1518 // Make sure we can execute all computations feeding into Ptr in the loop
1519 // w/o triggering UB and that none of the out-of-loop operands are poison.
1520 // We do not need to check if operations inside the loop can produce
1521 // poison due to flags (e.g. due to an inbounds GEP going out of bounds),
1522 // because flags will be dropped when executing them unconditionally.
1523 // TODO: Results could be improved by considering poison-propagation
1524 // properties of visited ops.
1525 auto CanSpeculatePointerOp = [this](Value *Ptr) {
1526 SmallVector<Value *> Worklist = {Ptr};
1527 SmallPtrSet<Value *, 4> Visited;
1528 while (!Worklist.empty()) {
1529 Value *CurrV = Worklist.pop_back_val();
1530 if (!Visited.insert(CurrV).second)
1531 continue;
1532
1533 auto *CurrI = dyn_cast<Instruction>(CurrV);
1534 if (!CurrI || !TheLoop->contains(CurrI)) {
1535 // If operands from outside the loop may be poison then Ptr may also
1536 // be poison.
1537 if (!isGuaranteedNotToBePoison(CurrV, AC,
1538 TheLoop->getLoopPredecessor()
1539 ->getTerminator()
1540 ->getIterator(),
1541 DT))
1542 return false;
1543 continue;
1544 }
1545
1546 // A loaded value may be poison, independent of any flags.
1547 if (isa<LoadInst>(CurrI) && !isGuaranteedNotToBePoison(CurrV, AC))
1548 return false;
1549
1550 // For other ops, assume poison can only be introduced via flags,
1551 // which can be dropped.
1552 if (!isa<PHINode>(CurrI) && !isSafeToSpeculativelyExecute(CurrI))
1553 return false;
1554 append_range(Worklist, CurrI->operands());
1555 }
1556 return true;
1557 };
1558 // Pass the Predicates pointer to isDereferenceableAndAlignedInLoop so
1559 // that it will consider loops that need guarding by SCEV checks. The
1560 // vectoriser will generate these checks if we decide to vectorise.
1561 if (LI && !LI->getType()->isVectorTy() && !mustSuppressSpeculation(*LI) &&
1562 CanSpeculatePointerOp(LI->getPointerOperand()) &&
1563 isDereferenceableAndAlignedInLoop(LI, TheLoop, SE, *DT, AC,
1564 &Predicates))
1565 SafePointers.insert(LI->getPointerOperand());
1566 Predicates.clear();
1567 }
1568 }
1569
1570 // Collect the blocks that need predication.
1571 for (BasicBlock *BB : TheLoop->blocks()) {
1572 // We support only branches and switch statements as terminators inside the
1573 // loop.
1574 if (isa<SwitchInst>(BB->getTerminator())) {
1575 if (TheLoop->isLoopExiting(BB)) {
1576 reportVectorizationFailure("Loop contains an unsupported switch",
1577 "LoopContainsUnsupportedSwitch", ORE,
1578 TheLoop, BB->getTerminator());
1579 return false;
1580 }
1581 } else if (!isa<UncondBrInst, CondBrInst>(BB->getTerminator())) {
1582 reportVectorizationFailure("Loop contains an unsupported terminator",
1583 "LoopContainsUnsupportedTerminator", ORE,
1584 TheLoop, BB->getTerminator());
1585 return false;
1586 }
1587
1588 // We must be able to predicate all blocks that need to be predicated.
1589 if (blockNeedsPredication(BB) &&
1590 !blockCanBePredicated(BB, SafePointers, ConditionallyExecutedOps)) {
1592 "Control flow cannot be substituted for a select", "NoCFGForSelect",
1593 ORE, TheLoop, BB->getTerminator());
1594 return false;
1595 }
1596 }
1597
1598 // We can if-convert this loop.
1599 return true;
1600}
1601
1602// Helper function to canVectorizeLoopNestCFG.
1603bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
1604 bool UseVPlanNativePath) {
1605 assert((UseVPlanNativePath || Lp->isInnermost()) &&
1606 "VPlan-native path is not enabled.");
1607
1608 // TODO: ORE should be improved to show more accurate information when an
1609 // outer loop can't be vectorized because a nested loop is not understood or
1610 // legal. Something like: "outer_loop_location: loop not vectorized:
1611 // (inner_loop_location) loop control flow is not understood by vectorizer".
1612
1613 // Store the result and return it at the end instead of exiting early, in case
1614 // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1615 bool Result = true;
1616 bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1617
1618 // We must have a loop in canonical form. Loops with indirectbr in them cannot
1619 // be canonicalized.
1620 if (!Lp->getLoopPreheader()) {
1621 reportVectorizationFailure("Loop doesn't have a legal pre-header",
1622 "loop control flow is not understood by vectorizer",
1623 "CFGNotUnderstood", ORE, TheLoop);
1624 if (DoExtraAnalysis)
1625 Result = false;
1626 else
1627 return false;
1628 }
1629
1630 // We must have a single backedge.
1631 if (Lp->getNumBackEdges() != 1) {
1632 reportVectorizationFailure("The loop must have a single backedge",
1633 "loop control flow is not understood by vectorizer",
1634 "CFGNotUnderstood", ORE, TheLoop);
1635 if (DoExtraAnalysis)
1636 Result = false;
1637 else
1638 return false;
1639 }
1640
1641 // The latch must be terminated by a branch.
1642 BasicBlock *Latch = Lp->getLoopLatch();
1643 if (Latch && !isa<UncondBrInst, CondBrInst>(Latch->getTerminator())) {
1645 "The loop latch terminator is not a UncondBrInst/CondBrInst",
1646 "loop control flow is not understood by vectorizer", "CFGNotUnderstood",
1647 ORE, TheLoop);
1648 if (DoExtraAnalysis)
1649 Result = false;
1650 else
1651 return false;
1652 }
1653
1654 return Result;
1655}
1656
1657bool LoopVectorizationLegality::canVectorizeLoopNestCFG(
1658 Loop *Lp, bool UseVPlanNativePath) {
1659 // Store the result and return it at the end instead of exiting early, in case
1660 // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1661 bool Result = true;
1662 bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1663 if (!canVectorizeLoopCFG(Lp, UseVPlanNativePath)) {
1664 if (DoExtraAnalysis)
1665 Result = false;
1666 else
1667 return false;
1668 }
1669
1670 // Recursively check whether the loop control flow of nested loops is
1671 // understood.
1672 for (Loop *SubLp : *Lp)
1673 if (!canVectorizeLoopNestCFG(SubLp, UseVPlanNativePath)) {
1674 if (DoExtraAnalysis)
1675 Result = false;
1676 else
1677 return false;
1678 }
1679
1680 return Result;
1681}
1682
1683bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
1684 BasicBlock *LatchBB = TheLoop->getLoopLatch();
1685 if (!LatchBB) {
1686 reportVectorizationFailure("Loop does not have a latch",
1687 "Cannot vectorize early exit loop",
1688 "NoLatchEarlyExit", ORE, TheLoop);
1689 return false;
1690 }
1691
1692 if (Reductions.size() || FixedOrderRecurrences.size()) {
1694 "Found reductions or recurrences in early-exit loop",
1695 "Cannot vectorize early exit loop with reductions or recurrences",
1696 "RecurrencesInEarlyExitLoop", ORE, TheLoop);
1697 return false;
1698 }
1699
1700 SmallVector<BasicBlock *, 8> ExitingBlocks;
1701 TheLoop->getExitingBlocks(ExitingBlocks);
1702
1703 // Keep a record of all the exiting blocks.
1705 SmallVector<BasicBlock *> UncountableExitingBlocks;
1706 for (BasicBlock *BB : ExitingBlocks) {
1707 const SCEV *EC =
1708 PSE.getSE()->getPredicatedExitCount(TheLoop, BB, &Predicates);
1709 if (isa<SCEVCouldNotCompute>(EC)) {
1710 if (size(successors(BB)) != 2) {
1712 "Early exiting block does not have exactly two successors",
1713 "Incorrect number of successors from early exiting block",
1714 "EarlyExitTooManySuccessors", ORE, TheLoop);
1715 return false;
1716 }
1717
1718 UncountableExitingBlocks.push_back(BB);
1719 } else
1720 CountableExitingBlocks.push_back(BB);
1721 }
1722 // We can safely ignore the predicates here because when vectorizing the loop
1723 // the PredicatatedScalarEvolution class will keep track of all predicates
1724 // for each exiting block anyway. This happens when calling
1725 // PSE.getSymbolicMaxBackedgeTakenCount() below.
1726 Predicates.clear();
1727
1728 if (UncountableExitingBlocks.empty()) {
1729 LLVM_DEBUG(dbgs() << "LV: Could not find any uncountable exits");
1730 return false;
1731 }
1732
1733 // The latch block must have a countable exit.
1735 PSE.getSE()->getPredicatedExitCount(TheLoop, LatchBB, &Predicates))) {
1737 "Cannot determine exact exit count for latch block",
1738 "Cannot vectorize early exit loop",
1739 "UnknownLatchExitCountEarlyExitLoop", ORE, TheLoop);
1740 return false;
1741 }
1742 assert(llvm::is_contained(CountableExitingBlocks, LatchBB) &&
1743 "Latch block not found in list of countable exits!");
1744
1745 // Check to see if there are instructions that could potentially generate
1746 // exceptions or have side-effects.
1747 auto IsSafeOperation = [](Instruction *I) -> bool {
1748 switch (I->getOpcode()) {
1749 case Instruction::Load:
1750 case Instruction::Store:
1751 case Instruction::PHI:
1752 case Instruction::UncondBr:
1753 case Instruction::CondBr:
1754 // These are checked separately.
1755 return true;
1756 default:
1758 }
1759 };
1760
1761 bool HasSideEffects = false;
1762 for (auto *BB : TheLoop->blocks())
1763 for (auto &I : *BB) {
1764 if (I.mayWriteToMemory()) {
1765 if (isa<StoreInst>(&I) && cast<StoreInst>(&I)->isSimple()) {
1766 HasSideEffects = true;
1767 continue;
1768 }
1769
1770 // We don't support complex writes to memory.
1772 "Complex writes to memory unsupported in early exit loops",
1773 "Cannot vectorize early exit loop with complex writes to memory",
1774 "WritesInEarlyExitLoop", ORE, TheLoop);
1775 return false;
1776 }
1777
1778 if (!IsSafeOperation(&I)) {
1779 reportVectorizationFailure("Early exit loop contains operations that "
1780 "cannot be speculatively executed",
1781 "UnsafeOperationsEarlyExitLoop", ORE,
1782 TheLoop);
1783 return false;
1784 }
1785 }
1786
1787 SmallVector<LoadInst *, 4> NonDerefLoads;
1788 // TODO: Handle loops that may fault.
1789 if (!HasSideEffects) {
1790 // Read-only loop.
1791 Predicates.clear();
1792 if (!isReadOnlyLoop(TheLoop, PSE.getSE(), DT, AC, NonDerefLoads,
1793 &Predicates)) {
1795 "Loop may fault", "Cannot vectorize non-read-only early exit loop",
1796 "NonReadOnlyEarlyExitLoop", ORE, TheLoop);
1797 return false;
1798 }
1799 } else {
1800 // Check all uncountable exiting blocks for movable loads.
1801 for (BasicBlock *ExitingBB : UncountableExitingBlocks) {
1802 if (!canUncountableExitConditionLoadBeMoved(ExitingBB))
1803 return false;
1804 }
1805 }
1806
1807 // Check non-dereferenceable loads if any.
1808 for (LoadInst *LI : NonDerefLoads) {
1809 // Only support unit-stride access for now.
1810 int Stride = isConsecutivePtr(LI->getType(), LI->getPointerOperand());
1811 if (Stride != 1) {
1813 "Loop contains potentially faulting strided load",
1814 "Cannot vectorize early exit loop with "
1815 "strided fault-only-first load",
1816 "EarlyExitLoopWithStridedFaultOnlyFirstLoad", ORE, TheLoop);
1817 return false;
1818 }
1819 }
1820
1821 [[maybe_unused]] const SCEV *SymbolicMaxBTC =
1822 PSE.getSymbolicMaxBackedgeTakenCount();
1823 // Since we have an exact exit count for the latch and the early exit
1824 // dominates the latch, then this should guarantee a computed SCEV value.
1825 assert(!isa<SCEVCouldNotCompute>(SymbolicMaxBTC) &&
1826 "Failed to get symbolic expression for backedge taken count");
1827 LLVM_DEBUG(dbgs() << "LV: Found an early exit loop with symbolic max "
1828 "backedge taken count: "
1829 << *SymbolicMaxBTC << '\n');
1830 UncountableExitType = HasSideEffects ? UncountableExitTrait::ReadWrite
1832 return true;
1833}
1834
1835bool LoopVectorizationLegality::canUncountableExitConditionLoadBeMoved(
1836 BasicBlock *ExitingBlock) {
1837 // Try to find a load in the critical path for the uncountable exit condition.
1838 // This is currently matching about the simplest form we can, expecting
1839 // only one in-loop load, the result of which is directly compared against
1840 // a loop-invariant value.
1841 // FIXME: We're insisting on a single use for now, because otherwise we will
1842 // need to make PHI nodes for other users. That can be done once the initial
1843 // transform code lands.
1844 auto *Br = cast<CondBrInst>(ExitingBlock->getTerminator());
1845
1846 using namespace llvm::PatternMatch;
1847 Instruction *L = nullptr;
1848 Value *Ptr = nullptr;
1849 Value *R = nullptr;
1850 if (!match(Br->getCondition(),
1852 m_Value(R))))) {
1854 "Early exit loop with store but no supported condition load",
1855 "NoConditionLoadForEarlyExitLoop", ORE, TheLoop);
1856 return false;
1857 }
1858
1859 // FIXME: Don't rely on operand ordering for the comparison.
1860 if (!TheLoop->isLoopInvariant(R)) {
1862 "Early exit loop with store but no supported condition load",
1863 "NoConditionLoadForEarlyExitLoop", ORE, TheLoop);
1864 return false;
1865 }
1866
1867 // Make sure that the load address is not loop invariant; we want an
1868 // address calculation that we can rotate to the next vector iteration.
1869 const auto *AR = dyn_cast<SCEVAddRecExpr>(PSE.getSE()->getSCEV(Ptr));
1870 if (!AR || AR->getLoop() != TheLoop || !AR->isAffine()) {
1872 "Uncountable exit condition depends on load with an address that is "
1873 "not an add recurrence in the loop",
1874 "EarlyExitLoadInvariantAddress", ORE, TheLoop);
1875 return false;
1876 }
1877
1878 ICFLoopSafetyInfo SafetyInfo;
1879 SafetyInfo.computeLoopSafetyInfo(TheLoop);
1880 LoadInst *Load = cast<LoadInst>(L);
1881 // We need to know that load will be executed before we can hoist a
1882 // copy out to run just before the first iteration.
1883 if (!SafetyInfo.isGuaranteedToExecute(*Load, DT, TheLoop)) {
1885 "Load for uncountable exit not guaranteed to execute",
1886 "ConditionalUncountableExitLoad", ORE, TheLoop);
1887 return false;
1888 }
1889
1890 // Prohibit any potential aliasing with any instruction in the loop which
1891 // might store to memory.
1892 // FIXME: Relax this constraint where possible.
1893 for (auto *BB : TheLoop->blocks()) {
1894 for (auto &I : *BB) {
1895 if (&I == Load)
1896 continue;
1897
1898 if (I.mayWriteToMemory()) {
1899 if (auto *SI = dyn_cast<StoreInst>(&I)) {
1900 AliasResult AR = AA->alias(Ptr, SI->getPointerOperand());
1901 if (AR == AliasResult::NoAlias)
1902 continue;
1903 }
1904
1906 "Cannot determine whether critical uncountable exit load address "
1907 "does not alias with a memory write",
1908 "CantVectorizeAliasWithCriticalUncountableExitLoad", ORE, TheLoop);
1909 return false;
1910 }
1911 }
1912 }
1913
1914 return true;
1915}
1916
1917bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
1918 // Store the result and return it at the end instead of exiting early, in case
1919 // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1920 bool Result = true;
1921
1922 bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1923 // Check whether the loop-related control flow in the loop nest is expected by
1924 // vectorizer.
1925 if (!canVectorizeLoopNestCFG(TheLoop, UseVPlanNativePath)) {
1926 if (DoExtraAnalysis) {
1927 LLVM_DEBUG(dbgs() << "LV: legality check failed: loop nest");
1928 Result = false;
1929 } else {
1930 return false;
1931 }
1932 }
1933
1934 // We need to have a loop header.
1935 LLVM_DEBUG(dbgs() << "LV: Found a loop: " << TheLoop->getHeader()->getName()
1936 << '\n');
1937
1938 // Specific checks for outer loops. We skip the remaining legal checks at this
1939 // point because they don't support outer loops.
1940 if (!TheLoop->isInnermost()) {
1941 assert(UseVPlanNativePath && "VPlan-native path is not enabled.");
1942
1943 if (!canVectorizeOuterLoop()) {
1944 reportVectorizationFailure("Unsupported outer loop",
1945 "UnsupportedOuterLoop", ORE, TheLoop);
1946 // TODO: Implement DoExtraAnalysis when subsequent legal checks support
1947 // outer loops.
1948 return false;
1949 }
1950
1951 LLVM_DEBUG(dbgs() << "LV: We can vectorize this outer loop!\n");
1952 return Result;
1953 }
1954
1955 assert(TheLoop->isInnermost() && "Inner loop expected.");
1956 // Check if we can if-convert non-single-bb loops.
1957 unsigned NumBlocks = TheLoop->getNumBlocks();
1958 if (NumBlocks != 1 && !canVectorizeWithIfConvert()) {
1959 LLVM_DEBUG(dbgs() << "LV: Can't if-convert the loop.\n");
1960 if (DoExtraAnalysis)
1961 Result = false;
1962 else
1963 return false;
1964 }
1965
1966 // Check if we can vectorize the instructions and CFG in this loop.
1967 if (!canVectorizeInstrs()) {
1968 LLVM_DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n");
1969 if (DoExtraAnalysis)
1970 Result = false;
1971 else
1972 return false;
1973 }
1974
1975 if (isa<SCEVCouldNotCompute>(PSE.getBackedgeTakenCount())) {
1976 if (TheLoop->getExitingBlock()) {
1977 reportVectorizationFailure("Cannot vectorize uncountable loop",
1978 "UnsupportedUncountableLoop", ORE, TheLoop);
1979 if (DoExtraAnalysis)
1980 Result = false;
1981 else
1982 return false;
1983 } else {
1984 if (!isVectorizableEarlyExitLoop()) {
1985 assert(UncountableExitType == UncountableExitTrait::None &&
1986 "Must be false without vectorizable early-exit loop");
1987 if (DoExtraAnalysis)
1988 Result = false;
1989 else
1990 return false;
1991 }
1992 }
1993 }
1994
1995 // Go over each instruction and look at memory deps.
1996 if (!canVectorizeMemory()) {
1997 LLVM_DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n");
1998 if (DoExtraAnalysis)
1999 Result = false;
2000 else
2001 return false;
2002 }
2003
2004 // Bail out for ReadWrite loops with uncountable exits for now.
2005 if (UncountableExitType == UncountableExitTrait::ReadWrite) {
2007 "Writes to memory unsupported in early exit loops",
2008 "Cannot vectorize early exit loop with writes to memory",
2009 "WritesInEarlyExitLoop", ORE, TheLoop);
2010 return false;
2011 }
2012
2013 if (Result) {
2014 LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop"
2015 << (LAI->getRuntimePointerChecking()->Need
2016 ? " (with a runtime bound check)"
2017 : "")
2018 << "!\n");
2019 }
2020
2021 unsigned SCEVThreshold = VectorizeSCEVCheckThreshold;
2022 if (Hints->getForce() == LoopVectorizeHints::FK_Enabled)
2023 SCEVThreshold = PragmaVectorizeSCEVCheckThreshold;
2024
2025 if (PSE.getPredicate().getComplexity() > SCEVThreshold) {
2026 LLVM_DEBUG(dbgs() << "LV: Vectorization not profitable "
2027 "due to SCEVThreshold");
2028 reportVectorizationFailure("Too many SCEV checks needed",
2029 "Too many SCEV assumptions need to be made and checked at runtime",
2030 "TooManySCEVRunTimeChecks", ORE, TheLoop);
2031 if (DoExtraAnalysis)
2032 Result = false;
2033 else
2034 return false;
2035 }
2036
2037 // Okay! We've done all the tests. If any have failed, return false. Otherwise
2038 // we can vectorize, and at this point we don't have any other mem analysis
2039 // which may limit our maximum vectorization factor, so just return true with
2040 // no restrictions.
2041 return Result;
2042}
2043
2045 // The only loops we can vectorize without a scalar epilogue, are loops with
2046 // a bottom-test and a single exiting block. We'd have to handle the fact
2047 // that not every instruction executes on the last iteration. This will
2048 // require a lane mask which varies through the vector loop body. (TODO)
2049 if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) {
2050 LLVM_DEBUG(
2051 dbgs()
2052 << "LV: Cannot fold tail by masking. Requires a singe latch exit\n");
2053 return false;
2054 }
2055
2056 LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n");
2057
2058 // The list of pointers that we can safely read and write to remains empty.
2059 SmallPtrSet<Value *, 8> SafePointers;
2060
2061 // Check all blocks for predication, including those that ordinarily do not
2062 // need predication such as the header block.
2064 for (BasicBlock *BB : TheLoop->blocks()) {
2065 if (!blockCanBePredicated(BB, SafePointers, TmpMaskedOp)) {
2066 LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking.\n");
2067 return false;
2068 }
2069 }
2070
2071 LLVM_DEBUG(dbgs() << "LV: can fold tail by masking.\n");
2072
2073 return true;
2074}
2075
2077 // The list of pointers that we can safely read and write to remains empty.
2078 SmallPtrSet<Value *, 8> SafePointers;
2079
2080 // Mark all blocks for predication, including those that ordinarily do not
2081 // need predication such as the header block, and collect instructions needing
2082 // predication in TailFoldedMaskedOp.
2083 for (BasicBlock *BB : TheLoop->blocks()) {
2084 [[maybe_unused]] bool R =
2085 blockCanBePredicated(BB, SafePointers, TailFoldedMaskedOp);
2086 assert(R && "Must be able to predicate block when tail-folding.");
2087 }
2088}
2089
2090} // namespace llvm
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
#define DEBUG_TYPE
Hexagon Common GEP
#define LV_NAME
static cl::opt< unsigned > PragmaVectorizeSCEVCheckThreshold("pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed with a " "vectorize(enable) pragma"))
static cl::opt< bool > HintsAllowReordering("hints-allow-reordering", cl::init(true), cl::Hidden, cl::desc("Allow enabling loop hints to reorder " "FP operations during vectorization."))
static const unsigned MaxInterleaveFactor
Maximum vectorization interleave count.
static cl::opt< bool > AllowStridedPointerIVs("lv-strided-pointer-ivs", cl::init(false), cl::Hidden, cl::desc("Enable recognition of non-constant strided " "pointer induction variables."))
static cl::opt< LoopVectorizeHints::ScalableForceKind > ForceScalableVectorization("scalable-vectorization", cl::init(LoopVectorizeHints::SK_Unspecified), cl::Hidden, cl::desc("Control whether the compiler can use scalable vectors to " "vectorize a loop"), cl::values(clEnumValN(LoopVectorizeHints::SK_FixedWidthOnly, "off", "Scalable vectorization is disabled."), clEnumValN(LoopVectorizeHints::SK_PreferScalable, "preferred", "Scalable vectorization is available and favored when the " "cost is inconclusive."), clEnumValN(LoopVectorizeHints::SK_PreferScalable, "on", "Scalable vectorization is available and favored when the " "cost is inconclusive."), clEnumValN(LoopVectorizeHints::SK_AlwaysScalable, "always", "Scalable vectorization is available and always favored when " "feasible")))
static cl::opt< unsigned > VectorizeSCEVCheckThreshold("vectorize-scev-check-threshold", cl::init(16), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed."))
static cl::opt< bool > EnableHistogramVectorization("enable-histogram-loop-vectorization", cl::init(false), cl::Hidden, cl::desc("Enables autovectorization of some loops containing histograms"))
static cl::opt< bool > EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden, cl::desc("Enable if-conversion during vectorization."))
This file defines the LoopVectorizationLegality class.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define H(x, y, z)
Definition MD5.cpp:56
#define T
Contains a collection of routines for determining if a given instruction is guaranteed to execute if ...
static bool isSimple(Instruction *I)
static void visit(BasicBlock &Start, std::function< bool(BasicBlock *)> op)
#define LLVM_DEBUG(...)
Definition Debug.h:119
This pass exposes codegen information to IR-level passes.
Virtual Register Rewriter
static const uint32_t IV[8]
Definition blake3_impl.h:83
@ NoAlias
The two locations do not alias at all.
iterator begin() const
Definition ArrayRef.h:129
bool empty() const
Check if the array is empty.
Definition ArrayRef.h:136
LLVM Basic Block Representation.
Definition BasicBlock.h:62
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
Definition BasicBlock.h:237
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
This class represents a function call, abstracting a target machine's calling convention.
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
static constexpr ElementCount getScalable(ScalarTy MinVal)
Definition TypeSize.h:312
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:309
constexpr bool isScalar() const
Exactly one element.
Definition TypeSize.h:320
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition Type.cpp:873
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
bool isGuaranteedToExecute(const Instruction &Inst, const DominatorTree *DT, const Loop *CurLoop) const override
Returns true if the instruction in a loop is guaranteed to execute at least once (under the assumptio...
void computeLoopSafetyInfo(const Loop *CurLoop) override
Computes safety information for a loop checks loop body & header for the possibility of may throw exc...
A struct for saving information about induction variables.
@ IK_FpInduction
Floating point induction variable.
@ IK_PtrInduction
Pointer induction var. Step = C.
@ IK_IntInduction
Integer induction variable. Step = C.
static LLVM_ABI bool isInductionPHI(PHINode *Phi, const Loop *L, ScalarEvolution *SE, InductionDescriptor &D, const SCEV *Expr=nullptr, SmallVectorImpl< Instruction * > *CastsToIgnore=nullptr)
Returns true if Phi is an induction in the loop L.
Instruction * getExactFPMathInst()
Returns floating-point induction operator that does not allow reassociation (transforming the inducti...
Class to represent integer types.
An instruction for reading from memory.
const MemoryDepChecker & getDepChecker() const
the Memory Dependence Checker which can determine the loop-independent and loop-carried dependences b...
static LLVM_ABI bool blockNeedsPredication(const BasicBlock *BB, const Loop *TheLoop, const DominatorTree *DT)
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
unsigned getNumBackEdges() const
Calculate the number of back edges to the loop header.
iterator_range< block_iterator > blocks() const
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
bool isLoopHeader(const BlockT *BB) const
bool isInvariantStoreOfReduction(StoreInst *SI)
Returns True if given store is a final invariant store of one of the reductions found in the loop.
bool isInvariantAddressOfReduction(Value *V)
Returns True if given address is invariant and is used to store recurrent expression.
bool canVectorize(bool UseVPlanNativePath)
Returns true if it is legal to vectorize this loop.
bool blockNeedsPredication(const BasicBlock *BB) const
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
int isConsecutivePtr(Type *AccessTy, Value *Ptr) const
Check if this pointer is consecutive when vectorizing.
bool hasUncountableExitWithSideEffects() const
Returns true if this is an early exit loop with state-changing or potentially-faulting operations and...
bool canVectorizeFPMath(bool EnableStrictReductions)
Returns true if it is legal to vectorize the FP math operations in this loop.
bool isFixedOrderRecurrence(const PHINode *Phi) const
Returns True if Phi is a fixed-order recurrence in this loop.
const InductionDescriptor * getPointerInductionDescriptor(PHINode *Phi) const
Returns a pointer to the induction descriptor, if Phi is pointer induction.
const InductionDescriptor * getIntOrFpInductionDescriptor(PHINode *Phi) const
Returns a pointer to the induction descriptor, if Phi is an integer or floating point induction.
bool isInductionPhi(const Value *V) const
Returns True if V is a Phi node of an induction variable in this loop.
bool isUniform(Value *V, ElementCount VF) const
Returns true if value V is uniform across VF lanes, when VF is provided, and otherwise if V is invari...
const InductionList & getInductionVars() const
Returns the induction variables found in the loop.
bool isInvariant(Value *V) const
Returns true if V is invariant across all loop iterations according to SCEV.
const ReductionList & getReductionVars() const
Returns the reduction variables found in the loop.
bool canFoldTailByMasking() const
Return true if we can vectorize this loop while folding its tail by masking.
void prepareToFoldTailByMasking()
Mark all respective loads/stores for masking.
bool hasUncountableEarlyExit() const
Returns true if the loop has uncountable early exits, i.e.
bool isUniformMemOp(Instruction &I, ElementCount VF) const
A uniform memory op is a load or store which accesses the same memory location on all VF lanes,...
bool isInductionVariable(const Value *V) const
Returns True if V can be considered as an induction variable in this loop.
bool isCastedInductionVariable(const Value *V) const
Returns True if V is a cast that is part of an induction def-use chain, and had been proven to be red...
@ SK_PreferScalable
Vectorize loops using scalable vectors or fixed-width vectors, but favor scalable vectors when the co...
@ SK_AlwaysScalable
Always vectorize loops using scalable vectors if feasible (i.e.
@ SK_FixedWidthOnly
Disables vectorization with scalable vectors.
bool allowVectorization(Function *F, Loop *L, bool VectorizeOnlyWhenForced) const
bool allowReordering() const
When enabling loop hints are provided we allow the vectorizer to change the order of operations that ...
void emitRemarkWithHints() const
Dumps all the hint information.
void setAlreadyVectorized()
Mark the loop L as already vectorized by setting the width to 1.
LoopVectorizeHints(const Loop *L, bool InterleaveOnlyWhenForced, OptimizationRemarkEmitter &ORE, const TargetTransformInfo *TTI=nullptr)
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition LoopInfo.cpp:67
PHINode * getCanonicalInductionVariable() const
Check to see if the loop has a canonical induction variable: an integer recurrence that starts at 0 a...
Definition LoopInfo.cpp:174
MDNode * getLoopID() const
Return the llvm.loop loop id metadata node for this loop if it is present.
Definition LoopInfo.cpp:523
Metadata node.
Definition Metadata.h:1080
const MDOperand & getOperand(unsigned I) const
Definition Metadata.h:1444
ArrayRef< MDOperand > operands() const
Definition Metadata.h:1442
unsigned getNumOperands() const
Return number of MDNode operands.
Definition Metadata.h:1450
Tracking metadata reference owned by Metadata.
Definition Metadata.h:902
A single uniqued string.
Definition Metadata.h:722
LLVM_ABI StringRef getString() const
Definition Metadata.cpp:632
iterator find(const KeyT &Key)
Definition MapVector.h:156
Checks memory dependences among accesses to the same underlying object to determine whether there vec...
const SmallVectorImpl< Dependence > * getDependences() const
Returns the memory dependences.
Root of the metadata hierarchy.
Definition Metadata.h:64
Diagnostic information for optimization analysis remarks.
The optimization diagnostic interface.
bool allowExtraAnalysis(StringRef PassName) const
Whether we allow for extra compile-time budget to perform more analysis to produce fewer false positi...
LLVM_ABI void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Diagnostic information for missed-optimization remarks.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Instruction * getExactFPMathInst() const
Returns 1st non-reassociative FP instruction in the PHI node's use-chain.
static LLVM_ABI bool isFixedOrderRecurrence(PHINode *Phi, Loop *TheLoop, DominatorTree *DT)
Returns true if Phi is a fixed-order recurrence.
bool hasExactFPMath() const
Returns true if the recurrence has floating-point math that requires precise (ordered) operations.
Instruction * getLoopExitInstr() const
static LLVM_ABI bool isReductionPHI(PHINode *Phi, Loop *TheLoop, RecurrenceDescriptor &RedDes, DemandedBits *DB=nullptr, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr, ScalarEvolution *SE=nullptr)
Returns true if Phi is a reduction in TheLoop.
bool hasUsesOutsideReductionChain() const
Returns true if the reduction PHI has any uses outside the reduction chain.
RecurKind getRecurrenceKind() const
bool isOrdered() const
Expose an ordered FP reduction to the instance users.
StoreInst * IntermediateStore
Reductions may store temporary or final result to an invariant address.
static bool isMinMaxRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is any min/max kind.
SCEVUse getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
This visitor recursively visits a SCEV expression and re-writes it.
const SCEV * visit(const SCEV *S)
This class represents an analyzed expression in the program.
static constexpr auto FlagAnyWrap
The main scalar evolution driver.
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
LLVM_ABI bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
LLVM_ABI const SCEV * getCouldNotCompute()
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Value * getPointerOperand()
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
Provides information about what library functions are available for the current target.
void getWidestVF(StringRef ScalarF, ElementCount &FixedVF, ElementCount &ScalableVF) const
Returns the largest vectorization factor used in the list of vector functions.
bool isFunctionVectorizable(StringRef F, const ElementCount &VF) const
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
Definition Twine.cpp:17
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:313
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:284
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:236
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition Type.h:186
bool isIntOrPtrTy() const
Return true if this is an integer type or a pointer type.
Definition Type.h:272
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
Value * getOperand(unsigned i) const
Definition User.h:207
static bool hasMaskedVariant(const CallInst &CI, std::optional< ElementCount > VF=std::nullopt)
Definition VectorUtils.h:87
static SmallVector< VFInfo, 8 > getMappings(const CallInst &CI)
Retrieve all the VFInfo instances associated to the CallInst CI.
Definition VectorUtils.h:76
LLVM Value Representation.
Definition Value.h:75
iterator_range< user_iterator > users()
Definition Value.h:426
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:318
static LLVM_ABI bool isValidElementType(Type *ElemTy)
Return true if the specified type is valid as a element type.
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:230
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:168
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
constexpr bool isZero() const
Definition TypeSize.h:153
const ParentTy * getParent() const
Definition ilist_node.h:34
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
TwoOps_match< ValueOpTy, PointerOpTy, Instruction::Store > m_Store(const ValueOpTy &ValueOp, const PointerOpTy &PointerOp)
Matches StoreInst.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
bool match(Val *V, const Pattern &P)
match_bind< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
auto m_BinOp()
Match an arbitrary binary operation and ignore it.
auto m_Value()
Match an arbitrary value and ignore it.
match_combine_or< match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > >, OpTy > m_ZExtOrSExtOrSelf(const OpTy &Op)
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract(Y &&MD)
Extract a Value from Metadata, if any.
Definition Metadata.h:696
Add a small namespace to avoid name clashes with the classes used in the streaming interface.
NodeAddr< PhiNode * > Phi
Definition RDFGraph.h:390
NodeAddr< FuncNode * > Func
Definition RDFGraph.h:393
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:315
@ Offset
Definition DWP.cpp:558
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1668
LLVM_ABI Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
auto successors(const MachineBasicBlock *BB)
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2207
static bool isUniformLoop(Loop *Lp, Loop *OuterLp)
LLVM_ABI bool mustSuppressSpeculation(const LoadInst &LI)
Return true if speculation of the given load must be suppressed to avoid ordering or interfering with...
Definition Loads.cpp:432
LLVM_ABI bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
auto reverse(ContainerTy &&C)
Definition STLExtras.h:407
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
static IntegerType * getWiderInductionTy(const DataLayout &DL, Type *Ty0, Type *Ty1)
static IntegerType * getInductionIntegerTy(const DataLayout &DL, Type *Ty)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
LLVM_ABI bool hasDisableAllTransformsHint(const Loop *L)
Look for the loop attribute that disables all transformation heuristic.
static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst, SmallPtrSetImpl< Value * > &AllowedExit)
Check that the instruction has outside loop users and is not an identified reduction variable.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI void reportVectorizationFailure(const StringRef DebugMsg, const StringRef OREMsg, const StringRef ORETag, OptimizationRemarkEmitter *ORE, const Loop *TheLoop, Instruction *I=nullptr)
Reports a vectorization failure: print DebugMsg for debugging purposes along with the corresponding o...
static bool storeToSameAddress(ScalarEvolution *SE, StoreInst *A, StoreInst *B)
Returns true if A and B have same pointer operands or same SCEVs addresses.
bool canVectorizeTy(Type *Ty)
Returns true if Ty is a valid vector element type, void, or an unpacked literal struct where all elem...
TargetTransformInfo TTI
LLVM_ABI bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic has a scalar operand.
ArrayRef(const T &OneElt) -> ArrayRef< T >
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI bool isReadOnlyLoop(Loop *L, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, SmallVectorImpl< LoadInst * > &NonDereferenceableAndAlignedLoads, SmallVectorImpl< const SCEVPredicate * > *Predicates=nullptr)
Returns true if the loop contains read-only memory accesses and doesn't throw.
Definition Loads.cpp:892
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2191
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1946
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
static bool findHistogram(LoadInst *LI, StoreInst *HSt, Loop *TheLoop, const PredicatedScalarEvolution &PSE, SmallVectorImpl< HistogramInfo > &Histograms)
Find histogram operations that match high-level code in loops:
LLVM_ABI bool isGuaranteedNotToBePoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Returns true if V cannot be poison, but may be undef.
static bool isTLIScalarize(const TargetLibraryInfo &TLI, const CallInst &CI)
Checks if a function is scalarizable according to the TLI, in the sense that it should be vectorized ...
LLVM_ABI bool isDereferenceableAndAlignedInLoop(LoadInst *LI, Loop *L, ScalarEvolution &SE, DominatorTree &DT, AssumptionCache *AC=nullptr, SmallVectorImpl< const SCEVPredicate * > *Predicates=nullptr)
Return true if we can prove that the given load (which is assumed to be within the specified loop) wo...
Definition Loads.cpp:290
constexpr detail::IsaCheckPredicate< Types... > IsaPred
Function object wrapper for the llvm::isa type check.
Definition Casting.h:866
LLVM_ABI std::optional< int64_t > getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const DominatorTree &DT, const DenseMap< Value *, const SCEV * > &StridesMap=DenseMap< Value *, const SCEV * >(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of the access type size.
SCEVUseT< const SCEV * > SCEVUse
bool SCEVExprContains(const SCEV *Root, PredTy Pred)
Return true if any node in Root satisfies the predicate Pred.
Dependece between memory access instructions.
Instruction * getDestination(const MemoryDepChecker &DepChecker) const
Return the destination instruction of the dependence.
Instruction * getSource(const MemoryDepChecker &DepChecker) const
Return the source instruction of the dependence.
static LLVM_ABI VectorizationSafetyStatus isSafeForVectorization(DepType Type)
Dependence types that don't prevent vectorization.
TODO: The following VectorizationFactor was pulled out of LoopVectorizationCostModel class.
Collection of parameters shared beetween the Loop Vectorizer and the Loop Access Analysis.
static LLVM_ABI const unsigned MaxVectorWidth
Maximum SIMD width.
static LLVM_ABI bool isInterleaveForced()
True if force-vector-interleave was specified by the user.
static LLVM_ABI unsigned VectorizationInterleave
Interleave factor as overridden by the user.