LLVM 17.0.0git
LoopVectorizationLegality.cpp
Go to the documentation of this file.
1//===- LoopVectorizationLegality.cpp --------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file provides loop vectorization legality analysis. Original code
10// resided in LoopVectorize.cpp for a long time.
11//
12// At this point, it is implemented as a utility class, not as an analysis
13// pass. It should be easy to create an analysis pass around it if there
14// is a need (but D45420 needs to happen first).
15//
16
18#include "llvm/Analysis/Loads.h"
29
30using namespace llvm;
31using namespace PatternMatch;
32
33#define LV_NAME "loop-vectorize"
34#define DEBUG_TYPE LV_NAME
35
36static cl::opt<bool>
37 EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
38 cl::desc("Enable if-conversion during vectorization."));
39
40namespace llvm {
42 HintsAllowReordering("hints-allow-reordering", cl::init(true), cl::Hidden,
43 cl::desc("Allow enabling loop hints to reorder "
44 "FP operations during vectorization."));
45}
46
47// TODO: Move size-based thresholds out of legality checking, make cost based
48// decisions instead of hard thresholds.
50 "vectorize-scev-check-threshold", cl::init(16), cl::Hidden,
51 cl::desc("The maximum number of SCEV checks allowed."));
52
54 "pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden,
55 cl::desc("The maximum number of SCEV checks allowed with a "
56 "vectorize(enable) pragma"));
57
60 "scalable-vectorization", cl::init(LoopVectorizeHints::SK_Unspecified),
62 cl::desc("Control whether the compiler can use scalable vectors to "
63 "vectorize a loop"),
66 "Scalable vectorization is disabled."),
69 "Scalable vectorization is available and favored when the "
70 "cost is inconclusive."),
73 "Scalable vectorization is available and favored when the "
74 "cost is inconclusive.")));
75
76/// Maximum vectorization interleave count.
77static const unsigned MaxInterleaveFactor = 16;
78
79namespace llvm {
80
81bool LoopVectorizeHints::Hint::validate(unsigned Val) {
82 switch (Kind) {
83 case HK_WIDTH:
85 case HK_INTERLEAVE:
86 return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor;
87 case HK_FORCE:
88 return (Val <= 1);
89 case HK_ISVECTORIZED:
90 case HK_PREDICATE:
91 case HK_SCALABLE:
92 return (Val == 0 || Val == 1);
93 }
94 return false;
95}
96
98 bool InterleaveOnlyWhenForced,
101 : Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH),
102 Interleave("interleave.count", InterleaveOnlyWhenForced, HK_INTERLEAVE),
103 Force("vectorize.enable", FK_Undefined, HK_FORCE),
104 IsVectorized("isvectorized", 0, HK_ISVECTORIZED),
105 Predicate("vectorize.predicate.enable", FK_Undefined, HK_PREDICATE),
106 Scalable("vectorize.scalable.enable", SK_Unspecified, HK_SCALABLE),
107 TheLoop(L), ORE(ORE) {
108 // Populate values with existing loop metadata.
109 getHintsFromMetadata();
110
111 // force-vector-interleave overrides DisableInterleaving.
114
115 // If the metadata doesn't explicitly specify whether to enable scalable
116 // vectorization, then decide based on the following criteria (increasing
117 // level of priority):
118 // - Target default
119 // - Metadata width
120 // - Force option (always overrides)
122 if (TTI)
125
126 if (Width.Value)
127 // If the width is set, but the metadata says nothing about the scalable
128 // property, then assume it concerns only a fixed-width UserVF.
129 // If width is not set, the flag takes precedence.
130 Scalable.Value = SK_FixedWidthOnly;
131 }
132
133 // If the flag is set to force any use of scalable vectors, override the loop
134 // hints.
135 if (ForceScalableVectorization.getValue() !=
137 Scalable.Value = ForceScalableVectorization.getValue();
138
139 // Scalable vectorization is disabled if no preference is specified.
141 Scalable.Value = SK_FixedWidthOnly;
142
143 if (IsVectorized.Value != 1)
144 // If the vectorization width and interleaving count are both 1 then
145 // consider the loop to have been already vectorized because there's
146 // nothing more that we can do.
147 IsVectorized.Value =
149 LLVM_DEBUG(if (InterleaveOnlyWhenForced && getInterleave() == 1) dbgs()
150 << "LV: Interleaving disabled by the pass manager\n");
151}
152
154 LLVMContext &Context = TheLoop->getHeader()->getContext();
155
156 MDNode *IsVectorizedMD = MDNode::get(
157 Context,
158 {MDString::get(Context, "llvm.loop.isvectorized"),
160 MDNode *LoopID = TheLoop->getLoopID();
161 MDNode *NewLoopID =
163 {Twine(Prefix(), "vectorize.").str(),
164 Twine(Prefix(), "interleave.").str()},
165 {IsVectorizedMD});
166 TheLoop->setLoopID(NewLoopID);
167
168 // Update internal cache.
169 IsVectorized.Value = 1;
170}
171
173 Function *F, Loop *L, bool VectorizeOnlyWhenForced) const {
175 LLVM_DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n");
177 return false;
178 }
179
180 if (VectorizeOnlyWhenForced && getForce() != LoopVectorizeHints::FK_Enabled) {
181 LLVM_DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n");
183 return false;
184 }
185
186 if (getIsVectorized() == 1) {
187 LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n");
188 // FIXME: Add interleave.disable metadata. This will allow
189 // vectorize.disable to be used without disabling the pass and errors
190 // to differentiate between disabled vectorization and a width of 1.
191 ORE.emit([&]() {
193 "AllDisabled", L->getStartLoc(),
194 L->getHeader())
195 << "loop not vectorized: vectorization and interleaving are "
196 "explicitly disabled, or the loop has already been "
197 "vectorized";
198 });
199 return false;
200 }
201
202 return true;
203}
204
206 using namespace ore;
207
208 ORE.emit([&]() {
209 if (Force.Value == LoopVectorizeHints::FK_Disabled)
210 return OptimizationRemarkMissed(LV_NAME, "MissedExplicitlyDisabled",
211 TheLoop->getStartLoc(),
212 TheLoop->getHeader())
213 << "loop not vectorized: vectorization is explicitly disabled";
214 else {
215 OptimizationRemarkMissed R(LV_NAME, "MissedDetails",
216 TheLoop->getStartLoc(), TheLoop->getHeader());
217 R << "loop not vectorized";
218 if (Force.Value == LoopVectorizeHints::FK_Enabled) {
219 R << " (Force=" << NV("Force", true);
220 if (Width.Value != 0)
221 R << ", Vector Width=" << NV("VectorWidth", getWidth());
222 if (getInterleave() != 0)
223 R << ", Interleave Count=" << NV("InterleaveCount", getInterleave());
224 R << ")";
225 }
226 return R;
227 }
228 });
229}
230
233 return LV_NAME;
235 return LV_NAME;
237 return LV_NAME;
239}
240
242 // Allow the vectorizer to change the order of operations if enabling
243 // loop hints are provided
244 ElementCount EC = getWidth();
245 return HintsAllowReordering &&
247 EC.getKnownMinValue() > 1);
248}
249
250void LoopVectorizeHints::getHintsFromMetadata() {
251 MDNode *LoopID = TheLoop->getLoopID();
252 if (!LoopID)
253 return;
254
255 // First operand should refer to the loop id itself.
256 assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
257 assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
258
259 for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
260 const MDString *S = nullptr;
262
263 // The expected hint is either a MDString or a MDNode with the first
264 // operand a MDString.
265 if (const MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i))) {
266 if (!MD || MD->getNumOperands() == 0)
267 continue;
268 S = dyn_cast<MDString>(MD->getOperand(0));
269 for (unsigned i = 1, ie = MD->getNumOperands(); i < ie; ++i)
270 Args.push_back(MD->getOperand(i));
271 } else {
272 S = dyn_cast<MDString>(LoopID->getOperand(i));
273 assert(Args.size() == 0 && "too many arguments for MDString");
274 }
275
276 if (!S)
277 continue;
278
279 // Check if the hint starts with the loop metadata prefix.
280 StringRef Name = S->getString();
281 if (Args.size() == 1)
282 setHint(Name, Args[0]);
283 }
284}
285
286void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) {
287 if (!Name.startswith(Prefix()))
288 return;
289 Name = Name.substr(Prefix().size(), StringRef::npos);
290
291 const ConstantInt *C = mdconst::dyn_extract<ConstantInt>(Arg);
292 if (!C)
293 return;
294 unsigned Val = C->getZExtValue();
295
296 Hint *Hints[] = {&Width, &Interleave, &Force,
297 &IsVectorized, &Predicate, &Scalable};
298 for (auto *H : Hints) {
299 if (Name == H->Name) {
300 if (H->validate(Val))
301 H->Value = Val;
302 else
303 LLVM_DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n");
304 break;
305 }
306 }
307}
308
309// Return true if the inner loop \p Lp is uniform with regard to the outer loop
310// \p OuterLp (i.e., if the outer loop is vectorized, all the vector lanes
311// executing the inner loop will execute the same iterations). This check is
312// very constrained for now but it will be relaxed in the future. \p Lp is
313// considered uniform if it meets all the following conditions:
314// 1) it has a canonical IV (starting from 0 and with stride 1),
315// 2) its latch terminator is a conditional branch and,
316// 3) its latch condition is a compare instruction whose operands are the
317// canonical IV and an OuterLp invariant.
318// This check doesn't take into account the uniformity of other conditions not
319// related to the loop latch because they don't affect the loop uniformity.
320//
321// NOTE: We decided to keep all these checks and its associated documentation
322// together so that we can easily have a picture of the current supported loop
323// nests. However, some of the current checks don't depend on \p OuterLp and
324// would be redundantly executed for each \p Lp if we invoked this function for
325// different candidate outer loops. This is not the case for now because we
326// don't currently have the infrastructure to evaluate multiple candidate outer
327// loops and \p OuterLp will be a fixed parameter while we only support explicit
328// outer loop vectorization. It's also very likely that these checks go away
329// before introducing the aforementioned infrastructure. However, if this is not
330// the case, we should move the \p OuterLp independent checks to a separate
331// function that is only executed once for each \p Lp.
332static bool isUniformLoop(Loop *Lp, Loop *OuterLp) {
333 assert(Lp->getLoopLatch() && "Expected loop with a single latch.");
334
335 // If Lp is the outer loop, it's uniform by definition.
336 if (Lp == OuterLp)
337 return true;
338 assert(OuterLp->contains(Lp) && "OuterLp must contain Lp.");
339
340 // 1.
342 if (!IV) {
343 LLVM_DEBUG(dbgs() << "LV: Canonical IV not found.\n");
344 return false;
345 }
346
347 // 2.
348 BasicBlock *Latch = Lp->getLoopLatch();
349 auto *LatchBr = dyn_cast<BranchInst>(Latch->getTerminator());
350 if (!LatchBr || LatchBr->isUnconditional()) {
351 LLVM_DEBUG(dbgs() << "LV: Unsupported loop latch branch.\n");
352 return false;
353 }
354
355 // 3.
356 auto *LatchCmp = dyn_cast<CmpInst>(LatchBr->getCondition());
357 if (!LatchCmp) {
359 dbgs() << "LV: Loop latch condition is not a compare instruction.\n");
360 return false;
361 }
362
363 Value *CondOp0 = LatchCmp->getOperand(0);
364 Value *CondOp1 = LatchCmp->getOperand(1);
365 Value *IVUpdate = IV->getIncomingValueForBlock(Latch);
366 if (!(CondOp0 == IVUpdate && OuterLp->isLoopInvariant(CondOp1)) &&
367 !(CondOp1 == IVUpdate && OuterLp->isLoopInvariant(CondOp0))) {
368 LLVM_DEBUG(dbgs() << "LV: Loop latch condition is not uniform.\n");
369 return false;
370 }
371
372 return true;
373}
374
375// Return true if \p Lp and all its nested loops are uniform with regard to \p
376// OuterLp.
377static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp) {
378 if (!isUniformLoop(Lp, OuterLp))
379 return false;
380
381 // Check if nested loops are uniform.
382 for (Loop *SubLp : *Lp)
383 if (!isUniformLoopNest(SubLp, OuterLp))
384 return false;
385
386 return true;
387}
388
390 if (Ty->isPointerTy())
391 return DL.getIntPtrType(Ty);
392
393 // It is possible that char's or short's overflow when we ask for the loop's
394 // trip count, work around this by changing the type size.
395 if (Ty->getScalarSizeInBits() < 32)
396 return Type::getInt32Ty(Ty->getContext());
397
398 return Ty;
399}
400
401static Type *getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1) {
404 if (Ty0->getScalarSizeInBits() > Ty1->getScalarSizeInBits())
405 return Ty0;
406 return Ty1;
407}
408
409/// Check that the instruction has outside loop users and is not an
410/// identified reduction variable.
411static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
412 SmallPtrSetImpl<Value *> &AllowedExit) {
413 // Reductions, Inductions and non-header phis are allowed to have exit users. All
414 // other instructions must not have external users.
415 if (!AllowedExit.count(Inst))
416 // Check that all of the users of the loop are inside the BB.
417 for (User *U : Inst->users()) {
418 Instruction *UI = cast<Instruction>(U);
419 // This user may be a reduction exit value.
420 if (!TheLoop->contains(UI)) {
421 LLVM_DEBUG(dbgs() << "LV: Found an outside user for : " << *UI << '\n');
422 return true;
423 }
424 }
425 return false;
426}
427
428/// Returns true if A and B have same pointer operands or same SCEVs addresses
430 StoreInst *B) {
431 // Compare store
432 if (A == B)
433 return true;
434
435 // Otherwise Compare pointers
436 Value *APtr = A->getPointerOperand();
437 Value *BPtr = B->getPointerOperand();
438 if (APtr == BPtr)
439 return true;
440
441 // Otherwise compare address SCEVs
442 if (SE->getSCEV(APtr) == SE->getSCEV(BPtr))
443 return true;
444
445 return false;
446}
447
449 Value *Ptr) const {
450 const ValueToValueMap &Strides =
451 getSymbolicStrides() ? *getSymbolicStrides() : ValueToValueMap();
452
453 Function *F = TheLoop->getHeader()->getParent();
454 bool OptForSize = F->hasOptSize() ||
455 llvm::shouldOptimizeForSize(TheLoop->getHeader(), PSI, BFI,
457 bool CanAddPredicate = !OptForSize;
458 int Stride = getPtrStride(PSE, AccessTy, Ptr, TheLoop, Strides,
459 CanAddPredicate, false).value_or(0);
460 if (Stride == 1 || Stride == -1)
461 return Stride;
462 return 0;
463}
464
466 return LAI->isUniform(V);
467}
468
471 if (!Ptr)
472 return false;
473 // Note: There's nothing inherent which prevents predicated loads and
474 // stores from being uniform. The current lowering simply doesn't handle
475 // it; in particular, the cost model distinguishes scatter/gather from
476 // scalar w/predication, and we currently rely on the scalar path.
477 return isUniform(Ptr) && !blockNeedsPredication(I.getParent());
478}
479
480bool LoopVectorizationLegality::canVectorizeOuterLoop() {
481 assert(!TheLoop->isInnermost() && "We are not vectorizing an outer loop.");
482 // Store the result and return it at the end instead of exiting early, in case
483 // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
484 bool Result = true;
485 bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
486
487 for (BasicBlock *BB : TheLoop->blocks()) {
488 // Check whether the BB terminator is a BranchInst. Any other terminator is
489 // not supported yet.
490 auto *Br = dyn_cast<BranchInst>(BB->getTerminator());
491 if (!Br) {
492 reportVectorizationFailure("Unsupported basic block terminator",
493 "loop control flow is not understood by vectorizer",
494 "CFGNotUnderstood", ORE, TheLoop);
495 if (DoExtraAnalysis)
496 Result = false;
497 else
498 return false;
499 }
500
501 // Check whether the BranchInst is a supported one. Only unconditional
502 // branches, conditional branches with an outer loop invariant condition or
503 // backedges are supported.
504 // FIXME: We skip these checks when VPlan predication is enabled as we
505 // want to allow divergent branches. This whole check will be removed
506 // once VPlan predication is on by default.
507 if (Br && Br->isConditional() &&
508 !TheLoop->isLoopInvariant(Br->getCondition()) &&
509 !LI->isLoopHeader(Br->getSuccessor(0)) &&
510 !LI->isLoopHeader(Br->getSuccessor(1))) {
511 reportVectorizationFailure("Unsupported conditional branch",
512 "loop control flow is not understood by vectorizer",
513 "CFGNotUnderstood", ORE, TheLoop);
514 if (DoExtraAnalysis)
515 Result = false;
516 else
517 return false;
518 }
519 }
520
521 // Check whether inner loops are uniform. At this point, we only support
522 // simple outer loops scenarios with uniform nested loops.
523 if (!isUniformLoopNest(TheLoop /*loop nest*/,
524 TheLoop /*context outer loop*/)) {
525 reportVectorizationFailure("Outer loop contains divergent loops",
526 "loop control flow is not understood by vectorizer",
527 "CFGNotUnderstood", ORE, TheLoop);
528 if (DoExtraAnalysis)
529 Result = false;
530 else
531 return false;
532 }
533
534 // Check whether we are able to set up outer loop induction.
535 if (!setupOuterLoopInductions()) {
536 reportVectorizationFailure("Unsupported outer loop Phi(s)",
537 "Unsupported outer loop Phi(s)",
538 "UnsupportedPhi", ORE, TheLoop);
539 if (DoExtraAnalysis)
540 Result = false;
541 else
542 return false;
543 }
544
545 return Result;
546}
547
548void LoopVectorizationLegality::addInductionPhi(
549 PHINode *Phi, const InductionDescriptor &ID,
550 SmallPtrSetImpl<Value *> &AllowedExit) {
551 Inductions[Phi] = ID;
552
553 // In case this induction also comes with casts that we know we can ignore
554 // in the vectorized loop body, record them here. All casts could be recorded
555 // here for ignoring, but suffices to record only the first (as it is the
556 // only one that may bw used outside the cast sequence).
557 const SmallVectorImpl<Instruction *> &Casts = ID.getCastInsts();
558 if (!Casts.empty())
559 InductionCastsToIgnore.insert(*Casts.begin());
560
561 Type *PhiTy = Phi->getType();
562 const DataLayout &DL = Phi->getModule()->getDataLayout();
563
564 // Get the widest type.
565 if (!PhiTy->isFloatingPointTy()) {
566 if (!WidestIndTy)
567 WidestIndTy = convertPointerToIntegerType(DL, PhiTy);
568 else
569 WidestIndTy = getWiderType(DL, PhiTy, WidestIndTy);
570 }
571
572 // Int inductions are special because we only allow one IV.
573 if (ID.getKind() == InductionDescriptor::IK_IntInduction &&
574 ID.getConstIntStepValue() && ID.getConstIntStepValue()->isOne() &&
575 isa<Constant>(ID.getStartValue()) &&
576 cast<Constant>(ID.getStartValue())->isNullValue()) {
577
578 // Use the phi node with the widest type as induction. Use the last
579 // one if there are multiple (no good reason for doing this other
580 // than it is expedient). We've checked that it begins at zero and
581 // steps by one, so this is a canonical induction variable.
582 if (!PrimaryInduction || PhiTy == WidestIndTy)
583 PrimaryInduction = Phi;
584 }
585
586 // Both the PHI node itself, and the "post-increment" value feeding
587 // back into the PHI node may have external users.
588 // We can allow those uses, except if the SCEVs we have for them rely
589 // on predicates that only hold within the loop, since allowing the exit
590 // currently means re-using this SCEV outside the loop (see PR33706 for more
591 // details).
592 if (PSE.getPredicate().isAlwaysTrue()) {
593 AllowedExit.insert(Phi);
594 AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch()));
595 }
596
597 LLVM_DEBUG(dbgs() << "LV: Found an induction variable.\n");
598}
599
600bool LoopVectorizationLegality::setupOuterLoopInductions() {
601 BasicBlock *Header = TheLoop->getHeader();
602
603 // Returns true if a given Phi is a supported induction.
604 auto isSupportedPhi = [&](PHINode &Phi) -> bool {
606 if (InductionDescriptor::isInductionPHI(&Phi, TheLoop, PSE, ID) &&
608 addInductionPhi(&Phi, ID, AllowedExit);
609 return true;
610 } else {
611 // Bail out for any Phi in the outer loop header that is not a supported
612 // induction.
614 dbgs()
615 << "LV: Found unsupported PHI for outer loop vectorization.\n");
616 return false;
617 }
618 };
619
620 if (llvm::all_of(Header->phis(), isSupportedPhi))
621 return true;
622 else
623 return false;
624}
625
626/// Checks if a function is scalarizable according to the TLI, in
627/// the sense that it should be vectorized and then expanded in
628/// multiple scalar calls. This is represented in the
629/// TLI via mappings that do not specify a vector name, as in the
630/// following example:
631///
632/// const VecDesc VecIntrinsics[] = {
633/// {"llvm.phx.abs.i32", "", 4}
634/// };
635static bool isTLIScalarize(const TargetLibraryInfo &TLI, const CallInst &CI) {
636 const StringRef ScalarName = CI.getCalledFunction()->getName();
637 bool Scalarize = TLI.isFunctionVectorizable(ScalarName);
638 // Check that all known VFs are not associated to a vector
639 // function, i.e. the vector name is emty.
640 if (Scalarize) {
641 ElementCount WidestFixedVF, WidestScalableVF;
642 TLI.getWidestVF(ScalarName, WidestFixedVF, WidestScalableVF);
644 ElementCount::isKnownLE(VF, WidestFixedVF); VF *= 2)
645 Scalarize &= !TLI.isFunctionVectorizable(ScalarName, VF);
647 ElementCount::isKnownLE(VF, WidestScalableVF); VF *= 2)
648 Scalarize &= !TLI.isFunctionVectorizable(ScalarName, VF);
649 assert((WidestScalableVF.isZero() || !Scalarize) &&
650 "Caller may decide to scalarize a variant using a scalable VF");
651 }
652 return Scalarize;
653}
654
655bool LoopVectorizationLegality::canVectorizeInstrs() {
656 BasicBlock *Header = TheLoop->getHeader();
657
658 // For each block in the loop.
659 for (BasicBlock *BB : TheLoop->blocks()) {
660 // Scan the instructions in the block and look for hazards.
661 for (Instruction &I : *BB) {
662 if (auto *Phi = dyn_cast<PHINode>(&I)) {
663 Type *PhiTy = Phi->getType();
664 // Check that this PHI type is allowed.
665 if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() &&
666 !PhiTy->isPointerTy()) {
667 reportVectorizationFailure("Found a non-int non-pointer PHI",
668 "loop control flow is not understood by vectorizer",
669 "CFGNotUnderstood", ORE, TheLoop);
670 return false;
671 }
672
673 // If this PHINode is not in the header block, then we know that we
674 // can convert it to select during if-conversion. No need to check if
675 // the PHIs in this block are induction or reduction variables.
676 if (BB != Header) {
677 // Non-header phi nodes that have outside uses can be vectorized. Add
678 // them to the list of allowed exits.
679 // Unsafe cyclic dependencies with header phis are identified during
680 // legalization for reduction, induction and fixed order
681 // recurrences.
682 AllowedExit.insert(&I);
683 continue;
684 }
685
686 // We only allow if-converted PHIs with exactly two incoming values.
687 if (Phi->getNumIncomingValues() != 2) {
688 reportVectorizationFailure("Found an invalid PHI",
689 "loop control flow is not understood by vectorizer",
690 "CFGNotUnderstood", ORE, TheLoop, Phi);
691 return false;
692 }
693
695 if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes, DB, AC,
696 DT, PSE.getSE())) {
697 Requirements->addExactFPMathInst(RedDes.getExactFPMathInst());
698 AllowedExit.insert(RedDes.getLoopExitInstr());
699 Reductions[Phi] = RedDes;
700 continue;
701 }
702
703 // TODO: Instead of recording the AllowedExit, it would be good to
704 // record the complementary set: NotAllowedExit. These include (but may
705 // not be limited to):
706 // 1. Reduction phis as they represent the one-before-last value, which
707 // is not available when vectorized
708 // 2. Induction phis and increment when SCEV predicates cannot be used
709 // outside the loop - see addInductionPhi
710 // 3. Non-Phis with outside uses when SCEV predicates cannot be used
711 // outside the loop - see call to hasOutsideLoopUser in the non-phi
712 // handling below
713 // 4. FixedOrderRecurrence phis that can possibly be handled by
714 // extraction.
715 // By recording these, we can then reason about ways to vectorize each
716 // of these NotAllowedExit.
718 if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID)) {
719 addInductionPhi(Phi, ID, AllowedExit);
720 Requirements->addExactFPMathInst(ID.getExactFPMathInst());
721 continue;
722 }
723
725 SinkAfter, DT)) {
726 AllowedExit.insert(Phi);
727 FixedOrderRecurrences.insert(Phi);
728 continue;
729 }
730
731 // As a last resort, coerce the PHI to a AddRec expression
732 // and re-try classifying it a an induction PHI.
733 if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID, true)) {
734 addInductionPhi(Phi, ID, AllowedExit);
735 continue;
736 }
737
738 reportVectorizationFailure("Found an unidentified PHI",
739 "value that could not be identified as "
740 "reduction is used outside the loop",
741 "NonReductionValueUsedOutsideLoop", ORE, TheLoop, Phi);
742 return false;
743 } // end of PHI handling
744
745 // We handle calls that:
746 // * Are debug info intrinsics.
747 // * Have a mapping to an IR intrinsic.
748 // * Have a vector version available.
749 auto *CI = dyn_cast<CallInst>(&I);
750
751 if (CI && !getVectorIntrinsicIDForCall(CI, TLI) &&
752 !isa<DbgInfoIntrinsic>(CI) &&
753 !(CI->getCalledFunction() && TLI &&
754 (!VFDatabase::getMappings(*CI).empty() ||
755 isTLIScalarize(*TLI, *CI)))) {
756 // If the call is a recognized math libary call, it is likely that
757 // we can vectorize it given loosened floating-point constraints.
759 bool IsMathLibCall =
760 TLI && CI->getCalledFunction() &&
761 CI->getType()->isFloatingPointTy() &&
762 TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) &&
763 TLI->hasOptimizedCodeGen(Func);
764
765 if (IsMathLibCall) {
766 // TODO: Ideally, we should not use clang-specific language here,
767 // but it's hard to provide meaningful yet generic advice.
768 // Also, should this be guarded by allowExtraAnalysis() and/or be part
769 // of the returned info from isFunctionVectorizable()?
771 "Found a non-intrinsic callsite",
772 "library call cannot be vectorized. "
773 "Try compiling with -fno-math-errno, -ffast-math, "
774 "or similar flags",
775 "CantVectorizeLibcall", ORE, TheLoop, CI);
776 } else {
777 reportVectorizationFailure("Found a non-intrinsic callsite",
778 "call instruction cannot be vectorized",
779 "CantVectorizeLibcall", ORE, TheLoop, CI);
780 }
781 return false;
782 }
783
784 // Some intrinsics have scalar arguments and should be same in order for
785 // them to be vectorized (i.e. loop invariant).
786 if (CI) {
787 auto *SE = PSE.getSE();
788 Intrinsic::ID IntrinID = getVectorIntrinsicIDForCall(CI, TLI);
789 for (unsigned i = 0, e = CI->arg_size(); i != e; ++i)
790 if (isVectorIntrinsicWithScalarOpAtArg(IntrinID, i)) {
791 if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(i)), TheLoop)) {
792 reportVectorizationFailure("Found unvectorizable intrinsic",
793 "intrinsic instruction cannot be vectorized",
794 "CantVectorizeIntrinsic", ORE, TheLoop, CI);
795 return false;
796 }
797 }
798 }
799
800 // Check that the instruction return type is vectorizable.
801 // Also, we can't vectorize extractelement instructions.
802 if ((!VectorType::isValidElementType(I.getType()) &&
803 !I.getType()->isVoidTy()) ||
804 isa<ExtractElementInst>(I)) {
805 reportVectorizationFailure("Found unvectorizable type",
806 "instruction return type cannot be vectorized",
807 "CantVectorizeInstructionReturnType", ORE, TheLoop, &I);
808 return false;
809 }
810
811 // Check that the stored type is vectorizable.
812 if (auto *ST = dyn_cast<StoreInst>(&I)) {
813 Type *T = ST->getValueOperand()->getType();
815 reportVectorizationFailure("Store instruction cannot be vectorized",
816 "store instruction cannot be vectorized",
817 "CantVectorizeStore", ORE, TheLoop, ST);
818 return false;
819 }
820
821 // For nontemporal stores, check that a nontemporal vector version is
822 // supported on the target.
823 if (ST->getMetadata(LLVMContext::MD_nontemporal)) {
824 // Arbitrarily try a vector of 2 elements.
825 auto *VecTy = FixedVectorType::get(T, /*NumElts=*/2);
826 assert(VecTy && "did not find vectorized version of stored type");
827 if (!TTI->isLegalNTStore(VecTy, ST->getAlign())) {
829 "nontemporal store instruction cannot be vectorized",
830 "nontemporal store instruction cannot be vectorized",
831 "CantVectorizeNontemporalStore", ORE, TheLoop, ST);
832 return false;
833 }
834 }
835
836 } else if (auto *LD = dyn_cast<LoadInst>(&I)) {
837 if (LD->getMetadata(LLVMContext::MD_nontemporal)) {
838 // For nontemporal loads, check that a nontemporal vector version is
839 // supported on the target (arbitrarily try a vector of 2 elements).
840 auto *VecTy = FixedVectorType::get(I.getType(), /*NumElts=*/2);
841 assert(VecTy && "did not find vectorized version of load type");
842 if (!TTI->isLegalNTLoad(VecTy, LD->getAlign())) {
844 "nontemporal load instruction cannot be vectorized",
845 "nontemporal load instruction cannot be vectorized",
846 "CantVectorizeNontemporalLoad", ORE, TheLoop, LD);
847 return false;
848 }
849 }
850
851 // FP instructions can allow unsafe algebra, thus vectorizable by
852 // non-IEEE-754 compliant SIMD units.
853 // This applies to floating-point math operations and calls, not memory
854 // operations, shuffles, or casts, as they don't change precision or
855 // semantics.
856 } else if (I.getType()->isFloatingPointTy() && (CI || I.isBinaryOp()) &&
857 !I.isFast()) {
858 LLVM_DEBUG(dbgs() << "LV: Found FP op with unsafe algebra.\n");
859 Hints->setPotentiallyUnsafe();
860 }
861
862 // Reduction instructions are allowed to have exit users.
863 // All other instructions must not have external users.
864 if (hasOutsideLoopUser(TheLoop, &I, AllowedExit)) {
865 // We can safely vectorize loops where instructions within the loop are
866 // used outside the loop only if the SCEV predicates within the loop is
867 // same as outside the loop. Allowing the exit means reusing the SCEV
868 // outside the loop.
869 if (PSE.getPredicate().isAlwaysTrue()) {
870 AllowedExit.insert(&I);
871 continue;
872 }
873 reportVectorizationFailure("Value cannot be used outside the loop",
874 "value cannot be used outside the loop",
875 "ValueUsedOutsideLoop", ORE, TheLoop, &I);
876 return false;
877 }
878 } // next instr.
879 }
880
881 if (!PrimaryInduction) {
882 if (Inductions.empty()) {
883 reportVectorizationFailure("Did not find one integer induction var",
884 "loop induction variable could not be identified",
885 "NoInductionVariable", ORE, TheLoop);
886 return false;
887 } else if (!WidestIndTy) {
888 reportVectorizationFailure("Did not find one integer induction var",
889 "integer loop induction variable could not be identified",
890 "NoIntegerInductionVariable", ORE, TheLoop);
891 return false;
892 } else {
893 LLVM_DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
894 }
895 }
896
897 // For fixed order recurrences, we use the previous value (incoming value from
898 // the latch) to check if it dominates all users of the recurrence. Bail out
899 // if we have to sink such an instruction for another recurrence, as the
900 // dominance requirement may not hold after sinking.
901 BasicBlock *LoopLatch = TheLoop->getLoopLatch();
902 if (any_of(FixedOrderRecurrences, [LoopLatch, this](const PHINode *Phi) {
903 Instruction *V =
904 cast<Instruction>(Phi->getIncomingValueForBlock(LoopLatch));
905 return SinkAfter.contains(V);
906 }))
907 return false;
908
909 // Now we know the widest induction type, check if our found induction
910 // is the same size. If it's not, unset it here and InnerLoopVectorizer
911 // will create another.
912 if (PrimaryInduction && WidestIndTy != PrimaryInduction->getType())
913 PrimaryInduction = nullptr;
914
915 return true;
916}
917
918bool LoopVectorizationLegality::canVectorizeMemory() {
919 LAI = &LAIs.getInfo(*TheLoop);
920 const OptimizationRemarkAnalysis *LAR = LAI->getReport();
921 if (LAR) {
922 ORE->emit([&]() {
923 return OptimizationRemarkAnalysis(Hints->vectorizeAnalysisPassName(),
924 "loop not vectorized: ", *LAR);
925 });
926 }
927
928 if (!LAI->canVectorizeMemory())
929 return false;
930
931 // We can vectorize stores to invariant address when final reduction value is
932 // guaranteed to be stored at the end of the loop. Also, if decision to
933 // vectorize loop is made, runtime checks are added so as to make sure that
934 // invariant address won't alias with any other objects.
935 if (!LAI->getStoresToInvariantAddresses().empty()) {
936 // For each invariant address, check if last stored value is unconditional
937 // and the address is not calculated inside the loop.
938 for (StoreInst *SI : LAI->getStoresToInvariantAddresses()) {
940 continue;
941
942 if (blockNeedsPredication(SI->getParent())) {
944 "We don't allow storing to uniform addresses",
945 "write of conditional recurring variant value to a loop "
946 "invariant address could not be vectorized",
947 "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
948 return false;
949 }
950
951 // Invariant address should be defined outside of loop. LICM pass usually
952 // makes sure it happens, but in rare cases it does not, we do not want
953 // to overcomplicate vectorization to support this case.
954 if (Instruction *Ptr = dyn_cast<Instruction>(SI->getPointerOperand())) {
955 if (TheLoop->contains(Ptr)) {
957 "Invariant address is calculated inside the loop",
958 "write to a loop invariant address could not "
959 "be vectorized",
960 "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
961 return false;
962 }
963 }
964 }
965
967 // For each invariant address, check its last stored value is the result
968 // of one of our reductions.
969 //
970 // We do not check if dependence with loads exists because they are
971 // currently rejected earlier in LoopAccessInfo::analyzeLoop. In case this
972 // behaviour changes we have to modify this code.
973 ScalarEvolution *SE = PSE.getSE();
974 SmallVector<StoreInst *, 4> UnhandledStores;
975 for (StoreInst *SI : LAI->getStoresToInvariantAddresses()) {
977 // Earlier stores to this address are effectively deadcode.
978 // With opaque pointers it is possible for one pointer to be used with
979 // different sizes of stored values:
980 // store i32 0, ptr %x
981 // store i8 0, ptr %x
982 // The latest store doesn't complitely overwrite the first one in the
983 // example. That is why we have to make sure that types of stored
984 // values are same.
985 // TODO: Check that bitwidth of unhandled store is smaller then the
986 // one that overwrites it and add a test.
987 erase_if(UnhandledStores, [SE, SI](StoreInst *I) {
988 return storeToSameAddress(SE, SI, I) &&
989 I->getValueOperand()->getType() ==
990 SI->getValueOperand()->getType();
991 });
992 continue;
993 }
994 UnhandledStores.push_back(SI);
995 }
996
997 bool IsOK = UnhandledStores.empty();
998 // TODO: we should also validate against InvariantMemSets.
999 if (!IsOK) {
1001 "We don't allow storing to uniform addresses",
1002 "write to a loop invariant address could not "
1003 "be vectorized",
1004 "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
1005 return false;
1006 }
1007 }
1008 }
1009
1010 PSE.addPredicate(LAI->getPSE().getPredicate());
1011 return true;
1012}
1013
1015 bool EnableStrictReductions) {
1016
1017 // First check if there is any ExactFP math or if we allow reassociations
1018 if (!Requirements->getExactFPInst() || Hints->allowReordering())
1019 return true;
1020
1021 // If the above is false, we have ExactFPMath & do not allow reordering.
1022 // If the EnableStrictReductions flag is set, first check if we have any
1023 // Exact FP induction vars, which we cannot vectorize.
1024 if (!EnableStrictReductions ||
1025 any_of(getInductionVars(), [&](auto &Induction) -> bool {
1026 InductionDescriptor IndDesc = Induction.second;
1027 return IndDesc.getExactFPMathInst();
1028 }))
1029 return false;
1030
1031 // We can now only vectorize if all reductions with Exact FP math also
1032 // have the isOrdered flag set, which indicates that we can move the
1033 // reduction operations in-loop.
1034 return (all_of(getReductionVars(), [&](auto &Reduction) -> bool {
1035 const RecurrenceDescriptor &RdxDesc = Reduction.second;
1036 return !RdxDesc.hasExactFPMath() || RdxDesc.isOrdered();
1037 }));
1038}
1039
1041 return any_of(getReductionVars(), [&](auto &Reduction) -> bool {
1042 const RecurrenceDescriptor &RdxDesc = Reduction.second;
1043 return RdxDesc.IntermediateStore == SI;
1044 });
1045}
1046
1048 return any_of(getReductionVars(), [&](auto &Reduction) -> bool {
1049 const RecurrenceDescriptor &RdxDesc = Reduction.second;
1050 if (!RdxDesc.IntermediateStore)
1051 return false;
1052
1053 ScalarEvolution *SE = PSE.getSE();
1054 Value *InvariantAddress = RdxDesc.IntermediateStore->getPointerOperand();
1055 return V == InvariantAddress ||
1056 SE->getSCEV(V) == SE->getSCEV(InvariantAddress);
1057 });
1058}
1059
1061 Value *In0 = const_cast<Value *>(V);
1062 PHINode *PN = dyn_cast_or_null<PHINode>(In0);
1063 if (!PN)
1064 return false;
1065
1066 return Inductions.count(PN);
1067}
1068
1069const InductionDescriptor *
1071 if (!isInductionPhi(Phi))
1072 return nullptr;
1073 auto &ID = getInductionVars().find(Phi)->second;
1074 if (ID.getKind() == InductionDescriptor::IK_IntInduction ||
1076 return &ID;
1077 return nullptr;
1078}
1079
1080const InductionDescriptor *
1082 if (!isInductionPhi(Phi))
1083 return nullptr;
1084 auto &ID = getInductionVars().find(Phi)->second;
1086 return &ID;
1087 return nullptr;
1088}
1089
1091 const Value *V) const {
1092 auto *Inst = dyn_cast<Instruction>(V);
1093 return (Inst && InductionCastsToIgnore.count(Inst));
1094}
1095
1098}
1099
1101 const PHINode *Phi) const {
1102 return FixedOrderRecurrences.count(Phi);
1103}
1104
1106 return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
1107}
1108
1109bool LoopVectorizationLegality::blockCanBePredicated(
1110 BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs,
1112 SmallPtrSetImpl<Instruction *> &ConditionalAssumes) const {
1113 for (Instruction &I : *BB) {
1114 // We can predicate blocks with calls to assume, as long as we drop them in
1115 // case we flatten the CFG via predication.
1116 if (match(&I, m_Intrinsic<Intrinsic::assume>())) {
1117 ConditionalAssumes.insert(&I);
1118 continue;
1119 }
1120
1121 // Do not let llvm.experimental.noalias.scope.decl block the vectorization.
1122 // TODO: there might be cases that it should block the vectorization. Let's
1123 // ignore those for now.
1124 if (isa<NoAliasScopeDeclInst>(&I))
1125 continue;
1126
1127 // We can allow masked calls if there's at least one vector variant, even
1128 // if we end up scalarizing due to the cost model calculations.
1129 // TODO: Allow other calls if they have appropriate attributes... readonly
1130 // and argmemonly?
1131 if (CallInst *CI = dyn_cast<CallInst>(&I)) {
1132 // Check whether we have at least one masked vector version of a scalar
1133 // function.
1135 [](VFInfo &Info) { return Info.isMasked(); })) {
1136 MaskedOp.insert(CI);
1137 continue;
1138 }
1139 }
1140
1141 // Loads are handled via masking (or speculated if safe to do so.)
1142 if (auto *LI = dyn_cast<LoadInst>(&I)) {
1143 if (!SafePtrs.count(LI->getPointerOperand()))
1144 MaskedOp.insert(LI);
1145 continue;
1146 }
1147
1148 // Predicated store requires some form of masking:
1149 // 1) masked store HW instruction,
1150 // 2) emulation via load-blend-store (only if safe and legal to do so,
1151 // be aware on the race conditions), or
1152 // 3) element-by-element predicate check and scalar store.
1153 if (auto *SI = dyn_cast<StoreInst>(&I)) {
1154 MaskedOp.insert(SI);
1155 continue;
1156 }
1157
1158 if (I.mayReadFromMemory() || I.mayWriteToMemory() || I.mayThrow())
1159 return false;
1160 }
1161
1162 return true;
1163}
1164
1165bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
1166 if (!EnableIfConversion) {
1167 reportVectorizationFailure("If-conversion is disabled",
1168 "if-conversion is disabled",
1169 "IfConversionDisabled",
1170 ORE, TheLoop);
1171 return false;
1172 }
1173
1174 assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable");
1175
1176 // A list of pointers which are known to be dereferenceable within scope of
1177 // the loop body for each iteration of the loop which executes. That is,
1178 // the memory pointed to can be dereferenced (with the access size implied by
1179 // the value's type) unconditionally within the loop header without
1180 // introducing a new fault.
1181 SmallPtrSet<Value *, 8> SafePointers;
1182
1183 // Collect safe addresses.
1184 for (BasicBlock *BB : TheLoop->blocks()) {
1185 if (!blockNeedsPredication(BB)) {
1186 for (Instruction &I : *BB)
1187 if (auto *Ptr = getLoadStorePointerOperand(&I))
1188 SafePointers.insert(Ptr);
1189 continue;
1190 }
1191
1192 // For a block which requires predication, a address may be safe to access
1193 // in the loop w/o predication if we can prove dereferenceability facts
1194 // sufficient to ensure it'll never fault within the loop. For the moment,
1195 // we restrict this to loads; stores are more complicated due to
1196 // concurrency restrictions.
1197 ScalarEvolution &SE = *PSE.getSE();
1198 for (Instruction &I : *BB) {
1199 LoadInst *LI = dyn_cast<LoadInst>(&I);
1200 if (LI && !LI->getType()->isVectorTy() && !mustSuppressSpeculation(*LI) &&
1201 isDereferenceableAndAlignedInLoop(LI, TheLoop, SE, *DT, AC))
1202 SafePointers.insert(LI->getPointerOperand());
1203 }
1204 }
1205
1206 // Collect the blocks that need predication.
1207 for (BasicBlock *BB : TheLoop->blocks()) {
1208 // We don't support switch statements inside loops.
1209 if (!isa<BranchInst>(BB->getTerminator())) {
1210 reportVectorizationFailure("Loop contains a switch statement",
1211 "loop contains a switch statement",
1212 "LoopContainsSwitch", ORE, TheLoop,
1213 BB->getTerminator());
1214 return false;
1215 }
1216
1217 // We must be able to predicate all blocks that need to be predicated.
1218 if (blockNeedsPredication(BB)) {
1219 if (!blockCanBePredicated(BB, SafePointers, MaskedOp,
1220 ConditionalAssumes)) {
1222 "Control flow cannot be substituted for a select",
1223 "control flow cannot be substituted for a select",
1224 "NoCFGForSelect", ORE, TheLoop,
1225 BB->getTerminator());
1226 return false;
1227 }
1228 }
1229 }
1230
1231 // We can if-convert this loop.
1232 return true;
1233}
1234
1235// Helper function to canVectorizeLoopNestCFG.
1236bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
1237 bool UseVPlanNativePath) {
1238 assert((UseVPlanNativePath || Lp->isInnermost()) &&
1239 "VPlan-native path is not enabled.");
1240
1241 // TODO: ORE should be improved to show more accurate information when an
1242 // outer loop can't be vectorized because a nested loop is not understood or
1243 // legal. Something like: "outer_loop_location: loop not vectorized:
1244 // (inner_loop_location) loop control flow is not understood by vectorizer".
1245
1246 // Store the result and return it at the end instead of exiting early, in case
1247 // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1248 bool Result = true;
1249 bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1250
1251 // We must have a loop in canonical form. Loops with indirectbr in them cannot
1252 // be canonicalized.
1253 if (!Lp->getLoopPreheader()) {
1254 reportVectorizationFailure("Loop doesn't have a legal pre-header",
1255 "loop control flow is not understood by vectorizer",
1256 "CFGNotUnderstood", ORE, TheLoop);
1257 if (DoExtraAnalysis)
1258 Result = false;
1259 else
1260 return false;
1261 }
1262
1263 // We must have a single backedge.
1264 if (Lp->getNumBackEdges() != 1) {
1265 reportVectorizationFailure("The loop must have a single backedge",
1266 "loop control flow is not understood by vectorizer",
1267 "CFGNotUnderstood", ORE, TheLoop);
1268 if (DoExtraAnalysis)
1269 Result = false;
1270 else
1271 return false;
1272 }
1273
1274 return Result;
1275}
1276
1277bool LoopVectorizationLegality::canVectorizeLoopNestCFG(
1278 Loop *Lp, bool UseVPlanNativePath) {
1279 // Store the result and return it at the end instead of exiting early, in case
1280 // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1281 bool Result = true;
1282 bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1283 if (!canVectorizeLoopCFG(Lp, UseVPlanNativePath)) {
1284 if (DoExtraAnalysis)
1285 Result = false;
1286 else
1287 return false;
1288 }
1289
1290 // Recursively check whether the loop control flow of nested loops is
1291 // understood.
1292 for (Loop *SubLp : *Lp)
1293 if (!canVectorizeLoopNestCFG(SubLp, UseVPlanNativePath)) {
1294 if (DoExtraAnalysis)
1295 Result = false;
1296 else
1297 return false;
1298 }
1299
1300 return Result;
1301}
1302
1303bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
1304 // Store the result and return it at the end instead of exiting early, in case
1305 // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1306 bool Result = true;
1307
1308 bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1309 // Check whether the loop-related control flow in the loop nest is expected by
1310 // vectorizer.
1311 if (!canVectorizeLoopNestCFG(TheLoop, UseVPlanNativePath)) {
1312 if (DoExtraAnalysis)
1313 Result = false;
1314 else
1315 return false;
1316 }
1317
1318 // We need to have a loop header.
1319 LLVM_DEBUG(dbgs() << "LV: Found a loop: " << TheLoop->getHeader()->getName()
1320 << '\n');
1321
1322 // Specific checks for outer loops. We skip the remaining legal checks at this
1323 // point because they don't support outer loops.
1324 if (!TheLoop->isInnermost()) {
1325 assert(UseVPlanNativePath && "VPlan-native path is not enabled.");
1326
1327 if (!canVectorizeOuterLoop()) {
1328 reportVectorizationFailure("Unsupported outer loop",
1329 "unsupported outer loop",
1330 "UnsupportedOuterLoop",
1331 ORE, TheLoop);
1332 // TODO: Implement DoExtraAnalysis when subsequent legal checks support
1333 // outer loops.
1334 return false;
1335 }
1336
1337 LLVM_DEBUG(dbgs() << "LV: We can vectorize this outer loop!\n");
1338 return Result;
1339 }
1340
1341 assert(TheLoop->isInnermost() && "Inner loop expected.");
1342 // Check if we can if-convert non-single-bb loops.
1343 unsigned NumBlocks = TheLoop->getNumBlocks();
1344 if (NumBlocks != 1 && !canVectorizeWithIfConvert()) {
1345 LLVM_DEBUG(dbgs() << "LV: Can't if-convert the loop.\n");
1346 if (DoExtraAnalysis)
1347 Result = false;
1348 else
1349 return false;
1350 }
1351
1352 // Check if we can vectorize the instructions and CFG in this loop.
1353 if (!canVectorizeInstrs()) {
1354 LLVM_DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n");
1355 if (DoExtraAnalysis)
1356 Result = false;
1357 else
1358 return false;
1359 }
1360
1361 // Go over each instruction and look at memory deps.
1362 if (!canVectorizeMemory()) {
1363 LLVM_DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n");
1364 if (DoExtraAnalysis)
1365 Result = false;
1366 else
1367 return false;
1368 }
1369
1370 LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop"
1372 ? " (with a runtime bound check)"
1373 : "")
1374 << "!\n");
1375
1376 unsigned SCEVThreshold = VectorizeSCEVCheckThreshold;
1377 if (Hints->getForce() == LoopVectorizeHints::FK_Enabled)
1378 SCEVThreshold = PragmaVectorizeSCEVCheckThreshold;
1379
1380 if (PSE.getPredicate().getComplexity() > SCEVThreshold) {
1381 reportVectorizationFailure("Too many SCEV checks needed",
1382 "Too many SCEV assumptions need to be made and checked at runtime",
1383 "TooManySCEVRunTimeChecks", ORE, TheLoop);
1384 if (DoExtraAnalysis)
1385 Result = false;
1386 else
1387 return false;
1388 }
1389
1390 // Okay! We've done all the tests. If any have failed, return false. Otherwise
1391 // we can vectorize, and at this point we don't have any other mem analysis
1392 // which may limit our maximum vectorization factor, so just return true with
1393 // no restrictions.
1394 return Result;
1395}
1396
1398
1399 LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n");
1400
1401 SmallPtrSet<const Value *, 8> ReductionLiveOuts;
1402
1403 for (const auto &Reduction : getReductionVars())
1404 ReductionLiveOuts.insert(Reduction.second.getLoopExitInstr());
1405
1406 // TODO: handle non-reduction outside users when tail is folded by masking.
1407 for (auto *AE : AllowedExit) {
1408 // Check that all users of allowed exit values are inside the loop or
1409 // are the live-out of a reduction.
1410 if (ReductionLiveOuts.count(AE))
1411 continue;
1412 for (User *U : AE->users()) {
1413 Instruction *UI = cast<Instruction>(U);
1414 if (TheLoop->contains(UI))
1415 continue;
1416 LLVM_DEBUG(
1417 dbgs()
1418 << "LV: Cannot fold tail by masking, loop has an outside user for "
1419 << *UI << "\n");
1420 return false;
1421 }
1422 }
1423
1424 // The list of pointers that we can safely read and write to remains empty.
1425 SmallPtrSet<Value *, 8> SafePointers;
1426
1428 SmallPtrSet<Instruction *, 8> TmpConditionalAssumes;
1429
1430 // Check and mark all blocks for predication, including those that ordinarily
1431 // do not need predication such as the header block.
1432 for (BasicBlock *BB : TheLoop->blocks()) {
1433 if (!blockCanBePredicated(BB, SafePointers, TmpMaskedOp,
1434 TmpConditionalAssumes)) {
1435 LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking as requested.\n");
1436 return false;
1437 }
1438 }
1439
1440 LLVM_DEBUG(dbgs() << "LV: can fold tail by masking.\n");
1441
1442 MaskedOp.insert(TmpMaskedOp.begin(), TmpMaskedOp.end());
1443 ConditionalAssumes.insert(TmpConditionalAssumes.begin(),
1444 TmpConditionalAssumes.end());
1445
1446 return true;
1447}
1448
1449} // namespace llvm
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:678
#define LLVM_DEBUG(X)
Definition: Debug.h:101
std::string Name
#define DEBUG_TYPE
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition: Lint.cpp:524
loop Loop Strength Reduction
static cl::opt< LoopVectorizeHints::ScalableForceKind > ForceScalableVectorization("scalable-vectorization", cl::init(LoopVectorizeHints::SK_Unspecified), cl::Hidden, cl::desc("Control whether the compiler can use scalable vectors to " "vectorize a loop"), cl::values(clEnumValN(LoopVectorizeHints::SK_FixedWidthOnly, "off", "Scalable vectorization is disabled."), clEnumValN(LoopVectorizeHints::SK_PreferScalable, "preferred", "Scalable vectorization is available and favored when the " "cost is inconclusive."), clEnumValN(LoopVectorizeHints::SK_PreferScalable, "on", "Scalable vectorization is available and favored when the " "cost is inconclusive.")))
#define LV_NAME
static cl::opt< unsigned > PragmaVectorizeSCEVCheckThreshold("pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed with a " "vectorize(enable) pragma"))
static const unsigned MaxInterleaveFactor
Maximum vectorization interleave count.
static cl::opt< unsigned > VectorizeSCEVCheckThreshold("vectorize-scev-check-threshold", cl::init(16), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed."))
static cl::opt< bool > EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden, cl::desc("Enable if-conversion during vectorization."))
This file defines the LoopVectorizationLegality class.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define H(x, y, z)
Definition: MD5.cpp:57
LLVMContext & Context
return ToRemove size() > 0
@ SI
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This pass exposes codegen information to IR-level passes.
static const uint32_t IV[8]
Definition: blake3_impl.h:77
Class for arbitrary precision integers.
Definition: APInt.h:75
LLVM Basic Block Representation.
Definition: BasicBlock.h:56
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:112
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:35
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:127
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Definition: InstrTypes.h:1408
This class represents a function call, abstracting a target machine's calling convention.
static ConstantAsMetadata * get(Constant *C)
Definition: Metadata.h:419
This is the shared class of boolean and integer constants.
Definition: Constants.h:78
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:888
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
static constexpr ElementCount getScalable(ScalarTy MinVal)
Definition: TypeSize.h:294
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition: TypeSize.h:291
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:704
A struct for saving information about induction variables.
@ IK_FpInduction
Floating point induction variable.
@ IK_PtrInduction
Pointer induction var. Step = C / sizeof(elem).
@ IK_IntInduction
Integer induction variable. Step = C.
static bool isInductionPHI(PHINode *Phi, const Loop *L, ScalarEvolution *SE, InductionDescriptor &D, const SCEV *Expr=nullptr, SmallVectorImpl< Instruction * > *CastsToIgnore=nullptr)
Returns true if Phi is an induction in the loop L.
Instruction * getExactFPMathInst()
Returns floating-point induction operator that does not allow reassociation (transforming the inducti...
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:70
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
An instruction for reading from memory.
Definition: Instructions.h:177
Value * getPointerOperand()
Definition: Instructions.h:264
const LoopAccessInfo & getInfo(Loop &L)
bool hasDependenceInvolvingLoopInvariantAddress() const
If the loop has memory dependence involving an invariant address, i.e.
ArrayRef< StoreInst * > getStoresToInvariantAddresses() const
Return the list of stores to invariant addresses.
bool isUniform(Value *V) const
Returns true if the value V is uniform within the loop.
const OptimizationRemarkAnalysis * getReport() const
The diagnostics report generated for the analysis.
const RuntimePointerChecking * getRuntimePointerChecking() const
bool canVectorizeMemory() const
Return true we can analyze the memory accesses in the loop and there are no memory dependence cycles.
const PredicatedScalarEvolution & getPSE() const
Used to add runtime SCEV checks.
static bool blockNeedsPredication(BasicBlock *BB, Loop *TheLoop, DominatorTree *DT)
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:139
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:232
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
Definition: LoopInfo.h:182
unsigned getNumBlocks() const
Get the number of blocks in this loop in constant time.
Definition: LoopInfo.h:202
unsigned getNumBackEdges() const
Calculate the number of back edges to the loop header.
Definition: LoopInfo.h:267
BlockT * getHeader() const
Definition: LoopInfo.h:105
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:195
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:183
bool isLoopHeader(const BlockT *BB) const
Definition: LoopInfo.h:1005
bool isInvariantStoreOfReduction(StoreInst *SI)
Returns True if given store is a final invariant store of one of the reductions found in the loop.
bool isInvariantAddressOfReduction(Value *V)
Returns True if given address is invariant and is used to store recurrent expression.
bool blockNeedsPredication(BasicBlock *BB) const
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
bool canVectorize(bool UseVPlanNativePath)
Returns true if it is legal to vectorize this loop.
int isConsecutivePtr(Type *AccessTy, Value *Ptr) const
Check if this pointer is consecutive when vectorizing.
bool canVectorizeFPMath(bool EnableStrictReductions)
Returns true if it is legal to vectorize the FP math operations in this loop.
bool isFixedOrderRecurrence(const PHINode *Phi) const
Returns True if Phi is a fixed-order recurrence in this loop.
const InductionDescriptor * getPointerInductionDescriptor(PHINode *Phi) const
Returns a pointer to the induction descriptor, if Phi is pointer induction.
const InductionDescriptor * getIntOrFpInductionDescriptor(PHINode *Phi) const
Returns a pointer to the induction descriptor, if Phi is an integer or floating point induction.
bool isInductionPhi(const Value *V) const
Returns True if V is a Phi node of an induction variable in this loop.
bool isUniform(Value *V) const
Returns true if the value V is uniform within the loop.
const InductionList & getInductionVars() const
Returns the induction variables found in the loop.
const ReductionList & getReductionVars() const
Returns the reduction variables found in the loop.
bool prepareToFoldTailByMasking()
Return true if we can vectorize this loop while folding its tail by masking, and mark all respective ...
bool isUniformMemOp(Instruction &I) const
A uniform memory op is a load or store which accesses the same memory location on all lanes.
bool isInductionVariable(const Value *V) const
Returns True if V can be considered as an induction variable in this loop.
bool isCastedInductionVariable(const Value *V) const
Returns True if V is a cast that is part of an induction def-use chain, and had been proven to be red...
void addExactFPMathInst(Instruction *I)
Track the 1st floating-point instruction that can not be reassociated.
@ SK_PreferScalable
Vectorize loops using scalable vectors or fixed-width vectors, but favor scalable vectors when the co...
@ SK_FixedWidthOnly
Disables vectorization with scalable vectors.
bool allowVectorization(Function *F, Loop *L, bool VectorizeOnlyWhenForced) const
bool allowReordering() const
When enabling loop hints are provided we allow the vectorizer to change the order of operations that ...
void emitRemarkWithHints() const
Dumps all the hint information.
void setAlreadyVectorized()
Mark the loop L as already vectorized by setting the width to 1.
LoopVectorizeHints(const Loop *L, bool InterleaveOnlyWhenForced, OptimizationRemarkEmitter &ORE, const TargetTransformInfo *TTI=nullptr)
const char * vectorizeAnalysisPassName() const
If hints are provided that force vectorization, use the AlwaysPrint pass name to force the frontend t...
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:547
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition: LoopInfo.cpp:631
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:60
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
Definition: LoopInfo.cpp:525
PHINode * getCanonicalInductionVariable() const
Check to see if the loop has a canonical induction variable: an integer recurrence that starts at 0 a...
Definition: LoopInfo.cpp:150
MDNode * getLoopID() const
Return the llvm.loop loop id metadata node for this loop if it is present.
Definition: LoopInfo.cpp:501
Metadata node.
Definition: Metadata.h:943
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1291
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1399
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1297
A single uniqued string.
Definition: Metadata.h:611
StringRef getString() const
Definition: Metadata.cpp:507
static MDString * get(LLVMContext &Context, StringRef Str)
Definition: Metadata.cpp:497
size_type count(const KeyT &Key) const
Definition: MapVector.h:145
iterator find(const KeyT &Key)
Definition: MapVector.h:147
bool empty() const
Definition: MapVector.h:80
Root of the metadata hierarchy.
Definition: Metadata.h:61
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:398
Diagnostic information for optimization analysis remarks.
The optimization diagnostic interface.
bool allowExtraAnalysis(StringRef PassName) const
Whether we allow for extra compile-time budget to perform more analysis to produce fewer false positi...
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Diagnostic information for missed-optimization remarks.
Value * getIncomingValueForBlock(const BasicBlock *BB) const
unsigned getNumIncomingValues() const
Return the number of incoming edges.
void addPredicate(const SCEVPredicate &Pred)
Adds a new predicate.
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
const SCEVPredicate & getPredicate() const
const SCEV * getSCEV(Value *V)
Returns the SCEV expression of V, in the context of the current SCEV predicate.
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Definition: IVDescriptors.h:69
Instruction * getExactFPMathInst() const
Returns 1st non-reassociative FP instruction in the PHI node's use-chain.
bool hasExactFPMath() const
Returns true if the recurrence has floating-point math that requires precise (ordered) operations.
Instruction * getLoopExitInstr() const
static bool isFixedOrderRecurrence(PHINode *Phi, Loop *TheLoop, MapVector< Instruction *, Instruction * > &SinkAfter, DominatorTree *DT)
Returns true if Phi is a fixed-order recurrence.
static bool isReductionPHI(PHINode *Phi, Loop *TheLoop, RecurrenceDescriptor &RedDes, DemandedBits *DB=nullptr, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr, ScalarEvolution *SE=nullptr)
Returns true if Phi is a reduction in TheLoop.
bool isOrdered() const
Expose an ordered FP reduction to the instance users.
StoreInst * IntermediateStore
Reductions may store temporary or final result to an invariant address.
bool Need
This flag indicates if we need to add the runtime check.
virtual unsigned getComplexity() const
Returns the estimated complexity of this predicate.
virtual bool isAlwaysTrue() const =0
Returns true if the predicate is always true.
The main scalar evolution driver.
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:344
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:383
iterator end() const
Definition: SmallPtrSet.h:408
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365
iterator begin() const
Definition: SmallPtrSet.h:403
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
bool empty() const
Definition: SmallVector.h:94
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:577
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
An instruction for storing to memory.
Definition: Instructions.h:301
Value * getPointerOperand()
Definition: Instructions.h:393
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
static constexpr size_t npos
Definition: StringRef.h:52
Provides information about what library functions are available for the current target.
bool hasOptimizedCodeGen(LibFunc F) const
Tests if the function is both available and a candidate for optimized code generation.
void getWidestVF(StringRef ScalarF, ElementCount &FixedVF, ElementCount &ScalableVF) const
Returns the largest vectorization factor used in the list of vector functions.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
bool isFunctionVectorizable(StringRef F, const ElementCount &VF) const
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
bool isLegalNTLoad(Type *DataType, Align Alignment) const
Return true if the target supports nontemporal load.
bool isLegalNTStore(Type *DataType, Align Alignment) const
Return true if the target supports nontemporal store.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
std::string str() const
Return the twine contents as a std::string.
Definition: Twine.cpp:17
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:267
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:258
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:129
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition: Type.h:185
static IntegerType * getInt32Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:231
static SmallVector< VFInfo, 8 > getMappings(const CallInst &CI)
Retrieve all the VFInfo instances associated to the CallInst CI.
Definition: VectorUtils.h:265
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
iterator_range< user_iterator > users()
Definition: Value.h:421
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:308
static bool isValidElementType(Type *ElemTy)
Return true if the specified type is valid as an element type.
Definition: Type.cpp:695
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition: TypeSize.h:212
constexpr bool isZero() const
Definition: TypeSize.h:151
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:703
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1782
Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
DenseMap< const Value *, Value * > ValueToValueMap
cl::opt< bool > HintsAllowReordering("hints-allow-reordering", cl::init(true), cl::Hidden, cl::desc("Allow enabling loop hints to reorder " "FP operations during vectorization."))
static Type * getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1)
std::optional< int64_t > getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const ValueToValueMap &StridesMap=ValueToValueMap(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of the access type size.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
static Type * convertPointerToIntegerType(const DataLayout &DL, Type *Ty)
static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp)
bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
static bool isUniformLoop(Loop *Lp, Loop *OuterLp)
bool mustSuppressSpeculation(const LoadInst &LI)
Return true if speculation of the given load must be suppressed to avoid ordering or interfering with...
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1789
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:292
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
bool isDereferenceableAndAlignedInLoop(LoadInst *LI, Loop *L, ScalarEvolution &SE, DominatorTree &DT, AssumptionCache *AC=nullptr)
Return true if we can prove that the given load (which is assumed to be within the specified loop) wo...
Definition: Loads.cpp:262
static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst, SmallPtrSetImpl< Value * > &AllowedExit)
Check that the instruction has outside loop users and is not an identified reduction variable.
static bool storeToSameAddress(ScalarEvolution *SE, StoreInst *A, StoreInst *B)
Returns true if A and B have same pointer operands or same SCEVs addresses.
void reportVectorizationFailure(const StringRef DebugMsg, const StringRef OREMsg, const StringRef ORETag, OptimizationRemarkEmitter *ORE, Loop *TheLoop, Instruction *I=nullptr)
Reports a vectorization failure: print DebugMsg for debugging purposes along with the corresponding o...
llvm::MDNode * makePostTransformationMetadata(llvm::LLVMContext &Context, MDNode *OrigLoopID, llvm::ArrayRef< llvm::StringRef > RemovePrefixes, llvm::ArrayRef< llvm::MDNode * > AddAttrs)
Create a new LoopID after the loop has been transformed.
Definition: LoopInfo.cpp:1126
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:2076
bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx)
Identifies if the vector form of the intrinsic has a scalar operand.
static bool isTLIScalarize(const TargetLibraryInfo &TLI, const CallInst &CI)
Checks if a function is scalarizable according to the TLI, in the sense that it should be vectorized ...
Holds the VFShape for a specific scalar to vector function mapping.
Definition: VectorUtils.h:123
TODO: The following VectorizationFactor was pulled out of LoopVectorizationCostModel class.
Collection of parameters shared between the Loop Vectorizer and the Loop Access Analysis.
static const unsigned MaxVectorWidth
Maximum SIMD width.
static bool isInterleaveForced()
True if force-vector-interleave was specified by the user.
static unsigned VectorizationInterleave
Interleave factor as overridden by the user.