LLVM  16.0.0git
LoopVectorizationLegality.cpp
Go to the documentation of this file.
1 //===- LoopVectorizationLegality.cpp --------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file provides loop vectorization legality analysis. Original code
10 // resided in LoopVectorize.cpp for a long time.
11 //
12 // At this point, it is implemented as a utility class, not as an analysis
13 // pass. It should be easy to create an analysis pass around it if there
14 // is a need (but D45420 needs to happen first).
15 //
16 
18 #include "llvm/Analysis/Loads.h"
19 #include "llvm/Analysis/LoopInfo.h"
25 #include "llvm/IR/IntrinsicInst.h"
26 #include "llvm/IR/PatternMatch.h"
29 
30 using namespace llvm;
31 using namespace PatternMatch;
32 
33 #define LV_NAME "loop-vectorize"
34 #define DEBUG_TYPE LV_NAME
35 
36 static cl::opt<bool>
37  EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
38  cl::desc("Enable if-conversion during vectorization."));
39 
40 namespace llvm {
42  HintsAllowReordering("hints-allow-reordering", cl::init(true), cl::Hidden,
43  cl::desc("Allow enabling loop hints to reorder "
44  "FP operations during vectorization."));
45 }
46 
47 // TODO: Move size-based thresholds out of legality checking, make cost based
48 // decisions instead of hard thresholds.
50  "vectorize-scev-check-threshold", cl::init(16), cl::Hidden,
51  cl::desc("The maximum number of SCEV checks allowed."));
52 
54  "pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden,
55  cl::desc("The maximum number of SCEV checks allowed with a "
56  "vectorize(enable) pragma"));
57 
60  "scalable-vectorization", cl::init(LoopVectorizeHints::SK_Unspecified),
61  cl::Hidden,
62  cl::desc("Control whether the compiler can use scalable vectors to "
63  "vectorize a loop"),
64  cl::values(
66  "Scalable vectorization is disabled."),
67  clEnumValN(
69  "Scalable vectorization is available and favored when the "
70  "cost is inconclusive."),
71  clEnumValN(
73  "Scalable vectorization is available and favored when the "
74  "cost is inconclusive.")));
75 
76 /// Maximum vectorization interleave count.
77 static const unsigned MaxInterleaveFactor = 16;
78 
79 namespace llvm {
80 
81 bool LoopVectorizeHints::Hint::validate(unsigned Val) {
82  switch (Kind) {
83  case HK_WIDTH:
84  return isPowerOf2_32(Val) && Val <= VectorizerParams::MaxVectorWidth;
85  case HK_INTERLEAVE:
86  return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor;
87  case HK_FORCE:
88  return (Val <= 1);
89  case HK_ISVECTORIZED:
90  case HK_PREDICATE:
91  case HK_SCALABLE:
92  return (Val == 0 || Val == 1);
93  }
94  return false;
95 }
96 
98  bool InterleaveOnlyWhenForced,
100  const TargetTransformInfo *TTI)
101  : Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH),
102  Interleave("interleave.count", InterleaveOnlyWhenForced, HK_INTERLEAVE),
103  Force("vectorize.enable", FK_Undefined, HK_FORCE),
104  IsVectorized("isvectorized", 0, HK_ISVECTORIZED),
105  Predicate("vectorize.predicate.enable", FK_Undefined, HK_PREDICATE),
106  Scalable("vectorize.scalable.enable", SK_Unspecified, HK_SCALABLE),
107  TheLoop(L), ORE(ORE) {
108  // Populate values with existing loop metadata.
109  getHintsFromMetadata();
110 
111  // force-vector-interleave overrides DisableInterleaving.
114 
115  // If the metadata doesn't explicitly specify whether to enable scalable
116  // vectorization, then decide based on the following criteria (increasing
117  // level of priority):
118  // - Target default
119  // - Metadata width
120  // - Force option (always overrides)
121  if ((LoopVectorizeHints::ScalableForceKind)Scalable.Value == SK_Unspecified) {
122  if (TTI)
125 
126  if (Width.Value)
127  // If the width is set, but the metadata says nothing about the scalable
128  // property, then assume it concerns only a fixed-width UserVF.
129  // If width is not set, the flag takes precedence.
130  Scalable.Value = SK_FixedWidthOnly;
131  }
132 
133  // If the flag is set to force any use of scalable vectors, override the loop
134  // hints.
135  if (ForceScalableVectorization.getValue() !=
137  Scalable.Value = ForceScalableVectorization.getValue();
138 
139  // Scalable vectorization is disabled if no preference is specified.
141  Scalable.Value = SK_FixedWidthOnly;
142 
143  if (IsVectorized.Value != 1)
144  // If the vectorization width and interleaving count are both 1 then
145  // consider the loop to have been already vectorized because there's
146  // nothing more that we can do.
147  IsVectorized.Value =
149  LLVM_DEBUG(if (InterleaveOnlyWhenForced && getInterleave() == 1) dbgs()
150  << "LV: Interleaving disabled by the pass manager\n");
151 }
152 
154  LLVMContext &Context = TheLoop->getHeader()->getContext();
155 
156  MDNode *IsVectorizedMD = MDNode::get(
157  Context,
158  {MDString::get(Context, "llvm.loop.isvectorized"),
160  MDNode *LoopID = TheLoop->getLoopID();
161  MDNode *NewLoopID =
163  {Twine(Prefix(), "vectorize.").str(),
164  Twine(Prefix(), "interleave.").str()},
165  {IsVectorizedMD});
166  TheLoop->setLoopID(NewLoopID);
167 
168  // Update internal cache.
169  IsVectorized.Value = 1;
170 }
171 
173  Function *F, Loop *L, bool VectorizeOnlyWhenForced) const {
175  LLVM_DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n");
177  return false;
178  }
179 
180  if (VectorizeOnlyWhenForced && getForce() != LoopVectorizeHints::FK_Enabled) {
181  LLVM_DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n");
183  return false;
184  }
185 
186  if (getIsVectorized() == 1) {
187  LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n");
188  // FIXME: Add interleave.disable metadata. This will allow
189  // vectorize.disable to be used without disabling the pass and errors
190  // to differentiate between disabled vectorization and a width of 1.
191  ORE.emit([&]() {
193  "AllDisabled", L->getStartLoc(),
194  L->getHeader())
195  << "loop not vectorized: vectorization and interleaving are "
196  "explicitly disabled, or the loop has already been "
197  "vectorized";
198  });
199  return false;
200  }
201 
202  return true;
203 }
204 
206  using namespace ore;
207 
208  ORE.emit([&]() {
209  if (Force.Value == LoopVectorizeHints::FK_Disabled)
210  return OptimizationRemarkMissed(LV_NAME, "MissedExplicitlyDisabled",
211  TheLoop->getStartLoc(),
212  TheLoop->getHeader())
213  << "loop not vectorized: vectorization is explicitly disabled";
214  else {
215  OptimizationRemarkMissed R(LV_NAME, "MissedDetails",
216  TheLoop->getStartLoc(), TheLoop->getHeader());
217  R << "loop not vectorized";
218  if (Force.Value == LoopVectorizeHints::FK_Enabled) {
219  R << " (Force=" << NV("Force", true);
220  if (Width.Value != 0)
221  R << ", Vector Width=" << NV("VectorWidth", getWidth());
222  if (getInterleave() != 0)
223  R << ", Interleave Count=" << NV("InterleaveCount", getInterleave());
224  R << ")";
225  }
226  return R;
227  }
228  });
229 }
230 
232  if (getWidth() == ElementCount::getFixed(1))
233  return LV_NAME;
235  return LV_NAME;
237  return LV_NAME;
239 }
240 
242  // Allow the vectorizer to change the order of operations if enabling
243  // loop hints are provided
244  ElementCount EC = getWidth();
245  return HintsAllowReordering &&
247  EC.getKnownMinValue() > 1);
248 }
249 
250 void LoopVectorizeHints::getHintsFromMetadata() {
251  MDNode *LoopID = TheLoop->getLoopID();
252  if (!LoopID)
253  return;
254 
255  // First operand should refer to the loop id itself.
256  assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
257  assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
258 
259  for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
260  const MDString *S = nullptr;
262 
263  // The expected hint is either a MDString or a MDNode with the first
264  // operand a MDString.
265  if (const MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i))) {
266  if (!MD || MD->getNumOperands() == 0)
267  continue;
268  S = dyn_cast<MDString>(MD->getOperand(0));
269  for (unsigned i = 1, ie = MD->getNumOperands(); i < ie; ++i)
270  Args.push_back(MD->getOperand(i));
271  } else {
272  S = dyn_cast<MDString>(LoopID->getOperand(i));
273  assert(Args.size() == 0 && "too many arguments for MDString");
274  }
275 
276  if (!S)
277  continue;
278 
279  // Check if the hint starts with the loop metadata prefix.
280  StringRef Name = S->getString();
281  if (Args.size() == 1)
282  setHint(Name, Args[0]);
283  }
284 }
285 
286 void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) {
287  if (!Name.startswith(Prefix()))
288  return;
289  Name = Name.substr(Prefix().size(), StringRef::npos);
290 
291  const ConstantInt *C = mdconst::dyn_extract<ConstantInt>(Arg);
292  if (!C)
293  return;
294  unsigned Val = C->getZExtValue();
295 
296  Hint *Hints[] = {&Width, &Interleave, &Force,
297  &IsVectorized, &Predicate, &Scalable};
298  for (auto *H : Hints) {
299  if (Name == H->Name) {
300  if (H->validate(Val))
301  H->Value = Val;
302  else
303  LLVM_DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n");
304  break;
305  }
306  }
307 }
308 
309 // Return true if the inner loop \p Lp is uniform with regard to the outer loop
310 // \p OuterLp (i.e., if the outer loop is vectorized, all the vector lanes
311 // executing the inner loop will execute the same iterations). This check is
312 // very constrained for now but it will be relaxed in the future. \p Lp is
313 // considered uniform if it meets all the following conditions:
314 // 1) it has a canonical IV (starting from 0 and with stride 1),
315 // 2) its latch terminator is a conditional branch and,
316 // 3) its latch condition is a compare instruction whose operands are the
317 // canonical IV and an OuterLp invariant.
318 // This check doesn't take into account the uniformity of other conditions not
319 // related to the loop latch because they don't affect the loop uniformity.
320 //
321 // NOTE: We decided to keep all these checks and its associated documentation
322 // together so that we can easily have a picture of the current supported loop
323 // nests. However, some of the current checks don't depend on \p OuterLp and
324 // would be redundantly executed for each \p Lp if we invoked this function for
325 // different candidate outer loops. This is not the case for now because we
326 // don't currently have the infrastructure to evaluate multiple candidate outer
327 // loops and \p OuterLp will be a fixed parameter while we only support explicit
328 // outer loop vectorization. It's also very likely that these checks go away
329 // before introducing the aforementioned infrastructure. However, if this is not
330 // the case, we should move the \p OuterLp independent checks to a separate
331 // function that is only executed once for each \p Lp.
332 static bool isUniformLoop(Loop *Lp, Loop *OuterLp) {
333  assert(Lp->getLoopLatch() && "Expected loop with a single latch.");
334 
335  // If Lp is the outer loop, it's uniform by definition.
336  if (Lp == OuterLp)
337  return true;
338  assert(OuterLp->contains(Lp) && "OuterLp must contain Lp.");
339 
340  // 1.
342  if (!IV) {
343  LLVM_DEBUG(dbgs() << "LV: Canonical IV not found.\n");
344  return false;
345  }
346 
347  // 2.
348  BasicBlock *Latch = Lp->getLoopLatch();
349  auto *LatchBr = dyn_cast<BranchInst>(Latch->getTerminator());
350  if (!LatchBr || LatchBr->isUnconditional()) {
351  LLVM_DEBUG(dbgs() << "LV: Unsupported loop latch branch.\n");
352  return false;
353  }
354 
355  // 3.
356  auto *LatchCmp = dyn_cast<CmpInst>(LatchBr->getCondition());
357  if (!LatchCmp) {
358  LLVM_DEBUG(
359  dbgs() << "LV: Loop latch condition is not a compare instruction.\n");
360  return false;
361  }
362 
363  Value *CondOp0 = LatchCmp->getOperand(0);
364  Value *CondOp1 = LatchCmp->getOperand(1);
365  Value *IVUpdate = IV->getIncomingValueForBlock(Latch);
366  if (!(CondOp0 == IVUpdate && OuterLp->isLoopInvariant(CondOp1)) &&
367  !(CondOp1 == IVUpdate && OuterLp->isLoopInvariant(CondOp0))) {
368  LLVM_DEBUG(dbgs() << "LV: Loop latch condition is not uniform.\n");
369  return false;
370  }
371 
372  return true;
373 }
374 
375 // Return true if \p Lp and all its nested loops are uniform with regard to \p
376 // OuterLp.
377 static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp) {
378  if (!isUniformLoop(Lp, OuterLp))
379  return false;
380 
381  // Check if nested loops are uniform.
382  for (Loop *SubLp : *Lp)
383  if (!isUniformLoopNest(SubLp, OuterLp))
384  return false;
385 
386  return true;
387 }
388 
390  if (Ty->isPointerTy())
391  return DL.getIntPtrType(Ty);
392 
393  // It is possible that char's or short's overflow when we ask for the loop's
394  // trip count, work around this by changing the type size.
395  if (Ty->getScalarSizeInBits() < 32)
396  return Type::getInt32Ty(Ty->getContext());
397 
398  return Ty;
399 }
400 
401 static Type *getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1) {
402  Ty0 = convertPointerToIntegerType(DL, Ty0);
403  Ty1 = convertPointerToIntegerType(DL, Ty1);
404  if (Ty0->getScalarSizeInBits() > Ty1->getScalarSizeInBits())
405  return Ty0;
406  return Ty1;
407 }
408 
409 /// Check that the instruction has outside loop users and is not an
410 /// identified reduction variable.
411 static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
412  SmallPtrSetImpl<Value *> &AllowedExit) {
413  // Reductions, Inductions and non-header phis are allowed to have exit users. All
414  // other instructions must not have external users.
415  if (!AllowedExit.count(Inst))
416  // Check that all of the users of the loop are inside the BB.
417  for (User *U : Inst->users()) {
418  Instruction *UI = cast<Instruction>(U);
419  // This user may be a reduction exit value.
420  if (!TheLoop->contains(UI)) {
421  LLVM_DEBUG(dbgs() << "LV: Found an outside user for : " << *UI << '\n');
422  return true;
423  }
424  }
425  return false;
426 }
427 
428 /// Returns true if A and B have same pointer operands or same SCEVs addresses
430  StoreInst *B) {
431  // Compare store
432  if (A == B)
433  return true;
434 
435  // Otherwise Compare pointers
436  Value *APtr = A->getPointerOperand();
437  Value *BPtr = B->getPointerOperand();
438  if (APtr == BPtr)
439  return true;
440 
441  // Otherwise compare address SCEVs
442  if (SE->getSCEV(APtr) == SE->getSCEV(BPtr))
443  return true;
444 
445  return false;
446 }
447 
449  Value *Ptr) const {
450  const ValueToValueMap &Strides =
451  getSymbolicStrides() ? *getSymbolicStrides() : ValueToValueMap();
452 
453  Function *F = TheLoop->getHeader()->getParent();
454  bool OptForSize = F->hasOptSize() ||
455  llvm::shouldOptimizeForSize(TheLoop->getHeader(), PSI, BFI,
457  bool CanAddPredicate = !OptForSize;
458  int Stride = getPtrStride(PSE, AccessTy, Ptr, TheLoop, Strides,
459  CanAddPredicate, false).value_or(0);
460  if (Stride == 1 || Stride == -1)
461  return Stride;
462  return 0;
463 }
464 
466  return LAI->isUniform(V);
467 }
468 
471  if (!Ptr)
472  return false;
473  // Note: There's nothing inherent which prevents predicated loads and
474  // stores from being uniform. The current lowering simply doesn't handle
475  // it; in particular, the cost model distinguishes scatter/gather from
476  // scalar w/predication, and we currently rely on the scalar path.
477  return isUniform(Ptr) && !blockNeedsPredication(I.getParent());
478 }
479 
480 bool LoopVectorizationLegality::canVectorizeOuterLoop() {
481  assert(!TheLoop->isInnermost() && "We are not vectorizing an outer loop.");
482  // Store the result and return it at the end instead of exiting early, in case
483  // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
484  bool Result = true;
485  bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
486 
487  for (BasicBlock *BB : TheLoop->blocks()) {
488  // Check whether the BB terminator is a BranchInst. Any other terminator is
489  // not supported yet.
490  auto *Br = dyn_cast<BranchInst>(BB->getTerminator());
491  if (!Br) {
492  reportVectorizationFailure("Unsupported basic block terminator",
493  "loop control flow is not understood by vectorizer",
494  "CFGNotUnderstood", ORE, TheLoop);
495  if (DoExtraAnalysis)
496  Result = false;
497  else
498  return false;
499  }
500 
501  // Check whether the BranchInst is a supported one. Only unconditional
502  // branches, conditional branches with an outer loop invariant condition or
503  // backedges are supported.
504  // FIXME: We skip these checks when VPlan predication is enabled as we
505  // want to allow divergent branches. This whole check will be removed
506  // once VPlan predication is on by default.
507  if (Br && Br->isConditional() &&
508  !TheLoop->isLoopInvariant(Br->getCondition()) &&
509  !LI->isLoopHeader(Br->getSuccessor(0)) &&
510  !LI->isLoopHeader(Br->getSuccessor(1))) {
511  reportVectorizationFailure("Unsupported conditional branch",
512  "loop control flow is not understood by vectorizer",
513  "CFGNotUnderstood", ORE, TheLoop);
514  if (DoExtraAnalysis)
515  Result = false;
516  else
517  return false;
518  }
519  }
520 
521  // Check whether inner loops are uniform. At this point, we only support
522  // simple outer loops scenarios with uniform nested loops.
523  if (!isUniformLoopNest(TheLoop /*loop nest*/,
524  TheLoop /*context outer loop*/)) {
525  reportVectorizationFailure("Outer loop contains divergent loops",
526  "loop control flow is not understood by vectorizer",
527  "CFGNotUnderstood", ORE, TheLoop);
528  if (DoExtraAnalysis)
529  Result = false;
530  else
531  return false;
532  }
533 
534  // Check whether we are able to set up outer loop induction.
535  if (!setupOuterLoopInductions()) {
536  reportVectorizationFailure("Unsupported outer loop Phi(s)",
537  "Unsupported outer loop Phi(s)",
538  "UnsupportedPhi", ORE, TheLoop);
539  if (DoExtraAnalysis)
540  Result = false;
541  else
542  return false;
543  }
544 
545  return Result;
546 }
547 
548 void LoopVectorizationLegality::addInductionPhi(
549  PHINode *Phi, const InductionDescriptor &ID,
550  SmallPtrSetImpl<Value *> &AllowedExit) {
551  Inductions[Phi] = ID;
552 
553  // In case this induction also comes with casts that we know we can ignore
554  // in the vectorized loop body, record them here. All casts could be recorded
555  // here for ignoring, but suffices to record only the first (as it is the
556  // only one that may bw used outside the cast sequence).
557  const SmallVectorImpl<Instruction *> &Casts = ID.getCastInsts();
558  if (!Casts.empty())
559  InductionCastsToIgnore.insert(*Casts.begin());
560 
561  Type *PhiTy = Phi->getType();
562  const DataLayout &DL = Phi->getModule()->getDataLayout();
563 
564  // Get the widest type.
565  if (!PhiTy->isFloatingPointTy()) {
566  if (!WidestIndTy)
567  WidestIndTy = convertPointerToIntegerType(DL, PhiTy);
568  else
569  WidestIndTy = getWiderType(DL, PhiTy, WidestIndTy);
570  }
571 
572  // Int inductions are special because we only allow one IV.
573  if (ID.getKind() == InductionDescriptor::IK_IntInduction &&
574  ID.getConstIntStepValue() && ID.getConstIntStepValue()->isOne() &&
575  isa<Constant>(ID.getStartValue()) &&
576  cast<Constant>(ID.getStartValue())->isNullValue()) {
577 
578  // Use the phi node with the widest type as induction. Use the last
579  // one if there are multiple (no good reason for doing this other
580  // than it is expedient). We've checked that it begins at zero and
581  // steps by one, so this is a canonical induction variable.
582  if (!PrimaryInduction || PhiTy == WidestIndTy)
583  PrimaryInduction = Phi;
584  }
585 
586  // Both the PHI node itself, and the "post-increment" value feeding
587  // back into the PHI node may have external users.
588  // We can allow those uses, except if the SCEVs we have for them rely
589  // on predicates that only hold within the loop, since allowing the exit
590  // currently means re-using this SCEV outside the loop (see PR33706 for more
591  // details).
592  if (PSE.getPredicate().isAlwaysTrue()) {
593  AllowedExit.insert(Phi);
594  AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch()));
595  }
596 
597  LLVM_DEBUG(dbgs() << "LV: Found an induction variable.\n");
598 }
599 
600 bool LoopVectorizationLegality::setupOuterLoopInductions() {
601  BasicBlock *Header = TheLoop->getHeader();
602 
603  // Returns true if a given Phi is a supported induction.
604  auto isSupportedPhi = [&](PHINode &Phi) -> bool {
606  if (InductionDescriptor::isInductionPHI(&Phi, TheLoop, PSE, ID) &&
608  addInductionPhi(&Phi, ID, AllowedExit);
609  return true;
610  } else {
611  // Bail out for any Phi in the outer loop header that is not a supported
612  // induction.
613  LLVM_DEBUG(
614  dbgs()
615  << "LV: Found unsupported PHI for outer loop vectorization.\n");
616  return false;
617  }
618  };
619 
620  if (llvm::all_of(Header->phis(), isSupportedPhi))
621  return true;
622  else
623  return false;
624 }
625 
626 /// Checks if a function is scalarizable according to the TLI, in
627 /// the sense that it should be vectorized and then expanded in
628 /// multiple scalar calls. This is represented in the
629 /// TLI via mappings that do not specify a vector name, as in the
630 /// following example:
631 ///
632 /// const VecDesc VecIntrinsics[] = {
633 /// {"llvm.phx.abs.i32", "", 4}
634 /// };
635 static bool isTLIScalarize(const TargetLibraryInfo &TLI, const CallInst &CI) {
636  const StringRef ScalarName = CI.getCalledFunction()->getName();
637  bool Scalarize = TLI.isFunctionVectorizable(ScalarName);
638  // Check that all known VFs are not associated to a vector
639  // function, i.e. the vector name is emty.
640  if (Scalarize) {
641  ElementCount WidestFixedVF, WidestScalableVF;
642  TLI.getWidestVF(ScalarName, WidestFixedVF, WidestScalableVF);
644  ElementCount::isKnownLE(VF, WidestFixedVF); VF *= 2)
645  Scalarize &= !TLI.isFunctionVectorizable(ScalarName, VF);
647  ElementCount::isKnownLE(VF, WidestScalableVF); VF *= 2)
648  Scalarize &= !TLI.isFunctionVectorizable(ScalarName, VF);
649  assert((WidestScalableVF.isZero() || !Scalarize) &&
650  "Caller may decide to scalarize a variant using a scalable VF");
651  }
652  return Scalarize;
653 }
654 
655 bool LoopVectorizationLegality::canVectorizeInstrs() {
656  BasicBlock *Header = TheLoop->getHeader();
657 
658  // For each block in the loop.
659  for (BasicBlock *BB : TheLoop->blocks()) {
660  // Scan the instructions in the block and look for hazards.
661  for (Instruction &I : *BB) {
662  if (auto *Phi = dyn_cast<PHINode>(&I)) {
663  Type *PhiTy = Phi->getType();
664  // Check that this PHI type is allowed.
665  if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() &&
666  !PhiTy->isPointerTy()) {
667  reportVectorizationFailure("Found a non-int non-pointer PHI",
668  "loop control flow is not understood by vectorizer",
669  "CFGNotUnderstood", ORE, TheLoop);
670  return false;
671  }
672 
673  // If this PHINode is not in the header block, then we know that we
674  // can convert it to select during if-conversion. No need to check if
675  // the PHIs in this block are induction or reduction variables.
676  if (BB != Header) {
677  // Non-header phi nodes that have outside uses can be vectorized. Add
678  // them to the list of allowed exits.
679  // Unsafe cyclic dependencies with header phis are identified during
680  // legalization for reduction, induction and fixed order
681  // recurrences.
682  AllowedExit.insert(&I);
683  continue;
684  }
685 
686  // We only allow if-converted PHIs with exactly two incoming values.
687  if (Phi->getNumIncomingValues() != 2) {
688  reportVectorizationFailure("Found an invalid PHI",
689  "loop control flow is not understood by vectorizer",
690  "CFGNotUnderstood", ORE, TheLoop, Phi);
691  return false;
692  }
693 
694  RecurrenceDescriptor RedDes;
695  if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes, DB, AC,
696  DT, PSE.getSE())) {
697  Requirements->addExactFPMathInst(RedDes.getExactFPMathInst());
698  AllowedExit.insert(RedDes.getLoopExitInstr());
699  Reductions[Phi] = RedDes;
700  continue;
701  }
702 
703  // TODO: Instead of recording the AllowedExit, it would be good to
704  // record the complementary set: NotAllowedExit. These include (but may
705  // not be limited to):
706  // 1. Reduction phis as they represent the one-before-last value, which
707  // is not available when vectorized
708  // 2. Induction phis and increment when SCEV predicates cannot be used
709  // outside the loop - see addInductionPhi
710  // 3. Non-Phis with outside uses when SCEV predicates cannot be used
711  // outside the loop - see call to hasOutsideLoopUser in the non-phi
712  // handling below
713  // 4. FixedOrderRecurrence phis that can possibly be handled by
714  // extraction.
715  // By recording these, we can then reason about ways to vectorize each
716  // of these NotAllowedExit.
718  if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID)) {
719  addInductionPhi(Phi, ID, AllowedExit);
720  Requirements->addExactFPMathInst(ID.getExactFPMathInst());
721  continue;
722  }
723 
725  SinkAfter, DT)) {
726  AllowedExit.insert(Phi);
727  FixedOrderRecurrences.insert(Phi);
728  continue;
729  }
730 
731  // As a last resort, coerce the PHI to a AddRec expression
732  // and re-try classifying it a an induction PHI.
733  if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID, true)) {
734  addInductionPhi(Phi, ID, AllowedExit);
735  continue;
736  }
737 
738  reportVectorizationFailure("Found an unidentified PHI",
739  "value that could not be identified as "
740  "reduction is used outside the loop",
741  "NonReductionValueUsedOutsideLoop", ORE, TheLoop, Phi);
742  return false;
743  } // end of PHI handling
744 
745  // We handle calls that:
746  // * Are debug info intrinsics.
747  // * Have a mapping to an IR intrinsic.
748  // * Have a vector version available.
749  auto *CI = dyn_cast<CallInst>(&I);
750 
751  if (CI && !getVectorIntrinsicIDForCall(CI, TLI) &&
752  !isa<DbgInfoIntrinsic>(CI) &&
753  !(CI->getCalledFunction() && TLI &&
754  (!VFDatabase::getMappings(*CI).empty() ||
755  isTLIScalarize(*TLI, *CI)))) {
756  // If the call is a recognized math libary call, it is likely that
757  // we can vectorize it given loosened floating-point constraints.
758  LibFunc Func;
759  bool IsMathLibCall =
760  TLI && CI->getCalledFunction() &&
761  CI->getType()->isFloatingPointTy() &&
762  TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) &&
763  TLI->hasOptimizedCodeGen(Func);
764 
765  if (IsMathLibCall) {
766  // TODO: Ideally, we should not use clang-specific language here,
767  // but it's hard to provide meaningful yet generic advice.
768  // Also, should this be guarded by allowExtraAnalysis() and/or be part
769  // of the returned info from isFunctionVectorizable()?
771  "Found a non-intrinsic callsite",
772  "library call cannot be vectorized. "
773  "Try compiling with -fno-math-errno, -ffast-math, "
774  "or similar flags",
775  "CantVectorizeLibcall", ORE, TheLoop, CI);
776  } else {
777  reportVectorizationFailure("Found a non-intrinsic callsite",
778  "call instruction cannot be vectorized",
779  "CantVectorizeLibcall", ORE, TheLoop, CI);
780  }
781  return false;
782  }
783 
784  // Some intrinsics have scalar arguments and should be same in order for
785  // them to be vectorized (i.e. loop invariant).
786  if (CI) {
787  auto *SE = PSE.getSE();
788  Intrinsic::ID IntrinID = getVectorIntrinsicIDForCall(CI, TLI);
789  for (unsigned i = 0, e = CI->arg_size(); i != e; ++i)
790  if (isVectorIntrinsicWithScalarOpAtArg(IntrinID, i)) {
791  if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(i)), TheLoop)) {
792  reportVectorizationFailure("Found unvectorizable intrinsic",
793  "intrinsic instruction cannot be vectorized",
794  "CantVectorizeIntrinsic", ORE, TheLoop, CI);
795  return false;
796  }
797  }
798  }
799 
800  // Check that the instruction return type is vectorizable.
801  // Also, we can't vectorize extractelement instructions.
802  if ((!VectorType::isValidElementType(I.getType()) &&
803  !I.getType()->isVoidTy()) ||
804  isa<ExtractElementInst>(I)) {
805  reportVectorizationFailure("Found unvectorizable type",
806  "instruction return type cannot be vectorized",
807  "CantVectorizeInstructionReturnType", ORE, TheLoop, &I);
808  return false;
809  }
810 
811  // Check that the stored type is vectorizable.
812  if (auto *ST = dyn_cast<StoreInst>(&I)) {
813  Type *T = ST->getValueOperand()->getType();
815  reportVectorizationFailure("Store instruction cannot be vectorized",
816  "store instruction cannot be vectorized",
817  "CantVectorizeStore", ORE, TheLoop, ST);
818  return false;
819  }
820 
821  // For nontemporal stores, check that a nontemporal vector version is
822  // supported on the target.
823  if (ST->getMetadata(LLVMContext::MD_nontemporal)) {
824  // Arbitrarily try a vector of 2 elements.
825  auto *VecTy = FixedVectorType::get(T, /*NumElts=*/2);
826  assert(VecTy && "did not find vectorized version of stored type");
827  if (!TTI->isLegalNTStore(VecTy, ST->getAlign())) {
829  "nontemporal store instruction cannot be vectorized",
830  "nontemporal store instruction cannot be vectorized",
831  "CantVectorizeNontemporalStore", ORE, TheLoop, ST);
832  return false;
833  }
834  }
835 
836  } else if (auto *LD = dyn_cast<LoadInst>(&I)) {
837  if (LD->getMetadata(LLVMContext::MD_nontemporal)) {
838  // For nontemporal loads, check that a nontemporal vector version is
839  // supported on the target (arbitrarily try a vector of 2 elements).
840  auto *VecTy = FixedVectorType::get(I.getType(), /*NumElts=*/2);
841  assert(VecTy && "did not find vectorized version of load type");
842  if (!TTI->isLegalNTLoad(VecTy, LD->getAlign())) {
844  "nontemporal load instruction cannot be vectorized",
845  "nontemporal load instruction cannot be vectorized",
846  "CantVectorizeNontemporalLoad", ORE, TheLoop, LD);
847  return false;
848  }
849  }
850 
851  // FP instructions can allow unsafe algebra, thus vectorizable by
852  // non-IEEE-754 compliant SIMD units.
853  // This applies to floating-point math operations and calls, not memory
854  // operations, shuffles, or casts, as they don't change precision or
855  // semantics.
856  } else if (I.getType()->isFloatingPointTy() && (CI || I.isBinaryOp()) &&
857  !I.isFast()) {
858  LLVM_DEBUG(dbgs() << "LV: Found FP op with unsafe algebra.\n");
859  Hints->setPotentiallyUnsafe();
860  }
861 
862  // Reduction instructions are allowed to have exit users.
863  // All other instructions must not have external users.
864  if (hasOutsideLoopUser(TheLoop, &I, AllowedExit)) {
865  // We can safely vectorize loops where instructions within the loop are
866  // used outside the loop only if the SCEV predicates within the loop is
867  // same as outside the loop. Allowing the exit means reusing the SCEV
868  // outside the loop.
869  if (PSE.getPredicate().isAlwaysTrue()) {
870  AllowedExit.insert(&I);
871  continue;
872  }
873  reportVectorizationFailure("Value cannot be used outside the loop",
874  "value cannot be used outside the loop",
875  "ValueUsedOutsideLoop", ORE, TheLoop, &I);
876  return false;
877  }
878  } // next instr.
879  }
880 
881  if (!PrimaryInduction) {
882  if (Inductions.empty()) {
883  reportVectorizationFailure("Did not find one integer induction var",
884  "loop induction variable could not be identified",
885  "NoInductionVariable", ORE, TheLoop);
886  return false;
887  } else if (!WidestIndTy) {
888  reportVectorizationFailure("Did not find one integer induction var",
889  "integer loop induction variable could not be identified",
890  "NoIntegerInductionVariable", ORE, TheLoop);
891  return false;
892  } else {
893  LLVM_DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
894  }
895  }
896 
897  // For fixed order recurrences, we use the previous value (incoming value from
898  // the latch) to check if it dominates all users of the recurrence. Bail out
899  // if we have to sink such an instruction for another recurrence, as the
900  // dominance requirement may not hold after sinking.
901  BasicBlock *LoopLatch = TheLoop->getLoopLatch();
902  if (any_of(FixedOrderRecurrences, [LoopLatch, this](const PHINode *Phi) {
903  Instruction *V =
904  cast<Instruction>(Phi->getIncomingValueForBlock(LoopLatch));
905  return SinkAfter.find(V) != SinkAfter.end();
906  }))
907  return false;
908 
909  // Now we know the widest induction type, check if our found induction
910  // is the same size. If it's not, unset it here and InnerLoopVectorizer
911  // will create another.
912  if (PrimaryInduction && WidestIndTy != PrimaryInduction->getType())
913  PrimaryInduction = nullptr;
914 
915  return true;
916 }
917 
918 bool LoopVectorizationLegality::canVectorizeMemory() {
919  LAI = &LAIs.getInfo(*TheLoop);
920  const OptimizationRemarkAnalysis *LAR = LAI->getReport();
921  if (LAR) {
922  ORE->emit([&]() {
923  return OptimizationRemarkAnalysis(Hints->vectorizeAnalysisPassName(),
924  "loop not vectorized: ", *LAR);
925  });
926  }
927 
928  if (!LAI->canVectorizeMemory())
929  return false;
930 
931  // We can vectorize stores to invariant address when final reduction value is
932  // guaranteed to be stored at the end of the loop. Also, if decision to
933  // vectorize loop is made, runtime checks are added so as to make sure that
934  // invariant address won't alias with any other objects.
935  if (!LAI->getStoresToInvariantAddresses().empty()) {
936  // For each invariant address, check if last stored value is unconditional
937  // and the address is not calculated inside the loop.
938  for (StoreInst *SI : LAI->getStoresToInvariantAddresses()) {
940  continue;
941 
942  if (blockNeedsPredication(SI->getParent())) {
944  "We don't allow storing to uniform addresses",
945  "write of conditional recurring variant value to a loop "
946  "invariant address could not be vectorized",
947  "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
948  return false;
949  }
950 
951  // Invariant address should be defined outside of loop. LICM pass usually
952  // makes sure it happens, but in rare cases it does not, we do not want
953  // to overcomplicate vectorization to support this case.
954  if (Instruction *Ptr = dyn_cast<Instruction>(SI->getPointerOperand())) {
955  if (TheLoop->contains(Ptr)) {
957  "Invariant address is calculated inside the loop",
958  "write to a loop invariant address could not "
959  "be vectorized",
960  "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
961  return false;
962  }
963  }
964  }
965 
967  // For each invariant address, check its last stored value is the result
968  // of one of our reductions.
969  //
970  // We do not check if dependence with loads exists because they are
971  // currently rejected earlier in LoopAccessInfo::analyzeLoop. In case this
972  // behaviour changes we have to modify this code.
973  ScalarEvolution *SE = PSE.getSE();
974  SmallVector<StoreInst *, 4> UnhandledStores;
975  for (StoreInst *SI : LAI->getStoresToInvariantAddresses()) {
977  // Earlier stores to this address are effectively deadcode.
978  // With opaque pointers it is possible for one pointer to be used with
979  // different sizes of stored values:
980  // store i32 0, ptr %x
981  // store i8 0, ptr %x
982  // The latest store doesn't complitely overwrite the first one in the
983  // example. That is why we have to make sure that types of stored
984  // values are same.
985  // TODO: Check that bitwidth of unhandled store is smaller then the
986  // one that overwrites it and add a test.
987  erase_if(UnhandledStores, [SE, SI](StoreInst *I) {
988  return storeToSameAddress(SE, SI, I) &&
989  I->getValueOperand()->getType() ==
990  SI->getValueOperand()->getType();
991  });
992  continue;
993  }
994  UnhandledStores.push_back(SI);
995  }
996 
997  bool IsOK = UnhandledStores.empty();
998  // TODO: we should also validate against InvariantMemSets.
999  if (!IsOK) {
1001  "We don't allow storing to uniform addresses",
1002  "write to a loop invariant address could not "
1003  "be vectorized",
1004  "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
1005  return false;
1006  }
1007  }
1008  }
1009 
1010  PSE.addPredicate(LAI->getPSE().getPredicate());
1011  return true;
1012 }
1013 
1015  bool EnableStrictReductions) {
1016 
1017  // First check if there is any ExactFP math or if we allow reassociations
1018  if (!Requirements->getExactFPInst() || Hints->allowReordering())
1019  return true;
1020 
1021  // If the above is false, we have ExactFPMath & do not allow reordering.
1022  // If the EnableStrictReductions flag is set, first check if we have any
1023  // Exact FP induction vars, which we cannot vectorize.
1024  if (!EnableStrictReductions ||
1025  any_of(getInductionVars(), [&](auto &Induction) -> bool {
1026  InductionDescriptor IndDesc = Induction.second;
1027  return IndDesc.getExactFPMathInst();
1028  }))
1029  return false;
1030 
1031  // We can now only vectorize if all reductions with Exact FP math also
1032  // have the isOrdered flag set, which indicates that we can move the
1033  // reduction operations in-loop.
1034  return (all_of(getReductionVars(), [&](auto &Reduction) -> bool {
1035  const RecurrenceDescriptor &RdxDesc = Reduction.second;
1036  return !RdxDesc.hasExactFPMath() || RdxDesc.isOrdered();
1037  }));
1038 }
1039 
1041  return any_of(getReductionVars(), [&](auto &Reduction) -> bool {
1042  const RecurrenceDescriptor &RdxDesc = Reduction.second;
1043  return RdxDesc.IntermediateStore == SI;
1044  });
1045 }
1046 
1048  return any_of(getReductionVars(), [&](auto &Reduction) -> bool {
1049  const RecurrenceDescriptor &RdxDesc = Reduction.second;
1050  if (!RdxDesc.IntermediateStore)
1051  return false;
1052 
1053  ScalarEvolution *SE = PSE.getSE();
1054  Value *InvariantAddress = RdxDesc.IntermediateStore->getPointerOperand();
1055  return V == InvariantAddress ||
1056  SE->getSCEV(V) == SE->getSCEV(InvariantAddress);
1057  });
1058 }
1059 
1061  Value *In0 = const_cast<Value *>(V);
1062  PHINode *PN = dyn_cast_or_null<PHINode>(In0);
1063  if (!PN)
1064  return false;
1065 
1066  return Inductions.count(PN);
1067 }
1068 
1069 const InductionDescriptor *
1071  if (!isInductionPhi(Phi))
1072  return nullptr;
1073  auto &ID = getInductionVars().find(Phi)->second;
1074  if (ID.getKind() == InductionDescriptor::IK_IntInduction ||
1076  return &ID;
1077  return nullptr;
1078 }
1079 
1080 const InductionDescriptor *
1082  if (!isInductionPhi(Phi))
1083  return nullptr;
1084  auto &ID = getInductionVars().find(Phi)->second;
1085  if (ID.getKind() == InductionDescriptor::IK_PtrInduction)
1086  return &ID;
1087  return nullptr;
1088 }
1089 
1091  const Value *V) const {
1092  auto *Inst = dyn_cast<Instruction>(V);
1093  return (Inst && InductionCastsToIgnore.count(Inst));
1094 }
1095 
1097  return isInductionPhi(V) || isCastedInductionVariable(V);
1098 }
1099 
1101  const PHINode *Phi) const {
1102  return FixedOrderRecurrences.count(Phi);
1103 }
1104 
1106  return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
1107 }
1108 
1109 bool LoopVectorizationLegality::blockCanBePredicated(
1112  SmallPtrSetImpl<Instruction *> &ConditionalAssumes) const {
1113  for (Instruction &I : *BB) {
1114  // We can predicate blocks with calls to assume, as long as we drop them in
1115  // case we flatten the CFG via predication.
1116  if (match(&I, m_Intrinsic<Intrinsic::assume>())) {
1117  ConditionalAssumes.insert(&I);
1118  continue;
1119  }
1120 
1121  // Do not let llvm.experimental.noalias.scope.decl block the vectorization.
1122  // TODO: there might be cases that it should block the vectorization. Let's
1123  // ignore those for now.
1124  if (isa<NoAliasScopeDeclInst>(&I))
1125  continue;
1126 
1127  // Loads are handled via masking (or speculated if safe to do so.)
1128  if (auto *LI = dyn_cast<LoadInst>(&I)) {
1129  if (!SafePtrs.count(LI->getPointerOperand()))
1130  MaskedOp.insert(LI);
1131  continue;
1132  }
1133 
1134  // Predicated store requires some form of masking:
1135  // 1) masked store HW instruction,
1136  // 2) emulation via load-blend-store (only if safe and legal to do so,
1137  // be aware on the race conditions), or
1138  // 3) element-by-element predicate check and scalar store.
1139  if (auto *SI = dyn_cast<StoreInst>(&I)) {
1140  MaskedOp.insert(SI);
1141  continue;
1142  }
1143 
1144  if (I.mayReadFromMemory() || I.mayWriteToMemory() || I.mayThrow())
1145  return false;
1146  }
1147 
1148  return true;
1149 }
1150 
1151 bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
1152  if (!EnableIfConversion) {
1153  reportVectorizationFailure("If-conversion is disabled",
1154  "if-conversion is disabled",
1155  "IfConversionDisabled",
1156  ORE, TheLoop);
1157  return false;
1158  }
1159 
1160  assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable");
1161 
1162  // A list of pointers which are known to be dereferenceable within scope of
1163  // the loop body for each iteration of the loop which executes. That is,
1164  // the memory pointed to can be dereferenced (with the access size implied by
1165  // the value's type) unconditionally within the loop header without
1166  // introducing a new fault.
1167  SmallPtrSet<Value *, 8> SafePointers;
1168 
1169  // Collect safe addresses.
1170  for (BasicBlock *BB : TheLoop->blocks()) {
1171  if (!blockNeedsPredication(BB)) {
1172  for (Instruction &I : *BB)
1173  if (auto *Ptr = getLoadStorePointerOperand(&I))
1174  SafePointers.insert(Ptr);
1175  continue;
1176  }
1177 
1178  // For a block which requires predication, a address may be safe to access
1179  // in the loop w/o predication if we can prove dereferenceability facts
1180  // sufficient to ensure it'll never fault within the loop. For the moment,
1181  // we restrict this to loads; stores are more complicated due to
1182  // concurrency restrictions.
1183  ScalarEvolution &SE = *PSE.getSE();
1184  for (Instruction &I : *BB) {
1185  LoadInst *LI = dyn_cast<LoadInst>(&I);
1186  if (LI && !LI->getType()->isVectorTy() && !mustSuppressSpeculation(*LI) &&
1187  isDereferenceableAndAlignedInLoop(LI, TheLoop, SE, *DT, AC))
1188  SafePointers.insert(LI->getPointerOperand());
1189  }
1190  }
1191 
1192  // Collect the blocks that need predication.
1193  for (BasicBlock *BB : TheLoop->blocks()) {
1194  // We don't support switch statements inside loops.
1195  if (!isa<BranchInst>(BB->getTerminator())) {
1196  reportVectorizationFailure("Loop contains a switch statement",
1197  "loop contains a switch statement",
1198  "LoopContainsSwitch", ORE, TheLoop,
1199  BB->getTerminator());
1200  return false;
1201  }
1202 
1203  // We must be able to predicate all blocks that need to be predicated.
1204  if (blockNeedsPredication(BB)) {
1205  if (!blockCanBePredicated(BB, SafePointers, MaskedOp,
1206  ConditionalAssumes)) {
1208  "Control flow cannot be substituted for a select",
1209  "control flow cannot be substituted for a select",
1210  "NoCFGForSelect", ORE, TheLoop,
1211  BB->getTerminator());
1212  return false;
1213  }
1214  }
1215  }
1216 
1217  // We can if-convert this loop.
1218  return true;
1219 }
1220 
1221 // Helper function to canVectorizeLoopNestCFG.
1222 bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
1223  bool UseVPlanNativePath) {
1224  assert((UseVPlanNativePath || Lp->isInnermost()) &&
1225  "VPlan-native path is not enabled.");
1226 
1227  // TODO: ORE should be improved to show more accurate information when an
1228  // outer loop can't be vectorized because a nested loop is not understood or
1229  // legal. Something like: "outer_loop_location: loop not vectorized:
1230  // (inner_loop_location) loop control flow is not understood by vectorizer".
1231 
1232  // Store the result and return it at the end instead of exiting early, in case
1233  // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1234  bool Result = true;
1235  bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1236 
1237  // We must have a loop in canonical form. Loops with indirectbr in them cannot
1238  // be canonicalized.
1239  if (!Lp->getLoopPreheader()) {
1240  reportVectorizationFailure("Loop doesn't have a legal pre-header",
1241  "loop control flow is not understood by vectorizer",
1242  "CFGNotUnderstood", ORE, TheLoop);
1243  if (DoExtraAnalysis)
1244  Result = false;
1245  else
1246  return false;
1247  }
1248 
1249  // We must have a single backedge.
1250  if (Lp->getNumBackEdges() != 1) {
1251  reportVectorizationFailure("The loop must have a single backedge",
1252  "loop control flow is not understood by vectorizer",
1253  "CFGNotUnderstood", ORE, TheLoop);
1254  if (DoExtraAnalysis)
1255  Result = false;
1256  else
1257  return false;
1258  }
1259 
1260  return Result;
1261 }
1262 
1263 bool LoopVectorizationLegality::canVectorizeLoopNestCFG(
1264  Loop *Lp, bool UseVPlanNativePath) {
1265  // Store the result and return it at the end instead of exiting early, in case
1266  // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1267  bool Result = true;
1268  bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1269  if (!canVectorizeLoopCFG(Lp, UseVPlanNativePath)) {
1270  if (DoExtraAnalysis)
1271  Result = false;
1272  else
1273  return false;
1274  }
1275 
1276  // Recursively check whether the loop control flow of nested loops is
1277  // understood.
1278  for (Loop *SubLp : *Lp)
1279  if (!canVectorizeLoopNestCFG(SubLp, UseVPlanNativePath)) {
1280  if (DoExtraAnalysis)
1281  Result = false;
1282  else
1283  return false;
1284  }
1285 
1286  return Result;
1287 }
1288 
1289 bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
1290  // Store the result and return it at the end instead of exiting early, in case
1291  // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1292  bool Result = true;
1293 
1294  bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1295  // Check whether the loop-related control flow in the loop nest is expected by
1296  // vectorizer.
1297  if (!canVectorizeLoopNestCFG(TheLoop, UseVPlanNativePath)) {
1298  if (DoExtraAnalysis)
1299  Result = false;
1300  else
1301  return false;
1302  }
1303 
1304  // We need to have a loop header.
1305  LLVM_DEBUG(dbgs() << "LV: Found a loop: " << TheLoop->getHeader()->getName()
1306  << '\n');
1307 
1308  // Specific checks for outer loops. We skip the remaining legal checks at this
1309  // point because they don't support outer loops.
1310  if (!TheLoop->isInnermost()) {
1311  assert(UseVPlanNativePath && "VPlan-native path is not enabled.");
1312 
1313  if (!canVectorizeOuterLoop()) {
1314  reportVectorizationFailure("Unsupported outer loop",
1315  "unsupported outer loop",
1316  "UnsupportedOuterLoop",
1317  ORE, TheLoop);
1318  // TODO: Implement DoExtraAnalysis when subsequent legal checks support
1319  // outer loops.
1320  return false;
1321  }
1322 
1323  LLVM_DEBUG(dbgs() << "LV: We can vectorize this outer loop!\n");
1324  return Result;
1325  }
1326 
1327  assert(TheLoop->isInnermost() && "Inner loop expected.");
1328  // Check if we can if-convert non-single-bb loops.
1329  unsigned NumBlocks = TheLoop->getNumBlocks();
1330  if (NumBlocks != 1 && !canVectorizeWithIfConvert()) {
1331  LLVM_DEBUG(dbgs() << "LV: Can't if-convert the loop.\n");
1332  if (DoExtraAnalysis)
1333  Result = false;
1334  else
1335  return false;
1336  }
1337 
1338  // Check if we can vectorize the instructions and CFG in this loop.
1339  if (!canVectorizeInstrs()) {
1340  LLVM_DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n");
1341  if (DoExtraAnalysis)
1342  Result = false;
1343  else
1344  return false;
1345  }
1346 
1347  // Go over each instruction and look at memory deps.
1348  if (!canVectorizeMemory()) {
1349  LLVM_DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n");
1350  if (DoExtraAnalysis)
1351  Result = false;
1352  else
1353  return false;
1354  }
1355 
1356  LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop"
1357  << (LAI->getRuntimePointerChecking()->Need
1358  ? " (with a runtime bound check)"
1359  : "")
1360  << "!\n");
1361 
1362  unsigned SCEVThreshold = VectorizeSCEVCheckThreshold;
1363  if (Hints->getForce() == LoopVectorizeHints::FK_Enabled)
1364  SCEVThreshold = PragmaVectorizeSCEVCheckThreshold;
1365 
1366  if (PSE.getPredicate().getComplexity() > SCEVThreshold) {
1367  reportVectorizationFailure("Too many SCEV checks needed",
1368  "Too many SCEV assumptions need to be made and checked at runtime",
1369  "TooManySCEVRunTimeChecks", ORE, TheLoop);
1370  if (DoExtraAnalysis)
1371  Result = false;
1372  else
1373  return false;
1374  }
1375 
1376  // Okay! We've done all the tests. If any have failed, return false. Otherwise
1377  // we can vectorize, and at this point we don't have any other mem analysis
1378  // which may limit our maximum vectorization factor, so just return true with
1379  // no restrictions.
1380  return Result;
1381 }
1382 
1384 
1385  LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n");
1386 
1387  SmallPtrSet<const Value *, 8> ReductionLiveOuts;
1388 
1389  for (const auto &Reduction : getReductionVars())
1390  ReductionLiveOuts.insert(Reduction.second.getLoopExitInstr());
1391 
1392  // TODO: handle non-reduction outside users when tail is folded by masking.
1393  for (auto *AE : AllowedExit) {
1394  // Check that all users of allowed exit values are inside the loop or
1395  // are the live-out of a reduction.
1396  if (ReductionLiveOuts.count(AE))
1397  continue;
1398  for (User *U : AE->users()) {
1399  Instruction *UI = cast<Instruction>(U);
1400  if (TheLoop->contains(UI))
1401  continue;
1402  LLVM_DEBUG(
1403  dbgs()
1404  << "LV: Cannot fold tail by masking, loop has an outside user for "
1405  << *UI << "\n");
1406  return false;
1407  }
1408  }
1409 
1410  // The list of pointers that we can safely read and write to remains empty.
1411  SmallPtrSet<Value *, 8> SafePointers;
1412 
1414  SmallPtrSet<Instruction *, 8> TmpConditionalAssumes;
1415 
1416  // Check and mark all blocks for predication, including those that ordinarily
1417  // do not need predication such as the header block.
1418  for (BasicBlock *BB : TheLoop->blocks()) {
1419  if (!blockCanBePredicated(BB, SafePointers, TmpMaskedOp,
1420  TmpConditionalAssumes)) {
1421  LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking as requested.\n");
1422  return false;
1423  }
1424  }
1425 
1426  LLVM_DEBUG(dbgs() << "LV: can fold tail by masking.\n");
1427 
1428  MaskedOp.insert(TmpMaskedOp.begin(), TmpMaskedOp.end());
1429  ConditionalAssumes.insert(TmpConditionalAssumes.begin(),
1430  TmpConditionalAssumes.end());
1431 
1432  return true;
1433 }
1434 
1435 } // namespace llvm
i
i
Definition: README.txt:29
llvm::mustSuppressSpeculation
bool mustSuppressSpeculation(const LoadInst &LI)
Return true if speculation of the given load must be suppressed to avoid ordering or interfering with...
Definition: ValueTracking.cpp:4720
llvm::OptimizationRemarkMissed
Diagnostic information for missed-optimization remarks.
Definition: DiagnosticInfo.h:734
llvm::Loop::isLoopInvariant
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:60
llvm::LoopVectorizationLegality::getReductionVars
const ReductionList & getReductionVars() const
Returns the reduction variables found in the loop.
Definition: LoopVectorizationLegality.h:291
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
llvm::Instruction::getModule
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:69
llvm::storeToSameAddress
static bool storeToSameAddress(ScalarEvolution *SE, StoreInst *A, StoreInst *B)
Returns true if A and B have same pointer operands or same SCEVs addresses.
Definition: LoopVectorizationLegality.cpp:429
llvm::PredicatedScalarEvolution::getPredicate
const SCEVPredicate & getPredicate() const
Definition: ScalarEvolution.cpp:14603
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:113
llvm::LoopAccessInfo::isUniform
bool isUniform(Value *V) const
Returns true if the value V is uniform within the loop.
Definition: LoopAccessAnalysis.cpp:2547
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:104
IntrinsicInst.h
llvm::Type::isPointerTy
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:237
llvm::ElementCount
Definition: TypeSize.h:404
EnableIfConversion
static cl::opt< bool > EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden, cl::desc("Enable if-conversion during vectorization."))
llvm::getVectorIntrinsicIDForCall
Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
Definition: VectorUtils.cpp:135
Loads.h
llvm::LoopVectorizationLegality::isFixedOrderRecurrence
bool isFixedOrderRecurrence(const PHINode *Phi) const
Returns True if Phi is a fixed-order recurrence in this loop.
Definition: LoopVectorizationLegality.cpp:1100
llvm::InductionDescriptor::getExactFPMathInst
Instruction * getExactFPMathInst()
Returns floating-point induction operator that does not allow reassociation (transforming the inducti...
Definition: IVDescriptors.h:357
llvm::Function
Definition: Function.h:60
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:547
llvm::RecurrenceDescriptor::hasExactFPMath
bool hasExactFPMath() const
Returns true if the recurrence has floating-point math that requires precise (ordered) operations.
Definition: IVDescriptors.h:210
llvm::LoopBase::contains
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:139
llvm::ARM_MB::LD
@ LD
Definition: ARMBaseInfo.h:72
llvm::StringRef::npos
static constexpr size_t npos
Definition: StringRef.h:52
llvm::LinearPolySize< ElementCount >::isKnownLE
static bool isKnownLE(const LinearPolySize &LHS, const LinearPolySize &RHS)
Definition: TypeSize.h:340
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1199
SizeOpts.h
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:173
llvm::Loop::getStartLoc
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition: LoopInfo.cpp:631
llvm::TargetLibraryInfo::isFunctionVectorizable
bool isFunctionVectorizable(StringRef F, const ElementCount &VF) const
Definition: TargetLibraryInfo.h:335
llvm::erase_if
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:1972
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:449
llvm::VectorizerParams::VectorizationInterleave
static unsigned VectorizationInterleave
Interleave factor as overridden by the user.
Definition: LoopAccessAnalysis.h:44
llvm::LoopVectorizeHints::SK_Unspecified
@ SK_Unspecified
Not selected.
Definition: LoopVectorizationLegality.h:116
ValueTracking.h
OptimizationRemarkEmitter.h
llvm::isVectorIntrinsicWithScalarOpAtArg
bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx)
Identifies if the vector form of the intrinsic has a scalar operand.
Definition: VectorUtils.cpp:101
llvm::OptimizationRemarkEmitter::allowExtraAnalysis
bool allowExtraAnalysis(StringRef PassName) const
Whether we allow for extra compile-time budget to perform more analysis to produce fewer false positi...
Definition: OptimizationRemarkEmitter.h:98
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:140
VectorizeSCEVCheckThreshold
static cl::opt< unsigned > VectorizeSCEVCheckThreshold("vectorize-scev-check-threshold", cl::init(16), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed."))
llvm::InductionDescriptor::IK_IntInduction
@ IK_IntInduction
Integer induction variable. Step = C.
Definition: IVDescriptors.h:311
llvm::LoopVectorizeHints::SK_FixedWidthOnly
@ SK_FixedWidthOnly
Disables vectorization with scalable vectors.
Definition: LoopVectorizationLegality.h:118
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::LoopVectorizeHints::getWidth
ElementCount getWidth() const
Definition: LoopVectorizationLegality.h:138
llvm::LoopVectorizationLegality::canVectorizeFPMath
bool canVectorizeFPMath(bool EnableStrictReductions)
Returns true if it is legal to vectorize the FP math operations in this loop.
Definition: LoopVectorizationLegality.cpp:1014
llvm::LoopVectorizeHints::getIsVectorized
unsigned getIsVectorized() const
Definition: LoopVectorizationLegality.h:152
llvm::TargetTransformInfo::isLegalNTLoad
bool isLegalNTLoad(Type *DataType, Align Alignment) const
Return true if the target supports nontemporal load.
Definition: TargetTransformInfo.cpp:407
llvm::TargetTransformInfo::isLegalNTStore
bool isLegalNTStore(Type *DataType, Align Alignment) const
Return true if the target supports nontemporal store.
Definition: TargetTransformInfo.cpp:402
llvm::LoopVectorizeHints::LoopVectorizeHints
LoopVectorizeHints(const Loop *L, bool InterleaveOnlyWhenForced, OptimizationRemarkEmitter &ORE, const TargetTransformInfo *TTI=nullptr)
Definition: LoopVectorizationLegality.cpp:97
T
#define T
Definition: Mips16ISelLowering.cpp:341
llvm::ConstantAsMetadata::get
static ConstantAsMetadata * get(Constant *C)
Definition: Metadata.h:420
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
llvm::reportVectorizationFailure
void reportVectorizationFailure(const StringRef DebugMsg, const StringRef OREMsg, const StringRef ORETag, OptimizationRemarkEmitter *ORE, Loop *TheLoop, Instruction *I=nullptr)
Reports a vectorization failure: print DebugMsg for debugging purposes along with the corresponding o...
Definition: LoopVectorize.cpp:976
llvm::LoadInst::getPointerOperand
Value * getPointerOperand()
Definition: Instructions.h:261
llvm::isPowerOf2_32
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:458
llvm::isUniformLoopNest
static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp)
Definition: LoopVectorizationLegality.cpp:377
llvm::RISCVFeatures::validate
void validate(const Triple &TT, const FeatureBitset &FeatureBits)
Definition: RISCVBaseInfo.cpp:97
llvm::LoopVectorizationLegality::prepareToFoldTailByMasking
bool prepareToFoldTailByMasking()
Return true if we can vectorize this loop while folding its tail by masking, and mark all respective ...
Definition: LoopVectorizationLegality.cpp:1383
llvm::Type::isFloatingPointTy
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition: Type.h:184
llvm::Type::getInt32Ty
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:239
llvm::LoopBase::getNumBlocks
unsigned getNumBlocks() const
Get the number of blocks in this loop in constant time.
Definition: LoopInfo.h:202
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::MDNode::get
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1400
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
llvm::MDNode::getNumOperands
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1298
llvm::makePostTransformationMetadata
llvm::MDNode * makePostTransformationMetadata(llvm::LLVMContext &Context, MDNode *OrigLoopID, llvm::ArrayRef< llvm::StringRef > RemovePrefixes, llvm::ArrayRef< llvm::MDNode * > AddAttrs)
Create a new LoopID after the loop has been transformed.
Definition: LoopInfo.cpp:1126
Context
LLVMContext & Context
Definition: NVVMIntrRange.cpp:66
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::LoopVectorizeHints::FK_Undefined
@ FK_Undefined
Not selected.
Definition: LoopVectorizationLegality.h:109
Arg
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
Definition: AMDGPULibCalls.cpp:187
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1709
llvm::shouldOptimizeForSize
bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
Definition: MachineSizeOpts.cpp:183
llvm::InductionDescriptor
A struct for saving information about induction variables.
Definition: IVDescriptors.h:306
llvm::LoopAccessInfo::hasDependenceInvolvingLoopInvariantAddress
bool hasDependenceInvolvingLoopInvariantAddress() const
If the loop has memory dependence involving an invariant address, i.e.
Definition: LoopAccessAnalysis.h:625
MaxInterleaveFactor
static const unsigned MaxInterleaveFactor
Maximum vectorization interleave count.
Definition: LoopVectorizationLegality.cpp:77
llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
isZero
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition: Lint.cpp:524
llvm::InductionDescriptor::IK_PtrInduction
@ IK_PtrInduction
Pointer induction var. Step = C / sizeof(elem).
Definition: IVDescriptors.h:312
llvm::LoopAccessInfo::blockNeedsPredication
static bool blockNeedsPredication(BasicBlock *BB, Loop *TheLoop, DominatorTree *DT)
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
Definition: LoopAccessAnalysis.cpp:2518
llvm::ValueToValueMap
DenseMap< const Value *, Value * > ValueToValueMap
Definition: ScalarEvolutionExpressions.h:906
llvm::LoopVectorizationLegality::isInvariantStoreOfReduction
bool isInvariantStoreOfReduction(StoreInst *SI)
Returns True if given store is a final invariant store of one of the reductions found in the loop.
Definition: LoopVectorizationLegality.cpp:1040
llvm::User
Definition: User.h:44
llvm::LibFunc
LibFunc
Definition: TargetLibraryInfo.h:36
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::isUniformLoop
static bool isUniformLoop(Loop *Lp, Loop *OuterLp)
Definition: LoopVectorizationLegality.cpp:332
llvm::RecurrenceDescriptor::getExactFPMathInst
Instruction * getExactFPMathInst() const
Returns 1st non-reassociative FP instruction in the PHI node's use-chain.
Definition: IVDescriptors.h:213
llvm::CallBase::getCalledFunction
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Definition: InstrTypes.h:1397
SI
@ SI
Definition: SIInstrInfo.cpp:7966
llvm::LoopVectorizeHints::vectorizeAnalysisPassName
const char * vectorizeAnalysisPassName() const
If hints are provided that force vectorization, use the AlwaysPrint pass name to force the frontend t...
Definition: LoopVectorizationLegality.cpp:231
llvm::ms_demangle::QualifierMangleMode::Result
@ Result
llvm::LoopVectorizationRequirements::addExactFPMathInst
void addExactFPMathInst(Instruction *I)
Track the 1st floating-point instruction that can not be reassociated.
Definition: LoopVectorizationLegality.h:217
llvm::LoopBase::blocks
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:195
llvm::LoopVectorizationLegality::blockNeedsPredication
bool blockNeedsPredication(BasicBlock *BB) const
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
Definition: LoopVectorizationLegality.cpp:1105
TargetLibraryInfo.h
llvm::Type::isVectorTy
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:246
llvm::LoopVectorizationLegality::getPointerInductionDescriptor
const InductionDescriptor * getPointerInductionDescriptor(PHINode *Phi) const
Returns a pointer to the induction descriptor, if Phi is pointer induction.
Definition: LoopVectorizationLegality.cpp:1081
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::PHINode::getIncomingValueForBlock
Value * getIncomingValueForBlock(const BasicBlock *BB) const
Definition: Instructions.h:2884
llvm::LoopVectorizationLegality::isUniformMemOp
bool isUniformMemOp(Instruction &I) const
A uniform memory op is a load or store which accesses the same memory location on all lanes.
Definition: LoopVectorizationLegality.cpp:469
llvm::TargetTransformInfo::enableScalableVectorization
bool enableScalableVectorization() const
Definition: TargetTransformInfo.cpp:1174
llvm::TargetLibraryInfo::getLibFunc
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
Definition: TargetLibraryInfo.h:298
llvm::Instruction
Definition: Instruction.h:42
llvm::Type::getScalarSizeInBits
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition: Type.cpp:189
llvm::PGSOQueryType::IRPass
@ IRPass
llvm::LoopVectorizeHints::getForce
enum ForceKind getForce() const
Definition: LoopVectorizationLegality.h:154
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:879
PatternMatch.h
llvm::FixedVectorType::get
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:684
llvm::Metadata
Root of the metadata hierarchy.
Definition: Metadata.h:62
llvm::PHINode::getNumIncomingValues
unsigned getNumIncomingValues() const
Return the number of incoming edges.
Definition: Instructions.h:2791
llvm::LinearPolySize< ElementCount >::getFixed
static ElementCount getFixed(ScalarTy MinVal)
Definition: TypeSize.h:283
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
LoopInfo.h
llvm::Twine::str
std::string str() const
Return the twine contents as a std::string.
Definition: Twine.cpp:17
llvm::ScalarEvolution::getSCEV
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
Definition: ScalarEvolution.cpp:4442
llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:210
llvm::MDNode::getOperand
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1292
llvm::VectorType::isValidElementType
static bool isValidElementType(Type *ElemTy)
Return true if the specified type is valid as a element type.
Definition: Type.cpp:675
VectorUtils.h
llvm::cl::opt< bool >
llvm::PPC::Predicate
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:26
llvm::StoreInst
An instruction for storing to memory.
Definition: Instructions.h:298
llvm::LoopVectorizationLegality::isUniform
bool isUniform(Value *V) const
Returns true if the value V is uniform within the loop.
Definition: LoopVectorizationLegality.cpp:465
llvm::LoopVectorizeHints::SK_PreferScalable
@ SK_PreferScalable
Vectorize loops using scalable vectors or fixed-width vectors, but favor scalable vectors when the co...
Definition: LoopVectorizationLegality.h:122
llvm::cl::values
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:705
llvm::RuntimePointerChecking::Need
bool Need
This flag indicates if we need to add the runtime check.
Definition: LoopAccessAnalysis.h:480
llvm::MapVector::find
iterator find(const KeyT &Key)
Definition: MapVector.h:148
llvm::VFDatabase::getMappings
static SmallVector< VFInfo, 8 > getMappings(const CallInst &CI)
Retrieve all the VFInfo instances associated to the CallInst CI.
Definition: VectorUtils.h:249
llvm::HintsAllowReordering
cl::opt< bool > HintsAllowReordering("hints-allow-reordering", cl::init(true), cl::Hidden, cl::desc("Allow enabling loop hints to reorder " "FP operations during vectorization."))
llvm::SmallPtrSetImpl::end
iterator end() const
Definition: SmallPtrSet.h:408
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
llvm::LoopVectorizeHints::FK_Enabled
@ FK_Enabled
Forcing enabled.
Definition: LoopVectorizationLegality.h:111
llvm::LoopVectorizeHints::getInterleave
unsigned getInterleave() const
Definition: LoopVectorizationLegality.h:143
llvm::LoopVectorizeHints::allowVectorization
bool allowVectorization(Function *F, Loop *L, bool VectorizeOnlyWhenForced) const
Definition: LoopVectorizationLegality.cpp:172
llvm::getPtrStride
Optional< int64_t > getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const ValueToValueMap &StridesMap=ValueToValueMap(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of the access type size.
Definition: LoopAccessAnalysis.cpp:1369
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:53
llvm::DenseMap< const Value *, Value * >
llvm::LoopAccessInfo::getRuntimePointerChecking
const RuntimePointerChecking * getRuntimePointerChecking() const
Definition: LoopAccessAnalysis.h:576
I
#define I(x, y, z)
Definition: MD5.cpp:58
ForceScalableVectorization
static cl::opt< LoopVectorizeHints::ScalableForceKind > ForceScalableVectorization("scalable-vectorization", cl::init(LoopVectorizeHints::SK_Unspecified), cl::Hidden, cl::desc("Control whether the compiler can use scalable vectors to " "vectorize a loop"), cl::values(clEnumValN(LoopVectorizeHints::SK_FixedWidthOnly, "off", "Scalable vectorization is disabled."), clEnumValN(LoopVectorizeHints::SK_PreferScalable, "preferred", "Scalable vectorization is available and favored when the " "cost is inconclusive."), clEnumValN(LoopVectorizeHints::SK_PreferScalable, "on", "Scalable vectorization is available and favored when the " "cost is inconclusive.")))
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:447
llvm::LoopBase::getLoopPreheader
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:183
llvm::SmallPtrSetImpl::begin
iterator begin() const
Definition: SmallPtrSet.h:403
llvm::VectorizerParams::MaxVectorWidth
static const unsigned MaxVectorWidth
Maximum SIMD width.
Definition: LoopAccessAnalysis.h:39
llvm::LoopVectorizationLegality::isInvariantAddressOfReduction
bool isInvariantAddressOfReduction(Value *V)
Returns True if given address is invariant and is used to store recurrent expression.
Definition: LoopVectorizationLegality.cpp:1047
llvm::LoopAccessInfo::getReport
const OptimizationRemarkAnalysis * getReport() const
The diagnostics report generated for the analysis.
Definition: LoopAccessAnalysis.h:600
llvm::MDString::get
static MDString * get(LLVMContext &Context, StringRef Str)
Definition: Metadata.cpp:498
llvm::LoopBase::getLoopLatch
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:232
llvm::LoopAccessInfo::getStoresToInvariantAddresses
ArrayRef< StoreInst * > getStoresToInvariantAddresses() const
Return the list of stores to invariant addresses.
Definition: LoopAccessAnalysis.h:630
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::PredicatedScalarEvolution::getSCEV
const SCEV * getSCEV(Value *V)
Returns the SCEV expression of V, in the context of the current SCEV predicate.
Definition: ScalarEvolution.cpp:14563
llvm::LoopVectorizationLegality::isCastedInductionVariable
bool isCastedInductionVariable(const Value *V) const
Returns True if V is a cast that is part of an induction def-use chain, and had been proven to be red...
Definition: LoopVectorizationLegality.cpp:1090
llvm::LoopVectorizationLegality::isInductionVariable
bool isInductionVariable(const Value *V) const
Returns True if V can be considered as an induction variable in this loop.
Definition: LoopVectorizationLegality.cpp:1096
llvm::OptimizationRemarkEmitter::emit
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Definition: OptimizationRemarkEmitter.cpp:77
llvm::LoopAccessInfo::canVectorizeMemory
bool canVectorizeMemory() const
Return true we can analyze the memory accesses in the loop and there are no memory dependence cycles.
Definition: LoopAccessAnalysis.h:569
llvm::SCEVPredicate::isAlwaysTrue
virtual bool isAlwaysTrue() const =0
Returns true if the predicate is always true.
Ptr
@ Ptr
Definition: TargetLibraryInfo.cpp:60
llvm::InductionDescriptor::IK_FpInduction
@ IK_FpInduction
Floating point induction variable.
Definition: IVDescriptors.h:313
llvm::isTLIScalarize
static bool isTLIScalarize(const TargetLibraryInfo &TLI, const CallInst &CI)
Checks if a function is scalarizable according to the TLI, in the sense that it should be vectorized ...
Definition: LoopVectorizationLegality.cpp:635
llvm::LoopVectorizationRequirements::getExactFPInst
Instruction * getExactFPInst()
Definition: LoopVectorizationLegality.h:222
llvm::MDNode
Metadata node.
Definition: Metadata.h:944
llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:383
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:75
LV_NAME
#define LV_NAME
Definition: LoopVectorizationLegality.cpp:33
llvm::size
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1690
llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: OptimizationRemarkEmitter.h:33
llvm::RecurrenceDescriptor::IntermediateStore
StoreInst * IntermediateStore
Reductions may store temporary or final result to an invariant address.
Definition: IVDescriptors.h:276
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1716
llvm::LoopVectorizationLegality::isConsecutivePtr
int isConsecutivePtr(Type *AccessTy, Value *Ptr) const
Check if this pointer is consecutive when vectorizing.
Definition: LoopVectorizationLegality.cpp:448
llvm::SCEVPredicate::getComplexity
virtual unsigned getComplexity() const
Returns the estimated complexity of this predicate.
Definition: ScalarEvolution.h:238
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
llvm::LoopVectorizeHints::ScalableForceKind
ScalableForceKind
Definition: LoopVectorizationLegality.h:114
llvm::Loop::getCanonicalInductionVariable
PHINode * getCanonicalInductionVariable() const
Check to see if the loop has a canonical induction variable: an integer recurrence that starts at 0 a...
Definition: LoopInfo.cpp:150
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
clEnumValN
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:680
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
llvm::LoopVectorizeHints::emitRemarkWithHints
void emitRemarkWithHints() const
Dumps all the hint information.
Definition: LoopVectorizationLegality.cpp:205
llvm::convertPointerToIntegerType
static Type * convertPointerToIntegerType(const DataLayout &DL, Type *Ty)
Definition: LoopVectorizationLegality.cpp:389
llvm::LoopVectorizationLegality::getIntOrFpInductionDescriptor
const InductionDescriptor * getIntOrFpInductionDescriptor(PHINode *Phi) const
Returns a pointer to the induction descriptor, if Phi is an integer or floating point induction.
Definition: LoopVectorizationLegality.cpp:1070
llvm::OptimizationRemarkAnalysis
Diagnostic information for optimization analysis remarks.
Definition: DiagnosticInfo.h:780
llvm::ifs::IFSSymbolType::Func
@ Func
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:308
llvm::Type::getContext
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:128
llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:174
llvm::VectorizerParams::isInterleaveForced
static bool isInterleaveForced()
True if force-vector-interleave was specified by the user.
Definition: LoopAccessAnalysis.cpp:139
LoopVectorize.h
llvm::BasicBlock::getContext
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:35
llvm::MapVector::empty
bool empty() const
Definition: MapVector.h:80
llvm::LoopBase::isInnermost
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
Definition: LoopInfo.h:182
llvm::InductionDescriptor::isInductionPHI
static bool isInductionPHI(PHINode *Phi, const Loop *L, ScalarEvolution *SE, InductionDescriptor &D, const SCEV *Expr=nullptr, SmallVectorImpl< Instruction * > *CastsToIgnore=nullptr)
Returns true if Phi is an induction in the loop L.
Definition: IVDescriptors.cpp:1518
PragmaVectorizeSCEVCheckThreshold
static cl::opt< unsigned > PragmaVectorizeSCEVCheckThreshold("pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed with a " "vectorize(enable) pragma"))
llvm::LoopInfoBase::isLoopHeader
bool isLoopHeader(const BlockT *BB) const
Definition: LoopInfo.h:1005
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
llvm::MapVector::count
size_type count(const KeyT &Key) const
Definition: MapVector.h:143
llvm::GraphProgram::Name
Name
Definition: GraphWriter.h:50
H
#define H(x, y, z)
Definition: MD5.cpp:57
llvm::Loop::setLoopID
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
Definition: LoopInfo.cpp:525
llvm::VectorizationFactor
TODO: The following VectorizationFactor was pulled out of LoopVectorizationCostModel class.
Definition: LoopVectorizationPlanner.h:188
llvm::Loop::getLoopID
MDNode * getLoopID() const
Return the llvm.loop loop id metadata node for this loop if it is present.
Definition: LoopInfo.cpp:501
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:105
llvm::LinearPolySize< ElementCount >::getScalable
static ElementCount getScalable(ScalarTy MinVal)
Definition: TypeSize.h:286
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:226
LoopVectorizationLegality.h
llvm::PredicatedScalarEvolution::addPredicate
void addPredicate(const SCEVPredicate &Pred)
Adds a new predicate.
Definition: ScalarEvolution.cpp:14592
llvm::LoopBase::getNumBackEdges
unsigned getNumBackEdges() const
Calculate the number of back edges to the loop header.
Definition: LoopInfo.h:267
llvm::OptimizationRemarkAnalysis::AlwaysPrint
static const char * AlwaysPrint
Definition: DiagnosticInfo.h:820
llvm::UnivariateLinearPolyBase::isZero
bool isZero() const
Definition: TypeSize.h:229
llvm::AMDGPU::Hwreg::Width
Width
Definition: SIDefines.h:439
llvm::isDereferenceableAndAlignedInLoop
bool isDereferenceableAndAlignedInLoop(LoadInst *LI, Loop *L, ScalarEvolution &SE, DominatorTree &DT, AssumptionCache *AC=nullptr)
Return true if we can prove that the given load (which is assumed to be within the specified loop) wo...
Definition: Loads.cpp:262
llvm::RecurrenceDescriptor
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Definition: IVDescriptors.h:69
DEBUG_TYPE
#define DEBUG_TYPE
Definition: LoopVectorizationLegality.cpp:34
llvm::LoopVectorizationLegality::isInductionPhi
bool isInductionPhi(const Value *V) const
Returns True if V is a Phi node of an induction variable in this loop.
Definition: LoopVectorizationLegality.cpp:1060
llvm::StoreInst::getPointerOperand
Value * getPointerOperand()
Definition: Instructions.h:390
llvm::LoopVectorizeHints::setAlreadyVectorized
void setAlreadyVectorized()
Mark the loop L as already vectorized by setting the width to 1.
Definition: LoopVectorizationLegality.cpp:153
llvm::VectorizerParams
Collection of parameters shared beetween the Loop Vectorizer and the Loop Access Analysis.
Definition: LoopAccessAnalysis.h:37
TargetTransformInfo.h
llvm::PHINode
Definition: Instructions.h:2699
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:119
llvm::LoopVectorizeHints::allowReordering
bool allowReordering() const
When enabling loop hints are provided we allow the vectorizer to change the order of operations that ...
Definition: LoopVectorizationLegality.cpp:241
IV
static const uint32_t IV[8]
Definition: blake3_impl.h:85
llvm::SmallVectorImpl< Instruction * >
llvm::TargetLibraryInfo::getWidestVF
void getWidestVF(StringRef ScalarF, ElementCount &FixedVF, ElementCount &ScalableVF) const
Returns the largest vectorization factor used in the list of vector functions.
Definition: TargetLibraryInfo.h:435
llvm::Module::getDataLayout
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:399
llvm::SmallPtrSetImpl< Value * >
llvm::getLoadStorePointerOperand
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
Definition: Instructions.h:5361
llvm::RecurrenceDescriptor::isOrdered
bool isOrdered() const
Expose an ordered FP reduction to the instance users.
Definition: IVDescriptors.h:260
llvm::RecurrenceDescriptor::isReductionPHI
static bool isReductionPHI(PHINode *Phi, Loop *TheLoop, RecurrenceDescriptor &RedDes, DemandedBits *DB=nullptr, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr, ScalarEvolution *SE=nullptr)
Returns true if Phi is a reduction in TheLoop.
Definition: IVDescriptors.cpp:825
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1474
llvm::LoopVectorizationLegality::getInductionVars
const InductionList & getInductionVars() const
Returns the induction variables found in the loop.
Definition: LoopVectorizationLegality.h:294
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::getWiderType
static Type * getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1)
Definition: LoopVectorizationLegality.cpp:401
llvm::TargetLibraryInfo::hasOptimizedCodeGen
bool hasOptimizedCodeGen(LibFunc F) const
Tests if the function is both available and a candidate for optimized code generation.
Definition: TargetLibraryInfo.h:347
llvm::RecurrenceDescriptor::getLoopExitInstr
Instruction * getLoopExitInstr() const
Definition: IVDescriptors.h:206
llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition: AMDGPUMetadata.h:394
llvm::cl::desc
Definition: CommandLine.h:413
llvm::hasOutsideLoopUser
static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst, SmallPtrSetImpl< Value * > &AllowedExit)
Check that the instruction has outside loop users and is not an identified reduction variable.
Definition: LoopVectorizationLegality.cpp:411
llvm::LoopAccessInfo::getPSE
const PredicatedScalarEvolution & getPSE() const
Used to add runtime SCEV checks.
Definition: LoopAccessAnalysis.h:639
llvm::RecurrenceDescriptor::isFixedOrderRecurrence
static bool isFixedOrderRecurrence(PHINode *Phi, Loop *TheLoop, MapVector< Instruction *, Instruction * > &SinkAfter, DominatorTree *DT)
Returns true if Phi is a fixed-order recurrence.
Definition: IVDescriptors.cpp:924
llvm::PredicatedScalarEvolution::getSE
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
Definition: ScalarEvolution.h:2251
llvm::LoopVectorizeHints::FK_Disabled
@ FK_Disabled
Forcing disabled.
Definition: LoopVectorizationLegality.h:110
Reduction
loop Loop Strength Reduction
Definition: LoopStrengthReduce.cpp:6965
llvm::MDString
A single uniqued string.
Definition: Metadata.h:612
llvm::LoopAccessInfoManager::getInfo
const LoopAccessInfo & getInfo(Loop &L)
Definition: LoopAccessAnalysis.cpp:2672
llvm::Optional::value_or
constexpr T value_or(U &&alt) const &
Definition: Optional.h:291
llvm::LoopVectorizationLegality::canVectorize
bool canVectorize(bool UseVPlanNativePath)
Returns true if it is legal to vectorize this loop.
Definition: LoopVectorizationLegality.cpp:1289
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::Value::users
iterator_range< user_iterator > users()
Definition: Value.h:421
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:39