LLVM  14.0.0git
LoopVectorizationLegality.cpp
Go to the documentation of this file.
1 //===- LoopVectorizationLegality.cpp --------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file provides loop vectorization legality analysis. Original code
10 // resided in LoopVectorize.cpp for a long time.
11 //
12 // At this point, it is implemented as a utility class, not as an analysis
13 // pass. It should be easy to create an analysis pass around it if there
14 // is a need (but D45420 needs to happen first).
15 //
16 
18 #include "llvm/Analysis/Loads.h"
19 #include "llvm/Analysis/LoopInfo.h"
23 #include "llvm/IR/IntrinsicInst.h"
24 #include "llvm/IR/PatternMatch.h"
27 
28 using namespace llvm;
29 using namespace PatternMatch;
30 
31 #define LV_NAME "loop-vectorize"
32 #define DEBUG_TYPE LV_NAME
33 
35 
36 static cl::opt<bool>
37  EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
38  cl::desc("Enable if-conversion during vectorization."));
39 
40 namespace llvm {
42  HintsAllowReordering("hints-allow-reordering", cl::init(true), cl::Hidden,
43  cl::desc("Allow enabling loop hints to reorder "
44  "FP operations during vectorization."));
45 }
46 
47 // TODO: Move size-based thresholds out of legality checking, make cost based
48 // decisions instead of hard thresholds.
50  "vectorize-scev-check-threshold", cl::init(16), cl::Hidden,
51  cl::desc("The maximum number of SCEV checks allowed."));
52 
54  "pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden,
55  cl::desc("The maximum number of SCEV checks allowed with a "
56  "vectorize(enable) pragma"));
57 
60  "scalable-vectorization", cl::init(LoopVectorizeHints::SK_Unspecified),
61  cl::Hidden,
62  cl::desc("Control whether the compiler can use scalable vectors to "
63  "vectorize a loop"),
64  cl::values(
66  "Scalable vectorization is disabled."),
67  clEnumValN(
69  "Scalable vectorization is available and favored when the "
70  "cost is inconclusive."),
71  clEnumValN(
73  "Scalable vectorization is available and favored when the "
74  "cost is inconclusive.")));
75 
76 /// Maximum vectorization interleave count.
77 static const unsigned MaxInterleaveFactor = 16;
78 
79 namespace llvm {
80 
81 bool LoopVectorizeHints::Hint::validate(unsigned Val) {
82  switch (Kind) {
83  case HK_WIDTH:
84  return isPowerOf2_32(Val) && Val <= VectorizerParams::MaxVectorWidth;
85  case HK_INTERLEAVE:
86  return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor;
87  case HK_FORCE:
88  return (Val <= 1);
89  case HK_ISVECTORIZED:
90  case HK_PREDICATE:
91  case HK_SCALABLE:
92  return (Val == 0 || Val == 1);
93  }
94  return false;
95 }
96 
98  bool InterleaveOnlyWhenForced,
100  const TargetTransformInfo *TTI)
101  : Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH),
102  Interleave("interleave.count", InterleaveOnlyWhenForced, HK_INTERLEAVE),
103  Force("vectorize.enable", FK_Undefined, HK_FORCE),
104  IsVectorized("isvectorized", 0, HK_ISVECTORIZED),
105  Predicate("vectorize.predicate.enable", FK_Undefined, HK_PREDICATE),
106  Scalable("vectorize.scalable.enable", SK_Unspecified, HK_SCALABLE),
107  TheLoop(L), ORE(ORE) {
108  // Populate values with existing loop metadata.
109  getHintsFromMetadata();
110 
111  // force-vector-interleave overrides DisableInterleaving.
114 
115  // If the metadata doesn't explicitly specify whether to enable scalable
116  // vectorization, then decide based on the following criteria (increasing
117  // level of priority):
118  // - Target default
119  // - Metadata width
120  // - Force option (always overrides)
121  if ((LoopVectorizeHints::ScalableForceKind)Scalable.Value == SK_Unspecified) {
122  if (TTI)
125 
126  if (Width.Value)
127  // If the width is set, but the metadata says nothing about the scalable
128  // property, then assume it concerns only a fixed-width UserVF.
129  // If width is not set, the flag takes precedence.
130  Scalable.Value = SK_FixedWidthOnly;
131  }
132 
133  // If the flag is set to force any use of scalable vectors, override the loop
134  // hints.
135  if (ForceScalableVectorization.getValue() !=
137  Scalable.Value = ForceScalableVectorization.getValue();
138 
139  // Scalable vectorization is disabled if no preference is specified.
141  Scalable.Value = SK_FixedWidthOnly;
142 
143  if (IsVectorized.Value != 1)
144  // If the vectorization width and interleaving count are both 1 then
145  // consider the loop to have been already vectorized because there's
146  // nothing more that we can do.
147  IsVectorized.Value =
149  LLVM_DEBUG(if (InterleaveOnlyWhenForced && getInterleave() == 1) dbgs()
150  << "LV: Interleaving disabled by the pass manager\n");
151 }
152 
154  LLVMContext &Context = TheLoop->getHeader()->getContext();
155 
156  MDNode *IsVectorizedMD = MDNode::get(
157  Context,
158  {MDString::get(Context, "llvm.loop.isvectorized"),
160  MDNode *LoopID = TheLoop->getLoopID();
161  MDNode *NewLoopID =
163  {Twine(Prefix(), "vectorize.").str(),
164  Twine(Prefix(), "interleave.").str()},
165  {IsVectorizedMD});
166  TheLoop->setLoopID(NewLoopID);
167 
168  // Update internal cache.
169  IsVectorized.Value = 1;
170 }
171 
173  Function *F, Loop *L, bool VectorizeOnlyWhenForced) const {
175  LLVM_DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n");
177  return false;
178  }
179 
180  if (VectorizeOnlyWhenForced && getForce() != LoopVectorizeHints::FK_Enabled) {
181  LLVM_DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n");
183  return false;
184  }
185 
186  if (getIsVectorized() == 1) {
187  LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n");
188  // FIXME: Add interleave.disable metadata. This will allow
189  // vectorize.disable to be used without disabling the pass and errors
190  // to differentiate between disabled vectorization and a width of 1.
191  ORE.emit([&]() {
193  "AllDisabled", L->getStartLoc(),
194  L->getHeader())
195  << "loop not vectorized: vectorization and interleaving are "
196  "explicitly disabled, or the loop has already been "
197  "vectorized";
198  });
199  return false;
200  }
201 
202  return true;
203 }
204 
206  using namespace ore;
207 
208  ORE.emit([&]() {
209  if (Force.Value == LoopVectorizeHints::FK_Disabled)
210  return OptimizationRemarkMissed(LV_NAME, "MissedExplicitlyDisabled",
211  TheLoop->getStartLoc(),
212  TheLoop->getHeader())
213  << "loop not vectorized: vectorization is explicitly disabled";
214  else {
215  OptimizationRemarkMissed R(LV_NAME, "MissedDetails",
216  TheLoop->getStartLoc(), TheLoop->getHeader());
217  R << "loop not vectorized";
218  if (Force.Value == LoopVectorizeHints::FK_Enabled) {
219  R << " (Force=" << NV("Force", true);
220  if (Width.Value != 0)
221  R << ", Vector Width=" << NV("VectorWidth", getWidth());
222  if (getInterleave() != 0)
223  R << ", Interleave Count=" << NV("InterleaveCount", getInterleave());
224  R << ")";
225  }
226  return R;
227  }
228  });
229 }
230 
232  if (getWidth() == ElementCount::getFixed(1))
233  return LV_NAME;
235  return LV_NAME;
237  return LV_NAME;
239 }
240 
242  // Allow the vectorizer to change the order of operations if enabling
243  // loop hints are provided
244  ElementCount EC = getWidth();
245  return HintsAllowReordering &&
247  EC.getKnownMinValue() > 1);
248 }
249 
250 void LoopVectorizeHints::getHintsFromMetadata() {
251  MDNode *LoopID = TheLoop->getLoopID();
252  if (!LoopID)
253  return;
254 
255  // First operand should refer to the loop id itself.
256  assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
257  assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
258 
259  for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
260  const MDString *S = nullptr;
262 
263  // The expected hint is either a MDString or a MDNode with the first
264  // operand a MDString.
265  if (const MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i))) {
266  if (!MD || MD->getNumOperands() == 0)
267  continue;
268  S = dyn_cast<MDString>(MD->getOperand(0));
269  for (unsigned i = 1, ie = MD->getNumOperands(); i < ie; ++i)
270  Args.push_back(MD->getOperand(i));
271  } else {
272  S = dyn_cast<MDString>(LoopID->getOperand(i));
273  assert(Args.size() == 0 && "too many arguments for MDString");
274  }
275 
276  if (!S)
277  continue;
278 
279  // Check if the hint starts with the loop metadata prefix.
280  StringRef Name = S->getString();
281  if (Args.size() == 1)
282  setHint(Name, Args[0]);
283  }
284 }
285 
286 void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) {
287  if (!Name.startswith(Prefix()))
288  return;
289  Name = Name.substr(Prefix().size(), StringRef::npos);
290 
291  const ConstantInt *C = mdconst::dyn_extract<ConstantInt>(Arg);
292  if (!C)
293  return;
294  unsigned Val = C->getZExtValue();
295 
296  Hint *Hints[] = {&Width, &Interleave, &Force,
297  &IsVectorized, &Predicate, &Scalable};
298  for (auto H : Hints) {
299  if (Name == H->Name) {
300  if (H->validate(Val))
301  H->Value = Val;
302  else
303  LLVM_DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n");
304  break;
305  }
306  }
307 }
308 
309 // Return true if the inner loop \p Lp is uniform with regard to the outer loop
310 // \p OuterLp (i.e., if the outer loop is vectorized, all the vector lanes
311 // executing the inner loop will execute the same iterations). This check is
312 // very constrained for now but it will be relaxed in the future. \p Lp is
313 // considered uniform if it meets all the following conditions:
314 // 1) it has a canonical IV (starting from 0 and with stride 1),
315 // 2) its latch terminator is a conditional branch and,
316 // 3) its latch condition is a compare instruction whose operands are the
317 // canonical IV and an OuterLp invariant.
318 // This check doesn't take into account the uniformity of other conditions not
319 // related to the loop latch because they don't affect the loop uniformity.
320 //
321 // NOTE: We decided to keep all these checks and their associated documentation
322 // together so that we can easily have a picture of the current supported loop
323 // nests. However, some of the current checks don't depend on \p OuterLp and
324 // would be redundantly executed for each \p Lp if we invoked this function for
325 // different candidate outer loops. This is not the case for now because we
326 // don't currently have the infrastructure to evaluate multiple candidate outer
327 // loops and \p OuterLp will be a fixed parameter while we only support explicit
328 // outer loop vectorization. It's also very likely that these checks go away
329 // before introducing the aforementioned infrastructure. However, if this is not
330 // the case, we should move the \p OuterLp independent checks to a separate
331 // function that is only executed once for each \p Lp.
332 static bool isUniformLoop(Loop *Lp, Loop *OuterLp) {
333  assert(Lp->getLoopLatch() && "Expected loop with a single latch.");
334 
335  // If Lp is the outer loop, it's uniform by definition.
336  if (Lp == OuterLp)
337  return true;
338  assert(OuterLp->contains(Lp) && "OuterLp must contain Lp.");
339 
340  // 1.
342  if (!IV) {
343  LLVM_DEBUG(dbgs() << "LV: Canonical IV not found.\n");
344  return false;
345  }
346 
347  // 2.
348  BasicBlock *Latch = Lp->getLoopLatch();
349  auto *LatchBr = dyn_cast<BranchInst>(Latch->getTerminator());
350  if (!LatchBr || LatchBr->isUnconditional()) {
351  LLVM_DEBUG(dbgs() << "LV: Unsupported loop latch branch.\n");
352  return false;
353  }
354 
355  // 3.
356  auto *LatchCmp = dyn_cast<CmpInst>(LatchBr->getCondition());
357  if (!LatchCmp) {
358  LLVM_DEBUG(
359  dbgs() << "LV: Loop latch condition is not a compare instruction.\n");
360  return false;
361  }
362 
363  Value *CondOp0 = LatchCmp->getOperand(0);
364  Value *CondOp1 = LatchCmp->getOperand(1);
365  Value *IVUpdate = IV->getIncomingValueForBlock(Latch);
366  if (!(CondOp0 == IVUpdate && OuterLp->isLoopInvariant(CondOp1)) &&
367  !(CondOp1 == IVUpdate && OuterLp->isLoopInvariant(CondOp0))) {
368  LLVM_DEBUG(dbgs() << "LV: Loop latch condition is not uniform.\n");
369  return false;
370  }
371 
372  return true;
373 }
374 
375 // Return true if \p Lp and all its nested loops are uniform with regard to \p
376 // OuterLp.
377 static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp) {
378  if (!isUniformLoop(Lp, OuterLp))
379  return false;
380 
381  // Check if nested loops are uniform.
382  for (Loop *SubLp : *Lp)
383  if (!isUniformLoopNest(SubLp, OuterLp))
384  return false;
385 
386  return true;
387 }
388 
389 /// Check whether it is safe to if-convert this phi node.
390 ///
391 /// Phi nodes with constant expressions that can trap are not safe to if
392 /// convert.
394  for (PHINode &Phi : BB->phis()) {
395  for (Value *V : Phi.incoming_values())
396  if (auto *C = dyn_cast<Constant>(V))
397  if (C->canTrap())
398  return false;
399  }
400  return true;
401 }
402 
404  if (Ty->isPointerTy())
405  return DL.getIntPtrType(Ty);
406 
407  // It is possible that char's or short's overflow when we ask for the loop's
408  // trip count, work around this by changing the type size.
409  if (Ty->getScalarSizeInBits() < 32)
410  return Type::getInt32Ty(Ty->getContext());
411 
412  return Ty;
413 }
414 
415 static Type *getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1) {
416  Ty0 = convertPointerToIntegerType(DL, Ty0);
417  Ty1 = convertPointerToIntegerType(DL, Ty1);
418  if (Ty0->getScalarSizeInBits() > Ty1->getScalarSizeInBits())
419  return Ty0;
420  return Ty1;
421 }
422 
423 /// Check that the instruction has outside loop users and is not an
424 /// identified reduction variable.
425 static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
426  SmallPtrSetImpl<Value *> &AllowedExit) {
427  // Reductions, Inductions and non-header phis are allowed to have exit users. All
428  // other instructions must not have external users.
429  if (!AllowedExit.count(Inst))
430  // Check that all of the users of the loop are inside the BB.
431  for (User *U : Inst->users()) {
432  Instruction *UI = cast<Instruction>(U);
433  // This user may be a reduction exit value.
434  if (!TheLoop->contains(UI)) {
435  LLVM_DEBUG(dbgs() << "LV: Found an outside user for : " << *UI << '\n');
436  return true;
437  }
438  }
439  return false;
440 }
441 
443  Value *Ptr) const {
444  const ValueToValueMap &Strides =
445  getSymbolicStrides() ? *getSymbolicStrides() : ValueToValueMap();
446 
447  Function *F = TheLoop->getHeader()->getParent();
448  bool OptForSize = F->hasOptSize() ||
449  llvm::shouldOptimizeForSize(TheLoop->getHeader(), PSI, BFI,
451  bool CanAddPredicate = !OptForSize;
452  int Stride = getPtrStride(PSE, AccessTy, Ptr, TheLoop, Strides,
453  CanAddPredicate, false);
454  if (Stride == 1 || Stride == -1)
455  return Stride;
456  return 0;
457 }
458 
460  return LAI->isUniform(V);
461 }
462 
463 bool LoopVectorizationLegality::canVectorizeOuterLoop() {
464  assert(!TheLoop->isInnermost() && "We are not vectorizing an outer loop.");
465  // Store the result and return it at the end instead of exiting early, in case
466  // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
467  bool Result = true;
468  bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
469 
470  for (BasicBlock *BB : TheLoop->blocks()) {
471  // Check whether the BB terminator is a BranchInst. Any other terminator is
472  // not supported yet.
473  auto *Br = dyn_cast<BranchInst>(BB->getTerminator());
474  if (!Br) {
475  reportVectorizationFailure("Unsupported basic block terminator",
476  "loop control flow is not understood by vectorizer",
477  "CFGNotUnderstood", ORE, TheLoop);
478  if (DoExtraAnalysis)
479  Result = false;
480  else
481  return false;
482  }
483 
484  // Check whether the BranchInst is a supported one. Only unconditional
485  // branches, conditional branches with an outer loop invariant condition or
486  // backedges are supported.
487  // FIXME: We skip these checks when VPlan predication is enabled as we
488  // want to allow divergent branches. This whole check will be removed
489  // once VPlan predication is on by default.
490  if (!EnableVPlanPredication && Br && Br->isConditional() &&
491  !TheLoop->isLoopInvariant(Br->getCondition()) &&
492  !LI->isLoopHeader(Br->getSuccessor(0)) &&
493  !LI->isLoopHeader(Br->getSuccessor(1))) {
494  reportVectorizationFailure("Unsupported conditional branch",
495  "loop control flow is not understood by vectorizer",
496  "CFGNotUnderstood", ORE, TheLoop);
497  if (DoExtraAnalysis)
498  Result = false;
499  else
500  return false;
501  }
502  }
503 
504  // Check whether inner loops are uniform. At this point, we only support
505  // simple outer loops scenarios with uniform nested loops.
506  if (!isUniformLoopNest(TheLoop /*loop nest*/,
507  TheLoop /*context outer loop*/)) {
508  reportVectorizationFailure("Outer loop contains divergent loops",
509  "loop control flow is not understood by vectorizer",
510  "CFGNotUnderstood", ORE, TheLoop);
511  if (DoExtraAnalysis)
512  Result = false;
513  else
514  return false;
515  }
516 
517  // Check whether we are able to set up outer loop induction.
518  if (!setupOuterLoopInductions()) {
519  reportVectorizationFailure("Unsupported outer loop Phi(s)",
520  "Unsupported outer loop Phi(s)",
521  "UnsupportedPhi", ORE, TheLoop);
522  if (DoExtraAnalysis)
523  Result = false;
524  else
525  return false;
526  }
527 
528  return Result;
529 }
530 
531 void LoopVectorizationLegality::addInductionPhi(
532  PHINode *Phi, const InductionDescriptor &ID,
533  SmallPtrSetImpl<Value *> &AllowedExit) {
534  Inductions[Phi] = ID;
535 
536  // In case this induction also comes with casts that we know we can ignore
537  // in the vectorized loop body, record them here. All casts could be recorded
538  // here for ignoring, but suffices to record only the first (as it is the
539  // only one that may bw used outside the cast sequence).
540  const SmallVectorImpl<Instruction *> &Casts = ID.getCastInsts();
541  if (!Casts.empty())
542  InductionCastsToIgnore.insert(*Casts.begin());
543 
544  Type *PhiTy = Phi->getType();
545  const DataLayout &DL = Phi->getModule()->getDataLayout();
546 
547  // Get the widest type.
548  if (!PhiTy->isFloatingPointTy()) {
549  if (!WidestIndTy)
550  WidestIndTy = convertPointerToIntegerType(DL, PhiTy);
551  else
552  WidestIndTy = getWiderType(DL, PhiTy, WidestIndTy);
553  }
554 
555  // Int inductions are special because we only allow one IV.
556  if (ID.getKind() == InductionDescriptor::IK_IntInduction &&
557  ID.getConstIntStepValue() && ID.getConstIntStepValue()->isOne() &&
558  isa<Constant>(ID.getStartValue()) &&
559  cast<Constant>(ID.getStartValue())->isNullValue()) {
560 
561  // Use the phi node with the widest type as induction. Use the last
562  // one if there are multiple (no good reason for doing this other
563  // than it is expedient). We've checked that it begins at zero and
564  // steps by one, so this is a canonical induction variable.
565  if (!PrimaryInduction || PhiTy == WidestIndTy)
566  PrimaryInduction = Phi;
567  }
568 
569  // Both the PHI node itself, and the "post-increment" value feeding
570  // back into the PHI node may have external users.
571  // We can allow those uses, except if the SCEVs we have for them rely
572  // on predicates that only hold within the loop, since allowing the exit
573  // currently means re-using this SCEV outside the loop (see PR33706 for more
574  // details).
575  if (PSE.getUnionPredicate().isAlwaysTrue()) {
576  AllowedExit.insert(Phi);
577  AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch()));
578  }
579 
580  LLVM_DEBUG(dbgs() << "LV: Found an induction variable.\n");
581 }
582 
583 bool LoopVectorizationLegality::setupOuterLoopInductions() {
584  BasicBlock *Header = TheLoop->getHeader();
585 
586  // Returns true if a given Phi is a supported induction.
587  auto isSupportedPhi = [&](PHINode &Phi) -> bool {
589  if (InductionDescriptor::isInductionPHI(&Phi, TheLoop, PSE, ID) &&
591  addInductionPhi(&Phi, ID, AllowedExit);
592  return true;
593  } else {
594  // Bail out for any Phi in the outer loop header that is not a supported
595  // induction.
596  LLVM_DEBUG(
597  dbgs()
598  << "LV: Found unsupported PHI for outer loop vectorization.\n");
599  return false;
600  }
601  };
602 
603  if (llvm::all_of(Header->phis(), isSupportedPhi))
604  return true;
605  else
606  return false;
607 }
608 
609 /// Checks if a function is scalarizable according to the TLI, in
610 /// the sense that it should be vectorized and then expanded in
611 /// multiple scalar calls. This is represented in the
612 /// TLI via mappings that do not specify a vector name, as in the
613 /// following example:
614 ///
615 /// const VecDesc VecIntrinsics[] = {
616 /// {"llvm.phx.abs.i32", "", 4}
617 /// };
618 static bool isTLIScalarize(const TargetLibraryInfo &TLI, const CallInst &CI) {
619  const StringRef ScalarName = CI.getCalledFunction()->getName();
620  bool Scalarize = TLI.isFunctionVectorizable(ScalarName);
621  // Check that all known VFs are not associated to a vector
622 // function, i.e. the vector name is empty.
623  if (Scalarize) {
624  ElementCount WidestFixedVF, WidestScalableVF;
625  TLI.getWidestVF(ScalarName, WidestFixedVF, WidestScalableVF);
627  ElementCount::isKnownLE(VF, WidestFixedVF); VF *= 2)
628  Scalarize &= !TLI.isFunctionVectorizable(ScalarName, VF);
630  ElementCount::isKnownLE(VF, WidestScalableVF); VF *= 2)
631  Scalarize &= !TLI.isFunctionVectorizable(ScalarName, VF);
632  assert((WidestScalableVF.isZero() || !Scalarize) &&
633  "Caller may decide to scalarize a variant using a scalable VF");
634  }
635  return Scalarize;
636 }
637 
638 bool LoopVectorizationLegality::canVectorizeInstrs() {
639  BasicBlock *Header = TheLoop->getHeader();
640 
641  // For each block in the loop.
642  for (BasicBlock *BB : TheLoop->blocks()) {
643  // Scan the instructions in the block and look for hazards.
644  for (Instruction &I : *BB) {
645  if (auto *Phi = dyn_cast<PHINode>(&I)) {
646  Type *PhiTy = Phi->getType();
647  // Check that this PHI type is allowed.
648  if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() &&
649  !PhiTy->isPointerTy()) {
650  reportVectorizationFailure("Found a non-int non-pointer PHI",
651  "loop control flow is not understood by vectorizer",
652  "CFGNotUnderstood", ORE, TheLoop);
653  return false;
654  }
655 
656  // If this PHINode is not in the header block, then we know that we
657  // can convert it to select during if-conversion. No need to check if
658  // the PHIs in this block are induction or reduction variables.
659  if (BB != Header) {
660  // Non-header phi nodes that have outside uses can be vectorized. Add
661  // them to the list of allowed exits.
662  // Unsafe cyclic dependencies with header phis are identified during
663  // legalization for reduction, induction and first order
664  // recurrences.
665  AllowedExit.insert(&I);
666  continue;
667  }
668 
669  // We only allow if-converted PHIs with exactly two incoming values.
670  if (Phi->getNumIncomingValues() != 2) {
671  reportVectorizationFailure("Found an invalid PHI",
672  "loop control flow is not understood by vectorizer",
673  "CFGNotUnderstood", ORE, TheLoop, Phi);
674  return false;
675  }
676 
677  RecurrenceDescriptor RedDes;
678  if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes, DB, AC,
679  DT)) {
680  Requirements->addExactFPMathInst(RedDes.getExactFPMathInst());
681  AllowedExit.insert(RedDes.getLoopExitInstr());
682  Reductions[Phi] = RedDes;
683  continue;
684  }
685 
686  // TODO: Instead of recording the AllowedExit, it would be good to record the
687  // complementary set: NotAllowedExit. These include (but may not be
688  // limited to):
689  // 1. Reduction phis as they represent the one-before-last value, which
690  // is not available when vectorized
691  // 2. Induction phis and increment when SCEV predicates cannot be used
692  // outside the loop - see addInductionPhi
693  // 3. Non-Phis with outside uses when SCEV predicates cannot be used
694  // outside the loop - see call to hasOutsideLoopUser in the non-phi
695  // handling below
696  // 4. FirstOrderRecurrence phis that can possibly be handled by
697  // extraction.
698  // By recording these, we can then reason about ways to vectorize each
699  // of these NotAllowedExit.
701  if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID)) {
702  addInductionPhi(Phi, ID, AllowedExit);
703  Requirements->addExactFPMathInst(ID.getExactFPMathInst());
704  continue;
705  }
706 
708  SinkAfter, DT)) {
709  AllowedExit.insert(Phi);
710  FirstOrderRecurrences.insert(Phi);
711  continue;
712  }
713 
714 // As a last resort, coerce the PHI to an AddRec expression
715 // and re-try classifying it as an induction PHI.
716  if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID, true)) {
717  addInductionPhi(Phi, ID, AllowedExit);
718  continue;
719  }
720 
721  reportVectorizationFailure("Found an unidentified PHI",
722  "value that could not be identified as "
723  "reduction is used outside the loop",
724  "NonReductionValueUsedOutsideLoop", ORE, TheLoop, Phi);
725  return false;
726  } // end of PHI handling
727 
728  // We handle calls that:
729  // * Are debug info intrinsics.
730  // * Have a mapping to an IR intrinsic.
731  // * Have a vector version available.
732  auto *CI = dyn_cast<CallInst>(&I);
733 
734  if (CI && !getVectorIntrinsicIDForCall(CI, TLI) &&
735  !isa<DbgInfoIntrinsic>(CI) &&
736  !(CI->getCalledFunction() && TLI &&
737  (!VFDatabase::getMappings(*CI).empty() ||
738  isTLIScalarize(*TLI, *CI)))) {
739 // If the call is a recognized math library call, it is likely that
740  // we can vectorize it given loosened floating-point constraints.
741  LibFunc Func;
742  bool IsMathLibCall =
743  TLI && CI->getCalledFunction() &&
744  CI->getType()->isFloatingPointTy() &&
745  TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) &&
746  TLI->hasOptimizedCodeGen(Func);
747 
748  if (IsMathLibCall) {
749  // TODO: Ideally, we should not use clang-specific language here,
750  // but it's hard to provide meaningful yet generic advice.
751  // Also, should this be guarded by allowExtraAnalysis() and/or be part
752  // of the returned info from isFunctionVectorizable()?
754  "Found a non-intrinsic callsite",
755  "library call cannot be vectorized. "
756  "Try compiling with -fno-math-errno, -ffast-math, "
757  "or similar flags",
758  "CantVectorizeLibcall", ORE, TheLoop, CI);
759  } else {
760  reportVectorizationFailure("Found a non-intrinsic callsite",
761  "call instruction cannot be vectorized",
762  "CantVectorizeLibcall", ORE, TheLoop, CI);
763  }
764  return false;
765  }
766 
767  // Some intrinsics have scalar arguments and should be same in order for
768  // them to be vectorized (i.e. loop invariant).
769  if (CI) {
770  auto *SE = PSE.getSE();
771  Intrinsic::ID IntrinID = getVectorIntrinsicIDForCall(CI, TLI);
772  for (unsigned i = 0, e = CI->arg_size(); i != e; ++i)
773  if (hasVectorInstrinsicScalarOpd(IntrinID, i)) {
774  if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(i)), TheLoop)) {
775  reportVectorizationFailure("Found unvectorizable intrinsic",
776  "intrinsic instruction cannot be vectorized",
777  "CantVectorizeIntrinsic", ORE, TheLoop, CI);
778  return false;
779  }
780  }
781  }
782 
783  // Check that the instruction return type is vectorizable.
784  // Also, we can't vectorize extractelement instructions.
785  if ((!VectorType::isValidElementType(I.getType()) &&
786  !I.getType()->isVoidTy()) ||
787  isa<ExtractElementInst>(I)) {
788  reportVectorizationFailure("Found unvectorizable type",
789  "instruction return type cannot be vectorized",
790  "CantVectorizeInstructionReturnType", ORE, TheLoop, &I);
791  return false;
792  }
793 
794  // Check that the stored type is vectorizable.
795  if (auto *ST = dyn_cast<StoreInst>(&I)) {
796  Type *T = ST->getValueOperand()->getType();
798  reportVectorizationFailure("Store instruction cannot be vectorized",
799  "store instruction cannot be vectorized",
800  "CantVectorizeStore", ORE, TheLoop, ST);
801  return false;
802  }
803 
804  // For nontemporal stores, check that a nontemporal vector version is
805  // supported on the target.
806  if (ST->getMetadata(LLVMContext::MD_nontemporal)) {
807  // Arbitrarily try a vector of 2 elements.
808  auto *VecTy = FixedVectorType::get(T, /*NumElts=*/2);
809  assert(VecTy && "did not find vectorized version of stored type");
810  if (!TTI->isLegalNTStore(VecTy, ST->getAlign())) {
812  "nontemporal store instruction cannot be vectorized",
813  "nontemporal store instruction cannot be vectorized",
814  "CantVectorizeNontemporalStore", ORE, TheLoop, ST);
815  return false;
816  }
817  }
818 
819  } else if (auto *LD = dyn_cast<LoadInst>(&I)) {
820  if (LD->getMetadata(LLVMContext::MD_nontemporal)) {
821  // For nontemporal loads, check that a nontemporal vector version is
822  // supported on the target (arbitrarily try a vector of 2 elements).
823  auto *VecTy = FixedVectorType::get(I.getType(), /*NumElts=*/2);
824  assert(VecTy && "did not find vectorized version of load type");
825  if (!TTI->isLegalNTLoad(VecTy, LD->getAlign())) {
827  "nontemporal load instruction cannot be vectorized",
828  "nontemporal load instruction cannot be vectorized",
829  "CantVectorizeNontemporalLoad", ORE, TheLoop, LD);
830  return false;
831  }
832  }
833 
834  // FP instructions can allow unsafe algebra, thus vectorizable by
835  // non-IEEE-754 compliant SIMD units.
836  // This applies to floating-point math operations and calls, not memory
837  // operations, shuffles, or casts, as they don't change precision or
838  // semantics.
839  } else if (I.getType()->isFloatingPointTy() && (CI || I.isBinaryOp()) &&
840  !I.isFast()) {
841  LLVM_DEBUG(dbgs() << "LV: Found FP op with unsafe algebra.\n");
842  Hints->setPotentiallyUnsafe();
843  }
844 
845  // Reduction instructions are allowed to have exit users.
846  // All other instructions must not have external users.
847  if (hasOutsideLoopUser(TheLoop, &I, AllowedExit)) {
848 // We can safely vectorize loops where instructions within the loop are
849 // used outside the loop only if the SCEV predicates within the loop are
850 // the same as outside the loop. Allowing the exit means reusing the SCEV
851 // outside the loop.
852  if (PSE.getUnionPredicate().isAlwaysTrue()) {
853  AllowedExit.insert(&I);
854  continue;
855  }
856  reportVectorizationFailure("Value cannot be used outside the loop",
857  "value cannot be used outside the loop",
858  "ValueUsedOutsideLoop", ORE, TheLoop, &I);
859  return false;
860  }
861  } // next instr.
862  }
863 
864  if (!PrimaryInduction) {
865  if (Inductions.empty()) {
866  reportVectorizationFailure("Did not find one integer induction var",
867  "loop induction variable could not be identified",
868  "NoInductionVariable", ORE, TheLoop);
869  return false;
870  } else if (!WidestIndTy) {
871  reportVectorizationFailure("Did not find one integer induction var",
872  "integer loop induction variable could not be identified",
873  "NoIntegerInductionVariable", ORE, TheLoop);
874  return false;
875  } else {
876  LLVM_DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
877  }
878  }
879 
880  // For first order recurrences, we use the previous value (incoming value from
881  // the latch) to check if it dominates all users of the recurrence. Bail out
882  // if we have to sink such an instruction for another recurrence, as the
883  // dominance requirement may not hold after sinking.
884  BasicBlock *LoopLatch = TheLoop->getLoopLatch();
885  if (any_of(FirstOrderRecurrences, [LoopLatch, this](const PHINode *Phi) {
886  Instruction *V =
887  cast<Instruction>(Phi->getIncomingValueForBlock(LoopLatch));
888  return SinkAfter.find(V) != SinkAfter.end();
889  }))
890  return false;
891 
892  // Now we know the widest induction type, check if our found induction
893  // is the same size. If it's not, unset it here and InnerLoopVectorizer
894  // will create another.
895  if (PrimaryInduction && WidestIndTy != PrimaryInduction->getType())
896  PrimaryInduction = nullptr;
897 
898  return true;
899 }
900 
901 bool LoopVectorizationLegality::canVectorizeMemory() {
902  LAI = &(*GetLAA)(*TheLoop);
903  const OptimizationRemarkAnalysis *LAR = LAI->getReport();
904  if (LAR) {
905  ORE->emit([&]() {
906  return OptimizationRemarkAnalysis(Hints->vectorizeAnalysisPassName(),
907  "loop not vectorized: ", *LAR);
908  });
909  }
910 
911  if (!LAI->canVectorizeMemory())
912  return false;
913 
915  reportVectorizationFailure("Stores to a uniform address",
916  "write to a loop invariant address could not be vectorized",
917  "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
918  return false;
919  }
920 
922  PSE.addPredicate(LAI->getPSE().getUnionPredicate());
923  return true;
924 }
925 
927  bool EnableStrictReductions) {
928 
929  // First check if there is any ExactFP math or if we allow reassociations
930  if (!Requirements->getExactFPInst() || Hints->allowReordering())
931  return true;
932 
933  // If the above is false, we have ExactFPMath & do not allow reordering.
934  // If the EnableStrictReductions flag is set, first check if we have any
935  // Exact FP induction vars, which we cannot vectorize.
936  if (!EnableStrictReductions ||
937  any_of(getInductionVars(), [&](auto &Induction) -> bool {
938  InductionDescriptor IndDesc = Induction.second;
939  return IndDesc.getExactFPMathInst();
940  }))
941  return false;
942 
943  // We can now only vectorize if all reductions with Exact FP math also
944  // have the isOrdered flag set, which indicates that we can move the
945  // reduction operations in-loop.
946  return (all_of(getReductionVars(), [&](auto &Reduction) -> bool {
947  const RecurrenceDescriptor &RdxDesc = Reduction.second;
948  return !RdxDesc.hasExactFPMath() || RdxDesc.isOrdered();
949  }));
950 }
951 
953  Value *In0 = const_cast<Value *>(V);
954  PHINode *PN = dyn_cast_or_null<PHINode>(In0);
955  if (!PN)
956  return false;
957 
958  return Inductions.count(PN);
959 }
960 
961 const InductionDescriptor *
963  if (!isInductionPhi(Phi))
964  return nullptr;
965  auto &ID = getInductionVars().find(Phi)->second;
966  if (ID.getKind() == InductionDescriptor::IK_IntInduction ||
968  return &ID;
969  return nullptr;
970 }
971 
973  const Value *V) const {
974  auto *Inst = dyn_cast<Instruction>(V);
975  return (Inst && InductionCastsToIgnore.count(Inst));
976 }
977 
980 }
981 
983  const PHINode *Phi) const {
984  return FirstOrderRecurrences.count(Phi);
985 }
986 
988  return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
989 }
990 
991 bool LoopVectorizationLegality::blockCanBePredicated(
994  SmallPtrSetImpl<Instruction *> &ConditionalAssumes) const {
995  for (Instruction &I : *BB) {
996  // Check that we don't have a constant expression that can trap as operand.
997  for (Value *Operand : I.operands()) {
998  if (auto *C = dyn_cast<Constant>(Operand))
999  if (C->canTrap())
1000  return false;
1001  }
1002 
1003  // We can predicate blocks with calls to assume, as long as we drop them in
1004  // case we flatten the CFG via predication.
1005  if (match(&I, m_Intrinsic<Intrinsic::assume>())) {
1006  ConditionalAssumes.insert(&I);
1007  continue;
1008  }
1009 
1010  // Do not let llvm.experimental.noalias.scope.decl block the vectorization.
1011  // TODO: there might be cases that it should block the vectorization. Let's
1012  // ignore those for now.
1013  if (isa<NoAliasScopeDeclInst>(&I))
1014  continue;
1015 
1016  // We might be able to hoist the load.
1017  if (I.mayReadFromMemory()) {
1018  auto *LI = dyn_cast<LoadInst>(&I);
1019  if (!LI)
1020  return false;
1021  if (!SafePtrs.count(LI->getPointerOperand())) {
1022  MaskedOp.insert(LI);
1023  continue;
1024  }
1025  }
1026 
1027  if (I.mayWriteToMemory()) {
1028  auto *SI = dyn_cast<StoreInst>(&I);
1029  if (!SI)
1030  return false;
1031  // Predicated store requires some form of masking:
1032  // 1) masked store HW instruction,
1033  // 2) emulation via load-blend-store (only if safe and legal to do so,
1034  // be aware on the race conditions), or
1035  // 3) element-by-element predicate check and scalar store.
1036  MaskedOp.insert(SI);
1037  continue;
1038  }
1039  if (I.mayThrow())
1040  return false;
1041  }
1042 
1043  return true;
1044 }
1045 
1046 bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
1047  if (!EnableIfConversion) {
1048  reportVectorizationFailure("If-conversion is disabled",
1049  "if-conversion is disabled",
1050  "IfConversionDisabled",
1051  ORE, TheLoop);
1052  return false;
1053  }
1054 
1055  assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable");
1056 
1057  // A list of pointers which are known to be dereferenceable within scope of
1058  // the loop body for each iteration of the loop which executes. That is,
1059  // the memory pointed to can be dereferenced (with the access size implied by
1060  // the value's type) unconditionally within the loop header without
1061  // introducing a new fault.
1062  SmallPtrSet<Value *, 8> SafePointers;
1063 
1064  // Collect safe addresses.
1065  for (BasicBlock *BB : TheLoop->blocks()) {
1066  if (!blockNeedsPredication(BB)) {
1067  for (Instruction &I : *BB)
1068  if (auto *Ptr = getLoadStorePointerOperand(&I))
1069  SafePointers.insert(Ptr);
1070  continue;
1071  }
1072 
1073  // For a block which requires predication, a address may be safe to access
1074  // in the loop w/o predication if we can prove dereferenceability facts
1075  // sufficient to ensure it'll never fault within the loop. For the moment,
1076  // we restrict this to loads; stores are more complicated due to
1077  // concurrency restrictions.
1078  ScalarEvolution &SE = *PSE.getSE();
1079  for (Instruction &I : *BB) {
1080  LoadInst *LI = dyn_cast<LoadInst>(&I);
1081  if (LI && !LI->getType()->isVectorTy() && !mustSuppressSpeculation(*LI) &&
1082  isDereferenceableAndAlignedInLoop(LI, TheLoop, SE, *DT))
1083  SafePointers.insert(LI->getPointerOperand());
1084  }
1085  }
1086 
1087  // Collect the blocks that need predication.
1088  BasicBlock *Header = TheLoop->getHeader();
1089  for (BasicBlock *BB : TheLoop->blocks()) {
1090  // We don't support switch statements inside loops.
1091  if (!isa<BranchInst>(BB->getTerminator())) {
1092  reportVectorizationFailure("Loop contains a switch statement",
1093  "loop contains a switch statement",
1094  "LoopContainsSwitch", ORE, TheLoop,
1095  BB->getTerminator());
1096  return false;
1097  }
1098 
1099  // We must be able to predicate all blocks that need to be predicated.
1100  if (blockNeedsPredication(BB)) {
1101  if (!blockCanBePredicated(BB, SafePointers, MaskedOp,
1102  ConditionalAssumes)) {
1104  "Control flow cannot be substituted for a select",
1105  "control flow cannot be substituted for a select",
1106  "NoCFGForSelect", ORE, TheLoop,
1107  BB->getTerminator());
1108  return false;
1109  }
1110  } else if (BB != Header && !canIfConvertPHINodes(BB)) {
1112  "Control flow cannot be substituted for a select",
1113  "control flow cannot be substituted for a select",
1114  "NoCFGForSelect", ORE, TheLoop,
1115  BB->getTerminator());
1116  return false;
1117  }
1118  }
1119 
1120  // We can if-convert this loop.
1121  return true;
1122 }
1123 
1124 // Helper function to canVectorizeLoopNestCFG.
1125 bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
1126  bool UseVPlanNativePath) {
1127  assert((UseVPlanNativePath || Lp->isInnermost()) &&
1128  "VPlan-native path is not enabled.");
1129 
1130  // TODO: ORE should be improved to show more accurate information when an
1131  // outer loop can't be vectorized because a nested loop is not understood or
1132  // legal. Something like: "outer_loop_location: loop not vectorized:
1133  // (inner_loop_location) loop control flow is not understood by vectorizer".
1134 
1135  // Store the result and return it at the end instead of exiting early, in case
1136  // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1137  bool Result = true;
1138  bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1139 
1140  // We must have a loop in canonical form. Loops with indirectbr in them cannot
1141  // be canonicalized.
1142  if (!Lp->getLoopPreheader()) {
1143  reportVectorizationFailure("Loop doesn't have a legal pre-header",
1144  "loop control flow is not understood by vectorizer",
1145  "CFGNotUnderstood", ORE, TheLoop);
1146  if (DoExtraAnalysis)
1147  Result = false;
1148  else
1149  return false;
1150  }
1151 
1152  // We must have a single backedge.
1153  if (Lp->getNumBackEdges() != 1) {
1154  reportVectorizationFailure("The loop must have a single backedge",
1155  "loop control flow is not understood by vectorizer",
1156  "CFGNotUnderstood", ORE, TheLoop);
1157  if (DoExtraAnalysis)
1158  Result = false;
1159  else
1160  return false;
1161  }
1162 
1163  return Result;
1164 }
1165 
1166 bool LoopVectorizationLegality::canVectorizeLoopNestCFG(
1167  Loop *Lp, bool UseVPlanNativePath) {
1168  // Store the result and return it at the end instead of exiting early, in case
1169  // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1170  bool Result = true;
1171  bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1172  if (!canVectorizeLoopCFG(Lp, UseVPlanNativePath)) {
1173  if (DoExtraAnalysis)
1174  Result = false;
1175  else
1176  return false;
1177  }
1178 
1179  // Recursively check whether the loop control flow of nested loops is
1180  // understood.
1181  for (Loop *SubLp : *Lp)
1182  if (!canVectorizeLoopNestCFG(SubLp, UseVPlanNativePath)) {
1183  if (DoExtraAnalysis)
1184  Result = false;
1185  else
1186  return false;
1187  }
1188 
1189  return Result;
1190 }
1191 
1192 bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
1193  // Store the result and return it at the end instead of exiting early, in case
1194  // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1195  bool Result = true;
1196 
1197  bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1198  // Check whether the loop-related control flow in the loop nest is expected by
1199  // vectorizer.
1200  if (!canVectorizeLoopNestCFG(TheLoop, UseVPlanNativePath)) {
1201  if (DoExtraAnalysis)
1202  Result = false;
1203  else
1204  return false;
1205  }
1206 
1207  // We need to have a loop header.
1208  LLVM_DEBUG(dbgs() << "LV: Found a loop: " << TheLoop->getHeader()->getName()
1209  << '\n');
1210 
1211  // Specific checks for outer loops. We skip the remaining legal checks at this
1212  // point because they don't support outer loops.
1213  if (!TheLoop->isInnermost()) {
1214  assert(UseVPlanNativePath && "VPlan-native path is not enabled.");
1215 
1216  if (!canVectorizeOuterLoop()) {
1217  reportVectorizationFailure("Unsupported outer loop",
1218  "unsupported outer loop",
1219  "UnsupportedOuterLoop",
1220  ORE, TheLoop);
1221  // TODO: Implement DoExtraAnalysis when subsequent legal checks support
1222  // outer loops.
1223  return false;
1224  }
1225 
1226  LLVM_DEBUG(dbgs() << "LV: We can vectorize this outer loop!\n");
1227  return Result;
1228  }
1229 
1230  assert(TheLoop->isInnermost() && "Inner loop expected.");
1231  // Check if we can if-convert non-single-bb loops.
1232  unsigned NumBlocks = TheLoop->getNumBlocks();
1233  if (NumBlocks != 1 && !canVectorizeWithIfConvert()) {
1234  LLVM_DEBUG(dbgs() << "LV: Can't if-convert the loop.\n");
1235  if (DoExtraAnalysis)
1236  Result = false;
1237  else
1238  return false;
1239  }
1240 
1241  // Check if we can vectorize the instructions and CFG in this loop.
1242  if (!canVectorizeInstrs()) {
1243  LLVM_DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n");
1244  if (DoExtraAnalysis)
1245  Result = false;
1246  else
1247  return false;
1248  }
1249 
1250  // Go over each instruction and look at memory deps.
1251  if (!canVectorizeMemory()) {
1252  LLVM_DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n");
1253  if (DoExtraAnalysis)
1254  Result = false;
1255  else
1256  return false;
1257  }
1258 
1259  LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop"
1260  << (LAI->getRuntimePointerChecking()->Need
1261  ? " (with a runtime bound check)"
1262  : "")
1263  << "!\n");
1264 
1265  unsigned SCEVThreshold = VectorizeSCEVCheckThreshold;
1266  if (Hints->getForce() == LoopVectorizeHints::FK_Enabled)
1267  SCEVThreshold = PragmaVectorizeSCEVCheckThreshold;
1268 
1269  if (PSE.getUnionPredicate().getComplexity() > SCEVThreshold) {
1270  reportVectorizationFailure("Too many SCEV checks needed",
1271  "Too many SCEV assumptions need to be made and checked at runtime",
1272  "TooManySCEVRunTimeChecks", ORE, TheLoop);
1273  if (DoExtraAnalysis)
1274  Result = false;
1275  else
1276  return false;
1277  }
1278 
1279  // Okay! We've done all the tests. If any have failed, return false. Otherwise
1280  // we can vectorize, and at this point we don't have any other mem analysis
1281  // which may limit our maximum vectorization factor, so just return true with
1282  // no restrictions.
1283  return Result;
1284 }
1285 
1287 
1288  LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n");
1289 
1290  SmallPtrSet<const Value *, 8> ReductionLiveOuts;
1291 
1292  for (auto &Reduction : getReductionVars())
1293  ReductionLiveOuts.insert(Reduction.second.getLoopExitInstr());
1294 
1295  // TODO: handle non-reduction outside users when tail is folded by masking.
1296  for (auto *AE : AllowedExit) {
1297  // Check that all users of allowed exit values are inside the loop or
1298  // are the live-out of a reduction.
1299  if (ReductionLiveOuts.count(AE))
1300  continue;
1301  for (User *U : AE->users()) {
1302  Instruction *UI = cast<Instruction>(U);
1303  if (TheLoop->contains(UI))
1304  continue;
1305  LLVM_DEBUG(
1306  dbgs()
1307  << "LV: Cannot fold tail by masking, loop has an outside user for "
1308  << *UI << "\n");
1309  return false;
1310  }
1311  }
1312 
1313  // The list of pointers that we can safely read and write to remains empty.
1314  SmallPtrSet<Value *, 8> SafePointers;
1315 
1317  SmallPtrSet<Instruction *, 8> TmpConditionalAssumes;
1318 
1319  // Check and mark all blocks for predication, including those that ordinarily
1320  // do not need predication such as the header block.
1321  for (BasicBlock *BB : TheLoop->blocks()) {
1322  if (!blockCanBePredicated(BB, SafePointers, TmpMaskedOp,
1323  TmpConditionalAssumes)) {
1324  LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking as requested.\n");
1325  return false;
1326  }
1327  }
1328 
1329  LLVM_DEBUG(dbgs() << "LV: can fold tail by masking.\n");
1330 
1331  MaskedOp.insert(TmpMaskedOp.begin(), TmpMaskedOp.end());
1332  ConditionalAssumes.insert(TmpConditionalAssumes.begin(),
1333  TmpConditionalAssumes.end());
1334 
1335  return true;
1336 }
1337 
1338 } // namespace llvm
i
i
Definition: README.txt:29
llvm::mustSuppressSpeculation
bool mustSuppressSpeculation(const LoadInst &LI)
Return true if speculation of the given load must be suppressed to avoid ordering or interfering with...
Definition: ValueTracking.cpp:4501
llvm::OptimizationRemarkMissed
Diagnostic information for missed-optimization remarks.
Definition: DiagnosticInfo.h:730
llvm::Loop::isLoopInvariant
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:64
llvm::LoopVectorizationLegality::getReductionVars
const ReductionList & getReductionVars() const
Returns the reduction variables found in the loop.
Definition: LoopVectorizationLegality.h:283
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AllocatorList.h:23
llvm::Instruction::getModule
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
Definition: Instruction.cpp:66
llvm::DataLayout
A parsed version of the target data layout string and methods for querying it.
Definition: DataLayout.h:113
llvm::LoopAccessInfo::isUniform
bool isUniform(Value *V) const
Returns true if the value V is uniform within the loop.
Definition: LoopAccessAnalysis.cpp:2172
llvm::RecurrenceDescriptor::isReductionPHI
static bool isReductionPHI(PHINode *Phi, Loop *TheLoop, RecurrenceDescriptor &RedDes, DemandedBits *DB=nullptr, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr)
Returns true if Phi is a reduction in TheLoop.
Definition: IVDescriptors.cpp:765
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
IntrinsicInst.h
llvm::Type::isPointerTy
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:217
llvm::ElementCount
Definition: TypeSize.h:385
EnableIfConversion
static cl::opt< bool > EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden, cl::desc("Enable if-conversion during vectorization."))
llvm::getVectorIntrinsicIDForCall
Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
Definition: VectorUtils.cpp:130
Loads.h
T
llvm::InductionDescriptor::getExactFPMathInst
Instruction * getExactFPMathInst()
Returns floating-point induction operator that does not allow reassociation (transforming the inducti...
Definition: IVDescriptors.h:357
llvm::Function
Definition: Function.h:62
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:530
llvm::RecurrenceDescriptor::hasExactFPMath
bool hasExactFPMath() const
Returns true if the recurrence has floating-point math that requires precise (ordered) operations.
Definition: IVDescriptors.h:212
llvm::LoopBase::contains
bool contains(const LoopT *L) const
Return true if the specified loop is contained within this loop.
Definition: LoopInfo.h:122
llvm::ARM_MB::LD
@ LD
Definition: ARMBaseInfo.h:72
llvm::StringRef::npos
static constexpr size_t npos
Definition: StringRef.h:59
llvm::LinearPolySize< ElementCount >::isKnownLE
static bool isKnownLE(const LinearPolySize &LHS, const LinearPolySize &RHS)
Definition: TypeSize.h:340
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1177
llvm::LoopVectorizationRequirements::addRuntimePointerChecks
void addRuntimePointerChecks(unsigned Num)
Definition: LoopVectorizationLegality.h:208
SizeOpts.h
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:168
llvm::Loop::getStartLoc
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition: LoopInfo.cpp:634
llvm::TargetLibraryInfo::isFunctionVectorizable
bool isFunctionVectorizable(StringRef F, const ElementCount &VF) const
Definition: TargetLibraryInfo.h:328
llvm::PredicatedScalarEvolution::getUnionPredicate
const SCEVUnionPredicate & getUnionPredicate() const
Definition: ScalarEvolution.cpp:13851
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:460
llvm::VectorizerParams::VectorizationInterleave
static unsigned VectorizationInterleave
Interleave factor as overridden by the user.
Definition: LoopAccessAnalysis.h:43
llvm::LoopVectorizeHints::SK_Unspecified
@ SK_Unspecified
Not selected.
Definition: LoopVectorizationLegality.h:102
ValueTracking.h
llvm::OptimizationRemarkEmitter::allowExtraAnalysis
bool allowExtraAnalysis(StringRef PassName) const
Whether we allow for extra compile-time budget to perform more analysis to produce fewer false positi...
Definition: OptimizationRemarkEmitter.h:98
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
VectorizeSCEVCheckThreshold
static cl::opt< unsigned > VectorizeSCEVCheckThreshold("vectorize-scev-check-threshold", cl::init(16), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed."))
llvm::InductionDescriptor::IK_IntInduction
@ IK_IntInduction
Integer induction variable. Step = C.
Definition: IVDescriptors.h:311
llvm::LoopVectorizeHints::SK_FixedWidthOnly
@ SK_FixedWidthOnly
Disables vectorization with scalable vectors.
Definition: LoopVectorizationLegality.h:104
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::getPtrStride
int64_t getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const ValueToValueMap &StridesMap=ValueToValueMap(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of the access type size.
Definition: LoopAccessAnalysis.cpp:1052
llvm::LoopVectorizeHints::getWidth
ElementCount getWidth() const
Definition: LoopVectorizationLegality.h:124
llvm::LoopVectorizationLegality::canVectorizeFPMath
bool canVectorizeFPMath(bool EnableStrictReductions)
Returns true if it is legal to vectorize the FP math operations in this loop.
Definition: LoopVectorizationLegality.cpp:926
llvm::LoopVectorizeHints::getIsVectorized
unsigned getIsVectorized() const
Definition: LoopVectorizationLegality.h:138
llvm::TargetTransformInfo::isLegalNTLoad
bool isLegalNTLoad(Type *DataType, Align Alignment) const
Return true if the target supports nontemporal load.
Definition: TargetTransformInfo.cpp:397
llvm::TargetTransformInfo::isLegalNTStore
bool isLegalNTStore(Type *DataType, Align Alignment) const
Return true if the target supports nontemporal store.
Definition: TargetTransformInfo.cpp:392
llvm::LoopVectorizeHints::LoopVectorizeHints
LoopVectorizeHints(const Loop *L, bool InterleaveOnlyWhenForced, OptimizationRemarkEmitter &ORE, const TargetTransformInfo *TTI=nullptr)
Definition: LoopVectorizationLegality.cpp:97
llvm::ConstantAsMetadata::get
static ConstantAsMetadata * get(Constant *C)
Definition: Metadata.h:419
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
Definition: SmallPtrSet.h:449
llvm::reportVectorizationFailure
void reportVectorizationFailure(const StringRef DebugMsg, const StringRef OREMsg, const StringRef ORETag, OptimizationRemarkEmitter *ORE, Loop *TheLoop, Instruction *I=nullptr)
Reports a vectorization failure: print DebugMsg for debugging purposes along with the corresponding o...
Definition: LoopVectorize.cpp:1081
llvm::LoadInst::getPointerOperand
Value * getPointerOperand()
Definition: Instructions.h:272
llvm::isPowerOf2_32
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:491
llvm::isUniformLoopNest
static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp)
Definition: LoopVectorizationLegality.cpp:377
llvm::RISCVFeatures::validate
void validate(const Triple &TT, const FeatureBitset &FeatureBits)
Definition: RISCVBaseInfo.cpp:100
llvm::LoopVectorizationLegality::prepareToFoldTailByMasking
bool prepareToFoldTailByMasking()
Return true if we can vectorize this loop while folding its tail by masking, and mark all respective ...
Definition: LoopVectorizationLegality.cpp:1286
llvm::Type::isFloatingPointTy
bool isFloatingPointTy() const
Return true if this is one of the six floating-point types.
Definition: Type.h:162
llvm::Type::getInt32Ty
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:241
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::MDNode::get
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1233
F
#define F(x, y, z)
Definition: MD5.cpp:55
Context
ManagedStatic< detail::RecordContext > Context
Definition: Record.cpp:96
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::MDNode::getNumOperands
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1143
llvm::makePostTransformationMetadata
llvm::MDNode * makePostTransformationMetadata(llvm::LLVMContext &Context, MDNode *OrigLoopID, llvm::ArrayRef< llvm::StringRef > RemovePrefixes, llvm::ArrayRef< llvm::MDNode * > AddAttrs)
Create a new LoopID after the loop has been transformed.
Definition: LoopInfo.cpp:1125
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::LoopVectorizeHints::FK_Undefined
@ FK_Undefined
Not selected.
Definition: LoopVectorizationLegality.h:95
Arg
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
Definition: AMDGPULibCalls.cpp:185
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1649
llvm::shouldOptimizeForSize
bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
Definition: MachineSizeOpts.cpp:183
llvm::InductionDescriptor
A struct for saving information about induction variables.
Definition: IVDescriptors.h:306
llvm::LoopAccessInfo::hasDependenceInvolvingLoopInvariantAddress
bool hasDependenceInvolvingLoopInvariantAddress() const
If the loop has memory dependence involving an invariant address, i.e.
Definition: LoopAccessAnalysis.h:577
MaxInterleaveFactor
static const unsigned MaxInterleaveFactor
Maximum vectorization interleave count.
Definition: LoopVectorizationLegality.cpp:77
llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
isZero
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition: Lint.cpp:519
llvm::LoopAccessInfo::blockNeedsPredication
static bool blockNeedsPredication(BasicBlock *BB, Loop *TheLoop, DominatorTree *DT)
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
Definition: LoopAccessAnalysis.cpp:2143
llvm::ValueToValueMap
DenseMap< const Value *, Value * > ValueToValueMap
Definition: ScalarEvolutionExpressions.h:903
llvm::User
Definition: User.h:44
llvm::LibFunc
LibFunc
Definition: TargetLibraryInfo.h:34
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::isUniformLoop
static bool isUniformLoop(Loop *Lp, Loop *OuterLp)
Definition: LoopVectorizationLegality.cpp:332
llvm::RecurrenceDescriptor::getExactFPMathInst
Instruction * getExactFPMathInst() const
Returns 1st non-reassociative FP instruction in the PHI node's use-chain.
Definition: IVDescriptors.h:215
llvm::CallBase::getCalledFunction
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation.
Definition: InstrTypes.h:1398
llvm::LoopVectorizeHints::vectorizeAnalysisPassName
const char * vectorizeAnalysisPassName() const
If hints are provided that force vectorization, use the AlwaysPrint pass name to force the frontend t...
Definition: LoopVectorizationLegality.cpp:231
llvm::ms_demangle::QualifierMangleMode::Result
@ Result
llvm::LoopVectorizationRequirements::addExactFPMathInst
void addExactFPMathInst(Instruction *I)
Track the 1st floating-point instruction that can not be reassociated.
Definition: LoopVectorizationLegality.h:203
llvm::LoopBase::blocks
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:178
llvm::LoopVectorizationLegality::blockNeedsPredication
bool blockNeedsPredication(BasicBlock *BB) const
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
Definition: LoopVectorizationLegality.cpp:987
TargetLibraryInfo.h
llvm::Type::isVectorTy
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:226
EnableVPlanPredication
cl::opt< bool > EnableVPlanPredication
llvm::PHINode::getIncomingValueForBlock
Value * getIncomingValueForBlock(const BasicBlock *BB) const
Definition: Instructions.h:2842
llvm::TargetTransformInfo::enableScalableVectorization
bool enableScalableVectorization() const
Definition: TargetTransformInfo.cpp:1085
llvm::TargetLibraryInfo::getLibFunc
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
Definition: TargetLibraryInfo.h:291
llvm::Instruction
Definition: Instruction.h:45
llvm::Type::getScalarSizeInBits
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition: Type.cpp:191
llvm::PGSOQueryType::IRPass
@ IRPass
llvm::LoopVectorizeHints::getForce
enum ForceKind getForce() const
Definition: LoopVectorizationLegality.h:140
llvm::BasicBlock::phis
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:354
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:932
PatternMatch.h
llvm::FixedVectorType::get
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:686
llvm::Metadata
Root of the metadata hierarchy.
Definition: Metadata.h:62
llvm::PHINode::getNumIncomingValues
unsigned getNumIncomingValues() const
Return the number of incoming edges.
Definition: Instructions.h:2749
llvm::LinearPolySize< ElementCount >::getFixed
static ElementCount getFixed(ScalarTy MinVal)
Definition: TypeSize.h:283
llvm::lltok::Kind
Kind
Definition: LLToken.h:18
LoopInfo.h
llvm::Twine::str
std::string str() const
Return the twine contents as a std::string.
Definition: Twine.cpp:17
llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:190
llvm::MDNode::getOperand
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1137
llvm::VectorType::isValidElementType
static bool isValidElementType(Type *ElemTy)
Return true if the specified type is valid as an element type.
Definition: Type.cpp:677
llvm::canIfConvertPHINodes
static bool canIfConvertPHINodes(BasicBlock *BB)
Check whether it is safe to if-convert this phi node.
Definition: LoopVectorizationLegality.cpp:393
VectorUtils.h
llvm::cl::opt< bool >
llvm::SCEVUnionPredicate::getComplexity
unsigned getComplexity() const override
We estimate the complexity of a union predicate as the size number of predicates in the union.
Definition: ScalarEvolution.h:449
llvm::LoopVectorizeHints::SK_PreferScalable
@ SK_PreferScalable
Vectorize loops using scalable vectors or fixed-width vectors, but favor scalable vectors when the co...
Definition: LoopVectorizationLegality.h:108
llvm::cl::values
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:697
llvm::RuntimePointerChecking::Need
bool Need
This flag indicates if we need to add the runtime check.
Definition: LoopAccessAnalysis.h:447
llvm::MapVector::find
iterator find(const KeyT &Key)
Definition: MapVector.h:148
llvm::VFDatabase::getMappings
static SmallVector< VFInfo, 8 > getMappings(const CallInst &CI)
Retrieve all the VFInfo instances associated to the CallInst CI.
Definition: VectorUtils.h:249
llvm::HintsAllowReordering
cl::opt< bool > HintsAllowReordering("hints-allow-reordering", cl::init(true), cl::Hidden, cl::desc("Allow enabling loop hints to reorder " "FP operations during vectorization."))
llvm::SmallPtrSetImpl::end
iterator end() const
Definition: SmallPtrSet.h:407
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
llvm::LoopVectorizeHints::FK_Enabled
@ FK_Enabled
Forcing enabled.
Definition: LoopVectorizationLegality.h:97
llvm::LoopVectorizeHints::getInterleave
unsigned getInterleave() const
Definition: LoopVectorizationLegality.h:129
llvm::LoopVectorizeHints::allowVectorization
bool allowVectorization(Function *F, Loop *L, bool VectorizeOnlyWhenForced) const
Definition: LoopVectorizationLegality.cpp:172
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:57
llvm::DenseMap< const Value *, Value * >
llvm::LoopAccessInfo::getRuntimePointerChecking
const RuntimePointerChecking * getRuntimePointerChecking() const
Definition: LoopAccessAnalysis.h:528
I
#define I(x, y, z)
Definition: MD5.cpp:58
ForceScalableVectorization
static cl::opt< LoopVectorizeHints::ScalableForceKind > ForceScalableVectorization("scalable-vectorization", cl::init(LoopVectorizeHints::SK_Unspecified), cl::Hidden, cl::desc("Control whether the compiler can use scalable vectors to " "vectorize a loop"), cl::values(clEnumValN(LoopVectorizeHints::SK_FixedWidthOnly, "off", "Scalable vectorization is disabled."), clEnumValN(LoopVectorizeHints::SK_PreferScalable, "preferred", "Scalable vectorization is available and favored when the " "cost is inconclusive."), clEnumValN(LoopVectorizeHints::SK_PreferScalable, "on", "Scalable vectorization is available and favored when the " "cost is inconclusive.")))
llvm::hasVectorInstrinsicScalarOpd
bool hasVectorInstrinsicScalarOpd(Intrinsic::ID ID, unsigned ScalarOpdIdx)
Identifies if the vector form of the intrinsic has a scalar operand.
Definition: VectorUtils.cpp:99
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:441
llvm::LoopBase::getLoopPreheader
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:167
llvm::SmallPtrSetImpl::begin
iterator begin() const
Definition: SmallPtrSet.h:402
llvm::VectorizerParams::MaxVectorWidth
static const unsigned MaxVectorWidth
Maximum SIMD width.
Definition: LoopAccessAnalysis.h:38
llvm::LoopAccessInfo::getReport
const OptimizationRemarkAnalysis * getReport() const
The diagnostics report generated for the analysis.
Definition: LoopAccessAnalysis.h:552
llvm::MDString::get
static MDString * get(LLVMContext &Context, StringRef Str)
Definition: Metadata.cpp:473
llvm::LoopBase::getLoopLatch
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:216
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::PredicatedScalarEvolution::getSCEV
const SCEV * getSCEV(Value *V)
Returns the SCEV expression of V, in the context of the current SCEV predicate.
Definition: ScalarEvolution.cpp:13816
llvm::LoopVectorizationLegality::isCastedInductionVariable
bool isCastedInductionVariable(const Value *V) const
Returns True if V is a cast that is part of an induction def-use chain, and had been proven to be red...
Definition: LoopVectorizationLegality.cpp:972
llvm::LoopVectorizationLegality::isInductionVariable
bool isInductionVariable(const Value *V) const
Returns True if V can be considered as an induction variable in this loop.
Definition: LoopVectorizationLegality.cpp:978
SI
StandardInstrumentations SI(Debug, VerifyEach)
llvm::OptimizationRemarkEmitter::emit
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Definition: OptimizationRemarkEmitter.cpp:77
llvm::LoopAccessInfo::canVectorizeMemory
bool canVectorizeMemory() const
Return true if we can analyze the memory accesses in the loop and there are no memory dependence cycles.
Definition: LoopAccessAnalysis.h:521
llvm::InductionDescriptor::IK_FpInduction
@ IK_FpInduction
Floating point induction variable.
Definition: IVDescriptors.h:313
llvm::isTLIScalarize
static bool isTLIScalarize(const TargetLibraryInfo &TLI, const CallInst &CI)
Checks if a function is scalarizable according to the TLI, in the sense that it should be vectorized ...
Definition: LoopVectorizationLegality.cpp:618
llvm::LoopVectorizationRequirements::getExactFPInst
Instruction * getExactFPInst()
Definition: LoopVectorizationLegality.h:211
llvm::MDNode
Metadata node.
Definition: Metadata.h:906
llvm::isDereferenceableAndAlignedInLoop
bool isDereferenceableAndAlignedInLoop(LoadInst *LI, Loop *L, ScalarEvolution &SE, DominatorTree &DT)
Return true if we can prove that the given load (which is assumed to be within the specified loop) wo...
Definition: Loads.cpp:273
llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:382
llvm::LoopVectorizationLegality::isUniform
bool isUniform(Value *V)
Returns true if the value V is uniform within the loop.
Definition: LoopVectorizationLegality.cpp:459
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:75
LV_NAME
#define LV_NAME
Definition: LoopVectorizationLegality.cpp:31
llvm::size
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1630
llvm::SCEVUnionPredicate::isAlwaysTrue
bool isAlwaysTrue() const override
Implementation of the SCEVPredicate interface.
Definition: ScalarEvolution.cpp:13750
llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: OptimizationRemarkEmitter.h:33
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1656
llvm::LoopVectorizationLegality::isConsecutivePtr
int isConsecutivePtr(Type *AccessTy, Value *Ptr) const
Check if this pointer is consecutive when vectorizing.
Definition: LoopVectorizationLegality.cpp:442
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:57
llvm::LoopVectorizeHints::ScalableForceKind
ScalableForceKind
Definition: LoopVectorizationLegality.h:100
llvm::Loop::getCanonicalInductionVariable
PHINode * getCanonicalInductionVariable() const
Check to see if the loop has a canonical induction variable: an integer recurrence that starts at 0 a...
Definition: LoopInfo.cpp:150
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
clEnumValN
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:672
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
llvm::LoopVectorizeHints::emitRemarkWithHints
void emitRemarkWithHints() const
Dumps all the hint information.
Definition: LoopVectorizationLegality.cpp:205
llvm::convertPointerToIntegerType
static Type * convertPointerToIntegerType(const DataLayout &DL, Type *Ty)
Definition: LoopVectorizationLegality.cpp:403
llvm::LoopVectorizationLegality::getIntOrFpInductionDescriptor
const InductionDescriptor * getIntOrFpInductionDescriptor(PHINode *Phi) const
Returns a pointer to the induction descriptor, if Phi is an integer or floating point induction.
Definition: LoopVectorizationLegality.cpp:962
llvm::OptimizationRemarkAnalysis
Diagnostic information for optimization analysis remarks.
Definition: DiagnosticInfo.h:776
llvm::ifs::IFSSymbolType::Func
@ Func
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
llvm::Type::getContext
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:127
llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:180
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:152
llvm::VectorizerParams::isInterleaveForced
static bool isInterleaveForced()
True if force-vector-interleave was specified by the user.
Definition: LoopAccessAnalysis.cpp:132
LoopVectorize.h
llvm::BasicBlock::getContext
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:36
llvm::MapVector::empty
bool empty() const
Definition: MapVector.h:80
llvm::LoopBase::isInnermost
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
Definition: LoopInfo.h:165
llvm::InductionDescriptor::isInductionPHI
static bool isInductionPHI(PHINode *Phi, const Loop *L, ScalarEvolution *SE, InductionDescriptor &D, const SCEV *Expr=nullptr, SmallVectorImpl< Instruction * > *CastsToIgnore=nullptr)
Returns true if Phi is an induction in the loop L.
Definition: IVDescriptors.cpp:1362
PragmaVectorizeSCEVCheckThreshold
static cl::opt< unsigned > PragmaVectorizeSCEVCheckThreshold("pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed with a " "vectorize(enable) pragma"))
llvm::LoopInfoBase::isLoopHeader
bool isLoopHeader(const BlockT *BB) const
Definition: LoopInfo.h:983
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:83
llvm::MapVector::count
size_type count(const KeyT &Key) const
Definition: MapVector.h:143
llvm::GraphProgram::Name
Name
Definition: GraphWriter.h:52
llvm::LoopVectorizationLegality::isFirstOrderRecurrence
bool isFirstOrderRecurrence(const PHINode *Phi) const
Returns True if Phi is a first-order recurrence in this loop.
Definition: LoopVectorizationLegality.cpp:982
H
#define H(x, y, z)
Definition: MD5.cpp:57
llvm::Loop::setLoopID
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
Definition: LoopInfo.cpp:528
llvm::VectorizationFactor
TODO: The following VectorizationFactor was pulled out of LoopVectorizationCostModel class.
Definition: LoopVectorizationPlanner.h:184
llvm::Loop::getLoopID
MDNode * getLoopID() const
Return the llvm.loop loop id metadata node for this loop if it is present.
Definition: LoopInfo.cpp:504
llvm::RecurrenceDescriptor::isFirstOrderRecurrence
static bool isFirstOrderRecurrence(PHINode *Phi, Loop *TheLoop, MapVector< Instruction *, Instruction * > &SinkAfter, DominatorTree *DT)
Returns true if Phi is a first-order recurrence.
Definition: IVDescriptors.cpp:850
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:104
llvm::LinearPolySize< ElementCount >::getScalable
static ElementCount getScalable(ScalarTy MinVal)
Definition: TypeSize.h:286
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:221
LoopVectorizationLegality.h
llvm::LoopAccessInfo::getNumRuntimePointerChecks
unsigned getNumRuntimePointerChecks() const
Number of memchecks required to prove independence of otherwise may-alias pointers.
Definition: LoopAccessAnalysis.h:534
llvm::PredicatedScalarEvolution::addPredicate
void addPredicate(const SCEVPredicate &Pred)
Adds a new predicate.
Definition: ScalarEvolution.cpp:13844
llvm::LoopBase::getNumBackEdges
unsigned getNumBackEdges() const
Calculate the number of back edges to the loop header.
Definition: LoopInfo.h:250
llvm::OptimizationRemarkAnalysis::AlwaysPrint
static const char * AlwaysPrint
Definition: DiagnosticInfo.h:816
llvm::UnivariateLinearPolyBase::isZero
bool isZero() const
Definition: TypeSize.h:229
llvm::AMDGPU::Hwreg::Width
Width
Definition: SIDefines.h:416
Predicate
llvm::RecurrenceDescriptor
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Definition: IVDescriptors.h:73
DEBUG_TYPE
#define DEBUG_TYPE
Definition: LoopVectorizationLegality.cpp:32
llvm::LoopVectorizationLegality::isInductionPhi
bool isInductionPhi(const Value *V) const
Returns True if V is a Phi node of an induction variable in this loop.
Definition: LoopVectorizationLegality.cpp:952
llvm::LoopVectorizeHints::setAlreadyVectorized
void setAlreadyVectorized()
Mark the loop L as already vectorized by setting the width to 1.
Definition: LoopVectorizationLegality.cpp:153
llvm::VectorizerParams
Collection of parameters shared between the Loop Vectorizer and the Loop Access Analysis.
Definition: LoopAccessAnalysis.h:36
llvm::PHINode
Definition: Instructions.h:2657
llvm::LoopVectorizeHints::allowReordering
bool allowReordering() const
When enabling loop hints are provided we allow the vectorizer to change the order of operations that ...
Definition: LoopVectorizationLegality.cpp:241
llvm::SmallVectorImpl< Instruction * >
llvm::TargetLibraryInfo::getWidestVF
void getWidestVF(StringRef ScalarF, ElementCount &FixedVF, ElementCount &ScalableVF) const
Returns the largest vectorization factor used in the list of vector functions.
Definition: TargetLibraryInfo.h:425
llvm::Module::getDataLayout
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:401
llvm::SmallPtrSetImpl< Value * >
llvm::getLoadStorePointerOperand
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
Definition: Instructions.h:5319
llvm::RecurrenceDescriptor::isOrdered
bool isOrdered() const
Expose an ordered FP reduction to the instance users.
Definition: IVDescriptors.h:265
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1478
llvm::LoopVectorizationLegality::getInductionVars
const InductionList & getInductionVars() const
Returns the induction variables found in the loop.
Definition: LoopVectorizationLegality.h:286
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::getWiderType
static Type * getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1)
Definition: LoopVectorizationLegality.cpp:415
llvm::TargetLibraryInfo::hasOptimizedCodeGen
bool hasOptimizedCodeGen(LibFunc F) const
Tests if the function is both available and a candidate for optimized code generation.
Definition: TargetLibraryInfo.h:340
llvm::RecurrenceDescriptor::getLoopExitInstr
Instruction * getLoopExitInstr() const
Definition: IVDescriptors.h:208
llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition: AMDGPUMetadata.h:389
llvm::cl::desc
Definition: CommandLine.h:412
llvm::hasOutsideLoopUser
static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst, SmallPtrSetImpl< Value * > &AllowedExit)
Check that the instruction has outside loop users and is not an identified reduction variable.
Definition: LoopVectorizationLegality.cpp:425
llvm::LoopAccessInfo::getPSE
const PredicatedScalarEvolution & getPSE() const
Used to add runtime SCEV checks.
Definition: LoopAccessAnalysis.h:586
llvm::PredicatedScalarEvolution::getSE
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
Definition: ScalarEvolution.h:2202
llvm::LoopVectorizeHints::FK_Disabled
@ FK_Disabled
Forcing disabled.
Definition: LoopVectorizationLegality.h:96
Reduction
loop Loop Strength Reduction
Definition: LoopStrengthReduce.cpp:6443
llvm::MDString
A single uniqued string.
Definition: Metadata.h:611
llvm::LoopVectorizationLegality::canVectorize
bool canVectorize(bool UseVPlanNativePath)
Returns true if it is legal to vectorize this loop.
Definition: LoopVectorizationLegality.cpp:1192
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::Value::users
iterator_range< user_iterator > users()
Definition: Value.h:421
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:364
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38