LLVM  14.0.0git
LoopVectorizationLegality.cpp
Go to the documentation of this file.
1 //===- LoopVectorizationLegality.cpp --------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file provides loop vectorization legality analysis. Original code
10 // resided in LoopVectorize.cpp for a long time.
11 //
12 // At this point, it is implemented as a utility class, not as an analysis
13 // pass. It should be easy to create an analysis pass around it if there
14 // is a need (but D45420 needs to happen first).
15 //
16 
18 #include "llvm/Analysis/Loads.h"
19 #include "llvm/Analysis/LoopInfo.h"
23 #include "llvm/IR/IntrinsicInst.h"
24 #include "llvm/IR/PatternMatch.h"
27 
28 using namespace llvm;
29 using namespace PatternMatch;
30 
31 #define LV_NAME "loop-vectorize"
32 #define DEBUG_TYPE LV_NAME
33 
35 
// Hidden command-line switch, enabled by default, whose description reads
// "Enable if-conversion during vectorization."
36 static cl::opt<bool>
37  EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
38  cl::desc("Enable if-conversion during vectorization."));
39 
40 namespace llvm {
42  HintsAllowReordering("hints-allow-reordering", cl::init(true), cl::Hidden,
43  cl::desc("Allow enabling loop hints to reorder "
44  "FP operations during vectorization."));
45 }
46 
47 // TODO: Move size-based thresholds out of legality checking, make cost based
48 // decisions instead of hard thresholds.
// NOTE(review): the line opening this option's declaration (its type and
// variable name) is not present in this listing. The visible arguments give
// a hidden option "vectorize-scev-check-threshold" defaulting to 16.
50  "vectorize-scev-check-threshold", cl::init(16), cl::Hidden,
51  cl::desc("The maximum number of SCEV checks allowed."));
52 
// NOTE(review): declaration header also missing here. The visible arguments
// give a hidden option "pragma-vectorize-scev-check-threshold" defaulting to
// 128, described as the limit that applies with a vectorize(enable) pragma.
54  "pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden,
55  cl::desc("The maximum number of SCEV checks allowed with a "
56  "vectorize(enable) pragma"));
57 
58 // FIXME: When scalable vectorization is stable enough, change the default
59 // to SK_PreferFixedWidth.
// NOTE(review): the declaration header and the enumerator entries that pair
// with each description string below are not present in this listing. What is
// visible: a hidden option "scalable-vectorization" that defaults to
// LoopVectorizeHints::SK_FixedWidthOnly and offers three documented choices.
61  "scalable-vectorization", cl::init(LoopVectorizeHints::SK_FixedWidthOnly),
62  cl::Hidden,
63  cl::desc("Control whether the compiler can use scalable vectors to "
64  "vectorize a loop"),
65  cl::values(
67  "Scalable vectorization is disabled."),
69  "Scalable vectorization is available, but favor fixed-width "
70  "vectorization when the cost is inconclusive."),
72  "Scalable vectorization is available and favored when the "
73  "cost is inconclusive.")));
74 
75 /// Maximum vectorization interleave count. Hint::validate() uses this as the
/// inclusive upper bound when checking an interleave hint.
76 static const unsigned MaxInterleaveFactor = 16;
77 
78 namespace llvm {
79 
80 bool LoopVectorizeHints::Hint::validate(unsigned Val) {
81  switch (Kind) {
82  case HK_WIDTH:
83  return isPowerOf2_32(Val) && Val <= VectorizerParams::MaxVectorWidth;
84  case HK_INTERLEAVE:
85  return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor;
86  case HK_FORCE:
87  return (Val <= 1);
88  case HK_ISVECTORIZED:
89  case HK_PREDICATE:
90  case HK_SCALABLE:
91  return (Val == 0 || Val == 1);
92  }
93  return false;
94 }
95 
// NOTE(review): this looks like the LoopVectorizeHints constructor, but its
// opening signature line and several interior lines (conditions guarding the
// assignments below) are not present in this listing.
//
// Visible behaviour: every hint member is initialised with its metadata name,
// a default value and its kind; the defaults are then overlaid with whatever
// getHintsFromMetadata() finds on the loop.
97  bool InterleaveOnlyWhenForced,
99  : Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH),
100  Interleave("interleave.count", InterleaveOnlyWhenForced, HK_INTERLEAVE),
101  Force("vectorize.enable", FK_Undefined, HK_FORCE),
102  IsVectorized("isvectorized", 0, HK_ISVECTORIZED),
103  Predicate("vectorize.predicate.enable", FK_Undefined, HK_PREDICATE),
104  Scalable("vectorize.scalable.enable", SK_Unspecified, HK_SCALABLE),
105  TheLoop(L), ORE(ORE) {
106  // Populate values with existing loop metadata.
107  getHintsFromMetadata();
108 
109  // force-vector-interleave overrides DisableInterleaving.
// NOTE(review): the statement this comment describes is missing here.
112 
// NOTE(review): the condition selecting between the two Scalable.Value
// assignments below is missing; as listed they would run back to back.
114  // If the width is set, but the metadata says nothing about the scalable
115  // property, then assume it concerns only a fixed-width UserVF.
116  // If width is not set, the flag takes precedence.
117  Scalable.Value = Width.Value ? SK_FixedWidthOnly : ScalableVectorization;
119  // If the flag is set to disable any use of scalable vectors, override the
120  // loop hint.
121  Scalable.Value = SK_FixedWidthOnly;
122 
123  if (IsVectorized.Value != 1)
124  // If the vectorization width and interleaving count are both 1 then
125  // consider the loop to have been already vectorized because there's
126  // nothing more that we can do.
// NOTE(review): the right-hand side of this assignment is missing here.
127  IsVectorized.Value =
129  LLVM_DEBUG(if (InterleaveOnlyWhenForced && getInterleave() == 1) dbgs()
130  << "LV: Interleaving disabled by the pass manager\n");
131 }
132 
// NOTE(review): the signature line of this member function is not present in
// this listing, and two interior lines are missing as well (the remaining
// operands of IsVectorizedMD and the call that produces NewLoopID).
//
// Visible behaviour: builds an "llvm.loop.isvectorized" metadata node,
// installs a new loop ID on TheLoop that carries it, and sets the cached
// IsVectorized hint to 1. The two prefixed strings "vectorize." and
// "interleave." are passed to the call that builds the new loop ID.
134  LLVMContext &Context = TheLoop->getHeader()->getContext();
135 
136  MDNode *IsVectorizedMD = MDNode::get(
137  Context,
138  {MDString::get(Context, "llvm.loop.isvectorized"),
140  MDNode *LoopID = TheLoop->getLoopID();
141  MDNode *NewLoopID =
143  {Twine(Prefix(), "vectorize.").str(),
144  Twine(Prefix(), "interleave.").str()},
145  {IsVectorizedMD});
146  TheLoop->setLoopID(NewLoopID);
147 
148  // Update internal cache.
149  IsVectorized.Value = 1;
150 }
151 
// NOTE(review): the first line of this const member function (return type and
// name) is missing from this listing, as are the opening `if` of the first
// check and the remark-emission lines of the first two checks.
//
// Visible behaviour: returns false when vectorization is disabled by pragma,
// when VectorizeOnlyWhenForced is set but the force hint is not FK_Enabled, or
// when the loop is already marked vectorized; in that last case a missed
// remark is emitted. Returns true otherwise.
153  Function *F, Loop *L, bool VectorizeOnlyWhenForced) const {
155  LLVM_DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n");
157  return false;
158  }
159 
160  if (VectorizeOnlyWhenForced && getForce() != LoopVectorizeHints::FK_Enabled) {
161  LLVM_DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n");
163  return false;
164  }
165 
166  if (getIsVectorized() == 1) {
167  LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n");
168  // FIXME: Add interleave.disable metadata. This will allow
169  // vectorize.disable to be used without disabling the pass and errors
170  // to differentiate between disabled vectorization and a width of 1.
171  ORE.emit([&]() {
// NOTE(review): the line constructing the remark object is missing here.
173  "AllDisabled", L->getStartLoc(),
174  L->getHeader())
175  << "loop not vectorized: vectorization and interleaving are "
176  "explicitly disabled, or the loop has already been "
177  "vectorized";
178  });
179  return false;
180  }
181 
182  return true;
183 }
184 
// NOTE(review): the signature line of this member function is not present in
// this listing.
//
// Visible behaviour: emits one missed-optimization remark for TheLoop. If the
// force hint is FK_Disabled the remark says vectorization was explicitly
// disabled. Otherwise it says "loop not vectorized" and, when the force hint is
// FK_Enabled, appends the force flag plus any non-zero width and interleave
// count as named values.
186  using namespace ore;
187 
188  ORE.emit([&]() {
189  if (Force.Value == LoopVectorizeHints::FK_Disabled)
190  return OptimizationRemarkMissed(LV_NAME, "MissedExplicitlyDisabled",
191  TheLoop->getStartLoc(),
192  TheLoop->getHeader())
193  << "loop not vectorized: vectorization is explicitly disabled";
194  else {
195  OptimizationRemarkMissed R(LV_NAME, "MissedDetails",
196  TheLoop->getStartLoc(), TheLoop->getHeader());
197  R << "loop not vectorized";
198  if (Force.Value == LoopVectorizeHints::FK_Enabled) {
199  R << " (Force=" << NV("Force", true);
200  if (Width.Value != 0)
201  R << ", Vector Width=" << NV("VectorWidth", getWidth());
202  if (getInterleave() != 0)
203  R << ", Interleave Count=" << NV("InterleaveCount", getInterleave());
204  R << ")";
205  }
206  return R;
207  }
208  });
209 }
210 
// NOTE(review): the signature line, the conditions guarding the second and
// third returns, and the final return statement are all missing from this
// listing. Only the fixed-width-of-one check and three `return LV_NAME`
// statements are visible, so the full selection logic cannot be documented
// from here.
212  if (getWidth() == ElementCount::getFixed(1))
213  return LV_NAME;
215  return LV_NAME;
217  return LV_NAME;
219 }
220 
// NOTE(review): the signature line and one operand line of the returned
// expression are missing from this listing. What is visible: the result is
// gated on the HintsAllowReordering option and involves the hinted width's
// known minimum value being greater than 1.
222  // Allow the vectorizer to change the order of operations if enabling
223  // loop hints are provided
224  ElementCount EC = getWidth();
225  return HintsAllowReordering &&
227  EC.getKnownMinValue() > 1);
228 }
229 
230 void LoopVectorizeHints::getHintsFromMetadata() {
231  MDNode *LoopID = TheLoop->getLoopID();
232  if (!LoopID)
233  return;
234 
235  // First operand should refer to the loop id itself.
236  assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
237  assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
238 
239  for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
240  const MDString *S = nullptr;
242 
243  // The expected hint is either a MDString or a MDNode with the first
244  // operand a MDString.
245  if (const MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i))) {
246  if (!MD || MD->getNumOperands() == 0)
247  continue;
248  S = dyn_cast<MDString>(MD->getOperand(0));
249  for (unsigned i = 1, ie = MD->getNumOperands(); i < ie; ++i)
250  Args.push_back(MD->getOperand(i));
251  } else {
252  S = dyn_cast<MDString>(LoopID->getOperand(i));
253  assert(Args.size() == 0 && "too many arguments for MDString");
254  }
255 
256  if (!S)
257  continue;
258 
259  // Check if the hint starts with the loop metadata prefix.
260  StringRef Name = S->getString();
261  if (Args.size() == 1)
262  setHint(Name, Args[0]);
263  }
264 }
265 
266 void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) {
267  if (!Name.startswith(Prefix()))
268  return;
269  Name = Name.substr(Prefix().size(), StringRef::npos);
270 
271  const ConstantInt *C = mdconst::dyn_extract<ConstantInt>(Arg);
272  if (!C)
273  return;
274  unsigned Val = C->getZExtValue();
275 
276  Hint *Hints[] = {&Width, &Interleave, &Force,
277  &IsVectorized, &Predicate, &Scalable};
278  for (auto H : Hints) {
279  if (Name == H->Name) {
280  if (H->validate(Val))
281  H->Value = Val;
282  else
283  LLVM_DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n");
284  break;
285  }
286  }
287 }
288 
289 // Return true if the inner loop \p Lp is uniform with regard to the outer loop
290 // \p OuterLp (i.e., if the outer loop is vectorized, all the vector lanes
291 // executing the inner loop will execute the same iterations). This check is
292 // very constrained for now but it will be relaxed in the future. \p Lp is
293 // considered uniform if it meets all the following conditions:
294 // 1) it has a canonical IV (starting from 0 and with stride 1),
295 // 2) its latch terminator is a conditional branch and,
296 // 3) its latch condition is a compare instruction whose operands are the
297 // canonical IV and an OuterLp invariant.
298 // This check doesn't take into account the uniformity of other conditions not
299 // related to the loop latch because they don't affect the loop uniformity.
300 //
301 // NOTE: We decided to keep all these checks and its associated documentation
302 // together so that we can easily have a picture of the current supported loop
303 // nests. However, some of the current checks don't depend on \p OuterLp and
304 // would be redundantly executed for each \p Lp if we invoked this function for
305 // different candidate outer loops. This is not the case for now because we
306 // don't currently have the infrastructure to evaluate multiple candidate outer
307 // loops and \p OuterLp will be a fixed parameter while we only support explicit
308 // outer loop vectorization. It's also very likely that these checks go away
309 // before introducing the aforementioned infrastructure. However, if this is not
310 // the case, we should move the \p OuterLp independent checks to a separate
311 // function that is only executed once for each \p Lp.
312 static bool isUniformLoop(Loop *Lp, Loop *OuterLp) {
313  assert(Lp->getLoopLatch() && "Expected loop with a single latch.");
314 
315  // If Lp is the outer loop, it's uniform by definition.
316  if (Lp == OuterLp)
317  return true;
318  assert(OuterLp->contains(Lp) && "OuterLp must contain Lp.");
319 
320  // 1.
322  if (!IV) {
323  LLVM_DEBUG(dbgs() << "LV: Canonical IV not found.\n");
324  return false;
325  }
326 
327  // 2.
328  BasicBlock *Latch = Lp->getLoopLatch();
329  auto *LatchBr = dyn_cast<BranchInst>(Latch->getTerminator());
330  if (!LatchBr || LatchBr->isUnconditional()) {
331  LLVM_DEBUG(dbgs() << "LV: Unsupported loop latch branch.\n");
332  return false;
333  }
334 
335  // 3.
336  auto *LatchCmp = dyn_cast<CmpInst>(LatchBr->getCondition());
337  if (!LatchCmp) {
338  LLVM_DEBUG(
339  dbgs() << "LV: Loop latch condition is not a compare instruction.\n");
340  return false;
341  }
342 
343  Value *CondOp0 = LatchCmp->getOperand(0);
344  Value *CondOp1 = LatchCmp->getOperand(1);
345  Value *IVUpdate = IV->getIncomingValueForBlock(Latch);
346  if (!(CondOp0 == IVUpdate && OuterLp->isLoopInvariant(CondOp1)) &&
347  !(CondOp1 == IVUpdate && OuterLp->isLoopInvariant(CondOp0))) {
348  LLVM_DEBUG(dbgs() << "LV: Loop latch condition is not uniform.\n");
349  return false;
350  }
351 
352  return true;
353 }
354 
355 // Return true if \p Lp and all its nested loops are uniform with regard to \p
356 // OuterLp.
357 static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp) {
358  if (!isUniformLoop(Lp, OuterLp))
359  return false;
360 
361  // Check if nested loops are uniform.
362  for (Loop *SubLp : *Lp)
363  if (!isUniformLoopNest(SubLp, OuterLp))
364  return false;
365 
366  return true;
367 }
368 
369 /// Check whether it is safe to if-convert this phi node.
370 ///
371 /// Phi nodes with constant expressions that can trap are not safe to if
372 /// convert.
// NOTE(review): the signature line is missing from this listing; the body
// reads a block named BB. It returns false as soon as any PHI in BB has an
// incoming value that is a Constant for which canTrap() is true, and true
// otherwise.
374  for (PHINode &Phi : BB->phis()) {
375  for (Value *V : Phi.incoming_values())
376  if (auto *C = dyn_cast<Constant>(V))
377  if (C->canTrap())
378  return false;
379  }
380  return true;
381 }
382 
384  if (Ty->isPointerTy())
385  return DL.getIntPtrType(Ty);
386 
387  // It is possible that char's or short's overflow when we ask for the loop's
388  // trip count, work around this by changing the type size.
389  if (Ty->getScalarSizeInBits() < 32)
390  return Type::getInt32Ty(Ty->getContext());
391 
392  return Ty;
393 }
394 
395 static Type *getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1) {
396  Ty0 = convertPointerToIntegerType(DL, Ty0);
397  Ty1 = convertPointerToIntegerType(DL, Ty1);
398  if (Ty0->getScalarSizeInBits() > Ty1->getScalarSizeInBits())
399  return Ty0;
400  return Ty1;
401 }
402 
403 /// Check that the instruction has outside loop users and is not an
404 /// identified reduction variable.
405 static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
406  SmallPtrSetImpl<Value *> &AllowedExit) {
407  // Reductions, Inductions and non-header phis are allowed to have exit users. All
408  // other instructions must not have external users.
409  if (!AllowedExit.count(Inst))
410  // Check that all of the users of the loop are inside the BB.
411  for (User *U : Inst->users()) {
412  Instruction *UI = cast<Instruction>(U);
413  // This user may be a reduction exit value.
414  if (!TheLoop->contains(UI)) {
415  LLVM_DEBUG(dbgs() << "LV: Found an outside user for : " << *UI << '\n');
416  return true;
417  }
418  }
419  return false;
420 }
421 
// NOTE(review): the first line of this const member function (return type,
// name and leading parameter) and the final argument line of the
// shouldOptimizeForSize() call are missing from this listing.
//
// Visible behaviour: computes the stride of Ptr for the access type via
// getPtrStride(), allowing new predicates only when the function is not being
// optimized for size. Returns the stride when it is exactly 1 or -1, and 0 in
// every other case.
423  Value *Ptr) const {
424  const ValueToValueMap &Strides =
425  getSymbolicStrides() ? *getSymbolicStrides() : ValueToValueMap();
426 
427  Function *F = TheLoop->getHeader()->getParent();
428  bool OptForSize = F->hasOptSize() ||
429  llvm::shouldOptimizeForSize(TheLoop->getHeader(), PSI, BFI,
431  bool CanAddPredicate = !OptForSize;
432  int Stride = getPtrStride(PSE, AccessTy, Ptr, TheLoop, Strides,
433  CanAddPredicate, false);
434  if (Stride == 1 || Stride == -1)
435  return Stride;
436  return 0;
437 }
438 
// NOTE(review): the signature line is missing from this listing. The body
// simply forwards the value V to LAI->isUniform().
440  return LAI->isUniform(V);
441 }
442 
443 bool LoopVectorizationLegality::canVectorizeOuterLoop() {
444  assert(!TheLoop->isInnermost() && "We are not vectorizing an outer loop.");
445  // Store the result and return it at the end instead of exiting early, in case
446  // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
447  bool Result = true;
448  bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
449 
450  for (BasicBlock *BB : TheLoop->blocks()) {
451  // Check whether the BB terminator is a BranchInst. Any other terminator is
452  // not supported yet.
453  auto *Br = dyn_cast<BranchInst>(BB->getTerminator());
454  if (!Br) {
455  reportVectorizationFailure("Unsupported basic block terminator",
456  "loop control flow is not understood by vectorizer",
457  "CFGNotUnderstood", ORE, TheLoop);
458  if (DoExtraAnalysis)
459  Result = false;
460  else
461  return false;
462  }
463 
464  // Check whether the BranchInst is a supported one. Only unconditional
465  // branches, conditional branches with an outer loop invariant condition or
466  // backedges are supported.
467  // FIXME: We skip these checks when VPlan predication is enabled as we
468  // want to allow divergent branches. This whole check will be removed
469  // once VPlan predication is on by default.
470  if (!EnableVPlanPredication && Br && Br->isConditional() &&
471  !TheLoop->isLoopInvariant(Br->getCondition()) &&
472  !LI->isLoopHeader(Br->getSuccessor(0)) &&
473  !LI->isLoopHeader(Br->getSuccessor(1))) {
474  reportVectorizationFailure("Unsupported conditional branch",
475  "loop control flow is not understood by vectorizer",
476  "CFGNotUnderstood", ORE, TheLoop);
477  if (DoExtraAnalysis)
478  Result = false;
479  else
480  return false;
481  }
482  }
483 
484  // Check whether inner loops are uniform. At this point, we only support
485  // simple outer loops scenarios with uniform nested loops.
486  if (!isUniformLoopNest(TheLoop /*loop nest*/,
487  TheLoop /*context outer loop*/)) {
488  reportVectorizationFailure("Outer loop contains divergent loops",
489  "loop control flow is not understood by vectorizer",
490  "CFGNotUnderstood", ORE, TheLoop);
491  if (DoExtraAnalysis)
492  Result = false;
493  else
494  return false;
495  }
496 
497  // Check whether we are able to set up outer loop induction.
498  if (!setupOuterLoopInductions()) {
499  reportVectorizationFailure("Unsupported outer loop Phi(s)",
500  "Unsupported outer loop Phi(s)",
501  "UnsupportedPhi", ORE, TheLoop);
502  if (DoExtraAnalysis)
503  Result = false;
504  else
505  return false;
506  }
507 
508  return Result;
509 }
510 
511 void LoopVectorizationLegality::addInductionPhi(
512  PHINode *Phi, const InductionDescriptor &ID,
513  SmallPtrSetImpl<Value *> &AllowedExit) {
514  Inductions[Phi] = ID;
515 
516  // In case this induction also comes with casts that we know we can ignore
517  // in the vectorized loop body, record them here. All casts could be recorded
518  // here for ignoring, but suffices to record only the first (as it is the
519  // only one that may bw used outside the cast sequence).
520  const SmallVectorImpl<Instruction *> &Casts = ID.getCastInsts();
521  if (!Casts.empty())
522  InductionCastsToIgnore.insert(*Casts.begin());
523 
524  Type *PhiTy = Phi->getType();
525  const DataLayout &DL = Phi->getModule()->getDataLayout();
526 
527  // Get the widest type.
528  if (!PhiTy->isFloatingPointTy()) {
529  if (!WidestIndTy)
530  WidestIndTy = convertPointerToIntegerType(DL, PhiTy);
531  else
532  WidestIndTy = getWiderType(DL, PhiTy, WidestIndTy);
533  }
534 
535  // Int inductions are special because we only allow one IV.
536  if (ID.getKind() == InductionDescriptor::IK_IntInduction &&
537  ID.getConstIntStepValue() && ID.getConstIntStepValue()->isOne() &&
538  isa<Constant>(ID.getStartValue()) &&
539  cast<Constant>(ID.getStartValue())->isNullValue()) {
540 
541  // Use the phi node with the widest type as induction. Use the last
542  // one if there are multiple (no good reason for doing this other
543  // than it is expedient). We've checked that it begins at zero and
544  // steps by one, so this is a canonical induction variable.
545  if (!PrimaryInduction || PhiTy == WidestIndTy)
546  PrimaryInduction = Phi;
547  }
548 
549  // Both the PHI node itself, and the "post-increment" value feeding
550  // back into the PHI node may have external users.
551  // We can allow those uses, except if the SCEVs we have for them rely
552  // on predicates that only hold within the loop, since allowing the exit
553  // currently means re-using this SCEV outside the loop (see PR33706 for more
554  // details).
555  if (PSE.getUnionPredicate().isAlwaysTrue()) {
556  AllowedExit.insert(Phi);
557  AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch()));
558  }
559 
560  LLVM_DEBUG(dbgs() << "LV: Found an induction variable.\n");
561 }
562 
// Returns true only if every PHI in the loop header is accepted by the
// isSupportedPhi lambda; each accepted PHI is registered through
// addInductionPhi() as a side effect. llvm::all_of stops at the first rejected
// PHI, so PHIs after it are not registered.
563 bool LoopVectorizationLegality::setupOuterLoopInductions() {
564  BasicBlock *Header = TheLoop->getHeader();
565 
566  // Returns true if a given Phi is a supported induction.
567  auto isSupportedPhi = [&](PHINode &Phi) -> bool {
// NOTE(review): two lines are missing from this listing inside the lambda:
// the declaration of ID, and the second operand of the `&&` below.
569  if (InductionDescriptor::isInductionPHI(&Phi, TheLoop, PSE, ID) &&
571  addInductionPhi(&Phi, ID, AllowedExit);
572  return true;
573  } else {
574  // Bail out for any Phi in the outer loop header that is not a supported
575  // induction.
576  LLVM_DEBUG(
577  dbgs()
578  << "LV: Found unsupported PHI for outer loop vectorization.\n");
579  return false;
580  }
581  };
582 
583  if (llvm::all_of(Header->phis(), isSupportedPhi))
584  return true;
585  else
586  return false;
587 }
588 
589 /// Checks if a function is scalarizable according to the TLI, in
590 /// the sense that it should be vectorized and then expanded in
591 /// multiple scalar calls. This is represented in the
592 /// TLI via mappings that do not specify a vector name, as in the
593 /// following example:
594 ///
595 /// const VecDesc VecIntrinsics[] = {
596 /// {"llvm.phx.abs.i32", "", 4}
597 /// };
598 static bool isTLIScalarize(const TargetLibraryInfo &TLI, const CallInst &CI) {
// getCalledFunction() is dereferenced without a null check here; the visible
// caller tests it for null before calling this function.
599  const StringRef ScalarName = CI.getCalledFunction()->getName();
600  bool Scalarize = TLI.isFunctionVectorizable(ScalarName);
601  // Check that all known VFs are not associated to a vector
602  // function, i.e. the vector name is empty.
603  if (Scalarize) {
604  ElementCount WidestFixedVF, WidestScalableVF;
605  TLI.getWidestVF(ScalarName, WidestFixedVF, WidestScalableVF);
// NOTE(review): the `for (` line that declares and initialises VF is missing
// from this listing for both loops below; only the bound check and the
// doubling step are visible. The first loop walks VFs up to WidestFixedVF,
// the second up to WidestScalableVF.
607  ElementCount::isKnownLE(VF, WidestFixedVF); VF *= 2)
608  Scalarize &= !TLI.isFunctionVectorizable(ScalarName, VF);
610  ElementCount::isKnownLE(VF, WidestScalableVF); VF *= 2)
611  Scalarize &= !TLI.isFunctionVectorizable(ScalarName, VF);
612  assert((WidestScalableVF.isZero() || !Scalarize) &&
613  "Caller may decide to scalarize a variant using a scalable VF");
614  }
615  return Scalarize;
616 }
617 
// Walks every instruction of every block in TheLoop. Header PHIs are
// classified as reductions, inductions or first-order recurrences and recorded
// in the matching member containers; non-header PHIs are added to AllowedExit.
// Other instructions are rejected when they are calls that cannot be
// vectorized, produce or store a type that cannot be a vector element, are
// nontemporal accesses the target cannot handle as vectors, or have users
// outside the loop that are not allowed. Returns false at the first blocking
// problem (most paths report the reason first) and true otherwise.
//
// NOTE(review): several interior lines are missing from this listing; each
// gap is marked below.
618 bool LoopVectorizationLegality::canVectorizeInstrs() {
619  BasicBlock *Header = TheLoop->getHeader();
620 
621  // For each block in the loop.
622  for (BasicBlock *BB : TheLoop->blocks()) {
623  // Scan the instructions in the block and look for hazards.
624  for (Instruction &I : *BB) {
625  if (auto *Phi = dyn_cast<PHINode>(&I)) {
626  Type *PhiTy = Phi->getType();
627  // Check that this PHI type is allowed.
628  if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() &&
629  !PhiTy->isPointerTy()) {
630  reportVectorizationFailure("Found a non-int non-pointer PHI",
631  "loop control flow is not understood by vectorizer",
632  "CFGNotUnderstood", ORE, TheLoop);
633  return false;
634  }
635 
636  // If this PHINode is not in the header block, then we know that we
637  // can convert it to select during if-conversion. No need to check if
638  // the PHIs in this block are induction or reduction variables.
639  if (BB != Header) {
640  // Non-header phi nodes that have outside uses can be vectorized. Add
641  // them to the list of allowed exits.
642  // Unsafe cyclic dependencies with header phis are identified during
643  // legalization for reduction, induction and first order
644  // recurrences.
645  AllowedExit.insert(&I);
646  continue;
647  }
648 
649  // We only allow if-converted PHIs with exactly two incoming values.
650  if (Phi->getNumIncomingValues() != 2) {
651  reportVectorizationFailure("Found an invalid PHI",
652  "loop control flow is not understood by vectorizer",
653  "CFGNotUnderstood", ORE, TheLoop, Phi);
654  return false;
655  }
656 
657  RecurrenceDescriptor RedDes;
658  if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes, DB, AC,
659  DT)) {
660  Requirements->addExactFPMathInst(RedDes.getExactFPMathInst());
661  AllowedExit.insert(RedDes.getLoopExitInstr());
662  Reductions[Phi] = RedDes;
663  continue;
664  }
665 
666  // TODO: Instead of recording the AllowedExit, it would be good to record the
667  // complementary set: NotAllowedExit. These include (but may not be
668  // limited to):
669  // 1. Reduction phis as they represent the one-before-last value, which
670  // is not available when vectorized
671  // 2. Induction phis and increment when SCEV predicates cannot be used
672  // outside the loop - see addInductionPhi
673  // 3. Non-Phis with outside uses when SCEV predicates cannot be used
674  // outside the loop - see call to hasOutsideLoopUser in the non-phi
675  // handling below
676  // 4. FirstOrderRecurrence phis that can possibly be handled by
677  // extraction.
678  // By recording these, we can then reason about ways to vectorize each
679  // of these NotAllowedExit.
// NOTE(review): the declaration of ID is missing from this listing.
681  if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID)) {
682  addInductionPhi(Phi, ID, AllowedExit);
683  Requirements->addExactFPMathInst(ID.getExactFPMathInst());
684  continue;
685  }
686 
// NOTE(review): the opening line of this `if` (the call whose trailing
// arguments are SinkAfter and DT) is missing from this listing.
688  SinkAfter, DT)) {
689  AllowedExit.insert(Phi);
690  FirstOrderRecurrences.insert(Phi);
691  continue;
692  }
693 
694  // As a last resort, coerce the PHI to an AddRec expression
695  // and re-try classifying it as an induction PHI.
696  if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID, true)) {
697  addInductionPhi(Phi, ID, AllowedExit);
698  continue;
699  }
700 
701  reportVectorizationFailure("Found an unidentified PHI",
702  "value that could not be identified as "
703  "reduction is used outside the loop",
704  "NonReductionValueUsedOutsideLoop", ORE, TheLoop, Phi);
705  return false;
706  } // end of PHI handling
707 
708  // We handle calls that:
709  // * Are debug info intrinsics.
710  // * Have a mapping to an IR intrinsic.
711  // * Have a vector version available.
712  auto *CI = dyn_cast<CallInst>(&I);
713 
714  if (CI && !getVectorIntrinsicIDForCall(CI, TLI) &&
715  !isa<DbgInfoIntrinsic>(CI) &&
716  !(CI->getCalledFunction() && TLI &&
717  (!VFDatabase::getMappings(*CI).empty() ||
718  isTLIScalarize(*TLI, *CI)))) {
719  // If the call is a recognized math library call, it is likely that
720  // we can vectorize it given loosened floating-point constraints.
721  LibFunc Func;
722  bool IsMathLibCall =
723  TLI && CI->getCalledFunction() &&
724  CI->getType()->isFloatingPointTy() &&
725  TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) &&
726  TLI->hasOptimizedCodeGen(Func);
727 
728  if (IsMathLibCall) {
729  // TODO: Ideally, we should not use clang-specific language here,
730  // but it's hard to provide meaningful yet generic advice.
731  // Also, should this be guarded by allowExtraAnalysis() and/or be part
732  // of the returned info from isFunctionVectorizable()?
// NOTE(review): the line opening this reporting call is missing from this
// listing.
734  "Found a non-intrinsic callsite",
735  "library call cannot be vectorized. "
736  "Try compiling with -fno-math-errno, -ffast-math, "
737  "or similar flags",
738  "CantVectorizeLibcall", ORE, TheLoop, CI);
739  } else {
740  reportVectorizationFailure("Found a non-intrinsic callsite",
741  "call instruction cannot be vectorized",
742  "CantVectorizeLibcall", ORE, TheLoop, CI);
743  }
744  return false;
745  }
746 
747  // Some intrinsics have scalar arguments and should be same in order for
748  // them to be vectorized (i.e. loop invariant).
749  if (CI) {
750  auto *SE = PSE.getSE();
751  Intrinsic::ID IntrinID = getVectorIntrinsicIDForCall(CI, TLI);
752  for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i)
753  if (hasVectorInstrinsicScalarOpd(IntrinID, i)) {
754  if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(i)), TheLoop)) {
755  reportVectorizationFailure("Found unvectorizable intrinsic",
756  "intrinsic instruction cannot be vectorized",
757  "CantVectorizeIntrinsic", ORE, TheLoop, CI);
758  return false;
759  }
760  }
761  }
762 
763  // Check that the instruction return type is vectorizable.
764  // Also, we can't vectorize extractelement instructions.
765  if ((!VectorType::isValidElementType(I.getType()) &&
766  !I.getType()->isVoidTy()) ||
767  isa<ExtractElementInst>(I)) {
768  reportVectorizationFailure("Found unvectorizable type",
769  "instruction return type cannot be vectorized",
770  "CantVectorizeInstructionReturnType", ORE, TheLoop, &I);
771  return false;
772  }
773 
774  // Check that the stored type is vectorizable.
775  if (auto *ST = dyn_cast<StoreInst>(&I)) {
776  Type *T = ST->getValueOperand()->getType();
// NOTE(review): the `if` line that tests T and guards this failure is
// missing from this listing.
778  reportVectorizationFailure("Store instruction cannot be vectorized",
779  "store instruction cannot be vectorized",
780  "CantVectorizeStore", ORE, TheLoop, ST);
781  return false;
782  }
783 
784  // For nontemporal stores, check that a nontemporal vector version is
785  // supported on the target.
786  if (ST->getMetadata(LLVMContext::MD_nontemporal)) {
787  // Arbitrarily try a vector of 2 elements.
788  auto *VecTy = FixedVectorType::get(T, /*NumElts=*/2);
789  assert(VecTy && "did not find vectorized version of stored type");
790  if (!TTI->isLegalNTStore(VecTy, ST->getAlign())) {
// NOTE(review): the line opening this reporting call is missing from this
// listing.
792  "nontemporal store instruction cannot be vectorized",
793  "nontemporal store instruction cannot be vectorized",
794  "CantVectorizeNontemporalStore", ORE, TheLoop, ST);
795  return false;
796  }
797  }
798 
799  } else if (auto *LD = dyn_cast<LoadInst>(&I)) {
800  if (LD->getMetadata(LLVMContext::MD_nontemporal)) {
801  // For nontemporal loads, check that a nontemporal vector version is
802  // supported on the target (arbitrarily try a vector of 2 elements).
803  auto *VecTy = FixedVectorType::get(I.getType(), /*NumElts=*/2);
804  assert(VecTy && "did not find vectorized version of load type");
805  if (!TTI->isLegalNTLoad(VecTy, LD->getAlign())) {
// NOTE(review): the line opening this reporting call is missing from this
// listing.
807  "nontemporal load instruction cannot be vectorized",
808  "nontemporal load instruction cannot be vectorized",
809  "CantVectorizeNontemporalLoad", ORE, TheLoop, LD);
810  return false;
811  }
812  }
813 
814  // FP instructions can allow unsafe algebra, thus vectorizable by
815  // non-IEEE-754 compliant SIMD units.
816  // This applies to floating-point math operations and calls, not memory
817  // operations, shuffles, or casts, as they don't change precision or
818  // semantics.
819  } else if (I.getType()->isFloatingPointTy() && (CI || I.isBinaryOp()) &&
820  !I.isFast()) {
821  LLVM_DEBUG(dbgs() << "LV: Found FP op with unsafe algebra.\n");
822  Hints->setPotentiallyUnsafe();
823  }
824 
825  // Reduction instructions are allowed to have exit users.
826  // All other instructions must not have external users.
827  if (hasOutsideLoopUser(TheLoop, &I, AllowedExit)) {
828  // We can safely vectorize loops where instructions within the loop are
829  // used outside the loop only if the SCEV predicates within the loop is
830  // same as outside the loop. Allowing the exit means reusing the SCEV
831  // outside the loop.
832  if (PSE.getUnionPredicate().isAlwaysTrue()) {
833  AllowedExit.insert(&I);
834  continue;
835  }
836  reportVectorizationFailure("Value cannot be used outside the loop",
837  "value cannot be used outside the loop",
838  "ValueUsedOutsideLoop", ORE, TheLoop, &I);
839  return false;
840  }
841  } // next instr.
842  }
843 
// With no primary induction: fail if no induction was found at all, or if no
// widest induction type was recorded. Otherwise only a debug message is
// printed and analysis continues.
844  if (!PrimaryInduction) {
845  if (Inductions.empty()) {
846  reportVectorizationFailure("Did not find one integer induction var",
847  "loop induction variable could not be identified",
848  "NoInductionVariable", ORE, TheLoop);
849  return false;
850  } else if (!WidestIndTy) {
851  reportVectorizationFailure("Did not find one integer induction var",
852  "integer loop induction variable could not be identified",
853  "NoIntegerInductionVariable", ORE, TheLoop);
854  return false;
855  } else {
856  LLVM_DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
857  }
858  }
859 
860  // For first order recurrences, we use the previous value (incoming value from
861  // the latch) to check if it dominates all users of the recurrence. Bail out
862  // if we have to sink such an instruction for another recurrence, as the
863  // dominance requirement may not hold after sinking.
// Note that this bail-out returns false without reporting a failure reason.
864  BasicBlock *LoopLatch = TheLoop->getLoopLatch();
865  if (any_of(FirstOrderRecurrences, [LoopLatch, this](const PHINode *Phi) {
866  Instruction *V =
867  cast<Instruction>(Phi->getIncomingValueForBlock(LoopLatch));
868  return SinkAfter.find(V) != SinkAfter.end();
869  }))
870  return false;
871 
872  // Now we know the widest induction type, check if our found induction
873  // is the same size. If it's not, unset it here and InnerLoopVectorizer
874  // will create another.
875  if (PrimaryInduction && WidestIndTy != PrimaryInduction->getType())
876  PrimaryInduction = nullptr;
877 
878  return true;
879 }
880 
881 bool LoopVectorizationLegality::canVectorizeMemory() {
882  LAI = &(*GetLAA)(*TheLoop);
883  const OptimizationRemarkAnalysis *LAR = LAI->getReport();
884  if (LAR) {
885  ORE->emit([&]() {
886  return OptimizationRemarkAnalysis(Hints->vectorizeAnalysisPassName(),
887  "loop not vectorized: ", *LAR);
888  });
889  }
890 
891  if (!LAI->canVectorizeMemory())
892  return false;
893 
895  reportVectorizationFailure("Stores to a uniform address",
896  "write to a loop invariant address could not be vectorized",
897  "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
898  return false;
899  }
900 
902  PSE.addPredicate(LAI->getPSE().getUnionPredicate());
903  return true;
904 }
905 
907  bool EnableStrictReductions) {
908 
909  // First check if there is any ExactFP math or if we allow reassociations
910  if (!Requirements->getExactFPInst() || Hints->allowReordering())
911  return true;
912 
913  // If the above is false, we have ExactFPMath & do not allow reordering.
914  // If the EnableStrictReductions flag is set, first check if we have any
915  // Exact FP induction vars, which we cannot vectorize.
916  if (!EnableStrictReductions ||
917  any_of(getInductionVars(), [&](auto &Induction) -> bool {
918  InductionDescriptor IndDesc = Induction.second;
919  return IndDesc.getExactFPMathInst();
920  }))
921  return false;
922 
923  // We can now only vectorize if all reductions with Exact FP math also
924  // have the isOrdered flag set, which indicates that we can move the
925  // reduction operations in-loop.
926  return (all_of(getReductionVars(), [&](auto &Reduction) -> bool {
927  const RecurrenceDescriptor &RdxDesc = Reduction.second;
928  return !RdxDesc.hasExactFPMath() || RdxDesc.isOrdered();
929  }));
930 }
931 
933  Value *In0 = const_cast<Value *>(V);
934  PHINode *PN = dyn_cast_or_null<PHINode>(In0);
935  if (!PN)
936  return false;
937 
938  return Inductions.count(PN);
939 }
940 
942  auto *Inst = dyn_cast<Instruction>(V);
943  return (Inst && InductionCastsToIgnore.count(Inst));
944 }
945 
948 }
949 
951  return FirstOrderRecurrences.count(Phi);
952 }
953 
955  return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
956 }
957 
958 bool LoopVectorizationLegality::blockCanBePredicated(
961  SmallPtrSetImpl<Instruction *> &ConditionalAssumes) const {
962  for (Instruction &I : *BB) {
963  // Check that we don't have a constant expression that can trap as operand.
964  for (Value *Operand : I.operands()) {
965  if (auto *C = dyn_cast<Constant>(Operand))
966  if (C->canTrap())
967  return false;
968  }
969 
970  // We can predicate blocks with calls to assume, as long as we drop them in
971  // case we flatten the CFG via predication.
972  if (match(&I, m_Intrinsic<Intrinsic::assume>())) {
973  ConditionalAssumes.insert(&I);
974  continue;
975  }
976 
977  // Do not let llvm.experimental.noalias.scope.decl block the vectorization.
978  // TODO: there might be cases that it should block the vectorization. Let's
979  // ignore those for now.
980  if (isa<NoAliasScopeDeclInst>(&I))
981  continue;
982 
983  // We might be able to hoist the load.
984  if (I.mayReadFromMemory()) {
985  auto *LI = dyn_cast<LoadInst>(&I);
986  if (!LI)
987  return false;
988  if (!SafePtrs.count(LI->getPointerOperand())) {
989  MaskedOp.insert(LI);
990  continue;
991  }
992  }
993 
994  if (I.mayWriteToMemory()) {
995  auto *SI = dyn_cast<StoreInst>(&I);
996  if (!SI)
997  return false;
998  // Predicated store requires some form of masking:
999  // 1) masked store HW instruction,
1000  // 2) emulation via load-blend-store (only if safe and legal to do so,
1001  // be aware on the race conditions), or
1002  // 3) element-by-element predicate check and scalar store.
1003  MaskedOp.insert(SI);
1004  continue;
1005  }
1006  if (I.mayThrow())
1007  return false;
1008  }
1009 
1010  return true;
1011 }
1012 
1013 bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
1014  if (!EnableIfConversion) {
1015  reportVectorizationFailure("If-conversion is disabled",
1016  "if-conversion is disabled",
1017  "IfConversionDisabled",
1018  ORE, TheLoop);
1019  return false;
1020  }
1021 
1022  assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable");
1023 
1024  // A list of pointers which are known to be dereferenceable within scope of
1025  // the loop body for each iteration of the loop which executes. That is,
1026  // the memory pointed to can be dereferenced (with the access size implied by
1027  // the value's type) unconditionally within the loop header without
1028  // introducing a new fault.
1029  SmallPtrSet<Value *, 8> SafePointers;
1030 
1031  // Collect safe addresses.
1032  for (BasicBlock *BB : TheLoop->blocks()) {
1033  if (!blockNeedsPredication(BB)) {
1034  for (Instruction &I : *BB)
1035  if (auto *Ptr = getLoadStorePointerOperand(&I))
1036  SafePointers.insert(Ptr);
1037  continue;
1038  }
1039 
1040  // For a block which requires predication, a address may be safe to access
1041  // in the loop w/o predication if we can prove dereferenceability facts
1042  // sufficient to ensure it'll never fault within the loop. For the moment,
1043  // we restrict this to loads; stores are more complicated due to
1044  // concurrency restrictions.
1045  ScalarEvolution &SE = *PSE.getSE();
1046  for (Instruction &I : *BB) {
1047  LoadInst *LI = dyn_cast<LoadInst>(&I);
1048  if (LI && !LI->getType()->isVectorTy() && !mustSuppressSpeculation(*LI) &&
1049  isDereferenceableAndAlignedInLoop(LI, TheLoop, SE, *DT))
1050  SafePointers.insert(LI->getPointerOperand());
1051  }
1052  }
1053 
1054  // Collect the blocks that need predication.
1055  BasicBlock *Header = TheLoop->getHeader();
1056  for (BasicBlock *BB : TheLoop->blocks()) {
1057  // We don't support switch statements inside loops.
1058  if (!isa<BranchInst>(BB->getTerminator())) {
1059  reportVectorizationFailure("Loop contains a switch statement",
1060  "loop contains a switch statement",
1061  "LoopContainsSwitch", ORE, TheLoop,
1062  BB->getTerminator());
1063  return false;
1064  }
1065 
1066  // We must be able to predicate all blocks that need to be predicated.
1067  if (blockNeedsPredication(BB)) {
1068  if (!blockCanBePredicated(BB, SafePointers, MaskedOp,
1069  ConditionalAssumes)) {
1071  "Control flow cannot be substituted for a select",
1072  "control flow cannot be substituted for a select",
1073  "NoCFGForSelect", ORE, TheLoop,
1074  BB->getTerminator());
1075  return false;
1076  }
1077  } else if (BB != Header && !canIfConvertPHINodes(BB)) {
1079  "Control flow cannot be substituted for a select",
1080  "control flow cannot be substituted for a select",
1081  "NoCFGForSelect", ORE, TheLoop,
1082  BB->getTerminator());
1083  return false;
1084  }
1085  }
1086 
1087  // We can if-convert this loop.
1088  return true;
1089 }
1090 
1091 // Helper function to canVectorizeLoopNestCFG.
1092 bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
1093  bool UseVPlanNativePath) {
1094  assert((UseVPlanNativePath || Lp->isInnermost()) &&
1095  "VPlan-native path is not enabled.");
1096 
1097  // TODO: ORE should be improved to show more accurate information when an
1098  // outer loop can't be vectorized because a nested loop is not understood or
1099  // legal. Something like: "outer_loop_location: loop not vectorized:
1100  // (inner_loop_location) loop control flow is not understood by vectorizer".
1101 
1102  // Store the result and return it at the end instead of exiting early, in case
1103  // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1104  bool Result = true;
1105  bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1106 
1107  // We must have a loop in canonical form. Loops with indirectbr in them cannot
1108  // be canonicalized.
1109  if (!Lp->getLoopPreheader()) {
1110  reportVectorizationFailure("Loop doesn't have a legal pre-header",
1111  "loop control flow is not understood by vectorizer",
1112  "CFGNotUnderstood", ORE, TheLoop);
1113  if (DoExtraAnalysis)
1114  Result = false;
1115  else
1116  return false;
1117  }
1118 
1119  // We must have a single backedge.
1120  if (Lp->getNumBackEdges() != 1) {
1121  reportVectorizationFailure("The loop must have a single backedge",
1122  "loop control flow is not understood by vectorizer",
1123  "CFGNotUnderstood", ORE, TheLoop);
1124  if (DoExtraAnalysis)
1125  Result = false;
1126  else
1127  return false;
1128  }
1129 
1130  return Result;
1131 }
1132 
1133 bool LoopVectorizationLegality::canVectorizeLoopNestCFG(
1134  Loop *Lp, bool UseVPlanNativePath) {
1135  // Store the result and return it at the end instead of exiting early, in case
1136  // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1137  bool Result = true;
1138  bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1139  if (!canVectorizeLoopCFG(Lp, UseVPlanNativePath)) {
1140  if (DoExtraAnalysis)
1141  Result = false;
1142  else
1143  return false;
1144  }
1145 
1146  // Recursively check whether the loop control flow of nested loops is
1147  // understood.
1148  for (Loop *SubLp : *Lp)
1149  if (!canVectorizeLoopNestCFG(SubLp, UseVPlanNativePath)) {
1150  if (DoExtraAnalysis)
1151  Result = false;
1152  else
1153  return false;
1154  }
1155 
1156  return Result;
1157 }
1158 
1159 bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
1160  // Store the result and return it at the end instead of exiting early, in case
1161  // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1162  bool Result = true;
1163 
1164  bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1165  // Check whether the loop-related control flow in the loop nest is expected by
1166  // vectorizer.
1167  if (!canVectorizeLoopNestCFG(TheLoop, UseVPlanNativePath)) {
1168  if (DoExtraAnalysis)
1169  Result = false;
1170  else
1171  return false;
1172  }
1173 
1174  // We need to have a loop header.
1175  LLVM_DEBUG(dbgs() << "LV: Found a loop: " << TheLoop->getHeader()->getName()
1176  << '\n');
1177 
1178  // Specific checks for outer loops. We skip the remaining legal checks at this
1179  // point because they don't support outer loops.
1180  if (!TheLoop->isInnermost()) {
1181  assert(UseVPlanNativePath && "VPlan-native path is not enabled.");
1182 
1183  if (!canVectorizeOuterLoop()) {
1184  reportVectorizationFailure("Unsupported outer loop",
1185  "unsupported outer loop",
1186  "UnsupportedOuterLoop",
1187  ORE, TheLoop);
1188  // TODO: Implement DoExtraAnalysis when subsequent legal checks support
1189  // outer loops.
1190  return false;
1191  }
1192 
1193  LLVM_DEBUG(dbgs() << "LV: We can vectorize this outer loop!\n");
1194  return Result;
1195  }
1196 
1197  assert(TheLoop->isInnermost() && "Inner loop expected.");
1198  // Check if we can if-convert non-single-bb loops.
1199  unsigned NumBlocks = TheLoop->getNumBlocks();
1200  if (NumBlocks != 1 && !canVectorizeWithIfConvert()) {
1201  LLVM_DEBUG(dbgs() << "LV: Can't if-convert the loop.\n");
1202  if (DoExtraAnalysis)
1203  Result = false;
1204  else
1205  return false;
1206  }
1207 
1208  // Check if we can vectorize the instructions and CFG in this loop.
1209  if (!canVectorizeInstrs()) {
1210  LLVM_DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n");
1211  if (DoExtraAnalysis)
1212  Result = false;
1213  else
1214  return false;
1215  }
1216 
1217  // Go over each instruction and look at memory deps.
1218  if (!canVectorizeMemory()) {
1219  LLVM_DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n");
1220  if (DoExtraAnalysis)
1221  Result = false;
1222  else
1223  return false;
1224  }
1225 
1226  LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop"
1227  << (LAI->getRuntimePointerChecking()->Need
1228  ? " (with a runtime bound check)"
1229  : "")
1230  << "!\n");
1231 
1232  unsigned SCEVThreshold = VectorizeSCEVCheckThreshold;
1233  if (Hints->getForce() == LoopVectorizeHints::FK_Enabled)
1234  SCEVThreshold = PragmaVectorizeSCEVCheckThreshold;
1235 
1236  if (PSE.getUnionPredicate().getComplexity() > SCEVThreshold) {
1237  reportVectorizationFailure("Too many SCEV checks needed",
1238  "Too many SCEV assumptions need to be made and checked at runtime",
1239  "TooManySCEVRunTimeChecks", ORE, TheLoop);
1240  if (DoExtraAnalysis)
1241  Result = false;
1242  else
1243  return false;
1244  }
1245 
1246  // Okay! We've done all the tests. If any have failed, return false. Otherwise
1247  // we can vectorize, and at this point we don't have any other mem analysis
1248  // which may limit our maximum vectorization factor, so just return true with
1249  // no restrictions.
1250  return Result;
1251 }
1252 
1254 
1255  LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n");
1256 
1257  SmallPtrSet<const Value *, 8> ReductionLiveOuts;
1258 
1259  for (auto &Reduction : getReductionVars())
1260  ReductionLiveOuts.insert(Reduction.second.getLoopExitInstr());
1261 
1262  // TODO: handle non-reduction outside users when tail is folded by masking.
1263  for (auto *AE : AllowedExit) {
1264  // Check that all users of allowed exit values are inside the loop or
1265  // are the live-out of a reduction.
1266  if (ReductionLiveOuts.count(AE))
1267  continue;
1268  for (User *U : AE->users()) {
1269  Instruction *UI = cast<Instruction>(U);
1270  if (TheLoop->contains(UI))
1271  continue;
1272  LLVM_DEBUG(
1273  dbgs()
1274  << "LV: Cannot fold tail by masking, loop has an outside user for "
1275  << *UI << "\n");
1276  return false;
1277  }
1278  }
1279 
1280  // The list of pointers that we can safely read and write to remains empty.
1281  SmallPtrSet<Value *, 8> SafePointers;
1282 
1284  SmallPtrSet<Instruction *, 8> TmpConditionalAssumes;
1285 
1286  // Check and mark all blocks for predication, including those that ordinarily
1287  // do not need predication such as the header block.
1288  for (BasicBlock *BB : TheLoop->blocks()) {
1289  if (!blockCanBePredicated(BB, SafePointers, TmpMaskedOp,
1290  TmpConditionalAssumes)) {
1291  LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking as requested.\n");
1292  return false;
1293  }
1294  }
1295 
1296  LLVM_DEBUG(dbgs() << "LV: can fold tail by masking.\n");
1297 
1298  MaskedOp.insert(TmpMaskedOp.begin(), TmpMaskedOp.end());
1299  ConditionalAssumes.insert(TmpConditionalAssumes.begin(),
1300  TmpConditionalAssumes.end());
1301 
1302  return true;
1303 }
1304 
1305 } // namespace llvm
i
i
Definition: README.txt:29
llvm::mustSuppressSpeculation
bool mustSuppressSpeculation(const LoadInst &LI)
Return true if speculation of the given load must be suppressed to avoid ordering or interfering with...
Definition: ValueTracking.cpp:4577
llvm::OptimizationRemarkMissed
Diagnostic information for missed-optimization remarks.
Definition: DiagnosticInfo.h:730
llvm::Loop::isLoopInvariant
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:64
llvm
---------------------— PointerInfo ------------------------------------—
Definition: AllocatorList.h:23
llvm::Instruction::getModule
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:66
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:112
llvm::LoopAccessInfo::isUniform
bool isUniform(Value *V) const
Returns true if the value V is uniform within the loop.
Definition: LoopAccessAnalysis.cpp:2174
llvm::RecurrenceDescriptor::isReductionPHI
static bool isReductionPHI(PHINode *Phi, Loop *TheLoop, RecurrenceDescriptor &RedDes, DemandedBits *DB=nullptr, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr)
Returns true if Phi is a reduction in TheLoop.
Definition: IVDescriptors.cpp:663
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
IntrinsicInst.h
llvm::Type::isPointerTy
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:228
llvm::ElementCount
Definition: TypeSize.h:386
EnableIfConversion
static cl::opt< bool > EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden, cl::desc("Enable if-conversion during vectorization."))
llvm::getVectorIntrinsicIDForCall
Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
Definition: VectorUtils.cpp:130
Loads.h
llvm::InductionDescriptor::getExactFPMathInst
Instruction * getExactFPMathInst()
Returns floating-point induction operator that does not allow reassociation (transforming the inducti...
Definition: IVDescriptors.h:320
llvm::Function
Definition: Function.h:61
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:530
llvm::RecurrenceDescriptor::hasExactFPMath
bool hasExactFPMath() const
Returns true if the recurrence has floating-point math that requires precise (ordered) operations.
Definition: IVDescriptors.h:194
llvm::LoopBase::contains
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:122
llvm::ARM_MB::LD
@ LD
Definition: ARMBaseInfo.h:72
llvm::StringRef::npos
static constexpr size_t npos
Definition: StringRef.h:60
llvm::LinearPolySize< ElementCount >::isKnownLE
static bool isKnownLE(const LinearPolySize &LHS, const LinearPolySize &RHS)
Definition: TypeSize.h:341
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
llvm::LoopVectorizationRequirements::addRuntimePointerChecks
void addRuntimePointerChecks(unsigned Num)
Definition: LoopVectorizationLegality.h:221
SizeOpts.h
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:168
llvm::Loop::getStartLoc
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition: LoopInfo.cpp:633
llvm::TargetLibraryInfo::isFunctionVectorizable
bool isFunctionVectorizable(StringRef F, const ElementCount &VF) const
Definition: TargetLibraryInfo.h:326
llvm::PredicatedScalarEvolution::getUnionPredicate
const SCEVUnionPredicate & getUnionPredicate() const
Definition: ScalarEvolution.cpp:13309
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:461
llvm::VectorizerParams::VectorizationInterleave
static unsigned VectorizationInterleave
Interleave factor as overridden by the user.
Definition: LoopAccessAnalysis.h:44
llvm::LoopVectorizeHints::SK_Unspecified
@ SK_Unspecified
Not selected.
Definition: LoopVectorizationLegality.h:101
ValueTracking.h
llvm::OptimizationRemarkEmitter::allowExtraAnalysis
bool allowExtraAnalysis(StringRef PassName) const
Whether we allow for extra compile-time budget to perform more analysis to produce fewer false positi...
Definition: OptimizationRemarkEmitter.h:98
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
llvm::LoopVectorizationLegality::isInductionVariable
bool isInductionVariable(const Value *V)
Returns True if V can be considered as an induction variable in this loop.
Definition: LoopVectorizationLegality.cpp:946
VectorizeSCEVCheckThreshold
static cl::opt< unsigned > VectorizeSCEVCheckThreshold("vectorize-scev-check-threshold", cl::init(16), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed."))
llvm::InductionDescriptor::IK_IntInduction
@ IK_IntInduction
Integer induction variable. Step = C.
Definition: IVDescriptors.h:274
llvm::LoopVectorizeHints::SK_FixedWidthOnly
@ SK_FixedWidthOnly
Disables vectorization with scalable vectors.
Definition: LoopVectorizationLegality.h:103
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::getPtrStride
int64_t getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const ValueToValueMap &StridesMap=ValueToValueMap(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of the access type size.
Definition: LoopAccessAnalysis.cpp:1029
llvm::LoopVectorizeHints::getWidth
ElementCount getWidth() const
Definition: LoopVectorizationLegality.h:125
llvm::LoopVectorizationLegality::canVectorizeFPMath
bool canVectorizeFPMath(bool EnableStrictReductions)
Returns true if it is legal to vectorize the FP math operations in this loop.
Definition: LoopVectorizationLegality.cpp:906
llvm::LoopVectorizeHints::getIsVectorized
unsigned getIsVectorized() const
Definition: LoopVectorizationLegality.h:138
llvm::TargetTransformInfo::isLegalNTLoad
bool isLegalNTLoad(Type *DataType, Align Alignment) const
Return true if the target supports nontemporal load.
Definition: TargetTransformInfo.cpp:392
llvm::TargetTransformInfo::isLegalNTStore
bool isLegalNTStore(Type *DataType, Align Alignment) const
Return true if the target supports nontemporal store.
Definition: TargetTransformInfo.cpp:387
T
#define T
Definition: Mips16ISelLowering.cpp:341
llvm::ConstantAsMetadata::get
static ConstantAsMetadata * get(Constant *C)
Definition: Metadata.h:419
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:449
llvm::reportVectorizationFailure
void reportVectorizationFailure(const StringRef DebugMsg, const StringRef OREMsg, const StringRef ORETag, OptimizationRemarkEmitter *ORE, Loop *TheLoop, Instruction *I=nullptr)
Reports a vectorization failure: print DebugMsg for debugging purposes along with the corresponding o...
Definition: LoopVectorize.cpp:1121
llvm::LoadInst::getPointerOperand
Value * getPointerOperand()
Definition: Instructions.h:267
llvm::isPowerOf2_32
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:491
llvm::isUniformLoopNest
static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp)
Definition: LoopVectorizationLegality.cpp:357
llvm::RISCVFeatures::validate
void validate(const Triple &TT, const FeatureBitset &FeatureBits)
Definition: RISCVBaseInfo.cpp:90
llvm::LoopVectorizationLegality::isInductionPhi
bool isInductionPhi(const Value *V)
Returns True if V is a Phi node of an induction variable in this loop.
Definition: LoopVectorizationLegality.cpp:932
llvm::LoopVectorizationLegality::prepareToFoldTailByMasking
bool prepareToFoldTailByMasking()
Return true if we can vectorize this loop while folding its tail by masking, and mark all respective ...
Definition: LoopVectorizationLegality.cpp:1253
llvm::Type::isFloatingPointTy
bool isFloatingPointTy() const
Return true if this is one of the six floating-point types.
Definition: Type.h:162
llvm::Type::getInt32Ty
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:203
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::MDNode::get
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1203
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::MDNode::getNumOperands
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1113
llvm::makePostTransformationMetadata
llvm::MDNode * makePostTransformationMetadata(llvm::LLVMContext &Context, MDNode *OrigLoopID, llvm::ArrayRef< llvm::StringRef > RemovePrefixes, llvm::ArrayRef< llvm::MDNode * > AddAttrs)
Create a new LoopID after the loop has been transformed.
Definition: LoopInfo.cpp:1124
Context
LLVMContext & Context
Definition: NVVMIntrRange.cpp:66
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::LoopVectorizeHints::FK_Undefined
@ FK_Undefined
Not selected.
Definition: LoopVectorizationLegality.h:94
Arg
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
Definition: AMDGPULibCalls.cpp:206
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1547
llvm::shouldOptimizeForSize
bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
Definition: MachineSizeOpts.cpp:183
llvm::InductionDescriptor
A struct for saving information about induction variables.
Definition: IVDescriptors.h:269
llvm::LoopAccessInfo::hasDependenceInvolvingLoopInvariantAddress
bool hasDependenceInvolvingLoopInvariantAddress() const
If the loop has memory dependence involving an invariant address, i.e.
Definition: LoopAccessAnalysis.h:578
MaxInterleaveFactor
static const unsigned MaxInterleaveFactor
Maximum vectorization interleave count.
Definition: LoopVectorizationLegality.cpp:76
ScalableVectorization
static cl::opt< LoopVectorizeHints::ScalableForceKind > ScalableVectorization("scalable-vectorization", cl::init(LoopVectorizeHints::SK_FixedWidthOnly), cl::Hidden, cl::desc("Control whether the compiler can use scalable vectors to " "vectorize a loop"), cl::values(clEnumValN(LoopVectorizeHints::SK_FixedWidthOnly, "off", "Scalable vectorization is disabled."), clEnumValN(LoopVectorizeHints::SK_PreferFixedWidth, "on", "Scalable vectorization is available, but favor fixed-width " "vectorization when the cost is inconclusive."), clEnumValN(LoopVectorizeHints::SK_PreferScalable, "preferred", "Scalable vectorization is available and favored when the " "cost is inconclusive.")))
llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
isZero
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition: Lint.cpp:519
llvm::LoopAccessInfo::blockNeedsPredication
static bool blockNeedsPredication(BasicBlock *BB, Loop *TheLoop, DominatorTree *DT)
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
Definition: LoopAccessAnalysis.cpp:2145
llvm::ValueToValueMap
DenseMap< const Value *, Value * > ValueToValueMap
Definition: ScalarEvolutionExpressions.h:857
llvm::User
Definition: User.h:44
llvm::LibFunc
LibFunc
Definition: TargetLibraryInfo.h:34
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::isUniformLoop
static bool isUniformLoop(Loop *Lp, Loop *OuterLp)
Definition: LoopVectorizationLegality.cpp:312
llvm::RecurrenceDescriptor::getExactFPMathInst
Instruction * getExactFPMathInst() const
Returns 1st non-reassociative FP instruction in the PHI node's use-chain.
Definition: IVDescriptors.h:197
llvm::CallBase::getCalledFunction
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation.
Definition: InstrTypes.h:1393
llvm::LoopVectorizeHints::vectorizeAnalysisPassName
const char * vectorizeAnalysisPassName() const
If hints are provided that force vectorization, use the AlwaysPrint pass name to force the frontend t...
Definition: LoopVectorizationLegality.cpp:211
llvm::ms_demangle::QualifierMangleMode::Result
@ Result
llvm::LoopVectorizationRequirements::addExactFPMathInst
void addExactFPMathInst(Instruction *I)
Track the 1st floating-point instruction that can not be reassociated.
Definition: LoopVectorizationLegality.h:216
llvm::LoopBase::blocks
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:178
llvm::LoopVectorizationLegality::blockNeedsPredication
bool blockNeedsPredication(BasicBlock *BB) const
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
Definition: LoopVectorizationLegality.cpp:954
TargetLibraryInfo.h
llvm::Type::isVectorTy
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:237
EnableVPlanPredication
cl::opt< bool > EnableVPlanPredication
llvm::PHINode::getIncomingValueForBlock
Value * getIncomingValueForBlock(const BasicBlock *BB) const
Definition: Instructions.h:2810
llvm::TargetLibraryInfo::getLibFunc
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
Definition: TargetLibraryInfo.h:289
llvm::Instruction
Definition: Instruction.h:45
llvm::Type::getScalarSizeInBits
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition: Type.cpp:153
llvm::PGSOQueryType::IRPass
@ IRPass
llvm::LoopVectorizeHints::getForce
enum ForceKind getForce() const
Definition: LoopVectorizationLegality.h:140
llvm::BasicBlock::phis
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:354
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:900
llvm::LoopVectorizationLegality::isFirstOrderRecurrence
bool isFirstOrderRecurrence(const PHINode *Phi)
Returns True if Phi is a first-order recurrence in this loop.
Definition: LoopVectorizationLegality.cpp:950
PatternMatch.h
llvm::FixedVectorType::get
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:648
llvm::Metadata
Root of the metadata hierarchy.
Definition: Metadata.h:62
llvm::PHINode::getNumIncomingValues
unsigned getNumIncomingValues() const
Return the number of incoming edges.
Definition: Instructions.h:2717
llvm::LinearPolySize< ElementCount >::getFixed
static ElementCount getFixed(ScalarTy MinVal)
Definition: TypeSize.h:284
llvm::lltok::Kind
Kind
Definition: LLToken.h:18
LoopInfo.h
llvm::Twine::str
std::string str() const
Return the twine contents as a std::string.
Definition: Twine.cpp:17
llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:201
llvm::MDNode::getOperand
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1107
llvm::VectorType::isValidElementType
static bool isValidElementType(Type *ElemTy)
Return true if the specified type is valid as a element type.
Definition: Type.cpp:639
llvm::canIfConvertPHINodes
static bool canIfConvertPHINodes(BasicBlock *BB)
Check whether it is safe to if-convert this phi node.
Definition: LoopVectorizationLegality.cpp:373
VectorUtils.h
llvm::cl::opt< bool >
llvm::SCEVUnionPredicate::getComplexity
unsigned getComplexity() const override
We estimate the complexity of a union predicate as the size number of predicates in the union.
Definition: ScalarEvolution.h:450
llvm::LoopVectorizeHints::SK_PreferScalable
@ SK_PreferScalable
Vectorize loops using scalable vectors or fixed-width vectors, but favor scalable vectors when the co...
Definition: LoopVectorizationLegality.h:107
llvm::cl::values
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:699
llvm::RuntimePointerChecking::Need
bool Need
This flag indicates if we need to add the runtime check.
Definition: LoopAccessAnalysis.h:448
llvm::VFDatabase::getMappings
static SmallVector< VFInfo, 8 > getMappings(const CallInst &CI)
Retrieve all the VFInfo instances associated to the CallInst CI.
Definition: VectorUtils.h:249
llvm::HintsAllowReordering
cl::opt< bool > HintsAllowReordering("hints-allow-reordering", cl::init(true), cl::Hidden, cl::desc("Allow enabling loop hints to reorder " "FP operations during vectorization."))
llvm::SmallPtrSetImpl::end
iterator end() const
Definition: SmallPtrSet.h:407
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
llvm::LoopVectorizeHints::FK_Enabled
@ FK_Enabled
Forcing enabled.
Definition: LoopVectorizationLegality.h:96
llvm::LoopVectorizationLegality::isCastedInductionVariable
bool isCastedInductionVariable(const Value *V)
Returns True if V is a cast that is part of an induction def-use chain, and had been proven to be red...
Definition: LoopVectorizationLegality.cpp:941
llvm::LoopVectorizeHints::getInterleave
unsigned getInterleave() const
Definition: LoopVectorizationLegality.h:129
llvm::LoopVectorizeHints::allowVectorization
bool allowVectorization(Function *F, Loop *L, bool VectorizeOnlyWhenForced) const
Definition: LoopVectorizationLegality.cpp:152
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:57
llvm::DenseMap< const Value *, Value * >
llvm::LoopAccessInfo::getRuntimePointerChecking
const RuntimePointerChecking * getRuntimePointerChecking() const
Definition: LoopAccessAnalysis.h:529
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::hasVectorInstrinsicScalarOpd
bool hasVectorInstrinsicScalarOpd(Intrinsic::ID ID, unsigned ScalarOpdIdx)
Identifies if the vector form of the intrinsic has a scalar operand.
Definition: VectorUtils.cpp:99
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
llvm::LoopBase::getLoopPreheader
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:167
llvm::SmallPtrSetImpl::begin
iterator begin() const
Definition: SmallPtrSet.h:402
llvm::VectorizerParams::MaxVectorWidth
static const unsigned MaxVectorWidth
Maximum SIMD width.
Definition: LoopAccessAnalysis.h:39
llvm::LoopAccessInfo::getReport
const OptimizationRemarkAnalysis * getReport() const
The diagnostics report generated for the analysis.
Definition: LoopAccessAnalysis.h:553
llvm::MDString::get
static MDString * get(LLVMContext &Context, StringRef Str)
Definition: Metadata.cpp:473
llvm::LoopBase::getLoopLatch
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:216
llvm::LoopVectorizationLegality::getReductionVars
ReductionList & getReductionVars()
Returns the reduction variables found in the loop.
Definition: LoopVectorizationLegality.h:296
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::PredicatedScalarEvolution::getSCEV
const SCEV * getSCEV(Value *V)
Returns the SCEV expression of V, in the context of the current SCEV predicate.
Definition: ScalarEvolution.cpp:13274
SI
StandardInstrumentations SI(Debug, VerifyEach)
llvm::OptimizationRemarkEmitter::emit
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Definition: OptimizationRemarkEmitter.cpp:77
llvm::LoopAccessInfo::canVectorizeMemory
bool canVectorizeMemory() const
Return true if we can analyze the memory accesses in the loop and there are no memory dependence cycles.
Definition: LoopAccessAnalysis.h:522
llvm::isTLIScalarize
static bool isTLIScalarize(const TargetLibraryInfo &TLI, const CallInst &CI)
Checks if a function is scalarizable according to the TLI, in the sense that it should be vectorized ...
Definition: LoopVectorizationLegality.cpp:598
llvm::LoopVectorizationRequirements::getExactFPInst
Instruction * getExactFPInst()
Definition: LoopVectorizationLegality.h:224
llvm::MDNode
Metadata node.
Definition: Metadata.h:901
llvm::isDereferenceableAndAlignedInLoop
bool isDereferenceableAndAlignedInLoop(LoadInst *LI, Loop *L, ScalarEvolution &SE, DominatorTree &DT)
Return true if we can prove that the given load (which is assumed to be within the specified loop) wo...
Definition: Loads.cpp:273
llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:382
llvm::LoopVectorizationLegality::isUniform
bool isUniform(Value *V)
Returns true if the value V is uniform within the loop.
Definition: LoopVectorizationLegality.cpp:439
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:75
LV_NAME
#define LV_NAME
Definition: LoopVectorizationLegality.cpp:31
llvm::size
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1528
llvm::SCEVUnionPredicate::isAlwaysTrue
bool isAlwaysTrue() const override
Implementation of the SCEVPredicate interface.
Definition: ScalarEvolution.cpp:13218
llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: OptimizationRemarkEmitter.h:33
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1554
llvm::LoopVectorizationLegality::isConsecutivePtr
int isConsecutivePtr(Type *AccessTy, Value *Ptr) const
Check if this pointer is consecutive when vectorizing.
Definition: LoopVectorizationLegality.cpp:422
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
llvm::LoopVectorizeHints::ScalableForceKind
ScalableForceKind
Definition: LoopVectorizationLegality.h:99
llvm::Loop::getCanonicalInductionVariable
PHINode * getCanonicalInductionVariable() const
Check to see if the loop has a canonical induction variable: an integer recurrence that starts at 0 a...
Definition: LoopInfo.cpp:150
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
clEnumValN
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:674
llvm::LoopVectorizationLegality::getInductionVars
InductionList & getInductionVars()
Returns the induction variables found in the loop.
Definition: LoopVectorizationLegality.h:299
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
llvm::LoopVectorizeHints::emitRemarkWithHints
void emitRemarkWithHints() const
Dumps all the hint information.
Definition: LoopVectorizationLegality.cpp:185
llvm::convertPointerToIntegerType
static Type * convertPointerToIntegerType(const DataLayout &DL, Type *Ty)
Definition: LoopVectorizationLegality.cpp:383
llvm::OptimizationRemarkAnalysis
Diagnostic information for optimization analysis remarks.
Definition: DiagnosticInfo.h:776
llvm::ifs::IFSSymbolType::Func
@ Func
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:297
llvm::Type::getContext
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:127
llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:175
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:148
llvm::VectorizerParams::isInterleaveForced
static bool isInterleaveForced()
True if force-vector-interleave was specified by the user.
Definition: LoopAccessAnalysis.cpp:132
LoopVectorize.h
llvm::BasicBlock::getContext
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:32
llvm::MapVector::empty
bool empty() const
Definition: MapVector.h:79
llvm::LoopBase::isInnermost
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
Definition: LoopInfo.h:165
llvm::InductionDescriptor::isInductionPHI
static bool isInductionPHI(PHINode *Phi, const Loop *L, ScalarEvolution *SE, InductionDescriptor &D, const SCEV *Expr=nullptr, SmallVectorImpl< Instruction * > *CastsToIgnore=nullptr)
Returns true if Phi is an induction in the loop L.
Definition: IVDescriptors.cpp:1232
PragmaVectorizeSCEVCheckThreshold
static cl::opt< unsigned > PragmaVectorizeSCEVCheckThreshold("pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed with a " "vectorize(enable) pragma"))
llvm::LoopInfoBase::isLoopHeader
bool isLoopHeader(const BlockT *BB) const
Definition: LoopInfo.h:980
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:83
llvm::MapVector::count
size_type count(const KeyT &Key) const
Definition: MapVector.h:142
llvm::GraphProgram::Name
Name
Definition: GraphWriter.h:52
H
#define H(x, y, z)
Definition: MD5.cpp:58
llvm::Loop::setLoopID
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
Definition: LoopInfo.cpp:527
llvm::VectorizationFactor
TODO: The following VectorizationFactor was pulled out of LoopVectorizationCostModel class.
Definition: LoopVectorizationPlanner.h:180
llvm::Loop::getLoopID
MDNode * getLoopID() const
Return the llvm.loop loop id metadata node for this loop if it is present.
Definition: LoopInfo.cpp:503
llvm::RecurrenceDescriptor::isFirstOrderRecurrence
static bool isFirstOrderRecurrence(PHINode *Phi, Loop *TheLoop, MapVector< Instruction *, Instruction * > &SinkAfter, DominatorTree *DT)
Returns true if Phi is a first-order recurrence.
Definition: IVDescriptors.cpp:732
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:104
llvm::LinearPolySize< ElementCount >::getScalable
static ElementCount getScalable(ScalarTy MinVal)
Definition: TypeSize.h:287
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:219
LoopVectorizationLegality.h
llvm::LoopAccessInfo::getNumRuntimePointerChecks
unsigned getNumRuntimePointerChecks() const
Number of memchecks required to prove independence of otherwise may-alias pointers.
Definition: LoopAccessAnalysis.h:535
llvm::PredicatedScalarEvolution::addPredicate
void addPredicate(const SCEVPredicate &Pred)
Adds a new predicate.
Definition: ScalarEvolution.cpp:13302
llvm::LoopBase::getNumBackEdges
unsigned getNumBackEdges() const
Calculate the number of back edges to the loop header.
Definition: LoopInfo.h:250
llvm::OptimizationRemarkAnalysis::AlwaysPrint
static const char * AlwaysPrint
Definition: DiagnosticInfo.h:816
llvm::LoopVectorizeHints::LoopVectorizeHints
LoopVectorizeHints(const Loop *L, bool InterleaveOnlyWhenForced, OptimizationRemarkEmitter &ORE)
Definition: LoopVectorizationLegality.cpp:96
llvm::UnivariateLinearPolyBase::isZero
bool isZero() const
Definition: TypeSize.h:229
llvm::AMDGPU::Hwreg::Width
Width
Definition: SIDefines.h:410
Predicate
llvm::RecurrenceDescriptor
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Definition: IVDescriptors.h:67
DEBUG_TYPE
#define DEBUG_TYPE
Definition: LoopVectorizationLegality.cpp:32
llvm::LoopVectorizeHints::setAlreadyVectorized
void setAlreadyVectorized()
Mark the loop L as already vectorized by setting the width to 1.
Definition: LoopVectorizationLegality.cpp:133
llvm::VectorizerParams
Collection of parameters shared between the Loop Vectorizer and the Loop Access Analysis.
Definition: LoopAccessAnalysis.h:37
llvm::PHINode
Definition: Instructions.h:2625
llvm::LoopVectorizeHints::allowReordering
bool allowReordering() const
When enabling loop hints are provided we allow the vectorizer to change the order of operations that ...
Definition: LoopVectorizationLegality.cpp:221
llvm::SmallVectorImpl< Instruction * >
llvm::TargetLibraryInfo::getWidestVF
void getWidestVF(StringRef ScalarF, ElementCount &FixedVF, ElementCount &ScalableVF) const
Returns the largest vectorization factor used in the list of vector functions.
Definition: TargetLibraryInfo.h:423
llvm::Module::getDataLayout
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:401
llvm::SmallPtrSetImpl< Value * >
llvm::getLoadStorePointerOperand
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
Definition: Instructions.h:5293
llvm::RecurrenceDescriptor::isOrdered
bool isOrdered() const
Expose an ordered FP reduction to the instance users.
Definition: IVDescriptors.h:236
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1475
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::getWiderType
static Type * getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1)
Definition: LoopVectorizationLegality.cpp:395
llvm::TargetLibraryInfo::hasOptimizedCodeGen
bool hasOptimizedCodeGen(LibFunc F) const
Tests if the function is both available and a candidate for optimized code generation.
Definition: TargetLibraryInfo.h:338
llvm::RecurrenceDescriptor::getLoopExitInstr
Instruction * getLoopExitInstr() const
Definition: IVDescriptors.h:190
llvm::LoopVectorizeHints::SK_PreferFixedWidth
@ SK_PreferFixedWidth
Vectorize loops using scalable vectors or fixed-width vectors, but favor fixed-width vectors when the...
Definition: LoopVectorizationLegality.h:110
llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition: AMDGPUMetadata.h:389
llvm::cl::desc
Definition: CommandLine.h:414
llvm::hasOutsideLoopUser
static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst, SmallPtrSetImpl< Value * > &AllowedExit)
Check that the instruction has outside loop users and is not an identified reduction variable.
Definition: LoopVectorizationLegality.cpp:405
llvm::LoopAccessInfo::getPSE
const PredicatedScalarEvolution & getPSE() const
Used to add runtime SCEV checks.
Definition: LoopAccessAnalysis.h:587
llvm::PredicatedScalarEvolution::getSE
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
Definition: ScalarEvolution.h:2130
llvm::LoopVectorizeHints::FK_Disabled
@ FK_Disabled
Forcing disabled.
Definition: LoopVectorizationLegality.h:95
Reduction
loop Loop Strength Reduction
Definition: LoopStrengthReduce.cpp:6410
llvm::MDString
A single uniqued string.
Definition: Metadata.h:611
llvm::LoopVectorizationLegality::canVectorize
bool canVectorize(bool UseVPlanNativePath)
Returns true if it is legal to vectorize this loop.
Definition: LoopVectorizationLegality.cpp:1159
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
llvm::Value::users
iterator_range< user_iterator > users()
Definition: Value.h:422
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:364
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:37