LLVM  13.0.0git
LoopVectorizationLegality.cpp
Go to the documentation of this file.
1 //===- LoopVectorizationLegality.cpp --------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file provides loop vectorization legality analysis. Original code
10 // resided in LoopVectorize.cpp for a long time.
11 //
12 // At this point, it is implemented as a utility class, not as an analysis
13 // pass. It should be easy to create an analysis pass around it if there
14 // is a need (but D45420 needs to happen first).
15 //
16 
18 #include "llvm/Analysis/Loads.h"
19 #include "llvm/Analysis/LoopInfo.h"
23 #include "llvm/IR/IntrinsicInst.h"
24 #include "llvm/IR/PatternMatch.h"
27 
28 using namespace llvm;
29 using namespace PatternMatch;
30 
31 #define LV_NAME "loop-vectorize"
32 #define DEBUG_TYPE LV_NAME
33 
35 
36 static cl::opt<bool>
37  EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
38  cl::desc("Enable if-conversion during vectorization."));
39 
40 // TODO: Move size-based thresholds out of legality checking, make cost based
41 // decisions instead of hard thresholds.
43  "vectorize-scev-check-threshold", cl::init(16), cl::Hidden,
44  cl::desc("The maximum number of SCEV checks allowed."));
45 
47  "pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden,
48  cl::desc("The maximum number of SCEV checks allowed with a "
49  "vectorize(enable) pragma"));
50 
51 /// Maximum vectorization interleave count.
52 static const unsigned MaxInterleaveFactor = 16;
53 
54 namespace llvm {
55 
56 bool LoopVectorizeHints::Hint::validate(unsigned Val) {
57  switch (Kind) {
58  case HK_WIDTH:
59  return isPowerOf2_32(Val) && Val <= VectorizerParams::MaxVectorWidth;
60  case HK_UNROLL:
61  return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor;
62  case HK_FORCE:
63  return (Val <= 1);
64  case HK_ISVECTORIZED:
65  case HK_PREDICATE:
66  case HK_SCALABLE:
67  return (Val == 0 || Val == 1);
68  }
69  return false;
70 }
71 
73  bool InterleaveOnlyWhenForced,
75  : Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH),
76  Interleave("interleave.count", InterleaveOnlyWhenForced, HK_UNROLL),
77  Force("vectorize.enable", FK_Undefined, HK_FORCE),
78  IsVectorized("isvectorized", 0, HK_ISVECTORIZED),
79  Predicate("vectorize.predicate.enable", FK_Undefined, HK_PREDICATE),
80  Scalable("vectorize.scalable.enable", false, HK_SCALABLE), TheLoop(L),
81  ORE(ORE) {
82  // Populate values with existing loop metadata.
83  getHintsFromMetadata();
84 
85  // force-vector-interleave overrides DisableInterleaving.
88 
89  if (IsVectorized.Value != 1)
90  // If the vectorization width and interleaving count are both 1 then
91  // consider the loop to have been already vectorized because there's
92  // nothing more that we can do.
93  IsVectorized.Value =
94  getWidth() == ElementCount::getFixed(1) && Interleave.Value == 1;
95  LLVM_DEBUG(if (InterleaveOnlyWhenForced && Interleave.Value == 1) dbgs()
96  << "LV: Interleaving disabled by the pass manager\n");
97 }
98 
100  LLVMContext &Context = TheLoop->getHeader()->getContext();
101 
102  MDNode *IsVectorizedMD = MDNode::get(
103  Context,
104  {MDString::get(Context, "llvm.loop.isvectorized"),
106  MDNode *LoopID = TheLoop->getLoopID();
107  MDNode *NewLoopID =
109  {Twine(Prefix(), "vectorize.").str(),
110  Twine(Prefix(), "interleave.").str()},
111  {IsVectorizedMD});
112  TheLoop->setLoopID(NewLoopID);
113 
114  // Update internal cache.
115  IsVectorized.Value = 1;
116 }
117 
119  Function *F, Loop *L, bool VectorizeOnlyWhenForced) const {
121  LLVM_DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n");
123  return false;
124  }
125 
126  if (VectorizeOnlyWhenForced && getForce() != LoopVectorizeHints::FK_Enabled) {
127  LLVM_DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n");
129  return false;
130  }
131 
132  if (getIsVectorized() == 1) {
133  LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n");
134  // FIXME: Add interleave.disable metadata. This will allow
135  // vectorize.disable to be used without disabling the pass and errors
136  // to differentiate between disabled vectorization and a width of 1.
137  ORE.emit([&]() {
139  "AllDisabled", L->getStartLoc(),
140  L->getHeader())
141  << "loop not vectorized: vectorization and interleaving are "
142  "explicitly disabled, or the loop has already been "
143  "vectorized";
144  });
145  return false;
146  }
147 
148  return true;
149 }
150 
152  using namespace ore;
153 
154  ORE.emit([&]() {
155  if (Force.Value == LoopVectorizeHints::FK_Disabled)
156  return OptimizationRemarkMissed(LV_NAME, "MissedExplicitlyDisabled",
157  TheLoop->getStartLoc(),
158  TheLoop->getHeader())
159  << "loop not vectorized: vectorization is explicitly disabled";
160  else {
161  OptimizationRemarkMissed R(LV_NAME, "MissedDetails",
162  TheLoop->getStartLoc(), TheLoop->getHeader());
163  R << "loop not vectorized";
164  if (Force.Value == LoopVectorizeHints::FK_Enabled) {
165  R << " (Force=" << NV("Force", true);
166  if (Width.Value != 0)
167  R << ", Vector Width=" << NV("VectorWidth", getWidth());
168  if (Interleave.Value != 0)
169  R << ", Interleave Count=" << NV("InterleaveCount", Interleave.Value);
170  R << ")";
171  }
172  return R;
173  }
174  });
175 }
176 
178  if (getWidth() == ElementCount::getFixed(1))
179  return LV_NAME;
181  return LV_NAME;
183  return LV_NAME;
185 }
186 
187 void LoopVectorizeHints::getHintsFromMetadata() {
188  MDNode *LoopID = TheLoop->getLoopID();
189  if (!LoopID)
190  return;
191 
192  // First operand should refer to the loop id itself.
193  assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
194  assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
195 
196  for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
197  const MDString *S = nullptr;
199 
200  // The expected hint is either a MDString or a MDNode with the first
201  // operand a MDString.
202  if (const MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i))) {
203  if (!MD || MD->getNumOperands() == 0)
204  continue;
205  S = dyn_cast<MDString>(MD->getOperand(0));
206  for (unsigned i = 1, ie = MD->getNumOperands(); i < ie; ++i)
207  Args.push_back(MD->getOperand(i));
208  } else {
209  S = dyn_cast<MDString>(LoopID->getOperand(i));
210  assert(Args.size() == 0 && "too many arguments for MDString");
211  }
212 
213  if (!S)
214  continue;
215 
216  // Check if the hint starts with the loop metadata prefix.
217  StringRef Name = S->getString();
218  if (Args.size() == 1)
219  setHint(Name, Args[0]);
220  }
221 }
222 
223 void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) {
224  if (!Name.startswith(Prefix()))
225  return;
226  Name = Name.substr(Prefix().size(), StringRef::npos);
227 
228  const ConstantInt *C = mdconst::dyn_extract<ConstantInt>(Arg);
229  if (!C)
230  return;
231  unsigned Val = C->getZExtValue();
232 
233  Hint *Hints[] = {&Width, &Interleave, &Force,
234  &IsVectorized, &Predicate, &Scalable};
235  for (auto H : Hints) {
236  if (Name == H->Name) {
237  if (H->validate(Val))
238  H->Value = Val;
239  else
240  LLVM_DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n");
241  break;
242  }
243  }
244 }
245 
246 // Return true if the inner loop \p Lp is uniform with regard to the outer loop
247 // \p OuterLp (i.e., if the outer loop is vectorized, all the vector lanes
248 // executing the inner loop will execute the same iterations). This check is
249 // very constrained for now but it will be relaxed in the future. \p Lp is
250 // considered uniform if it meets all the following conditions:
251 // 1) it has a canonical IV (starting from 0 and with stride 1),
252 // 2) its latch terminator is a conditional branch and,
253 // 3) its latch condition is a compare instruction whose operands are the
254 // canonical IV and an OuterLp invariant.
255 // This check doesn't take into account the uniformity of other conditions not
256 // related to the loop latch because they don't affect the loop uniformity.
257 //
258 // NOTE: We decided to keep all these checks and its associated documentation
259 // together so that we can easily have a picture of the current supported loop
260 // nests. However, some of the current checks don't depend on \p OuterLp and
261 // would be redundantly executed for each \p Lp if we invoked this function for
262 // different candidate outer loops. This is not the case for now because we
263 // don't currently have the infrastructure to evaluate multiple candidate outer
264 // loops and \p OuterLp will be a fixed parameter while we only support explicit
265 // outer loop vectorization. It's also very likely that these checks go away
266 // before introducing the aforementioned infrastructure. However, if this is not
267 // the case, we should move the \p OuterLp independent checks to a separate
268 // function that is only executed once for each \p Lp.
269 static bool isUniformLoop(Loop *Lp, Loop *OuterLp) {
270  assert(Lp->getLoopLatch() && "Expected loop with a single latch.");
271 
272  // If Lp is the outer loop, it's uniform by definition.
273  if (Lp == OuterLp)
274  return true;
275  assert(OuterLp->contains(Lp) && "OuterLp must contain Lp.");
276 
277  // 1.
279  if (!IV) {
280  LLVM_DEBUG(dbgs() << "LV: Canonical IV not found.\n");
281  return false;
282  }
283 
284  // 2.
285  BasicBlock *Latch = Lp->getLoopLatch();
286  auto *LatchBr = dyn_cast<BranchInst>(Latch->getTerminator());
287  if (!LatchBr || LatchBr->isUnconditional()) {
288  LLVM_DEBUG(dbgs() << "LV: Unsupported loop latch branch.\n");
289  return false;
290  }
291 
292  // 3.
293  auto *LatchCmp = dyn_cast<CmpInst>(LatchBr->getCondition());
294  if (!LatchCmp) {
295  LLVM_DEBUG(
296  dbgs() << "LV: Loop latch condition is not a compare instruction.\n");
297  return false;
298  }
299 
300  Value *CondOp0 = LatchCmp->getOperand(0);
301  Value *CondOp1 = LatchCmp->getOperand(1);
302  Value *IVUpdate = IV->getIncomingValueForBlock(Latch);
303  if (!(CondOp0 == IVUpdate && OuterLp->isLoopInvariant(CondOp1)) &&
304  !(CondOp1 == IVUpdate && OuterLp->isLoopInvariant(CondOp0))) {
305  LLVM_DEBUG(dbgs() << "LV: Loop latch condition is not uniform.\n");
306  return false;
307  }
308 
309  return true;
310 }
311 
312 // Return true if \p Lp and all its nested loops are uniform with regard to \p
313 // OuterLp.
314 static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp) {
315  if (!isUniformLoop(Lp, OuterLp))
316  return false;
317 
318  // Check if nested loops are uniform.
319  for (Loop *SubLp : *Lp)
320  if (!isUniformLoopNest(SubLp, OuterLp))
321  return false;
322 
323  return true;
324 }
325 
326 /// Check whether it is safe to if-convert this phi node.
327 ///
328 /// Phi nodes with constant expressions that can trap are not safe to if
329 /// convert.
331  for (PHINode &Phi : BB->phis()) {
332  for (Value *V : Phi.incoming_values())
333  if (auto *C = dyn_cast<Constant>(V))
334  if (C->canTrap())
335  return false;
336  }
337  return true;
338 }
339 
341  if (Ty->isPointerTy())
342  return DL.getIntPtrType(Ty);
343 
344  // It is possible that char's or short's overflow when we ask for the loop's
345  // trip count, work around this by changing the type size.
346  if (Ty->getScalarSizeInBits() < 32)
347  return Type::getInt32Ty(Ty->getContext());
348 
349  return Ty;
350 }
351 
352 static Type *getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1) {
353  Ty0 = convertPointerToIntegerType(DL, Ty0);
354  Ty1 = convertPointerToIntegerType(DL, Ty1);
355  if (Ty0->getScalarSizeInBits() > Ty1->getScalarSizeInBits())
356  return Ty0;
357  return Ty1;
358 }
359 
360 /// Check that the instruction has outside loop users and is not an
361 /// identified reduction variable.
362 static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
363  SmallPtrSetImpl<Value *> &AllowedExit) {
364  // Reductions, Inductions and non-header phis are allowed to have exit users. All
365  // other instructions must not have external users.
366  if (!AllowedExit.count(Inst))
367  // Check that all of the users of the loop are inside the BB.
368  for (User *U : Inst->users()) {
369  Instruction *UI = cast<Instruction>(U);
370  // This user may be a reduction exit value.
371  if (!TheLoop->contains(UI)) {
372  LLVM_DEBUG(dbgs() << "LV: Found an outside user for : " << *UI << '\n');
373  return true;
374  }
375  }
376  return false;
377 }
378 
380  const ValueToValueMap &Strides =
381  getSymbolicStrides() ? *getSymbolicStrides() : ValueToValueMap();
382 
383  Function *F = TheLoop->getHeader()->getParent();
384  bool OptForSize = F->hasOptSize() ||
385  llvm::shouldOptimizeForSize(TheLoop->getHeader(), PSI, BFI,
387  bool CanAddPredicate = !OptForSize;
388  int Stride = getPtrStride(PSE, Ptr, TheLoop, Strides, CanAddPredicate, false);
389  if (Stride == 1 || Stride == -1)
390  return Stride;
391  return 0;
392 }
393 
395  return LAI->isUniform(V);
396 }
397 
398 bool LoopVectorizationLegality::canVectorizeOuterLoop() {
399  assert(!TheLoop->isInnermost() && "We are not vectorizing an outer loop.");
400  // Store the result and return it at the end instead of exiting early, in case
401  // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
402  bool Result = true;
403  bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
404 
405  for (BasicBlock *BB : TheLoop->blocks()) {
406  // Check whether the BB terminator is a BranchInst. Any other terminator is
407  // not supported yet.
408  auto *Br = dyn_cast<BranchInst>(BB->getTerminator());
409  if (!Br) {
410  reportVectorizationFailure("Unsupported basic block terminator",
411  "loop control flow is not understood by vectorizer",
412  "CFGNotUnderstood", ORE, TheLoop);
413  if (DoExtraAnalysis)
414  Result = false;
415  else
416  return false;
417  }
418 
419  // Check whether the BranchInst is a supported one. Only unconditional
420  // branches, conditional branches with an outer loop invariant condition or
421  // backedges are supported.
422  // FIXME: We skip these checks when VPlan predication is enabled as we
423  // want to allow divergent branches. This whole check will be removed
424  // once VPlan predication is on by default.
425  if (!EnableVPlanPredication && Br && Br->isConditional() &&
426  !TheLoop->isLoopInvariant(Br->getCondition()) &&
427  !LI->isLoopHeader(Br->getSuccessor(0)) &&
428  !LI->isLoopHeader(Br->getSuccessor(1))) {
429  reportVectorizationFailure("Unsupported conditional branch",
430  "loop control flow is not understood by vectorizer",
431  "CFGNotUnderstood", ORE, TheLoop);
432  if (DoExtraAnalysis)
433  Result = false;
434  else
435  return false;
436  }
437  }
438 
439  // Check whether inner loops are uniform. At this point, we only support
440  // simple outer loops scenarios with uniform nested loops.
441  if (!isUniformLoopNest(TheLoop /*loop nest*/,
442  TheLoop /*context outer loop*/)) {
443  reportVectorizationFailure("Outer loop contains divergent loops",
444  "loop control flow is not understood by vectorizer",
445  "CFGNotUnderstood", ORE, TheLoop);
446  if (DoExtraAnalysis)
447  Result = false;
448  else
449  return false;
450  }
451 
452  // Check whether we are able to set up outer loop induction.
453  if (!setupOuterLoopInductions()) {
454  reportVectorizationFailure("Unsupported outer loop Phi(s)",
455  "Unsupported outer loop Phi(s)",
456  "UnsupportedPhi", ORE, TheLoop);
457  if (DoExtraAnalysis)
458  Result = false;
459  else
460  return false;
461  }
462 
463  return Result;
464 }
465 
466 void LoopVectorizationLegality::addInductionPhi(
467  PHINode *Phi, const InductionDescriptor &ID,
468  SmallPtrSetImpl<Value *> &AllowedExit) {
469  Inductions[Phi] = ID;
470 
471  // In case this induction also comes with casts that we know we can ignore
472  // in the vectorized loop body, record them here. All casts could be recorded
473  // here for ignoring, but suffices to record only the first (as it is the
474  // only one that may bw used outside the cast sequence).
475  const SmallVectorImpl<Instruction *> &Casts = ID.getCastInsts();
476  if (!Casts.empty())
477  InductionCastsToIgnore.insert(*Casts.begin());
478 
479  Type *PhiTy = Phi->getType();
480  const DataLayout &DL = Phi->getModule()->getDataLayout();
481 
482  // Get the widest type.
483  if (!PhiTy->isFloatingPointTy()) {
484  if (!WidestIndTy)
485  WidestIndTy = convertPointerToIntegerType(DL, PhiTy);
486  else
487  WidestIndTy = getWiderType(DL, PhiTy, WidestIndTy);
488  }
489 
490  // Int inductions are special because we only allow one IV.
491  if (ID.getKind() == InductionDescriptor::IK_IntInduction &&
492  ID.getConstIntStepValue() && ID.getConstIntStepValue()->isOne() &&
493  isa<Constant>(ID.getStartValue()) &&
494  cast<Constant>(ID.getStartValue())->isNullValue()) {
495 
496  // Use the phi node with the widest type as induction. Use the last
497  // one if there are multiple (no good reason for doing this other
498  // than it is expedient). We've checked that it begins at zero and
499  // steps by one, so this is a canonical induction variable.
500  if (!PrimaryInduction || PhiTy == WidestIndTy)
501  PrimaryInduction = Phi;
502  }
503 
504  // Both the PHI node itself, and the "post-increment" value feeding
505  // back into the PHI node may have external users.
506  // We can allow those uses, except if the SCEVs we have for them rely
507  // on predicates that only hold within the loop, since allowing the exit
508  // currently means re-using this SCEV outside the loop (see PR33706 for more
509  // details).
510  if (PSE.getUnionPredicate().isAlwaysTrue()) {
511  AllowedExit.insert(Phi);
512  AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch()));
513  }
514 
515  LLVM_DEBUG(dbgs() << "LV: Found an induction variable.\n");
516 }
517 
518 bool LoopVectorizationLegality::setupOuterLoopInductions() {
519  BasicBlock *Header = TheLoop->getHeader();
520 
521  // Returns true if a given Phi is a supported induction.
522  auto isSupportedPhi = [&](PHINode &Phi) -> bool {
524  if (InductionDescriptor::isInductionPHI(&Phi, TheLoop, PSE, ID) &&
526  addInductionPhi(&Phi, ID, AllowedExit);
527  return true;
528  } else {
529  // Bail out for any Phi in the outer loop header that is not a supported
530  // induction.
531  LLVM_DEBUG(
532  dbgs()
533  << "LV: Found unsupported PHI for outer loop vectorization.\n");
534  return false;
535  }
536  };
537 
538  if (llvm::all_of(Header->phis(), isSupportedPhi))
539  return true;
540  else
541  return false;
542 }
543 
544 /// Checks if a function is scalarizable according to the TLI, in
545 /// the sense that it should be vectorized and then expanded in
546 /// multiple scalar calls. This is represented in the
547 /// TLI via mappings that do not specify a vector name, as in the
548 /// following example:
549 ///
550 /// const VecDesc VecIntrinsics[] = {
551 /// {"llvm.phx.abs.i32", "", 4}
552 /// };
553 static bool isTLIScalarize(const TargetLibraryInfo &TLI, const CallInst &CI) {
554  const StringRef ScalarName = CI.getCalledFunction()->getName();
555  bool Scalarize = TLI.isFunctionVectorizable(ScalarName);
556  // Check that all known VFs are not associated to a vector
557  // function, i.e. the vector name is empty.
558  if (Scalarize) {
559  ElementCount WidestFixedVF, WidestScalableVF;
560  TLI.getWidestVF(ScalarName, WidestFixedVF, WidestScalableVF);
562  ElementCount::isKnownLE(VF, WidestFixedVF); VF *= 2)
563  Scalarize &= !TLI.isFunctionVectorizable(ScalarName, VF);
565  ElementCount::isKnownLE(VF, WidestScalableVF); VF *= 2)
566  Scalarize &= !TLI.isFunctionVectorizable(ScalarName, VF);
567  assert((WidestScalableVF.isZero() || !Scalarize) &&
568  "Caller may decide to scalarize a variant using a scalable VF");
569  }
570  return Scalarize;
571 }
572 
573 bool LoopVectorizationLegality::canVectorizeInstrs() {
574  BasicBlock *Header = TheLoop->getHeader();
575 
576  // For each block in the loop.
577  for (BasicBlock *BB : TheLoop->blocks()) {
578  // Scan the instructions in the block and look for hazards.
579  for (Instruction &I : *BB) {
580  if (auto *Phi = dyn_cast<PHINode>(&I)) {
581  Type *PhiTy = Phi->getType();
582  // Check that this PHI type is allowed.
583  if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() &&
584  !PhiTy->isPointerTy()) {
585  reportVectorizationFailure("Found a non-int non-pointer PHI",
586  "loop control flow is not understood by vectorizer",
587  "CFGNotUnderstood", ORE, TheLoop);
588  return false;
589  }
590 
591  // If this PHINode is not in the header block, then we know that we
592  // can convert it to select during if-conversion. No need to check if
593  // the PHIs in this block are induction or reduction variables.
594  if (BB != Header) {
595  // Non-header phi nodes that have outside uses can be vectorized. Add
596  // them to the list of allowed exits.
597  // Unsafe cyclic dependencies with header phis are identified during
598  // legalization for reduction, induction and first order
599  // recurrences.
600  AllowedExit.insert(&I);
601  continue;
602  }
603 
604  // We only allow if-converted PHIs with exactly two incoming values.
605  if (Phi->getNumIncomingValues() != 2) {
606  reportVectorizationFailure("Found an invalid PHI",
607  "loop control flow is not understood by vectorizer",
608  "CFGNotUnderstood", ORE, TheLoop, Phi);
609  return false;
610  }
611 
612  RecurrenceDescriptor RedDes;
613  if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes, DB, AC,
614  DT)) {
615  Requirements->addExactFPMathInst(RedDes.getExactFPMathInst());
616  AllowedExit.insert(RedDes.getLoopExitInstr());
617  Reductions[Phi] = RedDes;
618  continue;
619  }
620 
621  // TODO: Instead of recording the AllowedExit, it would be good to record the
622  // complementary set: NotAllowedExit. These include (but may not be
623  // limited to):
624  // 1. Reduction phis as they represent the one-before-last value, which
625  // is not available when vectorized
626  // 2. Induction phis and increment when SCEV predicates cannot be used
627  // outside the loop - see addInductionPhi
628  // 3. Non-Phis with outside uses when SCEV predicates cannot be used
629  // outside the loop - see call to hasOutsideLoopUser in the non-phi
630  // handling below
631  // 4. FirstOrderRecurrence phis that can possibly be handled by
632  // extraction.
633  // By recording these, we can then reason about ways to vectorize each
634  // of these NotAllowedExit.
636  if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID)) {
637  addInductionPhi(Phi, ID, AllowedExit);
638  Requirements->addExactFPMathInst(ID.getExactFPMathInst());
639  continue;
640  }
641 
643  SinkAfter, DT)) {
644  AllowedExit.insert(Phi);
645  FirstOrderRecurrences.insert(Phi);
646  continue;
647  }
648 
649  // As a last resort, coerce the PHI to an AddRec expression
650  // and re-try classifying it as an induction PHI.
651  if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID, true)) {
652  addInductionPhi(Phi, ID, AllowedExit);
653  continue;
654  }
655 
656  reportVectorizationFailure("Found an unidentified PHI",
657  "value that could not be identified as "
658  "reduction is used outside the loop",
659  "NonReductionValueUsedOutsideLoop", ORE, TheLoop, Phi);
660  return false;
661  } // end of PHI handling
662 
663  // We handle calls that:
664  // * Are debug info intrinsics.
665  // * Have a mapping to an IR intrinsic.
666  // * Have a vector version available.
667  auto *CI = dyn_cast<CallInst>(&I);
668 
669  if (CI && !getVectorIntrinsicIDForCall(CI, TLI) &&
670  !isa<DbgInfoIntrinsic>(CI) &&
671  !(CI->getCalledFunction() && TLI &&
672  (!VFDatabase::getMappings(*CI).empty() ||
673  isTLIScalarize(*TLI, *CI)))) {
674  // If the call is a recognized math library call, it is likely that
675  // we can vectorize it given loosened floating-point constraints.
676  LibFunc Func;
677  bool IsMathLibCall =
678  TLI && CI->getCalledFunction() &&
679  CI->getType()->isFloatingPointTy() &&
680  TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) &&
681  TLI->hasOptimizedCodeGen(Func);
682 
683  if (IsMathLibCall) {
684  // TODO: Ideally, we should not use clang-specific language here,
685  // but it's hard to provide meaningful yet generic advice.
686  // Also, should this be guarded by allowExtraAnalysis() and/or be part
687  // of the returned info from isFunctionVectorizable()?
689  "Found a non-intrinsic callsite",
690  "library call cannot be vectorized. "
691  "Try compiling with -fno-math-errno, -ffast-math, "
692  "or similar flags",
693  "CantVectorizeLibcall", ORE, TheLoop, CI);
694  } else {
695  reportVectorizationFailure("Found a non-intrinsic callsite",
696  "call instruction cannot be vectorized",
697  "CantVectorizeLibcall", ORE, TheLoop, CI);
698  }
699  return false;
700  }
701 
702  // Some intrinsics have scalar arguments and should be same in order for
703  // them to be vectorized (i.e. loop invariant).
704  if (CI) {
705  auto *SE = PSE.getSE();
706  Intrinsic::ID IntrinID = getVectorIntrinsicIDForCall(CI, TLI);
707  for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i)
708  if (hasVectorInstrinsicScalarOpd(IntrinID, i)) {
709  if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(i)), TheLoop)) {
710  reportVectorizationFailure("Found unvectorizable intrinsic",
711  "intrinsic instruction cannot be vectorized",
712  "CantVectorizeIntrinsic", ORE, TheLoop, CI);
713  return false;
714  }
715  }
716  }
717 
718  // Check that the instruction return type is vectorizable.
719  // Also, we can't vectorize extractelement instructions.
720  if ((!VectorType::isValidElementType(I.getType()) &&
721  !I.getType()->isVoidTy()) ||
722  isa<ExtractElementInst>(I)) {
723  reportVectorizationFailure("Found unvectorizable type",
724  "instruction return type cannot be vectorized",
725  "CantVectorizeInstructionReturnType", ORE, TheLoop, &I);
726  return false;
727  }
728 
729  // Check that the stored type is vectorizable.
730  if (auto *ST = dyn_cast<StoreInst>(&I)) {
731  Type *T = ST->getValueOperand()->getType();
733  reportVectorizationFailure("Store instruction cannot be vectorized",
734  "store instruction cannot be vectorized",
735  "CantVectorizeStore", ORE, TheLoop, ST);
736  return false;
737  }
738 
739  // For nontemporal stores, check that a nontemporal vector version is
740  // supported on the target.
741  if (ST->getMetadata(LLVMContext::MD_nontemporal)) {
742  // Arbitrarily try a vector of 2 elements.
743  auto *VecTy = FixedVectorType::get(T, /*NumElts=*/2);
744  assert(VecTy && "did not find vectorized version of stored type");
745  if (!TTI->isLegalNTStore(VecTy, ST->getAlign())) {
747  "nontemporal store instruction cannot be vectorized",
748  "nontemporal store instruction cannot be vectorized",
749  "CantVectorizeNontemporalStore", ORE, TheLoop, ST);
750  return false;
751  }
752  }
753 
754  } else if (auto *LD = dyn_cast<LoadInst>(&I)) {
755  if (LD->getMetadata(LLVMContext::MD_nontemporal)) {
756  // For nontemporal loads, check that a nontemporal vector version is
757  // supported on the target (arbitrarily try a vector of 2 elements).
758  auto *VecTy = FixedVectorType::get(I.getType(), /*NumElts=*/2);
759  assert(VecTy && "did not find vectorized version of load type");
760  if (!TTI->isLegalNTLoad(VecTy, LD->getAlign())) {
762  "nontemporal load instruction cannot be vectorized",
763  "nontemporal load instruction cannot be vectorized",
764  "CantVectorizeNontemporalLoad", ORE, TheLoop, LD);
765  return false;
766  }
767  }
768 
769  // FP instructions can allow unsafe algebra, thus vectorizable by
770  // non-IEEE-754 compliant SIMD units.
771  // This applies to floating-point math operations and calls, not memory
772  // operations, shuffles, or casts, as they don't change precision or
773  // semantics.
774  } else if (I.getType()->isFloatingPointTy() && (CI || I.isBinaryOp()) &&
775  !I.isFast()) {
776  LLVM_DEBUG(dbgs() << "LV: Found FP op with unsafe algebra.\n");
777  Hints->setPotentiallyUnsafe();
778  }
779 
780  // Reduction instructions are allowed to have exit users.
781  // All other instructions must not have external users.
782  if (hasOutsideLoopUser(TheLoop, &I, AllowedExit)) {
783  // We can safely vectorize loops where instructions within the loop are
784  // used outside the loop only if the SCEV predicates within the loop is
785  // same as outside the loop. Allowing the exit means reusing the SCEV
786  // outside the loop.
787  if (PSE.getUnionPredicate().isAlwaysTrue()) {
788  AllowedExit.insert(&I);
789  continue;
790  }
791  reportVectorizationFailure("Value cannot be used outside the loop",
792  "value cannot be used outside the loop",
793  "ValueUsedOutsideLoop", ORE, TheLoop, &I);
794  return false;
795  }
796  } // next instr.
797  }
798 
799  if (!PrimaryInduction) {
800  if (Inductions.empty()) {
801  reportVectorizationFailure("Did not find one integer induction var",
802  "loop induction variable could not be identified",
803  "NoInductionVariable", ORE, TheLoop);
804  return false;
805  } else if (!WidestIndTy) {
806  reportVectorizationFailure("Did not find one integer induction var",
807  "integer loop induction variable could not be identified",
808  "NoIntegerInductionVariable", ORE, TheLoop);
809  return false;
810  } else {
811  LLVM_DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
812  }
813  }
814 
815  // For first order recurrences, we use the previous value (incoming value from
816  // the latch) to check if it dominates all users of the recurrence. Bail out
817  // if we have to sink such an instruction for another recurrence, as the
818  // dominance requirement may not hold after sinking.
819  BasicBlock *LoopLatch = TheLoop->getLoopLatch();
820  if (any_of(FirstOrderRecurrences, [LoopLatch, this](const PHINode *Phi) {
821  Instruction *V =
822  cast<Instruction>(Phi->getIncomingValueForBlock(LoopLatch));
823  return SinkAfter.find(V) != SinkAfter.end();
824  }))
825  return false;
826 
827  // Now we know the widest induction type, check if our found induction
828  // is the same size. If it's not, unset it here and InnerLoopVectorizer
829  // will create another.
830  if (PrimaryInduction && WidestIndTy != PrimaryInduction->getType())
831  PrimaryInduction = nullptr;
832 
833  return true;
834 }
835 
836 bool LoopVectorizationLegality::canVectorizeMemory() {
837  LAI = &(*GetLAA)(*TheLoop);
838  const OptimizationRemarkAnalysis *LAR = LAI->getReport();
839  if (LAR) {
840  ORE->emit([&]() {
841  return OptimizationRemarkAnalysis(Hints->vectorizeAnalysisPassName(),
842  "loop not vectorized: ", *LAR);
843  });
844  }
845  if (!LAI->canVectorizeMemory())
846  return false;
847 
849  reportVectorizationFailure("Stores to a uniform address",
850  "write to a loop invariant address could not be vectorized",
851  "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
852  return false;
853  }
855  PSE.addPredicate(LAI->getPSE().getUnionPredicate());
856 
857  return true;
858 }
859 
861  Value *In0 = const_cast<Value *>(V);
862  PHINode *PN = dyn_cast_or_null<PHINode>(In0);
863  if (!PN)
864  return false;
865 
866  return Inductions.count(PN);
867 }
868 
870  auto *Inst = dyn_cast<Instruction>(V);
871  return (Inst && InductionCastsToIgnore.count(Inst));
872 }
873 
876 }
877 
879  return FirstOrderRecurrences.count(Phi);
880 }
881 
883  return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
884 }
885 
886 bool LoopVectorizationLegality::blockCanBePredicated(
889  SmallPtrSetImpl<Instruction *> &ConditionalAssumes,
890  bool PreserveGuards) const {
891  const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
892 
893  for (Instruction &I : *BB) {
894  // Check that we don't have a constant expression that can trap as operand.
895  for (Value *Operand : I.operands()) {
896  if (auto *C = dyn_cast<Constant>(Operand))
897  if (C->canTrap())
898  return false;
899  }
900 
901  // We can predicate blocks with calls to assume, as long as we drop them in
902  // case we flatten the CFG via predication.
903  if (match(&I, m_Intrinsic<Intrinsic::assume>())) {
904  ConditionalAssumes.insert(&I);
905  continue;
906  }
907 
908  // Do not let llvm.experimental.noalias.scope.decl block the vectorization.
909  // TODO: there might be cases that it should block the vectorization. Let's
910  // ignore those for now.
911  if (isa<NoAliasScopeDeclInst>(&I))
912  continue;
913 
914  // We might be able to hoist the load.
915  if (I.mayReadFromMemory()) {
916  auto *LI = dyn_cast<LoadInst>(&I);
917  if (!LI)
918  return false;
919  if (!SafePtrs.count(LI->getPointerOperand())) {
920  // !llvm.mem.parallel_loop_access implies if-conversion safety.
921  // Otherwise, record that the load needs (real or emulated) masking
922  // and let the cost model decide.
923  if (!IsAnnotatedParallel || PreserveGuards)
924  MaskedOp.insert(LI);
925  continue;
926  }
927  }
928 
929  if (I.mayWriteToMemory()) {
930  auto *SI = dyn_cast<StoreInst>(&I);
931  if (!SI)
932  return false;
933  // Predicated store requires some form of masking:
934  // 1) masked store HW instruction,
935  // 2) emulation via load-blend-store (only if safe and legal to do so,
936  // be aware on the race conditions), or
937  // 3) element-by-element predicate check and scalar store.
938  MaskedOp.insert(SI);
939  continue;
940  }
941  if (I.mayThrow())
942  return false;
943  }
944 
945  return true;
946 }
947 
948 bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
949  if (!EnableIfConversion) {
950  reportVectorizationFailure("If-conversion is disabled",
951  "if-conversion is disabled",
952  "IfConversionDisabled",
953  ORE, TheLoop);
954  return false;
955  }
956 
957  assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable");
958 
959  // A list of pointers which are known to be dereferenceable within scope of
960  // the loop body for each iteration of the loop which executes. That is,
961  // the memory pointed to can be dereferenced (with the access size implied by
962  // the value's type) unconditionally within the loop header without
963  // introducing a new fault.
964  SmallPtrSet<Value *, 8> SafePointers;
965 
966  // Collect safe addresses.
967  for (BasicBlock *BB : TheLoop->blocks()) {
968  if (!blockNeedsPredication(BB)) {
969  for (Instruction &I : *BB)
970  if (auto *Ptr = getLoadStorePointerOperand(&I))
971  SafePointers.insert(Ptr);
972  continue;
973  }
974 
975  // For a block which requires predication, a address may be safe to access
976  // in the loop w/o predication if we can prove dereferenceability facts
977  // sufficient to ensure it'll never fault within the loop. For the moment,
978  // we restrict this to loads; stores are more complicated due to
979  // concurrency restrictions.
980  ScalarEvolution &SE = *PSE.getSE();
981  for (Instruction &I : *BB) {
982  LoadInst *LI = dyn_cast<LoadInst>(&I);
983  if (LI && !LI->getType()->isVectorTy() && !mustSuppressSpeculation(*LI) &&
984  isDereferenceableAndAlignedInLoop(LI, TheLoop, SE, *DT))
985  SafePointers.insert(LI->getPointerOperand());
986  }
987  }
988 
989  // Collect the blocks that need predication.
990  BasicBlock *Header = TheLoop->getHeader();
991  for (BasicBlock *BB : TheLoop->blocks()) {
992  // We don't support switch statements inside loops.
993  if (!isa<BranchInst>(BB->getTerminator())) {
994  reportVectorizationFailure("Loop contains a switch statement",
995  "loop contains a switch statement",
996  "LoopContainsSwitch", ORE, TheLoop,
997  BB->getTerminator());
998  return false;
999  }
1000 
1001  // We must be able to predicate all blocks that need to be predicated.
1002  if (blockNeedsPredication(BB)) {
1003  if (!blockCanBePredicated(BB, SafePointers, MaskedOp,
1004  ConditionalAssumes)) {
1006  "Control flow cannot be substituted for a select",
1007  "control flow cannot be substituted for a select",
1008  "NoCFGForSelect", ORE, TheLoop,
1009  BB->getTerminator());
1010  return false;
1011  }
1012  } else if (BB != Header && !canIfConvertPHINodes(BB)) {
1014  "Control flow cannot be substituted for a select",
1015  "control flow cannot be substituted for a select",
1016  "NoCFGForSelect", ORE, TheLoop,
1017  BB->getTerminator());
1018  return false;
1019  }
1020  }
1021 
1022  // We can if-convert this loop.
1023  return true;
1024 }
1025 
1026 // Helper function to canVectorizeLoopNestCFG.
1027 bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
1028  bool UseVPlanNativePath) {
1029  assert((UseVPlanNativePath || Lp->isInnermost()) &&
1030  "VPlan-native path is not enabled.");
1031 
1032  // TODO: ORE should be improved to show more accurate information when an
1033  // outer loop can't be vectorized because a nested loop is not understood or
1034  // legal. Something like: "outer_loop_location: loop not vectorized:
1035  // (inner_loop_location) loop control flow is not understood by vectorizer".
1036 
1037  // Store the result and return it at the end instead of exiting early, in case
1038  // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1039  bool Result = true;
1040  bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1041 
1042  // We must have a loop in canonical form. Loops with indirectbr in them cannot
1043  // be canonicalized.
1044  if (!Lp->getLoopPreheader()) {
1045  reportVectorizationFailure("Loop doesn't have a legal pre-header",
1046  "loop control flow is not understood by vectorizer",
1047  "CFGNotUnderstood", ORE, TheLoop);
1048  if (DoExtraAnalysis)
1049  Result = false;
1050  else
1051  return false;
1052  }
1053 
1054  // We must have a single backedge.
1055  if (Lp->getNumBackEdges() != 1) {
1056  reportVectorizationFailure("The loop must have a single backedge",
1057  "loop control flow is not understood by vectorizer",
1058  "CFGNotUnderstood", ORE, TheLoop);
1059  if (DoExtraAnalysis)
1060  Result = false;
1061  else
1062  return false;
1063  }
1064 
1065  // We currently must have a single "exit block" after the loop. Note that
1066  // multiple "exiting blocks" inside the loop are allowed, provided they all
1067  // reach the single exit block.
1068  // TODO: This restriction can be relaxed in the near future, it's here solely
1069  // to allow separation of changes for review. We need to generalize the phi
1070  // update logic in a number of places.
1071  if (!Lp->getUniqueExitBlock()) {
1072  reportVectorizationFailure("The loop must have a unique exit block",
1073  "loop control flow is not understood by vectorizer",
1074  "CFGNotUnderstood", ORE, TheLoop);
1075  if (DoExtraAnalysis)
1076  Result = false;
1077  else
1078  return false;
1079  }
1080  return Result;
1081 }
1082 
1083 bool LoopVectorizationLegality::canVectorizeLoopNestCFG(
1084  Loop *Lp, bool UseVPlanNativePath) {
1085  // Store the result and return it at the end instead of exiting early, in case
1086  // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1087  bool Result = true;
1088  bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1089  if (!canVectorizeLoopCFG(Lp, UseVPlanNativePath)) {
1090  if (DoExtraAnalysis)
1091  Result = false;
1092  else
1093  return false;
1094  }
1095 
1096  // Recursively check whether the loop control flow of nested loops is
1097  // understood.
1098  for (Loop *SubLp : *Lp)
1099  if (!canVectorizeLoopNestCFG(SubLp, UseVPlanNativePath)) {
1100  if (DoExtraAnalysis)
1101  Result = false;
1102  else
1103  return false;
1104  }
1105 
1106  return Result;
1107 }
1108 
1109 bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
1110  // Store the result and return it at the end instead of exiting early, in case
1111  // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1112  bool Result = true;
1113 
1114  bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1115  // Check whether the loop-related control flow in the loop nest is expected by
1116  // vectorizer.
1117  if (!canVectorizeLoopNestCFG(TheLoop, UseVPlanNativePath)) {
1118  if (DoExtraAnalysis)
1119  Result = false;
1120  else
1121  return false;
1122  }
1123 
1124  // We need to have a loop header.
1125  LLVM_DEBUG(dbgs() << "LV: Found a loop: " << TheLoop->getHeader()->getName()
1126  << '\n');
1127 
1128  // Specific checks for outer loops. We skip the remaining legal checks at this
1129  // point because they don't support outer loops.
1130  if (!TheLoop->isInnermost()) {
1131  assert(UseVPlanNativePath && "VPlan-native path is not enabled.");
1132 
1133  if (!canVectorizeOuterLoop()) {
1134  reportVectorizationFailure("Unsupported outer loop",
1135  "unsupported outer loop",
1136  "UnsupportedOuterLoop",
1137  ORE, TheLoop);
1138  // TODO: Implement DoExtraAnalysis when subsequent legal checks support
1139  // outer loops.
1140  return false;
1141  }
1142 
1143  LLVM_DEBUG(dbgs() << "LV: We can vectorize this outer loop!\n");
1144  return Result;
1145  }
1146 
1147  assert(TheLoop->isInnermost() && "Inner loop expected.");
1148  // Check if we can if-convert non-single-bb loops.
1149  unsigned NumBlocks = TheLoop->getNumBlocks();
1150  if (NumBlocks != 1 && !canVectorizeWithIfConvert()) {
1151  LLVM_DEBUG(dbgs() << "LV: Can't if-convert the loop.\n");
1152  if (DoExtraAnalysis)
1153  Result = false;
1154  else
1155  return false;
1156  }
1157 
1158  // Check if we can vectorize the instructions and CFG in this loop.
1159  if (!canVectorizeInstrs()) {
1160  LLVM_DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n");
1161  if (DoExtraAnalysis)
1162  Result = false;
1163  else
1164  return false;
1165  }
1166 
1167  // Go over each instruction and look at memory deps.
1168  if (!canVectorizeMemory()) {
1169  LLVM_DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n");
1170  if (DoExtraAnalysis)
1171  Result = false;
1172  else
1173  return false;
1174  }
1175 
1176  LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop"
1177  << (LAI->getRuntimePointerChecking()->Need
1178  ? " (with a runtime bound check)"
1179  : "")
1180  << "!\n");
1181 
1182  unsigned SCEVThreshold = VectorizeSCEVCheckThreshold;
1183  if (Hints->getForce() == LoopVectorizeHints::FK_Enabled)
1184  SCEVThreshold = PragmaVectorizeSCEVCheckThreshold;
1185 
1186  if (PSE.getUnionPredicate().getComplexity() > SCEVThreshold) {
1187  reportVectorizationFailure("Too many SCEV checks needed",
1188  "Too many SCEV assumptions need to be made and checked at runtime",
1189  "TooManySCEVRunTimeChecks", ORE, TheLoop);
1190  if (DoExtraAnalysis)
1191  Result = false;
1192  else
1193  return false;
1194  }
1195 
1196  // Okay! We've done all the tests. If any have failed, return false. Otherwise
1197  // we can vectorize, and at this point we don't have any other mem analysis
1198  // which may limit our maximum vectorization factor, so just return true with
1199  // no restrictions.
1200  return Result;
1201 }
1202 
1204 
1205  LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n");
1206 
1207  SmallPtrSet<const Value *, 8> ReductionLiveOuts;
1208 
1209  for (auto &Reduction : getReductionVars())
1210  ReductionLiveOuts.insert(Reduction.second.getLoopExitInstr());
1211 
1212  // TODO: handle non-reduction outside users when tail is folded by masking.
1213  for (auto *AE : AllowedExit) {
1214  // Check that all users of allowed exit values are inside the loop or
1215  // are the live-out of a reduction.
1216  if (ReductionLiveOuts.count(AE))
1217  continue;
1218  for (User *U : AE->users()) {
1219  Instruction *UI = cast<Instruction>(U);
1220  if (TheLoop->contains(UI))
1221  continue;
1222  LLVM_DEBUG(
1223  dbgs()
1224  << "LV: Cannot fold tail by masking, loop has an outside user for "
1225  << *UI << "\n");
1226  return false;
1227  }
1228  }
1229 
1230  // The list of pointers that we can safely read and write to remains empty.
1231  SmallPtrSet<Value *, 8> SafePointers;
1232 
1234  SmallPtrSet<Instruction *, 8> TmpConditionalAssumes;
1235 
1236  // Check and mark all blocks for predication, including those that ordinarily
1237  // do not need predication such as the header block.
1238  for (BasicBlock *BB : TheLoop->blocks()) {
1239  if (!blockCanBePredicated(BB, SafePointers, TmpMaskedOp,
1240  TmpConditionalAssumes,
1241  /* MaskAllLoads= */ true)) {
1242  LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking as requested.\n");
1243  return false;
1244  }
1245  }
1246 
1247  LLVM_DEBUG(dbgs() << "LV: can fold tail by masking.\n");
1248 
1249  MaskedOp.insert(TmpMaskedOp.begin(), TmpMaskedOp.end());
1250  ConditionalAssumes.insert(TmpConditionalAssumes.begin(),
1251  TmpConditionalAssumes.end());
1252 
1253  return true;
1254 }
1255 
1256 } // namespace llvm
i
i
Definition: README.txt:29
llvm::EngineKind::Kind
Kind
Definition: ExecutionEngine.h:524
llvm::mustSuppressSpeculation
bool mustSuppressSpeculation(const LoadInst &LI)
Return true if speculation of the given load must be suppressed to avoid ordering or interfering with...
Definition: ValueTracking.cpp:4487
llvm::OptimizationRemarkMissed
Diagnostic information for missed-optimization remarks.
Definition: DiagnosticInfo.h:729
llvm::Loop::isLoopInvariant
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:63
llvm::LoopVectorizationLegality::isConsecutivePtr
int isConsecutivePtr(Value *Ptr)
Check if this pointer is consecutive when vectorizing.
Definition: LoopVectorizationLegality.cpp:379
llvm
Definition: AllocatorList.h:23
llvm::LoopBase::getUniqueExitBlock
BlockT * getUniqueExitBlock() const
If getUniqueExitBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:138
llvm::Instruction::getModule
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
Definition: Instruction.cpp:66
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:112
llvm::LoopAccessInfo::isUniform
bool isUniform(Value *V) const
Returns true if the value V is uniform within the loop.
Definition: LoopAccessAnalysis.cpp:2101
llvm::RecurrenceDescriptor::isReductionPHI
static bool isReductionPHI(PHINode *Phi, Loop *TheLoop, RecurrenceDescriptor &RedDes, DemandedBits *DB=nullptr, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr)
Returns true if Phi is a reduction in TheLoop.
Definition: IVDescriptors.cpp:647
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
IntrinsicInst.h
llvm::Type::isPointerTy
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:229
llvm::ElementCount
Definition: TypeSize.h:386
EnableIfConversion
static cl::opt< bool > EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden, cl::desc("Enable if-conversion during vectorization."))
llvm::getVectorIntrinsicIDForCall
Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
Definition: VectorUtils.cpp:120
Loads.h
llvm::Function
Definition: Function.h:61
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:530
llvm::LoopBase::contains
bool contains(const LoopT *L) const
Return true if the specified loop is contained within this loop.
Definition: LoopInfo.h:122
llvm::ARM_MB::LD
@ LD
Definition: ARMBaseInfo.h:72
llvm::StringRef::npos
static constexpr size_t npos
Definition: StringRef.h:59
llvm::LinearPolySize< ElementCount >::isKnownLE
static bool isKnownLE(const LinearPolySize &LHS, const LinearPolySize &RHS)
Definition: TypeSize.h:341
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
llvm::LoopVectorizationLegality::blockNeedsPredication
bool blockNeedsPredication(BasicBlock *BB)
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
Definition: LoopVectorizationLegality.cpp:882
llvm::LoopVectorizationRequirements::addRuntimePointerChecks
void addRuntimePointerChecks(unsigned Num)
Definition: LoopVectorizationLegality.h:186
SizeOpts.h
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:168
llvm::Loop::getStartLoc
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition: LoopInfo.cpp:626
llvm::TargetLibraryInfo::isFunctionVectorizable
bool isFunctionVectorizable(StringRef F, const ElementCount &VF) const
Definition: TargetLibraryInfo.h:314
llvm::PredicatedScalarEvolution::getUnionPredicate
const SCEVUnionPredicate & getUnionPredicate() const
Definition: ScalarEvolution.cpp:13164
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:443
llvm::VectorizerParams::VectorizationInterleave
static unsigned VectorizationInterleave
Interleave factor as overridden by the user.
Definition: LoopAccessAnalysis.h:44
ValueTracking.h
llvm::OptimizationRemarkEmitter::allowExtraAnalysis
bool allowExtraAnalysis(StringRef PassName) const
Whether we allow for extra compile-time budget to perform more analysis to produce fewer false positi...
Definition: OptimizationRemarkEmitter.h:90
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:140
llvm::LoopVectorizationLegality::isInductionVariable
bool isInductionVariable(const Value *V)
Returns True if V can be considered as an induction variable in this loop.
Definition: LoopVectorizationLegality.cpp:874
VectorizeSCEVCheckThreshold
static cl::opt< unsigned > VectorizeSCEVCheckThreshold("vectorize-scev-check-threshold", cl::init(16), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed."))
llvm::InductionDescriptor::IK_IntInduction
@ IK_IntInduction
Integer induction variable. Step = C.
Definition: IVDescriptors.h:272
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:46
llvm::LoopVectorizeHints::getWidth
ElementCount getWidth() const
Definition: LoopVectorizationLegality.h:111
llvm::LoopVectorizeHints::getIsVectorized
unsigned getIsVectorized() const
Definition: LoopVectorizationLegality.h:115
llvm::TargetTransformInfo::isLegalNTLoad
bool isLegalNTLoad(Type *DataType, Align Alignment) const
Return true if the target supports nontemporal load.
Definition: TargetTransformInfo.cpp:390
llvm::TargetTransformInfo::isLegalNTStore
bool isLegalNTStore(Type *DataType, Align Alignment) const
Return true if the target supports nontemporal store.
Definition: TargetTransformInfo.cpp:385
T
#define T
Definition: Mips16ISelLowering.cpp:341
llvm::ConstantAsMetadata::get
static ConstantAsMetadata * get(Constant *C)
Definition: Metadata.h:419
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:449
llvm::reportVectorizationFailure
void reportVectorizationFailure(const StringRef DebugMsg, const StringRef OREMsg, const StringRef ORETag, OptimizationRemarkEmitter *ORE, Loop *TheLoop, Instruction *I=nullptr)
Reports a vectorization failure: print DebugMsg for debugging purposes along with the corresponding o...
Definition: LoopVectorize.cpp:1125
llvm::LoadInst::getPointerOperand
Value * getPointerOperand()
Definition: Instructions.h:266
llvm::isPowerOf2_32
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:491
llvm::isUniformLoopNest
static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp)
Definition: LoopVectorizationLegality.cpp:314
llvm::RISCVFeatures::validate
void validate(const Triple &TT, const FeatureBitset &FeatureBits)
Definition: RISCVBaseInfo.cpp:90
llvm::LoopVectorizationLegality::isInductionPhi
bool isInductionPhi(const Value *V)
Returns True if V is a Phi node of an induction variable in this loop.
Definition: LoopVectorizationLegality.cpp:860
llvm::LoopVectorizationLegality::prepareToFoldTailByMasking
bool prepareToFoldTailByMasking()
Return true if we can vectorize this loop while folding its tail by masking, and mark all respective ...
Definition: LoopVectorizationLegality.cpp:1203
llvm::Type::isFloatingPointTy
bool isFloatingPointTy() const
Return true if this is one of the six floating-point types.
Definition: Type.h:163
llvm::Type::getInt32Ty
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:204
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:122
llvm::MDNode::get
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1198
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::MDNode::getNumOperands
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1108
llvm::makePostTransformationMetadata
llvm::MDNode * makePostTransformationMetadata(llvm::LLVMContext &Context, MDNode *OrigLoopID, llvm::ArrayRef< llvm::StringRef > RemovePrefixes, llvm::ArrayRef< llvm::MDNode * > AddAttrs)
Create a new LoopID after the loop has been transformed.
Definition: LoopInfo.cpp:1046
Context
LLVMContext & Context
Definition: NVVMIntrRange.cpp:66
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
llvm::LoopVectorizeHints::FK_Undefined
@ FK_Undefined
Not selected.
Definition: LoopVectorizationLegality.h:94
Arg
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
Definition: AMDGPULibCalls.cpp:205
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:77
llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1505
llvm::shouldOptimizeForSize
bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
Definition: MachineSizeOpts.cpp:183
llvm::InductionDescriptor
A struct for saving information about induction variables.
Definition: IVDescriptors.h:267
llvm::LoopAccessInfo::hasDependenceInvolvingLoopInvariantAddress
bool hasDependenceInvolvingLoopInvariantAddress() const
If the loop has memory dependence involving an invariant address, i.e.
Definition: LoopAccessAnalysis.h:582
MaxInterleaveFactor
static const unsigned MaxInterleaveFactor
Maximum vectorization interleave count.
Definition: LoopVectorizationLegality.cpp:52
llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
isZero
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition: Lint.cpp:519
llvm::LoopAccessInfo::blockNeedsPredication
static bool blockNeedsPredication(BasicBlock *BB, Loop *TheLoop, DominatorTree *DT)
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
Definition: LoopAccessAnalysis.cpp:2072
llvm::ValueToValueMap
DenseMap< const Value *, Value * > ValueToValueMap
Definition: ScalarEvolutionExpressions.h:849
llvm::User
Definition: User.h:44
llvm::LibFunc
LibFunc
Definition: TargetLibraryInfo.h:34
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::isUniformLoop
static bool isUniformLoop(Loop *Lp, Loop *OuterLp)
Definition: LoopVectorizationLegality.cpp:269
llvm::RecurrenceDescriptor::getExactFPMathInst
Instruction * getExactFPMathInst() const
Returns 1st non-reassociative FP instruction in the PHI node's use-chain.
Definition: IVDescriptors.h:195
llvm::CallBase::getCalledFunction
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation.
Definition: InstrTypes.h:1396
SI
@ SI
Definition: SIInstrInfo.cpp:7342
llvm::LoopVectorizeHints::vectorizeAnalysisPassName
const char * vectorizeAnalysisPassName() const
If hints are provided that force vectorization, use the AlwaysPrint pass name to force the frontend t...
Definition: LoopVectorizationLegality.cpp:177
llvm::getPtrStride
int64_t getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp, const ValueToValueMap &StridesMap=ValueToValueMap(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of its element size.
Definition: LoopAccessAnalysis.cpp:1017
llvm::ms_demangle::QualifierMangleMode::Result
@ Result
llvm::LoopVectorizationRequirements::addExactFPMathInst
void addExactFPMathInst(Instruction *I)
Track the 1st floating-point instruction that can not be reassociated.
Definition: LoopVectorizationLegality.h:181
llvm::LoopBase::blocks
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:178
TargetLibraryInfo.h
llvm::Type::isVectorTy
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:235
false
Definition: StackSlotColoring.cpp:142
EnableVPlanPredication
cl::opt< bool > EnableVPlanPredication
llvm::PHINode::getIncomingValueForBlock
Value * getIncomingValueForBlock(const BasicBlock *BB) const
Definition: Instructions.h:2755
llvm::TargetLibraryInfo::getLibFunc
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
Definition: TargetLibraryInfo.h:277
llvm::Instruction
Definition: Instruction.h:45
llvm::Type::getScalarSizeInBits
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition: Type.cpp:154
llvm::PGSOQueryType::IRPass
@ IRPass
llvm::LoopVectorizeHints::getForce
enum ForceKind getForce() const
Definition: LoopVectorizationLegality.h:117
llvm::BasicBlock::phis
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:354
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:885
llvm::LoopVectorizationLegality::isFirstOrderRecurrence
bool isFirstOrderRecurrence(const PHINode *Phi)
Returns True if Phi is a first-order recurrence in this loop.
Definition: LoopVectorizationLegality.cpp:878
PatternMatch.h
llvm::FixedVectorType::get
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:650
llvm::Metadata
Root of the metadata hierarchy.
Definition: Metadata.h:62
llvm::PHINode::getNumIncomingValues
unsigned getNumIncomingValues() const
Return the number of incoming edges.
Definition: Instructions.h:2662
llvm::LinearPolySize< ElementCount >::getFixed
static ElementCount getFixed(ScalarTy MinVal)
Definition: TypeSize.h:284
LoopInfo.h
llvm::Twine::str
std::string str() const
Return the twine contents as a std::string.
Definition: Twine.cpp:17
llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:202
llvm::MDNode::getOperand
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1102
llvm::VectorType::isValidElementType
static bool isValidElementType(Type *ElemTy)
Return true if the specified type is valid as an element type.
Definition: Type.cpp:641
llvm::canIfConvertPHINodes
static bool canIfConvertPHINodes(BasicBlock *BB)
Check whether it is safe to if-convert this phi node.
Definition: LoopVectorizationLegality.cpp:330
VectorUtils.h
llvm::cl::opt< bool >
llvm::SCEVUnionPredicate::getComplexity
unsigned getComplexity() const override
We estimate the complexity of a union predicate as the size number of predicates in the union.
Definition: ScalarEvolution.h:432
llvm::RuntimePointerChecking::Need
bool Need
This flag indicates if we need to add the runtime check.
Definition: LoopAccessAnalysis.h:452
llvm::VFDatabase::getMappings
static SmallVector< VFInfo, 8 > getMappings(const CallInst &CI)
Retrieve all the VFInfo instances associated to the CallInst CI.
Definition: VectorUtils.h:257
llvm::SmallPtrSetImpl::end
iterator end() const
Definition: SmallPtrSet.h:407
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
llvm::LoopVectorizeHints::FK_Enabled
@ FK_Enabled
Forcing enabled.
Definition: LoopVectorizationLegality.h:96
llvm::LoopVectorizationLegality::isCastedInductionVariable
bool isCastedInductionVariable(const Value *V)
Returns True if V is a cast that is part of an induction def-use chain, and had been proven to be red...
Definition: LoopVectorizationLegality.cpp:869
llvm::LoopVectorizeHints::allowVectorization
bool allowVectorization(Function *F, Loop *L, bool VectorizeOnlyWhenForced) const
Definition: LoopVectorizationLegality.cpp:118
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:57
llvm::DenseMap< const Value *, Value * >
llvm::LoopAccessInfo::getRuntimePointerChecking
const RuntimePointerChecking * getRuntimePointerChecking() const
Definition: LoopAccessAnalysis.h:533
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::hasVectorInstrinsicScalarOpd
bool hasVectorInstrinsicScalarOpd(Intrinsic::ID ID, unsigned ScalarOpdIdx)
Identifies if the vector form of the intrinsic has a scalar operand.
Definition: VectorUtils.cpp:99
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:440
llvm::LoopBase::getLoopPreheader
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:167
llvm::SmallPtrSetImpl::begin
iterator begin() const
Definition: SmallPtrSet.h:402
llvm::VectorizerParams::MaxVectorWidth
static const unsigned MaxVectorWidth
Maximum SIMD width.
Definition: LoopAccessAnalysis.h:39
llvm::LoopAccessInfo::getReport
const OptimizationRemarkAnalysis * getReport() const
The diagnostics report generated for the analysis.
Definition: LoopAccessAnalysis.h:557
llvm::MDString::get
static MDString * get(LLVMContext &Context, StringRef Str)
Definition: Metadata.cpp:467
llvm::LoopBase::getLoopLatch
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:216
llvm::LoopVectorizationLegality::getReductionVars
ReductionList & getReductionVars()
Returns the reduction variables found in the loop.
Definition: LoopVectorizationLegality.h:259
llvm::RecurrenceDescriptor::isFirstOrderRecurrence
static bool isFirstOrderRecurrence(PHINode *Phi, Loop *TheLoop, DenseMap< Instruction *, Instruction * > &SinkAfter, DominatorTree *DT)
Returns true if Phi is a first-order recurrence.
Definition: IVDescriptors.cpp:716
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::PredicatedScalarEvolution::getSCEV
const SCEV * getSCEV(Value *V)
Returns the SCEV expression of V, in the context of the current SCEV predicate.
Definition: ScalarEvolution.cpp:13129
llvm::elfabi::ELFSymbolType::Func
@ Func
llvm::OptimizationRemarkEmitter::emit
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Definition: OptimizationRemarkEmitter.cpp:77
llvm::LoopAccessInfo::canVectorizeMemory
bool canVectorizeMemory() const
Return true if we can analyze the memory accesses in the loop and there are no memory dependence cycles.
Definition: LoopAccessAnalysis.h:526
llvm::isTLIScalarize
static bool isTLIScalarize(const TargetLibraryInfo &TLI, const CallInst &CI)
Checks if a function is scalarizable according to the TLI, in the sense that it should be vectorized ...
Definition: LoopVectorizationLegality.cpp:553
llvm::MDNode
Metadata node.
Definition: Metadata.h:897
llvm::isDereferenceableAndAlignedInLoop
bool isDereferenceableAndAlignedInLoop(LoadInst *LI, Loop *L, ScalarEvolution &SE, DominatorTree &DT)
Return true if we can prove that the given load (which is assumed to be within the specified loop) wo...
Definition: Loads.cpp:273
llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:382
llvm::LoopVectorizationLegality::isUniform
bool isUniform(Value *V)
Returns true if the value V is uniform within the loop.
Definition: LoopVectorizationLegality.cpp:394
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:71
LV_NAME
#define LV_NAME
Definition: LoopVectorizationLegality.cpp:31
llvm::size
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1486
llvm::SCEVUnionPredicate::isAlwaysTrue
bool isAlwaysTrue() const override
Implementation of the SCEVPredicate interface.
Definition: ScalarEvolution.cpp:13073
llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: OptimizationRemarkEmitter.h:33
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1512
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:57
llvm::Loop::getCanonicalInductionVariable
PHINode * getCanonicalInductionVariable() const
Check to see if the loop has a canonical induction variable: an integer recurrence that starts at 0 a...
Definition: LoopInfo.cpp:149
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
llvm::LoopVectorizeHints::emitRemarkWithHints
void emitRemarkWithHints() const
Dumps all the hint information.
Definition: LoopVectorizationLegality.cpp:151
llvm::convertPointerToIntegerType
static Type * convertPointerToIntegerType(const DataLayout &DL, Type *Ty)
Definition: LoopVectorizationLegality.cpp:340
llvm::OptimizationRemarkAnalysis
Diagnostic information for optimization analysis remarks.
Definition: DiagnosticInfo.h:770
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:298
llvm::Type::getContext
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:128
llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:174
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:148
llvm::VectorizerParams::isInterleaveForced
static bool isInterleaveForced()
True if force-vector-interleave was specified by the user.
Definition: LoopAccessAnalysis.cpp:132
LoopVectorize.h
llvm::BasicBlock::getContext
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:32
llvm::MapVector::empty
bool empty() const
Definition: MapVector.h:79
llvm::LoopBase::isInnermost
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
Definition: LoopInfo.h:165
llvm::InductionDescriptor::isInductionPHI
static bool isInductionPHI(PHINode *Phi, const Loop *L, ScalarEvolution *SE, InductionDescriptor &D, const SCEV *Expr=nullptr, SmallVectorImpl< Instruction * > *CastsToIgnore=nullptr)
Returns true if Phi is an induction in the loop L.
Definition: IVDescriptors.cpp:1184
PragmaVectorizeSCEVCheckThreshold
static cl::opt< unsigned > PragmaVectorizeSCEVCheckThreshold("pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed with a " "vectorize(enable) pragma"))
llvm::LoopInfoBase::isLoopHeader
bool isLoopHeader(const BlockT *BB) const
Definition: LoopInfo.h:977
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:80
llvm::MapVector::count
size_type count(const KeyT &Key) const
Definition: MapVector.h:142
llvm::GraphProgram::Name
Name
Definition: GraphWriter.h:52
H
#define H(x, y, z)
Definition: MD5.cpp:58
llvm::Loop::setLoopID
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
Definition: LoopInfo.cpp:520
llvm::VectorizationFactor
TODO: The following VectorizationFactor was pulled out of LoopVectorizationCostModel class.
Definition: LoopVectorizationPlanner.h:180
llvm::Loop::getLoopID
MDNode * getLoopID() const
Return the llvm.loop loop id metadata node for this loop if it is present.
Definition: LoopInfo.cpp:496
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:104
llvm::LinearPolySize< ElementCount >::getScalable
static ElementCount getScalable(ScalarTy MinVal)
Definition: TypeSize.h:287
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:207
LoopVectorizationLegality.h
llvm::LoopAccessInfo::getNumRuntimePointerChecks
unsigned getNumRuntimePointerChecks() const
Number of memchecks required to prove independence of otherwise may-alias pointers.
Definition: LoopAccessAnalysis.h:539
llvm::PredicatedScalarEvolution::addPredicate
void addPredicate(const SCEVPredicate &Pred)
Adds a new predicate.
Definition: ScalarEvolution.cpp:13157
llvm::LoopBase::getNumBackEdges
unsigned getNumBackEdges() const
Calculate the number of back edges to the loop header.
Definition: LoopInfo.h:250
llvm::OptimizationRemarkAnalysis::AlwaysPrint
static const char * AlwaysPrint
Definition: DiagnosticInfo.h:805
llvm::LoopVectorizeHints::LoopVectorizeHints
LoopVectorizeHints(const Loop *L, bool InterleaveOnlyWhenForced, OptimizationRemarkEmitter &ORE)
Definition: LoopVectorizationLegality.cpp:72
llvm::UnivariateLinearPolyBase::isZero
bool isZero() const
Definition: TypeSize.h:229
llvm::AMDGPU::Hwreg::Width
Width
Definition: SIDefines.h:403
Predicate
llvm::RecurrenceDescriptor
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Definition: IVDescriptors.h:66
DEBUG_TYPE
#define DEBUG_TYPE
Definition: LoopVectorizationLegality.cpp:32
llvm::LoopVectorizeHints::setAlreadyVectorized
void setAlreadyVectorized()
Mark the loop L as already vectorized by setting the width to 1.
Definition: LoopVectorizationLegality.cpp:99
llvm::VectorizerParams
Collection of parameters shared between the Loop Vectorizer and the Loop Access Analysis.
Definition: LoopAccessAnalysis.h:37
llvm::PHINode
Definition: Instructions.h:2572
llvm::SmallVectorImpl< Instruction * >
llvm::TargetLibraryInfo::getWidestVF
void getWidestVF(StringRef ScalarF, ElementCount &FixedVF, ElementCount &ScalableVF) const
Returns the largest vectorization factor used in the list of vector functions.
Definition: TargetLibraryInfo.h:406
llvm::Module::getDataLayout
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:397
llvm::SmallPtrSetImpl< Value * >
llvm::getLoadStorePointerOperand
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
Definition: Instructions.h:5237
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1450
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::getWiderType
static Type * getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1)
Definition: LoopVectorizationLegality.cpp:352
llvm::TargetLibraryInfo::hasOptimizedCodeGen
bool hasOptimizedCodeGen(LibFunc F) const
Tests if the function is both available and a candidate for optimized code generation.
Definition: TargetLibraryInfo.h:326
llvm::RecurrenceDescriptor::getLoopExitInstr
Instruction * getLoopExitInstr() const
Definition: IVDescriptors.h:188
llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition: AMDGPUMetadata.h:389
llvm::cl::desc
Definition: CommandLine.h:411
llvm::hasOutsideLoopUser
static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst, SmallPtrSetImpl< Value * > &AllowedExit)
Check that the instruction has outside loop users and is not an identified reduction variable.
Definition: LoopVectorizationLegality.cpp:362
llvm::LoopAccessInfo::getPSE
const PredicatedScalarEvolution & getPSE() const
Used to add runtime SCEV checks.
Definition: LoopAccessAnalysis.h:591
llvm::PredicatedScalarEvolution::getSE
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
Definition: ScalarEvolution.h:2169
llvm::LoopVectorizeHints::FK_Disabled
@ FK_Disabled
Forcing disabled.
Definition: LoopVectorizationLegality.h:95
Reduction
loop Loop Strength Reduction
Definition: LoopStrengthReduce.cpp:6003
llvm::MDString
A single uniqued string.
Definition: Metadata.h:611
llvm::LoopVectorizationLegality::canVectorize
bool canVectorize(bool UseVPlanNativePath)
Returns true if it is legal to vectorize this loop.
Definition: LoopVectorizationLegality.cpp:1109
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
llvm::Value::users
iterator_range< user_iterator > users()
Definition: Value.h:434
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:364
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38