// LLVM 9.0.0svn
// LoopVectorizationLegality.cpp
// Go to the documentation of this file.
1 //===- LoopVectorizationLegality.cpp --------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file provides loop vectorization legality analysis. Original code
10 // resided in LoopVectorize.cpp for a long time.
11 //
12 // At this point, it is implemented as a utility class, not as an analysis
13 // pass. It should be easy to create an analysis pass around it if there
14 // is a need (but D45420 needs to happen first).
15 //
18 #include "llvm/IR/IntrinsicInst.h"
19 
20 using namespace llvm;
21 
22 #define LV_NAME "loop-vectorize"
23 #define DEBUG_TYPE LV_NAME
24 
// Hidden command-line option "enable-if-conversion" (initialized to true)
// that switches if-conversion during vectorization on or off.
25 static cl::opt<bool>
26  EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
27  cl::desc("Enable if-conversion during vectorization."));
28 
30  "pragma-vectorize-memory-check-threshold", cl::init(128), cl::Hidden,
31  cl::desc("The maximum allowed number of runtime memory checks with a "
32  "vectorize(enable) pragma."));
33 
35  "vectorize-scev-check-threshold", cl::init(16), cl::Hidden,
36  cl::desc("The maximum number of SCEV checks allowed."));
37 
39  "pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden,
40  cl::desc("The maximum number of SCEV checks allowed with a "
41  "vectorize(enable) pragma"));
42 
43 /// Maximum vectorization interleave count.
/// Hint::validate() rejects HK_UNROLL (interleave count) hint values that
/// exceed this bound.
44 static const unsigned MaxInterleaveFactor = 16;
45 
46 namespace llvm {
47 
49  StringRef RemarkName,
50  Loop *TheLoop,
51  Instruction *I) {
52  Value *CodeRegion = TheLoop->getHeader();
53  DebugLoc DL = TheLoop->getStartLoc();
54 
55  if (I) {
56  CodeRegion = I->getParent();
57  // If there is no debug location attached to the instruction, revert back to
58  // using the loop's.
59  if (I->getDebugLoc())
60  DL = I->getDebugLoc();
61  }
62 
63  OptimizationRemarkAnalysis R(PassName, RemarkName, DL, CodeRegion);
64  R << "loop not vectorized: ";
65  return R;
66 }
67 
68 bool LoopVectorizeHints::Hint::validate(unsigned Val) {
69  switch (Kind) {
70  case HK_WIDTH:
71  return isPowerOf2_32(Val) && Val <= VectorizerParams::MaxVectorWidth;
72  case HK_UNROLL:
73  return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor;
74  case HK_FORCE:
75  return (Val <= 1);
76  case HK_ISVECTORIZED:
77  return (Val == 0 || Val == 1);
78  }
79  return false;
80 }
81 
83  bool InterleaveOnlyWhenForced,
85  : Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH),
86  Interleave("interleave.count", InterleaveOnlyWhenForced, HK_UNROLL),
87  Force("vectorize.enable", FK_Undefined, HK_FORCE),
88  IsVectorized("isvectorized", 0, HK_ISVECTORIZED), TheLoop(L), ORE(ORE) {
89  // Populate values with existing loop metadata.
90  getHintsFromMetadata();
91 
92  // force-vector-interleave overrides DisableInterleaving.
95 
96  if (IsVectorized.Value != 1)
97  // If the vectorization width and interleaving count are both 1 then
98  // consider the loop to have been already vectorized because there's
99  // nothing more that we can do.
100  IsVectorized.Value = Width.Value == 1 && Interleave.Value == 1;
101  LLVM_DEBUG(if (InterleaveOnlyWhenForced && Interleave.Value == 1) dbgs()
102  << "LV: Interleaving disabled by the pass manager\n");
103 }
104 
106  Function *F, Loop *L, bool VectorizeOnlyWhenForced) const {
108  LLVM_DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n");
110  return false;
111  }
112 
113  if (VectorizeOnlyWhenForced && getForce() != LoopVectorizeHints::FK_Enabled) {
114  LLVM_DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n");
116  return false;
117  }
118 
119  if (getIsVectorized() == 1) {
120  LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n");
121  // FIXME: Add interleave.disable metadata. This will allow
122  // vectorize.disable to be used without disabling the pass and errors
123  // to differentiate between disabled vectorization and a width of 1.
124  ORE.emit([&]() {
126  "AllDisabled", L->getStartLoc(),
127  L->getHeader())
128  << "loop not vectorized: vectorization and interleaving are "
129  "explicitly disabled, or the loop has already been "
130  "vectorized";
131  });
132  return false;
133  }
134 
135  return true;
136 }
137 
139  using namespace ore;
140 
141  ORE.emit([&]() {
142  if (Force.Value == LoopVectorizeHints::FK_Disabled)
143  return OptimizationRemarkMissed(LV_NAME, "MissedExplicitlyDisabled",
144  TheLoop->getStartLoc(),
145  TheLoop->getHeader())
146  << "loop not vectorized: vectorization is explicitly disabled";
147  else {
148  OptimizationRemarkMissed R(LV_NAME, "MissedDetails",
149  TheLoop->getStartLoc(), TheLoop->getHeader());
150  R << "loop not vectorized";
151  if (Force.Value == LoopVectorizeHints::FK_Enabled) {
152  R << " (Force=" << NV("Force", true);
153  if (Width.Value != 0)
154  R << ", Vector Width=" << NV("VectorWidth", Width.Value);
155  if (Interleave.Value != 0)
156  R << ", Interleave Count=" << NV("InterleaveCount", Interleave.Value);
157  R << ")";
158  }
159  return R;
160  }
161  });
162 }
163 
165  if (getWidth() == 1)
166  return LV_NAME;
168  return LV_NAME;
170  return LV_NAME;
172 }
173 
/// Walk the operands of TheLoop's loop-ID metadata and hand every
/// (name, single argument) pair to setHint(). Does nothing when the loop has
/// no loop ID.
174 void LoopVectorizeHints::getHintsFromMetadata() {
175  MDNode *LoopID = TheLoop->getLoopID();
176  if (!LoopID)
177  return;
178 
179  // First operand should refer to the loop id itself.
180  assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
181  assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
182 
// Operand 0 is the self-reference checked above, so scanning starts at 1.
183  for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
184  const MDString *S = nullptr;
// NOTE(review): the declaration of Args (listing line 185) is not visible in
// this extract; from its uses below it is a per-iteration container of
// metadata operands -- confirm against the full source.
186 
187  // The expected hint is either a MDString or a MDNode with the first
188  // operand a MDString.
189  if (const MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i))) {
// MD was already tested non-null by the enclosing if, so only the
// operand-count part of this condition can trigger.
190  if (!MD || MD->getNumOperands() == 0)
191  continue;
192  S = dyn_cast<MDString>(MD->getOperand(0));
// The inner i/ie shadow the outer loop variables; operands after the name
// become the hint's arguments.
193  for (unsigned i = 1, ie = MD->getNumOperands(); i < ie; ++i)
194  Args.push_back(MD->getOperand(i));
195  } else {
196  S = dyn_cast<MDString>(LoopID->getOperand(i));
197  assert(Args.size() == 0 && "too many arguments for MDString");
198  }
199 
// Operands whose name is not an MDString are skipped.
200  if (!S)
201  continue;
202 
203  // Check if the hint starts with the loop metadata prefix.
// (The prefix test itself is performed by setHint().) Only hints that carry
// exactly one argument are forwarded; all others are dropped silently.
204  StringRef Name = S->getString();
205  if (Args.size() == 1)
206  setHint(Name, Args[0]);
207  }
208 }
209 
210 void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) {
211  if (!Name.startswith(Prefix()))
212  return;
213  Name = Name.substr(Prefix().size(), StringRef::npos);
214 
215  const ConstantInt *C = mdconst::dyn_extract<ConstantInt>(Arg);
216  if (!C)
217  return;
218  unsigned Val = C->getZExtValue();
219 
220  Hint *Hints[] = {&Width, &Interleave, &Force, &IsVectorized};
221  for (auto H : Hints) {
222  if (Name == H->Name) {
223  if (H->validate(Val))
224  H->Value = Val;
225  else
226  LLVM_DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n");
227  break;
228  }
229  }
230 }
231 
232 MDNode *LoopVectorizeHints::createHintMetadata(StringRef Name,
233  unsigned V) const {
234  LLVMContext &Context = TheLoop->getHeader()->getContext();
235  Metadata *MDs[] = {
236  MDString::get(Context, Name),
238  return MDNode::get(Context, MDs);
239 }
240 
241 bool LoopVectorizeHints::matchesHintMetadataName(MDNode *Node,
242  ArrayRef<Hint> HintTypes) {
243  MDString *Name = dyn_cast<MDString>(Node->getOperand(0));
244  if (!Name)
245  return false;
246 
247  for (auto H : HintTypes)
248  if (Name->getString().endswith(H.Name))
249  return true;
250  return false;
251 }
252 
/// Rebuild TheLoop's loop-ID metadata so that it carries the hints in
/// \p HintTypes: existing operands that do not name one of these hints are
/// kept, the given hints are appended, and the resulting node is installed
/// as the loop ID. Does nothing when \p HintTypes is empty.
253 void LoopVectorizeHints::writeHintsToMetadata(ArrayRef<Hint> HintTypes) {
254  if (HintTypes.empty())
255  return;
256 
257  // Reserve the first element to LoopID (see below).
// NOTE(review): the declaration of MDs (listing line 258) is not visible in
// this extract; from its uses below it is the operand list of the new loop
// ID -- confirm against the full source.
259  // If the loop already has metadata, then ignore the existing operands.
260  MDNode *LoopID = TheLoop->getLoopID();
261  if (LoopID) {
// Operand 0 is skipped; it is replaced by the new self-reference below.
262  for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
// cast<> (not dyn_cast<>) is used, so every existing operand is expected to
// be an MDNode here.
263  MDNode *Node = cast<MDNode>(LoopID->getOperand(i));
264  // If node in update list, ignore old value.
265  if (!matchesHintMetadataName(Node, HintTypes))
266  MDs.push_back(Node);
267  }
268  }
269 
270  // Now, add the missing hints.
// Each hint is written under its prefixed name (Prefix() + H.Name).
271  for (auto H : HintTypes)
272  MDs.push_back(createHintMetadata(Twine(Prefix(), H.Name).str(), H.Value));
273 
274  // Replace current metadata node with new one.
275  LLVMContext &Context = TheLoop->getHeader()->getContext();
276  MDNode *NewLoopID = MDNode::get(Context, MDs);
277  // Set operand 0 to refer to the loop id itself.
278  NewLoopID->replaceOperandWith(0, NewLoopID);
279 
280  TheLoop->setLoopID(NewLoopID);
281 }
282 
284  Function *F, Loop *L, const LoopVectorizeHints &Hints) {
285  const char *PassName = Hints.vectorizeAnalysisPassName();
286  bool Failed = false;
287  if (UnsafeAlgebraInst && !Hints.allowReordering()) {
288  ORE.emit([&]() {
290  PassName, "CantReorderFPOps", UnsafeAlgebraInst->getDebugLoc(),
291  UnsafeAlgebraInst->getParent())
292  << "loop not vectorized: cannot prove it is safe to reorder "
293  "floating-point operations";
294  });
295  Failed = true;
296  }
297 
298  // Test if runtime memcheck thresholds are exceeded.
299  bool PragmaThresholdReached =
300  NumRuntimePointerChecks > PragmaVectorizeMemoryCheckThreshold;
301  bool ThresholdReached =
302  NumRuntimePointerChecks > VectorizerParams::RuntimeMemoryCheckThreshold;
303  if ((ThresholdReached && !Hints.allowReordering()) ||
304  PragmaThresholdReached) {
305  ORE.emit([&]() {
306  return OptimizationRemarkAnalysisAliasing(PassName, "CantReorderMemOps",
307  L->getStartLoc(),
308  L->getHeader())
309  << "loop not vectorized: cannot prove it is safe to reorder "
310  "memory operations";
311  });
312  LLVM_DEBUG(dbgs() << "LV: Too many memory checks needed.\n");
313  Failed = true;
314  }
315 
316  return Failed;
317 }
318 
319 // Return true if the inner loop \p Lp is uniform with regard to the outer loop
320 // \p OuterLp (i.e., if the outer loop is vectorized, all the vector lanes
321 // executing the inner loop will execute the same iterations). This check is
322 // very constrained for now but it will be relaxed in the future. \p Lp is
323 // considered uniform if it meets all the following conditions:
324 // 1) it has a canonical IV (starting from 0 and with stride 1),
325 // 2) its latch terminator is a conditional branch and,
326 // 3) its latch condition is a compare instruction whose operands are the
327 // canonical IV and an OuterLp invariant.
328 // This check doesn't take into account the uniformity of other conditions not
329 // related to the loop latch because they don't affect the loop uniformity.
330 //
331 // NOTE: We decided to keep all these checks and its associated documentation
332 // together so that we can easily have a picture of the current supported loop
333 // nests. However, some of the current checks don't depend on \p OuterLp and
334 // would be redundantly executed for each \p Lp if we invoked this function for
335 // different candidate outer loops. This is not the case for now because we
336 // don't currently have the infrastructure to evaluate multiple candidate outer
337 // loops and \p OuterLp will be a fixed parameter while we only support explicit
338 // outer loop vectorization. It's also very likely that these checks go away
339 // before introducing the aforementioned infrastructure. However, if this is not
340 // the case, we should move the \p OuterLp independent checks to a separate
341 // function that is only executed once for each \p Lp.
342 static bool isUniformLoop(Loop *Lp, Loop *OuterLp) {
343  assert(Lp->getLoopLatch() && "Expected loop with a single latch.");
344 
345  // If Lp is the outer loop, it's uniform by definition.
346  if (Lp == OuterLp)
347  return true;
348  assert(OuterLp->contains(Lp) && "OuterLp must contain Lp.");
349 
350  // 1. Lp must have a canonical induction variable.
// NOTE(review): the declaration of IV (listing line 351) is not visible in
// this extract; per the debug message below it holds Lp's canonical IV, or
// null if there is none -- confirm against the full source.
352  if (!IV) {
353  LLVM_DEBUG(dbgs() << "LV: Canonical IV not found.\n");
354  return false;
355  }
356 
357  // 2. The latch terminator must be a conditional branch.
358  BasicBlock *Latch = Lp->getLoopLatch();
359  auto *LatchBr = dyn_cast<BranchInst>(Latch->getTerminator());
360  if (!LatchBr || LatchBr->isUnconditional()) {
361  LLVM_DEBUG(dbgs() << "LV: Unsupported loop latch branch.\n");
362  return false;
363  }
364 
365  // 3. The latch condition must be a compare instruction.
366  auto *LatchCmp = dyn_cast<CmpInst>(LatchBr->getCondition());
367  if (!LatchCmp) {
368  LLVM_DEBUG(
369  dbgs() << "LV: Loop latch condition is not a compare instruction.\n");
370  return false;
371  }
372 
// The compare must test the IV value that flows in from the latch against a
// value that is invariant in OuterLp; either operand order is accepted.
373  Value *CondOp0 = LatchCmp->getOperand(0);
374  Value *CondOp1 = LatchCmp->getOperand(1);
375  Value *IVUpdate = IV->getIncomingValueForBlock(Latch);
376  if (!(CondOp0 == IVUpdate && OuterLp->isLoopInvariant(CondOp1)) &&
377  !(CondOp1 == IVUpdate && OuterLp->isLoopInvariant(CondOp0))) {
378  LLVM_DEBUG(dbgs() << "LV: Loop latch condition is not uniform.\n");
379  return false;
380  }
381 
382  return true;
383 }
384 
385 // Return true if \p Lp and all its nested loops are uniform with regard to \p
386 // OuterLp.
387 static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp) {
388  if (!isUniformLoop(Lp, OuterLp))
389  return false;
390 
391  // Check if nested loops are uniform.
392  for (Loop *SubLp : *Lp)
393  if (!isUniformLoopNest(SubLp, OuterLp))
394  return false;
395 
396  return true;
397 }
398 
399 /// Check whether it is safe to if-convert this phi node.
400 ///
401 /// Phi nodes with constant expressions that can trap are not safe to if
402 /// convert.
404  for (PHINode &Phi : BB->phis()) {
405  for (Value *V : Phi.incoming_values())
406  if (auto *C = dyn_cast<Constant>(V))
407  if (C->canTrap())
408  return false;
409  }
410  return true;
411 }
412 
414  if (Ty->isPointerTy())
415  return DL.getIntPtrType(Ty);
416 
417  // It is possible that char's or short's overflow when we ask for the loop's
418  // trip count, work around this by changing the type size.
419  if (Ty->getScalarSizeInBits() < 32)
420  return Type::getInt32Ty(Ty->getContext());
421 
422  return Ty;
423 }
424 
425 static Type *getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1) {
426  Ty0 = convertPointerToIntegerType(DL, Ty0);
427  Ty1 = convertPointerToIntegerType(DL, Ty1);
428  if (Ty0->getScalarSizeInBits() > Ty1->getScalarSizeInBits())
429  return Ty0;
430  return Ty1;
431 }
432 
433 /// Check that the instruction has outside loop users and is not an
434 /// identified reduction variable.
435 static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
436  SmallPtrSetImpl<Value *> &AllowedExit) {
437  // Reductions, Inductions and non-header phis are allowed to have exit users. All
438  // other instructions must not have external users.
439  if (!AllowedExit.count(Inst))
440  // Check that all of the users of the loop are inside the BB.
441  for (User *U : Inst->users()) {
442  Instruction *UI = cast<Instruction>(U);
443  // This user may be a reduction exit value.
444  if (!TheLoop->contains(UI)) {
445  LLVM_DEBUG(dbgs() << "LV: Found an outside user for : " << *UI << '\n');
446  return true;
447  }
448  }
449  return false;
450 }
451 
453  const ValueToValueMap &Strides =
454  getSymbolicStrides() ? *getSymbolicStrides() : ValueToValueMap();
455 
456  int Stride = getPtrStride(PSE, Ptr, TheLoop, Strides, true, false);
457  if (Stride == 1 || Stride == -1)
458  return Stride;
459  return 0;
460 }
461 
463  return LAI->isUniform(V);
464 }
465 
466 bool LoopVectorizationLegality::canVectorizeOuterLoop() {
467  assert(!TheLoop->empty() && "We are not vectorizing an outer loop.");
468  // Store the result and return it at the end instead of exiting early, in case
469  // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
470  bool Result = true;
471  bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
472 
473  for (BasicBlock *BB : TheLoop->blocks()) {
474  // Check whether the BB terminator is a BranchInst. Any other terminator is
475  // not supported yet.
476  auto *Br = dyn_cast<BranchInst>(BB->getTerminator());
477  if (!Br) {
478  LLVM_DEBUG(dbgs() << "LV: Unsupported basic block terminator.\n");
479  ORE->emit(createMissedAnalysis("CFGNotUnderstood")
480  << "loop control flow is not understood by vectorizer");
481  if (DoExtraAnalysis)
482  Result = false;
483  else
484  return false;
485  }
486 
487  // Check whether the BranchInst is a supported one. Only unconditional
488  // branches, conditional branches with an outer loop invariant condition or
489  // backedges are supported.
490  if (Br && Br->isConditional() &&
491  !TheLoop->isLoopInvariant(Br->getCondition()) &&
492  !LI->isLoopHeader(Br->getSuccessor(0)) &&
493  !LI->isLoopHeader(Br->getSuccessor(1))) {
494  LLVM_DEBUG(dbgs() << "LV: Unsupported conditional branch.\n");
495  ORE->emit(createMissedAnalysis("CFGNotUnderstood")
496  << "loop control flow is not understood by vectorizer");
497  if (DoExtraAnalysis)
498  Result = false;
499  else
500  return false;
501  }
502  }
503 
504  // Check whether inner loops are uniform. At this point, we only support
505  // simple outer loops scenarios with uniform nested loops.
506  if (!isUniformLoopNest(TheLoop /*loop nest*/,
507  TheLoop /*context outer loop*/)) {
508  LLVM_DEBUG(
509  dbgs()
510  << "LV: Not vectorizing: Outer loop contains divergent loops.\n");
511  ORE->emit(createMissedAnalysis("CFGNotUnderstood")
512  << "loop control flow is not understood by vectorizer");
513  if (DoExtraAnalysis)
514  Result = false;
515  else
516  return false;
517  }
518 
519  // Check whether we are able to set up outer loop induction.
520  if (!setupOuterLoopInductions()) {
521  LLVM_DEBUG(
522  dbgs() << "LV: Not vectorizing: Unsupported outer loop Phi(s).\n");
523  ORE->emit(createMissedAnalysis("UnsupportedPhi")
524  << "Unsupported outer loop Phi(s)");
525  if (DoExtraAnalysis)
526  Result = false;
527  else
528  return false;
529  }
530 
531  return Result;
532 }
533 
/// Record \p Phi, described by \p ID, as an induction of TheLoop: store the
/// descriptor in Inductions, remember its first cast for later ignoring,
/// update WidestIndTy for non-floating-point PHIs, possibly select \p Phi as
/// PrimaryInduction, and add the PHI and its latch-incoming value to
/// \p AllowedExit when the current SCEV predicate is always true.
534 void LoopVectorizationLegality::addInductionPhi(
535  PHINode *Phi, const InductionDescriptor &ID,
536  SmallPtrSetImpl<Value *> &AllowedExit) {
537  Inductions[Phi] = ID;
538 
539  // In case this induction also comes with casts that we know we can ignore
540  // in the vectorized loop body, record them here. All casts could be recorded
541  // here for ignoring, but it suffices to record only the first (as it is the
542  // only one that may be used outside the cast sequence).
543  const SmallVectorImpl<Instruction *> &Casts = ID.getCastInsts();
544  if (!Casts.empty())
545  InductionCastsToIgnore.insert(*Casts.begin());
546 
547  Type *PhiTy = Phi->getType();
548  const DataLayout &DL = Phi->getModule()->getDataLayout();
549 
550  // Get the widest type.
// Floating-point inductions do not take part in the widest-type computation.
551  if (!PhiTy->isFloatingPointTy()) {
552  if (!WidestIndTy)
553  WidestIndTy = convertPointerToIntegerType(DL, PhiTy);
554  else
555  WidestIndTy = getWiderType(DL, PhiTy, WidestIndTy);
556  }
557 
558  // Int inductions are special because we only allow one IV.
// NOTE(review): the opening of this condition (listing lines 559-560) is not
// visible in this extract; only its tail, which requires a constant null
// start value, can be seen -- confirm against the full source.
561  isa<Constant>(ID.getStartValue()) &&
562  cast<Constant>(ID.getStartValue())->isNullValue()) {
563 
564  // Use the phi node with the widest type as induction. Use the last
565  // one if there are multiple (no good reason for doing this other
566  // than it is expedient). We've checked that it begins at zero and
567  // steps by one, so this is a canonical induction variable.
568  if (!PrimaryInduction || PhiTy == WidestIndTy)
569  PrimaryInduction = Phi;
570  }
571 
572  // Both the PHI node itself, and the "post-increment" value feeding
573  // back into the PHI node may have external users.
574  // We can allow those uses, except if the SCEVs we have for them rely
575  // on predicates that only hold within the loop, since allowing the exit
576  // currently means re-using this SCEV outside the loop (see PR33706 for more
577  // details).
578  if (PSE.getUnionPredicate().isAlwaysTrue()) {
579  AllowedExit.insert(Phi);
580  AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch()));
581  }
582 
583  LLVM_DEBUG(dbgs() << "LV: Found an induction variable.\n");
584 }
585 
/// Try to register every PHI in the header of TheLoop as an induction via
/// addInductionPhi(). Returns true only if all header PHIs are accepted.
586 bool LoopVectorizationLegality::setupOuterLoopInductions() {
587  BasicBlock *Header = TheLoop->getHeader();
588 
589  // Returns true if a given Phi is a supported induction.
590  auto isSupportedPhi = [&](PHINode &Phi) -> bool {
// NOTE(review): listing lines 591 and 593 (the declaration of ID and the
// remainder of this condition) are not visible in this extract -- confirm
// the full condition against the original source.
592  if (InductionDescriptor::isInductionPHI(&Phi, TheLoop, PSE, ID) &&
594  addInductionPhi(&Phi, ID, AllowedExit);
595  return true;
596  } else {
597  // Bail out for any Phi in the outer loop header that is not a supported
598  // induction.
599  LLVM_DEBUG(
600  dbgs()
601  << "LV: Found unsupported PHI for outer loop vectorization.\n");
602  return false;
603  }
604  };
605 
// PHIs visited before the first unsupported one have already been registered
// by addInductionPhi() when this returns false.
606  if (llvm::all_of(Header->phis(), isSupportedPhi))
607  return true;
608  else
609  return false;
610 }
611 
612 bool LoopVectorizationLegality::canVectorizeInstrs() {
613  BasicBlock *Header = TheLoop->getHeader();
614 
615  // Look for the attribute signaling the absence of NaNs.
616  Function &F = *Header->getParent();
617  HasFunNoNaNAttr =
618  F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true";
619 
620  // For each block in the loop.
621  for (BasicBlock *BB : TheLoop->blocks()) {
622  // Scan the instructions in the block and look for hazards.
623  for (Instruction &I : *BB) {
624  if (auto *Phi = dyn_cast<PHINode>(&I)) {
625  Type *PhiTy = Phi->getType();
626  // Check that this PHI type is allowed.
627  if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() &&
628  !PhiTy->isPointerTy()) {
629  ORE->emit(createMissedAnalysis("CFGNotUnderstood", Phi)
630  << "loop control flow is not understood by vectorizer");
631  LLVM_DEBUG(dbgs() << "LV: Found an non-int non-pointer PHI.\n");
632  return false;
633  }
634 
635  // If this PHINode is not in the header block, then we know that we
636  // can convert it to select during if-conversion. No need to check if
637  // the PHIs in this block are induction or reduction variables.
638  if (BB != Header) {
639  // Non-header phi nodes that have outside uses can be vectorized. Add
640  // them to the list of allowed exits.
641  // Unsafe cyclic dependencies with header phis are identified during
642  // legalization for reduction, induction and first order
643  // recurrences.
644  continue;
645  }
646 
647  // We only allow if-converted PHIs with exactly two incoming values.
648  if (Phi->getNumIncomingValues() != 2) {
649  ORE->emit(createMissedAnalysis("CFGNotUnderstood", Phi)
650  << "control flow not understood by vectorizer");
651  LLVM_DEBUG(dbgs() << "LV: Found an invalid PHI.\n");
652  return false;
653  }
654 
655  RecurrenceDescriptor RedDes;
656  if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes, DB, AC,
657  DT)) {
658  if (RedDes.hasUnsafeAlgebra())
659  Requirements->addUnsafeAlgebraInst(RedDes.getUnsafeAlgebraInst());
660  AllowedExit.insert(RedDes.getLoopExitInstr());
661  Reductions[Phi] = RedDes;
662  continue;
663  }
664 
665  // TODO: Instead of recording the AllowedExit, it would be good to record the
666  // complementary set: NotAllowedExit. These include (but may not be
667  // limited to):
668  // 1. Reduction phis as they represent the one-before-last value, which
669  // is not available when vectorized
670  // 2. Induction phis and increment when SCEV predicates cannot be used
671  // outside the loop - see addInductionPhi
672  // 3. Non-Phis with outside uses when SCEV predicates cannot be used
673  // outside the loop - see call to hasOutsideLoopUser in the non-phi
674  // handling below
675  // 4. FirstOrderRecurrence phis that can possibly be handled by
676  // extraction.
677  // By recording these, we can then reason about ways to vectorize each
678  // of these NotAllowedExit.
680  if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID)) {
681  addInductionPhi(Phi, ID, AllowedExit);
682  if (ID.hasUnsafeAlgebra() && !HasFunNoNaNAttr)
683  Requirements->addUnsafeAlgebraInst(ID.getUnsafeAlgebraInst());
684  continue;
685  }
686 
688  SinkAfter, DT)) {
689  FirstOrderRecurrences.insert(Phi);
690  continue;
691  }
692 
693  // As a last resort, coerce the PHI to an AddRec expression
694  // and re-try classifying it as an induction PHI.
695  if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID, true)) {
696  addInductionPhi(Phi, ID, AllowedExit);
697  continue;
698  }
699 
700  ORE->emit(createMissedAnalysis("NonReductionValueUsedOutsideLoop", Phi)
701  << "value that could not be identified as "
702  "reduction is used outside the loop");
703  LLVM_DEBUG(dbgs() << "LV: Found an unidentified PHI." << *Phi << "\n");
704  return false;
705  } // end of PHI handling
706 
707  // We handle calls that:
708  // * Are debug info intrinsics.
709  // * Have a mapping to an IR intrinsic.
710  // * Have a vector version available.
711  auto *CI = dyn_cast<CallInst>(&I);
712  if (CI && !getVectorIntrinsicIDForCall(CI, TLI) &&
713  !isa<DbgInfoIntrinsic>(CI) &&
714  !(CI->getCalledFunction() && TLI &&
715  TLI->isFunctionVectorizable(CI->getCalledFunction()->getName()))) {
716  // If the call is a recognized math library call, it is likely that
717  // we can vectorize it given loosened floating-point constraints.
718  LibFunc Func;
719  bool IsMathLibCall =
720  TLI && CI->getCalledFunction() &&
721  CI->getType()->isFloatingPointTy() &&
722  TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) &&
723  TLI->hasOptimizedCodeGen(Func);
724 
725  if (IsMathLibCall) {
726  // TODO: Ideally, we should not use clang-specific language here,
727  // but it's hard to provide meaningful yet generic advice.
728  // Also, should this be guarded by allowExtraAnalysis() and/or be part
729  // of the returned info from isFunctionVectorizable()?
730  ORE->emit(createMissedAnalysis("CantVectorizeLibcall", CI)
731  << "library call cannot be vectorized. "
732  "Try compiling with -fno-math-errno, -ffast-math, "
733  "or similar flags");
734  } else {
735  ORE->emit(createMissedAnalysis("CantVectorizeCall", CI)
736  << "call instruction cannot be vectorized");
737  }
738  LLVM_DEBUG(
739  dbgs() << "LV: Found a non-intrinsic callsite.\n");
740  return false;
741  }
742 
743  // Intrinsics such as powi,cttz and ctlz are legal to vectorize if the
744  // second argument is the same (i.e. loop invariant)
746  getVectorIntrinsicIDForCall(CI, TLI), 1)) {
747  auto *SE = PSE.getSE();
748  if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(1)), TheLoop)) {
749  ORE->emit(createMissedAnalysis("CantVectorizeIntrinsic", CI)
750  << "intrinsic instruction cannot be vectorized");
751  LLVM_DEBUG(dbgs()
752  << "LV: Found unvectorizable intrinsic " << *CI << "\n");
753  return false;
754  }
755  }
756 
757  // Check that the instruction return type is vectorizable.
758  // Also, we can't vectorize extractelement instructions.
759  if ((!VectorType::isValidElementType(I.getType()) &&
760  !I.getType()->isVoidTy()) ||
761  isa<ExtractElementInst>(I)) {
762  ORE->emit(createMissedAnalysis("CantVectorizeInstructionReturnType", &I)
763  << "instruction return type cannot be vectorized");
764  LLVM_DEBUG(dbgs() << "LV: Found unvectorizable type.\n");
765  return false;
766  }
767 
768  // Check that the stored type is vectorizable.
769  if (auto *ST = dyn_cast<StoreInst>(&I)) {
770  Type *T = ST->getValueOperand()->getType();
772  ORE->emit(createMissedAnalysis("CantVectorizeStore", ST)
773  << "store instruction cannot be vectorized");
774  return false;
775  }
776 
777  // FP instructions can allow unsafe algebra, thus vectorizable by
778  // non-IEEE-754 compliant SIMD units.
779  // This applies to floating-point math operations and calls, not memory
780  // operations, shuffles, or casts, as they don't change precision or
781  // semantics.
782  } else if (I.getType()->isFloatingPointTy() && (CI || I.isBinaryOp()) &&
783  !I.isFast()) {
784  LLVM_DEBUG(dbgs() << "LV: Found FP op with unsafe algebra.\n");
785  Hints->setPotentiallyUnsafe();
786  }
787 
788  // Reduction instructions are allowed to have exit users.
789  // All other instructions must not have external users.
790  if (hasOutsideLoopUser(TheLoop, &I, AllowedExit)) {
791  // We can safely vectorize loops where instructions within the loop are
792  // used outside the loop only if the SCEV predicates within the loop is
793  // same as outside the loop. Allowing the exit means reusing the SCEV
794  // outside the loop.
795  if (PSE.getUnionPredicate().isAlwaysTrue()) {
796  AllowedExit.insert(&I);
797  continue;
798  }
799  ORE->emit(createMissedAnalysis("ValueUsedOutsideLoop", &I)
800  << "value cannot be used outside the loop");
801  return false;
802  }
803  } // next instr.
804  }
805 
806  if (!PrimaryInduction) {
807  LLVM_DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
808  if (Inductions.empty()) {
809  ORE->emit(createMissedAnalysis("NoInductionVariable")
810  << "loop induction variable could not be identified");
811  return false;
812  } else if (!WidestIndTy) {
813  ORE->emit(createMissedAnalysis("NoIntegerInductionVariable")
814  << "integer loop induction variable could not be identified");
815  return false;
816  }
817  }
818 
819  // Now we know the widest induction type, check if our found induction
820  // is the same size. If it's not, unset it here and InnerLoopVectorizer
821  // will create another.
822  if (PrimaryInduction && WidestIndTy != PrimaryInduction->getType())
823  PrimaryInduction = nullptr;
824 
825  return true;
826 }
827 
828 bool LoopVectorizationLegality::canVectorizeMemory() {
829  LAI = &(*GetLAA)(*TheLoop);
830  const OptimizationRemarkAnalysis *LAR = LAI->getReport();
831  if (LAR) {
832  ORE->emit([&]() {
833  return OptimizationRemarkAnalysis(Hints->vectorizeAnalysisPassName(),
834  "loop not vectorized: ", *LAR);
835  });
836  }
837  if (!LAI->canVectorizeMemory())
838  return false;
839 
840  if (LAI->hasDependenceInvolvingLoopInvariantAddress()) {
841  ORE->emit(createMissedAnalysis("CantVectorizeStoreToLoopInvariantAddress")
842  << "write to a loop invariant address could not "
843  "be vectorized");
844  LLVM_DEBUG(
845  dbgs() << "LV: Non vectorizable stores to a uniform address\n");
846  return false;
847  }
848  Requirements->addRuntimePointerChecks(LAI->getNumRuntimePointerChecks());
849  PSE.addPredicate(LAI->getPSE().getUnionPredicate());
850 
851  return true;
852 }
853 
855  Value *In0 = const_cast<Value *>(V);
856  PHINode *PN = dyn_cast_or_null<PHINode>(In0);
857  if (!PN)
858  return false;
859 
860  return Inductions.count(PN);
861 }
862 
864  auto *Inst = dyn_cast<Instruction>(V);
865  return (Inst && InductionCastsToIgnore.count(Inst));
866 }
867 
869  return isInductionPhi(V) || isCastedInductionVariable(V);
870 }
871 
873  return FirstOrderRecurrences.count(Phi);
874 }
875 
877  return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
878 }
879 
880 bool LoopVectorizationLegality::blockCanBePredicated(
881  BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs) {
882  const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
883 
884  for (Instruction &I : *BB) {
885  // Check that we don't have a constant expression that can trap as operand.
886  for (Value *Operand : I.operands()) {
887  if (auto *C = dyn_cast<Constant>(Operand))
888  if (C->canTrap())
889  return false;
890  }
891  // We might be able to hoist the load.
892  if (I.mayReadFromMemory()) {
893  auto *LI = dyn_cast<LoadInst>(&I);
894  if (!LI)
895  return false;
896  if (!SafePtrs.count(LI->getPointerOperand())) {
897  // !llvm.mem.parallel_loop_access implies if-conversion safety.
898  // Otherwise, record that the load needs (real or emulated) masking
899  // and let the cost model decide.
900  if (!IsAnnotatedParallel)
901  MaskedOp.insert(LI);
902  continue;
903  }
904  }
905 
906  if (I.mayWriteToMemory()) {
907  auto *SI = dyn_cast<StoreInst>(&I);
908  if (!SI)
909  return false;
910  // Predicated store requires some form of masking:
911  // 1) masked store HW instruction,
912  // 2) emulation via load-blend-store (only if safe and legal to do so,
913  // be aware on the race conditions), or
914  // 3) element-by-element predicate check and scalar store.
915  MaskedOp.insert(SI);
916  continue;
917  }
918  if (I.mayThrow())
919  return false;
920  }
921 
922  return true;
923 }
924 
925 bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
926  if (!EnableIfConversion) {
927  ORE->emit(createMissedAnalysis("IfConversionDisabled")
928  << "if-conversion is disabled");
929  return false;
930  }
931 
932  assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable");
933 
934  // A list of pointers that we can safely read and write to.
935  SmallPtrSet<Value *, 8> SafePointes;
936 
937  // Collect safe addresses.
938  for (BasicBlock *BB : TheLoop->blocks()) {
939  if (blockNeedsPredication(BB))
940  continue;
941 
942  for (Instruction &I : *BB)
943  if (auto *Ptr = getLoadStorePointerOperand(&I))
944  SafePointes.insert(Ptr);
945  }
946 
947  // Collect the blocks that need predication.
948  BasicBlock *Header = TheLoop->getHeader();
949  for (BasicBlock *BB : TheLoop->blocks()) {
950  // We don't support switch statements inside loops.
951  if (!isa<BranchInst>(BB->getTerminator())) {
952  ORE->emit(createMissedAnalysis("LoopContainsSwitch", BB->getTerminator())
953  << "loop contains a switch statement");
954  return false;
955  }
956 
957  // We must be able to predicate all blocks that need to be predicated.
958  if (blockNeedsPredication(BB)) {
959  if (!blockCanBePredicated(BB, SafePointes)) {
960  ORE->emit(createMissedAnalysis("NoCFGForSelect", BB->getTerminator())
961  << "control flow cannot be substituted for a select");
962  return false;
963  }
964  } else if (BB != Header && !canIfConvertPHINodes(BB)) {
965  ORE->emit(createMissedAnalysis("NoCFGForSelect", BB->getTerminator())
966  << "control flow cannot be substituted for a select");
967  return false;
968  }
969  }
970 
971  // We can if-convert this loop.
972  return true;
973 }
974 
975 // Helper function to canVectorizeLoopNestCFG.
976 bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
977  bool UseVPlanNativePath) {
978  assert((UseVPlanNativePath || Lp->empty()) &&
979  "VPlan-native path is not enabled.");
980 
981  // TODO: ORE should be improved to show more accurate information when an
982  // outer loop can't be vectorized because a nested loop is not understood or
983  // legal. Something like: "outer_loop_location: loop not vectorized:
984  // (inner_loop_location) loop control flow is not understood by vectorizer".
985 
986  // Store the result and return it at the end instead of exiting early, in case
987  // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
988  bool Result = true;
989  bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
990 
991  // We must have a loop in canonical form. Loops with indirectbr in them cannot
992  // be canonicalized.
993  if (!Lp->getLoopPreheader()) {
994  LLVM_DEBUG(dbgs() << "LV: Loop doesn't have a legal pre-header.\n");
995  ORE->emit(createMissedAnalysis("CFGNotUnderstood")
996  << "loop control flow is not understood by vectorizer");
997  if (DoExtraAnalysis)
998  Result = false;
999  else
1000  return false;
1001  }
1002 
1003  // We must have a single backedge.
1004  if (Lp->getNumBackEdges() != 1) {
1005  ORE->emit(createMissedAnalysis("CFGNotUnderstood")
1006  << "loop control flow is not understood by vectorizer");
1007  if (DoExtraAnalysis)
1008  Result = false;
1009  else
1010  return false;
1011  }
1012 
1013  // We must have a single exiting block.
1014  if (!Lp->getExitingBlock()) {
1015  ORE->emit(createMissedAnalysis("CFGNotUnderstood")
1016  << "loop control flow is not understood by vectorizer");
1017  if (DoExtraAnalysis)
1018  Result = false;
1019  else
1020  return false;
1021  }
1022 
1023  // We only handle bottom-tested loops, i.e. loop in which the condition is
1024  // checked at the end of each iteration. With that we can assume that all
1025  // instructions in the loop are executed the same number of times.
1026  if (Lp->getExitingBlock() != Lp->getLoopLatch()) {
1027  ORE->emit(createMissedAnalysis("CFGNotUnderstood")
1028  << "loop control flow is not understood by vectorizer");
1029  if (DoExtraAnalysis)
1030  Result = false;
1031  else
1032  return false;
1033  }
1034 
1035  return Result;
1036 }
1037 
1038 bool LoopVectorizationLegality::canVectorizeLoopNestCFG(
1039  Loop *Lp, bool UseVPlanNativePath) {
1040  // Store the result and return it at the end instead of exiting early, in case
1041  // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1042  bool Result = true;
1043  bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1044  if (!canVectorizeLoopCFG(Lp, UseVPlanNativePath)) {
1045  if (DoExtraAnalysis)
1046  Result = false;
1047  else
1048  return false;
1049  }
1050 
1051  // Recursively check whether the loop control flow of nested loops is
1052  // understood.
1053  for (Loop *SubLp : *Lp)
1054  if (!canVectorizeLoopNestCFG(SubLp, UseVPlanNativePath)) {
1055  if (DoExtraAnalysis)
1056  Result = false;
1057  else
1058  return false;
1059  }
1060 
1061  return Result;
1062 }
1063 
1064 bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
1065  // Store the result and return it at the end instead of exiting early, in case
1066  // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1067  bool Result = true;
1068 
1069  bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1070  // Check whether the loop-related control flow in the loop nest is expected by
1071  // vectorizer.
1072  if (!canVectorizeLoopNestCFG(TheLoop, UseVPlanNativePath)) {
1073  if (DoExtraAnalysis)
1074  Result = false;
1075  else
1076  return false;
1077  }
1078 
1079  // We need to have a loop header.
1080  LLVM_DEBUG(dbgs() << "LV: Found a loop: " << TheLoop->getHeader()->getName()
1081  << '\n');
1082 
1083  // Specific checks for outer loops. We skip the remaining legal checks at this
1084  // point because they don't support outer loops.
1085  if (!TheLoop->empty()) {
1086  assert(UseVPlanNativePath && "VPlan-native path is not enabled.");
1087 
1088  if (!canVectorizeOuterLoop()) {
1089  LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Unsupported outer loop.\n");
1090  // TODO: Implement DoExtraAnalysis when subsequent legal checks support
1091  // outer loops.
1092  return false;
1093  }
1094 
1095  LLVM_DEBUG(dbgs() << "LV: We can vectorize this outer loop!\n");
1096  return Result;
1097  }
1098 
1099  assert(TheLoop->empty() && "Inner loop expected.");
1100  // Check if we can if-convert non-single-bb loops.
1101  unsigned NumBlocks = TheLoop->getNumBlocks();
1102  if (NumBlocks != 1 && !canVectorizeWithIfConvert()) {
1103  LLVM_DEBUG(dbgs() << "LV: Can't if-convert the loop.\n");
1104  if (DoExtraAnalysis)
1105  Result = false;
1106  else
1107  return false;
1108  }
1109 
1110  // Check if we can vectorize the instructions and CFG in this loop.
1111  if (!canVectorizeInstrs()) {
1112  LLVM_DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n");
1113  if (DoExtraAnalysis)
1114  Result = false;
1115  else
1116  return false;
1117  }
1118 
1119  // Go over each instruction and look at memory deps.
1120  if (!canVectorizeMemory()) {
1121  LLVM_DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n");
1122  if (DoExtraAnalysis)
1123  Result = false;
1124  else
1125  return false;
1126  }
1127 
1128  LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop"
1129  << (LAI->getRuntimePointerChecking()->Need
1130  ? " (with a runtime bound check)"
1131  : "")
1132  << "!\n");
1133 
1134  unsigned SCEVThreshold = VectorizeSCEVCheckThreshold;
1135  if (Hints->getForce() == LoopVectorizeHints::FK_Enabled)
1136  SCEVThreshold = PragmaVectorizeSCEVCheckThreshold;
1137 
1138  if (PSE.getUnionPredicate().getComplexity() > SCEVThreshold) {
1139  ORE->emit(createMissedAnalysis("TooManySCEVRunTimeChecks")
1140  << "Too many SCEV assumptions need to be made and checked "
1141  << "at runtime");
1142  LLVM_DEBUG(dbgs() << "LV: Too many SCEV checks needed.\n");
1143  if (DoExtraAnalysis)
1144  Result = false;
1145  else
1146  return false;
1147  }
1148 
1149  // Okay! We've done all the tests. If any have failed, return false. Otherwise
1150  // we can vectorize, and at this point we don't have any other mem analysis
1151  // which may limit our maximum vectorization factor, so just return true with
1152  // no restrictions.
1153  return Result;
1154 }
1155 
1157 
1158  LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n");
1159 
1160  if (!PrimaryInduction) {
1161  ORE->emit(createMissedAnalysis("NoPrimaryInduction")
1162  << "Missing a primary induction variable in the loop, which is "
1163  << "needed in order to fold tail by masking as required.");
1164  LLVM_DEBUG(dbgs() << "LV: No primary induction, cannot fold tail by "
1165  << "masking.\n");
1166  return false;
1167  }
1168 
1169  // TODO: handle reductions when tail is folded by masking.
1170  if (!Reductions.empty()) {
1171  ORE->emit(createMissedAnalysis("ReductionFoldingTailByMasking")
1172  << "Cannot fold tail by masking in the presence of reductions.");
1173  LLVM_DEBUG(dbgs() << "LV: Loop has reductions, cannot fold tail by "
1174  << "masking.\n");
1175  return false;
1176  }
1177 
1178  // TODO: handle outside users when tail is folded by masking.
1179  for (auto *AE : AllowedExit) {
1180  // Check that all users of allowed exit values are inside the loop.
1181  for (User *U : AE->users()) {
1182  Instruction *UI = cast<Instruction>(U);
1183  if (TheLoop->contains(UI))
1184  continue;
1185  ORE->emit(createMissedAnalysis("LiveOutFoldingTailByMasking")
1186  << "Cannot fold tail by masking in the presence of live outs.");
1187  LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking, loop has an "
1188  << "outside user for : " << *UI << '\n');
1189  return false;
1190  }
1191  }
1192 
1193  // The list of pointers that we can safely read and write to remains empty.
1194  SmallPtrSet<Value *, 8> SafePointers;
1195 
1196  // Check and mark all blocks for predication, including those that ordinarily
1197  // do not need predication such as the header block.
1198  for (BasicBlock *BB : TheLoop->blocks()) {
1199  if (!blockCanBePredicated(BB, SafePointers)) {
1200  ORE->emit(createMissedAnalysis("NoCFGForSelect", BB->getTerminator())
1201  << "control flow cannot be substituted for a select");
1202  LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking as required.\n");
1203  return false;
1204  }
1205  }
1206 
1207  LLVM_DEBUG(dbgs() << "LV: can fold tail by masking.\n");
1208  return true;
1209 }
1210 
1211 } // namespace llvm
static bool isUniformLoop(Loop *Lp, Loop *OuterLp)
static unsigned RuntimeMemoryCheckThreshold
performing memory disambiguation checks at runtime do not make more than this number of comparisons...
uint64_t CallInst * C
#define LV_NAME
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:110
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:635
Diagnostic information for missed-optimization remarks.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:224
LLVMContext & Context
static Type * getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1)
DiagnosticInfoOptimizationBase::Argument NV
This class represents lattice values for constants.
Definition: AllocatorList.h:23
Instruction * getUnsafeAlgebraInst()
Returns first unsafe algebra instruction in the PHI node's use-chain.
bool isCastedInductionVariable(const Value *V)
Returns True if V is a cast that is part of an induction def-use chain, and had been proven to be red...
static bool isInductionPHI(PHINode *Phi, const Loop *L, ScalarEvolution *SE, InductionDescriptor &D, const SCEV *Expr=nullptr, SmallVectorImpl< Instruction *> *CastsToIgnore=nullptr)
Returns true if Phi is an induction in the loop L.
void replaceOperandWith(unsigned I, Metadata *New)
Replace a specific operand.
Definition: Metadata.cpp:858
static MDString * get(LLVMContext &Context, StringRef Str)
Definition: Metadata.cpp:453
ConstantInt * getConstIntStepValue() const
bool isUniform(Value *V)
Returns true if the value V is uniform within the loop.
TODO: The following VectorizationFactor was pulled out of LoopVectorizationCostModel class...
This class represents a function call, abstracting a target machine's calling convention.
int64_t getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp, const ValueToValueMap &StridesMap=ValueToValueMap(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of its element size.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:173
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1185
InductionKind getKind() const
A debug info location.
Definition: DebugLoc.h:33
Metadata node.
Definition: Metadata.h:863
F(f)
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1068
An instruction for reading from memory.
Definition: Instructions.h:167
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:137
Value * getStartValue() const
#define DEBUG_TYPE
const char * vectorizeAnalysisPassName() const
If hints are provided that force vectorization, use the AlwaysPrint pass name to force the frontend t...
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:129
Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE bool endswith(StringRef Suffix) const
Check if this string ends with the given Suffix.
Definition: StringRef.h:278
bool hasUnsafeAlgebra()
Returns true if the recurrence has unsafe algebra which requires a relaxed floating-point model...
amdgpu Simplify well known AMD library false Value Value const Twine & Name
unsigned getNumBackEdges() const
Calculate the number of back edges to the loop header.
Definition: LoopInfo.h:226
This file defines the LoopVectorizationLegality class.
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:370
static const unsigned MaxVectorWidth
Maximum SIMD width.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:80
bool isFloatingPointTy() const
Return true if this is one of the six floating-point types.
Definition: Type.h:161
static cl::opt< bool > EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden, cl::desc("Enable if-conversion during vectorization."))
Diagnostic information for optimization analysis remarks.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:41
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:196
BlockT * getHeader() const
Definition: LoopInfo.h:99
int isConsecutivePtr(Value *Ptr)
Check if this pointer is consecutive when vectorizing.
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
Definition: Constants.h:200
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE bool startswith(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:266
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:244
bool isInductionVariable(const Value *V)
Returns True if V can be considered as an induction variable in this loop.
static bool isValidElementType(Type *ElemTy)
Return true if the specified type is valid as a element type.
Definition: Type.cpp:620
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:32
Value * getLoadStorePointerOperand(Value *V)
A helper function that returns the pointer operand of a load or store instruction.
An instruction for storing to memory.
Definition: Instructions.h:320
static cl::opt< unsigned > PragmaVectorizeMemoryCheckThreshold("pragma-vectorize-memory-check-threshold", cl::init(128), cl::Hidden, cl::desc("The maximum allowed number of runtime memory checks with a " "vectorize(enable) pragma."))
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition: StringRef.h:597
bool blockNeedsPredication(BasicBlock *BB)
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
Diagnostic information for optimization analysis remarks related to pointer aliasing.
static ConstantAsMetadata * get(Constant *C)
Definition: Metadata.h:409
StringRef getString() const
Definition: Metadata.cpp:463
IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space...
Definition: DataLayout.cpp:749
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata *> MDs)
Definition: Metadata.h:1165
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:422
Integer induction variable. Step = C.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:148
static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst, SmallPtrSetImpl< Value *> &AllowedExit)
Check that the instruction has outside loop users and is not an identified reduction variable...
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:428
LLVM Basic Block Representation.
Definition: BasicBlock.h:57
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
Conditional or Unconditional Branch instruction.
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
Definition: SmallVector.h:128
Instruction * getUnsafeAlgebraInst()
Returns induction operator that does not have "fast-math" property and requires FP unsafe mode...
Value * getIncomingValueForBlock(const BasicBlock *BB) const
bool allowVectorization(Function *F, Loop *L, bool VectorizeOnlyWhenForced) const
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:223
#define H(x, y, z)
Definition: MD5.cpp:57
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:370
OptimizationRemarkAnalysis createLVMissedAnalysis(const char *PassName, StringRef RemarkName, Loop *TheLoop, Instruction *I=nullptr)
Create an analysis remark that explains why vectorization failed.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:381
static bool isReductionPHI(PHINode *Phi, Loop *TheLoop, RecurrenceDescriptor &RedDes, DemandedBits *DB=nullptr, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr)
Returns true if Phi is a reduction in TheLoop.
static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp)
bool doesNotMeet(Function *F, Loop *L, const LoopVectorizeHints &Hints)
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition: LoopInfo.cpp:364
size_t size() const
Definition: SmallVector.h:52
static bool isFirstOrderRecurrence(PHINode *Phi, Loop *TheLoop, DenseMap< Instruction *, Instruction *> &SinkAfter, DominatorTree *DT)
Returns true if Phi is a first-order recurrence.
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:57
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Definition: IVDescriptors.h:62
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:109
Diagnostic information for optimization analysis remarks related to floating-point non-commutativity...
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:417
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
auto size(R &&Range, typename std::enable_if< std::is_same< typename std::iterator_traits< decltype(Range.begin())>::iterator_category, std::random_access_iterator_tag >::value, void >::type *=nullptr) -> decltype(std::distance(Range.begin(), Range.end()))
Get the size of a range.
Definition: STLExtras.h:1166
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file. ...
A struct for saving information about induction variables.
bool canFoldTailByMasking()
Return true if we can vectorize this loop while folding its tail by masking.
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition: Error.h:147
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type...
Definition: Type.cpp:129
DenseMap< const Value *, Value * > ValueToValueMap
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:845
bool isFirstOrderRecurrence(const PHINode *Phi)
Returns True if Phi is a first-order recurrence in this loop.
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:621
static const unsigned MaxInterleaveFactor
Maximum vectorization interleave count.
bool hasVectorInstrinsicScalarOpd(Intrinsic::ID ID, unsigned ScalarOpdIdx)
Identifies if the intrinsic has a scalar operand.
Definition: VectorUtils.cpp:88
unsigned getNumIncomingValues() const
Return the number of incoming edges.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
PHINode * getCanonicalInductionVariable() const
Check to see if the loop has a canonical induction variable: an integer recurrence that starts at 0 a...
Definition: LoopInfo.cpp:112
static bool canIfConvertPHINodes(BasicBlock *BB)
Check whether it is safe to if-convert this phi node.
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
Definition: Instruction.cpp:55
bool isInductionPhi(const Value *V)
Returns True if V is a Phi node of an induction variable in this loop.
iterator_range< user_iterator > users()
Definition: Value.h:399
bool hasUnsafeAlgebra()
Returns true if the induction type is FP and the binary operator does not have the "fast-math" proper...
amdgpu Simplify well known AMD library false Value Value * Arg
const SmallVectorImpl< Instruction * > & getCastInsts() const
Returns a reference to the type cast instructions in the induction update chain, that are redundant w...
LoopVectorizeHints(const Loop *L, bool InterleaveOnlyWhenForced, OptimizationRemarkEmitter &ORE)
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:310
static const size_t npos
Definition: StringRef.h:50
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:175
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:55
Instruction * getLoopExitInstr()
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:194
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:464
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:106
#define I(x, y, z)
Definition: MD5.cpp:58
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:322
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:324
Collection of parameters shared between the Loop Vectorizer and the Loop Access Analysis.
bool canVectorize(bool UseVPlanNativePath)
Returns true if it is legal to vectorize this loop.
std::string str() const
Return the twine contents as a std::string.
Definition: Twine.cpp:17
bool empty() const
Definition: LoopInfo.h:145
const unsigned Kind
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
LLVM Value Representation.
Definition: Value.h:72
static bool blockNeedsPredication(BasicBlock *BB, Loop *TheLoop, DominatorTree *DT)
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.h:330
static unsigned VectorizationInterleave
Interleave factor as overridden by the user.
static Type * convertPointerToIntegerType(const DataLayout &DL, Type *Ty)
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
static cl::opt< unsigned > VectorizeSCEVCheckThreshold("vectorize-scev-check-threshold", cl::init(16), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed."))
A single uniqued string.
Definition: Metadata.h:603
static cl::opt< unsigned > PragmaVectorizeSCEVCheckThreshold("pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed with a " "vectorize(enable) pragma"))
Utility class for getting and setting loop vectorizer hints in the form of loop metadata.
static bool isInterleaveForced()
True if force-vector-interleave was specified by the user.
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1074
#define LLVM_DEBUG(X)
Definition: Debug.h:122
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:49
Root of the metadata hierarchy.
Definition: Metadata.h:57
The optimization diagnostic interface.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
void emitRemarkWithHints() const
Dumps all the hint information.
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:143
const BasicBlock * getParent() const
Definition: Instruction.h:66