LLVM  10.0.0svn
LoopVectorizationLegality.h
Go to the documentation of this file.
1 //===- llvm/Transforms/Vectorize/LoopVectorizationLegality.h ----*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// This file defines the LoopVectorizationLegality class. Original code
11 /// in Loop Vectorizer has been moved out to its own file for modularity
12 /// and reusability.
13 ///
14 /// Currently, it works for innermost loop vectorization. Extending this to
15 /// outer loop vectorization is a TODO item.
16 ///
17 /// Also provides:
18 /// 1) LoopVectorizeHints class which keeps a number of loop annotations
19 /// locally for easy look up. It has the ability to write them back as
20 /// loop metadata, upon request.
21 /// 2) LoopVectorizationRequirements class for lazy bail out for the purpose
22 /// of reporting useful failure to vectorize message.
23 //
24 //===----------------------------------------------------------------------===//
25 
26 #ifndef LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONLEGALITY_H
27 #define LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONLEGALITY_H
28 
29 #include "llvm/ADT/MapVector.h"
33 
34 namespace llvm {
35 
36 /// Utility class for getting and setting loop vectorizer hints in the form
37 /// of loop metadata.
38 /// This class keeps a number of loop annotations locally (as member variables)
39 /// and can, upon request, write them back as metadata on the loop. It will
40 /// initially scan the loop for existing metadata, and will update the local
41 /// values based on information in the loop.
42 /// We cannot write all values to metadata, as the mere presence of some info,
43 /// for example 'force', means a decision has been made. So, we need to be
44 /// careful NOT to add them if the user hasn't specifically asked so.
46  enum HintKind { HK_WIDTH, HK_UNROLL, HK_FORCE, HK_ISVECTORIZED,
47  HK_PREDICATE };
48 
49  /// Hint - associates name and validation with the hint value.
50  struct Hint {
51  const char *Name;
52  unsigned Value; // This may have to change for non-numeric values.
53  HintKind Kind;
54 
55  Hint(const char *Name, unsigned Value, HintKind Kind)
56  : Name(Name), Value(Value), Kind(Kind) {}
57 
58  bool validate(unsigned Val);
59  };
60 
61  /// Vectorization width.
62  Hint Width;
63 
64  /// Vectorization interleave factor.
65  Hint Interleave;
66 
67  /// Vectorization forced
68  Hint Force;
69 
70  /// Already Vectorized
71  Hint IsVectorized;
72 
73  /// Vector Predicate
74  Hint Predicate;
75 
76  /// Return the loop metadata prefix.
77  static StringRef Prefix() { return "llvm.loop."; }
78 
79  /// True if there is any unsafe math in the loop.
80  bool PotentiallyUnsafe = false;
81 
82 public:
83  enum ForceKind {
84  FK_Undefined = -1, ///< Not selected.
85  FK_Disabled = 0, ///< Forcing disabled.
86  FK_Enabled = 1, ///< Forcing enabled.
87  };
88 
89  LoopVectorizeHints(const Loop *L, bool InterleaveOnlyWhenForced,
91 
92  /// Mark the loop L as already vectorized by setting the width to 1.
93  void setAlreadyVectorized();
94 
96  bool VectorizeOnlyWhenForced) const;
97 
98  /// Dumps all the hint information.
99  void emitRemarkWithHints() const;
100 
101  unsigned getWidth() const { return Width.Value; }
102  unsigned getInterleave() const { return Interleave.Value; }
103  unsigned getIsVectorized() const { return IsVectorized.Value; }
104  unsigned getPredicate() const { return Predicate.Value; }
105  enum ForceKind getForce() const {
106  if ((ForceKind)Force.Value == FK_Undefined &&
108  return FK_Disabled;
109  return (ForceKind)Force.Value;
110  }
111 
112  /// If hints are provided that force vectorization, use the AlwaysPrint
113  /// pass name to force the frontend to print the diagnostic.
114  const char *vectorizeAnalysisPassName() const;
115 
116  bool allowReordering() const {
117  // When enabling loop hints are provided we allow the vectorizer to change
118  // the order of operations that is given by the scalar loop. This is not
119  // enabled by default because can be unsafe or inefficient. For example,
120  // reordering floating-point operations will change the way round-off
121  // error accumulates in the loop.
122  return getForce() == LoopVectorizeHints::FK_Enabled || getWidth() > 1;
123  }
124 
125  bool isPotentiallyUnsafe() const {
126  // Avoid FP vectorization if the target is unsure about proper support.
127  // This may be related to the SIMD unit in the target not handling
128  // IEEE 754 FP ops properly, or bad single-to-double promotions.
129  // Otherwise, a sequence of vectorized loops, even without reduction,
130  // could lead to different end results on the destination vectors.
131  return getForce() != LoopVectorizeHints::FK_Enabled && PotentiallyUnsafe;
132  }
133 
134  void setPotentiallyUnsafe() { PotentiallyUnsafe = true; }
135 
136 private:
137  /// Find hints specified in the loop metadata and update local values.
138  void getHintsFromMetadata();
139 
140  /// Checks string hint with one operand and set value if valid.
141  void setHint(StringRef Name, Metadata *Arg);
142 
143  /// The loop these hints belong to.
144  const Loop *TheLoop;
145 
146  /// Interface to emit optimization remarks.
148 };
149 
150 /// This holds vectorization requirements that must be verified late in
151 /// the process. The requirements are set by legalize and costmodel. Once
152 /// vectorization has been determined to be possible and profitable the
153 /// requirements can be verified by looking for metadata or compiler options.
154 /// For example, some loops require FP commutativity which is only allowed if
155 /// vectorization is explicitly specified or if the fast-math compiler option
156 /// has been provided.
157 /// Late evaluation of these requirements allows helpful diagnostics to be
158 /// composed that tells the user what need to be done to vectorize the loop. For
159 /// example, by specifying #pragma clang loop vectorize or -ffast-math. Late
160 /// evaluation should be used only when diagnostics can generated that can be
161 /// followed by a non-expert user.
163 public:
165 
167  // First unsafe algebra instruction.
168  if (!UnsafeAlgebraInst)
169  UnsafeAlgebraInst = I;
170  }
171 
172  void addRuntimePointerChecks(unsigned Num) { NumRuntimePointerChecks = Num; }
173 
174  bool doesNotMeet(Function *F, Loop *L, const LoopVectorizeHints &Hints);
175 
176 private:
177  unsigned NumRuntimePointerChecks = 0;
178  Instruction *UnsafeAlgebraInst = nullptr;
179 
180  /// Interface to emit optimization remarks.
182 };
183 
184 /// LoopVectorizationLegality checks if it is legal to vectorize a loop, and
185 /// to what vectorization factor.
186 /// This class does not look at the profitability of vectorization, only the
187 /// legality. This class has two main kinds of checks:
188 /// * Memory checks - The code in canVectorizeMemory checks if vectorization
189 /// will change the order of memory accesses in a way that will change the
190 /// correctness of the program.
191 /// * Scalars checks - The code in canVectorizeInstrs and canVectorizeMemory
192 /// checks for a number of different conditions, such as the availability of a
193 /// single induction variable, that all types are supported and vectorize-able,
194 /// etc. This code reflects the capabilities of InnerLoopVectorizer.
195 /// This class is also used by InnerLoopVectorizer for identifying
196 /// induction variable and the different reduction variables.
198 public:
202  Function *F, std::function<const LoopAccessInfo &(Loop &)> *GetLAA,
205  AssumptionCache *AC)
206  : TheLoop(L), LI(LI), PSE(PSE), TTI(TTI), TLI(TLI), DT(DT),
207  GetLAA(GetLAA), ORE(ORE), Requirements(R), Hints(H), DB(DB), AC(AC) {}
208 
209  /// ReductionList contains the reduction descriptors for all
210  /// of the reductions that were found in the loop.
212 
213  /// InductionList saves induction variables and maps them to the
214  /// induction descriptor.
216 
217  /// RecurrenceSet contains the phi nodes that are recurrences other than
218  /// inductions and reductions.
220 
221  /// Returns true if it is legal to vectorize this loop.
222  /// This does not mean that it is profitable to vectorize this
223  /// loop, only that it is legal to do so.
224  /// Temporarily taking UseVPlanNativePath parameter. If true, take
225  /// the new code path being implemented for outer loop vectorization
226  /// (should be functional for inner loop vectorization) based on VPlan.
227  /// If false, good old LV code.
228  bool canVectorize(bool UseVPlanNativePath);
229 
230  /// Return true if we can vectorize this loop while folding its tail by
231  /// masking, and mark all respective loads/stores for masking.
232  bool prepareToFoldTailByMasking();
233 
234  /// Returns the primary induction variable.
235  PHINode *getPrimaryInduction() { return PrimaryInduction; }
236 
237  /// Returns the reduction variables found in the loop.
238  ReductionList *getReductionVars() { return &Reductions; }
239 
240  /// Returns the induction variables found in the loop.
241  InductionList *getInductionVars() { return &Inductions; }
242 
243  /// Return the first-order recurrences found in the loop.
244  RecurrenceSet *getFirstOrderRecurrences() { return &FirstOrderRecurrences; }
245 
246  /// Return the set of instructions to sink to handle first-order recurrences.
248 
249  /// Returns the widest induction type.
250  Type *getWidestInductionType() { return WidestIndTy; }
251 
252  /// Returns True if V is a Phi node of an induction variable in this loop.
253  bool isInductionPhi(const Value *V);
254 
255  /// Returns True if V is a cast that is part of an induction def-use chain,
256  /// and had been proven to be redundant under a runtime guard (in other
257  /// words, the cast has the same SCEV expression as the induction phi).
258  bool isCastedInductionVariable(const Value *V);
259 
260  /// Returns True if V can be considered as an induction variable in this
261  /// loop. V can be the induction phi, or some redundant cast in the def-use
262  /// chain of the inducion phi.
263  bool isInductionVariable(const Value *V);
264 
265  /// Returns True if PN is a reduction variable in this loop.
266  bool isReductionVariable(PHINode *PN) { return Reductions.count(PN); }
267 
268  /// Returns True if Phi is a first-order recurrence in this loop.
269  bool isFirstOrderRecurrence(const PHINode *Phi);
270 
271  /// Return true if the block BB needs to be predicated in order for the loop
272  /// to be vectorized.
273  bool blockNeedsPredication(BasicBlock *BB);
274 
275  /// Check if this pointer is consecutive when vectorizing. This happens
276  /// when the last index of the GEP is the induction variable, or that the
277  /// pointer itself is an induction variable.
278  /// This check allows us to vectorize A[idx] into a wide load/store.
279  /// Returns:
280  /// 0 - Stride is unknown or non-consecutive.
281  /// 1 - Address is consecutive.
282  /// -1 - Address is consecutive, and decreasing.
283  /// NOTE: This method must only be used before modifying the original scalar
284  /// loop. Do not use after invoking 'createVectorizedLoopSkeleton' (PR34965).
285  int isConsecutivePtr(Value *Ptr);
286 
287  /// Returns true if the value V is uniform within the loop.
288  bool isUniform(Value *V);
289 
290  /// Returns the information that we collected about runtime memory check.
292  return LAI->getRuntimePointerChecking();
293  }
294 
295  const LoopAccessInfo *getLAI() const { return LAI; }
296 
297  unsigned getMaxSafeDepDistBytes() { return LAI->getMaxSafeDepDistBytes(); }
298 
299  uint64_t getMaxSafeRegisterWidth() const {
300  return LAI->getDepChecker().getMaxSafeRegisterWidth();
301  }
302 
303  bool hasStride(Value *V) { return LAI->hasStride(V); }
304 
305  /// Returns true if vector representation of the instruction \p I
306  /// requires mask.
307  bool isMaskRequired(const Instruction *I) { return (MaskedOp.count(I) != 0); }
308 
309  unsigned getNumStores() const { return LAI->getNumStores(); }
310  unsigned getNumLoads() const { return LAI->getNumLoads(); }
311 
312  // Returns true if the NoNaN attribute is set on the function.
313  bool hasFunNoNaNAttr() const { return HasFunNoNaNAttr; }
314 
315 private:
316  /// Return true if the pre-header, exiting and latch blocks of \p Lp and all
317  /// its nested loops are considered legal for vectorization. These legal
318  /// checks are common for inner and outer loop vectorization.
319  /// Temporarily taking UseVPlanNativePath parameter. If true, take
320  /// the new code path being implemented for outer loop vectorization
321  /// (should be functional for inner loop vectorization) based on VPlan.
322  /// If false, good old LV code.
323  bool canVectorizeLoopNestCFG(Loop *Lp, bool UseVPlanNativePath);
324 
325  /// Set up outer loop inductions by checking Phis in outer loop header for
326  /// supported inductions (int inductions). Return false if any of these Phis
327  /// is not a supported induction or if we fail to find an induction.
328  bool setupOuterLoopInductions();
329 
330  /// Return true if the pre-header, exiting and latch blocks of \p Lp
331  /// (non-recursive) are considered legal for vectorization.
332  /// Temporarily taking UseVPlanNativePath parameter. If true, take
333  /// the new code path being implemented for outer loop vectorization
334  /// (should be functional for inner loop vectorization) based on VPlan.
335  /// If false, good old LV code.
336  bool canVectorizeLoopCFG(Loop *Lp, bool UseVPlanNativePath);
337 
338  /// Check if a single basic block loop is vectorizable.
339  /// At this point we know that this is a loop with a constant trip count
340  /// and we only need to check individual instructions.
341  bool canVectorizeInstrs();
342 
343  /// When we vectorize loops we may change the order in which
344  /// we read and write from memory. This method checks if it is
345  /// legal to vectorize the code, considering only memory constrains.
346  /// Returns true if the loop is vectorizable
347  bool canVectorizeMemory();
348 
349  /// Return true if we can vectorize this loop using the IF-conversion
350  /// transformation.
351  bool canVectorizeWithIfConvert();
352 
353  /// Return true if we can vectorize this outer loop. The method performs
354  /// specific checks for outer loop vectorization.
355  bool canVectorizeOuterLoop();
356 
357  /// Return true if all of the instructions in the block can be speculatively
358  /// executed, and record the loads/stores that require masking. If's that
359  /// guard loads can be ignored under "assume safety" unless \p PreserveGuards
360  /// is true. This can happen when we introduces guards for which the original
361  /// "unguarded-loads are safe" assumption does not hold. For example, the
362  /// vectorizer's fold-tail transformation changes the loop to execute beyond
363  /// its original trip-count, under a proper guard, which should be preserved.
364  /// \p SafePtrs is a list of addresses that are known to be legal and we know
365  /// that we can read from them without segfault.
366  bool blockCanBePredicated(BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs,
367  bool PreserveGuards = false);
368 
369  /// Updates the vectorization state by adding \p Phi to the inductions list.
370  /// This can set \p Phi as the main induction of the loop if \p Phi is a
371  /// better choice for the main induction than the existing one.
372  void addInductionPhi(PHINode *Phi, const InductionDescriptor &ID,
373  SmallPtrSetImpl<Value *> &AllowedExit);
374 
375  /// If an access has a symbolic strides, this maps the pointer value to
376  /// the stride symbol.
377  const ValueToValueMap *getSymbolicStrides() {
378  // FIXME: Currently, the set of symbolic strides is sometimes queried before
379  // it's collected. This happens from canVectorizeWithIfConvert, when the
380  // pointer is checked to reference consecutive elements suitable for a
381  // masked access.
382  return LAI ? &LAI->getSymbolicStrides() : nullptr;
383  }
384 
385  /// The loop that we evaluate.
386  Loop *TheLoop;
387 
388  /// Loop Info analysis.
389  LoopInfo *LI;
390 
391  /// A wrapper around ScalarEvolution used to add runtime SCEV checks.
392  /// Applies dynamic knowledge to simplify SCEV expressions in the context
393  /// of existing SCEV assumptions. The analysis will also add a minimal set
394  /// of new predicates if this is required to enable vectorization and
395  /// unrolling.
397 
398  /// Target Transform Info.
399  TargetTransformInfo *TTI;
400 
401  /// Target Library Info.
402  TargetLibraryInfo *TLI;
403 
404  /// Dominator Tree.
405  DominatorTree *DT;
406 
407  // LoopAccess analysis.
408  std::function<const LoopAccessInfo &(Loop &)> *GetLAA;
409 
410  // And the loop-accesses info corresponding to this loop. This pointer is
411  // null until canVectorizeMemory sets it up.
412  const LoopAccessInfo *LAI = nullptr;
413 
414  /// Interface to emit optimization remarks.
416 
417  // --- vectorization state --- //
418 
419  /// Holds the primary induction variable. This is the counter of the
420  /// loop.
421  PHINode *PrimaryInduction = nullptr;
422 
423  /// Holds the reduction variables.
424  ReductionList Reductions;
425 
426  /// Holds all of the induction variables that we found in the loop.
427  /// Notice that inductions don't need to start at zero and that induction
428  /// variables can be pointers.
429  InductionList Inductions;
430 
431  /// Holds all the casts that participate in the update chain of the induction
432  /// variables, and that have been proven to be redundant (possibly under a
433  /// runtime guard). These casts can be ignored when creating the vectorized
434  /// loop body.
435  SmallPtrSet<Instruction *, 4> InductionCastsToIgnore;
436 
437  /// Holds the phi nodes that are first-order recurrences.
438  RecurrenceSet FirstOrderRecurrences;
439 
440  /// Holds instructions that need to sink past other instructions to handle
441  /// first-order recurrences.
443 
444  /// Holds the widest induction type encountered.
445  Type *WidestIndTy = nullptr;
446 
447  /// Allowed outside users. This holds the variables that can be accessed from
448  /// outside the loop.
449  SmallPtrSet<Value *, 4> AllowedExit;
450 
451  /// Can we assume the absence of NaNs.
452  bool HasFunNoNaNAttr = false;
453 
454  /// Vectorization requirements that will go through late-evaluation.
455  LoopVectorizationRequirements *Requirements;
456 
457  /// Used to emit an analysis of any legality issues.
458  LoopVectorizeHints *Hints;
459 
460  /// The demanded bits analysis is used to compute the minimum type size in
461  /// which a reduction can be computed.
462  DemandedBits *DB;
463 
464  /// The assumption cache analysis is used to compute the minimum type size in
465  /// which a reduction can be computed.
466  AssumptionCache *AC;
467 
468  /// While vectorizing these instructions we have to generate a
469  /// call to the appropriate masked intrinsic
471 };
472 
473 } // namespace llvm
474 
475 #endif // LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONLEGALITY_H
Type * getWidestInductionType()
Returns the widest induction type.
bool isMaskRequired(const Instruction *I)
Returns true if vector representation of the instruction I requires mask.
This class represents lattice values for constants.
Definition: AllocatorList.h:23
LoopVectorizationLegality(Loop *L, PredicatedScalarEvolution &PSE, DominatorTree *DT, TargetTransformInfo *TTI, TargetLibraryInfo *TLI, AliasAnalysis *AA, Function *F, std::function< const LoopAccessInfo &(Loop &)> *GetLAA, LoopInfo *LI, OptimizationRemarkEmitter *ORE, LoopVectorizationRequirements *R, LoopVectorizeHints *H, DemandedBits *DB, AssumptionCache *AC)
amdgpu Simplify well known AMD library false FunctionCallee Value const Twine & Name
A cache of @llvm.assume calls within a function.
LoopVectorizationLegality checks if it is legal to vectorize a loop, and to what vectorization factor...
F(f)
const char * vectorizeAnalysisPassName() const
If hints are provided that force vectorization, use the AlwaysPrint pass name to force the frontend t...
ReductionList * getReductionVars()
Returns the reduction variables found in the loop.
PHINode * getPrimaryInduction()
Returns the primary induction variable.
LoopVectorizationRequirements(OptimizationRemarkEmitter &ORE)
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:144
LLVM Basic Block Representation.
Definition: BasicBlock.h:57
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
bool allowVectorization(Function *F, Loop *L, bool VectorizeOnlyWhenForced) const
#define H(x, y, z)
Definition: MD5.cpp:57
bool isReductionVariable(PHINode *PN)
Returns True if PN is a reduction variable in this loop.
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
const RuntimePointerChecking * getRuntimePointerChecking() const
Returns the information that we collected about runtime memory check.
A struct for saving information about induction variables.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
This holds vectorization requirements that must be verified late in the process.
Provides information about what library functions are available for the current target.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
Drive the analysis of memory accesses in the loop.
const LoopAccessInfo * getLAI() const
Holds information about the memory runtime legality checks to verify that a group of pointers do not ...
LoopVectorizeHints(const Loop *L, bool InterleaveOnlyWhenForced, OptimizationRemarkEmitter &ORE)
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:509
InductionList * getInductionVars()
Returns the induction variables found in the loop.
#define I(x, y, z)
Definition: MD5.cpp:58
RecurrenceSet * getFirstOrderRecurrences()
Return the first-order recurrences found in the loop.
void setAlreadyVectorized()
Mark the loop L as already vectorized by setting the width to 1.
LLVM Value Representation.
Definition: Value.h:74
print Print MemDeps of function
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
bool hasDisableAllTransformsHint(const Loop *L)
Look for the loop attribute that disables all transformation heuristic.
Definition: LoopUtils.cpp:383
Utility class for getting and setting loop vectorizer hints in the form of loop metadata.
Root of the metadata hierarchy.
Definition: Metadata.h:57
The optimization diagnostic interface.
DenseMap< Instruction *, Instruction * > & getSinkAfter()
Return the set of instructions to sink to handle first-order recurrences.
void emitRemarkWithHints() const
Dumps all the hint information.
void validate(const Triple &TT, const FeatureBitset &FeatureBits)