LLVM 17.0.0git
LoopVectorizationPlanner.h
Go to the documentation of this file.
1//===- LoopVectorizationPlanner.h - Planner for LoopVectorization ---------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file provides a LoopVectorizationPlanner class.
11/// InnerLoopVectorizer vectorizes loops which contain only one basic
12/// LoopVectorizationPlanner - drives the vectorization process after having
13/// passed Legality checks.
14/// The planner builds and optimizes the Vectorization Plans which record the
15/// decisions how to vectorize the given loop. In particular, represent the
16/// control-flow of the vectorized version, the replication of instructions that
17/// are to be scalarized, and interleave access groups.
18///
19/// Also provides a VPlan-based builder utility analogous to IRBuilder.
20/// It provides an instruction-level API for generating VPInstructions while
21/// abstracting away the Recipe manipulation details.
22//===----------------------------------------------------------------------===//
23
24#ifndef LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H
25#define LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H
26
27#include "VPlan.h"
29
30namespace llvm {
31
32class LoopInfo;
33class LoopVectorizationLegality;
34class LoopVectorizationCostModel;
35class PredicatedScalarEvolution;
36class LoopVectorizeHints;
37class OptimizationRemarkEmitter;
38class TargetTransformInfo;
39class TargetLibraryInfo;
40class VPRecipeBuilder;
41
42/// VPlan-based builder utility analogous to IRBuilder.
43class VPBuilder {
44 VPBasicBlock *BB = nullptr;
46
47 VPInstruction *createInstruction(unsigned Opcode,
49 const Twine &Name = "") {
50 VPInstruction *Instr = new VPInstruction(Opcode, Operands, DL, Name);
51 if (BB)
52 BB->insert(Instr, InsertPt);
53 return Instr;
54 }
55
56 VPInstruction *createInstruction(unsigned Opcode,
57 std::initializer_list<VPValue *> Operands,
58 DebugLoc DL, const Twine &Name = "") {
59 return createInstruction(Opcode, ArrayRef<VPValue *>(Operands), DL, Name);
60 }
61
62public:
63 VPBuilder() = default;
64
65 /// Clear the insertion point: created instructions will not be inserted into
66 /// a block.
68 BB = nullptr;
69 InsertPt = VPBasicBlock::iterator();
70 }
71
72 VPBasicBlock *getInsertBlock() const { return BB; }
73 VPBasicBlock::iterator getInsertPoint() const { return InsertPt; }
74
75 /// InsertPoint - A saved insertion point.
77 VPBasicBlock *Block = nullptr;
79
80 public:
81 /// Creates a new insertion point which doesn't point to anything.
82 VPInsertPoint() = default;
83
84 /// Creates a new insertion point at the given location.
86 : Block(InsertBlock), Point(InsertPoint) {}
87
88 /// Returns true if this insert point is set.
89 bool isSet() const { return Block != nullptr; }
90
91 VPBasicBlock *getBlock() const { return Block; }
92 VPBasicBlock::iterator getPoint() const { return Point; }
93 };
94
95 /// Sets the current insert point to a previously-saved location.
97 if (IP.isSet())
99 else
101 }
102
103 /// This specifies that created VPInstructions should be appended to the end
104 /// of the specified block.
106 assert(TheBB && "Attempting to set a null insert point");
107 BB = TheBB;
108 InsertPt = BB->end();
109 }
110
111 /// This specifies that created instructions should be inserted at the
112 /// specified point.
114 BB = TheBB;
115 InsertPt = IP;
116 }
117
118 /// Insert and return the specified instruction.
120 BB->insert(I, InsertPt);
121 return I;
122 }
123
124 /// Create an N-ary operation with \p Opcode, \p Operands and set \p Inst as
125 /// its underlying Instruction.
127 Instruction *Inst = nullptr, const Twine &Name = "") {
128 DebugLoc DL;
129 if (Inst)
130 DL = Inst->getDebugLoc();
131 VPInstruction *NewVPInst = createInstruction(Opcode, Operands, DL, Name);
132 NewVPInst->setUnderlyingValue(Inst);
133 return NewVPInst;
134 }
136 DebugLoc DL, const Twine &Name = "") {
137 return createInstruction(Opcode, Operands, DL, Name);
138 }
139
140 VPValue *createNot(VPValue *Operand, DebugLoc DL, const Twine &Name = "") {
141 return createInstruction(VPInstruction::Not, {Operand}, DL, Name);
142 }
143
145 const Twine &Name = "") {
146 return createInstruction(Instruction::BinaryOps::And, {LHS, RHS}, DL, Name);
147 }
148
150 const Twine &Name = "") {
151 return createInstruction(Instruction::BinaryOps::Or, {LHS, RHS}, DL, Name);
152 }
153
155 DebugLoc DL, const Twine &Name = "") {
156 return createNaryOp(Instruction::Select, {Cond, TrueVal, FalseVal}, DL,
157 Name);
158 }
159
160 //===--------------------------------------------------------------------===//
161 // RAII helpers.
162 //===--------------------------------------------------------------------===//
163
164 /// RAII object that stores the current insertion point and restores it when
165 /// the object is destroyed.
167 VPBuilder &Builder;
168 VPBasicBlock *Block;
170
171 public:
173 : Builder(B), Block(B.getInsertBlock()), Point(B.getInsertPoint()) {}
174
177
178 ~InsertPointGuard() { Builder.restoreIP(VPInsertPoint(Block, Point)); }
179 };
180};
181
182/// TODO: The following VectorizationFactor was pulled out of
183/// LoopVectorizationCostModel class. LV also deals with
184/// VectorizerParams::VectorizationFactor and VectorizationCostTy.
185/// We need to streamline them.
186
187/// Information about vectorization costs.
189 /// Vector width with best cost.
191
192 /// Cost of the loop with that width.
194
195 /// Cost of the scalar loop.
197
198 /// The minimum trip count required to make vectorization profitable, e.g. due
199 /// to runtime checks.
201
205
206 /// Width 1 means no vectorization, cost 0 means uncomputed cost.
208 return {ElementCount::getFixed(1), 0, 0};
209 }
210
211 bool operator==(const VectorizationFactor &rhs) const {
212 return Width == rhs.Width && Cost == rhs.Cost;
213 }
214
215 bool operator!=(const VectorizationFactor &rhs) const {
216 return !(*this == rhs);
217 }
218};
219
220/// A class that represents two vectorization factors (initialized with 0 by
221/// default). One for fixed-width vectorization and one for scalable
222/// vectorization. This can be used by the vectorizer to choose from a range of
223/// fixed and/or scalable VFs in order to find the most cost-effective VF to
224/// vectorize with.
228
230 : FixedVF(ElementCount::getFixed(0)),
231 ScalableVF(ElementCount::getScalable(0)) {}
233 *(Max.isScalable() ? &ScalableVF : &FixedVF) = Max;
234 }
239 "Invalid scalable properties");
240 }
241
243
244 /// \return true if either fixed- or scalable VF is non-zero.
245 explicit operator bool() const { return FixedVF || ScalableVF; }
246
247 /// \return true if either fixed- or scalable VF is a valid vector VF.
248 bool hasVector() const { return FixedVF.isVector() || ScalableVF.isVector(); }
249};
250
251/// Planner drives the vectorization process after having passed
252/// Legality checks.
254 /// The loop that we evaluate.
255 Loop *OrigLoop;
256
257 /// Loop Info analysis.
258 LoopInfo *LI;
259
260 /// Target Library Info.
261 const TargetLibraryInfo *TLI;
262
263 /// Target Transform Info.
265
266 /// The legality analysis.
268
269 /// The profitability analysis.
271
272 /// The interleaved access analysis.
274
276
277 const LoopVectorizeHints &Hints;
278
280
282
283 /// A builder used to construct the current plan.
284 VPBuilder Builder;
285
286public:
293 const LoopVectorizeHints &Hints,
295 : OrigLoop(L), LI(LI), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM), IAI(IAI),
296 PSE(PSE), Hints(Hints), ORE(ORE) {}
297
298 /// Plan how to best vectorize, return the best VF and its cost, or
299 /// std::nullopt if vectorization and interleaving should be avoided up front.
300 std::optional<VectorizationFactor> plan(ElementCount UserVF, unsigned UserIC);
301
302 /// Use the VPlan-native path to plan how to best vectorize, return the best
303 /// VF and its cost.
305
306 /// Return the best VPlan for \p VF.
308
309 /// Generate the IR code for the body of the vectorized loop according to the
310 /// best selected \p VF, \p UF and VPlan \p BestPlan.
311 /// TODO: \p IsEpilogueVectorization is needed to avoid issues due to epilogue
312 /// vectorization re-using plans for both the main and epilogue vector loops.
313 /// It should be removed once the re-use issue has been fixed.
314 void executePlan(ElementCount VF, unsigned UF, VPlan &BestPlan,
316 bool IsEpilogueVectorization);
317
318#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
319 void printPlans(raw_ostream &O);
320#endif
321
322 /// Look through the existing plans and return true if we have one with all
323 /// the vectorization factors in question.
325 return any_of(VPlans,
326 [&](const VPlanPtr &Plan) { return Plan->hasVF(VF); });
327 }
328
329 /// Test a \p Predicate on a \p Range of VF's. Return the value of applying
330 /// \p Predicate on Range.Start, possibly decreasing Range.End such that the
331 /// returned value holds for the entire \p Range.
332 static bool
333 getDecisionAndClampRange(const std::function<bool(ElementCount)> &Predicate,
334 VFRange &Range);
335
336 /// Check if the number of runtime checks exceeds the threshold.
338
339protected:
340 /// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive,
341 /// according to the information gathered by Legal when it checked if it is
342 /// legal to vectorize the loop.
343 void buildVPlans(ElementCount MinVF, ElementCount MaxVF);
344
345private:
346 /// Build a VPlan according to the information gathered by Legal. \return a
347 /// VPlan for vectorization factors \p Range.Start and up to \p Range.End
348 /// exclusive, possibly decreasing \p Range.End.
349 VPlanPtr buildVPlan(VFRange &Range);
350
351 /// Build a VPlan using VPRecipes according to the information gather by
352 /// Legal. This method is only used for the legacy inner loop vectorizer.
353 VPlanPtr buildVPlanWithVPRecipes(
354 VFRange &Range, SmallPtrSetImpl<Instruction *> &DeadInstructions,
356
357 /// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive,
358 /// according to the information gathered by Legal when it checked if it is
359 /// legal to vectorize the loop. This method creates VPlans using VPRecipes.
360 void buildVPlansWithVPRecipes(ElementCount MinVF, ElementCount MaxVF);
361
362 // Adjust the recipes for reductions. For in-loop reductions the chain of
363 // instructions leading from the loop exit instr to the phi need to be
364 // converted to reductions, with one operand being vector and the other being
365 // the scalar reduction chain. For other reductions, a select is introduced
366 // between the phi and live-out recipes when folding the tail.
367 void adjustRecipesForReductions(VPBasicBlock *LatchVPBB, VPlanPtr &Plan,
368 VPRecipeBuilder &RecipeBuilder,
369 ElementCount MinVF);
370};
371
372} // namespace llvm
373
374#endif // LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
assume Assume Builder
SmallVector< MachineOperand, 4 > Cond
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
std::string Name
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
#define I(x, y, z)
Definition: MD5.cpp:58
mir Rename Register Operands
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains the declarations of the Vectorization Plan base classes:
Value * RHS
Value * LHS
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
A debug info location.
Definition: DebugLoc.h:33
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
constexpr bool isVector() const
One or more elements.
Definition: TypeSize.h:306
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition: TypeSize.h:291
InnerLoopVectorizer vectorizes loops which contain only one basic block to a specified vectorization ...
Drive the analysis of interleaved memory accesses in the loop.
Definition: VectorUtils.h:765
LoopVectorizationCostModel - estimates the expected speedups due to vectorization.
LoopVectorizationLegality checks if it is legal to vectorize a loop, and to what vectorization factor...
Planner drives the vectorization process after having passed Legality checks.
std::optional< VectorizationFactor > plan(ElementCount UserVF, unsigned UserIC)
Plan how to best vectorize, return the best VF and its cost, or std::nullopt if vectorization and int...
VectorizationFactor planInVPlanNativePath(ElementCount UserVF)
Use the VPlan-native path to plan how to best vectorize, return the best VF and its cost.
bool requiresTooManyRuntimeChecks() const
Check if the number of runtime checks exceeds the threshold.
void buildVPlans(ElementCount MinVF, ElementCount MaxVF)
Build VPlans for power-of-2 VF's between MinVF and MaxVF inclusive, according to the information gath...
void executePlan(ElementCount VF, unsigned UF, VPlan &BestPlan, InnerLoopVectorizer &LB, DominatorTree *DT, bool IsEpilogueVectorization)
Generate the IR code for the body of the vectorized loop according to the best selected VF,...
LoopVectorizationPlanner(Loop *L, LoopInfo *LI, const TargetLibraryInfo *TLI, const TargetTransformInfo *TTI, LoopVectorizationLegality *Legal, LoopVectorizationCostModel &CM, InterleavedAccessInfo &IAI, PredicatedScalarEvolution &PSE, const LoopVectorizeHints &Hints, OptimizationRemarkEmitter *ORE)
VPlan & getBestPlanFor(ElementCount VF) const
Return the best VPlan for VF.
static bool getDecisionAndClampRange(const std::function< bool(ElementCount)> &Predicate, VFRange &Range)
Test a Predicate on a Range of VF's.
void printPlans(raw_ostream &O)
bool hasPlanWithVF(ElementCount VF) const
Look through the existing plans and return true if we have one with all the vectorization factors in ...
Utility class for getting and setting loop vectorizer hints in the form of loop metadata.
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:547
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:37
The optimization diagnostic interface.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:344
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
Provides information about what library functions are available for the current target.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition: VPlan.h:1939
RecipeListTy::iterator iterator
Instruction iterators...
Definition: VPlan.h:1960
iterator end()
Definition: VPlan.h:1970
void insert(VPRecipeBase *Recipe, iterator InsertPt)
Definition: VPlan.h:1998
RAII object that stores the current insertion point and restores it when the object is destroyed.
InsertPointGuard(const InsertPointGuard &)=delete
InsertPointGuard & operator=(const InsertPointGuard &)=delete
InsertPoint - A saved insertion point.
VPInsertPoint(VPBasicBlock *InsertBlock, VPBasicBlock::iterator InsertPoint)
Creates a new insertion point at the given location.
VPBasicBlock::iterator getPoint() const
VPInsertPoint()=default
Creates a new insertion point which doesn't point to anything.
bool isSet() const
Returns true if this insert point is set.
VPlan-based builder utility analogous to IRBuilder.
void setInsertPoint(VPBasicBlock *TheBB, VPBasicBlock::iterator IP)
This specifies that created instructions should be inserted at the specified point.
void restoreIP(VPInsertPoint IP)
Sets the current insert point to a previously-saved location.
VPValue * createOr(VPValue *LHS, VPValue *RHS, DebugLoc DL, const Twine &Name="")
VPBasicBlock * getInsertBlock() const
VPBasicBlock::iterator getInsertPoint() const
VPValue * createNot(VPValue *Operand, DebugLoc DL, const Twine &Name="")
VPValue * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Instruction *Inst=nullptr, const Twine &Name="")
Create an N-ary operation with Opcode, Operands and set Inst as its underlying Instruction.
VPInstruction * insert(VPInstruction *I) const
Insert and return the specified instruction.
VPValue * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, DebugLoc DL, const Twine &Name="")
void clearInsertionPoint()
Clear the insertion point: created instructions will not be inserted into a block.
VPBuilder()=default
VPValue * createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal, DebugLoc DL, const Twine &Name="")
VPValue * createAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL, const Twine &Name="")
void setInsertPoint(VPBasicBlock *TheBB)
This specifies that created VPInstructions should be appended to the end of the specified block.
This is a concrete Recipe that models a single VPlan-level instruction.
Definition: VPlan.h:779
Helper class to create VPRecipies from IR instructions.
void setUnderlyingValue(Value *Val)
Definition: VPlanValue.h:77
VPlan models a candidate for vectorization, encoding various decisions take to produce efficient outp...
Definition: VPlan.h:2160
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:166
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1742
std::unique_ptr< VPlan > VPlanPtr
Definition: VPlan.h:104
A class that represents two vectorization factors (initialized with 0 by default).
FixedScalableVFPair(const ElementCount &FixedVF, const ElementCount &ScalableVF)
FixedScalableVFPair(const ElementCount &Max)
static FixedScalableVFPair getNone()
A range of powers-of-2 vectorization factors with fixed start and adjustable end.
Definition: VPlan.h:84
TODO: The following VectorizationFactor was pulled out of LoopVectorizationCostModel class.
InstructionCost Cost
Cost of the loop with that width.
ElementCount MinProfitableTripCount
The minimum trip count required to make vectorization profitable, e.g.
bool operator==(const VectorizationFactor &rhs) const
ElementCount Width
Vector width with best cost.
InstructionCost ScalarCost
Cost of the scalar loop.
bool operator!=(const VectorizationFactor &rhs) const
static VectorizationFactor Disabled()
Width 1 means no vectorization, cost 0 means uncomputed cost.
VectorizationFactor(ElementCount Width, InstructionCost Cost, InstructionCost ScalarCost)