LLVM 17.0.0git
LoopVectorizationPlanner.h
Go to the documentation of this file.
1//===- LoopVectorizationPlanner.h - Planner for LoopVectorization ---------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file provides a LoopVectorizationPlanner class.
11/// InnerLoopVectorizer vectorizes loops which contain only one basic
12/// LoopVectorizationPlanner - drives the vectorization process after having
13/// passed Legality checks.
14/// The planner builds and optimizes the Vectorization Plans which record the
15/// decisions how to vectorize the given loop. In particular, represent the
16/// control-flow of the vectorized version, the replication of instructions that
17/// are to be scalarized, and interleave access groups.
18///
19/// Also provides a VPlan-based builder utility analogous to IRBuilder.
20/// It provides an instruction-level API for generating VPInstructions while
21/// abstracting away the Recipe manipulation details.
22//===----------------------------------------------------------------------===//
23
24#ifndef LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H
25#define LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H
26
27#include "VPlan.h"
28#include "llvm/ADT/SmallSet.h"
30
31namespace llvm {
32
33class LoopInfo;
34class LoopVectorizationLegality;
35class LoopVectorizationCostModel;
36class PredicatedScalarEvolution;
37class LoopVectorizeHints;
38class OptimizationRemarkEmitter;
39class TargetTransformInfo;
40class TargetLibraryInfo;
41class VPRecipeBuilder;
42
43/// VPlan-based builder utility analogous to IRBuilder.
44class VPBuilder {
45 VPBasicBlock *BB = nullptr;
47
48 VPInstruction *createInstruction(unsigned Opcode,
50 const Twine &Name = "") {
51 VPInstruction *Instr = new VPInstruction(Opcode, Operands, DL, Name);
52 if (BB)
53 BB->insert(Instr, InsertPt);
54 return Instr;
55 }
56
57 VPInstruction *createInstruction(unsigned Opcode,
58 std::initializer_list<VPValue *> Operands,
59 DebugLoc DL, const Twine &Name = "") {
60 return createInstruction(Opcode, ArrayRef<VPValue *>(Operands), DL, Name);
61 }
62
63public:
64 VPBuilder() = default;
65
66 /// Clear the insertion point: created instructions will not be inserted into
67 /// a block.
69 BB = nullptr;
70 InsertPt = VPBasicBlock::iterator();
71 }
72
73 VPBasicBlock *getInsertBlock() const { return BB; }
74 VPBasicBlock::iterator getInsertPoint() const { return InsertPt; }
75
76 /// InsertPoint - A saved insertion point.
78 VPBasicBlock *Block = nullptr;
80
81 public:
82 /// Creates a new insertion point which doesn't point to anything.
83 VPInsertPoint() = default;
84
85 /// Creates a new insertion point at the given location.
87 : Block(InsertBlock), Point(InsertPoint) {}
88
89 /// Returns true if this insert point is set.
90 bool isSet() const { return Block != nullptr; }
91
92 VPBasicBlock *getBlock() const { return Block; }
93 VPBasicBlock::iterator getPoint() const { return Point; }
94 };
95
96 /// Sets the current insert point to a previously-saved location.
98 if (IP.isSet())
100 else
102 }
103
104 /// This specifies that created VPInstructions should be appended to the end
105 /// of the specified block.
107 assert(TheBB && "Attempting to set a null insert point");
108 BB = TheBB;
109 InsertPt = BB->end();
110 }
111
112 /// This specifies that created instructions should be inserted at the
113 /// specified point.
115 BB = TheBB;
116 InsertPt = IP;
117 }
118
119 /// Insert and return the specified instruction.
121 BB->insert(I, InsertPt);
122 return I;
123 }
124
125 /// Create an N-ary operation with \p Opcode, \p Operands and set \p Inst as
126 /// its underlying Instruction.
128 Instruction *Inst = nullptr, const Twine &Name = "") {
129 DebugLoc DL;
130 if (Inst)
131 DL = Inst->getDebugLoc();
132 VPInstruction *NewVPInst = createInstruction(Opcode, Operands, DL, Name);
133 NewVPInst->setUnderlyingValue(Inst);
134 return NewVPInst;
135 }
137 DebugLoc DL, const Twine &Name = "") {
138 return createInstruction(Opcode, Operands, DL, Name);
139 }
140
141 VPValue *createNot(VPValue *Operand, DebugLoc DL, const Twine &Name = "") {
142 return createInstruction(VPInstruction::Not, {Operand}, DL, Name);
143 }
144
146 const Twine &Name = "") {
147 return createInstruction(Instruction::BinaryOps::And, {LHS, RHS}, DL, Name);
148 }
149
151 const Twine &Name = "") {
152 return createInstruction(Instruction::BinaryOps::Or, {LHS, RHS}, DL, Name);
153 }
154
156 DebugLoc DL, const Twine &Name = "") {
157 return createNaryOp(Instruction::Select, {Cond, TrueVal, FalseVal}, DL,
158 Name);
159 }
160
161 //===--------------------------------------------------------------------===//
162 // RAII helpers.
163 //===--------------------------------------------------------------------===//
164
165 /// RAII object that stores the current insertion point and restores it when
166 /// the object is destroyed.
168 VPBuilder &Builder;
169 VPBasicBlock *Block;
171
172 public:
174 : Builder(B), Block(B.getInsertBlock()), Point(B.getInsertPoint()) {}
175
178
179 ~InsertPointGuard() { Builder.restoreIP(VPInsertPoint(Block, Point)); }
180 };
181};
182
183/// TODO: The following VectorizationFactor was pulled out of
184/// LoopVectorizationCostModel class. LV also deals with
185/// VectorizerParams::VectorizationFactor and VectorizationCostTy.
186/// We need to streamline them.
187
188/// Information about vectorization costs.
190 /// Vector width with best cost.
192
193 /// Cost of the loop with that width.
195
196 /// Cost of the scalar loop.
198
199 /// The minimum trip count required to make vectorization profitable, e.g. due
200 /// to runtime checks.
202
206
207 /// Width 1 means no vectorization, cost 0 means uncomputed cost.
209 return {ElementCount::getFixed(1), 0, 0};
210 }
211
212 bool operator==(const VectorizationFactor &rhs) const {
213 return Width == rhs.Width && Cost == rhs.Cost;
214 }
215
216 bool operator!=(const VectorizationFactor &rhs) const {
217 return !(*this == rhs);
218 }
219};
220
221/// ElementCountComparator creates a total ordering for ElementCount
222/// for the purposes of using it in a set structure.
224 bool operator()(const ElementCount &LHS, const ElementCount &RHS) const {
225 return std::make_tuple(LHS.isScalable(), LHS.getKnownMinValue()) <
226 std::make_tuple(RHS.isScalable(), RHS.getKnownMinValue());
227 }
228};
230
231/// A class that represents two vectorization factors (initialized with 0 by
232/// default). One for fixed-width vectorization and one for scalable
233/// vectorization. This can be used by the vectorizer to choose from a range of
234/// fixed and/or scalable VFs in order to find the most cost-effective VF to
235/// vectorize with.
239
241 : FixedVF(ElementCount::getFixed(0)),
242 ScalableVF(ElementCount::getScalable(0)) {}
244 *(Max.isScalable() ? &ScalableVF : &FixedVF) = Max;
245 }
250 "Invalid scalable properties");
251 }
252
254
255 /// \return true if either fixed- or scalable VF is non-zero.
256 explicit operator bool() const { return FixedVF || ScalableVF; }
257
258 /// \return true if either fixed- or scalable VF is a valid vector VF.
259 bool hasVector() const { return FixedVF.isVector() || ScalableVF.isVector(); }
260};
261
262/// Planner drives the vectorization process after having passed
263/// Legality checks.
265 /// The loop that we evaluate.
266 Loop *OrigLoop;
267
268 /// Loop Info analysis.
269 LoopInfo *LI;
270
271 /// Target Library Info.
272 const TargetLibraryInfo *TLI;
273
274 /// Target Transform Info.
276
277 /// The legality analysis.
279
280 /// The profitability analysis.
282
283 /// The interleaved access analysis.
285
287
288 const LoopVectorizeHints &Hints;
289
291
293
294 /// Profitable vector factors.
296
297 /// A builder used to construct the current plan.
298 VPBuilder Builder;
299
300public:
307 const LoopVectorizeHints &Hints,
309 : OrigLoop(L), LI(LI), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM), IAI(IAI),
310 PSE(PSE), Hints(Hints), ORE(ORE) {}
311
312 /// Plan how to best vectorize, return the best VF and its cost, or
313 /// std::nullopt if vectorization and interleaving should be avoided up front.
314 std::optional<VectorizationFactor> plan(ElementCount UserVF, unsigned UserIC);
315
316 /// Use the VPlan-native path to plan how to best vectorize, return the best
317 /// VF and its cost.
319
320 /// Return the best VPlan for \p VF.
322
323 /// Generate the IR code for the body of the vectorized loop according to the
324 /// best selected \p VF, \p UF and VPlan \p BestPlan.
325 /// TODO: \p IsEpilogueVectorization is needed to avoid issues due to epilogue
326 /// vectorization re-using plans for both the main and epilogue vector loops.
327 /// It should be removed once the re-use issue has been fixed.
328 /// \p ExpandedSCEVs is passed during execution of the plan for epilogue loop
329 /// to re-use expansion results generated during main plan execution. Returns
330 /// a mapping of SCEVs to their expanded IR values. Note that this is a
331 /// temporary workaround needed due to the current epilogue handling.
333 executePlan(ElementCount VF, unsigned UF, VPlan &BestPlan,
335 bool IsEpilogueVectorization,
336 DenseMap<const SCEV *, Value *> *ExpandedSCEVs = nullptr);
337
338#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
339 void printPlans(raw_ostream &O);
340#endif
341
342 /// Look through the existing plans and return true if we have one with
343 /// vectorization factor \p VF.
345 return any_of(VPlans,
346 [&](const VPlanPtr &Plan) { return Plan->hasVF(VF); });
347 }
348
349 /// Test a \p Predicate on a \p Range of VF's. Return the value of applying
350 /// \p Predicate on Range.Start, possibly decreasing Range.End such that the
351 /// returned value holds for the entire \p Range.
352 static bool
353 getDecisionAndClampRange(const std::function<bool(ElementCount)> &Predicate,
354 VFRange &Range);
355
356 /// \return The most profitable vectorization factor and the cost of that VF
357 /// for vectorizing the epilogue. Returns VectorizationFactor::Disabled if
358 /// epilogue vectorization is not supported for the loop.
361
362protected:
363 /// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive,
364 /// according to the information gathered by Legal when it checked if it is
365 /// legal to vectorize the loop.
366 void buildVPlans(ElementCount MinVF, ElementCount MaxVF);
367
368private:
369 /// Build a VPlan according to the information gathered by Legal. \return a
370 /// VPlan for vectorization factors \p Range.Start and up to \p Range.End
371 /// exclusive, possibly decreasing \p Range.End.
372 VPlanPtr buildVPlan(VFRange &Range);
373
374 /// Build a VPlan using VPRecipes according to the information gather by
375 /// Legal. This method is only used for the legacy inner loop vectorizer.
376 /// \p Range's largest included VF is restricted to the maximum VF the
377 /// returned VPlan is valid for. If no VPlan can be built for the input range,
378 /// set the largest included VF to the maximum VF for which no plan could be
379 /// built.
380 std::optional<VPlanPtr> tryToBuildVPlanWithVPRecipes(
381 VFRange &Range, SmallPtrSetImpl<Instruction *> &DeadInstructions);
382
383 /// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive,
384 /// according to the information gathered by Legal when it checked if it is
385 /// legal to vectorize the loop. This method creates VPlans using VPRecipes.
386 void buildVPlansWithVPRecipes(ElementCount MinVF, ElementCount MaxVF);
387
388 // Adjust the recipes for reductions. For in-loop reductions the chain of
389 // instructions leading from the loop exit instr to the phi need to be
390 // converted to reductions, with one operand being vector and the other being
391 // the scalar reduction chain. For other reductions, a select is introduced
392 // between the phi and live-out recipes when folding the tail.
393 void adjustRecipesForReductions(VPBasicBlock *LatchVPBB, VPlanPtr &Plan,
394 VPRecipeBuilder &RecipeBuilder,
395 ElementCount MinVF);
396
397 /// \return The most profitable vectorization factor and the cost of that VF.
398 /// This method checks every VF in \p CandidateVFs.
400 selectVectorizationFactor(const ElementCountSet &CandidateVFs);
401
402 /// Returns true if the per-lane cost of VectorizationFactor A is lower than
403 /// that of B.
404 bool isMoreProfitable(const VectorizationFactor &A,
405 const VectorizationFactor &B) const;
406
407 /// Determines if we have the infrastructure to vectorize the loop and its
408 /// epilogue, assuming the main loop is vectorized by \p VF.
409 bool isCandidateForEpilogueVectorization(const ElementCount VF) const;
410};
411
412} // namespace llvm
413
414#endif // LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
assume Assume Builder
SmallVector< MachineOperand, 4 > Cond
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
std::string Name
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
#define I(x, y, z)
Definition: MD5.cpp:58
mir Rename Register Operands
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallSet class.
This file contains the declarations of the Vectorization Plan base classes:
Value * RHS
Value * LHS
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
A debug info location.
Definition: DebugLoc.h:33
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
constexpr bool isVector() const
One or more elements.
Definition: TypeSize.h:306
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition: TypeSize.h:291
InnerLoopVectorizer vectorizes loops which contain only one basic block to a specified vectorization ...
Drive the analysis of interleaved memory accesses in the loop.
Definition: VectorUtils.h:778
LoopVectorizationCostModel - estimates the expected speedups due to vectorization.
LoopVectorizationLegality checks if it is legal to vectorize a loop, and to what vectorization factor...
Planner drives the vectorization process after having passed Legality checks.
std::optional< VectorizationFactor > plan(ElementCount UserVF, unsigned UserIC)
Plan how to best vectorize, return the best VF and its cost, or std::nullopt if vectorization and int...
VectorizationFactor planInVPlanNativePath(ElementCount UserVF)
Use the VPlan-native path to plan how to best vectorize, return the best VF and its cost.
VectorizationFactor selectEpilogueVectorizationFactor(const ElementCount MaxVF)
void buildVPlans(ElementCount MinVF, ElementCount MaxVF)
Build VPlans for power-of-2 VF's between MinVF and MaxVF inclusive, according to the information gath...
VPlan & getBestPlanFor(ElementCount VF) const
Return the best VPlan for VF.
DenseMap< const SCEV *, Value * > executePlan(ElementCount VF, unsigned UF, VPlan &BestPlan, InnerLoopVectorizer &LB, DominatorTree *DT, bool IsEpilogueVectorization, DenseMap< const SCEV *, Value * > *ExpandedSCEVs=nullptr)
Generate the IR code for the body of the vectorized loop according to the best selected VF,...
LoopVectorizationPlanner(Loop *L, LoopInfo *LI, const TargetLibraryInfo *TLI, const TargetTransformInfo &TTI, LoopVectorizationLegality *Legal, LoopVectorizationCostModel &CM, InterleavedAccessInfo &IAI, PredicatedScalarEvolution &PSE, const LoopVectorizeHints &Hints, OptimizationRemarkEmitter *ORE)
static bool getDecisionAndClampRange(const std::function< bool(ElementCount)> &Predicate, VFRange &Range)
Test a Predicate on a Range of VF's.
void printPlans(raw_ostream &O)
bool hasPlanWithVF(ElementCount VF) const
Look through the existing plans and return true if we have one with vectorization factor VF.
Utility class for getting and setting loop vectorizer hints in the form of loop metadata.
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:47
The optimization diagnostic interface.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:344
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
Provides information about what library functions are available for the current target.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition: VPlan.h:2192
RecipeListTy::iterator iterator
Instruction iterators...
Definition: VPlan.h:2213
iterator end()
Definition: VPlan.h:2223
void insert(VPRecipeBase *Recipe, iterator InsertPt)
Definition: VPlan.h:2251
RAII object that stores the current insertion point and restores it when the object is destroyed.
InsertPointGuard(const InsertPointGuard &)=delete
InsertPointGuard & operator=(const InsertPointGuard &)=delete
InsertPoint - A saved insertion point.
VPInsertPoint(VPBasicBlock *InsertBlock, VPBasicBlock::iterator InsertPoint)
Creates a new insertion point at the given location.
VPBasicBlock::iterator getPoint() const
VPInsertPoint()=default
Creates a new insertion point which doesn't point to anything.
bool isSet() const
Returns true if this insert point is set.
VPlan-based builder utility analogous to IRBuilder.
void setInsertPoint(VPBasicBlock *TheBB, VPBasicBlock::iterator IP)
This specifies that created instructions should be inserted at the specified point.
void restoreIP(VPInsertPoint IP)
Sets the current insert point to a previously-saved location.
VPValue * createOr(VPValue *LHS, VPValue *RHS, DebugLoc DL, const Twine &Name="")
VPBasicBlock * getInsertBlock() const
VPBasicBlock::iterator getInsertPoint() const
VPValue * createNot(VPValue *Operand, DebugLoc DL, const Twine &Name="")
VPValue * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Instruction *Inst=nullptr, const Twine &Name="")
Create an N-ary operation with Opcode, Operands and set Inst as its underlying Instruction.
VPInstruction * insert(VPInstruction *I) const
Insert and return the specified instruction.
VPValue * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, DebugLoc DL, const Twine &Name="")
void clearInsertionPoint()
Clear the insertion point: created instructions will not be inserted into a block.
VPBuilder()=default
VPValue * createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal, DebugLoc DL, const Twine &Name="")
VPValue * createAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL, const Twine &Name="")
void setInsertPoint(VPBasicBlock *TheBB)
This specifies that created VPInstructions should be appended to the end of the specified block.
This is a concrete Recipe that models a single VPlan-level instruction.
Definition: VPlan.h:816
Helper class to create VPRecipies from IR instructions.
void setUnderlyingValue(Value *Val)
Definition: VPlanValue.h:77
VPlan models a candidate for vectorization, encoding various decisions take to produce efficient outp...
Definition: VPlan.h:2413
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:166
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1826
std::unique_ptr< VPlan > VPlanPtr
Definition: VPlan.h:132
ElementCountComparator creates a total ordering for ElementCount for the purposes of using it in a se...
bool operator()(const ElementCount &LHS, const ElementCount &RHS) const
A class that represents two vectorization factors (initialized with 0 by default).
FixedScalableVFPair(const ElementCount &FixedVF, const ElementCount &ScalableVF)
FixedScalableVFPair(const ElementCount &Max)
static FixedScalableVFPair getNone()
A range of powers-of-2 vectorization factors with fixed start and adjustable end.
Definition: VPlan.h:85
TODO: The following VectorizationFactor was pulled out of LoopVectorizationCostModel class.
InstructionCost Cost
Cost of the loop with that width.
ElementCount MinProfitableTripCount
The minimum trip count required to make vectorization profitable, e.g.
bool operator==(const VectorizationFactor &rhs) const
ElementCount Width
Vector width with best cost.
InstructionCost ScalarCost
Cost of the scalar loop.
bool operator!=(const VectorizationFactor &rhs) const
static VectorizationFactor Disabled()
Width 1 means no vectorization, cost 0 means uncomputed cost.
VectorizationFactor(ElementCount Width, InstructionCost Cost, InstructionCost ScalarCost)