LLVM 23.0.0git
VPlanAnalysis.cpp
Go to the documentation of this file.
1//===- VPlanAnalysis.cpp - Various Analyses working on VPlan ----*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "VPlanAnalysis.h"
10#include "VPlan.h"
11#include "VPlanCFG.h"
12#include "VPlanDominatorTree.h"
13#include "VPlanHelpers.h"
14#include "VPlanPatternMatch.h"
18#include "llvm/IR/Instruction.h"
19
20using namespace llvm;
21using namespace VPlanPatternMatch;
22
23#define DEBUG_TYPE "vplan"
24
26 Type *Ty = V->getScalarType();
27 assert(Ty && "Scalar type must be set by recipe construction");
28 return Ty;
29}
30
32 VPlan &Plan, DenseSet<VPRecipeBase *> &EphRecipes) {
33 // First, collect seed recipes which are operands of assumes.
37 for (VPRecipeBase &R : *VPBB) {
38 auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
39 if (!RepR || !match(RepR, m_Intrinsic<Intrinsic::assume>()))
40 continue;
41 Worklist.push_back(RepR);
42 EphRecipes.insert(RepR);
43 }
44 }
45
46 // Process operands of candidates in worklist and add them to the set of
47 // ephemeral recipes, if they don't have side-effects and are only used by
48 // other ephemeral recipes.
49 while (!Worklist.empty()) {
50 VPRecipeBase *Cur = Worklist.pop_back_val();
51 for (VPValue *Op : Cur->operands()) {
52 auto *OpR = Op->getDefiningRecipe();
53 if (!OpR || OpR->mayHaveSideEffects() || EphRecipes.contains(OpR))
54 continue;
55 if (any_of(Op->users(), [EphRecipes](VPUser *U) {
56 auto *UR = dyn_cast<VPRecipeBase>(U);
57 return !UR || !EphRecipes.contains(UR);
58 }))
59 continue;
60 EphRecipes.insert(OpR);
61 Worklist.push_back(OpR);
62 }
63 }
64}
65
68
70 const VPRecipeBase *B) {
71 if (A == B)
72 return false;
73
74 auto LocalComesBefore = [](const VPRecipeBase *A, const VPRecipeBase *B) {
75 for (auto &R : *A->getParent()) {
76 if (&R == A)
77 return true;
78 if (&R == B)
79 return false;
80 }
81 llvm_unreachable("recipe not found");
82 };
83 const VPBlockBase *ParentA = A->getParent();
84 const VPBlockBase *ParentB = B->getParent();
85 if (ParentA == ParentB)
86 return LocalComesBefore(A, B);
87
88 return Base::properlyDominates(ParentA, ParentB);
89}
90
94 unsigned OverrideMaxNumRegs) const {
96 for (const auto &[RegClass, MaxUsers] : MaxLocalUsers) {
97 unsigned AvailableRegs = OverrideMaxNumRegs > 0
98 ? OverrideMaxNumRegs
99 : TTI.getNumberOfRegisters(RegClass);
100 if (MaxUsers > AvailableRegs) {
101 // Assume that for each register used past what's available we get one
102 // spill and reload.
103 unsigned Spills = MaxUsers - AvailableRegs;
104 InstructionCost SpillCost =
105 TTI.getRegisterClassSpillCost(RegClass, CostKind) +
106 TTI.getRegisterClassReloadCost(RegClass, CostKind);
107 InstructionCost TotalCost = Spills * SpillCost;
108 LLVM_DEBUG(dbgs() << "LV(REG): Cost of " << TotalCost << " from "
109 << Spills << " spills of "
110 << TTI.getRegisterClassName(RegClass) << "\n");
111 Cost += TotalCost;
112 }
113 }
114 return Cost;
115}
116
119 const SmallPtrSetImpl<const Value *> &ValuesToIgnore) {
120 // Each 'key' in the map opens a new interval. The values
121 // of the map are the index of the 'last seen' usage of the
122 // VPValue that is the key.
124
125 // Maps indices to recipes.
127 // Marks the end of each interval.
128 IntervalMap EndPoint;
129 // Saves the list of VPValues that are used in the loop.
131 // Saves the list of values that are used in the loop but are defined outside
132 // the loop (not including non-recipe values such as arguments and
133 // constants).
134 SmallSetVector<VPValue *, 8> LoopInvariants;
135 if (Plan.getVectorTripCount().getNumUsers() > 0)
136 LoopInvariants.insert(&Plan.getVectorTripCount());
137
138 // We scan the loop in a topological order in order and assign a number to
139 // each recipe. We use RPO to ensure that defs are met before their users. We
140 // assume that each recipe that has in-loop users starts an interval. We
141 // record every time that an in-loop value is used, so we have a list of the
142 // first occurences of each recipe and last occurrence of each VPValue.
143 VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
145 LoopRegion);
147 if (!VPBB->getParent())
148 break;
149 for (VPRecipeBase &R : *VPBB) {
150 Idx2Recipe.push_back(&R);
151
152 // Save the end location of each USE.
153 for (VPValue *U : R.operands()) {
154 if (isa<VPRecipeValue>(U)) {
155 // Overwrite previous end points.
156 EndPoint[U] = Idx2Recipe.size();
157 Ends.insert(U);
158 } else if (auto *IRV = dyn_cast<VPIRValue>(U)) {
159 // Ignore non-recipe values such as arguments, constants, etc.
160 // FIXME: Might need some motivation why these values are ignored. If
161 // for example an argument is used inside the loop it will increase
162 // the register pressure (so shouldn't we add it to LoopInvariants).
163 if (!isa<Instruction>(IRV->getValue()))
164 continue;
165 // This recipe is outside the loop, record it and continue.
166 LoopInvariants.insert(U);
167 }
168 // Other types of VPValue are currently not tracked.
169 }
170 }
171 if (VPBB == LoopRegion->getExiting()) {
172 // VPWidenIntOrFpInductionRecipes are used implicitly at the end of the
173 // exiting block, where their increment will get materialized eventually.
174 for (auto &R : LoopRegion->getEntryBasicBlock()->phis()) {
175 if (auto *WideIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R)) {
176 EndPoint[WideIV] = Idx2Recipe.size();
177 Ends.insert(WideIV);
178 }
179 }
180 }
181 }
182
183 // Saves the list of intervals that end with the index in 'key'.
184 using VPValueList = SmallVector<VPValue *, 2>;
186
187 // Next, we transpose the EndPoints into a multi map that holds the list of
188 // intervals that *end* at a specific location.
189 for (auto &Interval : EndPoint)
190 TransposeEnds[Interval.second].push_back(Interval.first);
191
192 SmallPtrSet<VPValue *, 8> OpenIntervals;
195
196 LLVM_DEBUG(dbgs() << "LV(REG): Calculating max register usage:\n");
197
198 VPTypeAnalysis TypeInfo(Plan);
199
200 const auto &TTICapture = TTI;
201 auto GetRegUsage = [&TTICapture](Type *Ty, ElementCount VF) -> unsigned {
202 if (Ty->isTokenTy() || !VectorType::isValidElementType(Ty) ||
203 (VF.isScalable() &&
204 !TTICapture.isElementTypeLegalForScalableVector(Ty)))
205 return 0;
206 return TTICapture.getRegUsageForType(VectorType::get(Ty, VF));
207 };
208
209 VPValue *CanIV = LoopRegion->getCanonicalIV();
210 // Note: canonical IVs are retained even if they have no users.
211 if (CanIV->getNumUsers() != 0)
212 OpenIntervals.insert(CanIV);
213
214 // We scan the instructions linearly and record each time that a new interval
215 // starts, by placing it in a set. If we find this value in TransposEnds then
216 // we remove it from the set. The max register usage is the maximum register
217 // usage of the recipes of the set.
218 for (unsigned int Idx = 0, Sz = Idx2Recipe.size(); Idx < Sz; ++Idx) {
219 VPRecipeBase *R = Idx2Recipe[Idx];
220
221 // Remove all of the VPValues that end at this location.
222 VPValueList &List = TransposeEnds[Idx];
223 for (VPValue *ToRemove : List)
224 OpenIntervals.erase(ToRemove);
225
226 // Ignore recipes that are never used within the loop and do not have side
227 // effects.
228 if (none_of(R->definedValues(),
229 [&Ends](VPValue *Def) { return Ends.count(Def); }) &&
230 !R->mayHaveSideEffects())
231 continue;
232
233 // Skip recipes for ignored values.
234 // TODO: Should mark recipes for ephemeral values that cannot be removed
235 // explictly in VPlan.
236 if (isa<VPSingleDefRecipe>(R) &&
237 ValuesToIgnore.contains(
238 cast<VPSingleDefRecipe>(R)->getUnderlyingValue()))
239 continue;
240
241 // For each VF find the maximum usage of registers.
242 for (unsigned J = 0, E = VFs.size(); J < E; ++J) {
243 // Count the number of registers used, per register class, given all open
244 // intervals.
245 // Note that elements in this SmallMapVector will be default constructed
246 // as 0. So we can use "RegUsage[ClassID] += n" in the code below even if
247 // there is no previous entry for ClassID.
249
250 for (auto *VPV : OpenIntervals) {
251 // Skip artificial values or values that weren't present in the original
252 // loop.
253 // TODO: Remove skipping values that weren't present in the original
254 // loop after removing the legacy
255 // LoopVectorizationCostModel::calculateRegisterUsage
257 VPBranchOnMaskRecipe>(VPV) ||
259 continue;
260
261 if (VFs[J].isScalar() ||
266 (cast<VPReductionPHIRecipe>(VPV))->isInLoop())) {
267 unsigned ClassID =
268 TTI.getRegisterClassForType(false, TypeInfo.inferScalarType(VPV));
269 // FIXME: The target might use more than one register for the type
270 // even in the scalar case.
271 RegUsage[ClassID] += 1;
272 } else {
273 // The output from scaled phis and scaled reductions actually has
274 // fewer lanes than the VF.
275 unsigned ScaleFactor =
276 vputils::getVFScaleFactor(VPV->getDefiningRecipe());
277 ElementCount VF = VFs[J];
278 if (ScaleFactor > 1) {
279 VF = VFs[J].divideCoefficientBy(ScaleFactor);
280 LLVM_DEBUG(dbgs() << "LV(REG): Scaled down VF from " << VFs[J]
281 << " to " << VF << " for " << *R << "\n";);
282 }
283
284 Type *ScalarTy = TypeInfo.inferScalarType(VPV);
285 unsigned ClassID = TTI.getRegisterClassForType(true, ScalarTy);
286 RegUsage[ClassID] += GetRegUsage(ScalarTy, VF);
287 }
288 }
289
290 for (const auto &Pair : RegUsage) {
291 auto &Entry = MaxUsages[J][Pair.first];
292 Entry = std::max(Entry, Pair.second);
293 }
294 }
295
296 LLVM_DEBUG(dbgs() << "LV(REG): At #" << Idx << " Interval # "
297 << OpenIntervals.size() << '\n');
298
299 // Add used VPValues defined by the current recipe to the list of open
300 // intervals.
301 for (VPValue *DefV : R->definedValues())
302 if (Ends.contains(DefV))
303 OpenIntervals.insert(DefV);
304 }
305
306 // We also search for instructions that are defined outside the loop, but are
307 // used inside the loop. We need this number separately from the max-interval
308 // usage number because when we unroll, loop-invariant values do not take
309 // more register.
311 for (unsigned Idx = 0, End = VFs.size(); Idx < End; ++Idx) {
312 // Note that elements in this SmallMapVector will be default constructed
313 // as 0. So we can use "Invariant[ClassID] += n" in the code below even if
314 // there is no previous entry for ClassID.
316
317 for (auto *In : LoopInvariants) {
318 // FIXME: The target might use more than one register for the type
319 // even in the scalar case.
320 bool IsScalar = vputils::onlyScalarValuesUsed(In);
321
322 ElementCount VF = IsScalar ? ElementCount::getFixed(1) : VFs[Idx];
323 unsigned ClassID = TTI.getRegisterClassForType(
324 VF.isVector(), TypeInfo.inferScalarType(In));
325 Invariant[ClassID] += GetRegUsage(TypeInfo.inferScalarType(In), VF);
326 }
327
328 LLVM_DEBUG({
329 dbgs() << "LV(REG): VF = " << VFs[Idx] << '\n';
330 dbgs() << "LV(REG): Found max usage: " << MaxUsages[Idx].size()
331 << " item\n";
332 for (const auto &pair : MaxUsages[Idx]) {
333 dbgs() << "LV(REG): RegisterClass: "
334 << TTI.getRegisterClassName(pair.first) << ", " << pair.second
335 << " registers\n";
336 }
337 dbgs() << "LV(REG): Found invariant usage: " << Invariant.size()
338 << " item\n";
339 for (const auto &pair : Invariant) {
340 dbgs() << "LV(REG): RegisterClass: "
341 << TTI.getRegisterClassName(pair.first) << ", " << pair.second
342 << " registers\n";
343 }
344 });
345
346 RU.LoopInvariantRegs = Invariant;
347 RU.MaxLocalUsers = MaxUsages[Idx];
348 RUs[Idx] = RU;
349 }
350
351 return RUs;
352}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
ReachingDefInfo InstSet & ToRemove
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
std::pair< uint64_t, uint64_t > Interval
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
#define LLVM_DEBUG(...)
Definition Debug.h:119
This pass exposes codegen information to IR-level passes.
This file implements dominator tree analysis for a single level of a VPlan's H-CFG.
This file contains the declarations of different VPlan-related auxiliary helpers.
This file contains the declarations of the Vectorization Plan base classes:
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
Get the array size.
Definition ArrayRef.h:141
Implements a dense probed hash-table based set.
Definition DenseSet.h:289
Core dominator tree base class.
bool properlyDominates(const DomTreeNodeBase< VPBlockBase > *A, const DomTreeNodeBase< VPBlockBase > *B) const
constexpr bool isVector() const
One or more elements.
Definition TypeSize.h:324
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:309
size_type size() const
Definition MapVector.h:58
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
size_type size() const
Definition SmallPtrSet.h:99
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
bool erase(PtrType Ptr)
Remove pointer from the set.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:339
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
TargetCostKind
The kind of cost model.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:368
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition VPlan.h:4295
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
Definition VPlan.h:4383
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition VPlan.h:93
const VPBasicBlock * getEntryBasicBlock() const
Definition VPlan.cpp:216
static auto blocksOnly(T &&Range)
Return an iterator range over Range which only includes BlockTy blocks.
Definition VPlanUtils.h:323
A recipe for generating conditional branches on the bits of a mask.
Definition VPlan.h:3413
A recipe for generating the phi node tracking the current scalar iteration index.
Definition VPlan.h:3976
A recipe for converting the input value IV value to the corresponding value of an IV with different s...
Definition VPlan.h:4070
bool properlyDominates(const VPRecipeBase *A, const VPRecipeBase *B)
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition VPlan.h:401
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition VPlan.h:4505
const VPBlockBase * getEntry() const
Definition VPlan.h:4549
VPRegionValue * getCanonicalIV()
Return the canonical induction variable of the region, null for replicating regions.
Definition VPlan.h:4617
const VPBlockBase * getExiting() const
Definition VPlan.h:4561
VPValues defined by a VPRegionBlock, like the canonical IV.
Definition VPlanValue.h:215
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
Definition VPlan.h:3322
A recipe for handling phi nodes of integer and floating-point inductions, producing their scalar valu...
Definition VPlan.h:4140
An analysis for type-inference for VPValues.
Type * inferScalarType(const VPValue *V)
Infer the type of V. Returns the scalar type of V.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition VPlanValue.h:384
operand_range operands()
Definition VPlanValue.h:455
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
Definition VPlanValue.h:50
unsigned getNumUsers() const
Definition VPlanValue.h:115
A recipe to compute a pointer to the last element of each part of a widened memory access for widened...
Definition VPlan.h:2256
A recipe to compute the pointers for widened memory accesses of SourceElementTy, with the Stride expr...
Definition VPlan.h:2330
VPlan models a candidate for vectorization, encoding various decisions take to produce efficient outp...
Definition VPlan.h:4653
VPSymbolicValue & getVectorTripCount()
The vector trip count.
Definition VPlan.h:4842
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition VPlan.cpp:1068
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
static LLVM_ABI bool isValidElementType(Type *ElemTy)
Return true if the specified type is valid as a element type.
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:212
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
Definition DenseSet.h:185
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
bool match(Val *V, const Pattern &P)
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
auto m_VPValue()
Match an arbitrary VPValue and ignore it.
VPInstruction_match< VPInstruction::ExtractLastPart, Op0_t > m_ExtractLastPart(const Op0_t &Op0)
bool onlyScalarValuesUsed(const VPValue *Def)
Returns true if only scalar values of Def are used by all users.
unsigned getVFScaleFactor(VPRecipeBase *R)
Get the VF scaling factor applied to the recipe's output, if the recipe has one.
This is an optimization pass for GlobalISel generic memory operations.
InstructionCost Cost
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
iterator_range< df_iterator< VPBlockDeepTraversalWrapper< VPBlockBase * > > > vp_depth_first_deep(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order while traversing t...
Definition VPlanCFG.h:288
SmallVector< VPRegisterUsage, 8 > calculateRegisterUsageForPlan(VPlan &Plan, ArrayRef< ElementCount > VFs, const TargetTransformInfo &TTI, const SmallPtrSetImpl< const Value * > &ValuesToIgnore)
Estimate the register usage for Plan and vectorization factors in VFs by calculating the highest numb...
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
void collectEphemeralRecipesForVPlan(VPlan &Plan, DenseSet< VPRecipeBase * > &EphRecipes)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1752
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
TargetTransformInfo TTI
DWARFExpression::Operation Op
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
A MapVector that performs no allocations if smaller than a certain size.
Definition MapVector.h:334
A struct that represents some properties of the register usage of a loop.
SmallMapVector< unsigned, unsigned, 4 > MaxLocalUsers
Holds the maximum number of concurrent live intervals in the loop.
InstructionCost spillCost(const TargetTransformInfo &TTI, TargetTransformInfo::TargetCostKind CostKind, unsigned OverrideMaxNumRegs=0) const
Calculate the estimated cost of any spills due to using more registers than the number available for ...
SmallMapVector< unsigned, unsigned, 4 > LoopInvariantRegs
Holds the number of loop invariant values that are used in the loop.