LLVM 23.0.0git
LoopVectorizationPlanner.h
Go to the documentation of this file.
1//===- LoopVectorizationPlanner.h - Planner for LoopVectorization ---------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file provides a LoopVectorizationPlanner class.
11/// InnerLoopVectorizer vectorizes loops which contain only one basic block;
12/// LoopVectorizationPlanner drives the vectorization process after having
13/// passed Legality checks.
14/// The planner builds and optimizes the Vectorization Plans, which record the
15/// decisions on how to vectorize the given loop. In particular, they represent
16/// the control-flow of the vectorized version, the replication of instructions
17/// that are to be scalarized, and interleave access groups.
18///
19/// Also provides a VPlan-based builder utility analogous to IRBuilder.
20/// It provides an instruction-level API for generating VPInstructions while
21/// abstracting away the Recipe manipulation details.
22//===----------------------------------------------------------------------===//
23
24#ifndef LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H
25#define LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H
26
27#include "VPlan.h"
28#include "llvm/ADT/SmallSet.h"
30
31namespace {
32class GeneratedRTChecks;
33}
34
35namespace llvm {
36
37class LoopInfo;
38class DominatorTree;
43class LoopVersioning;
47class VPRecipeBuilder;
48struct VPRegisterUsage;
49struct VFRange;
50
53
54/// VPlan-based builder utility analogous to IRBuilder.
55class VPBuilder {
56 VPBasicBlock *BB = nullptr;
58
59 /// Insert \p R in BB at InsertPt if BB is set. Returns \p R in either case.
60 template <typename T> T *tryInsertInstruction(T *R) {
61 if (BB)
62 BB->insert(R, InsertPt);
63 return R;
64 }
65
/// Private helper: allocate a VPInstruction from \p Opcode, \p Operands,
/// \p MD, \p DL and \p Name, passing an empty `{}` in the argument position
/// that other call sites use for flags, and hand the result to
/// tryInsertInstruction.
66 VPInstruction *createInstruction(unsigned Opcode,
67 ArrayRef<VPValue *> Operands,
68 const VPIRMetadata &MD, DebugLoc DL,
69 const Twine &Name = "") {
70 return tryInsertInstruction(
71 new VPInstruction(Opcode, Operands, {}, MD, DL, Name));
72 }
73
74public:
75 VPBuilder() = default;
76 VPBuilder(VPBasicBlock *InsertBB) { setInsertPoint(InsertBB); }
77 VPBuilder(VPRecipeBase *InsertPt) { setInsertPoint(InsertPt); }
81
82 /// Clear the insertion point: created instructions will not be inserted into
83 /// a block.
85 BB = nullptr;
86 InsertPt = VPBasicBlock::iterator();
87 }
88
/// Returns the block recipes are currently inserted into (BB); this is
/// nullptr when no insertion point is set.
89 VPBasicBlock *getInsertBlock() const { return BB; }
/// Returns the current insertion position (InsertPt).
90 VPBasicBlock::iterator getInsertPoint() const { return InsertPt; }
91
92 /// Create a VPBuilder to insert after \p R.
95 B.setInsertPoint(R->getParent(), std::next(R->getIterator()));
96 return B;
97 }
98
99 /// VPInsertPoint - A saved insertion point.
101 VPBasicBlock *Block = nullptr;
103
104 public:
105 /// Creates a new insertion point which doesn't point to anything.
106 VPInsertPoint() = default;
107
108 /// Creates a new insertion point at the given location.
110 : Block(InsertBlock), Point(InsertPoint) {}
111
112 /// Returns true if this insert point is set.
113 bool isSet() const { return Block != nullptr; }
114
115 VPBasicBlock *getBlock() const { return Block; }
116 VPBasicBlock::iterator getPoint() const { return Point; }
117 };
118
119 /// Sets the current insert point to a previously-saved location.
121 if (IP.isSet())
122 setInsertPoint(IP.getBlock(), IP.getPoint());
123 else
125 }
126
127 /// This specifies that created VPInstructions should be appended to the end
128 /// of the specified block.
130 assert(TheBB && "Attempting to set a null insert point");
131 BB = TheBB;
132 InsertPt = BB->end();
133 }
134
135 /// This specifies that created instructions should be inserted at the
136 /// specified point.
138 BB = TheBB;
139 InsertPt = IP;
140 }
141
142 /// This specifies that created instructions should be inserted at the
143 /// position of the recipe \p IP within its parent block.
145 BB = IP->getParent();
146 InsertPt = IP->getIterator();
147 }
148
149 /// Insert \p R at the current insertion point. Returns \p R unchanged.
/// NOTE(review): unlike tryInsertInstruction, BB is dereferenced without a
/// null check, so this presumably requires an insertion point to be set.
150 template <typename T> [[maybe_unused]] T *insert(T *R) {
151 BB->insert(R, InsertPt);
152 return R;
153 }
154
155 /// Create an N-ary operation with \p Opcode, \p Operands and set \p Inst as
156 /// its underlying Instruction.
158 Instruction *Inst = nullptr,
159 const VPIRFlags &Flags = {},
160 const VPIRMetadata &MD = {},
162 const Twine &Name = "") {
163 VPInstruction *NewVPInst = tryInsertInstruction(
164 new VPInstruction(Opcode, Operands, Flags, MD, DL, Name));
165 NewVPInst->setUnderlyingValue(Inst);
166 return NewVPInst;
167 }
169 DebugLoc DL, const Twine &Name = "") {
170 return createInstruction(Opcode, Operands, {}, DL, Name);
171 }
173 const VPIRFlags &Flags,
175 const Twine &Name = "") {
176 return tryInsertInstruction(
177 new VPInstruction(Opcode, Operands, Flags, {}, DL, Name));
178 }
179
181 Type *ResultTy, const VPIRFlags &Flags = {},
183 const Twine &Name = "") {
184 return tryInsertInstruction(new VPInstructionWithType(
185 Opcode, Operands, ResultTy, Flags, {}, DL, Name));
186 }
187
189 unsigned Opcode, ArrayRef<VPValue *> Operands,
190 VPRecipeWithIRFlags::WrapFlagsTy WrapFlags = {false, false},
191 DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "") {
192 return tryInsertInstruction(
193 new VPInstruction(Opcode, Operands, WrapFlags, {}, DL, Name));
194 }
195
198 const Twine &Name = "") {
199 return createInstruction(VPInstruction::Not, {Operand}, {}, DL, Name);
200 }
201
204 const Twine &Name = "") {
205 return createInstruction(Instruction::BinaryOps::And, {LHS, RHS}, {}, DL,
206 Name);
207 }
208
211 const Twine &Name = "") {
212
213 return tryInsertInstruction(new VPInstruction(
214 Instruction::BinaryOps::Or, {LHS, RHS},
215 VPRecipeWithIRFlags::DisjointFlagsTy(false), {}, DL, Name));
216 }
217
220 const Twine &Name = "",
221 VPRecipeWithIRFlags::WrapFlagsTy WrapFlags = {false, false}) {
222 return createOverflowingOp(Instruction::Add, {LHS, RHS}, WrapFlags, DL,
223 Name);
224 }
225
226 VPInstruction *
228 const Twine &Name = "",
229 VPRecipeWithIRFlags::WrapFlagsTy WrapFlags = {false, false}) {
230 return createOverflowingOp(Instruction::Sub, {LHS, RHS}, WrapFlags, DL,
231 Name);
232 }
233
239
245
247 VPValue *FalseVal,
249 const Twine &Name = "",
250 const VPIRFlags &Flags = {}) {
251 return tryInsertInstruction(new VPInstruction(
252 Instruction::Select, {Cond, TrueVal, FalseVal}, Flags, {}, DL, Name));
253 }
254
255 /// Create a new ICmp VPInstruction with predicate \p Pred and operands \p A
256 /// and \p B.
259 const Twine &Name = "") {
261 Pred <= CmpInst::LAST_ICMP_PREDICATE && "invalid predicate");
262 return tryInsertInstruction(
263 new VPInstruction(Instruction::ICmp, {A, B}, Pred, {}, DL, Name));
264 }
265
266 /// Create a new FCmp VPInstruction with predicate \p Pred and operands \p A
267 /// and \p B.
270 const Twine &Name = "") {
272 Pred <= CmpInst::LAST_FCMP_PREDICATE && "invalid predicate");
273 return tryInsertInstruction(
274 new VPInstruction(Instruction::FCmp, {A, B},
275 VPIRFlags(Pred, FastMathFlags()), {}, DL, Name));
276 }
277
278 /// Create an AnyOf reduction pattern: or-reduce \p ChainOp, freeze the
279 /// result, then select between \p TrueVal and \p FalseVal.
281 VPValue *FalseVal,
283
286 const Twine &Name = "") {
287 return tryInsertInstruction(
289 GEPNoWrapFlags::none(), {}, DL, Name));
290 }
291
293 GEPNoWrapFlags GEPFlags,
295 const Twine &Name = "") {
296 return tryInsertInstruction(new VPInstruction(
297 VPInstruction::PtrAdd, {Ptr, Offset}, GEPFlags, {}, DL, Name));
298 }
299
302 const Twine &Name = "") {
303 return tryInsertInstruction(
305 GEPNoWrapFlags::none(), {}, DL, Name));
306 }
307
310 const Twine &Name = "", const VPIRFlags &Flags = {}) {
311 return tryInsertInstruction(new VPPhi(IncomingValues, Flags, DL, Name));
312 }
313
315 VPlan &Plan = *getInsertBlock()->getPlan();
316 VPValue *RuntimeEC = Plan.getConstantInt(Ty, EC.getKnownMinValue());
317 if (EC.isScalable()) {
318 VPValue *VScale = createNaryOp(VPInstruction::VScale, {}, Ty);
319 RuntimeEC = EC.getKnownMinValue() == 1
320 ? VScale
321 : createOverflowingOp(Instruction::Mul,
322 {VScale, RuntimeEC}, {true, false});
323 }
324 return RuntimeEC;
325 }
326
327 /// Convert the input value \p Current to the corresponding value of an
328 /// induction with \p Start and \p Step values, using \p Start + \p Current *
329 /// \p Step.
331 FPMathOperator *FPBinOp, VPIRValue *Start,
332 VPValue *Current, VPValue *Step,
333 const Twine &Name = "") {
334 return tryInsertInstruction(
335 new VPDerivedIVRecipe(Kind, FPBinOp, Start, Current, Step, Name));
336 }
337
339 DebugLoc DL,
340 const VPIRMetadata &Metadata = {}) {
341 return tryInsertInstruction(new VPInstructionWithType(
342 Instruction::Load, Addr, ResultTy, {}, Metadata, DL));
343 }
344
346 Type *ResultTy, DebugLoc DL,
347 const VPIRMetadata &Metadata = {}) {
348 return tryInsertInstruction(new VPInstructionWithType(
349 Opcode, Op, ResultTy, VPIRFlags::getDefaultFlags(Opcode), Metadata,
350 DL));
351 }
352
354 Type *ResultTy, DebugLoc DL,
355 const VPIRFlags &Flags,
356 const VPIRMetadata &Metadata = {}) {
357 return tryInsertInstruction(
358 new VPInstructionWithType(Opcode, Op, ResultTy, Flags, Metadata, DL));
359 }
360
362 DebugLoc DL) {
363 if (ResultTy == SrcTy)
364 return Op;
365 Instruction::CastOps CastOp =
366 ResultTy->getScalarSizeInBits() < SrcTy->getScalarSizeInBits()
367 ? Instruction::Trunc
368 : Instruction::ZExt;
369 return createScalarCast(CastOp, Op, ResultTy, DL);
370 }
371
373 DebugLoc DL) {
374 if (ResultTy == SrcTy)
375 return Op;
376 Instruction::CastOps CastOp =
377 ResultTy->getScalarSizeInBits() < SrcTy->getScalarSizeInBits()
378 ? Instruction::Trunc
379 : Instruction::SExt;
380 return createScalarCast(CastOp, Op, ResultTy, DL);
381 }
382
384 Type *ResultTy) {
385 return tryInsertInstruction(new VPWidenCastRecipe(
386 Opcode, Op, ResultTy, nullptr, VPIRFlags::getDefaultFlags(Opcode)));
387 }
388
391 FPMathOperator *FPBinOp, VPValue *IV, VPValue *Step,
392 VPValue *VF, DebugLoc DL) {
393 return tryInsertInstruction(new VPScalarIVStepsRecipe(
394 IV, Step, VF, InductionOpcode,
395 FPBinOp ? FPBinOp->getFastMathFlags() : FastMathFlags(), DL));
396 }
397
399 return tryInsertInstruction(new VPExpandSCEVRecipe(Expr));
400 }
401
402 //===--------------------------------------------------------------------===//
403 // RAII helpers.
404 //===--------------------------------------------------------------------===//
405
406 /// RAII object that stores the current insertion point and restores it when
407 /// the object is destroyed.
409 VPBuilder &Builder;
410 VPBasicBlock *Block;
412
413 public:
415 : Builder(B), Block(B.getInsertBlock()), Point(B.getInsertPoint()) {}
416
419
420 ~InsertPointGuard() { Builder.restoreIP(VPInsertPoint(Block, Point)); }
421 };
422};
423
424/// TODO: The following VectorizationFactor was pulled out of
425/// LoopVectorizationCostModel class. LV also deals with
426/// VectorizerParams::VectorizationFactor.
427/// We need to streamline them.
428
429/// Information about vectorization costs.
431 /// Vector width with best cost.
433
434 /// Cost of the loop with that width.
436
437 /// Cost of the scalar loop.
439
440 /// The minimum trip count required to make vectorization profitable, e.g. due
441 /// to runtime checks.
443
447
448 /// Width 1 means no vectorization, cost 0 means uncomputed cost.
450 return {ElementCount::getFixed(1), 0, 0};
451 }
452
/// Two factors compare equal when both Width and Cost match; no other
/// members take part in the comparison.
453 bool operator==(const VectorizationFactor &rhs) const {
454 return Width == rhs.Width && Cost == rhs.Cost;
455 }
456
/// Negation of operator==.
457 bool operator!=(const VectorizationFactor &rhs) const {
458 return !(*this == rhs);
459 }
460};
461
462/// A class that represents two vectorization factors (initialized with 0 by
463/// default). One for fixed-width vectorization and one for scalable
464/// vectorization. This can be used by the vectorizer to choose from a range of
465/// fixed and/or scalable VFs in order to find the most cost-effective VF to
466/// vectorize with.
470
472 : FixedVF(ElementCount::getFixed(0)),
473 ScalableVF(ElementCount::getScalable(0)) {}
475 *(Max.isScalable() ? &ScalableVF : &FixedVF) = Max;
476 }
480 assert(!FixedVF.isScalable() && ScalableVF.isScalable() &&
481 "Invalid scalable properties");
482 }
483
485
486 /// \return true if either fixed- or scalable VF is non-zero.
487 explicit operator bool() const { return FixedVF || ScalableVF; }
488
489 /// \return true if either fixed- or scalable VF is a valid vector VF.
490 bool hasVector() const { return FixedVF.isVector() || ScalableVF.isVector(); }
491};
492
493/// Planner drives the vectorization process after having passed
494/// Legality checks.
496 /// The loop that we evaluate.
497 Loop *OrigLoop;
498
499 /// Loop Info analysis.
500 LoopInfo *LI;
501
502 /// The dominator tree.
503 DominatorTree *DT;
504
505 /// Target Library Info.
506 const TargetLibraryInfo *TLI;
507
508 /// Target Transform Info.
509 const TargetTransformInfo &TTI;
510
511 /// The legality analysis.
513
514 /// The profitability analysis.
516
517 /// The interleaved access analysis.
519
521
522 const LoopVectorizeHints &Hints;
523
525
527
528 /// Profitable vector factors.
530
531 /// A builder used to construct the current plan.
532 VPBuilder Builder;
533
534 /// Computes the cost of \p Plan for vectorization factor \p VF.
535 ///
536 /// The current implementation requires access to the
537 /// LoopVectorizationLegality to handle inductions and reductions, which is
538 /// why it is kept separate from the VPlan-only cost infrastructure.
539 ///
540 /// TODO: Move to VPlan::cost once the use of LoopVectorizationLegality has
541 /// been retired.
542 InstructionCost cost(VPlan &Plan, ElementCount VF, VPRegisterUsage *RU) const;
543
544 /// Precompute costs for certain instructions using the legacy cost model. The
545 /// function is used to bring up the VPlan-based cost model to initially avoid
546 /// taking different decisions due to inaccuracies in the legacy cost model.
547 InstructionCost precomputeCosts(VPlan &Plan, ElementCount VF,
548 VPCostContext &CostCtx) const;
549
550public:
552 Loop *L, LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI,
557 : OrigLoop(L), LI(LI), DT(DT), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM),
558 IAI(IAI), PSE(PSE), Hints(Hints), ORE(ORE) {}
559
560 /// Build VPlans for the specified \p UserVF and \p UserIC if they are
561 /// non-zero or all applicable candidate VFs otherwise. If vectorization and
562 /// interleaving should be avoided up-front, no plans are generated.
563 void plan(ElementCount UserVF, unsigned UserIC);
564
565 /// Use the VPlan-native path to plan how to best vectorize, return the best
566 /// VF and its cost.
568
569 /// Return the VPlan for \p VF. At the moment, there is always a single VPlan
570 /// for each VF.
571 VPlan &getPlanFor(ElementCount VF) const;
572
573 /// Compute and return the most profitable vectorization factor and the
574 /// corresponding best VPlan. Also collect all profitable VFs in
575 /// ProfitableVFs.
576 std::pair<VectorizationFactor, VPlan *> computeBestVF();
577
578 /// \return The desired interleave count.
579 /// If interleave count has been specified by metadata it will be returned.
580 /// Otherwise, the interleave count is computed and returned. VF and LoopCost
581 /// are the selected vectorization factor and the cost of the selected VF.
582 unsigned selectInterleaveCount(VPlan &Plan, ElementCount VF,
583 InstructionCost LoopCost);
584
585 /// Generate the IR code for the vectorized loop captured in VPlan \p BestPlan
586 /// according to the best selected \p VF and \p UF.
587 ///
588 /// TODO: \p EpilogueVecKind should be removed once the re-use issue has been
589 /// fixed.
590 ///
591 /// Returns a mapping of SCEVs to their expanded IR values.
592 /// Note that this is a temporary workaround needed due to the current
593 /// epilogue handling.
595 None, ///< Not part of epilogue vectorization.
596 MainLoop, ///< Vectorizing the main loop of epilogue vectorization.
597 Epilogue ///< Vectorizing the epilogue loop.
598 };
600 executePlan(ElementCount VF, unsigned UF, VPlan &BestPlan,
602 EpilogueVectorizationKind EpilogueVecKind =
604
605#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
606 void printPlans(raw_ostream &O);
607#endif
608
609 /// Look through the existing plans and return true if we have one with
610 /// vectorization factor \p VF.
612 return any_of(VPlans,
613 [&](const VPlanPtr &Plan) { return Plan->hasVF(VF); });
614 }
615
616 /// Test a \p Predicate on a \p Range of VF's. Return the value of applying
617 /// \p Predicate on Range.Start, possibly decreasing Range.End such that the
618 /// returned value holds for the entire \p Range.
619 static bool
620 getDecisionAndClampRange(const std::function<bool(ElementCount)> &Predicate,
621 VFRange &Range);
622
623 /// \return A VPlan for the most profitable epilogue vectorization, with its
624 /// VF narrowed to the chosen factor. The returned plan is a duplicate.
625 /// Returns nullptr if epilogue vectorization is not supported or not
626 /// profitable for the loop.
627 std::unique_ptr<VPlan>
628 selectBestEpiloguePlan(VPlan &MainPlan, ElementCount MainLoopVF, unsigned IC);
629
630 /// Emit remarks for recipes with invalid costs in the available VPlans.
632
633 /// Add a check to \p Plan that determines whether the vector loop should be
634 /// executed, based on its trip count.
635 void addMinimumIterationCheck(VPlan &Plan, ElementCount VF, unsigned UF,
636 ElementCount MinProfitableTripCount) const;
637
638 /// Attach the runtime checks of \p RTChecks to \p Plan.
639 void attachRuntimeChecks(VPlan &Plan, GeneratedRTChecks &RTChecks,
640 bool HasBranchWeights) const;
641
642 /// Update loop metadata and profile info for both the scalar remainder loop
643 /// and \p VectorLoop, if it exists. Keeps all loop hints from the original
644 /// loop on the vector loop and replaces vectorizer-specific metadata. The
645 /// loop ID of the original loop \p OrigLoopID must be passed, together with
646 /// the average trip count and invocation weight of the original loop (\p
647 /// OrigAverageTripCount and \p OrigLoopInvocationWeight respectively). They
648 /// cannot be retrieved after the plan has been executed, as the original loop
649 /// may have been removed.
651 Loop *VectorLoop, VPBasicBlock *HeaderVPBB, const VPlan &Plan,
652 bool VectorizingEpilogue, MDNode *OrigLoopID,
653 std::optional<unsigned> OrigAverageTripCount,
654 unsigned OrigLoopInvocationWeight, unsigned EstimatedVFxUF,
655 bool DisableRuntimeUnroll);
656
657protected:
658 /// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive,
659 /// according to the information gathered by Legal when it checked if it is
660 /// legal to vectorize the loop.
661 void buildVPlans(ElementCount MinVF, ElementCount MaxVF);
662
663private:
664 /// Build a VPlan according to the information gathered by Legal. \return a
665 /// VPlan for vectorization factors \p Range.Start and up to \p Range.End
666 /// exclusive, possibly decreasing \p Range.End. If no VPlan can be built for
667 /// the input range, set the largest included VF to the maximum VF for which
668 /// no plan could be built.
669 VPlanPtr tryToBuildVPlan(VFRange &Range);
670
671 /// Build a VPlan using VPRecipes according to the information gathered by
672 /// Legal. This method is only used for the legacy inner loop vectorizer.
673 /// \p Range's largest included VF is restricted to the maximum VF the
674 /// returned VPlan is valid for. If no VPlan can be built for the input range,
675 /// set the largest included VF to the maximum VF for which no plan could be
676 /// built. Each VPlan is built starting from a copy of \p InitialPlan, which
677 /// is a plain CFG VPlan wrapping the original scalar loop.
678 VPlanPtr tryToBuildVPlanWithVPRecipes(VPlanPtr InitialPlan, VFRange &Range,
679 LoopVersioning *LVer);
680
681 /// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive,
682 /// according to the information gathered by Legal when it checked if it is
683 /// legal to vectorize the loop. This method creates VPlans using VPRecipes.
684 void buildVPlansWithVPRecipes(ElementCount MinVF, ElementCount MaxVF);
685
686 /// Add ComputeReductionResult recipes to the middle block to compute the
687 /// final reduction results. Add Select recipes to the latch block when
688 /// folding tail, to feed ComputeReductionResult with the last or penultimate
689 /// iteration values according to the header mask.
690 void addReductionResultComputation(VPlanPtr &Plan,
691 VPRecipeBuilder &RecipeBuilder,
692 ElementCount MinVF);
693
694 /// Returns true if the per-lane cost of VectorizationFactor A is lower than
695 /// that of B.
696 bool isMoreProfitable(const VectorizationFactor &A,
697 const VectorizationFactor &B, bool HasTail,
698 bool IsEpilogue = false) const;
699
700 /// Returns true if the per-lane cost of VectorizationFactor A is lower than
701 /// that of B in the context of vectorizing a loop with known \p MaxTripCount.
702 bool isMoreProfitable(const VectorizationFactor &A,
703 const VectorizationFactor &B,
704 const unsigned MaxTripCount, bool HasTail,
705 bool IsEpilogue = false) const;
706
707 /// Determines if we have the infrastructure to vectorize the loop and its
708 /// epilogue, assuming the main loop is vectorized by \p MainPlan.
709 bool isCandidateForEpilogueVectorization(VPlan &MainPlan) const;
710};
711
712} // namespace llvm
713
714#endif // LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
dxil translate DXIL Translate Metadata
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
#define T
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
const SmallVectorImpl< MachineOperand > & Cond
This file defines the SmallSet class.
This file contains the declarations of the Vectorization Plan base classes:
Value * RHS
Value * LHS
static const uint32_t IV[8]
Definition blake3_impl.h:83
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
A debug info location.
Definition DebugLoc.h:123
static DebugLoc getUnknown()
Definition DebugLoc.h:161
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:159
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:309
Utility class for floating point operations which can have information about relaxed accuracy require...
Definition Operator.h:200
FastMathFlags getFastMathFlags() const
Convenience function for getting all the fast-math flags.
Definition Operator.h:333
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:23
Represents flags for the getelementptr instruction/expression.
static GEPNoWrapFlags none()
InductionKind
This enum represents the kinds of inductions that we support.
InnerLoopVectorizer vectorizes loops which contain only one basic block to a specified vectorization ...
Drive the analysis of interleaved memory accesses in the loop.
LoopVectorizationCostModel - estimates the expected speedups due to vectorization.
LoopVectorizationLegality checks if it is legal to vectorize a loop, and to what vectorization factor...
DenseMap< const SCEV *, Value * > executePlan(ElementCount VF, unsigned UF, VPlan &BestPlan, InnerLoopVectorizer &LB, DominatorTree *DT, EpilogueVectorizationKind EpilogueVecKind=EpilogueVectorizationKind::None)
EpilogueVectorizationKind
Generate the IR code for the vectorized loop captured in VPlan BestPlan according to the best selecte...
@ MainLoop
Vectorizing the main loop of epilogue vectorization.
VPlan & getPlanFor(ElementCount VF) const
Return the VPlan for VF.
Definition VPlan.cpp:1720
LoopVectorizationPlanner(Loop *L, LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI, const TargetTransformInfo &TTI, LoopVectorizationLegality *Legal, LoopVectorizationCostModel &CM, InterleavedAccessInfo &IAI, PredicatedScalarEvolution &PSE, const LoopVectorizeHints &Hints, OptimizationRemarkEmitter *ORE)
VectorizationFactor planInVPlanNativePath(ElementCount UserVF)
Use the VPlan-native path to plan how to best vectorize, return the best VF and its cost.
void updateLoopMetadataAndProfileInfo(Loop *VectorLoop, VPBasicBlock *HeaderVPBB, const VPlan &Plan, bool VectorizingEpilogue, MDNode *OrigLoopID, std::optional< unsigned > OrigAverageTripCount, unsigned OrigLoopInvocationWeight, unsigned EstimatedVFxUF, bool DisableRuntimeUnroll)
Update loop metadata and profile info for both the scalar remainder loop and VectorLoop,...
Definition VPlan.cpp:1771
void buildVPlans(ElementCount MinVF, ElementCount MaxVF)
Build VPlans for power-of-2 VF's between MinVF and MaxVF inclusive, according to the information gath...
Definition VPlan.cpp:1704
void attachRuntimeChecks(VPlan &Plan, GeneratedRTChecks &RTChecks, bool HasBranchWeights) const
Attach the runtime checks of RTChecks to Plan.
unsigned selectInterleaveCount(VPlan &Plan, ElementCount VF, InstructionCost LoopCost)
void emitInvalidCostRemarks(OptimizationRemarkEmitter *ORE)
Emit remarks for recipes with invalid costs in the available VPlans.
static bool getDecisionAndClampRange(const std::function< bool(ElementCount)> &Predicate, VFRange &Range)
Test a Predicate on a Range of VF's.
Definition VPlan.cpp:1685
void printPlans(raw_ostream &O)
Definition VPlan.cpp:1866
void plan(ElementCount UserVF, unsigned UserIC)
Build VPlans for the specified UserVF and UserIC if they are non-zero or all applicable candidate VFs...
std::unique_ptr< VPlan > selectBestEpiloguePlan(VPlan &MainPlan, ElementCount MainLoopVF, unsigned IC)
void addMinimumIterationCheck(VPlan &Plan, ElementCount VF, unsigned UF, ElementCount MinProfitableTripCount) const
Create a check to Plan to see if the vector loop should be executed based on its trip count.
bool hasPlanWithVF(ElementCount VF) const
Look through the existing plans and return true if we have one with vectorization factor VF.
std::pair< VectorizationFactor, VPlan * > computeBestVF()
Compute and return the most profitable vectorization factor and the corresponding best VPlan.
Utility class for getting and setting loop vectorizer hints in the form of loop metadata.
This class emits a version of the loop where run-time checks ensure that may-alias pointers can't ove...
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
Metadata node.
Definition Metadata.h:1080
Root of the metadata hierarchy.
Definition Metadata.h:64
The optimization diagnostic interface.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
This class represents an analyzed expression in the program.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Provides information about what library functions are available for the current target.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:236
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition VPlan.h:4168
RecipeListTy::iterator iterator
Instruction iterators...
Definition VPlan.h:4195
iterator end()
Definition VPlan.h:4205
VPlan * getPlan()
Definition VPlan.cpp:177
InsertPointGuard(const InsertPointGuard &)=delete
InsertPointGuard & operator=(const InsertPointGuard &)=delete
InsertPoint - A saved insertion point.
VPInsertPoint(VPBasicBlock *InsertBlock, VPBasicBlock::iterator InsertPoint)
Creates a new insertion point at the given location.
VPBasicBlock::iterator getPoint() const
VPInsertPoint()=default
Creates a new insertion point which doesn't point to anything.
bool isSet() const
Returns true if this insert point is set.
VPlan-based builder utility analogous to IRBuilder.
VPValue * createScalarSExtOrTrunc(VPValue *Op, Type *ResultTy, Type *SrcTy, DebugLoc DL)
VPInstruction * createAdd(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", VPRecipeWithIRFlags::WrapFlagsTy WrapFlags={false, false})
VPInstruction * createOr(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPValue * createScalarZExtOrTrunc(VPValue *Op, Type *ResultTy, Type *SrcTy, DebugLoc DL)
VPInstruction * createSub(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", VPRecipeWithIRFlags::WrapFlagsTy WrapFlags={false, false})
void setInsertPoint(VPBasicBlock *TheBB, VPBasicBlock::iterator IP)
This specifies that created instructions should be inserted at the specified point.
void setInsertPoint(VPRecipeBase *IP)
This specifies that created instructions should be inserted at the specified point.
VPValue * createElementCount(Type *Ty, ElementCount EC)
T * insert(T *R)
Insert R at the current insertion point. Returns R unchanged.
VPInstruction * createLogicalOr(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
void restoreIP(VPInsertPoint IP)
Sets the current insert point to a previously-saved location.
VPInstruction * createNot(VPValue *Operand, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createAnyOfReduction(VPValue *ChainOp, VPValue *TrueVal, VPValue *FalseVal, DebugLoc DL=DebugLoc::getUnknown())
Create an AnyOf reduction pattern: or-reduce ChainOp, freeze the result, then select between TrueVal ...
Definition VPlan.cpp:1670
VPInstruction * createLogicalAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPBasicBlock * getInsertBlock() const
VPBasicBlock::iterator getInsertPoint() const
VPInstruction * createScalarCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, DebugLoc DL, const VPIRMetadata &Metadata={})
VPScalarIVStepsRecipe * createScalarIVSteps(Instruction::BinaryOps InductionOpcode, FPMathOperator *FPBinOp, VPValue *IV, VPValue *Step, VPValue *VF, DebugLoc DL)
VPBuilder(VPBasicBlock *InsertBB)
VPInstruction * createNoWrapPtrAdd(VPValue *Ptr, VPValue *Offset, GEPNoWrapFlags GEPFlags, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createFCmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new FCmp VPInstruction with predicate Pred and operands A and B.
VPInstruction * createPtrAdd(VPValue *Ptr, VPValue *Offset, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstructionWithType * createScalarLoad(Type *ResultTy, VPValue *Addr, DebugLoc DL, const VPIRMetadata &Metadata={})
static VPBuilder getToInsertAfter(VPRecipeBase *R)
Create a VPBuilder to insert after R.
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, DebugLoc DL, const Twine &Name="")
VPInstruction * createOverflowingOp(unsigned Opcode, ArrayRef< VPValue * > Operands, VPRecipeWithIRFlags::WrapFlagsTy WrapFlags={false, false}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPPhi * createScalarPhi(ArrayRef< VPValue * > IncomingValues, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", const VPIRFlags &Flags={})
VPDerivedIVRecipe * createDerivedIV(InductionDescriptor::InductionKind Kind, FPMathOperator *FPBinOp, VPIRValue *Start, VPValue *Current, VPValue *Step, const Twine &Name="")
Convert the input value Current to the corresponding value of an induction with Start and Step values...
VPBuilder(VPRecipeBase *InsertPt)
VPWidenCastRecipe * createWidenCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)
VPInstruction * createICmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new ICmp VPInstruction with predicate Pred and operands A and B.
void clearInsertionPoint()
Clear the insertion point: created instructions will not be inserted into a block.
VPInstruction * createScalarCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, DebugLoc DL, const VPIRFlags &Flags, const VPIRMetadata &Metadata={})
VPInstruction * createAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Type *ResultTy, const VPIRFlags &Flags={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPBuilder()=default
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", const VPIRFlags &Flags={})
VPExpandSCEVRecipe * createExpandSCEV(const SCEV *Expr)
VPBuilder(VPBasicBlock *TheBB, VPBasicBlock::iterator IP)
VPInstruction * createWidePtrAdd(VPValue *Ptr, VPValue *Offset, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
void setInsertPoint(VPBasicBlock *TheBB)
This specifies that created VPInstructions should be appended to the end of the specified block.
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Instruction *Inst=nullptr, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create an N-ary operation with Opcode, Operands and set Inst as its underlying Instruction.
A recipe for converting the input IV value to the corresponding value of an IV with different s...
Definition VPlan.h:3917
Recipe to expand a SCEV expression.
Definition VPlan.h:3765
Class to record and manage LLVM IR flags.
Definition VPlan.h:688
static VPIRFlags getDefaultFlags(unsigned Opcode)
Returns default flags for Opcode for opcodes that support it, asserts otherwise.
Helper to manage IR metadata for recipes.
Definition VPlan.h:1168
A specialization of VPInstruction augmenting it with a dedicated result type, to be used when the opc...
Definition VPlan.h:1516
This is a concrete Recipe that models a single VPlan-level instruction.
Definition VPlan.h:1223
@ VScale
Returns the value for vscale.
Definition VPlan.h:1336
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition VPlan.h:406
VPBasicBlock * getParent()
Definition VPlan.h:480
Helper class to create VPRecipes from IR instructions.
A recipe for handling phi nodes of integer and floating-point inductions, producing their scalar valu...
Definition VPlan.h:3988
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
Definition VPlanValue.h:49
VPWidenCastRecipe is a recipe to create vector cast instructions.
Definition VPlan.h:1837
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient outp...
Definition VPlan.h:4526
VPIRValue * getConstantInt(Type *Ty, uint64_t Val, bool IsSigned=false)
Return a VPIRValue wrapping a ConstantInt with the given type and value.
Definition VPlan.h:4824
self_iterator getIterator()
Definition ilist_node.h:123
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:532
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
cl::opt< unsigned > ForceTargetInstructionCost
DWARFExpression::Operation Op
cl::opt< bool > EnableVPlanNativePath
std::unique_ptr< VPlan > VPlanPtr
Definition VPlan.h:78
FixedScalableVFPair(const ElementCount &FixedVF, const ElementCount &ScalableVF)
FixedScalableVFPair(const ElementCount &Max)
static FixedScalableVFPair getNone()
A range of powers-of-2 vectorization factors with fixed start and adjustable end.
Struct to hold various analyses needed for cost computations.
A VPValue representing a live-in from the input IR or a constant.
Definition VPlanValue.h:240
A struct that represents some properties of the register usage of a loop.
TODO: The following VectorizationFactor was pulled out of LoopVectorizationCostModel class.
InstructionCost Cost
Cost of the loop with that width.
ElementCount MinProfitableTripCount
The minimum trip count required to make vectorization profitable, e.g.
bool operator==(const VectorizationFactor &rhs) const
ElementCount Width
Vector width with best cost.
InstructionCost ScalarCost
Cost of the scalar loop.
bool operator!=(const VectorizationFactor &rhs) const
static VectorizationFactor Disabled()
Width 1 means no vectorization, cost 0 means uncomputed cost.
VectorizationFactor(ElementCount Width, InstructionCost Cost, InstructionCost ScalarCost)