LLVM 23.0.0git
LoopVectorizationPlanner.h
Go to the documentation of this file.
1//===- LoopVectorizationPlanner.h - Planner for LoopVectorization ---------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file provides a LoopVectorizationPlanner class.
11/// InnerLoopVectorizer vectorizes loops which contain only one basic block.
12/// LoopVectorizationPlanner drives the vectorization process after having
13/// passed Legality checks.
14/// The planner builds and optimizes the Vectorization Plans which record the
15/// decisions on how to vectorize the given loop. In particular, they represent
16/// the control-flow of the vectorized version, the replication of instructions
17/// that are to be scalarized, and interleave access groups.
18///
19/// Also provides a VPlan-based builder utility analogous to IRBuilder.
20/// It provides an instruction-level API for generating VPInstructions while
21/// abstracting away the Recipe manipulation details.
22//===----------------------------------------------------------------------===//
23
24#ifndef LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H
25#define LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H
26
27#include "VPlan.h"
28#include "llvm/ADT/SmallSet.h"
31
32namespace {
33class GeneratedRTChecks;
34}
35
36namespace llvm {
37
38class LoopInfo;
39class DominatorTree;
45class LoopVersioning;
48class VPRecipeBuilder;
49struct VPRegisterUsage;
50struct VFRange;
51
55
56/// \return An upper bound for vscale based on TTI or the vscale_range
57/// attribute.
58std::optional<unsigned> getMaxVScale(const Function &F,
60
61/// Reports an informative message: print \p Msg for debugging purposes as well
62/// as an optimization remark. Uses either \p I as location of the remark, or
63/// otherwise \p TheLoop. If \p DL is passed, use it as debug location for the
64/// remark.
65void reportVectorizationInfo(const StringRef Msg, const StringRef ORETag,
67 const Loop *TheLoop, Instruction *I = nullptr,
68 DebugLoc DL = {});
69
70/// VPlan-based builder utility analogous to IRBuilder.
71class VPBuilder {
72 VPBasicBlock *BB = nullptr;
74
75 /// Lightweight SCEV-to-VPlan expander. Converts SCEVConstant, SCEVUnknown,
76 /// SCEVVScale and SCEVMulExpr into VPInstructions. Other SCEV expressions are
77 /// unsupported.
78 class VPSCEVExpander {
79 VPBuilder &Builder;
80 VPlan &Plan;
82
83 public:
84 VPSCEVExpander(VPBuilder &Builder, VPlan &Plan, DebugLoc DL)
85 : Builder(Builder), Plan(Plan), DL(DL) {}
86
87 /// Expand \p S into recipes and live-ins using the builder. Returns nullptr
88 /// if \p S cannot be expanded yet.
89 VPValue *expand(const SCEV *S);
90 };
91
92 /// Insert \p R in BB at InsertPt if BB is set. Returns \p R in either case.
93 template <typename T> T *tryInsertInstruction(T *R) {
94 if (BB) // With no insert block set, R is returned without being inserted.
95 BB->insert(R, InsertPt);
96 return R;
97 }
98
/// Create a VPInstruction with \p Opcode, \p Operands, metadata \p MD, debug
/// location \p DL and name \p Name, passing default-constructed ({}) flags,
/// and hand it to tryInsertInstruction.
99 VPInstruction *createInstruction(unsigned Opcode,
100 ArrayRef<VPValue *> Operands,
101 const VPIRMetadata &MD, DebugLoc DL,
102 const Twine &Name = "") {
103 return tryInsertInstruction(
104 new VPInstruction(Opcode, Operands, {}, MD, DL, Name));
105 }
106
/// Return the VPlan obtained from the current insert block. Asserts that an
/// insert block has been set.
107 VPlan &getPlan() const {
108 assert(getInsertBlock() && "Insert block must be set");
109 return *getInsertBlock()->getPlan();
110 }
111
112public:
113 VPBuilder() = default;
114 VPBuilder(VPBasicBlock *InsertBB) { setInsertPoint(InsertBB); }
115 VPBuilder(VPRecipeBase *InsertPt) { setInsertPoint(InsertPt); }
117 setInsertPoint(TheBB, IP);
118 }
119
120 /// Clear the insertion point: created instructions will not be inserted into
121 /// a block.
123 BB = nullptr;
124 InsertPt = VPBasicBlock::iterator();
125 }
126
127 VPBasicBlock *getInsertBlock() const { return BB; }
128 VPBasicBlock::iterator getInsertPoint() const { return InsertPt; }
129
130 /// Create a VPBuilder to insert after \p R.
132 VPBuilder B;
133 B.setInsertPoint(R->getParent(), std::next(R->getIterator()));
134 return B;
135 }
136
137 /// VPInsertPoint - A saved insertion point.
139 VPBasicBlock *Block = nullptr;
141
142 public:
143 /// Creates a new insertion point which doesn't point to anything.
144 VPInsertPoint() = default;
145
146 /// Creates a new insertion point at the given location.
148 : Block(InsertBlock), Point(InsertPoint) {}
149
150 /// Returns true if this insert point is set.
151 bool isSet() const { return Block != nullptr; }
152
153 VPBasicBlock *getBlock() const { return Block; }
154 VPBasicBlock::iterator getPoint() const { return Point; }
155 };
156
157 /// Sets the current insert point to a previously-saved location.
159 if (IP.isSet())
160 setInsertPoint(IP.getBlock(), IP.getPoint());
161 else
163 }
164
165 /// This specifies that created VPInstructions should be appended to the end
166 /// of the specified block.
168 assert(TheBB && "Attempting to set a null insert point");
169 BB = TheBB;
170 InsertPt = BB->end();
171 }
172
173 /// This specifies that created instructions should be inserted at the
174 /// specified point.
176 BB = TheBB;
177 InsertPt = IP;
178 }
179
180 /// This specifies that created instructions should be inserted at the
181 /// specified point.
183 BB = IP->getParent();
184 InsertPt = IP->getIterator();
185 }
186
187 /// Insert \p R at the current insertion point. Returns \p R unchanged.
/// Unlike tryInsertInstruction, this dereferences BB without checking it, so
/// an insert block must already be set.
188 template <typename T> [[maybe_unused]] T *insert(T *R) {
189 BB->insert(R, InsertPt);
190 return R;
191 }
192
193 /// Create an N-ary operation with \p Opcode, \p Operands and set \p Inst as
194 /// its underlying Instruction.
196 Instruction *Inst = nullptr,
197 const VPIRFlags &Flags = {},
198 const VPIRMetadata &MD = {},
200 const Twine &Name = "") {
201 VPInstruction *NewVPInst = tryInsertInstruction(
202 new VPInstruction(Opcode, Operands, Flags, MD, DL, Name));
203 NewVPInst->setUnderlyingValue(Inst);
204 return NewVPInst;
205 }
207 DebugLoc DL, const Twine &Name = "") {
208 return createInstruction(Opcode, Operands, {}, DL, Name);
209 }
211 const VPIRFlags &Flags,
213 const Twine &Name = "") {
214 return tryInsertInstruction(
215 new VPInstruction(Opcode, Operands, Flags, {}, DL, Name));
216 }
217
219 Type *ResultTy, const VPIRFlags &Flags = {},
221 const Twine &Name = "") {
222 return tryInsertInstruction(new VPInstructionWithType(
223 Opcode, Operands, ResultTy, Flags, {}, DL, Name));
224 }
225
228 const Twine &Name = "") {
229 return tryInsertInstruction(new VPInstruction(
230 VPInstruction::FirstActiveLane, Masks, {}, {}, DL, Name));
231 }
232
235 const Twine &Name = "") {
236 return tryInsertInstruction(new VPInstruction(VPInstruction::LastActiveLane,
237 Masks, {}, {}, DL, Name));
238 }
239
241 unsigned Opcode, ArrayRef<VPValue *> Operands,
242 VPRecipeWithIRFlags::WrapFlagsTy WrapFlags = {false, false},
243 DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "") {
244 return tryInsertInstruction(
245 new VPInstruction(Opcode, Operands, WrapFlags, {}, DL, Name));
246 }
247
250 const Twine &Name = "") {
251 return createInstruction(VPInstruction::Not, {Operand}, {}, DL, Name);
252 }
253
256 const Twine &Name = "") {
257 return createInstruction(Instruction::BinaryOps::And, {LHS, RHS}, {}, DL,
258 Name);
259 }
260
263 const Twine &Name = "") {
264
265 return tryInsertInstruction(new VPInstruction(
266 Instruction::BinaryOps::Or, {LHS, RHS},
267 VPRecipeWithIRFlags::DisjointFlagsTy(false), {}, DL, Name));
268 }
269
272 const Twine &Name = "",
273 VPRecipeWithIRFlags::WrapFlagsTy WrapFlags = {false, false}) {
274 return createOverflowingOp(Instruction::Add, {LHS, RHS}, WrapFlags, DL,
275 Name);
276 }
277
278 VPInstruction *
280 const Twine &Name = "",
281 VPRecipeWithIRFlags::WrapFlagsTy WrapFlags = {false, false}) {
282 return createOverflowingOp(Instruction::Sub, {LHS, RHS}, WrapFlags, DL,
283 Name);
284 }
285
291
297
299 VPValue *FalseVal,
301 const Twine &Name = "",
302 const VPIRFlags &Flags = {}) {
303 return tryInsertInstruction(new VPInstruction(
304 Instruction::Select, {Cond, TrueVal, FalseVal}, Flags, {}, DL, Name));
305 }
306
307 /// Create a new ICmp VPInstruction with predicate \p Pred and operands \p A
308 /// and \p B.
311 const Twine &Name = "") {
313 Pred <= CmpInst::LAST_ICMP_PREDICATE && "invalid predicate");
314 return tryInsertInstruction(
315 new VPInstruction(Instruction::ICmp, {A, B}, Pred, {}, DL, Name));
316 }
317
318 /// Create a new FCmp VPInstruction with predicate \p Pred and operands \p A
319 /// and \p B.
322 const Twine &Name = "") {
324 Pred <= CmpInst::LAST_FCMP_PREDICATE && "invalid predicate");
325 return tryInsertInstruction(
326 new VPInstruction(Instruction::FCmp, {A, B},
327 VPIRFlags(Pred, FastMathFlags()), {}, DL, Name));
328 }
329
330 /// Create an AnyOf reduction pattern: or-reduce \p ChainOp, freeze the
331 /// result, then select between \p TrueVal and \p FalseVal.
333 VPValue *FalseVal,
335
338 const Twine &Name = "") {
339 return tryInsertInstruction(
341 GEPNoWrapFlags::none(), {}, DL, Name));
342 }
343
345 GEPNoWrapFlags GEPFlags,
347 const Twine &Name = "") {
348 return tryInsertInstruction(new VPInstruction(
349 VPInstruction::PtrAdd, {Ptr, Offset}, GEPFlags, {}, DL, Name));
350 }
351
354 const Twine &Name = "") {
355 return tryInsertInstruction(
357 GEPNoWrapFlags::none(), {}, DL, Name));
358 }
359
362 const Twine &Name = "", const VPIRFlags &Flags = {}) {
363 return tryInsertInstruction(new VPPhi(IncomingValues, Flags, DL, Name));
364 }
365
368 const Twine &Name = "") {
369 return tryInsertInstruction(new VPWidenPHIRecipe(IncomingValues, DL, Name));
370 }
371
373 VPlan &Plan = *getInsertBlock()->getPlan();
374 VPValue *RuntimeEC = Plan.getConstantInt(Ty, EC.getKnownMinValue());
375 if (EC.isScalable()) {
376 VPValue *VScale = createNaryOp(VPInstruction::VScale, {}, Ty);
377 RuntimeEC = EC.getKnownMinValue() == 1
378 ? VScale
379 : createOverflowingOp(Instruction::Mul,
380 {VScale, RuntimeEC}, {true, false});
381 }
382 return RuntimeEC;
383 }
384
385 /// Convert the input value \p Current to the corresponding value of an
386 /// induction with \p Start and \p Step values, using \p Start + \p Current *
387 /// \p Step.
389 FPMathOperator *FPBinOp, VPIRValue *Start,
390 VPValue *Current, VPValue *Step) {
391 return tryInsertInstruction(
392 new VPDerivedIVRecipe(Kind, FPBinOp, Start, Current, Step));
393 }
394
396 DebugLoc DL,
397 const VPIRMetadata &Metadata = {}) {
398 return tryInsertInstruction(new VPInstructionWithType(
399 Instruction::Load, Addr, ResultTy, {}, Metadata, DL));
400 }
401
403 Type *ResultTy, DebugLoc DL,
404 const VPIRMetadata &Metadata = {}) {
405 return tryInsertInstruction(new VPInstructionWithType(
406 Opcode, Op, ResultTy, VPIRFlags::getDefaultFlags(Opcode), Metadata,
407 DL));
408 }
409
411 Type *ResultTy, DebugLoc DL,
412 const VPIRFlags &Flags,
413 const VPIRMetadata &Metadata = {}) {
414 return tryInsertInstruction(
415 new VPInstructionWithType(Opcode, Op, ResultTy, Flags, Metadata, DL));
416 }
417
419 DebugLoc DL) {
420 if (ResultTy == SrcTy)
421 return Op;
422 Instruction::CastOps CastOp =
423 ResultTy->getScalarSizeInBits() < SrcTy->getScalarSizeInBits()
424 ? Instruction::Trunc
425 : Instruction::ZExt;
426 return createScalarCast(CastOp, Op, ResultTy, DL);
427 }
428
430 DebugLoc DL) {
431 if (ResultTy == SrcTy)
432 return Op;
433 Instruction::CastOps CastOp =
434 ResultTy->getScalarSizeInBits() < SrcTy->getScalarSizeInBits()
435 ? Instruction::Trunc
436 : Instruction::SExt;
437 return createScalarCast(CastOp, Op, ResultTy, DL);
438 }
439
441 Type *ResultTy) {
442 return tryInsertInstruction(new VPWidenCastRecipe(
443 Opcode, Op, ResultTy, nullptr, VPIRFlags::getDefaultFlags(Opcode)));
444 }
445
448 FPMathOperator *FPBinOp, VPValue *IV, VPValue *Step,
449 VPValue *VF, DebugLoc DL) {
450 return tryInsertInstruction(new VPScalarIVStepsRecipe(
451 IV, Step, VF, InductionOpcode,
452 FPBinOp ? FPBinOp->getFastMathFlags() : FastMathFlags(), DL));
453 }
454
455 /// Expand \p Expr using VPSCEVExpander. Returns nullptr if \p Expr cannot be
456 /// expanded yet.
458 return VPSCEVExpander(*this, getPlan(), DL).expand(Expr);
459 }
460
462 return tryInsertInstruction(new VPExpandSCEVRecipe(Expr));
463 }
464
466 createVectorPointer(VPValue *Ptr, Type *SourceElementTy, VPValue *Stride,
467 GEPNoWrapFlags GEPFlags, DebugLoc DL) {
468 return tryInsertInstruction(
469 new VPVectorPointerRecipe(Ptr, SourceElementTy, Stride, GEPFlags, DL));
470 }
471
473 Intrinsic::ID VectorIntrinsicID, ArrayRef<VPValue *> CallArguments,
474 Type *Ty, Align Alignment, const VPIRMetadata &MD, DebugLoc DL) {
475 return tryInsertInstruction(new VPWidenMemIntrinsicRecipe(
476 VectorIntrinsicID, CallArguments, Ty, Alignment, MD, DL));
477 }
478
479 //===--------------------------------------------------------------------===//
480 // RAII helpers.
481 //===--------------------------------------------------------------------===//
482
483 /// RAII object that stores the current insertion point and restores it when
484 /// the object is destroyed.
486 VPBuilder &Builder;
487 VPBasicBlock *Block;
489
490 public:
492 : Builder(B), Block(B.getInsertBlock()), Point(B.getInsertPoint()) {}
493
496
497 ~InsertPointGuard() { Builder.restoreIP(VPInsertPoint(Block, Point)); }
498 };
499};
500
501/// TODO: The following VectorizationFactor was pulled out of
502/// LoopVectorizationCostModel class. LV also deals with
503/// VectorizerParams::VectorizationFactor.
504/// We need to streamline them.
505
506/// Information about vectorization costs.
508 /// Vector width with best cost.
510
511 /// Cost of the loop with that width.
513
514 /// Cost of the scalar loop.
516
517 /// The minimum trip count required to make vectorization profitable, e.g. due
518 /// to runtime checks.
520
524
525 /// Width 1 means no vectorization, cost 0 means uncomputed cost.
527 return {ElementCount::getFixed(1), 0, 0};
528 }
529
/// Two factors compare equal when both Width and Cost match; no other member
/// takes part in the comparison.
530 bool operator==(const VectorizationFactor &rhs) const {
531 return Width == rhs.Width && Cost == rhs.Cost;
532 }
533
534 bool operator!=(const VectorizationFactor &rhs) const {
535 return !(*this == rhs);
536 }
537};
538
539/// A class that represents two vectorization factors (initialized with 0 by
540/// default). One for fixed-width vectorization and one for scalable
541/// vectorization. This can be used by the vectorizer to choose from a range of
542/// fixed and/or scalable VFs in order to find the most cost-effective VF to
543/// vectorize with.
547
549 : FixedVF(ElementCount::getFixed(0)),
550 ScalableVF(ElementCount::getScalable(0)) {}
552 *(Max.isScalable() ? &ScalableVF : &FixedVF) = Max;
553 }
557 assert(!FixedVF.isScalable() && ScalableVF.isScalable() &&
558 "Invalid scalable properties");
559 }
560
562
563 /// \return true if either fixed- or scalable VF is non-zero.
564 explicit operator bool() const { return FixedVF || ScalableVF; }
565
566 /// \return true if either fixed- or scalable VF is a valid vector VF.
567 bool hasVector() const { return FixedVF.isVector() || ScalableVF.isVector(); }
568};
569
570/// Holds state needed to make cost decisions before computing costs per-VF,
571/// including the maximum VFs.
573 /// \return True if maximizing vector bandwidth is enabled by the target or
574 /// user options, for the given register kind (scalable or fixed-width).
575 bool useMaxBandwidth(bool IsScalable) const;
576
577 /// \return the maximized element count based on the target's vector
578 /// registers and the loop trip-count, but limited to a maximum safe VF.
579 /// This is a helper function of computeFeasibleMaxVF.
580 ElementCount getMaximizedVFForTarget(unsigned MaxTripCount,
581 unsigned SmallestType,
582 unsigned WidestType,
583 ElementCount MaxSafeVF, unsigned UserIC,
584 bool FoldTailByMasking,
585 bool RequiresScalarEpilogue);
586
587 /// If \p VF * \p UserIC > \p MaxTripCount, clamps VF to the next lower VF
588 /// that results in VF * UserIC <= MaxTripCount.
589 ElementCount clampVFByMaxTripCount(ElementCount VF, unsigned MaxTripCount,
590 unsigned UserIC, bool FoldTailByMasking,
591 bool RequiresScalarEpilogue) const;
592
593 /// Checks if scalable vectorization is supported and enabled. Caches the
594 /// result to avoid repeated debug dumps for repeated queries.
595 bool isScalableVectorizationAllowed();
596
597 /// \return the maximum legal scalable VF, based on the safe max number
598 /// of elements.
599 ElementCount getMaxLegalScalableVF(unsigned MaxSafeElements);
600
601 /// Initializes the value of vscale used for tuning the cost model. If
602 /// vscale_range.min == vscale_range.max then use vscale_range.max, else
603 /// use the value returned by the corresponding TTI method.
604 void initializeVScaleForTuning();
605
606 const TargetTransformInfo &TTI;
607 const LoopVectorizationLegality *Legal;
608 const Loop *TheLoop;
609 const Function &F;
611 DemandedBits *DB;
613 const LoopVectorizeHints *Hints;
614
615 /// Cached result of isScalableVectorizationAllowed.
616 std::optional<bool> IsScalableVectorizationAllowed;
617
618 /// Used to store the value of vscale used for tuning the cost model. It is
619 /// initialized during object construction.
620 std::optional<unsigned> VScaleForTuning;
621
622 /// The highest VF possible for this loop, without using MaxBandwidth.
623 FixedScalableVFPair MaxPermissibleVFWithoutMaxBW;
624
625 /// All element types found in the loop.
626 SmallPtrSet<Type *, 16> ElementTypesInLoop;
627
628 /// PHINodes of the reductions that should be expanded in-loop. Set by
629 /// collectInLoopReductions.
630 SmallPtrSet<PHINode *, 4> InLoopReductions;
631
632 /// A Map of inloop reduction operations and their immediate chain operand.
633 /// FIXME: This can be removed once reductions can be costed correctly in
634 /// VPlan. This was added to allow quick lookup of the inloop operations.
635 /// Set by collectInLoopReductions.
636 DenseMap<Instruction *, Instruction *> InLoopReductionImmediateChains;
637
638 /// Maximum safe number of elements to be processed per vector iteration,
639 /// which do not prevent store-load forwarding and are safe with regard to the
640 /// memory dependencies. Required for EVL-based vectorization, where this
641 /// value is used as the upper bound of the safe AVL. Set by
642 /// computeFeasibleMaxVF.
643 std::optional<unsigned> MaxSafeElements;
644
645 /// Map of scalar integer values to the smallest bitwidth they can be legally
646 /// represented as. The vector equivalents of these values should be truncated
647 /// to this type.
649
650public:
651 /// The kind of cost that we are calculating.
653
654 /// Whether this loop should be optimized for size based on function attribute
655 /// or profile information.
656 const bool OptForSize;
657
659 const LoopVectorizationLegality *Legal,
660 const Loop *TheLoop, const Function &F,
663 const LoopVectorizeHints *Hints, bool OptForSize)
664 : TTI(TTI), Legal(Legal), TheLoop(TheLoop), F(F), PSE(PSE), DB(DB),
665 ORE(ORE), Hints(Hints),
666 CostKind(F.hasMinSize() ? TTI::TCK_CodeSize : TTI::TCK_RecipThroughput),
668 initializeVScaleForTuning();
669 }
670
671 /// \return The vscale value used for tuning the cost model.
672 std::optional<unsigned> getVScaleForTuning() const { return VScaleForTuning; }
673
674 /// \return True if register pressure should be considered for the given VF.
676
677 /// \return True if scalable vectors are supported by the target or forced.
678 bool supportsScalableVectors() const;
679
680 /// Collect element types in the loop that need widening.
682 const SmallPtrSetImpl<const Value *> *ValuesToIgnore = nullptr);
683
684 /// \return The size (in bits) of the smallest and widest types in the code
685 /// that need to be vectorized. We ignore values that remain scalar such as
686 /// 64 bit loop indices.
687 std::pair<unsigned, unsigned> getSmallestAndWidestTypes() const;
688
689 /// \return An upper bound for the vectorization factors for both
690 /// fixed and scalable vectorization, where the minimum-known number of
691 /// elements is a power-of-2 larger than zero. If scalable vectorization is
692 /// disabled or unsupported, then the scalable part will be equal to
693 /// ElementCount::getScalable(0). Also sets MaxSafeElements.
694 FixedScalableVFPair computeFeasibleMaxVF(unsigned MaxTripCount,
695 ElementCount UserVF, unsigned UserIC,
696 bool FoldTailByMasking,
697 bool RequiresScalarEpilogue);
698
699 /// Return maximum safe number of elements to be processed per vector
700 /// iteration, which do not prevent store-load forwarding and are safe with
701 /// regard to the memory dependencies. Required for EVL-based VPlans to
702 /// correctly calculate AVL (application vector length) as min(remaining AVL,
703 /// MaxSafeElements). Set by computeFeasibleMaxVF.
704 /// TODO: need to consider adjusting cost model to use this value as a
705 /// vectorization factor for EVL-based vectorization.
706 std::optional<unsigned> getMaxSafeElements() const { return MaxSafeElements; }
707
708 /// Returns true if we should use strict in-order reductions for the given
709 /// RdxDesc. This is true if the -enable-strict-reductions flag is passed,
710 /// the IsOrdered flag of RdxDesc is set and we do not allow reordering
711 /// of FP operations.
712 bool useOrderedReductions(const RecurrenceDescriptor &RdxDesc) const;
713
714 /// Returns true if the target machine supports masked loads or stores
715 /// for \p I's data type and alignment. The caller must ensure the access is
716 /// consecutive or part of an interleave group.
718
719 /// Returns true if the target machine can represent \p V as a masked gather
720 /// or scatter operation.
721 bool isLegalGatherOrScatter(Value *V, ElementCount VF) const;
722
723 /// Split reductions into those that happen in the loop, and those that
724 /// happen outside. In-loop reductions are collected into InLoopReductions.
725 /// InLoopReductionImmediateChains is filled with each in-loop reduction
726 /// operation and its immediate chain operand for use during cost modelling.
728
729 /// Returns true if the Phi is part of an inloop reduction.
730 bool isInLoopReduction(PHINode *Phi) const {
731 return InLoopReductions.contains(Phi);
732 }
733
734 /// Returns the set of in-loop reduction PHIs.
736 return InLoopReductions;
737 }
738
739 /// Returns the immediate chain operand of in-loop reduction operation \p I,
740 /// or nullptr if \p I is not an in-loop reduction operation.
742 return InLoopReductionImmediateChains.lookup(I);
743 }
744
745 /// Check whether vectorization would require runtime checks. When optimizing
746 /// for size, returning true here aborts vectorization.
748
749 /// Returns a scalable VF to use for outer-loop vectorization if the target
750 /// supports it and a fixed VF otherwise.
752
753 /// Compute smallest bitwidth each instruction can be represented with.
754 /// The vector equivalents of these instructions should be truncated to this
755 /// type.
757
758 /// \returns The smallest bitwidth each instruction can be represented with.
760 return MinBWs;
761 }
762};
763
764/// Planner drives the vectorization process after having passed
765/// Legality checks.
767 /// The loop that we evaluate.
768 Loop *OrigLoop;
769
770 /// Loop Info analysis.
771 LoopInfo *LI;
772
773 /// The dominator tree.
774 DominatorTree *DT;
775
776 /// Target Library Info.
777 const TargetLibraryInfo *TLI;
778
779 /// Target Transform Info.
780 const TargetTransformInfo &TTI;
781
782 /// The legality analysis.
784
785 /// The profitability analysis.
787
788 /// VF selection state independent of cost-modeling decisions.
789 VFSelectionContext &Config;
790
791 /// The interleaved access analysis.
793
795
796 const LoopVectorizeHints &Hints;
797
799
801
802 /// Profitable vector factors.
804
805 /// A builder used to construct the current plan.
806 VPBuilder Builder;
807
808 /// Computes the cost of \p Plan for vectorization factor \p VF.
809 ///
810 /// The current implementation requires access to the
811 /// LoopVectorizationLegality to handle inductions and reductions, which is
812 /// why it is kept separate from the VPlan-only cost infrastructure.
813 ///
814 /// TODO: Move to VPlan::cost once the use of LoopVectorizationLegality has
815 /// been retired.
816 InstructionCost cost(VPlan &Plan, ElementCount VF, VPRegisterUsage *RU) const;
817
818 /// Precompute costs for certain instructions using the legacy cost model. The
819 /// function is used to bring up the VPlan-based cost model to initially avoid
820 /// taking different decisions due to inaccuracies in the legacy cost model.
821 InstructionCost precomputeCosts(VPlan &Plan, ElementCount VF,
822 VPCostContext &CostCtx) const;
823
824public:
826 Loop *L, LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI,
831 : OrigLoop(L), LI(LI), DT(DT), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM),
832 Config(Config), IAI(IAI), PSE(PSE), Hints(Hints), ORE(ORE) {}
833
834 /// Build VPlans for the specified \p UserVF and \p UserIC if they are
835 /// non-zero or all applicable candidate VFs otherwise. If vectorization and
836 /// interleaving should be avoided up-front, no plans are generated.
837 void plan(ElementCount UserVF, unsigned UserIC);
838
839 /// Return the VPlan for \p VF. At the moment, there is always a single VPlan
840 /// for each VF.
841 VPlan &getPlanFor(ElementCount VF) const;
842
843 /// Compute and return the most profitable vectorization factor and the
844 /// corresponding best VPlan. Also collect all profitable VFs in
845 /// ProfitableVFs.
846 std::pair<VectorizationFactor, VPlan *> computeBestVF();
847
848 /// \return The desired interleave count.
849 /// If interleave count has been specified by metadata it will be returned.
850 /// Otherwise, the interleave count is computed and returned. VF and LoopCost
851 /// are the selected vectorization factor and the cost of the selected VF.
852 unsigned selectInterleaveCount(VPlan &Plan, ElementCount VF,
853 InstructionCost LoopCost);
854
855 /// Generate the IR code for the vectorized loop captured in VPlan \p BestPlan
856 /// according to the best selected \p VF and \p UF.
857 ///
858 /// TODO: \p EpilogueVecKind should be removed once the re-use issue has been
859 /// fixed.
860 ///
861 /// Returns a mapping of SCEVs to their expanded IR values.
862 /// Note that this is a temporary workaround needed due to the current
863 /// epilogue handling.
865 None, ///< Not part of epilogue vectorization.
866 MainLoop, ///< Vectorizing the main loop of epilogue vectorization.
867 Epilogue ///< Vectorizing the epilogue loop.
868 };
870 executePlan(ElementCount VF, unsigned UF, VPlan &BestPlan,
872 EpilogueVectorizationKind EpilogueVecKind =
874
875#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
876 void printPlans(raw_ostream &O);
877#endif
878
879 /// Look through the existing plans and return true if we have one with
880 /// vectorization factor \p VF.
882 return any_of(VPlans,
883 [&](const VPlanPtr &Plan) { return Plan->hasVF(VF); });
884 }
885
886 /// Test a \p Predicate on a \p Range of VF's. Return the value of applying
887 /// \p Predicate on Range.Start, possibly decreasing Range.End such that the
888 /// returned value holds for the entire \p Range.
889 static bool
890 getDecisionAndClampRange(const std::function<bool(ElementCount)> &Predicate,
891 VFRange &Range);
892
893 /// \return A VPlan for the most profitable epilogue vectorization, with its
894 /// VF narrowed to the chosen factor. The returned plan is a duplicate.
895 /// Returns nullptr if epilogue vectorization is not supported or not
896 /// profitable for the loop.
897 std::unique_ptr<VPlan>
898 selectBestEpiloguePlan(VPlan &MainPlan, ElementCount MainLoopVF, unsigned IC);
899
900 /// Emit remarks for recipes with invalid costs in the available VPlans.
902
903 /// Add a check to \p Plan to see if the vector loop should be executed
904 /// based on its trip count.
905 void addMinimumIterationCheck(VPlan &Plan, ElementCount VF, unsigned UF,
906 ElementCount MinProfitableTripCount) const;
907
908 /// Attach the runtime checks of \p RTChecks to \p Plan.
909 void attachRuntimeChecks(VPlan &Plan, GeneratedRTChecks &RTChecks,
910 bool HasBranchWeights) const;
911
912 /// Update loop metadata and profile info for both the scalar remainder loop
913 /// and \p VectorLoop, if it exists. Keeps all loop hints from the original
914 /// loop on the vector loop and replaces vectorizer-specific metadata. The
915 /// loop ID of the original loop \p OrigLoopID must be passed, together with
916 /// the average trip count and invocation weight of the original loop (\p
917 /// OrigAverageTripCount and \p OrigLoopInvocationWeight respectively). They
918 /// cannot be retrieved after the plan has been executed, as the original loop
919 /// may have been removed.
921 Loop *VectorLoop, VPBasicBlock *HeaderVPBB, const VPlan &Plan,
922 bool VectorizingEpilogue, MDNode *OrigLoopID,
923 std::optional<unsigned> OrigAverageTripCount,
924 unsigned OrigLoopInvocationWeight, unsigned EstimatedVFxUF,
925 bool DisableRuntimeUnroll);
926
927private:
928 /// Build a VPlan using VPRecipes according to the information gathered by
929 /// Legal and VPlan-based analysis. For outer loops, performs basic recipe
930 /// conversion only. For inner loops, \p Range's largest included VF is
931 /// restricted to the maximum VF the returned VPlan is valid for. If no VPlan
932 /// can be built for the input range, set the largest included VF to the
933 /// maximum VF for which no plan could be built. Each VPlan is built starting
934 /// from a copy of \p InitialPlan, which is a plain CFG VPlan wrapping the
935 /// original scalar loop.
936 VPlanPtr tryToBuildVPlan(VPlanPtr InitialPlan, VFRange &Range);
937
938 /// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive,
939 /// according to the information gathered by Legal when it checked if it is
940 /// legal to vectorize the loop.
941 void buildVPlans(ElementCount MinVF, ElementCount MaxVF);
942
943 /// Add ComputeReductionResult recipes to the middle block to compute the
944 /// final reduction results. Add Select recipes to the latch block when
945 /// folding tail, to feed ComputeReductionResult with the last or penultimate
946 /// iteration values according to the header mask.
947 void addReductionResultComputation(VPlanPtr &Plan,
948 VPRecipeBuilder &RecipeBuilder,
949 ElementCount MinVF);
950
951 /// Returns true if the per-lane cost of VectorizationFactor A is lower than
952 /// that of B.
953 bool isMoreProfitable(const VectorizationFactor &A,
954 const VectorizationFactor &B, bool HasTail,
955 bool IsEpilogue = false) const;
956
957 /// Returns true if the per-lane cost of VectorizationFactor A is lower than
958 /// that of B in the context of vectorizing a loop with known \p MaxTripCount.
959 bool isMoreProfitable(const VectorizationFactor &A,
960 const VectorizationFactor &B,
961 const unsigned MaxTripCount, bool HasTail,
962 bool IsEpilogue = false) const;
963
964 /// Determines if we have the infrastructure to vectorize the loop and its
965 /// epilogue, assuming the main loop is vectorized by \p MainPlan.
966 bool isCandidateForEpilogueVectorization(VPlan &MainPlan) const;
967};
968
969} // namespace llvm
970
971#endif // LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
dxil translate DXIL Translate Metadata
static Expected< BitVector > expand(StringRef S, StringRef Original)
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define T
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
const SmallVectorImpl< MachineOperand > & Cond
This file defines the SmallSet class.
This pass exposes codegen information to IR-level passes.
This file contains the declarations of the Vectorization Plan base classes:
Value * RHS
Value * LHS
static const uint32_t IV[8]
Definition blake3_impl.h:83
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:740
A debug info location.
Definition DebugLoc.h:123
static DebugLoc getUnknown()
Definition DebugLoc.h:161
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:159
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:309
Utility class for floating point operations which can have information about relaxed accuracy require...
Definition Operator.h:200
FastMathFlags getFastMathFlags() const
Convenience function for getting all the fast-math flags.
Definition Operator.h:289
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:23
Represents flags for the getelementptr instruction/expression.
static GEPNoWrapFlags none()
InductionKind
This enum represents the kinds of inductions that we support.
InnerLoopVectorizer vectorizes loops which contain only one basic block to a specified vectorization ...
Drive the analysis of interleaved memory accesses in the loop.
LoopVectorizationCostModel - estimates the expected speedups due to vectorization.
LoopVectorizationLegality checks if it is legal to vectorize a loop, and to what vectorization factor...
DenseMap< const SCEV *, Value * > executePlan(ElementCount VF, unsigned UF, VPlan &BestPlan, InnerLoopVectorizer &LB, DominatorTree *DT, EpilogueVectorizationKind EpilogueVecKind=EpilogueVectorizationKind::None)
EpilogueVectorizationKind
Generate the IR code for the vectorized loop captured in VPlan BestPlan according to the best selecte...
@ MainLoop
Vectorizing the main loop of epilogue vectorization.
VPlan & getPlanFor(ElementCount VF) const
Return the VPlan for VF.
Definition VPlan.cpp:1712
void updateLoopMetadataAndProfileInfo(Loop *VectorLoop, VPBasicBlock *HeaderVPBB, const VPlan &Plan, bool VectorizingEpilogue, MDNode *OrigLoopID, std::optional< unsigned > OrigAverageTripCount, unsigned OrigLoopInvocationWeight, unsigned EstimatedVFxUF, bool DisableRuntimeUnroll)
Update loop metadata and profile info for both the scalar remainder loop and VectorLoop,...
Definition VPlan.cpp:1763
void attachRuntimeChecks(VPlan &Plan, GeneratedRTChecks &RTChecks, bool HasBranchWeights) const
Attach the runtime checks of RTChecks to Plan.
LoopVectorizationPlanner(Loop *L, LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI, const TargetTransformInfo &TTI, LoopVectorizationLegality *Legal, LoopVectorizationCostModel &CM, VFSelectionContext &Config, InterleavedAccessInfo &IAI, PredicatedScalarEvolution &PSE, const LoopVectorizeHints &Hints, OptimizationRemarkEmitter *ORE)
unsigned selectInterleaveCount(VPlan &Plan, ElementCount VF, InstructionCost LoopCost)
void emitInvalidCostRemarks(OptimizationRemarkEmitter *ORE)
Emit remarks for recipes with invalid costs in the available VPlans.
static bool getDecisionAndClampRange(const std::function< bool(ElementCount)> &Predicate, VFRange &Range)
Test a Predicate on a Range of VF's.
Definition VPlan.cpp:1698
void printPlans(raw_ostream &O)
Definition VPlan.cpp:1869
void plan(ElementCount UserVF, unsigned UserIC)
Build VPlans for the specified UserVF and UserIC if they are non-zero or all applicable candidate VFs...
std::unique_ptr< VPlan > selectBestEpiloguePlan(VPlan &MainPlan, ElementCount MainLoopVF, unsigned IC)
void addMinimumIterationCheck(VPlan &Plan, ElementCount VF, unsigned UF, ElementCount MinProfitableTripCount) const
Create a check to Plan to see if the vector loop should be executed based on its trip count.
bool hasPlanWithVF(ElementCount VF) const
Look through the existing plans and return true if we have one with vectorization factor VF.
std::pair< VectorizationFactor, VPlan * > computeBestVF()
Compute and return the most profitable vectorization factor and the corresponding best VPlan.
Utility class for getting and setting loop vectorizer hints in the form of loop metadata.
This class emits a version of the loop where run-time checks ensure that may-alias pointers can't ove...
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
Metadata node.
Definition Metadata.h:1080
This class implements a map that also provides access to all stored values in a deterministic order.
Definition MapVector.h:38
Root of the metadata hierarchy.
Definition Metadata.h:64
The optimization diagnostic interface.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
The RecurrenceDescriptor is used to identify recurrence variables in a loop.
This class represents an analyzed expression in the program.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
Provides information about what library functions are available for the current target.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
TargetCostKind
The kind of cost model.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:236
Holds state needed to make cost decisions before computing costs per-VF, including the maximum VFs.
const bool OptForSize
Whether this loop should be optimized for size based on function attribute or profile information.
FixedScalableVFPair computeVPlanOuterloopVF(ElementCount UserVF)
Returns a scalable VF to use for outer-loop vectorization if the target supports it and a fixed VF ot...
bool isInLoopReduction(PHINode *Phi) const
Returns true if the Phi is part of an inloop reduction.
std::pair< unsigned, unsigned > getSmallestAndWidestTypes() const
const TTI::TargetCostKind CostKind
The kind of cost that we are calculating.
bool runtimeChecksRequired()
Check whether vectorization would require runtime checks.
bool isLegalGatherOrScatter(Value *V, ElementCount VF) const
Returns true if the target machine can represent V as a masked gather or scatter operation.
void collectInLoopReductions()
Split reductions into those that happen in the loop, and those that happen outside.
const SmallPtrSetImpl< PHINode * > & getInLoopReductions() const
Returns the set of in-loop reduction PHIs.
std::optional< unsigned > getMaxSafeElements() const
Return maximum safe number of elements to be processed per vector iteration, which do not prevent sto...
FixedScalableVFPair computeFeasibleMaxVF(unsigned MaxTripCount, ElementCount UserVF, unsigned UserIC, bool FoldTailByMasking, bool RequiresScalarEpilogue)
const MapVector< Instruction *, uint64_t > & getMinimalBitwidths() const
VFSelectionContext(const TargetTransformInfo &TTI, const LoopVectorizationLegality *Legal, const Loop *TheLoop, const Function &F, PredicatedScalarEvolution &PSE, DemandedBits *DB, OptimizationRemarkEmitter *ORE, const LoopVectorizeHints *Hints, bool OptForSize)
Instruction * getInLoopReductionImmediateChain(Instruction *I) const
Returns the immediate chain operand of in-loop reduction operation I, or nullptr if I is not an in-lo...
bool useOrderedReductions(const RecurrenceDescriptor &RdxDesc) const
Returns true if we should use strict in-order reductions for the given RdxDesc.
bool shouldConsiderRegPressureForVF(ElementCount VF) const
void collectElementTypesForWidening(const SmallPtrSetImpl< const Value * > *ValuesToIgnore=nullptr)
Collect element types in the loop that need widening.
bool isLegalMaskedLoadOrStore(Instruction *I, ElementCount VF) const
Returns true if the target machine supports masked loads or stores for I's data type and alignment.
std::optional< unsigned > getVScaleForTuning() const
void computeMinimalBitwidths()
Compute smallest bitwidth each instruction can be represented with.
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition VPlan.h:4263
RecipeListTy::iterator iterator
Instruction iterators...
Definition VPlan.h:4290
iterator end()
Definition VPlan.h:4300
VPlan * getPlan()
Definition VPlan.cpp:211
InsertPointGuard(const InsertPointGuard &)=delete
InsertPointGuard & operator=(const InsertPointGuard &)=delete
InsertPoint - A saved insertion point.
VPInsertPoint(VPBasicBlock *InsertBlock, VPBasicBlock::iterator InsertPoint)
Creates a new insertion point at the given location.
VPBasicBlock::iterator getPoint() const
VPInsertPoint()=default
Creates a new insertion point which doesn't point to anything.
bool isSet() const
Returns true if this insert point is set.
VPlan-based builder utility analogous to IRBuilder.
VPInstruction * createFirstActiveLane(ArrayRef< VPValue * > Masks, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPValue * createScalarSExtOrTrunc(VPValue *Op, Type *ResultTy, Type *SrcTy, DebugLoc DL)
VPInstruction * createAdd(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", VPRecipeWithIRFlags::WrapFlagsTy WrapFlags={false, false})
VPInstruction * createOr(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPValue * createScalarZExtOrTrunc(VPValue *Op, Type *ResultTy, Type *SrcTy, DebugLoc DL)
VPInstruction * createSub(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", VPRecipeWithIRFlags::WrapFlagsTy WrapFlags={false, false})
void setInsertPoint(VPBasicBlock *TheBB, VPBasicBlock::iterator IP)
This specifies that created instructions should be inserted at the specified point.
void setInsertPoint(VPRecipeBase *IP)
This specifies that created instructions should be inserted at the specified point.
VPValue * createElementCount(Type *Ty, ElementCount EC)
T * insert(T *R)
Insert R at the current insertion point. Returns R unchanged.
VPInstruction * createLogicalOr(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
void restoreIP(VPInsertPoint IP)
Sets the current insert point to a previously-saved location.
VPVectorPointerRecipe * createVectorPointer(VPValue *Ptr, Type *SourceElementTy, VPValue *Stride, GEPNoWrapFlags GEPFlags, DebugLoc DL)
VPInstruction * createNot(VPValue *Operand, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createAnyOfReduction(VPValue *ChainOp, VPValue *TrueVal, VPValue *FalseVal, DebugLoc DL=DebugLoc::getUnknown())
Create an AnyOf reduction pattern: or-reduce ChainOp, freeze the result, then select between TrueVal ...
Definition VPlan.cpp:1683
VPInstruction * createLogicalAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPBasicBlock * getInsertBlock() const
VPDerivedIVRecipe * createDerivedIV(InductionDescriptor::InductionKind Kind, FPMathOperator *FPBinOp, VPIRValue *Start, VPValue *Current, VPValue *Step)
Convert the input value Current to the corresponding value of an induction with Start and Step values...
VPBasicBlock::iterator getInsertPoint() const
VPInstruction * createScalarCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, DebugLoc DL, const VPIRMetadata &Metadata={})
VPScalarIVStepsRecipe * createScalarIVSteps(Instruction::BinaryOps InductionOpcode, FPMathOperator *FPBinOp, VPValue *IV, VPValue *Step, VPValue *VF, DebugLoc DL)
VPBuilder(VPBasicBlock *InsertBB)
VPInstruction * createNoWrapPtrAdd(VPValue *Ptr, VPValue *Offset, GEPNoWrapFlags GEPFlags, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createFCmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new FCmp VPInstruction with predicate Pred and operands A and B.
VPInstruction * createPtrAdd(VPValue *Ptr, VPValue *Offset, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPWidenPHIRecipe * createWidenPhi(ArrayRef< VPValue * > IncomingValues, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstructionWithType * createScalarLoad(Type *ResultTy, VPValue *Addr, DebugLoc DL, const VPIRMetadata &Metadata={})
static VPBuilder getToInsertAfter(VPRecipeBase *R)
Create a VPBuilder to insert after R.
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, DebugLoc DL, const Twine &Name="")
VPInstruction * createOverflowingOp(unsigned Opcode, ArrayRef< VPValue * > Operands, VPRecipeWithIRFlags::WrapFlagsTy WrapFlags={false, false}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPValue * expandSCEV(const SCEV *Expr, DebugLoc DL)
Expand Expr using VPSCEVExpander.
VPInstruction * createLastActiveLane(ArrayRef< VPValue * > Masks, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPPhi * createScalarPhi(ArrayRef< VPValue * > IncomingValues, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", const VPIRFlags &Flags={})
VPBuilder(VPRecipeBase *InsertPt)
VPWidenMemIntrinsicRecipe * createWidenMemIntrinsic(Intrinsic::ID VectorIntrinsicID, ArrayRef< VPValue * > CallArguments, Type *Ty, Align Alignment, const VPIRMetadata &MD, DebugLoc DL)
VPWidenCastRecipe * createWidenCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)
VPInstruction * createICmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new ICmp VPInstruction with predicate Pred and operands A and B.
void clearInsertionPoint()
Clear the insertion point: created instructions will not be inserted into a block.
VPInstruction * createScalarCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, DebugLoc DL, const VPIRFlags &Flags, const VPIRMetadata &Metadata={})
VPInstruction * createAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Type *ResultTy, const VPIRFlags &Flags={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPBuilder()=default
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", const VPIRFlags &Flags={})
VPExpandSCEVRecipe * createExpandSCEV(const SCEV *Expr)
VPBuilder(VPBasicBlock *TheBB, VPBasicBlock::iterator IP)
VPInstruction * createWidePtrAdd(VPValue *Ptr, VPValue *Offset, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
void setInsertPoint(VPBasicBlock *TheBB)
This specifies that created VPInstructions should be appended to the end of the specified block.
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Instruction *Inst=nullptr, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create an N-ary operation with Opcode, Operands and set Inst as its underlying Instruction.
A recipe for converting the input IV value to the corresponding value of an IV with different s...
Definition VPlan.h:4038
Recipe to expand a SCEV expression.
Definition VPlan.h:3877
Class to record and manage LLVM IR flags.
Definition VPlan.h:696
static VPIRFlags getDefaultFlags(unsigned Opcode)
Returns default flags for Opcode for opcodes that support it, asserts otherwise.
Helper to manage IR metadata for recipes.
Definition VPlan.h:1172
A specialization of VPInstruction augmenting it with a dedicated result type, to be used when the opc...
Definition VPlan.h:1524
This is a concrete Recipe that models a single VPlan-level instruction.
Definition VPlan.h:1227
@ VScale
Returns the value for vscale.
Definition VPlan.h:1345
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition VPlan.h:401
VPBasicBlock * getParent()
Definition VPlan.h:476
Helper class to create VPRecipes from IR instructions.
A recipe for handling phi nodes of integer and floating-point inductions, producing their scalar valu...
Definition VPlan.h:4108
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
Definition VPlanValue.h:50
A recipe to compute the pointers for widened memory accesses of SourceElementTy, with the Stride expr...
Definition VPlan.h:2314
VPWidenCastRecipe is a recipe to create vector cast instructions.
Definition VPlan.h:1848
A recipe for widening vector memory intrinsics.
Definition VPlan.h:2024
A recipe for widened phis.
Definition VPlan.h:2685
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient outp...
Definition VPlan.h:4621
VPIRValue * getConstantInt(Type *Ty, uint64_t Val, bool IsSigned=false)
Return a VPIRValue wrapping a ConstantInt with the given type and value.
Definition VPlan.h:4928
LLVM Value Representation.
Definition Value.h:75
self_iterator getIterator()
Definition ilist_node.h:123
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
This is an optimization pass for GlobalISel generic memory operations.
void reportVectorizationInfo(const StringRef Msg, const StringRef ORETag, OptimizationRemarkEmitter *ORE, const Loop *TheLoop, Instruction *I=nullptr, DebugLoc DL={})
Reports an informative message: print Msg for debugging purposes as well as an optimization remark.
@ Offset
Definition DWP.cpp:558
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
std::optional< unsigned > getMaxVScale(const Function &F, const TargetTransformInfo &TTI)
cl::opt< unsigned > ForceTargetInstructionCost
TargetTransformInfo TTI
DWARFExpression::Operation Op
cl::opt< bool > EnableVPlanNativePath
std::unique_ptr< VPlan > VPlanPtr
Definition VPlan.h:73
cl::opt< bool > PreferInLoopReductions
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
A class that represents two vectorization factors (initialized with 0 by default).
FixedScalableVFPair(const ElementCount &FixedVF, const ElementCount &ScalableVF)
FixedScalableVFPair(const ElementCount &Max)
static FixedScalableVFPair getNone()
A range of powers-of-2 vectorization factors with fixed start and adjustable end.
Struct to hold various analysis needed for cost computations.
A VPValue representing a live-in from the input IR or a constant.
Definition VPlanValue.h:246
A struct that represents some properties of the register usage of a loop.
TODO: The following VectorizationFactor was pulled out of LoopVectorizationCostModel class.
InstructionCost Cost
Cost of the loop with that width.
ElementCount MinProfitableTripCount
The minimum trip count required to make vectorization profitable, e.g.
bool operator==(const VectorizationFactor &rhs) const
ElementCount Width
Vector width with best cost.
InstructionCost ScalarCost
Cost of the scalar loop.
bool operator!=(const VectorizationFactor &rhs) const
static VectorizationFactor Disabled()
Width 1 means no vectorization, cost 0 means uncomputed cost.
VectorizationFactor(ElementCount Width, InstructionCost Cost, InstructionCost ScalarCost)