LLVM 23.0.0git
LoopVectorizationPlanner.h
Go to the documentation of this file.
1//===- LoopVectorizationPlanner.h - Planner for LoopVectorization ---------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file provides a LoopVectorizationPlanner class.
11/// InnerLoopVectorizer vectorizes loops which contain only one basic block.
12/// LoopVectorizationPlanner - drives the vectorization process after having
13/// passed Legality checks.
14/// The planner builds and optimizes the Vectorization Plans which record the
15/// decisions on how to vectorize the given loop. In particular, they represent the
16/// control-flow of the vectorized version, the replication of instructions that
17/// are to be scalarized, and interleave access groups.
18///
19/// Also provides a VPlan-based builder utility analogous to IRBuilder.
20/// It provides an instruction-level API for generating VPInstructions while
21/// abstracting away the Recipe manipulation details.
22//===----------------------------------------------------------------------===//
23
24#ifndef LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H
25#define LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H
26
27#include "VPlan.h"
28#include "llvm/ADT/SmallSet.h"
31
32namespace {
33class GeneratedRTChecks;
34}
35
36namespace llvm {
37
38class LoopInfo;
39class DominatorTree;
45class LoopVersioning;
48class VPRecipeBuilder;
49struct VPRegisterUsage;
50struct VFRange;
51
55
56/// \return An upper bound for vscale based on TTI or the vscale_range
57/// attribute.
58std::optional<unsigned> getMaxVScale(const Function &F,
60
61// Utility functions that are used by different vectorization classes
63
64/// Reports a vectorization failure: print \p DebugMsg for debugging
65/// purposes along with the corresponding optimization remark \p RemarkName.
66/// If \p I is passed, it is an instruction that prevents vectorization.
67/// Otherwise, the loop \p TheLoop is used for the location of the remark.
68void reportVectorizationFailure(const StringRef DebugMsg,
69 const StringRef OREMsg, const StringRef ORETag,
71 const Loop *TheLoop, Instruction *I = nullptr);
72
73/// Same as above, but the debug message and optimization remark are identical.
74inline void reportVectorizationFailure(const StringRef DebugMsg,
75 const StringRef ORETag,
77 const Loop *TheLoop,
78 Instruction *I = nullptr) {
79 reportVectorizationFailure(DebugMsg, DebugMsg, ORETag, ORE, TheLoop, I);
80}
81
82/// Reports an informative message: print \p Msg for debugging purposes as well
83/// as an optimization remark. Uses either \p I as location of the remark, or
84/// otherwise \p TheLoop. If \p DL is passed, use it as debug location for the
85/// remark.
86void reportVectorizationInfo(const StringRef Msg, const StringRef ORETag,
88 const Loop *TheLoop, Instruction *I = nullptr,
89 DebugLoc DL = {});
90
91/// Report successful vectorization of the loop. In case an outer loop is
92/// vectorized, prepend "outer" to the vectorization remark.
93void reportVectorization(OptimizationRemarkEmitter *ORE, Loop *TheLoop,
94 ElementCount VFWidth, unsigned IC);
95
96} // namespace LoopVectorizationUtils
97
98/// VPlan-based builder utility analogous to IRBuilder.
99class VPBuilder {
100 VPBasicBlock *BB = nullptr;
102
103 /// Insert \p VPI in BB at InsertPt if BB is set.
104 template <typename T> T *tryInsertInstruction(T *R) {
105 if (BB)
106 BB->insert(R, InsertPt);
107 return R;
108 }
109
110 VPInstruction *createInstruction(unsigned Opcode,
111 ArrayRef<VPValue *> Operands,
112 const VPIRMetadata &MD, DebugLoc DL,
113 const Twine &Name = "") {
114 return tryInsertInstruction(
115 new VPInstruction(Opcode, Operands, {}, MD, DL, Name));
116 }
117
118public:
119 VPlan &getPlan() const {
120 assert(getInsertBlock() && "Insert block must be set");
121 return *getInsertBlock()->getPlan();
122 }
123
124 VPBuilder() = default;
125 VPBuilder(VPBasicBlock *InsertBB) { setInsertPoint(InsertBB); }
126 VPBuilder(VPRecipeBase *InsertPt) { setInsertPoint(InsertPt); }
128 setInsertPoint(TheBB, IP);
129 }
130
131 /// Clear the insertion point: created instructions will not be inserted into
132 /// a block.
134 BB = nullptr;
135 InsertPt = VPBasicBlock::iterator();
136 }
137
138 VPBasicBlock *getInsertBlock() const { return BB; }
139 VPBasicBlock::iterator getInsertPoint() const { return InsertPt; }
140
141 /// Create a VPBuilder to insert after \p R.
143 VPBuilder B;
144 B.setInsertPoint(R->getParent(), std::next(R->getIterator()));
145 return B;
146 }
147
148 /// InsertPoint - A saved insertion point.
150 VPBasicBlock *Block = nullptr;
152
153 public:
154 /// Creates a new insertion point which doesn't point to anything.
155 VPInsertPoint() = default;
156
157 /// Creates a new insertion point at the given location.
159 : Block(InsertBlock), Point(InsertPoint) {}
160
161 /// Returns true if this insert point is set.
162 bool isSet() const { return Block != nullptr; }
163
164 VPBasicBlock *getBlock() const { return Block; }
165 VPBasicBlock::iterator getPoint() const { return Point; }
166 };
167
168 /// Sets the current insert point to a previously-saved location.
170 if (IP.isSet())
171 setInsertPoint(IP.getBlock(), IP.getPoint());
172 else
174 }
175
176 /// This specifies that created VPInstructions should be appended to the end
177 /// of the specified block.
179 assert(TheBB && "Attempting to set a null insert point");
180 BB = TheBB;
181 InsertPt = BB->end();
182 }
183
184 /// This specifies that created instructions should be inserted at the
185 /// specified point.
187 BB = TheBB;
188 InsertPt = IP;
189 }
190
191 /// This specifies that created instructions should be inserted at the
192 /// specified point.
194 BB = IP->getParent();
195 InsertPt = IP->getIterator();
196 }
197
198 /// Insert \p R at the current insertion point. Returns \p R unchanged.
199 template <typename T> [[maybe_unused]] T *insert(T *R) {
200 BB->insert(R, InsertPt);
201 return R;
202 }
203
204 /// Create an N-ary operation with \p Opcode, \p Operands and set \p Inst as
205 /// its underlying Instruction.
207 Instruction *Inst = nullptr,
208 const VPIRFlags &Flags = {},
209 const VPIRMetadata &MD = {},
211 const Twine &Name = "",
212 Type *ResultTy = nullptr) {
213 VPInstruction *NewVPInst = tryInsertInstruction(
214 new VPInstruction(Opcode, Operands, Flags, MD, DL, Name, ResultTy));
215 NewVPInst->setUnderlyingValue(Inst);
216 return NewVPInst;
217 }
219 DebugLoc DL, const Twine &Name = "") {
220 return createInstruction(Opcode, Operands, {}, DL, Name);
221 }
223 const VPIRFlags &Flags,
225 const Twine &Name = "") {
226 return tryInsertInstruction(
227 new VPInstruction(Opcode, Operands, Flags, {}, DL, Name));
228 }
229
231 Type *ResultTy, const VPIRFlags &Flags = {},
233 const Twine &Name = "") {
234 return tryInsertInstruction(new VPInstructionWithType(
235 Opcode, Operands, ResultTy, Flags, {}, DL, Name));
236 }
237
240 const Twine &Name = "") {
241 // Assume that the maximum possible number of elements in a vector fits
242 // within the index type for the default address space.
243 VPlan &Plan = getPlan();
244 Type *IndexTy = Plan.getDataLayout().getIndexType(Plan.getContext(), 0);
245 return tryInsertInstruction(new VPInstruction(
246 VPInstruction::FirstActiveLane, Masks, {}, {}, DL, Name, IndexTy));
247 }
248
251 const Twine &Name = "") {
252 // Assume that the maximum possible number of elements in a vector fits
253 // within the index type for the default address space.
254 VPlan &Plan = getPlan();
255 Type *IndexTy = Plan.getDataLayout().getIndexType(Plan.getContext(), 0);
256 return tryInsertInstruction(new VPInstruction(
257 VPInstruction::LastActiveLane, Masks, {}, {}, DL, Name, IndexTy));
258 }
259
261 unsigned Opcode, ArrayRef<VPValue *> Operands,
262 VPRecipeWithIRFlags::WrapFlagsTy WrapFlags = {false, false},
263 DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "") {
264 return tryInsertInstruction(
265 new VPInstruction(Opcode, Operands, WrapFlags, {}, DL, Name));
266 }
267
270 const Twine &Name = "") {
271 return createInstruction(VPInstruction::Not, {Operand}, {}, DL, Name);
272 }
273
276 const Twine &Name = "") {
277 return createInstruction(Instruction::BinaryOps::And, {LHS, RHS}, {}, DL,
278 Name);
279 }
280
283 const Twine &Name = "") {
284
285 return tryInsertInstruction(new VPInstruction(
286 Instruction::BinaryOps::Or, {LHS, RHS},
287 VPRecipeWithIRFlags::DisjointFlagsTy(false), {}, DL, Name));
288 }
289
292 const Twine &Name = "",
293 VPRecipeWithIRFlags::WrapFlagsTy WrapFlags = {false, false}) {
294 return createOverflowingOp(Instruction::Add, {LHS, RHS}, WrapFlags, DL,
295 Name);
296 }
297
298 VPInstruction *
300 const Twine &Name = "",
301 VPRecipeWithIRFlags::WrapFlagsTy WrapFlags = {false, false}) {
302 return createOverflowingOp(Instruction::Sub, {LHS, RHS}, WrapFlags, DL,
303 Name);
304 }
305
311
317
319 VPValue *FalseVal,
321 const Twine &Name = "",
322 const VPIRFlags &Flags = {}) {
323 return tryInsertInstruction(new VPInstruction(
324 Instruction::Select, {Cond, TrueVal, FalseVal}, Flags, {}, DL, Name));
325 }
326
327 /// Create a new ICmp VPInstruction with predicate \p Pred and operands \p A
328 /// and \p B.
331 const Twine &Name = "") {
333 Pred <= CmpInst::LAST_ICMP_PREDICATE && "invalid predicate");
334 return tryInsertInstruction(
335 new VPInstruction(Instruction::ICmp, {A, B}, Pred, {}, DL, Name));
336 }
337
338 /// Create a new FCmp VPInstruction with predicate \p Pred and operands \p A
339 /// and \p B.
342 const Twine &Name = "") {
344 Pred <= CmpInst::LAST_FCMP_PREDICATE && "invalid predicate");
345 return tryInsertInstruction(
346 new VPInstruction(Instruction::FCmp, {A, B},
347 VPIRFlags(Pred, FastMathFlags()), {}, DL, Name));
348 }
349
350 /// Create an AnyOf reduction pattern: or-reduce \p ChainOp, freeze the
351 /// result, then select between \p TrueVal and \p FalseVal.
353 VPValue *FalseVal,
355
358 const Twine &Name = "") {
359 return tryInsertInstruction(
361 GEPNoWrapFlags::none(), {}, DL, Name));
362 }
363
365 GEPNoWrapFlags GEPFlags,
367 const Twine &Name = "") {
368 return tryInsertInstruction(new VPInstruction(
369 VPInstruction::PtrAdd, {Ptr, Offset}, GEPFlags, {}, DL, Name));
370 }
371
374 const Twine &Name = "") {
375 return tryInsertInstruction(
377 GEPNoWrapFlags::none(), {}, DL, Name));
378 }
379
382 const Twine &Name = "", const VPIRFlags &Flags = {},
383 Type *ResultTy = nullptr) {
384 return tryInsertInstruction(
385 new VPPhi(IncomingValues, Flags, DL, Name, ResultTy));
386 }
387
390 const Twine &Name = "") {
391 return tryInsertInstruction(new VPWidenPHIRecipe(IncomingValues, DL, Name));
392 }
393
395 VPlan &Plan = *getInsertBlock()->getPlan();
396 VPValue *RuntimeEC = Plan.getConstantInt(Ty, EC.getKnownMinValue());
397 if (EC.isScalable()) {
398 VPValue *VScale = createNaryOp(VPInstruction::VScale, {}, Ty);
399 RuntimeEC = EC.getKnownMinValue() == 1
400 ? VScale
401 : createOverflowingOp(Instruction::Mul,
402 {VScale, RuntimeEC}, {true, false});
403 }
404 return RuntimeEC;
405 }
406
407 /// Convert the input value \p Current to the corresponding value of an
408 /// induction with \p Start and \p Step values, using \p Start + \p Current *
409 /// \p Step.
411 FPMathOperator *FPBinOp, VPIRValue *Start,
412 VPValue *Current, VPValue *Step) {
413 return tryInsertInstruction(
414 new VPDerivedIVRecipe(Kind, FPBinOp, Start, Current, Step));
415 }
416
418 DebugLoc DL,
419 const VPIRMetadata &Metadata = {}) {
420 return tryInsertInstruction(new VPInstructionWithType(
421 Instruction::Load, Addr, ResultTy, {}, Metadata, DL));
422 }
423
425 Type *ResultTy, DebugLoc DL,
426 const VPIRMetadata &Metadata = {}) {
427 return tryInsertInstruction(new VPInstructionWithType(
428 Opcode, Op, ResultTy, VPIRFlags::getDefaultFlags(Opcode), Metadata,
429 DL));
430 }
431
433 Type *ResultTy, DebugLoc DL,
434 const VPIRFlags &Flags,
435 const VPIRMetadata &Metadata = {}) {
436 return tryInsertInstruction(
437 new VPInstructionWithType(Opcode, Op, ResultTy, Flags, Metadata, DL));
438 }
439
441 DebugLoc DL) {
442 if (ResultTy == SrcTy)
443 return Op;
444 Instruction::CastOps CastOp =
445 ResultTy->getScalarSizeInBits() < SrcTy->getScalarSizeInBits()
446 ? Instruction::Trunc
447 : Instruction::ZExt;
448 return createScalarCast(CastOp, Op, ResultTy, DL);
449 }
450
452 DebugLoc DL) {
453 if (ResultTy == SrcTy)
454 return Op;
455 Instruction::CastOps CastOp =
456 ResultTy->getScalarSizeInBits() < SrcTy->getScalarSizeInBits()
457 ? Instruction::Trunc
458 : Instruction::SExt;
459 return createScalarCast(CastOp, Op, ResultTy, DL);
460 }
461
463 return tryInsertInstruction(
464 new VPInstruction(Instruction::Freeze, Op, {}, {}, DL));
465 }
466
468 Type *ResultTy) {
469 return tryInsertInstruction(new VPWidenCastRecipe(
470 Opcode, Op, ResultTy, nullptr, VPIRFlags::getDefaultFlags(Opcode)));
471 }
472
475 FPMathOperator *FPBinOp, VPValue *IV, VPValue *Step,
476 VPValue *VF, DebugLoc DL) {
477 return tryInsertInstruction(new VPScalarIVStepsRecipe(
478 IV, Step, VF, InductionOpcode,
479 FPBinOp ? FPBinOp->getFastMathFlags() : FastMathFlags(), DL));
480 }
481
483 return tryInsertInstruction(new VPExpandSCEVRecipe(Expr));
484 }
485
487 createVectorPointer(VPValue *Ptr, Type *SourceElementTy, VPValue *Stride,
488 GEPNoWrapFlags GEPFlags, DebugLoc DL) {
489 return tryInsertInstruction(
490 new VPVectorPointerRecipe(Ptr, SourceElementTy, Stride, GEPFlags, DL));
491 }
492
494 Intrinsic::ID VectorIntrinsicID, ArrayRef<VPValue *> CallArguments,
495 Type *Ty, Align Alignment, const VPIRMetadata &MD, DebugLoc DL) {
496 return tryInsertInstruction(new VPWidenMemIntrinsicRecipe(
497 VectorIntrinsicID, CallArguments, Ty, Alignment, MD, DL));
498 }
499
500 //===--------------------------------------------------------------------===//
501 // RAII helpers.
502 //===--------------------------------------------------------------------===//
503
504 /// RAII object that stores the current insertion point and restores it when
505 /// the object is destroyed.
507 VPBuilder &Builder;
508 VPBasicBlock *Block;
510
511 public:
513 : Builder(B), Block(B.getInsertBlock()), Point(B.getInsertPoint()) {}
514
517
518 ~InsertPointGuard() { Builder.restoreIP(VPInsertPoint(Block, Point)); }
519 };
520};
521
522/// TODO: The following VectorizationFactor was pulled out of
523/// LoopVectorizationCostModel class. LV also deals with
524/// VectorizerParams::VectorizationFactor.
525/// We need to streamline them.
526
527/// Information about vectorization costs.
529 /// Vector width with best cost.
531
532 /// Cost of the loop with that width.
534
535 /// Cost of the scalar loop.
537
538 /// The minimum trip count required to make vectorization profitable, e.g. due
539 /// to runtime checks.
541
545
546 /// Width 1 means no vectorization, cost 0 means uncomputed cost.
548 return {ElementCount::getFixed(1), 0, 0};
549 }
550
551 bool operator==(const VectorizationFactor &rhs) const {
552 return Width == rhs.Width && Cost == rhs.Cost;
553 }
554
555 bool operator!=(const VectorizationFactor &rhs) const {
556 return !(*this == rhs);
557 }
558};
559
560/// A class that represents two vectorization factors (initialized with 0 by
561/// default). One for fixed-width vectorization and one for scalable
562/// vectorization. This can be used by the vectorizer to choose from a range of
563/// fixed and/or scalable VFs in order to find the most cost-effective VF to
564/// vectorize with.
568
570 : FixedVF(ElementCount::getFixed(0)),
571 ScalableVF(ElementCount::getScalable(0)) {}
573 *(Max.isScalable() ? &ScalableVF : &FixedVF) = Max;
574 }
578 assert(!FixedVF.isScalable() && ScalableVF.isScalable() &&
579 "Invalid scalable properties");
580 }
581
583
584 /// \return true if either fixed- or scalable VF is non-zero.
585 explicit operator bool() const { return FixedVF || ScalableVF; }
586
587 /// \return true if either fixed- or scalable VF is a valid vector VF.
588 bool hasVector() const { return FixedVF.isVector() || ScalableVF.isVector(); }
589};
590
591/// Holds state needed to make cost decisions before computing costs per-VF,
592/// including the maximum VFs.
594 /// \return True if maximizing vector bandwidth is enabled by the target or
595 /// user options, for the given register kind (scalable or fixed-width).
596 bool useMaxBandwidth(bool IsScalable) const;
597
598 /// \return the maximized element count based on the target's vector
599 /// registers and the loop trip-count, but limited to a maximum safe VF.
600 /// This is a helper function of computeFeasibleMaxVF.
601 ElementCount getMaximizedVFForTarget(unsigned MaxTripCount,
602 unsigned SmallestType,
603 unsigned WidestType,
604 ElementCount MaxSafeVF, unsigned UserIC,
605 bool FoldTailByMasking,
606 bool RequiresScalarEpilogue);
607
608 /// If \p VF * \p UserIC > \p MaxTripCount, clamps VF to the next lower VF
609 /// that results in VF * UserIC <= MaxTripCount.
610 ElementCount clampVFByMaxTripCount(ElementCount VF, unsigned MaxTripCount,
611 unsigned UserIC, bool FoldTailByMasking,
612 bool RequiresScalarEpilogue) const;
613
614 /// Checks if scalable vectorization is supported and enabled. Caches the
615 /// result to avoid repeated debug dumps for repeated queries.
616 bool isScalableVectorizationAllowed();
617
618 /// \return the maximum legal scalable VF, based on the safe max number
619 /// of elements.
620 ElementCount getMaxLegalScalableVF(unsigned MaxSafeElements);
621
622 /// Initializes the value of vscale used for tuning the cost model. If
623 /// vscale_range.min == vscale_range.max then return vscale_range.max, else
624 /// return the value returned by the corresponding TTI method.
625 void initializeVScaleForTuning();
626
627 const TargetTransformInfo &TTI;
628 const LoopVectorizationLegality *Legal;
629 const Loop *TheLoop;
630 const Function &F;
632 DemandedBits *DB;
634 const LoopVectorizeHints *Hints;
635
636 /// Cached result of isScalableVectorizationAllowed.
637 std::optional<bool> IsScalableVectorizationAllowed;
638
639 /// Used to store the value of vscale used for tuning the cost model. It is
640 /// initialized during object construction.
641 std::optional<unsigned> VScaleForTuning;
642
643 /// The highest VF possible for this loop, without using MaxBandwidth.
644 FixedScalableVFPair MaxPermissibleVFWithoutMaxBW;
645
646 /// All element types found in the loop.
647 SmallPtrSet<Type *, 16> ElementTypesInLoop;
648
649 /// PHINodes of the reductions that should be expanded in-loop. Set by
650 /// collectInLoopReductions.
651 SmallPtrSet<PHINode *, 4> InLoopReductions;
652
653 /// A Map of inloop reduction operations and their immediate chain operand.
654 /// FIXME: This can be removed once reductions can be costed correctly in
655 /// VPlan. This was added to allow quick lookup of the inloop operations.
656 /// Set by collectInLoopReductions.
657 DenseMap<Instruction *, Instruction *> InLoopReductionImmediateChains;
658
659 /// Maximum safe number of elements to be processed per vector iteration,
660 /// which do not prevent store-load forwarding and are safe with regard to the
661 /// memory dependencies. Required for EVL-based vectorization, where this
662 /// value is used as the upper bound of the safe AVL. Set by
663 /// computeFeasibleMaxVF.
664 std::optional<unsigned> MaxSafeElements;
665
666 /// Map of scalar integer values to the smallest bitwidth they can be legally
667 /// represented as. The vector equivalents of these values should be truncated
668 /// to this type.
670
671public:
672 /// The kind of cost that we are calculating.
674
675 /// Whether this loop should be optimized for size based on function attribute
676 /// or profile information.
677 const bool OptForSize;
678
680 const LoopVectorizationLegality *Legal,
681 const Loop *TheLoop, const Function &F,
684 const LoopVectorizeHints *Hints, bool OptForSize)
685 : TTI(TTI), Legal(Legal), TheLoop(TheLoop), F(F), PSE(PSE), DB(DB),
686 ORE(ORE), Hints(Hints),
687 CostKind(F.hasMinSize() ? TTI::TCK_CodeSize : TTI::TCK_RecipThroughput),
689 initializeVScaleForTuning();
690 }
691
692 /// \return The vscale value used for tuning the cost model.
693 std::optional<unsigned> getVScaleForTuning() const { return VScaleForTuning; }
694
695 /// \return True if register pressure should be considered for the given VF.
697
698 /// \return True if scalable vectors are supported by the target or forced.
699 bool supportsScalableVectors() const;
700
701 /// Collect element types in the loop that need widening.
703 const SmallPtrSetImpl<const Value *> *ValuesToIgnore = nullptr);
704
705 /// \return The size (in bits) of the smallest and widest types in the code
706 /// that need to be vectorized. We ignore values that remain scalar such as
707 /// 64 bit loop indices.
708 std::pair<unsigned, unsigned> getSmallestAndWidestTypes() const;
709
710 /// \return An upper bound for the vectorization factors for both
711 /// fixed and scalable vectorization, where the minimum-known number of
712 /// elements is a power-of-2 larger than zero. If scalable vectorization is
713 /// disabled or unsupported, then the scalable part will be equal to
714 /// ElementCount::getScalable(0). Also sets MaxSafeElements.
715 FixedScalableVFPair computeFeasibleMaxVF(unsigned MaxTripCount,
716 ElementCount UserVF, unsigned UserIC,
717 bool FoldTailByMasking,
718 bool RequiresScalarEpilogue);
719
720 /// Return maximum safe number of elements to be processed per vector
721 /// iteration, which do not prevent store-load forwarding and are safe with
722 /// regard to the memory dependencies. Required for EVL-based VPlans to
723 /// correctly calculate AVL (application vector length) as min(remaining AVL,
724 /// MaxSafeElements). Set by computeFeasibleMaxVF.
725 /// TODO: need to consider adjusting cost model to use this value as a
726 /// vectorization factor for EVL-based vectorization.
727 std::optional<unsigned> getMaxSafeElements() const { return MaxSafeElements; }
728
729 /// Returns true if we should use strict in-order reductions for the given
730 /// RdxDesc. This is true if the -enable-strict-reductions flag is passed,
731 /// the IsOrdered flag of RdxDesc is set and we do not allow reordering
732 /// of FP operations.
733 bool useOrderedReductions(const RecurrenceDescriptor &RdxDesc) const;
734
735 /// Returns true if the target machine supports masked loads or stores
736 /// for \p I's data type and alignment. The caller must ensure the access is
737 /// consecutive or part of an interleave group.
739
740 /// Returns true if the target machine can represent \p V as a masked gather
741 /// or scatter operation.
742 bool isLegalGatherOrScatter(Value *V, ElementCount VF) const;
743
744 /// Split reductions into those that happen in the loop, and those that
745 /// happen outside. In-loop reductions are collected into InLoopReductions.
746 /// InLoopReductionImmediateChains is filled with each in-loop reduction
747 /// operation and its immediate chain operand for use during cost modelling.
749
750 /// Returns true if the Phi is part of an inloop reduction.
751 bool isInLoopReduction(PHINode *Phi) const {
752 return InLoopReductions.contains(Phi);
753 }
754
755 /// Returns the set of in-loop reduction PHIs.
757 return InLoopReductions;
758 }
759
760 /// Returns the immediate chain operand of in-loop reduction operation \p I,
761 /// or nullptr if \p I is not an in-loop reduction operation.
763 return InLoopReductionImmediateChains.lookup(I);
764 }
765
766 /// Check whether vectorization would require runtime checks. When optimizing
767 /// for size, returning true here aborts vectorization.
769
770 /// Returns a scalable VF to use for outer-loop vectorization if the target
771 /// supports it and a fixed VF otherwise.
773
774 /// Compute smallest bitwidth each instruction can be represented with.
775 /// The vector equivalents of these instructions should be truncated to this
776 /// type.
778
779 /// \returns The smallest bitwidth each instruction can be represented with.
781 return MinBWs;
782 }
783};
784
785/// Planner drives the vectorization process after having passed
786/// Legality checks.
788 /// The loop that we evaluate.
789 Loop *OrigLoop;
790
791 /// Loop Info analysis.
792 LoopInfo *LI;
793
794 /// The dominator tree.
795 DominatorTree *DT;
796
797 /// Target Library Info.
798 const TargetLibraryInfo *TLI;
799
800 /// Target Transform Info.
801 const TargetTransformInfo &TTI;
802
803 /// The legality analysis.
805
806 /// The profitability analysis.
808
809 /// VF selection state independent of cost-modeling decisions.
810 VFSelectionContext &Config;
811
812 /// The interleaved access analysis.
814
816
817 const LoopVectorizeHints &Hints;
818
820
822
823 /// Profitable vector factors.
825
826 /// A builder used to construct the current plan.
827 VPBuilder Builder;
828
829 /// Computes the cost of \p Plan for vectorization factor \p VF.
830 ///
831 /// The current implementation requires access to the
832 /// LoopVectorizationLegality to handle inductions and reductions, which is
833 /// why it is kept separate from the VPlan-only cost infrastructure.
834 ///
835 /// TODO: Move to VPlan::cost once the use of LoopVectorizationLegality has
836 /// been retired.
837 InstructionCost cost(VPlan &Plan, ElementCount VF, VPRegisterUsage *RU) const;
838
839 /// Precompute costs for certain instructions using the legacy cost model. The
840 /// function is used to bring up the VPlan-based cost model to initially avoid
841 /// taking different decisions due to inaccuracies in the legacy cost model.
842 InstructionCost precomputeCosts(VPlan &Plan, ElementCount VF,
843 VPCostContext &CostCtx) const;
844
845public:
847 Loop *L, LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI,
852 : OrigLoop(L), LI(LI), DT(DT), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM),
853 Config(Config), IAI(IAI), PSE(PSE), Hints(Hints), ORE(ORE) {}
854
855 /// Build VPlans for the specified \p UserVF and \p UserIC if they are
856 /// non-zero or all applicable candidate VFs otherwise. If vectorization and
857 /// interleaving should be avoided up-front, no plans are generated.
858 void plan(ElementCount UserVF, unsigned UserIC);
859
860 /// Return the VPlan for \p VF. At the moment, there is always a single VPlan
861 /// for each VF.
862 VPlan &getPlanFor(ElementCount VF) const;
863
864 /// Compute and return the most profitable vectorization factor and the
865 /// corresponding best VPlan. Also collect all profitable VFs in
866 /// ProfitableVFs.
867 std::pair<VectorizationFactor, VPlan *> computeBestVF();
868
869 /// \return The desired interleave count.
870 /// If interleave count has been specified by metadata it will be returned.
871 /// Otherwise, the interleave count is computed and returned. VF and LoopCost
872 /// are the selected vectorization factor and the cost of the selected VF.
873 unsigned selectInterleaveCount(VPlan &Plan, ElementCount VF,
874 InstructionCost LoopCost);
875
876 /// Generate the IR code for the vectorized loop captured in VPlan \p BestPlan
877 /// according to the best selected \p VF and \p UF.
878 ///
879 /// TODO: \p EpilogueVecKind should be removed once the re-use issue has been
880 /// fixed.
881 ///
882 /// Returns a mapping of SCEVs to their expanded IR values.
883 /// Note that this is a temporary workaround needed due to the current
884 /// epilogue handling.
886 None, ///< Not part of epilogue vectorization.
887 MainLoop, ///< Vectorizing the main loop of epilogue vectorization.
888 Epilogue ///< Vectorizing the epilogue loop.
889 };
891 executePlan(ElementCount VF, unsigned UF, VPlan &BestPlan,
893 EpilogueVectorizationKind EpilogueVecKind =
895
896#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
897 void printPlans(raw_ostream &O);
898#endif
899
900 /// Look through the existing plans and return true if we have one with
901 /// vectorization factor \p VF.
903 return any_of(VPlans,
904 [&](const VPlanPtr &Plan) { return Plan->hasVF(VF); });
905 }
906
907 /// Test a \p Predicate on a \p Range of VF's. Return the value of applying
908 /// \p Predicate on Range.Start, possibly decreasing Range.End such that the
909 /// returned value holds for the entire \p Range.
910 static bool
911 getDecisionAndClampRange(const std::function<bool(ElementCount)> &Predicate,
912 VFRange &Range);
913
914 /// \return A VPlan for the most profitable epilogue vectorization, with its
915 /// VF narrowed to the chosen factor. The returned plan is a duplicate.
916 /// Returns nullptr if epilogue vectorization is not supported or not
917 /// profitable for the loop.
918 std::unique_ptr<VPlan>
919 selectBestEpiloguePlan(VPlan &MainPlan, ElementCount MainLoopVF, unsigned IC);
920
921 /// Emit remarks for recipes with invalid costs in the available VPlans.
923
924 /// Create a check to \p Plan to see if the vector loop should be executed
925 /// based on its trip count.
926 void addMinimumIterationCheck(VPlan &Plan, ElementCount VF, unsigned UF,
927 ElementCount MinProfitableTripCount) const;
928
929 /// Attach the runtime checks of \p RTChecks to \p Plan.
930 void attachRuntimeChecks(VPlan &Plan, GeneratedRTChecks &RTChecks,
931 bool HasBranchWeights) const;
932
933 /// Update loop metadata and profile info for both the scalar remainder loop
934 /// and \p VectorLoop, if it exists. Keeps all loop hints from the original
935 /// loop on the vector loop and replaces vectorizer-specific metadata. The
936 /// loop ID of the original loop \p OrigLoopID must be passed, together with
937 /// the average trip count and invocation weight of the original loop (\p
938 /// OrigAverageTripCount and \p OrigLoopInvocationWeight respectively). They
939 /// cannot be retrieved after the plan has been executed, as the original loop
940 /// may have been removed.
942 Loop *VectorLoop, VPBasicBlock *HeaderVPBB, const VPlan &Plan,
943 bool VectorizingEpilogue, MDNode *OrigLoopID,
944 std::optional<unsigned> OrigAverageTripCount,
945 unsigned OrigLoopInvocationWeight, unsigned EstimatedVFxUF,
946 bool DisableRuntimeUnroll);
947
948private:
949 /// Build an initial VPlan, with HCFG wrapping the original scalar loop and
950 /// scalar transformations applied. Returns null if an initial VPlan cannot
951 /// be built.
952 VPlanPtr tryToBuildVPlan1();
953
954 /// Build a VPlan using VPRecipes according to the information gathered by
955 /// Legal and VPlan-based analysis. For outer loops, performs basic recipe
956 /// conversion only. For inner loops, \p Range's largest included VF is
957 /// restricted to the maximum VF the returned VPlan is valid for. If no VPlan
958 /// can be built for the input range, set the largest included VF to the
959 /// maximum VF for which no plan could be built. Each VPlan is built starting
960 /// from a copy of \p InitialPlan, which is a plain CFG VPlan wrapping the
961 /// original scalar loop.
962 VPlanPtr tryToBuildVPlan(VPlanPtr InitialPlan, VFRange &Range);
963
964 /// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive,
965 /// based on \p VPlan1 and according to the information gathered by Legal
966 /// when it checked if it is legal to vectorize the loop.
967 void buildVPlans(VPlan &VPlan1, ElementCount MinVF, ElementCount MaxVF);
968
969 /// Add ComputeReductionResult recipes to the middle block to compute the
970 /// final reduction results. Add Select recipes to the latch block when
971 /// folding tail, to feed ComputeReductionResult with the last or penultimate
972 /// iteration values according to the header mask.
973 void addReductionResultComputation(VPlanPtr &Plan,
974 VPRecipeBuilder &RecipeBuilder,
975 ElementCount MinVF);
976
977 /// Returns true if the per-lane cost of VectorizationFactor A is lower than
978 /// that of B.
979 bool isMoreProfitable(const VectorizationFactor &A,
980 const VectorizationFactor &B, bool HasTail,
981 bool IsEpilogue = false) const;
982
983 /// Returns true if the per-lane cost of VectorizationFactor A is lower than
984 /// that of B in the context of vectorizing a loop with known \p MaxTripCount.
985 bool isMoreProfitable(const VectorizationFactor &A,
986 const VectorizationFactor &B,
987 const unsigned MaxTripCount, bool HasTail,
988 bool IsEpilogue = false) const;
989
990 /// Determines if we have the infrastructure to vectorize the loop and its
991 /// epilogue, assuming the main loop is vectorized by \p MainPlan.
992 bool isCandidateForEpilogueVectorization(VPlan &MainPlan) const;
993};
994
995} // namespace llvm
996
997#endif // LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
dxil translate DXIL Translate Metadata
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define T
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
const SmallVectorImpl< MachineOperand > & Cond
This file defines the SmallSet class.
This pass exposes codegen information to IR-level passes.
This file contains the declarations of the Vectorization Plan base classes:
Value * RHS
Value * LHS
static const uint32_t IV[8]
Definition blake3_impl.h:83
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:740
LLVM_ABI IntegerType * getIndexType(LLVMContext &C, unsigned AddressSpace) const
Returns the type of a GEP index in AddressSpace.
A debug info location.
Definition DebugLoc.h:123
static DebugLoc getUnknown()
Definition DebugLoc.h:161
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:159
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:309
Utility class for floating point operations which can have information about relaxed accuracy require...
Definition Operator.h:202
FastMathFlags getFastMathFlags() const
Convenience function for getting all the fast-math flags.
Definition Operator.h:291
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:23
Represents flags for the getelementptr instruction/expression.
static GEPNoWrapFlags none()
InductionKind
This enum represents the kinds of inductions that we support.
InnerLoopVectorizer vectorizes loops which contain only one basic block to a specified vectorization ...
Drive the analysis of interleaved memory accesses in the loop.
LoopVectorizationCostModel - estimates the expected speedups due to vectorization.
LoopVectorizationLegality checks if it is legal to vectorize a loop, and to what vectorization factor...
DenseMap< const SCEV *, Value * > executePlan(ElementCount VF, unsigned UF, VPlan &BestPlan, InnerLoopVectorizer &LB, DominatorTree *DT, EpilogueVectorizationKind EpilogueVecKind=EpilogueVectorizationKind::None)
EpilogueVectorizationKind
Generate the IR code for the vectorized loop captured in VPlan BestPlan according to the best selecte...
@ MainLoop
Vectorizing the main loop of epilogue vectorization.
VPlan & getPlanFor(ElementCount VF) const
Return the VPlan for VF.
Definition VPlan.cpp:1682
void updateLoopMetadataAndProfileInfo(Loop *VectorLoop, VPBasicBlock *HeaderVPBB, const VPlan &Plan, bool VectorizingEpilogue, MDNode *OrigLoopID, std::optional< unsigned > OrigAverageTripCount, unsigned OrigLoopInvocationWeight, unsigned EstimatedVFxUF, bool DisableRuntimeUnroll)
Update loop metadata and profile info for both the scalar remainder loop and VectorLoop,...
Definition VPlan.cpp:1733
void attachRuntimeChecks(VPlan &Plan, GeneratedRTChecks &RTChecks, bool HasBranchWeights) const
Attach the runtime checks of RTChecks to Plan.
LoopVectorizationPlanner(Loop *L, LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI, const TargetTransformInfo &TTI, LoopVectorizationLegality *Legal, LoopVectorizationCostModel &CM, VFSelectionContext &Config, InterleavedAccessInfo &IAI, PredicatedScalarEvolution &PSE, const LoopVectorizeHints &Hints, OptimizationRemarkEmitter *ORE)
unsigned selectInterleaveCount(VPlan &Plan, ElementCount VF, InstructionCost LoopCost)
void emitInvalidCostRemarks(OptimizationRemarkEmitter *ORE)
Emit remarks for recipes with invalid costs in the available VPlans.
static bool getDecisionAndClampRange(const std::function< bool(ElementCount)> &Predicate, VFRange &Range)
Test a Predicate on a Range of VF's.
Definition VPlan.cpp:1668
void printPlans(raw_ostream &O)
Definition VPlan.cpp:1839
void plan(ElementCount UserVF, unsigned UserIC)
Build VPlans for the specified UserVF and UserIC if they are non-zero or all applicable candidate VFs...
std::unique_ptr< VPlan > selectBestEpiloguePlan(VPlan &MainPlan, ElementCount MainLoopVF, unsigned IC)
void addMinimumIterationCheck(VPlan &Plan, ElementCount VF, unsigned UF, ElementCount MinProfitableTripCount) const
Create a check to Plan to see if the vector loop should be executed based on its trip count.
bool hasPlanWithVF(ElementCount VF) const
Look through the existing plans and return true if we have one with vectorization factor VF.
std::pair< VectorizationFactor, VPlan * > computeBestVF()
Compute and return the most profitable vectorization factor and the corresponding best VPlan.
Utility class for getting and setting loop vectorizer hints in the form of loop metadata.
This class emits a version of the loop where run-time checks ensure that may-alias pointers can't ove...
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
Metadata node.
Definition Metadata.h:1080
This class implements a map that also provides access to all stored values in a deterministic order.
Definition MapVector.h:38
Root of the metadata hierarchy.
Definition Metadata.h:64
The optimization diagnostic interface.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
The RecurrenceDescriptor is used to identify recurrence variables in a loop.
This class represents an analyzed expression in the program.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
Provides information about what library functions are available for the current target.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
TargetCostKind
The kind of cost model.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:232
Holds state needed to make cost decisions before computing costs per-VF, including the maximum VFs.
const bool OptForSize
Whether this loop should be optimized for size based on function attribute or profile information.
FixedScalableVFPair computeVPlanOuterloopVF(ElementCount UserVF)
Returns a scalable VF to use for outer-loop vectorization if the target supports it and a fixed VF ot...
bool isInLoopReduction(PHINode *Phi) const
Returns true if the Phi is part of an inloop reduction.
std::pair< unsigned, unsigned > getSmallestAndWidestTypes() const
const TTI::TargetCostKind CostKind
The kind of cost that we are calculating.
bool runtimeChecksRequired()
Check whether vectorization would require runtime checks.
bool isLegalGatherOrScatter(Value *V, ElementCount VF) const
Returns true if the target machine can represent V as a masked gather or scatter operation.
void collectInLoopReductions()
Split reductions into those that happen in the loop, and those that happen outside.
const SmallPtrSetImpl< PHINode * > & getInLoopReductions() const
Returns the set of in-loop reduction PHIs.
std::optional< unsigned > getMaxSafeElements() const
Return maximum safe number of elements to be processed per vector iteration, which do not prevent sto...
FixedScalableVFPair computeFeasibleMaxVF(unsigned MaxTripCount, ElementCount UserVF, unsigned UserIC, bool FoldTailByMasking, bool RequiresScalarEpilogue)
const MapVector< Instruction *, uint64_t > & getMinimalBitwidths() const
VFSelectionContext(const TargetTransformInfo &TTI, const LoopVectorizationLegality *Legal, const Loop *TheLoop, const Function &F, PredicatedScalarEvolution &PSE, DemandedBits *DB, OptimizationRemarkEmitter *ORE, const LoopVectorizeHints *Hints, bool OptForSize)
Instruction * getInLoopReductionImmediateChain(Instruction *I) const
Returns the immediate chain operand of in-loop reduction operation I, or nullptr if I is not an in-lo...
bool useOrderedReductions(const RecurrenceDescriptor &RdxDesc) const
Returns true if we should use strict in-order reductions for the given RdxDesc.
bool shouldConsiderRegPressureForVF(ElementCount VF) const
void collectElementTypesForWidening(const SmallPtrSetImpl< const Value * > *ValuesToIgnore=nullptr)
Collect element types in the loop that need widening.
bool isLegalMaskedLoadOrStore(Instruction *I, ElementCount VF) const
Returns true if the target machine supports masked loads or stores for I's data type and alignment.
std::optional< unsigned > getVScaleForTuning() const
void computeMinimalBitwidths()
Compute smallest bitwidth each instruction can be represented with.
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition VPlan.h:4295
RecipeListTy::iterator iterator
Instruction iterators...
Definition VPlan.h:4322
iterator end()
Definition VPlan.h:4332
VPlan * getPlan()
Definition VPlan.cpp:211
InsertPointGuard(const InsertPointGuard &)=delete
InsertPointGuard & operator=(const InsertPointGuard &)=delete
InsertPoint - A saved insertion point.
VPInsertPoint(VPBasicBlock *InsertBlock, VPBasicBlock::iterator InsertPoint)
Creates a new insertion point at the given location.
VPBasicBlock::iterator getPoint() const
VPInsertPoint()=default
Creates a new insertion point which doesn't point to anything.
bool isSet() const
Returns true if this insert point is set.
VPlan-based builder utility analogous to IRBuilder.
VPInstruction * createFirstActiveLane(ArrayRef< VPValue * > Masks, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPValue * createScalarSExtOrTrunc(VPValue *Op, Type *ResultTy, Type *SrcTy, DebugLoc DL)
VPInstruction * createAdd(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", VPRecipeWithIRFlags::WrapFlagsTy WrapFlags={false, false})
VPInstruction * createOr(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPValue * createScalarZExtOrTrunc(VPValue *Op, Type *ResultTy, Type *SrcTy, DebugLoc DL)
VPInstruction * createSub(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", VPRecipeWithIRFlags::WrapFlagsTy WrapFlags={false, false})
void setInsertPoint(VPBasicBlock *TheBB, VPBasicBlock::iterator IP)
This specifies that created instructions should be inserted at the specified point.
void setInsertPoint(VPRecipeBase *IP)
This specifies that created instructions should be inserted at the specified point.
VPValue * createElementCount(Type *Ty, ElementCount EC)
T * insert(T *R)
Insert R at the current insertion point. Returns R unchanged.
VPInstruction * createLogicalOr(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
void restoreIP(VPInsertPoint IP)
Sets the current insert point to a previously-saved location.
VPVectorPointerRecipe * createVectorPointer(VPValue *Ptr, Type *SourceElementTy, VPValue *Stride, GEPNoWrapFlags GEPFlags, DebugLoc DL)
VPInstruction * createNot(VPValue *Operand, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createAnyOfReduction(VPValue *ChainOp, VPValue *TrueVal, VPValue *FalseVal, DebugLoc DL=DebugLoc::getUnknown())
Create an AnyOf reduction pattern: or-reduce ChainOp, freeze the result, then select between TrueVal ...
Definition VPlan.cpp:1653
VPInstruction * createLogicalAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPBasicBlock * getInsertBlock() const
VPDerivedIVRecipe * createDerivedIV(InductionDescriptor::InductionKind Kind, FPMathOperator *FPBinOp, VPIRValue *Start, VPValue *Current, VPValue *Step)
Convert the input value Current to the corresponding value of an induction with Start and Step values...
VPBasicBlock::iterator getInsertPoint() const
VPInstruction * createScalarCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, DebugLoc DL, const VPIRMetadata &Metadata={})
VPScalarIVStepsRecipe * createScalarIVSteps(Instruction::BinaryOps InductionOpcode, FPMathOperator *FPBinOp, VPValue *IV, VPValue *Step, VPValue *VF, DebugLoc DL)
VPBuilder(VPBasicBlock *InsertBB)
VPInstruction * createNoWrapPtrAdd(VPValue *Ptr, VPValue *Offset, GEPNoWrapFlags GEPFlags, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createFCmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new FCmp VPInstruction with predicate Pred and operands A and B.
VPInstruction * createPtrAdd(VPValue *Ptr, VPValue *Offset, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPWidenPHIRecipe * createWidenPhi(ArrayRef< VPValue * > IncomingValues, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstructionWithType * createScalarLoad(Type *ResultTy, VPValue *Addr, DebugLoc DL, const VPIRMetadata &Metadata={})
static VPBuilder getToInsertAfter(VPRecipeBase *R)
Create a VPBuilder to insert after R.
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, DebugLoc DL, const Twine &Name="")
VPValue * createScalarFreeze(VPValue *Op, Type *ResultTy, DebugLoc DL)
VPInstruction * createOverflowingOp(unsigned Opcode, ArrayRef< VPValue * > Operands, VPRecipeWithIRFlags::WrapFlagsTy WrapFlags={false, false}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createLastActiveLane(ArrayRef< VPValue * > Masks, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPBuilder(VPRecipeBase *InsertPt)
VPWidenMemIntrinsicRecipe * createWidenMemIntrinsic(Intrinsic::ID VectorIntrinsicID, ArrayRef< VPValue * > CallArguments, Type *Ty, Align Alignment, const VPIRMetadata &MD, DebugLoc DL)
VPWidenCastRecipe * createWidenCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)
VPInstruction * createICmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new ICmp VPInstruction with predicate Pred and operands A and B.
void clearInsertionPoint()
Clear the insertion point: created instructions will not be inserted into a block.
VPInstruction * createScalarCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, DebugLoc DL, const VPIRFlags &Flags, const VPIRMetadata &Metadata={})
VPInstruction * createAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Type *ResultTy, const VPIRFlags &Flags={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPBuilder()=default
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPPhi * createScalarPhi(ArrayRef< VPValue * > IncomingValues, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", const VPIRFlags &Flags={}, Type *ResultTy=nullptr)
VPInstruction * createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", const VPIRFlags &Flags={})
VPExpandSCEVRecipe * createExpandSCEV(const SCEV *Expr)
VPBuilder(VPBasicBlock *TheBB, VPBasicBlock::iterator IP)
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Instruction *Inst=nullptr, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", Type *ResultTy=nullptr)
Create an N-ary operation with Opcode, Operands and set Inst as its underlying Instruction.
VPInstruction * createWidePtrAdd(VPValue *Ptr, VPValue *Offset, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
void setInsertPoint(VPBasicBlock *TheBB)
This specifies that created VPInstructions should be appended to the end of the specified block.
A recipe for converting the input value IV value to the corresponding value of an IV with different s...
Definition VPlan.h:4070
Recipe to expand a SCEV expression.
Definition VPlan.h:3909
Class to record and manage LLVM IR flags.
Definition VPlan.h:700
static VPIRFlags getDefaultFlags(unsigned Opcode)
Returns default flags for Opcode for opcodes that support it, asserts otherwise.
Helper to manage IR metadata for recipes.
Definition VPlan.h:1176
A specialization of VPInstruction augmenting it with a dedicated result type, to be used when the opc...
Definition VPlan.h:1537
This is a concrete Recipe that models a single VPlan-level instruction.
Definition VPlan.h:1231
@ VScale
Returns the value for vscale.
Definition VPlan.h:1353
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition VPlan.h:401
VPBasicBlock * getParent()
Definition VPlan.h:476
Helper class to create VPRecipes from IR instructions.
A recipe for handling phi nodes of integer and floating-point inductions, producing their scalar valu...
Definition VPlan.h:4140
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
Definition VPlanValue.h:50
A recipe to compute the pointers for widened memory accesses of SourceElementTy, with the Stride expr...
Definition VPlan.h:2330
VPWidenCastRecipe is a recipe to create vector cast instructions.
Definition VPlan.h:1861
A recipe for widening vector memory intrinsics.
Definition VPlan.h:2037
A recipe for widened phis.
Definition VPlan.h:2701
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient outp...
Definition VPlan.h:4653
const DataLayout & getDataLayout() const
Definition VPlan.h:4858
LLVMContext & getContext() const
Definition VPlan.h:4854
VPIRValue * getConstantInt(Type *Ty, uint64_t Val, bool IsSigned=false)
Return a VPIRValue wrapping a ConstantInt with the given type and value.
Definition VPlan.h:4960
LLVM Value Representation.
Definition Value.h:75
self_iterator getIterator()
Definition ilist_node.h:123
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
void reportVectorizationFailure(const StringRef DebugMsg, const StringRef OREMsg, const StringRef ORETag, OptimizationRemarkEmitter *ORE, const Loop *TheLoop, Instruction *I=nullptr)
Reports a vectorization failure: print DebugMsg for debugging purposes along with the corresponding o...
void reportVectorizationInfo(const StringRef Msg, const StringRef ORETag, OptimizationRemarkEmitter *ORE, const Loop *TheLoop, Instruction *I=nullptr, DebugLoc DL={})
Reports an informative message: print Msg for debugging purposes as well as an optimization remark.
void reportVectorization(OptimizationRemarkEmitter *ORE, Loop *TheLoop, ElementCount VFWidth, unsigned IC)
Report successful vectorization of the loop.
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:558
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
std::optional< unsigned > getMaxVScale(const Function &F, const TargetTransformInfo &TTI)
cl::opt< unsigned > ForceTargetInstructionCost
TargetTransformInfo TTI
DWARFExpression::Operation Op
cl::opt< bool > EnableVPlanNativePath
std::unique_ptr< VPlan > VPlanPtr
Definition VPlan.h:73
cl::opt< bool > PreferInLoopReductions
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
A class that represents two vectorization factors (initialized with 0 by default).
FixedScalableVFPair(const ElementCount &FixedVF, const ElementCount &ScalableVF)
FixedScalableVFPair(const ElementCount &Max)
static FixedScalableVFPair getNone()
A range of powers-of-2 vectorization factors with fixed start and adjustable end.
Struct to hold various analysis needed for cost computations.
A VPValue representing a live-in from the input IR or a constant.
Definition VPlanValue.h:246
A struct that represents some properties of the register usage of a loop.
TODO: The following VectorizationFactor was pulled out of LoopVectorizationCostModel class.
InstructionCost Cost
Cost of the loop with that width.
ElementCount MinProfitableTripCount
The minimum trip count required to make vectorization profitable, e.g.
bool operator==(const VectorizationFactor &rhs) const
ElementCount Width
Vector width with best cost.
InstructionCost ScalarCost
Cost of the scalar loop.
bool operator!=(const VectorizationFactor &rhs) const
static VectorizationFactor Disabled()
Width 1 means no vectorization, cost 0 means uncomputed cost.
VectorizationFactor(ElementCount Width, InstructionCost Cost, InstructionCost ScalarCost)