LLVM 20.0.0git
VPlan.h
Go to the documentation of this file.
1//===- VPlan.h - Represent A Vectorizer Plan --------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file contains the declarations of the Vectorization Plan base classes:
11/// 1. VPBasicBlock and VPRegionBlock that inherit from a common pure virtual
12/// VPBlockBase, together implementing a Hierarchical CFG;
13/// 2. Pure virtual VPRecipeBase serving as the base class for recipes contained
14/// within VPBasicBlocks;
15/// 3. Pure virtual VPSingleDefRecipe serving as a base class for recipes that
16/// also inherit from VPValue.
17/// 4. VPInstruction, a concrete Recipe and VPUser modeling a single planned
18/// instruction;
19/// 5. The VPlan class holding a candidate for vectorization;
20/// 6. The VPlanPrinter class providing a way to print a plan in dot format;
21/// These are documented in docs/VectorizationPlan.rst.
22//
23//===----------------------------------------------------------------------===//
24
25#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
26#define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
27
28#include "VPlanAnalysis.h"
29#include "VPlanValue.h"
30#include "llvm/ADT/DenseMap.h"
31#include "llvm/ADT/MapVector.h"
35#include "llvm/ADT/Twine.h"
36#include "llvm/ADT/ilist.h"
37#include "llvm/ADT/ilist_node.h"
42#include "llvm/IR/DebugLoc.h"
43#include "llvm/IR/FMF.h"
44#include "llvm/IR/Operator.h"
46#include <algorithm>
47#include <cassert>
48#include <cstddef>
49#include <string>
50
51namespace llvm {
52
53class BasicBlock;
54class DominatorTree;
55class InnerLoopVectorizer;
56class IRBuilderBase;
57class LoopInfo;
58class raw_ostream;
59class RecurrenceDescriptor;
60class SCEV;
61class Type;
62class VPBasicBlock;
63class VPRegionBlock;
64class VPlan;
65class VPReplicateRecipe;
66class VPlanSlp;
67class Value;
68class LoopVectorizationCostModel;
69class LoopVersioning;
70
71struct VPCostContext;
72
73namespace Intrinsic {
74typedef unsigned ID;
75}
76
77/// Returns a calculation for the total number of elements for a given \p VF.
78/// For fixed width vectors this value is a constant, whereas for scalable
79/// vectors it is an expression determined at runtime.
80Value *getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF);
81
82/// Return a value for Step multiplied by VF.
83Value *createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF,
84 int64_t Step);
85
86const SCEV *createTripCountSCEV(Type *IdxTy, PredicatedScalarEvolution &PSE,
87 Loop *CurLoop = nullptr);
88
/// Reciprocal of the assumed execution probability of a predicated block.
/// A result of X means the predicated block is expected to execute once for
/// every X iterations of the loop header.
///
/// TODO: Use the actual block probability when it is available. For now every
/// predicated block is assumed to have a 50% chance of executing.
inline unsigned getReciprocalPredBlockProb() {
  constexpr unsigned AssumedReciprocalProb = 2;
  return AssumedReciprocalProb;
}
96
97/// A range of powers-of-2 vectorization factors with fixed start and
98/// adjustable end. The range includes start and excludes end, e.g.,:
99/// [1, 16) = {1, 2, 4, 8}
100struct VFRange {
101 // A power of 2.
103
104 // A power of 2. If End <= Start range is empty.
106
107 bool isEmpty() const {
109 }
110
112 : Start(Start), End(End) {
114 "Both Start and End should have the same scalable flag");
116 "Expected Start to be a power of 2");
118 "Expected End to be a power of 2");
119 }
120
121 /// Iterator to iterate over vectorization factors in a VFRange.
123 : public iterator_facade_base<iterator, std::forward_iterator_tag,
124 ElementCount> {
125 ElementCount VF;
126
127 public:
128 iterator(ElementCount VF) : VF(VF) {}
129
130 bool operator==(const iterator &Other) const { return VF == Other.VF; }
131
132 ElementCount operator*() const { return VF; }
133
135 VF *= 2;
136 return *this;
137 }
138 };
139
143 return iterator(End);
144 }
145};
146
147using VPlanPtr = std::unique_ptr<VPlan>;
148
149/// In what follows, the term "input IR" refers to code that is fed into the
150/// vectorizer whereas the term "output IR" refers to code that is generated by
151/// the vectorizer.
152
153/// VPLane provides a way to access lanes in both fixed width and scalable
154/// vectors, where for the latter the lane index sometimes needs calculating
155/// as a runtime expression.
156class VPLane {
157public:
158 /// Kind describes how to interpret Lane.
159 enum class Kind : uint8_t {
160 /// For First, Lane is the index into the first N elements of a
161 /// fixed-vector <N x <ElTy>> or a scalable vector <vscale x N x <ElTy>>.
162 First,
163 /// For ScalableLast, Lane is the offset from the start of the last
164 /// N-element subvector in a scalable vector <vscale x N x <ElTy>>. For
165 /// example, a Lane of 0 corresponds to lane `(vscale - 1) * N`, a Lane of
166 /// 1 corresponds to `((vscale - 1) * N) + 1`, etc.
168 };
169
170private:
171 /// in [0..VF)
172 unsigned Lane;
173
174 /// Indicates how the Lane should be interpreted, as described above.
175 Kind LaneKind;
176
177public:
178 VPLane(unsigned Lane, Kind LaneKind) : Lane(Lane), LaneKind(LaneKind) {}
179
181
182 static VPLane getLaneFromEnd(const ElementCount &VF, unsigned Offset) {
183 assert(Offset > 0 && Offset <= VF.getKnownMinValue() &&
184 "trying to extract with invalid offset");
185 unsigned LaneOffset = VF.getKnownMinValue() - Offset;
186 Kind LaneKind;
187 if (VF.isScalable())
188 // In this case 'LaneOffset' refers to the offset from the start of the
189 // last subvector with VF.getKnownMinValue() elements.
191 else
192 LaneKind = VPLane::Kind::First;
193 return VPLane(LaneOffset, LaneKind);
194 }
195
197 return getLaneFromEnd(VF, 1);
198 }
199
200 /// Returns a compile-time known value for the lane index and asserts if the
201 /// lane can only be calculated at runtime.
202 unsigned getKnownLane() const {
203 assert(LaneKind == Kind::First);
204 return Lane;
205 }
206
207 /// Returns an expression describing the lane index that can be used at
208 /// runtime.
209 Value *getAsRuntimeExpr(IRBuilderBase &Builder, const ElementCount &VF) const;
210
211 /// Returns the Kind of lane offset.
212 Kind getKind() const { return LaneKind; }
213
214 /// Returns true if this is the first lane of the whole vector.
215 bool isFirstLane() const { return Lane == 0 && LaneKind == Kind::First; }
216
217 /// Maps the lane to a cache index based on \p VF.
218 unsigned mapToCacheIndex(const ElementCount &VF) const {
219 switch (LaneKind) {
221 assert(VF.isScalable() && Lane < VF.getKnownMinValue());
222 return VF.getKnownMinValue() + Lane;
223 default:
224 assert(Lane < VF.getKnownMinValue());
225 return Lane;
226 }
227 }
228
229 /// Returns the maxmimum number of lanes that we are able to consider
230 /// caching for \p VF.
231 static unsigned getNumCachedLanes(const ElementCount &VF) {
232 return VF.getKnownMinValue() * (VF.isScalable() ? 2 : 1);
233 }
234};
235
236/// VPIteration represents a single point in the iteration space of the output
237/// (vectorized and/or unrolled) IR loop.
239 /// in [0..UF)
240 unsigned Part;
241
243
244 VPIteration(unsigned Part, unsigned Lane,
246 : Part(Part), Lane(Lane, Kind) {}
247
248 VPIteration(unsigned Part, const VPLane &Lane) : Part(Part), Lane(Lane) {}
249
250 bool isFirstIteration() const { return Part == 0 && Lane.isFirstLane(); }
251};
252
253/// VPTransformState holds information passed down when "executing" a VPlan,
254/// needed for generating the output IR.
259
260 /// The chosen Vectorization and Unroll Factors of the loop being vectorized.
262 unsigned UF;
263
264 /// Hold the indices to generate specific scalar instructions. Null indicates
265 /// that all instances are to be generated, using either scalar or vector
266 /// instructions.
267 std::optional<VPIteration> Instance;
268
269 struct DataState {
270 /// A type for vectorized values in the new loop. Each value from the
271 /// original loop, when vectorized, is represented by UF vector values in
272 /// the new unrolled loop, where UF is the unroll factor.
274
276
280
281 /// Get the generated vector Value for a given VPValue \p Def and a given \p
282 /// Part if \p IsScalar is false, otherwise return the generated scalar
283 /// for \p Part. \see set.
284 Value *get(VPValue *Def, unsigned Part, bool IsScalar = false);
285
286 /// Get the generated Value for a given VPValue and given Part and Lane.
287 Value *get(VPValue *Def, const VPIteration &Instance);
288
289 bool hasVectorValue(VPValue *Def, unsigned Part) {
290 auto I = Data.PerPartOutput.find(Def);
291 return I != Data.PerPartOutput.end() && Part < I->second.size() &&
292 I->second[Part];
293 }
294
296 auto I = Data.PerPartScalars.find(Def);
297 if (I == Data.PerPartScalars.end())
298 return false;
299 unsigned CacheIdx = Instance.Lane.mapToCacheIndex(VF);
300 return Instance.Part < I->second.size() &&
301 CacheIdx < I->second[Instance.Part].size() &&
302 I->second[Instance.Part][CacheIdx];
303 }
304
305 /// Set the generated vector Value for a given VPValue and a given Part, if \p
306 /// IsScalar is false. If \p IsScalar is true, set the scalar in (Part, 0).
307 void set(VPValue *Def, Value *V, unsigned Part, bool IsScalar = false) {
308 if (IsScalar) {
309 set(Def, V, VPIteration(Part, 0));
310 return;
311 }
312 assert((VF.isScalar() || V->getType()->isVectorTy()) &&
313 "scalar values must be stored as (Part, 0)");
314 if (!Data.PerPartOutput.count(Def)) {
316 Data.PerPartOutput[Def] = Entry;
317 }
318 Data.PerPartOutput[Def][Part] = V;
319 }
320
321 /// Reset an existing vector value for \p Def and a given \p Part.
322 void reset(VPValue *Def, Value *V, unsigned Part) {
323 auto Iter = Data.PerPartOutput.find(Def);
324 assert(Iter != Data.PerPartOutput.end() &&
325 "need to overwrite existing value");
326 Iter->second[Part] = V;
327 }
328
329 /// Set the generated scalar \p V for \p Def and the given \p Instance.
330 void set(VPValue *Def, Value *V, const VPIteration &Instance) {
331 auto Iter = Data.PerPartScalars.insert({Def, {}});
332 auto &PerPartVec = Iter.first->second;
333 if (PerPartVec.size() <= Instance.Part)
334 PerPartVec.resize(Instance.Part + 1);
335 auto &Scalars = PerPartVec[Instance.Part];
336 unsigned CacheIdx = Instance.Lane.mapToCacheIndex(VF);
337 if (Scalars.size() <= CacheIdx)
338 Scalars.resize(CacheIdx + 1);
339 assert(!Scalars[CacheIdx] && "should overwrite existing value");
340 Scalars[CacheIdx] = V;
341 }
342
343 /// Reset an existing scalar value for \p Def and a given \p Instance.
344 void reset(VPValue *Def, Value *V, const VPIteration &Instance) {
345 auto Iter = Data.PerPartScalars.find(Def);
346 assert(Iter != Data.PerPartScalars.end() &&
347 "need to overwrite existing value");
348 assert(Instance.Part < Iter->second.size() &&
349 "need to overwrite existing value");
350 unsigned CacheIdx = Instance.Lane.mapToCacheIndex(VF);
351 assert(CacheIdx < Iter->second[Instance.Part].size() &&
352 "need to overwrite existing value");
353 Iter->second[Instance.Part][CacheIdx] = V;
354 }
355
356 /// Add additional metadata to \p To that was not present on \p Orig.
357 ///
358 /// Currently this is used to add the noalias annotations based on the
359 /// inserted memchecks. Use this for instructions that are *cloned* into the
360 /// vector loop.
361 void addNewMetadata(Instruction *To, const Instruction *Orig);
362
363 /// Add metadata from one instruction to another.
364 ///
365 /// This includes both the original MDs from \p From and additional ones (\see
366 /// addNewMetadata). Use this for *newly created* instructions in the vector
367 /// loop.
368 void addMetadata(Value *To, Instruction *From);
369
370 /// Set the debug location in the builder using the debug location \p DL.
372
373 /// Construct the vector value of a scalarized value \p V one lane at a time.
375
376 /// Hold state information used when constructing the CFG of the output IR,
377 /// traversing the VPBasicBlocks and generating corresponding IR BasicBlocks.
378 struct CFGState {
379 /// The previous VPBasicBlock visited. Initially set to null.
381
382 /// The previous IR BasicBlock created or used. Initially set to the new
383 /// header BasicBlock.
384 BasicBlock *PrevBB = nullptr;
385
386 /// The last IR BasicBlock in the output IR. Set to the exit block of the
387 /// vector loop.
388 BasicBlock *ExitBB = nullptr;
389
390 /// A mapping of each VPBasicBlock to the corresponding BasicBlock. In case
391 /// of replication, maps the BasicBlock of the last replica created.
393
394 /// Updater for the DominatorTree.
396
398 : DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy) {}
399
400 /// Returns the BasicBlock* mapped to the pre-header of the loop region
401 /// containing \p R.
404
405 /// Hold a pointer to LoopInfo to register new basic blocks in the loop.
407
408 /// Hold a reference to the IRBuilder used to generate output IR code.
410
411 /// Hold a pointer to InnerLoopVectorizer to reuse its IR generation methods.
413
414 /// Pointer to the VPlan code is generated for.
416
417 /// The loop object for the current parent region, or nullptr.
419
420 /// LoopVersioning. It's only set up (non-null) if memchecks were
421 /// used.
422 ///
423 /// This is currently only used to add no-alias metadata based on the
424 /// memchecks. The actual versioning is performed manually.
426
427 /// Map SCEVs to their expanded values. Populated when executing
428 /// VPExpandSCEVRecipes.
430
431 /// VPlan-based type analysis.
433};
434
435/// VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
436/// A VPBlockBase can be either a VPBasicBlock or a VPRegionBlock.
438 friend class VPBlockUtils;
439
440 const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).
441
442 /// An optional name for the block.
443 std::string Name;
444
445 /// The immediate VPRegionBlock which this VPBlockBase belongs to, or null if
446 /// it is a topmost VPBlockBase.
447 VPRegionBlock *Parent = nullptr;
448
449 /// List of predecessor blocks.
451
452 /// List of successor blocks.
454
455 /// VPlan containing the block. Can only be set on the entry block of the
456 /// plan.
457 VPlan *Plan = nullptr;
458
459 /// Add \p Successor as the last successor to this block.
460 void appendSuccessor(VPBlockBase *Successor) {
461 assert(Successor && "Cannot add nullptr successor!");
462 Successors.push_back(Successor);
463 }
464
465 /// Add \p Predecessor as the last predecessor to this block.
466 void appendPredecessor(VPBlockBase *Predecessor) {
467 assert(Predecessor && "Cannot add nullptr predecessor!");
468 Predecessors.push_back(Predecessor);
469 }
470
471 /// Remove \p Predecessor from the predecessors of this block.
472 void removePredecessor(VPBlockBase *Predecessor) {
473 auto Pos = find(Predecessors, Predecessor);
474 assert(Pos && "Predecessor does not exist");
475 Predecessors.erase(Pos);
476 }
477
478 /// Remove \p Successor from the successors of this block.
479 void removeSuccessor(VPBlockBase *Successor) {
480 auto Pos = find(Successors, Successor);
481 assert(Pos && "Successor does not exist");
482 Successors.erase(Pos);
483 }
484
485protected:
486 VPBlockBase(const unsigned char SC, const std::string &N)
487 : SubclassID(SC), Name(N) {}
488
489public:
490 /// An enumeration for keeping track of the concrete subclass of VPBlockBase
491 /// that are actually instantiated. Values of this enumeration are kept in the
492 /// SubclassID field of the VPBlockBase objects. They are used for concrete
493 /// type identification.
494 using VPBlockTy = enum { VPRegionBlockSC, VPBasicBlockSC, VPIRBasicBlockSC };
495
497
498 virtual ~VPBlockBase() = default;
499
500 const std::string &getName() const { return Name; }
501
502 void setName(const Twine &newName) { Name = newName.str(); }
503
504 /// \return an ID for the concrete type of this object.
505 /// This is used to implement the classof checks. This should not be used
506 /// for any other purpose, as the values may change as LLVM evolves.
507 unsigned getVPBlockID() const { return SubclassID; }
508
509 VPRegionBlock *getParent() { return Parent; }
510 const VPRegionBlock *getParent() const { return Parent; }
511
512 /// \return A pointer to the plan containing the current block.
513 VPlan *getPlan();
514 const VPlan *getPlan() const;
515
516 /// Sets the pointer of the plan containing the block. The block must be the
517 /// entry block into the VPlan.
518 void setPlan(VPlan *ParentPlan);
519
520 void setParent(VPRegionBlock *P) { Parent = P; }
521
522 /// \return the VPBasicBlock that is the entry of this VPBlockBase,
523 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
524 /// VPBlockBase is a VPBasicBlock, it is returned.
525 const VPBasicBlock *getEntryBasicBlock() const;
527
528 /// \return the VPBasicBlock that is the exiting block of this VPBlockBase,
529 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
530 /// VPBlockBase is a VPBasicBlock, it is returned.
531 const VPBasicBlock *getExitingBasicBlock() const;
533
534 const VPBlocksTy &getSuccessors() const { return Successors; }
535 VPBlocksTy &getSuccessors() { return Successors; }
536
538
539 const VPBlocksTy &getPredecessors() const { return Predecessors; }
540 VPBlocksTy &getPredecessors() { return Predecessors; }
541
542 /// \return the successor of this VPBlockBase if it has a single successor.
543 /// Otherwise return a null pointer.
545 return (Successors.size() == 1 ? *Successors.begin() : nullptr);
546 }
547
548 /// \return the predecessor of this VPBlockBase if it has a single
549 /// predecessor. Otherwise return a null pointer.
551 return (Predecessors.size() == 1 ? *Predecessors.begin() : nullptr);
552 }
553
554 size_t getNumSuccessors() const { return Successors.size(); }
555 size_t getNumPredecessors() const { return Predecessors.size(); }
556
557 /// An Enclosing Block of a block B is any block containing B, including B
558 /// itself. \return the closest enclosing block starting from "this", which
559 /// has successors. \return the root enclosing block if all enclosing blocks
560 /// have no successors.
562
563 /// \return the closest enclosing block starting from "this", which has
564 /// predecessors. \return the root enclosing block if all enclosing blocks
565 /// have no predecessors.
567
568 /// \return the successors either attached directly to this VPBlockBase or, if
569 /// this VPBlockBase is the exit block of a VPRegionBlock and has no
570 /// successors of its own, search recursively for the first enclosing
571 /// VPRegionBlock that has successors and return them. If no such
572 /// VPRegionBlock exists, return the (empty) successors of the topmost
573 /// VPBlockBase reached.
576 }
577
578 /// \return the hierarchical successor of this VPBlockBase if it has a single
579 /// hierarchical successor. Otherwise return a null pointer.
582 }
583
584 /// \return the predecessors either attached directly to this VPBlockBase or,
585 /// if this VPBlockBase is the entry block of a VPRegionBlock and has no
586 /// predecessors of its own, search recursively for the first enclosing
587 /// VPRegionBlock that has predecessors and return them. If no such
588 /// VPRegionBlock exists, return the (empty) predecessors of the topmost
589 /// VPBlockBase reached.
592 }
593
594 /// \return the hierarchical predecessor of this VPBlockBase if it has a
595 /// single hierarchical predecessor. Otherwise return a null pointer.
598 }
599
600 /// Set a given VPBlockBase \p Successor as the single successor of this
601 /// VPBlockBase. This VPBlockBase is not added as predecessor of \p Successor.
602 /// This VPBlockBase must have no successors.
604 assert(Successors.empty() && "Setting one successor when others exist.");
605 assert(Successor->getParent() == getParent() &&
606 "connected blocks must have the same parent");
607 appendSuccessor(Successor);
608 }
609
610 /// Set two given VPBlockBases \p IfTrue and \p IfFalse to be the two
611 /// successors of this VPBlockBase. This VPBlockBase is not added as
612 /// predecessor of \p IfTrue or \p IfFalse. This VPBlockBase must have no
613 /// successors.
614 void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse) {
615 assert(Successors.empty() && "Setting two successors when others exist.");
616 appendSuccessor(IfTrue);
617 appendSuccessor(IfFalse);
618 }
619
620 /// Set each VPBasicBlock in \p NewPreds as predecessor of this VPBlockBase.
621 /// This VPBlockBase must have no predecessors. This VPBlockBase is not added
622 /// as successor of any VPBasicBlock in \p NewPreds.
624 assert(Predecessors.empty() && "Block predecessors already set.");
625 for (auto *Pred : NewPreds)
626 appendPredecessor(Pred);
627 }
628
629 /// Set each VPBasicBlock in \p NewSuccs as successor of this VPBlockBase.
630 /// This VPBlockBase must have no successors. This VPBlockBase is not added
631 /// as predecessor of any VPBasicBlock in \p NewSuccs.
633 assert(Successors.empty() && "Block successors already set.");
634 for (auto *Succ : NewSuccs)
635 appendSuccessor(Succ);
636 }
637
638 /// Remove all the predecessors of this block.
639 void clearPredecessors() { Predecessors.clear(); }
640
641 /// Remove all the successors of this block.
642 void clearSuccessors() { Successors.clear(); }
643
644 /// The method which generates the output IR that corresponds to this
645 /// VPBlockBase, thereby "executing" the VPlan.
646 virtual void execute(VPTransformState *State) = 0;
647
648 /// Return the cost of the block.
650
651 /// Delete all blocks reachable from a given VPBlockBase, inclusive.
652 static void deleteCFG(VPBlockBase *Entry);
653
654 /// Return true if it is legal to hoist instructions into this block.
656 // There are currently no constraints that prevent an instruction to be
657 // hoisted into a VPBlockBase.
658 return true;
659 }
660
661 /// Replace all operands of VPUsers in the block with \p NewValue and also
662 /// replaces all uses of VPValues defined in the block with NewValue.
663 virtual void dropAllReferences(VPValue *NewValue) = 0;
664
665#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
666 void printAsOperand(raw_ostream &OS, bool PrintType) const {
667 OS << getName();
668 }
669
670 /// Print plain-text dump of this VPBlockBase to \p O, prefixing all lines
671 /// with \p Indent. \p SlotTracker is used to print unnamed VPValue's using
672 /// consecutive numbers.
673 ///
674 /// Note that the numbering is applied to the whole VPlan, so printing
675 /// individual blocks is consistent with the whole VPlan printing.
676 virtual void print(raw_ostream &O, const Twine &Indent,
677 VPSlotTracker &SlotTracker) const = 0;
678
679 /// Print plain-text dump of this VPlan to \p O.
680 void print(raw_ostream &O) const {
682 print(O, "", SlotTracker);
683 }
684
685 /// Print the successors of this block to \p O, prefixing all lines with \p
686 /// Indent.
687 void printSuccessors(raw_ostream &O, const Twine &Indent) const;
688
689 /// Dump this VPBlockBase to dbgs().
690 LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
691#endif
692
693 /// Clone the current block and its recipes without updating the operands of
694 /// the cloned recipes, including all blocks in the single-entry single-exit
695 /// region for VPRegionBlocks.
696 virtual VPBlockBase *clone() = 0;
697};
698
699/// A value that is used outside the VPlan. The operand of the user needs to be
700/// added to the associated phi node. The incoming block from VPlan is
701/// determined by where the VPValue is defined: if it is defined by a recipe
702/// outside a region, its parent block is used, otherwise the middle block is
703/// used.
704class VPLiveOut : public VPUser {
705 PHINode *Phi;
706
707public:
709 : VPUser({Op}, VPUser::VPUserID::LiveOut), Phi(Phi) {}
710
711 static inline bool classof(const VPUser *U) {
712 return U->getVPUserID() == VPUser::VPUserID::LiveOut;
713 }
714
715 /// Fix the wrapped phi node. This means adding an incoming value to exit
716 /// block phi's from the vector loop via middle block (values from scalar loop
717 /// already reach these phi's), and updating the value to scalar header phi's
718 /// from the scalar preheader.
719 void fixPhi(VPlan &Plan, VPTransformState &State);
720
721 /// Returns true if the VPLiveOut uses scalars of operand \p Op.
722 bool usesScalars(const VPValue *Op) const override {
724 "Op must be an operand of the recipe");
725 return true;
726 }
727
728 PHINode *getPhi() const { return Phi; }
729
730#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
731 /// Print the VPLiveOut to \p O.
733#endif
734};
735
736/// Struct to hold various analysis needed for cost computations.
743
746 : TTI(TTI), Types(CanIVTy, LLVMCtx), LLVMCtx(LLVMCtx), CM(CM) {}
747
748 /// Return the cost for \p UI with \p VF using the legacy cost model as
749 /// fallback until computing the cost of all recipes migrates to VPlan.
751
752 /// Return true if the cost for \p UI shouldn't be computed, e.g. because it
753 /// has already been pre-computed.
754 bool skipCostComputation(Instruction *UI, bool IsVector) const;
755};
756
757/// VPRecipeBase is a base class modeling a sequence of one or more output IR
758/// instructions. VPRecipeBase owns the VPValues it defines through VPDef
759/// and is responsible for deleting its defined values. Single-value
760/// recipes must inherit from VPSingleDefRecipe instead of inheriting from both
761/// VPRecipeBase and VPValue separately.
762class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
763 public VPDef,
764 public VPUser {
765 friend VPBasicBlock;
766 friend class VPBlockUtils;
767
768 /// Each VPRecipe belongs to a single VPBasicBlock.
769 VPBasicBlock *Parent = nullptr;
770
771 /// The debug location for the recipe.
772 DebugLoc DL;
773
774public:
776 DebugLoc DL = {})
778
779 template <typename IterT>
781 DebugLoc DL = {})
783 virtual ~VPRecipeBase() = default;
784
785 /// Clone the current recipe.
786 virtual VPRecipeBase *clone() = 0;
787
788 /// \return the VPBasicBlock which this VPRecipe belongs to.
789 VPBasicBlock *getParent() { return Parent; }
790 const VPBasicBlock *getParent() const { return Parent; }
791
792 /// The method which generates the output IR instructions that correspond to
793 /// this VPRecipe, thereby "executing" the VPlan.
794 virtual void execute(VPTransformState &State) = 0;
795
796 /// Return the cost of this recipe, taking into account if the cost
797 /// computation should be skipped and the ForceTargetInstructionCost flag.
798 /// Also takes care of printing the cost for debugging.
800
801 /// Insert an unlinked recipe into a basic block immediately before
802 /// the specified recipe.
803 void insertBefore(VPRecipeBase *InsertPos);
804 /// Insert an unlinked recipe into \p BB immediately before the insertion
805 /// point \p IP;
807
808 /// Insert an unlinked Recipe into a basic block immediately after
809 /// the specified Recipe.
810 void insertAfter(VPRecipeBase *InsertPos);
811
812 /// Unlink this recipe from its current VPBasicBlock and insert it into
813 /// the VPBasicBlock that MovePos lives in, right after MovePos.
814 void moveAfter(VPRecipeBase *MovePos);
815
816 /// Unlink this recipe and insert into BB before I.
817 ///
818 /// \pre I is a valid iterator into BB.
820
821 /// This method unlinks 'this' from the containing basic block, but does not
822 /// delete it.
823 void removeFromParent();
824
825 /// This method unlinks 'this' from the containing basic block and deletes it.
826 ///
827 /// \returns an iterator pointing to the element after the erased one
829
830 /// Method to support type inquiry through isa, cast, and dyn_cast.
831 static inline bool classof(const VPDef *D) {
832 // All VPDefs are also VPRecipeBases.
833 return true;
834 }
835
836 static inline bool classof(const VPUser *U) {
837 return U->getVPUserID() == VPUser::VPUserID::Recipe;
838 }
839
840 /// Returns true if the recipe may have side-effects.
841 bool mayHaveSideEffects() const;
842
843 /// Returns true for PHI-like recipes.
844 bool isPhi() const {
845 return getVPDefID() >= VPFirstPHISC && getVPDefID() <= VPLastPHISC;
846 }
847
848 /// Returns true if the recipe may read from memory.
849 bool mayReadFromMemory() const;
850
851 /// Returns true if the recipe may write to memory.
852 bool mayWriteToMemory() const;
853
854 /// Returns true if the recipe may read from or write to memory.
855 bool mayReadOrWriteMemory() const {
857 }
858
859 /// Returns the debug location of the recipe.
860 DebugLoc getDebugLoc() const { return DL; }
861
862protected:
863 /// Compute the cost of this recipe using the legacy cost model and the
864 /// underlying instructions.
866};
867
868// Helper macro to define common classof implementations for recipes.
869#define VP_CLASSOF_IMPL(VPDefID) \
870 static inline bool classof(const VPDef *D) { \
871 return D->getVPDefID() == VPDefID; \
872 } \
873 static inline bool classof(const VPValue *V) { \
874 auto *R = V->getDefiningRecipe(); \
875 return R && R->getVPDefID() == VPDefID; \
876 } \
877 static inline bool classof(const VPUser *U) { \
878 auto *R = dyn_cast<VPRecipeBase>(U); \
879 return R && R->getVPDefID() == VPDefID; \
880 } \
881 static inline bool classof(const VPRecipeBase *R) { \
882 return R->getVPDefID() == VPDefID; \
883 } \
884 static inline bool classof(const VPSingleDefRecipe *R) { \
885 return R->getVPDefID() == VPDefID; \
886 }
887
888/// VPSingleDefRecipe is a base class for recipes modeling a sequence of one
889/// or more output IR instructions that define a single result VPValue.
890/// Note that VPRecipeBase must be inherited from before VPValue.
891class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
892public:
893 template <typename IterT>
894 VPSingleDefRecipe(const unsigned char SC, IterT Operands, DebugLoc DL = {})
895 : VPRecipeBase(SC, Operands, DL), VPValue(this) {}
896
897 VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
898 DebugLoc DL = {})
899 : VPRecipeBase(SC, Operands, DL), VPValue(this) {}
900
901 template <typename IterT>
902 VPSingleDefRecipe(const unsigned char SC, IterT Operands, Value *UV,
903 DebugLoc DL = {})
904 : VPRecipeBase(SC, Operands, DL), VPValue(this, UV) {}
905
/// Method to support type inquiry through isa, cast, and dyn_cast. Returns
/// true for the recipe IDs in the first group of cases and false for the IDs
/// in the second group.
906 static inline bool classof(const VPRecipeBase *R) {
// No default case: an ID that is missing from both groups falls out of the
// switch and reaches the llvm_unreachable below.
907 switch (R->getVPDefID()) {
908 case VPRecipeBase::VPDerivedIVSC:
909 case VPRecipeBase::VPEVLBasedIVPHISC:
910 case VPRecipeBase::VPExpandSCEVSC:
911 case VPRecipeBase::VPInstructionSC:
912 case VPRecipeBase::VPReductionEVLSC:
913 case VPRecipeBase::VPReductionSC:
914 case VPRecipeBase::VPReplicateSC:
915 case VPRecipeBase::VPScalarIVStepsSC:
916 case VPRecipeBase::VPVectorPointerSC:
917 case VPRecipeBase::VPWidenCallSC:
918 case VPRecipeBase::VPWidenCanonicalIVSC:
919 case VPRecipeBase::VPWidenCastSC:
920 case VPRecipeBase::VPWidenGEPSC:
921 case VPRecipeBase::VPWidenSC:
922 case VPRecipeBase::VPWidenSelectSC:
923 case VPRecipeBase::VPBlendSC:
924 case VPRecipeBase::VPPredInstPHISC:
925 case VPRecipeBase::VPCanonicalIVPHISC:
926 case VPRecipeBase::VPActiveLaneMaskPHISC:
927 case VPRecipeBase::VPFirstOrderRecurrencePHISC:
928 case VPRecipeBase::VPWidenPHISC:
929 case VPRecipeBase::VPWidenIntOrFpInductionSC:
930 case VPRecipeBase::VPWidenPointerInductionSC:
931 case VPRecipeBase::VPReductionPHISC:
932 case VPRecipeBase::VPScalarCastSC:
// All IDs listed above are accepted as VPSingleDefRecipes.
933 return true;
934 case VPRecipeBase::VPInterleaveSC:
935 case VPRecipeBase::VPBranchOnMaskSC:
936 case VPRecipeBase::VPWidenLoadEVLSC:
937 case VPRecipeBase::VPWidenLoadSC:
938 case VPRecipeBase::VPWidenStoreEVLSC:
939 case VPRecipeBase::VPWidenStoreSC:
940 // TODO: Widened stores don't define a value, but widened loads do. Split
941 // the recipes to be able to make widened loads VPSingleDefRecipes.
942 return false;
943 }
944 llvm_unreachable("Unhandled VPDefID");
945 }
946
/// Type inquiry for VPUsers: returns true iff \p U is a VPRecipeBase that is
/// accepted by the VPRecipeBase overload of classof.
947 static inline bool classof(const VPUser *U) {
948 auto *R = dyn_cast<VPRecipeBase>(U);
949 return R && classof(R);
950 }
951
952 virtual VPSingleDefRecipe *clone() override = 0;
953
954 /// Returns the underlying instruction.
956 return cast<Instruction>(getUnderlyingValue());
957 }
959 return cast<Instruction>(getUnderlyingValue());
960 }
961};
962
963/// Class to record the LLVM IR flags of a recipe alongside the recipe itself.
/// Kind of operation the recorded flags belong to. The accessors and
/// setFlags() check this value before reading the matching flags member.
965 enum class OperationType : unsigned char {
966 Cmp, // Compare; a predicate is recorded.
967 OverflowingBinOp, // NUW/NSW wrap flags are recorded.
968 DisjointOp, // The disjoint flag is recorded.
969 PossiblyExactOp, // The exact flag is recorded.
970 GEPOp, // The inbounds flag is recorded.
971 FPMathOp, // Fast-math flags are recorded.
972 NonNegOp, // The non-negative flag is recorded.
973 Other // No flags; all flag bits are cleared.
974 };
975
976public:
977 struct WrapFlagsTy {
978 char HasNUW : 1;
979 char HasNSW : 1;
980
982 };
983
985 char IsDisjoint : 1;
987 };
988
989protected:
990 struct GEPFlagsTy {
991 char IsInBounds : 1;
993 };
994
995private:
996 struct ExactFlagsTy {
997 char IsExact : 1;
998 };
999 struct NonNegFlagsTy {
1000 char NonNeg : 1;
1001 };
/// One-bit fields, one per fast-math flag. Constructible from an
/// llvm::FastMathFlags value; the constructor is only declared here.
1002 struct FastMathFlagsTy {
1003 char AllowReassoc : 1;
1004 char NoNaNs : 1;
1005 char NoInfs : 1;
1006 char NoSignedZeros : 1;
1007 char AllowReciprocal : 1;
1008 char AllowContract : 1;
1009 char ApproxFunc : 1;
1010
1011 FastMathFlagsTy(const FastMathFlags &FMF);
1012 };
1013
1014 OperationType OpType;
1015
1016 union {
1020 ExactFlagsTy ExactFlags;
1022 NonNegFlagsTy NonNegFlags;
1023 FastMathFlagsTy FMFs;
1024 unsigned AllFlags;
1025 };
1026
1027protected:
1029 OpType = Other.OpType;
1030 AllFlags = Other.AllFlags;
1031 }
1032
1033public:
/// Create a recipe that carries no IR flags: the operation type is set to
/// Other and all flag bits are cleared.
1034 template <typename IterT>
1035 VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, DebugLoc DL = {})
1036 : VPSingleDefRecipe(SC, Operands, DL) {
1037 OpType = OperationType::Other;
1038 AllFlags = 0;
1039 }
1040
1041 template <typename IterT>
1042 VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, Instruction &I)
1044 if (auto *Op = dyn_cast<CmpInst>(&I)) {
1045 OpType = OperationType::Cmp;
1046 CmpPredicate = Op->getPredicate();
1047 } else if (auto *Op = dyn_cast<PossiblyDisjointInst>(&I)) {
1048 OpType = OperationType::DisjointOp;
1049 DisjointFlags.IsDisjoint = Op->isDisjoint();
1050 } else if (auto *Op = dyn_cast<OverflowingBinaryOperator>(&I)) {
1051 OpType = OperationType::OverflowingBinOp;
1052 WrapFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
1053 } else if (auto *Op = dyn_cast<PossiblyExactOperator>(&I)) {
1054 OpType = OperationType::PossiblyExactOp;
1055 ExactFlags.IsExact = Op->isExact();
1056 } else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
1057 OpType = OperationType::GEPOp;
1058 GEPFlags.IsInBounds = GEP->isInBounds();
1059 } else if (auto *PNNI = dyn_cast<PossiblyNonNegInst>(&I)) {
1060 OpType = OperationType::NonNegOp;
1061 NonNegFlags.NonNeg = PNNI->hasNonNeg();
1062 } else if (auto *Op = dyn_cast<FPMathOperator>(&I)) {
1063 OpType = OperationType::FPMathOp;
1064 FMFs = Op->getFastMathFlags();
1065 } else {
1066 OpType = OperationType::Other;
1067 AllFlags = 0;
1068 }
1069 }
1070
/// Create a compare recipe: the operation type is Cmp and \p Pred is
/// recorded as its predicate.
1071 template <typename IterT>
1072 VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
1073 CmpInst::Predicate Pred, DebugLoc DL = {})
1074 : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::Cmp),
1075 CmpPredicate(Pred) {}
1076
1077 template <typename IterT>
1078 VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
1080 : VPSingleDefRecipe(SC, Operands, DL),
1081 OpType(OperationType::OverflowingBinOp), WrapFlags(WrapFlags) {}
1082
/// Create a floating-point math recipe: the operation type is FPMathOp and
/// \p FMFs is converted to the recipe's FastMathFlagsTy bits.
1083 template <typename IterT>
1084 VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
1085 FastMathFlags FMFs, DebugLoc DL = {})
1086 : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::FPMathOp),
1087 FMFs(FMFs) {}
1088
1089 template <typename IterT>
1090 VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
1092 : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::DisjointOp),
1094
1095protected:
/// Create a recipe with operation type GEPOp carrying \p GEPFlags. This
/// constructor sits in a protected section, as does the GEPFlagsTy type it
/// takes.
1096 template <typename IterT>
1097 VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
1098 GEPFlagsTy GEPFlags, DebugLoc DL = {})
1099 : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::GEPOp),
1100 GEPFlags(GEPFlags) {}
1101
1102public:
/// Method to support type inquiry through isa, cast, and dyn_cast. Returns
/// true iff the ID of \p R is one of VPInstructionSC, VPWidenSC,
/// VPWidenGEPSC, VPWidenCastSC, VPReplicateSC or VPVectorPointerSC.
1103 static inline bool classof(const VPRecipeBase *R) {
1104 return R->getVPDefID() == VPRecipeBase::VPInstructionSC ||
1105 R->getVPDefID() == VPRecipeBase::VPWidenSC ||
1106 R->getVPDefID() == VPRecipeBase::VPWidenGEPSC ||
1107 R->getVPDefID() == VPRecipeBase::VPWidenCastSC ||
1108 R->getVPDefID() == VPRecipeBase::VPReplicateSC ||
1109 R->getVPDefID() == VPRecipeBase::VPVectorPointerSC;
1110 }
1111
/// Type inquiry for VPUsers: returns true iff \p U is a VPRecipeBase that is
/// accepted by the VPRecipeBase overload of classof.
1112 static inline bool classof(const VPUser *U) {
1113 auto *R = dyn_cast<VPRecipeBase>(U);
1114 return R && classof(R);
1115 }
1116
1117 /// Drop all poison-generating flags.
1119 // NOTE: This needs to be kept in-sync with
1120 // Instruction::dropPoisonGeneratingFlags.
1121 switch (OpType) {
1122 case OperationType::OverflowingBinOp:
1123 WrapFlags.HasNUW = false;
1124 WrapFlags.HasNSW = false;
1125 break;
1126 case OperationType::DisjointOp:
1127 DisjointFlags.IsDisjoint = false;
1128 break;
1129 case OperationType::PossiblyExactOp:
1130 ExactFlags.IsExact = false;
1131 break;
1132 case OperationType::GEPOp:
1133 GEPFlags.IsInBounds = false;
1134 break;
1135 case OperationType::FPMathOp:
1136 FMFs.NoNaNs = false;
1137 FMFs.NoInfs = false;
1138 break;
1139 case OperationType::NonNegOp:
1140 NonNegFlags.NonNeg = false;
1141 break;
1142 case OperationType::Cmp:
1143 case OperationType::Other:
1144 break;
1145 }
1146 }
1147
1148 /// Set the IR flags for \p I.
1149 void setFlags(Instruction *I) const {
1150 switch (OpType) {
1151 case OperationType::OverflowingBinOp:
1152 I->setHasNoUnsignedWrap(WrapFlags.HasNUW);
1153 I->setHasNoSignedWrap(WrapFlags.HasNSW);
1154 break;
1155 case OperationType::DisjointOp:
1156 cast<PossiblyDisjointInst>(I)->setIsDisjoint(DisjointFlags.IsDisjoint);
1157 break;
1158 case OperationType::PossiblyExactOp:
1159 I->setIsExact(ExactFlags.IsExact);
1160 break;
1161 case OperationType::GEPOp:
1162 // TODO(gep_nowrap): Track the full GEPNoWrapFlags in VPlan.
1163 cast<GetElementPtrInst>(I)->setNoWrapFlags(
1166 break;
1167 case OperationType::FPMathOp:
1168 I->setHasAllowReassoc(FMFs.AllowReassoc);
1169 I->setHasNoNaNs(FMFs.NoNaNs);
1170 I->setHasNoInfs(FMFs.NoInfs);
1171 I->setHasNoSignedZeros(FMFs.NoSignedZeros);
1172 I->setHasAllowReciprocal(FMFs.AllowReciprocal);
1173 I->setHasAllowContract(FMFs.AllowContract);
1174 I->setHasApproxFunc(FMFs.ApproxFunc);
1175 break;
1176 case OperationType::NonNegOp:
1177 I->setNonNeg(NonNegFlags.NonNeg);
1178 break;
1179 case OperationType::Cmp:
1180 case OperationType::Other:
1181 break;
1182 }
1183 }
1184
1186 assert(OpType == OperationType::Cmp &&
1187 "recipe doesn't have a compare predicate");
1188 return CmpPredicate;
1189 }
1190
/// Returns the recorded inbounds flag. Only valid for recipes whose
/// operation type is GEPOp (asserted).
1191 bool isInBounds() const {
1192 assert(OpType == OperationType::GEPOp &&
1193 "recipe doesn't have inbounds flag");
1194 return GEPFlags.IsInBounds;
1195 }
1196
1197 /// Returns true if the recipe has fast-math flags.
1198 bool hasFastMathFlags() const { return OpType == OperationType::FPMathOp; }
1199
1201
/// Returns the recorded no-unsigned-wrap (NUW) flag. Only valid for recipes
/// whose operation type is OverflowingBinOp (asserted).
1202 bool hasNoUnsignedWrap() const {
1203 assert(OpType == OperationType::OverflowingBinOp &&
1204 "recipe doesn't have a NUW flag");
1205 return WrapFlags.HasNUW;
1206 }
1207
/// Returns the recorded no-signed-wrap (NSW) flag. Only valid for recipes
/// whose operation type is OverflowingBinOp (asserted).
1208 bool hasNoSignedWrap() const {
1209 assert(OpType == OperationType::OverflowingBinOp &&
1210 "recipe doesn't have a NSW flag");
1211 return WrapFlags.HasNSW;
1212 }
1213
/// Query for the disjoint flag. Only valid for recipes whose operation type
/// is DisjointOp (asserted).
1214 bool isDisjoint() const {
1215 assert(OpType == OperationType::DisjointOp &&
1216 "recipe cannot have a disjoint flag");
1218 }
1219
1220#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1221 void printFlags(raw_ostream &O) const;
1222#endif
1223};
1224
1225/// This is a concrete Recipe that models a single VPlan-level instruction.
1226/// While, like any Recipe, it may generate a sequence of IR instructions when
1227/// executed, these instructions would always form a single-def expression as
1228/// the VPInstruction is also a single def-use vertex.
1230 friend class VPlanSlp;
1231
1232public:
1233 /// VPlan opcodes, extending LLVM IR with idiomatic instructions.
1234 enum {
1236 Instruction::OtherOpsEnd + 1, // Combines the incoming and previous
1237 // values of a first-order recurrence.
1243 /// Creates a scalar phi in a leaf VPBB with a single predecessor in VPlan.
1244 /// The first operand is the incoming value from the predecessor in VPlan,
1245 /// the second operand is the incoming value for all other predecessors
1246 /// (which are currently not modeled in VPlan).
1249 // Increment the canonical IV separately for each unrolled part.
1254 // Takes the VPValue to extract from as first operand and the lane or part
1255 // to extract as second operand, counting from the end starting with 1 for
1256 // last. The second operand must be a positive constant and <= VF when
1257 // extracting from a vector or <= UF when extracting from an unrolled
1258 // scalar.
1260 LogicalAnd, // Non-poison propagating logical And.
1261 // Add an offset in bytes (second operand) to a base pointer (first
1262 // operand). Only generates scalar values (either for the first lane only or
1263 // for all lanes, depending on its uses).
1265 };
1266
1267private:
1268 typedef unsigned char OpcodeTy;
1269 OpcodeTy Opcode;
1270
1271 /// An optional name that can be used for the generated IR instruction.
1272 const std::string Name;
1273
1274 /// Returns true if this VPInstruction generates scalar values for all lanes.
1275 /// Most VPInstructions generate a single value per part, either vector or
1276 /// scalar. VPReplicateRecipe takes care of generating multiple (scalar)
1277 /// values per all lanes, stemming from an original ingredient. This method
1278 /// identifies the (rare) cases of VPInstructions that do so as well, w/o an
1279 /// underlying ingredient.
1280 bool doesGeneratePerAllLanes() const;
1281
1282 /// Returns true if we can generate a scalar for the first lane only if
1283 /// needed.
1284 bool canGenerateScalarForFirstLane() const;
1285
1286 /// Utility method serving execute(): generates a single instance of the
1287 /// modeled instruction for a given part. \returns the generated value for
1288 /// \p Part. In some cases an existing value is returned rather than a
1289 /// generated one.
1290 Value *generatePerPart(VPTransformState &State, unsigned Part);
1291
1292 /// Utility method serving execute(): generates a single scalar instance of
1293 /// the modeled instruction for a given lane. \returns the generated scalar
1294 /// value for lane \p Lane.
1295 Value *generatePerLane(VPTransformState &State, const VPIteration &Lane);
1296
1297#if !defined(NDEBUG)
1298 /// Return true if the VPInstruction is a floating point math operation, i.e.
1299 /// has fast-math flags.
1300 bool isFPMathOp() const;
1301#endif
1302
1303public:
1305 const Twine &Name = "")
1306 : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, DL),
1307 Opcode(Opcode), Name(Name.str()) {}
1308
1309 VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
1310 DebugLoc DL = {}, const Twine &Name = "")
1312
1313 VPInstruction(unsigned Opcode, CmpInst::Predicate Pred, VPValue *A,
1314 VPValue *B, DebugLoc DL = {}, const Twine &Name = "");
1315
/// Create a VPInstruction with opcode \p Opcode, operands \p Operands and
/// the wrap flags \p WrapFlags. \p Name is copied into the Name member.
// NOTE(review): \p Opcode is an unsigned but is stored in an unsigned char
// (OpcodeTy); presumably every opcode fits -- confirm.
1316 VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
1317 WrapFlagsTy WrapFlags, DebugLoc DL = {}, const Twine &Name = "")
1318 : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, WrapFlags, DL),
1319 Opcode(Opcode), Name(Name.str()) {}
1320
/// Create a VPInstruction with opcode \p Opcode, operands \p Operands and
/// the disjoint flag \p DisjointFlag. \p Name is copied into the Name member.
/// Asserts that \p Opcode is Instruction::Or.
1321 VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
1322 DisjointFlagsTy DisjointFlag, DebugLoc DL = {},
1323 const Twine &Name = "")
1324 : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, DisjointFlag, DL),
1325 Opcode(Opcode), Name(Name.str()) {
1326 assert(Opcode == Instruction::Or && "only OR opcodes can be disjoint");
1327 }
1328
1329 VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
1330 FastMathFlags FMFs, DebugLoc DL = {}, const Twine &Name = "");
1331
1332 VP_CLASSOF_IMPL(VPDef::VPInstructionSC)
1333
1334 VPInstruction *clone() override {
1336 auto *New = new VPInstruction(Opcode, Operands, getDebugLoc(), Name);
1337 New->transferFlags(*this);
1338 return New;
1339 }
1340
1341 unsigned getOpcode() const { return Opcode; }
1342
1343 /// Generate the instruction.
1344 /// TODO: We currently execute only per-part unless a specific instance is
1345 /// provided.
1346 void execute(VPTransformState &State) override;
1347
1348#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1349 /// Print the VPInstruction to \p O.
1350 void print(raw_ostream &O, const Twine &Indent,
1351 VPSlotTracker &SlotTracker) const override;
1352
1353 /// Print the VPInstruction to dbgs() (for debugging).
1354 LLVM_DUMP_METHOD void dump() const;
1355#endif
1356
1357 /// Return true if this instruction may modify memory. The answer is based
/// on the opcode alone: Store, Call, Invoke and SLPStore yield true, every
/// other opcode yields false.
1358 bool mayWriteToMemory() const {
1359 // TODO: we can use attributes of the called function to rule out memory
1360 // modifications.
1361 return Opcode == Instruction::Store || Opcode == Instruction::Call ||
1362 Opcode == Instruction::Invoke || Opcode == SLPStore;
1363 }
1364
1365 bool hasResult() const {
1366 // CallInst may or may not have a result, depending on the called function.
1367 // Conservatively assume that calls have results for now.
1368 switch (getOpcode()) {
1369 case Instruction::Ret:
1370 case Instruction::Br:
1371 case Instruction::Store:
1372 case Instruction::Switch:
1373 case Instruction::IndirectBr:
1374 case Instruction::Resume:
1375 case Instruction::CatchRet:
1376 case Instruction::Unreachable:
1377 case Instruction::Fence:
1378 case Instruction::AtomicRMW:
1381 return false;
1382 default:
1383 return true;
1384 }
1385 }
1386
1387 /// Returns true if the recipe only uses the first lane of operand \p Op.
1388 bool onlyFirstLaneUsed(const VPValue *Op) const override;
1389
1390 /// Returns true if the recipe only uses the first part of operand \p Op.
1391 bool onlyFirstPartUsed(const VPValue *Op) const override;
1392
1393 /// Returns true if this VPInstruction produces a scalar value from a vector,
1394 /// e.g. by performing a reduction or extracting a lane.
1395 bool isVectorToScalar() const;
1396
1397 /// Returns true if this VPInstruction's operands are single scalars and the
1398 /// result is also a single scalar.
1399 bool isSingleScalar() const;
1400};
1401
1402/// VPWidenRecipe is a recipe for producing a widened instruction using the
1403/// opcode and operands of the recipe. This recipe covers most of the
1404/// traditional vectorization cases where each recipe transforms into a
1405/// vectorized version of itself.
1407 unsigned Opcode;
1408
1409public:
1410 template <typename IterT>
1412 : VPRecipeWithIRFlags(VPDef::VPWidenSC, Operands, I),
1413 Opcode(I.getOpcode()) {}
1414
1415 ~VPWidenRecipe() override = default;
1416
/// Create a new VPWidenRecipe for the same underlying instruction and
/// operands as this recipe.
1417 VPWidenRecipe *clone() override {
1418 auto *R = new VPWidenRecipe(*getUnderlyingInstr(), operands());
// Copy this recipe's operation type and flag bits onto the clone, replacing
// whatever the constructor derived from the underlying instruction.
1419 R->transferFlags(*this);
1420 return R;
1421 }
1422
1423 VP_CLASSOF_IMPL(VPDef::VPWidenSC)
1424
1425 /// Produce a widened instruction using the opcode and operands of the recipe,
1426 /// processing State.VF elements.
1427 void execute(VPTransformState &State) override;
1428
1429 unsigned getOpcode() const { return Opcode; }
1430
1431#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1432 /// Print the recipe.
1433 void print(raw_ostream &O, const Twine &Indent,
1434 VPSlotTracker &SlotTracker) const override;
1435#endif
1436};
1437
1438/// VPWidenCastRecipe is a recipe to create vector cast instructions.
1440 /// Cast instruction opcode.
1441 Instruction::CastOps Opcode;
1442
1443 /// Result type for the cast.
1444 Type *ResultTy;
1445
1446public:
1448 CastInst &UI)
1449 : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op, UI), Opcode(Opcode),
1450 ResultTy(ResultTy) {
1451 assert(UI.getOpcode() == Opcode &&
1452 "opcode of underlying cast doesn't match");
1453 }
1454
1456 : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op), Opcode(Opcode),
1457 ResultTy(ResultTy) {}
1458
1459 ~VPWidenCastRecipe() override = default;
1460
1462 if (auto *UV = getUnderlyingValue())
1463 return new VPWidenCastRecipe(Opcode, getOperand(0), ResultTy,
1464 *cast<CastInst>(UV));
1465
1466 return new VPWidenCastRecipe(Opcode, getOperand(0), ResultTy);
1467 }
1468
1469 VP_CLASSOF_IMPL(VPDef::VPWidenCastSC)
1470
1471 /// Produce widened copies of the cast.
1472 void execute(VPTransformState &State) override;
1473
1474#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1475 /// Print the recipe.
1476 void print(raw_ostream &O, const Twine &Indent,
1477 VPSlotTracker &SlotTracker) const override;
1478#endif
1479
1480 Instruction::CastOps getOpcode() const { return Opcode; }
1481
1482 /// Returns the result type of the cast.
1483 Type *getResultType() const { return ResultTy; }
1484};
1485
1486/// VPScalarCastRecipe is a recipe to create scalar cast instructions.
1488 Instruction::CastOps Opcode;
1489
1490 Type *ResultTy;
1491
1492 Value *generate(VPTransformState &State, unsigned Part);
1493
1494public:
1496 : VPSingleDefRecipe(VPDef::VPScalarCastSC, {Op}), Opcode(Opcode),
1497 ResultTy(ResultTy) {}
1498
1499 ~VPScalarCastRecipe() override = default;
1500
1502 return new VPScalarCastRecipe(Opcode, getOperand(0), ResultTy);
1503 }
1504
1505 VP_CLASSOF_IMPL(VPDef::VPScalarCastSC)
1506
1507 void execute(VPTransformState &State) override;
1508
1509#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1510 void print(raw_ostream &O, const Twine &Indent,
1511 VPSlotTracker &SlotTracker) const override;
1512#endif
1513
1514 /// Returns the result type of the cast.
1515 Type *getResultType() const { return ResultTy; }
1516
1517 bool onlyFirstLaneUsed(const VPValue *Op) const override {
1518 // At the moment, only uniform codegen is implemented.
1520 "Op must be an operand of the recipe");
1521 return true;
1522 }
1523};
1524
1525/// A recipe for widening Call instructions.
1527 /// ID of the vector intrinsic to call when widening the call. If set to
1528 /// Intrinsic::not_intrinsic, a library call will be used instead.
1529 Intrinsic::ID VectorIntrinsicID;
1530 /// If this recipe represents a library call, Variant stores a pointer to
1531 /// the chosen function. There is a 1:1 mapping between a given VF and the
1532 /// chosen vectorized variant, so there will be a different vplan for each
1533 /// VF with a valid variant.
1534 Function *Variant;
1535
1536public:
1537 template <typename IterT>
1539 Intrinsic::ID VectorIntrinsicID, DebugLoc DL = {},
1540 Function *Variant = nullptr)
1541 : VPSingleDefRecipe(VPDef::VPWidenCallSC, CallArguments, UV, DL),
1542 VectorIntrinsicID(VectorIntrinsicID), Variant(Variant) {
1543 assert(
1544 isa<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue()) &&
1545 "last operand must be the called function");
1546 }
1547
1548 ~VPWidenCallRecipe() override = default;
1549
1552 VectorIntrinsicID, getDebugLoc(), Variant);
1553 }
1554
1555 VP_CLASSOF_IMPL(VPDef::VPWidenCallSC)
1556
1557 /// Produce a widened version of the call instruction.
1558 void execute(VPTransformState &State) override;
1559
1561 return cast<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue());
1562 }
1563
1565 return make_range(op_begin(), op_begin() + getNumOperands() - 1);
1566 }
1568 return make_range(op_begin(), op_begin() + getNumOperands() - 1);
1569 }
1570
1571#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1572 /// Print the recipe.
1573 void print(raw_ostream &O, const Twine &Indent,
1574 VPSlotTracker &SlotTracker) const override;
1575#endif
1576};
1577
1578/// A recipe for widening select instructions.
1580 template <typename IterT>
1582 : VPSingleDefRecipe(VPDef::VPWidenSelectSC, Operands, &I,
1583 I.getDebugLoc()) {}
1584
1585 ~VPWidenSelectRecipe() override = default;
1586
1588 return new VPWidenSelectRecipe(*cast<SelectInst>(getUnderlyingInstr()),
1589 operands());
1590 }
1591
1592 VP_CLASSOF_IMPL(VPDef::VPWidenSelectSC)
1593
1594 /// Produce a widened version of the select instruction.
1595 void execute(VPTransformState &State) override;
1596
1597#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1598 /// Print the recipe.
1599 void print(raw_ostream &O, const Twine &Indent,
1600 VPSlotTracker &SlotTracker) const override;
1601#endif
1602
/// Returns the condition of the select, which is stored as operand 0.
1603 VPValue *getCond() const {
1604 return getOperand(0);
1605 }
1606
1607 bool isInvariantCond() const {
1609 }
1610};
1611
1612/// A recipe for handling GEP instructions.
1614 bool isPointerLoopInvariant() const {
1616 }
1617
1618 bool isIndexLoopInvariant(unsigned I) const {
1620 }
1621
/// Returns true iff every operand of the recipe is defined outside the
/// vector regions (trivially true when there are no operands).
1622 bool areAllOperandsInvariant() const {
1623 return all_of(operands(), [](VPValue *Op) {
1624 return Op->isDefinedOutsideVectorRegions();
1625 });
1626 }
1627
1628public:
1629 template <typename IterT>
1631 : VPRecipeWithIRFlags(VPDef::VPWidenGEPSC, Operands, *GEP) {}
1632
1633 ~VPWidenGEPRecipe() override = default;
1634
1636 return new VPWidenGEPRecipe(cast<GetElementPtrInst>(getUnderlyingInstr()),
1637 operands());
1638 }
1639
1640 VP_CLASSOF_IMPL(VPDef::VPWidenGEPSC)
1641
1642 /// Generate the gep nodes.
1643 void execute(VPTransformState &State) override;
1644
1645#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1646 /// Print the recipe.
1647 void print(raw_ostream &O, const Twine &Indent,
1648 VPSlotTracker &SlotTracker) const override;
1649#endif
1650};
1651
1652/// A recipe to compute the pointers for widened memory accesses of IndexedTy
1653/// for all parts. If IsReverse is true, compute pointers for accessing the
1654/// input in reverse order per part.
1656 Type *IndexedTy;
1657 bool IsReverse;
1658
1659public:
/// Create a vector-pointer recipe whose single operand is \p Ptr.
/// \p IsInBounds is recorded as the recipe's GEP inbounds flag; \p IndexedTy
/// and \p IsReverse are stored in the members of the same name.
1660 VPVectorPointerRecipe(VPValue *Ptr, Type *IndexedTy, bool IsReverse,
1661 bool IsInBounds, DebugLoc DL)
1662 : VPRecipeWithIRFlags(VPDef::VPVectorPointerSC, ArrayRef<VPValue *>(Ptr),
1663 GEPFlagsTy(IsInBounds), DL),
1664 IndexedTy(IndexedTy), IsReverse(IsReverse) {}
1665
1666 VP_CLASSOF_IMPL(VPDef::VPVectorPointerSC)
1667
1668 void execute(VPTransformState &State) override;
1669
1670 bool onlyFirstLaneUsed(const VPValue *Op) const override {
1672 "Op must be an operand of the recipe");
1673 return true;
1674 }
1675
1677 return new VPVectorPointerRecipe(getOperand(0), IndexedTy, IsReverse,
1678 isInBounds(), getDebugLoc());
1679 }
1680
1681#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1682 /// Print the recipe.
1683 void print(raw_ostream &O, const Twine &Indent,
1684 VPSlotTracker &SlotTracker) const override;
1685#endif
1686};
1687
1688/// A pure virtual base class for all recipes modeling header phis, including
1689/// phis for first order recurrences, pointer inductions and reductions. The
1690/// start value is the first operand of the recipe and the incoming value from
1691/// the backedge is the second operand.
1692///
1693/// Inductions are modeled using the following sub-classes:
1694/// * VPCanonicalIVPHIRecipe: Canonical scalar induction of the vector loop,
1695/// starting at a specified value (zero for the main vector loop, the resume
1696/// value for the epilogue vector loop) and stepping by 1. The induction
1697/// controls exiting of the vector loop by comparing against the vector trip
1698/// count. Produces a single scalar PHI for the induction value per
1699/// iteration.
1700/// * VPWidenIntOrFpInductionRecipe: Generates vector values for integer and
1701/// floating point inductions with arbitrary start and step values. Produces
1702/// a vector PHI per-part.
1703/// * VPDerivedIVRecipe: Converts the canonical IV value to the corresponding
1704/// value of an IV with different start and step values. Produces a single
1705/// scalar value per iteration
1706/// * VPScalarIVStepsRecipe: Generates scalar values per-lane based on a
1707/// canonical or derived induction.
1708/// * VPWidenPointerInductionRecipe: Generate vector and scalar values for a
1709/// pointer induction. Produces either a vector PHI per-part or scalar values
1710/// per-lane based on the canonical induction.
1712protected:
/// Create a header phi recipe with ID \p VPDefID for \p UnderlyingInstr. The
/// base class is constructed without operands; if \p Start is non-null, it
/// is then added as the first operand.
1713 VPHeaderPHIRecipe(unsigned char VPDefID, Instruction *UnderlyingInstr,
1714 VPValue *Start = nullptr, DebugLoc DL = {})
1715 : VPSingleDefRecipe(VPDefID, ArrayRef<VPValue *>(), UnderlyingInstr, DL) {
1716 if (Start)
1717 addOperand(Start);
1718 }
1719
1720public:
1721 ~VPHeaderPHIRecipe() override = default;
1722
1723 /// Method to support type inquiry through isa, cast, and dyn_cast.
1724 static inline bool classof(const VPRecipeBase *B) {
1725 return B->getVPDefID() >= VPDef::VPFirstHeaderPHISC &&
1726 B->getVPDefID() <= VPDef::VPLastHeaderPHISC;
1727 }
/// Type inquiry for VPValues: returns true iff \p V has a defining recipe
/// whose ID lies in the range [VPFirstHeaderPHISC, VPLastHeaderPHISC].
1728 static inline bool classof(const VPValue *V) {
1729 auto *B = V->getDefiningRecipe();
1730 return B && B->getVPDefID() >= VPRecipeBase::VPFirstHeaderPHISC &&
1731 B->getVPDefID() <= VPRecipeBase::VPLastHeaderPHISC;
1732 }
1733
1734 /// Generate the phi nodes.
1735 void execute(VPTransformState &State) override = 0;
1736
1737#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1738 /// Print the recipe.
1739 void print(raw_ostream &O, const Twine &Indent,
1740 VPSlotTracker &SlotTracker) const override = 0;
1741#endif
1742
1743 /// Returns the start value of the phi, if one is set.
1745 return getNumOperands() == 0 ? nullptr : getOperand(0);
1746 }
1748 return getNumOperands() == 0 ? nullptr : getOperand(0);
1749 }
1750
1751 /// Update the start value of the recipe.
1753
1754 /// Returns the incoming value from the loop backedge.
1756 return getOperand(1);
1757 }
1758
1759 /// Returns the backedge value as a recipe. The backedge value is guaranteed
1760 /// to be a recipe.
1763 }
1764};
1765
1766/// A recipe for handling phi nodes of integer and floating-point inductions,
1767/// producing their vector values.
1769 PHINode *IV;
1770 TruncInst *Trunc;
1771 const InductionDescriptor &IndDesc;
1772
1773public:
1775 const InductionDescriptor &IndDesc)
1776 : VPHeaderPHIRecipe(VPDef::VPWidenIntOrFpInductionSC, IV, Start), IV(IV),
1777 Trunc(nullptr), IndDesc(IndDesc) {
1778 addOperand(Step);
1779 }
1780
1782 const InductionDescriptor &IndDesc,
1783 TruncInst *Trunc)
1784 : VPHeaderPHIRecipe(VPDef::VPWidenIntOrFpInductionSC, Trunc, Start),
1785 IV(IV), Trunc(Trunc), IndDesc(IndDesc) {
1786 addOperand(Step);
1787 }
1788
1790
1793 getStepValue(), IndDesc, Trunc);
1794 }
1795
1796 VP_CLASSOF_IMPL(VPDef::VPWidenIntOrFpInductionSC)
1797
1798 /// Generate the vectorized and scalarized versions of the phi node as
1799 /// needed by their users.
1800 void execute(VPTransformState &State) override;
1801
1802#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1803 /// Print the recipe.
1804 void print(raw_ostream &O, const Twine &Indent,
1805 VPSlotTracker &SlotTracker) const override;
1806#endif
1807
1809 // TODO: All operands of base recipe must exist and be at same index in
1810 // derived recipe.
1812 "VPWidenIntOrFpInductionRecipe generates its own backedge value");
1813 }
1814
1816 // TODO: All operands of base recipe must exist and be at same index in
1817 // derived recipe.
1819 "VPWidenIntOrFpInductionRecipe generates its own backedge value");
1820 }
1821
1822 /// Returns the step value of the induction.
1824 const VPValue *getStepValue() const { return getOperand(1); }
1825
1826 /// Returns the first defined value as TruncInst, if it is one or nullptr
1827 /// otherwise.
1828 TruncInst *getTruncInst() { return Trunc; }
1829 const TruncInst *getTruncInst() const { return Trunc; }
1830
/// Returns the PHINode stored in the IV member at construction.
1831 PHINode *getPHINode() { return IV; }
1832
1833 /// Returns the induction descriptor for the recipe.
1834 const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }
1835
1836 /// Returns true if the induction is canonical, i.e. starting at 0 and
1837 /// incremented by UF * VF (= the original IV is incremented by 1) and has the
1838 /// same type as the canonical induction.
1839 bool isCanonical() const;
1840
1841 /// Returns the scalar type of the induction.
1843 return Trunc ? Trunc->getType() : IV->getType();
1844 }
1845};
1846
1848 const InductionDescriptor &IndDesc;
1849
1850 bool IsScalarAfterVectorization;
1851
1852public:
1853 /// Create a new VPWidenPointerInductionRecipe for \p Phi with start value \p
1854 /// Start.
1856 const InductionDescriptor &IndDesc,
1857 bool IsScalarAfterVectorization)
1858 : VPHeaderPHIRecipe(VPDef::VPWidenPointerInductionSC, Phi),
1859 IndDesc(IndDesc),
1860 IsScalarAfterVectorization(IsScalarAfterVectorization) {
1861 addOperand(Start);
1862 addOperand(Step);
1863 }
1864
1866
1869 cast<PHINode>(getUnderlyingInstr()), getOperand(0), getOperand(1),
1870 IndDesc, IsScalarAfterVectorization);
1871 }
1872
1873 VP_CLASSOF_IMPL(VPDef::VPWidenPointerInductionSC)
1874
1875 /// Generate vector values for the pointer induction.
1876 void execute(VPTransformState &State) override;
1877
1878 /// Returns true if only scalar values will be generated.
1879 bool onlyScalarsGenerated(bool IsScalable);
1880
1881 /// Returns the induction descriptor for the recipe.
1882 const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }
1883
1884#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1885 /// Print the recipe.
1886 void print(raw_ostream &O, const Twine &Indent,
1887 VPSlotTracker &SlotTracker) const override;
1888#endif
1889};
1890
1891/// A recipe for handling phis that are widened in the vector loop.
1892/// In the VPlan native path, all incoming VPValues & VPBasicBlock pairs are
1893/// managed in the recipe directly.
1895 /// List of incoming blocks. Only used in the VPlan native path.
1896 SmallVector<VPBasicBlock *, 2> IncomingBlocks;
1897
1898public:
1899 /// Create a new VPWidenPHIRecipe for \p Phi. The recipe is created without
/// operands; if \p Start is non-null, it is added as the first operand.
1900 VPWidenPHIRecipe(PHINode *Phi, VPValue *Start = nullptr)
1901 : VPSingleDefRecipe(VPDef::VPWidenPHISC, ArrayRef<VPValue *>(), Phi) {
1902 if (Start)
1903 addOperand(Start);
1904 }
1905
1907 llvm_unreachable("cloning not implemented yet");
1908 }
1909
1910 ~VPWidenPHIRecipe() override = default;
1911
1912 VP_CLASSOF_IMPL(VPDef::VPWidenPHISC)
1913
1914 /// Generate the phi/select nodes.
1915 void execute(VPTransformState &State) override;
1916
1917#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1918 /// Print the recipe.
1919 void print(raw_ostream &O, const Twine &Indent,
1920 VPSlotTracker &SlotTracker) const override;
1921#endif
1922
1923 /// Adds a pair (\p IncomingV, \p IncomingBlock) to the phi. The value is
/// appended to the operands and the block to IncomingBlocks.
1924 void addIncoming(VPValue *IncomingV, VPBasicBlock *IncomingBlock) {
1925 addOperand(IncomingV);
1926 IncomingBlocks.push_back(IncomingBlock);
1927 }
1928
1929 /// Returns the \p I th incoming VPBasicBlock.
1930 VPBasicBlock *getIncomingBlock(unsigned I) { return IncomingBlocks[I]; }
1931
1932 /// Returns the \p I th incoming VPValue.
1933 VPValue *getIncomingValue(unsigned I) { return getOperand(I); }
1934};
1935
1936/// A recipe for handling first-order recurrence phis. The start value is the
1937/// first operand of the recipe and the incoming value from the backedge is the
1938/// second operand.
1941 : VPHeaderPHIRecipe(VPDef::VPFirstOrderRecurrencePHISC, Phi, &Start) {}
1942
1943 VP_CLASSOF_IMPL(VPDef::VPFirstOrderRecurrencePHISC)
1944
1946 return R->getVPDefID() == VPDef::VPFirstOrderRecurrencePHISC;
1947 }
1948
1951 cast<PHINode>(getUnderlyingInstr()), *getOperand(0));
1952 }
1953
1954 void execute(VPTransformState &State) override;
1955
1956#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1957 /// Print the recipe.
1958 void print(raw_ostream &O, const Twine &Indent,
1959 VPSlotTracker &SlotTracker) const override;
1960#endif
1961};
1962
1963/// A recipe for handling reduction phis. The start value is the first operand
1964/// of the recipe and the incoming value from the backedge is the second
1965/// operand.
1967 /// Descriptor for the reduction.
1968 const RecurrenceDescriptor &RdxDesc;
1969
1970 /// The phi is part of an in-loop reduction.
1971 bool IsInLoop;
1972
1973 /// The phi is part of an ordered reduction. Requires IsInLoop to be true.
1974 bool IsOrdered;
1975
1976public:
1977 /// Create a new VPReductionPHIRecipe for the reduction \p Phi described by \p
1978 /// RdxDesc.
1980 VPValue &Start, bool IsInLoop = false,
1981 bool IsOrdered = false)
1982 : VPHeaderPHIRecipe(VPDef::VPReductionPHISC, Phi, &Start),
1983 RdxDesc(RdxDesc), IsInLoop(IsInLoop), IsOrdered(IsOrdered) {
1984 assert((!IsOrdered || IsInLoop) && "IsOrdered requires IsInLoop");
1985 }
1986
1987 ~VPReductionPHIRecipe() override = default;
1988
1990 auto *R =
1991 new VPReductionPHIRecipe(cast<PHINode>(getUnderlyingInstr()), RdxDesc,
1992 *getOperand(0), IsInLoop, IsOrdered);
1993 R->addOperand(getBackedgeValue());
1994 return R;
1995 }
1996
1997 VP_CLASSOF_IMPL(VPDef::VPReductionPHISC)
1998
2000 return R->getVPDefID() == VPDef::VPReductionPHISC;
2001 }
2002
2003 /// Generate the phi/select nodes.
2004 void execute(VPTransformState &State) override;
2005
2006#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2007 /// Print the recipe.
2008 void print(raw_ostream &O, const Twine &Indent,
2009 VPSlotTracker &SlotTracker) const override;
2010#endif
2011
2013 return RdxDesc;
2014 }
2015
2016 /// Returns true, if the phi is part of an ordered reduction.
2017 bool isOrdered() const { return IsOrdered; }
2018
2019 /// Returns true, if the phi is part of an in-loop reduction.
2020 bool isInLoop() const { return IsInLoop; }
2021};
2022
2023/// A recipe for vectorizing a phi-node as a sequence of mask-based select
2024/// instructions.
2026public:
2027 /// The blend operation is a User of the incoming values and of their
2028 /// respective masks, ordered [I0, I1, M1, I2, M2, ...]. Note that the first
2029 /// incoming value does not have a mask associated.
2031 : VPSingleDefRecipe(VPDef::VPBlendSC, Operands, Phi, Phi->getDebugLoc()) {
2032 assert((Operands.size() + 1) % 2 == 0 &&
2033 "Expected an odd number of operands");
2034 }
2035
2036 VPBlendRecipe *clone() override {
2038 return new VPBlendRecipe(cast<PHINode>(getUnderlyingValue()), Ops);
2039 }
2040
2041 VP_CLASSOF_IMPL(VPDef::VPBlendSC)
2042
2043 /// Return the number of incoming values, taking into account that the first
2044 /// incoming value has no mask.
2045 unsigned getNumIncomingValues() const { return (getNumOperands() + 1) / 2; }
2046
2047 /// Return incoming value number \p Idx.
2048 VPValue *getIncomingValue(unsigned Idx) const {
2049 return Idx == 0 ? getOperand(0) : getOperand(Idx * 2 - 1);
2050 }
2051
2052 /// Return mask number \p Idx.
2053 VPValue *getMask(unsigned Idx) const {
2054 assert(Idx > 0 && "First index has no mask associated.");
2055 return getOperand(Idx * 2);
2056 }
2057
2058 /// Generate the phi/select nodes.
2059 void execute(VPTransformState &State) override;
2060
2061#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2062 /// Print the recipe.
2063 void print(raw_ostream &O, const Twine &Indent,
2064 VPSlotTracker &SlotTracker) const override;
2065#endif
2066
2067 /// Returns true if the recipe only uses the first lane of operand \p Op.
2068 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2070 "Op must be an operand of the recipe");
2071 // Recursing through Blend recipes only; must terminate at header phis at
2072 // the latest.
2073 return all_of(users(),
2074 [this](VPUser *U) { return U->onlyFirstLaneUsed(this); });
2075 }
2076};
2077
2078/// VPInterleaveRecipe is a recipe for transforming an interleave group of load
2079/// or stores into one wide load/store and shuffles. The first operand of a
2080/// VPInterleave recipe is the address, followed by the stored values, followed
2081/// by an optional mask.
2084
2085 /// Indicates if the interleave group is in a conditional block and requires a
2086 /// mask.
2087 bool HasMask = false;
2088
2089 /// Indicates if gaps between members of the group need to be masked out or if
2090 /// unused gaps can be loaded speculatively.
2091 bool NeedsMaskForGaps = false;
2092
2093public:
2095 ArrayRef<VPValue *> StoredValues, VPValue *Mask,
2096 bool NeedsMaskForGaps)
2097 : VPRecipeBase(VPDef::VPInterleaveSC, {Addr}), IG(IG),
2098 NeedsMaskForGaps(NeedsMaskForGaps) {
2099 for (unsigned i = 0; i < IG->getFactor(); ++i)
2100 if (Instruction *I = IG->getMember(i)) {
2101 if (I->getType()->isVoidTy())
2102 continue;
2103 new VPValue(I, this);
2104 }
2105
2106 for (auto *SV : StoredValues)
2107 addOperand(SV);
2108 if (Mask) {
2109 HasMask = true;
2110 addOperand(Mask);
2111 }
2112 }
2113 ~VPInterleaveRecipe() override = default;
2114
2116 return new VPInterleaveRecipe(IG, getAddr(), getStoredValues(), getMask(),
2117 NeedsMaskForGaps);
2118 }
2119
2120 VP_CLASSOF_IMPL(VPDef::VPInterleaveSC)
2121
2122 /// Return the address accessed by this recipe.
2123 VPValue *getAddr() const {
2124 return getOperand(0); // Address is the 1st, mandatory operand.
2125 }
2126
2127 /// Return the mask used by this recipe. Note that a full mask is represented
2128 /// by a nullptr.
2129 VPValue *getMask() const {
2130 // Mask is optional and, when present (HasMask), is always the last operand; it follows the address and any stored values.
2131 return HasMask ? getOperand(getNumOperands() - 1) : nullptr;
2132 }
2133
2134 /// Return the VPValues stored by this interleave group. If it is a load
2135 /// interleave group, return an empty ArrayRef.
2137 // The first operand is the address, followed by the stored values, followed
2138 // by an optional mask.
2141 }
2142
2143 /// Generate the wide load or store, and shuffles.
2144 void execute(VPTransformState &State) override;
2145
2146#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2147 /// Print the recipe.
2148 void print(raw_ostream &O, const Twine &Indent,
2149 VPSlotTracker &SlotTracker) const override;
2150#endif
2151
2153
2154 /// Returns the number of stored operands of this interleave group. Returns 0
2155 /// for load interleave groups.
2156 unsigned getNumStoreOperands() const {
2157 return getNumOperands() - (HasMask ? 2 : 1);
2158 }
2159
2160 /// The recipe only uses the first lane of the address.
2161 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2163 "Op must be an operand of the recipe");
2164 return Op == getAddr() && !llvm::is_contained(getStoredValues(), Op);
2165 }
2166
/// Return the insert position reported by the interleave group IG.
2167 Instruction *getInsertPos() const { return IG->getInsertPos(); }
2168};
2169
2170/// A recipe to represent inloop reduction operations, performing a reduction on
2171/// a vector operand into a scalar value, and adding the result to a chain.
2172/// The Operands are {ChainOp, VecOp, [Condition]}.
2174 /// The recurrence descriptor for the reduction in question.
2175 const RecurrenceDescriptor &RdxDesc;
2176 bool IsOrdered;
2177 /// Whether the reduction is conditional.
2178 bool IsConditional = false;
2179
2180protected:
2181 VPReductionRecipe(const unsigned char SC, const RecurrenceDescriptor &R,
2183 VPValue *CondOp, bool IsOrdered)
2184 : VPSingleDefRecipe(SC, Operands, I), RdxDesc(R), IsOrdered(IsOrdered) {
2185 if (CondOp) {
2186 IsConditional = true;
2187 addOperand(CondOp);
2188 }
2189 }
2190
2191public:
2193 VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
2194 bool IsOrdered)
2195 : VPReductionRecipe(VPDef::VPReductionSC, R, I,
2196 ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp,
2197 IsOrdered) {}
2198
2199 ~VPReductionRecipe() override = default;
2200
2202 return new VPReductionRecipe(RdxDesc, getUnderlyingInstr(), getChainOp(),
2203 getVecOp(), getCondOp(), IsOrdered);
2204 }
2205
/// Method to support type inquiry through isa, cast, and dyn_cast. Accepts
/// recipes whose ID is either VPReductionSC or VPReductionEVLSC.
2206 static inline bool classof(const VPRecipeBase *R) {
2207 return R->getVPDefID() == VPRecipeBase::VPReductionSC ||
2208 R->getVPDefID() == VPRecipeBase::VPReductionEVLSC;
2209 }
2210
/// Type inquiry for a VPUser: true only if \p U is a VPRecipeBase that
/// classof(const VPRecipeBase *) accepts.
2211 static inline bool classof(const VPUser *U) {
2212 auto *R = dyn_cast<VPRecipeBase>(U);
2213 return R && classof(R);
2214 }
2215
2216 /// Generate the reduction in the loop
2217 void execute(VPTransformState &State) override;
2218
2219#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2220 /// Print the recipe.
2221 void print(raw_ostream &O, const Twine &Indent,
2222 VPSlotTracker &SlotTracker) const override;
2223#endif
2224
2225 /// Return the recurrence descriptor for the in-loop reduction.
2227 return RdxDesc;
2228 }
2229 /// Return true if the in-loop reduction is ordered.
2230 bool isOrdered() const { return IsOrdered; };
2231 /// Return true if the in-loop reduction is conditional.
2232 bool isConditional() const { return IsConditional; };
2233 /// The VPValue of the scalar Chain being accumulated.
2234 VPValue *getChainOp() const { return getOperand(0); }
2235 /// The VPValue of the vector value to be reduced.
2236 VPValue *getVecOp() const { return getOperand(1); }
2237 /// The VPValue of the condition for the block.
2239 return isConditional() ? getOperand(getNumOperands() - 1) : nullptr;
2240 }
2241};
2242
2243/// A recipe to represent inloop reduction operations with vector-predication
2244/// intrinsics, performing a reduction on a vector operand with the explicit
2245/// vector length (EVL) into a scalar value, and adding the result to a chain.
2246/// The Operands are {ChainOp, VecOp, EVL, [Condition]}.
2248public:
2251 VPDef::VPReductionEVLSC, R->getRecurrenceDescriptor(),
2253 ArrayRef<VPValue *>({R->getChainOp(), R->getVecOp(), EVL}), CondOp,
2254 R->isOrdered()) {}
2255
2256 ~VPReductionEVLRecipe() override = default;
2257
2259 llvm_unreachable("cloning not implemented yet");
2260 }
2261
2262 VP_CLASSOF_IMPL(VPDef::VPReductionEVLSC)
2263
2264 /// Generate the reduction in the loop
2265 void execute(VPTransformState &State) override;
2266
2267#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2268 /// Print the recipe.
2269 void print(raw_ostream &O, const Twine &Indent,
2270 VPSlotTracker &SlotTracker) const override;
2271#endif
2272
2273 /// The VPValue of the explicit vector length.
2274 VPValue *getEVL() const { return getOperand(2); }
2275
2276 /// Returns true if the recipe only uses the first lane of operand \p Op.
2277 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2279 "Op must be an operand of the recipe");
2280 return Op == getEVL();
2281 }
2282};
2283
2284/// VPReplicateRecipe replicates a given instruction producing multiple scalar
2285/// copies of the original scalar type, one per lane, instead of producing a
2286/// single copy of widened type for all lanes. If the instruction is known to be
2287/// uniform only one copy, per lane zero, will be generated.
2289 /// Indicator if only a single replica per lane is needed.
2290 bool IsUniform;
2291
2292 /// Indicator if the replicas are also predicated.
2293 bool IsPredicated;
2294
2295public:
2296 template <typename IterT>
2298 bool IsUniform, VPValue *Mask = nullptr)
2299 : VPRecipeWithIRFlags(VPDef::VPReplicateSC, Operands, *I),
2300 IsUniform(IsUniform), IsPredicated(Mask) {
2301 if (Mask)
2302 addOperand(Mask);
2303 }
2304
2305 ~VPReplicateRecipe() override = default;
2306
2308 auto *Copy =
2309 new VPReplicateRecipe(getUnderlyingInstr(), operands(), IsUniform,
2310 isPredicated() ? getMask() : nullptr);
2311 Copy->transferFlags(*this);
2312 return Copy;
2313 }
2314
2315 VP_CLASSOF_IMPL(VPDef::VPReplicateSC)
2316
2317 /// Generate replicas of the desired Ingredient. Replicas will be generated
2318 /// for all parts and lanes unless a specific part and lane are specified in
2319 /// the \p State.
2320 void execute(VPTransformState &State) override;
2321
2322#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2323 /// Print the recipe.
2324 void print(raw_ostream &O, const Twine &Indent,
2325 VPSlotTracker &SlotTracker) const override;
2326#endif
2327
/// Return the IsUniform flag this recipe was constructed with.
2328 bool isUniform() const { return IsUniform; }
2329
/// Return true if the replicas are predicated, i.e. a non-null mask was
/// passed at construction.
2330 bool isPredicated() const { return IsPredicated; }
2331
2332 /// Returns true if the recipe only uses the first lane of operand \p Op.
2333 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2335 "Op must be an operand of the recipe");
2336 return isUniform();
2337 }
2338
2339 /// Returns true if the recipe uses scalars of operand \p Op.
2340 bool usesScalars(const VPValue *Op) const override {
2342 "Op must be an operand of the recipe");
2343 return true;
2344 }
2345
2346 /// Returns true if the recipe is used by a widened recipe via an intervening
2347 /// VPPredInstPHIRecipe. In this case, the scalar values should also be packed
2348 /// in a vector.
2349 bool shouldPack() const;
2350
2351 /// Return the mask of a predicated VPReplicateRecipe.
2353 assert(isPredicated() && "Trying to get the mask of a unpredicated recipe");
2354 return getOperand(getNumOperands() - 1);
2355 }
2356
/// Return the opcode of the underlying instruction.
2357 unsigned getOpcode() const { return getUnderlyingInstr()->getOpcode(); }
2358};
2359
2360/// A recipe for generating conditional branches on the bits of a mask.
2362public:
2364 : VPRecipeBase(VPDef::VPBranchOnMaskSC, {}) {
2365 if (BlockInMask) // nullptr means all-one mask.
2366 addOperand(BlockInMask);
2367 }
2368
// NOTE(review): the mask operand is optional (the constructor adds it only for
// a non-null BlockInMask, and getMask() handles the zero-operand case), yet
// getOperand(0) is used unconditionally here. Presumably getMask() is the safe
// accessor when cloning an all-one-mask recipe -- confirm.
2370 return new VPBranchOnMaskRecipe(getOperand(0));
2371 }
2372
2373 VP_CLASSOF_IMPL(VPDef::VPBranchOnMaskSC)
2374
2375 /// Generate the extraction of the appropriate bit from the block mask and the
2376 /// conditional branch.
2377 void execute(VPTransformState &State) override;
2378
2379#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2380 /// Print the recipe.
2381 void print(raw_ostream &O, const Twine &Indent,
2382 VPSlotTracker &SlotTracker) const override {
2383 O << Indent << "BRANCH-ON-MASK ";
2384 if (VPValue *Mask = getMask())
2385 Mask->printAsOperand(O, SlotTracker);
2386 else
2387 O << " All-One";
2388 }
2389#endif
2390
2391 /// Return the mask used by this recipe. Note that a full mask is represented
2392 /// by a nullptr.
2393 VPValue *getMask() const {
2394 assert(getNumOperands() <= 1 && "should have either 0 or 1 operands");
2395 // Mask is optional.
2396 return getNumOperands() == 1 ? getOperand(0) : nullptr;
2397 }
2398
2399 /// Returns true if the recipe uses scalars of operand \p Op.
2400 bool usesScalars(const VPValue *Op) const override {
2402 "Op must be an operand of the recipe");
2403 return true;
2404 }
2405};
2406
2407/// VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when
2408/// control converges back from a Branch-on-Mask. The phi nodes are needed in
2409/// order to merge values that are set under such a branch and feed their uses.
2410/// The phi nodes can be scalar or vector depending on the users of the value.
2411/// This recipe works in concert with VPBranchOnMaskRecipe.
2413public:
2414 /// Construct a VPPredInstPHIRecipe given \p PredV whose value needs phi
2415 /// nodes after merging back from a Branch-on-Mask.
2417 : VPSingleDefRecipe(VPDef::VPPredInstPHISC, PredV) {}
2418 ~VPPredInstPHIRecipe() override = default;
2419
2421 return new VPPredInstPHIRecipe(getOperand(0));
2422 }
2423
2424 VP_CLASSOF_IMPL(VPDef::VPPredInstPHISC)
2425
2426 /// Generates phi nodes for live-outs as needed to retain SSA form.
2427 void execute(VPTransformState &State) override;
2428
2429#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2430 /// Print the recipe.
2431 void print(raw_ostream &O, const Twine &Indent,
2432 VPSlotTracker &SlotTracker) const override;
2433#endif
2434
2435 /// Returns true if the recipe uses scalars of operand \p Op.
2436 bool usesScalars(const VPValue *Op) const override {
2438 "Op must be an operand of the recipe");
2439 return true;
2440 }
2441};
2442
2443/// A common base class for widening memory operations. An optional mask can be
2444/// provided as the last operand.
2446protected:
2448
2449 /// Whether the accessed addresses are consecutive.
2451
2452 /// Whether the consecutive accessed addresses are in reverse order.
2454
2455 /// Whether the memory access is masked.
2456 bool IsMasked = false;
2457
/// Append \p Mask as the last operand and mark the recipe as masked. A null
/// \p Mask leaves the recipe unmasked. Must not be called once a mask has
/// been set (asserted).
2458 void setMask(VPValue *Mask) {
2459 assert(!IsMasked && "cannot re-set mask");
2460 if (!Mask)
2461 return;
2462 addOperand(Mask);
2463 IsMasked = true;
2464 }
2465
2466 VPWidenMemoryRecipe(const char unsigned SC, Instruction &I,
2467 std::initializer_list<VPValue *> Operands,
2468 bool Consecutive, bool Reverse, DebugLoc DL)
2470 Reverse(Reverse) {
2471 assert((Consecutive || !Reverse) && "Reverse implies consecutive");
2472 }
2473
2474public:
2476 llvm_unreachable("cloning not supported");
2477 }
2478
/// Method to support type inquiry through isa, cast, and dyn_cast. Accepts
/// the four widened memory recipe IDs: load, store, and their EVL variants.
2479 static inline bool classof(const VPRecipeBase *R) {
2480 return R->getVPDefID() == VPRecipeBase::VPWidenLoadSC ||
2481 R->getVPDefID() == VPRecipeBase::VPWidenStoreSC ||
2482 R->getVPDefID() == VPRecipeBase::VPWidenLoadEVLSC ||
2483 R->getVPDefID() == VPRecipeBase::VPWidenStoreEVLSC;
2484 }
2485
/// Type inquiry for a VPUser: true only if \p U is a VPRecipeBase that
/// classof(const VPRecipeBase *) accepts.
2486 static inline bool classof(const VPUser *U) {
2487 auto *R = dyn_cast<VPRecipeBase>(U);
2488 return R && classof(R);
2489 }
2490
2491 /// Return whether the loaded-from / stored-to addresses are consecutive.
2492 bool isConsecutive() const { return Consecutive; }
2493
2494 /// Return whether the consecutive loaded/stored addresses are in reverse
2495 /// order.
2496 bool isReverse() const { return Reverse; }
2497
2498 /// Return the address accessed by this recipe.
2499 VPValue *getAddr() const { return getOperand(0); }
2500
2501 /// Returns true if the recipe is masked.
2502 bool isMasked() const { return IsMasked; }
2503
2504 /// Return the mask used by this recipe. Note that a full mask is represented
2505 /// by a nullptr.
2506 VPValue *getMask() const {
2507 // Mask is optional and therefore the last operand.
2508 return isMasked() ? getOperand(getNumOperands() - 1) : nullptr;
2509 }
2510
2511 /// Generate the wide load/store. This base-class version must never be reached (the body is llvm_unreachable); the concrete load/store recipes declare their own execute().
2512 void execute(VPTransformState &State) override {
2513 llvm_unreachable("VPWidenMemoryRecipe should not be instantiated.");
2514 }
2515
2517};
2518
2519/// A recipe for widening load operations, using the address to load from and an
2520/// optional mask.
2521struct VPWidenLoadRecipe final : public VPWidenMemoryRecipe, public VPValue {
2523 bool Consecutive, bool Reverse, DebugLoc DL)
2524 : VPWidenMemoryRecipe(VPDef::VPWidenLoadSC, Load, {Addr}, Consecutive,
2525 Reverse, DL),
2526 VPValue(this, &Load) {
2527 setMask(Mask);
2528 }
2529
2531 return new VPWidenLoadRecipe(cast<LoadInst>(Ingredient), getAddr(),
2533 getDebugLoc());
2534 }
2535
2536 VP_CLASSOF_IMPL(VPDef::VPWidenLoadSC);
2537
2538 /// Generate a wide load or gather.
2539 void execute(VPTransformState &State) override;
2540
2541#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2542 /// Print the recipe.
2543 void print(raw_ostream &O, const Twine &Indent,
2544 VPSlotTracker &SlotTracker) const override;
2545#endif
2546
2547 /// Returns true if the recipe only uses the first lane of operand \p Op.
2548 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2550 "Op must be an operand of the recipe");
2551 // Widened, consecutive load operations only demand the first lane of
2552 // their address.
2553 return Op == getAddr() && isConsecutive();
2554 }
2555};
2556
2557/// A recipe for widening load operations with vector-predication intrinsics,
2558/// using the address to load from, the explicit vector length and an optional
2559/// mask.
2560struct VPWidenLoadEVLRecipe final : public VPWidenMemoryRecipe, public VPValue {
2562 : VPWidenMemoryRecipe(VPDef::VPWidenLoadEVLSC, L->getIngredient(),
2563 {L->getAddr(), EVL}, L->isConsecutive(),
2564 L->isReverse(), L->getDebugLoc()),
2565 VPValue(this, &getIngredient()) {
2566 setMask(Mask);
2567 }
2568
2569 VP_CLASSOF_IMPL(VPDef::VPWidenLoadEVLSC)
2570
2571 /// Return the EVL operand.
2572 VPValue *getEVL() const { return getOperand(1); }
2573
2574 /// Generate the wide load or gather.
2575 void execute(VPTransformState &State) override;
2576
2577#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2578 /// Print the recipe.
2579 void print(raw_ostream &O, const Twine &Indent,
2580 VPSlotTracker &SlotTracker) const override;
2581#endif
2582
2583 /// Returns true if the recipe only uses the first lane of operand \p Op.
2584 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2586 "Op must be an operand of the recipe");
2587 // Widened loads only demand the first lane of EVL and consecutive loads
2588 // only demand the first lane of their address.
2589 return Op == getEVL() || (Op == getAddr() && isConsecutive());
2590 }
2591};
2592
2593/// A recipe for widening store operations, using the stored value, the address
2594/// to store to and an optional mask.
2597 VPValue *Mask, bool Consecutive, bool Reverse, DebugLoc DL)
2598 : VPWidenMemoryRecipe(VPDef::VPWidenStoreSC, Store, {Addr, StoredVal},
2600 setMask(Mask);
2601 }
2602
2604 return new VPWidenStoreRecipe(cast<StoreInst>(Ingredient), getAddr(),
2606 Reverse, getDebugLoc());
2607 }
2608
2609 VP_CLASSOF_IMPL(VPDef::VPWidenStoreSC);
2610
2611 /// Return the value stored by this recipe.
2612 VPValue *getStoredValue() const { return getOperand(1); }
2613
2614 /// Generate a wide store or scatter.
2615 void execute(VPTransformState &State) override;
2616
2617#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2618 /// Print the recipe.
2619 void print(raw_ostream &O, const Twine &Indent,
2620 VPSlotTracker &SlotTracker) const override;
2621#endif
2622
2623 /// Returns true if the recipe only uses the first lane of operand \p Op.
2624 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2626 "Op must be an operand of the recipe");
2627 // Widened, consecutive stores only demand the first lane of their address,
2628 // unless the same operand is also stored.
2629 return Op == getAddr() && isConsecutive() && Op != getStoredValue();
2630 }
2631};
2632
2633/// A recipe for widening store operations with vector-predication intrinsics,
2634/// using the value to store, the address to store to, the explicit vector
2635/// length and an optional mask.
2638 : VPWidenMemoryRecipe(VPDef::VPWidenStoreEVLSC, S->getIngredient(),
2639 {S->getAddr(), S->getStoredValue(), EVL},
2640 S->isConsecutive(), S->isReverse(),
2641 S->getDebugLoc()) {
2642 setMask(Mask);
2643 }
2644
2645 VP_CLASSOF_IMPL(VPDef::VPWidenStoreEVLSC)
2646
2647 /// Return the value stored by this recipe (the second operand).
2648 VPValue *getStoredValue() const { return getOperand(1); }
2649
2650 /// Return the EVL operand.
2651 VPValue *getEVL() const { return getOperand(2); }
2652
2653 /// Generate the wide store or scatter.
2654 void execute(VPTransformState &State) override;
2655
2656#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2657 /// Print the recipe.
2658 void print(raw_ostream &O, const Twine &Indent,
2659 VPSlotTracker &SlotTracker) const override;
2660#endif
2661
2662 /// Returns true if the recipe only uses the first lane of operand \p Op.
2663 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2665 "Op must be an operand of the recipe");
2666 if (Op == getEVL()) {
2667 assert(getStoredValue() != Op && "unexpected store of EVL");
2668 return true;
2669 }
2670 // Widened, consecutive memory operations only demand the first lane of
2671 // their address, unless the same operand is also stored. The latter can
2672 // happen with opaque pointers.
2673 return Op == getAddr() && isConsecutive() && Op != getStoredValue();
2674 }
2675};
2676
2677/// Recipe to expand a SCEV expression.
2679 const SCEV *Expr;
2680 ScalarEvolution &SE;
2681
2682public:
2684 : VPSingleDefRecipe(VPDef::VPExpandSCEVSC, {}), Expr(Expr), SE(SE) {}
2685
2686 ~VPExpandSCEVRecipe() override = default;
2687
2689 return new VPExpandSCEVRecipe(Expr, SE);
2690 }
2691
2692 VP_CLASSOF_IMPL(VPDef::VPExpandSCEVSC)
2693
2694 /// Execute the recipe. Per the class comment this expands the SCEV expression; only the declaration is visible here.
2695 void execute(VPTransformState &State) override;
2696
2697#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2698 /// Print the recipe.
2699 void print(raw_ostream &O, const Twine &Indent,
2700 VPSlotTracker &SlotTracker) const override;
2701#endif
2702
/// Return the SCEV expression this recipe was constructed with.
2703 const SCEV *getSCEV() const { return Expr; }
2704};
2705
2706/// Canonical scalar induction phi of the vector loop. Starting at the specified
2707/// start value (either 0 or the resume value when vectorizing the epilogue
2708/// loop). VPWidenCanonicalIVRecipe represents the vector version of the
2709/// canonical induction variable.
2711public:
2713 : VPHeaderPHIRecipe(VPDef::VPCanonicalIVPHISC, nullptr, StartV, DL) {}
2714
2715 ~VPCanonicalIVPHIRecipe() override = default;
2716
2718 auto *R = new VPCanonicalIVPHIRecipe(getOperand(0), getDebugLoc());
2719 R->addOperand(getBackedgeValue());
2720 return R;
2721 }
2722
2723 VP_CLASSOF_IMPL(VPDef::VPCanonicalIVPHISC)
2724
2726 return D->getVPDefID() == VPDef::VPCanonicalIVPHISC;
2727 }
2728
2729 /// Generate the canonical scalar induction phi of the vector loop.
2730 void execute(VPTransformState &State) override;
2731
2732#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2733 /// Print the recipe.
2734 void print(raw_ostream &O, const Twine &Indent,
2735 VPSlotTracker &SlotTracker) const override;
2736#endif
2737
2738 /// Returns the scalar type of the induction.
2740 return getStartValue()->getLiveInIRValue()->getType();
2741 }
2742
2743 /// Returns true if the recipe only uses the first lane of operand \p Op.
2744 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2746 "Op must be an operand of the recipe");
2747 return true;
2748 }
2749
2750 /// Returns true if the recipe only uses the first part of operand \p Op.
2751 bool onlyFirstPartUsed(const VPValue *Op) const override {
2753 "Op must be an operand of the recipe");
2754 return true;
2755 }
2756
2757 /// Check if the induction described by \p Kind, \p Start and \p Step is
2758 /// canonical, i.e. has the same start and step (of 1) as the canonical IV.
2760 VPValue *Step) const;
2761};
2762
2763/// A recipe for generating the active lane mask for the vector loop that is
2764/// used to predicate the vector operations.
2765/// TODO: It would be good to use the existing VPWidenPHIRecipe instead and
2766/// remove VPActiveLaneMaskPHIRecipe.
2768public:
2770 : VPHeaderPHIRecipe(VPDef::VPActiveLaneMaskPHISC, nullptr, StartMask,
2771 DL) {}
2772
2773 ~VPActiveLaneMaskPHIRecipe() override = default;
2774
2777 }
2778
2779 VP_CLASSOF_IMPL(VPDef::VPActiveLaneMaskPHISC)
2780
2782 return D->getVPDefID() == VPDef::VPActiveLaneMaskPHISC;
2783 }
2784
2785 /// Generate the active lane mask phi of the vector loop.
2786 void execute(VPTransformState &State) override;
2787
2788#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2789 /// Print the recipe.
2790 void print(raw_ostream &O, const Twine &Indent,
2791 VPSlotTracker &SlotTracker) const override;
2792#endif
2793};
2794
2795/// A recipe for generating the phi node for the current index of elements,
2796/// adjusted in accordance with EVL value. It starts at the start value of the
2797/// canonical induction and gets incremented by EVL in each iteration of the
2798/// vector loop.
2800public:
2802 : VPHeaderPHIRecipe(VPDef::VPEVLBasedIVPHISC, nullptr, StartIV, DL) {}
2803
2804 ~VPEVLBasedIVPHIRecipe() override = default;
2805
2807 llvm_unreachable("cloning not implemented yet");
2808 }
2809
2810 VP_CLASSOF_IMPL(VPDef::VPEVLBasedIVPHISC)
2811
2813 return D->getVPDefID() == VPDef::VPEVLBasedIVPHISC;
2814 }
2815
2816 /// Generate phi for handling IV based on EVL over iterations correctly.
2817 /// TODO: investigate if it can share the code with VPCanonicalIVPHIRecipe.
2818 void execute(VPTransformState &State) override;
2819
2820 /// Returns true if the recipe only uses the first lane of operand \p Op.
2821 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2823 "Op must be an operand of the recipe");
2824 return true;
2825 }
2826
2827#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2828 /// Print the recipe.
2829 void print(raw_ostream &O, const Twine &Indent,
2830 VPSlotTracker &SlotTracker) const override;
2831#endif
2832};
2833
2834/// A Recipe for widening the canonical induction variable of the vector loop.
2836public:
2838 : VPSingleDefRecipe(VPDef::VPWidenCanonicalIVSC, {CanonicalIV}) {}
2839
2840 ~VPWidenCanonicalIVRecipe() override = default;
2841
2843 return new VPWidenCanonicalIVRecipe(
2844 cast<VPCanonicalIVPHIRecipe>(getOperand(0)));
2845 }
2846
2847 VP_CLASSOF_IMPL(VPDef::VPWidenCanonicalIVSC)
2848
2849 /// Generate a canonical vector induction variable of the vector loop, with
2850 /// start = {<Part*VF, Part*VF+1, ..., Part*VF+VF-1> for 0 <= Part < UF}, and
2851 /// step = <VF*UF, VF*UF, ..., VF*UF>.
2852 void execute(VPTransformState &State) override;
2853
2854#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2855 /// Print the recipe.
2856 void print(raw_ostream &O, const Twine &Indent,
2857 VPSlotTracker &SlotTracker) const override;
2858#endif
2859};
2860
2861 /// A recipe for converting the input value \p IV to the corresponding
2862/// value of an IV with different start and step values, using Start + IV *
2863/// Step.
2865 /// Kind of the induction.
2867 /// If not nullptr, the floating point induction binary operator. Must be set
2868 /// for floating point inductions.
2869 const FPMathOperator *FPBinOp;
2870
2871public:
2873 VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step)
2875 IndDesc.getKind(),
2876 dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp()),
2877 Start, CanonicalIV, Step) {}
2878
2880 const FPMathOperator *FPBinOp, VPValue *Start, VPValue *IV,
2881 VPValue *Step)
2882 : VPSingleDefRecipe(VPDef::VPDerivedIVSC, {Start, IV, Step}), Kind(Kind),
2883 FPBinOp(FPBinOp) {}
2884
2885 ~VPDerivedIVRecipe() override = default;
2886
2888 return new VPDerivedIVRecipe(Kind, FPBinOp, getStartValue(), getOperand(1),
2889 getStepValue());
2890 }
2891
2892 VP_CLASSOF_IMPL(VPDef::VPDerivedIVSC)
2893
2894 /// Generate the transformed value of the induction at offset StartValue (1.
2895 /// operand) + IV (2. operand) * StepValue (3. operand).
2896 void execute(VPTransformState &State) override;
2897
2898#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2899 /// Print the recipe.
2900 void print(raw_ostream &O, const Twine &Indent,
2901 VPSlotTracker &SlotTracker) const override;
2902#endif
2903
2905 return getStartValue()->getLiveInIRValue()->getType();
2906 }
2907
/// Return the start value, i.e. the first operand of {Start, IV, Step}.
2908 VPValue *getStartValue() const { return getOperand(0); }
/// Return the step value, i.e. the third operand of {Start, IV, Step}.
2909 VPValue *getStepValue() const { return getOperand(2); }
2910
2911 /// Returns true if the recipe only uses the first lane of operand \p Op.
2912 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2914 "Op must be an operand of the recipe");
2915 return true;
2916 }
2917};
2918
2919/// A recipe for handling phi nodes of integer and floating-point inductions,
2920/// producing their scalar values.
2922 Instruction::BinaryOps InductionOpcode;
2923
2924public:
2927 : VPRecipeWithIRFlags(VPDef::VPScalarIVStepsSC,
2928 ArrayRef<VPValue *>({IV, Step}), FMFs),
2929 InductionOpcode(Opcode) {}
2930
2932 VPValue *Step)
2934 IV, Step, IndDesc.getInductionOpcode(),
2935 dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp())
2936 ? IndDesc.getInductionBinOp()->getFastMathFlags()
2937 : FastMathFlags()) {}
2938
2939 ~VPScalarIVStepsRecipe() override = default;
2940
2942 return new VPScalarIVStepsRecipe(
2943 getOperand(0), getOperand(1), InductionOpcode,
2945 }
2946
2947 VP_CLASSOF_IMPL(VPDef::VPScalarIVStepsSC)
2948
2949 /// Generate the scalarized versions of the phi node as needed by their users.
2950 void execute(VPTransformState &State) override;
2951
2952#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2953 /// Print the recipe.
2954 void print(raw_ostream &O, const Twine &Indent,
2955 VPSlotTracker &SlotTracker) const override;
2956#endif
2957
/// Return the step value, i.e. the second operand of {IV, Step}.
2958 VPValue *getStepValue() const { return getOperand(1); }
2959
2960 /// Returns true if the recipe only uses the first lane of operand \p Op.
2961 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2963 "Op must be an operand of the recipe");
2964 return true;
2965 }
2966};
2967
2968/// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It
2969/// holds a sequence of zero or more VPRecipe's each representing a sequence of
2970/// output IR instructions. All PHI-like recipes must come before any non-PHI recipes.
2972public:
2974
2975protected:
2976 /// The VPRecipes held in the order of output instructions to generate.
2978
/// Construct an empty block named \p Name, forwarding the block ID \p BlockSC
/// to VPBlockBase. Declared protected; presumably intended for subclasses
/// that carry their own block ID -- confirm against the subclasses.
2979 VPBasicBlock(const unsigned char BlockSC, const Twine &Name = "")
2980 : VPBlockBase(BlockSC, Name.str()) {}
2982public:
2983 VPBasicBlock(const Twine &Name = "", VPRecipeBase *Recipe = nullptr)
2984 : VPBlockBase(VPBasicBlockSC, Name.str()) {
2985 if (Recipe)
2986 appendRecipe(Recipe);
2987 }
2988
2989 ~VPBasicBlock() override {
2990 while (!Recipes.empty())
2991 Recipes.pop_back();
2992 }
2993
2994 /// Instruction iterators...
2999
3000 //===--------------------------------------------------------------------===//
3001 /// Recipe iterator methods
3002 ///
3003 inline iterator begin() { return Recipes.begin(); }
3004 inline const_iterator begin() const { return Recipes.begin(); }
3005 inline iterator end() { return Recipes.end(); }
3006 inline const_iterator end() const { return Recipes.end(); }
3007
3008 inline reverse_iterator rbegin() { return Recipes.rbegin(); }
3009 inline const_reverse_iterator rbegin() const { return Recipes.rbegin(); }
3010 inline reverse_iterator rend() { return Recipes.rend(); }
3011 inline const_reverse_iterator rend() const { return Recipes.rend(); }
3012
3013 inline size_t size() const { return Recipes.size(); }
3014 inline bool empty() const { return Recipes.empty(); }
3015 inline const VPRecipeBase &front() const { return Recipes.front(); }
3016 inline VPRecipeBase &front() { return Recipes.front(); }
3017 inline const VPRecipeBase &back() const { return Recipes.back(); }
3018 inline VPRecipeBase &back() { return Recipes.back(); }
3019
3020 /// Returns a reference to the list of recipes.
3022
3023 /// Returns a pointer to a member of the recipe list.
3025 return &VPBasicBlock::Recipes;
3026 }
3027
3028 /// Method to support type inquiry through isa, cast, and dyn_cast.
3029 static inline bool classof(const VPBlockBase *V) {
3030 return V->getVPBlockID() == VPBlockBase::VPBasicBlockSC ||
3031 V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
3032 }
3033
3034 void insert(VPRecipeBase *Recipe, iterator InsertPt) {
3035 assert(Recipe && "No recipe to append.");
3036 assert(!Recipe->Parent && "Recipe already in VPlan");
3037 Recipe->Parent = this;
3038 Recipes.insert(InsertPt, Recipe);
3039 }
3040
3041 /// Augment the existing recipes of a VPBasicBlock with an additional
3042 /// \p Recipe as the last recipe.
3043 void appendRecipe(VPRecipeBase *Recipe) { insert(Recipe, end()); }
3044
3045 /// The method which generates the output IR instructions that correspond to
3046 /// this VPBasicBlock, thereby "executing" the VPlan.
3047 void execute(VPTransformState *State) override;
3048
3049 /// Return the cost of this VPBasicBlock.
3051
3052 /// Return the position of the first non-phi node recipe in the block.
3054
3055 /// Returns an iterator range over the PHI-like recipes in the block.
3057 return make_range(begin(), getFirstNonPhi());
3058 }
3059
3060 void dropAllReferences(VPValue *NewValue) override;
3061
3062 /// Split current block at \p SplitAt by inserting a new block between the
3063 /// current block and its successors and moving all recipes starting at
3064 /// SplitAt to the new block. Returns the new block.
3065 VPBasicBlock *splitAt(iterator SplitAt);
3066
3068
3069#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3070 /// Print this VPBasicBlock to \p O, prefixing all lines with \p Indent. \p
3071 /// SlotTracker is used to print unnamed VPValue's using consecutive numbers.
3072 ///
3073 /// Note that the numbering is applied to the whole VPlan, so printing
3074 /// individual blocks is consistent with the whole VPlan printing.
3075 void print(raw_ostream &O, const Twine &Indent,
3076 VPSlotTracker &SlotTracker) const override;
3077 using VPBlockBase::print; // Get the print(raw_stream &O) version.
3078#endif
3079
3080 /// If the block has multiple successors, return the branch recipe terminating
3081 /// the block. If there are no or only a single successor, return nullptr;
3083 const VPRecipeBase *getTerminator() const;
3084
3085 /// Returns true if the block is exiting its parent region.
3086 bool isExiting() const;
3087
3088 /// Clone the current block and it's recipes, without updating the operands of
3089 /// the cloned recipes.
3090 VPBasicBlock *clone() override {
3091 auto *NewBlock = new VPBasicBlock(getName());
3092 for (VPRecipeBase &R : *this)
3093 NewBlock->appendRecipe(R.clone());
3094 return NewBlock;
3095 }
3096
3097protected:
3098 /// Execute the recipes in the IR basic block \p BB.
3099 void executeRecipes(VPTransformState *State, BasicBlock *BB);
3100
3101private:
3102 /// Create an IR BasicBlock to hold the output instructions generated by this
3103 /// VPBasicBlock, and return it. Update the CFGState accordingly.
3104 BasicBlock *createEmptyBasicBlock(VPTransformState::CFGState &CFG);
3105};
3106
3107/// A special type of VPBasicBlock that wraps an existing IR basic block.
3108/// Recipes of the block get added before the first non-phi instruction in the
3109/// wrapped block.
3110/// Note: At the moment, VPIRBasicBlock can only be used to wrap VPlan's
3111/// preheader block.
3113 BasicBlock *IRBB;
3114
3115public:
3117 : VPBasicBlock(VPIRBasicBlockSC,
3118 (Twine("ir-bb<") + IRBB->getName() + Twine(">")).str()),
3119 IRBB(IRBB) {}
3120
3121 ~VPIRBasicBlock() override {}
3122
3123 static inline bool classof(const VPBlockBase *V) {
3124 return V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
3125 }
3126
3127 /// The method which generates the output IR instructions that correspond to
3128 /// this VPBasicBlock, thereby "executing" the VPlan.
3129 void execute(VPTransformState *State) override;
3130
3131 VPIRBasicBlock *clone() override {
3132 auto *NewBlock = new VPIRBasicBlock(IRBB);
3133 for (VPRecipeBase &R : Recipes)
3134 NewBlock->appendRecipe(R.clone());
3135 return NewBlock;
3136 }
3137
3138 BasicBlock *getIRBasicBlock() const { return IRBB; }
3139};
3140
3141/// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks
3142/// which form a Single-Entry-Single-Exiting subgraph of the output IR CFG.
3143/// A VPRegionBlock may indicate that its contents are to be replicated several
3144/// times. This is designed to support predicated scalarization, in which a
3145/// scalar if-then code structure needs to be generated VF * UF times. Having
3146/// this replication indicator helps to keep a single model for multiple
3147/// candidate VF's. The actual replication takes place only once the desired VF
3148/// and UF have been determined.
3150 /// Hold the Single Entry of the SESE region modelled by the VPRegionBlock.
3151 VPBlockBase *Entry;
3152
3153 /// Hold the Single Exiting block of the SESE region modelled by the
3154 /// VPRegionBlock.
3155 VPBlockBase *Exiting;
3156
3157 /// An indicator whether this region is to generate multiple replicated
3158 /// instances of output IR corresponding to its VPBlockBases.
3159 bool IsReplicator;
3160
3161public:
3163 const std::string &Name = "", bool IsReplicator = false)
3164 : VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exiting(Exiting),
3165 IsReplicator(IsReplicator) {
3166 assert(Entry->getPredecessors().empty() && "Entry block has predecessors.");
3167 assert(Exiting->getSuccessors().empty() && "Exit block has successors.");
3168 Entry->setParent(this);
3169 Exiting->setParent(this);
3170 }
3171 VPRegionBlock(const std::string &Name = "", bool IsReplicator = false)
3172 : VPBlockBase(VPRegionBlockSC, Name), Entry(nullptr), Exiting(nullptr),
3173 IsReplicator(IsReplicator) {}
3174
3175 ~VPRegionBlock() override {
3176 if (Entry) {
3177 VPValue DummyValue;
3178 Entry->dropAllReferences(&DummyValue);
3179 deleteCFG(Entry);
3180 }
3181 }
3182
3183 /// Method to support type inquiry through isa, cast, and dyn_cast.
3184 static inline bool classof(const VPBlockBase *V) {
3185 return V->getVPBlockID() == VPBlockBase::VPRegionBlockSC;
3186 }
3187
3188 const VPBlockBase *getEntry() const { return Entry; }
3189 VPBlockBase *getEntry() { return Entry; }
3190
3191 /// Set \p EntryBlock as the entry VPBlockBase of this VPRegionBlock. \p
3192 /// EntryBlock must have no predecessors.
3193 void setEntry(VPBlockBase *EntryBlock) {
3194 assert(EntryBlock->getPredecessors().empty() &&
3195 "Entry block cannot have predecessors.");
3196 Entry = EntryBlock;
3197 EntryBlock->setParent(this);
3198 }
3199
3200 const VPBlockBase *getExiting() const { return Exiting; }
3201 VPBlockBase *getExiting() { return Exiting; }
3202
3203 /// Set \p ExitingBlock as the exiting VPBlockBase of this VPRegionBlock. \p
3204 /// ExitingBlock must have no successors.
3205 void setExiting(VPBlockBase *ExitingBlock) {
3206 assert(ExitingBlock->getSuccessors().empty() &&
3207 "Exit block cannot have successors.");
3208 Exiting = ExitingBlock;
3209 ExitingBlock->setParent(this);
3210 }
3211
3212 /// Returns the pre-header VPBasicBlock of the loop region.
3214 assert(!isReplicator() && "should only get pre-header of loop regions");
3216 }
3217
3218 /// An indicator whether this region is to generate multiple replicated
3219 /// instances of output IR corresponding to its VPBlockBases.
3220 bool isReplicator() const { return IsReplicator; }
3221
3222 /// The method which generates the output IR instructions that correspond to
3223 /// this VPRegionBlock, thereby "executing" the VPlan.
3224 void execute(VPTransformState *State) override;
3225
3226 // Return the cost of this region.
3228
3229 void dropAllReferences(VPValue *NewValue) override;
3230
3231#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3232 /// Print this VPRegionBlock to \p O (recursively), prefixing all lines with
3233 /// \p Indent. \p SlotTracker is used to print unnamed VPValue's using
3234 /// consecutive numbers.
3235 ///
3236 /// Note that the numbering is applied to the whole VPlan, so printing
3237 /// individual regions is consistent with the whole VPlan printing.
3238 void print(raw_ostream &O, const Twine &Indent,
3239 VPSlotTracker &SlotTracker) const override;
3240 using VPBlockBase::print; // Get the print(raw_stream &O) version.
3241#endif
3242
3243 /// Clone all blocks in the single-entry single-exit region of the block and
3244 /// their recipes without updating the operands of the cloned recipes.
3245 VPRegionBlock *clone() override;
3246};
3247
3248 /// VPlan models a candidate for vectorization, encoding various decisions taken
3249/// to produce efficient output IR, including which branches, basic-blocks and
3250/// output IR instructions to generate, and their cost. VPlan holds a
3251/// Hierarchical-CFG of VPBasicBlocks and VPRegionBlocks rooted at an Entry
3252/// VPBasicBlock.
3253class VPlan {
3254 friend class VPlanPrinter;
3255 friend class VPSlotTracker;
3256
3257 /// Hold the single entry to the Hierarchical CFG of the VPlan, i.e. the
3258 /// preheader of the vector loop.
3259 VPBasicBlock *Entry;
3260
3261 /// VPBasicBlock corresponding to the original preheader. Used to place
3262 /// VPExpandSCEV recipes for expressions used during skeleton creation and the
3263 /// rest of VPlan execution.
3264 VPBasicBlock *Preheader;
3265
3266 /// Holds the VFs applicable to this VPlan.
3268
3269 /// Holds the UFs applicable to this VPlan. If empty, the VPlan is valid for
3270 /// any UF.
3272
3273 /// Holds the name of the VPlan, for printing.
3274 std::string Name;
3275
3276 /// Represents the trip count of the original loop, for folding
3277 /// the tail.
3278 VPValue *TripCount = nullptr;
3279
3280 /// Represents the backedge taken count of the original loop, for folding
3281 /// the tail. It equals TripCount - 1.
3282 VPValue *BackedgeTakenCount = nullptr;
3283
3284 /// Represents the vector trip count.
3285 VPValue VectorTripCount;
3286
3287 /// Represents the loop-invariant VF * UF of the vector loop region.
3288 VPValue VFxUF;
3289
3290 /// Holds a mapping between Values and their corresponding VPValue inside
3291 /// VPlan.
3292 Value2VPValueTy Value2VPValue;
3293
3294 /// Contains all the external definitions created for this VPlan. External
3295 /// definitions are VPValues that hold a pointer to their underlying IR.
3296 SmallVector<VPValue *, 16> VPLiveInsToFree;
3297
3298 /// Values used outside the plan. It contains live-outs that need fixing. Any
3299 /// live-out that is fixed outside VPlan needs to be removed. The remaining
3300 /// live-outs are fixed via VPLiveOut::fixPhi.
3302
3303 /// Mapping from SCEVs to the VPValues representing their expansions.
3304 /// NOTE: This mapping is temporary and will be removed once all users have
3305 /// been modeled in VPlan directly.
3306 DenseMap<const SCEV *, VPValue *> SCEVToExpansion;
3307
3308public:
3309 /// Construct a VPlan with original preheader \p Preheader, trip count \p TC
3310 /// and \p Entry to the plan. At the moment, \p Preheader and \p Entry need to
3311 /// be disconnected, as the bypass blocks between them are not yet modeled in
3312 /// VPlan.
3313 VPlan(VPBasicBlock *Preheader, VPValue *TC, VPBasicBlock *Entry)
3314 : VPlan(Preheader, Entry) {
3315 TripCount = TC;
3316 }
3317
3318 /// Construct a VPlan with original preheader \p Preheader and \p Entry to
3319 /// the plan. At the moment, \p Preheader and \p Entry need to be
3320 /// disconnected, as the bypass blocks between them are not yet modeled in
3321 /// VPlan.
3322 VPlan(VPBasicBlock *Preheader, VPBasicBlock *Entry)
3323 : Entry(Entry), Preheader(Preheader) {
3324 Entry->setPlan(this);
3325 Preheader->setPlan(this);
3326 assert(Preheader->getNumSuccessors() == 0 &&
3327 Preheader->getNumPredecessors() == 0 &&
3328 "preheader must be disconnected");
3329 }
3330
3331 ~VPlan();
3332
3333 /// Create initial VPlan, having an "entry" VPBasicBlock (wrapping
3334 /// original scalar pre-header ) which contains SCEV expansions that need
3335 /// to happen before the CFG is modified; a VPBasicBlock for the vector
3336 /// pre-header, followed by a region for the vector loop, followed by the
3337 /// middle VPBasicBlock. If a check is needed to guard executing the scalar
3338 /// epilogue loop, it will be added to the middle block, together with
3339 /// VPBasicBlocks for the scalar preheader and exit blocks.
3340 static VPlanPtr createInitialVPlan(const SCEV *TripCount,
3341 ScalarEvolution &PSE,
3342 bool RequiresScalarEpilogueCheck,
3343 bool TailFolded, Loop *TheLoop);
3344
3345 /// Prepare the plan for execution, setting up the required live-in values.
3346 void prepareToExecute(Value *TripCount, Value *VectorTripCount,
3347 Value *CanonicalIVStartValue, VPTransformState &State);
3348
3349 /// Generate the IR code for this VPlan.
3350 void execute(VPTransformState *State);
3351
3352 /// Return the cost of this plan.
3354
3355 VPBasicBlock *getEntry() { return Entry; }
3356 const VPBasicBlock *getEntry() const { return Entry; }
3357
3358 /// The trip count of the original loop.
3360 assert(TripCount && "trip count needs to be set before accessing it");
3361 return TripCount;
3362 }
3363
3364 /// Resets the trip count for the VPlan. The caller must make sure all uses of
3365 /// the original trip count have been replaced.
3366 void resetTripCount(VPValue *NewTripCount) {
3367 assert(TripCount && NewTripCount && TripCount->getNumUsers() == 0 &&
3368 "TripCount always must be set");
3369 TripCount = NewTripCount;
3370 }
3371
3372 /// The backedge taken count of the original loop.
3374 if (!BackedgeTakenCount)
3375 BackedgeTakenCount = new VPValue();
3376 return BackedgeTakenCount;
3377 }
3378
3379 /// The vector trip count.
3380 VPValue &getVectorTripCount() { return VectorTripCount; }
3381
3382 /// Returns VF * UF of the vector loop region.
3383 VPValue &getVFxUF() { return VFxUF; }
3384
3385 void addVF(ElementCount VF) { VFs.insert(VF); }
3386
3388 assert(hasVF(VF) && "Cannot set VF not already in plan");
3389 VFs.clear();
3390 VFs.insert(VF);
3391 }
3392
3393 bool hasVF(ElementCount VF) { return VFs.count(VF); }
3395 return any_of(VFs, [](ElementCount VF) { return VF.isScalable(); });
3396 }
3397
3398 /// Returns an iterator range over all VFs of the plan.
3401 return {VFs.begin(), VFs.end()};
3402 }
3403
3404 bool hasScalarVFOnly() const { return VFs.size() == 1 && VFs[0].isScalar(); }
3405
3406 bool hasUF(unsigned UF) const { return UFs.empty() || UFs.contains(UF); }
3407
3408 void setUF(unsigned UF) {
3409 assert(hasUF(UF) && "Cannot set the UF not already in plan");
3410 UFs.clear();
3411 UFs.insert(UF);
3412 }
3413
3414 /// Return a string with the name of the plan and the applicable VFs and UFs.
3415 std::string getName() const;
3416
3417 void setName(const Twine &newName) { Name = newName.str(); }
3418
3419 /// Gets the live-in VPValue for \p V or adds a new live-in (if none exists
3420 /// yet) for \p V.
3422 assert(V && "Trying to get or add the VPValue of a null Value");
3423 if (!Value2VPValue.count(V)) {
3424 VPValue *VPV = new VPValue(V);
3425 VPLiveInsToFree.push_back(VPV);
3426 assert(VPV->isLiveIn() && "VPV must be a live-in.");
3427 assert(!Value2VPValue.count(V) && "Value already exists in VPlan");
3428 Value2VPValue[V] = VPV;
3429 }
3430
3431 assert(Value2VPValue.count(V) && "Value does not exist in VPlan");
3432 assert(Value2VPValue[V]->isLiveIn() &&
3433 "Only live-ins should be in mapping");
3434 return Value2VPValue[V];
3435 }
3436
3437 /// Return the live-in VPValue for \p V, if there is one or nullptr otherwise.
3438 VPValue *getLiveIn(Value *V) const { return Value2VPValue.lookup(V); }
3439
3440#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3441 /// Print the live-ins of this VPlan to \p O.
3442 void printLiveIns(raw_ostream &O) const;
3443
3444 /// Print this VPlan to \p O.
3445 void print(raw_ostream &O) const;
3446
3447 /// Print this VPlan in DOT format to \p O.
3448 void printDOT(raw_ostream &O) const;
3449
3450 /// Dump the plan to stderr (for debugging).
3451 LLVM_DUMP_METHOD void dump() const;
3452#endif
3453
3454 /// Returns the VPRegionBlock of the vector loop.
3456 return cast<VPRegionBlock>(getEntry()->getSingleSuccessor());
3457 }
3459 return cast<VPRegionBlock>(getEntry()->getSingleSuccessor());
3460 }
3461
3462 /// Returns the canonical induction recipe of the vector loop.
3465 if (EntryVPBB->empty()) {
3466 // VPlan native path.
3467 EntryVPBB = cast<VPBasicBlock>(EntryVPBB->getSingleSuccessor());
3468 }
3469 return cast<VPCanonicalIVPHIRecipe>(&*EntryVPBB->begin());
3470 }
3471
3472 void addLiveOut(PHINode *PN, VPValue *V);
3473
3475 delete LiveOuts[PN];
3476 LiveOuts.erase(PN);
3477 }
3478
3480 return LiveOuts;
3481 }
3482
3483 VPValue *getSCEVExpansion(const SCEV *S) const {
3484 return SCEVToExpansion.lookup(S);
3485 }
3486
3487 void addSCEVExpansion(const SCEV *S, VPValue *V) {
3488 assert(!SCEVToExpansion.contains(S) && "SCEV already expanded");
3489 SCEVToExpansion[S] = V;
3490 }
3491
3492 /// \return The block corresponding to the original preheader.
3493 VPBasicBlock *getPreheader() { return Preheader; }
3494 const VPBasicBlock *getPreheader() const { return Preheader; }
3495
3496 /// Clone the current VPlan, update all VPValues of the new VPlan and cloned
3497 /// recipes to refer to the clones, and return it.
3498 VPlan *duplicate();
3499};
3500
3501#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3502/// VPlanPrinter prints a given VPlan to a given output stream. The printing is
3503/// indented and follows the dot format.
3505 raw_ostream &OS;
3506 const VPlan &Plan;
3507 unsigned Depth = 0;
3508 unsigned TabWidth = 2;
3509 std::string Indent;
3510 unsigned BID = 0;
3512
3514
3515 /// Handle indentation.
3516 void bumpIndent(int b) { Indent = std::string((Depth += b) * TabWidth, ' '); }
3517
3518 /// Print a given \p Block of the Plan.
3519 void dumpBlock(const VPBlockBase *Block);
3520
3521 /// Print the information related to the CFG edges going out of a given
3522 /// \p Block, followed by printing the successor blocks themselves.
3523 void dumpEdges(const VPBlockBase *Block);
3524
3525 /// Print a given \p BasicBlock, including its VPRecipes, followed by printing
3526 /// its successor blocks.
3527 void dumpBasicBlock(const VPBasicBlock *BasicBlock);
3528
3529 /// Print a given \p Region of the Plan.
3530 void dumpRegion(const VPRegionBlock *Region);
3531
3532 unsigned getOrCreateBID(const VPBlockBase *Block) {
3533 return BlockID.count(Block) ? BlockID[Block] : BlockID[Block] = BID++;
3534 }
3535
3536 Twine getOrCreateName(const VPBlockBase *Block);
3537
3538 Twine getUID(const VPBlockBase *Block);
3539
3540 /// Print the information related to a CFG edge between two VPBlockBases.
3541 void drawEdge(const VPBlockBase *From, const VPBlockBase *To, bool Hidden,
3542 const Twine &Label);
3543
3544public:
3546 : OS(O), Plan(P), SlotTracker(&P) {}
3547
3548 LLVM_DUMP_METHOD void dump();
3549};
3550
3552 const Value *V;
3553
3554 VPlanIngredient(const Value *V) : V(V) {}
3555
3556 void print(raw_ostream &O) const;
3557};
3558
3560 I.print(OS);
3561 return OS;
3562}
3563
3565 Plan.print(OS);
3566 return OS;
3567}
3568#endif
3569
3570//===----------------------------------------------------------------------===//
3571// VPlan Utilities
3572//===----------------------------------------------------------------------===//
3573
3574/// Class that provides utilities for VPBlockBases in VPlan.
3576public:
3577 VPBlockUtils() = delete;
3578
3579 /// Insert disconnected VPBlockBase \p NewBlock after \p BlockPtr. Add \p
3580 /// NewBlock as successor of \p BlockPtr and \p BlockPtr as predecessor of \p
3581 /// NewBlock, and propagate \p BlockPtr parent to \p NewBlock. \p BlockPtr's
3582 /// successors are moved from \p BlockPtr to \p NewBlock. \p NewBlock must
3583 /// have neither successors nor predecessors.
3584 static void insertBlockAfter(VPBlockBase *NewBlock, VPBlockBase *BlockPtr) {
3585 assert(NewBlock->getSuccessors().empty() &&
3586 NewBlock->getPredecessors().empty() &&
3587 "Can't insert new block with predecessors or successors.");
3588 NewBlock->setParent(BlockPtr->getParent());
3589 SmallVector<VPBlockBase *> Succs(BlockPtr->successors());
3590 for (VPBlockBase *Succ : Succs) {
3591 disconnectBlocks(BlockPtr, Succ);
3592 connectBlocks(NewBlock, Succ);
3593 }
3594 connectBlocks(BlockPtr, NewBlock);
3595 }
3596
3597 /// Insert disconnected VPBlockBases \p IfTrue and \p IfFalse after \p
3598 /// BlockPtr. Add \p IfTrue and \p IfFalse as succesors of \p BlockPtr and \p
3599 /// BlockPtr as predecessor of \p IfTrue and \p IfFalse. Propagate \p BlockPtr
3600 /// parent to \p IfTrue and \p IfFalse. \p BlockPtr must have no successors
3601 /// and \p IfTrue and \p IfFalse must have neither successors nor
3602 /// predecessors.
3603 static void insertTwoBlocksAfter(VPBlockBase *IfTrue, VPBlockBase *IfFalse,
3604 VPBlockBase *BlockPtr) {
3605 assert(IfTrue->getSuccessors().empty() &&
3606 "Can't insert IfTrue with successors.");
3607 assert(IfFalse->getSuccessors().empty() &&
3608 "Can't insert IfFalse with successors.");
3609 BlockPtr->setTwoSuccessors(IfTrue, IfFalse);
3610 IfTrue->setPredecessors({BlockPtr});
3611 IfFalse->setPredecessors({BlockPtr});
3612 IfTrue->setParent(BlockPtr->getParent());
3613 IfFalse->setParent(BlockPtr->getParent());
3614 }
3615
3616 /// Connect VPBlockBases \p From and \p To bi-directionally. Append \p To to
3617 /// the successors of \p From and \p From to the predecessors of \p To. Both
3618 /// VPBlockBases must have the same parent, which can be null. Both
3619 /// VPBlockBases can be already connected to other VPBlockBases.
3621 assert((From->getParent() == To->getParent()) &&
3622 "Can't connect two block with different parents");
3623 assert(From->getNumSuccessors() < 2 &&
3624 "Blocks can't have more than two successors.");
3625 From->appendSuccessor(To);
3626 To->appendPredecessor(From);
3627 }
3628
3629 /// Disconnect VPBlockBases \p From and \p To bi-directionally. Remove \p To
3630 /// from the successors of \p From and \p From from the predecessors of \p To.
3632 assert(To && "Successor to disconnect is null.");
3633 From->removeSuccessor(To);
3634 To->removePredecessor(From);
3635 }
3636
3637 /// Return an iterator range over \p Range which only includes \p BlockTy
3638 /// blocks. The accesses are casted to \p BlockTy.
3639 template <typename BlockTy, typename T>
3640 static auto blocksOnly(const T &Range) {
3641 // Create BaseTy with correct const-ness based on BlockTy.
3642 using BaseTy = std::conditional_t<std::is_const<BlockTy>::value,
3643 const VPBlockBase, VPBlockBase>;
3644
3645 // We need to first create an iterator range over (const) BlockTy & instead
3646 // of (const) BlockTy * for filter_range to work properly.
3647 auto Mapped =
3648 map_range(Range, [](BaseTy *Block) -> BaseTy & { return *Block; });
3650 Mapped, [](BaseTy &Block) { return isa<BlockTy>(&Block); });
3651 return map_range(Filter, [](BaseTy &Block) -> BlockTy * {
3652 return cast<BlockTy>(&Block);
3653 });
3654 }
3655};
3656
3659 InterleaveGroupMap;
3660
3661 /// Type for mapping of instruction based interleave groups to VPInstruction
3662 /// interleave groups
3665
3666 /// Recursively traverse \p Region and populate VPlan based interleave groups based on
3667 /// \p IAI.
3668 void visitRegion(VPRegionBlock *Region, Old2NewTy &Old2New,
3670 /// Recursively traverse \p Block and populate VPlan based interleave groups
3671 /// based on \p IAI.
3672 void visitBlock(VPBlockBase *Block, Old2NewTy &Old2New,
3674
3675public:
3677
3680 // Avoid releasing a pointer twice.
3681 for (auto &I : InterleaveGroupMap)
3682 DelSet.insert(I.second);
3683 for (auto *Ptr : DelSet)
3684 delete Ptr;
3685 }
3686
3687 /// Get the interleave group that \p Instr belongs to.
3688 ///
3689 /// \returns nullptr if doesn't have such group.
3692 return InterleaveGroupMap.lookup(Instr);
3693 }
3694};
3695
3696/// Class that maps (parts of) an existing VPlan to trees of combined
3697/// VPInstructions.
3699 enum class OpMode { Failed, Load, Opcode };
3700
3701 /// A DenseMapInfo implementation for using SmallVector<VPValue *, 4> as
3702 /// DenseMap keys.
3703 struct BundleDenseMapInfo {
3704 static SmallVector<VPValue *, 4> getEmptyKey() {
3705 return {reinterpret_cast<VPValue *>(-1)};
3706 }
3707
3708 static SmallVector<VPValue *, 4> getTombstoneKey() {
3709 return {reinterpret_cast<VPValue *>(-2)};
3710 }
3711
3712 static unsigned getHashValue(const SmallVector<VPValue *, 4> &V) {
3713 return static_cast<unsigned>(hash_combine_range(V.begin(), V.end()));
3714 }
3715
3716 static bool isEqual(const SmallVector<VPValue *, 4> &LHS,
3718 return LHS == RHS;
3719 }
3720 };
3721
3722 /// Mapping of values in the original VPlan to a combined VPInstruction.
3724 BundleToCombined;
3725
3727
3728 /// Basic block to operate on. For now, only instructions in a single BB are
3729 /// considered.
3730 const VPBasicBlock &BB;
3731
3732 /// Indicates whether we managed to combine all visited instructions or not.
3733 bool CompletelySLP = true;
3734
3735 /// Width of the widest combined bundle in bits.
3736 unsigned WidestBundleBits = 0;
3737
3738 using MultiNodeOpTy =
3739 typename std::pair<VPInstruction *, SmallVector<VPValue *, 4>>;
3740
3741 // Input operand bundles for the current multi node. Each multi node operand
3742 // bundle contains values not matching the multi node's opcode. They will
3743 // be reordered in reorderMultiNodeOps, once we completed building a
3744 // multi node.
3745 SmallVector<MultiNodeOpTy, 4> MultiNodeOps;
3746
3747 /// Indicates whether we are building a multi node currently.
3748 bool MultiNodeActive = false;
3749
3750 /// Check if we can vectorize Operands together.
3751 bool areVectorizable(ArrayRef<VPValue *> Operands) const;
3752
3753 /// Add combined instruction \p New for the bundle \p Operands.
3754 void addCombined(ArrayRef<VPValue *> Operands, VPInstruction *New);
3755
3756 /// Indicate we hit a bundle we failed to combine. Returns nullptr for now.
3757 VPInstruction *markFailed();
3758
3759 /// Reorder operands in the multi node to maximize sequential memory access
3760 /// and commutative operations.
3761 SmallVector<MultiNodeOpTy, 4> reorderMultiNodeOps();
3762
3763 /// Choose the best candidate to use for the lane after \p Last. The set of
3764 /// candidates to choose from are values with an opcode matching \p Last's
3765 /// or loads consecutive to \p Last.
3766 std::pair<OpMode, VPValue *> getBest(OpMode Mode, VPValue *Last,
3767 SmallPtrSetImpl<VPValue *> &Candidates,
3769
3770#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3771 /// Print bundle \p Values to dbgs().
3772 void dumpBundle(ArrayRef<VPValue *> Values);
3773#endif
3774
3775public:
3776 VPlanSlp(VPInterleavedAccessInfo &IAI, VPBasicBlock &BB) : IAI(IAI), BB(BB) {}
3777
3778 ~VPlanSlp() = default;
3779
3780 /// Tries to build an SLP tree rooted at \p Operands and returns a
3781 /// VPInstruction combining \p Operands, if they can be combined.
3783
3784 /// Return the width of the widest combined bundle in bits.
3785 unsigned getWidestBundleBits() const { return WidestBundleBits; }
3786
3787 /// Return true if all visited instruction can be combined.
3788 bool isCompletelySLP() const { return CompletelySLP; }
3789};
3790
3791namespace vputils {
3792
3793/// Returns true if only the first lane of \p Def is used.
3794bool onlyFirstLaneUsed(const VPValue *Def);
3795
3796/// Returns true if only the first part of \p Def is used.
3797bool onlyFirstPartUsed(const VPValue *Def);
3798
3799/// Get or create a VPValue that corresponds to the expansion of \p Expr. If \p
3800/// Expr is a SCEVConstant or SCEVUnknown, return a VPValue wrapping the live-in
3801/// value. Otherwise return a VPExpandSCEVRecipe to expand \p Expr. If \p Plan's
3802/// pre-header already contains a recipe expanding \p Expr, return it. If not,
3803/// create a new one.
3805 ScalarEvolution &SE);
3806
3807/// Returns true if \p VPV is uniform after vectorization.
3809 // A value defined outside the vector region must be uniform after
3810 // vectorization inside a vector region.
3812 return true;
3813 VPRecipeBase *Def = VPV->getDefiningRecipe();
3814 assert(Def && "Must have definition for value defined inside vector region");
3815 if (auto Rep = dyn_cast<VPReplicateRecipe>(Def))
3816 return Rep->isUniform();
3817 if (auto *GEP = dyn_cast<VPWidenGEPRecipe>(Def))
3818 return all_of(GEP->operands(), isUniformAfterVectorization);
3819 if (auto *VPI = dyn_cast<VPInstruction>(Def))
3820 return VPI->isSingleScalar() || VPI->isVectorToScalar();
3821 return false;
3822}
3823
3824/// Return true if \p V is a header mask in \p Plan.
3825bool isHeaderMask(VPValue *V, VPlan &Plan);
3826} // end namespace vputils
3827
3828} // end namespace llvm
3829
3830#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
aarch64 promote const
static MCDisassembler::DecodeStatus addOperand(MCInst &Inst, const MCOperand &Opnd)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
always inline
BlockVerifier::State From
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
RelocType Type
Definition: COFFYAML.cpp:391
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:537
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines the DenseMap class.
uint64_t Addr
std::string Name
std::optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1294
Flatten the CFG
Hexagon Common GEP
std::pair< BasicBlock *, unsigned > BlockTy
A pair of (basic block, score).
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
#define I(x, y, z)
Definition: MD5.cpp:58
mir Rename Register Operands
This file implements a map that provides insertion order iteration.
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
#define P(N)
static cl::opt< RegAllocEvictionAdvisorAnalysis::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Development, "development", "for training")))
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
raw_pwrite_stream & OS
This file implements the SmallBitVector class.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file contains the declarations of the entities induced by Vectorization Plans,...
#define VP_CLASSOF_IMPL(VPDefID)
Definition: VPlan.h:869
Value * RHS
Value * LHS
static const uint32_t IV[8]
Definition: blake3_impl.h:78
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition: ArrayRef.h:195
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:530
Instruction::CastOps getOpcode() const
Return the opcode of this CastInst.
Definition: InstrTypes.h:694
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:757
This class represents an Operation in the Expression.
A debug info location.
Definition: DebugLoc.h:33
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:202
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:151
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition: DenseMap.h:145
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
constexpr bool isScalar() const
Exactly one element.
Definition: TypeSize.h:322
Utility class for floating point operations which can have information about relaxed accuracy require...
Definition: Operator.h:202
Convenience struct for specifying and reasoning about fast-math flags.
Definition: FMF.h:20
static GEPNoWrapFlags inBounds()
static GEPNoWrapFlags none()
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:915
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:91
A struct for saving information about induction variables.
InductionKind
This enum represents the kinds of inductions that we support.
InnerLoopVectorizer vectorizes loops which contain only one basic block to a specified vectorization ...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:274
The group of interleaved loads/stores sharing the same stride and close to each other.
Definition: VectorUtils.h:470
uint32_t getFactor() const
Definition: VectorUtils.h:486
InstTy * getMember(uint32_t Index) const
Get the member with the given index Index.
Definition: VectorUtils.h:540
InstTy * getInsertPos() const
Definition: VectorUtils.h:556
Drive the analysis of interleaved memory accesses in the loop.
Definition: VectorUtils.h:612
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
An instruction for reading from memory.
Definition: Instructions.h:174
LoopVectorizationCostModel - estimates the expected speedups due to vectorization.
This class emits a version of the loop where run-time checks ensure that may-alias pointers can't ove...
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:44
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:36
VectorType::iterator erase(typename VectorType::iterator Iterator)
Remove the element given by Iterator.
Definition: MapVector.h:193
The RecurrenceDescriptor is used to identify recurrence variables in a loop.
Definition: IVDescriptors.h:71
This class represents an analyzed expression in the program.
The main scalar evolution driver.
This class represents the LLVM 'select' instruction.
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:98
iterator end()
Get an iterator to the end of the SetVector.
Definition: SetVector.h:113
void clear()
Completely clear the SetVector.
Definition: SetVector.h:273
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:264
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:93
iterator begin()
Get an iterator to the beginning of the SetVector.
Definition: SetVector.h:103
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
bool contains(const key_type &key) const
Check if the SetVector contains the given key.
Definition: SetVector.h:254
This class provides computation of slot numbers for LLVM Assembly writing.
Definition: AsmWriter.cpp:696
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:323
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:344
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:479
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
iterator erase(const_iterator CI)
Definition: SmallVector.h:750
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
An instruction for storing to memory.
Definition: Instructions.h:290
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
This class represents a truncation of integer types.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
std::string str() const
Return the twine contents as a std::string.
Definition: Twine.cpp:17
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
Iterator to iterate over vectorization factors in a VFRange.
Definition: VPlan.h:124
ElementCount operator*() const
Definition: VPlan.h:132
iterator & operator++()
Definition: VPlan.h:134
iterator(ElementCount VF)
Definition: VPlan.h:128
bool operator==(const iterator &Other) const
Definition: VPlan.h:130
A recipe for generating the active lane mask for the vector loop that is used to predicate the vector...
Definition: VPlan.h:2767
void execute(VPTransformState &State) override
Generate the active lane mask phi of the vector loop.
VPActiveLaneMaskPHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2775
static bool classof(const VPHeaderPHIRecipe *D)
Definition: VPlan.h:2781
VPActiveLaneMaskPHIRecipe(VPValue *StartMask, DebugLoc DL)
Definition: VPlan.h:2769
~VPActiveLaneMaskPHIRecipe() override=default
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition: VPlan.h:2971
RecipeListTy::const_iterator const_iterator
Definition: VPlan.h:2996
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
Definition: VPlan.h:3043
VPBasicBlock * clone() override
Clone the current block and its recipes, without updating the operands of the cloned recipes.
Definition: VPlan.h:3090
RecipeListTy::const_reverse_iterator const_reverse_iterator
Definition: VPlan.h:2998
RecipeListTy::iterator iterator
Instruction iterators...
Definition: VPlan.h:2995
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPBasicBlock,...
Definition: VPlan.cpp:483
RecipeListTy & getRecipeList()
Returns a reference to the list of recipes.
Definition: VPlan.h:3021
VPBasicBlock(const unsigned char BlockSC, const Twine &Name="")
Definition: VPlan.h:2979
iterator end()
Definition: VPlan.h:3005
VPBasicBlock(const Twine &Name="", VPRecipeBase *Recipe=nullptr)
Definition: VPlan.h:2983
iterator begin()
Recipe iterator methods.
Definition: VPlan.h:3003
RecipeListTy::reverse_iterator reverse_iterator
Definition: VPlan.h:2997
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
Definition: VPlan.h:3056
InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override
Return the cost of this VPBasicBlock.
Definition: VPlan.cpp:780
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
Definition: VPlan.cpp:212
~VPBasicBlock() override
Definition: VPlan.h:2989
VPRegionBlock * getEnclosingLoopRegion()
Definition: VPlan.cpp:575
void dropAllReferences(VPValue *NewValue) override
Replace all operands of VPUsers in the block with NewValue and also replaces all uses of VPValues def...
Definition: VPlan.cpp:527
const_reverse_iterator rbegin() const
Definition: VPlan.h:3009
reverse_iterator rend()
Definition: VPlan.h:3010
VPBasicBlock * splitAt(iterator SplitAt)
Split current block at SplitAt by inserting a new block between the current block and its successors ...
Definition: VPlan.cpp:550
RecipeListTy Recipes
The VPRecipes held in the order of output instructions to generate.
Definition: VPlan.h:2977
void executeRecipes(VPTransformState *State, BasicBlock *BB)
Execute the recipes in the IR basic block BB.
Definition: VPlan.cpp:537
VPRecipeBase & back()
Definition: VPlan.h:3018
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print this VPBasicBlock to O, prefixing all lines with Indent.
Definition: VPlan.cpp:642
const VPRecipeBase & front() const
Definition: VPlan.h:3015
const_iterator begin() const
Definition: VPlan.h:3004
VPRecipeBase & front()
Definition: VPlan.h:3016
bool isExiting() const
Returns true if the block is exiting its parent region.
Definition: VPlan.cpp:625
VPRecipeBase * getTerminator()
If the block has multiple successors, return the branch recipe terminating the block.
Definition: VPlan.cpp:613
const VPRecipeBase & back() const
Definition: VPlan.h:3017
void insert(VPRecipeBase *Recipe, iterator InsertPt)
Definition: VPlan.h:3034
bool empty() const
Definition: VPlan.h:3014
const_iterator end() const
Definition: VPlan.h:3006
static bool classof(const VPBlockBase *V)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition: VPlan.h:3029
static RecipeListTy VPBasicBlock::* getSublistAccess(VPRecipeBase *)
Returns a pointer to a member of the recipe list.
Definition: VPlan.h:3024
reverse_iterator rbegin()
Definition: VPlan.h:3008
size_t size() const
Definition: VPlan.h:3013
const_reverse_iterator rend() const
Definition: VPlan.h:3011
A recipe for vectorizing a phi-node as a sequence of mask-based select instructions.
Definition: VPlan.h:2025
VPBlendRecipe(PHINode *Phi, ArrayRef< VPValue * > Operands)
The blend operation is a User of the incoming values and of their respective masks,...
Definition: VPlan.h:2030
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:2068
VPValue * getIncomingValue(unsigned Idx) const
Return incoming value number Idx.
Definition: VPlan.h:2048
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
Definition: VPlan.h:2053
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account that the first incoming value has no mask.
Definition: VPlan.h:2045
VPBlendRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2036
void execute(VPTransformState &State) override
Generate the phi/select nodes.
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition: VPlan.h:437
void setSuccessors(ArrayRef< VPBlockBase * > NewSuccs)
Set each VPBasicBlock in NewSuccs as successor of this VPBlockBase.
Definition: VPlan.h:632
VPRegionBlock * getParent()
Definition: VPlan.h:509
VPBlocksTy & getPredecessors()
Definition: VPlan.h:540
const VPBasicBlock * getExitingBasicBlock() const
Definition: VPlan.cpp:177
LLVM_DUMP_METHOD void dump() const
Dump this VPBlockBase to dbgs().
Definition: VPlan.h:690
void setName(const Twine &newName)
Definition: VPlan.h:502
size_t getNumSuccessors() const
Definition: VPlan.h:554
iterator_range< VPBlockBase ** > successors()
Definition: VPlan.h:537
virtual void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Print plain-text dump of this VPBlockBase to O, prefixing all lines with Indent.
void printSuccessors(raw_ostream &O, const Twine &Indent) const
Print the successors of this block to O, prefixing all lines with Indent.
Definition: VPlan.cpp:630
bool isLegalToHoistInto()
Return true if it is legal to hoist instructions into this block.
Definition: VPlan.h:655
virtual ~VPBlockBase()=default
void print(raw_ostream &O) const
Print plain-text dump of this VPBlockBase to O.
Definition: VPlan.h:680
const VPBlocksTy & getHierarchicalPredecessors()
Definition: VPlan.h:590
size_t getNumPredecessors() const
Definition: VPlan.h:555
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
Definition: VPlan.h:623
VPBlockBase * getEnclosingBlockWithPredecessors()
Definition: VPlan.cpp:199
const VPBlocksTy & getPredecessors() const
Definition: VPlan.h:539
virtual VPBlockBase * clone()=0
Clone the current block and its recipes without updating the operands of the cloned recipes,...
static void deleteCFG(VPBlockBase *Entry)
Delete all blocks reachable from a given VPBlockBase, inclusive.
Definition: VPlan.cpp:207
enum { VPRegionBlockSC, VPBasicBlockSC, VPIRBasicBlockSC } VPBlockTy
An enumeration for keeping track of the concrete subclass of VPBlockBase that are actually instantiat...
Definition: VPlan.h:494
virtual InstructionCost cost(ElementCount VF, VPCostContext &Ctx)=0
Return the cost of the block.
VPlan * getPlan()
Definition: VPlan.cpp:150
void setPlan(VPlan *ParentPlan)
Sets the pointer of the plan containing the block.
Definition: VPlan.cpp:169
const VPRegionBlock * getParent() const
Definition: VPlan.h:510
void printAsOperand(raw_ostream &OS, bool PrintType) const
Definition: VPlan.h:666
const std::string & getName() const
Definition: VPlan.h:500
void clearSuccessors()
Remove all the successors of this block.
Definition: VPlan.h:642
VPBlockBase * getSingleHierarchicalSuccessor()
Definition: VPlan.h:580
void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse)
Set two given VPBlockBases IfTrue and IfFalse to be the two successors of this VPBlockBase.
Definition: VPlan.h:614
VPBlockBase * getSinglePredecessor() const
Definition: VPlan.h:550
virtual void execute(VPTransformState *State)=0
The method which generates the output IR that corresponds to this VPBlockBase, thereby "executing" the...
const VPBlocksTy & getHierarchicalSuccessors()
Definition: VPlan.h:574
void clearPredecessors()
Remove all the predecessors of this block.
Definition: VPlan.h:639
unsigned getVPBlockID() const
Definition: VPlan.h:507
VPBlockBase(const unsigned char SC, const std::string &N)
Definition: VPlan.h:486
VPBlocksTy & getSuccessors()
Definition: VPlan.h:535
VPBlockBase * getEnclosingBlockWithSuccessors()
An Enclosing Block of a block B is any block containing B, including B itself.
Definition: VPlan.cpp:191
const VPBasicBlock * getEntryBasicBlock() const
Definition: VPlan.cpp:155
void setOneSuccessor(VPBlockBase *Successor)
Set a given VPBlockBase Successor as the single successor of this VPBlockBase.
Definition: VPlan.h:603
void setParent(VPRegionBlock *P)
Definition: VPlan.h:520
virtual void dropAllReferences(VPValue *NewValue)=0
Replace all operands of VPUsers in the block with NewValue and also replaces all uses of VPValues def...
VPBlockBase * getSingleHierarchicalPredecessor()
Definition: VPlan.h:596
VPBlockBase * getSingleSuccessor() const
Definition: VPlan.h:544
const VPBlocksTy & getSuccessors() const
Definition: VPlan.h:534
Class that provides utilities for VPBlockBases in VPlan.
Definition: VPlan.h:3575
static auto blocksOnly(const T &Range)
Return an iterator range over Range which only includes BlockTy blocks.
Definition: VPlan.h:3640
static void insertBlockAfter(VPBlockBase *NewBlock, VPBlockBase *BlockPtr)
Insert disconnected VPBlockBase NewBlock after BlockPtr.
Definition: VPlan.h:3584
static void insertTwoBlocksAfter(VPBlockBase *IfTrue, VPBlockBase *IfFalse, VPBlockBase *BlockPtr)
Insert disconnected VPBlockBases IfTrue and IfFalse after BlockPtr.
Definition: VPlan.h:3603
static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To)
Disconnect VPBlockBases From and To bi-directionally.
Definition: VPlan.h:3631
static void connectBlocks(VPBlockBase *From, VPBlockBase *To)
Connect VPBlockBases From and To bi-directionally.
Definition: VPlan.h:3620
A recipe for generating conditional branches on the bits of a mask.
Definition: VPlan.h:2361
VPValue * getMask() const
Return the mask used by this recipe.
Definition: VPlan.h:2393
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition: VPlan.h:2381
VPBranchOnMaskRecipe(VPValue *BlockInMask)
Definition: VPlan.h:2363
VPBranchOnMaskRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2369
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition: VPlan.h:2400
void execute(VPTransformState &State) override
Generate the extraction of the appropriate bit from the block mask and the conditional branch.
Canonical scalar induction phi of the vector loop.
Definition: VPlan.h:2710
bool onlyFirstPartUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition: VPlan.h:2751
~VPCanonicalIVPHIRecipe() override=default
static bool classof(const VPHeaderPHIRecipe *D)
Definition: VPlan.h:2725
VPCanonicalIVPHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2717
VPCanonicalIVPHIRecipe(VPValue *StartV, DebugLoc DL)
Definition: VPlan.h:2712
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:2744
void execute(VPTransformState &State) override
Generate the canonical scalar induction phi of the vector loop.
Type * getScalarType() const
Returns the scalar type of the induction.
Definition: VPlan.h:2739
bool isCanonical(InductionDescriptor::InductionKind Kind, VPValue *Start, VPValue *Step) const
Check if the induction described by Kind, Start and Step is canonical, i.e.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
This class augments a recipe with a set of VPValues defined by the recipe.
Definition: VPlanValue.h:307
unsigned getVPDefID() const
Definition: VPlanValue.h:428
A recipe for converting the input value IV value to the corresponding value of an IV with different s...
Definition: VPlan.h:2864
void execute(VPTransformState &State) override
Generate the transformed value of the induction at offset StartValue (1.
VPDerivedIVRecipe(InductionDescriptor::InductionKind Kind, const FPMathOperator *FPBinOp, VPValue *Start, VPValue *IV, VPValue *Step)
Definition: VPlan.h:2879
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getStepValue() const
Definition: VPlan.h:2909
VPDerivedIVRecipe(const InductionDescriptor &IndDesc, VPValue *Start, VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step)
Definition: VPlan.h:2872
Type * getScalarType() const
Definition: VPlan.h:2904
VPDerivedIVRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2887
~VPDerivedIVRecipe() override=default
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:2912
VPValue * getStartValue() const
Definition: VPlan.h:2908
A recipe for generating the phi node for the current index of elements, adjusted in accordance with E...
Definition: VPlan.h:2799
static bool classof(const VPHeaderPHIRecipe *D)
Definition: VPlan.h:2812
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPEVLBasedIVPHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2806
~VPEVLBasedIVPHIRecipe() override=default
void execute(VPTransformState &State) override
Generate phi for handling IV based on EVL over iterations correctly.
VPEVLBasedIVPHIRecipe(VPValue *StartIV, DebugLoc DL)
Definition: VPlan.h:2801
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:2821
Recipe to expand a SCEV expression.
Definition: VPlan.h:2678
VPExpandSCEVRecipe(const SCEV *Expr, ScalarEvolution &SE)
Definition: VPlan.h:2683
const SCEV * getSCEV() const
Definition: VPlan.h:2703
void execute(VPTransformState &State) override
Generate a canonical vector induction variable of the vector loop, with.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPExpandSCEVRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2688
~VPExpandSCEVRecipe() override=default
A pure virtual base class for all recipes modeling header phis, including phis for first order recurr...
Definition: VPlan.h:1711
static bool classof(const VPValue *V)
Definition: VPlan.h:1728
VPHeaderPHIRecipe(unsigned char VPDefID, Instruction *UnderlyingInstr, VPValue *Start=nullptr, DebugLoc DL={})
Definition: VPlan.h:1713
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override=0
Print the recipe.
virtual VPValue * getBackedgeValue()
Returns the incoming value from the loop backedge.
Definition: VPlan.h:1755
VPValue * getStartValue()
Returns the start value of the phi, if one is set.
Definition: VPlan.h:1744
void setStartValue(VPValue *V)
Update the start value of the recipe.
Definition: VPlan.h:1752
VPValue * getStartValue() const
Definition: VPlan.h:1747
static bool classof(const VPRecipeBase *B)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition: VPlan.h:1724
void execute(VPTransformState &State) override=0
Generate the phi nodes.
virtual VPRecipeBase & getBackedgeRecipe()
Returns the backedge value as a recipe.
Definition: VPlan.h:1761
~VPHeaderPHIRecipe() override=default
A special type of VPBasicBlock that wraps an existing IR basic block.
Definition: VPlan.h:3112
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPBasicBlock,...
Definition: VPlan.cpp:452
VPIRBasicBlock(BasicBlock *IRBB)
Definition: VPlan.h:3116
BasicBlock * getIRBasicBlock() const
Definition: VPlan.h:3138
~VPIRBasicBlock() override
Definition: VPlan.h:3121
static bool classof(const VPBlockBase *V)
Definition: VPlan.h:3123
VPIRBasicBlock * clone() override
Clone the current block and its recipes, without updating the operands of the cloned recipes.
Definition: VPlan.h:3131
This is a concrete Recipe that models a single VPlan-level instruction.
Definition: VPlan.h:1229
@ ResumePhi
Creates a scalar phi in a leaf VPBB with a single predecessor in VPlan.
Definition: VPlan.h:1247
@ FirstOrderRecurrenceSplice
Definition: VPlan.h:1235
@ CanonicalIVIncrementForPart
Definition: VPlan.h:1250
@ CalculateTripCountMinusVF
Definition: VPlan.h:1248
VPInstruction(unsigned Opcode, ArrayRef< VPValue * > Operands, DebugLoc DL, const Twine &Name="")
Definition: VPlan.h:1304
VPInstruction * clone() override
Clone the current recipe.
Definition: VPlan.h:1334
bool hasResult() const
Definition: VPlan.h:1365
LLVM_DUMP_METHOD void dump() const
Print the VPInstruction to dbgs() (for debugging).
unsigned getOpcode() const
Definition: VPlan.h:1341
VPInstruction(unsigned Opcode, std::initializer_list< VPValue * > Operands, WrapFlagsTy WrapFlags, DebugLoc DL={}, const Twine &Name="")
Definition: VPlan.h:1316
bool onlyFirstPartUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
VPInstruction(unsigned Opcode, std::initializer_list< VPValue * > Operands, DebugLoc DL={}, const Twine &Name="")
Definition: VPlan.h:1309
VPInstruction(unsigned Opcode, std::initializer_list< VPValue * > Operands, DisjointFlagsTy DisjointFlag, DebugLoc DL={}, const Twine &Name="")
Definition: VPlan.h:1321
bool isVectorToScalar() const
Returns true if this VPInstruction produces a scalar value from a vector, e.g.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the VPInstruction to O.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
bool isSingleScalar() const
Returns true if this VPInstruction's operands are single scalars and the result is also a single scal...
bool mayWriteToMemory() const
Return true if this instruction may modify memory.
Definition: VPlan.h:1358
void execute(VPTransformState &State) override
Generate the instruction.
VPInterleaveRecipe is a recipe for transforming an interleave group of load or stores into one wide l...
Definition: VPlan.h:2082
bool onlyFirstLaneUsed(const VPValue *Op) const override
The recipe only uses the first lane of the address.
Definition: VPlan.h:2161
~VPInterleaveRecipe() override=default
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition: VPlan.h:2123
VPInterleaveRecipe(const InterleaveGroup< Instruction > *IG, VPValue *Addr, ArrayRef< VPValue * > StoredValues, VPValue *Mask, bool NeedsMaskForGaps)
Definition: VPlan.h:2094
VPValue * getMask() const
Return the mask used by this recipe.
Definition: VPlan.h:2129
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPInterleaveRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2115
void execute(VPTransformState &State) override
Generate the wide load or store, and shuffles.
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
Definition: VPlan.h:2136
Instruction * getInsertPos() const
Definition: VPlan.h:2167
const InterleaveGroup< Instruction > * getInterleaveGroup()
Definition: VPlan.h:2152
unsigned getNumStoreOperands() const
Returns the number of stored operands of this interleave group.
Definition: VPlan.h:2156
InterleaveGroup< VPInstruction > * getInterleaveGroup(VPInstruction *Instr) const
Get the interleave group that Instr belongs to.
Definition: VPlan.h:3691
In what follows, the term "input IR" refers to code that is fed into the vectorizer whereas the term ...
Definition: VPlan.h:156
static VPLane getLastLaneForVF(const ElementCount &VF)
Definition: VPlan.h:196
static unsigned getNumCachedLanes(const ElementCount &VF)
Returns the maximum number of lanes that we are able to consider caching for VF.
Definition: VPlan.h:231
Value * getAsRuntimeExpr(IRBuilderBase &Builder, const ElementCount &VF) const
Returns an expression describing the lane index that can be used at runtime.
Definition: VPlan.cpp:70
VPLane(unsigned Lane, Kind LaneKind)
Definition: VPlan.h:178
Kind getKind() const
Returns the Kind of lane offset.
Definition: VPlan.h:212
static VPLane getLaneFromEnd(const ElementCount &VF, unsigned Offset)
Definition: VPlan.h:182
bool isFirstLane() const
Returns true if this is the first lane of the whole vector.
Definition: VPlan.h:215
unsigned getKnownLane() const
Returns a compile-time known value for the lane index and asserts if the lane can only be calculated ...
Definition: VPlan.h:202
static VPLane getFirstLane()
Definition: VPlan.h:180
Kind
Kind describes how to interpret Lane.
Definition: VPlan.h:159
@ ScalableLast
For ScalableLast, Lane is the offset from the start of the last N-element subvector in a scalable vec...
@ First
For First, Lane is the index into the first N elements of a fixed-vector <N x <ElTy>> or a scalable v...
unsigned mapToCacheIndex(const ElementCount &VF) const
Maps the lane to a cache index based on VF.
Definition: VPlan.h:218
A value that is used outside the VPlan.
Definition: VPlan.h:704
VPLiveOut(PHINode *Phi, VPValue *Op)
Definition: VPlan.h:708
static bool classof(const VPUser *U)
Definition: VPlan.h:711
bool usesScalars(const VPValue *Op) const override
Returns true if the VPLiveOut uses scalars of operand Op.
Definition: VPlan.h:722
void print(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the VPLiveOut to O.
PHINode * getPhi() const
Definition: VPlan.h:728
void fixPhi(VPlan &Plan, VPTransformState &State)
Fix the wrapped phi node.
VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when control converges back from ...
Definition: VPlan.h:2412
~VPPredInstPHIRecipe() override=default
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition: VPlan.h:2436
VPPredInstPHIRecipe(VPValue *PredV)
Construct a VPPredInstPHIRecipe given PredInst whose value needs phi nodes after merging back from ...
Definition: VPlan.h:2416
void execute(VPTransformState &State) override
Generates phi nodes for live-outs as needed to retain SSA form.
VPPredInstPHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2420
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition: VPlan.h:764
bool mayReadFromMemory() const
Returns true if the recipe may read from memory.
bool mayReadOrWriteMemory() const
Returns true if the recipe may read from or write to memory.
Definition: VPlan.h:855
bool mayHaveSideEffects() const
Returns true if the recipe may have side-effects.
bool mayWriteToMemory() const
Returns true if the recipe may write to memory.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const
Compute the cost of this recipe using the legacy cost model and the underlying instructions.
virtual ~VPRecipeBase()=default
VPBasicBlock * getParent()
Definition: VPlan.h:789
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
Definition: VPlan.h:860
virtual void execute(VPTransformState &State)=0
The method which generates the output IR instructions that correspond to this VPRecipe,...
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
static bool classof(const VPDef *D)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition: VPlan.h:831
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
VPRecipeBase(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL={})
Definition: VPlan.h:775
virtual VPRecipeBase * clone()=0
Clone the current recipe.
const VPBasicBlock * getParent() const
Definition: VPlan.h:790
virtual InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this recipe, taking into account if the cost computation should be skipped and the...
static bool classof(const VPUser *U)
Definition: VPlan.h:836
VPRecipeBase(const unsigned char SC, iterator_range< IterT > Operands, DebugLoc DL={})
Definition: VPlan.h:780
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
bool isPhi() const
Returns true for PHI-like recipes.
Definition: VPlan.h:844
void moveAfter(VPRecipeBase *MovePos)
Unlink this recipe from its current VPBasicBlock and insert it into the VPBasicBlock that MovePos liv...
Class to record the LLVM IR flags of a recipe alongside the recipe itself.
Definition: VPlan.h:964
ExactFlagsTy ExactFlags
Definition: VPlan.h:1020
FastMathFlagsTy FMFs
Definition: VPlan.h:1023
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, GEPFlagsTy GEPFlags, DebugLoc DL={})
Definition: VPlan.h:1097
NonNegFlagsTy NonNegFlags
Definition: VPlan.h:1022
CmpInst::Predicate CmpPredicate
Definition: VPlan.h:1017
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, CmpInst::Predicate Pred, DebugLoc DL={})
Definition: VPlan.h:1072
void setFlags(Instruction *I) const
Set the IR flags for I.
Definition: VPlan.h:1149
bool isInBounds() const
Definition: VPlan.h:1191
static bool classof(const VPRecipeBase *R)
Definition: VPlan.h:1103
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, FastMathFlags FMFs, DebugLoc DL={})
Definition: VPlan.h:1084
void dropPoisonGeneratingFlags()
Drop all poison-generating flags.
Definition: VPlan.h:1118
bool hasFastMathFlags() const
Returns true if the recipe has fast-math flags.
Definition: VPlan.h:1198
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, Instruction &I)
Definition: VPlan.h:1042
DisjointFlagsTy DisjointFlags
Definition: VPlan.h:1019
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, WrapFlagsTy WrapFlags, DebugLoc DL={})
Definition: VPlan.h:1078
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, DisjointFlagsTy DisjointFlags, DebugLoc DL={})
Definition: VPlan.h:1090
void transferFlags(VPRecipeWithIRFlags &Other)
Definition: VPlan.h:1028
WrapFlagsTy WrapFlags
Definition: VPlan.h:1018
bool hasNoUnsignedWrap() const
Definition: VPlan.h:1202
bool isDisjoint() const
Definition: VPlan.h:1214
void printFlags(raw_ostream &O) const
CmpInst::Predicate getPredicate() const
Definition: VPlan.h:1185
bool hasNoSignedWrap() const
Definition: VPlan.h:1208
static bool classof(const VPUser *U)
Definition: VPlan.h:1112
FastMathFlags getFastMathFlags() const
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, DebugLoc DL={})
Definition: VPlan.h:1035
A recipe to represent inloop reduction operations with vector-predication intrinsics,...
Definition: VPlan.h:2247
void execute(VPTransformState &State) override
Generate the reduction in the loop.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:2277
VPValue * getEVL() const
The VPValue of the explicit vector length.
Definition: VPlan.h:2274
VPReductionEVLRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2258
VPReductionEVLRecipe(VPReductionRecipe *R, VPValue *EVL, VPValue *CondOp)
Definition: VPlan.h:2249
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPReductionEVLRecipe() override=default
A recipe for handling reduction phis.
Definition: VPlan.h:1966
VPReductionPHIRecipe(PHINode *Phi, const RecurrenceDescriptor &RdxDesc, VPValue &Start, bool IsInLoop=false, bool IsOrdered=false)
Create a new VPReductionPHIRecipe for the reduction Phi described by RdxDesc.
Definition: VPlan.h:1979
bool isOrdered() const
Returns true if the phi is part of an ordered reduction.
Definition: VPlan.h:2017
VPReductionPHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1989
~VPReductionPHIRecipe() override=default
bool isInLoop() const
Returns true if the phi is part of an in-loop reduction.
Definition: VPlan.h:2020
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi/select nodes.
static bool classof(const VPHeaderPHIRecipe *R)
Definition: VPlan.h:1999
const RecurrenceDescriptor & getRecurrenceDescriptor() const
Definition: VPlan.h:2012
A recipe to represent inloop reduction operations, performing a reduction on a vector operand into a ...
Definition: VPlan.h:2173
bool isConditional() const
Return true if the in-loop reduction is conditional.
Definition: VPlan.h:2232
static bool classof(const VPRecipeBase *R)
Definition: VPlan.h:2206
VPValue * getVecOp() const
The VPValue of the vector value to be reduced.
Definition: VPlan.h:2236
const RecurrenceDescriptor & getRecurrenceDescriptor() const
Return the recurrence descriptor for the in-loop reduction.
Definition: VPlan.h:2226
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPReductionRecipe(const RecurrenceDescriptor &R, Instruction *I, VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp, bool IsOrdered)
Definition: VPlan.h:2192
VPValue * getCondOp() const
The VPValue of the condition for the block.
Definition: VPlan.h:2238
VPReductionRecipe(const unsigned char SC, const RecurrenceDescriptor &R, Instruction *I, ArrayRef< VPValue * > Operands, VPValue *CondOp, bool IsOrdered)
Definition: VPlan.h:2181
bool isOrdered() const
Return true if the in-loop reduction is ordered.
Definition: VPlan.h:2230
~VPReductionRecipe() override=default
VPValue * getChainOp() const
The VPValue of the scalar Chain being accumulated.
Definition: VPlan.h:2234
VPReductionRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2201
void execute(VPTransformState &State) override
Generate the reduction in the loop.
static bool classof(const VPUser *U)
Definition: VPlan.h:2211
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition: VPlan.h:3149
VPRegionBlock * clone() override
Clone all blocks in the single-entry single-exit region of the block and their recipes without updati...
Definition: VPlan.cpp:713
const VPBlockBase * getEntry() const
Definition: VPlan.h:3188
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
Definition: VPlan.h:3220
void dropAllReferences(VPValue *NewValue) override
Replace all operands of VPUsers in the block with NewValue and also replace all uses of VPValues def...
Definition: VPlan.cpp:722
void setExiting(VPBlockBase *ExitingBlock)
Set ExitingBlock as the exiting VPBlockBase of this VPRegionBlock.
Definition: VPlan.h:3205
VPBlockBase * getExiting()
Definition: VPlan.h:3201
void setEntry(VPBlockBase *EntryBlock)
Set EntryBlock as the entry VPBlockBase of this VPRegionBlock.
Definition: VPlan.h:3193
InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override
Return the cost of the block.
Definition: VPlan.cpp:787
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print this VPRegionBlock to O (recursively), prefixing all lines with Indent.
Definition: VPlan.cpp:823
VPRegionBlock(const std::string &Name="", bool IsReplicator=false)
Definition: VPlan.h:3171
VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name="", bool IsReplicator=false)
Definition: VPlan.h:3162
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPRegionBlock,...
Definition: VPlan.cpp:729
const VPBlockBase * getExiting() const
Definition: VPlan.h:3200
VPBlockBase * getEntry()
Definition: VPlan.h:3189
VPBasicBlock * getPreheaderVPBB()
Returns the pre-header VPBasicBlock of the loop region.
Definition: VPlan.h:3213
~VPRegionBlock() override
Definition: VPlan.h:3175
static bool classof(const VPBlockBase *V)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition: VPlan.h:3184
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
Definition: VPlan.h:2288
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate replicas of the desired Ingredient.
~VPReplicateRecipe() override=default
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:2333
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition: VPlan.h:2340
bool isUniform() const
Definition: VPlan.h:2328
bool isPredicated() const
Definition: VPlan.h:2330
VPReplicateRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2307
VPReplicateRecipe(Instruction *I, iterator_range< IterT > Operands, bool IsUniform, VPValue *Mask=nullptr)
Definition: VPlan.h:2297
unsigned getOpcode() const
Definition: VPlan.h:2357
VPValue * getMask()
Return the mask of a predicated VPReplicateRecipe.
Definition: VPlan.h:2352
bool shouldPack() const
Returns true if the recipe is used by a widened recipe via an intervening VPPredInstPHIRecipe.
VPScalarCastRecipe is a recipe to create scalar cast instructions.
Definition: VPlan.h:1487
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Each concrete VPDef prints itself.
~VPScalarCastRecipe() override=default
VPScalarCastRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1501
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition: VPlan.h:1517
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Type * getResultType() const
Returns the result type of the cast.
Definition: VPlan.h:1515
VPScalarCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)
Definition: VPlan.h:1495
A recipe for handling phi nodes of integer and floating-point inductions, producing their scalar valu...
Definition: VPlan.h:2921
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:2961
VPValue * getStepValue() const
Definition: VPlan.h:2958
VPScalarIVStepsRecipe(const InductionDescriptor &IndDesc, VPValue *IV, VPValue *Step)
Definition: VPlan.h:2931
VPScalarIVStepsRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2941
VPScalarIVStepsRecipe(VPValue *IV, VPValue *Step, Instruction::BinaryOps Opcode, FastMathFlags FMFs)
Definition: VPlan.h:2925
~VPScalarIVStepsRecipe() override=default
void execute(VPTransformState &State) override
Generate the scalarized versions of the phi node as needed by their users.
VPSingleDefRecipe is a base class for recipes modeling a sequence of one or more output IR that define ...
Definition: VPlan.h:891
VPSingleDefRecipe(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL={})
Definition: VPlan.h:897
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
Definition: VPlan.h:955
static bool classof(const VPRecipeBase *R)
Definition: VPlan.h:906
const Instruction * getUnderlyingInstr() const
Definition: VPlan.h:958
VPSingleDefRecipe(const unsigned char SC, IterT Operands, DebugLoc DL={})
Definition: VPlan.h:894
static bool classof(const VPUser *U)
Definition: VPlan.h:947
VPSingleDefRecipe(const unsigned char SC, IterT Operands, Value *UV, DebugLoc DL={})
Definition: VPlan.h:902
virtual VPSingleDefRecipe * clone() override=0
Clone the current recipe.
This class can be used to assign names to VPValues.
Definition: VPlanValue.h:449
An analysis for type-inference for VPValues.
Definition: VPlanAnalysis.h:39
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition: VPlanValue.h:202
operand_range operands()
Definition: VPlanValue.h:272
void setOperand(unsigned I, VPValue *New)
Definition: VPlanValue.h:257
unsigned getNumOperands() const
Definition: VPlanValue.h:251
operand_iterator op_begin()
Definition: VPlanValue.h:268
VPValue * getOperand(unsigned N) const
Definition: VPlanValue.h:252
VPUser()=delete
void addOperand(VPValue *Operand)
Definition: VPlanValue.h:246
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Definition: VPlan.cpp:120
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
Definition: VPlanValue.h:77
unsigned getNumUsers() const
Definition: VPlanValue.h:111
Value * getLiveInIRValue()
Returns the underlying IR value, if this VPValue is defined outside the scope of VPlan.
Definition: VPlanValue.h:172
bool isLiveIn() const
Returns true if this VPValue is a live-in, i.e. defined outside the VPlan.
Definition: VPlanValue.h:167
friend class VPRecipeBase
Definition: VPlanValue.h:52
user_range users()
Definition: VPlanValue.h:132
bool isDefinedOutsideVectorRegions() const
Returns true if the VPValue is defined outside any vector regions, i.e.
Definition: VPlanValue.h:186
A recipe to compute the pointers for widened memory accesses of IndexedTy for all parts.
Definition: VPlan.h:1655
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
VPVectorPointerRecipe(VPValue *Ptr, Type *IndexedTy, bool IsReverse, bool IsInBounds, DebugLoc DL)
Definition: VPlan.h:1660
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition: VPlan.h:1670
VPVectorPointerRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1676
A recipe for widening Call instructions.
Definition: VPlan.h:1526
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
const_operand_range arg_operands() const
Definition: VPlan.h:1567
VPWidenCallRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1550
VPWidenCallRecipe(Value *UV, iterator_range< IterT > CallArguments, Intrinsic::ID VectorIntrinsicID, DebugLoc DL={}, Function *Variant=nullptr)
Definition: VPlan.h:1538
Function * getCalledScalarFunction() const
Definition: VPlan.h:1560
void execute(VPTransformState &State) override
Produce a widened version of the call instruction.
operand_range arg_operands()
Definition: VPlan.h:1564
~VPWidenCallRecipe() override=default
A Recipe for widening the canonical induction variable of the vector loop.
Definition: VPlan.h:2835
void execute(VPTransformState &State) override
Generate a canonical vector induction variable of the vector loop, with start = {<Part*VF,...
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPWidenCanonicalIVRecipe() override=default
VPWidenCanonicalIVRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2842
VPWidenCanonicalIVRecipe(VPCanonicalIVPHIRecipe *CanonicalIV)
Definition: VPlan.h:2837
VPWidenCastRecipe is a recipe to create vector cast instructions.
Definition: VPlan.h:1439
VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, CastInst &UI)
Definition: VPlan.h:1447
Instruction::CastOps getOpcode() const
Definition: VPlan.h:1480
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Type * getResultType() const
Returns the result type of the cast.
Definition: VPlan.h:1483
VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)
Definition: VPlan.h:1455
void execute(VPTransformState &State) override
Produce widened copies of the cast.
~VPWidenCastRecipe() override=default
VPWidenCastRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1461
A recipe for handling GEP instructions.
Definition: VPlan.h:1613
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the gep nodes.
VPWidenGEPRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1635
~VPWidenGEPRecipe() override=default
VPWidenGEPRecipe(GetElementPtrInst *GEP, iterator_range< IterT > Operands)
Definition: VPlan.h:1630
A recipe for handling phi nodes of integer and floating-point inductions, producing their vector valu...
Definition: VPlan.h:1768
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step, const InductionDescriptor &IndDesc, TruncInst *Trunc)
Definition: VPlan.h:1781
const TruncInst * getTruncInst() const
Definition: VPlan.h:1829
VPRecipeBase & getBackedgeRecipe() override
Returns the backedge value as a recipe.
Definition: VPlan.h:1815
~VPWidenIntOrFpInductionRecipe() override=default
VPWidenIntOrFpInductionRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1791
TruncInst * getTruncInst()
Returns the first defined value as TruncInst, if it is one or nullptr otherwise.
Definition: VPlan.h:1828
void execute(VPTransformState &State) override
Generate the vectorized and scalarized versions of the phi node as needed by their users.
VPValue * getStepValue()
Returns the step value of the induction.
Definition: VPlan.h:1823
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step, const InductionDescriptor &IndDesc)
Definition: VPlan.h:1774
const VPValue * getStepValue() const
Definition: VPlan.h:1824
Type * getScalarType() const
Returns the scalar type of the induction.
Definition: VPlan.h:1842
VPValue * getBackedgeValue() override
Returns the incoming value from the loop backedge.
Definition: VPlan.h:1808
bool isCanonical() const
Returns true if the induction is canonical, i.e.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
Definition: VPlan.h:1834
A common base class for widening memory operations.
Definition: VPlan.h:2445
bool IsMasked
Whether the memory access is masked.
Definition: VPlan.h:2456
bool Reverse
Whether the consecutive accessed addresses are in reverse order.
Definition: VPlan.h:2453
bool isConsecutive() const
Return whether the loaded-from / stored-to addresses are consecutive.
Definition: VPlan.h:2492
static bool classof(const VPUser *U)
Definition: VPlan.h:2486
void execute(VPTransformState &State) override
Generate the wide load/store.
Definition: VPlan.h:2512
Instruction & Ingredient
Definition: VPlan.h:2447
VPWidenMemoryRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2475
Instruction & getIngredient() const
Definition: VPlan.h:2516
bool Consecutive
Whether the accessed addresses are consecutive.
Definition: VPlan.h:2450
static bool classof(const VPRecipeBase *R)
Definition: VPlan.h:2479
VPWidenMemoryRecipe(const char unsigned SC, Instruction &I, std::initializer_list< VPValue * > Operands, bool Consecutive, bool Reverse, DebugLoc DL)
Definition: VPlan.h:2466
VPValue * getMask() const
Return the mask used by this recipe.
Definition: VPlan.h:2506
bool isMasked() const
Returns true if the recipe is masked.
Definition: VPlan.h:2502
void setMask(VPValue *Mask)
Definition: VPlan.h:2458
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition: VPlan.h:2499
bool isReverse() const
Return whether the consecutive loaded/stored addresses are in reverse order.
Definition: VPlan.h:2496
A recipe for handling phis that are widened in the vector loop.
Definition: VPlan.h:1894
void addIncoming(VPValue *IncomingV, VPBasicBlock *IncomingBlock)
Adds a pair (IncomingV, IncomingBlock) to the phi.
Definition: VPlan.h:1924
VPValue * getIncomingValue(unsigned I)
Returns the I-th incoming VPValue.
Definition: VPlan.h:1933
VPWidenPHIRecipe(PHINode *Phi, VPValue *Start=nullptr)
Create a new VPWidenPHIRecipe for Phi with start value Start.
Definition: VPlan.h:1900
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPWidenPHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1906
~VPWidenPHIRecipe() override=default
VPBasicBlock * getIncomingBlock(unsigned I)
Returns the I-th incoming VPBasicBlock.
Definition: VPlan.h:1930
void execute(VPTransformState &State) override
Generate the phi/select nodes.
VPWidenPointerInductionRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1867
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
Definition: VPlan.h:1882
~VPWidenPointerInductionRecipe() override=default
bool onlyScalarsGenerated(bool IsScalable)
Returns true if only scalar values will be generated.
void execute(VPTransformState &State) override
Generate vector values for the pointer induction.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPWidenPointerInductionRecipe(PHINode *Phi, VPValue *Start, VPValue *Step, const InductionDescriptor &IndDesc, bool IsScalarAfterVectorization)
Create a new VPWidenPointerInductionRecipe for Phi with start value Start.
Definition: VPlan.h:1855
VPWidenRecipe is a recipe for producing a widened instruction using the opcode and operands of the re...
Definition: VPlan.h:1406
void execute(VPTransformState &State) override
Produce a widened instruction using the opcode and operands of the recipe, processing State....
VPWidenRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1417
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPWidenRecipe() override=default
VPWidenRecipe(Instruction &I, iterator_range< IterT > Operands)
Definition: VPlan.h:1411
unsigned getOpcode() const
Definition: VPlan.h:1429
VPlanPrinter prints a given VPlan to a given output stream.
Definition: VPlan.h:3504
VPlanPrinter(raw_ostream &O, const VPlan &P)
Definition: VPlan.h:3545
LLVM_DUMP_METHOD void dump()
Definition: VPlan.cpp:1282
Class that maps (parts of) an existing VPlan to trees of combined VPInstructions.
Definition: VPlan.h:3698
VPInstruction * buildGraph(ArrayRef< VPValue * > Operands)
Tries to build an SLP tree rooted at Operands and returns a VPInstruction combining Operands,...
Definition: VPlanSLP.cpp:359
bool isCompletelySLP() const
Return true if all visited instructions can be combined.
Definition: VPlan.h:3788
~VPlanSlp()=default
VPlanSlp(VPInterleavedAccessInfo &IAI, VPBasicBlock &BB)
Definition: VPlan.h:3776
unsigned getWidestBundleBits() const
Return the width of the widest combined bundle in bits.
Definition: VPlan.h:3785
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient outp...
Definition: VPlan.h:3253
void printDOT(raw_ostream &O) const
Print this VPlan in DOT format to O.
Definition: VPlan.cpp:1173
std::string getName() const
Return a string with the name of the plan and the applicable VFs and UFs.
Definition: VPlan.cpp:1149
void prepareToExecute(Value *TripCount, Value *VectorTripCount, Value *CanonicalIVStartValue, VPTransformState &State)
Prepare the plan for execution, setting up the required live-in values.
Definition: VPlan.cpp:916
bool hasScalableVF()
Definition: VPlan.h:3394
VPBasicBlock * getEntry()
Definition: VPlan.h:3355
VPValue & getVectorTripCount()
The vector trip count.
Definition: VPlan.h:3380
void setName(const Twine &newName)
Definition: VPlan.h:3417
VPValue & getVFxUF()
Returns VF * UF of the vector loop region.
Definition: VPlan.h:3383
VPValue * getTripCount() const
The trip count of the original loop.
Definition: VPlan.h:3359
VPValue * getOrCreateBackedgeTakenCount()
The backedge taken count of the original loop.
Definition: VPlan.h:3373
void removeLiveOut(PHINode *PN)
Definition: VPlan.h:3474
iterator_range< SmallSetVector< ElementCount, 2 >::iterator > vectorFactors() const
Returns an iterator range over all VFs of the plan.
Definition: VPlan.h:3400
void addLiveOut(PHINode *PN, VPValue *V)
Definition: VPlan.cpp:1182
const VPBasicBlock * getEntry() const
Definition: VPlan.h:3356
VPlan(VPBasicBlock *Preheader, VPValue *TC, VPBasicBlock *Entry)
Construct a VPlan with original preheader Preheader, trip count TC and Entry to the plan.
Definition: VPlan.h:3313
VPBasicBlock * getPreheader()
Definition: VPlan.h:3493
VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition: VPlan.h:3455
const VPRegionBlock * getVectorLoopRegion() const
Definition: VPlan.h:3458
bool hasVF(ElementCount VF)
Definition: VPlan.h:3393
void addSCEVExpansion(const SCEV *S, VPValue *V)
Definition: VPlan.h:3487
bool hasUF(unsigned UF) const
Definition: VPlan.h:3406
void setVF(ElementCount VF)
Definition: VPlan.h:3387
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this plan.
Definition: VPlan.cpp:1086
void resetTripCount(VPValue *NewTripCount)
Resets the trip count for the VPlan.
Definition: VPlan.h:3366
static VPlanPtr createInitialVPlan(const SCEV *TripCount, ScalarEvolution &PSE, bool RequiresScalarEpilogueCheck, bool TailFolded, Loop *TheLoop)
Create initial VPlan, having an "entry" VPBasicBlock (wrapping original scalar pre-header) which con...
Definition: VPlan.cpp:858
VPlan(VPBasicBlock *Preheader, VPBasicBlock *Entry)
Construct a VPlan with original preheader Preheader and Entry to the plan.
Definition: VPlan.h:3322
const VPBasicBlock * getPreheader() const
Definition: VPlan.h:3494
VPValue * getOrAddLiveIn(Value *V)
Gets the live-in VPValue for V or adds a new live-in (if none exists yet) for V.
Definition: VPlan.h:3421
LLVM_DUMP_METHOD void dump() const
Dump the plan to stderr (for debugging).
Definition: VPlan.cpp:1179
bool hasScalarVFOnly() const
Definition: VPlan.h:3404
void execute(VPTransformState *State)
Generate the IR code for this VPlan.
Definition: VPlan.cpp:976
VPCanonicalIVPHIRecipe * getCanonicalIV()
Returns the canonical induction recipe of the vector loop.
Definition: VPlan.h:3463
const MapVector< PHINode *, VPLiveOut * > & getLiveOuts() const
Definition: VPlan.h:3479
void print(raw_ostream &O) const
Print this VPlan to O.
Definition: VPlan.cpp:1123
void addVF(ElementCount VF)
Definition: VPlan.h:3385
VPValue * getLiveIn(Value *V) const
Return the live-in VPValue for V, if there is one or nullptr otherwise.
Definition: VPlan.h:3438
VPValue * getSCEVExpansion(const SCEV *S) const
Definition: VPlan.h:3483
void printLiveIns(raw_ostream &O) const
Print the live-ins of this VPlan to O.
Definition: VPlan.cpp:1093
void setUF(unsigned UF)
Definition: VPlan.h:3408
VPlan * duplicate()
Clone the current VPlan, update all VPValues of the new VPlan and cloned recipes to refer to the clon...
Definition: VPlan.cpp:1225
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:171
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
An ilist node that can access its parent list.
Definition: ilist_node.h:321
base_list_type::const_reverse_iterator const_reverse_iterator
Definition: ilist.h:125
void pop_back()
Definition: ilist.h:255
base_list_type::reverse_iterator reverse_iterator
Definition: ilist.h:123
base_list_type::const_iterator const_iterator
Definition: ilist.h:122
iterator insert(iterator where, pointer New)
Definition: ilist.h:165
CRTP base class which implements the entire standard iterator facade in terms of a minimal subset of ...
Definition: iterator.h:80
A range adaptor for a pair of iterators.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
This file defines classes to implement an intrusive doubly linked list class (i.e.
This file defines the ilist_node class template, which is a convenient base class for creating classe...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ BasicBlock
Various leaf nodes.
Definition: ISDOpcodes.h:71
VPValue * getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr, ScalarEvolution &SE)
Get or create a VPValue that corresponds to the expansion of Expr.
Definition: VPlan.cpp:1610
bool isUniformAfterVectorization(VPValue *VPV)
Returns true if VPV is uniform after vectorization.
Definition: VPlan.h:3808
bool onlyFirstPartUsed(const VPValue *Def)
Returns true if only the first part of Def is used.
Definition: VPlan.cpp:1605
bool onlyFirstLaneUsed(const VPValue *Def)
Returns true if only the first lane of Def is used.
Definition: VPlan.cpp:1600
bool isHeaderMask(VPValue *V, VPlan &Plan)
Return true if V is a header mask in Plan.
Definition: VPlan.cpp:1627
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1742
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
bool isEqual(const GCNRPTracker::LiveRegSet &S1, const GCNRPTracker::LiveRegSet &S2)
const SCEV * createTripCountSCEV(Type *IdxTy, PredicatedScalarEvolution &PSE, Loop *OrigLoop)
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition: Error.h:198
Value * getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF)
Return the runtime value for VF.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
auto cast_or_null(const Y &Val)
Definition: Casting.h:720
auto map_range(ContainerTy &&C, FuncTy F)
Definition: STLExtras.h:377
auto dyn_cast_or_null(const Y &Val)
Definition: Casting.h:759
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:291
std::unique_ptr< VPlan > VPlanPtr
Definition: VPlan.h:147
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
iterator_range< filter_iterator< detail::IterOfRange< RangeT >, PredicateT > > make_filter_range(RangeT &&Range, PredicateT Pred)
Convenience function that takes a range of elements and a predicate, and return a new filter_iterator...
Definition: STLExtras.h:572
@ Other
Any other memory.
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition: APFixedPoint.h:293
Value * createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF, int64_t Step)
Return a value for Step multiplied by VF.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1879
unsigned getReciprocalPredBlockProb()
A helper function that returns the reciprocal of the block probability of predicated blocks.
Definition: VPlan.h:95
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
Definition: Hashing.h:471
#define N
A range of powers-of-2 vectorization factors with fixed start and adjustable end.
Definition: VPlan.h:100
iterator end()
Definition: VPlan.h:141
const ElementCount Start
Definition: VPlan.h:102
ElementCount End
Definition: VPlan.h:105
iterator begin()
Definition: VPlan.h:140
bool isEmpty() const
Definition: VPlan.h:107
VFRange(const ElementCount &Start, const ElementCount &End)
Definition: VPlan.h:111
Struct to hold various analysis needed for cost computations.
Definition: VPlan.h:737
LLVMContext & LLVMCtx
Definition: VPlan.h:740
LoopVectorizationCostModel & CM
Definition: VPlan.h:741
bool skipCostComputation(Instruction *UI, bool IsVector) const
Return true if the cost for UI shouldn't be computed, e.g.
InstructionCost getLegacyCost(Instruction *UI, ElementCount VF) const
Return the cost for UI with VF using the legacy cost model as fallback until computing the cost of al...
VPCostContext(const TargetTransformInfo &TTI, Type *CanIVTy, LLVMContext &LLVMCtx, LoopVectorizationCostModel &CM)
Definition: VPlan.h:744
VPTypeAnalysis Types
Definition: VPlan.h:739
const TargetTransformInfo & TTI
Definition: VPlan.h:738
SmallPtrSet< Instruction *, 8 > SkipCostComputation
Definition: VPlan.h:742
A recipe for handling first-order recurrence phis.
Definition: VPlan.h:1939
void execute(VPTransformState &State) override
Generate the phi nodes.
VPFirstOrderRecurrencePHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1949
VPFirstOrderRecurrencePHIRecipe(PHINode *Phi, VPValue &Start)
Definition: VPlan.h:1940
static bool classof(const VPHeaderPHIRecipe *R)
Definition: VPlan.h:1945
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPIteration represents a single point in the iteration space of the output (vectorized and/or unrolle...
Definition: VPlan.h:238
VPIteration(unsigned Part, const VPLane &Lane)
Definition: VPlan.h:248
unsigned Part
in [0..UF)
Definition: VPlan.h:240
VPLane Lane
Definition: VPlan.h:242
VPIteration(unsigned Part, unsigned Lane, VPLane::Kind Kind=VPLane::Kind::First)
Definition: VPlan.h:244
bool isFirstIteration() const
Definition: VPlan.h:250
WrapFlagsTy(bool HasNUW, bool HasNSW)
Definition: VPlan.h:981
Hold state information used when constructing the CFG of the output IR, traversing the VPBasicBlocks ...
Definition: VPlan.h:378
BasicBlock * PrevBB
The previous IR BasicBlock created or used.
Definition: VPlan.h:384
SmallDenseMap< VPBasicBlock *, BasicBlock * > VPBB2IRBB
A mapping of each VPBasicBlock to the corresponding BasicBlock.
Definition: VPlan.h:392
VPBasicBlock * PrevVPBB
The previous VPBasicBlock visited. Initially set to null.
Definition: VPlan.h:380
BasicBlock * ExitBB
The last IR BasicBlock in the output IR.
Definition: VPlan.h:388
CFGState(DominatorTree *DT)
Definition: VPlan.h:397
BasicBlock * getPreheaderBBFor(VPRecipeBase *R)
Returns the BasicBlock* mapped to the pre-header of the loop region containing R.
Definition: VPlan.cpp:356
DomTreeUpdater DTU
Updater for the DominatorTree.
Definition: VPlan.h:395
SmallVector< Value *, 2 > PerPartValuesTy
A type for vectorized values in the new loop.
Definition: VPlan.h:273
DenseMap< VPValue *, ScalarsPerPartValuesTy > PerPartScalars
Definition: VPlan.h:278
DenseMap< VPValue *, PerPartValuesTy > PerPartOutput
Definition: VPlan.h:275
VPTransformState holds information passed down when "executing" a VPlan, needed for generating the ou...
Definition: VPlan.h:255
Value * get(VPValue *Def, unsigned Part, bool IsScalar=false)
Get the generated vector Value for a given VPValue Def and a given Part if IsScalar is false,...
Definition: VPlan.cpp:254
LoopInfo * LI
Hold a pointer to LoopInfo to register new basic blocks in the loop.
Definition: VPlan.h:406
DenseMap< const SCEV *, Value * > ExpandedSCEVs
Map SCEVs to their expanded values.
Definition: VPlan.h:429
VPTypeAnalysis TypeAnalysis
VPlan-based type analysis.
Definition: VPlan.h:432
struct llvm::VPTransformState::DataState Data
void addMetadata(Value *To, Instruction *From)
Add metadata from one instruction to another.
Definition: VPlan.cpp:369
void reset(VPValue *Def, Value *V, unsigned Part)
Reset an existing vector value for Def and a given Part.
Definition: VPlan.h:322
struct llvm::VPTransformState::CFGState CFG
void reset(VPValue *Def, Value *V, const VPIteration &Instance)
Reset an existing scalar value for Def and a given Instance.
Definition: VPlan.h:344
LoopVersioning * LVer
LoopVersioning.
Definition: VPlan.h:425
void addNewMetadata(Instruction *To, const Instruction *Orig)
Add additional metadata to To that was not present on Orig.
Definition: VPlan.cpp:361
void packScalarIntoVectorValue(VPValue *Def, const VPIteration &Instance)
Construct the vector value of a scalarized value V one lane at a time.
Definition: VPlan.cpp:401
void set(VPValue *Def, Value *V, const VPIteration &Instance)
Set the generated scalar V for Def and the given Instance.
Definition: VPlan.h:330
void set(VPValue *Def, Value *V, unsigned Part, bool IsScalar=false)
Set the generated vector Value for a given VPValue and a given Part, if IsScalar is false.
Definition: VPlan.h:307
std::optional< VPIteration > Instance
Hold the indices to generate specific scalar instructions.
Definition: VPlan.h:267
IRBuilderBase & Builder
Hold a reference to the IRBuilder used to generate output IR code.
Definition: VPlan.h:409
bool hasScalarValue(VPValue *Def, VPIteration Instance)
Definition: VPlan.h:295
VPlan * Plan
Pointer to the VPlan for which code is generated.
Definition: VPlan.h:415
InnerLoopVectorizer * ILV
Hold a pointer to InnerLoopVectorizer to reuse its IR generation methods.
Definition: VPlan.h:412
bool hasVectorValue(VPValue *Def, unsigned Part)
Definition: VPlan.h:289
ElementCount VF
The chosen Vectorization and Unroll Factors of the loop being vectorized.
Definition: VPlan.h:261
Loop * CurrentVectorLoop
The loop object for the current parent region, or nullptr.
Definition: VPlan.h:418
void setDebugLocFrom(DebugLoc DL)
Set the debug location in the builder using the debug location DL.
Definition: VPlan.cpp:380
A recipe for widening load operations with vector-predication intrinsics, using the address to load f...
Definition: VPlan.h:2560
void execute(VPTransformState &State) override
Generate the wide load or gather.
VPValue * getEVL() const
Return the EVL operand.
Definition: VPlan.h:2572
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPWidenLoadEVLRecipe(VPWidenLoadRecipe *L, VPValue *EVL, VPValue *Mask)
Definition: VPlan.h:2561
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:2584
A recipe for widening load operations, using the address to load from and an optional mask.
Definition: VPlan.h:2521
VP_CLASSOF_IMPL(VPDef::VPWidenLoadSC)
VPWidenLoadRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask, bool Consecutive, bool Reverse, DebugLoc DL)
Definition: VPlan.h:2522
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:2548
void execute(VPTransformState &State) override
Generate a wide load or gather.
VPWidenLoadRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2530
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
A recipe for widening select instructions.
Definition: VPlan.h:1579
bool isInvariantCond() const
Definition: VPlan.h:1607
VPWidenSelectRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1587
VPWidenSelectRecipe(SelectInst &I, iterator_range< IterT > Operands)
Definition: VPlan.h:1581
VPValue * getCond() const
Definition: VPlan.h:1603
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Produce a widened version of the select instruction.
~VPWidenSelectRecipe() override=default
A recipe for widening store operations with vector-predication intrinsics, using the value to store,...
Definition: VPlan.h:2636
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition: VPlan.h:2648
void execute(VPTransformState &State) override
Generate the wide store or scatter.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPWidenStoreEVLRecipe(VPWidenStoreRecipe *S, VPValue *EVL, VPValue *Mask)
Definition: VPlan.h:2637
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:2663
VPValue * getEVL() const
Return the EVL operand.
Definition: VPlan.h:2651
A recipe for widening store operations, using the stored value, the address to store to and an option...
Definition: VPlan.h:2595
void execute(VPTransformState &State) override
Generate a wide store or scatter.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:2624
VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal, VPValue *Mask, bool Consecutive, bool Reverse, DebugLoc DL)
Definition: VPlan.h:2596
VP_CLASSOF_IMPL(VPDef::VPWidenStoreSC)
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition: VPlan.h:2612
VPWidenStoreRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2603
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPlanIngredient(const Value *V)
Definition: VPlan.h:3554
const Value * V
Definition: VPlan.h:3552
void print(raw_ostream &O) const
Definition: VPlan.cpp:1400