LLVM 22.0.0git
VPlan.h
Go to the documentation of this file.
1//===- VPlan.h - Represent A Vectorizer Plan --------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file contains the declarations of the Vectorization Plan base classes:
11/// 1. VPBasicBlock and VPRegionBlock that inherit from a common pure virtual
12/// VPBlockBase, together implementing a Hierarchical CFG;
13/// 2. Pure virtual VPRecipeBase serving as the base class for recipes contained
14/// within VPBasicBlocks;
15/// 3. Pure virtual VPSingleDefRecipe serving as a base class for recipes that
16/// also inherit from VPValue.
17/// 4. VPInstruction, a concrete Recipe and VPUser modeling a single planned
18/// instruction;
19/// 5. The VPlan class holding a candidate for vectorization;
20/// These are documented in docs/VectorizationPlan.rst.
21//
22//===----------------------------------------------------------------------===//
23
24#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
25#define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
26
27#include "VPlanValue.h"
28#include "llvm/ADT/MapVector.h"
31#include "llvm/ADT/Twine.h"
32#include "llvm/ADT/ilist.h"
33#include "llvm/ADT/ilist_node.h"
37#include "llvm/IR/DebugLoc.h"
38#include "llvm/IR/FMF.h"
39#include "llvm/IR/Operator.h"
42#include <cassert>
43#include <cstddef>
44#include <functional>
45#include <string>
46#include <utility>
47#include <variant>
48
49namespace llvm {
50
51class BasicBlock;
52class DominatorTree;
54class IRBuilderBase;
55struct VPTransformState;
56class raw_ostream;
58class SCEV;
59class Type;
60class VPBasicBlock;
61class VPBuilder;
62class VPDominatorTree;
63class VPRegionBlock;
64class VPlan;
65class VPLane;
67class VPlanSlp;
68class Value;
70
71struct VPCostContext;
72
73namespace Intrinsic {
74typedef unsigned ID;
75}
76
77using VPlanPtr = std::unique_ptr<VPlan>;
78
79/// VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
80/// A VPBlockBase can be either a VPBasicBlock or a VPRegionBlock.
82 friend class VPBlockUtils;
83
84 const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).
85
86 /// An optional name for the block.
87 std::string Name;
88
89 /// The immediate VPRegionBlock which this VPBlockBase belongs to, or null if
90 /// it is a topmost VPBlockBase.
91 VPRegionBlock *Parent = nullptr;
92
93 /// List of predecessor blocks.
95
96 /// List of successor blocks.
98
99 /// VPlan containing the block. Can only be set on the entry block of the
100 /// plan.
101 VPlan *Plan = nullptr;
102
103 /// Add \p Successor as the last successor to this block.
104 void appendSuccessor(VPBlockBase *Successor) {
105 assert(Successor && "Cannot add nullptr successor!");
106 Successors.push_back(Successor);
107 }
108
109 /// Add \p Predecessor as the last predecessor to this block.
110 void appendPredecessor(VPBlockBase *Predecessor) {
111 assert(Predecessor && "Cannot add nullptr predecessor!");
112 Predecessors.push_back(Predecessor);
113 }
114
115 /// Remove \p Predecessor from the predecessors of this block.
116 void removePredecessor(VPBlockBase *Predecessor) {
117 auto Pos = find(Predecessors, Predecessor);
118 assert(Pos && "Predecessor does not exist");
119 Predecessors.erase(Pos);
120 }
121
122 /// Remove \p Successor from the successors of this block.
123 void removeSuccessor(VPBlockBase *Successor) {
124 auto Pos = find(Successors, Successor);
125 assert(Pos && "Successor does not exist");
126 Successors.erase(Pos);
127 }
128
129 /// This function replaces one predecessor with another, useful when
130 /// trying to replace an old block in the CFG with a new one.
131 void replacePredecessor(VPBlockBase *Old, VPBlockBase *New) {
132 auto I = find(Predecessors, Old);
133 assert(I != Predecessors.end());
134 assert(Old->getParent() == New->getParent() &&
135 "replaced predecessor must have the same parent");
136 *I = New;
137 }
138
139 /// This function replaces one successor with another, useful when
140 /// trying to replace an old block in the CFG with a new one.
141 void replaceSuccessor(VPBlockBase *Old, VPBlockBase *New) {
142 auto I = find(Successors, Old);
143 assert(I != Successors.end());
144 assert(Old->getParent() == New->getParent() &&
145 "replaced successor must have the same parent");
146 *I = New;
147 }
148
149protected:
150 VPBlockBase(const unsigned char SC, const std::string &N)
151 : SubclassID(SC), Name(N) {}
152
153public:
154 /// An enumeration for keeping track of the concrete subclass of VPBlockBase
155 /// that are actually instantiated. Values of this enumeration are kept in the
156 /// SubclassID field of the VPBlockBase objects. They are used for concrete
157 /// type identification.
using VPBlockTy = enum {
  VPRegionBlockSC,
  VPBasicBlockSC,
  VPIRBasicBlockSC
};
159
161
162 virtual ~VPBlockBase() = default;
163
164 const std::string &getName() const { return Name; }
165
166 void setName(const Twine &newName) { Name = newName.str(); }
167
168 /// \return an ID for the concrete type of this object.
169 /// This is used to implement the classof checks. This should not be used
170 /// for any other purpose, as the values may change as LLVM evolves.
171 unsigned getVPBlockID() const { return SubclassID; }
172
173 VPRegionBlock *getParent() { return Parent; }
174 const VPRegionBlock *getParent() const { return Parent; }
175
176 /// \return A pointer to the plan containing the current block.
177 VPlan *getPlan();
178 const VPlan *getPlan() const;
179
180 /// Sets the pointer of the plan containing the block. The block must be the
181 /// entry block into the VPlan.
182 void setPlan(VPlan *ParentPlan);
183
184 void setParent(VPRegionBlock *P) { Parent = P; }
185
186 /// \return the VPBasicBlock that is the entry of this VPBlockBase,
187 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
188 /// VPBlockBase is a VPBasicBlock, it is returned.
189 const VPBasicBlock *getEntryBasicBlock() const;
190 VPBasicBlock *getEntryBasicBlock();
191
192 /// \return the VPBasicBlock that is the exiting block of this VPBlockBase,
193 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
194 /// VPBlockBase is a VPBasicBlock, it is returned.
195 const VPBasicBlock *getExitingBasicBlock() const;
196 VPBasicBlock *getExitingBasicBlock();
197
198 const VPBlocksTy &getSuccessors() const { return Successors; }
199 VPBlocksTy &getSuccessors() { return Successors; }
200
203
204 const VPBlocksTy &getPredecessors() const { return Predecessors; }
205 VPBlocksTy &getPredecessors() { return Predecessors; }
206
207 /// \return the successor of this VPBlockBase if it has a single successor.
208 /// Otherwise return a null pointer.
210 return (Successors.size() == 1 ? *Successors.begin() : nullptr);
211 }
212
213 /// \return the predecessor of this VPBlockBase if it has a single
214 /// predecessor. Otherwise return a null pointer.
216 return (Predecessors.size() == 1 ? *Predecessors.begin() : nullptr);
217 }
218
219 size_t getNumSuccessors() const { return Successors.size(); }
220 size_t getNumPredecessors() const { return Predecessors.size(); }
221
222 /// Returns true if this block has any predecessors.
223 bool hasPredecessors() const { return !Predecessors.empty(); }
224
225 /// An Enclosing Block of a block B is any block containing B, including B
226 /// itself. \return the closest enclosing block starting from "this", which
227 /// has successors. \return the root enclosing block if all enclosing blocks
228 /// have no successors.
229 VPBlockBase *getEnclosingBlockWithSuccessors();
230
231 /// \return the closest enclosing block starting from "this", which has
232 /// predecessors. \return the root enclosing block if all enclosing blocks
233 /// have no predecessors.
234 VPBlockBase *getEnclosingBlockWithPredecessors();
235
236 /// \return the successors either attached directly to this VPBlockBase or, if
237 /// this VPBlockBase is the exit block of a VPRegionBlock and has no
238 /// successors of its own, search recursively for the first enclosing
239 /// VPRegionBlock that has successors and return them. If no such
240 /// VPRegionBlock exists, return the (empty) successors of the topmost
241 /// VPBlockBase reached.
243 return getEnclosingBlockWithSuccessors()->getSuccessors();
244 }
245
246 /// \return the hierarchical successor of this VPBlockBase if it has a single
247 /// hierarchical successor. Otherwise return a null pointer.
249 return getEnclosingBlockWithSuccessors()->getSingleSuccessor();
250 }
251
252 /// \return the predecessors either attached directly to this VPBlockBase or,
253 /// if this VPBlockBase is the entry block of a VPRegionBlock and has no
254 /// predecessors of its own, search recursively for the first enclosing
255 /// VPRegionBlock that has predecessors and return them. If no such
256 /// VPRegionBlock exists, return the (empty) predecessors of the topmost
257 /// VPBlockBase reached.
259 return getEnclosingBlockWithPredecessors()->getPredecessors();
260 }
261
262 /// \return the hierarchical predecessor of this VPBlockBase if it has a
263 /// single hierarchical predecessor. Otherwise return a null pointer.
267
268 /// Set a given VPBlockBase \p Successor as the single successor of this
269 /// VPBlockBase. This VPBlockBase is not added as predecessor of \p Successor.
270 /// This VPBlockBase must have no successors.
272 assert(Successors.empty() && "Setting one successor when others exist.");
273 assert(Successor->getParent() == getParent() &&
274 "connected blocks must have the same parent");
275 appendSuccessor(Successor);
276 }
277
278 /// Set two given VPBlockBases \p IfTrue and \p IfFalse to be the two
279 /// successors of this VPBlockBase. This VPBlockBase is not added as
280 /// predecessor of \p IfTrue or \p IfFalse. This VPBlockBase must have no
281 /// successors.
282 void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse) {
283 assert(Successors.empty() && "Setting two successors when others exist.");
284 appendSuccessor(IfTrue);
285 appendSuccessor(IfFalse);
286 }
287
288 /// Set each VPBasicBlock in \p NewPreds as predecessor of this VPBlockBase.
289 /// This VPBlockBase must have no predecessors. This VPBlockBase is not added
290 /// as successor of any VPBasicBlock in \p NewPreds.
292 assert(Predecessors.empty() && "Block predecessors already set.");
293 for (auto *Pred : NewPreds)
294 appendPredecessor(Pred);
295 }
296
297 /// Set each VPBasicBlock in \p NewSuccs as successor of this VPBlockBase.
298 /// This VPBlockBase must have no successors. This VPBlockBase is not added
299 /// as predecessor of any VPBasicBlock in \p NewSuccs.
301 assert(Successors.empty() && "Block successors already set.");
302 for (auto *Succ : NewSuccs)
303 appendSuccessor(Succ);
304 }
305
306 /// Remove all the predecessors of this block.
307 void clearPredecessors() { Predecessors.clear(); }
308
309 /// Remove all the successors of this block.
310 void clearSuccessors() { Successors.clear(); }
311
312 /// Swap predecessors of the block. The block must have exactly 2
313 /// predecessors.
315 assert(Predecessors.size() == 2 && "must have 2 predecessors to swap");
316 std::swap(Predecessors[0], Predecessors[1]);
317 }
318
319 /// Swap successors of the block. The block must have exactly 2 successors.
320 // TODO: This should be part of introducing conditional branch recipes rather
321 // than being independent.
323 assert(Successors.size() == 2 && "must have 2 successors to swap");
324 std::swap(Successors[0], Successors[1]);
325 }
326
327 /// Returns the index for \p Pred in the blocks predecessors list.
328 unsigned getIndexForPredecessor(const VPBlockBase *Pred) const {
329 assert(count(Predecessors, Pred) == 1 &&
330 "must have Pred exactly once in Predecessors");
331 return std::distance(Predecessors.begin(), find(Predecessors, Pred));
332 }
333
334 /// Returns the index for \p Succ in the blocks successor list.
335 unsigned getIndexForSuccessor(const VPBlockBase *Succ) const {
336 assert(count(Successors, Succ) == 1 &&
337 "must have Succ exactly once in Successors");
338 return std::distance(Successors.begin(), find(Successors, Succ));
339 }
340
341 /// The method which generates the output IR that correspond to this
342 /// VPBlockBase, thereby "executing" the VPlan.
343 virtual void execute(VPTransformState *State) = 0;
344
345 /// Return the cost of the block.
347
348#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
349 void printAsOperand(raw_ostream &OS, bool PrintType = false) const {
350 OS << getName();
351 }
352
353 /// Print plain-text dump of this VPBlockBase to \p O, prefixing all lines
354 /// with \p Indent. \p SlotTracker is used to print unnamed VPValue's using
355 /// consecutive numbers.
356 ///
357 /// Note that the numbering is applied to the whole VPlan, so printing
358 /// individual blocks is consistent with the whole VPlan printing.
359 virtual void print(raw_ostream &O, const Twine &Indent,
360 VPSlotTracker &SlotTracker) const = 0;
361
362 /// Print plain-text dump of this VPBlockBase to \p O.
363 void print(raw_ostream &O) const;
364
365 /// Print the successors of this block to \p O, prefixing all lines with \p
366 /// Indent.
367 void printSuccessors(raw_ostream &O, const Twine &Indent) const;
368
369 /// Dump this VPBlockBase to dbgs().
370 LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
371#endif
372
373 /// Clone the current block and its recipes without updating the operands of
374 /// the cloned recipes, including all blocks in the single-entry single-exit
375 /// region for VPRegionBlocks.
376 virtual VPBlockBase *clone() = 0;
377};
378
379/// VPRecipeBase is a base class modeling a sequence of one or more output IR
380/// instructions. VPRecipeBase owns the VPValues it defines through VPDef
381/// and is responsible for deleting its defined values. Single-value
382/// recipes must inherit from VPSingleDefRecipe instead of inheriting from both
383/// VPRecipeBase and VPValue separately.
385 : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
386 public VPDef,
387 public VPUser {
388 friend VPBasicBlock;
389 friend class VPBlockUtils;
390
391 /// Each VPRecipe belongs to a single VPBasicBlock.
392 VPBasicBlock *Parent = nullptr;
393
394 /// The debug location for the recipe.
395 DebugLoc DL;
396
397public:
398 VPRecipeBase(const unsigned char SC, ArrayRef<VPValue *> Operands,
400 : VPDef(SC), VPUser(Operands), DL(DL) {}
401
402 ~VPRecipeBase() override = default;
403
404 /// Clone the current recipe.
405 virtual VPRecipeBase *clone() = 0;
406
407 /// \return the VPBasicBlock which this VPRecipe belongs to.
408 VPBasicBlock *getParent() { return Parent; }
409 const VPBasicBlock *getParent() const { return Parent; }
410
411 /// \return the VPRegionBlock which the recipe belongs to.
412 VPRegionBlock *getRegion();
413 const VPRegionBlock *getRegion() const;
414
415 /// The method which generates the output IR instructions that correspond to
416 /// this VPRecipe, thereby "executing" the VPlan.
417 virtual void execute(VPTransformState &State) = 0;
418
419 /// Return the cost of this recipe, taking into account if the cost
420 /// computation should be skipped and the ForceTargetInstructionCost flag.
421 /// Also takes care of printing the cost for debugging.
423
424 /// Insert an unlinked recipe into a basic block immediately before
425 /// the specified recipe.
426 void insertBefore(VPRecipeBase *InsertPos);
427 /// Insert an unlinked recipe into \p BB immediately before the insertion
428 /// point \p IP;
429 void insertBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator IP);
430
431 /// Insert an unlinked Recipe into a basic block immediately after
432 /// the specified Recipe.
433 void insertAfter(VPRecipeBase *InsertPos);
434
435 /// Unlink this recipe from its current VPBasicBlock and insert it into
436 /// the VPBasicBlock that MovePos lives in, right after MovePos.
437 void moveAfter(VPRecipeBase *MovePos);
438
439 /// Unlink this recipe and insert into BB before I.
440 ///
441 /// \pre I is a valid iterator into BB.
442 void moveBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator I);
443
444 /// This method unlinks 'this' from the containing basic block, but does not
445 /// delete it.
446 void removeFromParent();
447
448 /// This method unlinks 'this' from the containing basic block and deletes it.
449 ///
450 /// \returns an iterator pointing to the element after the erased one
452
453 /// Method to support type inquiry through isa, cast, and dyn_cast.
454 static inline bool classof(const VPDef *D) {
455 // All VPDefs are also VPRecipeBases.
456 return true;
457 }
458
459 static inline bool classof(const VPUser *U) { return true; }
460
461 /// Returns true if the recipe may have side-effects.
462 bool mayHaveSideEffects() const;
463
464 /// Returns true for PHI-like recipes.
465 bool isPhi() const;
466
467 /// Returns true if the recipe may read from memory.
468 bool mayReadFromMemory() const;
469
470 /// Returns true if the recipe may write to memory.
471 bool mayWriteToMemory() const;
472
473 /// Returns true if the recipe may read from or write to memory.
474 bool mayReadOrWriteMemory() const {
476 }
477
478 /// Returns the debug location of the recipe.
479 DebugLoc getDebugLoc() const { return DL; }
480
481 /// Return true if the recipe is a scalar cast.
482 bool isScalarCast() const;
483
484 /// Set the recipe's debug location to \p NewDL.
485 void setDebugLoc(DebugLoc NewDL) { DL = NewDL; }
486
487#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
488 /// Print the recipe, delegating to printRecipe().
489 void print(raw_ostream &O, const Twine &Indent,
490 VPSlotTracker &SlotTracker) const override final;
491#endif
492
493protected:
494 /// Compute the cost of this recipe either using a recipe's specialized
495 /// implementation or using the legacy cost model and the underlying
496 /// instructions.
497 virtual InstructionCost computeCost(ElementCount VF,
498 VPCostContext &Ctx) const;
499
500#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
501 /// Each concrete VPRecipe prints itself, without printing common information,
502 /// like debug info or metadata.
503 virtual void printRecipe(raw_ostream &O, const Twine &Indent,
504 VPSlotTracker &SlotTracker) const = 0;
505#endif
506};
507
508// Helper macro to define common classof implementations for recipes.
// Expands to classof() overloads for VPDef, VPValue, VPUser, VPRecipeBase and
// VPSingleDefRecipe. Each overload succeeds only when the (defining) recipe's
// VPDefID equals the given VPDefID; the VPValue and VPUser overloads first
// check that a recipe exists. (No comments inside the macro body: a '//'
// comment on a continued line would swallow the rest of the expansion.)
#define VP_CLASSOF_IMPL(VPDefID)                                               \
  static inline bool classof(const VPDef *Def) {                               \
    return Def->getVPDefID() == VPDefID;                                       \
  }                                                                            \
  static inline bool classof(const VPValue *Val) {                             \
    if (auto *DefRecipe = Val->getDefiningRecipe())                            \
      return DefRecipe->getVPDefID() == VPDefID;                               \
    return false;                                                              \
  }                                                                            \
  static inline bool classof(const VPUser *User) {                             \
    if (auto *UserRecipe = dyn_cast<VPRecipeBase>(User))                       \
      return UserRecipe->getVPDefID() == VPDefID;                              \
    return false;                                                              \
  }                                                                            \
  static inline bool classof(const VPRecipeBase *Recipe) {                     \
    return Recipe->getVPDefID() == VPDefID;                                    \
  }                                                                            \
  static inline bool classof(const VPSingleDefRecipe *Recipe) {                \
    return Recipe->getVPDefID() == VPDefID;                                    \
  }
527
528/// VPSingleDefRecipe is a base class for recipes for modeling a sequence of one or
529/// more output IR that define a single result VPValue.
530/// Note that VPRecipeBase must be inherited from before VPValue.
531class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
532public:
533 VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
535 : VPRecipeBase(SC, Operands, DL), VPValue(this) {}
536
537 VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
539 : VPRecipeBase(SC, Operands, DL), VPValue(this, UV) {}
540
541 static inline bool classof(const VPRecipeBase *R) {
542 switch (R->getVPDefID()) {
543 case VPRecipeBase::VPDerivedIVSC:
544 case VPRecipeBase::VPEVLBasedIVPHISC:
545 case VPRecipeBase::VPExpandSCEVSC:
546 case VPRecipeBase::VPExpressionSC:
547 case VPRecipeBase::VPInstructionSC:
548 case VPRecipeBase::VPReductionEVLSC:
549 case VPRecipeBase::VPReductionSC:
550 case VPRecipeBase::VPReplicateSC:
551 case VPRecipeBase::VPScalarIVStepsSC:
552 case VPRecipeBase::VPVectorPointerSC:
553 case VPRecipeBase::VPVectorEndPointerSC:
554 case VPRecipeBase::VPWidenCallSC:
555 case VPRecipeBase::VPWidenCanonicalIVSC:
556 case VPRecipeBase::VPWidenCastSC:
557 case VPRecipeBase::VPWidenGEPSC:
558 case VPRecipeBase::VPWidenIntrinsicSC:
559 case VPRecipeBase::VPWidenSC:
560 case VPRecipeBase::VPWidenSelectSC:
561 case VPRecipeBase::VPBlendSC:
562 case VPRecipeBase::VPPredInstPHISC:
563 case VPRecipeBase::VPCanonicalIVPHISC:
564 case VPRecipeBase::VPActiveLaneMaskPHISC:
565 case VPRecipeBase::VPFirstOrderRecurrencePHISC:
566 case VPRecipeBase::VPWidenPHISC:
567 case VPRecipeBase::VPWidenIntOrFpInductionSC:
568 case VPRecipeBase::VPWidenPointerInductionSC:
569 case VPRecipeBase::VPReductionPHISC:
570 return true;
571 case VPRecipeBase::VPBranchOnMaskSC:
572 case VPRecipeBase::VPInterleaveEVLSC:
573 case VPRecipeBase::VPInterleaveSC:
574 case VPRecipeBase::VPIRInstructionSC:
575 case VPRecipeBase::VPWidenLoadEVLSC:
576 case VPRecipeBase::VPWidenLoadSC:
577 case VPRecipeBase::VPWidenStoreEVLSC:
578 case VPRecipeBase::VPWidenStoreSC:
579 case VPRecipeBase::VPHistogramSC:
580 // TODO: Widened stores don't define a value, but widened loads do. Split
581 // the recipes to be able to make widened loads VPSingleDefRecipes.
582 return false;
583 }
584 llvm_unreachable("Unhandled VPDefID");
585 }
586
587 static inline bool classof(const VPUser *U) {
588 auto *R = dyn_cast<VPRecipeBase>(U);
589 return R && classof(R);
590 }
591
592 VPSingleDefRecipe *clone() override = 0;
593
594 /// Returns the underlying instruction.
601
602#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
603 /// Print this VPSingleDefRecipe to dbgs() (for debugging).
605#endif
606};
607
608/// Class to record and manage LLVM IR flags.
610 enum class OperationType : unsigned char {
611 Cmp,
612 FCmp,
613 OverflowingBinOp,
614 Trunc,
615 DisjointOp,
616 PossiblyExactOp,
617 GEPOp,
618 FPMathOp,
619 NonNegOp,
620 Other
621 };
622
623public:
624 struct WrapFlagsTy {
625 char HasNUW : 1;
626 char HasNSW : 1;
627
629 };
630
632 char HasNUW : 1;
633 char HasNSW : 1;
634
636 };
637
642
644 char NonNeg : 1;
645 NonNegFlagsTy(bool IsNonNeg) : NonNeg(IsNonNeg) {}
646 };
647
648private:
649 struct ExactFlagsTy {
650 char IsExact : 1;
651 };
652 struct FastMathFlagsTy {
653 char AllowReassoc : 1;
654 char NoNaNs : 1;
655 char NoInfs : 1;
656 char NoSignedZeros : 1;
657 char AllowReciprocal : 1;
658 char AllowContract : 1;
659 char ApproxFunc : 1;
660
661 LLVM_ABI_FOR_TEST FastMathFlagsTy(const FastMathFlags &FMF);
662 };
663 /// Holds both the predicate and fast-math flags for floating-point
664 /// comparisons.
665 struct FCmpFlagsTy {
667 FastMathFlagsTy FMFs;
668 };
669
670 OperationType OpType;
671
672 union {
677 ExactFlagsTy ExactFlags;
680 FastMathFlagsTy FMFs;
681 FCmpFlagsTy FCmpFlags;
682 unsigned AllFlags;
683 };
684
685public:
686 VPIRFlags() : OpType(OperationType::Other), AllFlags(0) {}
687
689 if (auto *FCmp = dyn_cast<FCmpInst>(&I)) {
690 OpType = OperationType::FCmp;
691 FCmpFlags.Pred = FCmp->getPredicate();
692 FCmpFlags.FMFs = FCmp->getFastMathFlags();
693 } else if (auto *Op = dyn_cast<CmpInst>(&I)) {
694 OpType = OperationType::Cmp;
695 CmpPredicate = Op->getPredicate();
696 } else if (auto *Op = dyn_cast<PossiblyDisjointInst>(&I)) {
697 OpType = OperationType::DisjointOp;
698 DisjointFlags.IsDisjoint = Op->isDisjoint();
699 } else if (auto *Op = dyn_cast<OverflowingBinaryOperator>(&I)) {
700 OpType = OperationType::OverflowingBinOp;
701 WrapFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
702 } else if (auto *Op = dyn_cast<TruncInst>(&I)) {
703 OpType = OperationType::Trunc;
704 TruncFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
705 } else if (auto *Op = dyn_cast<PossiblyExactOperator>(&I)) {
706 OpType = OperationType::PossiblyExactOp;
707 ExactFlags.IsExact = Op->isExact();
708 } else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
709 OpType = OperationType::GEPOp;
710 GEPFlags = GEP->getNoWrapFlags();
711 } else if (auto *PNNI = dyn_cast<PossiblyNonNegInst>(&I)) {
712 OpType = OperationType::NonNegOp;
713 NonNegFlags.NonNeg = PNNI->hasNonNeg();
714 } else if (auto *Op = dyn_cast<FPMathOperator>(&I)) {
715 OpType = OperationType::FPMathOp;
716 FMFs = Op->getFastMathFlags();
717 } else {
718 OpType = OperationType::Other;
719 AllFlags = 0;
720 }
721 }
722
724 : OpType(OperationType::Cmp), CmpPredicate(Pred) {}
725
727 : OpType(OperationType::FCmp) {
728 FCmpFlags.Pred = Pred;
729 FCmpFlags.FMFs = FMFs;
730 }
731
733 : OpType(OperationType::OverflowingBinOp), WrapFlags(WrapFlags) {}
734
736 : OpType(OperationType::Trunc), TruncFlags(TruncFlags) {}
737
738 VPIRFlags(FastMathFlags FMFs) : OpType(OperationType::FPMathOp), FMFs(FMFs) {}
739
741 : OpType(OperationType::DisjointOp), DisjointFlags(DisjointFlags) {}
742
744 : OpType(OperationType::NonNegOp), NonNegFlags(NonNegFlags) {}
745
747 : OpType(OperationType::GEPOp), GEPFlags(GEPFlags) {}
748
750 OpType = Other.OpType;
751 AllFlags = Other.AllFlags;
752 }
753
754 /// Only keep flags also present in \p Other. \p Other must have the same
755 /// OpType as the current object.
756 void intersectFlags(const VPIRFlags &Other);
757
758 /// Drop all poison-generating flags.
760 // NOTE: This needs to be kept in-sync with
761 // Instruction::dropPoisonGeneratingFlags.
762 switch (OpType) {
763 case OperationType::OverflowingBinOp:
764 WrapFlags.HasNUW = false;
765 WrapFlags.HasNSW = false;
766 break;
767 case OperationType::Trunc:
768 TruncFlags.HasNUW = false;
769 TruncFlags.HasNSW = false;
770 break;
771 case OperationType::DisjointOp:
772 DisjointFlags.IsDisjoint = false;
773 break;
774 case OperationType::PossiblyExactOp:
775 ExactFlags.IsExact = false;
776 break;
777 case OperationType::GEPOp:
779 break;
780 case OperationType::FPMathOp:
781 case OperationType::FCmp:
782 getFMFsRef().NoNaNs = false;
783 getFMFsRef().NoInfs = false;
784 break;
785 case OperationType::NonNegOp:
786 NonNegFlags.NonNeg = false;
787 break;
788 case OperationType::Cmp:
789 case OperationType::Other:
790 break;
791 }
792 }
793
794 /// Apply the IR flags to \p I.
795 void applyFlags(Instruction &I) const {
796 switch (OpType) {
797 case OperationType::OverflowingBinOp:
798 I.setHasNoUnsignedWrap(WrapFlags.HasNUW);
799 I.setHasNoSignedWrap(WrapFlags.HasNSW);
800 break;
801 case OperationType::Trunc:
802 I.setHasNoUnsignedWrap(TruncFlags.HasNUW);
803 I.setHasNoSignedWrap(TruncFlags.HasNSW);
804 break;
805 case OperationType::DisjointOp:
806 cast<PossiblyDisjointInst>(&I)->setIsDisjoint(DisjointFlags.IsDisjoint);
807 break;
808 case OperationType::PossiblyExactOp:
809 I.setIsExact(ExactFlags.IsExact);
810 break;
811 case OperationType::GEPOp:
812 cast<GetElementPtrInst>(&I)->setNoWrapFlags(GEPFlags);
813 break;
814 case OperationType::FPMathOp:
815 case OperationType::FCmp: {
816 const FastMathFlagsTy &F = getFMFsRef();
817 I.setHasAllowReassoc(F.AllowReassoc);
818 I.setHasNoNaNs(F.NoNaNs);
819 I.setHasNoInfs(F.NoInfs);
820 I.setHasNoSignedZeros(F.NoSignedZeros);
821 I.setHasAllowReciprocal(F.AllowReciprocal);
822 I.setHasAllowContract(F.AllowContract);
823 I.setHasApproxFunc(F.ApproxFunc);
824 break;
825 }
826 case OperationType::NonNegOp:
827 I.setNonNeg(NonNegFlags.NonNeg);
828 break;
829 case OperationType::Cmp:
830 case OperationType::Other:
831 break;
832 }
833 }
834
836 assert((OpType == OperationType::Cmp || OpType == OperationType::FCmp) &&
837 "recipe doesn't have a compare predicate");
838 return OpType == OperationType::FCmp ? FCmpFlags.Pred : CmpPredicate;
839 }
840
842 assert((OpType == OperationType::Cmp || OpType == OperationType::FCmp) &&
843 "recipe doesn't have a compare predicate");
844 if (OpType == OperationType::FCmp)
845 FCmpFlags.Pred = Pred;
846 else
847 CmpPredicate = Pred;
848 }
849
851
852 /// Returns true if the recipe has a comparison predicate.
853 bool hasPredicate() const {
854 return OpType == OperationType::Cmp || OpType == OperationType::FCmp;
855 }
856
857 /// Returns true if the recipe has fast-math flags.
858 bool hasFastMathFlags() const {
859 return OpType == OperationType::FPMathOp || OpType == OperationType::FCmp;
860 }
861
863
864 /// Returns true if the recipe has non-negative flag.
865 bool hasNonNegFlag() const { return OpType == OperationType::NonNegOp; }
866
867 bool isNonNeg() const {
868 assert(OpType == OperationType::NonNegOp &&
869 "recipe doesn't have a NNEG flag");
870 return NonNegFlags.NonNeg;
871 }
872
873 bool hasNoUnsignedWrap() const {
874 switch (OpType) {
875 case OperationType::OverflowingBinOp:
876 return WrapFlags.HasNUW;
877 case OperationType::Trunc:
878 return TruncFlags.HasNUW;
879 default:
880 llvm_unreachable("recipe doesn't have a NUW flag");
881 }
882 }
883
884 bool hasNoSignedWrap() const {
885 switch (OpType) {
886 case OperationType::OverflowingBinOp:
887 return WrapFlags.HasNSW;
888 case OperationType::Trunc:
889 return TruncFlags.HasNSW;
890 default:
891 llvm_unreachable("recipe doesn't have a NSW flag");
892 }
893 }
894
895 bool isDisjoint() const {
896 assert(OpType == OperationType::DisjointOp &&
897 "recipe cannot have a disjoing flag");
898 return DisjointFlags.IsDisjoint;
899 }
900
901private:
902 /// Get a reference to the fast-math flags for FPMathOp or FCmp.
903 FastMathFlagsTy &getFMFsRef() {
904 return OpType == OperationType::FCmp ? FCmpFlags.FMFs : FMFs;
905 }
906 const FastMathFlagsTy &getFMFsRef() const {
907 return OpType == OperationType::FCmp ? FCmpFlags.FMFs : FMFs;
908 }
909
910public:
911#if !defined(NDEBUG)
912 /// Returns true if the set flags are valid for \p Opcode.
913 LLVM_ABI_FOR_TEST bool flagsValidForOpcode(unsigned Opcode) const;
914#endif
915
916#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
917 void printFlags(raw_ostream &O) const;
918#endif
919};
920
921/// A pure-virtual common base class for recipes defining a single VPValue and
922/// using IR flags.
924 VPRecipeWithIRFlags(const unsigned char SC, ArrayRef<VPValue *> Operands,
925 const VPIRFlags &Flags,
927 : VPSingleDefRecipe(SC, Operands, DL), VPIRFlags(Flags) {}
928
929 static inline bool classof(const VPRecipeBase *R) {
930 return R->getVPDefID() == VPRecipeBase::VPInstructionSC ||
931 R->getVPDefID() == VPRecipeBase::VPWidenSC ||
932 R->getVPDefID() == VPRecipeBase::VPWidenGEPSC ||
933 R->getVPDefID() == VPRecipeBase::VPWidenCallSC ||
934 R->getVPDefID() == VPRecipeBase::VPWidenCastSC ||
935 R->getVPDefID() == VPRecipeBase::VPWidenIntrinsicSC ||
936 R->getVPDefID() == VPRecipeBase::VPWidenSelectSC ||
937 R->getVPDefID() == VPRecipeBase::VPReductionSC ||
938 R->getVPDefID() == VPRecipeBase::VPReductionEVLSC ||
939 R->getVPDefID() == VPRecipeBase::VPReplicateSC ||
940 R->getVPDefID() == VPRecipeBase::VPVectorEndPointerSC ||
941 R->getVPDefID() == VPRecipeBase::VPVectorPointerSC;
942 }
943
944 static inline bool classof(const VPUser *U) {
945 auto *R = dyn_cast<VPRecipeBase>(U);
946 return R && classof(R);
947 }
948
949 static inline bool classof(const VPValue *V) {
950 auto *R = dyn_cast_or_null<VPRecipeBase>(V->getDefiningRecipe());
951 return R && classof(R);
952 }
953
954 VPRecipeWithIRFlags *clone() override = 0;
955
956 static inline bool classof(const VPSingleDefRecipe *U) {
957 auto *R = dyn_cast<VPRecipeBase>(U);
958 return R && classof(R);
959 }
960
961 void execute(VPTransformState &State) override = 0;
962
963 /// Compute the cost for this recipe for \p VF, using \p Opcode and \p Ctx.
965 VPCostContext &Ctx) const;
966};
967
968/// Helper to access the operand that contains the unroll part for this recipe
969/// after unrolling.
970template <unsigned PartOpIdx> class LLVM_ABI_FOR_TEST VPUnrollPartAccessor {
971protected:
// NOTE(review): PartOpIdx is presumably the index of the operand of \p U that
// carries the unroll part -- both members are only declared here, so confirm
// against their out-of-line definitions.
972 /// Return the VPValue operand containing the unroll part or null if there is
973 /// no such operand.
974 VPValue *getUnrollPartOperand(const VPUser &U) const;
975
976 /// Return the unroll part for \p U as an unsigned value.
977 unsigned getUnrollPart(const VPUser &U) const;
978};
979
980/// Helper to manage IR metadata for recipes. It filters out metadata that
981/// cannot be propagated.
984
985public:
986 VPIRMetadata() = default;
987
988 /// Adds metadata that can be preserved from the original instruction
989 /// \p I.
991
992 /// Copy constructor for cloning.
993 VPIRMetadata(const VPIRMetadata &Other) = default;
994
996
997 /// Add all metadata to \p I.
998 void applyMetadata(Instruction &I) const;
999
1000 /// Set metadata with kind \p Kind to \p Node. If metadata with \p Kind
1001 /// already exists, it will be replaced. Otherwise, it will be added.
1002 void setMetadata(unsigned Kind, MDNode *Node) {
     // Search the stored (kind, node) pairs for an entry whose kind is Kind.
1003 auto It =
1004 llvm::find_if(Metadata, [Kind](const std::pair<unsigned, MDNode *> &P) {
1005 return P.first == Kind;
1006 });
     // Overwrite the node of the matching entry, or append a new pair if no
     // entry with this kind was found.
1007 if (It != Metadata.end())
1008 It->second = Node;
1009 else
1010 Metadata.emplace_back(Kind, Node);
1011 }
1012
1013 /// Intersect this VPIRMetadata object with \p MD, keeping only metadata
1014 /// nodes that are common to both.
1015 void intersect(const VPIRMetadata &MD);
1016
1017 /// Get metadata of kind \p Kind. Returns nullptr if not found.
1018 MDNode *getMetadata(unsigned Kind) const {
     // Locate the first stored (kind, node) pair whose kind equals Kind.
1019 auto It =
1020 find_if(Metadata, [Kind](const auto &P) { return P.first == Kind; });
     // Hand back the node of that pair, or nullptr when there is no match.
1021 return It != Metadata.end() ? It->second : nullptr;
1022 }
1023
1024#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1025 /// Print metadata with node IDs.
1026 void print(raw_ostream &O, VPSlotTracker &SlotTracker) const;
1027#endif
1028};
1029
1030/// This is a concrete Recipe that models a single VPlan-level instruction.
1031/// While as any Recipe it may generate a sequence of IR instructions when
1032/// executed, these instructions would always form a single-def expression as
1033/// the VPInstruction is also a single def-use vertex.
1035 public VPIRMetadata,
1036 public VPUnrollPartAccessor<1> {
1037 friend class VPlanSlp;
1038
1039public:
1040 /// VPlan opcodes, extending LLVM IR with idiomatic instructions.
1041 enum {
1043 Instruction::OtherOpsEnd + 1, // Combines the incoming and previous
1044 // values of a first-order recurrence.
1048 // Creates a mask where each lane is active (true) whilst the current
1049 // counter (first operand + index) is less than the second operand. i.e.
1050 // mask[i] = icmp ult (op0 + i), op1
1051 // The size of the mask returned is VF * Multiplier (UF, third op).
1055 // Increment the canonical IV separately for each unrolled part.
1057 // Abstract instruction that compares two values and branches. This is
1058 // lowered to ICmp + BranchOnCond during VPlan to VPlan transformation.
1062 /// Given operands of (the same) struct type, creates a struct of fixed-
1063 /// width vectors each containing a struct field of all operands. The
1064 /// number of operands matches the element count of every vector.
1066 /// Creates a fixed-width vector containing all operands. The number of
1067 /// operands matches the vector element count.
1069 /// Extracts all lanes from its (non-scalable) vector operand. This is an
1070 /// abstract VPInstruction whose single defined VPValue represents VF
1071 /// scalars extracted from a vector, to be replaced by VF ExtractElement
1072 /// VPInstructions.
1074 /// Compute the final result of an AnyOf reduction with select(cmp(),x,y),
1075 /// where one of (x,y) is loop invariant, and both x and y are integer type.
1079 // Extracts the last part of its operand. Removed during unrolling.
1081 // Extracts the last lane of its vector operand, per part.
1083 // Extracts the second-to-last lane from its operand or the second-to-last
1084 // part if it is scalar. In the latter case, the recipe will be removed
1085 // during unrolling.
1087 LogicalAnd, // Non-poison propagating logical And.
1088 // Add an offset in bytes (second operand) to a base pointer (first
1089 // operand). Only generates scalar values (either for the first lane only or
1090 // for all lanes, depending on its uses).
1092 // Add a vector offset in bytes (second operand) to a scalar base pointer
1093 // (first operand).
1095 // Returns a scalar boolean value, which is true if any lane of its
1096 // (boolean) vector operands is true. It produces the reduced value across
1097 // all unrolled iterations. Unrolling will add all copies of its original
1098 // operand as additional operands. AnyOf is poison-safe as all operands
1099 // will be frozen.
1101 // Calculates the first active lane index of the vector predicate operands.
1102 // It produces the lane index across all unrolled iterations. Unrolling will
1103 // add all copies of its original operand as additional operands.
1104 // Implemented with @llvm.experimental.cttz.elts, but returns the expected
1105 // result even with operands that are all zeroes.
1107 // Calculates the last active lane index of the vector predicate operands.
1108 // The predicates must be prefix-masks (all 1s before all 0s). Used when
1109 // tail-folding to extract the correct live-out value from the last active
1110 // iteration. It produces the lane index across all unrolled iterations.
1111 // Unrolling will add all copies of its original operand as additional
1112 // operands.
1114
1115 // The opcodes below are used for VPInstructionWithType.
1116 //
1117 /// Scale the first operand (vector step) by the second operand
1118 /// (scalar-step). Casts both operands to the result type if needed.
1120 /// Start vector for reductions with 3 operands: the original start value,
1121 /// the identity value for the reduction and an integer indicating the
1122 /// scaling factor.
1124 // Creates a step vector starting from 0 to VF with a step of 1.
1126 /// Extracts a single lane (first operand) from a set of vector operands.
1127 /// The lane specifies an index into a vector formed by combining all vector
1128 /// operands (all operands after the first one).
1130 /// Explicit user for the resume phi of the canonical induction in the main
1131 /// VPlan, used by the epilogue vector loop.
1133 /// Returns the value for vscale.
1136 };
1137
1138 /// Returns true if this VPInstruction generates scalar values for all lanes.
1139 /// Most VPInstructions generate a single value per part, either vector or
1140 /// scalar. VPReplicateRecipe takes care of generating multiple (scalar)
1141 /// values per all lanes, stemming from an original ingredient. This method
1142 /// identifies the (rare) cases of VPInstructions that do so as well, w/o an
1143 /// underlying ingredient.
1144 bool doesGeneratePerAllLanes() const;
1145
1146private:
1147 typedef unsigned char OpcodeTy;
1148 OpcodeTy Opcode;
1149
1150 /// An optional name that can be used for the generated IR instruction.
1151 std::string Name;
1152
1153 /// Returns true if we can generate a scalar for the first lane only if
1154 /// needed.
1155 bool canGenerateScalarForFirstLane() const;
1156
1157 /// Utility methods serving execute(): generates a single vector instance of
1158 /// the modeled instruction. \returns the generated value. In some cases an
1159 /// existing value is returned rather than a generated one.
1160 Value *generate(VPTransformState &State);
1161
1162#if !defined(NDEBUG)
1163 /// Return the number of operands determined by the opcode of the
1164 /// VPInstruction. Returns -1u if the number of operands cannot be determined
1165 /// directly by the opcode.
1166 static unsigned getNumOperandsForOpcode(unsigned Opcode);
1167#endif
1168
1169public:
1170 VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands,
1171 const VPIRFlags &Flags = {}, const VPIRMetadata &MD = {},
1172 DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "");
1173
1174 VP_CLASSOF_IMPL(VPDef::VPInstructionSC)
1175
/// Create a copy of this VPInstruction with the same opcode, operands, debug
/// location and name. *this is passed for both the VPIRFlags and the
/// VPIRMetadata constructor parameters. If an underlying value is set, the
/// clone's underlying value is set from getUnderlyingInstr().
1176 VPInstruction *clone() override {
1177 auto *New = new VPInstruction(Opcode, operands(), *this, *this,
1178 getDebugLoc(), Name);
1179 if (getUnderlyingValue())
1180 New->setUnderlyingValue(getUnderlyingInstr());
1181 return New;
1182 }
1183
1184 unsigned getOpcode() const { return Opcode; }
1185
1186 /// Generate the instruction.
1187 /// TODO: We currently execute only per-part unless a specific instance is
1188 /// provided.
1189 void execute(VPTransformState &State) override;
1190
1191 /// Return the cost of this VPInstruction.
1192 InstructionCost computeCost(ElementCount VF,
1193 VPCostContext &Ctx) const override;
1194
1195#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1196 /// Print the VPInstruction to dbgs() (for debugging).
1197 LLVM_DUMP_METHOD void dump() const;
1198#endif
1199
/// Returns false for the opcodes listed in the switch below and true for
/// every other opcode.
1200 bool hasResult() const {
1201 // CallInst may or may not have a result, depending on the called function.
1202 // Conservatively assume that calls have results for now.
1203 switch (getOpcode()) {
1204 case Instruction::Ret:
1205 case Instruction::Br:
1206 case Instruction::Store:
1207 case Instruction::Switch:
1208 case Instruction::IndirectBr:
1209 case Instruction::Resume:
1210 case Instruction::CatchRet:
1211 case Instruction::Unreachable:
1212 case Instruction::Fence:
1213 case Instruction::AtomicRMW:
1216 return false;
1217 default:
1218 return true;
1219 }
1220 }
1221
1222 /// Returns true if the underlying opcode may read from or write to memory.
1223 bool opcodeMayReadOrWriteFromMemory() const;
1224
1225 /// Returns true if the recipe only uses the first lane of operand \p Op.
1226 bool usesFirstLaneOnly(const VPValue *Op) const override;
1227
1228 /// Returns true if the recipe only uses the first part of operand \p Op.
1229 bool usesFirstPartOnly(const VPValue *Op) const override;
1230
1231 /// Returns true if this VPInstruction produces a scalar value from a vector,
1232 /// e.g. by performing a reduction or extracting a lane.
1233 bool isVectorToScalar() const;
1234
1235 /// Returns true if this VPInstruction's operands are single scalars and the
1236 /// result is also a single scalar.
1237 bool isSingleScalar() const;
1238
1239 /// Returns the symbolic name assigned to the VPInstruction.
1240 StringRef getName() const { return Name; }
1241
1242 /// Set the symbolic name for the VPInstruction.
1243 void setName(StringRef NewName) { Name = NewName.str(); }
1244
1245protected:
1246#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1247 /// Print the VPInstruction to \p O.
1248 void printRecipe(raw_ostream &O, const Twine &Indent,
1249 VPSlotTracker &SlotTracker) const override;
1250#endif
1251};
1252
1253/// A specialization of VPInstruction augmenting it with a dedicated result
1254/// type, to be used when the opcode and operands of the VPInstruction don't
1255/// directly determine the result type. Note that there is no separate VPDef ID
1256/// for VPInstructionWithType; it shares the same ID as VPInstruction and is
1257/// distinguished purely by the opcode.
1259 /// Scalar result type produced by the recipe.
1260 Type *ResultTy;
1261
1262public:
1264 Type *ResultTy, const VPIRFlags &Flags = {},
1265 const VPIRMetadata &Metadata = {},
1267 const Twine &Name = "")
1268 : VPInstruction(Opcode, Operands, Flags, Metadata, DL, Name),
1269 ResultTy(ResultTy) {}
1270
/// Returns true if \p R reports isScalarCast(), or if \p R is a VPInstruction
/// whose opcode is one of the cases of the switch below. Returns false if
/// \p R is not a VPInstruction or its opcode falls through to the default.
1271 static inline bool classof(const VPRecipeBase *R) {
1272 // VPInstructionWithType are VPInstructions with specific opcodes requiring
1273 // type information.
1274 if (R->isScalarCast())
1275 return true;
1276 auto *VPI = dyn_cast<VPInstruction>(R);
1277 if (!VPI)
1278 return false;
1279 switch (VPI->getOpcode()) {
1283 return true;
1284 default:
1285 return false;
1286 }
1287 }
1288
1289 static inline bool classof(const VPUser *R) {
1291 }
1292
1293 VPInstruction *clone() override {
1294 auto *New =
1296 *this, *this, getDebugLoc(), getName());
1297 New->setUnderlyingValue(getUnderlyingValue());
1298 return New;
1299 }
1300
1301 void execute(VPTransformState &State) override;
1302
1303 /// Return the cost of this VPInstruction.
1305 VPCostContext &Ctx) const override {
1306 // TODO: Compute accurate cost after retiring the legacy cost model.
1307 return 0;
1308 }
1309
1310 Type *getResultType() const { return ResultTy; }
1311
1312protected:
1313#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1314 /// Print the recipe.
1315 void printRecipe(raw_ostream &O, const Twine &Indent,
1316 VPSlotTracker &SlotTracker) const override;
1317#endif
1318};
1319
1320/// Helper type to provide functions to access incoming values and blocks for
1321/// phi-like recipes.
1323protected:
1324 /// Return a VPRecipeBase* to the current object.
1325 virtual const VPRecipeBase *getAsRecipe() const = 0;
1326
1327public:
1328 virtual ~VPPhiAccessors() = default;
1329
1330 /// Returns the incoming VPValue with index \p Idx.
1331 VPValue *getIncomingValue(unsigned Idx) const {
1332 return getAsRecipe()->getOperand(Idx);
1333 }
1334
1335 /// Returns the incoming block with index \p Idx.
1336 const VPBasicBlock *getIncomingBlock(unsigned Idx) const;
1337
1338 /// Returns the number of incoming values, also number of incoming blocks.
1339 virtual unsigned getNumIncoming() const {
1340 return getAsRecipe()->getNumOperands();
1341 }
1342
1343 /// Returns an iterator range over the incoming values.
1345 return make_range(getAsRecipe()->op_begin(),
1346 getAsRecipe()->op_begin() + getNumIncoming());
1347 }
1348
1350 detail::index_iterator, std::function<const VPBasicBlock *(size_t)>>>;
1351
1352 /// Returns an iterator range over the incoming blocks.
1354 std::function<const VPBasicBlock *(size_t)> GetBlock = [this](size_t Idx) {
1355 return getIncomingBlock(Idx);
1356 };
1357 return map_range(index_range(0, getNumIncoming()), GetBlock);
1358 }
1359
1360 /// Returns an iterator range over pairs of incoming values and corresponding
1361 /// incoming blocks.
1367
1368 /// Removes the incoming value for \p IncomingBlock, which must be a
1369 /// predecessor.
1370 void removeIncomingValueFor(VPBlockBase *IncomingBlock) const;
1371
1372#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1373 /// Print the recipe.
1375#endif
1376};
1377
/// Construct a VPInstruction with opcode Instruction::PHI over \p Operands,
/// passing {} for both the flags and the metadata arguments and forwarding
/// \p DL and \p Name.
1379 VPPhi(ArrayRef<VPValue *> Operands, DebugLoc DL, const Twine &Name = "")
1380 : VPInstruction(Instruction::PHI, Operands, {}, {}, DL, Name) {}
1381
/// Returns true if \p U is a VPInstruction whose opcode is Instruction::PHI.
1382 static inline bool classof(const VPUser *U) {
1383 auto *VPI = dyn_cast<VPInstruction>(U);
1384 return VPI && VPI->getOpcode() == Instruction::PHI;
1385 }
1386
/// Returns true if \p V is a VPInstruction whose opcode is Instruction::PHI.
1387 static inline bool classof(const VPValue *V) {
1388 auto *VPI = dyn_cast<VPInstruction>(V);
1389 return VPI && VPI->getOpcode() == Instruction::PHI;
1390 }
1391
/// Returns true if \p SDR is a VPInstruction whose opcode is
/// Instruction::PHI.
1392 static inline bool classof(const VPSingleDefRecipe *SDR) {
1393 auto *VPI = dyn_cast<VPInstruction>(SDR);
1394 return VPI && VPI->getOpcode() == Instruction::PHI;
1395 }
1396
/// Create a new VPPhi with the same operands, debug location and name, and
/// copy this recipe's underlying value to it. Flags and metadata are not
/// forwarded here: the VPPhi constructor only takes operands, a debug
/// location and a name.
1397 VPPhi *clone() override {
1398 auto *PhiR = new VPPhi(operands(), getDebugLoc(), getName());
1399 PhiR->setUnderlyingValue(getUnderlyingValue());
1400 return PhiR;
1401 }
1402
1403 void execute(VPTransformState &State) override;
1404
1405protected:
1406#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1407 /// Print the recipe.
1408 void printRecipe(raw_ostream &O, const Twine &Indent,
1409 VPSlotTracker &SlotTracker) const override;
1410#endif
1411
1412 const VPRecipeBase *getAsRecipe() const override { return this; }
1413};
1414
1415/// A recipe to wrap an original IR instruction not to be modified during
1416/// execution, except for PHIs. PHIs are modeled via the VPIRPhi subclass.
1417/// Except for PHIs, VPIRInstructions cannot have any operands.
1419 Instruction &I;
1420
1421protected:
1422 /// VPIRInstruction::create() should be used to create VPIRInstructions, as
1423 /// subclasses may need to be created, e.g. VPIRPhi.
1425 : VPRecipeBase(VPDef::VPIRInstructionSC, ArrayRef<VPValue *>()), I(I) {}
1426
1427public:
1428 ~VPIRInstruction() override = default;
1429
1430 /// Create a new VPIRPhi for \p I, if it is a PHINode, otherwise create a
1431 /// VPIRInstruction.
1433
1434 VP_CLASSOF_IMPL(VPDef::VPIRInstructionSC)
1435
1437 auto *R = create(I);
1438 for (auto *Op : operands())
1439 R->addOperand(Op);
1440 return R;
1441 }
1442
1443 void execute(VPTransformState &State) override;
1444
1445 /// Return the cost of this VPIRInstruction.
1447 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
1448
1449 Instruction &getInstruction() const { return I; }
1450
1451 bool usesScalars(const VPValue *Op) const override {
1453 "Op must be an operand of the recipe");
1454 return true;
1455 }
1456
1457 bool usesFirstPartOnly(const VPValue *Op) const override {
1459 "Op must be an operand of the recipe");
1460 return true;
1461 }
1462
1463 bool usesFirstLaneOnly(const VPValue *Op) const override {
1465 "Op must be an operand of the recipe");
1466 return true;
1467 }
1468
1469 /// Update the recipe's first operand to the last lane of the last part of the
1470 /// operand using \p Builder. Must only be used for VPIRInstructions with at
1471 /// least one operand wrapping a PHINode.
1473
1474protected:
1475#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1476 /// Print the recipe.
1477 void printRecipe(raw_ostream &O, const Twine &Indent,
1478 VPSlotTracker &SlotTracker) const override;
1479#endif
1480};
1481
1482/// An overlay for VPIRInstructions wrapping PHI nodes enabling convenient use
1483/// of cast/dyn_cast/isa and execute() implementation. A single VPValue operand
1484/// is allowed, and it is used to add a new incoming value for the single
1485/// predecessor VPBB.
1487 public VPPhiAccessors {
1489
/// Returns true if \p U is a VPIRInstruction whose wrapped IR instruction is
/// a PHINode.
1490 static inline bool classof(const VPRecipeBase *U) {
1491 auto *R = dyn_cast<VPIRInstruction>(U);
1492 return R && isa<PHINode>(R->getInstruction());
1493 }
1494
1496
1497 void execute(VPTransformState &State) override;
1498
1499protected:
1500#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1501 /// Print the recipe.
1502 void printRecipe(raw_ostream &O, const Twine &Indent,
1503 VPSlotTracker &SlotTracker) const override;
1504#endif
1505
1506 const VPRecipeBase *getAsRecipe() const override { return this; }
1507};
1508
1509/// VPWidenRecipe is a recipe for producing a widened instruction using the
1510/// opcode and operands of the recipe. This recipe covers most of the
1511/// traditional vectorization cases where each recipe transforms into a
1512/// vectorized version of itself.
1514 public VPIRMetadata {
1515 unsigned Opcode;
1516
1517public:
1519 const VPIRFlags &Flags = {}, const VPIRMetadata &Metadata = {},
1520 DebugLoc DL = {})
1521 : VPRecipeWithIRFlags(VPDef::VPWidenSC, Operands, Flags, DL),
1522 VPIRMetadata(Metadata), Opcode(I.getOpcode()) {
1523 setUnderlyingValue(&I);
1524 }
1525
1526 ~VPWidenRecipe() override = default;
1527
/// Create a new VPWidenRecipe for the same underlying instruction and
/// operands. *this is passed for both the flags and the metadata constructor
/// parameters, and this recipe's debug location is reused. The underlying
/// instruction is dereferenced unconditionally.
1528 VPWidenRecipe *clone() override {
1529 return new VPWidenRecipe(*getUnderlyingInstr(), operands(), *this, *this,
1530 getDebugLoc());
1531 }
1532
1533 VP_CLASSOF_IMPL(VPDef::VPWidenSC)
1534
1535 /// Produce a widened instruction using the opcode and operands of the recipe,
1536 /// processing State.VF elements.
1537 void execute(VPTransformState &State) override;
1538
1539 /// Return the cost of this VPWidenRecipe.
1540 InstructionCost computeCost(ElementCount VF,
1541 VPCostContext &Ctx) const override;
1542
1543 unsigned getOpcode() const { return Opcode; }
1544
1545protected:
1546#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1547 /// Print the recipe.
1548 void printRecipe(raw_ostream &O, const Twine &Indent,
1549 VPSlotTracker &SlotTracker) const override;
1550#endif
1551};
1552
1553/// VPWidenCastRecipe is a recipe to create vector cast instructions.
1555 /// Cast instruction opcode.
1556 Instruction::CastOps Opcode;
1557
1558 /// Result type for the cast.
1559 Type *ResultTy;
1560
1561public:
1563 CastInst *CI = nullptr, const VPIRFlags &Flags = {},
1564 const VPIRMetadata &Metadata = {},
1566 : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op, Flags, DL),
1567 VPIRMetadata(Metadata), Opcode(Opcode), ResultTy(ResultTy) {
1568 assert(flagsValidForOpcode(Opcode) &&
1569 "Set flags not supported for the provided opcode");
1571 }
1572
1573 ~VPWidenCastRecipe() override = default;
1574
1576 return new VPWidenCastRecipe(Opcode, getOperand(0), ResultTy,
1578 *this, *this, getDebugLoc());
1579 }
1580
1581 VP_CLASSOF_IMPL(VPDef::VPWidenCastSC)
1582
1583 /// Produce widened copies of the cast.
1584 LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
1585
1586 /// Return the cost of this VPWidenCastRecipe.
1588 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
1589
1590 Instruction::CastOps getOpcode() const { return Opcode; }
1591
1592 /// Returns the result type of the cast.
1593 Type *getResultType() const { return ResultTy; }
1594
1595protected:
1596#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1597 /// Print the recipe.
1598 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
1599 VPSlotTracker &SlotTracker) const override;
1600#endif
1601};
1602
1603/// A recipe for widening vector intrinsics.
1605 /// ID of the vector intrinsic to widen.
1606 Intrinsic::ID VectorIntrinsicID;
1607
1608 /// Scalar return type of the intrinsic.
1609 Type *ResultTy;
1610
1611 /// True if the intrinsic may read from memory.
1612 bool MayReadFromMemory;
1613
1614 /// True if the intrinsic may write to memory.
1615 bool MayWriteToMemory;
1616
1617 /// True if the intrinsic may have side-effects.
1618 bool MayHaveSideEffects;
1619
1620public:
1622 ArrayRef<VPValue *> CallArguments, Type *Ty,
1623 const VPIRFlags &Flags = {},
1624 const VPIRMetadata &MD = {},
1626 : VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments, Flags,
1627 DL),
1628 VPIRMetadata(MD), VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty),
1629 MayReadFromMemory(CI.mayReadFromMemory()),
1630 MayWriteToMemory(CI.mayWriteToMemory()),
1631 MayHaveSideEffects(CI.mayHaveSideEffects()) {
1632 setUnderlyingValue(&CI);
1633 }
1634
1636 ArrayRef<VPValue *> CallArguments, Type *Ty,
1637 const VPIRFlags &Flags = {},
1638 const VPIRMetadata &Metadata = {},
1640 : VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments, Flags,
1641 DL),
1642 VPIRMetadata(Metadata), VectorIntrinsicID(VectorIntrinsicID),
1643 ResultTy(Ty) {
1644 LLVMContext &Ctx = Ty->getContext();
1645 AttributeSet Attrs = Intrinsic::getFnAttributes(Ctx, VectorIntrinsicID);
1646 MemoryEffects ME = Attrs.getMemoryEffects();
1647 MayReadFromMemory = !ME.onlyWritesMemory();
1648 MayWriteToMemory = !ME.onlyReadsMemory();
1649 MayHaveSideEffects = MayWriteToMemory ||
1650 !Attrs.hasAttribute(Attribute::NoUnwind) ||
1651 !Attrs.hasAttribute(Attribute::WillReturn);
1652 }
1653
1654 ~VPWidenIntrinsicRecipe() override = default;
1655
1657 if (Value *CI = getUnderlyingValue())
1658 return new VPWidenIntrinsicRecipe(*cast<CallInst>(CI), VectorIntrinsicID,
1659 operands(), ResultTy, *this, *this,
1660 getDebugLoc());
1661 return new VPWidenIntrinsicRecipe(VectorIntrinsicID, operands(), ResultTy,
1662 *this, *this, getDebugLoc());
1663 }
1664
1665 VP_CLASSOF_IMPL(VPDef::VPWidenIntrinsicSC)
1666
1667 /// Produce a widened version of the vector intrinsic.
1668 LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
1669
1670 /// Return the cost of this vector intrinsic.
1672 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
1673
1674 /// Return the ID of the intrinsic.
1675 Intrinsic::ID getVectorIntrinsicID() const { return VectorIntrinsicID; }
1676
1677 /// Return the scalar return type of the intrinsic.
1678 Type *getResultType() const { return ResultTy; }
1679
1680 /// Return the name of the intrinsic as a string.
1682
1683 /// Returns true if the intrinsic may read from memory.
1684 bool mayReadFromMemory() const { return MayReadFromMemory; }
1685
1686 /// Returns true if the intrinsic may write to memory.
1687 bool mayWriteToMemory() const { return MayWriteToMemory; }
1688
1689 /// Returns true if the intrinsic may have side-effects.
1690 bool mayHaveSideEffects() const { return MayHaveSideEffects; }
1691
1692 LLVM_ABI_FOR_TEST bool usesFirstLaneOnly(const VPValue *Op) const override;
1693
1694protected:
1695#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1696 /// Print the recipe.
1697 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
1698 VPSlotTracker &SlotTracker) const override;
1699#endif
1700};
1701
1702/// A recipe for widening Call instructions using library calls.
1704 public VPIRMetadata {
1705 /// Variant stores a pointer to the chosen function. There is a 1:1 mapping
1706 /// between a given VF and the chosen vectorized variant, so there will be a
1707 /// different VPlan for each VF with a valid variant.
1708 Function *Variant;
1709
1710public:
1712 ArrayRef<VPValue *> CallArguments,
1713 const VPIRFlags &Flags = {},
1714 const VPIRMetadata &Metadata = {}, DebugLoc DL = {})
1715 : VPRecipeWithIRFlags(VPDef::VPWidenCallSC, CallArguments, Flags, DL),
1716 VPIRMetadata(Metadata), Variant(Variant) {
1717 setUnderlyingValue(UV);
1718 assert(
1719 isa<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue()) &&
1720 "last operand must be the called function");
1721 }
1722
1723 ~VPWidenCallRecipe() override = default;
1724
1726 return new VPWidenCallRecipe(getUnderlyingValue(), Variant, operands(),
1727 *this, *this, getDebugLoc());
1728 }
1729
1730 VP_CLASSOF_IMPL(VPDef::VPWidenCallSC)
1731
1732 /// Produce a widened version of the call instruction.
1733 void execute(VPTransformState &State) override;
1734
1735 /// Return the cost of this VPWidenCallRecipe.
1736 InstructionCost computeCost(ElementCount VF,
1737 VPCostContext &Ctx) const override;
1738
1742
1745
1746protected:
1747#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1748 /// Print the recipe.
1749 void printRecipe(raw_ostream &O, const Twine &Indent,
1750 VPSlotTracker &SlotTracker) const override;
1751#endif
1752};
1753
1754/// A recipe representing a sequence of load -> update -> store as part of
1755/// a histogram operation. This means there may be aliasing between vector
1756/// lanes, which is handled by the llvm.experimental.vector.histogram family
1757/// of intrinsics. The only update operations currently supported are
1758/// 'add' and 'sub' where the other term is loop-invariant.
1760 /// Opcode of the update operation, currently either add or sub.
1761 unsigned Opcode;
1762
1763public:
1764 VPHistogramRecipe(unsigned Opcode, ArrayRef<VPValue *> Operands,
1766 : VPRecipeBase(VPDef::VPHistogramSC, Operands, DL), Opcode(Opcode) {}
1767
1768 ~VPHistogramRecipe() override = default;
1769
1771 return new VPHistogramRecipe(Opcode, operands(), getDebugLoc());
1772 }
1773
1774 VP_CLASSOF_IMPL(VPDef::VPHistogramSC);
1775
1776 /// Produce a vectorized histogram operation.
1777 void execute(VPTransformState &State) override;
1778
1779 /// Return the cost of this VPHistogramRecipe.
1781 VPCostContext &Ctx) const override;
1782
1783 unsigned getOpcode() const { return Opcode; }
1784
1785 /// Return the mask operand if one was provided, or a null pointer if all
1786 /// lanes should be executed unconditionally.
1787 VPValue *getMask() const {
     // A mask is only present when the recipe has exactly three operands, in
     // which case it is the operand at index 2.
1788 return getNumOperands() == 3 ? getOperand(2) : nullptr;
1789 }
1790
1791protected:
1792#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1793 /// Print the recipe.
1794 void printRecipe(raw_ostream &O, const Twine &Indent,
1795 VPSlotTracker &SlotTracker) const override;
1796#endif
1797};
1798
1799/// A recipe for widening select instructions. Supports both wide vector and
1800/// single-scalar conditions, matching the behavior of LLVM IR's select
1801/// instruction.
1803 public VPIRMetadata {
1805 const VPIRFlags &Flags = {}, const VPIRMetadata &MD = {},
1806 DebugLoc DL = {})
1807 : VPRecipeWithIRFlags(VPDef::VPWidenSelectSC, Operands, Flags, DL),
1808 VPIRMetadata(MD) {
1809 setUnderlyingValue(SI);
1810 }
1811
1812 ~VPWidenSelectRecipe() override = default;
1813
1816 operands(), *this, *this, getDebugLoc());
1817 }
1818
1819 VP_CLASSOF_IMPL(VPDef::VPWidenSelectSC)
1820
1821 /// Produce a widened version of the select instruction.
1822 void execute(VPTransformState &State) override;
1823
1824 /// Return the cost of this VPWidenSelectRecipe.
1825 InstructionCost computeCost(ElementCount VF,
1826 VPCostContext &Ctx) const override;
1827
1828 unsigned getOpcode() const { return Instruction::Select; }
1829
/// Returns the select condition, which is operand 0 of the recipe.
1830 VPValue *getCond() const {
1831 return getOperand(0);
1832 }
1833
1834 /// Returns true if the recipe only uses the first lane of operand \p Op.
1835 bool usesFirstLaneOnly(const VPValue *Op) const override {
1837 "Op must be an operand of the recipe");
1838 return Op == getCond() && Op->isDefinedOutsideLoopRegions();
1839 }
1840
1841protected:
1842#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1843 /// Print the recipe.
1844 void printRecipe(raw_ostream &O, const Twine &Indent,
1845 VPSlotTracker &SlotTracker) const override;
1846#endif
1847};
1848
1849/// A recipe for handling GEP instructions.
1851 Type *SourceElementTy;
1852
/// Returns true if operand 0 (the pointer operand, going by this method's
/// name) is defined outside loop regions.
1853 bool isPointerLoopInvariant() const {
1854 return getOperand(0)->isDefinedOutsideLoopRegions();
1855 }
1856
/// Returns true if the index with number \p I is defined outside loop
/// regions. Indices are looked up at operand position \p I + 1, i.e. after
/// operand 0.
1857 bool isIndexLoopInvariant(unsigned I) const {
1858 return getOperand(I + 1)->isDefinedOutsideLoopRegions();
1859 }
1860
1861public:
1863 const VPIRFlags &Flags = {},
1865 : VPRecipeWithIRFlags(VPDef::VPWidenGEPSC, Operands, Flags, DL),
1866 SourceElementTy(GEP->getSourceElementType()) {
1867 setUnderlyingValue(GEP);
1869 (void)Metadata;
1871 assert(Metadata.empty() && "unexpected metadata on GEP");
1872 }
1873
1874 ~VPWidenGEPRecipe() override = default;
1875
1878 operands(), *this, getDebugLoc());
1879 }
1880
1881 VP_CLASSOF_IMPL(VPDef::VPWidenGEPSC)
1882
1883 /// This recipe generates a GEP instruction.
1884 unsigned getOpcode() const { return Instruction::GetElementPtr; }
1885
1886 /// Generate the gep nodes.
1887 void execute(VPTransformState &State) override;
1888
1889 Type *getSourceElementType() const { return SourceElementTy; }
1890
1891 /// Return the cost of this VPWidenGEPRecipe.
1893 VPCostContext &Ctx) const override {
1894 // TODO: Compute accurate cost after retiring the legacy cost model.
1895 return 0;
1896 }
1897
1898 /// Returns true if the recipe only uses the first lane of operand \p Op.
1899 bool usesFirstLaneOnly(const VPValue *Op) const override;
1900
1901protected:
1902#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1903 /// Print the recipe.
1904 void printRecipe(raw_ostream &O, const Twine &Indent,
1905 VPSlotTracker &SlotTracker) const override;
1906#endif
1907};
1908
1909/// A recipe to compute a pointer to the last element of each part of a widened
1910/// memory access for widened memory accesses of IndexedTy. Used for
1911/// VPWidenMemoryRecipes or VPInterleaveRecipes that are reversed.
1913 public VPUnrollPartAccessor<2> {
1914 Type *IndexedTy;
1915
1916 /// The constant stride of the pointer computed by this recipe, expressed in
1917 /// units of IndexedTy.
1918 int64_t Stride;
1919
1920public:
1922 int64_t Stride, GEPNoWrapFlags GEPFlags, DebugLoc DL)
1923 : VPRecipeWithIRFlags(VPDef::VPVectorEndPointerSC,
1924 ArrayRef<VPValue *>({Ptr, VF}), GEPFlags, DL),
1925 IndexedTy(IndexedTy), Stride(Stride) {
1926 assert(Stride < 0 && "Stride must be negative");
1927 }
1928
1929 VP_CLASSOF_IMPL(VPDef::VPVectorEndPointerSC)
1930
/// Returns the VF operand, which the constructor stores as operand 1.
1932 const VPValue *getVFValue() const { return getOperand(1); }
1933
1934 void execute(VPTransformState &State) override;
1935
1936 bool usesFirstLaneOnly(const VPValue *Op) const override {
1938 "Op must be an operand of the recipe");
1939 return true;
1940 }
1941
1942 /// Return the cost of this VPVectorEndPointerRecipe.
1944 VPCostContext &Ctx) const override {
1945 // TODO: Compute accurate cost after retiring the legacy cost model.
1946 return 0;
1947 }
1948
1949 /// Returns true if the recipe only uses the first part of operand \p Op.
1950 bool usesFirstPartOnly(const VPValue *Op) const override {
1952 "Op must be an operand of the recipe");
1953 assert(getNumOperands() <= 2 && "must have at most two operands");
1954 return true;
1955 }
1956
1958 return new VPVectorEndPointerRecipe(getOperand(0), getVFValue(), IndexedTy,
1959 Stride, getGEPNoWrapFlags(),
1960 getDebugLoc());
1961 }
1962
1963protected:
1964#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1965 /// Print the recipe.
1966 void printRecipe(raw_ostream &O, const Twine &Indent,
1967 VPSlotTracker &SlotTracker) const override;
1968#endif
1969};
1970
1971/// A recipe to compute the pointers for widened memory accesses of \p
1972/// SourceElementTy. Unrolling adds an extra offset operand for unrolled parts >
1973/// 0 and it produces `GEP Ptr, Offset`. The offset for unrolled part 0 is 0.
1975 Type *SourceElementTy;
1976
1977public:
1978 VPVectorPointerRecipe(VPValue *Ptr, Type *SourceElementTy,
1980 : VPRecipeWithIRFlags(VPDef::VPVectorPointerSC, Ptr, GEPFlags, DL),
1981 SourceElementTy(SourceElementTy) {}
1982
1983 VP_CLASSOF_IMPL(VPDef::VPVectorPointerSC)
1984
1986 return getNumOperands() == 2 ? getOperand(1) : nullptr;
1987 }
1988
1989 void execute(VPTransformState &State) override;
1990
1991 Type *getSourceElementType() const { return SourceElementTy; }
1992
1993 bool usesFirstLaneOnly(const VPValue *Op) const override {
1995 "Op must be an operand of the recipe");
1996 return true;
1997 }
1998
1999 /// Returns true if the recipe only uses the first part of operand \p Op.
2000 bool usesFirstPartOnly(const VPValue *Op) const override {
2002 "Op must be an operand of the recipe");
2003 assert(getNumOperands() <= 2 && "must have at most two operands");
2004 return true;
2005 }
2006
2008 auto *Clone = new VPVectorPointerRecipe(getOperand(0), SourceElementTy,
2010 if (auto *Off = getOffset())
2011 Clone->addOperand(Off);
2012 return Clone;
2013 }
2014
2015 /// Return the cost of this VPVectorPointerRecipe (currently always 0).
2017 VPCostContext &Ctx) const override {
2018 // TODO: Compute accurate cost after retiring the legacy cost model.
2019 return 0;
2020 }
2021
2022protected:
2023#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2024 /// Print the recipe.
2025 void printRecipe(raw_ostream &O, const Twine &Indent,
2026 VPSlotTracker &SlotTracker) const override;
2027#endif
2028};
2029
2030/// A pure virtual base class for all recipes modeling header phis, including
2031/// phis for first order recurrences, pointer inductions and reductions. The
2032/// start value is the first operand of the recipe and the incoming value from
2033/// the backedge is the second operand.
2034///
2035/// Inductions are modeled using the following sub-classes:
2036/// * VPCanonicalIVPHIRecipe: Canonical scalar induction of the vector loop,
2037/// starting at a specified value (zero for the main vector loop, the resume
2038/// value for the epilogue vector loop) and stepping by 1. The induction
2039/// controls exiting of the vector loop by comparing against the vector trip
2040/// count. Produces a single scalar PHI for the induction value per
2041/// iteration.
2042/// * VPWidenIntOrFpInductionRecipe: Generates vector values for integer and
2043/// floating point inductions with arbitrary start and step values. Produces
2044/// a vector PHI per-part.
2045/// * VPDerivedIVRecipe: Converts the canonical IV value to the corresponding
2046/// value of an IV with different start and step values. Produces a single
2047/// scalar value per iteration.
2048/// * VPScalarIVStepsRecipe: Generates scalar values per-lane based on a
2049/// canonical or derived induction.
2050/// * VPWidenPointerInductionRecipe: Generate vector and scalar values for a
2051/// pointer induction. Produces either a vector PHI per-part or scalar values
2052/// per-lane based on the canonical induction.
2054 public VPPhiAccessors {
2055protected:
2056 VPHeaderPHIRecipe(unsigned char VPDefID, Instruction *UnderlyingInstr,
2057 VPValue *Start, DebugLoc DL = DebugLoc::getUnknown())
2058 : VPSingleDefRecipe(VPDefID, ArrayRef<VPValue *>({Start}),
2059 UnderlyingInstr, DL) {}
2060
2061 const VPRecipeBase *getAsRecipe() const override { return this; }
2062
2063public:
2064 ~VPHeaderPHIRecipe() override = default;
2065
2066 /// Method to support type inquiry through isa, cast, and dyn_cast.
2067 static inline bool classof(const VPRecipeBase *R) {
2068 return R->getVPDefID() >= VPDef::VPFirstHeaderPHISC &&
2069 R->getVPDefID() <= VPDef::VPLastHeaderPHISC;
2070 }
/// Type inquiry for VPValues: true iff \p V is defined by a header phi recipe.
/// getDefiningRecipe() may be null, and plain isa<> asserts on a null
/// pointer, so use isa_and_present<>, which returns false in that case.
2071 static inline bool classof(const VPValue *V) {
2072 return isa_and_present<VPHeaderPHIRecipe>(V->getDefiningRecipe());
2073 }
2074 static inline bool classof(const VPSingleDefRecipe *R) {
2075 return isa<VPHeaderPHIRecipe>(static_cast<const VPRecipeBase *>(R));
2076 }
2077
2078 /// Generate the phi nodes.
2079 void execute(VPTransformState &State) override = 0;
2080
2081 /// Return the cost of this header phi recipe.
2083 VPCostContext &Ctx) const override;
2084
2085 /// Returns the start value of the phi, if one is set.
2087 return getNumOperands() == 0 ? nullptr : getOperand(0);
2088 }
2090 return getNumOperands() == 0 ? nullptr : getOperand(0);
2091 }
2092
2093 /// Update the start value of the recipe.
2095
2096 /// Returns the incoming value from the loop backedge.
2098 return getOperand(1);
2099 }
2100
2101 /// Update the incoming value from the loop backedge.
2103
2104 /// Returns the backedge value as a recipe. The backedge value is guaranteed
2105 /// to be a recipe.
2107 return *getBackedgeValue()->getDefiningRecipe();
2108 }
2109
2110protected:
2111#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2112 /// Print the recipe.
2113 void printRecipe(raw_ostream &O, const Twine &Indent,
2114 VPSlotTracker &SlotTracker) const override = 0;
2115#endif
2116};
2117
2118/// Base class for widened induction (VPWidenIntOrFpInductionRecipe and
2119/// VPWidenPointerInductionRecipe), providing shared functionality, including
2120/// retrieving the step value, induction descriptor and original phi node.
2122 const InductionDescriptor &IndDesc;
2123
2124public:
2125 VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start,
2126 VPValue *Step, const InductionDescriptor &IndDesc,
2127 DebugLoc DL)
2128 : VPHeaderPHIRecipe(Kind, IV, Start, DL), IndDesc(IndDesc) {
2129 addOperand(Step);
2130 }
2131
2132 static inline bool classof(const VPRecipeBase *R) {
2133 return R->getVPDefID() == VPDef::VPWidenIntOrFpInductionSC ||
2134 R->getVPDefID() == VPDef::VPWidenPointerInductionSC;
2135 }
2136
2137 static inline bool classof(const VPValue *V) {
2138 auto *R = V->getDefiningRecipe();
2139 return R && classof(R);
2140 }
2141
2142 static inline bool classof(const VPSingleDefRecipe *R) {
2143 return classof(static_cast<const VPRecipeBase *>(R));
2144 }
2145
2146 void execute(VPTransformState &State) override = 0;
2147
2148 /// Returns the step value of the induction.
2150 const VPValue *getStepValue() const { return getOperand(1); }
2151
2152 /// Update the step value of the recipe.
2153 void setStepValue(VPValue *V) { setOperand(1, V); }
2154
2156 const VPValue *getVFValue() const { return getOperand(2); }
2157
2158 /// Returns the number of incoming values, also number of incoming blocks.
2159 /// Note that at the moment, VPWidenPointerInductionRecipe only has a single
2160 /// incoming value, its start value.
2161 unsigned getNumIncoming() const override { return 1; }
2162
2164
2165 /// Returns the induction descriptor for the recipe.
2166 const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }
2167
2169 // TODO: All operands of base recipe must exist and be at same index in
2170 // derived recipe.
2172 "VPWidenIntOrFpInductionRecipe generates its own backedge value");
2173 }
2174
2176 // TODO: All operands of base recipe must exist and be at same index in
2177 // derived recipe.
2179 "VPWidenIntOrFpInductionRecipe generates its own backedge value");
2180 }
2181
2182 /// Returns true if the recipe only uses the first lane of operand \p Op.
2183 bool usesFirstLaneOnly(const VPValue *Op) const override {
2185 "Op must be an operand of the recipe");
2186 // The recipe creates its own wide start value, so it only requests the
2187 // first lane of the operand.
2188 // TODO: Remove once creating the start value is modeled separately.
2189 return Op == getStartValue() || Op == getStepValue();
2190 }
2191};
2192
2193/// A recipe for handling phi nodes of integer and floating-point inductions,
2194/// producing their vector values. This is an abstract recipe and must be
2195/// converted to concrete recipes before executing.
2197 public VPIRFlags {
2198 TruncInst *Trunc;
2199
2200 // If this recipe is unrolled it will have 2 additional operands.
2201 bool isUnrolled() const { return getNumOperands() == 5; }
2202
2203public:
2205 VPValue *VF, const InductionDescriptor &IndDesc,
2206 const VPIRFlags &Flags, DebugLoc DL)
2207 : VPWidenInductionRecipe(VPDef::VPWidenIntOrFpInductionSC, IV, Start,
2208 Step, IndDesc, DL),
2209 VPIRFlags(Flags), Trunc(nullptr) {
2210 addOperand(VF);
2211 }
2212
2214 VPValue *VF, const InductionDescriptor &IndDesc,
2215 TruncInst *Trunc, const VPIRFlags &Flags,
2216 DebugLoc DL)
2217 : VPWidenInductionRecipe(VPDef::VPWidenIntOrFpInductionSC, IV, Start,
2218 Step, IndDesc, DL),
2219 VPIRFlags(Flags), Trunc(Trunc) {
2220 addOperand(VF);
2222 (void)Metadata;
2223 if (Trunc)
2225 assert(Metadata.empty() && "unexpected metadata on Trunc");
2226 }
2227
2229
2235
2236 VP_CLASSOF_IMPL(VPDef::VPWidenIntOrFpInductionSC)
2237
/// Must never be reached: this recipe is abstract and is expected to have been
/// expanded into concrete recipes before execution (see the message below).
2238 void execute(VPTransformState &State) override {
2239 llvm_unreachable("cannot execute this recipe, should be expanded via "
2240 "expandVPWidenIntOrFpInductionRecipe");
2241 }
2242
2244 // If the recipe has been unrolled return the VPValue for the induction
2245 // increment.
2246 return isUnrolled() ? getOperand(getNumOperands() - 2) : nullptr;
2247 }
2248
2249 /// Returns the number of incoming values, also number of incoming blocks.
2250 /// Note that at the moment, VPWidenIntOrFpInductionRecipes only have a single
2251 /// incoming value, its start value.
2252 unsigned getNumIncoming() const override { return 1; }
2253
2254 /// Returns the TruncInst stored in this recipe, which is nullptr when none
2255 /// was supplied at construction.
2256 TruncInst *getTruncInst() { return Trunc; }
2257 const TruncInst *getTruncInst() const { return Trunc; }
2258
2259 /// Returns true if the induction is canonical, i.e. starting at 0 and
2260 /// incremented by UF * VF (= the original IV is incremented by 1) and has the
2261 /// same type as the canonical induction.
2262 bool isCanonical() const;
2263
2264 /// Returns the scalar type of the induction.
2266 return Trunc ? Trunc->getType()
2268 }
2269
2270 /// Returns the VPValue representing the value of this induction at
2271 /// the last unrolled part, if it exists. Returns itself if unrolling did not
2272 /// take place.
2274 return isUnrolled() ? getOperand(getNumOperands() - 1) : this;
2275 }
2276
2277protected:
2278#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2279 /// Print the recipe.
2280 void printRecipe(raw_ostream &O, const Twine &Indent,
2281 VPSlotTracker &SlotTracker) const override;
2282#endif
2283};
2284
2286public:
2287 /// Create a new VPWidenPointerInductionRecipe for \p Phi with start value \p
2288 /// Start and the number of elements unrolled \p NumUnrolledElems, typically
2289 /// VF*UF.
2291 VPValue *NumUnrolledElems,
2292 const InductionDescriptor &IndDesc, DebugLoc DL)
2293 : VPWidenInductionRecipe(VPDef::VPWidenPointerInductionSC, Phi, Start,
2294 Step, IndDesc, DL) {
2295 addOperand(NumUnrolledElems);
2296 }
2297
2299
2305
2306 VP_CLASSOF_IMPL(VPDef::VPWidenPointerInductionSC)
2307
2308 /// Must never be reached: the recipe is expected to have been expanded
/// (see the message below) before execution.
2309 void execute(VPTransformState &State) override {
2310 llvm_unreachable("cannot execute this recipe, should be expanded via "
2311 "expandVPWidenPointerInduction");
2312 };
2313
2314 /// Returns true if only scalar values will be generated.
2315 bool onlyScalarsGenerated(bool IsScalable);
2316
2317protected:
2318#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2319 /// Print the recipe.
2320 void printRecipe(raw_ostream &O, const Twine &Indent,
2321 VPSlotTracker &SlotTracker) const override;
2322#endif
2323};
2324
2325/// A recipe for widened phis. Incoming values are operands of the recipe and
2326/// their operand index corresponds to the incoming predecessor block. If the
2327/// recipe is placed in an entry block to a (non-replicate) region, it must have
2328/// exactly 2 incoming values, the first from the predecessor of the region and
2329/// the second from the exiting block of the region.
2331 public VPPhiAccessors {
2332 /// Name to use for the generated IR instruction for the widened phi.
2333 std::string Name;
2334
2335public:
2336 /// Create a new VPWidenPHIRecipe for \p Phi with start value \p Start and
2337 /// debug location \p DL.
2338 VPWidenPHIRecipe(PHINode *Phi, VPValue *Start = nullptr,
2339 DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "")
2340 : VPSingleDefRecipe(VPDef::VPWidenPHISC, {}, Phi, DL), Name(Name.str()) {
2341 if (Start)
2342 addOperand(Start);
2343 }
2344
2347 getOperand(0), getDebugLoc(), Name);
2349 C->addOperand(Op);
2350 return C;
2351 }
2352
2353 ~VPWidenPHIRecipe() override = default;
2354
2355 VP_CLASSOF_IMPL(VPDef::VPWidenPHISC)
2356
2357 /// Generate the phi/select nodes.
2358 void execute(VPTransformState &State) override;
2359
2360protected:
2361#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2362 /// Print the recipe.
2363 void printRecipe(raw_ostream &O, const Twine &Indent,
2364 VPSlotTracker &SlotTracker) const override;
2365#endif
2366
2367 const VPRecipeBase *getAsRecipe() const override { return this; }
2368};
2369
2370/// A recipe for handling first-order recurrence phis. The start value is the
2371/// first operand of the recipe and the incoming value from the backedge is the
2372/// second operand.
2375 VPValue &BackedgeValue)
2376 : VPHeaderPHIRecipe(VPDef::VPFirstOrderRecurrencePHISC, Phi, &Start) {
2377 addOperand(&BackedgeValue);
2378 }
2379
2380 VP_CLASSOF_IMPL(VPDef::VPFirstOrderRecurrencePHISC)
2381
2386
2387 void execute(VPTransformState &State) override;
2388
2389 /// Return the cost of this first-order recurrence phi recipe.
2391 VPCostContext &Ctx) const override;
2392
2393 /// Returns true if the recipe only uses the first lane of operand \p Op.
2394 bool usesFirstLaneOnly(const VPValue *Op) const override {
2396 "Op must be an operand of the recipe");
2397 return Op == getStartValue();
2398 }
2399
2400protected:
2401#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2402 /// Print the recipe.
2403 void printRecipe(raw_ostream &O, const Twine &Indent,
2404 VPSlotTracker &SlotTracker) const override;
2405#endif
2406};
2407
2408/// Possible variants of a reduction.
2409
2410/// This reduction is ordered and in-loop.
2411struct RdxOrdered {};
2412/// This reduction is in-loop.
2413struct RdxInLoop {};
2414/// This reduction is unordered with the partial result scaled down by some
2415/// factor.
2418};
2419using ReductionStyle = std::variant<RdxOrdered, RdxInLoop, RdxUnordered>;
2420
/// Select the ReductionStyle alternative for the given flags: RdxOrdered if
/// \p Ordered is set, otherwise RdxInLoop if \p InLoop is set, otherwise
/// RdxUnordered carrying \p ScaleFactor. An ordered reduction must also be
/// in-loop (asserted).
2421inline ReductionStyle getReductionStyle(bool InLoop, bool Ordered,
2422 unsigned ScaleFactor) {
2423 assert((!Ordered || InLoop) && "Ordered implies in-loop");
2424 if (Ordered)
2425 return RdxOrdered{};
2426 if (InLoop)
2427 return RdxInLoop{};
2428 return RdxUnordered{/*VFScaleFactor=*/ScaleFactor};
2429}
2430
2431/// A recipe for handling reduction phis. The start value is the first operand
2432/// of the recipe and the incoming value from the backedge is the second
2433/// operand.
2435 public VPUnrollPartAccessor<2> {
2436 /// The recurrence kind of the reduction.
2437 const RecurKind Kind;
2438
2439 ReductionStyle Style;
2440
2441 /// The phi is part of a multi-use reduction (e.g., used in FindLastIV
2442 /// patterns for argmin/argmax).
2443 /// TODO: Also support cases where the phi itself has a single use, but its
2444 /// compare has multiple uses.
2445 bool HasUsesOutsideReductionChain;
2446
2447public:
2448 /// Create a new VPReductionPHIRecipe for the reduction \p Phi.
2450 VPValue &BackedgeValue, ReductionStyle Style,
2451 bool HasUsesOutsideReductionChain = false)
2452 : VPHeaderPHIRecipe(VPDef::VPReductionPHISC, Phi, &Start), Kind(Kind),
2453 Style(Style),
2454 HasUsesOutsideReductionChain(HasUsesOutsideReductionChain) {
2455 addOperand(&BackedgeValue);
2456 }
2457
2458 ~VPReductionPHIRecipe() override = default;
2459
2461 return new VPReductionPHIRecipe(
2463 *getOperand(0), *getBackedgeValue(), Style,
2464 HasUsesOutsideReductionChain);
2465 }
2466
2467 VP_CLASSOF_IMPL(VPDef::VPReductionPHISC)
2468
2469 /// Generate the phi/select nodes.
2470 void execute(VPTransformState &State) override;
2471
2472 /// Get the factor that the VF of this recipe's output should be scaled by, or
2473 /// 1 if it isn't scaled.
2474 unsigned getVFScaleFactor() const {
2475 auto *Partial = std::get_if<RdxUnordered>(&Style);
2476 return Partial ? Partial->VFScaleFactor : 1;
2477 }
2478
2479 /// Set the VFScaleFactor for this reduction phi. Can only be set to a factor
2480 /// > 1.
2481 void setVFScaleFactor(unsigned ScaleFactor) {
2482 assert(ScaleFactor > 1 && "must set to scale factor > 1");
2483 Style = RdxUnordered{ScaleFactor};
2484 }
2485
2486 /// Returns the number of incoming values, also number of incoming blocks.
2487 /// A reduction phi has two incoming values: its start value and the value
2488 /// from the loop backedge.
2489 unsigned getNumIncoming() const override { return 2; }
2490
2491 /// Returns the recurrence kind of the reduction.
2492 RecurKind getRecurrenceKind() const { return Kind; }
2493
2494 /// Returns true, if the phi is part of an ordered reduction.
2495 bool isOrdered() const { return std::holds_alternative<RdxOrdered>(Style); }
2496
2497 /// Returns true if the phi is part of an in-loop reduction.
2498 bool isInLoop() const {
2499 return std::holds_alternative<RdxInLoop>(Style) ||
2500 std::holds_alternative<RdxOrdered>(Style);
2501 }
2502
2503 /// Returns true if the reduction outputs a vector with a scaled down VF.
2504 bool isPartialReduction() const { return getVFScaleFactor() > 1; }
2505
2506 /// Returns true, if the phi is part of a multi-use reduction.
2508 return HasUsesOutsideReductionChain;
2509 }
2510
2511 /// Returns true if the recipe only uses the first lane of operand \p Op.
/// The answer is the same for every operand: true for in-loop reductions.
2512 bool usesFirstLaneOnly(const VPValue *Op) const override {
2514 "Op must be an operand of the recipe");
// isInLoop() is also true for ordered reductions, so the isOrdered() check
// does not widen the result.
2515 return isOrdered() || isInLoop();
2516 }
2517
2518protected:
2519#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2520 /// Print the recipe.
2521 void printRecipe(raw_ostream &O, const Twine &Indent,
2522 VPSlotTracker &SlotTracker) const override;
2523#endif
2524};
2525
2526/// A recipe for vectorizing a phi-node as a sequence of mask-based select
2527/// instructions.
2529public:
2530 /// The blend operation is a User of the incoming values and of their
2531 /// respective masks, ordered [I0, M0, I1, M1, I2, M2, ...]. Note that M0 can
2532 /// be omitted (implied by passing an odd number of operands) in which case
2533 /// all other incoming values are merged into it.
2535 : VPSingleDefRecipe(VPDef::VPBlendSC, Operands, Phi, DL) {
2536 assert(Operands.size() >= 2 && "Expected at least two operands!");
2537 }
2538
2543
2544 VP_CLASSOF_IMPL(VPDef::VPBlendSC)
2545
2546 /// A normalized blend is one that has an odd number of operands, whereby the
2547 /// first operand does not have an associated mask.
2548 bool isNormalized() const { return getNumOperands() % 2; }
2549
2550 /// Return the number of incoming values, taking into account when normalized
2551 /// the first incoming value will have no mask.
2552 unsigned getNumIncomingValues() const {
2553 return (getNumOperands() + isNormalized()) / 2;
2554 }
2555
2556 /// Return incoming value number \p Idx.
2557 VPValue *getIncomingValue(unsigned Idx) const {
2558 return Idx == 0 ? getOperand(0) : getOperand(Idx * 2 - isNormalized());
2559 }
2560
2561 /// Return mask number \p Idx.
2562 VPValue *getMask(unsigned Idx) const {
2563 assert((Idx > 0 || !isNormalized()) && "First index has no mask!");
2564 return Idx == 0 ? getOperand(1) : getOperand(Idx * 2 + !isNormalized());
2565 }
2566
2567 /// Set mask number \p Idx to \p V.
2568 void setMask(unsigned Idx, VPValue *V) {
2569 assert((Idx > 0 || !isNormalized()) && "First index has no mask!");
2570 Idx == 0 ? setOperand(1, V) : setOperand(Idx * 2 + !isNormalized(), V);
2571 }
2572
/// Must never be reached: blends are expected to have been expanded before
/// execution (see the message below).
2573 void execute(VPTransformState &State) override {
2574 llvm_unreachable("VPBlendRecipe should be expanded by simplifyBlends");
2575 }
2576
2577 /// Return the cost of this VPBlendRecipe.
2578 InstructionCost computeCost(ElementCount VF,
2579 VPCostContext &Ctx) const override;
2580
2581 /// Returns true if the recipe only uses the first lane of operand \p Op.
2582 bool usesFirstLaneOnly(const VPValue *Op) const override {
2584 "Op must be an operand of the recipe");
2585 // True iff every user of this blend only uses the blend's first lane. This
2586 // recurses through Blend recipes only and must terminate at header phis at
// the latest.
2587 return all_of(users(),
2588 [this](VPUser *U) { return U->usesFirstLaneOnly(this); });
2589 }
2590
2591protected:
2592#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2593 /// Print the recipe.
2594 void printRecipe(raw_ostream &O, const Twine &Indent,
2595 VPSlotTracker &SlotTracker) const override;
2596#endif
2597};
2598
2599/// A common base class for interleaved memory operations.
2600/// An Interleaved memory operation is a memory access method that combines
2601/// multiple strided loads/stores into a single wide load/store with shuffles.
2602/// The first operand is the start address. The optional operands are, in order,
2603/// the stored values and the mask.
2605 public VPIRMetadata {
2607
2608 /// Indicates if the interleave group is in a conditional block and requires a
2609 /// mask.
2610 bool HasMask = false;
2611
2612 /// Indicates if gaps between members of the group need to be masked out or if
2613 /// unused gaps can be loaded speculatively.
2614 bool NeedsMaskForGaps = false;
2615
2616protected:
/// Construct the common part of an interleave recipe of kind \p SC for the
/// interleave group IG. \p Operands become the initial operands; each of
/// \p StoredValues is appended after them, followed by \p Mask if it is
/// non-null (which also sets HasMask). A VPValue is created for every group
/// member whose type is not void. \p MD and \p DL are forwarded to the
/// VPIRMetadata and VPRecipeBase bases respectively.
2617 VPInterleaveBase(const unsigned char SC,
2619 ArrayRef<VPValue *> Operands,
2620 ArrayRef<VPValue *> StoredValues, VPValue *Mask,
2621 bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
2622 : VPRecipeBase(SC, Operands, DL), VPIRMetadata(MD), IG(IG),
2623 NeedsMaskForGaps(NeedsMaskForGaps) {
2624 // TODO: extend the masked interleaved-group support to reversed access.
2625 assert((!Mask || !IG->isReverse()) &&
2626 "Reversed masked interleave-group not supported.");
// Walk every slot of the group; empty slots (null members) and members with
// a void type get no VPValue.
2627 for (unsigned I = 0; I < IG->getFactor(); ++I)
2628 if (Instruction *Inst = IG->getMember(I)) {
2629 if (Inst->getType()->isVoidTy())
2630 continue;
2631 new VPValue(Inst, this);
2632 }
2633
// Operand order after this point: Operands..., StoredValues..., [Mask].
2634 for (auto *SV : StoredValues)
2635 addOperand(SV);
2636 if (Mask) {
2637 HasMask = true;
2638 addOperand(Mask);
2639 }
2640 }
2641
2642public:
2643 VPInterleaveBase *clone() override = 0;
2644
2645 static inline bool classof(const VPRecipeBase *R) {
2646 return R->getVPDefID() == VPRecipeBase::VPInterleaveSC ||
2647 R->getVPDefID() == VPRecipeBase::VPInterleaveEVLSC;
2648 }
2649
2650 static inline bool classof(const VPUser *U) {
2651 auto *R = dyn_cast<VPRecipeBase>(U);
2652 return R && classof(R);
2653 }
2654
2655 /// Return the address accessed by this recipe.
2656 VPValue *getAddr() const {
2657 return getOperand(0); // Address is the 1st, mandatory operand.
2658 }
2659
2660 /// Return the mask used by this recipe. Note that a full mask is represented
2661 /// by a nullptr.
2662 VPValue *getMask() const {
2663 // Mask is optional and the last operand.
2664 return HasMask ? getOperand(getNumOperands() - 1) : nullptr;
2665 }
2666
2667 /// Return true if the access needs a mask because of the gaps.
2668 bool needsMaskForGaps() const { return NeedsMaskForGaps; }
2669
2671
2672 Instruction *getInsertPos() const { return IG->getInsertPos(); }
2673
/// Must never be reached: only the concrete interleave recipes derived from
/// this base are meant to be executed (see the message below).
2674 void execute(VPTransformState &State) override {
2675 llvm_unreachable("VPInterleaveBase should not be instantiated.");
2676 }
2677
2678 /// Return the cost of this recipe.
2679 InstructionCost computeCost(ElementCount VF,
2680 VPCostContext &Ctx) const override;
2681
2682 /// Returns true if the recipe only uses the first lane of operand \p Op.
2683 bool usesFirstLaneOnly(const VPValue *Op) const override = 0;
2684
2685 /// Returns the number of stored operands of this interleave group. Returns 0
2686 /// for load interleave groups.
2687 virtual unsigned getNumStoreOperands() const = 0;
2688
2689 /// Return the VPValues stored by this interleave group. If it is a load
2690 /// interleave group, return an empty ArrayRef.
2692 return ArrayRef<VPValue *>(op_end() -
2693 (getNumStoreOperands() + (HasMask ? 1 : 0)),
2695 }
2696};
2697
2698/// VPInterleaveRecipe is a recipe for transforming an interleave group of load
2699/// or stores into one wide load/store and shuffles. The first operand of a
2700/// VPInterleave recipe is the address, followed by the stored values, followed
2701/// by an optional mask.
2703public:
2705 ArrayRef<VPValue *> StoredValues, VPValue *Mask,
2706 bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
2707 : VPInterleaveBase(VPDef::VPInterleaveSC, IG, Addr, StoredValues, Mask,
2708 NeedsMaskForGaps, MD, DL) {}
2709
2710 ~VPInterleaveRecipe() override = default;
2711
2715 needsMaskForGaps(), *this, getDebugLoc());
2716 }
2717
2718 VP_CLASSOF_IMPL(VPDef::VPInterleaveSC)
2719
2720 /// Generate the wide load or store, and shuffles.
2721 void execute(VPTransformState &State) override;
2722
2723 bool usesFirstLaneOnly(const VPValue *Op) const override {
2725 "Op must be an operand of the recipe");
2726 return Op == getAddr() && !llvm::is_contained(getStoredValues(), Op);
2727 }
2728
/// Number of stored-value operands: every operand except the address and, if
/// present, the trailing mask.
2729 unsigned getNumStoreOperands() const override {
2730 return getNumOperands() - (getMask() ? 2 : 1);
2731 }
2732
2733protected:
2734#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2735 /// Print the recipe.
2736 void printRecipe(raw_ostream &O, const Twine &Indent,
2737 VPSlotTracker &SlotTracker) const override;
2738#endif
2739};
2740
2741/// A recipe for interleaved memory operations with vector-predication
2742/// intrinsics. The first operand is the address, the second operand is the
2743/// explicit vector length. Stored values and mask are optional operands.
2745public:
2747 : VPInterleaveBase(VPDef::VPInterleaveEVLSC, R.getInterleaveGroup(),
2748 ArrayRef<VPValue *>({R.getAddr(), &EVL}),
2749 R.getStoredValues(), Mask, R.needsMaskForGaps(), R,
2750 R.getDebugLoc()) {
2751 assert(!getInterleaveGroup()->isReverse() &&
2752 "Reversed interleave-group with tail folding is not supported.");
2753 assert(!needsMaskForGaps() && "Interleaved access with gap mask is not "
2754 "supported for scalable vector.");
2755 }
2756
2757 ~VPInterleaveEVLRecipe() override = default;
2758
2760 llvm_unreachable("cloning not implemented yet");
2761 }
2762
2763 VP_CLASSOF_IMPL(VPDef::VPInterleaveEVLSC)
2764
2765 /// The VPValue of the explicit vector length.
2766 VPValue *getEVL() const { return getOperand(1); }
2767
2768 /// Generate the wide load or store, and shuffles.
2769 void execute(VPTransformState &State) override;
2770
2771 /// The recipe only uses the first lane of the address, and EVL operand.
2772 bool usesFirstLaneOnly(const VPValue *Op) const override {
2774 "Op must be an operand of the recipe");
2775 return (Op == getAddr() && !llvm::is_contained(getStoredValues(), Op)) ||
2776 Op == getEVL();
2777 }
2778
/// Number of stored-value operands: every operand except the address, the
/// EVL and, if present, the trailing mask.
2779 unsigned getNumStoreOperands() const override {
2780 return getNumOperands() - (getMask() ? 3 : 2);
2781 }
2782
2783protected:
2784#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2785 /// Print the recipe.
2786 void printRecipe(raw_ostream &O, const Twine &Indent,
2787 VPSlotTracker &SlotTracker) const override;
2788#endif
2789};
2790
2791/// A recipe to represent inloop, ordered or partial reduction operations. It
2792/// performs a reduction on a vector operand into a scalar (vector in the case
2793/// of a partial reduction) value, and adds the result to a chain. The Operands
2794/// are {ChainOp, VecOp, [Condition]}.
2796
2797 /// The recurrence kind for the reduction in question.
2798 RecurKind RdxKind;
2799 /// Whether the reduction is conditional.
2800 bool IsConditional = false;
2801 ReductionStyle Style;
2802
2803protected:
2804 VPReductionRecipe(const unsigned char SC, RecurKind RdxKind,
2806 ArrayRef<VPValue *> Operands, VPValue *CondOp,
2807 ReductionStyle Style, DebugLoc DL)
2808 : VPRecipeWithIRFlags(SC, Operands, FMFs, DL), RdxKind(RdxKind),
2809 Style(Style) {
2810 if (CondOp) {
2811 IsConditional = true;
2812 addOperand(CondOp);
2813 }
2815 }
2816
2817public:
2819 VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
2821 : VPReductionRecipe(VPDef::VPReductionSC, RdxKind, FMFs, I,
2822 ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp, Style,
2823 DL) {}
2824
2826 VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
2828 : VPReductionRecipe(VPDef::VPReductionSC, RdxKind, FMFs, nullptr,
2829 ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp, Style,
2830 DL) {}
2831
2832 ~VPReductionRecipe() override = default;
2833
2835 return new VPReductionRecipe(RdxKind, getFastMathFlags(),
2837 getCondOp(), Style, getDebugLoc());
2838 }
2839
2840 static inline bool classof(const VPRecipeBase *R) {
2841 return R->getVPDefID() == VPRecipeBase::VPReductionSC ||
2842 R->getVPDefID() == VPRecipeBase::VPReductionEVLSC;
2843 }
2844
2845 static inline bool classof(const VPUser *U) {
2846 auto *R = dyn_cast<VPRecipeBase>(U);
2847 return R && classof(R);
2848 }
2849
2850 static inline bool classof(const VPValue *VPV) {
2851 const VPRecipeBase *R = VPV->getDefiningRecipe();
2852 return R && classof(R);
2853 }
2854
2855 static inline bool classof(const VPSingleDefRecipe *R) {
2856 return classof(static_cast<const VPRecipeBase *>(R));
2857 }
2858
2859 /// Generate the reduction in the loop.
2860 void execute(VPTransformState &State) override;
2861
2862 /// Return the cost of VPReductionRecipe.
2863 InstructionCost computeCost(ElementCount VF,
2864 VPCostContext &Ctx) const override;
2865
2866 /// Return the recurrence kind for the in-loop reduction.
2867 RecurKind getRecurrenceKind() const { return RdxKind; }
2868 /// Return true if the in-loop reduction is ordered.
2869 bool isOrdered() const { return std::holds_alternative<RdxOrdered>(Style); };
2870 /// Return true if the in-loop reduction is conditional.
2871 bool isConditional() const { return IsConditional; };
2872 /// Returns true if the reduction outputs a vector with a scaled down VF.
2873 bool isPartialReduction() const { return getVFScaleFactor() > 1; }
2874 /// Returns true if the reduction is in-loop.
2875 bool isInLoop() const {
2876 return std::holds_alternative<RdxInLoop>(Style) ||
2877 std::holds_alternative<RdxOrdered>(Style);
2878 }
2879 /// The VPValue of the scalar Chain being accumulated.
2880 VPValue *getChainOp() const { return getOperand(0); }
2881 /// The VPValue of the vector value to be reduced.
2882 VPValue *getVecOp() const { return getOperand(1); }
2883 /// The VPValue of the condition for the block.
2885 return isConditional() ? getOperand(getNumOperands() - 1) : nullptr;
2886 }
2887 /// Get the factor that the VF of this recipe's output should be scaled by, or
2888 /// 1 if it isn't scaled.
2889 unsigned getVFScaleFactor() const {
2890 auto *Partial = std::get_if<RdxUnordered>(&Style);
2891 return Partial ? Partial->VFScaleFactor : 1;
2892 }
2893
2894protected:
2895#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2896 /// Print the recipe.
2897 void printRecipe(raw_ostream &O, const Twine &Indent,
2898 VPSlotTracker &SlotTracker) const override;
2899#endif
2900};
2901
2902/// A recipe to represent inloop reduction operations with vector-predication
2903/// intrinsics, performing a reduction on a vector operand with the explicit
2904/// vector length (EVL) into a scalar value, and adding the result to a chain.
2905/// The Operands are {ChainOp, VecOp, EVL, [Condition]}.
2907public:
2911 VPDef::VPReductionEVLSC, R.getRecurrenceKind(),
2912 R.getFastMathFlags(),
2914 ArrayRef<VPValue *>({R.getChainOp(), R.getVecOp(), &EVL}), CondOp,
2915 getReductionStyle(/*InLoop=*/true, R.isOrdered(), 1), DL) {}
2916
2917 ~VPReductionEVLRecipe() override = default;
2918
2920 llvm_unreachable("cloning not implemented yet");
2921 }
2922
2923 VP_CLASSOF_IMPL(VPDef::VPReductionEVLSC)
2924
2925 /// Generate the reduction in the loop
2926 void execute(VPTransformState &State) override;
2927
2928 /// The VPValue of the explicit vector length.
2929 VPValue *getEVL() const { return getOperand(2); }
2930
2931 /// Returns true if the recipe only uses the first lane of operand \p Op.
2932 bool usesFirstLaneOnly(const VPValue *Op) const override {
2934 "Op must be an operand of the recipe");
2935 return Op == getEVL();
2936 }
2937
2938protected:
2939#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2940 /// Print the recipe.
2941 void printRecipe(raw_ostream &O, const Twine &Indent,
2942 VPSlotTracker &SlotTracker) const override;
2943#endif
2944};
2945
2946/// VPReplicateRecipe replicates a given instruction producing multiple scalar
2947/// copies of the original scalar type, one per lane, instead of producing a
2948/// single copy of widened type for all lanes. If the instruction is known to be
2949/// a single scalar, only one copy, per lane zero, will be generated.
2951 public VPIRMetadata {
2952 /// Indicator if only a single replica per lane is needed.
2953 bool IsSingleScalar;
2954
2955 /// Indicator if the replicas are also predicated.
2956 bool IsPredicated;
2957
2958public:
2960 bool IsSingleScalar, VPValue *Mask = nullptr,
2961 const VPIRFlags &Flags = {}, VPIRMetadata Metadata = {},
2962 DebugLoc DL = DebugLoc::getUnknown())
2963 : VPRecipeWithIRFlags(VPDef::VPReplicateSC, Operands, Flags, DL),
2964 VPIRMetadata(Metadata), IsSingleScalar(IsSingleScalar),
2965 IsPredicated(Mask) {
2966 setUnderlyingValue(I);
2967 if (Mask)
2968 addOperand(Mask);
2969 }
2970
2971 ~VPReplicateRecipe() override = default;
2972
2974 auto *Copy = new VPReplicateRecipe(
2975 getUnderlyingInstr(), operands(), IsSingleScalar,
2976 isPredicated() ? getMask() : nullptr, *this, *this, getDebugLoc());
2977 Copy->transferFlags(*this);
2978 return Copy;
2979 }
2980
2981 VP_CLASSOF_IMPL(VPDef::VPReplicateSC)
2982
2983 /// Generate replicas of the desired Ingredient. Replicas will be generated
2984 /// for all parts and lanes unless a specific part and lane are specified in
2985 /// the \p State.
2986 void execute(VPTransformState &State) override;
2987
2988 /// Return the cost of this VPReplicateRecipe.
2989 InstructionCost computeCost(ElementCount VF,
2990 VPCostContext &Ctx) const override;
2991
2992 bool isSingleScalar() const { return IsSingleScalar; }
2993
2994 bool isPredicated() const { return IsPredicated; }
2995
2996 /// Returns true if the recipe only uses the first lane of operand \p Op.
2997 bool usesFirstLaneOnly(const VPValue *Op) const override {
2999 "Op must be an operand of the recipe");
3000 return isSingleScalar();
3001 }
3002
3003 /// Returns true if the recipe uses scalars of operand \p Op.
3004 bool usesScalars(const VPValue *Op) const override {
3006 "Op must be an operand of the recipe");
3007 return true;
3008 }
3009
3010 /// Returns true if the recipe is used by a widened recipe via an intervening
3011 /// VPPredInstPHIRecipe. In this case, the scalar values should also be packed
3012 /// in a vector.
3013 bool shouldPack() const;
3014
3015 /// Return the mask of a predicated VPReplicateRecipe.
3017 assert(isPredicated() && "Trying to get the mask of a unpredicated recipe");
3018 return getOperand(getNumOperands() - 1);
3019 }
3020
3021 unsigned getOpcode() const { return getUnderlyingInstr()->getOpcode(); }
3022
3023protected:
3024#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3025 /// Print the recipe.
3026 void printRecipe(raw_ostream &O, const Twine &Indent,
3027 VPSlotTracker &SlotTracker) const override;
3028#endif
3029};
3030
3031/// A recipe for generating conditional branches on the bits of a mask.
3033public:
3035 : VPRecipeBase(VPDef::VPBranchOnMaskSC, {BlockInMask}, DL) {}
3036
3039 }
3040
3041 VP_CLASSOF_IMPL(VPDef::VPBranchOnMaskSC)
3042
3043 /// Generate the extraction of the appropriate bit from the block mask and the
3044 /// conditional branch.
3045 void execute(VPTransformState &State) override;
3046
3047 /// Return the cost of this VPBranchOnMaskRecipe.
3048 InstructionCost computeCost(ElementCount VF,
3049 VPCostContext &Ctx) const override;
3050
3051#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3052 /// Print the recipe.
3053 void printRecipe(raw_ostream &O, const Twine &Indent,
3054 VPSlotTracker &SlotTracker) const override {
3055 O << Indent << "BRANCH-ON-MASK ";
3057 }
3058#endif
3059
3060 /// Returns true if the recipe uses scalars of operand \p Op.
3061 bool usesScalars(const VPValue *Op) const override {
3063 "Op must be an operand of the recipe");
3064 return true;
3065 }
3066};
3067
3068/// A recipe to combine multiple recipes into a single 'expression' recipe,
3069/// which should be considered a single entity for cost-modeling and transforms.
3070/// The recipe needs to be 'decomposed', i.e. replaced by its individual
3071/// expression recipes, before execute. The individual expression recipes are
3072/// completely disconnected from the def-use graph of other recipes not part of
3073/// the expression. Def-use edges between pairs of expression recipes remain
3074/// intact, whereas every edge between an expression recipe and a recipe outside
3075/// the expression is elevated to connect the non-expression recipe with the
3076/// VPExpressionRecipe itself.
3077class VPExpressionRecipe : public VPSingleDefRecipe {
3078 /// Recipes included in this VPExpressionRecipe. This could contain
3079 /// duplicates.
3080 SmallVector<VPSingleDefRecipe *> ExpressionRecipes;
3081
3082 /// Temporary VPValues used for external operands of the expression, i.e.
3083 /// operands not defined by recipes in the expression.
3084 SmallVector<VPValue *> LiveInPlaceholders;
3085
3086 enum class ExpressionTypes {
3087 /// Represents an inloop extended reduction operation, performing a
3088 /// reduction on an extended vector operand into a scalar value, and adding
3089 /// the result to a chain.
3090 ExtendedReduction,
3091 /// Represent an inloop multiply-accumulate reduction, multiplying the
3092 /// extended vector operands, performing a reduction.add on the result, and
3093 /// adding the scalar result to a chain.
3094 ExtMulAccReduction,
3095 /// Represent an inloop multiply-accumulate reduction, multiplying the
3096 /// vector operands, performing a reduction.add on the result, and adding
3097 /// the scalar result to a chain.
3098 MulAccReduction,
3099 /// Represent an inloop multiply-accumulate reduction, multiplying the
3100 /// extended vector operands, negating the multiplication, performing a
3101 /// reduction.add on the result, and adding the scalar result to a chain.
3102 ExtNegatedMulAccReduction,
3103 };
3104
3105 /// Type of the expression.
3106 ExpressionTypes ExpressionType;
3107
3108 /// Construct a new VPExpressionRecipe by internalizing recipes in \p
3109 /// ExpressionRecipes. External operands (i.e. not defined by another recipe
3110 /// in the expression) are replaced by temporary VPValues and the original
3111 /// operands are transferred to the VPExpressionRecipe itself. Clone recipes
3112 /// as needed (excluding last) to ensure they are only used by other recipes
3113 /// in the expression.
3114 VPExpressionRecipe(ExpressionTypes ExpressionType,
3115 ArrayRef<VPSingleDefRecipe *> ExpressionRecipes);
3116
3117public:
3119 : VPExpressionRecipe(ExpressionTypes::ExtendedReduction, {Ext, Red}) {}
3121 : VPExpressionRecipe(ExpressionTypes::MulAccReduction, {Mul, Red}) {}
3124 : VPExpressionRecipe(ExpressionTypes::ExtMulAccReduction,
3125 {Ext0, Ext1, Mul, Red}) {}
3128 VPReductionRecipe *Red)
3129 : VPExpressionRecipe(ExpressionTypes::ExtNegatedMulAccReduction,
3130 {Ext0, Ext1, Mul, Sub, Red}) {
3131 assert(Mul->getOpcode() == Instruction::Mul && "Expected a mul");
3132 assert(Red->getRecurrenceKind() == RecurKind::Add &&
3133 "Expected an add reduction");
3134 assert(getNumOperands() >= 3 && "Expected at least three operands");
3135 [[maybe_unused]] auto *SubConst = dyn_cast<ConstantInt>(getOperand(2)->getLiveInIRValue());
3136 assert(SubConst && SubConst->getValue() == 0 &&
3137 Sub->getOpcode() == Instruction::Sub && "Expected a negating sub");
3138 }
3139
3141 SmallPtrSet<VPSingleDefRecipe *, 4> ExpressionRecipesSeen;
3142 for (auto *R : reverse(ExpressionRecipes)) {
3143 if (ExpressionRecipesSeen.insert(R).second)
3144 delete R;
3145 }
3146 for (VPValue *T : LiveInPlaceholders)
3147 delete T;
3148 }
3149
3150 VP_CLASSOF_IMPL(VPDef::VPExpressionSC)
3151
3152 VPExpressionRecipe *clone() override {
3153 assert(!ExpressionRecipes.empty() && "empty expressions should be removed");
3154 SmallVector<VPSingleDefRecipe *> NewExpressiondRecipes;
3155 for (auto *R : ExpressionRecipes)
3156 NewExpressiondRecipes.push_back(R->clone());
3157 for (auto *New : NewExpressiondRecipes) {
3158 for (const auto &[Idx, Old] : enumerate(ExpressionRecipes))
3159 New->replaceUsesOfWith(Old, NewExpressiondRecipes[Idx]);
3160 // Update placeholder operands in the cloned recipe to use the external
3161 // operands, to be internalized when the cloned expression is constructed.
3162 for (const auto &[Placeholder, OutsideOp] :
3163 zip(LiveInPlaceholders, operands()))
3164 New->replaceUsesOfWith(Placeholder, OutsideOp);
3165 }
3166 return new VPExpressionRecipe(ExpressionType, NewExpressiondRecipes);
3167 }
3168
3169 /// Return the VPValue to use to infer the result type of the recipe.
3171 unsigned OpIdx =
3172 cast<VPReductionRecipe>(ExpressionRecipes.back())->isConditional() ? 2
3173 : 1;
3174 return getOperand(getNumOperands() - OpIdx);
3175 }
3176
3177 /// Insert the recipes of the expression back into the VPlan, directly before
3178 /// the current recipe. Leaves the expression recipe empty, which must be
3179 /// removed before codegen.
3180 void decompose();
3181
3182 unsigned getVFScaleFactor() const {
3183 auto *PR = dyn_cast<VPReductionRecipe>(ExpressionRecipes.back());
3184 return PR ? PR->getVFScaleFactor() : 1;
3185 }
3186
3187 /// Method for generating code, must not be called as this recipe is abstract.
3188 void execute(VPTransformState &State) override {
3189 llvm_unreachable("recipe must be removed before execute");
3190 }
3191
3193 VPCostContext &Ctx) const override;
3194
3195 /// Returns true if this expression contains recipes that may read from or
3196 /// write to memory.
3197 bool mayReadOrWriteMemory() const;
3198
3199 /// Returns true if this expression contains recipes that may have side
3200 /// effects.
3201 bool mayHaveSideEffects() const;
3202
3203 /// Returns true if the result of this VPExpressionRecipe is a single-scalar.
3204 bool isSingleScalar() const;
3205
3206protected:
3207#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3208 /// Print the recipe.
3209 void printRecipe(raw_ostream &O, const Twine &Indent,
3210 VPSlotTracker &SlotTracker) const override;
3211#endif
3212};
3213
3214/// VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when
3215/// control converges back from a Branch-on-Mask. The phi nodes are needed in
3216/// order to merge values that are set under such a branch and feed their uses.
3217/// The phi nodes can be scalar or vector depending on the users of the value.
3218/// This recipe works in concert with VPBranchOnMaskRecipe.
3220public:
3221 /// Construct a VPPredInstPHIRecipe given \p PredV whose value needs phi
3222 /// nodes after merging back from a Branch-on-Mask.
3224 : VPSingleDefRecipe(VPDef::VPPredInstPHISC, PredV, DL) {}
3225 ~VPPredInstPHIRecipe() override = default;
3226
3228 return new VPPredInstPHIRecipe(getOperand(0), getDebugLoc());
3229 }
3230
3231 VP_CLASSOF_IMPL(VPDef::VPPredInstPHISC)
3232
3233 /// Generates phi nodes for live-outs (from a replicate region) as needed to
3234 /// retain SSA form.
3235 void execute(VPTransformState &State) override;
3236
3237 /// Return the cost of this VPPredInstPHIRecipe.
3239 VPCostContext &Ctx) const override {
3240 // TODO: Compute accurate cost after retiring the legacy cost model.
3241 return 0;
3242 }
3243
3244 /// Returns true if the recipe uses scalars of operand \p Op.
3245 bool usesScalars(const VPValue *Op) const override {
3247 "Op must be an operand of the recipe");
3248 return true;
3249 }
3250
3251protected:
3252#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3253 /// Print the recipe.
3254 void printRecipe(raw_ostream &O, const Twine &Indent,
3255 VPSlotTracker &SlotTracker) const override;
3256#endif
3257};
3258
3259/// A common base class for widening memory operations. An optional mask can be
3260/// provided as the last operand.
3262 public VPIRMetadata {
3263protected:
3265
3266 /// Alignment information for this memory access.
3268
3269 /// Whether the accessed addresses are consecutive.
3271
3272 /// Whether the consecutive accessed addresses are in reverse order.
3274
3275 /// Whether the memory access is masked.
3276 bool IsMasked = false;
3277
3278 void setMask(VPValue *Mask) {
3279 assert(!IsMasked && "cannot re-set mask");
3280 if (!Mask)
3281 return;
3282 addOperand(Mask);
3283 IsMasked = true;
3284 }
3285
3286 VPWidenMemoryRecipe(const char unsigned SC, Instruction &I,
3287 std::initializer_list<VPValue *> Operands,
3288 bool Consecutive, bool Reverse,
3289 const VPIRMetadata &Metadata, DebugLoc DL)
3290 : VPRecipeBase(SC, Operands, DL), VPIRMetadata(Metadata), Ingredient(I),
3292 Reverse(Reverse) {
3293 assert((Consecutive || !Reverse) && "Reverse implies consecutive");
3295 "Reversed acccess without VPVectorEndPointerRecipe address?");
3296 }
3297
3298public:
3300 llvm_unreachable("cloning not supported");
3301 }
3302
3303 static inline bool classof(const VPRecipeBase *R) {
3304 return R->getVPDefID() == VPRecipeBase::VPWidenLoadSC ||
3305 R->getVPDefID() == VPRecipeBase::VPWidenStoreSC ||
3306 R->getVPDefID() == VPRecipeBase::VPWidenLoadEVLSC ||
3307 R->getVPDefID() == VPRecipeBase::VPWidenStoreEVLSC;
3308 }
3309
3310 static inline bool classof(const VPUser *U) {
3311 auto *R = dyn_cast<VPRecipeBase>(U);
3312 return R && classof(R);
3313 }
3314
3315 /// Return whether the loaded-from / stored-to addresses are consecutive.
3316 bool isConsecutive() const { return Consecutive; }
3317
3318 /// Return whether the consecutive loaded/stored addresses are in reverse
3319 /// order.
3320 bool isReverse() const { return Reverse; }
3321
3322 /// Return the address accessed by this recipe.
3323 VPValue *getAddr() const { return getOperand(0); }
3324
3325 /// Returns true if the recipe is masked.
3326 bool isMasked() const { return IsMasked; }
3327
3328 /// Return the mask used by this recipe. Note that a full mask is represented
3329 /// by a nullptr.
3330 VPValue *getMask() const {
3331 // Mask is optional and therefore the last operand.
3332 return isMasked() ? getOperand(getNumOperands() - 1) : nullptr;
3333 }
3334
3335 /// Returns the alignment of the memory access.
3336 Align getAlign() const { return Alignment; }
3337
3338 /// Generate the wide load/store.
3339 void execute(VPTransformState &State) override {
3340 llvm_unreachable("VPWidenMemoryRecipe should not be instantiated.");
3341 }
3342
3343 /// Return the cost of this VPWidenMemoryRecipe.
3344 InstructionCost computeCost(ElementCount VF,
3345 VPCostContext &Ctx) const override;
3346
3348};
3349
3350/// A recipe for widening load operations, using the address to load from and an
3351/// optional mask.
3353 public VPValue {
3355 bool Consecutive, bool Reverse,
3356 const VPIRMetadata &Metadata, DebugLoc DL)
3357 : VPWidenMemoryRecipe(VPDef::VPWidenLoadSC, Load, {Addr}, Consecutive,
3358 Reverse, Metadata, DL),
3359 VPValue(this, &Load) {
3360 setMask(Mask);
3361 }
3362
3365 getMask(), Consecutive, Reverse, *this,
3366 getDebugLoc());
3367 }
3368
3369 VP_CLASSOF_IMPL(VPDef::VPWidenLoadSC);
3370
3371 /// Generate a wide load or gather.
3372 void execute(VPTransformState &State) override;
3373
3374 /// Returns true if the recipe only uses the first lane of operand \p Op.
3375 bool usesFirstLaneOnly(const VPValue *Op) const override {
3377 "Op must be an operand of the recipe");
3378 // Widened, consecutive loads operations only demand the first lane of
3379 // their address.
3380 return Op == getAddr() && isConsecutive();
3381 }
3382
3383protected:
3384#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3385 /// Print the recipe.
3386 void printRecipe(raw_ostream &O, const Twine &Indent,
3387 VPSlotTracker &SlotTracker) const override;
3388#endif
3389};
3390
3391/// A recipe for widening load operations with vector-predication intrinsics,
3392/// using the address to load from, the explicit vector length and an optional
3393/// mask.
3394struct VPWidenLoadEVLRecipe final : public VPWidenMemoryRecipe, public VPValue {
3396 VPValue *Mask)
3397 : VPWidenMemoryRecipe(VPDef::VPWidenLoadEVLSC, L.getIngredient(),
3398 {Addr, &EVL}, L.isConsecutive(), L.isReverse(), L,
3399 L.getDebugLoc()),
3400 VPValue(this, &getIngredient()) {
3401 setMask(Mask);
3402 }
3403
3404 VP_CLASSOF_IMPL(VPDef::VPWidenLoadEVLSC)
3405
3406 /// Return the EVL operand.
3407 VPValue *getEVL() const { return getOperand(1); }
3408
3409 /// Generate the wide load or gather.
3410 LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
3411
3412 /// Return the cost of this VPWidenLoadEVLRecipe.
3414 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
3415
3416 /// Returns true if the recipe only uses the first lane of operand \p Op.
3417 bool usesFirstLaneOnly(const VPValue *Op) const override {
3419 "Op must be an operand of the recipe");
3420 // Widened loads only demand the first lane of EVL and consecutive loads
3421 // only demand the first lane of their address.
3422 return Op == getEVL() || (Op == getAddr() && isConsecutive());
3423 }
3424
3425protected:
3426#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3427 /// Print the recipe.
3428 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
3429 VPSlotTracker &SlotTracker) const override;
3430#endif
3431};
3432
3433/// A recipe for widening store operations, using the stored value, the address
3434/// to store to and an optional mask.
3436 VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal,
3437 VPValue *Mask, bool Consecutive, bool Reverse,
3438 const VPIRMetadata &Metadata, DebugLoc DL)
3439 : VPWidenMemoryRecipe(VPDef::VPWidenStoreSC, Store, {Addr, StoredVal},
3440 Consecutive, Reverse, Metadata, DL) {
3441 setMask(Mask);
3442 }
3443
3449
3450 VP_CLASSOF_IMPL(VPDef::VPWidenStoreSC);
3451
3452 /// Return the value stored by this recipe.
3453 VPValue *getStoredValue() const { return getOperand(1); }
3454
3455 /// Generate a wide store or scatter.
3456 void execute(VPTransformState &State) override;
3457
3458 /// Returns true if the recipe only uses the first lane of operand \p Op.
3459 bool usesFirstLaneOnly(const VPValue *Op) const override {
3461 "Op must be an operand of the recipe");
3462 // Widened, consecutive stores only demand the first lane of their address,
3463 // unless the same operand is also stored.
3464 return Op == getAddr() && isConsecutive() && Op != getStoredValue();
3465 }
3466
3467protected:
3468#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3469 /// Print the recipe.
3470 void printRecipe(raw_ostream &O, const Twine &Indent,
3471 VPSlotTracker &SlotTracker) const override;
3472#endif
3473};
3474
3475/// A recipe for widening store operations with vector-predication intrinsics,
3476/// using the value to store, the address to store to, the explicit vector
3477/// length and an optional mask.
3480 VPValue *Mask)
3481 : VPWidenMemoryRecipe(VPDef::VPWidenStoreEVLSC, S.getIngredient(),
3482 {Addr, S.getStoredValue(), &EVL}, S.isConsecutive(),
3483 S.isReverse(), S, S.getDebugLoc()) {
3484 setMask(Mask);
3485 }
3486
3487 VP_CLASSOF_IMPL(VPDef::VPWidenStoreEVLSC)
3488
3489 /// Return the value stored by this recipe.
3490 VPValue *getStoredValue() const { return getOperand(1); }
3491
3492 /// Return the EVL operand.
3493 VPValue *getEVL() const { return getOperand(2); }
3494
3495 /// Generate the wide store or scatter.
3496 LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
3497
3498 /// Return the cost of this VPWidenStoreEVLRecipe.
3500 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
3501
3502 /// Returns true if the recipe only uses the first lane of operand \p Op.
3503 bool usesFirstLaneOnly(const VPValue *Op) const override {
3505 "Op must be an operand of the recipe");
3506 if (Op == getEVL()) {
3507 assert(getStoredValue() != Op && "unexpected store of EVL");
3508 return true;
3509 }
3510 // Widened, consecutive memory operations only demand the first lane of
3511 // their address, unless the same operand is also stored. The latter can
3512 // happen with opaque pointers.
3513 return Op == getAddr() && isConsecutive() && Op != getStoredValue();
3514 }
3515
3516protected:
3517#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3518 /// Print the recipe.
3519 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
3520 VPSlotTracker &SlotTracker) const override;
3521#endif
3522};
3523
3524/// Recipe to expand a SCEV expression.
3526 const SCEV *Expr;
3527
3528public:
3530 : VPSingleDefRecipe(VPDef::VPExpandSCEVSC, {}), Expr(Expr) {}
3531
3532 ~VPExpandSCEVRecipe() override = default;
3533
3534 VPExpandSCEVRecipe *clone() override { return new VPExpandSCEVRecipe(Expr); }
3535
3536 VP_CLASSOF_IMPL(VPDef::VPExpandSCEVSC)
3537
3538 void execute(VPTransformState &State) override {
3539 llvm_unreachable("SCEV expressions must be expanded before final execute");
3540 }
3541
3542 /// Return the cost of this VPExpandSCEVRecipe.
3544 VPCostContext &Ctx) const override {
3545 // TODO: Compute accurate cost after retiring the legacy cost model.
3546 return 0;
3547 }
3548
3549 const SCEV *getSCEV() const { return Expr; }
3550
3551protected:
3552#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3553 /// Print the recipe.
3554 void printRecipe(raw_ostream &O, const Twine &Indent,
3555 VPSlotTracker &SlotTracker) const override;
3556#endif
3557};
3558
3559/// Canonical scalar induction phi of the vector loop. Starting at the specified
3560/// start value (either 0 or the resume value when vectorizing the epilogue
3561/// loop). VPWidenCanonicalIVRecipe represents the vector version of the
3562/// canonical induction variable.
3564public:
3566 : VPHeaderPHIRecipe(VPDef::VPCanonicalIVPHISC, nullptr, StartV, DL) {}
3567
3568 ~VPCanonicalIVPHIRecipe() override = default;
3569
3571 auto *R = new VPCanonicalIVPHIRecipe(getOperand(0), getDebugLoc());
3572 R->addOperand(getBackedgeValue());
3573 return R;
3574 }
3575
3576 VP_CLASSOF_IMPL(VPDef::VPCanonicalIVPHISC)
3577
3578 void execute(VPTransformState &State) override {
3579 llvm_unreachable("cannot execute this recipe, should be replaced by a "
3580 "scalar phi recipe");
3581 }
3582
3583 /// Returns the scalar type of the induction.
3585 return getStartValue()->getLiveInIRValue()->getType();
3586 }
3587
3588 /// Returns true if the recipe only uses the first lane of operand \p Op.
3589 bool usesFirstLaneOnly(const VPValue *Op) const override {
3591 "Op must be an operand of the recipe");
3592 return true;
3593 }
3594
3595 /// Returns true if the recipe only uses the first part of operand \p Op.
3596 bool usesFirstPartOnly(const VPValue *Op) const override {
3598 "Op must be an operand of the recipe");
3599 return true;
3600 }
3601
3602 /// Return the cost of this VPCanonicalIVPHIRecipe.
3604 VPCostContext &Ctx) const override {
3605 // For now, match the behavior of the legacy cost model.
3606 return 0;
3607 }
3608
3609protected:
3610#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3611 /// Print the recipe.
3612 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
3613 VPSlotTracker &SlotTracker) const override;
3614#endif
3615};
3616
3617/// A recipe for generating the active lane mask for the vector loop that is
3618/// used to predicate the vector operations.
3620public:
3622 : VPHeaderPHIRecipe(VPDef::VPActiveLaneMaskPHISC, nullptr, StartMask,
3623 DL) {}
3624
3625 ~VPActiveLaneMaskPHIRecipe() override = default;
3626
3629 if (getNumOperands() == 2)
3630 R->addOperand(getOperand(1));
3631 return R;
3632 }
3633
3634 VP_CLASSOF_IMPL(VPDef::VPActiveLaneMaskPHISC)
3635
3636 /// Generate the active lane mask phi of the vector loop.
3637 void execute(VPTransformState &State) override;
3638
3639protected:
3640#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3641 /// Print the recipe.
3642 void printRecipe(raw_ostream &O, const Twine &Indent,
3643 VPSlotTracker &SlotTracker) const override;
3644#endif
3645};
3646
3647/// A recipe for generating the phi node for the current index of elements,
3648/// adjusted in accordance with EVL value. It starts at the start value of the
3649/// canonical induction and gets incremented by EVL in each iteration of the
3650/// vector loop.
3652public:
3654 : VPHeaderPHIRecipe(VPDef::VPEVLBasedIVPHISC, nullptr, StartIV, DL) {}
3655
3656 ~VPEVLBasedIVPHIRecipe() override = default;
3657
3659 llvm_unreachable("cloning not implemented yet");
3660 }
3661
3662 VP_CLASSOF_IMPL(VPDef::VPEVLBasedIVPHISC)
3663
3664 void execute(VPTransformState &State) override {
3665 llvm_unreachable("cannot execute this recipe, should be replaced by a "
3666 "scalar phi recipe");
3667 }
3668
3669 /// Return the cost of this VPEVLBasedIVPHIRecipe.
3671 VPCostContext &Ctx) const override {
3672 // For now, match the behavior of the legacy cost model.
3673 return 0;
3674 }
3675
3676 /// Returns true if the recipe only uses the first lane of operand \p Op.
3677 bool usesFirstLaneOnly(const VPValue *Op) const override {
3679 "Op must be an operand of the recipe");
3680 return true;
3681 }
3682
3683protected:
3684#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3685 /// Print the recipe.
3686 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
3687 VPSlotTracker &SlotTracker) const override;
3688#endif
3689};
3690
3691/// A Recipe for widening the canonical induction variable of the vector loop.
3693 public VPUnrollPartAccessor<1> {
3694public:
3696 : VPSingleDefRecipe(VPDef::VPWidenCanonicalIVSC, {CanonicalIV}) {}
3697
3698 ~VPWidenCanonicalIVRecipe() override = default;
3699
3704
3705 VP_CLASSOF_IMPL(VPDef::VPWidenCanonicalIVSC)
3706
3707 /// Generate a canonical vector induction variable of the vector loop, with
3708 /// start = {<Part*VF, Part*VF+1, ..., Part*VF+VF-1> for 0 <= Part < UF}, and
3709 /// step = <VF*UF, VF*UF, ..., VF*UF>.
3710 void execute(VPTransformState &State) override;
3711
3712 /// Return the cost of this VPWidenCanonicalIVRecipe.
3714 VPCostContext &Ctx) const override {
3715 // TODO: Compute accurate cost after retiring the legacy cost model.
3716 return 0;
3717 }
3718
3719protected:
3720#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3721 /// Print the recipe.
3722 void printRecipe(raw_ostream &O, const Twine &Indent,
3723 VPSlotTracker &SlotTracker) const override;
3724#endif
3725};
3726
3727 /// A recipe for converting the input value \p IV to the corresponding
3728/// value of an IV with different start and step values, using Start + IV *
3729/// Step.
3731 /// Kind of the induction.
3733 /// If not nullptr, the floating point induction binary operator. Must be set
3734 /// for floating point inductions.
3735 const FPMathOperator *FPBinOp;
3736
3737 /// Name to use for the generated IR instruction for the derived IV.
3738 std::string Name;
3739
3740public:
3742 VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step,
3743 const Twine &Name = "")
3745 IndDesc.getKind(),
3746 dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp()),
3747 Start, CanonicalIV, Step, Name) {}
3748
3750 const FPMathOperator *FPBinOp, VPValue *Start, VPValue *IV,
3751 VPValue *Step, const Twine &Name = "")
3752 : VPSingleDefRecipe(VPDef::VPDerivedIVSC, {Start, IV, Step}), Kind(Kind),
3753 FPBinOp(FPBinOp), Name(Name.str()) {}
3754
3755 ~VPDerivedIVRecipe() override = default;
3756
3758 return new VPDerivedIVRecipe(Kind, FPBinOp, getStartValue(), getOperand(1),
3759 getStepValue());
3760 }
3761
3762 VP_CLASSOF_IMPL(VPDef::VPDerivedIVSC)
3763
3764 /// Generate the transformed value of the induction at offset StartValue (1.
3765 /// operand) + IV (2. operand) * StepValue (3. operand).
3766 void execute(VPTransformState &State) override;
3767
3768 /// Return the cost of this VPDerivedIVRecipe.
3770 VPCostContext &Ctx) const override {
3771 // TODO: Compute accurate cost after retiring the legacy cost model.
3772 return 0;
3773 }
3774
3776 return getStartValue()->getLiveInIRValue()->getType();
3777 }
3778
3779 VPValue *getStartValue() const { return getOperand(0); }
3780 VPValue *getStepValue() const { return getOperand(2); }
3781
3782 /// Returns true if the recipe only uses the first lane of operand \p Op.
3783 bool usesFirstLaneOnly(const VPValue *Op) const override {
3785 "Op must be an operand of the recipe");
3786 return true;
3787 }
3788
3789protected:
3790#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3791 /// Print the recipe.
3792 void printRecipe(raw_ostream &O, const Twine &Indent,
3793 VPSlotTracker &SlotTracker) const override;
3794#endif
3795};
3796
3797/// A recipe for handling phi nodes of integer and floating-point inductions,
3798/// producing their scalar values.
3800 public VPUnrollPartAccessor<3> {
3801 Instruction::BinaryOps InductionOpcode;
3802
3803public:
3806 DebugLoc DL)
3807 : VPRecipeWithIRFlags(VPDef::VPScalarIVStepsSC,
3808 ArrayRef<VPValue *>({IV, Step, VF}), FMFs, DL),
3809 InductionOpcode(Opcode) {}
3810
3812 VPValue *Step, VPValue *VF,
3815 IV, Step, VF, IndDesc.getInductionOpcode(),
3816 dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp())
3817 ? IndDesc.getInductionBinOp()->getFastMathFlags()
3818 : FastMathFlags(),
3819 DL) {}
3820
3821 ~VPScalarIVStepsRecipe() override = default;
3822
3824 return new VPScalarIVStepsRecipe(
3825 getOperand(0), getOperand(1), getOperand(2), InductionOpcode,
3827 getDebugLoc());
3828 }
3829
3830 /// Return true if this VPScalarIVStepsRecipe corresponds to part 0. Note that
3831 /// this is only accurate after the VPlan has been unrolled.
3832 bool isPart0() const { return getUnrollPart(*this) == 0; }
3833
3834 VP_CLASSOF_IMPL(VPDef::VPScalarIVStepsSC)
3835
3836 /// Generate the scalarized versions of the phi node as needed by their users.
3837 void execute(VPTransformState &State) override;
3838
3839 /// Return the cost of this VPScalarIVStepsRecipe.
3841 VPCostContext &Ctx) const override {
3842 // TODO: Compute accurate cost after retiring the legacy cost model.
3843 return 0;
3844 }
3845
3846 VPValue *getStepValue() const { return getOperand(1); }
3847
3848 /// Returns true if the recipe only uses the first lane of operand \p Op.
3849 bool usesFirstLaneOnly(const VPValue *Op) const override {
3851 "Op must be an operand of the recipe");
3852 return true;
3853 }
3854
3855protected:
3856#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3857 /// Print the recipe.
3858 void printRecipe(raw_ostream &O, const Twine &Indent,
3859 VPSlotTracker &SlotTracker) const override;
3860#endif
3861};
3862
3863/// Casting from VPRecipeBase -> VPPhiAccessors is supported for all recipe
3864/// types implementing VPPhiAccessors. Used by isa<> & co.
3866 static inline bool isPossible(const VPRecipeBase *f) {
3867 // TODO: include VPPredInstPHIRecipe too, once it implements VPPhiAccessors.
3869 }
3870};
3871/// Support casting from VPRecipeBase -> VPPhiAccessors, by down-casting to the
3872/// recipe types implementing VPPhiAccessors. Used by cast<>, dyn_cast<> & co.
3873template <typename SrcTy>
3874struct CastInfoVPPhiAccessors : public CastIsPossible<VPPhiAccessors, SrcTy> {
3875
3877
3878 /// doCast is used by cast<>. Dispatches on R->getVPDefID() to the concrete phi-like recipe class and returns it as a VPPhiAccessors pointer.
3879 static inline VPPhiAccessors *doCast(SrcTy R) {
3880 return const_cast<VPPhiAccessors *>([R]() -> const VPPhiAccessors * { // The lambda yields a const pointer; constness is dropped again by the const_cast.
3881 switch (R->getVPDefID()) {
3882 case VPDef::VPInstructionSC:
3883 return cast<VPPhi>(R);
3884 case VPDef::VPIRInstructionSC:
3885 return cast<VPIRPhi>(R);
3886 case VPDef::VPWidenPHISC:
3887 return cast<VPWidenPHIRecipe>(R);
3888 default: // Every other ID is treated as a header phi recipe.
3889 return cast<VPHeaderPHIRecipe>(R); // NOTE(review): relies on cast<> to reject recipes that are not header phis - confirm.
3890 }
3891 }());
3892 }
3893
3894 /// doCastIfPossible is used by dyn_cast<>. Returns nullptr when Self::isPossible rejects \p f, otherwise forwards to doCast.
3895 static inline VPPhiAccessors *doCastIfPossible(SrcTy f) {
3896 if (!Self::isPossible(f))
3897 return nullptr;
3898 return doCast(f);
3899 }
3900};
3901template <>
3904template <>
3907
3908/// Casting from (const) VPRecipeBase -> (const) VPIRMetadata is supported for
3909/// all recipe types implementing VPIRMetadata. Used by isa<> & co.
3910namespace detail {
3911template <typename DstTy, typename RecipeBasePtrTy>
3912static inline auto castToVPIRMetadata(RecipeBasePtrTy R) -> DstTy {
3913 switch (R->getVPDefID()) {
3914 case VPDef::VPInstructionSC:
3915 return cast<VPInstruction>(R);
3916 case VPDef::VPWidenSC:
3917 return cast<VPWidenRecipe>(R);
3918 case VPDef::VPWidenCastSC:
3919 return cast<VPWidenCastRecipe>(R);
3920 case VPDef::VPWidenIntrinsicSC:
3922 case VPDef::VPWidenCallSC:
3923 return cast<VPWidenCallRecipe>(R);
3924 case VPDef::VPWidenSelectSC:
3925 return cast<VPWidenSelectRecipe>(R);
3926 case VPDef::VPReplicateSC:
3927 return cast<VPReplicateRecipe>(R);
3928 case VPDef::VPInterleaveSC:
3929 case VPDef::VPInterleaveEVLSC:
3930 return cast<VPInterleaveBase>(R);
3931 case VPDef::VPWidenLoadSC:
3932 case VPDef::VPWidenLoadEVLSC:
3933 case VPDef::VPWidenStoreSC:
3934 case VPDef::VPWidenStoreEVLSC:
3935 return cast<VPWidenMemoryRecipe>(R);
3936 default:
3937 llvm_unreachable("invalid recipe for VPIRMetadata cast");
3938 }
3939}
3940} // namespace detail
3941
3942/// Support casting from VPRecipeBase -> VPIRMetadata, by down-casting to the
3943/// recipe types implementing VPIRMetadata. Used by cast<>, dyn_cast<> & co.
3944template <typename DstTy, typename SrcTy>
3945struct CastInfoVPIRMetadata : public CastIsPossible<DstTy, SrcTy> {
3946 static inline bool isPossible(SrcTy R) {
3947 // NOTE: Each recipe inheriting from VPIRMetadata must be listed here and
3948 // also handled in castToVPIRMetadata.
3954 }
3955
3956 using RetTy = DstTy *;
3957
3958 /// doCast is used by cast<>.
3959 static inline RetTy doCast(SrcTy R) {
3961 }
3962
3963 /// doCastIfPossible is used by dyn_cast<>. Returns nullptr when isPossible rejects \p R, otherwise forwards to doCast.
3964 static inline RetTy doCastIfPossible(SrcTy R) {
3965 if (!isPossible(R))
3966 return nullptr;
3967 return doCast(R);
3968 }
3969};
3970template <>
3973template <>
3976
3977/// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It
3978/// holds a sequence of zero or more VPRecipe's each representing a sequence of
3979/// output IR instructions. All PHI-like recipes must come before any non-PHI recipes.
3980class LLVM_ABI_FOR_TEST VPBasicBlock : public VPBlockBase {
3981 friend class VPlan;
3982
3983 /// Use VPlan::createVPBasicBlock to create VPBasicBlocks. Creates a block with the VPBasicBlockSC ID named \p Name and, when \p Recipe is non-null, appends it as the block's recipe.
3984 VPBasicBlock(const Twine &Name = "", VPRecipeBase *Recipe = nullptr)
3985 : VPBlockBase(VPBasicBlockSC, Name.str()) {
3986 if (Recipe)
3987 appendRecipe(Recipe);
3988 }
3989
3990public:
3992
3993protected:
3994 /// The VPRecipes held in the order of output instructions to generate.
3996
3997 VPBasicBlock(const unsigned char BlockSC, const Twine &Name = "") // Protected: lets subclasses such as VPIRBasicBlock pass their own block ID \p BlockSC.
3998 : VPBlockBase(BlockSC, Name.str()) {}
3999
4000public:
4001 ~VPBasicBlock() override { // Empties the recipe list, removing recipes from the back one at a time.
4002 while (!Recipes.empty())
4003 Recipes.pop_back(); // NOTE(review): presumably the list deletes the recipe it removes - confirm against RecipeListTy.
4004 }
4005
4006 /// Instruction iterators...
4011
4012 //===--------------------------------------------------------------------===//
4013 /// Recipe iterator methods
4014 /// All accessors below forward directly to the underlying recipe list (Recipes).
4015 inline iterator begin() { return Recipes.begin(); }
4016 inline const_iterator begin() const { return Recipes.begin(); }
4017 inline iterator end() { return Recipes.end(); }
4018 inline const_iterator end() const { return Recipes.end(); }
4019
4020 inline reverse_iterator rbegin() { return Recipes.rbegin(); }
4021 inline const_reverse_iterator rbegin() const { return Recipes.rbegin(); }
4022 inline reverse_iterator rend() { return Recipes.rend(); }
4023 inline const_reverse_iterator rend() const { return Recipes.rend(); }
4024
4025 inline size_t size() const { return Recipes.size(); } // Number of recipes in the block.
4026 inline bool empty() const { return Recipes.empty(); }
4027 inline const VPRecipeBase &front() const { return Recipes.front(); } // front()/back() do no emptiness check here; NOTE(review): callers presumably must ensure !empty() - confirm.
4028 inline VPRecipeBase &front() { return Recipes.front(); }
4029 inline const VPRecipeBase &back() const { return Recipes.back(); }
4030 inline VPRecipeBase &back() { return Recipes.back(); }
4031
4032 /// Returns a reference to the list of recipes.
4034
4035 /// Returns a pointer-to-member designating the recipe list (Recipes) of a VPBasicBlock. The VPRecipeBase* argument is unused. NOTE(review): presumably a hook for the intrusive-list machinery - confirm.
4036 static RecipeListTy VPBasicBlock::*getSublistAccess(VPRecipeBase *) {
4037 return &VPBasicBlock::Recipes;
4038 }
4039
4040 /// Method to support type inquiry through isa, cast, and dyn_cast. Accepts both the VPBasicBlockSC and VPIRBasicBlockSC block IDs, because VPIRBasicBlock derives from VPBasicBlock.
4041 static inline bool classof(const VPBlockBase *V) {
4042 return V->getVPBlockID() == VPBlockBase::VPBasicBlockSC ||
4043 V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
4044 }
4045
4046 void insert(VPRecipeBase *Recipe, iterator InsertPt) { // Inserts \p Recipe into this block at \p InsertPt and makes this block its parent.
4047 assert(Recipe && "No recipe to append."); // \p Recipe must be non-null.
4048 assert(!Recipe->Parent && "Recipe already in VPlan"); // \p Recipe must not already belong to a block.
4049 Recipe->Parent = this;
4050 Recipes.insert(InsertPt, Recipe);
4051 }
4052
4053 /// Augment the existing recipes of a VPBasicBlock with an additional
4054 /// \p Recipe as the last recipe. Forwards to insert() with end() as the insertion point, so the same preconditions apply: \p Recipe is non-null and has no parent yet.
4055 void appendRecipe(VPRecipeBase *Recipe) { insert(Recipe, end()); }
4056
4057 /// The method which generates the output IR instructions that correspond to
4058 /// this VPBasicBlock, thereby "executing" the VPlan.
4059 void execute(VPTransformState *State) override;
4060
4061 /// Return the cost of this VPBasicBlock.
4062 InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override;
4063
4064 /// Return the position of the first non-phi node recipe in the block.
4065 iterator getFirstNonPhi();
4066
4067 /// Returns an iterator range over the PHI-like recipes in the block.
4071
4072 /// Split current block at \p SplitAt by inserting a new block between the
4073 /// current block and its successors and moving all recipes starting at
4074 /// SplitAt to the new block. Returns the new block.
4075 VPBasicBlock *splitAt(iterator SplitAt);
4076
4077 VPRegionBlock *getEnclosingLoopRegion();
4078 const VPRegionBlock *getEnclosingLoopRegion() const;
4079
4080#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4081 /// Print this VPBasicBlock to \p O, prefixing all lines with \p Indent. \p
4082 /// SlotTracker is used to print unnamed VPValue's using consecutive numbers.
4083 ///
4084 /// Note that the numbering is applied to the whole VPlan, so printing
4085 /// individual blocks is consistent with the whole VPlan printing.
4086 void print(raw_ostream &O, const Twine &Indent,
4087 VPSlotTracker &SlotTracker) const override;
4088 using VPBlockBase::print; // Get the print(raw_stream &O) version.
4089#endif
4090
4091 /// If the block has multiple successors, return the branch recipe terminating
4092 /// the block. If there is no successor or only a single one, return nullptr.
4093 VPRecipeBase *getTerminator();
4094 const VPRecipeBase *getTerminator() const;
4095
4096 /// Returns true if the block is exiting its parent region.
4097 bool isExiting() const;
4098
4099 /// Clone the current block and its recipes, without updating the operands of
4100 /// the cloned recipes.
4101 VPBasicBlock *clone() override;
4102
4103 /// Returns the predecessor block at index \p Idx with the predecessors as per
4104 /// the corresponding plain CFG. If the block is an entry block to a region,
4105 /// the first predecessor is the single predecessor of a region, and the
4106 /// second predecessor is the exiting block of the region.
4107 const VPBasicBlock *getCFGPredecessor(unsigned Idx) const;
4108
4109protected:
4110 /// Execute the recipes in the IR basic block \p BB.
4111 void executeRecipes(VPTransformState *State, BasicBlock *BB);
4112
4113 /// Connect the predecessors of this VPBB in the VPlan CFG to the IR basic block
4114 /// generated for this VPBB.
4115 void connectToPredecessors(VPTransformState &State);
4116
4117private:
4118 /// Create an IR BasicBlock to hold the output instructions generated by this
4119 /// VPBasicBlock, and return it. Update the CFGState accordingly.
4120 BasicBlock *createEmptyBasicBlock(VPTransformState &State);
4121};
4122
4123inline const VPBasicBlock *
4125 return getAsRecipe()->getParent()->getCFGPredecessor(Idx);
4126}
4127
4128/// A special type of VPBasicBlock that wraps an existing IR basic block.
4129/// Recipes of the block get added before the first non-phi instruction in the
4130/// wrapped block.
4131/// Note: VPlan uses VPIRBasicBlocks to wrap the original preheader, the header
4132/// of the original scalar loop and the exit blocks of the original scalar loop.
4133class VPIRBasicBlock : public VPBasicBlock {
4134 friend class VPlan;
4135
4136 BasicBlock *IRBB; // The wrapped IR basic block; initialized by the constructor.
4137
4138 /// Use VPlan::createVPIRBasicBlock to create VPIRBasicBlocks. The block is named "ir-bb<NAME>", where NAME is the name of \p IRBB; \p IRBB must be non-null because its name is read here.
4139 VPIRBasicBlock(BasicBlock *IRBB)
4140 : VPBasicBlock(VPIRBasicBlockSC,
4141 (Twine("ir-bb<") + IRBB->getName() + Twine(">")).str()),
4142 IRBB(IRBB) {}
4143
4144public:
4145 ~VPIRBasicBlock() override = default;
4146
4147 static inline bool classof(const VPBlockBase *V) { // Type inquiry support: matches only the VPIRBasicBlockSC block ID.
4148 return V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
4149 }
4150
4151 /// The method which generates the output IR instructions that correspond to
4152 /// this VPIRBasicBlock, thereby "executing" the VPlan.
4153 void execute(VPTransformState *State) override;
4154
4155 VPIRBasicBlock *clone() override; // Overrides VPBasicBlock::clone() with a covariant return type.
4156
4157 BasicBlock *getIRBasicBlock() const { return IRBB; } // Returns the wrapped IR basic block.
4158};
4159
4160/// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks
4161/// which form a Single-Entry-Single-Exiting subgraph of the output IR CFG.
4162/// A VPRegionBlock may indicate that its contents are to be replicated several
4163/// times. This is designed to support predicated scalarization, in which a
4164/// scalar if-then code structure needs to be generated VF * UF times. Having
4165/// this replication indicator helps to keep a single model for multiple
4166/// candidate VF's. The actual replication takes place only once the desired VF
4167/// and UF have been determined.
4168class LLVM_ABI_FOR_TEST VPRegionBlock : public VPBlockBase {
4169 friend class VPlan;
4170
4171 /// Hold the Single Entry of the SESE region modelled by the VPRegionBlock.
4172 VPBlockBase *Entry;
4173
4174 /// Hold the Single Exiting block of the SESE region modelled by the
4175 /// VPRegionBlock.
4176 VPBlockBase *Exiting;
4177
4178 /// An indicator whether this region is to generate multiple replicated
4179 /// instances of output IR corresponding to its VPBlockBases.
4180 bool IsReplicator;
4181
4182 /// Use VPlan::createLoopRegion or VPlan's replicate-region factory to create VPRegionBlocks. \p Entry and \p Exiting must be non-null (both are dereferenced), \p Entry must have no predecessors and \p Exiting no successors; both blocks get this region as their parent.
4183 VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting,
4184 const std::string &Name = "", bool IsReplicator = false)
4185 : VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exiting(Exiting),
4186 IsReplicator(IsReplicator) {
4187 assert(Entry->getPredecessors().empty() && "Entry block has predecessors.");
4188 assert(Exiting->getSuccessors().empty() && "Exit block has successors.");
4189 Entry->setParent(this);
4190 Exiting->setParent(this);
4191 }
4192 VPRegionBlock(const std::string &Name = "", bool IsReplicator = false) // Creates an empty region: Entry and Exiting start as nullptr and can be provided later via setEntry()/setExiting().
4193 : VPBlockBase(VPRegionBlockSC, Name), Entry(nullptr), Exiting(nullptr),
4194 IsReplicator(IsReplicator) {}
4195
4196public:
4197 ~VPRegionBlock() override = default;
4198
4199 /// Method to support type inquiry through isa, cast, and dyn_cast.
4200 static inline bool classof(const VPBlockBase *V) {
4201 return V->getVPBlockID() == VPBlockBase::VPRegionBlockSC;
4202 }
4203
4204 const VPBlockBase *getEntry() const { return Entry; }
4205 VPBlockBase *getEntry() { return Entry; }
4206
4207 /// Set \p EntryBlock as the entry VPBlockBase of this VPRegionBlock. \p
4208 /// EntryBlock must be non-null and have no predecessors; this region becomes its parent.
4209 void setEntry(VPBlockBase *EntryBlock) {
4210 assert(EntryBlock->getPredecessors().empty() &&
4211 "Entry block cannot have predecessors.");
4212 Entry = EntryBlock;
4213 EntryBlock->setParent(this);
4214 }
4215
4216 const VPBlockBase *getExiting() const { return Exiting; }
4217 VPBlockBase *getExiting() { return Exiting; }
4218
4219 /// Set \p ExitingBlock as the exiting VPBlockBase of this VPRegionBlock. \p
4220 /// ExitingBlock must be non-null and have no successors; this region becomes its parent.
4221 void setExiting(VPBlockBase *ExitingBlock) {
4222 assert(ExitingBlock->getSuccessors().empty() &&
4223 "Exit block cannot have successors.");
4224 Exiting = ExitingBlock;
4225 ExitingBlock->setParent(this);
4226 }
4227
4228 /// Returns the pre-header VPBasicBlock of the loop region.
4230 assert(!isReplicator() && "should only get pre-header of loop regions");
4231 return getSinglePredecessor()->getExitingBasicBlock();
4232 }
4233
4234 /// An indicator whether this region is to generate multiple replicated
4235 /// instances of output IR corresponding to its VPBlockBases.
4236 bool isReplicator() const { return IsReplicator; }
4237
4238 /// The method which generates the output IR instructions that correspond to
4239 /// this VPRegionBlock, thereby "executing" the VPlan.
4240 void execute(VPTransformState *State) override;
4241
4242 /// Return the cost of this region.
4243 InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override;
4244
4245#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4246 /// Print this VPRegionBlock to \p O (recursively), prefixing all lines with
4247 /// \p Indent. \p SlotTracker is used to print unnamed VPValue's using
4248 /// consecutive numbers.
4249 ///
4250 /// Note that the numbering is applied to the whole VPlan, so printing
4251 /// individual regions is consistent with the whole VPlan printing.
4252 void print(raw_ostream &O, const Twine &Indent,
4253 VPSlotTracker &SlotTracker) const override;
4254 using VPBlockBase::print; // Get the print(raw_stream &O) version.
4255#endif
4256
4257 /// Clone all blocks in the single-entry single-exit region of the block and
4258 /// their recipes without updating the operands of the cloned recipes.
4259 VPRegionBlock *clone() override;
4260
4261 /// Remove the current region from its VPlan, connecting its predecessor to
4262 /// its entry, and its exiting block to its successor.
4263 void dissolveToCFGLoop();
4264
4265 /// Returns the canonical induction recipe of the region.
4267 VPBasicBlock *EntryVPBB = getEntryBasicBlock();
4268 if (EntryVPBB->empty()) {
4269 // VPlan native path. TODO: Unify both code paths.
4270 EntryVPBB = cast<VPBasicBlock>(EntryVPBB->getSingleSuccessor());
4271 }
4272 return cast<VPCanonicalIVPHIRecipe>(&*EntryVPBB->begin());
4273 }
4275 return const_cast<VPRegionBlock *>(this)->getCanonicalIV();
4276 }
4277
4278 /// Return the type of the canonical IV for loop regions, i.e. the scalar type reported by the recipe returned from getCanonicalIV(). Provided as const and non-const overloads.
4279 Type *getCanonicalIVType() { return getCanonicalIV()->getScalarType(); }
4280 const Type *getCanonicalIVType() const {
4281 return getCanonicalIV()->getScalarType();
4282 }
4283};
4284
4286 return getParent()->getParent();
4287}
4288
4290 return getParent()->getParent();
4291}
4292
4293/// VPlan models a candidate for vectorization, encoding various decisions taken
4294/// to produce efficient output IR, including which branches, basic-blocks and
4295/// output IR instructions to generate, and their cost. VPlan holds a
4296/// Hierarchical-CFG of VPBasicBlocks and VPRegionBlocks rooted at an Entry
4297/// VPBasicBlock.
4298class VPlan {
4299 friend class VPlanPrinter;
4300 friend class VPSlotTracker;
4301
4302 /// VPBasicBlock corresponding to the original preheader. Used to place
4303 /// VPExpandSCEV recipes for expressions used during skeleton creation and the
4304 /// rest of VPlan execution.
4305 /// When this VPlan is used for the epilogue vector loop, the entry will be
4306 /// replaced by a new entry block created during skeleton creation.
4307 VPBasicBlock *Entry;
4308
4309 /// VPIRBasicBlock wrapping the header of the original scalar loop.
4310 VPIRBasicBlock *ScalarHeader;
4311
4312 /// Immutable list of VPIRBasicBlocks wrapping the exit blocks of the original
4313 /// scalar loop. Note that some exit blocks may be unreachable at the moment,
4314 /// e.g. if the scalar epilogue always executes.
4316
4317 /// Holds the VFs applicable to this VPlan.
4319
4320 /// Holds the UFs applicable to this VPlan. If empty, the VPlan is valid for
4321 /// any UF.
4323
4324 /// Holds the name of the VPlan, for printing.
4325 std::string Name;
4326
4327 /// Represents the trip count of the original loop, for folding
4328 /// the tail.
4329 VPValue *TripCount = nullptr;
4330
4331 /// Represents the backedge taken count of the original loop, for folding
4332 /// the tail. It equals TripCount - 1.
4333 VPValue *BackedgeTakenCount = nullptr;
4334
4335 /// Represents the vector trip count.
4336 VPValue VectorTripCount;
4337
4338 /// Represents the vectorization factor of the loop.
4339 VPValue VF;
4340
4341 /// Represents the loop-invariant VF * UF of the vector loop region.
4342 VPValue VFxUF;
4343
4344 /// Contains all the external definitions created for this VPlan, as a mapping
4345 /// from IR Values to VPValues.
4347
4348 /// Blocks allocated and owned by the VPlan. They will be deleted once the
4349 /// VPlan is destroyed.
4350 SmallVector<VPBlockBase *> CreatedBlocks;
4351
4352 /// Construct a VPlan with \p Entry as the entry block of the plan and with \p ScalarHeader
4353 /// wrapping the original header of the scalar loop. Both must be non-null; \p Entry is registered with this plan via setPlan() and \p ScalarHeader must have no successors.
4354 VPlan(VPBasicBlock *Entry, VPIRBasicBlock *ScalarHeader)
4355 : Entry(Entry), ScalarHeader(ScalarHeader) {
4356 Entry->setPlan(this);
4357 assert(ScalarHeader->getNumSuccessors() == 0 &&
4358 "scalar header must be a leaf node");
4359 }
4360
4361public:
4362 /// Construct a VPlan for \p L. This will create VPIRBasicBlocks wrapping the
4363 /// original preheader and scalar header of \p L, to be used as entry and
4364 /// scalar header blocks of the new VPlan.
4365 VPlan(Loop *L);
4366
4367 /// Construct a VPlan with a new VPBasicBlock named "preheader" as entry and a
4368 /// VPIRBasicBlock wrapping \p ScalarHeaderBB. No trip count is set by this constructor.
4369 VPlan(BasicBlock *ScalarHeaderBB) {
4370 setEntry(createVPBasicBlock("preheader"));
4371 ScalarHeader = createVPIRBasicBlock(ScalarHeaderBB);
4372 }
4373
4375
4377 Entry = VPBB;
4378 VPBB->setPlan(this);
4379 }
4380
4381 /// Generate the IR code for this VPlan.
4382 void execute(VPTransformState *State);
4383
4384 /// Return the cost of this plan.
4386
4387 VPBasicBlock *getEntry() { return Entry; }
4388 const VPBasicBlock *getEntry() const { return Entry; }
4389
4390 /// Returns the preheader of the vector loop region, if one exists, or null
4391 /// otherwise.
4393 VPRegionBlock *VectorRegion = getVectorLoopRegion();
4394 return VectorRegion
4395 ? cast<VPBasicBlock>(VectorRegion->getSinglePredecessor())
4396 : nullptr;
4397 }
4398
4399 /// Returns the VPRegionBlock of the vector loop.
4402
4403 /// Returns the 'middle' block of the plan, that is the block that selects
4404 /// whether to execute the scalar tail loop or the exit block from the loop
4405 /// latch. If there is an early exit from the vector loop, the middle block
4406 /// conceptually has the early exit block as third successor, split across 2
4407 /// VPBBs. In that case, the second VPBB selects whether to execute the scalar
4408 /// tail loop or the exit block. If the scalar tail loop or exit block are
4409 /// known to always execute, the middle block may branch directly to that
4410 /// block. This function cannot be called once the vector loop region has been
4411 /// removed.
4413 VPRegionBlock *LoopRegion = getVectorLoopRegion();
4414 assert(
4415 LoopRegion &&
4416 "cannot call the function after vector loop region has been removed");
4417 auto *RegionSucc = cast<VPBasicBlock>(LoopRegion->getSingleSuccessor());
4418 if (RegionSucc->getSingleSuccessor() ||
4419 is_contained(RegionSucc->getSuccessors(), getScalarPreheader()))
4420 return RegionSucc;
4421 // There is an early exit. The successor of RegionSucc is the middle block.
4422 return cast<VPBasicBlock>(RegionSucc->getSuccessors()[1]);
4423 }
4424
4426 return const_cast<VPlan *>(this)->getMiddleBlock();
4427 }
4428
4429 /// Return the VPBasicBlock for the preheader of the scalar loop.
4431 return cast<VPBasicBlock>(getScalarHeader()->getSinglePredecessor());
4432 }
4433
4434 /// Return the VPIRBasicBlock wrapping the header of the scalar loop.
4435 VPIRBasicBlock *getScalarHeader() const { return ScalarHeader; }
4436
4437 /// Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of
4438 /// the original scalar loop.
4439 ArrayRef<VPIRBasicBlock *> getExitBlocks() const { return ExitBlocks; }
4440
4441 /// Return the VPIRBasicBlock corresponding to \p IRBB. \p IRBB must be an
4442 /// exit block.
4444
4445 /// Returns true if \p VPBB is an exit block.
4446 bool isExitBlock(VPBlockBase *VPBB);
4447
4448 /// The trip count of the original loop.
4450 assert(TripCount && "trip count needs to be set before accessing it");
4451 return TripCount;
4452 }
4453
4454 /// Set the trip count to the non-null \p NewTripCount. The trip count must not
4455 /// have been set yet; to replace an existing trip count use resetTripCount().
4456 void setTripCount(VPValue *NewTripCount) {
4457 assert(!TripCount && NewTripCount && "TripCount should not be set yet.");
4458 TripCount = NewTripCount;
4459 }
4460
4461 /// Resets the trip count for the VPlan to the non-null \p NewTripCount. A trip count must already be set and must have no users left (asserted); the caller must make sure all uses of
4462 /// the original trip count have been replaced.
4463 void resetTripCount(VPValue *NewTripCount) {
4464 assert(TripCount && NewTripCount && TripCount->getNumUsers() == 0 &&
4465 "TripCount must be set when resetting");
4466 TripCount = NewTripCount;
4467 }
4468
4469 /// The backedge taken count of the original loop.
4471 if (!BackedgeTakenCount)
4472 BackedgeTakenCount = new VPValue();
4473 return BackedgeTakenCount;
4474 }
4475 VPValue *getBackedgeTakenCount() const { return BackedgeTakenCount; }
4476
4477 /// The vector trip count.
4478 VPValue &getVectorTripCount() { return VectorTripCount; }
4479
4480 /// Returns the VF of the vector loop region.
4481 VPValue &getVF() { return VF; };
4482 const VPValue &getVF() const { return VF; };
4483
4484 /// Returns VF * UF of the vector loop region.
4485 VPValue &getVFxUF() { return VFxUF; }
4486
4489 }
4490
4491 void addVF(ElementCount VF) { VFs.insert(VF); }
4492
4494 assert(hasVF(VF) && "Cannot set VF not already in plan");
4495 VFs.clear();
4496 VFs.insert(VF);
4497 }
4498
4499 bool hasVF(ElementCount VF) const { return VFs.count(VF); }
4500 bool hasScalableVF() const {
4501 return any_of(VFs, [](ElementCount VF) { return VF.isScalable(); });
4502 }
4503
4504 /// Returns an iterator range over all VFs of the plan.
4507 return VFs;
4508 }
4509
4510 bool hasScalarVFOnly() const { // Returns true if the plan has exactly one VF and that VF is scalar.
4511 bool HasScalarVFOnly = VFs.size() == 1 && VFs[0].isScalar();
4512 assert(HasScalarVFOnly == hasVF(ElementCount::getFixed(1)) && // Sanity check: a fixed VF of 1, if present, must be the plan's only VF.
4513 "Plan with scalar VF should only have a single VF");
4514 return HasScalarVFOnly;
4515 }
4516
4517 bool hasUF(unsigned UF) const { return UFs.empty() || UFs.contains(UF); } // An empty UF set means the plan is valid for any UF.
4518
4519 unsigned getUF() const { // Returns the plan's single UF; asserts that exactly one UF is recorded.
4520 assert(UFs.size() == 1 && "Expected a single UF");
4521 return UFs[0];
4522 }
4523
4524 void setUF(unsigned UF) { // Narrows the plan to the single unroll factor \p UF, which must satisfy hasUF().
4525 assert(hasUF(UF) && "Cannot set the UF not already in plan");
4526 UFs.clear(); // Drop every previously recorded UF ...
4527 UFs.insert(UF); // ... so that \p UF is the only one left.
4528 }
4529
4530 /// Returns true if the VPlan already has been unrolled, i.e. it has a single
4531 /// concrete UF.
4532 bool isUnrolled() const { return UFs.size() == 1; }
4533
4534 /// Return a string with the name of the plan and the applicable VFs and UFs.
4535 std::string getName() const;
4536
4537 void setName(const Twine &newName) { Name = newName.str(); }
4538
4539 /// Gets the live-in VPValue for \p V or adds a new live-in (if none exists
4540 /// yet) for \p V.
4542 assert(V && "Trying to get or add the VPValue of a null Value");
4543 auto [It, Inserted] = LiveIns.try_emplace(V);
4544 if (Inserted) {
4545 VPValue *VPV = new VPValue(V);
4546 assert(VPV->isLiveIn() && "VPV must be a live-in.");
4547 It->second = VPV;
4548 }
4549
4550 assert(It->second->isLiveIn() && "Only live-ins should be in mapping");
4551 return It->second;
4552 }
4553
4554 /// Return a VPValue wrapping i1 true, obtained as getConstantInt(1, 1).
4555 VPValue *getTrue() { return getConstantInt(1, 1); }
4556
4557 /// Return a VPValue wrapping i1 false, obtained as getConstantInt(1, 0).
4558 VPValue *getFalse() { return getConstantInt(1, 0); }
4559
4560 /// Return a VPValue wrapping a ConstantInt with the given type and value. The constant is built with ConstantInt::get(Ty, Val, IsSigned) and registered through getOrAddLiveIn().
4561 VPValue *getConstantInt(Type *Ty, uint64_t Val, bool IsSigned = false) {
4562 return getOrAddLiveIn(ConstantInt::get(Ty, Val, IsSigned));
4563 }
4564
4565 /// Return a VPValue wrapping a ConstantInt with the given bitwidth and value.
4567 bool IsSigned = false) {
4568 return getConstantInt(APInt(BitWidth, Val, IsSigned));
4569 }
4570
4571 /// Return a VPValue wrapping a ConstantInt with the given APInt value.
4573 return getOrAddLiveIn(ConstantInt::get(getContext(), Val));
4574 }
4575
4576 /// Return the live-in VPValue for \p V, if there is one or nullptr otherwise.
4577 VPValue *getLiveIn(Value *V) const { return LiveIns.lookup(V); }
4578
4579 /// Return the list of live-in VPValues available in the VPlan.
4580 auto getLiveIns() const { return LiveIns.values(); }
4581
4582#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4583 /// Print the live-ins of this VPlan to \p O.
4584 void printLiveIns(raw_ostream &O) const;
4585
4586 /// Print this VPlan to \p O.
4587 LLVM_ABI_FOR_TEST void print(raw_ostream &O) const;
4588
4589 /// Print this VPlan in DOT format to \p O.
4590 LLVM_ABI_FOR_TEST void printDOT(raw_ostream &O) const;
4591
4592 /// Dump the plan to stderr (for debugging).
4593 LLVM_DUMP_METHOD void dump() const;
4594#endif
4595
4596 /// Clone the current VPlan, update all VPValues of the new VPlan and cloned
4597 /// recipes to refer to the clones, and return it.
4599
4600 /// Create a new VPBasicBlock with \p Name and containing \p Recipe if
4601 /// present. The returned block is owned by the VPlan and deleted once the
4602 /// VPlan is destroyed.
4604 VPRecipeBase *Recipe = nullptr) {
4605 auto *VPB = new VPBasicBlock(Name, Recipe);
4606 CreatedBlocks.push_back(VPB);
4607 return VPB;
4608 }
4609
4610 /// Create a new loop region with \p Name and entry and exiting blocks set
4611 /// to \p Entry and \p Exiting respectively, if set. The returned block is
4612 /// owned by the VPlan and deleted once the VPlan is destroyed.
4613 VPRegionBlock *createLoopRegion(const std::string &Name = "",
4614 VPBlockBase *Entry = nullptr,
4615 VPBlockBase *Exiting = nullptr) {
4616 auto *VPB = Entry ? new VPRegionBlock(Entry, Exiting, Name) // Only \p Entry is tested: with a non-null \p Entry, \p Exiting must be non-null too.
4617 : new VPRegionBlock(Name); // With a null \p Entry an empty region is created and \p Exiting is ignored.
4618 CreatedBlocks.push_back(VPB); // Record the region in the list of blocks owned by this VPlan.
4619 return VPB;
4620 }
4621
4622 /// Create a new replicate region with \p Entry, \p Exiting and \p Name. The
4623 /// returned block is owned by the VPlan and deleted once the VPlan is
4624 /// destroyed.
4626 const std::string &Name = "") {
4627 auto *VPB = new VPRegionBlock(Entry, Exiting, Name, true);
4628 CreatedBlocks.push_back(VPB);
4629 return VPB;
4630 }
4631
4632 /// Create a VPIRBasicBlock wrapping \p IRBB, but do not create
4633 /// VPIRInstructions wrapping the instructions in \p IRBB. The returned
4634 /// block is owned by the VPlan and deleted once the VPlan is destroyed.
4636
4637 /// Create a VPIRBasicBlock from \p IRBB containing VPIRInstructions for all
4638 /// instructions in \p IRBB, except its terminator which is managed by the
4639 /// successors of the block in VPlan. The returned block is owned by the VPlan
4640 /// and deleted once the VPlan is destroyed.
4642
4643 /// Returns true if the VPlan is based on a loop with an early exit. That is
4644 /// the case if the VPlan has either more than one exit block that has predecessors or a single exit
4645 /// block with multiple predecessors (one for the exit via the latch and one
4646 /// via the other early exit).
4647 bool hasEarlyExit() const {
4648 return count_if(ExitBlocks, // Count only exit blocks that currently have at least one predecessor.
4649 [](VPIRBasicBlock *EB) { return EB->hasPredecessors(); }) >
4650 1 ||
4651 (ExitBlocks.size() == 1 && ExitBlocks[0]->getNumPredecessors() > 1);
4652 }
4653
4654 /// Returns true if the scalar tail may execute after the vector loop. Note
4655 /// that this relies on unneeded branches to the scalar tail loop being
4656 /// removed.
4657 bool hasScalarTail() const {
4658 return !(!getScalarPreheader()->hasPredecessors() ||
4660 }
4661};
4662
4663#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4664inline raw_ostream &operator<<(raw_ostream &OS, const VPlan &Plan) { // Streams \p Plan to \p OS via VPlan::print and returns \p OS so calls can be chained.
4665 Plan.print(OS);
4666 return OS;
4667}
4668#endif
4669
4670} // end namespace llvm
4671
4672#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
static MCDisassembler::DecodeStatus addOperand(MCInst &Inst, const MCOperand &Opnd)
Rewrite undef for PHI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:638
#define LLVM_ABI_FOR_TEST
Definition Compiler.h:218
dxil translate DXIL Translate Metadata
Hexagon Common GEP
iv users
Definition IVUsers.cpp:48
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
static std::pair< Value *, APInt > getMask(Value *WideMask, unsigned Factor, ElementCount LeafValueEC)
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first DebugLoc that has line number information, given a range of instructions.
This file implements a map that provides insertion order iteration.
This file provides utility analysis objects describing memory locations.
#define T
MachineInstr unsigned OpIdx
#define P(N)
StandardInstrumentations SI(Mod->getContext(), Debug, VerifyEach)
static StringRef getName(Value *V)
static bool mayHaveSideEffects(MachineInstr &MI)
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
static const BasicSubtargetSubTypeKV * find(StringRef S, ArrayRef< BasicSubtargetSubTypeKV > A)
Find KV in array using binary search.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:247
This file contains the declarations of the entities induced by Vectorization Plans,...
#define VP_CLASSOF_IMPL(VPDefID)
Definition VPlan.h:509
static const uint32_t IV[8]
Definition blake3_impl.h:83
Class for arbitrary precision integers.
Definition APInt.h:78
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
LLVM Basic Block Representation.
Definition BasicBlock.h:62
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
This class represents a function call, abstracting a target machine's calling convention.
This is the base class for all instructions that perform data casts.
Definition InstrTypes.h:448
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
A debug info location.
Definition DebugLoc.h:123
static DebugLoc getUnknown()
Definition DebugLoc.h:161
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:164
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:309
Utility class for floating point operations which can have information about relaxed accuracy require...
Definition Operator.h:200
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:22
Represents flags for the getelementptr instruction/expression.
static GEPNoWrapFlags none()
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
A struct for saving information about induction variables.
InductionKind
This enum represents the kinds of inductions that we support.
InnerLoopVectorizer vectorizes loops which contain only one basic block to a specified vectorization ...
The group of interleaved loads/stores sharing the same stride and close to each other.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
An instruction for reading from memory.
LoopVectorizationCostModel - estimates the expected speedups due to vectorization.
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
Metadata node.
Definition Metadata.h:1078
Root of the metadata hierarchy.
Definition Metadata.h:64
The RecurrenceDescriptor is used to identify recurrence variables in a loop.
This class represents an analyzed expression in the program.
This class represents the LLVM 'select' instruction.
This class provides computation of slot numbers for LLVM Assembly writing.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:339
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
iterator erase(const_iterator CI)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
std::string str() const
str - Get the contents as an std::string.
Definition StringRef.h:225
This class represents a truncation of integer types.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
Definition Twine.cpp:17
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition Type.h:128
void execute(VPTransformState &State) override
Generate the active lane mask phi of the vector loop.
VPActiveLaneMaskPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3627
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPActiveLaneMaskPHIRecipe(VPValue *StartMask, DebugLoc DL)
Definition VPlan.h:3621
~VPActiveLaneMaskPHIRecipe() override=default
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition VPlan.h:3980
RecipeListTy::const_iterator const_iterator
Definition VPlan.h:4008
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
Definition VPlan.h:4055
RecipeListTy::const_reverse_iterator const_reverse_iterator
Definition VPlan.h:4010
RecipeListTy::iterator iterator
Instruction iterators...
Definition VPlan.h:4007
RecipeListTy & getRecipeList()
Returns a reference to the list of recipes.
Definition VPlan.h:4033
iplist< VPRecipeBase > RecipeListTy
Definition VPlan.h:3991
VPBasicBlock(const unsigned char BlockSC, const Twine &Name="")
Definition VPlan.h:3997
iterator end()
Definition VPlan.h:4017
iterator begin()
Recipe iterator methods.
Definition VPlan.h:4015
RecipeListTy::reverse_iterator reverse_iterator
Definition VPlan.h:4009
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
Definition VPlan.h:4068
const VPBasicBlock * getCFGPredecessor(unsigned Idx) const
Returns the predecessor block at index Idx with the predecessors as per the corresponding plain CFG.
Definition VPlan.cpp:770
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
Definition VPlan.cpp:216
~VPBasicBlock() override
Definition VPlan.h:4001
const_reverse_iterator rbegin() const
Definition VPlan.h:4021
reverse_iterator rend()
Definition VPlan.h:4022
RecipeListTy Recipes
The VPRecipes held in the order of output instructions to generate.
Definition VPlan.h:3995
VPRecipeBase & back()
Definition VPlan.h:4030
const VPRecipeBase & front() const
Definition VPlan.h:4027
const_iterator begin() const
Definition VPlan.h:4016
VPRecipeBase & front()
Definition VPlan.h:4028
const VPRecipeBase & back() const
Definition VPlan.h:4029
void insert(VPRecipeBase *Recipe, iterator InsertPt)
Definition VPlan.h:4046
bool empty() const
Definition VPlan.h:4026
const_iterator end() const
Definition VPlan.h:4018
static bool classof(const VPBlockBase *V)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:4041
static RecipeListTy VPBasicBlock::* getSublistAccess(VPRecipeBase *)
Returns a pointer to a member of the recipe list.
Definition VPlan.h:4036
reverse_iterator rbegin()
Definition VPlan.h:4020
friend class VPlan
Definition VPlan.h:3981
size_t size() const
Definition VPlan.h:4025
const_reverse_iterator rend() const
Definition VPlan.h:4023
VPValue * getIncomingValue(unsigned Idx) const
Return incoming value number Idx.
Definition VPlan.h:2557
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
Definition VPlan.h:2562
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account when normalized the first incoming value wi...
Definition VPlan.h:2552
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:2573
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2582
VPBlendRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2539
VPBlendRecipe(PHINode *Phi, ArrayRef< VPValue * > Operands, DebugLoc DL)
The blend operation is a User of the incoming values and of their respective masks,...
Definition VPlan.h:2534
void setMask(unsigned Idx, VPValue *V)
Set mask number Idx to V.
Definition VPlan.h:2568
bool isNormalized() const
A normalized blend is one that has an odd number of operands, whereby the first operand does not have...
Definition VPlan.h:2548
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition VPlan.h:81
void setSuccessors(ArrayRef< VPBlockBase * > NewSuccs)
Set each VPBasicBlock in NewSuccs as successor of this VPBlockBase.
Definition VPlan.h:300
VPRegionBlock * getParent()
Definition VPlan.h:173
VPBlocksTy & getPredecessors()
Definition VPlan.h:205
iterator_range< VPBlockBase ** > predecessors()
Definition VPlan.h:202
LLVM_DUMP_METHOD void dump() const
Dump this VPBlockBase to dbgs().
Definition VPlan.h:370
void setName(const Twine &newName)
Definition VPlan.h:166
size_t getNumSuccessors() const
Definition VPlan.h:219
iterator_range< VPBlockBase ** > successors()
Definition VPlan.h:201
virtual void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Print plain-text dump of this VPBlockBase to O, prefixing all lines with Indent.
bool hasPredecessors() const
Returns true if this block has any predecessors.
Definition VPlan.h:223
void swapSuccessors()
Swap successors of the block. The block must have exactly 2 successors.
Definition VPlan.h:322
void printSuccessors(raw_ostream &O, const Twine &Indent) const
Print the successors of this block to O, prefixing all lines with Indent.
Definition VPlan.cpp:645
SmallVectorImpl< VPBlockBase * > VPBlocksTy
Definition VPlan.h:160
virtual ~VPBlockBase()=default
const VPBlocksTy & getHierarchicalPredecessors()
Definition VPlan.h:258
unsigned getIndexForSuccessor(const VPBlockBase *Succ) const
Returns the index for Succ in the blocks successor list.
Definition VPlan.h:335
size_t getNumPredecessors() const
Definition VPlan.h:220
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
Definition VPlan.h:291
VPBlockBase * getEnclosingBlockWithPredecessors()
Definition VPlan.cpp:208
unsigned getIndexForPredecessor(const VPBlockBase *Pred) const
Returns the index for Pred in the blocks predecessors list.
Definition VPlan.h:328
const VPBlocksTy & getPredecessors() const
Definition VPlan.h:204
virtual VPBlockBase * clone()=0
Clone the current block and its recipes without updating the operands of the cloned recipes,...
enum { VPRegionBlockSC, VPBasicBlockSC, VPIRBasicBlockSC } VPBlockTy
An enumeration for keeping track of the concrete subclass of VPBlockBase that are actually instantiat...
Definition VPlan.h:158
virtual InstructionCost cost(ElementCount VF, VPCostContext &Ctx)=0
Return the cost of the block.
void setPlan(VPlan *ParentPlan)
Sets the pointer of the plan containing the block.
Definition VPlan.cpp:180
const VPRegionBlock * getParent() const
Definition VPlan.h:174
const std::string & getName() const
Definition VPlan.h:164
void clearSuccessors()
Remove all the successors of this block.
Definition VPlan.h:310
VPBlockBase * getSingleHierarchicalSuccessor()
Definition VPlan.h:248
void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse)
Set two given VPBlockBases IfTrue and IfFalse to be the two successors of this VPBlockBase.
Definition VPlan.h:282
VPBlockBase * getSinglePredecessor() const
Definition VPlan.h:215
virtual void execute(VPTransformState *State)=0
The method which generates the output IR that corresponds to this VPBlockBase, thereby "executing" the...
const VPBlocksTy & getHierarchicalSuccessors()
Definition VPlan.h:242
void clearPredecessors()
Remove all the predecessors of this block.
Definition VPlan.h:307
friend class VPBlockUtils
Definition VPlan.h:82
unsigned getVPBlockID() const
Definition VPlan.h:171
void printAsOperand(raw_ostream &OS, bool PrintType=false) const
Definition VPlan.h:349
void swapPredecessors()
Swap predecessors of the block.
Definition VPlan.h:314
VPBlockBase(const unsigned char SC, const std::string &N)
Definition VPlan.h:150
VPBlocksTy & getSuccessors()
Definition VPlan.h:199
VPBlockBase * getEnclosingBlockWithSuccessors()
An Enclosing Block of a block B is any block containing B, including B itself.
Definition VPlan.cpp:200
const VPBasicBlock * getEntryBasicBlock() const
Definition VPlan.cpp:166
void setOneSuccessor(VPBlockBase *Successor)
Set a given VPBlockBase Successor as the single successor of this VPBlockBase.
Definition VPlan.h:271
void setParent(VPRegionBlock *P)
Definition VPlan.h:184
VPBlockBase * getSingleHierarchicalPredecessor()
Definition VPlan.h:264
VPBlockBase * getSingleSuccessor() const
Definition VPlan.h:209
const VPBlocksTy & getSuccessors() const
Definition VPlan.h:198
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlan.h:3053
VPBranchOnMaskRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3037
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition VPlan.h:3061
VPBranchOnMaskRecipe(VPValue *BlockInMask, DebugLoc DL)
Definition VPlan.h:3034
VPlan-based builder utility analogous to IRBuilder.
Canonical scalar induction phi of the vector loop.
Definition VPlan.h:3563
~VPCanonicalIVPHIRecipe() override=default
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3589
VPCanonicalIVPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3570
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition VPlan.h:3596
VPCanonicalIVPHIRecipe(VPValue *StartV, DebugLoc DL)
Definition VPlan.h:3565
Type * getScalarType() const
Returns the scalar type of the induction.
Definition VPlan.h:3584
void execute(VPTransformState &State) override
Generate the phi nodes.
Definition VPlan.h:3578
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPCanonicalIVPHIRecipe.
Definition VPlan.h:3603
This class augments a recipe with a set of VPValues defined by the recipe.
Definition VPlanValue.h:305
friend class VPValue
Definition VPlanValue.h:306
VPDef(const unsigned char SC)
Definition VPlanValue.h:384
void execute(VPTransformState &State) override
Generate the transformed value of the induction at offset StartValue (1st operand) + IV (2nd operand) * StepValue (3rd operand).
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPDerivedIVRecipe.
Definition VPlan.h:3769
VPValue * getStepValue() const
Definition VPlan.h:3780
Type * getScalarType() const
Definition VPlan.h:3775
VPDerivedIVRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3757
VPDerivedIVRecipe(InductionDescriptor::InductionKind Kind, const FPMathOperator *FPBinOp, VPValue *Start, VPValue *IV, VPValue *Step, const Twine &Name="")
Definition VPlan.h:3749
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPDerivedIVRecipe() override=default
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3783
VPValue * getStartValue() const
Definition VPlan.h:3779
VPDerivedIVRecipe(const InductionDescriptor &IndDesc, VPValue *Start, VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step, const Twine &Name="")
Definition VPlan.h:3741
Template specialization of the standard LLVM dominator tree utility for VPBlockBases.
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3677
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPEVLBasedIVPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3658
~VPEVLBasedIVPHIRecipe() override=default
void execute(VPTransformState &State) override
Generate the phi nodes.
Definition VPlan.h:3664
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPEVLBasedIVPHIRecipe.
Definition VPlan.h:3670
VPEVLBasedIVPHIRecipe(VPValue *StartIV, DebugLoc DL)
Definition VPlan.h:3653
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:3538
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPExpandSCEVRecipe.
Definition VPlan.h:3543
VPExpandSCEVRecipe(const SCEV *Expr)
Definition VPlan.h:3529
const SCEV * getSCEV() const
Definition VPlan.h:3549
VPExpandSCEVRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3534
~VPExpandSCEVRecipe() override=default
void execute(VPTransformState &State) override
Method for generating code, must not be called as this recipe is abstract.
Definition VPlan.h:3188
VPValue * getOperandOfResultType() const
Return the VPValue to use to infer the result type of the recipe.
Definition VPlan.h:3170
VPExpressionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3152
void decompose()
Insert the recipes of the expression back into the VPlan, directly before the current recipe.
~VPExpressionRecipe() override
Definition VPlan.h:3140
bool isSingleScalar() const
Returns true if the result of this VPExpressionRecipe is a single-scalar.
VPExpressionRecipe(VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1, VPWidenRecipe *Mul, VPWidenRecipe *Sub, VPReductionRecipe *Red)
Definition VPlan.h:3126
VPExpressionRecipe(VPWidenCastRecipe *Ext, VPReductionRecipe *Red)
Definition VPlan.h:3118
bool mayHaveSideEffects() const
Returns true if this expression contains recipes that may have side effects.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Compute the cost of this recipe either using a recipe's specialized implementation or using the legac...
bool mayReadOrWriteMemory() const
Returns true if this expression contains recipes that may read from or write to memory.
VPExpressionRecipe(VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1, VPWidenRecipe *Mul, VPReductionRecipe *Red)
Definition VPlan.h:3122
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
unsigned getVFScaleFactor() const
Definition VPlan.h:3182
VPExpressionRecipe(VPWidenRecipe *Mul, VPReductionRecipe *Red)
Definition VPlan.h:3120
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this header phi recipe.
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:2061
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:2074
static bool classof(const VPValue *V)
Definition VPlan.h:2071
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override=0
Print the recipe.
virtual VPValue * getBackedgeValue()
Returns the incoming value from the loop backedge.
Definition VPlan.h:2097
void setBackedgeValue(VPValue *V)
Update the incoming value from the loop backedge.
Definition VPlan.h:2102
VPValue * getStartValue()
Returns the start value of the phi, if one is set.
Definition VPlan.h:2086
void setStartValue(VPValue *V)
Update the start value of the recipe.
Definition VPlan.h:2094
static bool classof(const VPRecipeBase *R)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:2067
VPValue * getStartValue() const
Definition VPlan.h:2089
void execute(VPTransformState &State) override=0
Generate the phi nodes.
virtual VPRecipeBase & getBackedgeRecipe()
Returns the backedge value as a recipe.
Definition VPlan.h:2106
VPHeaderPHIRecipe(unsigned char VPDefID, Instruction *UnderlyingInstr, VPValue *Start, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2056
~VPHeaderPHIRecipe() override=default
void execute(VPTransformState &State) override
Produce a vectorized histogram operation.
VP_CLASSOF_IMPL(VPDef::VPHistogramSC)
VPHistogramRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1770
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPHistogramRecipe.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getMask() const
Return the mask operand if one was provided, or a null pointer if all lanes should be executed uncond...
Definition VPlan.h:1787
unsigned getOpcode() const
Definition VPlan.h:1783
VPHistogramRecipe(unsigned Opcode, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1764
~VPHistogramRecipe() override=default
A special type of VPBasicBlock that wraps an existing IR basic block.
Definition VPlan.h:4133
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPBasicBlock,...
Definition VPlan.cpp:446
BasicBlock * getIRBasicBlock() const
Definition VPlan.h:4157
static bool classof(const VPBlockBase *V)
Definition VPlan.h:4147
~VPIRBasicBlock() override=default
friend class VPlan
Definition VPlan.h:4134
VPIRBasicBlock * clone() override
Clone the current block and its recipes, without updating the operands of the cloned recipes.
Definition VPlan.cpp:471
Class to record and manage LLVM IR flags.
Definition VPlan.h:609
FastMathFlagsTy FMFs
Definition VPlan.h:680
LLVM_ABI_FOR_TEST bool flagsValidForOpcode(unsigned Opcode) const
Returns true if the set flags are valid for Opcode.
VPIRFlags(DisjointFlagsTy DisjointFlags)
Definition VPlan.h:740
VPIRFlags(WrapFlagsTy WrapFlags)
Definition VPlan.h:732
WrapFlagsTy WrapFlags
Definition VPlan.h:674
CmpInst::Predicate CmpPredicate
Definition VPlan.h:673
void printFlags(raw_ostream &O) const
VPIRFlags(CmpInst::Predicate Pred, FastMathFlags FMFs)
Definition VPlan.h:726
GEPNoWrapFlags GEPFlags
Definition VPlan.h:678
bool hasFastMathFlags() const
Returns true if the recipe has fast-math flags.
Definition VPlan.h:858
LLVM_ABI_FOR_TEST FastMathFlags getFastMathFlags() const
TruncFlagsTy TruncFlags
Definition VPlan.h:675
CmpInst::Predicate getPredicate() const
Definition VPlan.h:835
bool hasNonNegFlag() const
Returns true if the recipe has non-negative flag.
Definition VPlan.h:865
void transferFlags(VPIRFlags &Other)
Definition VPlan.h:749
ExactFlagsTy ExactFlags
Definition VPlan.h:677
bool hasNoSignedWrap() const
Definition VPlan.h:884
void intersectFlags(const VPIRFlags &Other)
Only keep flags also present in Other.
bool isDisjoint() const
Definition VPlan.h:895
VPIRFlags(TruncFlagsTy TruncFlags)
Definition VPlan.h:735
VPIRFlags(FastMathFlags FMFs)
Definition VPlan.h:738
VPIRFlags(NonNegFlagsTy NonNegFlags)
Definition VPlan.h:743
VPIRFlags(CmpInst::Predicate Pred)
Definition VPlan.h:723
bool isNonNeg() const
Definition VPlan.h:867
GEPNoWrapFlags getGEPNoWrapFlags() const
Definition VPlan.h:850
bool hasPredicate() const
Returns true if the recipe has a comparison predicate.
Definition VPlan.h:853
DisjointFlagsTy DisjointFlags
Definition VPlan.h:676
unsigned AllFlags
Definition VPlan.h:682
void setPredicate(CmpInst::Predicate Pred)
Definition VPlan.h:841
bool hasNoUnsignedWrap() const
Definition VPlan.h:873
FCmpFlagsTy FCmpFlags
Definition VPlan.h:681
NonNegFlagsTy NonNegFlags
Definition VPlan.h:679
void dropPoisonGeneratingFlags()
Drop all poison-generating flags.
Definition VPlan.h:759
void applyFlags(Instruction &I) const
Apply the IR flags to I.
Definition VPlan.h:795
VPIRFlags(GEPNoWrapFlags GEPFlags)
Definition VPlan.h:746
VPIRFlags(Instruction &I)
Definition VPlan.h:688
Instruction & getInstruction() const
Definition VPlan.h:1449
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first part of operand Op.
Definition VPlan.h:1457
void extractLastLaneOfLastPartOfFirstOperand(VPBuilder &Builder)
Update the recipe's first operand to the last lane of the last part of the operand using Builder.
~VPIRInstruction() override=default
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
VPIRInstruction * clone() override
Clone the current recipe.
Definition VPlan.h:1436
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition VPlan.h:1463
static LLVM_ABI_FOR_TEST VPIRInstruction * create(Instruction &I)
Create a new VPIRPhi for I, if it is a PHINode, otherwise create a VPIRInstruction.
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPIRInstruction.
bool usesScalars(const VPValue *Op) const override
Returns true if the VPUser uses scalars of operand Op.
Definition VPlan.h:1451
VPIRInstruction(Instruction &I)
VPIRInstruction::create() should be used to create VPIRInstructions, as subclasses may need to be cre...
Definition VPlan.h:1424
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Helper to manage IR metadata for recipes.
Definition VPlan.h:982
VPIRMetadata & operator=(const VPIRMetadata &Other)=default
MDNode * getMetadata(unsigned Kind) const
Get metadata of kind Kind. Returns nullptr if not found.
Definition VPlan.h:1018
VPIRMetadata(Instruction &I)
Adds metadata that can be preserved from the original instruction I.
Definition VPlan.h:990
VPIRMetadata(const VPIRMetadata &Other)=default
Copy constructor for cloning.
void intersect(const VPIRMetadata &MD)
Intersect this VPIRMetadata object with MD, keeping only metadata nodes that are common to both.
VPIRMetadata()=default
void print(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print metadata with node IDs.
void applyMetadata(Instruction &I) const
Add all metadata to I.
void setMetadata(unsigned Kind, MDNode *Node)
Set metadata with kind Kind to Node.
Definition VPlan.h:1002
VPInstructionWithType(unsigned Opcode, ArrayRef< VPValue * > Operands, Type *ResultTy, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Definition VPlan.h:1263
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPInstruction.
Definition VPlan.h:1304
static bool classof(const VPUser *R)
Definition VPlan.h:1289
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:1271
Type * getResultType() const
Definition VPlan.h:1310
VPInstruction * clone() override
Clone the current recipe.
Definition VPlan.h:1293
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the instruction.
This is a concrete Recipe that models a single VPlan-level instruction.
Definition VPlan.h:1036
VPInstruction * clone() override
Clone the current recipe.
Definition VPlan.h:1176
@ ExtractLane
Extracts a single lane (first operand) from a set of vector operands.
Definition VPlan.h:1129
@ ComputeAnyOfResult
Compute the final result of a AnyOf reduction with select(cmp(),x,y), where one of (x,...
Definition VPlan.h:1076
@ WideIVStep
Scale the first operand (vector step) by the second operand (scalar-step).
Definition VPlan.h:1119
@ ResumeForEpilogue
Explicit user for the resume phi of the canonical induction in the main VPlan, used by the epilogue v...
Definition VPlan.h:1132
@ Unpack
Extracts all lanes from its (non-scalable) vector operand.
Definition VPlan.h:1073
@ ReductionStartVector
Start vector for reductions with 3 operands: the original start value, the identity value for the red...
Definition VPlan.h:1123
@ BuildVector
Creates a fixed-width vector containing all operands.
Definition VPlan.h:1068
@ BuildStructVector
Given operands of (the same) struct type, creates a struct of fixed- width vectors each containing a ...
Definition VPlan.h:1065
@ VScale
Returns the value for vscale.
Definition VPlan.h:1134
@ CanonicalIVIncrementForPart
Definition VPlan.h:1056
bool hasResult() const
Definition VPlan.h:1200
StringRef getName() const
Returns the symbolic name assigned to the VPInstruction.
Definition VPlan.h:1240
unsigned getOpcode() const
Definition VPlan.h:1184
VPInstruction(unsigned Opcode, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
void setName(StringRef NewName)
Set the symbolic name for the VPInstruction.
Definition VPlan.h:1243
friend class VPlanSlp
Definition VPlan.h:1037
virtual unsigned getNumStoreOperands() const =0
Returns the number of stored operands of this interleave group.
bool usesFirstLaneOnly(const VPValue *Op) const override=0
Returns true if the recipe only uses the first lane of operand Op.
bool needsMaskForGaps() const
Return true if the access needs a mask because of the gaps.
Definition VPlan.h:2668
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:2674
static bool classof(const VPUser *U)
Definition VPlan.h:2650
VPInterleaveBase(const unsigned char SC, const InterleaveGroup< Instruction > *IG, ArrayRef< VPValue * > Operands, ArrayRef< VPValue * > StoredValues, VPValue *Mask, bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
Definition VPlan.h:2617
Instruction * getInsertPos() const
Definition VPlan.h:2672
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:2645
const InterleaveGroup< Instruction > * getInterleaveGroup() const
Definition VPlan.h:2670
VPValue * getMask() const
Return the mask used by this recipe.
Definition VPlan.h:2662
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
Definition VPlan.h:2691
VPInterleaveBase * clone() override=0
Clone the current recipe.
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition VPlan.h:2656
A recipe for interleaved memory operations with vector-predication intrinsics.
Definition VPlan.h:2744
bool usesFirstLaneOnly(const VPValue *Op) const override
The recipe only uses the first lane of the address, and EVL operand.
Definition VPlan.h:2772
VPValue * getEVL() const
The VPValue of the explicit vector length.
Definition VPlan.h:2766
~VPInterleaveEVLRecipe() override=default
unsigned getNumStoreOperands() const override
Returns the number of stored operands of this interleave group.
Definition VPlan.h:2779
VPInterleaveEVLRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2759
VPInterleaveEVLRecipe(VPInterleaveRecipe &R, VPValue &EVL, VPValue *Mask)
Definition VPlan.h:2746
VPInterleaveRecipe is a recipe for transforming an interleave group of load or stores into one wide l...
Definition VPlan.h:2702
unsigned getNumStoreOperands() const override
Returns the number of stored operands of this interleave group.
Definition VPlan.h:2729
~VPInterleaveRecipe() override=default
VPInterleaveRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2712
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2723
VPInterleaveRecipe(const InterleaveGroup< Instruction > *IG, VPValue *Addr, ArrayRef< VPValue * > StoredValues, VPValue *Mask, bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
Definition VPlan.h:2704
In what follows, the term "input IR" refers to code that is fed into the vectorizer whereas the term ...
Helper type to provide functions to access incoming values and blocks for phi-like recipes.
Definition VPlan.h:1322
virtual const VPRecipeBase * getAsRecipe() const =0
Return a VPRecipeBase* to the current object.
VPUser::const_operand_range incoming_values() const
Returns an iterator range over the incoming values.
Definition VPlan.h:1344
virtual unsigned getNumIncoming() const
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:1339
void removeIncomingValueFor(VPBlockBase *IncomingBlock) const
Removes the incoming value for IncomingBlock, which must be a predecessor.
const VPBasicBlock * getIncomingBlock(unsigned Idx) const
Returns the incoming block with index Idx.
Definition VPlan.h:4124
detail::zippy< llvm::detail::zip_first, VPUser::const_operand_range, const_incoming_blocks_range > incoming_values_and_blocks() const
Returns an iterator range over pairs of incoming values and corresponding incoming blocks.
Definition VPlan.h:1364
VPValue * getIncomingValue(unsigned Idx) const
Returns the incoming VPValue with index Idx.
Definition VPlan.h:1331
virtual ~VPPhiAccessors()=default
void printPhiOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the recipe.
iterator_range< mapped_iterator< detail::index_iterator, std::function< const VPBasicBlock *(size_t)> > > const_incoming_blocks_range
Definition VPlan.h:1349
const_incoming_blocks_range incoming_blocks() const
Returns an iterator range over the incoming blocks.
Definition VPlan.h:1353
~VPPredInstPHIRecipe() override=default
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition VPlan.h:3245
VPPredInstPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3227
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPPredInstPHIRecipe.
Definition VPlan.h:3238
VPPredInstPHIRecipe(VPValue *PredV, DebugLoc DL)
Construct a VPPredInstPHIRecipe given PredInst whose value needs phi nodes after merging back from ...
Definition VPlan.h:3223
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition VPlan.h:387
bool mayReadFromMemory() const
Returns true if the recipe may read from memory.
bool mayReadOrWriteMemory() const
Returns true if the recipe may read from or write to memory.
Definition VPlan.h:474
virtual void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Each concrete VPRecipe prints itself, without printing common information, like debug info or metadat...
VPRegionBlock * getRegion()
Definition VPlan.h:4285
void setDebugLoc(DebugLoc NewDL)
Set the recipe's debug location to NewDL.
Definition VPlan.h:485
bool mayWriteToMemory() const
Returns true if the recipe may write to memory.
~VPRecipeBase() override=default
VPBasicBlock * getParent()
Definition VPlan.h:408
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
Definition VPlan.h:479
virtual void execute(VPTransformState &State)=0
The method which generates the output IR instructions that correspond to this VPRecipe,...
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
static bool classof(const VPDef *D)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:454
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
virtual VPRecipeBase * clone()=0
Clone the current recipe.
friend class VPBlockUtils
Definition VPlan.h:389
const VPBasicBlock * getParent() const
Definition VPlan.h:409
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this recipe, taking into account if the cost computation should be skipped and the...
static bool classof(const VPUser *U)
Definition VPlan.h:459
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
void moveAfter(VPRecipeBase *MovePos)
Unlink this recipe from its current VPBasicBlock and insert it into the VPBasicBlock that MovePos liv...
VPRecipeBase(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:398
VPValue * getEVL() const
The VPValue of the explicit vector length.
Definition VPlan.h:2929
VPReductionEVLRecipe(VPReductionRecipe &R, VPValue &EVL, VPValue *CondOp, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2908
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2932
VPReductionEVLRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2919
~VPReductionEVLRecipe() override=default
bool isOrdered() const
Returns true, if the phi is part of an ordered reduction.
Definition VPlan.h:2495
void setVFScaleFactor(unsigned ScaleFactor)
Set the VFScaleFactor for this reduction phi.
Definition VPlan.h:2481
VPReductionPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2460
unsigned getVFScaleFactor() const
Get the factor that the VF of this recipe's output should be scaled by, or 1 if it isn't scaled.
Definition VPlan.h:2474
~VPReductionPHIRecipe() override=default
bool hasUsesOutsideReductionChain() const
Returns true, if the phi is part of a multi-use reduction.
Definition VPlan.h:2507
unsigned getNumIncoming() const override
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:2489
bool isInLoop() const
Returns true if the phi is part of an in-loop reduction.
Definition VPlan.h:2498
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2512
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPReductionPHIRecipe(PHINode *Phi, RecurKind Kind, VPValue &Start, VPValue &BackedgeValue, ReductionStyle Style, bool HasUsesOutsideReductionChain=false)
Create a new VPReductionPHIRecipe for the reduction Phi.
Definition VPlan.h:2449
void execute(VPTransformState &State) override
Generate the phi/select nodes.
bool isPartialReduction() const
Returns true if the reduction outputs a vector with a scaled down VF.
Definition VPlan.h:2504
RecurKind getRecurrenceKind() const
Returns the recurrence kind of the reduction.
Definition VPlan.h:2492
A recipe to represent inloop, ordered or partial reduction operations.
Definition VPlan.h:2795
VPReductionRecipe(const unsigned char SC, RecurKind RdxKind, FastMathFlags FMFs, Instruction *I, ArrayRef< VPValue * > Operands, VPValue *CondOp, ReductionStyle Style, DebugLoc DL)
Definition VPlan.h:2804
bool isConditional() const
Return true if the in-loop reduction is conditional.
Definition VPlan.h:2871
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:2840
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:2855
VPValue * getVecOp() const
The VPValue of the vector value to be reduced.
Definition VPlan.h:2882
VPValue * getCondOp() const
The VPValue of the condition for the block.
Definition VPlan.h:2884
RecurKind getRecurrenceKind() const
Return the recurrence kind for the in-loop reduction.
Definition VPlan.h:2867
VPReductionRecipe(RecurKind RdxKind, FastMathFlags FMFs, Instruction *I, VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp, ReductionStyle Style, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2818
bool isOrdered() const
Return true if the in-loop reduction is ordered.
Definition VPlan.h:2869
VPReductionRecipe(const RecurKind RdxKind, FastMathFlags FMFs, VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp, ReductionStyle Style, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2825
bool isPartialReduction() const
Returns true if the reduction outputs a vector with a scaled down VF.
Definition VPlan.h:2873
~VPReductionRecipe() override=default
VPValue * getChainOp() const
The VPValue of the scalar Chain being accumulated.
Definition VPlan.h:2880
bool isInLoop() const
Returns true if the reduction is in-loop.
Definition VPlan.h:2875
VPReductionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2834
static bool classof(const VPUser *U)
Definition VPlan.h:2845
static bool classof(const VPValue *VPV)
Definition VPlan.h:2850
unsigned getVFScaleFactor() const
Get the factor that the VF of this recipe's output should be scaled by, or 1 if it isn't scaled.
Definition VPlan.h:2889
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition VPlan.h:4168
const VPBlockBase * getEntry() const
Definition VPlan.h:4204
Type * getCanonicalIVType()
Return the type of the canonical IV for loop regions.
Definition VPlan.h:4279
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
Definition VPlan.h:4236
~VPRegionBlock() override=default
void setExiting(VPBlockBase *ExitingBlock)
Set ExitingBlock as the exiting VPBlockBase of this VPRegionBlock.
Definition VPlan.h:4221
VPBlockBase * getExiting()
Definition VPlan.h:4217
VPCanonicalIVPHIRecipe * getCanonicalIV()
Returns the canonical induction recipe of the region.
Definition VPlan.h:4266
void setEntry(VPBlockBase *EntryBlock)
Set EntryBlock as the entry VPBlockBase of this VPRegionBlock.
Definition VPlan.h:4209
const Type * getCanonicalIVType() const
Definition VPlan.h:4280
const VPBlockBase * getExiting() const
Definition VPlan.h:4216
VPBlockBase * getEntry()
Definition VPlan.h:4205
const VPCanonicalIVPHIRecipe * getCanonicalIV() const
Definition VPlan.h:4274
VPBasicBlock * getPreheaderVPBB()
Returns the pre-header VPBasicBlock of the loop region.
Definition VPlan.h:4229
friend class VPlan
Definition VPlan.h:4169
static bool classof(const VPBlockBase *V)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:4200
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
Definition VPlan.h:2951
bool isSingleScalar() const
Definition VPlan.h:2992
VPReplicateRecipe(Instruction *I, ArrayRef< VPValue * > Operands, bool IsSingleScalar, VPValue *Mask=nullptr, const VPIRFlags &Flags={}, VPIRMetadata Metadata={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2959
~VPReplicateRecipe() override=default
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition VPlan.h:3004
bool isPredicated() const
Definition VPlan.h:2994
VPReplicateRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2973
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2997
unsigned getOpcode() const
Definition VPlan.h:3021
VPValue * getMask()
Return the mask of a predicated VPReplicateRecipe.
Definition VPlan.h:3016
VPValue * getStepValue() const
Definition VPlan.h:3846
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPScalarIVStepsRecipe.
Definition VPlan.h:3840
VPScalarIVStepsRecipe(const InductionDescriptor &IndDesc, VPValue *IV, VPValue *Step, VPValue *VF, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:3811
bool isPart0() const
Return true if this VPScalarIVStepsRecipe corresponds to part 0.
Definition VPlan.h:3832
VPScalarIVStepsRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3823
VPScalarIVStepsRecipe(VPValue *IV, VPValue *Step, VPValue *VF, Instruction::BinaryOps Opcode, FastMathFlags FMFs, DebugLoc DL)
Definition VPlan.h:3804
~VPScalarIVStepsRecipe() override=default
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3849
VPSingleDef is a base class for recipes for modeling a sequence of one or more output IR that define ...
Definition VPlan.h:531
VPSingleDefRecipe(const unsigned char SC, ArrayRef< VPValue * > Operands, Value *UV, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:537
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
Definition VPlan.h:595
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:541
const Instruction * getUnderlyingInstr() const
Definition VPlan.h:598
static bool classof(const VPUser *U)
Definition VPlan.h:587
LLVM_ABI_FOR_TEST LLVM_DUMP_METHOD void dump() const
Print this VPSingleDefRecipe to dbgs() (for debugging).
VPSingleDefRecipe * clone() override=0
Clone the current recipe.
VPSingleDefRecipe(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:533
This class can be used to assign names to VPValues.
Helper to access the operand that contains the unroll part for this recipe after unrolling.
Definition VPlan.h:970
VPValue * getUnrollPartOperand(const VPUser &U) const
Return the VPValue operand containing the unroll part or null if there is no such operand.
unsigned getUnrollPart(const VPUser &U) const
Return the unroll part.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition VPlanValue.h:202
void printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the operands to O.
Definition VPlan.cpp:1420
operand_range operands()
Definition VPlanValue.h:270
void setOperand(unsigned I, VPValue *New)
Definition VPlanValue.h:246
unsigned getNumOperands() const
Definition VPlanValue.h:240
operand_iterator op_end()
Definition VPlanValue.h:268
VPValue * getOperand(unsigned N) const
Definition VPlanValue.h:241
VPUser(ArrayRef< VPValue * > Operands)
Definition VPlanValue.h:221
iterator_range< const_operand_iterator > const_operand_range
Definition VPlanValue.h:264
iterator_range< operand_iterator > operand_range
Definition VPlanValue.h:263
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
Definition VPlanValue.h:46
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Definition VPlan.cpp:131
friend class VPExpressionRecipe
Definition VPlanValue.h:51
Value * getLiveInIRValue() const
Returns the underlying IR value, if this VPValue is defined outside the scope of VPlan.
Definition VPlanValue.h:181
friend class VPDef
Definition VPlanValue.h:47
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
Definition VPlanValue.h:83
VPValue(const unsigned char SC, Value *UV=nullptr, VPDef *Def=nullptr)
Definition VPlan.cpp:94
void setUnderlyingValue(Value *Val)
Definition VPlanValue.h:191
unsigned getNumUsers() const
Definition VPlanValue.h:111
bool isLiveIn() const
Returns true if this VPValue is a live-in, i.e. defined outside the VPlan.
Definition VPlanValue.h:176
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition VPlan.h:1936
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPVectorEndPointerRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1957
const VPValue * getVFValue() const
Definition VPlan.h:1932
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition VPlan.h:1950
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPVectorEndPointerRecipe.
Definition VPlan.h:1943
VPVectorEndPointerRecipe(VPValue *Ptr, VPValue *VF, Type *IndexedTy, int64_t Stride, GEPNoWrapFlags GEPFlags, DebugLoc DL)
Definition VPlan.h:1921
Type * getSourceElementType() const
Definition VPlan.h:1991
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition VPlan.h:1993
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition VPlan.h:2000
VPVectorPointerRecipe(VPValue *Ptr, Type *SourceElementTy, GEPNoWrapFlags GEPFlags, DebugLoc DL)
Definition VPlan.h:1978
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPVectorPointerRecipe.
Definition VPlan.h:2016
VPVectorPointerRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2007
A recipe for widening Call instructions using library calls.
Definition VPlan.h:1704
VPWidenCallRecipe(Value *UV, Function *Variant, ArrayRef< VPValue * > CallArguments, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL={})
Definition VPlan.h:1711
const_operand_range args() const
Definition VPlan.h:1744
VPWidenCallRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1725
operand_range args()
Definition VPlan.h:1743
Function * getCalledScalarFunction() const
Definition VPlan.h:1739
~VPWidenCallRecipe() override=default
void execute(VPTransformState &State) override
Generate a canonical vector induction variable of the vector loop, with start = {<Part*VF,...
~VPWidenCanonicalIVRecipe() override=default
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCanonicalIVRecipe.
Definition VPlan.h:3713
VPWidenCanonicalIVRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3700
VPWidenCanonicalIVRecipe(VPCanonicalIVPHIRecipe *CanonicalIV)
Definition VPlan.h:3695
VPWidenCastRecipe is a recipe to create vector cast instructions.
Definition VPlan.h:1554
Instruction::CastOps getOpcode() const
Definition VPlan.h:1590
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Type * getResultType() const
Returns the result type of the cast.
Definition VPlan.h:1593
LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override
Produce widened copies of the cast.
~VPWidenCastRecipe() override=default
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCastRecipe.
VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, CastInst *CI=nullptr, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1562
VPWidenCastRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1575
unsigned getOpcode() const
This recipe generates a GEP instruction.
Definition VPlan.h:1884
Type * getSourceElementType() const
Definition VPlan.h:1889
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenGEPRecipe.
Definition VPlan.h:1892
VPWidenGEPRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1876
~VPWidenGEPRecipe() override=default
VPWidenGEPRecipe(GetElementPtrInst *GEP, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1862
void execute(VPTransformState &State) override=0
Generate the phi nodes.
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2183
static bool classof(const VPValue *V)
Definition VPlan.h:2137
void setStepValue(VPValue *V)
Update the step value of the recipe.
Definition VPlan.h:2153
VPValue * getBackedgeValue() override
Returns the incoming value from the loop backedge.
Definition VPlan.h:2168
unsigned getNumIncoming() const override
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:2161
PHINode * getPHINode() const
Definition VPlan.h:2163
VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start, VPValue *Step, const InductionDescriptor &IndDesc, DebugLoc DL)
Definition VPlan.h:2125
VPValue * getStepValue()
Returns the step value of the induction.
Definition VPlan.h:2149
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
Definition VPlan.h:2166
VPRecipeBase & getBackedgeRecipe() override
Returns the backedge value as a recipe.
Definition VPlan.h:2175
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:2132
const VPValue * getVFValue() const
Definition VPlan.h:2156
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:2142
const VPValue * getStepValue() const
Definition VPlan.h:2150
const TruncInst * getTruncInst() const
Definition VPlan.h:2257
void execute(VPTransformState &State) override
Generate the phi nodes.
Definition VPlan.h:2238
~VPWidenIntOrFpInductionRecipe() override=default
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step, VPValue *VF, const InductionDescriptor &IndDesc, TruncInst *Trunc, const VPIRFlags &Flags, DebugLoc DL)
Definition VPlan.h:2213
VPWidenIntOrFpInductionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2230
TruncInst * getTruncInst()
Returns the first defined value as TruncInst, if it is one or nullptr otherwise.
Definition VPlan.h:2256
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step, VPValue *VF, const InductionDescriptor &IndDesc, const VPIRFlags &Flags, DebugLoc DL)
Definition VPlan.h:2204
VPValue * getLastUnrolledPartOperand()
Returns the VPValue representing the value of this induction at the last unrolled part,...
Definition VPlan.h:2273
unsigned getNumIncoming() const override
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:2252
Type * getScalarType() const
Returns the scalar type of the induction.
Definition VPlan.h:2265
bool isCanonical() const
Returns true if the induction is canonical, i.e.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
A recipe for widening vector intrinsics.
Definition VPlan.h:1604
VPWidenIntrinsicRecipe(Intrinsic::ID VectorIntrinsicID, ArrayRef< VPValue * > CallArguments, Type *Ty, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1635
Intrinsic::ID getVectorIntrinsicID() const
Return the ID of the intrinsic.
Definition VPlan.h:1675
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool mayReadFromMemory() const
Returns true if the intrinsic may read from memory.
Definition VPlan.h:1684
StringRef getIntrinsicName() const
Return the name of the intrinsic as a string.
VPWidenIntrinsicRecipe(CallInst &CI, Intrinsic::ID VectorIntrinsicID, ArrayRef< VPValue * > CallArguments, Type *Ty, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1621
bool mayHaveSideEffects() const
Returns true if the intrinsic may have side-effects.
Definition VPlan.h:1690
VPWidenIntrinsicRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1656
bool mayWriteToMemory() const
Returns true if the intrinsic may write to memory.
Definition VPlan.h:1687
~VPWidenIntrinsicRecipe() override=default
LLVM_ABI_FOR_TEST bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Type * getResultType() const
Return the scalar return type of the intrinsic.
Definition VPlan.h:1678
LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override
Produce a widened version of the vector intrinsic.
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this vector intrinsic.
bool IsMasked
Whether the memory access is masked.
Definition VPlan.h:3276
bool Reverse
Whether the consecutive accessed addresses are in reverse order.
Definition VPlan.h:3273
bool isConsecutive() const
Return whether the loaded-from / stored-to addresses are consecutive.
Definition VPlan.h:3316
static bool classof(const VPUser *U)
Definition VPlan.h:3310
void execute(VPTransformState &State) override
Generate the wide load/store.
Definition VPlan.h:3339
Instruction & Ingredient
Definition VPlan.h:3264
VPWidenMemoryRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3299
Instruction & getIngredient() const
Definition VPlan.h:3347
bool Consecutive
Whether the accessed addresses are consecutive.
Definition VPlan.h:3270
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:3303
VPValue * getMask() const
Return the mask used by this recipe.
Definition VPlan.h:3330
Align Alignment
Alignment information for this memory access.
Definition VPlan.h:3267
bool isMasked() const
Returns true if the recipe is masked.
Definition VPlan.h:3326
VPWidenMemoryRecipe(const char unsigned SC, Instruction &I, std::initializer_list< VPValue * > Operands, bool Consecutive, bool Reverse, const VPIRMetadata &Metadata, DebugLoc DL)
Definition VPlan.h:3286
void setMask(VPValue *Mask)
Definition VPlan.h:3278
Align getAlign() const
Returns the alignment of the memory access.
Definition VPlan.h:3336
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition VPlan.h:3323
bool isReverse() const
Return whether the consecutive loaded/stored addresses are in reverse order.
Definition VPlan.h:3320
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:2367
VPWidenPHIRecipe(PHINode *Phi, VPValue *Start=nullptr, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new VPWidenPHIRecipe for Phi with start value Start and debug location DL.
Definition VPlan.h:2338
VPWidenPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2345
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPWidenPHIRecipe() override=default
void execute(VPTransformState &State) override
Generate the phi/select nodes.
VPWidenPointerInductionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2300
~VPWidenPointerInductionRecipe() override=default
bool onlyScalarsGenerated(bool IsScalable)
Returns true if only scalar values will be generated.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate vector values for the pointer induction.
Definition VPlan.h:2309
VPWidenPointerInductionRecipe(PHINode *Phi, VPValue *Start, VPValue *Step, VPValue *NumUnrolledElems, const InductionDescriptor &IndDesc, DebugLoc DL)
Create a new VPWidenPointerInductionRecipe for Phi with start value Start and the number of elements ...
Definition VPlan.h:2290
VPWidenRecipe is a recipe for producing a widened instruction using the opcode and operands of the re...
Definition VPlan.h:1514
VPWidenRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1528
VPWidenRecipe(Instruction &I, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL={})
Definition VPlan.h:1518
~VPWidenRecipe() override=default
unsigned getOpcode() const
Definition VPlan.h:1543
Class that maps (parts of) an existing VPlan to trees of combined VPInstructions.
Definition VPlanSLP.h:74
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient outp...
Definition VPlan.h:4298
LLVM_ABI_FOR_TEST void printDOT(raw_ostream &O) const
Print this VPlan in DOT format to O.
Definition VPlan.cpp:1106
friend class VPSlotTracker
Definition VPlan.h:4300
std::string getName() const
Return a string with the name of the plan and the applicable VFs and UFs.
Definition VPlan.cpp:1082
bool hasVF(ElementCount VF) const
Definition VPlan.h:4499
LLVMContext & getContext() const
Definition VPlan.h:4487
VPBasicBlock * getEntry()
Definition VPlan.h:4387
VPValue & getVectorTripCount()
The vector trip count.
Definition VPlan.h:4478
void setName(const Twine &newName)
Definition VPlan.h:4537
bool hasScalableVF() const
Definition VPlan.h:4500
VPValue & getVFxUF()
Returns VF * UF of the vector loop region.
Definition VPlan.h:4485
VPValue & getVF()
Returns the VF of the vector loop region.
Definition VPlan.h:4481
VPValue * getTripCount() const
The trip count of the original loop.
Definition VPlan.h:4449
VPValue * getTrue()
Return a VPValue wrapping i1 true.
Definition VPlan.h:4555
VPValue * getOrCreateBackedgeTakenCount()
The backedge taken count of the original loop.
Definition VPlan.h:4470
iterator_range< SmallSetVector< ElementCount, 2 >::iterator > vectorFactors() const
Returns an iterator range over all VFs of the plan.
Definition VPlan.h:4506
VPIRBasicBlock * getExitBlock(BasicBlock *IRBB) const
Return the VPIRBasicBlock corresponding to IRBB.
Definition VPlan.cpp:890
LLVM_ABI_FOR_TEST ~VPlan()
Definition VPlan.cpp:868
const VPValue & getVF() const
Definition VPlan.h:4482
bool isExitBlock(VPBlockBase *VPBB)
Returns true if VPBB is an exit block.
Definition VPlan.cpp:898
const VPBasicBlock * getEntry() const
Definition VPlan.h:4388
friend class VPlanPrinter
Definition VPlan.h:4299
VPValue * getConstantInt(const APInt &Val)
Return a VPValue wrapping a ConstantInt with the given APInt value.
Definition VPlan.h:4572
unsigned getUF() const
Definition VPlan.h:4519
VPRegionBlock * createReplicateRegion(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name="")
Create a new replicate region with Entry, Exiting and Name.
Definition VPlan.h:4625
VPIRBasicBlock * createEmptyVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock wrapping IRBB, but do not create VPIRInstructions wrapping the instructions i...
Definition VPlan.cpp:1220
auto getLiveIns() const
Return the list of live-in VPValues available in the VPlan.
Definition VPlan.h:4580
bool hasUF(unsigned UF) const
Definition VPlan.h:4517
ArrayRef< VPIRBasicBlock * > getExitBlocks() const
Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of the original scalar loop.
Definition VPlan.h:4439
VPValue * getConstantInt(Type *Ty, uint64_t Val, bool IsSigned=false)
Return a VPValue wrapping a ConstantInt with the given type and value.
Definition VPlan.h:4561
VPValue * getBackedgeTakenCount() const
Definition VPlan.h:4475
void setVF(ElementCount VF)
Definition VPlan.h:4493
bool isUnrolled() const
Returns true if the VPlan already has been unrolled, i.e.
Definition VPlan.h:4532
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition VPlan.cpp:1011
bool hasEarlyExit() const
Returns true if the VPlan is based on a loop with an early exit.
Definition VPlan.h:4647
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this plan.
Definition VPlan.cpp:993
const VPBasicBlock * getMiddleBlock() const
Definition VPlan.h:4425
void setTripCount(VPValue *NewTripCount)
Set the trip count assuming it is currently null; if it is not - use resetTripCount().
Definition VPlan.h:4456
void resetTripCount(VPValue *NewTripCount)
Resets the trip count for the VPlan.
Definition VPlan.h:4463
VPBasicBlock * getMiddleBlock()
Returns the 'middle' block of the plan, that is the block that selects whether to execute the scalar ...
Definition VPlan.h:4412
void setEntry(VPBasicBlock *VPBB)
Definition VPlan.h:4376
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
Definition VPlan.h:4603
LLVM_ABI_FOR_TEST VPIRBasicBlock * createVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock from IRBB containing VPIRInstructions for all instructions in IRBB,...
Definition VPlan.cpp:1226
VPValue * getFalse()
Return a VPValue wrapping i1 false.
Definition VPlan.h:4558
VPValue * getOrAddLiveIn(Value *V)
Gets the live-in VPValue for V or adds a new live-in (if none exists yet) for V.
Definition VPlan.h:4541
VPRegionBlock * createLoopRegion(const std::string &Name="", VPBlockBase *Entry=nullptr, VPBlockBase *Exiting=nullptr)
Create a new loop region with Name and entry and exiting blocks set to Entry and Exiting respectively...
Definition VPlan.h:4613
LLVM_DUMP_METHOD void dump() const
Dump the plan to stderr (for debugging).
Definition VPlan.cpp:1112
bool hasScalarVFOnly() const
Definition VPlan.h:4510
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
Definition VPlan.h:4430
void execute(VPTransformState *State)
Generate the IR code for this VPlan.
Definition VPlan.cpp:905
LLVM_ABI_FOR_TEST void print(raw_ostream &O) const
Print this VPlan to O.
Definition VPlan.cpp:1065
void addVF(ElementCount VF)
Definition VPlan.h:4491
VPIRBasicBlock * getScalarHeader() const
Return the VPIRBasicBlock wrapping the header of the scalar loop.
Definition VPlan.h:4435
VPValue * getLiveIn(Value *V) const
Return the live-in VPValue for V, if there is one or nullptr otherwise.
Definition VPlan.h:4577
VPValue * getConstantInt(unsigned BitWidth, uint64_t Val, bool IsSigned=false)
Return a VPValue wrapping a ConstantInt with the given bitwidth and value.
Definition VPlan.h:4566
void printLiveIns(raw_ostream &O) const
Print the live-ins of this VPlan to O.
Definition VPlan.cpp:1027
VPBasicBlock * getVectorPreheader()
Returns the preheader of the vector loop region, if one exists, or null otherwise.
Definition VPlan.h:4392
void setUF(unsigned UF)
Definition VPlan.h:4524
bool hasScalarTail() const
Returns true if the scalar tail may execute after the vector loop.
Definition VPlan.h:4657
LLVM_ABI_FOR_TEST VPlan * duplicate()
Clone the current VPlan, update all VPValues of the new VPlan and cloned recipes to refer to the clon...
Definition VPlan.cpp:1153
VPlan(BasicBlock *ScalarHeaderBB)
Construct a VPlan with a new VPBasicBlock as entry, a VPIRBasicBlock wrapping ScalarHeaderBB and a tr...
Definition VPlan.h:4369
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
Increasing range of size_t indices.
Definition STLExtras.h:2447
typename base_list_type::const_reverse_iterator const_reverse_iterator
Definition ilist.h:124
typename base_list_type::reverse_iterator reverse_iterator
Definition ilist.h:123
typename base_list_type::const_iterator const_iterator
Definition ilist.h:122
An intrusive list with ownership and callbacks specified/controlled by ilist_traits,...
Definition ilist.h:328
A range adaptor for a pair of iterators.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
This file defines classes to implement an intrusive doubly linked list class.
This file defines the ilist_node class template, which is a convenient base class for creating classe...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI AttributeSet getFnAttributes(LLVMContext &C, ID id)
Return the function attributes for an intrinsic.
std::variant< std::monostate, Loc::Single, Loc::Multi, Loc::MMI, Loc::EntryValue > Variant
Alias for the std::variant specialization base class of DbgVariable.
Definition DwarfDebug.h:189
static auto castToVPIRMetadata(RecipeBasePtrTy R) -> DstTy
Definition VPlan.h:3912
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iterable types.
Definition STLExtras.h:829
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1763
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1737
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
Definition STLExtras.h:839
ReductionStyle getReductionStyle(bool InLoop, bool Ordered, unsigned ScaleFactor)
Definition VPlan.h:2421
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2494
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI void getMetadataToPropagate(Instruction *Inst, SmallVectorImpl< std::pair< unsigned, MDNode * > > &Metadata)
Add metadata from Inst to Metadata, if it can be preserved after vectorization.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
auto cast_or_null(const Y &Val)
Definition Casting.h:714
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
MemoryEffectsBase< IRMemLocation > MemoryEffects
Summary of how a function affects memory in the program.
Definition ModRef.h:301
auto map_range(ContainerTy &&C, FuncTy F)
Definition STLExtras.h:364
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1744
auto reverse(ContainerTy &&C)
Definition STLExtras.h:406
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition STLExtras.h:323
@ Other
Any other memory.
Definition ModRef.h:68
RecurKind
These are the kinds of recurrences that we support.
@ Mul
Product of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition STLExtras.h:1966
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
constexpr unsigned BitWidth
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:1973
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1770
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1909
std::variant< RdxOrdered, RdxInLoop, RdxUnordered > ReductionStyle
Definition VPlan.h:2419
std::unique_ptr< VPlan > VPlanPtr
Definition VPlan.h:77
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Support casting from VPRecipeBase -> VPIRMetadata, by down-casting to the recipe types implementing V...
Definition VPlan.h:3945
static RetTy doCast(SrcTy R)
doCast is used by cast<>.
Definition VPlan.h:3959
static RetTy doCastIfPossible(SrcTy R)
doCastIfPossible is used by dyn_cast<>.
Definition VPlan.h:3964
static bool isPossible(SrcTy R)
Definition VPlan.h:3946
Support casting from VPRecipeBase -> VPPhiAccessors, by down-casting to the recipe types implementing...
Definition VPlan.h:3874
static VPPhiAccessors * doCastIfPossible(SrcTy f)
doCastIfPossible is used by dyn_cast<>.
Definition VPlan.h:3895
CastInfo< VPPhiAccessors, SrcTy > Self
Definition VPlan.h:3876
static VPPhiAccessors * doCast(SrcTy R)
doCast is used by cast<>.
Definition VPlan.h:3879
This struct provides a method for customizing the way a cast is performed.
Definition Casting.h:476
static bool isPossible(const VPRecipeBase *f)
Definition VPlan.h:3866
This struct provides a way to check if a given cast is possible.
Definition Casting.h:253
static bool isPossible(const SrcTy &f)
Definition Casting.h:254
This reduction is in-loop.
Definition VPlan.h:2413
Possible variants of a reduction.
Definition VPlan.h:2411
This reduction is unordered with the partial result scaled down by some factor.
Definition VPlan.h:2416
unsigned VFScaleFactor
Definition VPlan.h:2417
A MapVector that performs no allocations if smaller than a certain size.
Definition MapVector.h:276
Struct to hold various analysis needed for cost computations.
void execute(VPTransformState &State) override
Generate the phi nodes.
VPFirstOrderRecurrencePHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2382
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this first-order recurrence phi recipe.
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2394
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPFirstOrderRecurrencePHIRecipe(PHINode *Phi, VPValue &Start, VPValue &BackedgeValue)
Definition VPlan.h:2374
DisjointFlagsTy(bool IsDisjoint)
Definition VPlan.h:640
NonNegFlagsTy(bool IsNonNeg)
Definition VPlan.h:645
TruncFlagsTy(bool HasNUW, bool HasNSW)
Definition VPlan.h:635
WrapFlagsTy(bool HasNUW, bool HasNSW)
Definition VPlan.h:628
PHINode & getIRPhi()
Definition VPlan.h:1495
VPIRPhi(PHINode &PN)
Definition VPlan.h:1488
static bool classof(const VPRecipeBase *U)
Definition VPlan.h:1490
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:1506
static bool classof(const VPUser *U)
Definition VPlan.h:1382
VPPhi * clone() override
Clone the current recipe.
Definition VPlan.h:1397
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:1412
VPPhi(ArrayRef< VPValue * > Operands, DebugLoc DL, const Twine &Name="")
Definition VPlan.h:1379
static bool classof(const VPSingleDefRecipe *SDR)
Definition VPlan.h:1392
static bool classof(const VPValue *V)
Definition VPlan.h:1387
A pure-virtual common base class for recipes defining a single VPValue and using IR flags.
Definition VPlan.h:923
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:929
InstructionCost getCostForRecipeWithOpcode(unsigned Opcode, ElementCount VF, VPCostContext &Ctx) const
Compute the cost for this recipe for VF, using Opcode and Ctx.
VPRecipeWithIRFlags(const unsigned char SC, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:924
static bool classof(const VPValue *V)
Definition VPlan.h:949
static bool classof(const VPSingleDefRecipe *U)
Definition VPlan.h:956
void execute(VPTransformState &State) override=0
The method which generates the output IR instructions that correspond to this VPRecipe,...
VPRecipeWithIRFlags * clone() override=0
Clone the current recipe.
static bool classof(const VPUser *U)
Definition VPlan.h:944
VPTransformState holds information passed down when "executing" a VPlan, needed for generating the ou...
A recipe for widening load operations with vector-predication intrinsics, using the address to load f...
Definition VPlan.h:3394
LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override
Generate the wide load or gather.
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenLoadEVLRecipe.
VPValue * getEVL() const
Return the EVL operand.
Definition VPlan.h:3407
VPWidenLoadEVLRecipe(VPWidenLoadRecipe &L, VPValue *Addr, VPValue &EVL, VPValue *Mask)
Definition VPlan.h:3395
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3417
A recipe for widening load operations, using the address to load from and an optional mask.
Definition VPlan.h:3353
VP_CLASSOF_IMPL(VPDef::VPWidenLoadSC)
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3375
void execute(VPTransformState &State) override
Generate a wide load or gather.
VPWidenLoadRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask, bool Consecutive, bool Reverse, const VPIRMetadata &Metadata, DebugLoc DL)
Definition VPlan.h:3354
VPWidenLoadRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3363
A recipe for widening select instructions.
Definition VPlan.h:1803
VPWidenSelectRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1814
VPWidenSelectRecipe(SelectInst *SI, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL={})
Definition VPlan.h:1804
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:1835
VPValue * getCond() const
Definition VPlan.h:1830
unsigned getOpcode() const
Definition VPlan.h:1828
~VPWidenSelectRecipe() override=default
A recipe for widening store operations with vector-predication intrinsics, using the value to store,...
Definition VPlan.h:3478
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition VPlan.h:3490
LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override
Generate the wide store or scatter.
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3503
VPWidenStoreEVLRecipe(VPWidenStoreRecipe &S, VPValue *Addr, VPValue &EVL, VPValue *Mask)
Definition VPlan.h:3479
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenStoreEVLRecipe.
VPValue * getEVL() const
Return the EVL operand.
Definition VPlan.h:3493
A recipe for widening store operations, using the stored value, the address to store to and an option...
Definition VPlan.h:3435
VP_CLASSOF_IMPL(VPDef::VPWidenStoreSC)
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition VPlan.h:3453
VPWidenStoreRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3444
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3459
VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal, VPValue *Mask, bool Consecutive, bool Reverse, const VPIRMetadata &Metadata, DebugLoc DL)
Definition VPlan.h:3436