LLVM 22.0.0git
VPlan.h
Go to the documentation of this file.
1//===- VPlan.h - Represent A Vectorizer Plan --------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file contains the declarations of the Vectorization Plan base classes:
11/// 1. VPBasicBlock and VPRegionBlock that inherit from a common pure virtual
12/// VPBlockBase, together implementing a Hierarchical CFG;
13/// 2. Pure virtual VPRecipeBase serving as the base class for recipes contained
14/// within VPBasicBlocks;
15/// 3. Pure virtual VPSingleDefRecipe serving as a base class for recipes that
16/// also inherit from VPValue.
17/// 4. VPInstruction, a concrete Recipe and VPUser modeling a single planned
18/// instruction;
19/// 5. The VPlan class holding a candidate for vectorization;
20/// These are documented in docs/VectorizationPlan.rst.
21//
22//===----------------------------------------------------------------------===//
23
24#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
25#define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
26
27#include "VPlanValue.h"
28#include "llvm/ADT/DenseMap.h"
31#include "llvm/ADT/Twine.h"
32#include "llvm/ADT/ilist.h"
33#include "llvm/ADT/ilist_node.h"
37#include "llvm/IR/DebugLoc.h"
38#include "llvm/IR/FMF.h"
39#include "llvm/IR/Operator.h"
42#include <cassert>
43#include <cstddef>
44#include <functional>
45#include <string>
46#include <utility>
47
48namespace llvm {
49
50class BasicBlock;
51class DominatorTree;
53class IRBuilderBase;
54struct VPTransformState;
55class raw_ostream;
57class SCEV;
58class Type;
59class VPBasicBlock;
60class VPBuilder;
61class VPDominatorTree;
62class VPRegionBlock;
63class VPlan;
64class VPLane;
66class VPlanSlp;
67class Value;
69
70struct VPCostContext;
71
72namespace Intrinsic {
73typedef unsigned ID;
74}
75
76using VPlanPtr = std::unique_ptr<VPlan>;
77
78/// VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
79/// A VPBlockBase can be either a VPBasicBlock or a VPRegionBlock.
81 friend class VPBlockUtils;
82
83 const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).
84
85 /// An optional name for the block.
86 std::string Name;
87
88 /// The immediate VPRegionBlock which this VPBlockBase belongs to, or null if
89 /// it is a topmost VPBlockBase.
90 VPRegionBlock *Parent = nullptr;
91
92 /// List of predecessor blocks.
94
95 /// List of successor blocks.
97
98 /// VPlan containing the block. Can only be set on the entry block of the
99 /// plan.
100 VPlan *Plan = nullptr;
101
102 /// Add \p Successor as the last successor to this block.
103 void appendSuccessor(VPBlockBase *Successor) {
104 assert(Successor && "Cannot add nullptr successor!");
105 Successors.push_back(Successor);
106 }
107
108 /// Add \p Predecessor as the last predecessor to this block.
109 void appendPredecessor(VPBlockBase *Predecessor) {
110 assert(Predecessor && "Cannot add nullptr predecessor!");
111 Predecessors.push_back(Predecessor);
112 }
113
114 /// Remove \p Predecessor from the predecessors of this block.
115 void removePredecessor(VPBlockBase *Predecessor) {
116 auto Pos = find(Predecessors, Predecessor);
117 assert(Pos && "Predecessor does not exist");
118 Predecessors.erase(Pos);
119 }
120
121 /// Remove \p Successor from the successors of this block.
122 void removeSuccessor(VPBlockBase *Successor) {
123 auto Pos = find(Successors, Successor);
124 assert(Pos && "Successor does not exist");
125 Successors.erase(Pos);
126 }
127
128 /// This function replaces one predecessor with another, useful when
129 /// trying to replace an old block in the CFG with a new one.
130 void replacePredecessor(VPBlockBase *Old, VPBlockBase *New) {
131 auto I = find(Predecessors, Old);
132 assert(I != Predecessors.end());
133 assert(Old->getParent() == New->getParent() &&
134 "replaced predecessor must have the same parent");
135 *I = New;
136 }
137
138 /// This function replaces one successor with another, useful when
139 /// trying to replace an old block in the CFG with a new one.
140 void replaceSuccessor(VPBlockBase *Old, VPBlockBase *New) {
141 auto I = find(Successors, Old);
142 assert(I != Successors.end());
143 assert(Old->getParent() == New->getParent() &&
144 "replaced successor must have the same parent");
145 *I = New;
146 }
147
148protected:
149 VPBlockBase(const unsigned char SC, const std::string &N)
150 : SubclassID(SC), Name(N) {}
151
152public:
153 /// An enumeration for keeping track of the concrete subclass of VPBlockBase
154 /// that are actually instantiated. Values of this enumeration are kept in the
155 /// SubclassID field of the VPBlockBase objects. They are used for concrete
156 /// type identification.
157 using VPBlockTy = enum { VPRegionBlockSC, VPBasicBlockSC, VPIRBasicBlockSC };
158
160
161 virtual ~VPBlockBase() = default;
162
163 const std::string &getName() const { return Name; }
164
165 void setName(const Twine &newName) { Name = newName.str(); }
166
167 /// \return an ID for the concrete type of this object.
168 /// This is used to implement the classof checks. This should not be used
169 /// for any other purpose, as the values may change as LLVM evolves.
170 unsigned getVPBlockID() const { return SubclassID; }
171
172 VPRegionBlock *getParent() { return Parent; }
173 const VPRegionBlock *getParent() const { return Parent; }
174
175 /// \return A pointer to the plan containing the current block.
176 VPlan *getPlan();
177 const VPlan *getPlan() const;
178
179 /// Sets the pointer of the plan containing the block. The block must be the
180 /// entry block into the VPlan.
181 void setPlan(VPlan *ParentPlan);
182
183 void setParent(VPRegionBlock *P) { Parent = P; }
184
185 /// \return the VPBasicBlock that is the entry of this VPBlockBase,
186 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
187 /// VPBlockBase is a VPBasicBlock, it is returned.
188 const VPBasicBlock *getEntryBasicBlock() const;
189 VPBasicBlock *getEntryBasicBlock();
190
191 /// \return the VPBasicBlock that is the exiting block of this VPBlockBase,
192 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
193 /// VPBlockBase is a VPBasicBlock, it is returned.
194 const VPBasicBlock *getExitingBasicBlock() const;
195 VPBasicBlock *getExitingBasicBlock();
196
197 const VPBlocksTy &getSuccessors() const { return Successors; }
198 VPBlocksTy &getSuccessors() { return Successors; }
199
202
203 const VPBlocksTy &getPredecessors() const { return Predecessors; }
204 VPBlocksTy &getPredecessors() { return Predecessors; }
205
206 /// \return the successor of this VPBlockBase if it has a single successor.
207 /// Otherwise return a null pointer.
209 return (Successors.size() == 1 ? *Successors.begin() : nullptr);
210 }
211
212 /// \return the predecessor of this VPBlockBase if it has a single
213 /// predecessor. Otherwise return a null pointer.
215 return (Predecessors.size() == 1 ? *Predecessors.begin() : nullptr);
216 }
217
218 size_t getNumSuccessors() const { return Successors.size(); }
219 size_t getNumPredecessors() const { return Predecessors.size(); }
220
221 /// Returns true if this block has any predecessors.
222 bool hasPredecessors() const { return !Predecessors.empty(); }
223
224 /// An Enclosing Block of a block B is any block containing B, including B
225 /// itself. \return the closest enclosing block starting from "this", which
226 /// has successors. \return the root enclosing block if all enclosing blocks
227 /// have no successors.
228 VPBlockBase *getEnclosingBlockWithSuccessors();
229
230 /// \return the closest enclosing block starting from "this", which has
231 /// predecessors. \return the root enclosing block if all enclosing blocks
232 /// have no predecessors.
233 VPBlockBase *getEnclosingBlockWithPredecessors();
234
235 /// \return the successors either attached directly to this VPBlockBase or, if
236 /// this VPBlockBase is the exit block of a VPRegionBlock and has no
237 /// successors of its own, search recursively for the first enclosing
238 /// VPRegionBlock that has successors and return them. If no such
239 /// VPRegionBlock exists, return the (empty) successors of the topmost
240 /// VPBlockBase reached.
242 return getEnclosingBlockWithSuccessors()->getSuccessors();
243 }
244
245 /// \return the hierarchical successor of this VPBlockBase if it has a single
246 /// hierarchical successor. Otherwise return a null pointer.
248 return getEnclosingBlockWithSuccessors()->getSingleSuccessor();
249 }
250
251 /// \return the predecessors either attached directly to this VPBlockBase or,
252 /// if this VPBlockBase is the entry block of a VPRegionBlock and has no
253 /// predecessors of its own, search recursively for the first enclosing
254 /// VPRegionBlock that has predecessors and return them. If no such
255 /// VPRegionBlock exists, return the (empty) predecessors of the topmost
256 /// VPBlockBase reached.
258 return getEnclosingBlockWithPredecessors()->getPredecessors();
259 }
260
261 /// \return the hierarchical predecessor of this VPBlockBase if it has a
262 /// single hierarchical predecessor. Otherwise return a null pointer.
266
267 /// Set a given VPBlockBase \p Successor as the single successor of this
268 /// VPBlockBase. This VPBlockBase is not added as predecessor of \p Successor.
269 /// This VPBlockBase must have no successors.
271 assert(Successors.empty() && "Setting one successor when others exist.");
272 assert(Successor->getParent() == getParent() &&
273 "connected blocks must have the same parent");
274 appendSuccessor(Successor);
275 }
276
277 /// Set two given VPBlockBases \p IfTrue and \p IfFalse to be the two
278 /// successors of this VPBlockBase. This VPBlockBase is not added as
279 /// predecessor of \p IfTrue or \p IfFalse. This VPBlockBase must have no
280 /// successors.
281 void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse) {
282 assert(Successors.empty() && "Setting two successors when others exist.");
283 appendSuccessor(IfTrue);
284 appendSuccessor(IfFalse);
285 }
286
287 /// Set each VPBasicBlock in \p NewPreds as predecessor of this VPBlockBase.
288 /// This VPBlockBase must have no predecessors. This VPBlockBase is not added
289 /// as successor of any VPBasicBlock in \p NewPreds.
291 assert(Predecessors.empty() && "Block predecessors already set.");
292 for (auto *Pred : NewPreds)
293 appendPredecessor(Pred);
294 }
295
296 /// Set each VPBasicBlock in \p NewSuccs as successor of this VPBlockBase.
297 /// This VPBlockBase must have no successors. This VPBlockBase is not added
298 /// as predecessor of any VPBasicBlock in \p NewSuccs.
300 assert(Successors.empty() && "Block successors already set.");
301 for (auto *Succ : NewSuccs)
302 appendSuccessor(Succ);
303 }
304
305 /// Remove all the predecessors of this block.
306 void clearPredecessors() { Predecessors.clear(); }
307
308 /// Remove all the successors of this block.
309 void clearSuccessors() { Successors.clear(); }
310
311 /// Swap predecessors of the block. The block must have exactly 2
312 /// predecessors.
314 assert(Predecessors.size() == 2 && "must have 2 predecessors to swap");
315 std::swap(Predecessors[0], Predecessors[1]);
316 }
317
318 /// Swap successors of the block. The block must have exactly 2 successors.
319 // TODO: This should be part of introducing conditional branch recipes rather
320 // than being independent.
322 assert(Successors.size() == 2 && "must have 2 successors to swap");
323 std::swap(Successors[0], Successors[1]);
324 }
325
326 /// Returns the index for \p Pred in the blocks predecessors list.
327 unsigned getIndexForPredecessor(const VPBlockBase *Pred) const {
328 assert(count(Predecessors, Pred) == 1 &&
329 "must have Pred exactly once in Predecessors");
330 return std::distance(Predecessors.begin(), find(Predecessors, Pred));
331 }
332
333 /// Returns the index for \p Succ in the blocks successor list.
334 unsigned getIndexForSuccessor(const VPBlockBase *Succ) const {
335 assert(count(Successors, Succ) == 1 &&
336 "must have Succ exactly once in Successors");
337 return std::distance(Successors.begin(), find(Successors, Succ));
338 }
339
340 /// The method which generates the output IR that correspond to this
341 /// VPBlockBase, thereby "executing" the VPlan.
342 virtual void execute(VPTransformState *State) = 0;
343
344 /// Return the cost of the block.
346
347#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
348 void printAsOperand(raw_ostream &OS, bool PrintType = false) const {
349 OS << getName();
350 }
351
352 /// Print plain-text dump of this VPBlockBase to \p O, prefixing all lines
353 /// with \p Indent. \p SlotTracker is used to print unnamed VPValue's using
354 /// consecutive numbers.
355 ///
356 /// Note that the numbering is applied to the whole VPlan, so printing
357 /// individual blocks is consistent with the whole VPlan printing.
358 virtual void print(raw_ostream &O, const Twine &Indent,
359 VPSlotTracker &SlotTracker) const = 0;
360
361 /// Print plain-text dump of this VPBlockBase to \p O.
362 void print(raw_ostream &O) const;
363
364 /// Print the successors of this block to \p O, prefixing all lines with \p
365 /// Indent.
366 void printSuccessors(raw_ostream &O, const Twine &Indent) const;
367
368 /// Dump this VPBlockBase to dbgs().
369 LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
370#endif
371
372 /// Clone the current block and its recipes without updating the operands of
373 /// the cloned recipes, including all blocks in the single-entry single-exit
374 /// region for VPRegionBlocks.
375 virtual VPBlockBase *clone() = 0;
376};
377
378/// VPRecipeBase is a base class modeling a sequence of one or more output IR
379/// instructions. VPRecipeBase owns the VPValues it defines through VPDef
380/// and is responsible for deleting its defined values. Single-value
381 /// recipes must inherit from VPSingleDefRecipe instead of inheriting from both
382/// VPRecipeBase and VPValue separately.
384 : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
385 public VPDef,
386 public VPUser {
387 friend VPBasicBlock;
388 friend class VPBlockUtils;
389
390 /// Each VPRecipe belongs to a single VPBasicBlock.
391 VPBasicBlock *Parent = nullptr;
392
393 /// The debug location for the recipe.
394 DebugLoc DL;
395
396public:
397 VPRecipeBase(const unsigned char SC, ArrayRef<VPValue *> Operands,
399 : VPDef(SC), VPUser(Operands), DL(DL) {}
400
401 ~VPRecipeBase() override = default;
402
403 /// Clone the current recipe.
404 virtual VPRecipeBase *clone() = 0;
405
406 /// \return the VPBasicBlock which this VPRecipe belongs to.
407 VPBasicBlock *getParent() { return Parent; }
408 const VPBasicBlock *getParent() const { return Parent; }
409
410 /// \return the VPRegionBlock which the recipe belongs to.
411 VPRegionBlock *getRegion();
412 const VPRegionBlock *getRegion() const;
413
414 /// The method which generates the output IR instructions that correspond to
415 /// this VPRecipe, thereby "executing" the VPlan.
416 virtual void execute(VPTransformState &State) = 0;
417
418 /// Return the cost of this recipe, taking into account if the cost
419 /// computation should be skipped and the ForceTargetInstructionCost flag.
420 /// Also takes care of printing the cost for debugging.
422
423 /// Insert an unlinked recipe into a basic block immediately before
424 /// the specified recipe.
425 void insertBefore(VPRecipeBase *InsertPos);
426 /// Insert an unlinked recipe into \p BB immediately before the insertion
427 /// point \p IP;
428 void insertBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator IP);
429
430 /// Insert an unlinked Recipe into a basic block immediately after
431 /// the specified Recipe.
432 void insertAfter(VPRecipeBase *InsertPos);
433
434 /// Unlink this recipe from its current VPBasicBlock and insert it into
435 /// the VPBasicBlock that MovePos lives in, right after MovePos.
436 void moveAfter(VPRecipeBase *MovePos);
437
438 /// Unlink this recipe and insert into BB before I.
439 ///
440 /// \pre I is a valid iterator into BB.
441 void moveBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator I);
442
443 /// This method unlinks 'this' from the containing basic block, but does not
444 /// delete it.
445 void removeFromParent();
446
447 /// This method unlinks 'this' from the containing basic block and deletes it.
448 ///
449 /// \returns an iterator pointing to the element after the erased one
451
452 /// Method to support type inquiry through isa, cast, and dyn_cast.
453 static inline bool classof(const VPDef *D) {
454 // All VPDefs are also VPRecipeBases.
455 return true;
456 }
457
458 static inline bool classof(const VPUser *U) { return true; }
459
460 /// Returns true if the recipe may have side-effects.
461 bool mayHaveSideEffects() const;
462
463 /// Returns true for PHI-like recipes.
464 bool isPhi() const;
465
466 /// Returns true if the recipe may read from memory.
467 bool mayReadFromMemory() const;
468
469 /// Returns true if the recipe may write to memory.
470 bool mayWriteToMemory() const;
471
472 /// Returns true if the recipe may read from or write to memory.
473 bool mayReadOrWriteMemory() const {
475 }
476
477 /// Returns the debug location of the recipe.
478 DebugLoc getDebugLoc() const { return DL; }
479
480 /// Return true if the recipe is a scalar cast.
481 bool isScalarCast() const;
482
483 /// Set the recipe's debug location to \p NewDL.
484 void setDebugLoc(DebugLoc NewDL) { DL = NewDL; }
485
486#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
487 /// Print the recipe, delegating to printRecipe().
488 void print(raw_ostream &O, const Twine &Indent,
489 VPSlotTracker &SlotTracker) const override final;
490#endif
491
492protected:
493 /// Compute the cost of this recipe either using a recipe's specialized
494 /// implementation or using the legacy cost model and the underlying
495 /// instructions.
496 virtual InstructionCost computeCost(ElementCount VF,
497 VPCostContext &Ctx) const;
498
499#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
500 /// Each concrete VPRecipe prints itself, without printing common information,
501 /// like debug info or metadata.
502 virtual void printRecipe(raw_ostream &O, const Twine &Indent,
503 VPSlotTracker &SlotTracker) const = 0;
504#endif
505};
506
507// Helper macro to define common classof implementations for recipes.
// Expanding VP_CLASSOF_IMPL(ID) inside a recipe class defines five static
// classof overloads, taking a VPDef, VPValue, VPUser, VPRecipeBase or
// VPSingleDefRecipe pointer respectively. Each one returns true exactly when
// the recipe reached from the argument reports getVPDefID() == ID.
// - The VPValue overload goes through getDefiningRecipe() and returns false
//   when that call yields null.
// - The VPUser overload goes through dyn_cast<VPRecipeBase> and returns false
//   when the cast fails.
508#define VP_CLASSOF_IMPL(VPDefID) \
509 static inline bool classof(const VPDef *D) { \
510 return D->getVPDefID() == VPDefID; \
511 } \
512 static inline bool classof(const VPValue *V) { \
513 auto *R = V->getDefiningRecipe(); \
514 return R && R->getVPDefID() == VPDefID; \
515 } \
516 static inline bool classof(const VPUser *U) { \
517 auto *R = dyn_cast<VPRecipeBase>(U); \
518 return R && R->getVPDefID() == VPDefID; \
519 } \
520 static inline bool classof(const VPRecipeBase *R) { \
521 return R->getVPDefID() == VPDefID; \
522 } \
523 static inline bool classof(const VPSingleDefRecipe *R) { \
524 return R->getVPDefID() == VPDefID; \
525 }
526
527/// VPSingleDefRecipe is a base class for recipes modeling a sequence of one
528/// or more output IR instructions that define a single result VPValue.
529/// Note that VPRecipeBase must be inherited from before VPValue.
530class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
531public:
532 VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
534 : VPRecipeBase(SC, Operands, DL), VPValue(this) {}
535
536 VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
538 : VPRecipeBase(SC, Operands, DL), VPValue(this, UV) {}
539
540 static inline bool classof(const VPRecipeBase *R) {
541 switch (R->getVPDefID()) {
542 case VPRecipeBase::VPDerivedIVSC:
543 case VPRecipeBase::VPEVLBasedIVPHISC:
544 case VPRecipeBase::VPExpandSCEVSC:
545 case VPRecipeBase::VPExpressionSC:
546 case VPRecipeBase::VPInstructionSC:
547 case VPRecipeBase::VPReductionEVLSC:
548 case VPRecipeBase::VPReductionSC:
549 case VPRecipeBase::VPReplicateSC:
550 case VPRecipeBase::VPScalarIVStepsSC:
551 case VPRecipeBase::VPVectorPointerSC:
552 case VPRecipeBase::VPVectorEndPointerSC:
553 case VPRecipeBase::VPWidenCallSC:
554 case VPRecipeBase::VPWidenCanonicalIVSC:
555 case VPRecipeBase::VPWidenCastSC:
556 case VPRecipeBase::VPWidenGEPSC:
557 case VPRecipeBase::VPWidenIntrinsicSC:
558 case VPRecipeBase::VPWidenSC:
559 case VPRecipeBase::VPWidenSelectSC:
560 case VPRecipeBase::VPBlendSC:
561 case VPRecipeBase::VPPredInstPHISC:
562 case VPRecipeBase::VPCanonicalIVPHISC:
563 case VPRecipeBase::VPActiveLaneMaskPHISC:
564 case VPRecipeBase::VPFirstOrderRecurrencePHISC:
565 case VPRecipeBase::VPWidenPHISC:
566 case VPRecipeBase::VPWidenIntOrFpInductionSC:
567 case VPRecipeBase::VPWidenPointerInductionSC:
568 case VPRecipeBase::VPReductionPHISC:
569 case VPRecipeBase::VPPartialReductionSC:
570 return true;
571 case VPRecipeBase::VPBranchOnMaskSC:
572 case VPRecipeBase::VPInterleaveEVLSC:
573 case VPRecipeBase::VPInterleaveSC:
574 case VPRecipeBase::VPIRInstructionSC:
575 case VPRecipeBase::VPWidenLoadEVLSC:
576 case VPRecipeBase::VPWidenLoadSC:
577 case VPRecipeBase::VPWidenStoreEVLSC:
578 case VPRecipeBase::VPWidenStoreSC:
579 case VPRecipeBase::VPHistogramSC:
580 // TODO: Widened stores don't define a value, but widened loads do. Split
581 // the recipes to be able to make widened loads VPSingleDefRecipes.
582 return false;
583 }
584 llvm_unreachable("Unhandled VPDefID");
585 }
586
587 static inline bool classof(const VPUser *U) {
588 auto *R = dyn_cast<VPRecipeBase>(U);
589 return R && classof(R);
590 }
591
592 VPSingleDefRecipe *clone() override = 0;
593
594 /// Returns the underlying instruction.
601
602#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
603 /// Print this VPSingleDefRecipe to dbgs() (for debugging).
605#endif
606};
607
608/// Class to record and manage LLVM IR flags.
// Category of the IR operation whose flags are being tracked. The value
// stored in OpType is what the switch statements in this class dispatch on
// to decide which flag field to read or write. The per-enumerator notes
// describe what the Instruction-taking constructor selects.
610 enum class OperationType : unsigned char {
611 Cmp, // CmpInst that is not an FCmpInst; only a predicate is kept.
612 FCmp, // FCmpInst; a predicate plus fast-math flags are kept.
613 OverflowingBinOp, // OverflowingBinaryOperator; NUW/NSW.
614 Trunc, // TruncInst; NUW/NSW.
615 DisjointOp, // PossiblyDisjointInst; disjoint bit.
616 PossiblyExactOp, // PossiblyExactOperator; exact bit.
617 GEPOp, // GetElementPtrInst; GEP no-wrap flags.
618 FPMathOp, // FPMathOperator; fast-math flags.
619 NonNegOp, // PossiblyNonNegInst; non-negative bit.
620 Other // Anything else; all flags are zeroed.
621 };
622
623public:
624 struct WrapFlagsTy {
625 char HasNUW : 1;
626 char HasNSW : 1;
627
629 };
630
632 char HasNUW : 1;
633 char HasNSW : 1;
634
636 };
637
642
644 char NonNeg : 1;
645 NonNegFlagsTy(bool IsNonNeg) : NonNeg(IsNonNeg) {}
646 };
647
648private:
649 struct ExactFlagsTy {
650 char IsExact : 1;
651 };
// Packed storage for fast-math flags: one single-bit field per flag.
// applyFlags() forwards each bit to the matching Instruction::setHas*()
// setter.
652 struct FastMathFlagsTy {
653 char AllowReassoc : 1;
654 char NoNaNs : 1;
655 char NoInfs : 1;
656 char NoSignedZeros : 1;
657 char AllowReciprocal : 1;
658 char AllowContract : 1;
659 char ApproxFunc : 1;
660
// Converting constructor from llvm::FastMathFlags; defined out of line.
// NOTE(review): presumably copies each flag into the matching bit — confirm
// against the definition.
661 LLVM_ABI_FOR_TEST FastMathFlagsTy(const FastMathFlags &FMF);
662 };
663 /// Holds both the predicate and fast-math flags for floating-point
664 /// comparisons.
665 struct FCmpFlagsTy {
667 FastMathFlagsTy FMFs;
668 };
669
670 OperationType OpType;
671
672 union {
677 ExactFlagsTy ExactFlags;
680 FastMathFlagsTy FMFs;
681 FCmpFlagsTy FCmpFlags;
682 unsigned AllFlags;
683 };
684
685public:
686 VPIRFlags() : OpType(OperationType::Other), AllFlags(0) {}
687
689 if (auto *FCmp = dyn_cast<FCmpInst>(&I)) {
690 OpType = OperationType::FCmp;
691 FCmpFlags.Pred = FCmp->getPredicate();
692 FCmpFlags.FMFs = FCmp->getFastMathFlags();
693 } else if (auto *Op = dyn_cast<CmpInst>(&I)) {
694 OpType = OperationType::Cmp;
695 CmpPredicate = Op->getPredicate();
696 } else if (auto *Op = dyn_cast<PossiblyDisjointInst>(&I)) {
697 OpType = OperationType::DisjointOp;
698 DisjointFlags.IsDisjoint = Op->isDisjoint();
699 } else if (auto *Op = dyn_cast<OverflowingBinaryOperator>(&I)) {
700 OpType = OperationType::OverflowingBinOp;
701 WrapFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
702 } else if (auto *Op = dyn_cast<TruncInst>(&I)) {
703 OpType = OperationType::Trunc;
704 TruncFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
705 } else if (auto *Op = dyn_cast<PossiblyExactOperator>(&I)) {
706 OpType = OperationType::PossiblyExactOp;
707 ExactFlags.IsExact = Op->isExact();
708 } else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
709 OpType = OperationType::GEPOp;
710 GEPFlags = GEP->getNoWrapFlags();
711 } else if (auto *PNNI = dyn_cast<PossiblyNonNegInst>(&I)) {
712 OpType = OperationType::NonNegOp;
713 NonNegFlags.NonNeg = PNNI->hasNonNeg();
714 } else if (auto *Op = dyn_cast<FPMathOperator>(&I)) {
715 OpType = OperationType::FPMathOp;
716 FMFs = Op->getFastMathFlags();
717 } else {
718 OpType = OperationType::Other;
719 AllFlags = 0;
720 }
721 }
722
724 : OpType(OperationType::Cmp), CmpPredicate(Pred) {}
725
727 : OpType(OperationType::FCmp) {
728 FCmpFlags.Pred = Pred;
729 FCmpFlags.FMFs = FMFs;
730 }
731
733 : OpType(OperationType::OverflowingBinOp), WrapFlags(WrapFlags) {}
734
736 : OpType(OperationType::Trunc), TruncFlags(TruncFlags) {}
737
738 VPIRFlags(FastMathFlags FMFs) : OpType(OperationType::FPMathOp), FMFs(FMFs) {}
739
741 : OpType(OperationType::DisjointOp), DisjointFlags(DisjointFlags) {}
742
744 : OpType(OperationType::NonNegOp), NonNegFlags(NonNegFlags) {}
745
747 : OpType(OperationType::GEPOp), GEPFlags(GEPFlags) {}
748
750 OpType = Other.OpType;
751 AllFlags = Other.AllFlags;
752 }
753
754 /// Only keep flags also present in \p Other. \p Other must have the same
755 /// OpType as the current object.
756 void intersectFlags(const VPIRFlags &Other);
757
758 /// Drop all poison-generating flags.
760 // NOTE: This needs to be kept in-sync with
761 // Instruction::dropPoisonGeneratingFlags.
762 switch (OpType) {
763 case OperationType::OverflowingBinOp:
764 WrapFlags.HasNUW = false;
765 WrapFlags.HasNSW = false;
766 break;
767 case OperationType::Trunc:
768 TruncFlags.HasNUW = false;
769 TruncFlags.HasNSW = false;
770 break;
771 case OperationType::DisjointOp:
772 DisjointFlags.IsDisjoint = false;
773 break;
774 case OperationType::PossiblyExactOp:
775 ExactFlags.IsExact = false;
776 break;
777 case OperationType::GEPOp:
779 break;
780 case OperationType::FPMathOp:
781 case OperationType::FCmp:
782 getFMFsRef().NoNaNs = false;
783 getFMFsRef().NoInfs = false;
784 break;
785 case OperationType::NonNegOp:
786 NonNegFlags.NonNeg = false;
787 break;
788 case OperationType::Cmp:
789 case OperationType::Other:
790 break;
791 }
792 }
793
794 /// Apply the IR flags to \p I.
795 void applyFlags(Instruction &I) const {
796 switch (OpType) {
797 case OperationType::OverflowingBinOp:
798 I.setHasNoUnsignedWrap(WrapFlags.HasNUW);
799 I.setHasNoSignedWrap(WrapFlags.HasNSW);
800 break;
801 case OperationType::Trunc:
802 I.setHasNoUnsignedWrap(TruncFlags.HasNUW);
803 I.setHasNoSignedWrap(TruncFlags.HasNSW);
804 break;
805 case OperationType::DisjointOp:
806 cast<PossiblyDisjointInst>(&I)->setIsDisjoint(DisjointFlags.IsDisjoint);
807 break;
808 case OperationType::PossiblyExactOp:
809 I.setIsExact(ExactFlags.IsExact);
810 break;
811 case OperationType::GEPOp:
812 cast<GetElementPtrInst>(&I)->setNoWrapFlags(GEPFlags);
813 break;
814 case OperationType::FPMathOp:
815 case OperationType::FCmp: {
816 const FastMathFlagsTy &F = getFMFsRef();
817 I.setHasAllowReassoc(F.AllowReassoc);
818 I.setHasNoNaNs(F.NoNaNs);
819 I.setHasNoInfs(F.NoInfs);
820 I.setHasNoSignedZeros(F.NoSignedZeros);
821 I.setHasAllowReciprocal(F.AllowReciprocal);
822 I.setHasAllowContract(F.AllowContract);
823 I.setHasApproxFunc(F.ApproxFunc);
824 break;
825 }
826 case OperationType::NonNegOp:
827 I.setNonNeg(NonNegFlags.NonNeg);
828 break;
829 case OperationType::Cmp:
830 case OperationType::Other:
831 break;
832 }
833 }
834
836 assert((OpType == OperationType::Cmp || OpType == OperationType::FCmp) &&
837 "recipe doesn't have a compare predicate");
838 return OpType == OperationType::FCmp ? FCmpFlags.Pred : CmpPredicate;
839 }
840
842 assert((OpType == OperationType::Cmp || OpType == OperationType::FCmp) &&
843 "recipe doesn't have a compare predicate");
844 if (OpType == OperationType::FCmp)
845 FCmpFlags.Pred = Pred;
846 else
847 CmpPredicate = Pred;
848 }
849
851
852 /// Returns true if the recipe has a comparison predicate.
853 bool hasPredicate() const {
854 return OpType == OperationType::Cmp || OpType == OperationType::FCmp;
855 }
856
857 /// Returns true if the recipe has fast-math flags.
858 bool hasFastMathFlags() const {
859 return OpType == OperationType::FPMathOp || OpType == OperationType::FCmp;
860 }
861
863
864 /// Returns true if the recipe has non-negative flag.
865 bool hasNonNegFlag() const { return OpType == OperationType::NonNegOp; }
866
867 bool isNonNeg() const {
868 assert(OpType == OperationType::NonNegOp &&
869 "recipe doesn't have a NNEG flag");
870 return NonNegFlags.NonNeg;
871 }
872
873 bool hasNoUnsignedWrap() const {
874 switch (OpType) {
875 case OperationType::OverflowingBinOp:
876 return WrapFlags.HasNUW;
877 case OperationType::Trunc:
878 return TruncFlags.HasNUW;
879 default:
880 llvm_unreachable("recipe doesn't have a NUW flag");
881 }
882 }
883
884 bool hasNoSignedWrap() const {
885 switch (OpType) {
886 case OperationType::OverflowingBinOp:
887 return WrapFlags.HasNSW;
888 case OperationType::Trunc:
889 return TruncFlags.HasNSW;
890 default:
891 llvm_unreachable("recipe doesn't have a NSW flag");
892 }
893 }
894
895 bool isDisjoint() const {
896 assert(OpType == OperationType::DisjointOp &&
897 "recipe cannot have a disjoing flag");
898 return DisjointFlags.IsDisjoint;
899 }
900
901private:
902 /// Get a reference to the fast-math flags for FPMathOp or FCmp.
903 FastMathFlagsTy &getFMFsRef() {
904 return OpType == OperationType::FCmp ? FCmpFlags.FMFs : FMFs;
905 }
906 const FastMathFlagsTy &getFMFsRef() const {
907 return OpType == OperationType::FCmp ? FCmpFlags.FMFs : FMFs;
908 }
909
910public:
911#if !defined(NDEBUG)
912 /// Returns true if the set flags are valid for \p Opcode.
913 bool flagsValidForOpcode(unsigned Opcode) const;
914#endif
915
916#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
917 void printFlags(raw_ostream &O) const;
918#endif
919};
920
921/// A pure-virtual common base class for recipes defining a single VPValue and
922/// using IR flags.
924 VPRecipeWithIRFlags(const unsigned char SC, ArrayRef<VPValue *> Operands,
925 const VPIRFlags &Flags,
927 : VPSingleDefRecipe(SC, Operands, DL), VPIRFlags(Flags) {}
928
929 static inline bool classof(const VPRecipeBase *R) {
930 return R->getVPDefID() == VPRecipeBase::VPInstructionSC ||
931 R->getVPDefID() == VPRecipeBase::VPWidenSC ||
932 R->getVPDefID() == VPRecipeBase::VPWidenGEPSC ||
933 R->getVPDefID() == VPRecipeBase::VPWidenCallSC ||
934 R->getVPDefID() == VPRecipeBase::VPWidenCastSC ||
935 R->getVPDefID() == VPRecipeBase::VPWidenIntrinsicSC ||
936 R->getVPDefID() == VPRecipeBase::VPWidenSelectSC ||
937 R->getVPDefID() == VPRecipeBase::VPReductionSC ||
938 R->getVPDefID() == VPRecipeBase::VPReductionEVLSC ||
939 R->getVPDefID() == VPRecipeBase::VPReplicateSC ||
940 R->getVPDefID() == VPRecipeBase::VPVectorEndPointerSC ||
941 R->getVPDefID() == VPRecipeBase::VPVectorPointerSC;
942 }
943
944 static inline bool classof(const VPUser *U) {
945 auto *R = dyn_cast<VPRecipeBase>(U);
946 return R && classof(R);
947 }
948
949 static inline bool classof(const VPValue *V) {
950 auto *R = dyn_cast_or_null<VPRecipeBase>(V->getDefiningRecipe());
951 return R && classof(R);
952 }
953
954 VPRecipeWithIRFlags *clone() override = 0;
955
956 static inline bool classof(const VPSingleDefRecipe *U) {
957 auto *R = dyn_cast<VPRecipeBase>(U);
958 return R && classof(R);
959 }
960
961 void execute(VPTransformState &State) override = 0;
962
963 /// Compute the cost for this recipe for \p VF, using \p Opcode and \p Ctx.
965 VPCostContext &Ctx) const;
966};
967
968/// Helper to access the operand that contains the unroll part for this recipe
969/// after unrolling.
// Mixin exposing two protected accessors; both are only declared here and
// defined out of line.
// NOTE(review): PartOpIdx is presumably the operand index at which the unroll
// part is stored on the user — confirm against the definitions.
970template <unsigned PartOpIdx> class LLVM_ABI_FOR_TEST VPUnrollPartAccessor {
971protected:
972 /// Return the VPValue operand containing the unroll part or null if there is
973 /// no such operand.
974 VPValue *getUnrollPartOperand(const VPUser &U) const;
975
976 /// Return the unroll part.
977 unsigned getUnrollPart(const VPUser &U) const;
978};
979
980/// Helper to manage IR metadata for recipes. It filters out metadata that
981/// cannot be propagated.
984
985public:
986 VPIRMetadata() = default;
987
988 /// Adds metadata that can be preserved from the original instruction
989 /// \p I.
991
992 /// Copy constructor for cloning.
993 VPIRMetadata(const VPIRMetadata &Other) = default;
994
996
997 /// Add all metadata to \p I.
998 void applyMetadata(Instruction &I) const;
999
1000 /// Set metadata with kind \p Kind to \p Node. If metadata with \p Kind
1001 /// already exists, it will be replaced. Otherwise, it will be added.
1002 void setMetadata(unsigned Kind, MDNode *Node) {
// Linear search for an existing (kind, node) entry with a matching kind.
1003 auto It =
1004 llvm::find_if(Metadata, [Kind](const std::pair<unsigned, MDNode *> &P) {
1005 return P.first == Kind;
1006 });
// Overwrite the node in place when found; otherwise append a new entry.
1007 if (It != Metadata.end())
1008 It->second = Node;
1009 else
1010 Metadata.emplace_back(Kind, Node);
1011 }
1012
1013 /// Intersect this VPIRMetadata object with \p MD, keeping only metadata
1014 /// nodes that are common to both.
1015 void intersect(const VPIRMetadata &MD);
1016
1017 /// Get metadata of kind \p Kind. Returns nullptr if not found.
1018 MDNode *getMetadata(unsigned Kind) const {
// Linear search over the stored (kind, node) pairs; first match wins.
1019 auto It =
1020 find_if(Metadata, [Kind](const auto &P) { return P.first == Kind; });
1021 return It != Metadata.end() ? It->second : nullptr;
1022 }
1023};
1024
1025/// This is a concrete Recipe that models a single VPlan-level instruction.
1026/// While as any Recipe it may generate a sequence of IR instructions when
1027/// executed, these instructions would always form a single-def expression as
1028/// the VPInstruction is also a single def-use vertex.
1030 public VPIRMetadata,
1031 public VPUnrollPartAccessor<1> {
1032 friend class VPlanSlp;
1033
1034public:
1035 /// VPlan opcodes, extending LLVM IR with idiomatic instructions.
1036 enum {
1038 Instruction::OtherOpsEnd + 1, // Combines the incoming and previous
1039 // values of a first-order recurrence.
1043 // Creates a mask where each lane is active (true) whilst the current
1044 // counter (first operand + index) is less than the second operand. i.e.
1045 // mask[i] = icmp ult (op0 + i), op1
1046 // The size of the mask returned is VF * Multiplier (UF, third op).
1050 // Increment the canonical IV separately for each unrolled part.
1055 /// Given operands of (the same) struct type, creates a struct of fixed-
1056 /// width vectors each containing a struct field of all operands. The
1057 /// number of operands matches the element count of every vector.
1059 /// Creates a fixed-width vector containing all operands. The number of
1060 /// operands matches the vector element count.
1062 /// Extracts all lanes from its (non-scalable) vector operand. This is an
1063 /// abstract VPInstruction whose single defined VPValue represents VF
1064 /// scalars extracted from a vector, to be replaced by VF ExtractElement
1065 /// VPInstructions.
1067 /// Compute the final result of an AnyOf reduction with select(cmp(),x,y),
1068 /// where one of (x,y) is loop invariant, and both x and y are integer type.
1072 // Extracts the last lane from its operand if it is a vector, or the last
1073 // part if scalar. In the latter case, the recipe will be removed during
1074 // unrolling.
1076 // Extracts the last lane for each part from its operand.
1078 // Extracts the second-to-last lane from its operand or the second-to-last
1079 // part if it is scalar. In the latter case, the recipe will be removed
1080 // during unrolling.
1082 LogicalAnd, // Non-poison propagating logical And.
1083 // Add an offset in bytes (second operand) to a base pointer (first
1084 // operand). Only generates scalar values (either for the first lane only or
1085 // for all lanes, depending on its uses).
1087 // Add a vector offset in bytes (second operand) to a scalar base pointer
1088 // (first operand).
1090 // Returns a scalar boolean value, which is true if any lane of its
1091 // (boolean) vector operands is true. It produces the reduced value across
1092 // all unrolled iterations. Unrolling will add all copies of its original
1093 // operand as additional operands. AnyOf is poison-safe as all operands
1094 // will be frozen.
1096 // Calculates the first active lane index of the vector predicate operands.
1097 // It produces the lane index across all unrolled iterations. Unrolling will
1098 // add all copies of its original operand as additional operands.
1100
1101 // The opcodes below are used for VPInstructionWithType.
1102 //
1103 /// Scale the first operand (vector step) by the second operand
1104 /// (scalar-step). Casts both operands to the result type if needed.
1106 /// Start vector for reductions with 3 operands: the original start value,
1107 /// the identity value for the reduction and an integer indicating the
1108 /// scaling factor.
1110 // Creates a step vector starting from 0 to VF with a step of 1.
1112 /// Extracts a single lane (first operand) from a set of vector operands.
1113 /// The lane specifies an index into a vector formed by combining all vector
1114 /// operands (all operands after the first one).
1116 /// Explicit user for the resume phi of the canonical induction in the main
1117 /// VPlan, used by the epilogue vector loop.
1119 /// Returns the value for vscale.
1122 };
1123
1124 /// Returns true if this VPInstruction generates scalar values for all lanes.
1125 /// Most VPInstructions generate a single value per part, either vector or
1126 /// scalar. VPReplicateRecipe takes care of generating multiple (scalar)
1127 /// values per all lanes, stemming from an original ingredient. This method
1128 /// identifies the (rare) cases of VPInstructions that do so as well, w/o an
1129 /// underlying ingredient.
1130 bool doesGeneratePerAllLanes() const;
1131
1132private:
1133 typedef unsigned char OpcodeTy;
1134 OpcodeTy Opcode;
1135
1136 /// An optional name that can be used for the generated IR instruction.
1137 std::string Name;
1138
1139 /// Returns true if we can generate a scalar for the first lane only if
1140 /// needed.
1141 bool canGenerateScalarForFirstLane() const;
1142
1143 /// Utility methods serving execute(): generates a single vector instance of
1144 /// the modeled instruction. \returns the generated value. In some cases an
1145 /// existing value is returned rather than a generated one.
1146 Value *generate(VPTransformState &State);
1147
1148#if !defined(NDEBUG)
1149 /// Return the number of operands determined by the opcode of the
1150 /// VPInstruction. Returns -1u if the number of operands cannot be determined
1151 /// directly by the opcode.
1152 static unsigned getNumOperandsForOpcode(unsigned Opcode);
1153#endif
1154
1155public:
1156 VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands,
1157 const VPIRFlags &Flags = {}, const VPIRMetadata &MD = {},
1158 DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "");
1159
1160 VP_CLASSOF_IMPL(VPDef::VPInstructionSC)
1161
/// Create a new VPInstruction with the same opcode, operands, debug location
/// and name. *this is passed for both the Flags and MD constructor
/// arguments. The underlying value is carried over only when one is set.
1162 VPInstruction *clone() override {
1163 auto *New = new VPInstruction(Opcode, operands(), *this, *this,
1164 getDebugLoc(), Name);
1165 if (getUnderlyingValue())
1166 New->setUnderlyingValue(getUnderlyingInstr());
1167 return New;
1168 }
1169
1170 unsigned getOpcode() const { return Opcode; }
1171
1172 /// Generate the instruction.
1173 /// TODO: We currently execute only per-part unless a specific instance is
1174 /// provided.
1175 void execute(VPTransformState &State) override;
1176
1177 /// Return the cost of this VPInstruction.
1178 InstructionCost computeCost(ElementCount VF,
1179 VPCostContext &Ctx) const override;
1180
1181#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1182 /// Print the VPInstruction to dbgs() (for debugging).
1183 LLVM_DUMP_METHOD void dump() const;
1184#endif
1185
1186 bool hasResult() const {
1187 // CallInst may or may not have a result, depending on the called function.
1188 // Conservatively assume that calls have results for now.
1189 switch (getOpcode()) {
1190 case Instruction::Ret:
1191 case Instruction::Br:
1192 case Instruction::Store:
1193 case Instruction::Switch:
1194 case Instruction::IndirectBr:
1195 case Instruction::Resume:
1196 case Instruction::CatchRet:
1197 case Instruction::Unreachable:
1198 case Instruction::Fence:
1199 case Instruction::AtomicRMW:
1202 return false;
1203 default:
1204 return true;
1205 }
1206 }
1207
1208 /// Returns true if the underlying opcode may read from or write to memory.
1209 bool opcodeMayReadOrWriteFromMemory() const;
1210
1211 /// Returns true if the recipe only uses the first lane of operand \p Op.
1212 bool usesFirstLaneOnly(const VPValue *Op) const override;
1213
1214 /// Returns true if the recipe only uses the first part of operand \p Op.
1215 bool usesFirstPartOnly(const VPValue *Op) const override;
1216
1217 /// Returns true if this VPInstruction produces a scalar value from a vector,
1218 /// e.g. by performing a reduction or extracting a lane.
1219 bool isVectorToScalar() const;
1220
1221 /// Returns true if this VPInstruction's operands are single scalars and the
1222 /// result is also a single scalar.
1223 bool isSingleScalar() const;
1224
1225 /// Returns the symbolic name assigned to the VPInstruction.
1226 StringRef getName() const { return Name; }
1227
1228 /// Set the symbolic name for the VPInstruction.
1229 void setName(StringRef NewName) { Name = NewName.str(); }
1230
1231protected:
1232#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1233 /// Print the VPInstruction to \p O.
1234 void printRecipe(raw_ostream &O, const Twine &Indent,
1235 VPSlotTracker &SlotTracker) const override;
1236#endif
1237};
1238
1239/// A specialization of VPInstruction augmenting it with a dedicated result
1240/// type, to be used when the opcode and operands of the VPInstruction don't
1241/// directly determine the result type. Note that there is no separate VPDef ID
1242/// for VPInstructionWithType; it shares the same ID as VPInstruction and is
1243/// distinguished purely by the opcode.
1245 /// Scalar result type produced by the recipe.
1246 Type *ResultTy;
1247
1248public:
1250 Type *ResultTy, const VPIRFlags &Flags = {},
1251 const VPIRMetadata &Metadata = {},
1253 const Twine &Name = "")
1254 : VPInstruction(Opcode, Operands, Flags, Metadata, DL, Name),
1255 ResultTy(ResultTy) {}
1256
1257 static inline bool classof(const VPRecipeBase *R) {
1258 // VPInstructionWithType are VPInstructions with specific opcodes requiring
1259 // type information.
1260 if (R->isScalarCast())
1261 return true;
1262 auto *VPI = dyn_cast<VPInstruction>(R);
1263 if (!VPI)
1264 return false;
1265 switch (VPI->getOpcode()) {
1269 return true;
1270 default:
1271 return false;
1272 }
1273 }
1274
1275 static inline bool classof(const VPUser *R) {
1277 }
1278
1279 VPInstruction *clone() override {
1280 auto *New =
1282 *this, *this, getDebugLoc(), getName());
1283 New->setUnderlyingValue(getUnderlyingValue());
1284 return New;
1285 }
1286
1287 void execute(VPTransformState &State) override;
1288
1289 /// Return the cost of this VPInstruction.
1291 VPCostContext &Ctx) const override {
1292 // TODO: Compute accurate cost after retiring the legacy cost model.
1293 return 0;
1294 }
1295
1296 Type *getResultType() const { return ResultTy; }
1297
1298protected:
1299#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1300 /// Print the recipe.
1301 void printRecipe(raw_ostream &O, const Twine &Indent,
1302 VPSlotTracker &SlotTracker) const override;
1303#endif
1304};
1305
1306/// Helper type to provide functions to access incoming values and blocks for
1307/// phi-like recipes.
1309protected:
1310 /// Return a VPRecipeBase* to the current object.
1311 virtual const VPRecipeBase *getAsRecipe() const = 0;
1312
1313public:
1314 virtual ~VPPhiAccessors() = default;
1315
1316 /// Returns the incoming VPValue with index \p Idx.
1317 VPValue *getIncomingValue(unsigned Idx) const {
1318 return getAsRecipe()->getOperand(Idx);
1319 }
1320
1321 /// Returns the incoming block with index \p Idx.
1322 const VPBasicBlock *getIncomingBlock(unsigned Idx) const;
1323
1324 /// Returns the number of incoming values, also number of incoming blocks.
1325 virtual unsigned getNumIncoming() const {
1326 return getAsRecipe()->getNumOperands();
1327 }
1328
1329 /// Returns an iterator range over the incoming values.
1331 return make_range(getAsRecipe()->op_begin(),
1332 getAsRecipe()->op_begin() + getNumIncoming());
1333 }
1334
1336 detail::index_iterator, std::function<const VPBasicBlock *(size_t)>>>;
1337
1338 /// Returns an iterator range over the incoming blocks.
1340 std::function<const VPBasicBlock *(size_t)> GetBlock = [this](size_t Idx) {
1341 return getIncomingBlock(Idx);
1342 };
1343 return map_range(index_range(0, getNumIncoming()), GetBlock);
1344 }
1345
1346 /// Returns an iterator range over pairs of incoming values and corresponding
1347 /// incoming blocks.
1353
1354 /// Removes the incoming value for \p IncomingBlock, which must be a
1355 /// predecessor.
1356 void removeIncomingValueFor(VPBlockBase *IncomingBlock) const;
1357
1358#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1359 /// Print the recipe.
1361#endif
1362};
1363
1365 VPPhi(ArrayRef<VPValue *> Operands, DebugLoc DL, const Twine &Name = "")
1366 : VPInstruction(Instruction::PHI, Operands, {}, {}, DL, Name) {}
1367
/// Returns true if \p U is a VPInstruction whose opcode is Instruction::PHI.
1368 static inline bool classof(const VPUser *U) {
1369 auto *VPI = dyn_cast<VPInstruction>(U);
1370 return VPI && VPI->getOpcode() == Instruction::PHI;
1371 }
1372
/// Returns true if \p V is a VPInstruction whose opcode is Instruction::PHI.
1373 static inline bool classof(const VPValue *V) {
1374 auto *VPI = dyn_cast<VPInstruction>(V);
1375 return VPI && VPI->getOpcode() == Instruction::PHI;
1376 }
1377
/// Returns true if \p SDR is a VPInstruction whose opcode is
/// Instruction::PHI.
1378 static inline bool classof(const VPSingleDefRecipe *SDR) {
1379 auto *VPI = dyn_cast<VPInstruction>(SDR);
1380 return VPI && VPI->getOpcode() == Instruction::PHI;
1381 }
1382
/// Create a new VPPhi over the same operands, with this recipe's debug
/// location and name, and propagate the underlying value to the copy.
1383 VPPhi *clone() override {
1384 auto *PhiR = new VPPhi(operands(), getDebugLoc(), getName());
1385 PhiR->setUnderlyingValue(getUnderlyingValue());
1386 return PhiR;
1387 }
1388
1389 void execute(VPTransformState &State) override;
1390
1391protected:
1392#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1393 /// Print the recipe.
1394 void printRecipe(raw_ostream &O, const Twine &Indent,
1395 VPSlotTracker &SlotTracker) const override;
1396#endif
1397
1398 const VPRecipeBase *getAsRecipe() const override { return this; }
1399};
1400
1401 /// A recipe to wrap an original IR instruction not to be modified during
1402 /// execution, except for PHIs. PHIs are modeled via the VPIRPhi subclass.
1403 /// Except for PHIs, VPIRInstructions cannot have any operands.
1405 Instruction &I;
1406
1407protected:
1408 /// VPIRInstruction::create() should be used to create VPIRInstructions, as
1409 /// subclasses may need to be created, e.g. VPIRPhi.
1411 : VPRecipeBase(VPDef::VPIRInstructionSC, ArrayRef<VPValue *>()), I(I) {}
1412
1413public:
1414 ~VPIRInstruction() override = default;
1415
1416 /// Create a new VPIRPhi for \p I, if it is a PHINode, otherwise create a
1417 /// VPIRInstruction.
1419
1420 VP_CLASSOF_IMPL(VPDef::VPIRInstructionSC)
1421
1423 auto *R = create(I);
1424 for (auto *Op : operands())
1425 R->addOperand(Op);
1426 return R;
1427 }
1428
1429 void execute(VPTransformState &State) override;
1430
1431 /// Return the cost of this VPIRInstruction.
1433 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
1434
1435 Instruction &getInstruction() const { return I; }
1436
1437 bool usesScalars(const VPValue *Op) const override {
1439 "Op must be an operand of the recipe");
1440 return true;
1441 }
1442
1443 bool usesFirstPartOnly(const VPValue *Op) const override {
1445 "Op must be an operand of the recipe");
1446 return true;
1447 }
1448
1449 bool usesFirstLaneOnly(const VPValue *Op) const override {
1451 "Op must be an operand of the recipe");
1452 return true;
1453 }
1454
1455 /// Update the recipe's first operand to the last lane of the operand using \p
1456 /// Builder. Must only be used for VPIRInstructions with at least one operand
1457 /// wrapping a PHINode.
1459
1460protected:
1461#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1462 /// Print the recipe.
1463 void printRecipe(raw_ostream &O, const Twine &Indent,
1464 VPSlotTracker &SlotTracker) const override;
1465#endif
1466};
1467
1468 /// An overlay for VPIRInstructions wrapping PHI nodes enabling convenient use of
1469/// cast/dyn_cast/isa and execute() implementation. A single VPValue operand is
1470/// allowed, and it is used to add a new incoming value for the single
1471/// predecessor VPBB.
1473 public VPPhiAccessors {
1475
/// Returns true if \p U is a VPIRInstruction whose wrapped IR instruction is
/// a PHINode.
1476 static inline bool classof(const VPRecipeBase *U) {
1477 auto *R = dyn_cast<VPIRInstruction>(U);
1478 return R && isa<PHINode>(R->getInstruction());
1479 }
1480
1482
1483 void execute(VPTransformState &State) override;
1484
1485protected:
1486#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1487 /// Print the recipe.
1488 void printRecipe(raw_ostream &O, const Twine &Indent,
1489 VPSlotTracker &SlotTracker) const override;
1490#endif
1491
1492 const VPRecipeBase *getAsRecipe() const override { return this; }
1493};
1494
1495/// VPWidenRecipe is a recipe for producing a widened instruction using the
1496/// opcode and operands of the recipe. This recipe covers most of the
1497/// traditional vectorization cases where each recipe transforms into a
1498/// vectorized version of itself.
1500 public VPIRMetadata {
1501 unsigned Opcode;
1502
1503public:
1504 VPWidenRecipe(unsigned Opcode, ArrayRef<VPValue *> Operands,
1505 const VPIRFlags &Flags, const VPIRMetadata &Metadata,
1506 DebugLoc DL)
1507 : VPRecipeWithIRFlags(VPDef::VPWidenSC, Operands, Flags, DL),
1508 VPIRMetadata(Metadata), Opcode(Opcode) {}
1509
1511 const VPIRFlags &Flags = {}, const VPIRMetadata &Metadata = {},
1512 DebugLoc DL = {})
1513 : VPRecipeWithIRFlags(VPDef::VPWidenSC, Operands, Flags, DL),
1514 VPIRMetadata(Metadata), Opcode(I.getOpcode()) {
1515 setUnderlyingValue(&I);
1516 }
1517
1518 ~VPWidenRecipe() override = default;
1519
/// Create a new VPWidenRecipe with the same opcode, operands and debug
/// location. *this is passed for both the Flags and Metadata constructor
/// arguments, and the underlying value is propagated to the copy.
1520 VPWidenRecipe *clone() override {
1521 auto *R =
1522 new VPWidenRecipe(getOpcode(), operands(), *this, *this, getDebugLoc());
1523 R->setUnderlyingValue(getUnderlyingValue());
1524 return R;
1525 }
1526
1527 VP_CLASSOF_IMPL(VPDef::VPWidenSC)
1528
1529 /// Produce a widened instruction using the opcode and operands of the recipe,
1530 /// processing State.VF elements.
1531 void execute(VPTransformState &State) override;
1532
1533 /// Return the cost of this VPWidenRecipe.
1534 InstructionCost computeCost(ElementCount VF,
1535 VPCostContext &Ctx) const override;
1536
1537 unsigned getOpcode() const { return Opcode; }
1538
1539protected:
1540#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1541 /// Print the recipe.
1542 void printRecipe(raw_ostream &O, const Twine &Indent,
1543 VPSlotTracker &SlotTracker) const override;
1544#endif
1545};
1546
1547/// VPWidenCastRecipe is a recipe to create vector cast instructions.
1549 /// Cast instruction opcode.
1550 Instruction::CastOps Opcode;
1551
1552 /// Result type for the cast.
1553 Type *ResultTy;
1554
1555public:
1557 CastInst *CI = nullptr, const VPIRFlags &Flags = {},
1558 const VPIRMetadata &Metadata = {},
1560 : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op, Flags, DL),
1561 VPIRMetadata(Metadata), Opcode(Opcode), ResultTy(ResultTy) {
1562 assert(flagsValidForOpcode(Opcode) &&
1563 "Set flags not supported for the provided opcode");
1565 }
1566
1567 ~VPWidenCastRecipe() override = default;
1568
1570 return new VPWidenCastRecipe(Opcode, getOperand(0), ResultTy,
1572 *this, *this, getDebugLoc());
1573 }
1574
1575 VP_CLASSOF_IMPL(VPDef::VPWidenCastSC)
1576
1577 /// Produce widened copies of the cast.
1578 void execute(VPTransformState &State) override;
1579
1580 /// Return the cost of this VPWidenCastRecipe.
1582 VPCostContext &Ctx) const override;
1583
1584 Instruction::CastOps getOpcode() const { return Opcode; }
1585
1586 /// Returns the result type of the cast.
1587 Type *getResultType() const { return ResultTy; }
1588
1589protected:
1590#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1591 /// Print the recipe.
1592 void printRecipe(raw_ostream &O, const Twine &Indent,
1593 VPSlotTracker &SlotTracker) const override;
1594#endif
1595};
1596
1597/// A recipe for widening vector intrinsics.
1599 /// ID of the vector intrinsic to widen.
1600 Intrinsic::ID VectorIntrinsicID;
1601
1602 /// Scalar return type of the intrinsic.
1603 Type *ResultTy;
1604
1605 /// True if the intrinsic may read from memory.
1606 bool MayReadFromMemory;
1607
1608 /// True if the intrinsic may write to memory.
1609 bool MayWriteToMemory;
1610
1611 /// True if the intrinsic may have side-effects.
1612 bool MayHaveSideEffects;
1613
1614public:
1616 ArrayRef<VPValue *> CallArguments, Type *Ty,
1617 const VPIRFlags &Flags = {},
1618 const VPIRMetadata &MD = {},
1620 : VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments, Flags,
1621 DL),
1622 VPIRMetadata(MD), VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty),
1623 MayReadFromMemory(CI.mayReadFromMemory()),
1624 MayWriteToMemory(CI.mayWriteToMemory()),
1625 MayHaveSideEffects(CI.mayHaveSideEffects()) {
1626 setUnderlyingValue(&CI);
1627 }
1628
1630 ArrayRef<VPValue *> CallArguments, Type *Ty,
1631 const VPIRFlags &Flags = {},
1632 const VPIRMetadata &Metadata = {},
1634 : VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments, Flags,
1635 DL),
1636 VPIRMetadata(Metadata), VectorIntrinsicID(VectorIntrinsicID),
1637 ResultTy(Ty) {
1638 LLVMContext &Ctx = Ty->getContext();
1639 AttributeSet Attrs = Intrinsic::getFnAttributes(Ctx, VectorIntrinsicID);
1640 MemoryEffects ME = Attrs.getMemoryEffects();
1641 MayReadFromMemory = !ME.onlyWritesMemory();
1642 MayWriteToMemory = !ME.onlyReadsMemory();
1643 MayHaveSideEffects = MayWriteToMemory ||
1644 !Attrs.hasAttribute(Attribute::NoUnwind) ||
1645 !Attrs.hasAttribute(Attribute::WillReturn);
1646 }
1647
1648 ~VPWidenIntrinsicRecipe() override = default;
1649
1651 if (Value *CI = getUnderlyingValue())
1652 return new VPWidenIntrinsicRecipe(*cast<CallInst>(CI), VectorIntrinsicID,
1653 operands(), ResultTy, *this, *this,
1654 getDebugLoc());
1655 return new VPWidenIntrinsicRecipe(VectorIntrinsicID, operands(), ResultTy,
1656 *this, *this, getDebugLoc());
1657 }
1658
1659 VP_CLASSOF_IMPL(VPDef::VPWidenIntrinsicSC)
1660
1661 /// Produce a widened version of the vector intrinsic.
1662 void execute(VPTransformState &State) override;
1663
1664 /// Return the cost of this vector intrinsic.
1666 VPCostContext &Ctx) const override;
1667
1668 /// Return the ID of the intrinsic.
1669 Intrinsic::ID getVectorIntrinsicID() const { return VectorIntrinsicID; }
1670
1671 /// Return the scalar return type of the intrinsic.
1672 Type *getResultType() const { return ResultTy; }
1673
1674 /// Return the name of the intrinsic as string.
1676
1677 /// Returns true if the intrinsic may read from memory.
1678 bool mayReadFromMemory() const { return MayReadFromMemory; }
1679
1680 /// Returns true if the intrinsic may write to memory.
1681 bool mayWriteToMemory() const { return MayWriteToMemory; }
1682
1683 /// Returns true if the intrinsic may have side-effects.
1684 bool mayHaveSideEffects() const { return MayHaveSideEffects; }
1685
1686 bool usesFirstLaneOnly(const VPValue *Op) const override;
1687
1688protected:
1689#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1690 /// Print the recipe.
1691 void printRecipe(raw_ostream &O, const Twine &Indent,
1692 VPSlotTracker &SlotTracker) const override;
1693#endif
1694};
1695
1696/// A recipe for widening Call instructions using library calls.
1698 public VPIRMetadata {
1699 /// Variant stores a pointer to the chosen function. There is a 1:1 mapping
1700 /// between a given VF and the chosen vectorized variant, so there will be a
1701 /// different VPlan for each VF with a valid variant.
1702 Function *Variant;
1703
1704public:
1706 ArrayRef<VPValue *> CallArguments,
1707 const VPIRFlags &Flags = {},
1708 const VPIRMetadata &Metadata = {}, DebugLoc DL = {})
1709 : VPRecipeWithIRFlags(VPDef::VPWidenCallSC, CallArguments, Flags, DL),
1710 VPIRMetadata(Metadata), Variant(Variant) {
1711 setUnderlyingValue(UV);
1712 assert(
1713 isa<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue()) &&
1714 "last operand must be the called function");
1715 }
1716
1717 ~VPWidenCallRecipe() override = default;
1718
1720 return new VPWidenCallRecipe(getUnderlyingValue(), Variant, operands(),
1721 *this, *this, getDebugLoc());
1722 }
1723
1724 VP_CLASSOF_IMPL(VPDef::VPWidenCallSC)
1725
1726 /// Produce a widened version of the call instruction.
1727 void execute(VPTransformState &State) override;
1728
1729 /// Return the cost of this VPWidenCallRecipe.
1730 InstructionCost computeCost(ElementCount VF,
1731 VPCostContext &Ctx) const override;
1732
1736
1739
1740protected:
1741#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1742 /// Print the recipe.
1743 void printRecipe(raw_ostream &O, const Twine &Indent,
1744 VPSlotTracker &SlotTracker) const override;
1745#endif
1746};
1747
1748/// A recipe representing a sequence of load -> update -> store as part of
1749/// a histogram operation. This means there may be aliasing between vector
1750/// lanes, which is handled by the llvm.experimental.vector.histogram family
1751/// of intrinsics. The only update operations currently supported are
1752/// 'add' and 'sub' where the other term is loop-invariant.
1754 /// Opcode of the update operation, currently either add or sub.
1755 unsigned Opcode;
1756
1757public:
1758 VPHistogramRecipe(unsigned Opcode, ArrayRef<VPValue *> Operands,
1760 : VPRecipeBase(VPDef::VPHistogramSC, Operands, DL), Opcode(Opcode) {}
1761
1762 ~VPHistogramRecipe() override = default;
1763
1765 return new VPHistogramRecipe(Opcode, operands(), getDebugLoc());
1766 }
1767
1768 VP_CLASSOF_IMPL(VPDef::VPHistogramSC);
1769
1770 /// Produce a vectorized histogram operation.
1771 void execute(VPTransformState &State) override;
1772
1773 /// Return the cost of this VPHistogramRecipe.
1775 VPCostContext &Ctx) const override;
1776
1777 unsigned getOpcode() const { return Opcode; }
1778
1779 /// Return the mask operand if one was provided, or a null pointer if all
1780 /// lanes should be executed unconditionally.
1781 VPValue *getMask() const {
1782 return getNumOperands() == 3 ? getOperand(2) : nullptr;
1783 }
1784
1785protected:
1786#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1787 /// Print the recipe.
1788 void printRecipe(raw_ostream &O, const Twine &Indent,
1789 VPSlotTracker &SlotTracker) const override;
1790#endif
1791};
1792
1793/// A recipe for widening select instructions. Supports both wide vector and
1794/// single-scalar conditions, matching the behavior of LLVM IR's select
1795/// instruction.
1797 public VPIRMetadata {
1799 const VPIRFlags &Flags = {}, const VPIRMetadata &MD = {},
1800 DebugLoc DL = {})
1801 : VPRecipeWithIRFlags(VPDef::VPWidenSelectSC, Operands, Flags, DL),
1802 VPIRMetadata(MD) {
1803 setUnderlyingValue(SI);
1804 }
1805
1806 ~VPWidenSelectRecipe() override = default;
1807
1810 operands(), *this, *this, getDebugLoc());
1811 }
1812
1813 VP_CLASSOF_IMPL(VPDef::VPWidenSelectSC)
1814
1815 /// Produce a widened version of the select instruction.
1816 void execute(VPTransformState &State) override;
1817
1818 /// Return the cost of this VPWidenSelectRecipe.
1819 InstructionCost computeCost(ElementCount VF,
1820 VPCostContext &Ctx) const override;
1821
1822 unsigned getOpcode() const { return Instruction::Select; }
1823
/// Returns the condition of the select, which is operand 0 of the recipe.
1824 VPValue *getCond() const {
1825 return getOperand(0);
1826 }
1827
1828 /// Returns true if the recipe only uses the first lane of operand \p Op.
1829 bool usesFirstLaneOnly(const VPValue *Op) const override {
1831 "Op must be an operand of the recipe");
1832 return Op == getCond() && Op->isDefinedOutsideLoopRegions();
1833 }
1834
1835protected:
1836#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1837 /// Print the recipe.
1838 void printRecipe(raw_ostream &O, const Twine &Indent,
1839 VPSlotTracker &SlotTracker) const override;
1840#endif
1841};
1842
1843/// A recipe for handling GEP instructions.
1845 Type *SourceElementTy;
1846
/// Returns true if the pointer operand (operand 0) is defined outside loop
/// regions.
1847 bool isPointerLoopInvariant() const {
1848 return getOperand(0)->isDefinedOutsideLoopRegions();
1849 }
1850
/// Returns true if index \p I is defined outside loop regions. Index \p I is
/// read from operand I + 1, i.e. indices follow the pointer operand.
1851 bool isIndexLoopInvariant(unsigned I) const {
1852 return getOperand(I + 1)->isDefinedOutsideLoopRegions();
1853 }
1854
/// Returns true if every operand of the recipe is defined outside loop
/// regions.
1855 bool areAllOperandsInvariant() const {
1856 return all_of(operands(), [](VPValue *Op) {
1857 return Op->isDefinedOutsideLoopRegions();
1858 });
1859 }
1860
1861public:
1863 const VPIRFlags &Flags = {},
1865 : VPRecipeWithIRFlags(VPDef::VPWidenGEPSC, Operands, Flags, DL),
1866 SourceElementTy(GEP->getSourceElementType()) {
1867 setUnderlyingValue(GEP);
1869 (void)Metadata;
1871 assert(Metadata.empty() && "unexpected metadata on GEP");
1872 }
1873
1874 ~VPWidenGEPRecipe() override = default;
1875
1878 operands(), *this, getDebugLoc());
1879 }
1880
1881 VP_CLASSOF_IMPL(VPDef::VPWidenGEPSC)
1882
1883 /// This recipe generates a GEP instruction.
1884 unsigned getOpcode() const { return Instruction::GetElementPtr; }
1885
1886 /// Generate the gep nodes.
1887 void execute(VPTransformState &State) override;
1888
1889 Type *getSourceElementType() const { return SourceElementTy; }
1890
1891 /// Return the cost of this VPWidenGEPRecipe.
1893 VPCostContext &Ctx) const override {
1894 // TODO: Compute accurate cost after retiring the legacy cost model.
1895 return 0;
1896 }
1897
1898 /// Returns true if the recipe only uses the first lane of operand \p Op.
1899 bool usesFirstLaneOnly(const VPValue *Op) const override {
1901 "Op must be an operand of the recipe");
1902 if (Op == getOperand(0))
1903 return isPointerLoopInvariant();
1904 else
1905 return !isPointerLoopInvariant() && Op->isDefinedOutsideLoopRegions();
1906 }
1907
1908protected:
1909#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1910 /// Print the recipe.
1911 void printRecipe(raw_ostream &O, const Twine &Indent,
1912 VPSlotTracker &SlotTracker) const override;
1913#endif
1914};
1915
1916/// A recipe to compute a pointer to the last element of each part of a widened
1917/// memory access for widened memory accesses of IndexedTy. Used for
1918/// VPWidenMemoryRecipes or VPInterleaveRecipes that are reversed.
1920 public VPUnrollPartAccessor<2> {
1921 Type *IndexedTy;
1922
1923 /// The constant stride of the pointer computed by this recipe, expressed in
1924 /// units of IndexedTy.
1925 int64_t Stride;
1926
1927public:
1929 int64_t Stride, GEPNoWrapFlags GEPFlags, DebugLoc DL)
1930 : VPRecipeWithIRFlags(VPDef::VPVectorEndPointerSC,
1931 ArrayRef<VPValue *>({Ptr, VF}), GEPFlags, DL),
1932 IndexedTy(IndexedTy), Stride(Stride) {
1933 assert(Stride < 0 && "Stride must be negative");
1934 }
1935
1936 VP_CLASSOF_IMPL(VPDef::VPVectorEndPointerSC)
1937
1939 const VPValue *getVFValue() const { return getOperand(1); }
1940
1941 void execute(VPTransformState &State) override;
1942
1943 bool usesFirstLaneOnly(const VPValue *Op) const override {
1945 "Op must be an operand of the recipe");
1946 return true;
1947 }
1948
1949 /// Return the cost of this VPVectorEndPointerRecipe.
1951 VPCostContext &Ctx) const override {
1952 // TODO: Compute accurate cost after retiring the legacy cost model.
1953 return 0;
1954 }
1955
1956 /// Returns true if the recipe only uses the first part of operand \p Op.
1957 bool usesFirstPartOnly(const VPValue *Op) const override {
1959 "Op must be an operand of the recipe");
1960 assert(getNumOperands() <= 2 && "must have at most two operands");
1961 return true;
1962 }
1963
1965 return new VPVectorEndPointerRecipe(getOperand(0), getVFValue(), IndexedTy,
1966 Stride, getGEPNoWrapFlags(),
1967 getDebugLoc());
1968 }
1969
1970protected:
1971#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1972 /// Print the recipe.
1973 void printRecipe(raw_ostream &O, const Twine &Indent,
1974 VPSlotTracker &SlotTracker) const override;
1975#endif
1976};
1977
1978 /// A recipe to compute pointers for widened memory accesses of SourceElementTy.
1980 public VPUnrollPartAccessor<1> {
1981 Type *SourceElementTy;
1982
1983public:
1984 VPVectorPointerRecipe(VPValue *Ptr, Type *SourceElementTy,
1986 : VPRecipeWithIRFlags(VPDef::VPVectorPointerSC, ArrayRef<VPValue *>(Ptr),
1987 GEPFlags, DL),
1988 SourceElementTy(SourceElementTy) {}
1989
1990 VP_CLASSOF_IMPL(VPDef::VPVectorPointerSC)
1991
1992 void execute(VPTransformState &State) override;
1993
1994 Type *getSourceElementType() const { return SourceElementTy; }
1995
1996 bool usesFirstLaneOnly(const VPValue *Op) const override {
1998 "Op must be an operand of the recipe");
1999 return true;
2000 }
2001
2002 /// Returns true if the recipe only uses the first part of operand \p Op.
2003 bool usesFirstPartOnly(const VPValue *Op) const override {
2005 "Op must be an operand of the recipe");
2006 assert(getNumOperands() <= 2 && "must have at most two operands");
2007 return true;
2008 }
2009
2011 return new VPVectorPointerRecipe(getOperand(0), SourceElementTy,
2013 }
2014
2015 /// Return true if this VPVectorPointerRecipe corresponds to part 0. Note that
2016 /// this is only accurate after the VPlan has been unrolled.
2017 bool isFirstPart() const { return getUnrollPart(*this) == 0; }
2018
2019 /// Return the cost of this VPVectorPointerRecipe.
2021 VPCostContext &Ctx) const override {
2022 // TODO: Compute accurate cost after retiring the legacy cost model.
2023 return 0;
2024 }
2025
2026protected:
2027#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2028 /// Print the recipe.
2029 void printRecipe(raw_ostream &O, const Twine &Indent,
2030 VPSlotTracker &SlotTracker) const override;
2031#endif
2032};
2033
2034/// A pure virtual base class for all recipes modeling header phis, including
2035/// phis for first order recurrences, pointer inductions and reductions. The
2036/// start value is the first operand of the recipe and the incoming value from
2037/// the backedge is the second operand.
2038///
2039/// Inductions are modeled using the following sub-classes:
2040/// * VPCanonicalIVPHIRecipe: Canonical scalar induction of the vector loop,
2041/// starting at a specified value (zero for the main vector loop, the resume
2042/// value for the epilogue vector loop) and stepping by 1. The induction
2043/// controls exiting of the vector loop by comparing against the vector trip
2044/// count. Produces a single scalar PHI for the induction value per
2045/// iteration.
2046/// * VPWidenIntOrFpInductionRecipe: Generates vector values for integer and
2047/// floating point inductions with arbitrary start and step values. Produces
2048/// a vector PHI per-part.
2049/// * VPDerivedIVRecipe: Converts the canonical IV value to the corresponding
2050/// value of an IV with different start and step values. Produces a single
2051/// scalar value per iteration
2052/// * VPScalarIVStepsRecipe: Generates scalar values per-lane based on a
2053/// canonical or derived induction.
2054/// * VPWidenPointerInductionRecipe: Generate vector and scalar values for a
2055/// pointer induction. Produces either a vector PHI per-part or scalar values
2056/// per-lane based on the canonical induction.
2058 public VPPhiAccessors {
2059protected:
2060 VPHeaderPHIRecipe(unsigned char VPDefID, Instruction *UnderlyingInstr,
2061 VPValue *Start, DebugLoc DL = DebugLoc::getUnknown())
2062 : VPSingleDefRecipe(VPDefID, ArrayRef<VPValue *>({Start}),
2063 UnderlyingInstr, DL) {}
2064
2065 const VPRecipeBase *getAsRecipe() const override { return this; }
2066
2067public:
2068 ~VPHeaderPHIRecipe() override = default;
2069
2070 /// Method to support type inquiry through isa, cast, and dyn_cast.
2071 static inline bool classof(const VPRecipeBase *R) {
2072 return R->getVPDefID() >= VPDef::VPFirstHeaderPHISC &&
2073 R->getVPDefID() <= VPDef::VPLastHeaderPHISC;
2074 }
2075 static inline bool classof(const VPValue *V) {
2076 return isa<VPHeaderPHIRecipe>(V->getDefiningRecipe());
2077 }
2078
2079 /// Generate the phi nodes.
2080 void execute(VPTransformState &State) override = 0;
2081
2082 /// Return the cost of this header phi recipe.
2084 VPCostContext &Ctx) const override;
2085
2086 /// Returns the start value of the phi, if one is set.
2088 return getNumOperands() == 0 ? nullptr : getOperand(0);
2089 }
2091 return getNumOperands() == 0 ? nullptr : getOperand(0);
2092 }
2093
2094 /// Update the start value of the recipe.
2096
2097 /// Returns the incoming value from the loop backedge.
2099 return getOperand(1);
2100 }
2101
2102 /// Update the incoming value from the loop backedge.
2104
2105 /// Returns the backedge value as a recipe. The backedge value is guaranteed
2106 /// to be a recipe.
2108 return *getBackedgeValue()->getDefiningRecipe();
2109 }
2110
2111protected:
2112#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2113 /// Print the recipe.
2114 void printRecipe(raw_ostream &O, const Twine &Indent,
2115 VPSlotTracker &SlotTracker) const override = 0;
2116#endif
2117};
2118
2119/// Base class for widened induction (VPWidenIntOrFpInductionRecipe and
2120/// VPWidenPointerInductionRecipe), providing shared functionality, including
2121/// retrieving the step value, induction descriptor and original phi node.
2123 const InductionDescriptor &IndDesc;
2124
2125public:
2126 VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start,
2127 VPValue *Step, const InductionDescriptor &IndDesc,
2128 DebugLoc DL)
2129 : VPHeaderPHIRecipe(Kind, IV, Start, DL), IndDesc(IndDesc) {
2130 addOperand(Step);
2131 }
2132
2133 static inline bool classof(const VPRecipeBase *R) {
2134 return R->getVPDefID() == VPDef::VPWidenIntOrFpInductionSC ||
2135 R->getVPDefID() == VPDef::VPWidenPointerInductionSC;
2136 }
2137
2138 static inline bool classof(const VPValue *V) {
2139 auto *R = V->getDefiningRecipe();
2140 return R && classof(R);
2141 }
2142
2143 static inline bool classof(const VPHeaderPHIRecipe *R) {
2144 return classof(static_cast<const VPRecipeBase *>(R));
2145 }
2146
2147 void execute(VPTransformState &State) override = 0;
2148
2149 /// Returns the step value of the induction.
2151 const VPValue *getStepValue() const { return getOperand(1); }
2152
2153 /// Update the step value of the recipe.
2154 void setStepValue(VPValue *V) { setOperand(1, V); }
2155
2157 const VPValue *getVFValue() const { return getOperand(2); }
2158
2159 /// Returns the number of incoming values, also number of incoming blocks.
2160 /// Note that at the moment, widened induction recipes only have a single
2161 /// incoming value, their start value.
2162 unsigned getNumIncoming() const override { return 1; }
2163
2165
2166 /// Returns the induction descriptor for the recipe.
2167 const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }
2168
2170 // TODO: All operands of base recipe must exist and be at same index in
2171 // derived recipe.
2173 "VPWidenIntOrFpInductionRecipe generates its own backedge value");
2174 }
2175
2177 // TODO: All operands of base recipe must exist and be at same index in
2178 // derived recipe.
2180 "VPWidenIntOrFpInductionRecipe generates its own backedge value");
2181 }
2182
2183 /// Returns true if the recipe only uses the first lane of operand \p Op.
2184 bool usesFirstLaneOnly(const VPValue *Op) const override {
2186 "Op must be an operand of the recipe");
2187 // The recipe creates its own wide start value, so it only requests the
2188 // first lane of the operand.
2189 // TODO: Remove once creating the start value is modeled separately.
2190 return Op == getStartValue() || Op == getStepValue();
2191 }
2192};
2193
2194/// A recipe for handling phi nodes of integer and floating-point inductions,
2195/// producing their vector values. This is an abstract recipe and must be
2196/// converted to concrete recipes before executing.
2198 public VPIRFlags {
2199 TruncInst *Trunc;
2200
2201 // If this recipe is unrolled it will have 2 additional operands.
2202 bool isUnrolled() const { return getNumOperands() == 5; }
2203
2204public:
2206 VPValue *VF, const InductionDescriptor &IndDesc,
2207 const VPIRFlags &Flags, DebugLoc DL)
2208 : VPWidenInductionRecipe(VPDef::VPWidenIntOrFpInductionSC, IV, Start,
2209 Step, IndDesc, DL),
2210 VPIRFlags(Flags), Trunc(nullptr) {
2211 addOperand(VF);
2212 }
2213
2215 VPValue *VF, const InductionDescriptor &IndDesc,
2216 TruncInst *Trunc, const VPIRFlags &Flags,
2217 DebugLoc DL)
2218 : VPWidenInductionRecipe(VPDef::VPWidenIntOrFpInductionSC, IV, Start,
2219 Step, IndDesc, DL),
2220 VPIRFlags(Flags), Trunc(Trunc) {
2221 addOperand(VF);
2223 (void)Metadata;
2224 if (Trunc)
2226 assert(Metadata.empty() && "unexpected metadata on Trunc");
2227 }
2228
2230
2236
2237 VP_CLASSOF_IMPL(VPDef::VPWidenIntOrFpInductionSC)
2238
2239 void execute(VPTransformState &State) override {
2240 llvm_unreachable("cannot execute this recipe, should be expanded via "
2241 "expandVPWidenIntOrFpInductionRecipe");
2242 }
2243
2245 // If the recipe has been unrolled return the VPValue for the induction
2246 // increment.
2247 return isUnrolled() ? getOperand(getNumOperands() - 2) : nullptr;
2248 }
2249
2250 /// Returns the number of incoming values, also number of incoming blocks.
2251 /// Note that at the moment, VPWidenIntOrFpInductionRecipes only have a single
2252 /// incoming value, its start value.
2253 unsigned getNumIncoming() const override { return 1; }
2254
2255 /// Returns the TruncInst this recipe was created with, or nullptr if none
2256 /// was provided.
2257 TruncInst *getTruncInst() { return Trunc; }
2258 const TruncInst *getTruncInst() const { return Trunc; }
2259
2260 /// Returns true if the induction is canonical, i.e. starting at 0 and
2261 /// incremented by UF * VF (= the original IV is incremented by 1) and has the
2262 /// same type as the canonical induction.
2263 bool isCanonical() const;
2264
2265 /// Returns the scalar type of the induction.
2267 return Trunc ? Trunc->getType()
2269 }
2270
2271 /// Returns the VPValue representing the value of this induction at
2272 /// the last unrolled part, if it exists. Returns itself if unrolling did not
2273 /// take place.
2275 return isUnrolled() ? getOperand(getNumOperands() - 1) : this;
2276 }
2277
2278protected:
2279#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2280 /// Print the recipe.
2281 void printRecipe(raw_ostream &O, const Twine &Indent,
2282 VPSlotTracker &SlotTracker) const override;
2283#endif
2284};
2285
2287public:
2288 /// Create a new VPWidenPointerInductionRecipe for \p Phi with start value \p
2289 /// Start and the number of elements unrolled \p NumUnrolledElems, typically
2290 /// VF*UF.
2292 VPValue *NumUnrolledElems,
2293 const InductionDescriptor &IndDesc, DebugLoc DL)
2294 : VPWidenInductionRecipe(VPDef::VPWidenPointerInductionSC, Phi, Start,
2295 Step, IndDesc, DL) {
2296 addOperand(NumUnrolledElems);
2297 }
2298
2300
2306
2307 VP_CLASSOF_IMPL(VPDef::VPWidenPointerInductionSC)
2308
2309 /// Never executed directly; the recipe must be expanded beforehand.
2310 void execute(VPTransformState &State) override {
2311 llvm_unreachable("cannot execute this recipe, should be expanded via "
2312 "expandVPWidenPointerInduction");
2313 }
2314
2315 /// Returns true if only scalar values will be generated.
2316 bool onlyScalarsGenerated(bool IsScalable);
2317
2318protected:
2319#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2320 /// Print the recipe.
2321 void printRecipe(raw_ostream &O, const Twine &Indent,
2322 VPSlotTracker &SlotTracker) const override;
2323#endif
2324};
2325
2326/// A recipe for widened phis. Incoming values are operands of the recipe and
2327/// their operand index corresponds to the incoming predecessor block. If the
2328/// recipe is placed in an entry block to a (non-replicate) region, it must have
2329/// exactly 2 incoming values, the first from the predecessor of the region and
2330/// the second from the exiting block of the region.
2332 public VPPhiAccessors {
2333 /// Name to use for the generated IR instruction for the widened phi.
2334 std::string Name;
2335
2336public:
2337 /// Create a new VPWidenPHIRecipe for \p Phi with start value \p Start and
2338 /// debug location \p DL.
2339 VPWidenPHIRecipe(PHINode *Phi, VPValue *Start = nullptr,
2340 DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "")
2341 : VPSingleDefRecipe(VPDef::VPWidenPHISC, {}, Phi, DL), Name(Name.str()) {
2342 if (Start)
2343 addOperand(Start);
2344 }
2345
2348 getOperand(0), getDebugLoc(), Name);
2350 C->addOperand(Op);
2351 return C;
2352 }
2353
2354 ~VPWidenPHIRecipe() override = default;
2355
2356 VP_CLASSOF_IMPL(VPDef::VPWidenPHISC)
2357
2358 /// Generate the phi/select nodes.
2359 void execute(VPTransformState &State) override;
2360
2361protected:
2362#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2363 /// Print the recipe.
2364 void printRecipe(raw_ostream &O, const Twine &Indent,
2365 VPSlotTracker &SlotTracker) const override;
2366#endif
2367
2368 const VPRecipeBase *getAsRecipe() const override { return this; }
2369};
2370
2371/// A recipe for handling first-order recurrence phis. The start value is the
2372/// first operand of the recipe and the incoming value from the backedge is the
2373/// second operand.
2376 : VPHeaderPHIRecipe(VPDef::VPFirstOrderRecurrencePHISC, Phi, &Start) {}
2377
2378 VP_CLASSOF_IMPL(VPDef::VPFirstOrderRecurrencePHISC)
2379
2384
2385 void execute(VPTransformState &State) override;
2386
2387 /// Return the cost of this first-order recurrence phi recipe.
2389 VPCostContext &Ctx) const override;
2390
2391 /// Returns true if the recipe only uses the first lane of operand \p Op.
2392 bool usesFirstLaneOnly(const VPValue *Op) const override {
2394 "Op must be an operand of the recipe");
2395 return Op == getStartValue();
2396 }
2397
2398protected:
2399#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2400 /// Print the recipe.
2401 void printRecipe(raw_ostream &O, const Twine &Indent,
2402 VPSlotTracker &SlotTracker) const override;
2403#endif
2404};
2405
2406/// A recipe for handling reduction phis. The start value is the first operand
2407/// of the recipe and the incoming value from the backedge is the second
2408/// operand.
2410 public VPUnrollPartAccessor<2> {
2411 /// The recurrence kind of the reduction.
2412 const RecurKind Kind;
2413
2414 /// The phi is part of an in-loop reduction.
2415 bool IsInLoop;
2416
2417 /// The phi is part of an ordered reduction. Requires IsInLoop to be true.
2418 bool IsOrdered;
2419
2420 /// When expanding the reduction PHI, the plan's VF element count is divided
2421 /// by this factor to form the reduction phi's VF.
2422 unsigned VFScaleFactor = 1;
2423
2424public:
2425 /// Create a new VPReductionPHIRecipe for the reduction \p Phi.
2427 bool IsInLoop = false, bool IsOrdered = false,
2428 unsigned VFScaleFactor = 1)
2429 : VPHeaderPHIRecipe(VPDef::VPReductionPHISC, Phi, &Start), Kind(Kind),
2430 IsInLoop(IsInLoop), IsOrdered(IsOrdered), VFScaleFactor(VFScaleFactor) {
2431 assert((!IsOrdered || IsInLoop) && "IsOrdered requires IsInLoop");
2432 }
2433
2434 ~VPReductionPHIRecipe() override = default;
2435
2437 auto *R = new VPReductionPHIRecipe(
2439 *getOperand(0), IsInLoop, IsOrdered, VFScaleFactor);
2440 R->addOperand(getBackedgeValue());
2441 return R;
2442 }
2443
2444 VP_CLASSOF_IMPL(VPDef::VPReductionPHISC)
2445
2446 /// Generate the phi/select nodes.
2447 void execute(VPTransformState &State) override;
2448
2449 /// Get the factor that the VF of this recipe's output should be scaled by.
2450 unsigned getVFScaleFactor() const { return VFScaleFactor; }
2451
2452 /// Returns the number of incoming values, also number of incoming blocks.
2453 /// A reduction phi has two incoming values: the start value and the value
2454 /// from the loop backedge.
2455 unsigned getNumIncoming() const override { return 2; }
2456
2457 /// Returns the recurrence kind of the reduction.
2458 RecurKind getRecurrenceKind() const { return Kind; }
2459
2460 /// Returns true, if the phi is part of an ordered reduction.
2461 bool isOrdered() const { return IsOrdered; }
2462
2463 /// Returns true, if the phi is part of an in-loop reduction.
2464 bool isInLoop() const { return IsInLoop; }
2465
2466 /// Returns true if the recipe only uses the first lane of operand \p Op.
2467 bool usesFirstLaneOnly(const VPValue *Op) const override {
2469 "Op must be an operand of the recipe");
2470 return isOrdered() || isInLoop();
2471 }
2472
2473protected:
2474#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2475 /// Print the recipe.
2476 void printRecipe(raw_ostream &O, const Twine &Indent,
2477 VPSlotTracker &SlotTracker) const override;
2478#endif
2479};
2480
2481/// A recipe for vectorizing a phi-node as a sequence of mask-based select
2482/// instructions.
2484public:
2485 /// The blend operation is a User of the incoming values and of their
2486 /// respective masks, ordered [I0, M0, I1, M1, I2, M2, ...]. Note that M0 can
2487 /// be omitted (implied by passing an odd number of operands) in which case
2488 /// all other incoming values are merged into it.
2490 : VPSingleDefRecipe(VPDef::VPBlendSC, Operands, Phi, DL) {
2491 assert(Operands.size() > 0 && "Expected at least one operand!");
2492 }
2493
2498
2499 VP_CLASSOF_IMPL(VPDef::VPBlendSC)
2500
2501 /// A normalized blend is one that has an odd number of operands, whereby the
2502 /// first operand does not have an associated mask.
2503 bool isNormalized() const { return getNumOperands() % 2; }
2504
2505 /// Return the number of incoming values, taking into account when normalized
2506 /// the first incoming value will have no mask.
2507 unsigned getNumIncomingValues() const {
2508 return (getNumOperands() + isNormalized()) / 2;
2509 }
2510
2511 /// Return incoming value number \p Idx.
2512 VPValue *getIncomingValue(unsigned Idx) const {
2513 return Idx == 0 ? getOperand(0) : getOperand(Idx * 2 - isNormalized());
2514 }
2515
2516 /// Return mask number \p Idx.
2517 VPValue *getMask(unsigned Idx) const {
2518 assert((Idx > 0 || !isNormalized()) && "First index has no mask!");
2519 return Idx == 0 ? getOperand(1) : getOperand(Idx * 2 + !isNormalized());
2520 }
2521
2522 /// Set mask number \p Idx to \p V.
2523 void setMask(unsigned Idx, VPValue *V) {
2524 assert((Idx > 0 || !isNormalized()) && "First index has no mask!");
2525 Idx == 0 ? setOperand(1, V) : setOperand(Idx * 2 + !isNormalized(), V);
2526 }
2527
2528 void execute(VPTransformState &State) override {
2529 llvm_unreachable("VPBlendRecipe should be expanded by simplifyBlends");
2530 }
2531
2532 /// Return the cost of this VPBlendRecipe.
2533 InstructionCost computeCost(ElementCount VF,
2534 VPCostContext &Ctx) const override;
2535
2536 /// Returns true if the recipe only uses the first lane of operand \p Op.
2537 bool usesFirstLaneOnly(const VPValue *Op) const override {
2539 "Op must be an operand of the recipe");
2540 // Recursing through Blend recipes only, must terminate at header phi's the
2541 // latest.
2542 return all_of(users(),
2543 [this](VPUser *U) { return U->usesFirstLaneOnly(this); });
2544 }
2545
2546protected:
2547#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2548 /// Print the recipe.
2549 void printRecipe(raw_ostream &O, const Twine &Indent,
2550 VPSlotTracker &SlotTracker) const override;
2551#endif
2552};
2553
2554/// A common base class for interleaved memory operations.
2555/// An Interleaved memory operation is a memory access method that combines
2556/// multiple strided loads/stores into a single wide load/store with shuffles.
2557/// The first operand is the start address. The optional operands are, in order,
2558/// the stored values and the mask.
2560 public VPIRMetadata {
2562
2563 /// Indicates if the interleave group is in a conditional block and requires a
2564 /// mask.
2565 bool HasMask = false;
2566
2567 /// Indicates if gaps between members of the group need to be masked out or if
2568 /// unused gaps can be loaded speculatively.
2569 bool NeedsMaskForGaps = false;
2570
2571protected:
2572 VPInterleaveBase(const unsigned char SC,
2574 ArrayRef<VPValue *> Operands,
2575 ArrayRef<VPValue *> StoredValues, VPValue *Mask,
2576 bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
2577 : VPRecipeBase(SC, Operands, DL), VPIRMetadata(MD), IG(IG),
2578 NeedsMaskForGaps(NeedsMaskForGaps) {
2579 // TODO: extend the masked interleaved-group support to reversed access.
2580 assert((!Mask || !IG->isReverse()) &&
2581 "Reversed masked interleave-group not supported.");
2582 for (unsigned I = 0; I < IG->getFactor(); ++I)
2583 if (Instruction *Inst = IG->getMember(I)) {
2584 if (Inst->getType()->isVoidTy())
2585 continue;
2586 new VPValue(Inst, this);
2587 }
2588
2589 for (auto *SV : StoredValues)
2590 addOperand(SV);
2591 if (Mask) {
2592 HasMask = true;
2593 addOperand(Mask);
2594 }
2595 }
2596
2597public:
2598 VPInterleaveBase *clone() override = 0;
2599
2600 static inline bool classof(const VPRecipeBase *R) {
2601 return R->getVPDefID() == VPRecipeBase::VPInterleaveSC ||
2602 R->getVPDefID() == VPRecipeBase::VPInterleaveEVLSC;
2603 }
2604
2605 static inline bool classof(const VPUser *U) {
2606 auto *R = dyn_cast<VPRecipeBase>(U);
2607 return R && classof(R);
2608 }
2609
2610 /// Return the address accessed by this recipe.
2611 VPValue *getAddr() const {
2612 return getOperand(0); // Address is the 1st, mandatory operand.
2613 }
2614
2615 /// Return the mask used by this recipe. Note that a full mask is represented
2616 /// by a nullptr.
2617 VPValue *getMask() const {
2618 // Mask is optional and the last operand.
2619 return HasMask ? getOperand(getNumOperands() - 1) : nullptr;
2620 }
2621
2622 /// Return true if the access needs a mask because of the gaps.
2623 bool needsMaskForGaps() const { return NeedsMaskForGaps; }
2624
2626
2627 Instruction *getInsertPos() const { return IG->getInsertPos(); }
2628
2629 void execute(VPTransformState &State) override {
2630 llvm_unreachable("VPInterleaveBase should not be instantiated.");
2631 }
2632
2633 /// Return the cost of this recipe.
2634 InstructionCost computeCost(ElementCount VF,
2635 VPCostContext &Ctx) const override;
2636
2637 /// Returns true if the recipe only uses the first lane of operand \p Op.
2638 bool usesFirstLaneOnly(const VPValue *Op) const override = 0;
2639
2640 /// Returns the number of stored operands of this interleave group. Returns 0
2641 /// for load interleave groups.
2642 virtual unsigned getNumStoreOperands() const = 0;
2643
2644 /// Return the VPValues stored by this interleave group. If it is a load
2645 /// interleave group, return an empty ArrayRef.
2647 return ArrayRef<VPValue *>(op_end() -
2648 (getNumStoreOperands() + (HasMask ? 1 : 0)),
2650 }
2651};
2652
2653/// VPInterleaveRecipe is a recipe for transforming an interleave group of load
2654/// or stores into one wide load/store and shuffles. The first operand of a
2655/// VPInterleave recipe is the address, followed by the stored values, followed
2656/// by an optional mask.
2658public:
2660 ArrayRef<VPValue *> StoredValues, VPValue *Mask,
2661 bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
2662 : VPInterleaveBase(VPDef::VPInterleaveSC, IG, Addr, StoredValues, Mask,
2663 NeedsMaskForGaps, MD, DL) {}
2664
2665 ~VPInterleaveRecipe() override = default;
2666
2670 needsMaskForGaps(), *this, getDebugLoc());
2671 }
2672
2673 VP_CLASSOF_IMPL(VPDef::VPInterleaveSC)
2674
2675 /// Generate the wide load or store, and shuffles.
2676 void execute(VPTransformState &State) override;
2677
2678 bool usesFirstLaneOnly(const VPValue *Op) const override {
2680 "Op must be an operand of the recipe");
2681 return Op == getAddr() && !llvm::is_contained(getStoredValues(), Op);
2682 }
2683
2684 unsigned getNumStoreOperands() const override {
2685 return getNumOperands() - (getMask() ? 2 : 1);
2686 }
2687
2688protected:
2689#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2690 /// Print the recipe.
2691 void printRecipe(raw_ostream &O, const Twine &Indent,
2692 VPSlotTracker &SlotTracker) const override;
2693#endif
2694};
2695
2696/// A recipe for interleaved memory operations with vector-predication
2697/// intrinsics. The first operand is the address, the second operand is the
2698/// explicit vector length. Stored values and mask are optional operands.
2700public:
2702 : VPInterleaveBase(VPDef::VPInterleaveEVLSC, R.getInterleaveGroup(),
2703 ArrayRef<VPValue *>({R.getAddr(), &EVL}),
2704 R.getStoredValues(), Mask, R.needsMaskForGaps(), R,
2705 R.getDebugLoc()) {
2706 assert(!getInterleaveGroup()->isReverse() &&
2707 "Reversed interleave-group with tail folding is not supported.");
2708 assert(!needsMaskForGaps() && "Interleaved access with gap mask is not "
2709 "supported for scalable vector.");
2710 }
2711
2712 ~VPInterleaveEVLRecipe() override = default;
2713
2715 llvm_unreachable("cloning not implemented yet");
2716 }
2717
2718 VP_CLASSOF_IMPL(VPDef::VPInterleaveEVLSC)
2719
2720 /// The VPValue of the explicit vector length.
2721 VPValue *getEVL() const { return getOperand(1); }
2722
2723 /// Generate the wide load or store, and shuffles.
2724 void execute(VPTransformState &State) override;
2725
2726 /// The recipe only uses the first lane of the address, and EVL operand.
2727 bool usesFirstLaneOnly(const VPValue *Op) const override {
2729 "Op must be an operand of the recipe");
2730 return (Op == getAddr() && !llvm::is_contained(getStoredValues(), Op)) ||
2731 Op == getEVL();
2732 }
2733
2734 unsigned getNumStoreOperands() const override {
2735 return getNumOperands() - (getMask() ? 3 : 2);
2736 }
2737
2738protected:
2739#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2740 /// Print the recipe.
2741 void printRecipe(raw_ostream &O, const Twine &Indent,
2742 VPSlotTracker &SlotTracker) const override;
2743#endif
2744};
2745
2746/// A recipe to represent inloop reduction operations, performing a reduction on
2747/// a vector operand into a scalar value, and adding the result to a chain.
2748/// The Operands are {ChainOp, VecOp, [Condition]}.
2750 /// The recurrence kind for the reduction in question.
2751 RecurKind RdxKind;
2752 bool IsOrdered;
2753 /// Whether the reduction is conditional.
2754 bool IsConditional = false;
2755
2756protected:
2757 VPReductionRecipe(const unsigned char SC, RecurKind RdxKind,
2759 ArrayRef<VPValue *> Operands, VPValue *CondOp,
2760 bool IsOrdered, DebugLoc DL)
2761 : VPRecipeWithIRFlags(SC, Operands, FMFs, DL), RdxKind(RdxKind),
2762 IsOrdered(IsOrdered) {
2763 if (CondOp) {
2764 IsConditional = true;
2765 addOperand(CondOp);
2766 }
2768 }
2769
2770public:
2772 VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
2773 bool IsOrdered, DebugLoc DL = DebugLoc::getUnknown())
2774 : VPReductionRecipe(VPDef::VPReductionSC, RdxKind, FMFs, I,
2775 ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp,
2776 IsOrdered, DL) {}
2777
2779 VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
2780 bool IsOrdered, DebugLoc DL = DebugLoc::getUnknown())
2781 : VPReductionRecipe(VPDef::VPReductionSC, RdxKind, FMFs, nullptr,
2782 ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp,
2783 IsOrdered, DL) {}
2784
2785 ~VPReductionRecipe() override = default;
2786
2788 return new VPReductionRecipe(RdxKind, getFastMathFlags(),
2790 getCondOp(), IsOrdered, getDebugLoc());
2791 }
2792
2793 static inline bool classof(const VPRecipeBase *R) {
2794 return R->getVPDefID() == VPRecipeBase::VPReductionSC ||
2795 R->getVPDefID() == VPRecipeBase::VPReductionEVLSC ||
2796 R->getVPDefID() == VPRecipeBase::VPPartialReductionSC;
2797 }
2798
2799 static inline bool classof(const VPUser *U) {
2800 auto *R = dyn_cast<VPRecipeBase>(U);
2801 return R && classof(R);
2802 }
2803
2804 static inline bool classof(const VPValue *VPV) {
2805 const VPRecipeBase *R = VPV->getDefiningRecipe();
2806 return R && classof(R);
2807 }
2808
2809 static inline bool classof(const VPSingleDefRecipe *R) {
2810 return classof(static_cast<const VPRecipeBase *>(R));
2811 }
2812
2813 /// Generate the reduction in the loop.
2814 void execute(VPTransformState &State) override;
2815
2816 /// Return the cost of VPReductionRecipe.
2817 InstructionCost computeCost(ElementCount VF,
2818 VPCostContext &Ctx) const override;
2819
2820 /// Return the recurrence kind for the in-loop reduction.
2821 RecurKind getRecurrenceKind() const { return RdxKind; }
2822 /// Return true if the in-loop reduction is ordered.
2823 bool isOrdered() const { return IsOrdered; }
2824 /// Return true if the in-loop reduction is conditional.
2825 bool isConditional() const { return IsConditional; }
2826 /// The VPValue of the scalar Chain being accumulated.
2827 VPValue *getChainOp() const { return getOperand(0); }
2828 /// The VPValue of the vector value to be reduced.
2829 VPValue *getVecOp() const { return getOperand(1); }
2830 /// The VPValue of the condition for the block.
2832 return isConditional() ? getOperand(getNumOperands() - 1) : nullptr;
2833 }
2834
2835protected:
2836#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2837 /// Print the recipe.
2838 void printRecipe(raw_ostream &O, const Twine &Indent,
2839 VPSlotTracker &SlotTracker) const override;
2840#endif
2841};
2842
2843/// A recipe for forming partial reductions. In the loop, an accumulator and
2844/// vector operand are added together and passed to the next iteration as the
2845/// next accumulator. After the loop body, the accumulator is reduced to a
2846/// scalar value.
2848 unsigned Opcode;
2849
2850 /// The divisor by which the VF of this recipe's output should be divided
2851 /// during execution.
2852 unsigned VFScaleFactor;
2853
2854public:
2856 VPValue *Op1, VPValue *Cond, unsigned VFScaleFactor)
2857 : VPPartialReductionRecipe(ReductionInst->getOpcode(), Op0, Op1, Cond,
2858 VFScaleFactor, ReductionInst) {}
2859 VPPartialReductionRecipe(unsigned Opcode, VPValue *Op0, VPValue *Op1,
2860 VPValue *Cond, unsigned ScaleFactor,
2861 Instruction *ReductionInst = nullptr)
2862 : VPReductionRecipe(VPDef::VPPartialReductionSC, RecurKind::Add,
2863 FastMathFlags(), ReductionInst,
2864 ArrayRef<VPValue *>({Op0, Op1}), Cond, false, {}),
2865 Opcode(Opcode), VFScaleFactor(ScaleFactor) {
2866 [[maybe_unused]] auto *AccumulatorRecipe =
2868 // When cloning as part of a VPExpressionRecipe the chain op could have been
2869 // replaced by a temporary VPValue, so it doesn't have a defining recipe.
2870 assert((!AccumulatorRecipe ||
2871 isa<VPReductionPHIRecipe>(AccumulatorRecipe) ||
2872 isa<VPPartialReductionRecipe>(AccumulatorRecipe)) &&
2873 "Unexpected operand order for partial reduction recipe");
2874 }
2875 ~VPPartialReductionRecipe() override = default;
2876
2878 return new VPPartialReductionRecipe(Opcode, getOperand(0), getOperand(1),
2879 getCondOp(), VFScaleFactor,
2881 }
2882
2883 VP_CLASSOF_IMPL(VPDef::VPPartialReductionSC)
2884
2885 /// Generate the reduction in the loop.
2886 void execute(VPTransformState &State) override;
2887
2888 /// Return the cost of this VPPartialReductionRecipe.
2890 VPCostContext &Ctx) const override;
2891
2892 /// Get the binary op's opcode.
2893 unsigned getOpcode() const { return Opcode; }
2894
2895 /// Get the factor that the VF of this recipe's output should be scaled by.
2896 unsigned getVFScaleFactor() const { return VFScaleFactor; }
2897
2898protected:
2899#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2900 /// Print the recipe.
2901 void printRecipe(raw_ostream &O, const Twine &Indent,
2902 VPSlotTracker &SlotTracker) const override;
2903#endif
2904};
2905
2906/// A recipe to represent inloop reduction operations with vector-predication
2907/// intrinsics, performing a reduction on a vector operand with the explicit
2908/// vector length (EVL) into a scalar value, and adding the result to a chain.
2909/// The Operands are {ChainOp, VecOp, EVL, [Condition]}.
2911public:
2915 VPDef::VPReductionEVLSC, R.getRecurrenceKind(),
2916 R.getFastMathFlags(),
2918 ArrayRef<VPValue *>({R.getChainOp(), R.getVecOp(), &EVL}), CondOp,
2919 R.isOrdered(), DL) {}
2920
2921 ~VPReductionEVLRecipe() override = default;
2922
2924 llvm_unreachable("cloning not implemented yet");
2925 }
2926
2927 VP_CLASSOF_IMPL(VPDef::VPReductionEVLSC)
2928
2929 /// Generate the reduction in the loop
2930 void execute(VPTransformState &State) override;
2931
2932 /// The VPValue of the explicit vector length.
2933 VPValue *getEVL() const { return getOperand(2); }
2934
2935 /// Returns true if the recipe only uses the first lane of operand \p Op.
2936 bool usesFirstLaneOnly(const VPValue *Op) const override {
2938 "Op must be an operand of the recipe");
2939 return Op == getEVL();
2940 }
2941
2942protected:
2943#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2944 /// Print the recipe.
2945 void printRecipe(raw_ostream &O, const Twine &Indent,
2946 VPSlotTracker &SlotTracker) const override;
2947#endif
2948};
2949
2950/// VPReplicateRecipe replicates a given instruction producing multiple scalar
2951/// copies of the original scalar type, one per lane, instead of producing a
2952/// single copy of widened type for all lanes. If the instruction is known to be
2953/// a single scalar, only one copy, per lane zero, will be generated.
2955 public VPIRMetadata {
2956 /// Indicator if only a single replica per lane is needed.
2957 bool IsSingleScalar;
2958
2959 /// Indicator if the replicas are also predicated.
2960 bool IsPredicated;
2961
2962public:
2964 bool IsSingleScalar, VPValue *Mask = nullptr,
2965 const VPIRFlags &Flags = {}, VPIRMetadata Metadata = {},
2966 DebugLoc DL = DebugLoc::getUnknown())
2967 : VPRecipeWithIRFlags(VPDef::VPReplicateSC, Operands, Flags, DL),
2968 VPIRMetadata(Metadata), IsSingleScalar(IsSingleScalar),
2969 IsPredicated(Mask) {
2970 setUnderlyingValue(I);
2971 if (Mask)
2972 addOperand(Mask);
2973 }
2974
2975 ~VPReplicateRecipe() override = default;
2976
2978 auto *Copy = new VPReplicateRecipe(
2979 getUnderlyingInstr(), operands(), IsSingleScalar,
2980 isPredicated() ? getMask() : nullptr, *this, *this, getDebugLoc());
2981 Copy->transferFlags(*this);
2982 return Copy;
2983 }
2984
2985 VP_CLASSOF_IMPL(VPDef::VPReplicateSC)
2986
2987 /// Generate replicas of the desired Ingredient. Replicas will be generated
2988 /// for all parts and lanes unless a specific part and lane are specified in
2989 /// the \p State.
2990 void execute(VPTransformState &State) override;
2991
2992 /// Return the cost of this VPReplicateRecipe.
2993 InstructionCost computeCost(ElementCount VF,
2994 VPCostContext &Ctx) const override;
2995
2996 bool isSingleScalar() const { return IsSingleScalar; }
2997
2998 bool isPredicated() const { return IsPredicated; }
2999
3000 /// Returns true if the recipe only uses the first lane of operand \p Op.
3001 bool usesFirstLaneOnly(const VPValue *Op) const override {
3003 "Op must be an operand of the recipe");
3004 return isSingleScalar();
3005 }
3006
3007 /// Returns true if the recipe uses scalars of operand \p Op.
3008 bool usesScalars(const VPValue *Op) const override {
3010 "Op must be an operand of the recipe");
3011 return true;
3012 }
3013
3014 /// Returns true if the recipe is used by a widened recipe via an intervening
3015 /// VPPredInstPHIRecipe. In this case, the scalar values should also be packed
3016 /// in a vector.
3017 bool shouldPack() const;
3018
3019 /// Return the mask of a predicated VPReplicateRecipe.
3021 assert(isPredicated() && "Trying to get the mask of a unpredicated recipe");
3022 return getOperand(getNumOperands() - 1);
3023 }
3024
3025 unsigned getOpcode() const { return getUnderlyingInstr()->getOpcode(); }
3026
3027protected:
3028#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3029 /// Print the recipe.
3030 void printRecipe(raw_ostream &O, const Twine &Indent,
3031 VPSlotTracker &SlotTracker) const override;
3032#endif
3033};
3034
3035/// A recipe for generating conditional branches on the bits of a mask.
3037public:
3039 : VPRecipeBase(VPDef::VPBranchOnMaskSC, {BlockInMask}, DL) {}
3040
3043 }
3044
3045 VP_CLASSOF_IMPL(VPDef::VPBranchOnMaskSC)
3046
3047 /// Generate the extraction of the appropriate bit from the block mask and the
3048 /// conditional branch.
3049 void execute(VPTransformState &State) override;
3050
3051 /// Return the cost of this VPBranchOnMaskRecipe.
3052 InstructionCost computeCost(ElementCount VF,
3053 VPCostContext &Ctx) const override;
3054
3055#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3056 /// Print the recipe.
3057 void printRecipe(raw_ostream &O, const Twine &Indent,
3058 VPSlotTracker &SlotTracker) const override {
3059 O << Indent << "BRANCH-ON-MASK ";
3061 }
3062#endif
3063
3064 /// Returns true if the recipe uses scalars of operand \p Op.
3065 bool usesScalars(const VPValue *Op) const override {
3067 "Op must be an operand of the recipe");
3068 return true;
3069 }
3070};
3071
3072/// A recipe to combine multiple recipes into a single 'expression' recipe,
3073/// which should be considered a single entity for cost-modeling and transforms.
3074/// The recipe needs to be 'decomposed', i.e. replaced by its individual
3075/// expression recipes, before execute. The individual expression recipes are
3076/// completely disconnected from the def-use graph of other recipes not part of
3077/// the expression. Def-use edges between pairs of expression recipes remain
3078/// intact, whereas every edge between an expression recipe and a recipe outside
3079/// the expression is elevated to connect the non-expression recipe with the
3080/// VPExpressionRecipe itself.
3081class VPExpressionRecipe : public VPSingleDefRecipe {
3082 /// Recipes included in this VPExpressionRecipe. This could contain
3083 /// duplicates.
3084 SmallVector<VPSingleDefRecipe *> ExpressionRecipes;
3085
3086 /// Temporary VPValues used for external operands of the expression, i.e.
3087 /// operands not defined by recipes in the expression.
3088 SmallVector<VPValue *> LiveInPlaceholders;
3089
3090 enum class ExpressionTypes {
3091 /// Represents an inloop extended reduction operation, performing a
3092 /// reduction on an extended vector operand into a scalar value, and adding
3093 /// the result to a chain.
3094 ExtendedReduction,
3095 /// Represents an inloop multiply-accumulate reduction, multiplying the
3096 /// extended vector operands, performing a reduction.add on the result, and
3097 /// adding the scalar result to a chain.
3098 ExtMulAccReduction,
3099 /// Represents an inloop multiply-accumulate reduction, multiplying the
3100 /// vector operands, performing a reduction.add on the result, and adding
3101 /// the scalar result to a chain.
3102 MulAccReduction,
3103 /// Represents an inloop multiply-accumulate reduction, multiplying the
3104 /// extended vector operands, negating the multiplication, performing a
3105 /// reduction.add on the result, and adding the scalar result to a chain.
3106 ExtNegatedMulAccReduction,
3107 };
3108
3109 /// Type of the expression.
3110 ExpressionTypes ExpressionType;
3111
3112 /// Construct a new VPExpressionRecipe by internalizing recipes in \p
3113 /// ExpressionRecipes. External operands (i.e. not defined by another recipe
3114 /// in the expression) are replaced by temporary VPValues and the original
3115 /// operands are transferred to the VPExpressionRecipe itself. Clone recipes
3116 /// as needed (excluding last) to ensure they are only used by other recipes
3117 /// in the expression.
3118 VPExpressionRecipe(ExpressionTypes ExpressionType,
3119 ArrayRef<VPSingleDefRecipe *> ExpressionRecipes);
3120
3121public:
3123 : VPExpressionRecipe(ExpressionTypes::ExtendedReduction, {Ext, Red}) {}
3125 : VPExpressionRecipe(ExpressionTypes::MulAccReduction, {Mul, Red}) {}
3128 : VPExpressionRecipe(ExpressionTypes::ExtMulAccReduction,
3129 {Ext0, Ext1, Mul, Red}) {}
3132 VPReductionRecipe *Red)
3133 : VPExpressionRecipe(ExpressionTypes::ExtNegatedMulAccReduction,
3134 {Ext0, Ext1, Mul, Sub, Red}) {
3135 assert(Mul->getOpcode() == Instruction::Mul && "Expected a mul");
3136 assert(Red->getRecurrenceKind() == RecurKind::Add &&
3137 "Expected an add reduction");
3138 assert(getNumOperands() >= 3 && "Expected at least three operands");
3139 [[maybe_unused]] auto *SubConst = dyn_cast<ConstantInt>(getOperand(2)->getLiveInIRValue());
3140 assert(SubConst && SubConst->getValue() == 0 &&
3141 Sub->getOpcode() == Instruction::Sub && "Expected a negating sub");
3142 }
3143
3145 SmallPtrSet<VPSingleDefRecipe *, 4> ExpressionRecipesSeen;
3146 for (auto *R : reverse(ExpressionRecipes)) {
3147 if (ExpressionRecipesSeen.insert(R).second)
3148 delete R;
3149 }
3150 for (VPValue *T : LiveInPlaceholders)
3151 delete T;
3152 }
3153
3154 VP_CLASSOF_IMPL(VPDef::VPExpressionSC)
3155
/// Create a copy of this expression: clone every recipe in
/// ExpressionRecipes, rewire the clones to use each other and this recipe's
/// operands instead of the originals and the placeholders, and build a new
/// VPExpressionRecipe with the same ExpressionType from the clones.
3156 VPExpressionRecipe *clone() override {
3157 assert(!ExpressionRecipes.empty() && "empty expressions should be removed");
3158 SmallVector<VPSingleDefRecipe *> NewExpressiondRecipes;
3159 for (auto *R : ExpressionRecipes)
3160 NewExpressiondRecipes.push_back(R->clone());
3161 for (auto *New : NewExpressiondRecipes) {
// Redirect uses of each original expression recipe to the clone stored at
// the same index.
3162 for (const auto &[Idx, Old] : enumerate(ExpressionRecipes))
3163 New->replaceUsesOfWith(Old, NewExpressiondRecipes[Idx]);
3164 // Update placeholder operands in the cloned recipe to use the external
3165 // operands, to be internalized when the cloned expression is constructed.
3166 for (const auto &[Placeholder, OutsideOp] :
3167 zip(LiveInPlaceholders, operands()))
3168 New->replaceUsesOfWith(Placeholder, OutsideOp);
3169 }
3170 return new VPExpressionRecipe(ExpressionType, NewExpressiondRecipes);
3171 }
3172
3173 /// Return the VPValue to use to infer the result type of the recipe.
3175 unsigned OpIdx =
3176 cast<VPReductionRecipe>(ExpressionRecipes.back())->isConditional() ? 2
3177 : 1;
3178 return getOperand(getNumOperands() - OpIdx);
3179 }
3180
3181 /// Insert the recipes of the expression back into the VPlan, directly before
3182 /// the current recipe. Leaves the expression recipe empty, which must be
3183 /// removed before codegen.
3184 void decompose();
3185
/// Return the VF scale factor of the last expression recipe if it is a
/// VPPartialReductionRecipe, and 1 otherwise.
3186 unsigned getVFScaleFactor() const {
3187 auto *PR = dyn_cast<VPPartialReductionRecipe>(ExpressionRecipes.back());
3188 return PR ? PR->getVFScaleFactor() : 1;
3189 }
3190
3191 /// Method for generating code, must not be called as this recipe is abstract.
3192 void execute(VPTransformState &State) override {
3193 llvm_unreachable("recipe must be removed before execute");
3194 }
3195
3197 VPCostContext &Ctx) const override;
3198
3199 /// Returns true if this expression contains recipes that may read from or
3200 /// write to memory.
3201 bool mayReadOrWriteMemory() const;
3202
3203 /// Returns true if this expression contains recipes that may have side
3204 /// effects.
3205 bool mayHaveSideEffects() const;
3206
3207 /// Returns true if the result of this VPExpressionRecipe is a single-scalar.
3208 bool isSingleScalar() const;
3209
3210protected:
3211#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3212 /// Print the recipe.
3213 void printRecipe(raw_ostream &O, const Twine &Indent,
3214 VPSlotTracker &SlotTracker) const override;
3215#endif
3216};
3217
3218/// VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when
3219/// control converges back from a Branch-on-Mask. The phi nodes are needed in
3220/// order to merge values that are set under such a branch and feed their uses.
3221/// The phi nodes can be scalar or vector depending on the users of the value.
3222/// This recipe works in concert with VPBranchOnMaskRecipe.
3224public:
3225 /// Construct a VPPredInstPHIRecipe given \p PredV whose value needs phi
3226 /// nodes after merging back from a Branch-on-Mask.
3228 : VPSingleDefRecipe(VPDef::VPPredInstPHISC, PredV, DL) {}
3229 ~VPPredInstPHIRecipe() override = default;
3230
3232 return new VPPredInstPHIRecipe(getOperand(0), getDebugLoc());
3233 }
3234
3235 VP_CLASSOF_IMPL(VPDef::VPPredInstPHISC)
3236
3237 /// Generates phi nodes for live-outs (from a replicate region) as needed to
3238 /// retain SSA form.
3239 void execute(VPTransformState &State) override;
3240
3241 /// Return the cost of this VPPredInstPHIRecipe.
3243 VPCostContext &Ctx) const override {
3244 // TODO: Compute accurate cost after retiring the legacy cost model.
3245 return 0;
3246 }
3247
3248 /// Returns true if the recipe uses scalars of operand \p Op.
3249 bool usesScalars(const VPValue *Op) const override {
3251 "Op must be an operand of the recipe");
3252 return true;
3253 }
3254
3255protected:
3256#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3257 /// Print the recipe.
3258 void printRecipe(raw_ostream &O, const Twine &Indent,
3259 VPSlotTracker &SlotTracker) const override;
3260#endif
3261};
3262
3263/// A common base class for widening memory operations. An optional mask can be
3264/// provided as the last operand.
3266 public VPIRMetadata {
3267protected:
3269
3270 /// Alignment information for this memory access.
3272
3273 /// Whether the accessed addresses are consecutive.
3275
3276 /// Whether the consecutive accessed addresses are in reverse order.
3278
3279 /// Whether the memory access is masked.
3280 bool IsMasked = false;
3281
/// Record \p Mask as the mask of this recipe by adding it as an operand and
/// setting IsMasked. A null \p Mask is ignored and leaves the recipe
/// unmasked. Asserts that no mask has been set before.
3282 void setMask(VPValue *Mask) {
3283 assert(!IsMasked && "cannot re-set mask");
3284 if (!Mask)
3285 return;
3286 addOperand(Mask);
3287 IsMasked = true;
3288 }
3289
3290 VPWidenMemoryRecipe(const char unsigned SC, Instruction &I,
3291 std::initializer_list<VPValue *> Operands,
3292 bool Consecutive, bool Reverse,
3293 const VPIRMetadata &Metadata, DebugLoc DL)
3294 : VPRecipeBase(SC, Operands, DL), VPIRMetadata(Metadata), Ingredient(I),
3296 Reverse(Reverse) {
3297 assert((Consecutive || !Reverse) && "Reverse implies consecutive");
3299 "Reversed acccess without VPVectorEndPointerRecipe address?");
3300 }
3301
3302public:
3304 llvm_unreachable("cloning not supported");
3305 }
3306
/// Return true if the VPDef ID of \p R is VPWidenLoadSC, VPWidenStoreSC,
/// VPWidenLoadEVLSC or VPWidenStoreEVLSC.
3307 static inline bool classof(const VPRecipeBase *R) {
3308 return R->getVPDefID() == VPRecipeBase::VPWidenLoadSC ||
3309 R->getVPDefID() == VPRecipeBase::VPWidenStoreSC ||
3310 R->getVPDefID() == VPRecipeBase::VPWidenLoadEVLSC ||
3311 R->getVPDefID() == VPRecipeBase::VPWidenStoreEVLSC;
3312 }
3313
/// Return true if \p U is a VPRecipeBase that is accepted by the classof
/// overload taking a VPRecipeBase.
3314 static inline bool classof(const VPUser *U) {
3315 auto *R = dyn_cast<VPRecipeBase>(U);
3316 return R && classof(R);
3317 }
3318
3319 /// Return whether the loaded-from / stored-to addresses are consecutive.
3320 bool isConsecutive() const { return Consecutive; }
3321
3322 /// Return whether the consecutive loaded/stored addresses are in reverse
3323 /// order.
3324 bool isReverse() const { return Reverse; }
3325
3326 /// Return the address accessed by this recipe.
3327 VPValue *getAddr() const { return getOperand(0); }
3328
3329 /// Returns true if the recipe is masked.
3330 bool isMasked() const { return IsMasked; }
3331
3332 /// Return the mask used by this recipe. Note that a full mask is represented
3333 /// by a nullptr.
3334 VPValue *getMask() const {
3335 // Mask is optional and therefore the last operand.
3336 return isMasked() ? getOperand(getNumOperands() - 1) : nullptr;
3337 }
3338
3339 /// Returns the alignment of the memory access.
3340 Align getAlign() const { return Alignment; }
3341
3342 /// Generate the wide load/store. This base implementation must never be
/// reached, as VPWidenMemoryRecipe itself should not be instantiated.
3343 void execute(VPTransformState &State) override {
3344 llvm_unreachable("VPWidenMemoryRecipe should not be instantiated.");
3345 }
3346
3347 /// Return the cost of this VPWidenMemoryRecipe.
3348 InstructionCost computeCost(ElementCount VF,
3349 VPCostContext &Ctx) const override;
3350
3352};
3353
3354/// A recipe for widening load operations, using the address to load from and an
3355/// optional mask.
3357 public VPValue {
3359 bool Consecutive, bool Reverse,
3360 const VPIRMetadata &Metadata, DebugLoc DL)
3361 : VPWidenMemoryRecipe(VPDef::VPWidenLoadSC, Load, {Addr}, Consecutive,
3362 Reverse, Metadata, DL),
3363 VPValue(this, &Load) {
3364 setMask(Mask);
3365 }
3366
3369 getMask(), Consecutive, Reverse, *this,
3370 getDebugLoc());
3371 }
3372
3373 VP_CLASSOF_IMPL(VPDef::VPWidenLoadSC);
3374
3375 /// Generate a wide load or gather.
3376 void execute(VPTransformState &State) override;
3377
3378 /// Returns true if the recipe only uses the first lane of operand \p Op.
3379 bool usesFirstLaneOnly(const VPValue *Op) const override {
3381 "Op must be an operand of the recipe");
3382 // Widened, consecutive load operations only demand the first lane of
3383 // their address.
3384 return Op == getAddr() && isConsecutive();
3385 }
3386
3387protected:
3388#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3389 /// Print the recipe.
3390 void printRecipe(raw_ostream &O, const Twine &Indent,
3391 VPSlotTracker &SlotTracker) const override;
3392#endif
3393};
3394
3395/// A recipe for widening load operations with vector-predication intrinsics,
3396/// using the address to load from, the explicit vector length and an optional
3397/// mask.
3398struct VPWidenLoadEVLRecipe final : public VPWidenMemoryRecipe, public VPValue {
3400 VPValue *Mask)
3401 : VPWidenMemoryRecipe(VPDef::VPWidenLoadEVLSC, L.getIngredient(),
3402 {Addr, &EVL}, L.isConsecutive(), L.isReverse(), L,
3403 L.getDebugLoc()),
3404 VPValue(this, &getIngredient()) {
3405 setMask(Mask);
3406 }
3407
3408 VP_CLASSOF_IMPL(VPDef::VPWidenLoadEVLSC)
3409
3410 /// Return the EVL operand.
3411 VPValue *getEVL() const { return getOperand(1); }
3412
3413 /// Generate the wide load or gather.
3414 void execute(VPTransformState &State) override;
3415
3416 /// Return the cost of this VPWidenLoadEVLRecipe.
3418 VPCostContext &Ctx) const override;
3419
3420 /// Returns true if the recipe only uses the first lane of operand \p Op.
3421 bool usesFirstLaneOnly(const VPValue *Op) const override {
3423 "Op must be an operand of the recipe");
3424 // Widened loads only demand the first lane of EVL and consecutive loads
3425 // only demand the first lane of their address.
3426 return Op == getEVL() || (Op == getAddr() && isConsecutive());
3427 }
3428
3429protected:
3430#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3431 /// Print the recipe.
3432 void printRecipe(raw_ostream &O, const Twine &Indent,
3433 VPSlotTracker &SlotTracker) const override;
3434#endif
3435};
3436
3437/// A recipe for widening store operations, using the stored value, the address
3438/// to store to and an optional mask.
3440 VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal,
3441 VPValue *Mask, bool Consecutive, bool Reverse,
3442 const VPIRMetadata &Metadata, DebugLoc DL)
3443 : VPWidenMemoryRecipe(VPDef::VPWidenStoreSC, Store, {Addr, StoredVal},
3444 Consecutive, Reverse, Metadata, DL) {
3445 setMask(Mask);
3446 }
3447
3453
3454 VP_CLASSOF_IMPL(VPDef::VPWidenStoreSC);
3455
3456 /// Return the value stored by this recipe.
3457 VPValue *getStoredValue() const { return getOperand(1); }
3458
3459 /// Generate a wide store or scatter.
3460 void execute(VPTransformState &State) override;
3461
3462 /// Returns true if the recipe only uses the first lane of operand \p Op.
3463 bool usesFirstLaneOnly(const VPValue *Op) const override {
3465 "Op must be an operand of the recipe");
3466 // Widened, consecutive stores only demand the first lane of their address,
3467 // unless the same operand is also stored.
3468 return Op == getAddr() && isConsecutive() && Op != getStoredValue();
3469 }
3470
3471protected:
3472#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3473 /// Print the recipe.
3474 void printRecipe(raw_ostream &O, const Twine &Indent,
3475 VPSlotTracker &SlotTracker) const override;
3476#endif
3477};
3478
3479/// A recipe for widening store operations with vector-predication intrinsics,
3480/// using the value to store, the address to store to, the explicit vector
3481/// length and an optional mask.
3484 VPValue *Mask)
3485 : VPWidenMemoryRecipe(VPDef::VPWidenStoreEVLSC, S.getIngredient(),
3486 {Addr, S.getStoredValue(), &EVL}, S.isConsecutive(),
3487 S.isReverse(), S, S.getDebugLoc()) {
3488 setMask(Mask);
3489 }
3490
3491 VP_CLASSOF_IMPL(VPDef::VPWidenStoreEVLSC)
3492
3493 /// Return the value stored by this recipe.
3494 VPValue *getStoredValue() const { return getOperand(1); }
3495
3496 /// Return the EVL operand.
3497 VPValue *getEVL() const { return getOperand(2); }
3498
3499 /// Generate the wide store or scatter.
3500 void execute(VPTransformState &State) override;
3501
3502 /// Return the cost of this VPWidenStoreEVLRecipe.
3504 VPCostContext &Ctx) const override;
3505
3506 /// Returns true if the recipe only uses the first lane of operand \p Op.
3507 bool usesFirstLaneOnly(const VPValue *Op) const override {
3509 "Op must be an operand of the recipe");
3510 if (Op == getEVL()) {
3511 assert(getStoredValue() != Op && "unexpected store of EVL");
3512 return true;
3513 }
3514 // Widened, consecutive memory operations only demand the first lane of
3515 // their address, unless the same operand is also stored. The latter can
3516 // happen with opaque pointers.
3517 return Op == getAddr() && isConsecutive() && Op != getStoredValue();
3518 }
3519
3520protected:
3521#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3522 /// Print the recipe.
3523 void printRecipe(raw_ostream &O, const Twine &Indent,
3524 VPSlotTracker &SlotTracker) const override;
3525#endif
3526};
3527
3528/// Recipe to expand a SCEV expression.
3530 const SCEV *Expr;
3531
3532public:
3534 : VPSingleDefRecipe(VPDef::VPExpandSCEVSC, {}), Expr(Expr) {}
3535
3536 ~VPExpandSCEVRecipe() override = default;
3537
3538 VPExpandSCEVRecipe *clone() override { return new VPExpandSCEVRecipe(Expr); }
3539
3540 VP_CLASSOF_IMPL(VPDef::VPExpandSCEVSC)
3541
3542 void execute(VPTransformState &State) override {
3543 llvm_unreachable("SCEV expressions must be expanded before final execute");
3544 }
3545
3546 /// Return the cost of this VPExpandSCEVRecipe.
3548 VPCostContext &Ctx) const override {
3549 // TODO: Compute accurate cost after retiring the legacy cost model.
3550 return 0;
3551 }
3552
3553 const SCEV *getSCEV() const { return Expr; }
3554
3555protected:
3556#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3557 /// Print the recipe.
3558 void printRecipe(raw_ostream &O, const Twine &Indent,
3559 VPSlotTracker &SlotTracker) const override;
3560#endif
3561};
3562
3563 /// Canonical scalar induction phi of the vector loop, starting at the specified
3564 /// start value (either 0 or the resume value when vectorizing the epilogue
3565 /// loop). VPWidenCanonicalIVRecipe represents the vector version of the
3566 /// canonical induction variable.
3568public:
3570 : VPHeaderPHIRecipe(VPDef::VPCanonicalIVPHISC, nullptr, StartV, DL) {}
3571
3572 ~VPCanonicalIVPHIRecipe() override = default;
3573
3575 auto *R = new VPCanonicalIVPHIRecipe(getOperand(0), getDebugLoc());
3576 R->addOperand(getBackedgeValue());
3577 return R;
3578 }
3579
3580 VP_CLASSOF_IMPL(VPDef::VPCanonicalIVPHISC)
3581
3582 void execute(VPTransformState &State) override {
3583 llvm_unreachable("cannot execute this recipe, should be replaced by a "
3584 "scalar phi recipe");
3585 }
3586
3587 /// Returns the scalar type of the induction.
3589 return getStartValue()->getLiveInIRValue()->getType();
3590 }
3591
3592 /// Returns true if the recipe only uses the first lane of operand \p Op.
3593 bool usesFirstLaneOnly(const VPValue *Op) const override {
3595 "Op must be an operand of the recipe");
3596 return true;
3597 }
3598
3599 /// Returns true if the recipe only uses the first part of operand \p Op.
3600 bool usesFirstPartOnly(const VPValue *Op) const override {
3602 "Op must be an operand of the recipe");
3603 return true;
3604 }
3605
3606 /// Return the cost of this VPCanonicalIVPHIRecipe.
3608 VPCostContext &Ctx) const override {
3609 // For now, match the behavior of the legacy cost model.
3610 return 0;
3611 }
3612
3613protected:
3614#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3615 /// Print the recipe.
3616 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
3617 VPSlotTracker &SlotTracker) const override;
3618#endif
3619};
3620
3621/// A recipe for generating the active lane mask for the vector loop that is
3622/// used to predicate the vector operations.
3623/// TODO: It would be good to use the existing VPWidenPHIRecipe instead and
3624/// remove VPActiveLaneMaskPHIRecipe.
3626public:
3628 : VPHeaderPHIRecipe(VPDef::VPActiveLaneMaskPHISC, nullptr, StartMask,
3629 DL) {}
3630
3631 ~VPActiveLaneMaskPHIRecipe() override = default;
3632
3635 if (getNumOperands() == 2)
3636 R->addOperand(getOperand(1));
3637 return R;
3638 }
3639
3640 VP_CLASSOF_IMPL(VPDef::VPActiveLaneMaskPHISC)
3641
3642 /// Generate the active lane mask phi of the vector loop.
3643 void execute(VPTransformState &State) override;
3644
3645protected:
3646#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3647 /// Print the recipe.
3648 void printRecipe(raw_ostream &O, const Twine &Indent,
3649 VPSlotTracker &SlotTracker) const override;
3650#endif
3651};
3652
3653/// A recipe for generating the phi node for the current index of elements,
3654/// adjusted in accordance with EVL value. It starts at the start value of the
3655/// canonical induction and gets incremented by EVL in each iteration of the
3656/// vector loop.
3658public:
3660 : VPHeaderPHIRecipe(VPDef::VPEVLBasedIVPHISC, nullptr, StartIV, DL) {}
3661
3662 ~VPEVLBasedIVPHIRecipe() override = default;
3663
3665 llvm_unreachable("cloning not implemented yet");
3666 }
3667
3668 VP_CLASSOF_IMPL(VPDef::VPEVLBasedIVPHISC)
3669
3670 void execute(VPTransformState &State) override {
3671 llvm_unreachable("cannot execute this recipe, should be replaced by a "
3672 "scalar phi recipe");
3673 }
3674
3675 /// Return the cost of this VPEVLBasedIVPHIRecipe.
3677 VPCostContext &Ctx) const override {
3678 // For now, match the behavior of the legacy cost model.
3679 return 0;
3680 }
3681
3682 /// Returns true if the recipe only uses the first lane of operand \p Op.
3683 bool usesFirstLaneOnly(const VPValue *Op) const override {
3685 "Op must be an operand of the recipe");
3686 return true;
3687 }
3688
3689protected:
3690#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3691 /// Print the recipe.
3692 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
3693 VPSlotTracker &SlotTracker) const override;
3694#endif
3695};
3696
3697/// A Recipe for widening the canonical induction variable of the vector loop.
3699 public VPUnrollPartAccessor<1> {
3700public:
3702 : VPSingleDefRecipe(VPDef::VPWidenCanonicalIVSC, {CanonicalIV}) {}
3703
3704 ~VPWidenCanonicalIVRecipe() override = default;
3705
3710
3711 VP_CLASSOF_IMPL(VPDef::VPWidenCanonicalIVSC)
3712
3713 /// Generate a canonical vector induction variable of the vector loop, with
3714 /// start = {<Part*VF, Part*VF+1, ..., Part*VF+VF-1> for 0 <= Part < UF}, and
3715 /// step = <VF*UF, VF*UF, ..., VF*UF>.
3716 void execute(VPTransformState &State) override;
3717
3718 /// Return the cost of this VPWidenCanonicalIVRecipe.
3720 VPCostContext &Ctx) const override {
3721 // TODO: Compute accurate cost after retiring the legacy cost model.
3722 return 0;
3723 }
3724
3725protected:
3726#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3727 /// Print the recipe.
3728 void printRecipe(raw_ostream &O, const Twine &Indent,
3729 VPSlotTracker &SlotTracker) const override;
3730#endif
3731};
3732
3733 /// A recipe for converting the input value \p IV to the corresponding value
3734 /// of an IV with different start and step values, using Start + IV *
3735 /// Step.
3737 /// Kind of the induction.
3739 /// If not nullptr, the floating point induction binary operator. Must be set
3740 /// for floating point inductions.
3741 const FPMathOperator *FPBinOp;
3742
3743 /// Name to use for the generated IR instruction for the derived IV.
3744 std::string Name;
3745
3746public:
3748 VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step,
3749 const Twine &Name = "")
3751 IndDesc.getKind(),
3752 dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp()),
3753 Start, CanonicalIV, Step, Name) {}
3754
3756 const FPMathOperator *FPBinOp, VPValue *Start, VPValue *IV,
3757 VPValue *Step, const Twine &Name = "")
3758 : VPSingleDefRecipe(VPDef::VPDerivedIVSC, {Start, IV, Step}), Kind(Kind),
3759 FPBinOp(FPBinOp), Name(Name.str()) {}
3760
3761 ~VPDerivedIVRecipe() override = default;
3762
3764 return new VPDerivedIVRecipe(Kind, FPBinOp, getStartValue(), getOperand(1),
3765 getStepValue());
3766 }
3767
3768 VP_CLASSOF_IMPL(VPDef::VPDerivedIVSC)
3769
3770 /// Generate the transformed value of the induction at offset StartValue (1.
3771 /// operand) + IV (2. operand) * StepValue (3. operand).
3772 void execute(VPTransformState &State) override;
3773
3774 /// Return the cost of this VPDerivedIVRecipe.
3776 VPCostContext &Ctx) const override {
3777 // TODO: Compute accurate cost after retiring the legacy cost model.
3778 return 0;
3779 }
3780
3782 return getStartValue()->getLiveInIRValue()->getType();
3783 }
3784
3785 VPValue *getStartValue() const { return getOperand(0); }
3786 VPValue *getStepValue() const { return getOperand(2); }
3787
3788 /// Returns true if the recipe only uses the first lane of operand \p Op.
3789 bool usesFirstLaneOnly(const VPValue *Op) const override {
3791 "Op must be an operand of the recipe");
3792 return true;
3793 }
3794
3795protected:
3796#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3797 /// Print the recipe.
3798 void printRecipe(raw_ostream &O, const Twine &Indent,
3799 VPSlotTracker &SlotTracker) const override;
3800#endif
3801};
3802
3803/// A recipe for handling phi nodes of integer and floating-point inductions,
3804/// producing their scalar values.
3806 public VPUnrollPartAccessor<3> {
3807 Instruction::BinaryOps InductionOpcode;
3808
3809public:
3812 DebugLoc DL)
3813 : VPRecipeWithIRFlags(VPDef::VPScalarIVStepsSC,
3814 ArrayRef<VPValue *>({IV, Step, VF}), FMFs, DL),
3815 InductionOpcode(Opcode) {}
3816
3818 VPValue *Step, VPValue *VF,
3821 IV, Step, VF, IndDesc.getInductionOpcode(),
3822 dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp())
3823 ? IndDesc.getInductionBinOp()->getFastMathFlags()
3824 : FastMathFlags(),
3825 DL) {}
3826
3827 ~VPScalarIVStepsRecipe() override = default;
3828
3830 return new VPScalarIVStepsRecipe(
3831 getOperand(0), getOperand(1), getOperand(2), InductionOpcode,
3833 getDebugLoc());
3834 }
3835
3836 /// Return true if this VPScalarIVStepsRecipe corresponds to part 0. Note that
3837 /// this is only accurate after the VPlan has been unrolled.
3838 bool isPart0() const { return getUnrollPart(*this) == 0; }
3839
3840 VP_CLASSOF_IMPL(VPDef::VPScalarIVStepsSC)
3841
3842 /// Generate the scalarized versions of the phi node as needed by their users.
3843 void execute(VPTransformState &State) override;
3844
3845 /// Return the cost of this VPScalarIVStepsRecipe.
3847 VPCostContext &Ctx) const override {
3848 // TODO: Compute accurate cost after retiring the legacy cost model.
3849 return 0;
3850 }
3851
3852 VPValue *getStepValue() const { return getOperand(1); }
3853
3854 /// Returns true if the recipe only uses the first lane of operand \p Op.
3855 bool usesFirstLaneOnly(const VPValue *Op) const override {
3857 "Op must be an operand of the recipe");
3858 return true;
3859 }
3860
3861protected:
3862#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3863 /// Print the recipe.
3864 void printRecipe(raw_ostream &O, const Twine &Indent,
3865 VPSlotTracker &SlotTracker) const override;
3866#endif
3867};
3868
3869/// Casting from VPRecipeBase -> VPPhiAccessors is supported for all recipe
3870/// types implementing VPPhiAccessors. Used by isa<> & co.
3872 static inline bool isPossible(const VPRecipeBase *f) {
3873 // TODO: include VPPredInstPHIRecipe too, once it implements VPPhiAccessors.
3875 }
3876};
3877/// Support casting from VPRecipeBase -> VPPhiAccessors, by down-casting to the
3878/// recipe types implementing VPPhiAccessors. Used by cast<>, dyn_cast<> & co.
3879template <typename SrcTy>
3880struct CastInfoVPPhiAccessors : public CastIsPossible<VPPhiAccessors, SrcTy> {
3881
3883
3884 /// doCast is used by cast<>. Dispatches on the VPDef ID of \p R to down-cast
/// it to VPPhi, VPIRPhi or VPWidenPHIRecipe; any ID without an explicit case
/// is cast to VPHeaderPHIRecipe.
3885 static inline VPPhiAccessors *doCast(SrcTy R) {
// The lambda yields a const pointer, and the constness is dropped again by
// the const_cast (presumably so one body serves const and non-const SrcTy).
3886 return const_cast<VPPhiAccessors *>([R]() -> const VPPhiAccessors * {
3887 switch (R->getVPDefID()) {
3888 case VPDef::VPInstructionSC:
3889 return cast<VPPhi>(R);
3890 case VPDef::VPIRInstructionSC:
3891 return cast<VPIRPhi>(R);
3892 case VPDef::VPWidenPHISC:
3893 return cast<VPWidenPHIRecipe>(R);
3894 default:
3895 return cast<VPHeaderPHIRecipe>(R);
3896 }
3897 }());
3898 }
3899
3900 /// doCastIfPossible is used by dyn_cast<>.
3901 static inline VPPhiAccessors *doCastIfPossible(SrcTy f) {
3902 if (!Self::isPossible(f))
3903 return nullptr;
3904 return doCast(f);
3905 }
3906};
3907template <>
3910template <>
3913
3914/// Casting from (const) VPRecipeBase -> (const) VPIRMetadata is supported for
3915/// all recipe types implementing VPIRMetadata. Used by isa<> & co.
3916namespace detail {
3917template <typename DstTy, typename RecipeBasePtrTy>
3918static inline auto castToVPIRMetadata(RecipeBasePtrTy R) -> DstTy {
3919 switch (R->getVPDefID()) {
3920 case VPDef::VPInstructionSC:
3921 return cast<VPInstruction>(R);
3922 case VPDef::VPWidenSC:
3923 return cast<VPWidenRecipe>(R);
3924 case VPDef::VPWidenCastSC:
3925 return cast<VPWidenCastRecipe>(R);
3926 case VPDef::VPWidenIntrinsicSC:
3928 case VPDef::VPWidenCallSC:
3929 return cast<VPWidenCallRecipe>(R);
3930 case VPDef::VPWidenSelectSC:
3931 return cast<VPWidenSelectRecipe>(R);
3932 case VPDef::VPReplicateSC:
3933 return cast<VPReplicateRecipe>(R);
3934 case VPDef::VPInterleaveSC:
3935 case VPDef::VPInterleaveEVLSC:
3936 return cast<VPInterleaveBase>(R);
3937 case VPDef::VPWidenLoadSC:
3938 case VPDef::VPWidenLoadEVLSC:
3939 case VPDef::VPWidenStoreSC:
3940 case VPDef::VPWidenStoreEVLSC:
3941 return cast<VPWidenMemoryRecipe>(R);
3942 default:
3943 llvm_unreachable("invalid recipe for VPIRMetadata cast");
3944 }
3945}
3946} // namespace detail
3947
3948/// Support casting from VPRecipeBase -> VPIRMetadata, by down-casting to the
3949/// recipe types implementing VPIRMetadata. Used by cast<>, dyn_cast<> & co.
3950template <typename DstTy, typename SrcTy>
3951struct CastInfoVPIRMetadata : public CastIsPossible<DstTy, SrcTy> {
3952 static inline bool isPossible(SrcTy R) {
3953 // NOTE: Each recipe inheriting from VPIRMetadata must be listed here and
3954 // also handled in castToVPIRMetadata.
3960 }
3961
3962 using RetTy = DstTy *;
3963
3964 /// doCast is used by cast<>.
3965 static inline RetTy doCast(SrcTy R) {
3967 }
3968
3969 /// doCastIfPossible is used by dyn_cast<>.
3970 static inline RetTy doCastIfPossible(SrcTy R) {
3971 if (!isPossible(R))
3972 return nullptr;
3973 return doCast(R);
3974 }
3975};
3976template <>
3979template <>
3982
3983/// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It
3984/// holds a sequence of zero or more VPRecipe's each representing a sequence of
3985/// output IR instructions. All PHI-like recipes must come before any non-PHI recipes.
3986class LLVM_ABI_FOR_TEST VPBasicBlock : public VPBlockBase {
3987 friend class VPlan;
3988
3989 /// Use VPlan::createVPBasicBlock to create VPBasicBlocks.
3990 VPBasicBlock(const Twine &Name = "", VPRecipeBase *Recipe = nullptr)
3991 : VPBlockBase(VPBasicBlockSC, Name.str()) {
3992 if (Recipe)
3993 appendRecipe(Recipe);
3994 }
3995
3996public:
3998
3999protected:
4000 /// The VPRecipes held in the order of output instructions to generate.
4002
4003 VPBasicBlock(const unsigned char BlockSC, const Twine &Name = "")
4004 : VPBlockBase(BlockSC, Name.str()) {}
4005
4006public:
4007 ~VPBasicBlock() override {
4008 while (!Recipes.empty())
4009 Recipes.pop_back();
4010 }
4011
4012 /// Instruction iterators...
4017
4018 //===--------------------------------------------------------------------===//
4019 /// Recipe iterator methods
4020 ///
4021 inline iterator begin() { return Recipes.begin(); }
4022 inline const_iterator begin() const { return Recipes.begin(); }
4023 inline iterator end() { return Recipes.end(); }
4024 inline const_iterator end() const { return Recipes.end(); }
4025
4026 inline reverse_iterator rbegin() { return Recipes.rbegin(); }
4027 inline const_reverse_iterator rbegin() const { return Recipes.rbegin(); }
4028 inline reverse_iterator rend() { return Recipes.rend(); }
4029 inline const_reverse_iterator rend() const { return Recipes.rend(); }
4030
4031 inline size_t size() const { return Recipes.size(); }
4032 inline bool empty() const { return Recipes.empty(); }
4033 inline const VPRecipeBase &front() const { return Recipes.front(); }
4034 inline VPRecipeBase &front() { return Recipes.front(); }
4035 inline const VPRecipeBase &back() const { return Recipes.back(); }
4036 inline VPRecipeBase &back() { return Recipes.back(); }
4037
4038 /// Returns a reference to the list of recipes.
4040
4041 /// Returns a pointer to a member of the recipe list.
4042 static RecipeListTy VPBasicBlock::*getSublistAccess(VPRecipeBase *) {
4043 return &VPBasicBlock::Recipes;
4044 }
4045
4046 /// Method to support type inquiry through isa, cast, and dyn_cast.
4047 static inline bool classof(const VPBlockBase *V) {
4048 return V->getVPBlockID() == VPBlockBase::VPBasicBlockSC ||
4049 V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
4050 }
4051
4052 void insert(VPRecipeBase *Recipe, iterator InsertPt) {
4053 assert(Recipe && "No recipe to append.");
4054 assert(!Recipe->Parent && "Recipe already in VPlan");
4055 Recipe->Parent = this;
4056 Recipes.insert(InsertPt, Recipe);
4057 }
4058
4059 /// Augment the existing recipes of a VPBasicBlock with an additional
4060 /// \p Recipe as the last recipe.
4061 void appendRecipe(VPRecipeBase *Recipe) { insert(Recipe, end()); }
4062
4063 /// The method which generates the output IR instructions that correspond to
4064 /// this VPBasicBlock, thereby "executing" the VPlan.
4065 void execute(VPTransformState *State) override;
4066
4067 /// Return the cost of this VPBasicBlock.
4068 InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override;
4069
4070 /// Return the position of the first non-phi node recipe in the block.
4071 iterator getFirstNonPhi();
4072
4073 /// Returns an iterator range over the PHI-like recipes in the block.
4077
4078 /// Split current block at \p SplitAt by inserting a new block between the
4079 /// current block and its successors and moving all recipes starting at
4080 /// SplitAt to the new block. Returns the new block.
4081 VPBasicBlock *splitAt(iterator SplitAt);
4082
4083 VPRegionBlock *getEnclosingLoopRegion();
4084 const VPRegionBlock *getEnclosingLoopRegion() const;
4085
4086#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4087 /// Print this VPBasicBlock to \p O, prefixing all lines with \p Indent. \p
4088 /// SlotTracker is used to print unnamed VPValue's using consecutive numbers.
4089 ///
4090 /// Note that the numbering is applied to the whole VPlan, so printing
4091 /// individual blocks is consistent with the whole VPlan printing.
4092 void print(raw_ostream &O, const Twine &Indent,
4093 VPSlotTracker &SlotTracker) const override;
4094 using VPBlockBase::print; // Get the print(raw_stream &O) version.
4095#endif
4096
4097 /// If the block has multiple successors, return the branch recipe terminating
4098 /// the block. If there are no or only a single successor, return nullptr;
4099 VPRecipeBase *getTerminator();
4100 const VPRecipeBase *getTerminator() const;
4101
4102 /// Returns true if the block is exiting its parent region.
4103 bool isExiting() const;
4104
4105 /// Clone the current block and its recipes, without updating the operands of
4106 /// the cloned recipes.
4107 VPBasicBlock *clone() override;
4108
4109 /// Returns the predecessor block at index \p Idx with the predecessors as per
4110 /// the corresponding plain CFG. If the block is an entry block to a region,
4111 /// the first predecessor is the single predecessor of a region, and the
4112 /// second predecessor is the exiting block of the region.
4113 const VPBasicBlock *getCFGPredecessor(unsigned Idx) const;
4114
4115protected:
4116 /// Execute the recipes in the IR basic block \p BB.
4117 void executeRecipes(VPTransformState *State, BasicBlock *BB);
4118
4119 /// Connect the VPBB's predecessors in the VPlan CFG to the IR basic block
4120 /// generated for this VPBB.
4121 void connectToPredecessors(VPTransformState &State);
4122
4123private:
4124 /// Create an IR BasicBlock to hold the output instructions generated by this
4125 /// VPBasicBlock, and return it. Update the CFGState accordingly.
4126 BasicBlock *createEmptyBasicBlock(VPTransformState &State);
4127};
4128
4129inline const VPBasicBlock *
4131 return getAsRecipe()->getParent()->getCFGPredecessor(Idx);
4132}
4133
4134/// A special type of VPBasicBlock that wraps an existing IR basic block.
4135/// Recipes of the block get added before the first non-phi instruction in the
4136/// wrapped block.
4137 /// Note: VPlan uses VPIRBasicBlocks to wrap the original preheader, the
4138 /// header of the scalar loop and the exit blocks of the scalar loop.
4139class VPIRBasicBlock : public VPBasicBlock {
4140 friend class VPlan;
4141
4142 BasicBlock *IRBB;
4143
4144 /// Use VPlan::createVPIRBasicBlock to create VPIRBasicBlocks.
4145 VPIRBasicBlock(BasicBlock *IRBB)
4146 : VPBasicBlock(VPIRBasicBlockSC,
4147 (Twine("ir-bb<") + IRBB->getName() + Twine(">")).str()),
4148 IRBB(IRBB) {}
4149
4150public:
4151 ~VPIRBasicBlock() override = default;
4152
4153 static inline bool classof(const VPBlockBase *V) {
4154 return V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
4155 }
4156
4157 /// The method which generates the output IR instructions that correspond to
4158 /// this VPBasicBlock, thereby "executing" the VPlan.
4159 void execute(VPTransformState *State) override;
4160
4161 VPIRBasicBlock *clone() override;
4162
4163 BasicBlock *getIRBasicBlock() const { return IRBB; }
4164};
4165
4166/// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks
4167/// which form a Single-Entry-Single-Exiting subgraph of the output IR CFG.
4168/// A VPRegionBlock may indicate that its contents are to be replicated several
4169/// times. This is designed to support predicated scalarization, in which a
4170/// scalar if-then code structure needs to be generated VF * UF times. Having
4171/// this replication indicator helps to keep a single model for multiple
4172/// candidate VF's. The actual replication takes place only once the desired VF
4173/// and UF have been determined.
4174class LLVM_ABI_FOR_TEST VPRegionBlock : public VPBlockBase {
4175 friend class VPlan;
4176
4177 /// Hold the Single Entry of the SESE region modelled by the VPRegionBlock.
4178 VPBlockBase *Entry;
4179
4180 /// Hold the Single Exiting block of the SESE region modelled by the
4181 /// VPRegionBlock.
4182 VPBlockBase *Exiting;
4183
4184 /// An indicator whether this region is to generate multiple replicated
4185 /// instances of output IR corresponding to its VPBlockBases.
4186 bool IsReplicator;
4187
4188 /// Use VPlan::createVPRegionBlock to create VPRegionBlocks.
4189 VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting,
4190 const std::string &Name = "", bool IsReplicator = false)
4191 : VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exiting(Exiting),
4192 IsReplicator(IsReplicator) {
4193 assert(Entry->getPredecessors().empty() && "Entry block has predecessors.");
4194 assert(Exiting->getSuccessors().empty() && "Exit block has successors.");
4195 Entry->setParent(this);
4196 Exiting->setParent(this);
4197 }
4198 VPRegionBlock(const std::string &Name = "", bool IsReplicator = false)
4199 : VPBlockBase(VPRegionBlockSC, Name), Entry(nullptr), Exiting(nullptr),
4200 IsReplicator(IsReplicator) {}
4201
4202public:
4203 ~VPRegionBlock() override = default;
4204
4205 /// Method to support type inquiry through isa, cast, and dyn_cast.
4206 static inline bool classof(const VPBlockBase *V) {
4207 return V->getVPBlockID() == VPBlockBase::VPRegionBlockSC;
4208 }
4209
4210 const VPBlockBase *getEntry() const { return Entry; }
4211 VPBlockBase *getEntry() { return Entry; }
4212
4213 /// Set \p EntryBlock as the entry VPBlockBase of this VPRegionBlock. \p
4214 /// EntryBlock must have no predecessors.
4215 void setEntry(VPBlockBase *EntryBlock) {
4216 assert(EntryBlock->getPredecessors().empty() &&
4217 "Entry block cannot have predecessors.");
4218 Entry = EntryBlock;
4219 EntryBlock->setParent(this);
4220 }
4221
4222 const VPBlockBase *getExiting() const { return Exiting; }
4223 VPBlockBase *getExiting() { return Exiting; }
4224
4225 /// Set \p ExitingBlock as the exiting VPBlockBase of this VPRegionBlock. \p
4226 /// ExitingBlock must have no successors.
4227 void setExiting(VPBlockBase *ExitingBlock) {
4228 assert(ExitingBlock->getSuccessors().empty() &&
4229 "Exit block cannot have successors.");
4230 Exiting = ExitingBlock;
4231 ExitingBlock->setParent(this);
4232 }
4233
4234 /// Returns the pre-header VPBasicBlock of the loop region.
4236 assert(!isReplicator() && "should only get pre-header of loop regions");
4237 return getSinglePredecessor()->getExitingBasicBlock();
4238 }
4239
4240 /// An indicator whether this region is to generate multiple replicated
4241 /// instances of output IR corresponding to its VPBlockBases.
4242 bool isReplicator() const { return IsReplicator; }
4243
4244 /// The method which generates the output IR instructions that correspond to
4245 /// this VPRegionBlock, thereby "executing" the VPlan.
4246 void execute(VPTransformState *State) override;
4247
4248 /// Return the cost of this region.
4249 InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override;
4250
4251#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4252 /// Print this VPRegionBlock to \p O (recursively), prefixing all lines with
4253 /// \p Indent. \p SlotTracker is used to print unnamed VPValue's using
4254 /// consecutive numbers.
4255 ///
4256 /// Note that the numbering is applied to the whole VPlan, so printing
4257 /// individual regions is consistent with the whole VPlan printing.
4258 void print(raw_ostream &O, const Twine &Indent,
4259 VPSlotTracker &SlotTracker) const override;
4260 using VPBlockBase::print; // Get the print(raw_stream &O) version.
4261#endif
4262
4263 /// Clone all blocks in the single-entry single-exit region of the block and
4264 /// their recipes without updating the operands of the cloned recipes.
4265 VPRegionBlock *clone() override;
4266
4267 /// Remove the current region from its VPlan, connecting its predecessor to
4268 /// its entry, and its exiting block to its successor.
4269 void dissolveToCFGLoop();
4270
4271 /// Returns the canonical induction recipe of the region.
4273 VPBasicBlock *EntryVPBB = getEntryBasicBlock();
4274 if (EntryVPBB->empty()) {
4275 // VPlan native path. TODO: Unify both code paths.
4276 EntryVPBB = cast<VPBasicBlock>(EntryVPBB->getSingleSuccessor());
4277 }
4278 return cast<VPCanonicalIVPHIRecipe>(&*EntryVPBB->begin());
4279 }
4281 return const_cast<VPRegionBlock *>(this)->getCanonicalIV();
4282 }
4283
4284 /// Return the type of the canonical IV for loop regions.
4285 Type *getCanonicalIVType() { return getCanonicalIV()->getScalarType(); }
4286 const Type *getCanonicalIVType() const {
4287 return getCanonicalIV()->getScalarType();
4288 }
4289};
4290
4292 return getParent()->getParent();
4293}
4294
4296 return getParent()->getParent();
4297}
4298
4299 /// VPlan models a candidate for vectorization, encoding various decisions taken
4300/// to produce efficient output IR, including which branches, basic-blocks and
4301/// output IR instructions to generate, and their cost. VPlan holds a
4302/// Hierarchical-CFG of VPBasicBlocks and VPRegionBlocks rooted at an Entry
4303/// VPBasicBlock.
4304class VPlan {
4305 friend class VPlanPrinter;
4306 friend class VPSlotTracker;
4307
4308 /// VPBasicBlock corresponding to the original preheader. Used to place
4309 /// VPExpandSCEV recipes for expressions used during skeleton creation and the
4310 /// rest of VPlan execution.
4311 /// When this VPlan is used for the epilogue vector loop, the entry will be
4312 /// replaced by a new entry block created during skeleton creation.
4313 VPBasicBlock *Entry;
4314
4315 /// VPIRBasicBlock wrapping the header of the original scalar loop.
4316 VPIRBasicBlock *ScalarHeader;
4317
4318 /// Immutable list of VPIRBasicBlocks wrapping the exit blocks of the original
4319 /// scalar loop. Note that some exit blocks may be unreachable at the moment,
4320 /// e.g. if the scalar epilogue always executes.
4322
4323 /// Holds the VFs applicable to this VPlan.
4325
4326 /// Holds the UFs applicable to this VPlan. If empty, the VPlan is valid for
4327 /// any UF.
4329
4330 /// Holds the name of the VPlan, for printing.
4331 std::string Name;
4332
4333 /// Represents the trip count of the original loop, for folding
4334 /// the tail.
4335 VPValue *TripCount = nullptr;
4336
4337 /// Represents the backedge taken count of the original loop, for folding
4338 /// the tail. It equals TripCount - 1.
4339 VPValue *BackedgeTakenCount = nullptr;
4340
4341 /// Represents the vector trip count.
4342 VPValue VectorTripCount;
4343
4344 /// Represents the vectorization factor of the loop.
4345 VPValue VF;
4346
4347 /// Represents the loop-invariant VF * UF of the vector loop region.
4348 VPValue VFxUF;
4349
4350 /// Holds a mapping between Values and their corresponding VPValue inside
4351 /// VPlan.
4352 Value2VPValueTy Value2VPValue;
4353
4354 /// Contains all the external definitions created for this VPlan. External
4355 /// definitions are VPValues that hold a pointer to their underlying IR.
4357
4358 /// Blocks allocated and owned by the VPlan. They will be deleted once the
4359 /// VPlan is destroyed.
4360 SmallVector<VPBlockBase *> CreatedBlocks;
4361
4362 /// Construct a VPlan with \p Entry to the plan and with \p ScalarHeader
4363 /// wrapping the original header of the scalar loop.
4364 VPlan(VPBasicBlock *Entry, VPIRBasicBlock *ScalarHeader)
4365 : Entry(Entry), ScalarHeader(ScalarHeader) {
4366 Entry->setPlan(this);
4367 assert(ScalarHeader->getNumSuccessors() == 0 &&
4368 "scalar header must be a leaf node");
4369 }
4370
4371public:
4372 /// Construct a VPlan for \p L. This will create VPIRBasicBlocks wrapping the
4373 /// original preheader and scalar header of \p L, to be used as entry and
4374 /// scalar header blocks of the new VPlan.
4375 VPlan(Loop *L);
4376
4377 /// Construct a VPlan with a new VPBasicBlock as entry and a VPIRBasicBlock
4378 /// wrapping \p ScalarHeaderBB as scalar header. The trip count is not set.
4379 VPlan(BasicBlock *ScalarHeaderBB) {
4380 setEntry(createVPBasicBlock("preheader"));
4381 ScalarHeader = createVPIRBasicBlock(ScalarHeaderBB);
4382 }
4383
4385
4387 Entry = VPBB;
4388 VPBB->setPlan(this);
4389 }
4390
4391 /// Generate the IR code for this VPlan.
4392 void execute(VPTransformState *State);
4393
4394 /// Return the cost of this plan.
4396
4397 VPBasicBlock *getEntry() { return Entry; }
4398 const VPBasicBlock *getEntry() const { return Entry; }
4399
4400 /// Returns the preheader of the vector loop region, if one exists, or null
4401 /// otherwise.
4403 VPRegionBlock *VectorRegion = getVectorLoopRegion();
4404 return VectorRegion
4405 ? cast<VPBasicBlock>(VectorRegion->getSinglePredecessor())
4406 : nullptr;
4407 }
4408
4409 /// Returns the VPRegionBlock of the vector loop.
4412
4413 /// Returns the 'middle' block of the plan, that is the block that selects
4414 /// whether to execute the scalar tail loop or the exit block from the loop
4415 /// latch. If there is an early exit from the vector loop, the middle block
4416 /// conceptually has the early exit block as third successor, split across 2
4417 /// VPBBs. In that case, the second VPBB selects whether to execute the scalar
4418 /// tail loop or the exit block. If the scalar tail loop or exit block are
4419 /// known to always execute, the middle block may branch directly to that
4420 /// block. This function cannot be called once the vector loop region has been
4421 /// removed.
4423 VPRegionBlock *LoopRegion = getVectorLoopRegion();
4424 assert(
4425 LoopRegion &&
4426 "cannot call the function after vector loop region has been removed");
4427 auto *RegionSucc = cast<VPBasicBlock>(LoopRegion->getSingleSuccessor());
4428 if (RegionSucc->getSingleSuccessor() ||
4429 is_contained(RegionSucc->getSuccessors(), getScalarPreheader()))
4430 return RegionSucc;
4431 // There is an early exit. The successor of RegionSucc is the middle block.
4432 return cast<VPBasicBlock>(RegionSucc->getSuccessors()[1]);
4433 }
4434
4436 return const_cast<VPlan *>(this)->getMiddleBlock();
4437 }
4438
4439 /// Return the VPBasicBlock for the preheader of the scalar loop.
4441 return cast<VPBasicBlock>(getScalarHeader()->getSinglePredecessor());
4442 }
4443
4444 /// Return the VPIRBasicBlock wrapping the header of the scalar loop.
4445 VPIRBasicBlock *getScalarHeader() const { return ScalarHeader; }
4446
4447 /// Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of
4448 /// the original scalar loop.
4449 ArrayRef<VPIRBasicBlock *> getExitBlocks() const { return ExitBlocks; }
4450
4451 /// Return the VPIRBasicBlock corresponding to \p IRBB. \p IRBB must be an
4452 /// exit block.
4454
4455 /// Returns true if \p VPBB is an exit block.
4456 bool isExitBlock(VPBlockBase *VPBB);
4457
4458 /// The trip count of the original loop.
4460 assert(TripCount && "trip count needs to be set before accessing it");
4461 return TripCount;
4462 }
4463
4464 /// Set the trip count assuming it is currently null; if it is not - use
4465 /// resetTripCount().
4466 void setTripCount(VPValue *NewTripCount) {
4467 assert(!TripCount && NewTripCount && "TripCount should not be set yet.");
4468 TripCount = NewTripCount;
4469 }
4470
4471 /// Resets the trip count for the VPlan. The caller must make sure all uses of
4472 /// the original trip count have been replaced.
4473 void resetTripCount(VPValue *NewTripCount) {
4474 assert(TripCount && NewTripCount && TripCount->getNumUsers() == 0 &&
4475 "TripCount must be set when resetting");
4476 TripCount = NewTripCount;
4477 }
4478
4479 /// The backedge taken count of the original loop.
4481 if (!BackedgeTakenCount)
4482 BackedgeTakenCount = new VPValue();
4483 return BackedgeTakenCount;
4484 }
4485 VPValue *getBackedgeTakenCount() const { return BackedgeTakenCount; }
4486
4487 /// The vector trip count.
4488 VPValue &getVectorTripCount() { return VectorTripCount; }
4489
4490 /// Returns the VF of the vector loop region.
4491 VPValue &getVF() { return VF; };
4492 const VPValue &getVF() const { return VF; };
4493
4494 /// Returns VF * UF of the vector loop region.
4495 VPValue &getVFxUF() { return VFxUF; }
4496
4499 }
4500
4501 void addVF(ElementCount VF) { VFs.insert(VF); }
4502
4504 assert(hasVF(VF) && "Cannot set VF not already in plan");
4505 VFs.clear();
4506 VFs.insert(VF);
4507 }
4508
4509 bool hasVF(ElementCount VF) const { return VFs.count(VF); }
4510 bool hasScalableVF() const {
4511 return any_of(VFs, [](ElementCount VF) { return VF.isScalable(); });
4512 }
4513
4514 /// Returns an iterator range over all VFs of the plan.
4517 return VFs;
4518 }
4519
4520 bool hasScalarVFOnly() const {
4521 bool HasScalarVFOnly = VFs.size() == 1 && VFs[0].isScalar();
4522 assert(HasScalarVFOnly == hasVF(ElementCount::getFixed(1)) &&
4523 "Plan with scalar VF should only have a single VF");
4524 return HasScalarVFOnly;
4525 }
4526
4527 bool hasUF(unsigned UF) const { return UFs.empty() || UFs.contains(UF); }
4528
4529 unsigned getUF() const {
4530 assert(UFs.size() == 1 && "Expected a single UF");
4531 return UFs[0];
4532 }
4533
4534 void setUF(unsigned UF) {
4535 assert(hasUF(UF) && "Cannot set the UF not already in plan");
4536 UFs.clear();
4537 UFs.insert(UF);
4538 }
4539
4540 /// Returns true if the VPlan already has been unrolled, i.e. it has a single
4541 /// concrete UF.
4542 bool isUnrolled() const { return UFs.size() == 1; }
4543
4544 /// Return a string with the name of the plan and the applicable VFs and UFs.
4545 std::string getName() const;
4546
4547 void setName(const Twine &newName) { Name = newName.str(); }
4548
4549 /// Gets the live-in VPValue for \p V or adds a new live-in (if none exists
4550 /// yet) for \p V.
4552 assert(V && "Trying to get or add the VPValue of a null Value");
4553 auto [It, Inserted] = Value2VPValue.try_emplace(V);
4554 if (Inserted) {
4555 VPValue *VPV = new VPValue(V);
4556 VPLiveIns.push_back(VPV);
4557 assert(VPV->isLiveIn() && "VPV must be a live-in.");
4558 It->second = VPV;
4559 }
4560
4561 assert(It->second->isLiveIn() && "Only live-ins should be in mapping");
4562 return It->second;
4563 }
4564
4565 /// Return a VPValue wrapping i1 true.
4566 VPValue *getTrue() { return getConstantInt(1, 1); }
4567
4568 /// Return a VPValue wrapping i1 false.
4569 VPValue *getFalse() { return getConstantInt(1, 0); }
4570
4571 /// Return a VPValue wrapping a ConstantInt with the given type and value.
4572 VPValue *getConstantInt(Type *Ty, uint64_t Val, bool IsSigned = false) {
4573 return getOrAddLiveIn(ConstantInt::get(Ty, Val, IsSigned));
4574 }
4575
4576 /// Return a VPValue wrapping a ConstantInt with the given bitwidth and value.
4578 bool IsSigned = false) {
4579 return getConstantInt(APInt(BitWidth, Val, IsSigned));
4580 }
4581
4582 /// Return a VPValue wrapping a ConstantInt with the given APInt value.
4584 return getOrAddLiveIn(ConstantInt::get(getContext(), Val));
4585 }
4586
4587 /// Return the live-in VPValue for \p V, if there is one or nullptr otherwise.
4588 VPValue *getLiveIn(Value *V) const { return Value2VPValue.lookup(V); }
4589
4590 /// Return the list of live-in VPValues available in the VPlan.
4592 assert(all_of(Value2VPValue,
4593 [this](const auto &P) {
4594 return is_contained(VPLiveIns, P.second);
4595 }) &&
4596 "all VPValues in Value2VPValue must also be in VPLiveIns");
4597 return VPLiveIns;
4598 }
4599
4600#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4601 /// Print the live-ins of this VPlan to \p O.
4602 void printLiveIns(raw_ostream &O) const;
4603
4604 /// Print this VPlan to \p O.
4605 LLVM_ABI_FOR_TEST void print(raw_ostream &O) const;
4606
4607 /// Print this VPlan in DOT format to \p O.
4608 LLVM_ABI_FOR_TEST void printDOT(raw_ostream &O) const;
4609
4610 /// Dump the plan to stderr (for debugging).
4611 LLVM_DUMP_METHOD void dump() const;
4612#endif
4613
4614 /// Clone the current VPlan, update all VPValues of the new VPlan and cloned
4615 /// recipes to refer to the clones, and return it.
4617
4618 /// Create a new VPBasicBlock with \p Name and containing \p Recipe if
4619 /// present. The returned block is owned by the VPlan and deleted once the
4620 /// VPlan is destroyed.
4622 VPRecipeBase *Recipe = nullptr) {
4623 auto *VPB = new VPBasicBlock(Name, Recipe);
4624 CreatedBlocks.push_back(VPB);
4625 return VPB;
4626 }
4627
4628 /// Create a new loop region with \p Name and entry and exiting blocks set
4629 /// to \p Entry and \p Exiting respectively, if set. The returned block is
4630 /// owned by the VPlan and deleted once the VPlan is destroyed.
4631 VPRegionBlock *createLoopRegion(const std::string &Name = "",
4632 VPBlockBase *Entry = nullptr,
4633 VPBlockBase *Exiting = nullptr) {
4634 auto *VPB = Entry ? new VPRegionBlock(Entry, Exiting, Name)
4635 : new VPRegionBlock(Name);
4636 CreatedBlocks.push_back(VPB);
4637 return VPB;
4638 }
4639
4640 /// Create a new replicate region with \p Entry, \p Exiting and \p Name. The
4641 /// returned block is owned by the VPlan and deleted once the VPlan is
4642 /// destroyed.
4644 const std::string &Name = "") {
4645 auto *VPB = new VPRegionBlock(Entry, Exiting, Name, true);
4646 CreatedBlocks.push_back(VPB);
4647 return VPB;
4648 }
4649
4650 /// Create a VPIRBasicBlock wrapping \p IRBB, but do not create
4651 /// VPIRInstructions wrapping the instructions in \p IRBB. The returned
4652 /// block is owned by the VPlan and deleted once the VPlan is destroyed.
4654
4655 /// Create a VPIRBasicBlock from \p IRBB containing VPIRInstructions for all
4656 /// instructions in \p IRBB, except its terminator which is managed by the
4657 /// successors of the block in VPlan. The returned block is owned by the VPlan
4658 /// and deleted once the VPlan is destroyed.
4660
4661 /// Returns true if the VPlan is based on a loop with an early exit. That is
4662 /// the case if the VPlan has either more than one exit block or a single exit
4663 /// block with multiple predecessors (one for the exit via the latch and one
4664 /// via the other early exit).
4665 bool hasEarlyExit() const {
4666 return count_if(ExitBlocks,
4667 [](VPIRBasicBlock *EB) { return EB->hasPredecessors(); }) >
4668 1 ||
4669 (ExitBlocks.size() == 1 && ExitBlocks[0]->getNumPredecessors() > 1);
4670 }
4671
4672 /// Returns true if the scalar tail may execute after the vector loop. Note
4673 /// that this relies on unneeded branches to the scalar tail loop being
4674 /// removed.
4675 bool hasScalarTail() const {
4676 return !(!getScalarPreheader()->hasPredecessors() ||
4678 }
4679};
4680
4681#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4682inline raw_ostream &operator<<(raw_ostream &OS, const VPlan &Plan) {
4683 Plan.print(OS);
4684 return OS;
4685}
4686#endif
4687
4688} // end namespace llvm
4689
4690#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
static MCDisassembler::DecodeStatus addOperand(MCInst &Inst, const MCOperand &Opnd)
Rewrite undef for PHI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:638
#define LLVM_ABI_FOR_TEST
Definition Compiler.h:218
dxil translate DXIL Translate Metadata
This file defines the DenseMap class.
Hexagon Common GEP
iv users
Definition IVUsers.cpp:48
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
static std::pair< Value *, APInt > getMask(Value *WideMask, unsigned Factor, ElementCount LeafValueEC)
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first DebugLoc that has line number information, given a range of instructions.
This file provides utility analysis objects describing memory locations.
#define T
MachineInstr unsigned OpIdx
#define P(N)
StandardInstrumentations SI(Mod->getContext(), Debug, VerifyEach)
static StringRef getName(Value *V)
const SmallVectorImpl< MachineOperand > & Cond
static bool mayHaveSideEffects(MachineInstr &MI)
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
static const BasicSubtargetSubTypeKV * find(StringRef S, ArrayRef< BasicSubtargetSubTypeKV > A)
Find KV in array using binary search.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:247
This file contains the declarations of the entities induced by Vectorization Plans,...
#define VP_CLASSOF_IMPL(VPDefID)
Definition VPlan.h:508
static const uint32_t IV[8]
Definition blake3_impl.h:83
Class for arbitrary precision integers.
Definition APInt.h:78
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
LLVM Basic Block Representation.
Definition BasicBlock.h:62
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
This class represents a function call, abstracting a target machine's calling convention.
This is the base class for all instructions that perform data casts.
Definition InstrTypes.h:448
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
A debug info location.
Definition DebugLoc.h:124
static DebugLoc getUnknown()
Definition DebugLoc.h:162
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:164
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:309
Utility class for floating point operations which can have information about relaxed accuracy require...
Definition Operator.h:200
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:22
Represents flags for the getelementptr instruction/expression.
static GEPNoWrapFlags none()
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
A struct for saving information about induction variables.
InductionKind
This enum represents the kinds of inductions that we support.
InnerLoopVectorizer vectorizes loops which contain only one basic block to a specified vectorization ...
The group of interleaved loads/stores sharing the same stride and close to each other.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
An instruction for reading from memory.
LoopVectorizationCostModel - estimates the expected speedups due to vectorization.
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
Metadata node.
Definition Metadata.h:1078
Root of the metadata hierarchy.
Definition Metadata.h:64
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
This class represents an analyzed expression in the program.
This class represents the LLVM 'select' instruction.
This class provides computation of slot numbers for LLVM Assembly writing.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:337
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
iterator erase(const_iterator CI)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
std::string str() const
str - Get the contents as an std::string.
Definition StringRef.h:225
This class represents a truncation of integer types.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
Definition Twine.cpp:17
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition Type.h:128
void execute(VPTransformState &State) override
Generate the active lane mask phi of the vector loop.
VPActiveLaneMaskPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3633
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPActiveLaneMaskPHIRecipe(VPValue *StartMask, DebugLoc DL)
Definition VPlan.h:3627
~VPActiveLaneMaskPHIRecipe() override=default
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition VPlan.h:3986
RecipeListTy::const_iterator const_iterator
Definition VPlan.h:4014
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
Definition VPlan.h:4061
RecipeListTy::const_reverse_iterator const_reverse_iterator
Definition VPlan.h:4016
RecipeListTy::iterator iterator
Instruction iterators...
Definition VPlan.h:4013
RecipeListTy & getRecipeList()
Returns a reference to the list of recipes.
Definition VPlan.h:4039
iplist< VPRecipeBase > RecipeListTy
Definition VPlan.h:3997
VPBasicBlock(const unsigned char BlockSC, const Twine &Name="")
Definition VPlan.h:4003
iterator end()
Definition VPlan.h:4023
iterator begin()
Recipe iterator methods.
Definition VPlan.h:4021
RecipeListTy::reverse_iterator reverse_iterator
Definition VPlan.h:4015
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
Definition VPlan.h:4074
const VPBasicBlock * getCFGPredecessor(unsigned Idx) const
Returns the predecessor block at index Idx with the predecessors as per the corresponding plain CFG.
Definition VPlan.cpp:770
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
Definition VPlan.cpp:216
~VPBasicBlock() override
Definition VPlan.h:4007
const_reverse_iterator rbegin() const
Definition VPlan.h:4027
reverse_iterator rend()
Definition VPlan.h:4028
RecipeListTy Recipes
The VPRecipes held in the order of output instructions to generate.
Definition VPlan.h:4001
VPRecipeBase & back()
Definition VPlan.h:4036
const VPRecipeBase & front() const
Definition VPlan.h:4033
const_iterator begin() const
Definition VPlan.h:4022
VPRecipeBase & front()
Definition VPlan.h:4034
const VPRecipeBase & back() const
Definition VPlan.h:4035
void insert(VPRecipeBase *Recipe, iterator InsertPt)
Definition VPlan.h:4052
bool empty() const
Definition VPlan.h:4032
const_iterator end() const
Definition VPlan.h:4024
static bool classof(const VPBlockBase *V)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:4047
static RecipeListTy VPBasicBlock::* getSublistAccess(VPRecipeBase *)
Returns a pointer to a member of the recipe list.
Definition VPlan.h:4042
reverse_iterator rbegin()
Definition VPlan.h:4026
friend class VPlan
Definition VPlan.h:3987
size_t size() const
Definition VPlan.h:4031
const_reverse_iterator rend() const
Definition VPlan.h:4029
VPValue * getIncomingValue(unsigned Idx) const
Return incoming value number Idx.
Definition VPlan.h:2512
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
Definition VPlan.h:2517
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account when normalized the first incoming value wi...
Definition VPlan.h:2507
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:2528
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2537
VPBlendRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2494
VPBlendRecipe(PHINode *Phi, ArrayRef< VPValue * > Operands, DebugLoc DL)
The blend operation is a User of the incoming values and of their respective masks,...
Definition VPlan.h:2489
void setMask(unsigned Idx, VPValue *V)
Set mask number Idx to V.
Definition VPlan.h:2523
bool isNormalized() const
A normalized blend is one that has an odd number of operands, whereby the first operand does not have...
Definition VPlan.h:2503
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition VPlan.h:80
void setSuccessors(ArrayRef< VPBlockBase * > NewSuccs)
Set each VPBasicBlock in NewSuccs as successor of this VPBlockBase.
Definition VPlan.h:299
VPRegionBlock * getParent()
Definition VPlan.h:172
VPBlocksTy & getPredecessors()
Definition VPlan.h:204
iterator_range< VPBlockBase ** > predecessors()
Definition VPlan.h:201
LLVM_DUMP_METHOD void dump() const
Dump this VPBlockBase to dbgs().
Definition VPlan.h:369
void setName(const Twine &newName)
Definition VPlan.h:165
size_t getNumSuccessors() const
Definition VPlan.h:218
iterator_range< VPBlockBase ** > successors()
Definition VPlan.h:200
virtual void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Print plain-text dump of this VPBlockBase to O, prefixing all lines with Indent.
bool hasPredecessors() const
Returns true if this block has any predecessors.
Definition VPlan.h:222
void swapSuccessors()
Swap successors of the block. The block must have exactly 2 successors.
Definition VPlan.h:321
void printSuccessors(raw_ostream &O, const Twine &Indent) const
Print the successors of this block to O, prefixing all lines with Indent.
Definition VPlan.cpp:645
SmallVectorImpl< VPBlockBase * > VPBlocksTy
Definition VPlan.h:159
virtual ~VPBlockBase()=default
const VPBlocksTy & getHierarchicalPredecessors()
Definition VPlan.h:257
unsigned getIndexForSuccessor(const VPBlockBase *Succ) const
Returns the index for Succ in the block's successor list.
Definition VPlan.h:334
size_t getNumPredecessors() const
Definition VPlan.h:219
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
Definition VPlan.h:290
VPBlockBase * getEnclosingBlockWithPredecessors()
Definition VPlan.cpp:208
unsigned getIndexForPredecessor(const VPBlockBase *Pred) const
Returns the index for Pred in the block's predecessor list.
Definition VPlan.h:327
const VPBlocksTy & getPredecessors() const
Definition VPlan.h:203
virtual VPBlockBase * clone()=0
Clone the current block and its recipes without updating the operands of the cloned recipes,...
enum { VPRegionBlockSC, VPBasicBlockSC, VPIRBasicBlockSC } VPBlockTy
An enumeration for keeping track of the concrete subclass of VPBlockBase that are actually instantiat...
Definition VPlan.h:157
virtual InstructionCost cost(ElementCount VF, VPCostContext &Ctx)=0
Return the cost of the block.
void setPlan(VPlan *ParentPlan)
Sets the pointer of the plan containing the block.
Definition VPlan.cpp:180
const VPRegionBlock * getParent() const
Definition VPlan.h:173
const std::string & getName() const
Definition VPlan.h:163
void clearSuccessors()
Remove all the successors of this block.
Definition VPlan.h:309
VPBlockBase * getSingleHierarchicalSuccessor()
Definition VPlan.h:247
void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse)
Set two given VPBlockBases IfTrue and IfFalse to be the two successors of this VPBlockBase.
Definition VPlan.h:281
VPBlockBase * getSinglePredecessor() const
Definition VPlan.h:214
virtual void execute(VPTransformState *State)=0
The method which generates the output IR that corresponds to this VPBlockBase, thereby "executing" the...
const VPBlocksTy & getHierarchicalSuccessors()
Definition VPlan.h:241
void clearPredecessors()
Remove all the predecessors of this block.
Definition VPlan.h:306
friend class VPBlockUtils
Definition VPlan.h:81
unsigned getVPBlockID() const
Definition VPlan.h:170
void printAsOperand(raw_ostream &OS, bool PrintType=false) const
Definition VPlan.h:348
void swapPredecessors()
Swap predecessors of the block.
Definition VPlan.h:313
VPBlockBase(const unsigned char SC, const std::string &N)
Definition VPlan.h:149
VPBlocksTy & getSuccessors()
Definition VPlan.h:198
VPBlockBase * getEnclosingBlockWithSuccessors()
An Enclosing Block of a block B is any block containing B, including B itself.
Definition VPlan.cpp:200
const VPBasicBlock * getEntryBasicBlock() const
Definition VPlan.cpp:166
void setOneSuccessor(VPBlockBase *Successor)
Set a given VPBlockBase Successor as the single successor of this VPBlockBase.
Definition VPlan.h:270
void setParent(VPRegionBlock *P)
Definition VPlan.h:183
VPBlockBase * getSingleHierarchicalPredecessor()
Definition VPlan.h:263
VPBlockBase * getSingleSuccessor() const
Definition VPlan.h:208
const VPBlocksTy & getSuccessors() const
Definition VPlan.h:197
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlan.h:3057
VPBranchOnMaskRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3041
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition VPlan.h:3065
VPBranchOnMaskRecipe(VPValue *BlockInMask, DebugLoc DL)
Definition VPlan.h:3038
VPlan-based builder utility analogous to IRBuilder.
Canonical scalar induction phi of the vector loop.
Definition VPlan.h:3567
~VPCanonicalIVPHIRecipe() override=default
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3593
VPCanonicalIVPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3574
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition VPlan.h:3600
VPCanonicalIVPHIRecipe(VPValue *StartV, DebugLoc DL)
Definition VPlan.h:3569
Type * getScalarType() const
Returns the scalar type of the induction.
Definition VPlan.h:3588
void execute(VPTransformState &State) override
Generate the phi nodes.
Definition VPlan.h:3582
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPCanonicalIVPHIRecipe.
Definition VPlan.h:3607
This class augments a recipe with a set of VPValues defined by the recipe.
Definition VPlanValue.h:310
friend class VPValue
Definition VPlanValue.h:311
VPDef(const unsigned char SC)
Definition VPlanValue.h:390
void execute(VPTransformState &State) override
Generate the transformed value of the induction at offset StartValue (1.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPDerivedIVRecipe.
Definition VPlan.h:3775
VPValue * getStepValue() const
Definition VPlan.h:3786
Type * getScalarType() const
Definition VPlan.h:3781
VPDerivedIVRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3763
VPDerivedIVRecipe(InductionDescriptor::InductionKind Kind, const FPMathOperator *FPBinOp, VPValue *Start, VPValue *IV, VPValue *Step, const Twine &Name="")
Definition VPlan.h:3755
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPDerivedIVRecipe() override=default
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3789
VPValue * getStartValue() const
Definition VPlan.h:3785
VPDerivedIVRecipe(const InductionDescriptor &IndDesc, VPValue *Start, VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step, const Twine &Name="")
Definition VPlan.h:3747
Template specialization of the standard LLVM dominator tree utility for VPBlockBases.
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3683
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPEVLBasedIVPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3664
~VPEVLBasedIVPHIRecipe() override=default
void execute(VPTransformState &State) override
Generate the phi nodes.
Definition VPlan.h:3670
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPEVLBasedIVPHIRecipe.
Definition VPlan.h:3676
VPEVLBasedIVPHIRecipe(VPValue *StartIV, DebugLoc DL)
Definition VPlan.h:3659
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:3542
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPExpandSCEVRecipe.
Definition VPlan.h:3547
VPExpandSCEVRecipe(const SCEV *Expr)
Definition VPlan.h:3533
const SCEV * getSCEV() const
Definition VPlan.h:3553
VPExpandSCEVRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3538
~VPExpandSCEVRecipe() override=default
void execute(VPTransformState &State) override
Method for generating code, must not be called as this recipe is abstract.
Definition VPlan.h:3192
VPValue * getOperandOfResultType() const
Return the VPValue to use to infer the result type of the recipe.
Definition VPlan.h:3174
VPExpressionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3156
void decompose()
Insert the recipes of the expression back into the VPlan, directly before the current recipe.
~VPExpressionRecipe() override
Definition VPlan.h:3144
bool isSingleScalar() const
Returns true if the result of this VPExpressionRecipe is a single-scalar.
VPExpressionRecipe(VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1, VPWidenRecipe *Mul, VPWidenRecipe *Sub, VPReductionRecipe *Red)
Definition VPlan.h:3130
VPExpressionRecipe(VPWidenCastRecipe *Ext, VPReductionRecipe *Red)
Definition VPlan.h:3122
bool mayHaveSideEffects() const
Returns true if this expression contains recipes that may have side effects.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Compute the cost of this recipe either using a recipe's specialized implementation or using the legac...
bool mayReadOrWriteMemory() const
Returns true if this expression contains recipes that may read from or write to memory.
VPExpressionRecipe(VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1, VPWidenRecipe *Mul, VPReductionRecipe *Red)
Definition VPlan.h:3126
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
unsigned getVFScaleFactor() const
Definition VPlan.h:3186
VPExpressionRecipe(VPWidenRecipe *Mul, VPReductionRecipe *Red)
Definition VPlan.h:3124
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this header phi recipe.
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:2065
static bool classof(const VPValue *V)
Definition VPlan.h:2075
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override=0
Print the recipe.
virtual VPValue * getBackedgeValue()
Returns the incoming value from the loop backedge.
Definition VPlan.h:2098
void setBackedgeValue(VPValue *V)
Update the incoming value from the loop backedge.
Definition VPlan.h:2103
VPValue * getStartValue()
Returns the start value of the phi, if one is set.
Definition VPlan.h:2087
void setStartValue(VPValue *V)
Update the start value of the recipe.
Definition VPlan.h:2095
static bool classof(const VPRecipeBase *R)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:2071
VPValue * getStartValue() const
Definition VPlan.h:2090
void execute(VPTransformState &State) override=0
Generate the phi nodes.
virtual VPRecipeBase & getBackedgeRecipe()
Returns the backedge value as a recipe.
Definition VPlan.h:2107
VPHeaderPHIRecipe(unsigned char VPDefID, Instruction *UnderlyingInstr, VPValue *Start, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2060
~VPHeaderPHIRecipe() override=default
void execute(VPTransformState &State) override
Produce a vectorized histogram operation.
VP_CLASSOF_IMPL(VPDef::VPHistogramSC)
VPHistogramRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1764
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPHistogramRecipe.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getMask() const
Return the mask operand if one was provided, or a null pointer if all lanes should be executed uncond...
Definition VPlan.h:1781
unsigned getOpcode() const
Definition VPlan.h:1777
VPHistogramRecipe(unsigned Opcode, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1758
~VPHistogramRecipe() override=default
A special type of VPBasicBlock that wraps an existing IR basic block.
Definition VPlan.h:4139
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPBasicBlock,...
Definition VPlan.cpp:446
BasicBlock * getIRBasicBlock() const
Definition VPlan.h:4163
static bool classof(const VPBlockBase *V)
Definition VPlan.h:4153
~VPIRBasicBlock() override=default
friend class VPlan
Definition VPlan.h:4140
VPIRBasicBlock * clone() override
Clone the current block and its recipes, without updating the operands of the cloned recipes.
Definition VPlan.cpp:471
Class to record and manage LLVM IR flags.
Definition VPlan.h:609
FastMathFlagsTy FMFs
Definition VPlan.h:680
bool flagsValidForOpcode(unsigned Opcode) const
Returns true if the set flags are valid for Opcode.
VPIRFlags(DisjointFlagsTy DisjointFlags)
Definition VPlan.h:740
VPIRFlags(WrapFlagsTy WrapFlags)
Definition VPlan.h:732
WrapFlagsTy WrapFlags
Definition VPlan.h:674
CmpInst::Predicate CmpPredicate
Definition VPlan.h:673
void printFlags(raw_ostream &O) const
VPIRFlags(CmpInst::Predicate Pred, FastMathFlags FMFs)
Definition VPlan.h:726
GEPNoWrapFlags GEPFlags
Definition VPlan.h:678
bool hasFastMathFlags() const
Returns true if the recipe has fast-math flags.
Definition VPlan.h:858
LLVM_ABI_FOR_TEST FastMathFlags getFastMathFlags() const
TruncFlagsTy TruncFlags
Definition VPlan.h:675
CmpInst::Predicate getPredicate() const
Definition VPlan.h:835
bool hasNonNegFlag() const
Returns true if the recipe has non-negative flag.
Definition VPlan.h:865
void transferFlags(VPIRFlags &Other)
Definition VPlan.h:749
ExactFlagsTy ExactFlags
Definition VPlan.h:677
bool hasNoSignedWrap() const
Definition VPlan.h:884
void intersectFlags(const VPIRFlags &Other)
Only keep flags also present in Other.
bool isDisjoint() const
Definition VPlan.h:895
VPIRFlags(TruncFlagsTy TruncFlags)
Definition VPlan.h:735
VPIRFlags(FastMathFlags FMFs)
Definition VPlan.h:738
VPIRFlags(NonNegFlagsTy NonNegFlags)
Definition VPlan.h:743
VPIRFlags(CmpInst::Predicate Pred)
Definition VPlan.h:723
bool isNonNeg() const
Definition VPlan.h:867
GEPNoWrapFlags getGEPNoWrapFlags() const
Definition VPlan.h:850
bool hasPredicate() const
Returns true if the recipe has a comparison predicate.
Definition VPlan.h:853
DisjointFlagsTy DisjointFlags
Definition VPlan.h:676
unsigned AllFlags
Definition VPlan.h:682
void setPredicate(CmpInst::Predicate Pred)
Definition VPlan.h:841
bool hasNoUnsignedWrap() const
Definition VPlan.h:873
FCmpFlagsTy FCmpFlags
Definition VPlan.h:681
NonNegFlagsTy NonNegFlags
Definition VPlan.h:679
void dropPoisonGeneratingFlags()
Drop all poison-generating flags.
Definition VPlan.h:759
void applyFlags(Instruction &I) const
Apply the IR flags to I.
Definition VPlan.h:795
VPIRFlags(GEPNoWrapFlags GEPFlags)
Definition VPlan.h:746
VPIRFlags(Instruction &I)
Definition VPlan.h:688
Instruction & getInstruction() const
Definition VPlan.h:1435
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first part of operand Op.
Definition VPlan.h:1443
~VPIRInstruction() override=default
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
void extractLastLaneOfFirstOperand(VPBuilder &Builder)
Update the recipe's first operand to the last lane of the operand using Builder.
VPIRInstruction * clone() override
Clone the current recipe.
Definition VPlan.h:1422
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition VPlan.h:1449
static LLVM_ABI_FOR_TEST VPIRInstruction * create(Instruction &I)
Create a new VPIRPhi for I, if it is a PHINode, otherwise create a VPIRInstruction.
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPIRInstruction.
bool usesScalars(const VPValue *Op) const override
Returns true if the VPUser uses scalars of operand Op.
Definition VPlan.h:1437
VPIRInstruction(Instruction &I)
VPIRInstruction::create() should be used to create VPIRInstructions, as subclasses may need to be cre...
Definition VPlan.h:1410
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Helper to manage IR metadata for recipes.
Definition VPlan.h:982
VPIRMetadata & operator=(const VPIRMetadata &Other)=default
MDNode * getMetadata(unsigned Kind) const
Get metadata of kind Kind. Returns nullptr if not found.
Definition VPlan.h:1018
VPIRMetadata(Instruction &I)
Adds metadata that can be preserved from the original instruction I.
Definition VPlan.h:990
VPIRMetadata(const VPIRMetadata &Other)=default
Copy constructor for cloning.
void intersect(const VPIRMetadata &MD)
Intersect this VPIRMetadata object with MD, keeping only metadata nodes that are common to both.
VPIRMetadata()=default
void applyMetadata(Instruction &I) const
Add all metadata to I.
void setMetadata(unsigned Kind, MDNode *Node)
Set metadata with kind Kind to Node.
Definition VPlan.h:1002
VPInstructionWithType(unsigned Opcode, ArrayRef< VPValue * > Operands, Type *ResultTy, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Definition VPlan.h:1249
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPInstruction.
Definition VPlan.h:1290
static bool classof(const VPUser *R)
Definition VPlan.h:1275
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:1257
Type * getResultType() const
Definition VPlan.h:1296
VPInstruction * clone() override
Clone the current recipe.
Definition VPlan.h:1279
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the instruction.
This is a concrete Recipe that models a single VPlan-level instruction.
Definition VPlan.h:1031
VPInstruction * clone() override
Clone the current recipe.
Definition VPlan.h:1162
@ ExtractLane
Extracts a single lane (first operand) from a set of vector operands.
Definition VPlan.h:1115
@ ComputeAnyOfResult
Compute the final result of an AnyOf reduction with select(cmp(),x,y), where one of (x,...
Definition VPlan.h:1069
@ WideIVStep
Scale the first operand (vector step) by the second operand (scalar-step).
Definition VPlan.h:1105
@ ResumeForEpilogue
Explicit user for the resume phi of the canonical induction in the main VPlan, used by the epilogue v...
Definition VPlan.h:1118
@ Unpack
Extracts all lanes from its (non-scalable) vector operand.
Definition VPlan.h:1066
@ ReductionStartVector
Start vector for reductions with 3 operands: the original start value, the identity value for the red...
Definition VPlan.h:1109
@ BuildVector
Creates a fixed-width vector containing all operands.
Definition VPlan.h:1061
@ BuildStructVector
Given operands of (the same) struct type, creates a struct of fixed- width vectors each containing a ...
Definition VPlan.h:1058
@ VScale
Returns the value for vscale.
Definition VPlan.h:1120
@ CanonicalIVIncrementForPart
Definition VPlan.h:1051
bool hasResult() const
Definition VPlan.h:1186
StringRef getName() const
Returns the symbolic name assigned to the VPInstruction.
Definition VPlan.h:1226
unsigned getOpcode() const
Definition VPlan.h:1170
VPInstruction(unsigned Opcode, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
void setName(StringRef NewName)
Set the symbolic name for the VPInstruction.
Definition VPlan.h:1229
friend class VPlanSlp
Definition VPlan.h:1032
virtual unsigned getNumStoreOperands() const =0
Returns the number of stored operands of this interleave group.
bool usesFirstLaneOnly(const VPValue *Op) const override=0
Returns true if the recipe only uses the first lane of operand Op.
bool needsMaskForGaps() const
Return true if the access needs a mask because of the gaps.
Definition VPlan.h:2623
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:2629
static bool classof(const VPUser *U)
Definition VPlan.h:2605
VPInterleaveBase(const unsigned char SC, const InterleaveGroup< Instruction > *IG, ArrayRef< VPValue * > Operands, ArrayRef< VPValue * > StoredValues, VPValue *Mask, bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
Definition VPlan.h:2572
Instruction * getInsertPos() const
Definition VPlan.h:2627
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:2600
const InterleaveGroup< Instruction > * getInterleaveGroup() const
Definition VPlan.h:2625
VPValue * getMask() const
Return the mask used by this recipe.
Definition VPlan.h:2617
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
Definition VPlan.h:2646
VPInterleaveBase * clone() override=0
Clone the current recipe.
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition VPlan.h:2611
A recipe for interleaved memory operations with vector-predication intrinsics.
Definition VPlan.h:2699
bool usesFirstLaneOnly(const VPValue *Op) const override
The recipe only uses the first lane of the address, and EVL operand.
Definition VPlan.h:2727
VPValue * getEVL() const
The VPValue of the explicit vector length.
Definition VPlan.h:2721
~VPInterleaveEVLRecipe() override=default
unsigned getNumStoreOperands() const override
Returns the number of stored operands of this interleave group.
Definition VPlan.h:2734
VPInterleaveEVLRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2714
VPInterleaveEVLRecipe(VPInterleaveRecipe &R, VPValue &EVL, VPValue *Mask)
Definition VPlan.h:2701
VPInterleaveRecipe is a recipe for transforming an interleave group of load or stores into one wide l...
Definition VPlan.h:2657
unsigned getNumStoreOperands() const override
Returns the number of stored operands of this interleave group.
Definition VPlan.h:2684
~VPInterleaveRecipe() override=default
VPInterleaveRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2667
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2678
VPInterleaveRecipe(const InterleaveGroup< Instruction > *IG, VPValue *Addr, ArrayRef< VPValue * > StoredValues, VPValue *Mask, bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
Definition VPlan.h:2659
In what follows, the term "input IR" refers to code that is fed into the vectorizer whereas the term ...
VPPartialReductionRecipe(Instruction *ReductionInst, VPValue *Op0, VPValue *Op1, VPValue *Cond, unsigned VFScaleFactor)
Definition VPlan.h:2855
VPPartialReductionRecipe(unsigned Opcode, VPValue *Op0, VPValue *Op1, VPValue *Cond, unsigned ScaleFactor, Instruction *ReductionInst=nullptr)
Definition VPlan.h:2859
~VPPartialReductionRecipe() override=default
unsigned getVFScaleFactor() const
Get the factor that the VF of this recipe's output should be scaled by.
Definition VPlan.h:2896
void execute(VPTransformState &State) override
Generate the reduction in the loop.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPPartialReductionRecipe.
unsigned getOpcode() const
Get the binary op's opcode.
Definition VPlan.h:2893
VPPartialReductionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2877
Helper type to provide functions to access incoming values and blocks for phi-like recipes.
Definition VPlan.h:1308
virtual const VPRecipeBase * getAsRecipe() const =0
Return a VPRecipeBase* to the current object.
VPUser::const_operand_range incoming_values() const
Returns an iterator range over the incoming values.
Definition VPlan.h:1330
virtual unsigned getNumIncoming() const
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:1325
void removeIncomingValueFor(VPBlockBase *IncomingBlock) const
Removes the incoming value for IncomingBlock, which must be a predecessor.
const VPBasicBlock * getIncomingBlock(unsigned Idx) const
Returns the incoming block with index Idx.
Definition VPlan.h:4130
detail::zippy< llvm::detail::zip_first, VPUser::const_operand_range, const_incoming_blocks_range > incoming_values_and_blocks() const
Returns an iterator range over pairs of incoming values and corresponding incoming blocks.
Definition VPlan.h:1350
VPValue * getIncomingValue(unsigned Idx) const
Returns the incoming VPValue with index Idx.
Definition VPlan.h:1317
virtual ~VPPhiAccessors()=default
void printPhiOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the recipe.
iterator_range< mapped_iterator< detail::index_iterator, std::function< const VPBasicBlock *(size_t)> > > const_incoming_blocks_range
Definition VPlan.h:1335
const_incoming_blocks_range incoming_blocks() const
Returns an iterator range over the incoming blocks.
Definition VPlan.h:1339
~VPPredInstPHIRecipe() override=default
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition VPlan.h:3249
VPPredInstPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3231
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPPredInstPHIRecipe.
Definition VPlan.h:3242
VPPredInstPHIRecipe(VPValue *PredV, DebugLoc DL)
Construct a VPPredInstPHIRecipe given PredInst whose value needs a phi node after merging back from ...
Definition VPlan.h:3227
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition VPlan.h:386
bool mayReadFromMemory() const
Returns true if the recipe may read from memory.
bool mayReadOrWriteMemory() const
Returns true if the recipe may read from or write to memory.
Definition VPlan.h:473
virtual void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Each concrete VPRecipe prints itself, without printing common information, like debug info or metadat...
VPRegionBlock * getRegion()
Definition VPlan.h:4291
void setDebugLoc(DebugLoc NewDL)
Set the recipe's debug location to NewDL.
Definition VPlan.h:484
bool mayWriteToMemory() const
Returns true if the recipe may write to memory.
~VPRecipeBase() override=default
VPBasicBlock * getParent()
Definition VPlan.h:407
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
Definition VPlan.h:478
virtual void execute(VPTransformState &State)=0
The method which generates the output IR instructions that correspond to this VPRecipe,...
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
static bool classof(const VPDef *D)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:453
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
virtual VPRecipeBase * clone()=0
Clone the current recipe.
friend class VPBlockUtils
Definition VPlan.h:388
const VPBasicBlock * getParent() const
Definition VPlan.h:408
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this recipe, taking into account if the cost computation should be skipped and the...
static bool classof(const VPUser *U)
Definition VPlan.h:458
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
void moveAfter(VPRecipeBase *MovePos)
Unlink this recipe from its current VPBasicBlock and insert it into the VPBasicBlock that MovePos liv...
VPRecipeBase(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:397
VPValue * getEVL() const
The VPValue of the explicit vector length.
Definition VPlan.h:2933
VPReductionEVLRecipe(VPReductionRecipe &R, VPValue &EVL, VPValue *CondOp, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2912
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2936
VPReductionEVLRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2923
~VPReductionEVLRecipe() override=default
bool isOrdered() const
Returns true if the phi is part of an ordered reduction.
Definition VPlan.h:2461
VPReductionPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2436
unsigned getVFScaleFactor() const
Get the factor that the VF of this recipe's output should be scaled by.
Definition VPlan.h:2450
~VPReductionPHIRecipe() override=default
VPReductionPHIRecipe(PHINode *Phi, RecurKind Kind, VPValue &Start, bool IsInLoop=false, bool IsOrdered=false, unsigned VFScaleFactor=1)
Create a new VPReductionPHIRecipe for the reduction Phi.
Definition VPlan.h:2426
unsigned getNumIncoming() const override
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:2455
bool isInLoop() const
Returns true if the phi is part of an in-loop reduction.
Definition VPlan.h:2464
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2467
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi/select nodes.
RecurKind getRecurrenceKind() const
Returns the recurrence kind of the reduction.
Definition VPlan.h:2458
A recipe to represent in-loop reduction operations, performing a reduction on a vector operand into a ...
Definition VPlan.h:2749
bool isConditional() const
Return true if the in-loop reduction is conditional.
Definition VPlan.h:2825
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:2793
VPReductionRecipe(const RecurKind RdxKind, FastMathFlags FMFs, VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp, bool IsOrdered, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2778
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:2809
VPValue * getVecOp() const
The VPValue of the vector value to be reduced.
Definition VPlan.h:2829
VPValue * getCondOp() const
The VPValue of the condition for the block.
Definition VPlan.h:2831
RecurKind getRecurrenceKind() const
Return the recurrence kind for the in-loop reduction.
Definition VPlan.h:2821
bool isOrdered() const
Return true if the in-loop reduction is ordered.
Definition VPlan.h:2823
~VPReductionRecipe() override=default
VPValue * getChainOp() const
The VPValue of the scalar Chain being accumulated.
Definition VPlan.h:2827
VPReductionRecipe(RecurKind RdxKind, FastMathFlags FMFs, Instruction *I, VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp, bool IsOrdered, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2771
VPReductionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2787
VPReductionRecipe(const unsigned char SC, RecurKind RdxKind, FastMathFlags FMFs, Instruction *I, ArrayRef< VPValue * > Operands, VPValue *CondOp, bool IsOrdered, DebugLoc DL)
Definition VPlan.h:2757
static bool classof(const VPUser *U)
Definition VPlan.h:2799
static bool classof(const VPValue *VPV)
Definition VPlan.h:2804
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition VPlan.h:4174
const VPBlockBase * getEntry() const
Definition VPlan.h:4210
Type * getCanonicalIVType()
Return the type of the canonical IV for loop regions.
Definition VPlan.h:4285
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
Definition VPlan.h:4242
~VPRegionBlock() override=default
void setExiting(VPBlockBase *ExitingBlock)
Set ExitingBlock as the exiting VPBlockBase of this VPRegionBlock.
Definition VPlan.h:4227
VPBlockBase * getExiting()
Definition VPlan.h:4223
VPCanonicalIVPHIRecipe * getCanonicalIV()
Returns the canonical induction recipe of the region.
Definition VPlan.h:4272
void setEntry(VPBlockBase *EntryBlock)
Set EntryBlock as the entry VPBlockBase of this VPRegionBlock.
Definition VPlan.h:4215
const Type * getCanonicalIVType() const
Definition VPlan.h:4286
const VPBlockBase * getExiting() const
Definition VPlan.h:4222
VPBlockBase * getEntry()
Definition VPlan.h:4211
const VPCanonicalIVPHIRecipe * getCanonicalIV() const
Definition VPlan.h:4280
VPBasicBlock * getPreheaderVPBB()
Returns the pre-header VPBasicBlock of the loop region.
Definition VPlan.h:4235
friend class VPlan
Definition VPlan.h:4175
static bool classof(const VPBlockBase *V)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:4206
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
Definition VPlan.h:2955
bool isSingleScalar() const
Definition VPlan.h:2996
VPReplicateRecipe(Instruction *I, ArrayRef< VPValue * > Operands, bool IsSingleScalar, VPValue *Mask=nullptr, const VPIRFlags &Flags={}, VPIRMetadata Metadata={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2963
~VPReplicateRecipe() override=default
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition VPlan.h:3008
bool isPredicated() const
Definition VPlan.h:2998
VPReplicateRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2977
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3001
unsigned getOpcode() const
Definition VPlan.h:3025
VPValue * getMask()
Return the mask of a predicated VPReplicateRecipe.
Definition VPlan.h:3020
VPValue * getStepValue() const
Definition VPlan.h:3852
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPScalarIVStepsRecipe.
Definition VPlan.h:3846
VPScalarIVStepsRecipe(const InductionDescriptor &IndDesc, VPValue *IV, VPValue *Step, VPValue *VF, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:3817
bool isPart0() const
Return true if this VPScalarIVStepsRecipe corresponds to part 0.
Definition VPlan.h:3838
VPScalarIVStepsRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3829
VPScalarIVStepsRecipe(VPValue *IV, VPValue *Step, VPValue *VF, Instruction::BinaryOps Opcode, FastMathFlags FMFs, DebugLoc DL)
Definition VPlan.h:3810
~VPScalarIVStepsRecipe() override=default
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3855
VPSingleDefRecipe is a base class for recipes for modeling a sequence of one or more output IR that define ...
Definition VPlan.h:530
VPSingleDefRecipe(const unsigned char SC, ArrayRef< VPValue * > Operands, Value *UV, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:536
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
Definition VPlan.h:595
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:540
const Instruction * getUnderlyingInstr() const
Definition VPlan.h:598
static bool classof(const VPUser *U)
Definition VPlan.h:587
LLVM_ABI_FOR_TEST LLVM_DUMP_METHOD void dump() const
Print this VPSingleDefRecipe to dbgs() (for debugging).
VPSingleDefRecipe * clone() override=0
Clone the current recipe.
VPSingleDefRecipe(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:532
This class can be used to assign names to VPValues.
Helper to access the operand that contains the unroll part for this recipe after unrolling.
Definition VPlan.h:970
VPValue * getUnrollPartOperand(const VPUser &U) const
Return the VPValue operand containing the unroll part or null if there is no such operand.
unsigned getUnrollPart(const VPUser &U) const
Return the unroll part.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition VPlanValue.h:207
void printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the operands to O.
Definition VPlan.cpp:1420
operand_range operands()
Definition VPlanValue.h:275
void setOperand(unsigned I, VPValue *New)
Definition VPlanValue.h:251
unsigned getNumOperands() const
Definition VPlanValue.h:245
operand_iterator op_end()
Definition VPlanValue.h:273
VPValue * getOperand(unsigned N) const
Definition VPlanValue.h:246
VPUser(ArrayRef< VPValue * > Operands)
Definition VPlanValue.h:226
iterator_range< const_operand_iterator > const_operand_range
Definition VPlanValue.h:269
iterator_range< operand_iterator > operand_range
Definition VPlanValue.h:268
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
Definition VPlanValue.h:48
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Definition VPlan.cpp:131
friend class VPExpressionRecipe
Definition VPlanValue.h:53
Value * getLiveInIRValue() const
Returns the underlying IR value, if this VPValue is defined outside the scope of VPlan.
Definition VPlanValue.h:183
friend class VPDef
Definition VPlanValue.h:49
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
Definition VPlanValue.h:85
VPValue(const unsigned char SC, Value *UV=nullptr, VPDef *Def=nullptr)
Definition VPlan.cpp:94
void setUnderlyingValue(Value *Val)
Definition VPlanValue.h:193
unsigned getNumUsers() const
Definition VPlanValue.h:113
bool isLiveIn() const
Returns true if this VPValue is a live-in, i.e. defined outside the VPlan.
Definition VPlanValue.h:178
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition VPlan.h:1943
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPVectorEndPointerRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1964
const VPValue * getVFValue() const
Definition VPlan.h:1939
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition VPlan.h:1957
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPVectorEndPointerRecipe.
Definition VPlan.h:1950
VPVectorEndPointerRecipe(VPValue *Ptr, VPValue *VF, Type *IndexedTy, int64_t Stride, GEPNoWrapFlags GEPFlags, DebugLoc DL)
Definition VPlan.h:1928
bool isFirstPart() const
Return true if this VPVectorPointerRecipe corresponds to part 0.
Definition VPlan.h:2017
Type * getSourceElementType() const
Definition VPlan.h:1994
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition VPlan.h:1996
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition VPlan.h:2003
VPVectorPointerRecipe(VPValue *Ptr, Type *SourceElementTy, GEPNoWrapFlags GEPFlags, DebugLoc DL)
Definition VPlan.h:1984
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPVectorPointerRecipe.
Definition VPlan.h:2020
VPVectorPointerRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2010
A recipe for widening Call instructions using library calls.
Definition VPlan.h:1698
VPWidenCallRecipe(Value *UV, Function *Variant, ArrayRef< VPValue * > CallArguments, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL={})
Definition VPlan.h:1705
const_operand_range args() const
Definition VPlan.h:1738
VPWidenCallRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1719
operand_range args()
Definition VPlan.h:1737
Function * getCalledScalarFunction() const
Definition VPlan.h:1733
~VPWidenCallRecipe() override=default
void execute(VPTransformState &State) override
Generate a canonical vector induction variable of the vector loop, with start = {<Part*VF,...
~VPWidenCanonicalIVRecipe() override=default
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCanonicalIVRecipe.
Definition VPlan.h:3719
VPWidenCanonicalIVRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3706
VPWidenCanonicalIVRecipe(VPCanonicalIVPHIRecipe *CanonicalIV)
Definition VPlan.h:3701
VPWidenCastRecipe is a recipe to create vector cast instructions.
Definition VPlan.h:1548
Instruction::CastOps getOpcode() const
Definition VPlan.h:1584
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Type * getResultType() const
Returns the result type of the cast.
Definition VPlan.h:1587
void execute(VPTransformState &State) override
Produce widened copies of the cast.
~VPWidenCastRecipe() override=default
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCastRecipe.
VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, CastInst *CI=nullptr, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1556
VPWidenCastRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1569
unsigned getOpcode() const
This recipe generates a GEP instruction.
Definition VPlan.h:1884
Type * getSourceElementType() const
Definition VPlan.h:1889
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenGEPRecipe.
Definition VPlan.h:1892
VPWidenGEPRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1876
~VPWidenGEPRecipe() override=default
VPWidenGEPRecipe(GetElementPtrInst *GEP, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1862
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:1899
void execute(VPTransformState &State) override=0
Generate the phi nodes.
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2184
static bool classof(const VPValue *V)
Definition VPlan.h:2138
void setStepValue(VPValue *V)
Update the step value of the recipe.
Definition VPlan.h:2154
VPValue * getBackedgeValue() override
Returns the incoming value from the loop backedge.
Definition VPlan.h:2169
unsigned getNumIncoming() const override
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:2162
PHINode * getPHINode() const
Definition VPlan.h:2164
VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start, VPValue *Step, const InductionDescriptor &IndDesc, DebugLoc DL)
Definition VPlan.h:2126
VPValue * getStepValue()
Returns the step value of the induction.
Definition VPlan.h:2150
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
Definition VPlan.h:2167
VPRecipeBase & getBackedgeRecipe() override
Returns the backedge value as a recipe.
Definition VPlan.h:2176
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:2133
static bool classof(const VPHeaderPHIRecipe *R)
Definition VPlan.h:2143
const VPValue * getVFValue() const
Definition VPlan.h:2157
const VPValue * getStepValue() const
Definition VPlan.h:2151
const TruncInst * getTruncInst() const
Definition VPlan.h:2258
void execute(VPTransformState &State) override
Generate the phi nodes.
Definition VPlan.h:2239
~VPWidenIntOrFpInductionRecipe() override=default
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step, VPValue *VF, const InductionDescriptor &IndDesc, TruncInst *Trunc, const VPIRFlags &Flags, DebugLoc DL)
Definition VPlan.h:2214
VPWidenIntOrFpInductionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2231
TruncInst * getTruncInst()
Returns the first defined value as TruncInst, if it is one or nullptr otherwise.
Definition VPlan.h:2257
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step, VPValue *VF, const InductionDescriptor &IndDesc, const VPIRFlags &Flags, DebugLoc DL)
Definition VPlan.h:2205
VPValue * getLastUnrolledPartOperand()
Returns the VPValue representing the value of this induction at the last unrolled part,...
Definition VPlan.h:2274
unsigned getNumIncoming() const override
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:2253
Type * getScalarType() const
Returns the scalar type of the induction.
Definition VPlan.h:2266
bool isCanonical() const
Returns true if the induction is canonical, i.e.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
A recipe for widening vector intrinsics.
Definition VPlan.h:1598
VPWidenIntrinsicRecipe(Intrinsic::ID VectorIntrinsicID, ArrayRef< VPValue * > CallArguments, Type *Ty, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1629
Intrinsic::ID getVectorIntrinsicID() const
Return the ID of the intrinsic.
Definition VPlan.h:1669
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool mayReadFromMemory() const
Returns true if the intrinsic may read from memory.
Definition VPlan.h:1678
StringRef getIntrinsicName() const
Return the name of the intrinsic as a string.
VPWidenIntrinsicRecipe(CallInst &CI, Intrinsic::ID VectorIntrinsicID, ArrayRef< VPValue * > CallArguments, Type *Ty, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1615
bool mayHaveSideEffects() const
Returns true if the intrinsic may have side-effects.
Definition VPlan.h:1684
VPWidenIntrinsicRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1650
bool mayWriteToMemory() const
Returns true if the intrinsic may write to memory.
Definition VPlan.h:1681
~VPWidenIntrinsicRecipe() override=default
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Type * getResultType() const
Return the scalar return type of the intrinsic.
Definition VPlan.h:1672
void execute(VPTransformState &State) override
Produce a widened version of the vector intrinsic.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this vector intrinsic.
bool IsMasked
Whether the memory access is masked.
Definition VPlan.h:3280
bool Reverse
Whether the consecutive accessed addresses are in reverse order.
Definition VPlan.h:3277
bool isConsecutive() const
Return whether the loaded-from / stored-to addresses are consecutive.
Definition VPlan.h:3320
static bool classof(const VPUser *U)
Definition VPlan.h:3314
void execute(VPTransformState &State) override
Generate the wide load/store.
Definition VPlan.h:3343
Instruction & Ingredient
Definition VPlan.h:3268
VPWidenMemoryRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3303
Instruction & getIngredient() const
Definition VPlan.h:3351
bool Consecutive
Whether the accessed addresses are consecutive.
Definition VPlan.h:3274
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:3307
VPValue * getMask() const
Return the mask used by this recipe.
Definition VPlan.h:3334
Align Alignment
Alignment information for this memory access.
Definition VPlan.h:3271
bool isMasked() const
Returns true if the recipe is masked.
Definition VPlan.h:3330
VPWidenMemoryRecipe(const char unsigned SC, Instruction &I, std::initializer_list< VPValue * > Operands, bool Consecutive, bool Reverse, const VPIRMetadata &Metadata, DebugLoc DL)
Definition VPlan.h:3290
void setMask(VPValue *Mask)
Definition VPlan.h:3282
Align getAlign() const
Returns the alignment of the memory access.
Definition VPlan.h:3340
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition VPlan.h:3327
bool isReverse() const
Return whether the consecutive loaded/stored addresses are in reverse order.
Definition VPlan.h:3324
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:2368
VPWidenPHIRecipe(PHINode *Phi, VPValue *Start=nullptr, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new VPWidenPHIRecipe for Phi with start value Start and debug location DL.
Definition VPlan.h:2339
VPWidenPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2346
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPWidenPHIRecipe() override=default
void execute(VPTransformState &State) override
Generate the phi/select nodes.
VPWidenPointerInductionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2301
~VPWidenPointerInductionRecipe() override=default
bool onlyScalarsGenerated(bool IsScalable)
Returns true if only scalar values will be generated.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate vector values for the pointer induction.
Definition VPlan.h:2310
VPWidenPointerInductionRecipe(PHINode *Phi, VPValue *Start, VPValue *Step, VPValue *NumUnrolledElems, const InductionDescriptor &IndDesc, DebugLoc DL)
Create a new VPWidenPointerInductionRecipe for Phi with start value Start and the number of elements ...
Definition VPlan.h:2291
VPWidenRecipe is a recipe for producing a widened instruction using the opcode and operands of the re...
Definition VPlan.h:1500
VPWidenRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1520
VPWidenRecipe(Instruction &I, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL={})
Definition VPlan.h:1510
VPWidenRecipe(unsigned Opcode, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags, const VPIRMetadata &Metadata, DebugLoc DL)
Definition VPlan.h:1504
~VPWidenRecipe() override=default
unsigned getOpcode() const
Definition VPlan.h:1537
Class that maps (parts of) an existing VPlan to trees of combined VPInstructions.
Definition VPlanSLP.h:74
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient outp...
Definition VPlan.h:4304
LLVM_ABI_FOR_TEST void printDOT(raw_ostream &O) const
Print this VPlan in DOT format to O.
Definition VPlan.cpp:1106
friend class VPSlotTracker
Definition VPlan.h:4306
std::string getName() const
Return a string with the name of the plan and the applicable VFs and UFs.
Definition VPlan.cpp:1082
bool hasVF(ElementCount VF) const
Definition VPlan.h:4509
LLVMContext & getContext() const
Definition VPlan.h:4497
VPBasicBlock * getEntry()
Definition VPlan.h:4397
VPValue & getVectorTripCount()
The vector trip count.
Definition VPlan.h:4488
void setName(const Twine &newName)
Definition VPlan.h:4547
bool hasScalableVF() const
Definition VPlan.h:4510
VPValue & getVFxUF()
Returns VF * UF of the vector loop region.
Definition VPlan.h:4495
VPValue & getVF()
Returns the VF of the vector loop region.
Definition VPlan.h:4491
VPValue * getTripCount() const
The trip count of the original loop.
Definition VPlan.h:4459
VPValue * getTrue()
Return a VPValue wrapping i1 true.
Definition VPlan.h:4566
VPValue * getOrCreateBackedgeTakenCount()
The backedge taken count of the original loop.
Definition VPlan.h:4480
iterator_range< SmallSetVector< ElementCount, 2 >::iterator > vectorFactors() const
Returns an iterator range over all VFs of the plan.
Definition VPlan.h:4516
VPIRBasicBlock * getExitBlock(BasicBlock *IRBB) const
Return the VPIRBasicBlock corresponding to IRBB.
Definition VPlan.cpp:890
LLVM_ABI_FOR_TEST ~VPlan()
Definition VPlan.cpp:868
const VPValue & getVF() const
Definition VPlan.h:4492
bool isExitBlock(VPBlockBase *VPBB)
Returns true if VPBB is an exit block.
Definition VPlan.cpp:898
const VPBasicBlock * getEntry() const
Definition VPlan.h:4398
friend class VPlanPrinter
Definition VPlan.h:4305
VPValue * getConstantInt(const APInt &Val)
Return a VPValue wrapping a ConstantInt with the given APInt value.
Definition VPlan.h:4583
unsigned getUF() const
Definition VPlan.h:4529
VPRegionBlock * createReplicateRegion(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name="")
Create a new replicate region with Entry, Exiting and Name.
Definition VPlan.h:4643
VPIRBasicBlock * createEmptyVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock wrapping IRBB, but do not create VPIRInstructions wrapping the instructions i...
Definition VPlan.cpp:1220
bool hasUF(unsigned UF) const
Definition VPlan.h:4527
ArrayRef< VPIRBasicBlock * > getExitBlocks() const
Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of the original scalar loop.
Definition VPlan.h:4449
VPValue * getConstantInt(Type *Ty, uint64_t Val, bool IsSigned=false)
Return a VPValue wrapping a ConstantInt with the given type and value.
Definition VPlan.h:4572
VPValue * getBackedgeTakenCount() const
Definition VPlan.h:4485
void setVF(ElementCount VF)
Definition VPlan.h:4503
bool isUnrolled() const
Returns true if the VPlan already has been unrolled, i.e.
Definition VPlan.h:4542
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition VPlan.cpp:1011
bool hasEarlyExit() const
Returns true if the VPlan is based on a loop with an early exit.
Definition VPlan.h:4665
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this plan.
Definition VPlan.cpp:993
const VPBasicBlock * getMiddleBlock() const
Definition VPlan.h:4435
void setTripCount(VPValue *NewTripCount)
Set the trip count, assuming it is currently null; if it is not, use resetTripCount() instead.
Definition VPlan.h:4466
void resetTripCount(VPValue *NewTripCount)
Resets the trip count for the VPlan.
Definition VPlan.h:4473
VPBasicBlock * getMiddleBlock()
Returns the 'middle' block of the plan, that is the block that selects whether to execute the scalar ...
Definition VPlan.h:4422
void setEntry(VPBasicBlock *VPBB)
Definition VPlan.h:4386
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
Definition VPlan.h:4621
LLVM_ABI_FOR_TEST VPIRBasicBlock * createVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock from IRBB containing VPIRInstructions for all instructions in IRBB,...
Definition VPlan.cpp:1226
VPValue * getFalse()
Return a VPValue wrapping i1 false.
Definition VPlan.h:4569
VPValue * getOrAddLiveIn(Value *V)
Gets the live-in VPValue for V or adds a new live-in (if none exists yet) for V.
Definition VPlan.h:4551
VPRegionBlock * createLoopRegion(const std::string &Name="", VPBlockBase *Entry=nullptr, VPBlockBase *Exiting=nullptr)
Create a new loop region with Name and entry and exiting blocks set to Entry and Exiting respectively...
Definition VPlan.h:4631
LLVM_DUMP_METHOD void dump() const
Dump the plan to stderr (for debugging).
Definition VPlan.cpp:1112
bool hasScalarVFOnly() const
Definition VPlan.h:4520
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
Definition VPlan.h:4440
void execute(VPTransformState *State)
Generate the IR code for this VPlan.
Definition VPlan.cpp:905
ArrayRef< VPValue * > getLiveIns() const
Return the list of live-in VPValues available in the VPlan.
Definition VPlan.h:4591
LLVM_ABI_FOR_TEST void print(raw_ostream &O) const
Print this VPlan to O.
Definition VPlan.cpp:1065
void addVF(ElementCount VF)
Definition VPlan.h:4501
VPIRBasicBlock * getScalarHeader() const
Return the VPIRBasicBlock wrapping the header of the scalar loop.
Definition VPlan.h:4445
VPValue * getLiveIn(Value *V) const
Return the live-in VPValue for V, if there is one or nullptr otherwise.
Definition VPlan.h:4588
VPValue * getConstantInt(unsigned BitWidth, uint64_t Val, bool IsSigned=false)
Return a VPValue wrapping a ConstantInt with the given bitwidth and value.
Definition VPlan.h:4577
void printLiveIns(raw_ostream &O) const
Print the live-ins of this VPlan to O.
Definition VPlan.cpp:1027
VPBasicBlock * getVectorPreheader()
Returns the preheader of the vector loop region, if one exists, or null otherwise.
Definition VPlan.h:4402
void setUF(unsigned UF)
Definition VPlan.h:4534
bool hasScalarTail() const
Returns true if the scalar tail may execute after the vector loop.
Definition VPlan.h:4675
LLVM_ABI_FOR_TEST VPlan * duplicate()
Clone the current VPlan, update all VPValues of the new VPlan and cloned recipes to refer to the clon...
Definition VPlan.cpp:1153
VPlan(BasicBlock *ScalarHeaderBB)
Construct a VPlan with a new VPBasicBlock as entry, a VPIRBasicBlock wrapping ScalarHeaderBB and a tr...
Definition VPlan.h:4379
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
Increasing range of size_t indices.
Definition STLExtras.h:2425
typename base_list_type::const_reverse_iterator const_reverse_iterator
Definition ilist.h:124
typename base_list_type::reverse_iterator reverse_iterator
Definition ilist.h:123
typename base_list_type::const_iterator const_iterator
Definition ilist.h:122
An intrusive list with ownership and callbacks specified/controlled by ilist_traits,...
Definition ilist.h:328
A range adaptor for a pair of iterators.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
This file defines classes to implement an intrusive doubly linked list class.
This file defines the ilist_node class template, which is a convenient base class for creating classe...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI AttributeSet getFnAttributes(LLVMContext &C, ID id)
Return the function attributes for an intrinsic.
std::variant< std::monostate, Loc::Single, Loc::Multi, Loc::MMI, Loc::EntryValue > Variant
Alias for the std::variant specialization base class of DbgVariable.
Definition DwarfDebug.h:189
static auto castToVPIRMetadata(RecipeBasePtrTy R) -> DstTy
Definition VPlan.h:3918
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iterable types.
Definition STLExtras.h:829
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1751
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1725
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
Definition STLExtras.h:839
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2472
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI void getMetadataToPropagate(Instruction *Inst, SmallVectorImpl< std::pair< unsigned, MDNode * > > &Metadata)
Add metadata from Inst to Metadata, if it can be preserved after vectorization.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
auto cast_or_null(const Y &Val)
Definition Casting.h:714
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
MemoryEffectsBase< IRMemLocation > MemoryEffects
Summary of how a function affects memory in the program.
Definition ModRef.h:301
auto map_range(ContainerTy &&C, FuncTy F)
Definition STLExtras.h:364
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1732
auto reverse(ContainerTy &&C)
Definition STLExtras.h:406
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition STLExtras.h:323
@ Other
Any other memory.
Definition ModRef.h:68
RecurKind
These are the kinds of recurrences that we support.
@ Mul
Product of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition STLExtras.h:1954
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
constexpr unsigned BitWidth
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:1961
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1758
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1897
DenseMap< Value *, VPValue * > Value2VPValueTy
Definition VPlanValue.h:199
std::unique_ptr< VPlan > VPlanPtr
Definition VPlan.h:76
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:869
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Support casting from VPRecipeBase -> VPIRMetadata, by down-casting to the recipe types implementing V...
Definition VPlan.h:3951
static RetTy doCast(SrcTy R)
doCast is used by cast<>.
Definition VPlan.h:3965
static RetTy doCastIfPossible(SrcTy R)
doCastIfPossible is used by dyn_cast<>.
Definition VPlan.h:3970
static bool isPossible(SrcTy R)
Definition VPlan.h:3952
Support casting from VPRecipeBase -> VPPhiAccessors, by down-casting to the recipe types implementing...
Definition VPlan.h:3880
static VPPhiAccessors * doCastIfPossible(SrcTy f)
doCastIfPossible is used by dyn_cast<>.
Definition VPlan.h:3901
CastInfo< VPPhiAccessors, SrcTy > Self
Definition VPlan.h:3882
static VPPhiAccessors * doCast(SrcTy R)
doCast is used by cast<>.
Definition VPlan.h:3885
This struct provides a method for customizing the way a cast is performed.
Definition Casting.h:476
static bool isPossible(const VPRecipeBase *f)
Definition VPlan.h:3872
This struct provides a way to check if a given cast is possible.
Definition Casting.h:253
static bool isPossible(const SrcTy &f)
Definition Casting.h:254
Struct to hold various analysis needed for cost computations.
void execute(VPTransformState &State) override
Generate the phi nodes.
VPFirstOrderRecurrencePHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2380
VPFirstOrderRecurrencePHIRecipe(PHINode *Phi, VPValue &Start)
Definition VPlan.h:2375
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this first-order recurrence phi recipe.
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2392
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
DisjointFlagsTy(bool IsDisjoint)
Definition VPlan.h:640
NonNegFlagsTy(bool IsNonNeg)
Definition VPlan.h:645
TruncFlagsTy(bool HasNUW, bool HasNSW)
Definition VPlan.h:635
WrapFlagsTy(bool HasNUW, bool HasNSW)
Definition VPlan.h:628
PHINode & getIRPhi()
Definition VPlan.h:1481
VPIRPhi(PHINode &PN)
Definition VPlan.h:1474
static bool classof(const VPRecipeBase *U)
Definition VPlan.h:1476
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:1492
static bool classof(const VPUser *U)
Definition VPlan.h:1368
VPPhi * clone() override
Clone the current recipe.
Definition VPlan.h:1383
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:1398
VPPhi(ArrayRef< VPValue * > Operands, DebugLoc DL, const Twine &Name="")
Definition VPlan.h:1365
static bool classof(const VPSingleDefRecipe *SDR)
Definition VPlan.h:1378
static bool classof(const VPValue *V)
Definition VPlan.h:1373
A pure-virtual common base class for recipes defining a single VPValue and using IR flags.
Definition VPlan.h:923
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:929
InstructionCost getCostForRecipeWithOpcode(unsigned Opcode, ElementCount VF, VPCostContext &Ctx) const
Compute the cost for this recipe for VF, using Opcode and Ctx.
VPRecipeWithIRFlags(const unsigned char SC, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:924
static bool classof(const VPValue *V)
Definition VPlan.h:949
static bool classof(const VPSingleDefRecipe *U)
Definition VPlan.h:956
void execute(VPTransformState &State) override=0
The method which generates the output IR instructions that correspond to this VPRecipe,...
VPRecipeWithIRFlags * clone() override=0
Clone the current recipe.
static bool classof(const VPUser *U)
Definition VPlan.h:944
VPTransformState holds information passed down when "executing" a VPlan, needed for generating the ou...
A recipe for widening load operations with vector-predication intrinsics, using the address to load f...
Definition VPlan.h:3398
void execute(VPTransformState &State) override
Generate the wide load or gather.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenLoadEVLRecipe.
VPValue * getEVL() const
Return the EVL operand.
Definition VPlan.h:3411
VPWidenLoadEVLRecipe(VPWidenLoadRecipe &L, VPValue *Addr, VPValue &EVL, VPValue *Mask)
Definition VPlan.h:3399
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3421
A recipe for widening load operations, using the address to load from and an optional mask.
Definition VPlan.h:3357
VP_CLASSOF_IMPL(VPDef::VPWidenLoadSC)
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3379
void execute(VPTransformState &State) override
Generate a wide load or gather.
VPWidenLoadRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask, bool Consecutive, bool Reverse, const VPIRMetadata &Metadata, DebugLoc DL)
Definition VPlan.h:3358
VPWidenLoadRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3367
A recipe for widening select instructions.
Definition VPlan.h:1797
VPWidenSelectRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1808
VPWidenSelectRecipe(SelectInst *SI, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL={})
Definition VPlan.h:1798
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:1829
VPValue * getCond() const
Definition VPlan.h:1824
unsigned getOpcode() const
Definition VPlan.h:1822
~VPWidenSelectRecipe() override=default
A recipe for widening store operations with vector-predication intrinsics, using the value to store,...
Definition VPlan.h:3482
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition VPlan.h:3494
void execute(VPTransformState &State) override
Generate the wide store or scatter.
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3507
VPWidenStoreEVLRecipe(VPWidenStoreRecipe &S, VPValue *Addr, VPValue &EVL, VPValue *Mask)
Definition VPlan.h:3483
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenStoreEVLRecipe.
VPValue * getEVL() const
Return the EVL operand.
Definition VPlan.h:3497
A recipe for widening store operations, using the stored value, the address to store to and an option...
Definition VPlan.h:3439
VP_CLASSOF_IMPL(VPDef::VPWidenStoreSC)
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition VPlan.h:3457
VPWidenStoreRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3448
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3463
VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal, VPValue *Mask, bool Consecutive, bool Reverse, const VPIRMetadata &Metadata, DebugLoc DL)
Definition VPlan.h:3440