LLVM 22.0.0git
VPlan.h
Go to the documentation of this file.
1//===- VPlan.h - Represent A Vectorizer Plan --------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file contains the declarations of the Vectorization Plan base classes:
11/// 1. VPBasicBlock and VPRegionBlock that inherit from a common pure virtual
12/// VPBlockBase, together implementing a Hierarchical CFG;
13/// 2. Pure virtual VPRecipeBase serving as the base class for recipes contained
14/// within VPBasicBlocks;
15/// 3. Pure virtual VPSingleDefRecipe serving as a base class for recipes that
16/// also inherit from VPValue.
17/// 4. VPInstruction, a concrete Recipe and VPUser modeling a single planned
18/// instruction;
19/// 5. The VPlan class holding a candidate for vectorization;
20/// These are documented in docs/VectorizationPlan.rst.
21//
22//===----------------------------------------------------------------------===//
23
24#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
25#define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
26
27#include "VPlanValue.h"
28#include "llvm/ADT/DenseMap.h"
31#include "llvm/ADT/Twine.h"
32#include "llvm/ADT/ilist.h"
33#include "llvm/ADT/ilist_node.h"
37#include "llvm/IR/DebugLoc.h"
38#include "llvm/IR/FMF.h"
39#include "llvm/IR/Operator.h"
42#include <cassert>
43#include <cstddef>
44#include <functional>
45#include <string>
46#include <utility>
47#include <variant>
48
49namespace llvm {
50
51class BasicBlock;
52class DominatorTree;
54class IRBuilderBase;
55struct VPTransformState;
56class raw_ostream;
58class SCEV;
59class Type;
60class VPBasicBlock;
61class VPBuilder;
62class VPDominatorTree;
63class VPRegionBlock;
64class VPlan;
65class VPLane;
67class VPlanSlp;
68class Value;
70
71struct VPCostContext;
72
73namespace Intrinsic {
74typedef unsigned ID;
75}
76
77using VPlanPtr = std::unique_ptr<VPlan>;
78
79/// VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
80/// A VPBlockBase can be either a VPBasicBlock or a VPRegionBlock.
82 friend class VPBlockUtils;
83
84 const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).
85
86 /// An optional name for the block.
87 std::string Name;
88
89 /// The immediate VPRegionBlock which this VPBlockBase belongs to, or null if
90 /// it is a topmost VPBlockBase.
91 VPRegionBlock *Parent = nullptr;
92
93 /// List of predecessor blocks.
95
96 /// List of successor blocks.
98
99 /// VPlan containing the block. Can only be set on the entry block of the
100 /// plan.
101 VPlan *Plan = nullptr;
102
103 /// Add \p Successor as the last successor to this block.
104 void appendSuccessor(VPBlockBase *Successor) {
105 assert(Successor && "Cannot add nullptr successor!");
106 Successors.push_back(Successor);
107 }
108
109 /// Add \p Predecessor as the last predecessor to this block.
110 void appendPredecessor(VPBlockBase *Predecessor) {
111 assert(Predecessor && "Cannot add nullptr predecessor!");
112 Predecessors.push_back(Predecessor);
113 }
114
115 /// Remove \p Predecessor from the predecessors of this block.
116 void removePredecessor(VPBlockBase *Predecessor) {
117 auto Pos = find(Predecessors, Predecessor);
118 assert(Pos && "Predecessor does not exist");
119 Predecessors.erase(Pos);
120 }
121
122 /// Remove \p Successor from the successors of this block.
123 void removeSuccessor(VPBlockBase *Successor) {
124 auto Pos = find(Successors, Successor);
125 assert(Pos && "Successor does not exist");
126 Successors.erase(Pos);
127 }
128
129 /// This function replaces one predecessor with another, useful when
130 /// trying to replace an old block in the CFG with a new one.
131 void replacePredecessor(VPBlockBase *Old, VPBlockBase *New) {
132 auto I = find(Predecessors, Old);
133 assert(I != Predecessors.end());
134 assert(Old->getParent() == New->getParent() &&
135 "replaced predecessor must have the same parent");
136 *I = New;
137 }
138
139 /// This function replaces one successor with another, useful when
140 /// trying to replace an old block in the CFG with a new one.
141 void replaceSuccessor(VPBlockBase *Old, VPBlockBase *New) {
142 auto I = find(Successors, Old);
143 assert(I != Successors.end());
144 assert(Old->getParent() == New->getParent() &&
145 "replaced successor must have the same parent");
146 *I = New;
147 }
148
149protected:
150 VPBlockBase(const unsigned char SC, const std::string &N)
151 : SubclassID(SC), Name(N) {}
152
153public:
154 /// An enumeration for keeping track of the concrete subclass of VPBlockBase
155 /// that are actually instantiated. Values of this enumeration are kept in the
156 /// SubclassID field of the VPBlockBase objects. They are used for concrete
157 /// type identification.
158 using VPBlockTy = enum { VPRegionBlockSC, VPBasicBlockSC, VPIRBasicBlockSC };
159
161
162 virtual ~VPBlockBase() = default;
163
164 const std::string &getName() const { return Name; }
165
166 void setName(const Twine &newName) { Name = newName.str(); }
167
168 /// \return an ID for the concrete type of this object.
169 /// This is used to implement the classof checks. This should not be used
170 /// for any other purpose, as the values may change as LLVM evolves.
171 unsigned getVPBlockID() const { return SubclassID; }
172
173 VPRegionBlock *getParent() { return Parent; }
174 const VPRegionBlock *getParent() const { return Parent; }
175
176 /// \return A pointer to the plan containing the current block.
177 VPlan *getPlan();
178 const VPlan *getPlan() const;
179
180 /// Sets the pointer of the plan containing the block. The block must be the
181 /// entry block into the VPlan.
182 void setPlan(VPlan *ParentPlan);
183
184 void setParent(VPRegionBlock *P) { Parent = P; }
185
186 /// \return the VPBasicBlock that is the entry of this VPBlockBase,
187 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
188 /// VPBlockBase is a VPBasicBlock, it is returned.
189 const VPBasicBlock *getEntryBasicBlock() const;
190 VPBasicBlock *getEntryBasicBlock();
191
192 /// \return the VPBasicBlock that is the exiting block of this VPBlockBase,
193 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
194 /// VPBlockBase is a VPBasicBlock, it is returned.
195 const VPBasicBlock *getExitingBasicBlock() const;
196 VPBasicBlock *getExitingBasicBlock();
197
198 const VPBlocksTy &getSuccessors() const { return Successors; }
199 VPBlocksTy &getSuccessors() { return Successors; }
200
203
204 const VPBlocksTy &getPredecessors() const { return Predecessors; }
205 VPBlocksTy &getPredecessors() { return Predecessors; }
206
207 /// \return the successor of this VPBlockBase if it has a single successor.
208 /// Otherwise return a null pointer.
210 return (Successors.size() == 1 ? *Successors.begin() : nullptr);
211 }
212
213 /// \return the predecessor of this VPBlockBase if it has a single
214 /// predecessor. Otherwise return a null pointer.
216 return (Predecessors.size() == 1 ? *Predecessors.begin() : nullptr);
217 }
218
219 size_t getNumSuccessors() const { return Successors.size(); }
220 size_t getNumPredecessors() const { return Predecessors.size(); }
221
222 /// Returns true if this block has any predecessors.
223 bool hasPredecessors() const { return !Predecessors.empty(); }
224
225 /// An Enclosing Block of a block B is any block containing B, including B
226 /// itself. \return the closest enclosing block starting from "this", which
227 /// has successors. \return the root enclosing block if all enclosing blocks
228 /// have no successors.
229 VPBlockBase *getEnclosingBlockWithSuccessors();
230
231 /// \return the closest enclosing block starting from "this", which has
232 /// predecessors. \return the root enclosing block if all enclosing blocks
233 /// have no predecessors.
234 VPBlockBase *getEnclosingBlockWithPredecessors();
235
236 /// \return the successors either attached directly to this VPBlockBase or, if
237 /// this VPBlockBase is the exit block of a VPRegionBlock and has no
238 /// successors of its own, search recursively for the first enclosing
239 /// VPRegionBlock that has successors and return them. If no such
240 /// VPRegionBlock exists, return the (empty) successors of the topmost
241 /// VPBlockBase reached.
243 return getEnclosingBlockWithSuccessors()->getSuccessors();
244 }
245
246 /// \return the hierarchical successor of this VPBlockBase if it has a single
247 /// hierarchical successor. Otherwise return a null pointer.
249 return getEnclosingBlockWithSuccessors()->getSingleSuccessor();
250 }
251
252 /// \return the predecessors either attached directly to this VPBlockBase or,
253 /// if this VPBlockBase is the entry block of a VPRegionBlock and has no
254 /// predecessors of its own, search recursively for the first enclosing
255 /// VPRegionBlock that has predecessors and return them. If no such
256 /// VPRegionBlock exists, return the (empty) predecessors of the topmost
257 /// VPBlockBase reached.
259 return getEnclosingBlockWithPredecessors()->getPredecessors();
260 }
261
262 /// \return the hierarchical predecessor of this VPBlockBase if it has a
263 /// single hierarchical predecessor. Otherwise return a null pointer.
267
268 /// Set a given VPBlockBase \p Successor as the single successor of this
269 /// VPBlockBase. This VPBlockBase is not added as predecessor of \p Successor.
270 /// This VPBlockBase must have no successors.
272 assert(Successors.empty() && "Setting one successor when others exist.");
273 assert(Successor->getParent() == getParent() &&
274 "connected blocks must have the same parent");
275 appendSuccessor(Successor);
276 }
277
278 /// Set two given VPBlockBases \p IfTrue and \p IfFalse to be the two
279 /// successors of this VPBlockBase. This VPBlockBase is not added as
280 /// predecessor of \p IfTrue or \p IfFalse. This VPBlockBase must have no
281 /// successors.
282 void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse) {
283 assert(Successors.empty() && "Setting two successors when others exist.");
284 appendSuccessor(IfTrue);
285 appendSuccessor(IfFalse);
286 }
287
288 /// Set each VPBasicBlock in \p NewPreds as predecessor of this VPBlockBase.
289 /// This VPBlockBase must have no predecessors. This VPBlockBase is not added
290 /// as successor of any VPBasicBlock in \p NewPreds.
292 assert(Predecessors.empty() && "Block predecessors already set.");
293 for (auto *Pred : NewPreds)
294 appendPredecessor(Pred);
295 }
296
297 /// Set each VPBasicBlock in \p NewSuccs as successor of this VPBlockBase.
298 /// This VPBlockBase must have no successors. This VPBlockBase is not added
299 /// as predecessor of any VPBasicBlock in \p NewSuccs.
301 assert(Successors.empty() && "Block successors already set.");
302 for (auto *Succ : NewSuccs)
303 appendSuccessor(Succ);
304 }
305
306 /// Remove all the predecessors of this block.
307 void clearPredecessors() { Predecessors.clear(); }
308
309 /// Remove all the successors of this block.
310 void clearSuccessors() { Successors.clear(); }
311
312 /// Swap predecessors of the block. The block must have exactly 2
313 /// predecessors.
315 assert(Predecessors.size() == 2 && "must have 2 predecessors to swap");
316 std::swap(Predecessors[0], Predecessors[1]);
317 }
318
319 /// Swap successors of the block. The block must have exactly 2 successors.
320 // TODO: This should be part of introducing conditional branch recipes rather
321 // than being independent.
323 assert(Successors.size() == 2 && "must have 2 successors to swap");
324 std::swap(Successors[0], Successors[1]);
325 }
326
327 /// Returns the index for \p Pred in the blocks predecessors list.
328 unsigned getIndexForPredecessor(const VPBlockBase *Pred) const {
329 assert(count(Predecessors, Pred) == 1 &&
330 "must have Pred exactly once in Predecessors");
331 return std::distance(Predecessors.begin(), find(Predecessors, Pred));
332 }
333
334 /// Returns the index for \p Succ in the blocks successor list.
335 unsigned getIndexForSuccessor(const VPBlockBase *Succ) const {
336 assert(count(Successors, Succ) == 1 &&
337 "must have Succ exactly once in Successors");
338 return std::distance(Successors.begin(), find(Successors, Succ));
339 }
340
341 /// The method which generates the output IR that correspond to this
342 /// VPBlockBase, thereby "executing" the VPlan.
343 virtual void execute(VPTransformState *State) = 0;
344
345 /// Return the cost of the block.
347
348#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
349 void printAsOperand(raw_ostream &OS, bool PrintType = false) const {
350 OS << getName();
351 }
352
353 /// Print plain-text dump of this VPBlockBase to \p O, prefixing all lines
354 /// with \p Indent. \p SlotTracker is used to print unnamed VPValue's using
355 /// consecutive numbers.
356 ///
357 /// Note that the numbering is applied to the whole VPlan, so printing
358 /// individual blocks is consistent with the whole VPlan printing.
359 virtual void print(raw_ostream &O, const Twine &Indent,
360 VPSlotTracker &SlotTracker) const = 0;
361
362 /// Print plain-text dump of this VPlan to \p O.
363 void print(raw_ostream &O) const;
364
365 /// Print the successors of this block to \p O, prefixing all lines with \p
366 /// Indent.
367 void printSuccessors(raw_ostream &O, const Twine &Indent) const;
368
369 /// Dump this VPBlockBase to dbgs().
370 LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
371#endif
372
373 /// Clone the current block and its recipes without updating the operands of
374 /// the cloned recipes, including all blocks in the single-entry single-exit
375 /// region for VPRegionBlocks.
376 virtual VPBlockBase *clone() = 0;
377};
378
379/// VPRecipeBase is a base class modeling a sequence of one or more output IR
380/// instructions. VPRecipeBase owns the VPValues it defines through VPDef
381/// and is responsible for deleting its defined values. Single-value
382 /// recipes must inherit from VPSingleDefRecipe instead of inheriting from both
383/// VPRecipeBase and VPValue separately.
385 : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
386 public VPDef,
387 public VPUser {
388 friend VPBasicBlock;
389 friend class VPBlockUtils;
390
391 /// Each VPRecipe belongs to a single VPBasicBlock.
392 VPBasicBlock *Parent = nullptr;
393
394 /// The debug location for the recipe.
395 DebugLoc DL;
396
397public:
398 VPRecipeBase(const unsigned char SC, ArrayRef<VPValue *> Operands,
400 : VPDef(SC), VPUser(Operands), DL(DL) {}
401
402 ~VPRecipeBase() override = default;
403
404 /// Clone the current recipe.
405 virtual VPRecipeBase *clone() = 0;
406
407 /// \return the VPBasicBlock which this VPRecipe belongs to.
408 VPBasicBlock *getParent() { return Parent; }
409 const VPBasicBlock *getParent() const { return Parent; }
410
411 /// \return the VPRegionBlock which the recipe belongs to.
412 VPRegionBlock *getRegion();
413 const VPRegionBlock *getRegion() const;
414
415 /// The method which generates the output IR instructions that correspond to
416 /// this VPRecipe, thereby "executing" the VPlan.
417 virtual void execute(VPTransformState &State) = 0;
418
419 /// Return the cost of this recipe, taking into account if the cost
420 /// computation should be skipped and the ForceTargetInstructionCost flag.
421 /// Also takes care of printing the cost for debugging.
423
424 /// Insert an unlinked recipe into a basic block immediately before
425 /// the specified recipe.
426 void insertBefore(VPRecipeBase *InsertPos);
427 /// Insert an unlinked recipe into \p BB immediately before the insertion
428 /// point \p IP;
429 void insertBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator IP);
430
431 /// Insert an unlinked Recipe into a basic block immediately after
432 /// the specified Recipe.
433 void insertAfter(VPRecipeBase *InsertPos);
434
435 /// Unlink this recipe from its current VPBasicBlock and insert it into
436 /// the VPBasicBlock that MovePos lives in, right after MovePos.
437 void moveAfter(VPRecipeBase *MovePos);
438
439 /// Unlink this recipe and insert into BB before I.
440 ///
441 /// \pre I is a valid iterator into BB.
442 void moveBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator I);
443
444 /// This method unlinks 'this' from the containing basic block, but does not
445 /// delete it.
446 void removeFromParent();
447
448 /// This method unlinks 'this' from the containing basic block and deletes it.
449 ///
450 /// \returns an iterator pointing to the element after the erased one
452
453 /// Method to support type inquiry through isa, cast, and dyn_cast.
454 static inline bool classof(const VPDef *D) {
455 // All VPDefs are also VPRecipeBases.
456 return true;
457 }
458
459 static inline bool classof(const VPUser *U) { return true; }
460
461 /// Returns true if the recipe may have side-effects.
462 bool mayHaveSideEffects() const;
463
464 /// Returns true for PHI-like recipes.
465 bool isPhi() const;
466
467 /// Returns true if the recipe may read from memory.
468 bool mayReadFromMemory() const;
469
470 /// Returns true if the recipe may write to memory.
471 bool mayWriteToMemory() const;
472
473 /// Returns true if the recipe may read from or write to memory.
474 bool mayReadOrWriteMemory() const {
476 }
477
478 /// Returns the debug location of the recipe.
479 DebugLoc getDebugLoc() const { return DL; }
480
481 /// Return true if the recipe is a scalar cast.
482 bool isScalarCast() const;
483
484 /// Set the recipe's debug location to \p NewDL.
485 void setDebugLoc(DebugLoc NewDL) { DL = NewDL; }
486
487#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
488 /// Print the recipe, delegating to printRecipe().
489 void print(raw_ostream &O, const Twine &Indent,
490 VPSlotTracker &SlotTracker) const override final;
491#endif
492
493protected:
494 /// Compute the cost of this recipe either using a recipe's specialized
495 /// implementation or using the legacy cost model and the underlying
496 /// instructions.
497 virtual InstructionCost computeCost(ElementCount VF,
498 VPCostContext &Ctx) const;
499
500#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
501 /// Each concrete VPRecipe prints itself, without printing common information,
502 /// like debug info or metadata.
503 virtual void printRecipe(raw_ostream &O, const Twine &Indent,
504 VPSlotTracker &SlotTracker) const = 0;
505#endif
506};
507
// Helper macro that stamps out the classof overloads shared by recipes. Each
// overload answers whether its argument is the recipe kind identified by
// VPDefID:
//  - VPDef, VPRecipeBase and VPSingleDefRecipe arguments are tested directly
//    through getVPDefID();
//  - a VPValue is tested through its defining recipe, if it has one;
//  - a VPUser is tested after a dyn_cast to VPRecipeBase, if that succeeds.
#define VP_CLASSOF_IMPL(VPDefID)                                               \
  static inline bool classof(const VPDef *Def) {                               \
    return Def->getVPDefID() == VPDefID;                                       \
  }                                                                            \
  static inline bool classof(const VPValue *Val) {                             \
    auto *DefRecipe = Val->getDefiningRecipe();                                \
    return DefRecipe && DefRecipe->getVPDefID() == VPDefID;                    \
  }                                                                            \
  static inline bool classof(const VPUser *User) {                             \
    auto *UserRecipe = dyn_cast<VPRecipeBase>(User);                           \
    return UserRecipe && UserRecipe->getVPDefID() == VPDefID;                  \
  }                                                                            \
  static inline bool classof(const VPRecipeBase *Recipe) {                     \
    return Recipe->getVPDefID() == VPDefID;                                    \
  }                                                                            \
  static inline bool classof(const VPSingleDefRecipe *Recipe) {                \
    return Recipe->getVPDefID() == VPDefID;                                    \
  }
527
528/// VPSingleDefRecipe is a base class for recipes modeling a sequence of one or
529/// more output IR instructions that define a single result VPValue.
530/// Note that VPRecipeBase must be inherited from before VPValue.
531class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
532public:
533 VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
535 : VPRecipeBase(SC, Operands, DL), VPValue(this) {}
536
537 VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
539 : VPRecipeBase(SC, Operands, DL), VPValue(this, UV) {}
540
541 static inline bool classof(const VPRecipeBase *R) {
542 switch (R->getVPDefID()) {
543 case VPRecipeBase::VPDerivedIVSC:
544 case VPRecipeBase::VPEVLBasedIVPHISC:
545 case VPRecipeBase::VPExpandSCEVSC:
546 case VPRecipeBase::VPExpressionSC:
547 case VPRecipeBase::VPInstructionSC:
548 case VPRecipeBase::VPReductionEVLSC:
549 case VPRecipeBase::VPReductionSC:
550 case VPRecipeBase::VPReplicateSC:
551 case VPRecipeBase::VPScalarIVStepsSC:
552 case VPRecipeBase::VPVectorPointerSC:
553 case VPRecipeBase::VPVectorEndPointerSC:
554 case VPRecipeBase::VPWidenCallSC:
555 case VPRecipeBase::VPWidenCanonicalIVSC:
556 case VPRecipeBase::VPWidenCastSC:
557 case VPRecipeBase::VPWidenGEPSC:
558 case VPRecipeBase::VPWidenIntrinsicSC:
559 case VPRecipeBase::VPWidenSC:
560 case VPRecipeBase::VPWidenSelectSC:
561 case VPRecipeBase::VPBlendSC:
562 case VPRecipeBase::VPPredInstPHISC:
563 case VPRecipeBase::VPCanonicalIVPHISC:
564 case VPRecipeBase::VPActiveLaneMaskPHISC:
565 case VPRecipeBase::VPFirstOrderRecurrencePHISC:
566 case VPRecipeBase::VPWidenPHISC:
567 case VPRecipeBase::VPWidenIntOrFpInductionSC:
568 case VPRecipeBase::VPWidenPointerInductionSC:
569 case VPRecipeBase::VPReductionPHISC:
570 return true;
571 case VPRecipeBase::VPBranchOnMaskSC:
572 case VPRecipeBase::VPInterleaveEVLSC:
573 case VPRecipeBase::VPInterleaveSC:
574 case VPRecipeBase::VPIRInstructionSC:
575 case VPRecipeBase::VPWidenLoadEVLSC:
576 case VPRecipeBase::VPWidenLoadSC:
577 case VPRecipeBase::VPWidenStoreEVLSC:
578 case VPRecipeBase::VPWidenStoreSC:
579 case VPRecipeBase::VPHistogramSC:
580 // TODO: Widened stores don't define a value, but widened loads do. Split
581 // the recipes to be able to make widened loads VPSingleDefRecipes.
582 return false;
583 }
584 llvm_unreachable("Unhandled VPDefID");
585 }
586
587 static inline bool classof(const VPUser *U) {
588 auto *R = dyn_cast<VPRecipeBase>(U);
589 return R && classof(R);
590 }
591
592 VPSingleDefRecipe *clone() override = 0;
593
594 /// Returns the underlying instruction.
601
602#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
603 /// Print this VPSingleDefRecipe to dbgs() (for debugging).
605#endif
606};
607
608/// Class to record and manage LLVM IR flags.
610 enum class OperationType : unsigned char {
611 Cmp,
612 FCmp,
613 OverflowingBinOp,
614 Trunc,
615 DisjointOp,
616 PossiblyExactOp,
617 GEPOp,
618 FPMathOp,
619 NonNegOp,
620 Other
621 };
622
623public:
624 struct WrapFlagsTy {
625 char HasNUW : 1;
626 char HasNSW : 1;
627
629 };
630
632 char HasNUW : 1;
633 char HasNSW : 1;
634
636 };
637
642
644 char NonNeg : 1;
645 NonNegFlagsTy(bool IsNonNeg) : NonNeg(IsNonNeg) {}
646 };
647
648private:
649 struct ExactFlagsTy {
650 char IsExact : 1;
651 };
652 struct FastMathFlagsTy {
653 char AllowReassoc : 1;
654 char NoNaNs : 1;
655 char NoInfs : 1;
656 char NoSignedZeros : 1;
657 char AllowReciprocal : 1;
658 char AllowContract : 1;
659 char ApproxFunc : 1;
660
661 LLVM_ABI_FOR_TEST FastMathFlagsTy(const FastMathFlags &FMF);
662 };
663 /// Holds both the predicate and fast-math flags for floating-point
664 /// comparisons.
665 struct FCmpFlagsTy {
667 FastMathFlagsTy FMFs;
668 };
669
670 OperationType OpType;
671
672 union {
677 ExactFlagsTy ExactFlags;
680 FastMathFlagsTy FMFs;
681 FCmpFlagsTy FCmpFlags;
682 unsigned AllFlags;
683 };
684
685public:
686 VPIRFlags() : OpType(OperationType::Other), AllFlags(0) {}
687
689 if (auto *FCmp = dyn_cast<FCmpInst>(&I)) {
690 OpType = OperationType::FCmp;
691 FCmpFlags.Pred = FCmp->getPredicate();
692 FCmpFlags.FMFs = FCmp->getFastMathFlags();
693 } else if (auto *Op = dyn_cast<CmpInst>(&I)) {
694 OpType = OperationType::Cmp;
695 CmpPredicate = Op->getPredicate();
696 } else if (auto *Op = dyn_cast<PossiblyDisjointInst>(&I)) {
697 OpType = OperationType::DisjointOp;
698 DisjointFlags.IsDisjoint = Op->isDisjoint();
699 } else if (auto *Op = dyn_cast<OverflowingBinaryOperator>(&I)) {
700 OpType = OperationType::OverflowingBinOp;
701 WrapFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
702 } else if (auto *Op = dyn_cast<TruncInst>(&I)) {
703 OpType = OperationType::Trunc;
704 TruncFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
705 } else if (auto *Op = dyn_cast<PossiblyExactOperator>(&I)) {
706 OpType = OperationType::PossiblyExactOp;
707 ExactFlags.IsExact = Op->isExact();
708 } else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
709 OpType = OperationType::GEPOp;
710 GEPFlags = GEP->getNoWrapFlags();
711 } else if (auto *PNNI = dyn_cast<PossiblyNonNegInst>(&I)) {
712 OpType = OperationType::NonNegOp;
713 NonNegFlags.NonNeg = PNNI->hasNonNeg();
714 } else if (auto *Op = dyn_cast<FPMathOperator>(&I)) {
715 OpType = OperationType::FPMathOp;
716 FMFs = Op->getFastMathFlags();
717 } else {
718 OpType = OperationType::Other;
719 AllFlags = 0;
720 }
721 }
722
724 : OpType(OperationType::Cmp), CmpPredicate(Pred) {}
725
727 : OpType(OperationType::FCmp) {
728 FCmpFlags.Pred = Pred;
729 FCmpFlags.FMFs = FMFs;
730 }
731
733 : OpType(OperationType::OverflowingBinOp), WrapFlags(WrapFlags) {}
734
736 : OpType(OperationType::Trunc), TruncFlags(TruncFlags) {}
737
738 VPIRFlags(FastMathFlags FMFs) : OpType(OperationType::FPMathOp), FMFs(FMFs) {}
739
741 : OpType(OperationType::DisjointOp), DisjointFlags(DisjointFlags) {}
742
744 : OpType(OperationType::NonNegOp), NonNegFlags(NonNegFlags) {}
745
747 : OpType(OperationType::GEPOp), GEPFlags(GEPFlags) {}
748
750 OpType = Other.OpType;
751 AllFlags = Other.AllFlags;
752 }
753
754 /// Only keep flags also present in \p Other. \p Other must have the same
755 /// OpType as the current object.
756 void intersectFlags(const VPIRFlags &Other);
757
758 /// Drop all poison-generating flags.
760 // NOTE: This needs to be kept in-sync with
761 // Instruction::dropPoisonGeneratingFlags.
762 switch (OpType) {
763 case OperationType::OverflowingBinOp:
764 WrapFlags.HasNUW = false;
765 WrapFlags.HasNSW = false;
766 break;
767 case OperationType::Trunc:
768 TruncFlags.HasNUW = false;
769 TruncFlags.HasNSW = false;
770 break;
771 case OperationType::DisjointOp:
772 DisjointFlags.IsDisjoint = false;
773 break;
774 case OperationType::PossiblyExactOp:
775 ExactFlags.IsExact = false;
776 break;
777 case OperationType::GEPOp:
779 break;
780 case OperationType::FPMathOp:
781 case OperationType::FCmp:
782 getFMFsRef().NoNaNs = false;
783 getFMFsRef().NoInfs = false;
784 break;
785 case OperationType::NonNegOp:
786 NonNegFlags.NonNeg = false;
787 break;
788 case OperationType::Cmp:
789 case OperationType::Other:
790 break;
791 }
792 }
793
794 /// Apply the IR flags to \p I.
795 void applyFlags(Instruction &I) const {
796 switch (OpType) {
797 case OperationType::OverflowingBinOp:
798 I.setHasNoUnsignedWrap(WrapFlags.HasNUW);
799 I.setHasNoSignedWrap(WrapFlags.HasNSW);
800 break;
801 case OperationType::Trunc:
802 I.setHasNoUnsignedWrap(TruncFlags.HasNUW);
803 I.setHasNoSignedWrap(TruncFlags.HasNSW);
804 break;
805 case OperationType::DisjointOp:
806 cast<PossiblyDisjointInst>(&I)->setIsDisjoint(DisjointFlags.IsDisjoint);
807 break;
808 case OperationType::PossiblyExactOp:
809 I.setIsExact(ExactFlags.IsExact);
810 break;
811 case OperationType::GEPOp:
812 cast<GetElementPtrInst>(&I)->setNoWrapFlags(GEPFlags);
813 break;
814 case OperationType::FPMathOp:
815 case OperationType::FCmp: {
816 const FastMathFlagsTy &F = getFMFsRef();
817 I.setHasAllowReassoc(F.AllowReassoc);
818 I.setHasNoNaNs(F.NoNaNs);
819 I.setHasNoInfs(F.NoInfs);
820 I.setHasNoSignedZeros(F.NoSignedZeros);
821 I.setHasAllowReciprocal(F.AllowReciprocal);
822 I.setHasAllowContract(F.AllowContract);
823 I.setHasApproxFunc(F.ApproxFunc);
824 break;
825 }
826 case OperationType::NonNegOp:
827 I.setNonNeg(NonNegFlags.NonNeg);
828 break;
829 case OperationType::Cmp:
830 case OperationType::Other:
831 break;
832 }
833 }
834
836 assert((OpType == OperationType::Cmp || OpType == OperationType::FCmp) &&
837 "recipe doesn't have a compare predicate");
838 return OpType == OperationType::FCmp ? FCmpFlags.Pred : CmpPredicate;
839 }
840
842 assert((OpType == OperationType::Cmp || OpType == OperationType::FCmp) &&
843 "recipe doesn't have a compare predicate");
844 if (OpType == OperationType::FCmp)
845 FCmpFlags.Pred = Pred;
846 else
847 CmpPredicate = Pred;
848 }
849
851
852 /// Returns true if the recipe has a comparison predicate.
853 bool hasPredicate() const {
854 return OpType == OperationType::Cmp || OpType == OperationType::FCmp;
855 }
856
857 /// Returns true if the recipe has fast-math flags.
858 bool hasFastMathFlags() const {
859 return OpType == OperationType::FPMathOp || OpType == OperationType::FCmp;
860 }
861
863
864 /// Returns true if the recipe has non-negative flag.
865 bool hasNonNegFlag() const { return OpType == OperationType::NonNegOp; }
866
867 bool isNonNeg() const {
868 assert(OpType == OperationType::NonNegOp &&
869 "recipe doesn't have a NNEG flag");
870 return NonNegFlags.NonNeg;
871 }
872
873 bool hasNoUnsignedWrap() const {
874 switch (OpType) {
875 case OperationType::OverflowingBinOp:
876 return WrapFlags.HasNUW;
877 case OperationType::Trunc:
878 return TruncFlags.HasNUW;
879 default:
880 llvm_unreachable("recipe doesn't have a NUW flag");
881 }
882 }
883
884 bool hasNoSignedWrap() const {
885 switch (OpType) {
886 case OperationType::OverflowingBinOp:
887 return WrapFlags.HasNSW;
888 case OperationType::Trunc:
889 return TruncFlags.HasNSW;
890 default:
891 llvm_unreachable("recipe doesn't have a NSW flag");
892 }
893 }
894
895 bool isDisjoint() const {
896 assert(OpType == OperationType::DisjointOp &&
897 "recipe cannot have a disjoing flag");
898 return DisjointFlags.IsDisjoint;
899 }
900
901private:
902 /// Get a reference to the fast-math flags for FPMathOp or FCmp.
903 FastMathFlagsTy &getFMFsRef() {
904 return OpType == OperationType::FCmp ? FCmpFlags.FMFs : FMFs;
905 }
906 const FastMathFlagsTy &getFMFsRef() const {
907 return OpType == OperationType::FCmp ? FCmpFlags.FMFs : FMFs;
908 }
909
910public:
911#if !defined(NDEBUG)
912 /// Returns true if the set flags are valid for \p Opcode.
913 LLVM_ABI_FOR_TEST bool flagsValidForOpcode(unsigned Opcode) const;
914#endif
915
916#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
917 void printFlags(raw_ostream &O) const;
918#endif
919};
920
921/// A pure-virtual common base class for recipes defining a single VPValue and
922/// using IR flags.
924 VPRecipeWithIRFlags(const unsigned char SC, ArrayRef<VPValue *> Operands,
925 const VPIRFlags &Flags,
927 : VPSingleDefRecipe(SC, Operands, DL), VPIRFlags(Flags) {}
928
929 static inline bool classof(const VPRecipeBase *R) {
930 return R->getVPDefID() == VPRecipeBase::VPInstructionSC ||
931 R->getVPDefID() == VPRecipeBase::VPWidenSC ||
932 R->getVPDefID() == VPRecipeBase::VPWidenGEPSC ||
933 R->getVPDefID() == VPRecipeBase::VPWidenCallSC ||
934 R->getVPDefID() == VPRecipeBase::VPWidenCastSC ||
935 R->getVPDefID() == VPRecipeBase::VPWidenIntrinsicSC ||
936 R->getVPDefID() == VPRecipeBase::VPWidenSelectSC ||
937 R->getVPDefID() == VPRecipeBase::VPReductionSC ||
938 R->getVPDefID() == VPRecipeBase::VPReductionEVLSC ||
939 R->getVPDefID() == VPRecipeBase::VPReplicateSC ||
940 R->getVPDefID() == VPRecipeBase::VPVectorEndPointerSC ||
941 R->getVPDefID() == VPRecipeBase::VPVectorPointerSC;
942 }
943
944 static inline bool classof(const VPUser *U) {
945 auto *R = dyn_cast<VPRecipeBase>(U);
946 return R && classof(R);
947 }
948
949 static inline bool classof(const VPValue *V) {
950 auto *R = dyn_cast_or_null<VPRecipeBase>(V->getDefiningRecipe());
951 return R && classof(R);
952 }
953
954 VPRecipeWithIRFlags *clone() override = 0;
955
956 static inline bool classof(const VPSingleDefRecipe *U) {
957 auto *R = dyn_cast<VPRecipeBase>(U);
958 return R && classof(R);
959 }
960
961 void execute(VPTransformState &State) override = 0;
962
963 /// Compute the cost for this recipe for \p VF, using \p Opcode and \p Ctx.
965 VPCostContext &Ctx) const;
966};
967
968/// Helper to access the operand that contains the unroll part for this recipe
969/// after unrolling.
970template <unsigned PartOpIdx> class LLVM_ABI_FOR_TEST VPUnrollPartAccessor {
971protected:
972 /// Return the VPValue operand containing the unroll part or null if there is
973 /// no such operand.
974 VPValue *getUnrollPartOperand(const VPUser &U) const;
975
976 /// Return the unroll part.
977 unsigned getUnrollPart(const VPUser &U) const;
978};
979
980/// Helper to manage IR metadata for recipes. It filters out metadata that
981/// cannot be propagated.
984
985public:
986 VPIRMetadata() = default;
987
988 /// Adds metadata that can be preserved from the original instruction
989 /// \p I.
991
992 /// Copy constructor for cloning.
993 VPIRMetadata(const VPIRMetadata &Other) = default;
994
996
997 /// Add all metadata to \p I.
998 void applyMetadata(Instruction &I) const;
999
1000 /// Set metadata with kind \p Kind to \p Node. If metadata with \p Kind
1001 /// already exists, it will be replaced. Otherwise, it will be added.
1002 void setMetadata(unsigned Kind, MDNode *Node) {
// Look for an existing (kind, node) pair whose kind equals \p Kind.
1003 auto It =
1004 llvm::find_if(Metadata, [Kind](const std::pair<unsigned, MDNode *> &P) {
1005 return P.first == Kind;
1006 });
// Overwrite the node of the matching pair if there is one; otherwise append
// a new (Kind, Node) pair at the end of Metadata.
1007 if (It != Metadata.end())
1008 It->second = Node;
1009 else
1010 Metadata.emplace_back(Kind, Node);
1011 }
1012
1013 /// Intersect this VPIRMetadata object with \p MD, keeping only metadata
1014 /// nodes that are common to both.
1015 void intersect(const VPIRMetadata &MD);
1016
1017 /// Get metadata of kind \p Kind. Returns nullptr if not found.
1018 MDNode *getMetadata(unsigned Kind) const {
// Find the first stored pair whose kind equals \p Kind.
1019 auto It =
1020 find_if(Metadata, [Kind](const auto &P) { return P.first == Kind; });
// When nothing matches, It equals end() and nullptr is returned.
1021 return It != Metadata.end() ? It->second : nullptr;
1022 }
1023
1024#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1025 /// Print metadata with node IDs.
1026 void print(raw_ostream &O, VPSlotTracker &SlotTracker) const;
1027#endif
1028};
1029
1030/// This is a concrete Recipe that models a single VPlan-level instruction.
1031/// While as any Recipe it may generate a sequence of IR instructions when
1032/// executed, these instructions would always form a single-def expression as
1033/// the VPInstruction is also a single def-use vertex.
1035 public VPIRMetadata,
1036 public VPUnrollPartAccessor<1> {
1037 friend class VPlanSlp;
1038
1039public:
1040 /// VPlan opcodes, extending LLVM IR with idiomatic instructions.
1041 enum {
1043 Instruction::OtherOpsEnd + 1, // Combines the incoming and previous
1044 // values of a first-order recurrence.
1048 // Creates a mask where each lane is active (true) whilst the current
1049 // counter (first operand + index) is less than the second operand. i.e.
1050 // mask[i] = icmp ult (op0 + i), op1
1051 // The size of the mask returned is VF * Multiplier (UF, third op).
1055 // Increment the canonical IV separately for each unrolled part.
1060 /// Given operands of (the same) struct type, creates a struct of fixed-
1061 /// width vectors each containing a struct field of all operands. The
1062 /// number of operands matches the element count of every vector.
1064 /// Creates a fixed-width vector containing all operands. The number of
1065 /// operands matches the vector element count.
1067 /// Extracts all lanes from its (non-scalable) vector operand. This is an
1068 /// abstract VPInstruction whose single defined VPValue represents VF
1069 /// scalars extracted from a vector, to be replaced by VF ExtractElement
1070 /// VPInstructions.
1072 /// Compute the final result of an AnyOf reduction with select(cmp(),x,y),
1073 /// where one of (x,y) is loop invariant, and both x and y are integer type.
1077 // Extracts the last part of its operand. Removed during unrolling.
1079 // Extracts the last lane of its vector operand, per part.
1081 // Extracts the second-to-last lane from its operand or the second-to-last
1082 // part if it is scalar. In the latter case, the recipe will be removed
1083 // during unrolling.
1085 LogicalAnd, // Non-poison propagating logical And.
1086 // Add an offset in bytes (second operand) to a base pointer (first
1087 // operand). Only generates scalar values (either for the first lane only or
1088 // for all lanes, depending on its uses).
1090 // Add a vector offset in bytes (second operand) to a scalar base pointer
1091 // (first operand).
1093 // Returns a scalar boolean value, which is true if any lane of its
1094 // (boolean) vector operands is true. It produces the reduced value across
1095 // all unrolled iterations. Unrolling will add all copies of its original
1096 // operand as additional operands. AnyOf is poison-safe as all operands
1097 // will be frozen.
1099 // Calculates the first active lane index of the vector predicate operands.
1100 // It produces the lane index across all unrolled iterations. Unrolling will
1101 // add all copies of its original operand as additional operands.
1102 // Implemented with @llvm.experimental.cttz.elts, but returns the expected
1103 // result even with operands that are all zeroes.
1105 // Calculates the last active lane index of the vector predicate operands.
1106 // The predicates must be prefix-masks (all 1s before all 0s). Used when
1107 // tail-folding to extract the correct live-out value from the last active
1108 // iteration. It produces the lane index across all unrolled iterations.
1109 // Unrolling will add all copies of its original operand as additional
1110 // operands.
1112
1113 // The opcodes below are used for VPInstructionWithType.
1114 //
1115 /// Scale the first operand (vector step) by the second operand
1116 /// (scalar-step). Casts both operands to the result type if needed.
1118 /// Start vector for reductions with 3 operands: the original start value,
1119 /// the identity value for the reduction and an integer indicating the
1120 /// scaling factor.
1122 // Creates a step vector starting from 0 to VF with a step of 1.
1124 /// Extracts a single lane (first operand) from a set of vector operands.
1125 /// The lane specifies an index into a vector formed by combining all vector
1126 /// operands (all operands after the first one).
1128 /// Explicit user for the resume phi of the canonical induction in the main
1129 /// VPlan, used by the epilogue vector loop.
1131 /// Returns the value for vscale.
1134 };
1135
1136 /// Returns true if this VPInstruction generates scalar values for all lanes.
1137 /// Most VPInstructions generate a single value per part, either vector or
1138 /// scalar. VPReplicateRecipe takes care of generating multiple (scalar)
1139 /// values per all lanes, stemming from an original ingredient. This method
1140 /// identifies the (rare) cases of VPInstructions that do so as well, w/o an
1141 /// underlying ingredient.
1142 bool doesGeneratePerAllLanes() const;
1143
1144private:
1145 typedef unsigned char OpcodeTy;
1146 OpcodeTy Opcode;
1147
1148 /// An optional name that can be used for the generated IR instruction.
1149 std::string Name;
1150
1151 /// Returns true if we can generate a scalar for the first lane only if
1152 /// needed.
1153 bool canGenerateScalarForFirstLane() const;
1154
1155 /// Utility methods serving execute(): generates a single vector instance of
1156 /// the modeled instruction. \returns the generated value. In some cases an
1157 /// existing value is returned rather than a generated one.
1158 Value *generate(VPTransformState &State);
1159
1160#if !defined(NDEBUG)
1161 /// Return the number of operands determined by the opcode of the
1162 /// VPInstruction. Returns -1u if the number of operands cannot be determined
1163 /// directly by the opcode.
1164 static unsigned getNumOperandsForOpcode(unsigned Opcode);
1165#endif
1166
1167public:
1168 VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands,
1169 const VPIRFlags &Flags = {}, const VPIRMetadata &MD = {},
1170 DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "");
1171
1172 VP_CLASSOF_IMPL(VPDef::VPInstructionSC)
1173
/// Creates a new VPInstruction with the same opcode, operands, debug location
/// and name as this one. `*this` is passed for both the flags and the
/// metadata constructor parameters, so both are taken from this recipe.
1174 VPInstruction *clone() override {
1175 auto *New = new VPInstruction(Opcode, operands(), *this, *this,
1176 getDebugLoc(), Name);
// Carry the underlying value over to the clone only when one is set.
1177 if (getUnderlyingValue())
1178 New->setUnderlyingValue(getUnderlyingInstr());
1179 return New;
1180 }
1181
1182 unsigned getOpcode() const { return Opcode; }
1183
1184 /// Generate the instruction.
1185 /// TODO: We currently execute only per-part unless a specific instance is
1186 /// provided.
1187 void execute(VPTransformState &State) override;
1188
1189 /// Return the cost of this VPInstruction.
1190 InstructionCost computeCost(ElementCount VF,
1191 VPCostContext &Ctx) const override;
1192
1193#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1194 /// Print the VPInstruction to dbgs() (for debugging).
1195 LLVM_DUMP_METHOD void dump() const;
1196#endif
1197
1198 bool hasResult() const {
1199 // CallInst may or may not have a result, depending on the called function.
1200 // Conservatively report that calls have results for now.
1201 switch (getOpcode()) {
1202 case Instruction::Ret:
1203 case Instruction::Br:
1204 case Instruction::Store:
1205 case Instruction::Switch:
1206 case Instruction::IndirectBr:
1207 case Instruction::Resume:
1208 case Instruction::CatchRet:
1209 case Instruction::Unreachable:
1210 case Instruction::Fence:
1211 case Instruction::AtomicRMW:
1214 return false;
1215 default:
1216 return true;
1217 }
1218 }
1219
1220 /// Returns true if the underlying opcode may read from or write to memory.
1221 bool opcodeMayReadOrWriteFromMemory() const;
1222
1223 /// Returns true if the recipe only uses the first lane of operand \p Op.
1224 bool usesFirstLaneOnly(const VPValue *Op) const override;
1225
1226 /// Returns true if the recipe only uses the first part of operand \p Op.
1227 bool usesFirstPartOnly(const VPValue *Op) const override;
1228
1229 /// Returns true if this VPInstruction produces a scalar value from a vector,
1230 /// e.g. by performing a reduction or extracting a lane.
1231 bool isVectorToScalar() const;
1232
1233 /// Returns true if this VPInstruction's operands are single scalars and the
1234 /// result is also a single scalar.
1235 bool isSingleScalar() const;
1236
1237 /// Returns the symbolic name assigned to the VPInstruction.
1238 StringRef getName() const { return Name; }
1239
1240 /// Set the symbolic name for the VPInstruction.
1241 void setName(StringRef NewName) { Name = NewName.str(); }
1242
1243protected:
1244#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1245 /// Print the VPInstruction to \p O.
1246 void printRecipe(raw_ostream &O, const Twine &Indent,
1247 VPSlotTracker &SlotTracker) const override;
1248#endif
1249};
1250
1251/// A specialization of VPInstruction augmenting it with a dedicated result
1252/// type, to be used when the opcode and operands of the VPInstruction don't
1253/// directly determine the result type. Note that there is no separate VPDef ID
1254/// for VPInstructionWithType; it shares the same ID as VPInstruction and is
1255/// distinguished purely by the opcode.
1257 /// Scalar result type produced by the recipe.
1258 Type *ResultTy;
1259
1260public:
1262 Type *ResultTy, const VPIRFlags &Flags = {},
1263 const VPIRMetadata &Metadata = {},
1265 const Twine &Name = "")
1266 : VPInstruction(Opcode, Operands, Flags, Metadata, DL, Name),
1267 ResultTy(ResultTy) {}
1268
1269 static inline bool classof(const VPRecipeBase *R) {
1270 // VPInstructionWithType are VPInstructions with specific opcodes requiring
1271 // type information.
1272 if (R->isScalarCast())
1273 return true;
1274 auto *VPI = dyn_cast<VPInstruction>(R);
1275 if (!VPI)
1276 return false;
1277 switch (VPI->getOpcode()) {
1281 return true;
1282 default:
1283 return false;
1284 }
1285 }
1286
1287 static inline bool classof(const VPUser *R) {
1289 }
1290
1291 VPInstruction *clone() override {
1292 auto *New =
1294 *this, *this, getDebugLoc(), getName());
1295 New->setUnderlyingValue(getUnderlyingValue());
1296 return New;
1297 }
1298
1299 void execute(VPTransformState &State) override;
1300
1301 /// Return the cost of this VPInstruction.
1303 VPCostContext &Ctx) const override {
1304 // TODO: Compute accurate cost after retiring the legacy cost model.
1305 return 0;
1306 }
1307
1308 Type *getResultType() const { return ResultTy; }
1309
1310protected:
1311#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1312 /// Print the recipe.
1313 void printRecipe(raw_ostream &O, const Twine &Indent,
1314 VPSlotTracker &SlotTracker) const override;
1315#endif
1316};
1317
1318/// Helper type to provide functions to access incoming values and blocks for
1319/// phi-like recipes.
1321protected:
1322 /// Return a VPRecipeBase* to the current object.
1323 virtual const VPRecipeBase *getAsRecipe() const = 0;
1324
1325public:
1326 virtual ~VPPhiAccessors() = default;
1327
1328 /// Returns the incoming VPValue with index \p Idx.
1329 VPValue *getIncomingValue(unsigned Idx) const {
1330 return getAsRecipe()->getOperand(Idx);
1331 }
1332
1333 /// Returns the incoming block with index \p Idx.
1334 const VPBasicBlock *getIncomingBlock(unsigned Idx) const;
1335
1336 /// Returns the number of incoming values, also number of incoming blocks.
1337 virtual unsigned getNumIncoming() const {
1338 return getAsRecipe()->getNumOperands();
1339 }
1340
1341 /// Returns an iterator range over the incoming values.
1343 return make_range(getAsRecipe()->op_begin(),
1344 getAsRecipe()->op_begin() + getNumIncoming());
1345 }
1346
1348 detail::index_iterator, std::function<const VPBasicBlock *(size_t)>>>;
1349
1350 /// Returns an iterator range over the incoming blocks.
1352 std::function<const VPBasicBlock *(size_t)> GetBlock = [this](size_t Idx) {
1353 return getIncomingBlock(Idx);
1354 };
1355 return map_range(index_range(0, getNumIncoming()), GetBlock);
1356 }
1357
1358 /// Returns an iterator range over pairs of incoming values and corresponding
1359 /// incoming blocks.
1365
1366 /// Removes the incoming value for \p IncomingBlock, which must be a
1367 /// predecessor.
1368 void removeIncomingValueFor(VPBlockBase *IncomingBlock) const;
1369
1370#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1371 /// Print the recipe.
1373#endif
1374};
1375
1377 VPPhi(ArrayRef<VPValue *> Operands, DebugLoc DL, const Twine &Name = "")
1378 : VPInstruction(Instruction::PHI, Operands, {}, {}, DL, Name) {}
1379
/// Returns true if \p U is a VPInstruction whose opcode is Instruction::PHI.
1380 static inline bool classof(const VPUser *U) {
1381 auto *VPI = dyn_cast<VPInstruction>(U);
// Null VPI (not a VPInstruction) short-circuits to false.
1382 return VPI && VPI->getOpcode() == Instruction::PHI;
1383 }
1384
/// Returns true if \p V is a VPInstruction whose opcode is Instruction::PHI.
1385 static inline bool classof(const VPValue *V) {
1386 auto *VPI = dyn_cast<VPInstruction>(V);
// Null VPI (not a VPInstruction) short-circuits to false.
1387 return VPI && VPI->getOpcode() == Instruction::PHI;
1388 }
1389
/// Returns true if \p SDR is a VPInstruction whose opcode is
/// Instruction::PHI.
1390 static inline bool classof(const VPSingleDefRecipe *SDR) {
1391 auto *VPI = dyn_cast<VPInstruction>(SDR);
// Null VPI (not a VPInstruction) short-circuits to false.
1392 return VPI && VPI->getOpcode() == Instruction::PHI;
1393 }
1394
/// Creates a new VPPhi with the same operands, debug location and name as
/// this one, and copies the underlying value to it.
1395 VPPhi *clone() override {
1396 auto *PhiR = new VPPhi(operands(), getDebugLoc(), getName());
// Unlike the constructor arguments, the underlying value has to be set
// explicitly on the new recipe.
1397 PhiR->setUnderlyingValue(getUnderlyingValue());
1398 return PhiR;
1399 }
1400
1401 void execute(VPTransformState &State) override;
1402
1403protected:
1404#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1405 /// Print the recipe.
1406 void printRecipe(raw_ostream &O, const Twine &Indent,
1407 VPSlotTracker &SlotTracker) const override;
1408#endif
1409
1410 const VPRecipeBase *getAsRecipe() const override { return this; }
1411};
1412
1413/// A recipe to wrap an original IR instruction not to be modified during
1414/// execution, except for PHIs. PHIs are modeled via the VPIRPhi subclass.
1415/// Except for PHIs, VPIRInstructions cannot have any operands.
1417 Instruction &I;
1418
1419protected:
1420 /// VPIRInstruction::create() should be used to create VPIRInstructions, as
1421 /// subclasses may need to be created, e.g. VPIRPhi.
1423 : VPRecipeBase(VPDef::VPIRInstructionSC, ArrayRef<VPValue *>()), I(I) {}
1424
1425public:
1426 ~VPIRInstruction() override = default;
1427
1428 /// Create a new VPIRPhi for \p I, if it is a PHINode, otherwise create a
1429 /// VPIRInstruction.
1431
1432 VP_CLASSOF_IMPL(VPDef::VPIRInstructionSC)
1433
1435 auto *R = create(I);
1436 for (auto *Op : operands())
1437 R->addOperand(Op);
1438 return R;
1439 }
1440
1441 void execute(VPTransformState &State) override;
1442
1443 /// Return the cost of this VPIRInstruction.
1445 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
1446
1447 Instruction &getInstruction() const { return I; }
1448
1449 bool usesScalars(const VPValue *Op) const override {
1451 "Op must be an operand of the recipe");
1452 return true;
1453 }
1454
1455 bool usesFirstPartOnly(const VPValue *Op) const override {
1457 "Op must be an operand of the recipe");
1458 return true;
1459 }
1460
1461 bool usesFirstLaneOnly(const VPValue *Op) const override {
1463 "Op must be an operand of the recipe");
1464 return true;
1465 }
1466
1467 /// Update the recipe's first operand to the last lane of the last part of the
1468 /// operand using \p Builder. Must only be used for VPIRInstructions with at
1469 /// least one operand wrapping a PHINode.
1471
1472protected:
1473#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1474 /// Print the recipe.
1475 void printRecipe(raw_ostream &O, const Twine &Indent,
1476 VPSlotTracker &SlotTracker) const override;
1477#endif
1478};
1479
1480/// An overlay for VPIRInstructions wrapping PHI nodes enabling convenient use
1481/// of cast/dyn_cast/isa and execute() implementation. A single VPValue operand is
1482/// allowed, and it is used to add a new incoming value for the single
1483/// predecessor VPBB.
1485 public VPPhiAccessors {
1487
/// Returns true if \p U is a VPIRInstruction whose wrapped IR instruction is
/// a PHINode.
1488 static inline bool classof(const VPRecipeBase *U) {
1489 auto *R = dyn_cast<VPIRInstruction>(U);
// Null R (not a VPIRInstruction) short-circuits to false.
1490 return R && isa<PHINode>(R->getInstruction());
1491 }
1492
1494
1495 void execute(VPTransformState &State) override;
1496
1497protected:
1498#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1499 /// Print the recipe.
1500 void printRecipe(raw_ostream &O, const Twine &Indent,
1501 VPSlotTracker &SlotTracker) const override;
1502#endif
1503
1504 const VPRecipeBase *getAsRecipe() const override { return this; }
1505};
1506
1507/// VPWidenRecipe is a recipe for producing a widened instruction using the
1508/// opcode and operands of the recipe. This recipe covers most of the
1509/// traditional vectorization cases where each recipe transforms into a
1510/// vectorized version of itself.
1512 public VPIRMetadata {
1513 unsigned Opcode;
1514
1515public:
1517 const VPIRFlags &Flags = {}, const VPIRMetadata &Metadata = {},
1518 DebugLoc DL = {})
1519 : VPRecipeWithIRFlags(VPDef::VPWidenSC, Operands, Flags, DL),
1520 VPIRMetadata(Metadata), Opcode(I.getOpcode()) {
1521 setUnderlyingValue(&I);
1522 }
1523
1524 ~VPWidenRecipe() override = default;
1525
/// Creates a new VPWidenRecipe from this recipe's underlying instruction,
/// operands and debug location. `*this` is passed for both the flags and the
/// metadata constructor parameters, so both are taken from this recipe.
1526 VPWidenRecipe *clone() override {
1527 return new VPWidenRecipe(*getUnderlyingInstr(), operands(), *this, *this,
1528 getDebugLoc());
1529 }
1530
1531 VP_CLASSOF_IMPL(VPDef::VPWidenSC)
1532
1533 /// Produce a widened instruction using the opcode and operands of the recipe,
1534 /// processing State.VF elements.
1535 void execute(VPTransformState &State) override;
1536
1537 /// Return the cost of this VPWidenRecipe.
1538 InstructionCost computeCost(ElementCount VF,
1539 VPCostContext &Ctx) const override;
1540
1541 unsigned getOpcode() const { return Opcode; }
1542
1543protected:
1544#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1545 /// Print the recipe.
1546 void printRecipe(raw_ostream &O, const Twine &Indent,
1547 VPSlotTracker &SlotTracker) const override;
1548#endif
1549};
1550
1551/// VPWidenCastRecipe is a recipe to create vector cast instructions.
1553 /// Cast instruction opcode.
1554 Instruction::CastOps Opcode;
1555
1556 /// Result type for the cast.
1557 Type *ResultTy;
1558
1559public:
1561 CastInst *CI = nullptr, const VPIRFlags &Flags = {},
1562 const VPIRMetadata &Metadata = {},
1564 : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op, Flags, DL),
1565 VPIRMetadata(Metadata), Opcode(Opcode), ResultTy(ResultTy) {
1566 assert(flagsValidForOpcode(Opcode) &&
1567 "Set flags not supported for the provided opcode");
1569 }
1570
1571 ~VPWidenCastRecipe() override = default;
1572
1574 return new VPWidenCastRecipe(Opcode, getOperand(0), ResultTy,
1576 *this, *this, getDebugLoc());
1577 }
1578
1579 VP_CLASSOF_IMPL(VPDef::VPWidenCastSC)
1580
1581 /// Produce widened copies of the cast.
1582 LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
1583
1584 /// Return the cost of this VPWidenCastRecipe.
1586 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
1587
1588 Instruction::CastOps getOpcode() const { return Opcode; }
1589
1590 /// Returns the result type of the cast.
1591 Type *getResultType() const { return ResultTy; }
1592
1593protected:
1594#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1595 /// Print the recipe.
1596 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
1597 VPSlotTracker &SlotTracker) const override;
1598#endif
1599};
1600
1601/// A recipe for widening vector intrinsics.
1603 /// ID of the vector intrinsic to widen.
1604 Intrinsic::ID VectorIntrinsicID;
1605
1606 /// Scalar return type of the intrinsic.
1607 Type *ResultTy;
1608
1609 /// True if the intrinsic may read from memory.
1610 bool MayReadFromMemory;
1611
1612 /// True if the intrinsic may write to memory.
1613 bool MayWriteToMemory;
1614
1615 /// True if the intrinsic may have side-effects.
1616 bool MayHaveSideEffects;
1617
1618public:
1620 ArrayRef<VPValue *> CallArguments, Type *Ty,
1621 const VPIRFlags &Flags = {},
1622 const VPIRMetadata &MD = {},
1624 : VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments, Flags,
1625 DL),
1626 VPIRMetadata(MD), VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty),
1627 MayReadFromMemory(CI.mayReadFromMemory()),
1628 MayWriteToMemory(CI.mayWriteToMemory()),
1629 MayHaveSideEffects(CI.mayHaveSideEffects()) {
1630 setUnderlyingValue(&CI);
1631 }
1632
1634 ArrayRef<VPValue *> CallArguments, Type *Ty,
1635 const VPIRFlags &Flags = {},
1636 const VPIRMetadata &Metadata = {},
1638 : VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments, Flags,
1639 DL),
1640 VPIRMetadata(Metadata), VectorIntrinsicID(VectorIntrinsicID),
1641 ResultTy(Ty) {
1642 LLVMContext &Ctx = Ty->getContext();
1643 AttributeSet Attrs = Intrinsic::getFnAttributes(Ctx, VectorIntrinsicID);
1644 MemoryEffects ME = Attrs.getMemoryEffects();
1645 MayReadFromMemory = !ME.onlyWritesMemory();
1646 MayWriteToMemory = !ME.onlyReadsMemory();
1647 MayHaveSideEffects = MayWriteToMemory ||
1648 !Attrs.hasAttribute(Attribute::NoUnwind) ||
1649 !Attrs.hasAttribute(Attribute::WillReturn);
1650 }
1651
1652 ~VPWidenIntrinsicRecipe() override = default;
1653
1655 if (Value *CI = getUnderlyingValue())
1656 return new VPWidenIntrinsicRecipe(*cast<CallInst>(CI), VectorIntrinsicID,
1657 operands(), ResultTy, *this, *this,
1658 getDebugLoc());
1659 return new VPWidenIntrinsicRecipe(VectorIntrinsicID, operands(), ResultTy,
1660 *this, *this, getDebugLoc());
1661 }
1662
1663 VP_CLASSOF_IMPL(VPDef::VPWidenIntrinsicSC)
1664
1665 /// Produce a widened version of the vector intrinsic.
1666 LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
1667
1668 /// Return the cost of this vector intrinsic.
1670 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
1671
1672 /// Return the ID of the intrinsic.
1673 Intrinsic::ID getVectorIntrinsicID() const { return VectorIntrinsicID; }
1674
1675 /// Return the scalar return type of the intrinsic.
1676 Type *getResultType() const { return ResultTy; }
1677
1678 /// Return the name of the intrinsic as string.
1680
1681 /// Returns true if the intrinsic may read from memory.
1682 bool mayReadFromMemory() const { return MayReadFromMemory; }
1683
1684 /// Returns true if the intrinsic may write to memory.
1685 bool mayWriteToMemory() const { return MayWriteToMemory; }
1686
1687 /// Returns true if the intrinsic may have side-effects.
1688 bool mayHaveSideEffects() const { return MayHaveSideEffects; }
1689
1690 LLVM_ABI_FOR_TEST bool usesFirstLaneOnly(const VPValue *Op) const override;
1691
1692protected:
1693#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1694 /// Print the recipe.
1695 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
1696 VPSlotTracker &SlotTracker) const override;
1697#endif
1698};
1699
1700/// A recipe for widening Call instructions using library calls.
1702 public VPIRMetadata {
1703 /// Variant stores a pointer to the chosen function. There is a 1:1 mapping
1704 /// between a given VF and the chosen vectorized variant, so there will be a
1705 /// different VPlan for each VF with a valid variant.
1706 Function *Variant;
1707
1708public:
1710 ArrayRef<VPValue *> CallArguments,
1711 const VPIRFlags &Flags = {},
1712 const VPIRMetadata &Metadata = {}, DebugLoc DL = {})
1713 : VPRecipeWithIRFlags(VPDef::VPWidenCallSC, CallArguments, Flags, DL),
1714 VPIRMetadata(Metadata), Variant(Variant) {
1715 setUnderlyingValue(UV);
1716 assert(
1717 isa<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue()) &&
1718 "last operand must be the called function");
1719 }
1720
1721 ~VPWidenCallRecipe() override = default;
1722
1724 return new VPWidenCallRecipe(getUnderlyingValue(), Variant, operands(),
1725 *this, *this, getDebugLoc());
1726 }
1727
1728 VP_CLASSOF_IMPL(VPDef::VPWidenCallSC)
1729
1730 /// Produce a widened version of the call instruction.
1731 void execute(VPTransformState &State) override;
1732
1733 /// Return the cost of this VPWidenCallRecipe.
1734 InstructionCost computeCost(ElementCount VF,
1735 VPCostContext &Ctx) const override;
1736
1740
1743
1744protected:
1745#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1746 /// Print the recipe.
1747 void printRecipe(raw_ostream &O, const Twine &Indent,
1748 VPSlotTracker &SlotTracker) const override;
1749#endif
1750};
1751
1752/// A recipe representing a sequence of load -> update -> store as part of
1753/// a histogram operation. This means there may be aliasing between vector
1754/// lanes, which is handled by the llvm.experimental.vector.histogram family
1755/// of intrinsics. The only update operations currently supported are
1756/// 'add' and 'sub' where the other term is loop-invariant.
1758 /// Opcode of the update operation, currently either add or sub.
1759 unsigned Opcode;
1760
1761public:
1762 VPHistogramRecipe(unsigned Opcode, ArrayRef<VPValue *> Operands,
1764 : VPRecipeBase(VPDef::VPHistogramSC, Operands, DL), Opcode(Opcode) {}
1765
1766 ~VPHistogramRecipe() override = default;
1767
1769 return new VPHistogramRecipe(Opcode, operands(), getDebugLoc());
1770 }
1771
1772 VP_CLASSOF_IMPL(VPDef::VPHistogramSC);
1773
1774 /// Produce a vectorized histogram operation.
1775 void execute(VPTransformState &State) override;
1776
1777 /// Return the cost of this VPHistogramRecipe.
1779 VPCostContext &Ctx) const override;
1780
1781 unsigned getOpcode() const { return Opcode; }
1782
1783 /// Return the mask operand if one was provided, or a null pointer if all
1784 /// lanes should be executed unconditionally.
1785 VPValue *getMask() const {
// The mask is only present when the recipe has exactly three operands, in
// which case it is the last one (index 2).
1786 return getNumOperands() == 3 ? getOperand(2) : nullptr;
1787 }
1788
1789protected:
1790#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1791 /// Print the recipe
1792 void printRecipe(raw_ostream &O, const Twine &Indent,
1793 VPSlotTracker &SlotTracker) const override;
1794#endif
1795};
1796
1797/// A recipe for widening select instructions. Supports both wide vector and
1798/// single-scalar conditions, matching the behavior of LLVM IR's select
1799/// instruction.
1801 public VPIRMetadata {
1803 const VPIRFlags &Flags = {}, const VPIRMetadata &MD = {},
1804 DebugLoc DL = {})
1805 : VPRecipeWithIRFlags(VPDef::VPWidenSelectSC, Operands, Flags, DL),
1806 VPIRMetadata(MD) {
1807 setUnderlyingValue(SI);
1808 }
1809
1810 ~VPWidenSelectRecipe() override = default;
1811
1814 operands(), *this, *this, getDebugLoc());
1815 }
1816
1817 VP_CLASSOF_IMPL(VPDef::VPWidenSelectSC)
1818
1819 /// Produce a widened version of the select instruction.
1820 void execute(VPTransformState &State) override;
1821
1822 /// Return the cost of this VPWidenSelectRecipe.
1823 InstructionCost computeCost(ElementCount VF,
1824 VPCostContext &Ctx) const override;
1825
1826 unsigned getOpcode() const { return Instruction::Select; }
1827
/// Returns the condition operand, which is operand 0 of the recipe.
1828 VPValue *getCond() const {
1829 return getOperand(0);
1830 }
1831
1832 /// Returns true if the recipe only uses the first lane of operand \p Op.
1833 bool usesFirstLaneOnly(const VPValue *Op) const override {
1835 "Op must be an operand of the recipe");
1836 return Op == getCond() && Op->isDefinedOutsideLoopRegions();
1837 }
1838
1839protected:
1840#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1841 /// Print the recipe.
1842 void printRecipe(raw_ostream &O, const Twine &Indent,
1843 VPSlotTracker &SlotTracker) const override;
1844#endif
1845};
1846
1847/// A recipe for handling GEP instructions.
1849 Type *SourceElementTy;
1850
/// Returns true if operand 0 (the pointer operand, going by this method's
/// name) is defined outside of any loop region.
1851 bool isPointerLoopInvariant() const {
1852 return getOperand(0)->isDefinedOutsideLoopRegions();
1853 }
1854
/// Returns true if the operand at position \p I + 1 is defined outside of any
/// loop region. Indices are offset by one, presumably because operand 0 is
/// the pointer operand (see isPointerLoopInvariant).
1855 bool isIndexLoopInvariant(unsigned I) const {
1856 return getOperand(I + 1)->isDefinedOutsideLoopRegions();
1857 }
1858
1859public:
1861 const VPIRFlags &Flags = {},
1863 : VPRecipeWithIRFlags(VPDef::VPWidenGEPSC, Operands, Flags, DL),
1864 SourceElementTy(GEP->getSourceElementType()) {
1865 setUnderlyingValue(GEP);
1867 (void)Metadata;
1869 assert(Metadata.empty() && "unexpected metadata on GEP");
1870 }
1871
1872 ~VPWidenGEPRecipe() override = default;
1873
1876 operands(), *this, getDebugLoc());
1877 }
1878
1879 VP_CLASSOF_IMPL(VPDef::VPWidenGEPSC)
1880
1881 /// This recipe generates a GEP instruction.
1882 unsigned getOpcode() const { return Instruction::GetElementPtr; }
1883
1884 /// Generate the gep nodes.
1885 void execute(VPTransformState &State) override;
1886
1887 Type *getSourceElementType() const { return SourceElementTy; }
1888
1889 /// Return the cost of this VPWidenGEPRecipe.
1891 VPCostContext &Ctx) const override {
1892 // TODO: Compute accurate cost after retiring the legacy cost model.
1893 return 0;
1894 }
1895
1896 /// Returns true if the recipe only uses the first lane of operand \p Op.
1897 bool usesFirstLaneOnly(const VPValue *Op) const override;
1898
1899protected:
1900#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1901 /// Print the recipe.
1902 void printRecipe(raw_ostream &O, const Twine &Indent,
1903 VPSlotTracker &SlotTracker) const override;
1904#endif
1905};
1906
1907/// A recipe to compute a pointer to the last element of each part of a widened
1908/// memory access for widened memory accesses of IndexedTy. Used for
1909/// VPWidenMemoryRecipes or VPInterleaveRecipes that are reversed.
1911 public VPUnrollPartAccessor<2> {
1912 Type *IndexedTy;
1913
1914 /// The constant stride of the pointer computed by this recipe, expressed in
1915 /// units of IndexedTy.
1916 int64_t Stride;
1917
1918public:
1920 int64_t Stride, GEPNoWrapFlags GEPFlags, DebugLoc DL)
1921 : VPRecipeWithIRFlags(VPDef::VPVectorEndPointerSC,
1922 ArrayRef<VPValue *>({Ptr, VF}), GEPFlags, DL),
1923 IndexedTy(IndexedTy), Stride(Stride) {
1924 assert(Stride < 0 && "Stride must be negative");
1925 }
1926
1927 VP_CLASSOF_IMPL(VPDef::VPVectorEndPointerSC)
1928
1930 const VPValue *getVFValue() const { return getOperand(1); }
1931
1932 void execute(VPTransformState &State) override;
1933
1934 bool usesFirstLaneOnly(const VPValue *Op) const override {
1936 "Op must be an operand of the recipe");
1937 return true;
1938 }
1939
1940 /// Return the cost of this VPVectorEndPointerRecipe.
1942 VPCostContext &Ctx) const override {
1943 // TODO: Compute accurate cost after retiring the legacy cost model.
1944 return 0;
1945 }
1946
1947 /// Returns true if the recipe only uses the first part of operand \p Op.
1948 bool usesFirstPartOnly(const VPValue *Op) const override {
1950 "Op must be an operand of the recipe");
1951 assert(getNumOperands() <= 2 && "must have at most two operands");
1952 return true;
1953 }
1954
1956 return new VPVectorEndPointerRecipe(getOperand(0), getVFValue(), IndexedTy,
1957 Stride, getGEPNoWrapFlags(),
1958 getDebugLoc());
1959 }
1960
1961protected:
1962#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1963 /// Print the recipe.
1964 void printRecipe(raw_ostream &O, const Twine &Indent,
1965 VPSlotTracker &SlotTracker) const override;
1966#endif
1967};
1968
1969/// A recipe to compute the pointers for widened memory accesses of
1969/// SourceElementTy.
1971 public VPUnrollPartAccessor<1> {
1972 Type *SourceElementTy;
1973
1974public:
1975 VPVectorPointerRecipe(VPValue *Ptr, Type *SourceElementTy,
1977 : VPRecipeWithIRFlags(VPDef::VPVectorPointerSC, ArrayRef<VPValue *>(Ptr),
1978 GEPFlags, DL),
1979 SourceElementTy(SourceElementTy) {}
1980
1981 VP_CLASSOF_IMPL(VPDef::VPVectorPointerSC)
1982
1983 void execute(VPTransformState &State) override;
1984
1985 Type *getSourceElementType() const { return SourceElementTy; }
1986
1987 bool usesFirstLaneOnly(const VPValue *Op) const override {
1989 "Op must be an operand of the recipe");
1990 return true;
1991 }
1992
1993 /// Returns true if the recipe only uses the first part of operand \p Op.
1994 bool usesFirstPartOnly(const VPValue *Op) const override {
1996 "Op must be an operand of the recipe");
1997 assert(getNumOperands() <= 2 && "must have at most two operands");
1998 return true;
1999 }
2000
2002 return new VPVectorPointerRecipe(getOperand(0), SourceElementTy,
2004 }
2005
2006 /// Return true if this VPVectorPointerRecipe corresponds to part 0. Note that
2007 /// this is only accurate after the VPlan has been unrolled.
2008 bool isFirstPart() const { return getUnrollPart(*this) == 0; }
2009
2010 /// Return the cost of this VPVectorPointerRecipe.
2012 VPCostContext &Ctx) const override {
2013 // TODO: Compute accurate cost after retiring the legacy cost model.
2014 return 0;
2015 }
2016
2017protected:
2018#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2019 /// Print the recipe.
2020 void printRecipe(raw_ostream &O, const Twine &Indent,
2021 VPSlotTracker &SlotTracker) const override;
2022#endif
2023};
2024
2025/// A pure virtual base class for all recipes modeling header phis, including
2026/// phis for first order recurrences, pointer inductions and reductions. The
2027/// start value is the first operand of the recipe and the incoming value from
2028/// the backedge is the second operand.
2029///
2030/// Inductions are modeled using the following sub-classes:
2031/// * VPCanonicalIVPHIRecipe: Canonical scalar induction of the vector loop,
2032/// starting at a specified value (zero for the main vector loop, the resume
2033/// value for the epilogue vector loop) and stepping by 1. The induction
2034/// controls exiting of the vector loop by comparing against the vector trip
2035/// count. Produces a single scalar PHI for the induction value per
2036/// iteration.
2037/// * VPWidenIntOrFpInductionRecipe: Generates vector values for integer and
2038/// floating point inductions with arbitrary start and step values. Produces
2039/// a vector PHI per-part.
2040/// * VPDerivedIVRecipe: Converts the canonical IV value to the corresponding
2041/// value of an IV with different start and step values. Produces a single
2042/// scalar value per iteration
2043/// * VPScalarIVStepsRecipe: Generates scalar values per-lane based on a
2044/// canonical or derived induction.
2045/// * VPWidenPointerInductionRecipe: Generate vector and scalar values for a
2046/// pointer induction. Produces either a vector PHI per-part or scalar values
2047/// per-lane based on the canonical induction.
2049 public VPPhiAccessors {
2050protected:
2051 VPHeaderPHIRecipe(unsigned char VPDefID, Instruction *UnderlyingInstr,
2052 VPValue *Start, DebugLoc DL = DebugLoc::getUnknown())
2053 : VPSingleDefRecipe(VPDefID, ArrayRef<VPValue *>({Start}),
2054 UnderlyingInstr, DL) {}
2055
2056 const VPRecipeBase *getAsRecipe() const override { return this; }
2057
2058public:
2059 ~VPHeaderPHIRecipe() override = default;
2060
2061 /// Method to support type inquiry through isa, cast, and dyn_cast.
2062 static inline bool classof(const VPRecipeBase *R) {
2063 return R->getVPDefID() >= VPDef::VPFirstHeaderPHISC &&
2064 R->getVPDefID() <= VPDef::VPLastHeaderPHISC;
2065 }
2066 static inline bool classof(const VPValue *V) {
2067 return isa<VPHeaderPHIRecipe>(V->getDefiningRecipe());
2068 }
2069 static inline bool classof(const VPSingleDefRecipe *R) {
2070 return isa<VPHeaderPHIRecipe>(static_cast<const VPRecipeBase *>(R));
2071 }
2072
2073 /// Generate the phi nodes.
2074 void execute(VPTransformState &State) override = 0;
2075
2076 /// Return the cost of this header phi recipe.
2078 VPCostContext &Ctx) const override;
2079
2080 /// Returns the start value of the phi, if one is set.
2082 return getNumOperands() == 0 ? nullptr : getOperand(0);
2083 }
2085 return getNumOperands() == 0 ? nullptr : getOperand(0);
2086 }
2087
2088 /// Update the start value of the recipe.
2090
2091 /// Returns the incoming value from the loop backedge.
2093 return getOperand(1);
2094 }
2095
2096 /// Update the incoming value from the loop backedge.
2098
2099 /// Returns the backedge value as a recipe. The backedge value is guaranteed
2100 /// to be a recipe.
2102 return *getBackedgeValue()->getDefiningRecipe();
2103 }
2104
2105protected:
2106#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2107 /// Print the recipe.
2108 void printRecipe(raw_ostream &O, const Twine &Indent,
2109 VPSlotTracker &SlotTracker) const override = 0;
2110#endif
2111};
2112
2113/// Base class for widened induction (VPWidenIntOrFpInductionRecipe and
2114/// VPWidenPointerInductionRecipe), providing shared functionality, including
2115/// retrieving the step value, induction descriptor and original phi node.
2117 const InductionDescriptor &IndDesc;
2118
2119public:
2120 VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start,
2121 VPValue *Step, const InductionDescriptor &IndDesc,
2122 DebugLoc DL)
2123 : VPHeaderPHIRecipe(Kind, IV, Start, DL), IndDesc(IndDesc) {
2124 addOperand(Step);
2125 }
2126
2127 static inline bool classof(const VPRecipeBase *R) {
2128 return R->getVPDefID() == VPDef::VPWidenIntOrFpInductionSC ||
2129 R->getVPDefID() == VPDef::VPWidenPointerInductionSC;
2130 }
2131
2132 static inline bool classof(const VPValue *V) {
2133 auto *R = V->getDefiningRecipe();
2134 return R && classof(R);
2135 }
2136
2137 static inline bool classof(const VPSingleDefRecipe *R) {
2138 return classof(static_cast<const VPRecipeBase *>(R));
2139 }
2140
2141 void execute(VPTransformState &State) override = 0;
2142
2143 /// Returns the step value of the induction.
2145 const VPValue *getStepValue() const { return getOperand(1); }
2146
2147 /// Update the step value of the recipe.
2148 void setStepValue(VPValue *V) { setOperand(1, V); }
2149
2151 const VPValue *getVFValue() const { return getOperand(2); }
2152
2153 /// Returns the number of incoming values, also number of incoming blocks.
2154 /// Note that at the moment, VPWidenPointerInductionRecipe only has a single
2155 /// incoming value, its start value.
2156 unsigned getNumIncoming() const override { return 1; }
2157
2159
2160 /// Returns the induction descriptor for the recipe.
2161 const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }
2162
2164 // TODO: All operands of base recipe must exist and be at same index in
2165 // derived recipe.
2167 "VPWidenIntOrFpInductionRecipe generates its own backedge value");
2168 }
2169
2171 // TODO: All operands of base recipe must exist and be at same index in
2172 // derived recipe.
2174 "VPWidenIntOrFpInductionRecipe generates its own backedge value");
2175 }
2176
2177 /// Returns true if the recipe only uses the first lane of operand \p Op.
2178 bool usesFirstLaneOnly(const VPValue *Op) const override {
2180 "Op must be an operand of the recipe");
2181 // The recipe creates its own wide start value, so it only requests the
2182 // first lane of the operand.
2183 // TODO: Remove once creating the start value is modeled separately.
2184 return Op == getStartValue() || Op == getStepValue();
2185 }
2186};
2187
2188/// A recipe for handling phi nodes of integer and floating-point inductions,
2189/// producing their vector values. This is an abstract recipe and must be
2190/// converted to concrete recipes before executing.
2192 public VPIRFlags {
2193 TruncInst *Trunc;
2194
2195 // If this recipe is unrolled it will have 2 additional operands.
2196 bool isUnrolled() const { return getNumOperands() == 5; }
2197
2198public:
2200 VPValue *VF, const InductionDescriptor &IndDesc,
2201 const VPIRFlags &Flags, DebugLoc DL)
2202 : VPWidenInductionRecipe(VPDef::VPWidenIntOrFpInductionSC, IV, Start,
2203 Step, IndDesc, DL),
2204 VPIRFlags(Flags), Trunc(nullptr) {
2205 addOperand(VF);
2206 }
2207
2209 VPValue *VF, const InductionDescriptor &IndDesc,
2210 TruncInst *Trunc, const VPIRFlags &Flags,
2211 DebugLoc DL)
2212 : VPWidenInductionRecipe(VPDef::VPWidenIntOrFpInductionSC, IV, Start,
2213 Step, IndDesc, DL),
2214 VPIRFlags(Flags), Trunc(Trunc) {
2215 addOperand(VF);
2217 (void)Metadata;
2218 if (Trunc)
2220 assert(Metadata.empty() && "unexpected metadata on Trunc");
2221 }
2222
2224
2230
2231 VP_CLASSOF_IMPL(VPDef::VPWidenIntOrFpInductionSC)
2232
2233 void execute(VPTransformState &State) override {
2234 llvm_unreachable("cannot execute this recipe, should be expanded via "
2235 "expandVPWidenIntOrFpInductionRecipe");
2236 }
2237
2239 // If the recipe has been unrolled return the VPValue for the induction
2240 // increment.
2241 return isUnrolled() ? getOperand(getNumOperands() - 2) : nullptr;
2242 }
2243
2244 /// Returns the number of incoming values, also number of incoming blocks.
2245 /// Note that at the moment, VPWidenIntOrFpInductionRecipes only have a single
2246 /// incoming value, its start value.
2247 unsigned getNumIncoming() const override { return 1; }
2248
2249 /// Returns the first defined value as TruncInst, if it is one or nullptr
2250 /// otherwise.
2251 TruncInst *getTruncInst() { return Trunc; }
2252 const TruncInst *getTruncInst() const { return Trunc; }
2253
2254 /// Returns true if the induction is canonical, i.e. starting at 0 and
2255 /// incremented by UF * VF (= the original IV is incremented by 1) and has the
2256 /// same type as the canonical induction.
2257 bool isCanonical() const;
2258
2259 /// Returns the scalar type of the induction.
2261 return Trunc ? Trunc->getType()
2263 }
2264
2265 /// Returns the VPValue representing the value of this induction at
2266 /// the last unrolled part, if it exists. Returns itself if unrolling did not
2267 /// take place.
2269 return isUnrolled() ? getOperand(getNumOperands() - 1) : this;
2270 }
2271
2272protected:
2273#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2274 /// Print the recipe.
2275 void printRecipe(raw_ostream &O, const Twine &Indent,
2276 VPSlotTracker &SlotTracker) const override;
2277#endif
2278};
2279
2281public:
2282 /// Create a new VPWidenPointerInductionRecipe for \p Phi with start value \p
2283 /// Start and the number of elements unrolled \p NumUnrolledElems, typically
2284 /// VF*UF.
2286 VPValue *NumUnrolledElems,
2287 const InductionDescriptor &IndDesc, DebugLoc DL)
2288 : VPWidenInductionRecipe(VPDef::VPWidenPointerInductionSC, Phi, Start,
2289 Step, IndDesc, DL) {
2290 addOperand(NumUnrolledElems);
2291 }
2292
2294
2300
2301 VP_CLASSOF_IMPL(VPDef::VPWidenPointerInductionSC)
2302
2303 /// Generate vector values for the pointer induction.
2304 void execute(VPTransformState &State) override {
2305 llvm_unreachable("cannot execute this recipe, should be expanded via "
2306 "expandVPWidenPointerInduction");
2307 };
2308
2309 /// Returns true if only scalar values will be generated.
2310 bool onlyScalarsGenerated(bool IsScalable);
2311
2312protected:
2313#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2314 /// Print the recipe.
2315 void printRecipe(raw_ostream &O, const Twine &Indent,
2316 VPSlotTracker &SlotTracker) const override;
2317#endif
2318};
2319
2320/// A recipe for widened phis. Incoming values are operands of the recipe and
2321/// their operand index corresponds to the incoming predecessor block. If the
2322/// recipe is placed in an entry block to a (non-replicate) region, it must have
2323/// exactly 2 incoming values, the first from the predecessor of the region and
2324/// the second from the exiting block of the region.
2326 public VPPhiAccessors {
2327 /// Name to use for the generated IR instruction for the widened phi.
2328 std::string Name;
2329
2330public:
2331 /// Create a new VPWidenPHIRecipe for \p Phi with start value \p Start and
2332 /// debug location \p DL.
2333 VPWidenPHIRecipe(PHINode *Phi, VPValue *Start = nullptr,
2334 DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "")
2335 : VPSingleDefRecipe(VPDef::VPWidenPHISC, {}, Phi, DL), Name(Name.str()) {
2336 if (Start)
2337 addOperand(Start);
2338 }
2339
2342 getOperand(0), getDebugLoc(), Name);
2344 C->addOperand(Op);
2345 return C;
2346 }
2347
2348 ~VPWidenPHIRecipe() override = default;
2349
2350 VP_CLASSOF_IMPL(VPDef::VPWidenPHISC)
2351
2352 /// Generate the phi/select nodes.
2353 void execute(VPTransformState &State) override;
2354
2355protected:
2356#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2357 /// Print the recipe.
2358 void printRecipe(raw_ostream &O, const Twine &Indent,
2359 VPSlotTracker &SlotTracker) const override;
2360#endif
2361
2362 const VPRecipeBase *getAsRecipe() const override { return this; }
2363};
2364
2365/// A recipe for handling first-order recurrence phis. The start value is the
2366/// first operand of the recipe and the incoming value from the backedge is the
2367/// second operand.
2370 : VPHeaderPHIRecipe(VPDef::VPFirstOrderRecurrencePHISC, Phi, &Start) {}
2371
2372 VP_CLASSOF_IMPL(VPDef::VPFirstOrderRecurrencePHISC)
2373
2378
2379 void execute(VPTransformState &State) override;
2380
2381 /// Return the cost of this first-order recurrence phi recipe.
2383 VPCostContext &Ctx) const override;
2384
2385 /// Returns true if the recipe only uses the first lane of operand \p Op.
2386 bool usesFirstLaneOnly(const VPValue *Op) const override {
2388 "Op must be an operand of the recipe");
2389 return Op == getStartValue();
2390 }
2391
2392protected:
2393#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2394 /// Print the recipe.
2395 void printRecipe(raw_ostream &O, const Twine &Indent,
2396 VPSlotTracker &SlotTracker) const override;
2397#endif
2398};
2399
/// Possible variants of a reduction. Exactly one of the alternatives below is
/// held by a ReductionStyle.

/// This reduction is ordered and in-loop.
struct RdxOrdered {};
/// This reduction is in-loop.
struct RdxInLoop {};
/// This reduction is unordered with the partial result scaled down by some
/// factor.
// NOTE(review): the struct header and member below were reconstructed from
// their uses (aggregate initialization in getReductionStyle and reads of
// `VFScaleFactor` as an unsigned value) -- confirm against upstream.
struct RdxUnordered {
  /// Factor by which the VF of the partial result is scaled down.
  unsigned VFScaleFactor;
};
using ReductionStyle = std::variant<RdxOrdered, RdxInLoop, RdxUnordered>;

/// Build the ReductionStyle described by the given flags.
///
/// \param InLoop      True if the reduction is performed in-loop.
/// \param Ordered     True if the reduction is ordered; asserted to imply
///                    \p InLoop.
/// \param ScaleFactor Stored as RdxUnordered::VFScaleFactor when the result is
///                    the unordered alternative; ignored otherwise.
/// \returns RdxOrdered if \p Ordered is set, otherwise RdxInLoop if \p InLoop
///          is set, otherwise RdxUnordered carrying \p ScaleFactor.
inline ReductionStyle getReductionStyle(bool InLoop, bool Ordered,
                                        unsigned ScaleFactor) {
  assert((!Ordered || InLoop) && "Ordered implies in-loop");
  // Ordered is checked first: an ordered reduction is also in-loop, but is
  // represented by its own alternative.
  if (Ordered)
    return RdxOrdered{};
  if (InLoop)
    return RdxInLoop{};
  return RdxUnordered{/*VFScaleFactor=*/ScaleFactor};
}
2422
2423/// A recipe for handling reduction phis. The start value is the first operand
2424/// of the recipe and the incoming value from the backedge is the second
2425/// operand.
2427 public VPUnrollPartAccessor<2> {
2428 /// The recurrence kind of the reduction.
2429 const RecurKind Kind;
2430
2431 ReductionStyle Style;
2432
2433 /// The phi is part of a multi-use reduction (e.g., used in FindLastIV
2434 /// patterns for argmin/argmax).
2435 /// TODO: Also support cases where the phi itself has a single use, but its
2436 /// compare has multiple uses.
2437 bool HasUsesOutsideReductionChain;
2438
2439public:
2440 /// Create a new VPReductionPHIRecipe for the reduction \p Phi.
2442 ReductionStyle Style,
2443 bool HasUsesOutsideReductionChain = false)
2444 : VPHeaderPHIRecipe(VPDef::VPReductionPHISC, Phi, &Start), Kind(Kind),
2445 Style(Style),
2446 HasUsesOutsideReductionChain(HasUsesOutsideReductionChain) {}
2447
2448 ~VPReductionPHIRecipe() override = default;
2449
2451 auto *R = new VPReductionPHIRecipe(
2453 *getOperand(0), Style, HasUsesOutsideReductionChain);
2454 R->addOperand(getBackedgeValue());
2455 return R;
2456 }
2457
2458 VP_CLASSOF_IMPL(VPDef::VPReductionPHISC)
2459
2460 /// Generate the phi/select nodes.
2461 void execute(VPTransformState &State) override;
2462
2463 /// Get the factor that the VF of this recipe's output should be scaled by, or
2464 /// 1 if it isn't scaled.
2465 unsigned getVFScaleFactor() const {
2466 auto *Partial = std::get_if<RdxUnordered>(&Style);
2467 return Partial ? Partial->VFScaleFactor : 1;
2468 }
2469
2470 /// Returns the number of incoming values, also number of incoming blocks.
2471 /// A reduction phi has two incoming values: the start value and the value
2472 /// from the loop backedge.
2473 unsigned getNumIncoming() const override { return 2; }
2474
2475 /// Returns the recurrence kind of the reduction.
2476 RecurKind getRecurrenceKind() const { return Kind; }
2477
2478 /// Returns true, if the phi is part of an ordered reduction.
2479 bool isOrdered() const { return std::holds_alternative<RdxOrdered>(Style); }
2480
2481 /// Returns true if the phi is part of an in-loop reduction.
2482 bool isInLoop() const {
2483 return std::holds_alternative<RdxInLoop>(Style) ||
2484 std::holds_alternative<RdxOrdered>(Style);
2485 }
2486
2487 /// Returns true if the reduction outputs a vector with a scaled down VF.
2488 bool isPartialReduction() const { return getVFScaleFactor() > 1; }
2489
2490 /// Returns true, if the phi is part of a multi-use reduction.
2492 return HasUsesOutsideReductionChain;
2493 }
2494
2495 /// Returns true if the recipe only uses the first lane of operand \p Op.
2496 bool usesFirstLaneOnly(const VPValue *Op) const override {
2498 "Op must be an operand of the recipe");
2499 return isOrdered() || isInLoop();
2500 }
2501
2502protected:
2503#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2504 /// Print the recipe.
2505 void printRecipe(raw_ostream &O, const Twine &Indent,
2506 VPSlotTracker &SlotTracker) const override;
2507#endif
2508};
2509
2510/// A recipe for vectorizing a phi-node as a sequence of mask-based select
2511/// instructions.
2513public:
2514 /// The blend operation is a User of the incoming values and of their
2515 /// respective masks, ordered [I0, M0, I1, M1, I2, M2, ...]. Note that M0 can
2516 /// be omitted (implied by passing an odd number of operands) in which case
2517 /// all other incoming values are merged into it.
2519 : VPSingleDefRecipe(VPDef::VPBlendSC, Operands, Phi, DL) {
2520 assert(Operands.size() > 0 && "Expected at least one operand!");
2521 }
2522
2527
2528 VP_CLASSOF_IMPL(VPDef::VPBlendSC)
2529
2530 /// A normalized blend is one that has an odd number of operands, whereby the
2531 /// first operand does not have an associated mask.
2532 bool isNormalized() const { return getNumOperands() % 2; }
2533
2534 /// Return the number of incoming values, taking into account when normalized
2535 /// the first incoming value will have no mask.
2536 unsigned getNumIncomingValues() const {
2537 return (getNumOperands() + isNormalized()) / 2;
2538 }
2539
2540 /// Return incoming value number \p Idx.
2541 VPValue *getIncomingValue(unsigned Idx) const {
2542 return Idx == 0 ? getOperand(0) : getOperand(Idx * 2 - isNormalized());
2543 }
2544
2545 /// Return mask number \p Idx.
2546 VPValue *getMask(unsigned Idx) const {
2547 assert((Idx > 0 || !isNormalized()) && "First index has no mask!");
2548 return Idx == 0 ? getOperand(1) : getOperand(Idx * 2 + !isNormalized());
2549 }
2550
2551 /// Set mask number \p Idx to \p V.
2552 void setMask(unsigned Idx, VPValue *V) {
2553 assert((Idx > 0 || !isNormalized()) && "First index has no mask!");
2554 Idx == 0 ? setOperand(1, V) : setOperand(Idx * 2 + !isNormalized(), V);
2555 }
2556
2557 void execute(VPTransformState &State) override {
2558 llvm_unreachable("VPBlendRecipe should be expanded by simplifyBlends");
2559 }
2560
2561 /// Return the cost of this VPBlendRecipe.
2562 InstructionCost computeCost(ElementCount VF,
2563 VPCostContext &Ctx) const override;
2564
2565 /// Returns true if the recipe only uses the first lane of operand \p Op.
2566 bool usesFirstLaneOnly(const VPValue *Op) const override {
2568 "Op must be an operand of the recipe");
2569 // This recurses through Blend recipes only, so it must terminate at header
2570 // phis at the latest.
2571 return all_of(users(),
2572 [this](VPUser *U) { return U->usesFirstLaneOnly(this); });
2573 }
2574
2575protected:
2576#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2577 /// Print the recipe.
2578 void printRecipe(raw_ostream &O, const Twine &Indent,
2579 VPSlotTracker &SlotTracker) const override;
2580#endif
2581};
2582
2583/// A common base class for interleaved memory operations.
2584/// An Interleaved memory operation is a memory access method that combines
2585/// multiple strided loads/stores into a single wide load/store with shuffles.
2586/// The first operand is the start address. The optional operands are, in order,
2587/// the stored values and the mask.
2589 public VPIRMetadata {
2591
2592 /// Indicates if the interleave group is in a conditional block and requires a
2593 /// mask.
2594 bool HasMask = false;
2595
2596 /// Indicates if gaps between members of the group need to be masked out or if
2597 /// unused gaps can be loaded speculatively.
2598 bool NeedsMaskForGaps = false;
2599
2600protected:
2601 VPInterleaveBase(const unsigned char SC,
2603 ArrayRef<VPValue *> Operands,
2604 ArrayRef<VPValue *> StoredValues, VPValue *Mask,
2605 bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
2606 : VPRecipeBase(SC, Operands, DL), VPIRMetadata(MD), IG(IG),
2607 NeedsMaskForGaps(NeedsMaskForGaps) {
2608 // TODO: extend the masked interleaved-group support to reversed access.
2609 assert((!Mask || !IG->isReverse()) &&
2610 "Reversed masked interleave-group not supported.");
2611 for (unsigned I = 0; I < IG->getFactor(); ++I)
2612 if (Instruction *Inst = IG->getMember(I)) {
2613 if (Inst->getType()->isVoidTy())
2614 continue;
2615 new VPValue(Inst, this);
2616 }
2617
2618 for (auto *SV : StoredValues)
2619 addOperand(SV);
2620 if (Mask) {
2621 HasMask = true;
2622 addOperand(Mask);
2623 }
2624 }
2625
2626public:
2627 VPInterleaveBase *clone() override = 0;
2628
2629 static inline bool classof(const VPRecipeBase *R) {
2630 return R->getVPDefID() == VPRecipeBase::VPInterleaveSC ||
2631 R->getVPDefID() == VPRecipeBase::VPInterleaveEVLSC;
2632 }
2633
2634 static inline bool classof(const VPUser *U) {
2635 auto *R = dyn_cast<VPRecipeBase>(U);
2636 return R && classof(R);
2637 }
2638
2639 /// Return the address accessed by this recipe.
2640 VPValue *getAddr() const {
2641 return getOperand(0); // Address is the 1st, mandatory operand.
2642 }
2643
2644 /// Return the mask used by this recipe. Note that a full mask is represented
2645 /// by a nullptr.
2646 VPValue *getMask() const {
2647 // Mask is optional and the last operand.
2648 return HasMask ? getOperand(getNumOperands() - 1) : nullptr;
2649 }
2650
2651 /// Return true if the access needs a mask because of the gaps.
2652 bool needsMaskForGaps() const { return NeedsMaskForGaps; }
2653
2655
2656 Instruction *getInsertPos() const { return IG->getInsertPos(); }
2657
2658 void execute(VPTransformState &State) override {
2659 llvm_unreachable("VPInterleaveBase should not be instantiated.");
2660 }
2661
2662 /// Return the cost of this recipe.
2663 InstructionCost computeCost(ElementCount VF,
2664 VPCostContext &Ctx) const override;
2665
2666 /// Returns true if the recipe only uses the first lane of operand \p Op.
2667 bool usesFirstLaneOnly(const VPValue *Op) const override = 0;
2668
2669 /// Returns the number of stored operands of this interleave group. Returns 0
2670 /// for load interleave groups.
2671 virtual unsigned getNumStoreOperands() const = 0;
2672
2673 /// Return the VPValues stored by this interleave group. If it is a load
2674 /// interleave group, return an empty ArrayRef.
2676 return ArrayRef<VPValue *>(op_end() -
2677 (getNumStoreOperands() + (HasMask ? 1 : 0)),
2679 }
2680};
2681
2682/// VPInterleaveRecipe is a recipe for transforming an interleave group of load
2683/// or stores into one wide load/store and shuffles. The first operand of a
2684/// VPInterleave recipe is the address, followed by the stored values, followed
2685/// by an optional mask.
2687public:
2689 ArrayRef<VPValue *> StoredValues, VPValue *Mask,
2690 bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
2691 : VPInterleaveBase(VPDef::VPInterleaveSC, IG, Addr, StoredValues, Mask,
2692 NeedsMaskForGaps, MD, DL) {}
2693
2694 ~VPInterleaveRecipe() override = default;
2695
2699 needsMaskForGaps(), *this, getDebugLoc());
2700 }
2701
2702 VP_CLASSOF_IMPL(VPDef::VPInterleaveSC)
2703
2704 /// Generate the wide load or store, and shuffles.
2705 void execute(VPTransformState &State) override;
2706
2707 bool usesFirstLaneOnly(const VPValue *Op) const override {
2709 "Op must be an operand of the recipe");
2710 return Op == getAddr() && !llvm::is_contained(getStoredValues(), Op);
2711 }
2712
2713 unsigned getNumStoreOperands() const override {
2714 return getNumOperands() - (getMask() ? 2 : 1);
2715 }
2716
2717protected:
2718#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2719 /// Print the recipe.
2720 void printRecipe(raw_ostream &O, const Twine &Indent,
2721 VPSlotTracker &SlotTracker) const override;
2722#endif
2723};
2724
2725/// A recipe for interleaved memory operations with vector-predication
2726/// intrinsics. The first operand is the address, the second operand is the
2727/// explicit vector length. Stored values and mask are optional operands.
2729public:
2731 : VPInterleaveBase(VPDef::VPInterleaveEVLSC, R.getInterleaveGroup(),
2732 ArrayRef<VPValue *>({R.getAddr(), &EVL}),
2733 R.getStoredValues(), Mask, R.needsMaskForGaps(), R,
2734 R.getDebugLoc()) {
2735 assert(!getInterleaveGroup()->isReverse() &&
2736 "Reversed interleave-group with tail folding is not supported.");
2737 assert(!needsMaskForGaps() && "Interleaved access with gap mask is not "
2738 "supported for scalable vector.");
2739 }
2740
2741 ~VPInterleaveEVLRecipe() override = default;
2742
2744 llvm_unreachable("cloning not implemented yet");
2745 }
2746
2747 VP_CLASSOF_IMPL(VPDef::VPInterleaveEVLSC)
2748
2749 /// The VPValue of the explicit vector length.
2750 VPValue *getEVL() const { return getOperand(1); }
2751
2752 /// Generate the wide load or store, and shuffles.
2753 void execute(VPTransformState &State) override;
2754
2755 /// The recipe only uses the first lane of the address, and EVL operand.
2756 bool usesFirstLaneOnly(const VPValue *Op) const override {
2758 "Op must be an operand of the recipe");
2759 return (Op == getAddr() && !llvm::is_contained(getStoredValues(), Op)) ||
2760 Op == getEVL();
2761 }
2762
2763 unsigned getNumStoreOperands() const override {
2764 return getNumOperands() - (getMask() ? 3 : 2);
2765 }
2766
2767protected:
2768#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2769 /// Print the recipe.
2770 void printRecipe(raw_ostream &O, const Twine &Indent,
2771 VPSlotTracker &SlotTracker) const override;
2772#endif
2773};
2774
2775/// A recipe to represent inloop, ordered or partial reduction operations. It
2776/// performs a reduction on a vector operand into a scalar (vector in the case
2777/// of a partial reduction) value, and adds the result to a chain. The Operands
2778/// are {ChainOp, VecOp, [Condition]}.
2780
2781 /// The recurrence kind for the reduction in question.
2782 RecurKind RdxKind;
2783 /// Whether the reduction is conditional.
2784 bool IsConditional = false;
2785 ReductionStyle Style;
2786
2787protected:
2788 VPReductionRecipe(const unsigned char SC, RecurKind RdxKind,
2790 ArrayRef<VPValue *> Operands, VPValue *CondOp,
2791 ReductionStyle Style, DebugLoc DL)
2792 : VPRecipeWithIRFlags(SC, Operands, FMFs, DL), RdxKind(RdxKind),
2793 Style(Style) {
2794 if (CondOp) {
2795 IsConditional = true;
2796 addOperand(CondOp);
2797 }
2799 }
2800
2801public:
2803 VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
2805 : VPReductionRecipe(VPDef::VPReductionSC, RdxKind, FMFs, I,
2806 ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp, Style,
2807 DL) {}
2808
2810 VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
2812 : VPReductionRecipe(VPDef::VPReductionSC, RdxKind, FMFs, nullptr,
2813 ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp, Style,
2814 DL) {}
2815
2816 ~VPReductionRecipe() override = default;
2817
2819 return new VPReductionRecipe(RdxKind, getFastMathFlags(),
2821 getCondOp(), Style, getDebugLoc());
2822 }
2823
2824 static inline bool classof(const VPRecipeBase *R) {
2825 return R->getVPDefID() == VPRecipeBase::VPReductionSC ||
2826 R->getVPDefID() == VPRecipeBase::VPReductionEVLSC;
2827 }
2828
2829 static inline bool classof(const VPUser *U) {
2830 auto *R = dyn_cast<VPRecipeBase>(U);
2831 return R && classof(R);
2832 }
2833
2834 static inline bool classof(const VPValue *VPV) {
2835 const VPRecipeBase *R = VPV->getDefiningRecipe();
2836 return R && classof(R);
2837 }
2838
2839 static inline bool classof(const VPSingleDefRecipe *R) {
2840 return classof(static_cast<const VPRecipeBase *>(R));
2841 }
2842
2843 /// Generate the reduction in the loop.
2844 void execute(VPTransformState &State) override;
2845
2846 /// Return the cost of VPReductionRecipe.
2847 InstructionCost computeCost(ElementCount VF,
2848 VPCostContext &Ctx) const override;
2849
2850 /// Return the recurrence kind for the in-loop reduction.
2851 RecurKind getRecurrenceKind() const { return RdxKind; }
2852 /// Return true if the in-loop reduction is ordered.
2853 bool isOrdered() const { return std::holds_alternative<RdxOrdered>(Style); };
2854 /// Return true if the in-loop reduction is conditional.
2855 bool isConditional() const { return IsConditional; };
2856 /// Returns true if the reduction outputs a vector with a scaled down VF.
2857 bool isPartialReduction() const { return getVFScaleFactor() > 1; }
2858 /// Returns true if the reduction is in-loop.
2859 bool isInLoop() const {
2860 return std::holds_alternative<RdxInLoop>(Style) ||
2861 std::holds_alternative<RdxOrdered>(Style);
2862 }
2863 /// The VPValue of the scalar Chain being accumulated.
2864 VPValue *getChainOp() const { return getOperand(0); }
2865 /// The VPValue of the vector value to be reduced.
2866 VPValue *getVecOp() const { return getOperand(1); }
2867 /// The VPValue of the condition for the block.
2869 return isConditional() ? getOperand(getNumOperands() - 1) : nullptr;
2870 }
2871 /// Get the factor that the VF of this recipe's output should be scaled by, or
2872 /// 1 if it isn't scaled.
2873 unsigned getVFScaleFactor() const {
2874 auto *Partial = std::get_if<RdxUnordered>(&Style);
2875 return Partial ? Partial->VFScaleFactor : 1;
2876 }
2877
2878protected:
2879#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2880 /// Print the recipe.
2881 void printRecipe(raw_ostream &O, const Twine &Indent,
2882 VPSlotTracker &SlotTracker) const override;
2883#endif
2884};
2885
2886/// A recipe to represent inloop reduction operations with vector-predication
2887/// intrinsics, performing a reduction on a vector operand with the explicit
2888/// vector length (EVL) into a scalar value, and adding the result to a chain.
2889/// The Operands are {ChainOp, VecOp, EVL, [Condition]}.
2891public:
2895 VPDef::VPReductionEVLSC, R.getRecurrenceKind(),
2896 R.getFastMathFlags(),
2898 ArrayRef<VPValue *>({R.getChainOp(), R.getVecOp(), &EVL}), CondOp,
2899 getReductionStyle(/*InLoop=*/true, R.isOrdered(), 1), DL) {}
2900
2901 ~VPReductionEVLRecipe() override = default;
2902
2904 llvm_unreachable("cloning not implemented yet");
2905 }
2906
2907 VP_CLASSOF_IMPL(VPDef::VPReductionEVLSC)
2908
2909 /// Generate the reduction in the loop
2910 void execute(VPTransformState &State) override;
2911
2912 /// The VPValue of the explicit vector length.
2913 VPValue *getEVL() const { return getOperand(2); }
2914
2915 /// Returns true if the recipe only uses the first lane of operand \p Op.
2916 bool usesFirstLaneOnly(const VPValue *Op) const override {
2918 "Op must be an operand of the recipe");
2919 return Op == getEVL();
2920 }
2921
2922protected:
2923#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2924 /// Print the recipe.
2925 void printRecipe(raw_ostream &O, const Twine &Indent,
2926 VPSlotTracker &SlotTracker) const override;
2927#endif
2928};
2929
2930/// VPReplicateRecipe replicates a given instruction producing multiple scalar
2931/// copies of the original scalar type, one per lane, instead of producing a
2932/// single copy of widened type for all lanes. If the instruction is known to be
2933/// a single scalar, only one copy, per lane zero, will be generated.
2935 public VPIRMetadata {
2936 /// Indicator if only a single replica per lane is needed.
2937 bool IsSingleScalar;
2938
2939 /// Indicator if the replicas are also predicated.
2940 bool IsPredicated;
2941
2942public:
2944 bool IsSingleScalar, VPValue *Mask = nullptr,
2945 const VPIRFlags &Flags = {}, VPIRMetadata Metadata = {},
2946 DebugLoc DL = DebugLoc::getUnknown())
2947 : VPRecipeWithIRFlags(VPDef::VPReplicateSC, Operands, Flags, DL),
2948 VPIRMetadata(Metadata), IsSingleScalar(IsSingleScalar),
2949 IsPredicated(Mask) {
2950 setUnderlyingValue(I);
2951 if (Mask)
2952 addOperand(Mask);
2953 }
2954
2955 ~VPReplicateRecipe() override = default;
2956
2958 auto *Copy = new VPReplicateRecipe(
2959 getUnderlyingInstr(), operands(), IsSingleScalar,
2960 isPredicated() ? getMask() : nullptr, *this, *this, getDebugLoc());
2961 Copy->transferFlags(*this);
2962 return Copy;
2963 }
2964
2965 VP_CLASSOF_IMPL(VPDef::VPReplicateSC)
2966
2967 /// Generate replicas of the desired Ingredient. Replicas will be generated
2968 /// for all parts and lanes unless a specific part and lane are specified in
2969 /// the \p State.
2970 void execute(VPTransformState &State) override;
2971
2972 /// Return the cost of this VPReplicateRecipe.
2973 InstructionCost computeCost(ElementCount VF,
2974 VPCostContext &Ctx) const override;
2975
2976 bool isSingleScalar() const { return IsSingleScalar; }
2977
2978 bool isPredicated() const { return IsPredicated; }
2979
2980 /// Returns true if the recipe only uses the first lane of operand \p Op.
2981 bool usesFirstLaneOnly(const VPValue *Op) const override {
2983 "Op must be an operand of the recipe");
2984 return isSingleScalar();
2985 }
2986
2987 /// Returns true if the recipe uses scalars of operand \p Op.
2988 bool usesScalars(const VPValue *Op) const override {
2990 "Op must be an operand of the recipe");
2991 return true;
2992 }
2993
2994 /// Returns true if the recipe is used by a widened recipe via an intervening
2995 /// VPPredInstPHIRecipe. In this case, the scalar values should also be packed
2996 /// in a vector.
2997 bool shouldPack() const;
2998
2999 /// Return the mask of a predicated VPReplicateRecipe.
3001 assert(isPredicated() && "Trying to get the mask of a unpredicated recipe");
3002 return getOperand(getNumOperands() - 1);
3003 }
3004
3005 unsigned getOpcode() const { return getUnderlyingInstr()->getOpcode(); }
3006
3007protected:
3008#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3009 /// Print the recipe.
3010 void printRecipe(raw_ostream &O, const Twine &Indent,
3011 VPSlotTracker &SlotTracker) const override;
3012#endif
3013};
3014
3015/// A recipe for generating conditional branches on the bits of a mask.
3017public:
3019 : VPRecipeBase(VPDef::VPBranchOnMaskSC, {BlockInMask}, DL) {}
3020
3023 }
3024
3025 VP_CLASSOF_IMPL(VPDef::VPBranchOnMaskSC)
3026
3027 /// Generate the extraction of the appropriate bit from the block mask and the
3028 /// conditional branch.
3029 void execute(VPTransformState &State) override;
3030
3031 /// Return the cost of this VPBranchOnMaskRecipe.
3032 InstructionCost computeCost(ElementCount VF,
3033 VPCostContext &Ctx) const override;
3034
3035#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3036 /// Print the recipe.
3037 void printRecipe(raw_ostream &O, const Twine &Indent,
3038 VPSlotTracker &SlotTracker) const override {
3039 O << Indent << "BRANCH-ON-MASK ";
3041 }
3042#endif
3043
3044 /// Returns true if the recipe uses scalars of operand \p Op.
3045 bool usesScalars(const VPValue *Op) const override {
3047 "Op must be an operand of the recipe");
3048 return true;
3049 }
3050};
3051
3052/// A recipe to combine multiple recipes into a single 'expression' recipe,
3053/// which should be considered a single entity for cost-modeling and transforms.
3054/// The recipe needs to be 'decomposed', i.e. replaced by its individual
3055/// expression recipes, before execute. The individual expression recipes are
3056/// completely disconnected from the def-use graph of other recipes not part of
3057/// the expression. Def-use edges between pairs of expression recipes remain
3058/// intact, whereas every edge between an expression recipe and a recipe outside
3059/// the expression is elevated to connect the non-expression recipe with the
3060/// VPExpressionRecipe itself.
3061class VPExpressionRecipe : public VPSingleDefRecipe {
3062 /// Recipes included in this VPExpressionRecipe. This could contain
3063 /// duplicates.
3064 SmallVector<VPSingleDefRecipe *> ExpressionRecipes;
3065
3066 /// Temporary VPValues used for external operands of the expression, i.e.
3067 /// operands not defined by recipes in the expression.
3068 SmallVector<VPValue *> LiveInPlaceholders;
3069
3070 enum class ExpressionTypes {
3071 /// Represents an inloop extended reduction operation, performing a
3072 /// reduction on an extended vector operand into a scalar value, and adding
3073 /// the result to a chain.
3074 ExtendedReduction,
3075 /// Represent an inloop multiply-accumulate reduction, multiplying the
3076 /// extended vector operands, performing a reduction.add on the result, and
3077 /// adding the scalar result to a chain.
3078 ExtMulAccReduction,
3079 /// Represent an inloop multiply-accumulate reduction, multiplying the
3080 /// vector operands, performing a reduction.add on the result, and adding
3081 /// the scalar result to a chain.
3082 MulAccReduction,
3083 /// Represent an inloop multiply-accumulate reduction, multiplying the
3084 /// extended vector operands, negating the multiplication, performing a
3085 /// reduction.add on the result, and adding the scalar result to a chain.
3086 ExtNegatedMulAccReduction,
3087 };
3088
3089 /// Type of the expression.
3090 ExpressionTypes ExpressionType;
3091
3092 /// Construct a new VPExpressionRecipe by internalizing recipes in \p
3093 /// ExpressionRecipes. External operands (i.e. not defined by another recipe
3094 /// in the expression) are replaced by temporary VPValues and the original
3095 /// operands are transferred to the VPExpressionRecipe itself. Clone recipes
3096 /// as needed (excluding last) to ensure they are only used by other recipes
3097 /// in the expression.
3098 VPExpressionRecipe(ExpressionTypes ExpressionType,
3099 ArrayRef<VPSingleDefRecipe *> ExpressionRecipes);
3100
3101public:
3103 : VPExpressionRecipe(ExpressionTypes::ExtendedReduction, {Ext, Red}) {}
3105 : VPExpressionRecipe(ExpressionTypes::MulAccReduction, {Mul, Red}) {}
3108 : VPExpressionRecipe(ExpressionTypes::ExtMulAccReduction,
3109 {Ext0, Ext1, Mul, Red}) {}
3112 VPReductionRecipe *Red)
3113 : VPExpressionRecipe(ExpressionTypes::ExtNegatedMulAccReduction,
3114 {Ext0, Ext1, Mul, Sub, Red}) {
3115 assert(Mul->getOpcode() == Instruction::Mul && "Expected a mul");
3116 assert(Red->getRecurrenceKind() == RecurKind::Add &&
3117 "Expected an add reduction");
3118 assert(getNumOperands() >= 3 && "Expected at least three operands");
3119 [[maybe_unused]] auto *SubConst = dyn_cast<ConstantInt>(getOperand(2)->getLiveInIRValue());
3120 assert(SubConst && SubConst->getValue() == 0 &&
3121 Sub->getOpcode() == Instruction::Sub && "Expected a negating sub");
3122 }
3123
3125 SmallPtrSet<VPSingleDefRecipe *, 4> ExpressionRecipesSeen;
3126 for (auto *R : reverse(ExpressionRecipes)) {
3127 if (ExpressionRecipesSeen.insert(R).second)
3128 delete R;
3129 }
3130 for (VPValue *T : LiveInPlaceholders)
3131 delete T;
3132 }
3133
3134 VP_CLASSOF_IMPL(VPDef::VPExpressionSC)
3135
3136 VPExpressionRecipe *clone() override {
3137 assert(!ExpressionRecipes.empty() && "empty expressions should be removed");
3138 SmallVector<VPSingleDefRecipe *> NewExpressiondRecipes;
3139 for (auto *R : ExpressionRecipes)
3140 NewExpressiondRecipes.push_back(R->clone());
3141 for (auto *New : NewExpressiondRecipes) {
3142 for (const auto &[Idx, Old] : enumerate(ExpressionRecipes))
3143 New->replaceUsesOfWith(Old, NewExpressiondRecipes[Idx]);
3144 // Update placeholder operands in the cloned recipe to use the external
3145 // operands, to be internalized when the cloned expression is constructed.
3146 for (const auto &[Placeholder, OutsideOp] :
3147 zip(LiveInPlaceholders, operands()))
3148 New->replaceUsesOfWith(Placeholder, OutsideOp);
3149 }
3150 return new VPExpressionRecipe(ExpressionType, NewExpressiondRecipes);
3151 }
3152
3153 /// Return the VPValue to use to infer the result type of the recipe.
3155 unsigned OpIdx =
3156 cast<VPReductionRecipe>(ExpressionRecipes.back())->isConditional() ? 2
3157 : 1;
3158 return getOperand(getNumOperands() - OpIdx);
3159 }
3160
3161 /// Insert the recipes of the expression back into the VPlan, directly before
3162 /// the current recipe. Leaves the expression recipe empty, which must be
3163 /// removed before codegen.
3164 void decompose();
3165
3166 unsigned getVFScaleFactor() const {
3167 auto *PR = dyn_cast<VPReductionRecipe>(ExpressionRecipes.back());
3168 return PR ? PR->getVFScaleFactor() : 1;
3169 }
3170
3171 /// Method for generating code, must not be called as this recipe is abstract.
3172 void execute(VPTransformState &State) override {
3173 llvm_unreachable("recipe must be removed before execute");
3174 }
3175
3177 VPCostContext &Ctx) const override;
3178
3179 /// Returns true if this expression contains recipes that may read from or
3180 /// write to memory.
3181 bool mayReadOrWriteMemory() const;
3182
3183 /// Returns true if this expression contains recipes that may have side
3184 /// effects.
3185 bool mayHaveSideEffects() const;
3186
3187 /// Returns true if the result of this VPExpressionRecipe is a single-scalar.
3188 bool isSingleScalar() const;
3189
3190protected:
3191#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3192 /// Print the recipe.
3193 void printRecipe(raw_ostream &O, const Twine &Indent,
3194 VPSlotTracker &SlotTracker) const override;
3195#endif
3196};
3197
3198/// VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when
3199/// control converges back from a Branch-on-Mask. The phi nodes are needed in
3200/// order to merge values that are set under such a branch and feed their uses.
3201/// The phi nodes can be scalar or vector depending on the users of the value.
3202/// This recipe works in concert with VPBranchOnMaskRecipe.
3204public:
3205 /// Construct a VPPredInstPHIRecipe given \p PredV, whose value needs phi
3206 /// nodes after merging back from a Branch-on-Mask.
3208 : VPSingleDefRecipe(VPDef::VPPredInstPHISC, PredV, DL) {}
3209 ~VPPredInstPHIRecipe() override = default;
3210
3212 return new VPPredInstPHIRecipe(getOperand(0), getDebugLoc());
3213 }
3214
3215 VP_CLASSOF_IMPL(VPDef::VPPredInstPHISC)
3216
3217 /// Generates phi nodes for live-outs (from a replicate region) as needed to
3218 /// retain SSA form.
3219 void execute(VPTransformState &State) override;
3220
3221 /// Return the cost of this VPPredInstPHIRecipe.
3223 VPCostContext &Ctx) const override {
3224 // TODO: Compute accurate cost after retiring the legacy cost model.
3225 return 0;
3226 }
3227
3228 /// Returns true if the recipe uses scalars of operand \p Op.
3229 bool usesScalars(const VPValue *Op) const override {
3231 "Op must be an operand of the recipe");
3232 return true;
3233 }
3234
3235protected:
3236#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3237 /// Print the recipe.
3238 void printRecipe(raw_ostream &O, const Twine &Indent,
3239 VPSlotTracker &SlotTracker) const override;
3240#endif
3241};
3242
3243/// A common base class for widening memory operations. An optional mask can be
3244/// provided as the last operand.
3246 public VPIRMetadata {
3247protected:
3249
3250 /// Alignment information for this memory access.
3252
3253 /// Whether the accessed addresses are consecutive.
3255
3256 /// Whether the consecutive accessed addresses are in reverse order.
3258
3259 /// Whether the memory access is masked.
3260 bool IsMasked = false;
3261
3262 void setMask(VPValue *Mask) {
3263 assert(!IsMasked && "cannot re-set mask");
3264 if (!Mask)
3265 return;
3266 addOperand(Mask);
3267 IsMasked = true;
3268 }
3269
3270 VPWidenMemoryRecipe(const char unsigned SC, Instruction &I,
3271 std::initializer_list<VPValue *> Operands,
3272 bool Consecutive, bool Reverse,
3273 const VPIRMetadata &Metadata, DebugLoc DL)
3274 : VPRecipeBase(SC, Operands, DL), VPIRMetadata(Metadata), Ingredient(I),
3276 Reverse(Reverse) {
3277 assert((Consecutive || !Reverse) && "Reverse implies consecutive");
3279 "Reversed acccess without VPVectorEndPointerRecipe address?");
3280 }
3281
3282public:
3284 llvm_unreachable("cloning not supported");
3285 }
3286
3287 static inline bool classof(const VPRecipeBase *R) {
3288 return R->getVPDefID() == VPRecipeBase::VPWidenLoadSC ||
3289 R->getVPDefID() == VPRecipeBase::VPWidenStoreSC ||
3290 R->getVPDefID() == VPRecipeBase::VPWidenLoadEVLSC ||
3291 R->getVPDefID() == VPRecipeBase::VPWidenStoreEVLSC;
3292 }
3293
3294 static inline bool classof(const VPUser *U) {
3295 auto *R = dyn_cast<VPRecipeBase>(U);
3296 return R && classof(R);
3297 }
3298
3299 /// Return whether the loaded-from / stored-to addresses are consecutive.
3300 bool isConsecutive() const { return Consecutive; }
3301
3302 /// Return whether the consecutive loaded/stored addresses are in reverse
3303 /// order.
3304 bool isReverse() const { return Reverse; }
3305
3306 /// Return the address accessed by this recipe.
3307 VPValue *getAddr() const { return getOperand(0); }
3308
3309 /// Returns true if the recipe is masked.
3310 bool isMasked() const { return IsMasked; }
3311
3312 /// Return the mask used by this recipe. Note that a full mask is represented
3313 /// by a nullptr.
3314 VPValue *getMask() const {
3315 // Mask is optional and therefore the last operand.
3316 return isMasked() ? getOperand(getNumOperands() - 1) : nullptr;
3317 }
3318
3319 /// Returns the alignment of the memory access.
3320 Align getAlign() const { return Alignment; }
3321
3322 /// Generate the wide load/store.
3323 void execute(VPTransformState &State) override {
3324 llvm_unreachable("VPWidenMemoryRecipe should not be instantiated.");
3325 }
3326
3327 /// Return the cost of this VPWidenMemoryRecipe.
3328 InstructionCost computeCost(ElementCount VF,
3329 VPCostContext &Ctx) const override;
3330
3332};
3333
3334/// A recipe for widening load operations, using the address to load from and an
3335/// optional mask.
3337 public VPValue {
3339 bool Consecutive, bool Reverse,
3340 const VPIRMetadata &Metadata, DebugLoc DL)
3341 : VPWidenMemoryRecipe(VPDef::VPWidenLoadSC, Load, {Addr}, Consecutive,
3342 Reverse, Metadata, DL),
3343 VPValue(this, &Load) {
3344 setMask(Mask);
3345 }
3346
3349 getMask(), Consecutive, Reverse, *this,
3350 getDebugLoc());
3351 }
3352
3353 VP_CLASSOF_IMPL(VPDef::VPWidenLoadSC);
3354
3355 /// Generate a wide load or gather.
3356 void execute(VPTransformState &State) override;
3357
3358 /// Returns true if the recipe only uses the first lane of operand \p Op.
3359 bool usesFirstLaneOnly(const VPValue *Op) const override {
3361 "Op must be an operand of the recipe");
3362 // Widened, consecutive load operations only demand the first lane of
3363 // their address.
3364 return Op == getAddr() && isConsecutive();
3365 }
3366
3367protected:
3368#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3369 /// Print the recipe.
3370 void printRecipe(raw_ostream &O, const Twine &Indent,
3371 VPSlotTracker &SlotTracker) const override;
3372#endif
3373};
3374
3375/// A recipe for widening load operations with vector-predication intrinsics,
3376/// using the address to load from, the explicit vector length and an optional
3377/// mask.
3378struct VPWidenLoadEVLRecipe final : public VPWidenMemoryRecipe, public VPValue {
3380 VPValue *Mask)
3381 : VPWidenMemoryRecipe(VPDef::VPWidenLoadEVLSC, L.getIngredient(),
3382 {Addr, &EVL}, L.isConsecutive(), L.isReverse(), L,
3383 L.getDebugLoc()),
3384 VPValue(this, &getIngredient()) {
3385 setMask(Mask);
3386 }
3387
3388 VP_CLASSOF_IMPL(VPDef::VPWidenLoadEVLSC)
3389
3390 /// Return the EVL operand.
3391 VPValue *getEVL() const { return getOperand(1); }
3392
3393 /// Generate the wide load or gather.
3394 LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
3395
3396 /// Return the cost of this VPWidenLoadEVLRecipe.
3398 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
3399
3400 /// Returns true if the recipe only uses the first lane of operand \p Op.
3401 bool usesFirstLaneOnly(const VPValue *Op) const override {
3403 "Op must be an operand of the recipe");
3404 // Widened loads only demand the first lane of EVL and consecutive loads
3405 // only demand the first lane of their address.
3406 return Op == getEVL() || (Op == getAddr() && isConsecutive());
3407 }
3408
3409protected:
3410#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3411 /// Print the recipe.
3412 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
3413 VPSlotTracker &SlotTracker) const override;
3414#endif
3415};
3416
3417/// A recipe for widening store operations, using the stored value, the address
3418/// to store to and an optional mask.
3420 VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal,
3421 VPValue *Mask, bool Consecutive, bool Reverse,
3422 const VPIRMetadata &Metadata, DebugLoc DL)
3423 : VPWidenMemoryRecipe(VPDef::VPWidenStoreSC, Store, {Addr, StoredVal},
3424 Consecutive, Reverse, Metadata, DL) {
3425 setMask(Mask);
3426 }
3427
3433
3434 VP_CLASSOF_IMPL(VPDef::VPWidenStoreSC);
3435
3436 /// Return the value stored by this recipe.
3437 VPValue *getStoredValue() const { return getOperand(1); }
3438
3439 /// Generate a wide store or scatter.
3440 void execute(VPTransformState &State) override;
3441
3442 /// Returns true if the recipe only uses the first lane of operand \p Op.
3443 bool usesFirstLaneOnly(const VPValue *Op) const override {
3445 "Op must be an operand of the recipe");
3446 // Widened, consecutive stores only demand the first lane of their address,
3447 // unless the same operand is also stored.
3448 return Op == getAddr() && isConsecutive() && Op != getStoredValue();
3449 }
3450
3451protected:
3452#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3453 /// Print the recipe.
3454 void printRecipe(raw_ostream &O, const Twine &Indent,
3455 VPSlotTracker &SlotTracker) const override;
3456#endif
3457};
3458
3459/// A recipe for widening store operations with vector-predication intrinsics,
3460/// using the value to store, the address to store to, the explicit vector
3461/// length and an optional mask.
3464 VPValue *Mask)
3465 : VPWidenMemoryRecipe(VPDef::VPWidenStoreEVLSC, S.getIngredient(),
3466 {Addr, S.getStoredValue(), &EVL}, S.isConsecutive(),
3467 S.isReverse(), S, S.getDebugLoc()) {
3468 setMask(Mask);
3469 }
3470
3471 VP_CLASSOF_IMPL(VPDef::VPWidenStoreEVLSC)
3472
3473 /// Return the value stored by this recipe.
3474 VPValue *getStoredValue() const { return getOperand(1); }
3475
3476 /// Return the EVL operand.
3477 VPValue *getEVL() const { return getOperand(2); }
3478
3479 /// Generate the wide store or scatter.
3480 LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
3481
3482 /// Return the cost of this VPWidenStoreEVLRecipe.
3484 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
3485
3486 /// Returns true if the recipe only uses the first lane of operand \p Op.
3487 bool usesFirstLaneOnly(const VPValue *Op) const override {
3489 "Op must be an operand of the recipe");
3490 if (Op == getEVL()) {
3491 assert(getStoredValue() != Op && "unexpected store of EVL");
3492 return true;
3493 }
3494 // Widened, consecutive memory operations only demand the first lane of
3495 // their address, unless the same operand is also stored. The latter can
3496 // happen with opaque pointers.
3497 return Op == getAddr() && isConsecutive() && Op != getStoredValue();
3498 }
3499
3500protected:
3501#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3502 /// Print the recipe.
3503 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
3504 VPSlotTracker &SlotTracker) const override;
3505#endif
3506};
3507
3508/// Recipe to expand a SCEV expression.
3510 const SCEV *Expr;
3511
3512public:
3514 : VPSingleDefRecipe(VPDef::VPExpandSCEVSC, {}), Expr(Expr) {}
3515
3516 ~VPExpandSCEVRecipe() override = default;
3517
3518 VPExpandSCEVRecipe *clone() override { return new VPExpandSCEVRecipe(Expr); }
3519
3520 VP_CLASSOF_IMPL(VPDef::VPExpandSCEVSC)
3521
3522 void execute(VPTransformState &State) override {
3523 llvm_unreachable("SCEV expressions must be expanded before final execute");
3524 }
3525
3526 /// Return the cost of this VPExpandSCEVRecipe.
3528 VPCostContext &Ctx) const override {
3529 // TODO: Compute accurate cost after retiring the legacy cost model.
3530 return 0;
3531 }
3532
3533 const SCEV *getSCEV() const { return Expr; }
3534
3535protected:
3536#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3537 /// Print the recipe.
3538 void printRecipe(raw_ostream &O, const Twine &Indent,
3539 VPSlotTracker &SlotTracker) const override;
3540#endif
3541};
3542
3543/// Canonical scalar induction phi of the vector loop. Starting at the specified
3544/// start value (either 0 or the resume value when vectorizing the epilogue
3545/// loop). VPWidenCanonicalIVRecipe represents the vector version of the
3546/// canonical induction variable.
3548public:
3550 : VPHeaderPHIRecipe(VPDef::VPCanonicalIVPHISC, nullptr, StartV, DL) {}
3551
3552 ~VPCanonicalIVPHIRecipe() override = default;
3553
3555 auto *R = new VPCanonicalIVPHIRecipe(getOperand(0), getDebugLoc());
3556 R->addOperand(getBackedgeValue());
3557 return R;
3558 }
3559
3560 VP_CLASSOF_IMPL(VPDef::VPCanonicalIVPHISC)
3561
3562 void execute(VPTransformState &State) override {
3563 llvm_unreachable("cannot execute this recipe, should be replaced by a "
3564 "scalar phi recipe");
3565 }
3566
3567 /// Returns the scalar type of the induction.
3569 return getStartValue()->getLiveInIRValue()->getType();
3570 }
3571
3572 /// Returns true if the recipe only uses the first lane of operand \p Op.
3573 bool usesFirstLaneOnly(const VPValue *Op) const override {
3575 "Op must be an operand of the recipe");
3576 return true;
3577 }
3578
3579 /// Returns true if the recipe only uses the first part of operand \p Op.
3580 bool usesFirstPartOnly(const VPValue *Op) const override {
3582 "Op must be an operand of the recipe");
3583 return true;
3584 }
3585
3586 /// Return the cost of this VPCanonicalIVPHIRecipe.
3588 VPCostContext &Ctx) const override {
3589 // For now, match the behavior of the legacy cost model.
3590 return 0;
3591 }
3592
3593protected:
3594#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3595 /// Print the recipe.
3596 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
3597 VPSlotTracker &SlotTracker) const override;
3598#endif
3599};
3600
3601/// A recipe for generating the active lane mask for the vector loop that is
3602/// used to predicate the vector operations.
3603/// TODO: It would be good to use the existing VPWidenPHIRecipe instead and
3604/// remove VPActiveLaneMaskPHIRecipe.
3606public:
3608 : VPHeaderPHIRecipe(VPDef::VPActiveLaneMaskPHISC, nullptr, StartMask,
3609 DL) {}
3610
3611 ~VPActiveLaneMaskPHIRecipe() override = default;
3612
3615 if (getNumOperands() == 2)
3616 R->addOperand(getOperand(1));
3617 return R;
3618 }
3619
3620 VP_CLASSOF_IMPL(VPDef::VPActiveLaneMaskPHISC)
3621
3622 /// Generate the active lane mask phi of the vector loop.
3623 void execute(VPTransformState &State) override;
3624
3625protected:
3626#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3627 /// Print the recipe.
3628 void printRecipe(raw_ostream &O, const Twine &Indent,
3629 VPSlotTracker &SlotTracker) const override;
3630#endif
3631};
3632
3633/// A recipe for generating the phi node for the current index of elements,
3634/// adjusted in accordance with EVL value. It starts at the start value of the
3635/// canonical induction and gets incremented by EVL in each iteration of the
3636/// vector loop.
3638public:
3640 : VPHeaderPHIRecipe(VPDef::VPEVLBasedIVPHISC, nullptr, StartIV, DL) {}
3641
3642 ~VPEVLBasedIVPHIRecipe() override = default;
3643
3645 llvm_unreachable("cloning not implemented yet");
3646 }
3647
3648 VP_CLASSOF_IMPL(VPDef::VPEVLBasedIVPHISC)
3649
3650 void execute(VPTransformState &State) override {
3651 llvm_unreachable("cannot execute this recipe, should be replaced by a "
3652 "scalar phi recipe");
3653 }
3654
3655 /// Return the cost of this VPEVLBasedIVPHIRecipe.
3657 VPCostContext &Ctx) const override {
3658 // For now, match the behavior of the legacy cost model.
3659 return 0;
3660 }
3661
3662 /// Returns true if the recipe only uses the first lane of operand \p Op.
3663 bool usesFirstLaneOnly(const VPValue *Op) const override {
3665 "Op must be an operand of the recipe");
3666 return true;
3667 }
3668
3669protected:
3670#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3671 /// Print the recipe.
3672 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
3673 VPSlotTracker &SlotTracker) const override;
3674#endif
3675};
3676
3677/// A Recipe for widening the canonical induction variable of the vector loop.
3679 public VPUnrollPartAccessor<1> {
3680public:
3682 : VPSingleDefRecipe(VPDef::VPWidenCanonicalIVSC, {CanonicalIV}) {}
3683
3684 ~VPWidenCanonicalIVRecipe() override = default;
3685
3690
3691 VP_CLASSOF_IMPL(VPDef::VPWidenCanonicalIVSC)
3692
3693 /// Generate a canonical vector induction variable of the vector loop, with
3694 /// start = {<Part*VF, Part*VF+1, ..., Part*VF+VF-1> for 0 <= Part < UF}, and
3695 /// step = <VF*UF, VF*UF, ..., VF*UF>.
3696 void execute(VPTransformState &State) override;
3697
3698 /// Return the cost of this VPWidenCanonicalIVRecipe.
3700 VPCostContext &Ctx) const override {
3701 // TODO: Compute accurate cost after retiring the legacy cost model.
3702 return 0;
3703 }
3704
3705protected:
3706#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3707 /// Print the recipe.
3708 void printRecipe(raw_ostream &O, const Twine &Indent,
3709 VPSlotTracker &SlotTracker) const override;
3710#endif
3711};
3712
3713/// A recipe for converting the input value \p IV value to the corresponding
3714/// value of an IV with different start and step values, using Start + IV *
3715/// Step.
3717 /// Kind of the induction.
3719 /// If not nullptr, the floating point induction binary operator. Must be set
3720 /// for floating point inductions.
3721 const FPMathOperator *FPBinOp;
3722
3723 /// Name to use for the generated IR instruction for the derived IV.
3724 std::string Name;
3725
3726public:
3728 VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step,
3729 const Twine &Name = "")
3731 IndDesc.getKind(),
3732 dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp()),
3733 Start, CanonicalIV, Step, Name) {}
3734
3736 const FPMathOperator *FPBinOp, VPValue *Start, VPValue *IV,
3737 VPValue *Step, const Twine &Name = "")
3738 : VPSingleDefRecipe(VPDef::VPDerivedIVSC, {Start, IV, Step}), Kind(Kind),
3739 FPBinOp(FPBinOp), Name(Name.str()) {}
3740
3741 ~VPDerivedIVRecipe() override = default;
3742
3744 return new VPDerivedIVRecipe(Kind, FPBinOp, getStartValue(), getOperand(1),
3745 getStepValue());
3746 }
3747
3748 VP_CLASSOF_IMPL(VPDef::VPDerivedIVSC)
3749
3750 /// Generate the transformed value of the induction at offset StartValue (1.
3751 /// operand) + IV (2. operand) * StepValue (3. operand).
3752 void execute(VPTransformState &State) override;
3753
3754 /// Return the cost of this VPDerivedIVRecipe.
3756 VPCostContext &Ctx) const override {
3757 // TODO: Compute accurate cost after retiring the legacy cost model.
3758 return 0;
3759 }
3760
3762 return getStartValue()->getLiveInIRValue()->getType();
3763 }
3764
3765 VPValue *getStartValue() const { return getOperand(0); }
3766 VPValue *getStepValue() const { return getOperand(2); }
3767
3768 /// Returns true if the recipe only uses the first lane of operand \p Op.
3769 bool usesFirstLaneOnly(const VPValue *Op) const override {
3771 "Op must be an operand of the recipe");
3772 return true;
3773 }
3774
3775protected:
3776#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3777 /// Print the recipe.
3778 void printRecipe(raw_ostream &O, const Twine &Indent,
3779 VPSlotTracker &SlotTracker) const override;
3780#endif
3781};
3782
3783/// A recipe for handling phi nodes of integer and floating-point inductions,
3784/// producing their scalar values.
3786 public VPUnrollPartAccessor<3> {
3787 Instruction::BinaryOps InductionOpcode;
3788
3789public:
3792 DebugLoc DL)
3793 : VPRecipeWithIRFlags(VPDef::VPScalarIVStepsSC,
3794 ArrayRef<VPValue *>({IV, Step, VF}), FMFs, DL),
3795 InductionOpcode(Opcode) {}
3796
3798 VPValue *Step, VPValue *VF,
3801 IV, Step, VF, IndDesc.getInductionOpcode(),
3802 dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp())
3803 ? IndDesc.getInductionBinOp()->getFastMathFlags()
3804 : FastMathFlags(),
3805 DL) {}
3806
3807 ~VPScalarIVStepsRecipe() override = default;
3808
3810 return new VPScalarIVStepsRecipe(
3811 getOperand(0), getOperand(1), getOperand(2), InductionOpcode,
3813 getDebugLoc());
3814 }
3815
3816 /// Return true if this VPScalarIVStepsRecipe corresponds to part 0. Note that
3817 /// this is only accurate after the VPlan has been unrolled.
3818 bool isPart0() const { return getUnrollPart(*this) == 0; }
3819
3820 VP_CLASSOF_IMPL(VPDef::VPScalarIVStepsSC)
3821
3822 /// Generate the scalarized versions of the phi node as needed by their users.
3823 void execute(VPTransformState &State) override;
3824
3825 /// Return the cost of this VPScalarIVStepsRecipe.
3827 VPCostContext &Ctx) const override {
3828 // TODO: Compute accurate cost after retiring the legacy cost model.
3829 return 0;
3830 }
3831
3832 VPValue *getStepValue() const { return getOperand(1); }
3833
3834 /// Returns true if the recipe only uses the first lane of operand \p Op.
3835 bool usesFirstLaneOnly(const VPValue *Op) const override {
3837 "Op must be an operand of the recipe");
3838 return true;
3839 }
3840
3841protected:
3842#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3843 /// Print the recipe.
3844 void printRecipe(raw_ostream &O, const Twine &Indent,
3845 VPSlotTracker &SlotTracker) const override;
3846#endif
3847};
3848
3849/// Casting from VPRecipeBase -> VPPhiAccessors is supported for all recipe
3850/// types implementing VPPhiAccessors. Used by isa<> & co.
3852 static inline bool isPossible(const VPRecipeBase *f) {
3853 // TODO: include VPPredInstPHIRecipe too, once it implements VPPhiAccessors.
3855 }
3856};
3857/// Support casting from VPRecipeBase -> VPPhiAccessors, by down-casting to the
3858/// recipe types implementing VPPhiAccessors. Used by cast<>, dyn_cast<> & co.
/// NOTE(review): `Self`, used by doCastIfPossible below, is not declared in the
/// lines visible in this listing (listing line 3862 is absent) - confirm its
/// definition against the actual header.
3859template <typename SrcTy>
3860struct CastInfoVPPhiAccessors : public CastIsPossible<VPPhiAccessors, SrcTy> {
3861
3863
3864 /// doCast is used by cast<>.
 /// Selects the concrete recipe class to cast through based on the VPDefID of
 /// \p R; every ID without an explicit case is cast to VPHeaderPHIRecipe.
3865 static inline VPPhiAccessors *doCast(SrcTy R) {
 // The lambda yields a pointer-to-const so that the same code handles both
 // const and non-const SrcTy; the outer const_cast removes that const again.
3866 return const_cast<VPPhiAccessors *>([R]() -> const VPPhiAccessors * {
3867 switch (R->getVPDefID()) {
3868 case VPDef::VPInstructionSC:
3869 return cast<VPPhi>(R);
3870 case VPDef::VPIRInstructionSC:
3871 return cast<VPIRPhi>(R);
3872 case VPDef::VPWidenPHISC:
3873 return cast<VPWidenPHIRecipe>(R);
3874 default:
3875 return cast<VPHeaderPHIRecipe>(R);
3876 }
3877 }());
3878 }
3879
3880 /// doCastIfPossible is used by dyn_cast<>.
 /// Returns nullptr when Self::isPossible rejects \p f, otherwise the result
 /// of doCast.
3881 static inline VPPhiAccessors *doCastIfPossible(SrcTy f) {
3882 if (!Self::isPossible(f))
3883 return nullptr;
3884 return doCast(f);
3885 }
3886};
3887template <>
3890template <>
3893
3894/// Casting from (const) VPRecipeBase -> (const) VPIRMetadata is supported for
3895/// all recipe types implementing VPIRMetadata. Used by isa<> & co.
3896namespace detail {
/// Cast \p R to \p DstTy by first casting it to the concrete recipe class
/// selected by its VPDefID. Reaching the default case is a programming error
/// (llvm_unreachable).
3897template <typename DstTy, typename RecipeBasePtrTy>
3898static inline auto castToVPIRMetadata(RecipeBasePtrTy R) -> DstTy {
3899 switch (R->getVPDefID()) {
3900 case VPDef::VPInstructionSC:
3901 return cast<VPInstruction>(R);
3902 case VPDef::VPWidenSC:
3903 return cast<VPWidenRecipe>(R);
3904 case VPDef::VPWidenCastSC:
3905 return cast<VPWidenCastRecipe>(R);
 // NOTE(review): listing line 3907 is absent here, so as shown
 // VPWidenIntrinsicSC falls through to the VPWidenCallRecipe cast below.
 // Confirm against the actual header that this case has its own return.
3906 case VPDef::VPWidenIntrinsicSC:
3908 case VPDef::VPWidenCallSC:
3909 return cast<VPWidenCallRecipe>(R);
3910 case VPDef::VPWidenSelectSC:
3911 return cast<VPWidenSelectRecipe>(R);
3912 case VPDef::VPReplicateSC:
3913 return cast<VPReplicateRecipe>(R);
 // Both interleave recipe IDs are cast through the shared VPInterleaveBase.
3914 case VPDef::VPInterleaveSC:
3915 case VPDef::VPInterleaveEVLSC:
3916 return cast<VPInterleaveBase>(R);
 // All widened load/store recipe IDs are cast through VPWidenMemoryRecipe.
3917 case VPDef::VPWidenLoadSC:
3918 case VPDef::VPWidenLoadEVLSC:
3919 case VPDef::VPWidenStoreSC:
3920 case VPDef::VPWidenStoreEVLSC:
3921 return cast<VPWidenMemoryRecipe>(R);
3922 default:
3923 llvm_unreachable("invalid recipe for VPIRMetadata cast");
3924 }
3925}
3926} // namespace detail
3927
3928/// Support casting from VPRecipeBase -> VPIRMetadata, by down-casting to the
3929/// recipe types implementing VPIRMetadata. Used by cast<>, dyn_cast<> & co.
3930template <typename DstTy, typename SrcTy>
3931struct CastInfoVPIRMetadata : public CastIsPossible<DstTy, SrcTy> {
 /// Returns true if \p R is one of the recipe types implementing
 /// VPIRMetadata.
 /// NOTE(review): the body (listing lines 3935-3939) is absent from this
 /// listing - confirm the list of recipe types against the actual header.
3932 static inline bool isPossible(SrcTy R) {
3933 // NOTE: Each recipe inheriting from VPIRMetadata must be listed here and
3934 // also handled in castToVPIRMetadata.
3940 }
3941
 /// Pointer type returned by the cast helpers below.
3942 using RetTy = DstTy *;
3943
3944 /// doCast is used by cast<>.
 /// NOTE(review): the body (listing line 3946) is absent from this listing;
 /// presumably it forwards to detail::castToVPIRMetadata - confirm.
3945 static inline RetTy doCast(SrcTy R) {
3947 }
3948
3949 /// doCastIfPossible is used by dyn_cast<>.
 /// Returns nullptr when isPossible rejects \p R, otherwise the result of
 /// doCast.
3950 static inline RetTy doCastIfPossible(SrcTy R) {
3951 if (!isPossible(R))
3952 return nullptr;
3953 return doCast(R);
3954 }
3955};
3956template <>
3959template <>
3962
3963/// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It
3964/// holds a sequence of zero or more VPRecipe's each representing a sequence of
3965/// output IR instructions. All PHI-like recipes must come before any non-PHI recipes.
3966class LLVM_ABI_FOR_TEST VPBasicBlock : public VPBlockBase {
3967 friend class VPlan;
3968
3969 /// Use VPlan::createVPBasicBlock to create VPBasicBlocks.
 /// If \p Recipe is non-null it is appended as the block's first recipe.
3970 VPBasicBlock(const Twine &Name = "", VPRecipeBase *Recipe = nullptr)
3971 : VPBlockBase(VPBasicBlockSC, Name.str()) {
3972 if (Recipe)
3973 appendRecipe(Recipe);
3974 }
3975
3976public:
3978
3979protected:
3980 /// The VPRecipes held in the order of output instructions to generate.
3982
 /// Construct an empty block with the given block ID \p BlockSC, for use by
 /// subclasses (VPIRBasicBlock passes VPIRBasicBlockSC).
3983 VPBasicBlock(const unsigned char BlockSC, const Twine &Name = "")
3984 : VPBlockBase(BlockSC, Name.str()) {}
3985
3986public:
 /// Empties the recipe list by removing recipes from the back one at a time.
 /// NOTE(review): whether removal also deletes the recipe depends on the list
 /// type, whose declaration is not visible in this listing - confirm.
3987 ~VPBasicBlock() override {
3988 while (!Recipes.empty())
3989 Recipes.pop_back();
3990 }
3991
3992 /// Instruction iterators...
3997
3998 //===--------------------------------------------------------------------===//
3999 /// Recipe iterator methods
4000 ///
4001 inline iterator begin() { return Recipes.begin(); }
4002 inline const_iterator begin() const { return Recipes.begin(); }
4003 inline iterator end() { return Recipes.end(); }
4004 inline const_iterator end() const { return Recipes.end(); }
4005
4006 inline reverse_iterator rbegin() { return Recipes.rbegin(); }
4007 inline const_reverse_iterator rbegin() const { return Recipes.rbegin(); }
4008 inline reverse_iterator rend() { return Recipes.rend(); }
4009 inline const_reverse_iterator rend() const { return Recipes.rend(); }
4010
 /// Size, emptiness and first/last element accessors, all forwarded to the
 /// recipe list.
4011 inline size_t size() const { return Recipes.size(); }
4012 inline bool empty() const { return Recipes.empty(); }
4013 inline const VPRecipeBase &front() const { return Recipes.front(); }
4014 inline VPRecipeBase &front() { return Recipes.front(); }
4015 inline const VPRecipeBase &back() const { return Recipes.back(); }
4016 inline VPRecipeBase &back() { return Recipes.back(); }
4017
4018 /// Returns a reference to the list of recipes.
4020
4021 /// Returns a pointer to a member of the recipe list.
4022 static RecipeListTy VPBasicBlock::*getSublistAccess(VPRecipeBase *) {
4023 return &VPBasicBlock::Recipes;
4024 }
4025
4026 /// Method to support type inquiry through isa, cast, and dyn_cast.
 /// Accepts both the VPBasicBlockSC and VPIRBasicBlockSC block IDs, so a
 /// VPIRBasicBlock is also treated as a VPBasicBlock.
4027 static inline bool classof(const VPBlockBase *V) {
4028 return V->getVPBlockID() == VPBlockBase::VPBasicBlockSC ||
4029 V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
4030 }
4031
 /// Insert \p Recipe into the recipe list at \p InsertPt and make this block
 /// its parent. \p Recipe must be non-null and must not already have a parent
 /// (both asserted).
4032 void insert(VPRecipeBase *Recipe, iterator InsertPt) {
4033 assert(Recipe && "No recipe to append.");
4034 assert(!Recipe->Parent && "Recipe already in VPlan");
4035 Recipe->Parent = this;
4036 Recipes.insert(InsertPt, Recipe);
4037 }
4038
4039 /// Augment the existing recipes of a VPBasicBlock with an additional
4040 /// \p Recipe as the last recipe.
4041 void appendRecipe(VPRecipeBase *Recipe) { insert(Recipe, end()); }
4042
4043 /// The method which generates the output IR instructions that correspond to
4044 /// this VPBasicBlock, thereby "executing" the VPlan.
4045 void execute(VPTransformState *State) override;
4046
4047 /// Return the cost of this VPBasicBlock.
4048 InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override;
4049
4050 /// Return the position of the first non-phi node recipe in the block.
4051 iterator getFirstNonPhi();
4052
4053 /// Returns an iterator range over the PHI-like recipes in the block.
4057
4058 /// Split current block at \p SplitAt by inserting a new block between the
4059 /// current block and its successors and moving all recipes starting at
4060 /// SplitAt to the new block. Returns the new block.
4061 VPBasicBlock *splitAt(iterator SplitAt);
4062
 /// Return the loop region enclosing this block.
 /// NOTE(review): behavior when no loop region encloses the block is defined
 /// out of line and not visible here - confirm before relying on it.
4063 VPRegionBlock *getEnclosingLoopRegion();
4064 const VPRegionBlock *getEnclosingLoopRegion() const;
4065
4066#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4067 /// Print this VPBasicBlock to \p O, prefixing all lines with \p Indent. \p
4068 /// SlotTracker is used to print unnamed VPValue's using consecutive numbers.
4069 ///
4070 /// Note that the numbering is applied to the whole VPlan, so printing
4071 /// individual blocks is consistent with the whole VPlan printing.
4072 void print(raw_ostream &O, const Twine &Indent,
4073 VPSlotTracker &SlotTracker) const override;
4074 using VPBlockBase::print; // Get the print(raw_ostream &O) version.
4075#endif
4076
4077 /// If the block has multiple successors, return the branch recipe terminating
4078 /// the block. If there are no or only a single successor, return nullptr.
4079 VPRecipeBase *getTerminator();
4080 const VPRecipeBase *getTerminator() const;
4081
4082 /// Returns true if the block is exiting its parent region.
4083 bool isExiting() const;
4084
4085 /// Clone the current block and its recipes, without updating the operands of
4086 /// the cloned recipes.
4087 VPBasicBlock *clone() override;
4088
4089 /// Returns the predecessor block at index \p Idx with the predecessors as per
4090 /// the corresponding plain CFG. If the block is an entry block to a region,
4091 /// the first predecessor is the single predecessor of a region, and the
4092 /// second predecessor is the exiting block of the region.
4093 const VPBasicBlock *getCFGPredecessor(unsigned Idx) const;
4094
4095protected:
4096 /// Execute the recipes in the IR basic block \p BB.
4097 void executeRecipes(VPTransformState *State, BasicBlock *BB);
4098
4099 /// Connect the VPBB's predecessors in the VPlan CFG to the IR basic block
4100 /// generated for this VPBB.
4101 void connectToPredecessors(VPTransformState &State);
4102
4103private:
4104 /// Create an IR BasicBlock to hold the output instructions generated by this
4105 /// VPBasicBlock, and return it. Update the CFGState accordingly.
4106 BasicBlock *createEmptyBasicBlock(VPTransformState &State);
4107};
4108
4109inline const VPBasicBlock *
4111 return getAsRecipe()->getParent()->getCFGPredecessor(Idx);
4112}
4113
4114/// A special type of VPBasicBlock that wraps an existing IR basic block.
4115/// Recipes of the block get added before the first non-phi instruction in the
4116/// wrapped block.
4117/// Note: At the moment, VPIRBasicBlock can only be used to wrap VPlan's
4118/// preheader block.
/// NOTE(review): the note above looks stale. Within this file VPlan also holds
/// VPIRBasicBlocks wrapping the scalar loop header (VPlan::getScalarHeader) and
/// the exit blocks of the original scalar loop (VPlan::getExitBlocks).
4119class VPIRBasicBlock : public VPBasicBlock {
4120 friend class VPlan;
4121
 /// The wrapped IR basic block.
4122 BasicBlock *IRBB;
4123
4124 /// Use VPlan::createVPIRBasicBlock to create VPIRBasicBlocks.
 /// The block is named "ir-bb<NAME>", where NAME is the name of \p IRBB.
4125 VPIRBasicBlock(BasicBlock *IRBB)
4126 : VPBasicBlock(VPIRBasicBlockSC,
4127 (Twine("ir-bb<") + IRBB->getName() + Twine(">")).str()),
4128 IRBB(IRBB) {}
4129
4130public:
4131 ~VPIRBasicBlock() override = default;
4132
 /// Method to support type inquiry through isa, cast, and dyn_cast.
4133 static inline bool classof(const VPBlockBase *V) {
4134 return V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
4135 }
4136
4137 /// The method which generates the output IR instructions that correspond to
4138 /// this VPIRBasicBlock, thereby "executing" the VPlan.
4139 void execute(VPTransformState *State) override;
4140
 /// Clone this block; the return type is covariant with VPBasicBlock::clone.
4141 VPIRBasicBlock *clone() override;
4142
 /// Return the wrapped IR basic block.
4143 BasicBlock *getIRBasicBlock() const { return IRBB; }
4144};
4145
4146/// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks
4147/// which form a Single-Entry-Single-Exiting subgraph of the output IR CFG.
4148/// A VPRegionBlock may indicate that its contents are to be replicated several
4149/// times. This is designed to support predicated scalarization, in which a
4150/// scalar if-then code structure needs to be generated VF * UF times. Having
4151/// this replication indicator helps to keep a single model for multiple
4152/// candidate VF's. The actual replication takes place only once the desired VF
4153/// and UF have been determined.
4154class LLVM_ABI_FOR_TEST VPRegionBlock : public VPBlockBase {
4155 friend class VPlan;
4156
4157 /// Hold the Single Entry of the SESE region modelled by the VPRegionBlock.
4158 VPBlockBase *Entry;
4159
4160 /// Hold the Single Exiting block of the SESE region modelled by the
4161 /// VPRegionBlock.
4162 VPBlockBase *Exiting;
4163
4164 /// An indicator whether this region is to generate multiple replicated
4165 /// instances of output IR corresponding to its VPBlockBases.
4166 bool IsReplicator;
4167
4168 /// Use VPlan::createVPRegionBlock to create VPRegionBlocks.
 /// Both \p Entry and \p Exiting are dereferenced here and must be non-null.
 /// \p Entry must have no predecessors and \p Exiting no successors (both
 /// asserted); this region becomes the parent of both blocks.
4169 VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting,
4170 const std::string &Name = "", bool IsReplicator = false)
4171 : VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exiting(Exiting),
4172 IsReplicator(IsReplicator) {
4173 assert(Entry->getPredecessors().empty() && "Entry block has predecessors.");
4174 assert(Exiting->getSuccessors().empty() && "Exit block has successors.");
4175 Entry->setParent(this);
4176 Exiting->setParent(this);
4177 }
 /// Construct a region without entry and exiting blocks; they can be set
 /// later with setEntry and setExiting.
4178 VPRegionBlock(const std::string &Name = "", bool IsReplicator = false)
4179 : VPBlockBase(VPRegionBlockSC, Name), Entry(nullptr), Exiting(nullptr),
4180 IsReplicator(IsReplicator) {}
4181
4182public:
4183 ~VPRegionBlock() override = default;
4184
4185 /// Method to support type inquiry through isa, cast, and dyn_cast.
4186 static inline bool classof(const VPBlockBase *V) {
4187 return V->getVPBlockID() == VPBlockBase::VPRegionBlockSC;
4188 }
4189
 /// Return the entry block of the region; null if none has been set.
4190 const VPBlockBase *getEntry() const { return Entry; }
4191 VPBlockBase *getEntry() { return Entry; }
4192
4193 /// Set \p EntryBlock as the entry VPBlockBase of this VPRegionBlock. \p
4194 /// EntryBlock must have no predecessors.
4195 void setEntry(VPBlockBase *EntryBlock) {
4196 assert(EntryBlock->getPredecessors().empty() &&
4197 "Entry block cannot have predecessors.");
4198 Entry = EntryBlock;
4199 EntryBlock->setParent(this);
4200 }
4201
 /// Return the exiting block of the region; null if none has been set.
4202 const VPBlockBase *getExiting() const { return Exiting; }
4203 VPBlockBase *getExiting() { return Exiting; }
4204
4205 /// Set \p ExitingBlock as the exiting VPBlockBase of this VPRegionBlock. \p
4206 /// ExitingBlock must have no successors.
4207 void setExiting(VPBlockBase *ExitingBlock) {
4208 assert(ExitingBlock->getSuccessors().empty() &&
4209 "Exit block cannot have successors.");
4210 Exiting = ExitingBlock;
4211 ExitingBlock->setParent(this);
4212 }
4213
4214 /// Returns the pre-header VPBasicBlock of the loop region.
 /// Must not be called on replicate regions (asserted).
4216 assert(!isReplicator() && "should only get pre-header of loop regions");
4217 return getSinglePredecessor()->getExitingBasicBlock();
4218 }
4219
4220 /// An indicator whether this region is to generate multiple replicated
4221 /// instances of output IR corresponding to its VPBlockBases.
4222 bool isReplicator() const { return IsReplicator; }
4223
4224 /// The method which generates the output IR instructions that correspond to
4225 /// this VPRegionBlock, thereby "executing" the VPlan.
4226 void execute(VPTransformState *State) override;
4227
4228 /// Return the cost of this region.
4229 InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override;
4230
4231#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4232 /// Print this VPRegionBlock to \p O (recursively), prefixing all lines with
4233 /// \p Indent. \p SlotTracker is used to print unnamed VPValue's using
4234 /// consecutive numbers.
4235 ///
4236 /// Note that the numbering is applied to the whole VPlan, so printing
4237 /// individual regions is consistent with the whole VPlan printing.
4238 void print(raw_ostream &O, const Twine &Indent,
4239 VPSlotTracker &SlotTracker) const override;
4240 using VPBlockBase::print; // Get the print(raw_ostream &O) version.
4241#endif
4242
4243 /// Clone all blocks in the single-entry single-exit region of the block and
4244 /// their recipes without updating the operands of the cloned recipes.
4245 VPRegionBlock *clone() override;
4246
4247 /// Remove the current region from its VPlan, connecting its predecessor to
4248 /// its entry, and its exiting block to its successor.
4249 void dissolveToCFGLoop();
4250
4251 /// Returns the canonical induction recipe of the region.
 /// The recipe is taken to be the first recipe of the entry basic block, or
 /// of that block's single successor when the entry block is empty.
4253 VPBasicBlock *EntryVPBB = getEntryBasicBlock();
4254 if (EntryVPBB->empty()) {
4255 // VPlan native path. TODO: Unify both code paths.
4256 EntryVPBB = cast<VPBasicBlock>(EntryVPBB->getSingleSuccessor());
4257 }
4258 return cast<VPCanonicalIVPHIRecipe>(&*EntryVPBB->begin());
4259 }
 // Const overload: forwards to the non-const getCanonicalIV().
4261 return const_cast<VPRegionBlock *>(this)->getCanonicalIV();
4262 }
4263
4264 /// Return the type of the canonical IV for loop regions.
4265 Type *getCanonicalIVType() { return getCanonicalIV()->getScalarType(); }
4266 const Type *getCanonicalIVType() const {
4267 return getCanonicalIV()->getScalarType();
4268 }
4269};
4270
4272 return getParent()->getParent();
4273}
4274
4276 return getParent()->getParent();
4277}
4278
4279/// VPlan models a candidate for vectorization, encoding various decisions taken
4280/// to produce efficient output IR, including which branches, basic-blocks and
4281/// output IR instructions to generate, and their cost. VPlan holds a
4282/// Hierarchical-CFG of VPBasicBlocks and VPRegionBlocks rooted at an Entry
4283/// VPBasicBlock.
4284class VPlan {
4285 friend class VPlanPrinter;
4286 friend class VPSlotTracker;
4287
4288 /// VPBasicBlock corresponding to the original preheader. Used to place
4289 /// VPExpandSCEV recipes for expressions used during skeleton creation and the
4290 /// rest of VPlan execution.
4291 /// When this VPlan is used for the epilogue vector loop, the entry will be
4292 /// replaced by a new entry block created during skeleton creation.
4293 VPBasicBlock *Entry;
4294
4295 /// VPIRBasicBlock wrapping the header of the original scalar loop.
4296 VPIRBasicBlock *ScalarHeader;
4297
4298 /// Immutable list of VPIRBasicBlocks wrapping the exit blocks of the original
4299 /// scalar loop. Note that some exit blocks may be unreachable at the moment,
4300 /// e.g. if the scalar epilogue always executes.
4302
4303 /// Holds the VFs applicable to this VPlan.
4305
4306 /// Holds the UFs applicable to this VPlan. If empty, the VPlan is valid for
4307 /// any UF.
4309
4310 /// Holds the name of the VPlan, for printing.
4311 std::string Name;
4312
4313 /// Represents the trip count of the original loop, for folding
4314 /// the tail.
4315 VPValue *TripCount = nullptr;
4316
4317 /// Represents the backedge taken count of the original loop, for folding
4318 /// the tail. It equals TripCount - 1.
4319 VPValue *BackedgeTakenCount = nullptr;
4320
4321 /// Represents the vector trip count.
4322 VPValue VectorTripCount;
4323
4324 /// Represents the vectorization factor of the loop.
4325 VPValue VF;
4326
4327 /// Represents the loop-invariant VF * UF of the vector loop region.
4328 VPValue VFxUF;
4329
4330 /// Holds a mapping between Values and their corresponding VPValue inside
4331 /// VPlan.
4332 Value2VPValueTy Value2VPValue;
4333
4334 /// Contains all the external definitions created for this VPlan. External
4335 /// definitions are VPValues that hold a pointer to their underlying IR.
4337
4338 /// Blocks allocated and owned by the VPlan. They will be deleted once the
4339 /// VPlan is destroyed.
4340 SmallVector<VPBlockBase *> CreatedBlocks;
4341
4342 /// Construct a VPlan with \p Entry to the plan and with \p ScalarHeader
4343 /// wrapping the original header of the scalar loop.
 /// \p ScalarHeader must have no successors (asserted).
4344 VPlan(VPBasicBlock *Entry, VPIRBasicBlock *ScalarHeader)
4345 : Entry(Entry), ScalarHeader(ScalarHeader) {
4346 Entry->setPlan(this);
4347 assert(ScalarHeader->getNumSuccessors() == 0 &&
4348 "scalar header must be a leaf node");
4349 }
4350
4351public:
4352 /// Construct a VPlan for \p L. This will create VPIRBasicBlocks wrapping the
4353 /// original preheader and scalar header of \p L, to be used as entry and
4354 /// scalar header blocks of the new VPlan.
4355 VPlan(Loop *L);
4356
4357 /// Construct a VPlan with a new VPBasicBlock named "preheader" as entry and a
4358 /// VPIRBasicBlock wrapping \p ScalarHeaderBB. No trip count is set here.
4359 VPlan(BasicBlock *ScalarHeaderBB) {
4360 setEntry(createVPBasicBlock("preheader"));
4361 ScalarHeader = createVPIRBasicBlock(ScalarHeaderBB);
4362 }
4363
4365
 // Make VPBB the entry block and register this plan with it.
4367 Entry = VPBB;
4368 VPBB->setPlan(this);
4369 }
4370
4371 /// Generate the IR code for this VPlan.
4372 void execute(VPTransformState *State);
4373
4374 /// Return the cost of this plan.
4376
 /// Return the entry block of the plan.
4377 VPBasicBlock *getEntry() { return Entry; }
4378 const VPBasicBlock *getEntry() const { return Entry; }
4379
4380 /// Returns the preheader of the vector loop region, if one exists, or null
4381 /// otherwise.
4383 VPRegionBlock *VectorRegion = getVectorLoopRegion();
4384 return VectorRegion
4385 ? cast<VPBasicBlock>(VectorRegion->getSinglePredecessor())
4386 : nullptr;
4387 }
4388
4389 /// Returns the VPRegionBlock of the vector loop.
4392
4393 /// Returns the 'middle' block of the plan, that is the block that selects
4394 /// whether to execute the scalar tail loop or the exit block from the loop
4395 /// latch. If there is an early exit from the vector loop, the middle block
4396 /// conceptually has the early exit block as third successor, split across 2
4397 /// VPBBs. In that case, the second VPBB selects whether to execute the scalar
4398 /// tail loop or the exit block. If the scalar tail loop or exit block are
4399 /// known to always execute, the middle block may branch directly to that
4400 /// block. This function cannot be called once the vector loop region has been
4401 /// removed.
4403 VPRegionBlock *LoopRegion = getVectorLoopRegion();
4404 assert(
4405 LoopRegion &&
4406 "cannot call the function after vector loop region has been removed");
4407 auto *RegionSucc = cast<VPBasicBlock>(LoopRegion->getSingleSuccessor());
 // The region's successor is itself the middle block if it has a single
 // successor or branches directly to the scalar preheader.
4408 if (RegionSucc->getSingleSuccessor() ||
4409 is_contained(RegionSucc->getSuccessors(), getScalarPreheader()))
4410 return RegionSucc;
4411 // There is an early exit. The successor of RegionSucc is the middle block.
4412 return cast<VPBasicBlock>(RegionSucc->getSuccessors()[1]);
4413 }
4414
 // Const overload: forwards to the non-const getMiddleBlock().
4416 return const_cast<VPlan *>(this)->getMiddleBlock();
4417 }
4418
4419 /// Return the VPBasicBlock for the preheader of the scalar loop.
 /// This is the single predecessor of the scalar header.
4421 return cast<VPBasicBlock>(getScalarHeader()->getSinglePredecessor());
4422 }
4423
4424 /// Return the VPIRBasicBlock wrapping the header of the scalar loop.
4425 VPIRBasicBlock *getScalarHeader() const { return ScalarHeader; }
4426
4427 /// Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of
4428 /// the original scalar loop.
4429 ArrayRef<VPIRBasicBlock *> getExitBlocks() const { return ExitBlocks; }
4430
4431 /// Return the VPIRBasicBlock corresponding to \p IRBB. \p IRBB must be an
4432 /// exit block.
4434
4435 /// Returns true if \p VPBB is an exit block.
4436 bool isExitBlock(VPBlockBase *VPBB);
4437
4438 /// The trip count of the original loop.
 /// The trip count must have been set before calling this (asserted).
4440 assert(TripCount && "trip count needs to be set before accessing it");
4441 return TripCount;
4442 }
4443
4444 /// Set the trip count assuming it is currently null; if it is not - use
4445 /// resetTripCount().
 /// \p NewTripCount must be non-null (asserted).
4446 void setTripCount(VPValue *NewTripCount) {
4447 assert(!TripCount && NewTripCount && "TripCount should not be set yet.");
4448 TripCount = NewTripCount;
4449 }
4450
4451 /// Resets the trip count for the VPlan. The caller must make sure all uses of
4452 /// the original trip count have been replaced.
 /// Asserts that a trip count is already set, that it has no remaining users
 /// and that \p NewTripCount is non-null.
4453 void resetTripCount(VPValue *NewTripCount) {
4454 assert(TripCount && NewTripCount && TripCount->getNumUsers() == 0 &&
4455 "TripCount must be set when resetting");
4456 TripCount = NewTripCount;
4457 }
4458
4459 /// The backedge taken count of the original loop.
 /// Created on first use if it does not exist yet.
 /// NOTE(review): where the VPValue allocated here is released is not visible
 /// in this listing - confirm.
4461 if (!BackedgeTakenCount)
4462 BackedgeTakenCount = new VPValue();
4463 return BackedgeTakenCount;
4464 }
 /// Return the backedge taken count, or nullptr if none has been created.
4465 VPValue *getBackedgeTakenCount() const { return BackedgeTakenCount; }
4466
4467 /// The vector trip count.
4468 VPValue &getVectorTripCount() { return VectorTripCount; }
4469
4470 /// Returns the VF of the vector loop region.
4471 VPValue &getVF() { return VF; };
4472 const VPValue &getVF() const { return VF; };
4473
4474 /// Returns VF * UF of the vector loop region.
4475 VPValue &getVFxUF() { return VFxUF; }
4476
4479 }
4480
 /// Add \p VF to the VFs applicable to this VPlan.
4481 void addVF(ElementCount VF) { VFs.insert(VF); }
4482
 // Restrict the plan to the single VF given, which must already be
 // applicable to the plan (asserted).
4484 assert(hasVF(VF) && "Cannot set VF not already in plan");
4485 VFs.clear();
4486 VFs.insert(VF);
4487 }
4488
 /// Returns true if \p VF is one of the VFs applicable to this VPlan.
4489 bool hasVF(ElementCount VF) const { return VFs.count(VF); }
 /// Returns true if at least one applicable VF is scalable.
4490 bool hasScalableVF() const {
4491 return any_of(VFs, [](ElementCount VF) { return VF.isScalable(); });
4492 }
4493
4494 /// Returns an iterator range over all VFs of the plan.
4497 return VFs;
4498 }
4499
 /// Returns true if the plan's only VF is scalar. Asserts that this agrees
 /// with whether the fixed VF of 1 is among the plan's VFs, i.e. that a plan
 /// containing the scalar VF contains no other VF.
4500 bool hasScalarVFOnly() const {
4501 bool HasScalarVFOnly = VFs.size() == 1 && VFs[0].isScalar();
4502 assert(HasScalarVFOnly == hasVF(ElementCount::getFixed(1)) &&
4503 "Plan with scalar VF should only have a single VF");
4504 return HasScalarVFOnly;
4505 }
4506
 /// Returns true if \p UF is applicable to this VPlan. An empty set of UFs
 /// means the plan is valid for any UF.
4507 bool hasUF(unsigned UF) const { return UFs.empty() || UFs.contains(UF); }
4508
 /// Return the UF of the plan; exactly one UF must be set (asserted).
4509 unsigned getUF() const {
4510 assert(UFs.size() == 1 && "Expected a single UF");
4511 return UFs[0];
4512 }
4513
 /// Restrict the plan to the single unroll factor \p UF, which must already
 /// be applicable to the plan (asserted).
4514 void setUF(unsigned UF) {
4515 assert(hasUF(UF) && "Cannot set the UF not already in plan");
4516 UFs.clear();
4517 UFs.insert(UF);
4518 }
4519
4520 /// Returns true if the VPlan already has been unrolled, i.e. it has a single
4521 /// concrete UF.
4522 bool isUnrolled() const { return UFs.size() == 1; }
4523
4524 /// Return a string with the name of the plan and the applicable VFs and UFs.
4525 std::string getName() const;
4526
 /// Set the name of the plan to \p newName.
4527 void setName(const Twine &newName) { Name = newName.str(); }
4528
4529 /// Gets the live-in VPValue for \p V or adds a new live-in (if none exists
4530 /// yet) for \p V.
 /// \p V must be non-null (asserted).
4532 assert(V && "Trying to get or add the VPValue of a null Value");
4533 auto [It, Inserted] = Value2VPValue.try_emplace(V);
4534 if (Inserted) {
 // First request for V: create the live-in and record it in both the
 // live-in list and the map.
4535 VPValue *VPV = new VPValue(V);
4536 VPLiveIns.push_back(VPV);
4537 assert(VPV->isLiveIn() && "VPV must be a live-in.");
4538 It->second = VPV;
4539 }
4540
4541 assert(It->second->isLiveIn() && "Only live-ins should be in mapping");
4542 return It->second;
4543 }
4544
4545 /// Return a VPValue wrapping i1 true.
4546 VPValue *getTrue() { return getConstantInt(1, 1); }
4547
4548 /// Return a VPValue wrapping i1 false.
4549 VPValue *getFalse() { return getConstantInt(1, 0); }
4550
4551 /// Return a VPValue wrapping a ConstantInt with the given type and value.
4552 VPValue *getConstantInt(Type *Ty, uint64_t Val, bool IsSigned = false) {
4553 return getOrAddLiveIn(ConstantInt::get(Ty, Val, IsSigned));
4554 }
4555
4556 /// Return a VPValue wrapping a ConstantInt with the given bitwidth and value.
4558 bool IsSigned = false) {
4559 return getConstantInt(APInt(BitWidth, Val, IsSigned));
4560 }
4561
4562 /// Return a VPValue wrapping a ConstantInt with the given APInt value.
4564 return getOrAddLiveIn(ConstantInt::get(getContext(), Val));
4565 }
4566
4567 /// Return the live-in VPValue for \p V, if there is one or nullptr otherwise.
4568 VPValue *getLiveIn(Value *V) const { return Value2VPValue.lookup(V); }
4569
4570 /// Return the list of live-in VPValues available in the VPlan.
 /// Asserts that every VPValue in Value2VPValue is also in VPLiveIns.
4572 assert(all_of(Value2VPValue,
4573 [this](const auto &P) {
4574 return is_contained(VPLiveIns, P.second);
4575 }) &&
4576 "all VPValues in Value2VPValue must also be in VPLiveIns");
4577 return VPLiveIns;
4578 }
4579
4580#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4581 /// Print the live-ins of this VPlan to \p O.
4582 void printLiveIns(raw_ostream &O) const;
4583
4584 /// Print this VPlan to \p O.
4585 LLVM_ABI_FOR_TEST void print(raw_ostream &O) const;
4586
4587 /// Print this VPlan in DOT format to \p O.
4588 LLVM_ABI_FOR_TEST void printDOT(raw_ostream &O) const;
4589
4590 /// Dump the plan to stderr (for debugging).
4591 LLVM_DUMP_METHOD void dump() const;
4592#endif
4593
4594 /// Clone the current VPlan, update all VPValues of the new VPlan and cloned
4595 /// recipes to refer to the clones, and return it.
4597
4598 /// Create a new VPBasicBlock with \p Name and containing \p Recipe if
4599 /// present. The returned block is owned by the VPlan and deleted once the
4600 /// VPlan is destroyed.
4602 VPRecipeBase *Recipe = nullptr) {
4603 auto *VPB = new VPBasicBlock(Name, Recipe);
4604 CreatedBlocks.push_back(VPB);
4605 return VPB;
4606 }
4607
4608 /// Create a new loop region with \p Name and entry and exiting blocks set
4609 /// to \p Entry and \p Exiting respectively, if set. The returned block is
4610 /// owned by the VPlan and deleted once the VPlan is destroyed.
 /// NOTE(review): only \p Entry is tested for null. When \p Entry is set,
 /// \p Exiting must be set too, because the VPRegionBlock constructor taking
 /// both blocks dereferences \p Exiting.
4611 VPRegionBlock *createLoopRegion(const std::string &Name = "",
4612 VPBlockBase *Entry = nullptr,
4613 VPBlockBase *Exiting = nullptr) {
4614 auto *VPB = Entry ? new VPRegionBlock(Entry, Exiting, Name)
4615 : new VPRegionBlock(Name);
4616 CreatedBlocks.push_back(VPB);
4617 return VPB;
4618 }
4619
4620 /// Create a new replicate region with \p Entry, \p Exiting and \p Name. The
4621 /// returned block is owned by the VPlan and deleted once the VPlan is
4622 /// destroyed.
4624 const std::string &Name = "") {
4625 auto *VPB = new VPRegionBlock(Entry, Exiting, Name, true);
4626 CreatedBlocks.push_back(VPB);
4627 return VPB;
4628 }
4629
4630 /// Create a VPIRBasicBlock wrapping \p IRBB, but do not create
4631 /// VPIRInstructions wrapping the instructions in \p IRBB. The returned
4632 /// block is owned by the VPlan and deleted once the VPlan is destroyed.
4634
4635 /// Create a VPIRBasicBlock from \p IRBB containing VPIRInstructions for all
4636 /// instructions in \p IRBB, except its terminator which is managed by the
4637 /// successors of the block in VPlan. The returned block is owned by the VPlan
4638 /// and deleted once the VPlan is destroyed.
4640
4641 /// Returns true if the VPlan is based on a loop with an early exit. That is
4642 /// the case if the VPlan has either more than one exit block or a single exit
4643 /// block with multiple predecessors (one for the exit via the latch and one
4644 /// via the other early exit).
 /// Only exit blocks that have predecessors are counted towards "more than
 /// one exit block".
4645 bool hasEarlyExit() const {
4646 return count_if(ExitBlocks,
4647 [](VPIRBasicBlock *EB) { return EB->hasPredecessors(); }) >
4648 1 ||
4649 (ExitBlocks.size() == 1 && ExitBlocks[0]->getNumPredecessors() > 1);
4650 }
4651
4652 /// Returns true if the scalar tail may execute after the vector loop. Note
4653 /// that this relies on unneeded branches to the scalar tail loop being
4654 /// removed.
 /// NOTE(review): the second half of the condition (listing line 4657) is
 /// absent from this listing - confirm against the actual header.
4655 bool hasScalarTail() const {
4656 return !(!getScalarPreheader()->hasPredecessors() ||
4658 }
4659};
4660
4661#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Stream insertion for VPlan: prints \p Plan to \p OS via VPlan::print and
/// returns \p OS so insertions can be chained. Only available when the
/// enclosing NDEBUG / LLVM_ENABLE_DUMP guard is active.
4662inline raw_ostream &operator<<(raw_ostream &OS, const VPlan &Plan) {
4663 Plan.print(OS);
4664 return OS;
4665}
4666#endif
4667
4668} // end namespace llvm
4669
4670#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
static MCDisassembler::DecodeStatus addOperand(MCInst &Inst, const MCOperand &Opnd)
Rewrite undef for PHI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:638
#define LLVM_ABI_FOR_TEST
Definition Compiler.h:218
dxil translate DXIL Translate Metadata
This file defines the DenseMap class.
Hexagon Common GEP
iv users
Definition IVUsers.cpp:48
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
static std::pair< Value *, APInt > getMask(Value *WideMask, unsigned Factor, ElementCount LeafValueEC)
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first DebugLoc that has line number information, given a range of instructions.
This file provides utility analysis objects describing memory locations.
#define T
MachineInstr unsigned OpIdx
#define P(N)
StandardInstrumentations SI(Mod->getContext(), Debug, VerifyEach)
static StringRef getName(Value *V)
static bool mayHaveSideEffects(MachineInstr &MI)
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
static const BasicSubtargetSubTypeKV * find(StringRef S, ArrayRef< BasicSubtargetSubTypeKV > A)
Find KV in array using binary search.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:247
This file contains the declarations of the entities induced by Vectorization Plans,...
#define VP_CLASSOF_IMPL(VPDefID)
Definition VPlan.h:509
static const uint32_t IV[8]
Definition blake3_impl.h:83
Class for arbitrary precision integers.
Definition APInt.h:78
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
LLVM Basic Block Representation.
Definition BasicBlock.h:62
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
This class represents a function call, abstracting a target machine's calling convention.
This is the base class for all instructions that perform data casts.
Definition InstrTypes.h:448
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
A debug info location.
Definition DebugLoc.h:123
static DebugLoc getUnknown()
Definition DebugLoc.h:161
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:164
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:309
Utility class for floating point operations which can have information about relaxed accuracy require...
Definition Operator.h:200
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:22
Represents flags for the getelementptr instruction/expression.
static GEPNoWrapFlags none()
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
A struct for saving information about induction variables.
InductionKind
This enum represents the kinds of inductions that we support.
InnerLoopVectorizer vectorizes loops which contain only one basic block to a specified vectorization ...
The group of interleaved loads/stores sharing the same stride and close to each other.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
An instruction for reading from memory.
LoopVectorizationCostModel - estimates the expected speedups due to vectorization.
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
Metadata node.
Definition Metadata.h:1078
Root of the metadata hierarchy.
Definition Metadata.h:64
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
This class represents an analyzed expression in the program.
This class represents the LLVM 'select' instruction.
This class provides computation of slot numbers for LLVM Assembly writing.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:339
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
iterator erase(const_iterator CI)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
std::string str() const
str - Get the contents as an std::string.
Definition StringRef.h:225
This class represents a truncation of integer types.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
Definition Twine.cpp:17
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition Type.h:128
void execute(VPTransformState &State) override
Generate the active lane mask phi of the vector loop.
VPActiveLaneMaskPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3613
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPActiveLaneMaskPHIRecipe(VPValue *StartMask, DebugLoc DL)
Definition VPlan.h:3607
~VPActiveLaneMaskPHIRecipe() override=default
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition VPlan.h:3966
RecipeListTy::const_iterator const_iterator
Definition VPlan.h:3994
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
Definition VPlan.h:4041
RecipeListTy::const_reverse_iterator const_reverse_iterator
Definition VPlan.h:3996
RecipeListTy::iterator iterator
Instruction iterators...
Definition VPlan.h:3993
RecipeListTy & getRecipeList()
Returns a reference to the list of recipes.
Definition VPlan.h:4019
iplist< VPRecipeBase > RecipeListTy
Definition VPlan.h:3977
VPBasicBlock(const unsigned char BlockSC, const Twine &Name="")
Definition VPlan.h:3983
iterator end()
Definition VPlan.h:4003
iterator begin()
Recipe iterator methods.
Definition VPlan.h:4001
RecipeListTy::reverse_iterator reverse_iterator
Definition VPlan.h:3995
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
Definition VPlan.h:4054
const VPBasicBlock * getCFGPredecessor(unsigned Idx) const
Returns the predecessor block at index Idx with the predecessors as per the corresponding plain CFG.
Definition VPlan.cpp:770
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
Definition VPlan.cpp:216
~VPBasicBlock() override
Definition VPlan.h:3987
const_reverse_iterator rbegin() const
Definition VPlan.h:4007
reverse_iterator rend()
Definition VPlan.h:4008
RecipeListTy Recipes
The VPRecipes held in the order of output instructions to generate.
Definition VPlan.h:3981
VPRecipeBase & back()
Definition VPlan.h:4016
const VPRecipeBase & front() const
Definition VPlan.h:4013
const_iterator begin() const
Definition VPlan.h:4002
VPRecipeBase & front()
Definition VPlan.h:4014
const VPRecipeBase & back() const
Definition VPlan.h:4015
void insert(VPRecipeBase *Recipe, iterator InsertPt)
Definition VPlan.h:4032
bool empty() const
Definition VPlan.h:4012
const_iterator end() const
Definition VPlan.h:4004
static bool classof(const VPBlockBase *V)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:4027
static RecipeListTy VPBasicBlock::* getSublistAccess(VPRecipeBase *)
Returns a pointer to a member of the recipe list.
Definition VPlan.h:4022
reverse_iterator rbegin()
Definition VPlan.h:4006
friend class VPlan
Definition VPlan.h:3967
size_t size() const
Definition VPlan.h:4011
const_reverse_iterator rend() const
Definition VPlan.h:4009
VPValue * getIncomingValue(unsigned Idx) const
Return incoming value number Idx.
Definition VPlan.h:2541
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
Definition VPlan.h:2546
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account when normalized the first incoming value wi...
Definition VPlan.h:2536
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:2557
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2566
VPBlendRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2523
VPBlendRecipe(PHINode *Phi, ArrayRef< VPValue * > Operands, DebugLoc DL)
The blend operation is a User of the incoming values and of their respective masks,...
Definition VPlan.h:2518
void setMask(unsigned Idx, VPValue *V)
Set mask number Idx to V.
Definition VPlan.h:2552
bool isNormalized() const
A normalized blend is one that has an odd number of operands, whereby the first operand does not have...
Definition VPlan.h:2532
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition VPlan.h:81
void setSuccessors(ArrayRef< VPBlockBase * > NewSuccs)
Set each VPBasicBlock in NewSuccs as successor of this VPBlockBase.
Definition VPlan.h:300
VPRegionBlock * getParent()
Definition VPlan.h:173
VPBlocksTy & getPredecessors()
Definition VPlan.h:205
iterator_range< VPBlockBase ** > predecessors()
Definition VPlan.h:202
LLVM_DUMP_METHOD void dump() const
Dump this VPBlockBase to dbgs().
Definition VPlan.h:370
void setName(const Twine &newName)
Definition VPlan.h:166
size_t getNumSuccessors() const
Definition VPlan.h:219
iterator_range< VPBlockBase ** > successors()
Definition VPlan.h:201
virtual void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Print plain-text dump of this VPBlockBase to O, prefixing all lines with Indent.
bool hasPredecessors() const
Returns true if this block has any predecessors.
Definition VPlan.h:223
void swapSuccessors()
Swap successors of the block. The block must have exactly 2 successors.
Definition VPlan.h:322
void printSuccessors(raw_ostream &O, const Twine &Indent) const
Print the successors of this block to O, prefixing all lines with Indent.
Definition VPlan.cpp:645
SmallVectorImpl< VPBlockBase * > VPBlocksTy
Definition VPlan.h:160
virtual ~VPBlockBase()=default
const VPBlocksTy & getHierarchicalPredecessors()
Definition VPlan.h:258
unsigned getIndexForSuccessor(const VPBlockBase *Succ) const
Returns the index for Succ in the blocks successor list.
Definition VPlan.h:335
size_t getNumPredecessors() const
Definition VPlan.h:220
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
Definition VPlan.h:291
VPBlockBase * getEnclosingBlockWithPredecessors()
Definition VPlan.cpp:208
unsigned getIndexForPredecessor(const VPBlockBase *Pred) const
Returns the index for Pred in the blocks predecessors list.
Definition VPlan.h:328
const VPBlocksTy & getPredecessors() const
Definition VPlan.h:204
virtual VPBlockBase * clone()=0
Clone the current block and its recipes without updating the operands of the cloned recipes,...
enum { VPRegionBlockSC, VPBasicBlockSC, VPIRBasicBlockSC } VPBlockTy
An enumeration for keeping track of the concrete subclasses of VPBlockBase that are actually instantiat...
Definition VPlan.h:158
virtual InstructionCost cost(ElementCount VF, VPCostContext &Ctx)=0
Return the cost of the block.
void setPlan(VPlan *ParentPlan)
Sets the pointer of the plan containing the block.
Definition VPlan.cpp:180
const VPRegionBlock * getParent() const
Definition VPlan.h:174
const std::string & getName() const
Definition VPlan.h:164
void clearSuccessors()
Remove all the successors of this block.
Definition VPlan.h:310
VPBlockBase * getSingleHierarchicalSuccessor()
Definition VPlan.h:248
void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse)
Set two given VPBlockBases IfTrue and IfFalse to be the two successors of this VPBlockBase.
Definition VPlan.h:282
VPBlockBase * getSinglePredecessor() const
Definition VPlan.h:215
virtual void execute(VPTransformState *State)=0
The method which generates the output IR that corresponds to this VPBlockBase, thereby "executing" the...
const VPBlocksTy & getHierarchicalSuccessors()
Definition VPlan.h:242
void clearPredecessors()
Remove all the predecessors of this block.
Definition VPlan.h:307
friend class VPBlockUtils
Definition VPlan.h:82
unsigned getVPBlockID() const
Definition VPlan.h:171
void printAsOperand(raw_ostream &OS, bool PrintType=false) const
Definition VPlan.h:349
void swapPredecessors()
Swap predecessors of the block.
Definition VPlan.h:314
VPBlockBase(const unsigned char SC, const std::string &N)
Definition VPlan.h:150
VPBlocksTy & getSuccessors()
Definition VPlan.h:199
VPBlockBase * getEnclosingBlockWithSuccessors()
An Enclosing Block of a block B is any block containing B, including B itself.
Definition VPlan.cpp:200
const VPBasicBlock * getEntryBasicBlock() const
Definition VPlan.cpp:166
void setOneSuccessor(VPBlockBase *Successor)
Set a given VPBlockBase Successor as the single successor of this VPBlockBase.
Definition VPlan.h:271
void setParent(VPRegionBlock *P)
Definition VPlan.h:184
VPBlockBase * getSingleHierarchicalPredecessor()
Definition VPlan.h:264
VPBlockBase * getSingleSuccessor() const
Definition VPlan.h:209
const VPBlocksTy & getSuccessors() const
Definition VPlan.h:198
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlan.h:3037
VPBranchOnMaskRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3021
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition VPlan.h:3045
VPBranchOnMaskRecipe(VPValue *BlockInMask, DebugLoc DL)
Definition VPlan.h:3018
VPlan-based builder utility analogous to IRBuilder.
Canonical scalar induction phi of the vector loop.
Definition VPlan.h:3547
~VPCanonicalIVPHIRecipe() override=default
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3573
VPCanonicalIVPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3554
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition VPlan.h:3580
VPCanonicalIVPHIRecipe(VPValue *StartV, DebugLoc DL)
Definition VPlan.h:3549
Type * getScalarType() const
Returns the scalar type of the induction.
Definition VPlan.h:3568
void execute(VPTransformState &State) override
Generate the phi nodes.
Definition VPlan.h:3562
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPCanonicalIVPHIRecipe.
Definition VPlan.h:3587
This class augments a recipe with a set of VPValues defined by the recipe.
Definition VPlanValue.h:310
friend class VPValue
Definition VPlanValue.h:311
VPDef(const unsigned char SC)
Definition VPlanValue.h:389
void execute(VPTransformState &State) override
Generate the transformed value of the induction at offset StartValue (1.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPDerivedIVRecipe.
Definition VPlan.h:3755
VPValue * getStepValue() const
Definition VPlan.h:3766
Type * getScalarType() const
Definition VPlan.h:3761
VPDerivedIVRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3743
VPDerivedIVRecipe(InductionDescriptor::InductionKind Kind, const FPMathOperator *FPBinOp, VPValue *Start, VPValue *IV, VPValue *Step, const Twine &Name="")
Definition VPlan.h:3735
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPDerivedIVRecipe() override=default
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3769
VPValue * getStartValue() const
Definition VPlan.h:3765
VPDerivedIVRecipe(const InductionDescriptor &IndDesc, VPValue *Start, VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step, const Twine &Name="")
Definition VPlan.h:3727
Template specialization of the standard LLVM dominator tree utility for VPBlockBases.
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3663
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPEVLBasedIVPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3644
~VPEVLBasedIVPHIRecipe() override=default
void execute(VPTransformState &State) override
Generate the phi nodes.
Definition VPlan.h:3650
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPEVLBasedIVPHIRecipe.
Definition VPlan.h:3656
VPEVLBasedIVPHIRecipe(VPValue *StartIV, DebugLoc DL)
Definition VPlan.h:3639
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:3522
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPExpandSCEVRecipe.
Definition VPlan.h:3527
VPExpandSCEVRecipe(const SCEV *Expr)
Definition VPlan.h:3513
const SCEV * getSCEV() const
Definition VPlan.h:3533
VPExpandSCEVRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3518
~VPExpandSCEVRecipe() override=default
void execute(VPTransformState &State) override
Method for generating code, must not be called as this recipe is abstract.
Definition VPlan.h:3172
VPValue * getOperandOfResultType() const
Return the VPValue to use to infer the result type of the recipe.
Definition VPlan.h:3154
VPExpressionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3136
void decompose()
Insert the recipes of the expression back into the VPlan, directly before the current recipe.
~VPExpressionRecipe() override
Definition VPlan.h:3124
bool isSingleScalar() const
Returns true if the result of this VPExpressionRecipe is a single-scalar.
VPExpressionRecipe(VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1, VPWidenRecipe *Mul, VPWidenRecipe *Sub, VPReductionRecipe *Red)
Definition VPlan.h:3110
VPExpressionRecipe(VPWidenCastRecipe *Ext, VPReductionRecipe *Red)
Definition VPlan.h:3102
bool mayHaveSideEffects() const
Returns true if this expression contains recipes that may have side effects.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Compute the cost of this recipe either using a recipe's specialized implementation or using the legac...
bool mayReadOrWriteMemory() const
Returns true if this expression contains recipes that may read from or write to memory.
VPExpressionRecipe(VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1, VPWidenRecipe *Mul, VPReductionRecipe *Red)
Definition VPlan.h:3106
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
unsigned getVFScaleFactor() const
Definition VPlan.h:3166
VPExpressionRecipe(VPWidenRecipe *Mul, VPReductionRecipe *Red)
Definition VPlan.h:3104
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this header phi recipe.
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:2056
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:2069
static bool classof(const VPValue *V)
Definition VPlan.h:2066
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override=0
Print the recipe.
virtual VPValue * getBackedgeValue()
Returns the incoming value from the loop backedge.
Definition VPlan.h:2092
void setBackedgeValue(VPValue *V)
Update the incoming value from the loop backedge.
Definition VPlan.h:2097
VPValue * getStartValue()
Returns the start value of the phi, if one is set.
Definition VPlan.h:2081
void setStartValue(VPValue *V)
Update the start value of the recipe.
Definition VPlan.h:2089
static bool classof(const VPRecipeBase *R)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:2062
VPValue * getStartValue() const
Definition VPlan.h:2084
void execute(VPTransformState &State) override=0
Generate the phi nodes.
virtual VPRecipeBase & getBackedgeRecipe()
Returns the backedge value as a recipe.
Definition VPlan.h:2101
VPHeaderPHIRecipe(unsigned char VPDefID, Instruction *UnderlyingInstr, VPValue *Start, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2051
~VPHeaderPHIRecipe() override=default
void execute(VPTransformState &State) override
Produce a vectorized histogram operation.
VP_CLASSOF_IMPL(VPDef::VPHistogramSC)
VPHistogramRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1768
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPHistogramRecipe.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getMask() const
Return the mask operand if one was provided, or a null pointer if all lanes should be executed uncond...
Definition VPlan.h:1785
unsigned getOpcode() const
Definition VPlan.h:1781
VPHistogramRecipe(unsigned Opcode, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1762
~VPHistogramRecipe() override=default
A special type of VPBasicBlock that wraps an existing IR basic block.
Definition VPlan.h:4119
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPBasicBlock,...
Definition VPlan.cpp:446
BasicBlock * getIRBasicBlock() const
Definition VPlan.h:4143
static bool classof(const VPBlockBase *V)
Definition VPlan.h:4133
~VPIRBasicBlock() override=default
friend class VPlan
Definition VPlan.h:4120
VPIRBasicBlock * clone() override
Clone the current block and its recipes, without updating the operands of the cloned recipes.
Definition VPlan.cpp:471
Class to record and manage LLVM IR flags.
Definition VPlan.h:609
FastMathFlagsTy FMFs
Definition VPlan.h:680
LLVM_ABI_FOR_TEST bool flagsValidForOpcode(unsigned Opcode) const
Returns true if the set flags are valid for Opcode.
VPIRFlags(DisjointFlagsTy DisjointFlags)
Definition VPlan.h:740
VPIRFlags(WrapFlagsTy WrapFlags)
Definition VPlan.h:732
WrapFlagsTy WrapFlags
Definition VPlan.h:674
CmpInst::Predicate CmpPredicate
Definition VPlan.h:673
void printFlags(raw_ostream &O) const
VPIRFlags(CmpInst::Predicate Pred, FastMathFlags FMFs)
Definition VPlan.h:726
GEPNoWrapFlags GEPFlags
Definition VPlan.h:678
bool hasFastMathFlags() const
Returns true if the recipe has fast-math flags.
Definition VPlan.h:858
LLVM_ABI_FOR_TEST FastMathFlags getFastMathFlags() const
TruncFlagsTy TruncFlags
Definition VPlan.h:675
CmpInst::Predicate getPredicate() const
Definition VPlan.h:835
bool hasNonNegFlag() const
Returns true if the recipe has non-negative flag.
Definition VPlan.h:865
void transferFlags(VPIRFlags &Other)
Definition VPlan.h:749
ExactFlagsTy ExactFlags
Definition VPlan.h:677
bool hasNoSignedWrap() const
Definition VPlan.h:884
void intersectFlags(const VPIRFlags &Other)
Only keep flags also present in Other.
bool isDisjoint() const
Definition VPlan.h:895
VPIRFlags(TruncFlagsTy TruncFlags)
Definition VPlan.h:735
VPIRFlags(FastMathFlags FMFs)
Definition VPlan.h:738
VPIRFlags(NonNegFlagsTy NonNegFlags)
Definition VPlan.h:743
VPIRFlags(CmpInst::Predicate Pred)
Definition VPlan.h:723
bool isNonNeg() const
Definition VPlan.h:867
GEPNoWrapFlags getGEPNoWrapFlags() const
Definition VPlan.h:850
bool hasPredicate() const
Returns true if the recipe has a comparison predicate.
Definition VPlan.h:853
DisjointFlagsTy DisjointFlags
Definition VPlan.h:676
unsigned AllFlags
Definition VPlan.h:682
void setPredicate(CmpInst::Predicate Pred)
Definition VPlan.h:841
bool hasNoUnsignedWrap() const
Definition VPlan.h:873
FCmpFlagsTy FCmpFlags
Definition VPlan.h:681
NonNegFlagsTy NonNegFlags
Definition VPlan.h:679
void dropPoisonGeneratingFlags()
Drop all poison-generating flags.
Definition VPlan.h:759
void applyFlags(Instruction &I) const
Apply the IR flags to I.
Definition VPlan.h:795
VPIRFlags(GEPNoWrapFlags GEPFlags)
Definition VPlan.h:746
VPIRFlags(Instruction &I)
Definition VPlan.h:688
Instruction & getInstruction() const
Definition VPlan.h:1447
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first part of operand Op.
Definition VPlan.h:1455
void extractLastLaneOfLastPartOfFirstOperand(VPBuilder &Builder)
Update the recipe's first operand to the last lane of the last part of the operand using Builder.
~VPIRInstruction() override=default
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
VPIRInstruction * clone() override
Clone the current recipe.
Definition VPlan.h:1434
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition VPlan.h:1461
static LLVM_ABI_FOR_TEST VPIRInstruction * create(Instruction &I)
Create a new VPIRPhi for I, if it is a PHINode, otherwise create a VPIRInstruction.
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPIRInstruction.
bool usesScalars(const VPValue *Op) const override
Returns true if the VPUser uses scalars of operand Op.
Definition VPlan.h:1449
VPIRInstruction(Instruction &I)
VPIRInstruction::create() should be used to create VPIRInstructions, as subclasses may need to be cre...
Definition VPlan.h:1422
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Helper to manage IR metadata for recipes.
Definition VPlan.h:982
VPIRMetadata & operator=(const VPIRMetadata &Other)=default
MDNode * getMetadata(unsigned Kind) const
Get metadata of kind Kind. Returns nullptr if not found.
Definition VPlan.h:1018
VPIRMetadata(Instruction &I)
Adds metadata that can be preserved from the original instruction I.
Definition VPlan.h:990
VPIRMetadata(const VPIRMetadata &Other)=default
Copy constructor for cloning.
void intersect(const VPIRMetadata &MD)
Intersect this VPIRMetadata object with MD, keeping only metadata nodes that are common to both.
VPIRMetadata()=default
void print(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print metadata with node IDs.
void applyMetadata(Instruction &I) const
Add all metadata to I.
void setMetadata(unsigned Kind, MDNode *Node)
Set metadata with kind Kind to Node.
Definition VPlan.h:1002
VPInstructionWithType(unsigned Opcode, ArrayRef< VPValue * > Operands, Type *ResultTy, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Definition VPlan.h:1261
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPInstruction.
Definition VPlan.h:1302
static bool classof(const VPUser *R)
Definition VPlan.h:1287
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:1269
Type * getResultType() const
Definition VPlan.h:1308
VPInstruction * clone() override
Clone the current recipe.
Definition VPlan.h:1291
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the instruction.
This is a concrete Recipe that models a single VPlan-level instruction.
Definition VPlan.h:1036
VPInstruction * clone() override
Clone the current recipe.
Definition VPlan.h:1174
@ ExtractLane
Extracts a single lane (first operand) from a set of vector operands.
Definition VPlan.h:1127
@ ComputeAnyOfResult
Compute the final result of an AnyOf reduction with select(cmp(),x,y), where one of (x,...
Definition VPlan.h:1074
@ WideIVStep
Scale the first operand (vector step) by the second operand (scalar-step).
Definition VPlan.h:1117
@ ResumeForEpilogue
Explicit user for the resume phi of the canonical induction in the main VPlan, used by the epilogue v...
Definition VPlan.h:1130
@ Unpack
Extracts all lanes from its (non-scalable) vector operand.
Definition VPlan.h:1071
@ ReductionStartVector
Start vector for reductions with 3 operands: the original start value, the identity value for the red...
Definition VPlan.h:1121
@ BuildVector
Creates a fixed-width vector containing all operands.
Definition VPlan.h:1066
@ BuildStructVector
Given operands of (the same) struct type, creates a struct of fixed- width vectors each containing a ...
Definition VPlan.h:1063
@ VScale
Returns the value for vscale.
Definition VPlan.h:1132
@ CanonicalIVIncrementForPart
Definition VPlan.h:1056
bool hasResult() const
Definition VPlan.h:1198
StringRef getName() const
Returns the symbolic name assigned to the VPInstruction.
Definition VPlan.h:1238
unsigned getOpcode() const
Definition VPlan.h:1182
VPInstruction(unsigned Opcode, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
void setName(StringRef NewName)
Set the symbolic name for the VPInstruction.
Definition VPlan.h:1241
friend class VPlanSlp
Definition VPlan.h:1037
virtual unsigned getNumStoreOperands() const =0
Returns the number of stored operands of this interleave group.
bool usesFirstLaneOnly(const VPValue *Op) const override=0
Returns true if the recipe only uses the first lane of operand Op.
bool needsMaskForGaps() const
Return true if the access needs a mask because of the gaps.
Definition VPlan.h:2652
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:2658
static bool classof(const VPUser *U)
Definition VPlan.h:2634
VPInterleaveBase(const unsigned char SC, const InterleaveGroup< Instruction > *IG, ArrayRef< VPValue * > Operands, ArrayRef< VPValue * > StoredValues, VPValue *Mask, bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
Definition VPlan.h:2601
Instruction * getInsertPos() const
Definition VPlan.h:2656
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:2629
const InterleaveGroup< Instruction > * getInterleaveGroup() const
Definition VPlan.h:2654
VPValue * getMask() const
Return the mask used by this recipe.
Definition VPlan.h:2646
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
Definition VPlan.h:2675
VPInterleaveBase * clone() override=0
Clone the current recipe.
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition VPlan.h:2640
A recipe for interleaved memory operations with vector-predication intrinsics.
Definition VPlan.h:2728
bool usesFirstLaneOnly(const VPValue *Op) const override
The recipe only uses the first lane of the address, and EVL operand.
Definition VPlan.h:2756
VPValue * getEVL() const
The VPValue of the explicit vector length.
Definition VPlan.h:2750
~VPInterleaveEVLRecipe() override=default
unsigned getNumStoreOperands() const override
Returns the number of stored operands of this interleave group.
Definition VPlan.h:2763
VPInterleaveEVLRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2743
VPInterleaveEVLRecipe(VPInterleaveRecipe &R, VPValue &EVL, VPValue *Mask)
Definition VPlan.h:2730
VPInterleaveRecipe is a recipe for transforming an interleave group of load or stores into one wide l...
Definition VPlan.h:2686
unsigned getNumStoreOperands() const override
Returns the number of stored operands of this interleave group.
Definition VPlan.h:2713
~VPInterleaveRecipe() override=default
VPInterleaveRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2696
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2707
VPInterleaveRecipe(const InterleaveGroup< Instruction > *IG, VPValue *Addr, ArrayRef< VPValue * > StoredValues, VPValue *Mask, bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
Definition VPlan.h:2688
In what follows, the term "input IR" refers to code that is fed into the vectorizer whereas the term ...
Helper type to provide functions to access incoming values and blocks for phi-like recipes.
Definition VPlan.h:1320
virtual const VPRecipeBase * getAsRecipe() const =0
Return a VPRecipeBase* to the current object.
VPUser::const_operand_range incoming_values() const
Returns an iterator range over the incoming values.
Definition VPlan.h:1342
virtual unsigned getNumIncoming() const
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:1337
void removeIncomingValueFor(VPBlockBase *IncomingBlock) const
Removes the incoming value for IncomingBlock, which must be a predecessor.
const VPBasicBlock * getIncomingBlock(unsigned Idx) const
Returns the incoming block with index Idx.
Definition VPlan.h:4110
detail::zippy< llvm::detail::zip_first, VPUser::const_operand_range, const_incoming_blocks_range > incoming_values_and_blocks() const
Returns an iterator range over pairs of incoming values and corresponding incoming blocks.
Definition VPlan.h:1362
VPValue * getIncomingValue(unsigned Idx) const
Returns the incoming VPValue with index Idx.
Definition VPlan.h:1329
virtual ~VPPhiAccessors()=default
void printPhiOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the recipe.
iterator_range< mapped_iterator< detail::index_iterator, std::function< const VPBasicBlock *(size_t)> > > const_incoming_blocks_range
Definition VPlan.h:1347
const_incoming_blocks_range incoming_blocks() const
Returns an iterator range over the incoming blocks.
Definition VPlan.h:1351
~VPPredInstPHIRecipe() override=default
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition VPlan.h:3229
VPPredInstPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3211
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPPredInstPHIRecipe.
Definition VPlan.h:3222
VPPredInstPHIRecipe(VPValue *PredV, DebugLoc DL)
Construct a VPPredInstPHIRecipe given PredInst whose value needs a phi node after merging back from ...
Definition VPlan.h:3207
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition VPlan.h:387
bool mayReadFromMemory() const
Returns true if the recipe may read from memory.
bool mayReadOrWriteMemory() const
Returns true if the recipe may read from or write to memory.
Definition VPlan.h:474
virtual void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Each concrete VPRecipe prints itself, without printing common information, like debug info or metadat...
VPRegionBlock * getRegion()
Definition VPlan.h:4271
void setDebugLoc(DebugLoc NewDL)
Set the recipe's debug location to NewDL.
Definition VPlan.h:485
bool mayWriteToMemory() const
Returns true if the recipe may write to memory.
~VPRecipeBase() override=default
VPBasicBlock * getParent()
Definition VPlan.h:408
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
Definition VPlan.h:479
virtual void execute(VPTransformState &State)=0
The method which generates the output IR instructions that correspond to this VPRecipe,...
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
static bool classof(const VPDef *D)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:454
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
virtual VPRecipeBase * clone()=0
Clone the current recipe.
friend class VPBlockUtils
Definition VPlan.h:389
const VPBasicBlock * getParent() const
Definition VPlan.h:409
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this recipe, taking into account if the cost computation should be skipped and the...
static bool classof(const VPUser *U)
Definition VPlan.h:459
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
void moveAfter(VPRecipeBase *MovePos)
Unlink this recipe from its current VPBasicBlock and insert it into the VPBasicBlock that MovePos liv...
VPRecipeBase(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:398
VPValue * getEVL() const
The VPValue of the explicit vector length.
Definition VPlan.h:2913
VPReductionEVLRecipe(VPReductionRecipe &R, VPValue &EVL, VPValue *CondOp, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2892
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2916
VPReductionEVLRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2903
~VPReductionEVLRecipe() override=default
bool isOrdered() const
Returns true, if the phi is part of an ordered reduction.
Definition VPlan.h:2479
VPReductionPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2450
unsigned getVFScaleFactor() const
Get the factor that the VF of this recipe's output should be scaled by, or 1 if it isn't scaled.
Definition VPlan.h:2465
~VPReductionPHIRecipe() override=default
bool hasUsesOutsideReductionChain() const
Returns true, if the phi is part of a multi-use reduction.
Definition VPlan.h:2491
unsigned getNumIncoming() const override
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:2473
bool isInLoop() const
Returns true if the phi is part of an in-loop reduction.
Definition VPlan.h:2482
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2496
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi/select nodes.
bool isPartialReduction() const
Returns true if the reduction outputs a vector with a scaled down VF.
Definition VPlan.h:2488
VPReductionPHIRecipe(PHINode *Phi, RecurKind Kind, VPValue &Start, ReductionStyle Style, bool HasUsesOutsideReductionChain=false)
Create a new VPReductionPHIRecipe for the reduction Phi.
Definition VPlan.h:2441
RecurKind getRecurrenceKind() const
Returns the recurrence kind of the reduction.
Definition VPlan.h:2476
A recipe to represent inloop, ordered or partial reduction operations.
Definition VPlan.h:2779
VPReductionRecipe(const unsigned char SC, RecurKind RdxKind, FastMathFlags FMFs, Instruction *I, ArrayRef< VPValue * > Operands, VPValue *CondOp, ReductionStyle Style, DebugLoc DL)
Definition VPlan.h:2788
bool isConditional() const
Return true if the in-loop reduction is conditional.
Definition VPlan.h:2855
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:2824
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:2839
VPValue * getVecOp() const
The VPValue of the vector value to be reduced.
Definition VPlan.h:2866
VPValue * getCondOp() const
The VPValue of the condition for the block.
Definition VPlan.h:2868
RecurKind getRecurrenceKind() const
Return the recurrence kind for the in-loop reduction.
Definition VPlan.h:2851
VPReductionRecipe(RecurKind RdxKind, FastMathFlags FMFs, Instruction *I, VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp, ReductionStyle Style, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2802
bool isOrdered() const
Return true if the in-loop reduction is ordered.
Definition VPlan.h:2853
VPReductionRecipe(const RecurKind RdxKind, FastMathFlags FMFs, VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp, ReductionStyle Style, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2809
bool isPartialReduction() const
Returns true if the reduction outputs a vector with a scaled down VF.
Definition VPlan.h:2857
~VPReductionRecipe() override=default
VPValue * getChainOp() const
The VPValue of the scalar Chain being accumulated.
Definition VPlan.h:2864
bool isInLoop() const
Returns true if the reduction is in-loop.
Definition VPlan.h:2859
VPReductionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2818
static bool classof(const VPUser *U)
Definition VPlan.h:2829
static bool classof(const VPValue *VPV)
Definition VPlan.h:2834
unsigned getVFScaleFactor() const
Get the factor that the VF of this recipe's output should be scaled by, or 1 if it isn't scaled.
Definition VPlan.h:2873
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition VPlan.h:4154
const VPBlockBase * getEntry() const
Definition VPlan.h:4190
Type * getCanonicalIVType()
Return the type of the canonical IV for loop regions.
Definition VPlan.h:4265
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
Definition VPlan.h:4222
~VPRegionBlock() override=default
void setExiting(VPBlockBase *ExitingBlock)
Set ExitingBlock as the exiting VPBlockBase of this VPRegionBlock.
Definition VPlan.h:4207
VPBlockBase * getExiting()
Definition VPlan.h:4203
VPCanonicalIVPHIRecipe * getCanonicalIV()
Returns the canonical induction recipe of the region.
Definition VPlan.h:4252
void setEntry(VPBlockBase *EntryBlock)
Set EntryBlock as the entry VPBlockBase of this VPRegionBlock.
Definition VPlan.h:4195
const Type * getCanonicalIVType() const
Definition VPlan.h:4266
const VPBlockBase * getExiting() const
Definition VPlan.h:4202
VPBlockBase * getEntry()
Definition VPlan.h:4191
const VPCanonicalIVPHIRecipe * getCanonicalIV() const
Definition VPlan.h:4260
VPBasicBlock * getPreheaderVPBB()
Returns the pre-header VPBasicBlock of the loop region.
Definition VPlan.h:4215
friend class VPlan
Definition VPlan.h:4155
static bool classof(const VPBlockBase *V)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:4186
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
Definition VPlan.h:2935
bool isSingleScalar() const
Definition VPlan.h:2976
VPReplicateRecipe(Instruction *I, ArrayRef< VPValue * > Operands, bool IsSingleScalar, VPValue *Mask=nullptr, const VPIRFlags &Flags={}, VPIRMetadata Metadata={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2943
~VPReplicateRecipe() override=default
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition VPlan.h:2988
bool isPredicated() const
Definition VPlan.h:2978
VPReplicateRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2957
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2981
unsigned getOpcode() const
Definition VPlan.h:3005
VPValue * getMask()
Return the mask of a predicated VPReplicateRecipe.
Definition VPlan.h:3000
VPValue * getStepValue() const
Definition VPlan.h:3832
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPScalarIVStepsRecipe.
Definition VPlan.h:3826
VPScalarIVStepsRecipe(const InductionDescriptor &IndDesc, VPValue *IV, VPValue *Step, VPValue *VF, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:3797
bool isPart0() const
Return true if this VPScalarIVStepsRecipe corresponds to part 0.
Definition VPlan.h:3818
VPScalarIVStepsRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3809
VPScalarIVStepsRecipe(VPValue *IV, VPValue *Step, VPValue *VF, Instruction::BinaryOps Opcode, FastMathFlags FMFs, DebugLoc DL)
Definition VPlan.h:3790
~VPScalarIVStepsRecipe() override=default
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3835
VPSingleDefRecipe is a base class for recipes for modeling a sequence of one or more output IR that define ...
Definition VPlan.h:531
VPSingleDefRecipe(const unsigned char SC, ArrayRef< VPValue * > Operands, Value *UV, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:537
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
Definition VPlan.h:595
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:541
const Instruction * getUnderlyingInstr() const
Definition VPlan.h:598
static bool classof(const VPUser *U)
Definition VPlan.h:587
LLVM_ABI_FOR_TEST LLVM_DUMP_METHOD void dump() const
Print this VPSingleDefRecipe to dbgs() (for debugging).
VPSingleDefRecipe * clone() override=0
Clone the current recipe.
VPSingleDefRecipe(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:533
This class can be used to assign names to VPValues.
Helper to access the operand that contains the unroll part for this recipe after unrolling.
Definition VPlan.h:970
VPValue * getUnrollPartOperand(const VPUser &U) const
Return the VPValue operand containing the unroll part or null if there is no such operand.
unsigned getUnrollPart(const VPUser &U) const
Return the unroll part.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition VPlanValue.h:207
void printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the operands to O.
Definition VPlan.cpp:1420
operand_range operands()
Definition VPlanValue.h:275
void setOperand(unsigned I, VPValue *New)
Definition VPlanValue.h:251
unsigned getNumOperands() const
Definition VPlanValue.h:245
operand_iterator op_end()
Definition VPlanValue.h:273
VPValue * getOperand(unsigned N) const
Definition VPlanValue.h:246
VPUser(ArrayRef< VPValue * > Operands)
Definition VPlanValue.h:226
iterator_range< const_operand_iterator > const_operand_range
Definition VPlanValue.h:269
iterator_range< operand_iterator > operand_range
Definition VPlanValue.h:268
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
Definition VPlanValue.h:48
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Definition VPlan.cpp:131
friend class VPExpressionRecipe
Definition VPlanValue.h:53
Value * getLiveInIRValue() const
Returns the underlying IR value, if this VPValue is defined outside the scope of VPlan.
Definition VPlanValue.h:183
friend class VPDef
Definition VPlanValue.h:49
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
Definition VPlanValue.h:85
VPValue(const unsigned char SC, Value *UV=nullptr, VPDef *Def=nullptr)
Definition VPlan.cpp:94
void setUnderlyingValue(Value *Val)
Definition VPlanValue.h:193
unsigned getNumUsers() const
Definition VPlanValue.h:113
bool isLiveIn() const
Returns true if this VPValue is a live-in, i.e. defined outside the VPlan.
Definition VPlanValue.h:178
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition VPlan.h:1934
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPVectorEndPointerRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1955
const VPValue * getVFValue() const
Definition VPlan.h:1930
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition VPlan.h:1948
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPVectorEndPointerRecipe.
Definition VPlan.h:1941
VPVectorEndPointerRecipe(VPValue *Ptr, VPValue *VF, Type *IndexedTy, int64_t Stride, GEPNoWrapFlags GEPFlags, DebugLoc DL)
Definition VPlan.h:1919
bool isFirstPart() const
Return true if this VPVectorPointerRecipe corresponds to part 0.
Definition VPlan.h:2008
Type * getSourceElementType() const
Definition VPlan.h:1985
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition VPlan.h:1987
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition VPlan.h:1994
VPVectorPointerRecipe(VPValue *Ptr, Type *SourceElementTy, GEPNoWrapFlags GEPFlags, DebugLoc DL)
Definition VPlan.h:1975
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPVectorPointerRecipe.
Definition VPlan.h:2011
VPVectorPointerRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2001
A recipe for widening Call instructions using library calls.
Definition VPlan.h:1702
VPWidenCallRecipe(Value *UV, Function *Variant, ArrayRef< VPValue * > CallArguments, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL={})
Definition VPlan.h:1709
const_operand_range args() const
Definition VPlan.h:1742
VPWidenCallRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1723
operand_range args()
Definition VPlan.h:1741
Function * getCalledScalarFunction() const
Definition VPlan.h:1737
~VPWidenCallRecipe() override=default
void execute(VPTransformState &State) override
Generate a canonical vector induction variable of the vector loop, with start = {<Part*VF,...
~VPWidenCanonicalIVRecipe() override=default
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCanonicalIVRecipe.
Definition VPlan.h:3699
VPWidenCanonicalIVRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3686
VPWidenCanonicalIVRecipe(VPCanonicalIVPHIRecipe *CanonicalIV)
Definition VPlan.h:3681
VPWidenCastRecipe is a recipe to create vector cast instructions.
Definition VPlan.h:1552
Instruction::CastOps getOpcode() const
Definition VPlan.h:1588
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Type * getResultType() const
Returns the result type of the cast.
Definition VPlan.h:1591
LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override
Produce widened copies of the cast.
~VPWidenCastRecipe() override=default
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCastRecipe.
VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, CastInst *CI=nullptr, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1560
VPWidenCastRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1573
unsigned getOpcode() const
This recipe generates a GEP instruction.
Definition VPlan.h:1882
Type * getSourceElementType() const
Definition VPlan.h:1887
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenGEPRecipe.
Definition VPlan.h:1890
VPWidenGEPRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1874
~VPWidenGEPRecipe() override=default
VPWidenGEPRecipe(GetElementPtrInst *GEP, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1860
void execute(VPTransformState &State) override=0
Generate the phi nodes.
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2178
static bool classof(const VPValue *V)
Definition VPlan.h:2132
void setStepValue(VPValue *V)
Update the step value of the recipe.
Definition VPlan.h:2148
VPValue * getBackedgeValue() override
Returns the incoming value from the loop backedge.
Definition VPlan.h:2163
unsigned getNumIncoming() const override
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:2156
PHINode * getPHINode() const
Definition VPlan.h:2158
VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start, VPValue *Step, const InductionDescriptor &IndDesc, DebugLoc DL)
Definition VPlan.h:2120
VPValue * getStepValue()
Returns the step value of the induction.
Definition VPlan.h:2144
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
Definition VPlan.h:2161
VPRecipeBase & getBackedgeRecipe() override
Returns the backedge value as a recipe.
Definition VPlan.h:2170
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:2127
const VPValue * getVFValue() const
Definition VPlan.h:2151
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:2137
const VPValue * getStepValue() const
Definition VPlan.h:2145
const TruncInst * getTruncInst() const
Definition VPlan.h:2252
void execute(VPTransformState &State) override
Generate the phi nodes.
Definition VPlan.h:2233
~VPWidenIntOrFpInductionRecipe() override=default
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step, VPValue *VF, const InductionDescriptor &IndDesc, TruncInst *Trunc, const VPIRFlags &Flags, DebugLoc DL)
Definition VPlan.h:2208
VPWidenIntOrFpInductionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2225
TruncInst * getTruncInst()
Returns the first defined value as TruncInst, if it is one or nullptr otherwise.
Definition VPlan.h:2251
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step, VPValue *VF, const InductionDescriptor &IndDesc, const VPIRFlags &Flags, DebugLoc DL)
Definition VPlan.h:2199
VPValue * getLastUnrolledPartOperand()
Returns the VPValue representing the value of this induction at the last unrolled part,...
Definition VPlan.h:2268
unsigned getNumIncoming() const override
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:2247
Type * getScalarType() const
Returns the scalar type of the induction.
Definition VPlan.h:2260
bool isCanonical() const
Returns true if the induction is canonical, i.e.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
A recipe for widening vector intrinsics.
Definition VPlan.h:1602
VPWidenIntrinsicRecipe(Intrinsic::ID VectorIntrinsicID, ArrayRef< VPValue * > CallArguments, Type *Ty, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1633
Intrinsic::ID getVectorIntrinsicID() const
Return the ID of the intrinsic.
Definition VPlan.h:1673
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool mayReadFromMemory() const
Returns true if the intrinsic may read from memory.
Definition VPlan.h:1682
StringRef getIntrinsicName() const
Return the name of the intrinsic as a string.
VPWidenIntrinsicRecipe(CallInst &CI, Intrinsic::ID VectorIntrinsicID, ArrayRef< VPValue * > CallArguments, Type *Ty, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1619
bool mayHaveSideEffects() const
Returns true if the intrinsic may have side-effects.
Definition VPlan.h:1688
VPWidenIntrinsicRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1654
bool mayWriteToMemory() const
Returns true if the intrinsic may write to memory.
Definition VPlan.h:1685
~VPWidenIntrinsicRecipe() override=default
LLVM_ABI_FOR_TEST bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Type * getResultType() const
Return the scalar return type of the intrinsic.
Definition VPlan.h:1676
LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override
Produce a widened version of the vector intrinsic.
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this vector intrinsic.
bool IsMasked
Whether the memory access is masked.
Definition VPlan.h:3260
bool Reverse
Whether the consecutive accessed addresses are in reverse order.
Definition VPlan.h:3257
bool isConsecutive() const
Return whether the loaded-from / stored-to addresses are consecutive.
Definition VPlan.h:3300
static bool classof(const VPUser *U)
Definition VPlan.h:3294
void execute(VPTransformState &State) override
Generate the wide load/store.
Definition VPlan.h:3323
Instruction & Ingredient
Definition VPlan.h:3248
VPWidenMemoryRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3283
Instruction & getIngredient() const
Definition VPlan.h:3331
bool Consecutive
Whether the accessed addresses are consecutive.
Definition VPlan.h:3254
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:3287
VPValue * getMask() const
Return the mask used by this recipe.
Definition VPlan.h:3314
Align Alignment
Alignment information for this memory access.
Definition VPlan.h:3251
bool isMasked() const
Returns true if the recipe is masked.
Definition VPlan.h:3310
VPWidenMemoryRecipe(const char unsigned SC, Instruction &I, std::initializer_list< VPValue * > Operands, bool Consecutive, bool Reverse, const VPIRMetadata &Metadata, DebugLoc DL)
Definition VPlan.h:3270
void setMask(VPValue *Mask)
Definition VPlan.h:3262
Align getAlign() const
Returns the alignment of the memory access.
Definition VPlan.h:3320
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition VPlan.h:3307
bool isReverse() const
Return whether the consecutive loaded/stored addresses are in reverse order.
Definition VPlan.h:3304
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:2362
VPWidenPHIRecipe(PHINode *Phi, VPValue *Start=nullptr, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new VPWidenPHIRecipe for Phi with start value Start and debug location DL.
Definition VPlan.h:2333
VPWidenPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2340
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPWidenPHIRecipe() override=default
void execute(VPTransformState &State) override
Generate the phi/select nodes.
VPWidenPointerInductionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2295
~VPWidenPointerInductionRecipe() override=default
bool onlyScalarsGenerated(bool IsScalable)
Returns true if only scalar values will be generated.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate vector values for the pointer induction.
Definition VPlan.h:2304
VPWidenPointerInductionRecipe(PHINode *Phi, VPValue *Start, VPValue *Step, VPValue *NumUnrolledElems, const InductionDescriptor &IndDesc, DebugLoc DL)
Create a new VPWidenPointerInductionRecipe for Phi with start value Start and the number of elements ...
Definition VPlan.h:2285
VPWidenRecipe is a recipe for producing a widened instruction using the opcode and operands of the re...
Definition VPlan.h:1512
VPWidenRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1526
VPWidenRecipe(Instruction &I, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL={})
Definition VPlan.h:1516
~VPWidenRecipe() override=default
unsigned getOpcode() const
Definition VPlan.h:1541
Class that maps (parts of) an existing VPlan to trees of combined VPInstructions.
Definition VPlanSLP.h:74
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient outp...
Definition VPlan.h:4284
LLVM_ABI_FOR_TEST void printDOT(raw_ostream &O) const
Print this VPlan in DOT format to O.
Definition VPlan.cpp:1106
friend class VPSlotTracker
Definition VPlan.h:4286
std::string getName() const
Return a string with the name of the plan and the applicable VFs and UFs.
Definition VPlan.cpp:1082
bool hasVF(ElementCount VF) const
Definition VPlan.h:4489
LLVMContext & getContext() const
Definition VPlan.h:4477
VPBasicBlock * getEntry()
Definition VPlan.h:4377
VPValue & getVectorTripCount()
The vector trip count.
Definition VPlan.h:4468
void setName(const Twine &newName)
Definition VPlan.h:4527
bool hasScalableVF() const
Definition VPlan.h:4490
VPValue & getVFxUF()
Returns VF * UF of the vector loop region.
Definition VPlan.h:4475
VPValue & getVF()
Returns the VF of the vector loop region.
Definition VPlan.h:4471
VPValue * getTripCount() const
The trip count of the original loop.
Definition VPlan.h:4439
VPValue * getTrue()
Return a VPValue wrapping i1 true.
Definition VPlan.h:4546
VPValue * getOrCreateBackedgeTakenCount()
The backedge taken count of the original loop.
Definition VPlan.h:4460
iterator_range< SmallSetVector< ElementCount, 2 >::iterator > vectorFactors() const
Returns an iterator range over all VFs of the plan.
Definition VPlan.h:4496
VPIRBasicBlock * getExitBlock(BasicBlock *IRBB) const
Return the VPIRBasicBlock corresponding to IRBB.
Definition VPlan.cpp:890
LLVM_ABI_FOR_TEST ~VPlan()
Definition VPlan.cpp:868
const VPValue & getVF() const
Definition VPlan.h:4472
bool isExitBlock(VPBlockBase *VPBB)
Returns true if VPBB is an exit block.
Definition VPlan.cpp:898
const VPBasicBlock * getEntry() const
Definition VPlan.h:4378
friend class VPlanPrinter
Definition VPlan.h:4285
VPValue * getConstantInt(const APInt &Val)
Return a VPValue wrapping a ConstantInt with the given APInt value.
Definition VPlan.h:4563
unsigned getUF() const
Definition VPlan.h:4509
VPRegionBlock * createReplicateRegion(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name="")
Create a new replicate region with Entry, Exiting and Name.
Definition VPlan.h:4623
VPIRBasicBlock * createEmptyVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock wrapping IRBB, but do not create VPIRInstructions wrapping the instructions i...
Definition VPlan.cpp:1220
bool hasUF(unsigned UF) const
Definition VPlan.h:4507
ArrayRef< VPIRBasicBlock * > getExitBlocks() const
Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of the original scalar loop.
Definition VPlan.h:4429
VPValue * getConstantInt(Type *Ty, uint64_t Val, bool IsSigned=false)
Return a VPValue wrapping a ConstantInt with the given type and value.
Definition VPlan.h:4552
VPValue * getBackedgeTakenCount() const
Definition VPlan.h:4465
void setVF(ElementCount VF)
Definition VPlan.h:4483
bool isUnrolled() const
Returns true if the VPlan already has been unrolled, i.e.
Definition VPlan.h:4522
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition VPlan.cpp:1011
bool hasEarlyExit() const
Returns true if the VPlan is based on a loop with an early exit.
Definition VPlan.h:4645
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this plan.
Definition VPlan.cpp:993
const VPBasicBlock * getMiddleBlock() const
Definition VPlan.h:4415
void setTripCount(VPValue *NewTripCount)
Set the trip count assuming it is currently null; if it is not - use resetTripCount().
Definition VPlan.h:4446
void resetTripCount(VPValue *NewTripCount)
Resets the trip count for the VPlan.
Definition VPlan.h:4453
VPBasicBlock * getMiddleBlock()
Returns the 'middle' block of the plan, that is the block that selects whether to execute the scalar ...
Definition VPlan.h:4402
void setEntry(VPBasicBlock *VPBB)
Definition VPlan.h:4366
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
Definition VPlan.h:4601
LLVM_ABI_FOR_TEST VPIRBasicBlock * createVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock from IRBB containing VPIRInstructions for all instructions in IRBB,...
Definition VPlan.cpp:1226
VPValue * getFalse()
Return a VPValue wrapping i1 false.
Definition VPlan.h:4549
VPValue * getOrAddLiveIn(Value *V)
Gets the live-in VPValue for V or adds a new live-in (if none exists yet) for V.
Definition VPlan.h:4531
VPRegionBlock * createLoopRegion(const std::string &Name="", VPBlockBase *Entry=nullptr, VPBlockBase *Exiting=nullptr)
Create a new loop region with Name and entry and exiting blocks set to Entry and Exiting respectively...
Definition VPlan.h:4611
LLVM_DUMP_METHOD void dump() const
Dump the plan to stderr (for debugging).
Definition VPlan.cpp:1112
bool hasScalarVFOnly() const
Definition VPlan.h:4500
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
Definition VPlan.h:4420
void execute(VPTransformState *State)
Generate the IR code for this VPlan.
Definition VPlan.cpp:905
ArrayRef< VPValue * > getLiveIns() const
Return the list of live-in VPValues available in the VPlan.
Definition VPlan.h:4571
LLVM_ABI_FOR_TEST void print(raw_ostream &O) const
Print this VPlan to O.
Definition VPlan.cpp:1065
void addVF(ElementCount VF)
Definition VPlan.h:4481
VPIRBasicBlock * getScalarHeader() const
Return the VPIRBasicBlock wrapping the header of the scalar loop.
Definition VPlan.h:4425
VPValue * getLiveIn(Value *V) const
Return the live-in VPValue for V, if there is one or nullptr otherwise.
Definition VPlan.h:4568
VPValue * getConstantInt(unsigned BitWidth, uint64_t Val, bool IsSigned=false)
Return a VPValue wrapping a ConstantInt with the given bitwidth and value.
Definition VPlan.h:4557
void printLiveIns(raw_ostream &O) const
Print the live-ins of this VPlan to O.
Definition VPlan.cpp:1027
VPBasicBlock * getVectorPreheader()
Returns the preheader of the vector loop region, if one exists, or null otherwise.
Definition VPlan.h:4382
void setUF(unsigned UF)
Definition VPlan.h:4514
bool hasScalarTail() const
Returns true if the scalar tail may execute after the vector loop.
Definition VPlan.h:4655
LLVM_ABI_FOR_TEST VPlan * duplicate()
Clone the current VPlan, update all VPValues of the new VPlan and cloned recipes to refer to the clon...
Definition VPlan.cpp:1153
VPlan(BasicBlock *ScalarHeaderBB)
Construct a VPlan with a new VPBasicBlock as entry, a VPIRBasicBlock wrapping ScalarHeaderBB and a tr...
Definition VPlan.h:4359
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
Increasing range of size_t indices.
Definition STLExtras.h:2437
typename base_list_type::const_reverse_iterator const_reverse_iterator
Definition ilist.h:124
typename base_list_type::reverse_iterator reverse_iterator
Definition ilist.h:123
typename base_list_type::const_iterator const_iterator
Definition ilist.h:122
An intrusive list with ownership and callbacks specified/controlled by ilist_traits,...
Definition ilist.h:328
A range adaptor for a pair of iterators.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
This file defines classes to implement an intrusive doubly linked list class (i.e.
This file defines the ilist_node class template, which is a convenient base class for creating classe...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI AttributeSet getFnAttributes(LLVMContext &C, ID id)
Return the function attributes for an intrinsic.
std::variant< std::monostate, Loc::Single, Loc::Multi, Loc::MMI, Loc::EntryValue > Variant
Alias for the std::variant specialization base class of DbgVariable.
Definition DwarfDebug.h:189
static auto castToVPIRMetadata(RecipeBasePtrTy R) -> DstTy
Definition VPlan.h:3898
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iterable types.
Definition STLExtras.h:829
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1763
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1737
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
Definition STLExtras.h:839
ReductionStyle getReductionStyle(bool InLoop, bool Ordered, unsigned ScaleFactor)
Definition VPlan.h:2413
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2484
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI void getMetadataToPropagate(Instruction *Inst, SmallVectorImpl< std::pair< unsigned, MDNode * > > &Metadata)
Add metadata from Inst to Metadata, if it can be preserved after vectorization.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
auto cast_or_null(const Y &Val)
Definition Casting.h:714
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
MemoryEffectsBase< IRMemLocation > MemoryEffects
Summary of how a function affects memory in the program.
Definition ModRef.h:301
auto map_range(ContainerTy &&C, FuncTy F)
Definition STLExtras.h:364
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1744
auto reverse(ContainerTy &&C)
Definition STLExtras.h:406
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition STLExtras.h:323
@ Other
Any other memory.
Definition ModRef.h:68
RecurKind
These are the kinds of recurrences that we support.
@ Mul
Product of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition STLExtras.h:1966
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
constexpr unsigned BitWidth
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:1973
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1770
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1909
DenseMap< Value *, VPValue * > Value2VPValueTy
Definition VPlanValue.h:199
std::variant< RdxOrdered, RdxInLoop, RdxUnordered > ReductionStyle
Definition VPlan.h:2411
std::unique_ptr< VPlan > VPlanPtr
Definition VPlan.h:77
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Support casting from VPRecipeBase -> VPIRMetadata, by down-casting to the recipe types implementing V...
Definition VPlan.h:3931
static RetTy doCast(SrcTy R)
doCast is used by cast<>.
Definition VPlan.h:3945
static RetTy doCastIfPossible(SrcTy R)
doCastIfPossible is used by dyn_cast<>.
Definition VPlan.h:3950
static bool isPossible(SrcTy R)
Definition VPlan.h:3932
Support casting from VPRecipeBase -> VPPhiAccessors, by down-casting to the recipe types implementing...
Definition VPlan.h:3860
static VPPhiAccessors * doCastIfPossible(SrcTy f)
doCastIfPossible is used by dyn_cast<>.
Definition VPlan.h:3881
CastInfo< VPPhiAccessors, SrcTy > Self
Definition VPlan.h:3862
static VPPhiAccessors * doCast(SrcTy R)
doCast is used by cast<>.
Definition VPlan.h:3865
This struct provides a method for customizing the way a cast is performed.
Definition Casting.h:476
static bool isPossible(const VPRecipeBase *f)
Definition VPlan.h:3852
This struct provides a way to check if a given cast is possible.
Definition Casting.h:253
static bool isPossible(const SrcTy &f)
Definition Casting.h:254
This reduction is in-loop.
Definition VPlan.h:2405
Possible variants of a reduction.
Definition VPlan.h:2403
This reduction is unordered with the partial result scaled down by some factor.
Definition VPlan.h:2408
unsigned VFScaleFactor
Definition VPlan.h:2409
Struct to hold various analysis needed for cost computations.
void execute(VPTransformState &State) override
Generate the phi nodes.
VPFirstOrderRecurrencePHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2374
VPFirstOrderRecurrencePHIRecipe(PHINode *Phi, VPValue &Start)
Definition VPlan.h:2369
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this first-order recurrence phi recipe.
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2386
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
DisjointFlagsTy(bool IsDisjoint)
Definition VPlan.h:640
NonNegFlagsTy(bool IsNonNeg)
Definition VPlan.h:645
TruncFlagsTy(bool HasNUW, bool HasNSW)
Definition VPlan.h:635
WrapFlagsTy(bool HasNUW, bool HasNSW)
Definition VPlan.h:628
PHINode & getIRPhi()
Definition VPlan.h:1493
VPIRPhi(PHINode &PN)
Definition VPlan.h:1486
static bool classof(const VPRecipeBase *U)
Definition VPlan.h:1488
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:1504
static bool classof(const VPUser *U)
Definition VPlan.h:1380
VPPhi * clone() override
Clone the current recipe.
Definition VPlan.h:1395
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:1410
VPPhi(ArrayRef< VPValue * > Operands, DebugLoc DL, const Twine &Name="")
Definition VPlan.h:1377
static bool classof(const VPSingleDefRecipe *SDR)
Definition VPlan.h:1390
static bool classof(const VPValue *V)
Definition VPlan.h:1385
A pure-virtual common base class for recipes defining a single VPValue and using IR flags.
Definition VPlan.h:923
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:929
InstructionCost getCostForRecipeWithOpcode(unsigned Opcode, ElementCount VF, VPCostContext &Ctx) const
Compute the cost for this recipe for VF, using Opcode and Ctx.
VPRecipeWithIRFlags(const unsigned char SC, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:924
static bool classof(const VPValue *V)
Definition VPlan.h:949
static bool classof(const VPSingleDefRecipe *U)
Definition VPlan.h:956
void execute(VPTransformState &State) override=0
The method which generates the output IR instructions that correspond to this VPRecipe,...
VPRecipeWithIRFlags * clone() override=0
Clone the current recipe.
static bool classof(const VPUser *U)
Definition VPlan.h:944
VPTransformState holds information passed down when "executing" a VPlan, needed for generating the ou...
A recipe for widening load operations with vector-predication intrinsics, using the address to load f...
Definition VPlan.h:3378
LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override
Generate the wide load or gather.
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenLoadEVLRecipe.
VPValue * getEVL() const
Return the EVL operand.
Definition VPlan.h:3391
VPWidenLoadEVLRecipe(VPWidenLoadRecipe &L, VPValue *Addr, VPValue &EVL, VPValue *Mask)
Definition VPlan.h:3379
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3401
A recipe for widening load operations, using the address to load from and an optional mask.
Definition VPlan.h:3337
VP_CLASSOF_IMPL(VPDef::VPWidenLoadSC)
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3359
void execute(VPTransformState &State) override
Generate a wide load or gather.
VPWidenLoadRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask, bool Consecutive, bool Reverse, const VPIRMetadata &Metadata, DebugLoc DL)
Definition VPlan.h:3338
VPWidenLoadRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3347
A recipe for widening select instructions.
Definition VPlan.h:1801
VPWidenSelectRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1812
VPWidenSelectRecipe(SelectInst *SI, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL={})
Definition VPlan.h:1802
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:1833
VPValue * getCond() const
Definition VPlan.h:1828
unsigned getOpcode() const
Definition VPlan.h:1826
~VPWidenSelectRecipe() override=default
A recipe for widening store operations with vector-predication intrinsics, using the value to store,...
Definition VPlan.h:3462
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition VPlan.h:3474
LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override
Generate the wide store or scatter.
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3487
VPWidenStoreEVLRecipe(VPWidenStoreRecipe &S, VPValue *Addr, VPValue &EVL, VPValue *Mask)
Definition VPlan.h:3463
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenStoreEVLRecipe.
VPValue * getEVL() const
Return the EVL operand.
Definition VPlan.h:3477
A recipe for widening store operations, using the stored value, the address to store to and an option...
Definition VPlan.h:3419
VP_CLASSOF_IMPL(VPDef::VPWidenStoreSC)
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition VPlan.h:3437
VPWidenStoreRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3428
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3443
VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal, VPValue *Mask, bool Consecutive, bool Reverse, const VPIRMetadata &Metadata, DebugLoc DL)
Definition VPlan.h:3420