LLVM 22.0.0git
VPlan.h
Go to the documentation of this file.
1//===- VPlan.h - Represent A Vectorizer Plan --------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file contains the declarations of the Vectorization Plan base classes:
11/// 1. VPBasicBlock and VPRegionBlock that inherit from a common pure virtual
12/// VPBlockBase, together implementing a Hierarchical CFG;
13/// 2. Pure virtual VPRecipeBase serving as the base class for recipes contained
14/// within VPBasicBlocks;
15/// 3. Pure virtual VPSingleDefRecipe serving as a base class for recipes that
16/// also inherit from VPValue.
17/// 4. VPInstruction, a concrete Recipe and VPUser modeling a single planned
18/// instruction;
19/// 5. The VPlan class holding a candidate for vectorization;
20/// These are documented in docs/VectorizationPlan.rst.
21//
22//===----------------------------------------------------------------------===//
23
24#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
25#define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
26
27#include "VPlanValue.h"
28#include "llvm/ADT/DenseMap.h"
31#include "llvm/ADT/Twine.h"
32#include "llvm/ADT/ilist.h"
33#include "llvm/ADT/ilist_node.h"
36#include "llvm/IR/DebugLoc.h"
37#include "llvm/IR/FMF.h"
38#include "llvm/IR/Operator.h"
41#include <cassert>
42#include <cstddef>
43#include <functional>
44#include <string>
45#include <utility>
46
47namespace llvm {
48
49class BasicBlock;
50class DominatorTree;
52class IRBuilderBase;
53struct VPTransformState;
54class raw_ostream;
56class SCEV;
57class Type;
58class VPBasicBlock;
59class VPBuilder;
60class VPDominatorTree;
61class VPRegionBlock;
62class VPlan;
63class VPLane;
65class VPlanSlp;
66class Value;
68class LoopVersioning;
69
70struct VPCostContext;
71
72namespace Intrinsic {
73typedef unsigned ID;
74}
75
76using VPlanPtr = std::unique_ptr<VPlan>;
77
78/// VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
79/// A VPBlockBase can be either a VPBasicBlock or a VPRegionBlock.
81 friend class VPBlockUtils;
82
83 const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).
84
85 /// An optional name for the block.
86 std::string Name;
87
88 /// The immediate VPRegionBlock which this VPBlockBase belongs to, or null if
89 /// it is a topmost VPBlockBase.
90 VPRegionBlock *Parent = nullptr;
91
92 /// List of predecessor blocks.
94
95 /// List of successor blocks.
97
98 /// VPlan containing the block. Can only be set on the entry block of the
99 /// plan.
100 VPlan *Plan = nullptr;
101
102 /// Add \p Successor as the last successor to this block.
103 void appendSuccessor(VPBlockBase *Successor) {
104 assert(Successor && "Cannot add nullptr successor!");
105 Successors.push_back(Successor);
106 }
107
108 /// Add \p Predecessor as the last predecessor to this block.
109 void appendPredecessor(VPBlockBase *Predecessor) {
110 assert(Predecessor && "Cannot add nullptr predecessor!");
111 Predecessors.push_back(Predecessor);
112 }
113
114 /// Remove \p Predecessor from the predecessors of this block.
115 void removePredecessor(VPBlockBase *Predecessor) {
116 auto Pos = find(Predecessors, Predecessor);
117 assert(Pos && "Predecessor does not exist");
118 Predecessors.erase(Pos);
119 }
120
121 /// Remove \p Successor from the successors of this block.
122 void removeSuccessor(VPBlockBase *Successor) {
123 auto Pos = find(Successors, Successor);
124 assert(Pos && "Successor does not exist");
125 Successors.erase(Pos);
126 }
127
128 /// This function replaces one predecessor with another, useful when
129 /// trying to replace an old block in the CFG with a new one.
130 void replacePredecessor(VPBlockBase *Old, VPBlockBase *New) {
131 auto I = find(Predecessors, Old);
132 assert(I != Predecessors.end());
133 assert(Old->getParent() == New->getParent() &&
134 "replaced predecessor must have the same parent");
135 *I = New;
136 }
137
138 /// This function replaces one successor with another, useful when
139 /// trying to replace an old block in the CFG with a new one.
140 void replaceSuccessor(VPBlockBase *Old, VPBlockBase *New) {
141 auto I = find(Successors, Old);
142 assert(I != Successors.end());
143 assert(Old->getParent() == New->getParent() &&
144 "replaced successor must have the same parent");
145 *I = New;
146 }
147
148protected:
149 VPBlockBase(const unsigned char SC, const std::string &N)
150 : SubclassID(SC), Name(N) {}
151
152public:
153 /// An enumeration for keeping track of the concrete subclass of VPBlockBase
154 /// that are actually instantiated. Values of this enumeration are kept in the
155 /// SubclassID field of the VPBlockBase objects. They are used for concrete
156 /// type identification.
157 using VPBlockTy = enum { VPRegionBlockSC, VPBasicBlockSC, VPIRBasicBlockSC };
158
160
161 virtual ~VPBlockBase() = default;
162
163 const std::string &getName() const { return Name; }
164
165 void setName(const Twine &newName) { Name = newName.str(); }
166
167 /// \return an ID for the concrete type of this object.
168 /// This is used to implement the classof checks. This should not be used
169 /// for any other purpose, as the values may change as LLVM evolves.
170 unsigned getVPBlockID() const { return SubclassID; }
171
172 VPRegionBlock *getParent() { return Parent; }
173 const VPRegionBlock *getParent() const { return Parent; }
174
175 /// \return A pointer to the plan containing the current block.
176 VPlan *getPlan();
177 const VPlan *getPlan() const;
178
179 /// Sets the pointer of the plan containing the block. The block must be the
180 /// entry block into the VPlan.
181 void setPlan(VPlan *ParentPlan);
182
183 void setParent(VPRegionBlock *P) { Parent = P; }
184
185 /// \return the VPBasicBlock that is the entry of this VPBlockBase,
186 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
187 /// VPBlockBase is a VPBasicBlock, it is returned.
188 const VPBasicBlock *getEntryBasicBlock() const;
189 VPBasicBlock *getEntryBasicBlock();
190
191 /// \return the VPBasicBlock that is the exiting block of this VPBlockBase,
192 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
193 /// VPBlockBase is a VPBasicBlock, it is returned.
194 const VPBasicBlock *getExitingBasicBlock() const;
195 VPBasicBlock *getExitingBasicBlock();
196
197 const VPBlocksTy &getSuccessors() const { return Successors; }
198 VPBlocksTy &getSuccessors() { return Successors; }
199
202
203 const VPBlocksTy &getPredecessors() const { return Predecessors; }
204 VPBlocksTy &getPredecessors() { return Predecessors; }
205
206 /// \return the successor of this VPBlockBase if it has a single successor.
207 /// Otherwise return a null pointer.
209 return (Successors.size() == 1 ? *Successors.begin() : nullptr);
210 }
211
212 /// \return the predecessor of this VPBlockBase if it has a single
213 /// predecessor. Otherwise return a null pointer.
215 return (Predecessors.size() == 1 ? *Predecessors.begin() : nullptr);
216 }
217
218 size_t getNumSuccessors() const { return Successors.size(); }
219 size_t getNumPredecessors() const { return Predecessors.size(); }
220
221 /// Returns true if this block has any predecessors.
222 bool hasPredecessors() const { return !Predecessors.empty(); }
223
224 /// An Enclosing Block of a block B is any block containing B, including B
225 /// itself. \return the closest enclosing block starting from "this", which
226 /// has successors. \return the root enclosing block if all enclosing blocks
227 /// have no successors.
228 VPBlockBase *getEnclosingBlockWithSuccessors();
229
230 /// \return the closest enclosing block starting from "this", which has
231 /// predecessors. \return the root enclosing block if all enclosing blocks
232 /// have no predecessors.
233 VPBlockBase *getEnclosingBlockWithPredecessors();
234
235 /// \return the successors either attached directly to this VPBlockBase or, if
236 /// this VPBlockBase is the exit block of a VPRegionBlock and has no
237 /// successors of its own, search recursively for the first enclosing
238 /// VPRegionBlock that has successors and return them. If no such
239 /// VPRegionBlock exists, return the (empty) successors of the topmost
240 /// VPBlockBase reached.
242 return getEnclosingBlockWithSuccessors()->getSuccessors();
243 }
244
245 /// \return the hierarchical successor of this VPBlockBase if it has a single
246 /// hierarchical successor. Otherwise return a null pointer.
248 return getEnclosingBlockWithSuccessors()->getSingleSuccessor();
249 }
250
251 /// \return the predecessors either attached directly to this VPBlockBase or,
252 /// if this VPBlockBase is the entry block of a VPRegionBlock and has no
253 /// predecessors of its own, search recursively for the first enclosing
254 /// VPRegionBlock that has predecessors and return them. If no such
255 /// VPRegionBlock exists, return the (empty) predecessors of the topmost
256 /// VPBlockBase reached.
258 return getEnclosingBlockWithPredecessors()->getPredecessors();
259 }
260
261 /// \return the hierarchical predecessor of this VPBlockBase if it has a
262 /// single hierarchical predecessor. Otherwise return a null pointer.
266
267 /// Set a given VPBlockBase \p Successor as the single successor of this
268 /// VPBlockBase. This VPBlockBase is not added as predecessor of \p Successor.
269 /// This VPBlockBase must have no successors.
271 assert(Successors.empty() && "Setting one successor when others exist.");
272 assert(Successor->getParent() == getParent() &&
273 "connected blocks must have the same parent");
274 appendSuccessor(Successor);
275 }
276
277 /// Set two given VPBlockBases \p IfTrue and \p IfFalse to be the two
278 /// successors of this VPBlockBase. This VPBlockBase is not added as
279 /// predecessor of \p IfTrue or \p IfFalse. This VPBlockBase must have no
280 /// successors.
281 void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse) {
282 assert(Successors.empty() && "Setting two successors when others exist.");
283 appendSuccessor(IfTrue);
284 appendSuccessor(IfFalse);
285 }
286
287 /// Set each VPBasicBlock in \p NewPreds as predecessor of this VPBlockBase.
288 /// This VPBlockBase must have no predecessors. This VPBlockBase is not added
289 /// as successor of any VPBasicBlock in \p NewPreds.
291 assert(Predecessors.empty() && "Block predecessors already set.");
292 for (auto *Pred : NewPreds)
293 appendPredecessor(Pred);
294 }
295
296 /// Set each VPBasicBlock in \p NewSuccs as successor of this VPBlockBase.
297 /// This VPBlockBase must have no successors. This VPBlockBase is not added
298 /// as predecessor of any VPBasicBlock in \p NewSuccs.
300 assert(Successors.empty() && "Block successors already set.");
301 for (auto *Succ : NewSuccs)
302 appendSuccessor(Succ);
303 }
304
305 /// Remove all the predecessors of this block.
306 void clearPredecessors() { Predecessors.clear(); }
307
308 /// Remove all the successors of this block.
309 void clearSuccessors() { Successors.clear(); }
310
311 /// Swap predecessors of the block. The block must have exactly 2
312 /// predecessors.
314 assert(Predecessors.size() == 2 && "must have 2 predecessors to swap");
315 std::swap(Predecessors[0], Predecessors[1]);
316 }
317
318 /// Swap successors of the block. The block must have exactly 2 successors.
319 // TODO: This should be part of introducing conditional branch recipes rather
320 // than being independent.
322 assert(Successors.size() == 2 && "must have 2 successors to swap");
323 std::swap(Successors[0], Successors[1]);
324 }
325
326 /// Returns the index for \p Pred in the blocks predecessors list.
327 unsigned getIndexForPredecessor(const VPBlockBase *Pred) const {
328 assert(count(Predecessors, Pred) == 1 &&
329 "must have Pred exactly once in Predecessors");
330 return std::distance(Predecessors.begin(), find(Predecessors, Pred));
331 }
332
333 /// Returns the index for \p Succ in the blocks successor list.
334 unsigned getIndexForSuccessor(const VPBlockBase *Succ) const {
335 assert(count(Successors, Succ) == 1 &&
336 "must have Succ exactly once in Successors");
337 return std::distance(Successors.begin(), find(Successors, Succ));
338 }
339
340 /// The method which generates the output IR that correspond to this
341 /// VPBlockBase, thereby "executing" the VPlan.
342 virtual void execute(VPTransformState *State) = 0;
343
344 /// Return the cost of the block.
346
347#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
348 void printAsOperand(raw_ostream &OS, bool PrintType = false) const {
349 OS << getName();
350 }
351
352 /// Print plain-text dump of this VPBlockBase to \p O, prefixing all lines
353 /// with \p Indent. \p SlotTracker is used to print unnamed VPValue's using
354 /// consecutive numbers.
355 ///
356 /// Note that the numbering is applied to the whole VPlan, so printing
357 /// individual blocks is consistent with the whole VPlan printing.
358 virtual void print(raw_ostream &O, const Twine &Indent,
359 VPSlotTracker &SlotTracker) const = 0;
360
361 /// Print plain-text dump of this VPBlockBase to \p O.
362 void print(raw_ostream &O) const;
363
364 /// Print the successors of this block to \p O, prefixing all lines with \p
365 /// Indent.
366 void printSuccessors(raw_ostream &O, const Twine &Indent) const;
367
368 /// Dump this VPBlockBase to dbgs().
369 LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
370#endif
371
372 /// Clone the current block and its recipes without updating the operands of
373 /// the cloned recipes, including all blocks in the single-entry single-exit
374 /// region for VPRegionBlocks.
375 virtual VPBlockBase *clone() = 0;
376};
377
378/// VPRecipeBase is a base class modeling a sequence of one or more output IR
379/// instructions. VPRecipeBase owns the VPValues it defines through VPDef
380/// and is responsible for deleting its defined values. Single-value
381/// recipes must inherit from VPSingleDefRecipe instead of inheriting from both
382/// VPRecipeBase and VPValue separately.
384 : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
385 public VPDef,
386 public VPUser {
387 friend VPBasicBlock;
388 friend class VPBlockUtils;
389
390 /// Each VPRecipe belongs to a single VPBasicBlock.
391 VPBasicBlock *Parent = nullptr;
392
393 /// The debug location for the recipe.
394 DebugLoc DL;
395
396public:
397 VPRecipeBase(const unsigned char SC, ArrayRef<VPValue *> Operands,
399 : VPDef(SC), VPUser(Operands), DL(DL) {}
400
401 ~VPRecipeBase() override = default;
402
403 /// Clone the current recipe.
404 virtual VPRecipeBase *clone() = 0;
405
406 /// \return the VPBasicBlock which this VPRecipe belongs to.
407 VPBasicBlock *getParent() { return Parent; }
408 const VPBasicBlock *getParent() const { return Parent; }
409
410 /// \return the VPRegionBlock which the recipe belongs to.
411 VPRegionBlock *getRegion();
412 const VPRegionBlock *getRegion() const;
413
414 /// The method which generates the output IR instructions that correspond to
415 /// this VPRecipe, thereby "executing" the VPlan.
416 virtual void execute(VPTransformState &State) = 0;
417
418 /// Return the cost of this recipe, taking into account if the cost
419 /// computation should be skipped and the ForceTargetInstructionCost flag.
420 /// Also takes care of printing the cost for debugging.
422
423 /// Insert an unlinked recipe into a basic block immediately before
424 /// the specified recipe.
425 void insertBefore(VPRecipeBase *InsertPos);
426 /// Insert an unlinked recipe into \p BB immediately before the insertion
427 /// point \p IP;
428 void insertBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator IP);
429
430 /// Insert an unlinked Recipe into a basic block immediately after
431 /// the specified Recipe.
432 void insertAfter(VPRecipeBase *InsertPos);
433
434 /// Unlink this recipe from its current VPBasicBlock and insert it into
435 /// the VPBasicBlock that MovePos lives in, right after MovePos.
436 void moveAfter(VPRecipeBase *MovePos);
437
438 /// Unlink this recipe and insert into BB before I.
439 ///
440 /// \pre I is a valid iterator into BB.
441 void moveBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator I);
442
443 /// This method unlinks 'this' from the containing basic block, but does not
444 /// delete it.
445 void removeFromParent();
446
447 /// This method unlinks 'this' from the containing basic block and deletes it.
448 ///
449 /// \returns an iterator pointing to the element after the erased one
451
452 /// Method to support type inquiry through isa, cast, and dyn_cast.
453 static inline bool classof(const VPDef *D) {
454 // All VPDefs are also VPRecipeBases.
455 return true;
456 }
457
458 static inline bool classof(const VPUser *U) { return true; }
459
460 /// Returns true if the recipe may have side-effects.
461 bool mayHaveSideEffects() const;
462
463 /// Returns true for PHI-like recipes.
464 bool isPhi() const;
465
466 /// Returns true if the recipe may read from memory.
467 bool mayReadFromMemory() const;
468
469 /// Returns true if the recipe may write to memory.
470 bool mayWriteToMemory() const;
471
472 /// Returns true if the recipe may read from or write to memory.
473 bool mayReadOrWriteMemory() const {
475 }
476
477 /// Returns the debug location of the recipe.
478 DebugLoc getDebugLoc() const { return DL; }
479
480 /// Return true if the recipe is a scalar cast.
481 bool isScalarCast() const;
482
483 /// Set the recipe's debug location to \p NewDL.
484 void setDebugLoc(DebugLoc NewDL) { DL = NewDL; }
485
486protected:
487 /// Compute the cost of this recipe either using a recipe's specialized
488 /// implementation or using the legacy cost model and the underlying
489 /// instructions.
490 virtual InstructionCost computeCost(ElementCount VF,
491 VPCostContext &Ctx) const;
492};
493
// Expands to the usual set of classof overloads for a recipe class whose
// VPDef ID is \p VPDefID. One overload is generated per pointer type a recipe
// can be queried through: VPDef, VPValue (via its defining recipe, which may
// be null), VPUser (via dyn_cast to VPRecipeBase), VPRecipeBase and
// VPSingleDefRecipe.
#define VP_CLASSOF_IMPL(VPDefID)                                               \
  static inline bool classof(const VPDef *D) {                                 \
    return D->getVPDefID() == VPDefID;                                         \
  }                                                                            \
  static inline bool classof(const VPValue *V) {                               \
    if (auto *DefR = V->getDefiningRecipe())                                   \
      return DefR->getVPDefID() == VPDefID;                                    \
    return false;                                                              \
  }                                                                            \
  static inline bool classof(const VPUser *U) {                                \
    if (auto *UserR = dyn_cast<VPRecipeBase>(U))                               \
      return UserR->getVPDefID() == VPDefID;                                   \
    return false;                                                              \
  }                                                                            \
  static inline bool classof(const VPRecipeBase *R) {                          \
    return R->getVPDefID() == VPDefID;                                         \
  }                                                                            \
  static inline bool classof(const VPSingleDefRecipe *R) {                     \
    return R->getVPDefID() == VPDefID;                                         \
  }
513
514/// VPSingleDefRecipe is a base class for recipes modeling a sequence of one or
515/// more output IR that define a single result VPValue.
516/// Note that VPRecipeBase must be inherited from before VPValue.
517class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
518public:
519 VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
521 : VPRecipeBase(SC, Operands, DL), VPValue(this) {}
522
523 VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
525 : VPRecipeBase(SC, Operands, DL), VPValue(this, UV) {}
526
527 static inline bool classof(const VPRecipeBase *R) {
528 switch (R->getVPDefID()) {
529 case VPRecipeBase::VPDerivedIVSC:
530 case VPRecipeBase::VPEVLBasedIVPHISC:
531 case VPRecipeBase::VPExpandSCEVSC:
532 case VPRecipeBase::VPExpressionSC:
533 case VPRecipeBase::VPInstructionSC:
534 case VPRecipeBase::VPReductionEVLSC:
535 case VPRecipeBase::VPReductionSC:
536 case VPRecipeBase::VPReplicateSC:
537 case VPRecipeBase::VPScalarIVStepsSC:
538 case VPRecipeBase::VPVectorPointerSC:
539 case VPRecipeBase::VPVectorEndPointerSC:
540 case VPRecipeBase::VPWidenCallSC:
541 case VPRecipeBase::VPWidenCanonicalIVSC:
542 case VPRecipeBase::VPWidenCastSC:
543 case VPRecipeBase::VPWidenGEPSC:
544 case VPRecipeBase::VPWidenIntrinsicSC:
545 case VPRecipeBase::VPWidenSC:
546 case VPRecipeBase::VPWidenSelectSC:
547 case VPRecipeBase::VPBlendSC:
548 case VPRecipeBase::VPPredInstPHISC:
549 case VPRecipeBase::VPCanonicalIVPHISC:
550 case VPRecipeBase::VPActiveLaneMaskPHISC:
551 case VPRecipeBase::VPFirstOrderRecurrencePHISC:
552 case VPRecipeBase::VPWidenPHISC:
553 case VPRecipeBase::VPWidenIntOrFpInductionSC:
554 case VPRecipeBase::VPWidenPointerInductionSC:
555 case VPRecipeBase::VPReductionPHISC:
556 case VPRecipeBase::VPPartialReductionSC:
557 return true;
558 case VPRecipeBase::VPBranchOnMaskSC:
559 case VPRecipeBase::VPInterleaveEVLSC:
560 case VPRecipeBase::VPInterleaveSC:
561 case VPRecipeBase::VPIRInstructionSC:
562 case VPRecipeBase::VPWidenLoadEVLSC:
563 case VPRecipeBase::VPWidenLoadSC:
564 case VPRecipeBase::VPWidenStoreEVLSC:
565 case VPRecipeBase::VPWidenStoreSC:
566 case VPRecipeBase::VPHistogramSC:
567 // TODO: Widened stores don't define a value, but widened loads do. Split
568 // the recipes to be able to make widened loads VPSingleDefRecipes.
569 return false;
570 }
571 llvm_unreachable("Unhandled VPDefID");
572 }
573
574 static inline bool classof(const VPUser *U) {
575 auto *R = dyn_cast<VPRecipeBase>(U);
576 return R && classof(R);
577 }
578
579 VPSingleDefRecipe *clone() override = 0;
580
581 /// Returns the underlying instruction.
588
589#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
590 /// Print this VPSingleDefRecipe to dbgs() (for debugging).
591 LLVM_DUMP_METHOD void dump() const;
592#endif
593};
594
595/// Class to record and manage LLVM IR flags.
597 enum class OperationType : unsigned char {
598 Cmp,
599 OverflowingBinOp,
600 Trunc,
601 DisjointOp,
602 PossiblyExactOp,
603 GEPOp,
604 FPMathOp,
605 NonNegOp,
606 Other
607 };
608
609public:
610 struct WrapFlagsTy {
611 char HasNUW : 1;
612 char HasNSW : 1;
613
615 };
616
618 char HasNUW : 1;
619 char HasNSW : 1;
620
622 };
623
628
630 char NonNeg : 1;
631 NonNegFlagsTy(bool IsNonNeg) : NonNeg(IsNonNeg) {}
632 };
633
634private:
635 struct ExactFlagsTy {
636 char IsExact : 1;
637 };
638 struct FastMathFlagsTy {
639 char AllowReassoc : 1;
640 char NoNaNs : 1;
641 char NoInfs : 1;
642 char NoSignedZeros : 1;
643 char AllowReciprocal : 1;
644 char AllowContract : 1;
645 char ApproxFunc : 1;
646
647 LLVM_ABI_FOR_TEST FastMathFlagsTy(const FastMathFlags &FMF);
648 };
649
650 OperationType OpType;
651
652 union {
657 ExactFlagsTy ExactFlags;
660 FastMathFlagsTy FMFs;
661 unsigned AllFlags;
662 };
663
664public:
665 VPIRFlags() : OpType(OperationType::Other), AllFlags(0) {}
666
668 if (auto *Op = dyn_cast<CmpInst>(&I)) {
669 OpType = OperationType::Cmp;
670 CmpPredicate = Op->getPredicate();
671 } else if (auto *Op = dyn_cast<PossiblyDisjointInst>(&I)) {
672 OpType = OperationType::DisjointOp;
673 DisjointFlags.IsDisjoint = Op->isDisjoint();
674 } else if (auto *Op = dyn_cast<OverflowingBinaryOperator>(&I)) {
675 OpType = OperationType::OverflowingBinOp;
676 WrapFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
677 } else if (auto *Op = dyn_cast<TruncInst>(&I)) {
678 OpType = OperationType::Trunc;
679 TruncFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
680 } else if (auto *Op = dyn_cast<PossiblyExactOperator>(&I)) {
681 OpType = OperationType::PossiblyExactOp;
682 ExactFlags.IsExact = Op->isExact();
683 } else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
684 OpType = OperationType::GEPOp;
685 GEPFlags = GEP->getNoWrapFlags();
686 } else if (auto *PNNI = dyn_cast<PossiblyNonNegInst>(&I)) {
687 OpType = OperationType::NonNegOp;
688 NonNegFlags.NonNeg = PNNI->hasNonNeg();
689 } else if (auto *Op = dyn_cast<FPMathOperator>(&I)) {
690 OpType = OperationType::FPMathOp;
691 FMFs = Op->getFastMathFlags();
692 } else {
693 OpType = OperationType::Other;
694 AllFlags = 0;
695 }
696 }
697
699 : OpType(OperationType::Cmp), CmpPredicate(Pred) {}
700
702 : OpType(OperationType::OverflowingBinOp), WrapFlags(WrapFlags) {}
703
705 : OpType(OperationType::Trunc), TruncFlags(TruncFlags) {}
706
707 VPIRFlags(FastMathFlags FMFs) : OpType(OperationType::FPMathOp), FMFs(FMFs) {}
708
710 : OpType(OperationType::DisjointOp), DisjointFlags(DisjointFlags) {}
711
713 : OpType(OperationType::NonNegOp), NonNegFlags(NonNegFlags) {}
714
716 : OpType(OperationType::GEPOp), GEPFlags(GEPFlags) {}
717
719 OpType = Other.OpType;
720 AllFlags = Other.AllFlags;
721 }
722
723 /// Only keep flags also present in \p Other. \p Other must have the same
724 /// OpType as the current object.
725 void intersectFlags(const VPIRFlags &Other);
726
727 /// Drop all poison-generating flags.
729 // NOTE: This needs to be kept in-sync with
730 // Instruction::dropPoisonGeneratingFlags.
731 switch (OpType) {
732 case OperationType::OverflowingBinOp:
733 WrapFlags.HasNUW = false;
734 WrapFlags.HasNSW = false;
735 break;
736 case OperationType::Trunc:
737 TruncFlags.HasNUW = false;
738 TruncFlags.HasNSW = false;
739 break;
740 case OperationType::DisjointOp:
741 DisjointFlags.IsDisjoint = false;
742 break;
743 case OperationType::PossiblyExactOp:
744 ExactFlags.IsExact = false;
745 break;
746 case OperationType::GEPOp:
748 break;
749 case OperationType::FPMathOp:
750 FMFs.NoNaNs = false;
751 FMFs.NoInfs = false;
752 break;
753 case OperationType::NonNegOp:
754 NonNegFlags.NonNeg = false;
755 break;
756 case OperationType::Cmp:
757 case OperationType::Other:
758 break;
759 }
760 }
761
762 /// Apply the IR flags to \p I.
763 void applyFlags(Instruction &I) const {
764 switch (OpType) {
765 case OperationType::OverflowingBinOp:
766 I.setHasNoUnsignedWrap(WrapFlags.HasNUW);
767 I.setHasNoSignedWrap(WrapFlags.HasNSW);
768 break;
769 case OperationType::Trunc:
770 I.setHasNoUnsignedWrap(TruncFlags.HasNUW);
771 I.setHasNoSignedWrap(TruncFlags.HasNSW);
772 break;
773 case OperationType::DisjointOp:
774 cast<PossiblyDisjointInst>(&I)->setIsDisjoint(DisjointFlags.IsDisjoint);
775 break;
776 case OperationType::PossiblyExactOp:
777 I.setIsExact(ExactFlags.IsExact);
778 break;
779 case OperationType::GEPOp:
780 cast<GetElementPtrInst>(&I)->setNoWrapFlags(GEPFlags);
781 break;
782 case OperationType::FPMathOp:
783 I.setHasAllowReassoc(FMFs.AllowReassoc);
784 I.setHasNoNaNs(FMFs.NoNaNs);
785 I.setHasNoInfs(FMFs.NoInfs);
786 I.setHasNoSignedZeros(FMFs.NoSignedZeros);
787 I.setHasAllowReciprocal(FMFs.AllowReciprocal);
788 I.setHasAllowContract(FMFs.AllowContract);
789 I.setHasApproxFunc(FMFs.ApproxFunc);
790 break;
791 case OperationType::NonNegOp:
792 I.setNonNeg(NonNegFlags.NonNeg);
793 break;
794 case OperationType::Cmp:
795 case OperationType::Other:
796 break;
797 }
798 }
799
801 assert(OpType == OperationType::Cmp &&
802 "recipe doesn't have a compare predicate");
803 return CmpPredicate;
804 }
805
807 assert(OpType == OperationType::Cmp &&
808 "recipe doesn't have a compare predicate");
809 CmpPredicate = Pred;
810 }
811
813
814 /// Returns true if the recipe has a comparison predicate.
815 bool hasPredicate() const { return OpType == OperationType::Cmp; }
816
817 /// Returns true if the recipe has fast-math flags.
818 bool hasFastMathFlags() const { return OpType == OperationType::FPMathOp; }
819
821
822 /// Returns true if the recipe has non-negative flag.
823 bool hasNonNegFlag() const { return OpType == OperationType::NonNegOp; }
824
825 bool isNonNeg() const {
826 assert(OpType == OperationType::NonNegOp &&
827 "recipe doesn't have a NNEG flag");
828 return NonNegFlags.NonNeg;
829 }
830
831 bool hasNoUnsignedWrap() const {
832 switch (OpType) {
833 case OperationType::OverflowingBinOp:
834 return WrapFlags.HasNUW;
835 case OperationType::Trunc:
836 return TruncFlags.HasNUW;
837 default:
838 llvm_unreachable("recipe doesn't have a NUW flag");
839 }
840 }
841
842 bool hasNoSignedWrap() const {
843 switch (OpType) {
844 case OperationType::OverflowingBinOp:
845 return WrapFlags.HasNSW;
846 case OperationType::Trunc:
847 return TruncFlags.HasNSW;
848 default:
849 llvm_unreachable("recipe doesn't have a NSW flag");
850 }
851 }
852
853 bool isDisjoint() const {
854 assert(OpType == OperationType::DisjointOp &&
855 "recipe cannot have a disjoing flag");
856 return DisjointFlags.IsDisjoint;
857 }
858
859#if !defined(NDEBUG)
860 /// Returns true if the set flags are valid for \p Opcode.
861 bool flagsValidForOpcode(unsigned Opcode) const;
862#endif
863
864#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
865 void printFlags(raw_ostream &O) const;
866#endif
867};
868
869/// A pure-virtual common base class for recipes defining a single VPValue and
870/// using IR flags.
872 VPRecipeWithIRFlags(const unsigned char SC, ArrayRef<VPValue *> Operands,
874 : VPSingleDefRecipe(SC, Operands, DL), VPIRFlags() {}
875
876 VPRecipeWithIRFlags(const unsigned char SC, ArrayRef<VPValue *> Operands,
877 Instruction &I)
878 : VPSingleDefRecipe(SC, Operands, &I, I.getDebugLoc()), VPIRFlags(I) {}
879
880 VPRecipeWithIRFlags(const unsigned char SC, ArrayRef<VPValue *> Operands,
881 const VPIRFlags &Flags,
883 : VPSingleDefRecipe(SC, Operands, DL), VPIRFlags(Flags) {}
884
885 static inline bool classof(const VPRecipeBase *R) {
886 return R->getVPDefID() == VPRecipeBase::VPInstructionSC ||
887 R->getVPDefID() == VPRecipeBase::VPWidenSC ||
888 R->getVPDefID() == VPRecipeBase::VPWidenGEPSC ||
889 R->getVPDefID() == VPRecipeBase::VPWidenCallSC ||
890 R->getVPDefID() == VPRecipeBase::VPWidenCastSC ||
891 R->getVPDefID() == VPRecipeBase::VPWidenIntrinsicSC ||
892 R->getVPDefID() == VPRecipeBase::VPWidenSelectSC ||
893 R->getVPDefID() == VPRecipeBase::VPReductionSC ||
894 R->getVPDefID() == VPRecipeBase::VPReductionEVLSC ||
895 R->getVPDefID() == VPRecipeBase::VPReplicateSC ||
896 R->getVPDefID() == VPRecipeBase::VPVectorEndPointerSC ||
897 R->getVPDefID() == VPRecipeBase::VPVectorPointerSC;
898 }
899
900 static inline bool classof(const VPUser *U) {
901 auto *R = dyn_cast<VPRecipeBase>(U);
902 return R && classof(R);
903 }
904
905 static inline bool classof(const VPValue *V) {
906 auto *R = dyn_cast_or_null<VPRecipeBase>(V->getDefiningRecipe());
907 return R && classof(R);
908 }
909
910 VPRecipeWithIRFlags *clone() override = 0;
911
912 static inline bool classof(const VPSingleDefRecipe *U) {
913 auto *R = dyn_cast<VPRecipeBase>(U);
914 return R && classof(R);
915 }
916
917 void execute(VPTransformState &State) override = 0;
918
919 /// Compute the cost for this recipe for \p VF, using \p Opcode and \p Ctx.
921 VPCostContext &Ctx) const;
922};
923
924/// Helper to access the operand that contains the unroll part for this recipe
925/// after unrolling.
926template <unsigned PartOpIdx> class LLVM_ABI_FOR_TEST VPUnrollPartAccessor {
927protected:
928 /// Return the VPValue operand containing the unroll part or null if there is
929 /// no such operand.
930 VPValue *getUnrollPartOperand(const VPUser &U) const;
931
932 /// Return the unroll part.
933 unsigned getUnrollPart(const VPUser &U) const;
934};
935
936/// Helper to manage IR metadata for recipes. It filters out metadata that
937/// cannot be propagated.
940
941public:
942 VPIRMetadata() = default;
943
944 /// Adds metadata that can be preserved from the original instruction
945 /// \p I.
947
948 /// Adds metadata that can be preserved from the original instruction
949 /// \p I and noalias metadata guaranteed by runtime checks using \p LVer.
951
952 /// Copy constructor for cloning.
953 VPIRMetadata(const VPIRMetadata &Other) = default;
954
956
957 /// Add all metadata to \p I.
958 void applyMetadata(Instruction &I) const;
959
960 /// Add metadata with kind \p Kind and \p Node.
961 void addMetadata(unsigned Kind, MDNode *Node) {
962 Metadata.emplace_back(Kind, Node);
963 }
964
965 /// Intersect this VPIRMetadata object with \p MD, keeping only metadata
966 /// nodes that are common to both.
967 void intersect(const VPIRMetadata &MD);
968};
969
970/// This is a concrete Recipe that models a single VPlan-level instruction.
971/// While as any Recipe it may generate a sequence of IR instructions when
972/// executed, these instructions would always form a single-def expression as
973/// the VPInstruction is also a single def-use vertex.
975 public VPIRMetadata,
976 public VPUnrollPartAccessor<1> {
977 friend class VPlanSlp;
978
979public:
980 /// VPlan opcodes, extending LLVM IR with idiomatic instructions.
981 enum {
983 Instruction::OtherOpsEnd + 1, // Combines the incoming and previous
984 // values of a first-order recurrence.
988 // Creates a mask where each lane is active (true) whilst the current
989 // counter (first operand + index) is less than the second operand. i.e.
990 // mask[i] = icmp ult (op0 + i), op1
991 // The size of the mask returned is VF * Multiplier (UF, third op).
995 // Increment the canonical IV separately for each unrolled part.
1000 /// Given operands of (the same) struct type, creates a struct of fixed-
1001 /// width vectors each containing a struct field of all operands. The
1002 /// number of operands matches the element count of every vector.
1004 /// Creates a fixed-width vector containing all operands. The number of
1005 /// operands matches the vector element count.
1007 /// Extracts all lanes from its (non-scalable) vector operand. This is an
1008 /// abstract VPInstruction whose single defined VPValue represents VF
1009 /// scalars extracted from a vector, to be replaced by VF ExtractElement
1010 /// VPInstructions.
1012 /// Compute the final result of an AnyOf reduction with select(cmp(),x,y),
1013 /// where one of (x,y) is loop invariant, and both x and y are integer type.
1017 // Extracts the last lane from its operand if it is a vector, or the last
1018 // part if scalar. In the latter case, the recipe will be removed during
1019 // unrolling.
1021 // Extracts the last lane for each part from its operand.
1023 // Extracts the second-to-last lane from its operand or the second-to-last
1024 // part if it is scalar. In the latter case, the recipe will be removed
1025 // during unrolling.
1027 LogicalAnd, // Non-poison propagating logical And.
1028 // Add an offset in bytes (second operand) to a base pointer (first
1029 // operand). Only generates scalar values (either for the first lane only or
1030 // for all lanes, depending on its uses).
1032 // Add a vector offset in bytes (second operand) to a scalar base pointer
1033 // (first operand).
1035 // Returns a scalar boolean value, which is true if any lane of its
1036 // (boolean) vector operands is true. It produces the reduced value across
1037 // all unrolled iterations. Unrolling will add all copies of its original
1038 // operand as additional operands. AnyOf is poison-safe as all operands
1039 // will be frozen.
1041 // Calculates the first active lane index of the vector predicate operands.
1042 // It produces the lane index across all unrolled iterations. Unrolling will
1043 // add all copies of its original operand as additional operands.
1045
1046 // The opcodes below are used for VPInstructionWithType.
1047 //
1048 /// Scale the first operand (vector step) by the second operand
1049 /// (scalar-step). Casts both operands to the result type if needed.
1051 /// Start vector for reductions with 3 operands: the original start value,
1052 /// the identity value for the reduction and an integer indicating the
1053 /// scaling factor.
1055 // Creates a step vector starting from 0 to VF with a step of 1.
1057 /// Extracts a single lane (first operand) from a set of vector operands.
1058 /// The lane specifies an index into a vector formed by combining all vector
1059 /// operands (all operands after the first one).
1061 /// Explicit user for the resume phi of the canonical induction in the main
1062 /// VPlan, used by the epilogue vector loop.
1064 /// Returns the value for vscale.
1067 };
1068
1069 /// Returns true if this VPInstruction generates scalar values for all lanes.
1070 /// Most VPInstructions generate a single value per part, either vector or
1071 /// scalar. VPReplicateRecipe takes care of generating multiple (scalar)
1072 /// values per all lanes, stemming from an original ingredient. This method
1073 /// identifies the (rare) cases of VPInstructions that do so as well, w/o an
1074 /// underlying ingredient.
1075 bool doesGeneratePerAllLanes() const;
1076
1077private:
1078 typedef unsigned char OpcodeTy;
1079 OpcodeTy Opcode;
1080
1081 /// An optional name that can be used for the generated IR instruction.
1082 const std::string Name;
1083
1084 /// Returns true if we can generate a scalar for the first lane only if
1085 /// needed.
1086 bool canGenerateScalarForFirstLane() const;
1087
1088 /// Utility methods serving execute(): generates a single vector instance of
1089 /// the modeled instruction. \returns the generated value. In some cases an
1090 /// existing value is returned rather than a generated one.
1091 Value *generate(VPTransformState &State);
1092
1093#if !defined(NDEBUG)
1094 /// Return the number of operands determined by the opcode of the
1095 /// VPInstruction. Returns -1u if the number of operands cannot be determined
1096 /// directly by the opcode.
1097 static unsigned getNumOperandsForOpcode(unsigned Opcode);
1098#endif
1099
1100public:
1101 VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands,
1102 DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "")
1103 : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, DL),
1104 VPIRMetadata(), Opcode(Opcode), Name(Name.str()) {}
1105
1106 VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands,
1107 const VPIRFlags &Flags, const VPIRMetadata &MD = {},
1108 DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "");
1109
1110 VP_CLASSOF_IMPL(VPDef::VPInstructionSC)
1111
1112 VPInstruction *clone() override {
1113 auto *New = new VPInstruction(Opcode, operands(), *this, *this,
1114 getDebugLoc(), Name);
1115 if (getUnderlyingValue())
1116 New->setUnderlyingValue(getUnderlyingInstr());
1117 return New;
1118 }
1119
1120 unsigned getOpcode() const { return Opcode; }
1121
1122 /// Generate the instruction.
1123 /// TODO: We currently execute only per-part unless a specific instance is
1124 /// provided.
1125 void execute(VPTransformState &State) override;
1126
1127 /// Return the cost of this VPInstruction.
1128 InstructionCost computeCost(ElementCount VF,
1129 VPCostContext &Ctx) const override;
1130
1131#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1132 /// Print the VPInstruction to \p O.
1133 void print(raw_ostream &O, const Twine &Indent,
1134 VPSlotTracker &SlotTracker) const override;
1135
1136 /// Print the VPInstruction to dbgs() (for debugging).
1137 LLVM_DUMP_METHOD void dump() const;
1138#endif
1139
/// Returns false if the opcode of this VPInstruction is one of the opcodes
/// listed in the switch below, which are treated as not defining a result,
/// and true for every other opcode.
1140 bool hasResult() const {
1141 // CallInst may or may not have a result, depending on the called function.
1142 // Conservatively return calls have results for now.
1143 switch (getOpcode()) {
1144 case Instruction::Ret:
1145 case Instruction::Br:
1146 case Instruction::Store:
1147 case Instruction::Switch:
1148 case Instruction::IndirectBr:
1149 case Instruction::Resume:
1150 case Instruction::CatchRet:
1151 case Instruction::Unreachable:
1152 case Instruction::Fence:
// NOTE(review): the LLVM IR atomicrmw instruction yields the previous memory
// value; confirm that treating it as result-less here is intended.
1153 case Instruction::AtomicRMW:
1156 return false;
1157 default:
// Everything not listed above, including calls, is assumed to have a result.
1158 return true;
1159 }
1160 }
1161
1162 /// Returns true if the underlying opcode may read from or write to memory.
1163 bool opcodeMayReadOrWriteFromMemory() const;
1164
1165 /// Returns true if the recipe only uses the first lane of operand \p Op.
1166 bool onlyFirstLaneUsed(const VPValue *Op) const override;
1167
1168 /// Returns true if the recipe only uses the first part of operand \p Op.
1169 bool onlyFirstPartUsed(const VPValue *Op) const override;
1170
1171 /// Returns true if this VPInstruction produces a scalar value from a vector,
1172 /// e.g. by performing a reduction or extracting a lane.
1173 bool isVectorToScalar() const;
1174
1175 /// Returns true if this VPInstruction's operands are single scalars and the
1176 /// result is also a single scalar.
1177 bool isSingleScalar() const;
1178
1179 /// Returns the symbolic name assigned to the VPInstruction.
1180 StringRef getName() const { return Name; }
1181};
1182
1183/// A specialization of VPInstruction augmenting it with a dedicated result
1184/// type, to be used when the opcode and operands of the VPInstruction don't
1185/// directly determine the result type. Note that there is no separate VPDef ID
1186/// for VPInstructionWithType; it shares the same ID as VPInstruction and is
1187/// distinguished purely by the opcode.
1189 /// Scalar result type produced by the recipe.
1190 Type *ResultTy;
1191
1192public:
1194 Type *ResultTy, const VPIRFlags &Flags, DebugLoc DL,
1195 const Twine &Name = "")
1196 : VPInstruction(Opcode, Operands, Flags, {}, DL, Name),
1197 ResultTy(ResultTy) {}
1198
1200 Type *ResultTy, DebugLoc DL, const VPIRFlags &Flags,
1201 const VPIRMetadata &Metadata, const Twine &Name = "")
1202 : VPInstruction(Opcode, Operands, Flags, Metadata, DL, Name),
1203 ResultTy(ResultTy) {}
1204
1205 static inline bool classof(const VPRecipeBase *R) {
1206 // VPInstructionWithType are VPInstructions with specific opcodes requiring
1207 // type information.
1208 if (R->isScalarCast())
1209 return true;
1210 auto *VPI = dyn_cast<VPInstruction>(R);
1211 if (!VPI)
1212 return false;
1213 switch (VPI->getOpcode()) {
1217 return true;
1218 default:
1219 return false;
1220 }
1221 }
1222
1223 static inline bool classof(const VPUser *R) {
1225 }
1226
1227 VPInstruction *clone() override {
1228 auto *New =
1230 *this, getDebugLoc(), getName());
1231 New->setUnderlyingValue(getUnderlyingValue());
1232 return New;
1233 }
1234
1235 void execute(VPTransformState &State) override;
1236
1237 /// Return the cost of this VPInstruction.
1239 VPCostContext &Ctx) const override {
1240 // TODO: Compute accurate cost after retiring the legacy cost model.
1241 return 0;
1242 }
1243
1244 Type *getResultType() const { return ResultTy; }
1245
1246#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1247 /// Print the recipe.
1248 void print(raw_ostream &O, const Twine &Indent,
1249 VPSlotTracker &SlotTracker) const override;
1250#endif
1251};
1252
1253/// Helper type to provide functions to access incoming values and blocks for
1254/// phi-like recipes.
1256protected:
1257 /// Return a VPRecipeBase* to the current object.
1258 virtual const VPRecipeBase *getAsRecipe() const = 0;
1259
1260public:
1261 virtual ~VPPhiAccessors() = default;
1262
1263 /// Returns the incoming VPValue with index \p Idx.
1264 VPValue *getIncomingValue(unsigned Idx) const {
1265 return getAsRecipe()->getOperand(Idx);
1266 }
1267
1268 /// Returns the incoming block with index \p Idx.
1269 const VPBasicBlock *getIncomingBlock(unsigned Idx) const;
1270
1271 /// Returns the number of incoming values, also number of incoming blocks.
1272 virtual unsigned getNumIncoming() const {
1273 return getAsRecipe()->getNumOperands();
1274 }
1275
1276 /// Returns an iterator range over the incoming values.
1278 return make_range(getAsRecipe()->op_begin(),
1279 getAsRecipe()->op_begin() + getNumIncoming());
1280 }
1281
1283 detail::index_iterator, std::function<const VPBasicBlock *(size_t)>>>;
1284
1285 /// Returns an iterator range over the incoming blocks.
1287 std::function<const VPBasicBlock *(size_t)> GetBlock = [this](size_t Idx) {
1288 return getIncomingBlock(Idx);
1289 };
1290 return map_range(index_range(0, getNumIncoming()), GetBlock);
1291 }
1292
1293 /// Returns an iterator range over pairs of incoming values and corresponding
1294 /// incoming blocks.
1300
1301 /// Removes the incoming value for \p IncomingBlock, which must be a
1302 /// predecessor.
1303 void removeIncomingValueFor(VPBlockBase *IncomingBlock) const;
1304
1305#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1306 /// Print the recipe.
1308#endif
1309};
1310
1312 VPPhi(ArrayRef<VPValue *> Operands, DebugLoc DL, const Twine &Name = "")
1313 : VPInstruction(Instruction::PHI, Operands, DL, Name) {}
1314
1315 static inline bool classof(const VPUser *U) {
1316 auto *VPI = dyn_cast<VPInstruction>(U);
1317 return VPI && VPI->getOpcode() == Instruction::PHI;
1318 }
1319
1320 static inline bool classof(const VPValue *V) {
1321 auto *VPI = dyn_cast<VPInstruction>(V);
1322 return VPI && VPI->getOpcode() == Instruction::PHI;
1323 }
1324
1325 static inline bool classof(const VPSingleDefRecipe *SDR) {
1326 auto *VPI = dyn_cast<VPInstruction>(SDR);
1327 return VPI && VPI->getOpcode() == Instruction::PHI;
1328 }
1329
1330 VPPhi *clone() override {
1331 auto *PhiR = new VPPhi(operands(), getDebugLoc(), getName());
1332 PhiR->setUnderlyingValue(getUnderlyingValue());
1333 return PhiR;
1334 }
1335
1336 void execute(VPTransformState &State) override;
1337
1338#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1339 /// Print the recipe.
1340 void print(raw_ostream &O, const Twine &Indent,
1341 VPSlotTracker &SlotTracker) const override;
1342#endif
1343
1344protected:
1345 const VPRecipeBase *getAsRecipe() const override { return this; }
1346};
1347
1348/// A recipe to wrap an original IR instruction not to be modified during
1349/// execution, except for PHIs. PHIs are modeled via the VPIRPhi subclass.
1350/// Except for PHIs, VPIRInstructions cannot have any operands.
1352 Instruction &I;
1353
1354protected:
1355 /// VPIRInstruction::create() should be used to create VPIRInstructions, as
1356 /// subclasses may need to be created, e.g. VPIRPhi.
1358 : VPRecipeBase(VPDef::VPIRInstructionSC, ArrayRef<VPValue *>()), I(I) {}
1359
1360public:
1361 ~VPIRInstruction() override = default;
1362
1363 /// Create a new VPIRPhi for \p I, if it is a PHINode, otherwise create a
1364 /// VPIRInstruction.
1366
1367 VP_CLASSOF_IMPL(VPDef::VPIRInstructionSC)
1368
1370 auto *R = create(I);
1371 for (auto *Op : operands())
1372 R->addOperand(Op);
1373 return R;
1374 }
1375
1376 void execute(VPTransformState &State) override;
1377
1378 /// Return the cost of this VPIRInstruction.
1380 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
1381
1382 Instruction &getInstruction() const { return I; }
1383
1384#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1385 /// Print the recipe.
1386 void print(raw_ostream &O, const Twine &Indent,
1387 VPSlotTracker &SlotTracker) const override;
1388#endif
1389
1390 bool usesScalars(const VPValue *Op) const override {
1392 "Op must be an operand of the recipe");
1393 return true;
1394 }
1395
1396 bool onlyFirstPartUsed(const VPValue *Op) const override {
1398 "Op must be an operand of the recipe");
1399 return true;
1400 }
1401
1402 bool onlyFirstLaneUsed(const VPValue *Op) const override {
1404 "Op must be an operand of the recipe");
1405 return true;
1406 }
1407
1408 /// Update the recipe's first operand to the last lane of the operand using \p
1409 /// Builder. Must only be used for VPIRInstructions with at least one operand
1410 /// wrapping a PHINode.
1412};
1413
1414/// An overlay for VPIRInstructions wrapping PHI nodes enabling convenient use
1415/// of cast/dyn_cast/isa and execute() implementation. A single VPValue operand is
1416/// allowed, and it is used to add a new incoming value for the single
1417/// predecessor VPBB.
1419 public VPPhiAccessors {
1421
1422 static inline bool classof(const VPRecipeBase *U) {
1423 auto *R = dyn_cast<VPIRInstruction>(U);
1424 return R && isa<PHINode>(R->getInstruction());
1425 }
1426
1428
1429 void execute(VPTransformState &State) override;
1430
1431#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1432 /// Print the recipe.
1433 void print(raw_ostream &O, const Twine &Indent,
1434 VPSlotTracker &SlotTracker) const override;
1435#endif
1436
1437protected:
1438 const VPRecipeBase *getAsRecipe() const override { return this; }
1439};
1440
1441/// VPWidenRecipe is a recipe for producing a widened instruction using the
1442/// opcode and operands of the recipe. This recipe covers most of the
1443/// traditional vectorization cases where each recipe transforms into a
1444/// vectorized version of itself.
1446 public VPIRMetadata {
1447 unsigned Opcode;
1448
1449public:
1450 VPWidenRecipe(unsigned Opcode, ArrayRef<VPValue *> Operands,
1451 const VPIRFlags &Flags, const VPIRMetadata &Metadata,
1452 DebugLoc DL)
1453 : VPRecipeWithIRFlags(VPDef::VPWidenSC, Operands, Flags, DL),
1454 VPIRMetadata(Metadata), Opcode(Opcode) {}
1455
1457 : VPRecipeWithIRFlags(VPDef::VPWidenSC, Operands, I), VPIRMetadata(I),
1458 Opcode(I.getOpcode()) {}
1459
1460 ~VPWidenRecipe() override = default;
1461
1462 VPWidenRecipe *clone() override {
1463 auto *R =
1464 new VPWidenRecipe(getOpcode(), operands(), *this, *this, getDebugLoc());
1465 R->setUnderlyingValue(getUnderlyingValue());
1466 return R;
1467 }
1468
1469 VP_CLASSOF_IMPL(VPDef::VPWidenSC)
1470
1471 /// Produce a widened instruction using the opcode and operands of the recipe,
1472 /// processing State.VF elements.
1473 void execute(VPTransformState &State) override;
1474
1475 /// Return the cost of this VPWidenRecipe.
1476 InstructionCost computeCost(ElementCount VF,
1477 VPCostContext &Ctx) const override;
1478
1479 unsigned getOpcode() const { return Opcode; }
1480
1481#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1482 /// Print the recipe.
1483 void print(raw_ostream &O, const Twine &Indent,
1484 VPSlotTracker &SlotTracker) const override;
1485#endif
1486};
1487
1488/// VPWidenCastRecipe is a recipe to create vector cast instructions.
1490 /// Cast instruction opcode.
1491 Instruction::CastOps Opcode;
1492
1493 /// Result type for the cast.
1494 Type *ResultTy;
1495
1496public:
1498 CastInst &UI)
1499 : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op, UI), VPIRMetadata(UI),
1500 Opcode(Opcode), ResultTy(ResultTy) {
1501 assert(UI.getOpcode() == Opcode &&
1502 "opcode of underlying cast doesn't match");
1503 }
1504
1506 const VPIRFlags &Flags = {},
1507 const VPIRMetadata &Metadata = {},
1509 : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op, Flags, DL),
1510 VPIRMetadata(Metadata), Opcode(Opcode), ResultTy(ResultTy) {
1511 assert(flagsValidForOpcode(Opcode) &&
1512 "Set flags not supported for the provided opcode");
1513 }
1514
1515 ~VPWidenCastRecipe() override = default;
1516
1518 auto *New = new VPWidenCastRecipe(Opcode, getOperand(0), ResultTy, *this,
1519 *this, getDebugLoc());
1520 if (auto *UV = getUnderlyingValue())
1521 New->setUnderlyingValue(UV);
1522 return New;
1523 }
1524
1525 VP_CLASSOF_IMPL(VPDef::VPWidenCastSC)
1526
1527 /// Produce widened copies of the cast.
1528 void execute(VPTransformState &State) override;
1529
1530 /// Return the cost of this VPWidenCastRecipe.
1532 VPCostContext &Ctx) const override;
1533
1534#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1535 /// Print the recipe.
1536 void print(raw_ostream &O, const Twine &Indent,
1537 VPSlotTracker &SlotTracker) const override;
1538#endif
1539
1540 Instruction::CastOps getOpcode() const { return Opcode; }
1541
1542 /// Returns the result type of the cast.
1543 Type *getResultType() const { return ResultTy; }
1544};
1545
1546/// A recipe for widening vector intrinsics.
1548 /// ID of the vector intrinsic to widen.
1549 Intrinsic::ID VectorIntrinsicID;
1550
1551 /// Scalar return type of the intrinsic.
1552 Type *ResultTy;
1553
1554 /// True if the intrinsic may read from memory.
1555 bool MayReadFromMemory;
1556
1557 /// True if the intrinsic may write to memory.
1558 bool MayWriteToMemory;
1559
1560 /// True if the intrinsic may have side-effects.
1561 bool MayHaveSideEffects;
1562
1563public:
1565 ArrayRef<VPValue *> CallArguments, Type *Ty,
1567 : VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments, CI),
1568 VPIRMetadata(CI), VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty),
1569 MayReadFromMemory(CI.mayReadFromMemory()),
1570 MayWriteToMemory(CI.mayWriteToMemory()),
1571 MayHaveSideEffects(CI.mayHaveSideEffects()) {}
1572
1574 ArrayRef<VPValue *> CallArguments, Type *Ty,
1576 : VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments, DL),
1577 VPIRMetadata(), VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty) {
1578 LLVMContext &Ctx = Ty->getContext();
1579 AttributeSet Attrs = Intrinsic::getFnAttributes(Ctx, VectorIntrinsicID);
1580 MemoryEffects ME = Attrs.getMemoryEffects();
1581 MayReadFromMemory = !ME.onlyWritesMemory();
1582 MayWriteToMemory = !ME.onlyReadsMemory();
1583 MayHaveSideEffects = MayWriteToMemory ||
1584 !Attrs.hasAttribute(Attribute::NoUnwind) ||
1585 !Attrs.hasAttribute(Attribute::WillReturn);
1586 }
1587
1588 ~VPWidenIntrinsicRecipe() override = default;
1589
1591 if (Value *CI = getUnderlyingValue())
1592 return new VPWidenIntrinsicRecipe(*cast<CallInst>(CI), VectorIntrinsicID,
1593 operands(), ResultTy, getDebugLoc());
1594 return new VPWidenIntrinsicRecipe(VectorIntrinsicID, operands(), ResultTy,
1595 getDebugLoc());
1596 }
1597
1598 VP_CLASSOF_IMPL(VPDef::VPWidenIntrinsicSC)
1599
1600 /// Produce a widened version of the vector intrinsic.
1601 void execute(VPTransformState &State) override;
1602
1603 /// Return the cost of this vector intrinsic.
1605 VPCostContext &Ctx) const override;
1606
1607 /// Return the ID of the intrinsic.
1608 Intrinsic::ID getVectorIntrinsicID() const { return VectorIntrinsicID; }
1609
1610 /// Return the scalar return type of the intrinsic.
1611 Type *getResultType() const { return ResultTy; }
1612
1613 /// Return the name of the intrinsic as string.
1615
1616 /// Returns true if the intrinsic may read from memory.
1617 bool mayReadFromMemory() const { return MayReadFromMemory; }
1618
1619 /// Returns true if the intrinsic may write to memory.
1620 bool mayWriteToMemory() const { return MayWriteToMemory; }
1621
1622 /// Returns true if the intrinsic may have side-effects.
1623 bool mayHaveSideEffects() const { return MayHaveSideEffects; }
1624
1625#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1626 /// Print the recipe.
1627 void print(raw_ostream &O, const Twine &Indent,
1628 VPSlotTracker &SlotTracker) const override;
1629#endif
1630
1631 bool onlyFirstLaneUsed(const VPValue *Op) const override;
1632};
1633
1634/// A recipe for widening Call instructions using library calls.
1636 public VPIRMetadata {
1637 /// Variant stores a pointer to the chosen function. There is a 1:1 mapping
1638 /// between a given VF and the chosen vectorized variant, so there will be a
1639 /// different VPlan for each VF with a valid variant.
1640 Function *Variant;
1641
1642public:
1644 ArrayRef<VPValue *> CallArguments,
1646 : VPRecipeWithIRFlags(VPDef::VPWidenCallSC, CallArguments,
1647 *cast<Instruction>(UV)),
1648 VPIRMetadata(*cast<Instruction>(UV)), Variant(Variant) {
1649 assert(
1651 "last operand must be the called function");
1652 }
1653
1654 ~VPWidenCallRecipe() override = default;
1655
1657 return new VPWidenCallRecipe(getUnderlyingValue(), Variant, operands(),
1658 getDebugLoc());
1659 }
1660
1661 VP_CLASSOF_IMPL(VPDef::VPWidenCallSC)
1662
1663 /// Produce a widened version of the call instruction.
1664 void execute(VPTransformState &State) override;
1665
1666 /// Return the cost of this VPWidenCallRecipe.
1667 InstructionCost computeCost(ElementCount VF,
1668 VPCostContext &Ctx) const override;
1669
1673
1676
1677#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1678 /// Print the recipe.
1679 void print(raw_ostream &O, const Twine &Indent,
1680 VPSlotTracker &SlotTracker) const override;
1681#endif
1682};
1683
1684/// A recipe representing a sequence of load -> update -> store as part of
1685/// a histogram operation. This means there may be aliasing between vector
1686/// lanes, which is handled by the llvm.experimental.vector.histogram family
1687/// of intrinsics. The only update operations currently supported are
1688/// 'add' and 'sub' where the other term is loop-invariant.
1690 /// Opcode of the update operation, currently either add or sub.
1691 unsigned Opcode;
1692
1693public:
1694 VPHistogramRecipe(unsigned Opcode, ArrayRef<VPValue *> Operands,
1696 : VPRecipeBase(VPDef::VPHistogramSC, Operands, DL), Opcode(Opcode) {}
1697
1698 ~VPHistogramRecipe() override = default;
1699
1701 return new VPHistogramRecipe(Opcode, operands(), getDebugLoc());
1702 }
1703
1704 VP_CLASSOF_IMPL(VPDef::VPHistogramSC);
1705
1706 /// Produce a vectorized histogram operation.
1707 void execute(VPTransformState &State) override;
1708
1709 /// Return the cost of this VPHistogramRecipe.
1711 VPCostContext &Ctx) const override;
1712
1713 unsigned getOpcode() const { return Opcode; }
1714
1715 /// Return the mask operand if one was provided, or a null pointer if all
1716 /// lanes should be executed unconditionally.
1717 VPValue *getMask() const {
1718 return getNumOperands() == 3 ? getOperand(2) : nullptr;
1719 }
1720
1721#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1722 /// Print the recipe
1723 void print(raw_ostream &O, const Twine &Indent,
1724 VPSlotTracker &SlotTracker) const override;
1725#endif
1726};
1727
1728/// A recipe for widening select instructions.
1730 public VPIRMetadata {
1732 : VPRecipeWithIRFlags(VPDef::VPWidenSelectSC, Operands, I),
1733 VPIRMetadata(I) {}
1734
1735 ~VPWidenSelectRecipe() override = default;
1736
1741
1742 VP_CLASSOF_IMPL(VPDef::VPWidenSelectSC)
1743
1744 /// Produce a widened version of the select instruction.
1745 void execute(VPTransformState &State) override;
1746
1747 /// Return the cost of this VPWidenSelectRecipe.
1748 InstructionCost computeCost(ElementCount VF,
1749 VPCostContext &Ctx) const override;
1750
1751#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1752 /// Print the recipe.
1753 void print(raw_ostream &O, const Twine &Indent,
1754 VPSlotTracker &SlotTracker) const override;
1755#endif
1756
1757 unsigned getOpcode() const { return Instruction::Select; }
1758
1759 VPValue *getCond() const {
1760 return getOperand(0);
1761 }
1762
1763 bool isInvariantCond() const {
1764 return getCond()->isDefinedOutsideLoopRegions();
1765 }
1766
1767 /// Returns true if the recipe only uses the first lane of operand \p Op.
1768 bool onlyFirstLaneUsed(const VPValue *Op) const override {
1770 "Op must be an operand of the recipe");
1771 return Op == getCond() && isInvariantCond();
1772 }
1773};
1774
1775/// A recipe for handling GEP instructions.
1777 Type *SourceElementTy;
1778
1779 bool isPointerLoopInvariant() const {
1780 return getOperand(0)->isDefinedOutsideLoopRegions();
1781 }
1782
1783 bool isIndexLoopInvariant(unsigned I) const {
1784 return getOperand(I + 1)->isDefinedOutsideLoopRegions();
1785 }
1786
1787 bool areAllOperandsInvariant() const {
1788 return all_of(operands(), [](VPValue *Op) {
1789 return Op->isDefinedOutsideLoopRegions();
1790 });
1791 }
1792
1793public:
1795 : VPRecipeWithIRFlags(VPDef::VPWidenGEPSC, Operands, *GEP),
1796 SourceElementTy(GEP->getSourceElementType()) {
1798 (void)Metadata;
1800 assert(Metadata.empty() && "unexpected metadata on GEP");
1801 }
1802
1803 ~VPWidenGEPRecipe() override = default;
1804
1809
1810 VP_CLASSOF_IMPL(VPDef::VPWidenGEPSC)
1811
1812 /// This recipe generates a GEP instruction.
1813 unsigned getOpcode() const { return Instruction::GetElementPtr; }
1814
1815 /// Generate the gep nodes.
1816 void execute(VPTransformState &State) override;
1817
1818 Type *getSourceElementType() const { return SourceElementTy; }
1819
1820 /// Return the cost of this VPWidenGEPRecipe.
1822 VPCostContext &Ctx) const override {
1823 // TODO: Compute accurate cost after retiring the legacy cost model.
1824 return 0;
1825 }
1826
1827#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1828 /// Print the recipe.
1829 void print(raw_ostream &O, const Twine &Indent,
1830 VPSlotTracker &SlotTracker) const override;
1831#endif
1832
1833 /// Returns true if the recipe only uses the first lane of operand \p Op.
1834 bool onlyFirstLaneUsed(const VPValue *Op) const override {
1836 "Op must be an operand of the recipe");
1837 if (Op == getOperand(0))
1838 return isPointerLoopInvariant();
1839 else
1840 return !isPointerLoopInvariant() && Op->isDefinedOutsideLoopRegions();
1841 }
1842};
1843
1844/// A recipe to compute a pointer to the last element of each part of a widened
1845/// memory access for widened memory accesses of IndexedTy. Used for
1846/// VPWidenMemoryRecipes or VPInterleaveRecipes that are reversed.
1848 public VPUnrollPartAccessor<2> {
1849 Type *IndexedTy;
1850
1851 /// The constant stride of the pointer computed by this recipe, expressed in
1852 /// units of IndexedTy.
1853 int64_t Stride;
1854
1855public:
1857 int64_t Stride, GEPNoWrapFlags GEPFlags, DebugLoc DL)
1858 : VPRecipeWithIRFlags(VPDef::VPVectorEndPointerSC,
1859 ArrayRef<VPValue *>({Ptr, VF}), GEPFlags, DL),
1860 IndexedTy(IndexedTy), Stride(Stride) {
1861 assert(Stride < 0 && "Stride must be negative");
1862 }
1863
1864 VP_CLASSOF_IMPL(VPDef::VPVectorEndPointerSC)
1865
1867 const VPValue *getVFValue() const { return getOperand(1); }
1868
1869 void execute(VPTransformState &State) override;
1870
1871 bool onlyFirstLaneUsed(const VPValue *Op) const override {
1873 "Op must be an operand of the recipe");
1874 return true;
1875 }
1876
1877 /// Return the cost of this VPVectorEndPointerRecipe.
1879 VPCostContext &Ctx) const override {
1880 // TODO: Compute accurate cost after retiring the legacy cost model.
1881 return 0;
1882 }
1883
1884 /// Returns true if the recipe only uses the first part of operand \p Op.
1885 bool onlyFirstPartUsed(const VPValue *Op) const override {
1887 "Op must be an operand of the recipe");
1888 assert(getNumOperands() <= 2 && "must have at most two operands");
1889 return true;
1890 }
1891
1893 return new VPVectorEndPointerRecipe(getOperand(0), getVFValue(), IndexedTy,
1894 Stride, getGEPNoWrapFlags(),
1895 getDebugLoc());
1896 }
1897
1898#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1899 /// Print the recipe.
1900 void print(raw_ostream &O, const Twine &Indent,
1901 VPSlotTracker &SlotTracker) const override;
1902#endif
1903};
1904
1905/// A recipe to compute the pointers for widened memory accesses of SourceElementTy.
1907 public VPUnrollPartAccessor<1> {
1908 Type *SourceElementTy;
1909
1910public:
1913 : VPRecipeWithIRFlags(VPDef::VPVectorPointerSC, ArrayRef<VPValue *>(Ptr),
1914 GEPFlags, DL),
1915 SourceElementTy(SourceElementTy) {}
1916
1917 VP_CLASSOF_IMPL(VPDef::VPVectorPointerSC)
1918
1919 void execute(VPTransformState &State) override;
1920
1921 Type *getSourceElementType() const { return SourceElementTy; }
1922
1923 bool onlyFirstLaneUsed(const VPValue *Op) const override {
1925 "Op must be an operand of the recipe");
1926 return true;
1927 }
1928
1929 /// Returns true if the recipe only uses the first part of operand \p Op.
1930 bool onlyFirstPartUsed(const VPValue *Op) const override {
1932 "Op must be an operand of the recipe");
1933 assert(getNumOperands() <= 2 && "must have at most two operands");
1934 return true;
1935 }
1936
1938 return new VPVectorPointerRecipe(getOperand(0), SourceElementTy,
1940 }
1941
1942 /// Return true if this VPVectorPointerRecipe corresponds to part 0. Note that
1943 /// this is only accurate after the VPlan has been unrolled.
1944 bool isFirstPart() const { return getUnrollPart(*this) == 0; }
1945
1946 /// Return the cost of this VPVectorPointerRecipe.
1948 VPCostContext &Ctx) const override {
1949 // TODO: Compute accurate cost after retiring the legacy cost model.
1950 return 0;
1951 }
1952
1953#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1954 /// Print the recipe.
1955 void print(raw_ostream &O, const Twine &Indent,
1956 VPSlotTracker &SlotTracker) const override;
1957#endif
1958};
1959
1960/// A pure virtual base class for all recipes modeling header phis, including
1961/// phis for first order recurrences, pointer inductions and reductions. The
1962/// start value is the first operand of the recipe and the incoming value from
1963/// the backedge is the second operand.
1964///
1965/// Inductions are modeled using the following sub-classes:
1966/// * VPCanonicalIVPHIRecipe: Canonical scalar induction of the vector loop,
1967/// starting at a specified value (zero for the main vector loop, the resume
1968/// value for the epilogue vector loop) and stepping by 1. The induction
1969/// controls exiting of the vector loop by comparing against the vector trip
1970/// count. Produces a single scalar PHI for the induction value per
1971/// iteration.
1972/// * VPWidenIntOrFpInductionRecipe: Generates vector values for integer and
1973/// floating point inductions with arbitrary start and step values. Produces
1974/// a vector PHI per-part.
1975/// * VPDerivedIVRecipe: Converts the canonical IV value to the corresponding
1976/// value of an IV with different start and step values. Produces a single
1977/// scalar value per iteration.
1978/// * VPScalarIVStepsRecipe: Generates scalar values per-lane based on a
1979/// canonical or derived induction.
1980/// * VPWidenPointerInductionRecipe: Generate vector and scalar values for a
1981/// pointer induction. Produces either a vector PHI per-part or scalar values
1982/// per-lane based on the canonical induction.
1984 public VPPhiAccessors {
1985protected:
1986 VPHeaderPHIRecipe(unsigned char VPDefID, Instruction *UnderlyingInstr,
1987 VPValue *Start, DebugLoc DL = DebugLoc::getUnknown())
1988 : VPSingleDefRecipe(VPDefID, ArrayRef<VPValue *>({Start}),
1989 UnderlyingInstr, DL) {}
1990
1991 const VPRecipeBase *getAsRecipe() const override { return this; }
1992
1993public:
1994 ~VPHeaderPHIRecipe() override = default;
1995
1996 /// Method to support type inquiry through isa, cast, and dyn_cast.
1997 static inline bool classof(const VPRecipeBase *B) {
1998 return B->getVPDefID() >= VPDef::VPFirstHeaderPHISC &&
1999 B->getVPDefID() <= VPDef::VPLastHeaderPHISC;
2000 }
/// Type inquiry on a VPValue: true if \p V has a defining recipe whose ID
/// lies within [VPFirstHeaderPHISC, VPLastHeaderPHISC]. Values without a
/// defining recipe are rejected.
2001 static inline bool classof(const VPValue *V) {
2002 auto *B = V->getDefiningRecipe();
2003 return B && B->getVPDefID() >= VPRecipeBase::VPFirstHeaderPHISC &&
2004 B->getVPDefID() <= VPRecipeBase::VPLastHeaderPHISC;
2005 }
2006
2007 /// Generate the phi nodes.
2008 void execute(VPTransformState &State) override = 0;
2009
2010 /// Return the cost of this header phi recipe.
2012 VPCostContext &Ctx) const override;
2013
2014#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2015 /// Print the recipe.
2016 void print(raw_ostream &O, const Twine &Indent,
2017 VPSlotTracker &SlotTracker) const override = 0;
2018#endif
2019
2020 /// Returns the start value of the phi, if one is set.
2022 return getNumOperands() == 0 ? nullptr : getOperand(0);
2023 }
2025 return getNumOperands() == 0 ? nullptr : getOperand(0);
2026 }
2027
2028 /// Update the start value of the recipe.
2030
2031 /// Returns the incoming value from the loop backedge.
2033 return getOperand(1);
2034 }
2035
2036 /// Update the incoming value from the loop backedge.
2038
2039 /// Returns the backedge value as a recipe. The backedge value is guaranteed
2040 /// to be a recipe.
2042 return *getBackedgeValue()->getDefiningRecipe();
2043 }
2044};
2045
2046/// Base class for widened induction (VPWidenIntOrFpInductionRecipe and
2047/// VPWidenPointerInductionRecipe), providing shared functionality, including
2048/// retrieving the step value, induction descriptor and original phi node.
2050 const InductionDescriptor &IndDesc;
2051
2052public:
2053 VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start,
2054 VPValue *Step, const InductionDescriptor &IndDesc,
2055 DebugLoc DL)
2056 : VPHeaderPHIRecipe(Kind, IV, Start, DL), IndDesc(IndDesc) {
2057 addOperand(Step);
2058 }
2059
/// Method to support type inquiry through isa, cast, and dyn_cast: true if
/// \p R is a VPWidenIntOrFpInductionSC or VPWidenPointerInductionSC recipe.
2060 static inline bool classof(const VPRecipeBase *R) {
2061 return R->getVPDefID() == VPDef::VPWidenIntOrFpInductionSC ||
2062 R->getVPDefID() == VPDef::VPWidenPointerInductionSC;
2063 }
2064
/// Type inquiry on a VPValue: true if \p V has a defining recipe that is
/// accepted by classof(const VPRecipeBase *); false if it has none.
2065 static inline bool classof(const VPValue *V) {
2066 auto *R = V->getDefiningRecipe();
2067 return R && classof(R);
2068 }
2069
/// Type inquiry on a header phi recipe; forwards to the
/// classof(const VPRecipeBase *) overload.
2070 static inline bool classof(const VPHeaderPHIRecipe *R) {
2071 return classof(static_cast<const VPRecipeBase *>(R));
2072 }
2073
2074 void execute(VPTransformState &State) override = 0;
2075
2076 /// Returns the step value of the induction.
2078 const VPValue *getStepValue() const { return getOperand(1); }
2079
2080 /// Update the step value of the recipe.
2081 void setStepValue(VPValue *V) { setOperand(1, V); }
2082
/// Returns operand 2 of the recipe. VPWidenIntOrFpInductionRecipe adds its VF
/// operand at that index; VPWidenPointerInductionRecipe adds NumUnrolledElems.
2084 const VPValue *getVFValue() const { return getOperand(2); }
2085
2086 /// Returns the number of incoming values, also number of incoming blocks.
2087 /// Note that at the moment, widened induction recipes only have a single
2088 /// incoming value, their start value.
2089 unsigned getNumIncoming() const override { return 1; }
2090
2092
2093 /// Returns the induction descriptor for the recipe.
2094 const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }
2095
2097 // TODO: All operands of base recipe must exist and be at same index in
2098 // derived recipe.
2100 "VPWidenIntOrFpInductionRecipe generates its own backedge value");
2101 }
2102
2104 // TODO: All operands of base recipe must exist and be at same index in
2105 // derived recipe.
2107 "VPWidenIntOrFpInductionRecipe generates its own backedge value");
2108 }
2109
2110 /// Returns true if the recipe only uses the first lane of operand \p Op.
2111 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2113 "Op must be an operand of the recipe");
2114 // The recipe creates its own wide start value, so it only requests the
2115 // first lane of the operand.
2116 // TODO: Remove once creating the start value is modeled separately.
2117 return Op == getStartValue() || Op == getStepValue();
2118 }
2119};
2120
2121/// A recipe for handling phi nodes of integer and floating-point inductions,
2122/// producing their vector values. This is an abstract recipe and must be
2123/// converted to concrete recipes before executing.
2125 TruncInst *Trunc;
2126
2127 // If this recipe is unrolled it will have 2 additional operands.
2128 bool isUnrolled() const { return getNumOperands() == 5; }
2129
2130public:
2132 VPValue *VF, const InductionDescriptor &IndDesc,
2133 DebugLoc DL)
2134 : VPWidenInductionRecipe(VPDef::VPWidenIntOrFpInductionSC, IV, Start,
2135 Step, IndDesc, DL),
2136 Trunc(nullptr) {
2137 addOperand(VF);
2138 }
2139
2141 VPValue *VF, const InductionDescriptor &IndDesc,
2142 TruncInst *Trunc, DebugLoc DL)
2143 : VPWidenInductionRecipe(VPDef::VPWidenIntOrFpInductionSC, IV, Start,
2144 Step, IndDesc, DL),
2145 Trunc(Trunc) {
2146 addOperand(VF);
2148 (void)Metadata;
2149 if (Trunc)
2151 assert(Metadata.empty() && "unexpected metadata on Trunc");
2152 }
2153
2155
2161
2162 VP_CLASSOF_IMPL(VPDef::VPWidenIntOrFpInductionSC)
2163
/// Not executable: the body is llvm_unreachable, as this recipe is expected
/// to have been expanded before execution.
2164 void execute(VPTransformState &State) override {
2165 llvm_unreachable("cannot execute this recipe, should be expanded via "
2166 "expandVPWidenIntOrFpInductionRecipe");
2167 }
2168
2169#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2170 /// Print the recipe.
2171 void print(raw_ostream &O, const Twine &Indent,
2172 VPSlotTracker &SlotTracker) const override;
2173#endif
2174
2176 // If the recipe has been unrolled return the VPValue for the induction
2177 // increment.
2178 return isUnrolled() ? getOperand(getNumOperands() - 2) : nullptr;
2179 }
2180
2181 /// Returns the number of incoming values, also number of incoming blocks.
2182 /// Note that at the moment, VPWidenIntOrFpInductionRecipes only have a single
2183 /// incoming value, its start value.
2184 unsigned getNumIncoming() const override { return 1; }
2185
2186 /// Returns the TruncInst stored in this recipe (Trunc), or nullptr if none
2187 /// was provided at construction.
2188 TruncInst *getTruncInst() { return Trunc; }
2189 const TruncInst *getTruncInst() const { return Trunc; }
2190
2191 /// Returns true if the induction is canonical, i.e. starting at 0 and
2192 /// incremented by UF * VF (= the original IV is incremented by 1) and has the
2193 /// same type as the canonical induction.
2194 bool isCanonical() const;
2195
2196 /// Returns the scalar type of the induction.
2198 return Trunc ? Trunc->getType()
2200 }
2201
2202 /// Returns the VPValue representing the value of this induction at
2203 /// the last unrolled part, if it exists. Returns itself if unrolling did not
2204 /// take place.
2206 return isUnrolled() ? getOperand(getNumOperands() - 1) : this;
2207 }
2208};
2209
2211 bool IsScalarAfterVectorization;
2212
2213public:
2214 /// Create a new VPWidenPointerInductionRecipe for \p Phi with start value \p
2215 /// Start and the number of elements unrolled \p NumUnrolledElems, typically
2216 /// VF*UF.
2218 VPValue *NumUnrolledElems,
2219 const InductionDescriptor &IndDesc,
2220 bool IsScalarAfterVectorization, DebugLoc DL)
2221 : VPWidenInductionRecipe(VPDef::VPWidenPointerInductionSC, Phi, Start,
2222 Step, IndDesc, DL),
2223 IsScalarAfterVectorization(IsScalarAfterVectorization) {
2224 addOperand(NumUnrolledElems);
2225 }
2226
2228
2232 getOperand(2), getInductionDescriptor(), IsScalarAfterVectorization,
2233 getDebugLoc());
2234 }
2235
2236 VP_CLASSOF_IMPL(VPDef::VPWidenPointerInductionSC)
2237
2238 /// Not executable; the recipe is expected to be expanded before execution.
2239 void execute(VPTransformState &State) override {
2240 llvm_unreachable("cannot execute this recipe, should be expanded via "
2241 "expandVPWidenPointerInduction");
2242 };
2243
2244 /// Returns true if only scalar values will be generated.
2245 bool onlyScalarsGenerated(bool IsScalable);
2246
2247#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2248 /// Print the recipe.
2249 void print(raw_ostream &O, const Twine &Indent,
2250 VPSlotTracker &SlotTracker) const override;
2251#endif
2252};
2253
2254/// A recipe for widened phis. Incoming values are operands of the recipe and
2255/// their operand index corresponds to the incoming predecessor block. If the
2256/// recipe is placed in an entry block to a (non-replicate) region, it must have
2257/// exactly 2 incoming values, the first from the predecessor of the region and
2258/// the second from the exiting block of the region.
2260 public VPPhiAccessors {
2261 /// Name to use for the generated IR instruction for the widened phi.
2262 std::string Name;
2263
2264protected:
2265 const VPRecipeBase *getAsRecipe() const override { return this; }
2266
2267public:
2268 /// Create a new VPWidenPHIRecipe for \p Phi with start value \p Start and
2269 /// debug location \p DL.
2270 VPWidenPHIRecipe(PHINode *Phi, VPValue *Start = nullptr,
2271 DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "")
2272 : VPSingleDefRecipe(VPDef::VPWidenPHISC, {}, Phi, DL), Name(Name.str()) {
2273 if (Start)
2274 addOperand(Start);
2275 }
2276
2279 getOperand(0), getDebugLoc(), Name);
2281 C->addOperand(Op);
2282 return C;
2283 }
2284
2285 ~VPWidenPHIRecipe() override = default;
2286
2287 VP_CLASSOF_IMPL(VPDef::VPWidenPHISC)
2288
2289 /// Generate the phi/select nodes.
2290 void execute(VPTransformState &State) override;
2291
2292#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2293 /// Print the recipe.
2294 void print(raw_ostream &O, const Twine &Indent,
2295 VPSlotTracker &SlotTracker) const override;
2296#endif
2297};
2298
2299/// A recipe for handling first-order recurrence phis. The start value is the
2300/// first operand of the recipe and the incoming value from the backedge is the
2301/// second operand.
2304 : VPHeaderPHIRecipe(VPDef::VPFirstOrderRecurrencePHISC, Phi, &Start) {}
2305
2306 VP_CLASSOF_IMPL(VPDef::VPFirstOrderRecurrencePHISC)
2307
2312
2313 void execute(VPTransformState &State) override;
2314
2315 /// Return the cost of this first-order recurrence phi recipe.
2317 VPCostContext &Ctx) const override;
2318
2319#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2320 /// Print the recipe.
2321 void print(raw_ostream &O, const Twine &Indent,
2322 VPSlotTracker &SlotTracker) const override;
2323#endif
2324
2325 /// Returns true if the recipe only uses the first lane of operand \p Op.
2326 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2328 "Op must be an operand of the recipe");
2329 return Op == getStartValue();
2330 }
2331};
2332
2333/// A recipe for handling reduction phis. The start value is the first operand
2334/// of the recipe and the incoming value from the backedge is the second
2335/// operand.
2337 public VPUnrollPartAccessor<2> {
2338 /// The recurrence kind of the reduction.
2339 const RecurKind Kind;
2340
2341 /// The phi is part of an in-loop reduction.
2342 bool IsInLoop;
2343
2344 /// The phi is part of an ordered reduction. Requires IsInLoop to be true.
2345 bool IsOrdered;
2346
2347 /// When expanding the reduction PHI, the plan's VF element count is divided
2348 /// by this factor to form the reduction phi's VF.
2349 unsigned VFScaleFactor = 1;
2350
2351public:
2352 /// Create a new VPReductionPHIRecipe for the reduction \p Phi.
2354 bool IsInLoop = false, bool IsOrdered = false,
2355 unsigned VFScaleFactor = 1)
2356 : VPHeaderPHIRecipe(VPDef::VPReductionPHISC, Phi, &Start), Kind(Kind),
2357 IsInLoop(IsInLoop), IsOrdered(IsOrdered), VFScaleFactor(VFScaleFactor) {
2358 assert((!IsOrdered || IsInLoop) && "IsOrdered requires IsInLoop");
2359 }
2360
2361 ~VPReductionPHIRecipe() override = default;
2362
2364 auto *R = new VPReductionPHIRecipe(
2366 *getOperand(0), IsInLoop, IsOrdered, VFScaleFactor);
2367 R->addOperand(getBackedgeValue());
2368 return R;
2369 }
2370
2371 VP_CLASSOF_IMPL(VPDef::VPReductionPHISC)
2372
2373 /// Generate the phi/select nodes.
2374 void execute(VPTransformState &State) override;
2375
2376 /// Get the factor that the VF of this recipe's output should be scaled by.
2377 unsigned getVFScaleFactor() const { return VFScaleFactor; }
2378
2379#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2380 /// Print the recipe.
2381 void print(raw_ostream &O, const Twine &Indent,
2382 VPSlotTracker &SlotTracker) const override;
2383#endif
2384
2385 /// Returns the number of incoming values, also number of incoming blocks.
2386 /// A reduction phi has two incoming values: its start value and the value
2387 /// from the loop backedge.
2388 unsigned getNumIncoming() const override { return 2; }
2389
2390 /// Returns the recurrence kind of the reduction.
2391 RecurKind getRecurrenceKind() const { return Kind; }
2392
2393 /// Returns true, if the phi is part of an ordered reduction.
2394 bool isOrdered() const { return IsOrdered; }
2395
2396 /// Returns true, if the phi is part of an in-loop reduction.
2397 bool isInLoop() const { return IsInLoop; }
2398
2399 /// Returns true if the recipe only uses the first lane of operand \p Op.
2400 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2402 "Op must be an operand of the recipe");
2403 return isOrdered() || isInLoop();
2404 }
2405};
2406
2407/// A recipe for vectorizing a phi-node as a sequence of mask-based select
2408/// instructions.
2410public:
2411 /// The blend operation is a User of the incoming values and of their
2412 /// respective masks, ordered [I0, M0, I1, M1, I2, M2, ...]. Note that M0 can
2413 /// be omitted (implied by passing an odd number of operands) in which case
2414 /// all other incoming values are merged into it.
2416 : VPSingleDefRecipe(VPDef::VPBlendSC, Operands, Phi, DL) {
2417 assert(Operands.size() > 0 && "Expected at least one operand!");
2418 }
2419
2424
2425 VP_CLASSOF_IMPL(VPDef::VPBlendSC)
2426
2427 /// A normalized blend is one that has an odd number of operands, whereby the
2428 /// first operand does not have an associated mask.
2429 bool isNormalized() const { return getNumOperands() % 2; }
2430
2431 /// Return the number of incoming values, taking into account when normalized
2432 /// the first incoming value will have no mask.
2433 unsigned getNumIncomingValues() const {
2434 return (getNumOperands() + isNormalized()) / 2;
2435 }
2436
2437 /// Return incoming value number \p Idx.
2438 VPValue *getIncomingValue(unsigned Idx) const {
2439 return Idx == 0 ? getOperand(0) : getOperand(Idx * 2 - isNormalized());
2440 }
2441
2442 /// Return mask number \p Idx.
2443 VPValue *getMask(unsigned Idx) const {
2444 assert((Idx > 0 || !isNormalized()) && "First index has no mask!");
2445 return Idx == 0 ? getOperand(1) : getOperand(Idx * 2 + !isNormalized());
2446 }
2447
2448 /// Set mask number \p Idx to \p V.
2449 void setMask(unsigned Idx, VPValue *V) {
2450 assert((Idx > 0 || !isNormalized()) && "First index has no mask!");
2451 Idx == 0 ? setOperand(1, V) : setOperand(Idx * 2 + !isNormalized(), V);
2452 }
2453
/// Not executable: the body is llvm_unreachable, as a VPBlendRecipe is
/// expected to have been expanded by simplifyBlends before execution.
2454 void execute(VPTransformState &State) override {
2455 llvm_unreachable("VPBlendRecipe should be expanded by simplifyBlends");
2456 }
2457
2458 /// Return the cost of this VPBlendRecipe.
2459 InstructionCost computeCost(ElementCount VF,
2460 VPCostContext &Ctx) const override;
2461
2462#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2463 /// Print the recipe.
2464 void print(raw_ostream &O, const Twine &Indent,
2465 VPSlotTracker &SlotTracker) const override;
2466#endif
2467
2468 /// Returns true if the recipe only uses the first lane of operand \p Op.
2469 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2471 "Op must be an operand of the recipe");
2472 // Recursing through Blend recipes only, must terminate at header phis at the
2473 // latest.
2474 return all_of(users(),
2475 [this](VPUser *U) { return U->onlyFirstLaneUsed(this); });
2476 }
2477};
2478
2479/// A common base class for interleaved memory operations.
2480/// An Interleaved memory operation is a memory access method that combines
2481/// multiple strided loads/stores into a single wide load/store with shuffles.
2482/// The first operand is the start address. The optional operands are, in order,
2483/// the stored values and the mask.
2485 public VPIRMetadata {
2487
2488 /// Indicates if the interleave group is in a conditional block and requires a
2489 /// mask.
2490 bool HasMask = false;
2491
2492 /// Indicates if gaps between members of the group need to be masked out or if
2493 /// unused gaps can be loaded speculatively.
2494 bool NeedsMaskForGaps = false;
2495
2496protected:
2497 VPInterleaveBase(const unsigned char SC,
2499 ArrayRef<VPValue *> Operands,
2500 ArrayRef<VPValue *> StoredValues, VPValue *Mask,
2501 bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
2502 : VPRecipeBase(SC, Operands, DL), VPIRMetadata(MD), IG(IG),
2503 NeedsMaskForGaps(NeedsMaskForGaps) {
2504 // TODO: extend the masked interleaved-group support to reversed access.
2505 assert((!Mask || !IG->isReverse()) &&
2506 "Reversed masked interleave-group not supported.");
2507 for (unsigned I = 0; I < IG->getFactor(); ++I)
2508 if (Instruction *Inst = IG->getMember(I)) {
2509 if (Inst->getType()->isVoidTy())
2510 continue;
2511 new VPValue(Inst, this);
2512 }
2513
2514 for (auto *SV : StoredValues)
2515 addOperand(SV);
2516 if (Mask) {
2517 HasMask = true;
2518 addOperand(Mask);
2519 }
2520 }
2521
2522public:
2523 VPInterleaveBase *clone() override = 0;
2524
/// Method to support type inquiry through isa, cast, and dyn_cast: true if
/// \p R is a VPInterleaveSC or VPInterleaveEVLSC recipe.
2525 static inline bool classof(const VPRecipeBase *R) {
2526 return R->getVPDefID() == VPRecipeBase::VPInterleaveSC ||
2527 R->getVPDefID() == VPRecipeBase::VPInterleaveEVLSC;
2528 }
2529
/// Type inquiry on a VPUser: true if \p U is a VPRecipeBase that is accepted
/// by classof(const VPRecipeBase *).
2530 static inline bool classof(const VPUser *U) {
2531 auto *R = dyn_cast<VPRecipeBase>(U);
2532 return R && classof(R);
2533 }
2534
2535 /// Return the address accessed by this recipe.
2536 VPValue *getAddr() const {
2537 return getOperand(0); // Address is the 1st, mandatory operand.
2538 }
2539
2540 /// Return the mask used by this recipe. Note that a full mask is represented
2541 /// by a nullptr.
2542 VPValue *getMask() const {
2543 // Mask is optional and the last operand.
2544 return HasMask ? getOperand(getNumOperands() - 1) : nullptr;
2545 }
2546
2547 /// Return true if the access needs a mask because of the gaps.
2548 bool needsMaskForGaps() const { return NeedsMaskForGaps; }
2549
2551
/// Returns the insert position instruction of the interleave group.
2552 Instruction *getInsertPos() const { return IG->getInsertPos(); }
2553
/// Not meant to be called on the base class: the body is llvm_unreachable.
/// Derived recipes such as VPInterleaveRecipe declare their own execute().
2554 void execute(VPTransformState &State) override {
2555 llvm_unreachable("VPInterleaveBase should not be instantiated.");
2556 }
2557
2558 /// Return the cost of this recipe.
2559 InstructionCost computeCost(ElementCount VF,
2560 VPCostContext &Ctx) const override;
2561
2562 /// Returns true if the recipe only uses the first lane of operand \p Op.
2563 bool onlyFirstLaneUsed(const VPValue *Op) const override = 0;
2564
2565 /// Returns the number of stored operands of this interleave group. Returns 0
2566 /// for load interleave groups.
2567 virtual unsigned getNumStoreOperands() const = 0;
2568
2569 /// Return the VPValues stored by this interleave group. If it is a load
2570 /// interleave group, return an empty ArrayRef.
2572 return ArrayRef<VPValue *>(op_end() -
2573 (getNumStoreOperands() + (HasMask ? 1 : 0)),
2575 }
2576};
2577
2578/// VPInterleaveRecipe is a recipe for transforming an interleave group of load
2579/// or stores into one wide load/store and shuffles. The first operand of a
2580/// VPInterleave recipe is the address, followed by the stored values, followed
2581/// by an optional mask.
2583public:
2585 ArrayRef<VPValue *> StoredValues, VPValue *Mask,
2586 bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
2587 : VPInterleaveBase(VPDef::VPInterleaveSC, IG, Addr, StoredValues, Mask,
2588 NeedsMaskForGaps, MD, DL) {}
2589
2590 ~VPInterleaveRecipe() override = default;
2591
2595 needsMaskForGaps(), *this, getDebugLoc());
2596 }
2597
2598 VP_CLASSOF_IMPL(VPDef::VPInterleaveSC)
2599
2600 /// Generate the wide load or store, and shuffles.
2601 void execute(VPTransformState &State) override;
2602
2603#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2604 /// Print the recipe.
2605 void print(raw_ostream &O, const Twine &Indent,
2606 VPSlotTracker &SlotTracker) const override;
2607#endif
2608
2609 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2611 "Op must be an operand of the recipe");
2612 return Op == getAddr() && !llvm::is_contained(getStoredValues(), Op);
2613 }
2614
/// Returns the number of stored values: all operands except the address and,
/// when present, the trailing mask.
2615 unsigned getNumStoreOperands() const override {
2616 return getNumOperands() - (getMask() ? 2 : 1);
2617 }
2618};
2619
2620/// A recipe for interleaved memory operations with vector-predication
2621/// intrinsics. The first operand is the address, the second operand is the
2622/// explicit vector length. Stored values and mask are optional operands.
2624public:
2626 : VPInterleaveBase(VPDef::VPInterleaveEVLSC, R.getInterleaveGroup(),
2627 ArrayRef<VPValue *>({R.getAddr(), &EVL}),
2628 R.getStoredValues(), Mask, R.needsMaskForGaps(), R,
2629 R.getDebugLoc()) {
2630 assert(!getInterleaveGroup()->isReverse() &&
2631 "Reversed interleave-group with tail folding is not supported.");
2632 assert(!needsMaskForGaps() && "Interleaved access with gap mask is not "
2633 "supported for scalable vector.");
2634 }
2635
2636 ~VPInterleaveEVLRecipe() override = default;
2637
2639 llvm_unreachable("cloning not implemented yet");
2640 }
2641
2642 VP_CLASSOF_IMPL(VPDef::VPInterleaveEVLSC)
2643
2644 /// The VPValue of the explicit vector length.
2645 VPValue *getEVL() const { return getOperand(1); }
2646
2647 /// Generate the wide load or store, and shuffles.
2648 void execute(VPTransformState &State) override;
2649
2650#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2651 /// Print the recipe.
2652 void print(raw_ostream &O, const Twine &Indent,
2653 VPSlotTracker &SlotTracker) const override;
2654#endif
2655
2656 /// The recipe only uses the first lane of the address, and EVL operand.
2657 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2659 "Op must be an operand of the recipe");
2660 return (Op == getAddr() && !llvm::is_contained(getStoredValues(), Op)) ||
2661 Op == getEVL();
2662 }
2663
/// Returns the number of stored values: all operands except the address, the
/// EVL and, when present, the trailing mask.
2664 unsigned getNumStoreOperands() const override {
2665 return getNumOperands() - (getMask() ? 3 : 2);
2666 }
2667};
2668
2669/// A recipe to represent inloop reduction operations, performing a reduction on
2670/// a vector operand into a scalar value, and adding the result to a chain.
2671/// The Operands are {ChainOp, VecOp, [Condition]}.
2673 /// The recurrence kind for the reduction in question.
2674 RecurKind RdxKind;
2675 bool IsOrdered;
2676 /// Whether the reduction is conditional.
2677 bool IsConditional = false;
2678
2679protected:
2680 VPReductionRecipe(const unsigned char SC, RecurKind RdxKind,
2682 ArrayRef<VPValue *> Operands, VPValue *CondOp,
2683 bool IsOrdered, DebugLoc DL)
2684 : VPRecipeWithIRFlags(SC, Operands, FMFs, DL), RdxKind(RdxKind),
2685 IsOrdered(IsOrdered) {
2686 if (CondOp) {
2687 IsConditional = true;
2688 addOperand(CondOp);
2689 }
2691 }
2692
2693public:
2695 VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
2696 bool IsOrdered, DebugLoc DL = DebugLoc::getUnknown())
2697 : VPReductionRecipe(VPDef::VPReductionSC, RdxKind, FMFs, I,
2698 ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp,
2699 IsOrdered, DL) {}
2700
2702 VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
2703 bool IsOrdered, DebugLoc DL = DebugLoc::getUnknown())
2704 : VPReductionRecipe(VPDef::VPReductionSC, RdxKind, FMFs, nullptr,
2705 ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp,
2706 IsOrdered, DL) {}
2707
2708 ~VPReductionRecipe() override = default;
2709
2711 return new VPReductionRecipe(RdxKind, getFastMathFlags(),
2713 getCondOp(), IsOrdered, getDebugLoc());
2714 }
2715
/// Method to support type inquiry through isa, cast, and dyn_cast: true if
/// \p R is a VPReductionSC, VPReductionEVLSC or VPPartialReductionSC recipe.
2716 static inline bool classof(const VPRecipeBase *R) {
2717 return R->getVPDefID() == VPRecipeBase::VPReductionSC ||
2718 R->getVPDefID() == VPRecipeBase::VPReductionEVLSC ||
2719 R->getVPDefID() == VPRecipeBase::VPPartialReductionSC;
2720 }
2721
/// Type inquiry on a VPUser: true if \p U is a VPRecipeBase that is accepted
/// by classof(const VPRecipeBase *).
2722 static inline bool classof(const VPUser *U) {
2723 auto *R = dyn_cast<VPRecipeBase>(U);
2724 return R && classof(R);
2725 }
2726
/// Type inquiry on a VPValue: true if \p VPV has a defining recipe that is
/// accepted by classof(const VPRecipeBase *); false if it has none.
2727 static inline bool classof(const VPValue *VPV) {
2728 const VPRecipeBase *R = VPV->getDefiningRecipe();
2729 return R && classof(R);
2730 }
2731
/// Type inquiry on a single-def recipe; forwards to the
/// classof(const VPRecipeBase *) overload.
2732 static inline bool classof(const VPSingleDefRecipe *R) {
2733 return classof(static_cast<const VPRecipeBase *>(R));
2734 }
2735
2736 /// Generate the reduction in the loop.
2737 void execute(VPTransformState &State) override;
2738
2739 /// Return the cost of VPReductionRecipe.
2740 InstructionCost computeCost(ElementCount VF,
2741 VPCostContext &Ctx) const override;
2742
2743#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2744 /// Print the recipe.
2745 void print(raw_ostream &O, const Twine &Indent,
2746 VPSlotTracker &SlotTracker) const override;
2747#endif
2748
2749 /// Return the recurrence kind for the in-loop reduction.
2750 RecurKind getRecurrenceKind() const { return RdxKind; }
2751 /// Return true if the in-loop reduction is ordered.
2752 bool isOrdered() const { return IsOrdered; };
2753 /// Return true if the in-loop reduction is conditional.
2754 bool isConditional() const { return IsConditional; };
2755 /// The VPValue of the scalar Chain being accumulated.
2756 VPValue *getChainOp() const { return getOperand(0); }
2757 /// The VPValue of the vector value to be reduced.
2758 VPValue *getVecOp() const { return getOperand(1); }
2759 /// The VPValue of the condition for the block.
2761 return isConditional() ? getOperand(getNumOperands() - 1) : nullptr;
2762 }
2763};
2764
2765/// A recipe for forming partial reductions. In the loop, an accumulator and
2766/// vector operand are added together and passed to the next iteration as the
2767/// next accumulator. After the loop body, the accumulator is reduced to a
2768/// scalar value.
2770 unsigned Opcode;
2771
2772 /// The divisor by which the VF of this recipe's output should be divided
2773 /// during execution.
2774 unsigned VFScaleFactor;
2775
2776public:
2778 VPValue *Op1, VPValue *Cond, unsigned VFScaleFactor)
2779 : VPPartialReductionRecipe(ReductionInst->getOpcode(), Op0, Op1, Cond,
2780 VFScaleFactor, ReductionInst) {}
2781 VPPartialReductionRecipe(unsigned Opcode, VPValue *Op0, VPValue *Op1,
2782 VPValue *Cond, unsigned ScaleFactor,
2783 Instruction *ReductionInst = nullptr)
2784 : VPReductionRecipe(VPDef::VPPartialReductionSC, RecurKind::Add,
2785 FastMathFlags(), ReductionInst,
2786 ArrayRef<VPValue *>({Op0, Op1}), Cond, false, {}),
2787 Opcode(Opcode), VFScaleFactor(ScaleFactor) {
2788 [[maybe_unused]] auto *AccumulatorRecipe =
2790 // When cloning as part of a VPExpressionRecipe the chain op could have been
2791 // replaced by a temporary VPValue, so it doesn't have a defining recipe.
2792 assert((!AccumulatorRecipe ||
2793 isa<VPReductionPHIRecipe>(AccumulatorRecipe) ||
2794 isa<VPPartialReductionRecipe>(AccumulatorRecipe)) &&
2795 "Unexpected operand order for partial reduction recipe");
2796 }
2797 ~VPPartialReductionRecipe() override = default;
2798
2800 return new VPPartialReductionRecipe(Opcode, getOperand(0), getOperand(1),
2801 getCondOp(), VFScaleFactor,
2803 }
2804
2805 VP_CLASSOF_IMPL(VPDef::VPPartialReductionSC)
2806
2807 /// Generate the reduction in the loop.
2808 void execute(VPTransformState &State) override;
2809
2810 /// Return the cost of this VPPartialReductionRecipe.
2812 VPCostContext &Ctx) const override;
2813
2814 /// Get the binary op's opcode.
2815 unsigned getOpcode() const { return Opcode; }
2816
2817 /// Get the factor that the VF of this recipe's output should be scaled by.
2818 unsigned getVFScaleFactor() const { return VFScaleFactor; }
2819
2820#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2821 /// Print the recipe.
2822 void print(raw_ostream &O, const Twine &Indent,
2823 VPSlotTracker &SlotTracker) const override;
2824#endif
2825};
2826
2827/// A recipe to represent inloop reduction operations with vector-predication
2828/// intrinsics, performing a reduction on a vector operand with the explicit
2829/// vector length (EVL) into a scalar value, and adding the result to a chain.
2830/// The Operands are {ChainOp, VecOp, EVL, [Condition]}.
2832public:
2836 VPDef::VPReductionEVLSC, R.getRecurrenceKind(),
2837 R.getFastMathFlags(),
2839 ArrayRef<VPValue *>({R.getChainOp(), R.getVecOp(), &EVL}), CondOp,
2840 R.isOrdered(), DL) {}
2841
2842 ~VPReductionEVLRecipe() override = default;
2843
2845 llvm_unreachable("cloning not implemented yet");
2846 }
2847
2848 VP_CLASSOF_IMPL(VPDef::VPReductionEVLSC)
2849
2850 /// Generate the reduction in the loop.
2851 void execute(VPTransformState &State) override;
2852
2853#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2854 /// Print the recipe.
2855 void print(raw_ostream &O, const Twine &Indent,
2856 VPSlotTracker &SlotTracker) const override;
2857#endif
2858
2859 /// The VPValue of the explicit vector length.
2860 VPValue *getEVL() const { return getOperand(2); }
2861
2862 /// Returns true if the recipe only uses the first lane of operand \p Op.
2863 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2865 "Op must be an operand of the recipe");
2866 return Op == getEVL();
2867 }
2868};
2869
2870/// VPReplicateRecipe replicates a given instruction producing multiple scalar
2871/// copies of the original scalar type, one per lane, instead of producing a
2872/// single copy of widened type for all lanes. If the instruction is known to be
2873/// a single scalar, only one copy, per lane zero, will be generated.
2875 public VPIRMetadata {
2876 /// Indicator if only a single replica, for lane zero, is needed.
2877 bool IsSingleScalar;
2878
2879 /// Indicator if the replicas are also predicated.
2880 bool IsPredicated;
2881
2882public:
2884 bool IsSingleScalar, VPValue *Mask = nullptr,
2885 VPIRMetadata Metadata = {})
2886 : VPRecipeWithIRFlags(VPDef::VPReplicateSC, Operands, *I),
2887 VPIRMetadata(Metadata), IsSingleScalar(IsSingleScalar),
2888 IsPredicated(Mask) {
2889 if (Mask)
2890 addOperand(Mask);
2891 }
2892
2893 ~VPReplicateRecipe() override = default;
2894
2896 auto *Copy =
2897 new VPReplicateRecipe(getUnderlyingInstr(), operands(), IsSingleScalar,
2898 isPredicated() ? getMask() : nullptr, *this);
2899 Copy->transferFlags(*this);
2900 return Copy;
2901 }
2902
2903 VP_CLASSOF_IMPL(VPDef::VPReplicateSC)
2904
2905 /// Generate replicas of the desired Ingredient. Replicas will be generated
2906 /// for all parts and lanes unless a specific part and lane are specified in
2907 /// the \p State.
2908 void execute(VPTransformState &State) override;
2909
2910 /// Return the cost of this VPReplicateRecipe.
2911 InstructionCost computeCost(ElementCount VF,
2912 VPCostContext &Ctx) const override;
2913
2914#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2915 /// Print the recipe.
2916 void print(raw_ostream &O, const Twine &Indent,
2917 VPSlotTracker &SlotTracker) const override;
2918#endif
2919
2920 bool isSingleScalar() const { return IsSingleScalar; }
2921
2922 bool isPredicated() const { return IsPredicated; }
2923
2924 /// Returns true if the recipe only uses the first lane of operand \p Op.
2925 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2927 "Op must be an operand of the recipe");
2928 return isSingleScalar();
2929 }
2930
2931 /// Returns true if the recipe uses scalars of operand \p Op.
2932 bool usesScalars(const VPValue *Op) const override {
2934 "Op must be an operand of the recipe");
2935 return true;
2936 }
2937
2938 /// Returns true if the recipe is used by a widened recipe via an intervening
2939 /// VPPredInstPHIRecipe. In this case, the scalar values should also be packed
2940 /// in a vector.
2941 bool shouldPack() const;
2942
2943 /// Return the mask of a predicated VPReplicateRecipe.
2945 assert(isPredicated() && "Trying to get the mask of a unpredicated recipe");
2946 return getOperand(getNumOperands() - 1);
2947 }
2948
2949 unsigned getOpcode() const { return getUnderlyingInstr()->getOpcode(); }
2950};
2951
2952/// A recipe for generating conditional branches on the bits of a mask.
2954public:
2956 : VPRecipeBase(VPDef::VPBranchOnMaskSC, {BlockInMask}, DL) {}
2957
2960 }
2961
2962 VP_CLASSOF_IMPL(VPDef::VPBranchOnMaskSC)
2963
2964 /// Generate the extraction of the appropriate bit from the block mask and the
2965 /// conditional branch.
2966 void execute(VPTransformState &State) override;
2967
2968 /// Return the cost of this VPBranchOnMaskRecipe.
2969 InstructionCost computeCost(ElementCount VF,
2970 VPCostContext &Ctx) const override;
2971
2972#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2973 /// Print the recipe.
2974 void print(raw_ostream &O, const Twine &Indent,
2975 VPSlotTracker &SlotTracker) const override {
2976 O << Indent << "BRANCH-ON-MASK ";
2978 }
2979#endif
2980
2981 /// Returns true if the recipe uses scalars of operand \p Op.
2982 bool usesScalars(const VPValue *Op) const override {
2984 "Op must be an operand of the recipe");
2985 return true;
2986 }
2987};
2988
2989/// A recipe to combine multiple recipes into a single 'expression' recipe,
2990/// which should be considered a single entity for cost-modeling and transforms.
2991/// The recipe needs to be 'decomposed', i.e. replaced by its individual
2992/// expression recipes, before execute. The individual expression recipes are
2993/// completely disconnected from the def-use graph of other recipes not part of
2994/// the expression. Def-use edges between pairs of expression recipes remain
2995/// intact, whereas every edge between an expression recipe and a recipe outside
2996/// the expression is elevated to connect the non-expression recipe with the
2997/// VPExpressionRecipe itself.
2998class VPExpressionRecipe : public VPSingleDefRecipe {
2999 /// Recipes included in this VPExpressionRecipe. This could contain
3000 /// duplicates.
3001 SmallVector<VPSingleDefRecipe *> ExpressionRecipes;
3002
3003 /// Temporary VPValues used for external operands of the expression, i.e.
3004 /// operands not defined by recipes in the expression.
3005 SmallVector<VPValue *> LiveInPlaceholders;
3006
3007 enum class ExpressionTypes {
3008 /// Represents an inloop extended reduction operation, performing a
3009 /// reduction on an extended vector operand into a scalar value, and adding
3010 /// the result to a chain.
3011 ExtendedReduction,
3012 /// Represent an inloop multiply-accumulate reduction, multiplying the
3013 /// extended vector operands, performing a reduction.add on the result, and
3014 /// adding the scalar result to a chain.
3015 ExtMulAccReduction,
3016 /// Represent an inloop multiply-accumulate reduction, multiplying the
3017 /// vector operands, performing a reduction.add on the result, and adding
3018 /// the scalar result to a chain.
3019 MulAccReduction,
3020 /// Represent an inloop multiply-accumulate reduction, multiplying the
3021 /// extended vector operands, negating the multiplication, performing a
3022 /// reduction.add on the result, and adding the scalar result to a chain.
3023 ExtNegatedMulAccReduction,
3024 };
3025
3026 /// Type of the expression.
3027 ExpressionTypes ExpressionType;
3028
3029 /// Construct a new VPExpressionRecipe by internalizing recipes in \p
3030 /// ExpressionRecipes. External operands (i.e. not defined by another recipe
3031 /// in the expression) are replaced by temporary VPValues and the original
3032 /// operands are transferred to the VPExpressionRecipe itself. Clone recipes
3033 /// as needed (excluding last) to ensure they are only used by other recipes
3034 /// in the expression.
3035 VPExpressionRecipe(ExpressionTypes ExpressionType,
3036 ArrayRef<VPSingleDefRecipe *> ExpressionRecipes);
3037
3038public:
3040 : VPExpressionRecipe(ExpressionTypes::ExtendedReduction, {Ext, Red}) {}
3042 : VPExpressionRecipe(ExpressionTypes::MulAccReduction, {Mul, Red}) {}
3045 : VPExpressionRecipe(ExpressionTypes::ExtMulAccReduction,
3046 {Ext0, Ext1, Mul, Red}) {}
3049 VPReductionRecipe *Red)
3050 : VPExpressionRecipe(ExpressionTypes::ExtNegatedMulAccReduction,
3051 {Ext0, Ext1, Mul, Sub, Red}) {
3052 assert(Mul->getOpcode() == Instruction::Mul && "Expected a mul");
3053 assert(Red->getRecurrenceKind() == RecurKind::Add &&
3054 "Expected an add reduction");
3055 assert(getNumOperands() >= 3 && "Expected at least three operands");
3056 [[maybe_unused]] auto *SubConst = dyn_cast<ConstantInt>(getOperand(2)->getLiveInIRValue());
3057 assert(SubConst && SubConst->getValue() == 0 &&
3058 Sub->getOpcode() == Instruction::Sub && "Expected a negating sub");
3059 }
3060
3062 SmallPtrSet<VPSingleDefRecipe *, 4> ExpressionRecipesSeen;
3063 for (auto *R : reverse(ExpressionRecipes)) {
3064 if (ExpressionRecipesSeen.insert(R).second)
3065 delete R;
3066 }
3067 for (VPValue *T : LiveInPlaceholders)
3068 delete T;
3069 }
3070
3071 VP_CLASSOF_IMPL(VPDef::VPExpressionSC)
3072
3073 VPExpressionRecipe *clone() override {
3074 assert(!ExpressionRecipes.empty() && "empty expressions should be removed");
3075 SmallVector<VPSingleDefRecipe *> NewExpressiondRecipes;
3076 for (auto *R : ExpressionRecipes)
3077 NewExpressiondRecipes.push_back(R->clone());
3078 for (auto *New : NewExpressiondRecipes) {
3079 for (const auto &[Idx, Old] : enumerate(ExpressionRecipes))
3080 New->replaceUsesOfWith(Old, NewExpressiondRecipes[Idx]);
3081 // Update placeholder operands in the cloned recipe to use the external
3082 // operands, to be internalized when the cloned expression is constructed.
3083 for (const auto &[Placeholder, OutsideOp] :
3084 zip(LiveInPlaceholders, operands()))
3085 New->replaceUsesOfWith(Placeholder, OutsideOp);
3086 }
3087 return new VPExpressionRecipe(ExpressionType, NewExpressiondRecipes);
3088 }
3089
3090 /// Return the VPValue to use to infer the result type of the recipe.
3092 unsigned OpIdx =
3093 cast<VPReductionRecipe>(ExpressionRecipes.back())->isConditional() ? 2
3094 : 1;
3095 return getOperand(getNumOperands() - OpIdx);
3096 }
3097
3098 /// Insert the recipes of the expression back into the VPlan, directly before
3099 /// the current recipe. Leaves the expression recipe empty, which must be
3100 /// removed before codegen.
3101 void decompose();
3102
3103 unsigned getVFScaleFactor() const {
3104 auto *PR = dyn_cast<VPPartialReductionRecipe>(ExpressionRecipes.back());
3105 return PR ? PR->getVFScaleFactor() : 1;
3106 }
3107
3108 /// Method for generating code, must not be called as this recipe is abstract.
3109 void execute(VPTransformState &State) override {
3110 llvm_unreachable("recipe must be removed before execute");
3111 }
3112
3114 VPCostContext &Ctx) const override;
3115
3116#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3117 /// Print the recipe.
3118 void print(raw_ostream &O, const Twine &Indent,
3119 VPSlotTracker &SlotTracker) const override;
3120#endif
3121
3122 /// Returns true if this expression contains recipes that may read from or
3123 /// write to memory.
3124 bool mayReadOrWriteMemory() const;
3125
3126 /// Returns true if this expression contains recipes that may have side
3127 /// effects.
3128 bool mayHaveSideEffects() const;
3129
3130 /// Returns true if the result of this VPExpressionRecipe is a single-scalar.
3131 bool isSingleScalar() const;
3132};
3133
3134/// VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when
3135/// control converges back from a Branch-on-Mask. The phi nodes are needed in
3136/// order to merge values that are set under such a branch and feed their uses.
3137/// The phi nodes can be scalar or vector depending on the users of the value.
3138/// This recipe works in concert with VPBranchOnMaskRecipe.
3140public:
3141 /// Construct a VPPredInstPHIRecipe given \p PredV whose value needs phi
3142 /// nodes after merging back from a Branch-on-Mask.
3144 : VPSingleDefRecipe(VPDef::VPPredInstPHISC, PredV, DL) {}
3145 ~VPPredInstPHIRecipe() override = default;
3146
3148 return new VPPredInstPHIRecipe(getOperand(0), getDebugLoc());
3149 }
3150
3151 VP_CLASSOF_IMPL(VPDef::VPPredInstPHISC)
3152
3153 /// Generates phi nodes for live-outs (from a replicate region) as needed to
3154 /// retain SSA form.
3155 void execute(VPTransformState &State) override;
3156
3157 /// Return the cost of this VPPredInstPHIRecipe.
3159 VPCostContext &Ctx) const override {
3160 // TODO: Compute accurate cost after retiring the legacy cost model.
3161 return 0;
3162 }
3163
3164#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3165 /// Print the recipe.
3166 void print(raw_ostream &O, const Twine &Indent,
3167 VPSlotTracker &SlotTracker) const override;
3168#endif
3169
3170 /// Returns true if the recipe uses scalars of operand \p Op.
3171 bool usesScalars(const VPValue *Op) const override {
3173 "Op must be an operand of the recipe");
3174 return true;
3175 }
3176};
3177
3178/// A common base class for widening memory operations. An optional mask can be
3179/// provided as the last operand.
3181 public VPIRMetadata {
3182protected:
3184
3185 /// Alignment information for this memory access.
3187
3188 /// Whether the accessed addresses are consecutive.
3190
3191 /// Whether the consecutive accessed addresses are in reverse order.
3193
3194 /// Whether the memory access is masked.
3195 bool IsMasked = false;
3196
3197 void setMask(VPValue *Mask) {
3198 assert(!IsMasked && "cannot re-set mask");
3199 if (!Mask)
3200 return;
3201 addOperand(Mask);
3202 IsMasked = true;
3203 }
3204
3205 VPWidenMemoryRecipe(const char unsigned SC, Instruction &I,
3206 std::initializer_list<VPValue *> Operands,
3207 bool Consecutive, bool Reverse, Align Alignment,
3208 const VPIRMetadata &Metadata, DebugLoc DL)
3209 : VPRecipeBase(SC, Operands, DL), VPIRMetadata(Metadata), Ingredient(I),
3211 assert((Consecutive || !Reverse) && "Reverse implies consecutive");
3213 !Reverse &&
3214 "Reversed acccess without VPVectorEndPointerRecipe address?");
3215 }
3216
3217public:
3219 llvm_unreachable("cloning not supported");
3220 }
3221
3222 static inline bool classof(const VPRecipeBase *R) {
3223 return R->getVPDefID() == VPRecipeBase::VPWidenLoadSC ||
3224 R->getVPDefID() == VPRecipeBase::VPWidenStoreSC ||
3225 R->getVPDefID() == VPRecipeBase::VPWidenLoadEVLSC ||
3226 R->getVPDefID() == VPRecipeBase::VPWidenStoreEVLSC;
3227 }
3228
3229 static inline bool classof(const VPUser *U) {
3230 auto *R = dyn_cast<VPRecipeBase>(U);
3231 return R && classof(R);
3232 }
3233
3234 /// Return whether the loaded-from / stored-to addresses are consecutive.
3235 bool isConsecutive() const { return Consecutive; }
3236
3237 /// Return whether the consecutive loaded/stored addresses are in reverse
3238 /// order.
3239 bool isReverse() const { return Reverse; }
3240
3241 /// Return the address accessed by this recipe.
3242 VPValue *getAddr() const { return getOperand(0); }
3243
3244 /// Returns true if the recipe is masked.
3245 bool isMasked() const { return IsMasked; }
3246
3247 /// Return the mask used by this recipe. Note that a full mask is represented
3248 /// by a nullptr.
3249 VPValue *getMask() const {
3250 // Mask is optional and therefore the last operand.
3251 return isMasked() ? getOperand(getNumOperands() - 1) : nullptr;
3252 }
3253
3254 /// Returns the alignment of the memory access.
3255 Align getAlign() const { return Alignment; }
3256
3257 /// Generate the wide load/store.
3258 void execute(VPTransformState &State) override {
3259 llvm_unreachable("VPWidenMemoryRecipe should not be instantiated.");
3260 }
3261
3262 /// Return the cost of this VPWidenMemoryRecipe.
3263 InstructionCost computeCost(ElementCount VF,
3264 VPCostContext &Ctx) const override;
3265
3267};
3268
3269/// A recipe for widening load operations, using the address to load from and an
3270/// optional mask.
3272 public VPValue {
3274 bool Consecutive, bool Reverse, Align Alignment,
3275 const VPIRMetadata &Metadata, DebugLoc DL)
3276 : VPWidenMemoryRecipe(VPDef::VPWidenLoadSC, Load, {Addr}, Consecutive,
3277 Reverse, Alignment, Metadata, DL),
3278 VPValue(this, &Load) {
3279 setMask(Mask);
3280 }
3281
3285 *this, getDebugLoc());
3286 }
3287
3288 VP_CLASSOF_IMPL(VPDef::VPWidenLoadSC);
3289
3290 /// Generate a wide load or gather.
3291 void execute(VPTransformState &State) override;
3292
3293#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3294 /// Print the recipe.
3295 void print(raw_ostream &O, const Twine &Indent,
3296 VPSlotTracker &SlotTracker) const override;
3297#endif
3298
3299 /// Returns true if the recipe only uses the first lane of operand \p Op.
3300 bool onlyFirstLaneUsed(const VPValue *Op) const override {
3302 "Op must be an operand of the recipe");
3303 // Widened, consecutive load operations only demand the first lane of
3304 // their address.
3305 return Op == getAddr() && isConsecutive();
3306 }
3307};
3308
3309/// A recipe for widening load operations with vector-predication intrinsics,
3310/// using the address to load from, the explicit vector length and an optional
3311/// mask.
3312struct VPWidenLoadEVLRecipe final : public VPWidenMemoryRecipe, public VPValue {
3314 VPValue *Mask)
3315 : VPWidenMemoryRecipe(VPDef::VPWidenLoadEVLSC, L.getIngredient(),
3316 {Addr, &EVL}, L.isConsecutive(), L.isReverse(),
3317 L.getAlign(), L, L.getDebugLoc()),
3318 VPValue(this, &getIngredient()) {
3319 setMask(Mask);
3320 }
3321
3322 VP_CLASSOF_IMPL(VPDef::VPWidenLoadEVLSC)
3323
3324 /// Return the EVL operand.
3325 VPValue *getEVL() const { return getOperand(1); }
3326
3327 /// Generate the wide load or gather.
3328 void execute(VPTransformState &State) override;
3329
3330 /// Return the cost of this VPWidenLoadEVLRecipe.
3332 VPCostContext &Ctx) const override;
3333
3334#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3335 /// Print the recipe.
3336 void print(raw_ostream &O, const Twine &Indent,
3337 VPSlotTracker &SlotTracker) const override;
3338#endif
3339
3340 /// Returns true if the recipe only uses the first lane of operand \p Op.
3341 bool onlyFirstLaneUsed(const VPValue *Op) const override {
3343 "Op must be an operand of the recipe");
3344 // Widened loads only demand the first lane of EVL and consecutive loads
3345 // only demand the first lane of their address.
3346 return Op == getEVL() || (Op == getAddr() && isConsecutive());
3347 }
3348};
3349
3350/// A recipe for widening store operations, using the stored value, the address
3351/// to store to and an optional mask.
3353 VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal,
3354 VPValue *Mask, bool Consecutive, bool Reverse,
3356 : VPWidenMemoryRecipe(VPDef::VPWidenStoreSC, Store, {Addr, StoredVal},
3357 Consecutive, Reverse, Alignment, Metadata, DL) {
3358 setMask(Mask);
3359 }
3360
3366
3367 VP_CLASSOF_IMPL(VPDef::VPWidenStoreSC);
3368
3369 /// Return the value stored by this recipe.
3370 VPValue *getStoredValue() const { return getOperand(1); }
3371
3372 /// Generate a wide store or scatter.
3373 void execute(VPTransformState &State) override;
3374
3375#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3376 /// Print the recipe.
3377 void print(raw_ostream &O, const Twine &Indent,
3378 VPSlotTracker &SlotTracker) const override;
3379#endif
3380
3381 /// Returns true if the recipe only uses the first lane of operand \p Op.
3382 bool onlyFirstLaneUsed(const VPValue *Op) const override {
3384 "Op must be an operand of the recipe");
3385 // Widened, consecutive stores only demand the first lane of their address,
3386 // unless the same operand is also stored.
3387 return Op == getAddr() && isConsecutive() && Op != getStoredValue();
3388 }
3389};
3390
3391/// A recipe for widening store operations with vector-predication intrinsics,
3392/// using the value to store, the address to store to, the explicit vector
3393/// length and an optional mask.
3396 VPValue *Mask)
3397 : VPWidenMemoryRecipe(VPDef::VPWidenStoreEVLSC, S.getIngredient(),
3398 {Addr, S.getStoredValue(), &EVL}, S.isConsecutive(),
3399 S.isReverse(), S.getAlign(), S, S.getDebugLoc()) {
3400 setMask(Mask);
3401 }
3402
3403 VP_CLASSOF_IMPL(VPDef::VPWidenStoreEVLSC)
3404
3405 /// Return the address accessed by this recipe.
3406 VPValue *getStoredValue() const { return getOperand(1); }
3407
3408 /// Return the EVL operand.
3409 VPValue *getEVL() const { return getOperand(2); }
3410
3411 /// Generate the wide store or scatter.
3412 void execute(VPTransformState &State) override;
3413
3414 /// Return the cost of this VPWidenStoreEVLRecipe.
3416 VPCostContext &Ctx) const override;
3417
3418#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3419 /// Print the recipe.
3420 void print(raw_ostream &O, const Twine &Indent,
3421 VPSlotTracker &SlotTracker) const override;
3422#endif
3423
3424 /// Returns true if the recipe only uses the first lane of operand \p Op.
3425 bool onlyFirstLaneUsed(const VPValue *Op) const override {
3427 "Op must be an operand of the recipe");
3428 if (Op == getEVL()) {
3429 assert(getStoredValue() != Op && "unexpected store of EVL");
3430 return true;
3431 }
3432 // Widened, consecutive memory operations only demand the first lane of
3433 // their address, unless the same operand is also stored. The latter can
3434 // happen with opaque pointers.
3435 return Op == getAddr() && isConsecutive() && Op != getStoredValue();
3436 }
3437};
3438
3439/// Recipe to expand a SCEV expression.
3441 const SCEV *Expr;
3442
3443public:
3445 : VPSingleDefRecipe(VPDef::VPExpandSCEVSC, {}), Expr(Expr) {}
3446
3447 ~VPExpandSCEVRecipe() override = default;
3448
3449 VPExpandSCEVRecipe *clone() override { return new VPExpandSCEVRecipe(Expr); }
3450
3451 VP_CLASSOF_IMPL(VPDef::VPExpandSCEVSC)
3452
3453 void execute(VPTransformState &State) override {
3454 llvm_unreachable("SCEV expressions must be expanded before final execute");
3455 }
3456
3457 /// Return the cost of this VPExpandSCEVRecipe.
3459 VPCostContext &Ctx) const override {
3460 // TODO: Compute accurate cost after retiring the legacy cost model.
3461 return 0;
3462 }
3463
3464#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3465 /// Print the recipe.
3466 void print(raw_ostream &O, const Twine &Indent,
3467 VPSlotTracker &SlotTracker) const override;
3468#endif
3469
3470 const SCEV *getSCEV() const { return Expr; }
3471};
3472
3473/// Canonical scalar induction phi of the vector loop, starting at the specified
3474/// start value (either 0 or the resume value when vectorizing the epilogue
3475/// loop). VPWidenCanonicalIVRecipe represents the vector version of the
3476/// canonical induction variable.
3478public:
3480 : VPHeaderPHIRecipe(VPDef::VPCanonicalIVPHISC, nullptr, StartV, DL) {}
3481
3482 ~VPCanonicalIVPHIRecipe() override = default;
3483
3485 auto *R = new VPCanonicalIVPHIRecipe(getOperand(0), getDebugLoc());
3486 R->addOperand(getBackedgeValue());
3487 return R;
3488 }
3489
3490 VP_CLASSOF_IMPL(VPDef::VPCanonicalIVPHISC)
3491
3492 void execute(VPTransformState &State) override {
3493 llvm_unreachable("cannot execute this recipe, should be replaced by a "
3494 "scalar phi recipe");
3495 }
3496
3497#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3498 /// Print the recipe.
3499 void print(raw_ostream &O, const Twine &Indent,
3500 VPSlotTracker &SlotTracker) const override;
3501#endif
3502
3503 /// Returns the scalar type of the induction.
3505 return getStartValue()->getLiveInIRValue()->getType();
3506 }
3507
3508 /// Returns true if the recipe only uses the first lane of operand \p Op.
3509 bool onlyFirstLaneUsed(const VPValue *Op) const override {
3511 "Op must be an operand of the recipe");
3512 return true;
3513 }
3514
3515 /// Returns true if the recipe only uses the first part of operand \p Op.
3516 bool onlyFirstPartUsed(const VPValue *Op) const override {
3518 "Op must be an operand of the recipe");
3519 return true;
3520 }
3521
3522 /// Return the cost of this VPCanonicalIVPHIRecipe.
3524 VPCostContext &Ctx) const override {
3525 // For now, match the behavior of the legacy cost model.
3526 return 0;
3527 }
3528};
3529
3530/// A recipe for generating the active lane mask for the vector loop that is
3531/// used to predicate the vector operations.
3532/// TODO: It would be good to use the existing VPWidenPHIRecipe instead and
3533/// remove VPActiveLaneMaskPHIRecipe.
3535public:
3537 : VPHeaderPHIRecipe(VPDef::VPActiveLaneMaskPHISC, nullptr, StartMask,
3538 DL) {}
3539
3540 ~VPActiveLaneMaskPHIRecipe() override = default;
3541
3544 if (getNumOperands() == 2)
3545 R->addOperand(getOperand(1));
3546 return R;
3547 }
3548
3549 VP_CLASSOF_IMPL(VPDef::VPActiveLaneMaskPHISC)
3550
3551 /// Generate the active lane mask phi of the vector loop.
3552 void execute(VPTransformState &State) override;
3553
3554#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3555 /// Print the recipe.
3556 void print(raw_ostream &O, const Twine &Indent,
3557 VPSlotTracker &SlotTracker) const override;
3558#endif
3559};
3560
3561/// A recipe for generating the phi node for the current index of elements,
3562/// adjusted in accordance with EVL value. It starts at the start value of the
3563/// canonical induction and gets incremented by EVL in each iteration of the
3564/// vector loop.
3566public:
3568 : VPHeaderPHIRecipe(VPDef::VPEVLBasedIVPHISC, nullptr, StartIV, DL) {}
3569
3570 ~VPEVLBasedIVPHIRecipe() override = default;
3571
3573 llvm_unreachable("cloning not implemented yet");
3574 }
3575
3576 VP_CLASSOF_IMPL(VPDef::VPEVLBasedIVPHISC)
3577
3578 void execute(VPTransformState &State) override {
3579 llvm_unreachable("cannot execute this recipe, should be replaced by a "
3580 "scalar phi recipe");
3581 }
3582
3583 /// Return the cost of this VPEVLBasedIVPHIRecipe.
3585 VPCostContext &Ctx) const override {
3586 // For now, match the behavior of the legacy cost model.
3587 return 0;
3588 }
3589
3590 /// Returns true if the recipe only uses the first lane of operand \p Op.
3591 bool onlyFirstLaneUsed(const VPValue *Op) const override {
3593 "Op must be an operand of the recipe");
3594 return true;
3595 }
3596
3597#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3598 /// Print the recipe.
3599 void print(raw_ostream &O, const Twine &Indent,
3600 VPSlotTracker &SlotTracker) const override;
3601#endif
3602};
3603
3604/// A Recipe for widening the canonical induction variable of the vector loop.
3606 public VPUnrollPartAccessor<1> {
3607public:
3609 : VPSingleDefRecipe(VPDef::VPWidenCanonicalIVSC, {CanonicalIV}) {}
3610
3611 ~VPWidenCanonicalIVRecipe() override = default;
3612
3617
3618 VP_CLASSOF_IMPL(VPDef::VPWidenCanonicalIVSC)
3619
3620 /// Generate a canonical vector induction variable of the vector loop, with
3621 /// start = {<Part*VF, Part*VF+1, ..., Part*VF+VF-1> for 0 <= Part < UF}, and
3622 /// step = <VF*UF, VF*UF, ..., VF*UF>.
3623 void execute(VPTransformState &State) override;
3624
3625 /// Return the cost of this VPWidenCanonicalIVRecipe.
3627 VPCostContext &Ctx) const override {
3628 // TODO: Compute accurate cost after retiring the legacy cost model.
3629 return 0;
3630 }
3631
3632#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3633 /// Print the recipe.
3634 void print(raw_ostream &O, const Twine &Indent,
3635 VPSlotTracker &SlotTracker) const override;
3636#endif
3637};
3638
3639/// A recipe for converting the input value \p IV to the corresponding
3640/// value of an IV with different start and step values, using Start + IV *
3641/// Step.
3643 /// Kind of the induction.
3645 /// If not nullptr, the floating point induction binary operator. Must be set
3646 /// for floating point inductions.
3647 const FPMathOperator *FPBinOp;
3648
3649 /// Name to use for the generated IR instruction for the derived IV.
3650 std::string Name;
3651
3652public:
3654 VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step,
3655 const Twine &Name = "")
3657 IndDesc.getKind(),
3658 dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp()),
3659 Start, CanonicalIV, Step, Name) {}
3660
3662 const FPMathOperator *FPBinOp, VPValue *Start, VPValue *IV,
3663 VPValue *Step, const Twine &Name = "")
3664 : VPSingleDefRecipe(VPDef::VPDerivedIVSC, {Start, IV, Step}), Kind(Kind),
3665 FPBinOp(FPBinOp), Name(Name.str()) {}
3666
3667 ~VPDerivedIVRecipe() override = default;
3668
3670 return new VPDerivedIVRecipe(Kind, FPBinOp, getStartValue(), getOperand(1),
3671 getStepValue());
3672 }
3673
3674 VP_CLASSOF_IMPL(VPDef::VPDerivedIVSC)
3675
3676 /// Generate the transformed value of the induction at offset StartValue (1.
3677 /// operand) + IV (2. operand) * StepValue (3. operand).
3678 void execute(VPTransformState &State) override;
3679
3680 /// Return the cost of this VPDerivedIVRecipe.
3682 VPCostContext &Ctx) const override {
3683 // TODO: Compute accurate cost after retiring the legacy cost model.
3684 return 0;
3685 }
3686
3687#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3688 /// Print the recipe.
3689 void print(raw_ostream &O, const Twine &Indent,
3690 VPSlotTracker &SlotTracker) const override;
3691#endif
3692
3694 return getStartValue()->getLiveInIRValue()->getType();
3695 }
3696
3697 VPValue *getStartValue() const { return getOperand(0); }
3698 VPValue *getStepValue() const { return getOperand(2); }
3699
3700 /// Returns true if the recipe only uses the first lane of operand \p Op.
3701 bool onlyFirstLaneUsed(const VPValue *Op) const override {
3703 "Op must be an operand of the recipe");
3704 return true;
3705 }
3706};
3707
3708/// A recipe for handling phi nodes of integer and floating-point inductions,
3709/// producing their scalar values.
3711 public VPUnrollPartAccessor<3> {
3712 Instruction::BinaryOps InductionOpcode;
3713
3714public:
3717 DebugLoc DL)
3718 : VPRecipeWithIRFlags(VPDef::VPScalarIVStepsSC,
3719 ArrayRef<VPValue *>({IV, Step, VF}), FMFs, DL),
3720 InductionOpcode(Opcode) {}
3721
3723 VPValue *Step, VPValue *VF,
3726 IV, Step, VF, IndDesc.getInductionOpcode(),
3727 dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp())
3728 ? IndDesc.getInductionBinOp()->getFastMathFlags()
3729 : FastMathFlags(),
3730 DL) {}
3731
3732 ~VPScalarIVStepsRecipe() override = default;
3733
3735 return new VPScalarIVStepsRecipe(
3736 getOperand(0), getOperand(1), getOperand(2), InductionOpcode,
3738 getDebugLoc());
3739 }
3740
3741 /// Return true if this VPScalarIVStepsRecipe corresponds to part 0. Note that
3742 /// this is only accurate after the VPlan has been unrolled.
3743 bool isPart0() const { return getUnrollPart(*this) == 0; }
3744
3745 VP_CLASSOF_IMPL(VPDef::VPScalarIVStepsSC)
3746
3747 /// Generate the scalarized versions of the phi node as needed by their users.
3748 void execute(VPTransformState &State) override;
3749
3750 /// Return the cost of this VPScalarIVStepsRecipe.
3752 VPCostContext &Ctx) const override {
3753 // TODO: Compute accurate cost after retiring the legacy cost model.
3754 return 0;
3755 }
3756
3757#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3758 /// Print the recipe.
3759 void print(raw_ostream &O, const Twine &Indent,
3760 VPSlotTracker &SlotTracker) const override;
3761#endif
3762
3763 VPValue *getStepValue() const { return getOperand(1); }
3764
3765 /// Returns true if the recipe only uses the first lane of operand \p Op.
3766 bool onlyFirstLaneUsed(const VPValue *Op) const override {
3768 "Op must be an operand of the recipe");
3769 return true;
3770 }
3771};
3772
3773/// Casting from VPRecipeBase -> VPPhiAccessors is supported for all recipe
3774/// types implementing VPPhiAccessors. Used by isa<> & co.
3776 static inline bool isPossible(const VPRecipeBase *f) {
3777 // TODO: include VPPredInstPHIRecipe too, once it implements VPPhiAccessors.
3779 }
3780};
3781/// Support casting from VPRecipeBase -> VPPhiAccessors, by down-casting to the
3782/// recipe types implementing VPPhiAccessors. Used by cast<>, dyn_cast<> & co.
3783template <typename SrcTy>
3784struct CastInfoVPPhiAccessors : public CastIsPossible<VPPhiAccessors, SrcTy> {
3785
3787
3788 /// doCast is used by cast<>.
3789 static inline VPPhiAccessors *doCast(SrcTy R) {
3790 return const_cast<VPPhiAccessors *>([R]() -> const VPPhiAccessors * {
3791 switch (R->getVPDefID()) {
3792 case VPDef::VPInstructionSC:
3793 return cast<VPPhi>(R);
3794 case VPDef::VPIRInstructionSC:
3795 return cast<VPIRPhi>(R);
3796 case VPDef::VPWidenPHISC:
3797 return cast<VPWidenPHIRecipe>(R);
3798 default:
3799 return cast<VPHeaderPHIRecipe>(R);
3800 }
3801 }());
3802 }
3803
3804 /// doCastIfPossible is used by dyn_cast<>.
3805 static inline VPPhiAccessors *doCastIfPossible(SrcTy f) {
3806 if (!Self::isPossible(f))
3807 return nullptr;
3808 return doCast(f);
3809 }
3810};
3811template <>
3814template <>
3817
3818/// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It
3819/// holds a sequence of zero or more VPRecipe's each representing a sequence of
3820/// output IR instructions. All PHI-like recipes must come before any non-PHI recipes.
3821class LLVM_ABI_FOR_TEST VPBasicBlock : public VPBlockBase {
3822 friend class VPlan;
3823
3824 /// Use VPlan::createVPBasicBlock to create VPBasicBlocks.
3825 VPBasicBlock(const Twine &Name = "", VPRecipeBase *Recipe = nullptr)
3826 : VPBlockBase(VPBasicBlockSC, Name.str()) {
3827 if (Recipe)
3828 appendRecipe(Recipe);
3829 }
3830
3831public:
3833
3834protected:
3835 /// The VPRecipes held in the order of output instructions to generate.
3837
3838 VPBasicBlock(const unsigned char BlockSC, const Twine &Name = "")
3839 : VPBlockBase(BlockSC, Name.str()) {}
3840
3841public:
3842 ~VPBasicBlock() override {
3843 while (!Recipes.empty())
3844 Recipes.pop_back();
3845 }
3846
3847 /// Instruction iterators...
3852
3853 //===--------------------------------------------------------------------===//
3854 /// Recipe iterator methods
3855 ///
3856 inline iterator begin() { return Recipes.begin(); }
3857 inline const_iterator begin() const { return Recipes.begin(); }
3858 inline iterator end() { return Recipes.end(); }
3859 inline const_iterator end() const { return Recipes.end(); }
3860
3861 inline reverse_iterator rbegin() { return Recipes.rbegin(); }
3862 inline const_reverse_iterator rbegin() const { return Recipes.rbegin(); }
3863 inline reverse_iterator rend() { return Recipes.rend(); }
3864 inline const_reverse_iterator rend() const { return Recipes.rend(); }
3865
3866 inline size_t size() const { return Recipes.size(); }
3867 inline bool empty() const { return Recipes.empty(); }
3868 inline const VPRecipeBase &front() const { return Recipes.front(); }
3869 inline VPRecipeBase &front() { return Recipes.front(); }
3870 inline const VPRecipeBase &back() const { return Recipes.back(); }
3871 inline VPRecipeBase &back() { return Recipes.back(); }
3872
3873 /// Returns a reference to the list of recipes.
3875
3876 /// Returns a pointer to a member of the recipe list.
3877 static RecipeListTy VPBasicBlock::*getSublistAccess(VPRecipeBase *) {
3878 return &VPBasicBlock::Recipes;
3879 }
3880
3881 /// isa/cast/dyn_cast support: true for VPBasicBlockSC and VPIRBasicBlockSC block IDs.
3882 static inline bool classof(const VPBlockBase *V) {
3883 const auto BlockID = V->getVPBlockID();
3884 return BlockID == VPBlockBase::VPBasicBlockSC || BlockID == VPBlockBase::VPIRBasicBlockSC;
3885 }
3886
3887 void insert(VPRecipeBase *Recipe, iterator InsertPt) { // Add \p Recipe to this block's recipe list at \p InsertPt.
3888 assert(Recipe && "No recipe to append.");
3889 assert(!Recipe->Parent && "Recipe already in VPlan"); // \p Recipe must not have a parent block yet.
3890 Recipe->Parent = this; // Record this block as the recipe's parent, then link it into the list.
3891 Recipes.insert(InsertPt, Recipe);
3892 }
3893
3894 /// Augment the existing recipes of a VPBasicBlock with an additional
3895 /// \p Recipe as the last recipe.
3896 void appendRecipe(VPRecipeBase *Recipe) { insert(Recipe, end()); }
3897
3898 /// The method which generates the output IR instructions that correspond to
3899 /// this VPBasicBlock, thereby "executing" the VPlan.
3900 void execute(VPTransformState *State) override;
3901
3902 /// Return the cost of this VPBasicBlock.
3903 InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override;
3904
3905 /// Return the position of the first non-phi node recipe in the block.
3906 iterator getFirstNonPhi();
3907
3908 /// Returns an iterator range over the PHI-like recipes in the block.
3912
3913 /// Split current block at \p SplitAt by inserting a new block between the
3914 /// current block and its successors and moving all recipes starting at
3915 /// SplitAt to the new block. Returns the new block.
3916 VPBasicBlock *splitAt(iterator SplitAt);
3917
3918 VPRegionBlock *getEnclosingLoopRegion();
3919 const VPRegionBlock *getEnclosingLoopRegion() const;
3920
3921#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3922 /// Print this VPBasicBlock to \p O, prefixing all lines with \p Indent. \p
3923 /// SlotTracker is used to print unnamed VPValues using consecutive numbers.
3924 ///
3925 /// Note that the numbering is applied to the whole VPlan, so printing
3926 /// individual blocks is consistent with the whole VPlan printing.
3927 void print(raw_ostream &O, const Twine &Indent,
3928 VPSlotTracker &SlotTracker) const override;
3929 using VPBlockBase::print; // Get the print(raw_stream &O) version.
3930#endif
3931
3932 /// If the block has multiple successors, return the branch recipe terminating
3933 /// the block. If there is no successor or only a single one, return nullptr.
3934 VPRecipeBase *getTerminator();
3935 const VPRecipeBase *getTerminator() const;
3936
3937 /// Returns true if the block is exiting its parent region.
3938 bool isExiting() const;
3939
3940 /// Clone the current block and its recipes, without updating the operands of
3941 /// the cloned recipes.
3942 VPBasicBlock *clone() override;
3943
3944 /// Returns the predecessor block at index \p Idx with the predecessors as per
3945 /// the corresponding plain CFG. If the block is an entry block to a region,
3946 /// the first predecessor is the single predecessor of a region, and the
3947 /// second predecessor is the exiting block of the region.
3948 const VPBasicBlock *getCFGPredecessor(unsigned Idx) const;
3949
3950protected:
3951 /// Execute the recipes in the IR basic block \p BB.
3952 void executeRecipes(VPTransformState *State, BasicBlock *BB);
3953
3954 /// Connect the VPBB's predecessors in the VPlan CFG to the IR basic block
3955 /// generated for this VPBB.
3956 void connectToPredecessors(VPTransformState &State);
3957
3958private:
3959 /// Create an IR BasicBlock to hold the output instructions generated by this
3960 /// VPBasicBlock, and return it. Update the CFGState accordingly.
3961 BasicBlock *createEmptyBasicBlock(VPTransformState &State);
3962};
3963
3964inline const VPBasicBlock *
3966 return getAsRecipe()->getParent()->getCFGPredecessor(Idx);
3967}
3968
3969 /// A special type of VPBasicBlock that wraps an existing IR basic block.
3970 /// Recipes of the block get added before the first non-phi instruction in the
3971 /// wrapped block.
3972 /// Note: At the moment, VPIRBasicBlock can only be used to wrap VPlan's
3973 /// preheader block. NOTE(review): VPlan also keeps VPIRBasicBlocks for the scalar header and the exit blocks, so this restriction looks outdated - confirm.
3974 class VPIRBasicBlock : public VPBasicBlock {
3975 friend class VPlan;
3976
3977 BasicBlock *IRBB; // The wrapped IR basic block; returned by getIRBasicBlock().
3978
3979 /// Use VPlan::createVPIRBasicBlock to create VPIRBasicBlocks. The block is named "ir-bb<NAME>", where NAME is the name of \p IRBB.
3980 VPIRBasicBlock(BasicBlock *IRBB)
3981 : VPBasicBlock(VPIRBasicBlockSC,
3982 (Twine("ir-bb<") + IRBB->getName() + Twine(">")).str()),
3983 IRBB(IRBB) {}
3984
3985public:
3986 ~VPIRBasicBlock() override = default;
3987
3988 static inline bool classof(const VPBlockBase *V) { // Type inquiry support for isa, cast, and dyn_cast.
3989 return V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
3990 }
3991
3992 /// The method which generates the output IR instructions that correspond to
3993 /// this VPBasicBlock, thereby "executing" the VPlan.
3994 void execute(VPTransformState *State) override;
3995
3996 VPIRBasicBlock *clone() override; // Covariant override: yields a VPIRBasicBlock.
3997
3998 BasicBlock *getIRBasicBlock() const { return IRBB; } // Returns the wrapped IR basic block.
3999 };
4000
4001/// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks
4002/// which form a Single-Entry-Single-Exiting subgraph of the output IR CFG.
4003/// A VPRegionBlock may indicate that its contents are to be replicated several
4004/// times. This is designed to support predicated scalarization, in which a
4005/// scalar if-then code structure needs to be generated VF * UF times. Having
4006/// this replication indicator helps to keep a single model for multiple
4007/// candidate VF's. The actual replication takes place only once the desired VF
4008/// and UF have been determined.
4009class LLVM_ABI_FOR_TEST VPRegionBlock : public VPBlockBase {
4010 friend class VPlan;
4011
4012 /// Hold the Single Entry of the SESE region modelled by the VPRegionBlock.
4013 VPBlockBase *Entry;
4014
4015 /// Hold the Single Exiting block of the SESE region modelled by the
4016 /// VPRegionBlock.
4017 VPBlockBase *Exiting;
4018
4019 /// An indicator whether this region is to generate multiple replicated
4020 /// instances of output IR corresponding to its VPBlockBases.
4021 bool IsReplicator;
4022
4023 /// Use VPlan::createVPRegionBlock to create VPRegionBlocks.
4024 VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting,
4025 const std::string &Name = "", bool IsReplicator = false)
4026 : VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exiting(Exiting),
4027 IsReplicator(IsReplicator) {
4028 assert(Entry->getPredecessors().empty() && "Entry block has predecessors.");
4029 assert(Exiting->getSuccessors().empty() && "Exit block has successors.");
4030 Entry->setParent(this);
4031 Exiting->setParent(this);
4032 }
4033 VPRegionBlock(const std::string &Name = "", bool IsReplicator = false)
4034 : VPBlockBase(VPRegionBlockSC, Name), Entry(nullptr), Exiting(nullptr),
4035 IsReplicator(IsReplicator) {}
4036
4037public:
4038 ~VPRegionBlock() override = default;
4039
4040 /// Method to support type inquiry through isa, cast, and dyn_cast.
4041 static inline bool classof(const VPBlockBase *V) {
4042 return V->getVPBlockID() == VPBlockBase::VPRegionBlockSC;
4043 }
4044
4045 const VPBlockBase *getEntry() const { return Entry; }
4046 VPBlockBase *getEntry() { return Entry; }
4047
4048 /// Set \p EntryBlock as the entry VPBlockBase of this VPRegionBlock. \p
4049 /// EntryBlock must have no predecessors.
4050 void setEntry(VPBlockBase *EntryBlock) {
4051 assert(EntryBlock->getPredecessors().empty() &&
4052 "Entry block cannot have predecessors.");
4053 Entry = EntryBlock;
4054 EntryBlock->setParent(this);
4055 }
4056
4057 const VPBlockBase *getExiting() const { return Exiting; }
4058 VPBlockBase *getExiting() { return Exiting; }
4059
4060 /// Set \p ExitingBlock as the exiting VPBlockBase of this VPRegionBlock. \p
4061 /// ExitingBlock must have no successors.
4062 void setExiting(VPBlockBase *ExitingBlock) {
4063 assert(ExitingBlock->getSuccessors().empty() &&
4064 "Exit block cannot have successors.");
4065 Exiting = ExitingBlock;
4066 ExitingBlock->setParent(this);
4067 }
4068
4069 /// Returns the pre-header VPBasicBlock of the loop region.
4071 assert(!isReplicator() && "should only get pre-header of loop regions");
4072 return getSinglePredecessor()->getExitingBasicBlock();
4073 }
4074
4075 /// An indicator whether this region is to generate multiple replicated
4076 /// instances of output IR corresponding to its VPBlockBases.
4077 bool isReplicator() const { return IsReplicator; }
4078
4079 /// The method which generates the output IR instructions that correspond to
4080 /// this VPRegionBlock, thereby "executing" the VPlan.
4081 void execute(VPTransformState *State) override;
4082
4083 /// Return the cost of this region.
4084 InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override;
4085
4086#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4087 /// Print this VPRegionBlock to \p O (recursively), prefixing all lines with
4088 /// \p Indent. \p SlotTracker is used to print unnamed VPValues using
4089 /// consecutive numbers.
4090 ///
4091 /// Note that the numbering is applied to the whole VPlan, so printing
4092 /// individual regions is consistent with the whole VPlan printing.
4093 void print(raw_ostream &O, const Twine &Indent,
4094 VPSlotTracker &SlotTracker) const override;
4095 using VPBlockBase::print; // Get the print(raw_stream &O) version.
4096#endif
4097
4098 /// Clone all blocks in the single-entry single-exit region of the block and
4099 /// their recipes without updating the operands of the cloned recipes.
4100 VPRegionBlock *clone() override;
4101
4102 /// Remove the current region from its VPlan, connecting its predecessor to
4103 /// its entry, and its exiting block to its successor.
4104 void dissolveToCFGLoop();
4105
4106 /// Returns the canonical induction recipe of the region.
4108 VPBasicBlock *EntryVPBB = getEntryBasicBlock();
4109 if (EntryVPBB->empty()) {
4110 // VPlan native path. TODO: Unify both code paths.
4111 EntryVPBB = cast<VPBasicBlock>(EntryVPBB->getSingleSuccessor());
4112 }
4113 return cast<VPCanonicalIVPHIRecipe>(&*EntryVPBB->begin());
4114 }
4116 return const_cast<VPRegionBlock *>(this)->getCanonicalIV();
4117 }
4118
4119 /// Return the type of the canonical IV for loop regions.
4120 Type *getCanonicalIVType() { return getCanonicalIV()->getScalarType(); }
4121 const Type *getCanonicalIVType() const {
4122 return getCanonicalIV()->getScalarType();
4123 }
4124};
4125
4127 return getParent()->getParent();
4128}
4129
4131 return getParent()->getParent();
4132}
4133
4134 /// VPlan models a candidate for vectorization, encoding various decisions taken
4135/// to produce efficient output IR, including which branches, basic-blocks and
4136/// output IR instructions to generate, and their cost. VPlan holds a
4137/// Hierarchical-CFG of VPBasicBlocks and VPRegionBlocks rooted at an Entry
4138/// VPBasicBlock.
4139class VPlan {
4140 friend class VPlanPrinter;
4141 friend class VPSlotTracker;
4142
4143 /// VPBasicBlock corresponding to the original preheader. Used to place
4144 /// VPExpandSCEV recipes for expressions used during skeleton creation and the
4145 /// rest of VPlan execution.
4146 /// When this VPlan is used for the epilogue vector loop, the entry will be
4147 /// replaced by a new entry block created during skeleton creation.
4148 VPBasicBlock *Entry;
4149
4150 /// VPIRBasicBlock wrapping the header of the original scalar loop.
4151 VPIRBasicBlock *ScalarHeader;
4152
4153 /// Immutable list of VPIRBasicBlocks wrapping the exit blocks of the original
4154 /// scalar loop. Note that some exit blocks may be unreachable at the moment,
4155 /// e.g. if the scalar epilogue always executes.
4157
4158 /// Holds the VFs applicable to this VPlan.
4160
4161 /// Holds the UFs applicable to this VPlan. If empty, the VPlan is valid for
4162 /// any UF.
4164
4165 /// Holds the name of the VPlan, for printing.
4166 std::string Name;
4167
4168 /// Represents the trip count of the original loop, for folding
4169 /// the tail.
4170 VPValue *TripCount = nullptr;
4171
4172 /// Represents the backedge taken count of the original loop, for folding
4173 /// the tail. It equals TripCount - 1.
4174 VPValue *BackedgeTakenCount = nullptr;
4175
4176 /// Represents the vector trip count.
4177 VPValue VectorTripCount;
4178
4179 /// Represents the vectorization factor of the loop.
4180 VPValue VF;
4181
4182 /// Represents the loop-invariant VF * UF of the vector loop region.
4183 VPValue VFxUF;
4184
4185 /// Holds a mapping between Values and their corresponding VPValue inside
4186 /// VPlan.
4187 Value2VPValueTy Value2VPValue;
4188
4189 /// Contains all the external definitions created for this VPlan. External
4190 /// definitions are VPValues that hold a pointer to their underlying IR.
4192
4193 /// Blocks allocated and owned by the VPlan. They will be deleted once the
4194 /// VPlan is destroyed.
4195 SmallVector<VPBlockBase *> CreatedBlocks;
4196
4197 /// Construct a VPlan with \p Entry to the plan and with \p ScalarHeader
4198 /// wrapping the original header of the scalar loop.
4199 VPlan(VPBasicBlock *Entry, VPIRBasicBlock *ScalarHeader)
4200 : Entry(Entry), ScalarHeader(ScalarHeader) {
4201 Entry->setPlan(this);
4202 assert(ScalarHeader->getNumSuccessors() == 0 &&
4203 "scalar header must be a leaf node");
4204 }
4205
4206public:
4207 /// Construct a VPlan for \p L. This will create VPIRBasicBlocks wrapping the
4208 /// original preheader and scalar header of \p L, to be used as entry and
4209 /// scalar header blocks of the new VPlan.
4210 VPlan(Loop *L);
4211
4212 /// Construct a VPlan with a new VPBasicBlock as entry, a VPIRBasicBlock
4213 /// wrapping \p ScalarHeaderBB and a trip count of \p TC.
4214 VPlan(BasicBlock *ScalarHeaderBB, VPValue *TC) {
4215 setEntry(createVPBasicBlock("preheader"));
4216 ScalarHeader = createVPIRBasicBlock(ScalarHeaderBB);
4217 TripCount = TC;
4218 }
4219
4221
4223 Entry = VPBB;
4224 VPBB->setPlan(this);
4225 }
4226
4227 /// Generate the IR code for this VPlan.
4228 void execute(VPTransformState *State);
4229
4230 /// Return the cost of this plan.
4232
4233 VPBasicBlock *getEntry() { return Entry; }
4234 const VPBasicBlock *getEntry() const { return Entry; }
4235
4236 /// Returns the preheader of the vector loop region, if one exists, or null
4237 /// otherwise.
4239 VPRegionBlock *VectorRegion = getVectorLoopRegion();
4240 return VectorRegion
4241 ? cast<VPBasicBlock>(VectorRegion->getSinglePredecessor())
4242 : nullptr;
4243 }
4244
4245 /// Returns the VPRegionBlock of the vector loop.
4248
4249 /// Returns the 'middle' block of the plan, that is the block that selects
4250 /// whether to execute the scalar tail loop or the exit block from the loop
4251 /// latch. If there is an early exit from the vector loop, the middle block
4252 /// conceptually has the early exit block as third successor, split across 2
4253 /// VPBBs. In that case, the second VPBB selects whether to execute the scalar
4254 /// tail loop or the exit block. If the scalar tail loop or exit block are
4255 /// known to always execute, the middle block may branch directly to that
4256 /// block. This function cannot be called once the vector loop region has been
4257 /// removed.
4259 VPRegionBlock *LoopRegion = getVectorLoopRegion();
4260 assert(
4261 LoopRegion &&
4262 "cannot call the function after vector loop region has been removed");
4263 auto *RegionSucc = cast<VPBasicBlock>(LoopRegion->getSingleSuccessor());
4264 if (RegionSucc->getSingleSuccessor() ||
4265 is_contained(RegionSucc->getSuccessors(), getScalarPreheader()))
4266 return RegionSucc;
4267 // There is an early exit. The successor of RegionSucc is the middle block.
4268 return cast<VPBasicBlock>(RegionSucc->getSuccessors()[1]);
4269 }
4270
4272 return const_cast<VPlan *>(this)->getMiddleBlock();
4273 }
4274
4275 /// Return the VPBasicBlock for the preheader of the scalar loop.
4277 return cast<VPBasicBlock>(getScalarHeader()->getSinglePredecessor());
4278 }
4279
4280 /// Return the VPIRBasicBlock wrapping the header of the scalar loop.
4281 VPIRBasicBlock *getScalarHeader() const { return ScalarHeader; }
4282
4283 /// Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of
4284 /// the original scalar loop.
4285 ArrayRef<VPIRBasicBlock *> getExitBlocks() const { return ExitBlocks; }
4286
4287 /// Return the VPIRBasicBlock corresponding to \p IRBB. \p IRBB must be an
4288 /// exit block.
4290
4291 /// Returns true if \p VPBB is an exit block.
4292 bool isExitBlock(VPBlockBase *VPBB);
4293
4294 /// The trip count of the original loop.
4296 assert(TripCount && "trip count needs to be set before accessing it");
4297 return TripCount;
4298 }
4299
4300 /// Set the trip count assuming it is currently null; if it is not - use
4301 /// resetTripCount().
4302 void setTripCount(VPValue *NewTripCount) {
4303 assert(!TripCount && NewTripCount && "TripCount should not be set yet.");
4304 TripCount = NewTripCount;
4305 }
4306
4307 /// Resets the trip count for the VPlan. The caller must make sure all uses of
4308 /// the original trip count have been replaced.
4309 void resetTripCount(VPValue *NewTripCount) {
4310 assert(TripCount && NewTripCount && TripCount->getNumUsers() == 0 &&
4311 "TripCount must be set when resetting");
4312 TripCount = NewTripCount;
4313 }
4314
4315 /// The backedge taken count of the original loop.
4317 if (!BackedgeTakenCount)
4318 BackedgeTakenCount = new VPValue();
4319 return BackedgeTakenCount;
4320 }
4321 VPValue *getBackedgeTakenCount() const { return BackedgeTakenCount; }
4322
4323 /// The vector trip count.
4324 VPValue &getVectorTripCount() { return VectorTripCount; }
4325
4326 /// Returns the VPValue representing the VF of the vector loop region (mutable and const overloads).
4327 VPValue &getVF() { return VF; }
4328 const VPValue &getVF() const { return VF; }
4329
4330 /// Returns VF * UF of the vector loop region.
4331 VPValue &getVFxUF() { return VFxUF; }
4332
4335 }
4336
4337 void addVF(ElementCount VF) { VFs.insert(VF); }
4338
4340 assert(hasVF(VF) && "Cannot set VF not already in plan");
4341 VFs.clear();
4342 VFs.insert(VF);
4343 }
4344
4345 bool hasVF(ElementCount VF) const { return VFs.contains(VF); } // True if \p VF is one of the VFs of this plan; membership test spelled like hasUF().
4346 bool hasScalableVF() const {
4347 return any_of(VFs, [](ElementCount VF) { return VF.isScalable(); });
4348 }
4349
4350 /// Returns an iterator range over all VFs of the plan.
4353 return VFs;
4354 }
4355
4356 bool hasScalarVFOnly() const { // True iff the plan holds exactly one VF and that VF is scalar.
4357 const bool SoleVFIsScalar = VFs.size() == 1 && VFs[0].isScalar();
4358 assert(hasVF(ElementCount::getFixed(1)) == SoleVFIsScalar &&
4359 "Plan with scalar VF should only have a single VF");
4360 return SoleVFIsScalar;
4361 }
4362
4363 bool hasUF(unsigned UF) const { return UFs.empty() || UFs.contains(UF); }
4364
4365 unsigned getUF() const {
4366 assert(UFs.size() == 1 && "Expected a single UF");
4367 return UFs[0];
4368 }
4369
4370 void setUF(unsigned UF) {
4371 assert(hasUF(UF) && "Cannot set the UF not already in plan");
4372 UFs.clear();
4373 UFs.insert(UF);
4374 }
4375
4376 /// Returns true if the VPlan already has been unrolled, i.e. it has a single
4377 /// concrete UF.
4378 bool isUnrolled() const { return UFs.size() == 1; }
4379
4380 /// Return a string with the name of the plan and the applicable VFs and UFs.
4381 std::string getName() const;
4382
4383 void setName(const Twine &newName) { Name = newName.str(); }
4384
4385 /// Gets the live-in VPValue for \p V or adds a new live-in (if none exists
4386 /// yet) for \p V.
4388 assert(V && "Trying to get or add the VPValue of a null Value");
4389 auto [It, Inserted] = Value2VPValue.try_emplace(V);
4390 if (Inserted) {
4391 VPValue *VPV = new VPValue(V);
4392 VPLiveIns.push_back(VPV);
4393 assert(VPV->isLiveIn() && "VPV must be a live-in.");
4394 It->second = VPV;
4395 }
4396
4397 assert(It->second->isLiveIn() && "Only live-ins should be in mapping");
4398 return It->second;
4399 }
4400
4401 /// Return a VPValue wrapping i1 true.
4402 VPValue *getTrue() { return getConstantInt(1, 1); }
4403
4404 /// Return a VPValue wrapping i1 false.
4405 VPValue *getFalse() { return getConstantInt(1, 0); }
4406
4407 /// Return a VPValue wrapping a ConstantInt with the given type and value.
4408 VPValue *getConstantInt(Type *Ty, uint64_t Val, bool IsSigned = false) {
4409 return getOrAddLiveIn(ConstantInt::get(Ty, Val, IsSigned));
4410 }
4411
4412 /// Return a VPValue wrapping a ConstantInt with the given bitwidth and value.
4414 bool IsSigned = false) {
4415 return getConstantInt(APInt(BitWidth, Val, IsSigned));
4416 }
4417
4418 /// Return a VPValue wrapping a ConstantInt with the given APInt value.
4420 return getOrAddLiveIn(ConstantInt::get(getContext(), Val));
4421 }
4422
4423 /// Return the live-in VPValue for \p V, if there is one or nullptr otherwise.
4424 VPValue *getLiveIn(Value *V) const { return Value2VPValue.lookup(V); }
4425
4426 /// Return the list of live-in VPValues available in the VPlan.
4428 assert(all_of(Value2VPValue,
4429 [this](const auto &P) {
4430 return is_contained(VPLiveIns, P.second);
4431 }) &&
4432 "all VPValues in Value2VPValue must also be in VPLiveIns");
4433 return VPLiveIns;
4434 }
4435
4436#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4437 /// Print the live-ins of this VPlan to \p O.
4438 void printLiveIns(raw_ostream &O) const;
4439
4440 /// Print this VPlan to \p O.
4441 void print(raw_ostream &O) const;
4442
4443 /// Print this VPlan in DOT format to \p O.
4444 void printDOT(raw_ostream &O) const;
4445
4446 /// Dump the plan to stderr (for debugging).
4447 LLVM_DUMP_METHOD void dump() const;
4448#endif
4449
4450 /// Clone the current VPlan, update all VPValues of the new VPlan and cloned
4451 /// recipes to refer to the clones, and return it.
4452 VPlan *duplicate();
4453
4454 /// Create a new VPBasicBlock with \p Name and containing \p Recipe if
4455 /// present. The returned block is owned by the VPlan and deleted once the
4456 /// VPlan is destroyed.
4458 VPRecipeBase *Recipe = nullptr) {
4459 auto *VPB = new VPBasicBlock(Name, Recipe);
4460 CreatedBlocks.push_back(VPB);
4461 return VPB;
4462 }
4463
4464 /// Create a new loop region called \p Name. A non-null \p Entry selects the
4465 /// constructor taking both blocks, which dereferences \p Exiting, so \p Exiting must then be non-null too; with a null \p Entry, \p Exiting is not used.
4466 /// The returned block is owned by the VPlan and deleted once the VPlan is destroyed.
4467 VPRegionBlock *createLoopRegion(const std::string &Name = "",
4468 VPBlockBase *Entry = nullptr,
4469 VPBlockBase *Exiting = nullptr) {
4470 VPRegionBlock *const Region = Entry != nullptr ? new VPRegionBlock(Entry, Exiting, Name)
4471 : new VPRegionBlock(Name);
4472 CreatedBlocks.push_back(Region); // Track the region among the blocks owned by this VPlan.
4473 return Region;
4474 }
4475
4476 /// Create a new replicate region with \p Entry, \p Exiting and \p Name. The
4477 /// returned block is owned by the VPlan and deleted once the VPlan is
4478 /// destroyed.
4480 const std::string &Name = "") {
4481 auto *VPB = new VPRegionBlock(Entry, Exiting, Name, true);
4482 CreatedBlocks.push_back(VPB);
4483 return VPB;
4484 }
4485
4486 /// Create a VPIRBasicBlock wrapping \p IRBB, but do not create
4487 /// VPIRInstructions wrapping the instructions in \p IRBB. The returned
4488 /// block is owned by the VPlan and deleted once the VPlan is destroyed.
4490
4491 /// Create a VPIRBasicBlock from \p IRBB containing VPIRInstructions for all
4492 /// instructions in \p IRBB, except its terminator which is managed by the
4493 /// successors of the block in VPlan. The returned block is owned by the VPlan
4494 /// and deleted once the VPlan is destroyed.
4496
4497 /// Tells whether the underlying loop has an early exit, which shows up in one
4498 /// of two ways: more than one exit block with predecessors, or exactly one
4499 /// exit block overall that has several predecessors (one for the exit via the
4500 /// latch and one via the other early exit).
4501 bool hasEarlyExit() const {
4502 const auto NumExitsWithPreds = count_if(
4503 ExitBlocks, [](VPIRBasicBlock *EB) { return EB->hasPredecessors(); });
4504 if (NumExitsWithPreds > 1) return true;
4505 return ExitBlocks.size() == 1 && ExitBlocks[0]->getNumPredecessors() > 1;
4506 }
4507
4508 /// Returns true if the scalar tail may execute after the vector loop. Note
4509 /// that this relies on unneeded branches to the scalar tail loop being
4510 /// removed.
4511 bool hasScalarTail() const {
4512 return !(!getScalarPreheader()->hasPredecessors() ||
4514 }
4515};
4516
4517#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4518inline raw_ostream &operator<<(raw_ostream &OS, const VPlan &Plan) {
4519 Plan.print(OS);
4520 return OS;
4521}
4522#endif
4523
4524} // end namespace llvm
4525
4526#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
static MCDisassembler::DecodeStatus addOperand(MCInst &Inst, const MCOperand &Opnd)
Rewrite undef for PHI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:638
#define LLVM_ABI_FOR_TEST
Definition Compiler.h:218
dxil translate DXIL Translate Metadata
This file defines the DenseMap class.
Hexagon Common GEP
static MaybeAlign getAlign(Value *Ptr)
iv users
Definition IVUsers.cpp:48
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
static std::pair< Value *, APInt > getMask(Value *WideMask, unsigned Factor, ElementCount LeafValueEC)
#define I(x, y, z)
Definition MD5.cpp:58
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first found DebugLoc that has a DILocation, given a range of instructions.
#define T
MachineInstr unsigned OpIdx
#define P(N)
static StringRef getName(Value *V)
const SmallVectorImpl< MachineOperand > & Cond
static bool mayHaveSideEffects(MachineInstr &MI)
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
static const BasicSubtargetSubTypeKV * find(StringRef S, ArrayRef< BasicSubtargetSubTypeKV > A)
Find KV in array using binary search.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:247
This file contains the declarations of the entities induced by Vectorization Plans,...
#define VP_CLASSOF_IMPL(VPDefID)
Definition VPlan.h:495
static const uint32_t IV[8]
Definition blake3_impl.h:83
Class for arbitrary precision integers.
Definition APInt.h:78
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
This class holds the attributes for a particular argument, parameter, function, or return value.
Definition Attributes.h:361
LLVM Basic Block Representation.
Definition BasicBlock.h:62
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
This class represents a function call, abstracting a target machine's calling convention.
This is the base class for all instructions that perform data casts.
Definition InstrTypes.h:448
Instruction::CastOps getOpcode() const
Return the opcode of this CastInst.
Definition InstrTypes.h:610
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
A debug info location.
Definition DebugLoc.h:124
static DebugLoc getUnknown()
Definition DebugLoc.h:162
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:165
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:310
Utility class for floating point operations which can have information about relaxed accuracy require...
Definition Operator.h:200
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:22
Represents flags for the getelementptr instruction/expression.
static GEPNoWrapFlags none()
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
A struct for saving information about induction variables.
InductionKind
This enum represents the kinds of inductions that we support.
InnerLoopVectorizer vectorizes loops which contain only one basic block to a specified vectorization ...
The group of interleaved loads/stores sharing the same stride and close to each other.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
An instruction for reading from memory.
LoopVectorizationCostModel - estimates the expected speedups due to vectorization.
This class emits a version of the loop where run-time checks ensure that may-alias pointers can't ove...
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
Metadata node.
Definition Metadata.h:1078
bool onlyWritesMemory() const
Whether this function only (at most) writes memory.
Definition ModRef.h:221
bool onlyReadsMemory() const
Whether this function only (at most) reads memory.
Definition ModRef.h:218
Root of the metadata hierarchy.
Definition Metadata.h:64
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
This class represents an analyzed expression in the program.
This class represents the LLVM 'select' instruction.
This class provides computation of slot numbers for LLVM Assembly writing.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:338
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
iterator erase(const_iterator CI)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
This class represents a truncation of integer types.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
Definition Twine.cpp:17
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
void execute(VPTransformState &State) override
Generate the active lane mask phi of the vector loop.
VPActiveLaneMaskPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3542
VPActiveLaneMaskPHIRecipe(VPValue *StartMask, DebugLoc DL)
Definition VPlan.h:3536
~VPActiveLaneMaskPHIRecipe() override=default
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition VPlan.h:3821
RecipeListTy::const_iterator const_iterator
Definition VPlan.h:3849
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
Definition VPlan.h:3896
RecipeListTy::const_reverse_iterator const_reverse_iterator
Definition VPlan.h:3851
RecipeListTy::iterator iterator
Instruction iterators...
Definition VPlan.h:3848
RecipeListTy & getRecipeList()
Returns a reference to the list of recipes.
Definition VPlan.h:3874
iplist< VPRecipeBase > RecipeListTy
Definition VPlan.h:3832
VPBasicBlock(const unsigned char BlockSC, const Twine &Name="")
Definition VPlan.h:3838
iterator end()
Definition VPlan.h:3858
iterator begin()
Recipe iterator methods.
Definition VPlan.h:3856
RecipeListTy::reverse_iterator reverse_iterator
Definition VPlan.h:3850
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
Definition VPlan.h:3909
const VPBasicBlock * getCFGPredecessor(unsigned Idx) const
Returns the predecessor block at index Idx with the predecessors as per the corresponding plain CFG.
Definition VPlan.cpp:785
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
Definition VPlan.cpp:220
~VPBasicBlock() override
Definition VPlan.h:3842
const_reverse_iterator rbegin() const
Definition VPlan.h:3862
reverse_iterator rend()
Definition VPlan.h:3863
RecipeListTy Recipes
The VPRecipes held in the order of output instructions to generate.
Definition VPlan.h:3836
VPRecipeBase & back()
Definition VPlan.h:3871
const VPRecipeBase & front() const
Definition VPlan.h:3868
const_iterator begin() const
Definition VPlan.h:3857
VPRecipeBase & front()
Definition VPlan.h:3869
const VPRecipeBase & back() const
Definition VPlan.h:3870
void insert(VPRecipeBase *Recipe, iterator InsertPt)
Definition VPlan.h:3887
bool empty() const
Definition VPlan.h:3867
const_iterator end() const
Definition VPlan.h:3859
static bool classof(const VPBlockBase *V)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:3882
static RecipeListTy VPBasicBlock::* getSublistAccess(VPRecipeBase *)
Returns a pointer to a member of the recipe list.
Definition VPlan.h:3877
reverse_iterator rbegin()
Definition VPlan.h:3861
friend class VPlan
Definition VPlan.h:3822
size_t size() const
Definition VPlan.h:3866
const_reverse_iterator rend() const
Definition VPlan.h:3864
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2469
VPValue * getIncomingValue(unsigned Idx) const
Return incoming value number Idx.
Definition VPlan.h:2438
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
Definition VPlan.h:2443
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account when normalized the first incoming value wi...
Definition VPlan.h:2433
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:2454
VPBlendRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2420
VPBlendRecipe(PHINode *Phi, ArrayRef< VPValue * > Operands, DebugLoc DL)
The blend operation is a User of the incoming values and of their respective masks,...
Definition VPlan.h:2415
void setMask(unsigned Idx, VPValue *V)
Set mask number Idx to V.
Definition VPlan.h:2449
bool isNormalized() const
A normalized blend is one that has an odd number of operands, whereby the first operand does not have...
Definition VPlan.h:2429
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition VPlan.h:80
void setSuccessors(ArrayRef< VPBlockBase * > NewSuccs)
Set each VPBasicBlock in NewSuccs as successor of this VPBlockBase.
Definition VPlan.h:299
VPRegionBlock * getParent()
Definition VPlan.h:172
VPBlocksTy & getPredecessors()
Definition VPlan.h:204
iterator_range< VPBlockBase ** > predecessors()
Definition VPlan.h:201
LLVM_DUMP_METHOD void dump() const
Dump this VPBlockBase to dbgs().
Definition VPlan.h:369
void setName(const Twine &newName)
Definition VPlan.h:165
size_t getNumSuccessors() const
Definition VPlan.h:218
iterator_range< VPBlockBase ** > successors()
Definition VPlan.h:200
virtual void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Print plain-text dump of this VPBlockBase to O, prefixing all lines with Indent.
bool hasPredecessors() const
Returns true if this block has any predecessors.
Definition VPlan.h:222
void swapSuccessors()
Swap successors of the block. The block must have exactly 2 successors.
Definition VPlan.h:321
void printSuccessors(raw_ostream &O, const Twine &Indent) const
Print the successors of this block to O, prefixing all lines with Indent.
Definition VPlan.cpp:660
SmallVectorImpl< VPBlockBase * > VPBlocksTy
Definition VPlan.h:159
virtual ~VPBlockBase()=default
const VPBlocksTy & getHierarchicalPredecessors()
Definition VPlan.h:257
unsigned getIndexForSuccessor(const VPBlockBase *Succ) const
Returns the index for Succ in the block's successor list.
Definition VPlan.h:334
size_t getNumPredecessors() const
Definition VPlan.h:219
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
Definition VPlan.h:290
VPBlockBase * getEnclosingBlockWithPredecessors()
Definition VPlan.cpp:212
unsigned getIndexForPredecessor(const VPBlockBase *Pred) const
Returns the index for Pred in the block's predecessor list.
Definition VPlan.h:327
const VPBlocksTy & getPredecessors() const
Definition VPlan.h:203
virtual VPBlockBase * clone()=0
Clone the current block and its recipes without updating the operands of the cloned recipes,...
enum { VPRegionBlockSC, VPBasicBlockSC, VPIRBasicBlockSC } VPBlockTy
An enumeration for keeping track of the concrete subclasses of VPBlockBase that are actually instantiat...
Definition VPlan.h:157
virtual InstructionCost cost(ElementCount VF, VPCostContext &Ctx)=0
Return the cost of the block.
void setPlan(VPlan *ParentPlan)
Sets the pointer of the plan containing the block.
Definition VPlan.cpp:184
const VPRegionBlock * getParent() const
Definition VPlan.h:173
const std::string & getName() const
Definition VPlan.h:163
void clearSuccessors()
Remove all the successors of this block.
Definition VPlan.h:309
VPBlockBase * getSingleHierarchicalSuccessor()
Definition VPlan.h:247
void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse)
Set two given VPBlockBases IfTrue and IfFalse to be the two successors of this VPBlockBase.
Definition VPlan.h:281
VPBlockBase * getSinglePredecessor() const
Definition VPlan.h:214
virtual void execute(VPTransformState *State)=0
The method which generates the output IR that corresponds to this VPBlockBase, thereby "executing" the...
const VPBlocksTy & getHierarchicalSuccessors()
Definition VPlan.h:241
void clearPredecessors()
Remove all the predecessors of this block.
Definition VPlan.h:306
friend class VPBlockUtils
Definition VPlan.h:81
unsigned getVPBlockID() const
Definition VPlan.h:170
void printAsOperand(raw_ostream &OS, bool PrintType=false) const
Definition VPlan.h:348
void swapPredecessors()
Swap predecessors of the block.
Definition VPlan.h:313
VPBlockBase(const unsigned char SC, const std::string &N)
Definition VPlan.h:149
VPBlocksTy & getSuccessors()
Definition VPlan.h:198
VPBlockBase * getEnclosingBlockWithSuccessors()
An Enclosing Block of a block B is any block containing B, including B itself.
Definition VPlan.cpp:204
const VPBasicBlock * getEntryBasicBlock() const
Definition VPlan.cpp:170
void setOneSuccessor(VPBlockBase *Successor)
Set a given VPBlockBase Successor as the single successor of this VPBlockBase.
Definition VPlan.h:270
void setParent(VPRegionBlock *P)
Definition VPlan.h:183
VPBlockBase * getSingleHierarchicalPredecessor()
Definition VPlan.h:263
VPBlockBase * getSingleSuccessor() const
Definition VPlan.h:208
const VPBlocksTy & getSuccessors() const
Definition VPlan.h:197
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlan.h:2974
VPBranchOnMaskRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2958
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition VPlan.h:2982
VPBranchOnMaskRecipe(VPValue *BlockInMask, DebugLoc DL)
Definition VPlan.h:2955
VPlan-based builder utility analogous to IRBuilder.
Canonical scalar induction phi of the vector loop.
Definition VPlan.h:3477
bool onlyFirstPartUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition VPlan.h:3516
~VPCanonicalIVPHIRecipe() override=default
VPCanonicalIVPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3484
VPCanonicalIVPHIRecipe(VPValue *StartV, DebugLoc DL)
Definition VPlan.h:3479
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3509
Type * getScalarType() const
Returns the scalar type of the induction.
Definition VPlan.h:3504
void execute(VPTransformState &State) override
Generate the phi nodes.
Definition VPlan.h:3492
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPCanonicalIVPHIRecipe.
Definition VPlan.h:3523
This class augments a recipe with a set of VPValues defined by the recipe.
Definition VPlanValue.h:302
friend class VPValue
Definition VPlanValue.h:303
VPDef(const unsigned char SC)
Definition VPlanValue.h:382
void execute(VPTransformState &State) override
Generate the transformed value of the induction at offset StartValue (1st operand) + IV (2nd operand) * StepValue (3rd operand).
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPDerivedIVRecipe.
Definition VPlan.h:3681
VPValue * getStepValue() const
Definition VPlan.h:3698
Type * getScalarType() const
Definition VPlan.h:3693
VPDerivedIVRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3669
VPDerivedIVRecipe(InductionDescriptor::InductionKind Kind, const FPMathOperator *FPBinOp, VPValue *Start, VPValue *IV, VPValue *Step, const Twine &Name="")
Definition VPlan.h:3661
~VPDerivedIVRecipe() override=default
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3701
VPValue * getStartValue() const
Definition VPlan.h:3697
VPDerivedIVRecipe(const InductionDescriptor &IndDesc, VPValue *Start, VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step, const Twine &Name="")
Definition VPlan.h:3653
Template specialization of the standard LLVM dominator tree utility for VPBlockBases.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPEVLBasedIVPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3572
~VPEVLBasedIVPHIRecipe() override=default
void execute(VPTransformState &State) override
Generate the phi nodes.
Definition VPlan.h:3578
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPEVLBasedIVPHIRecipe.
Definition VPlan.h:3584
VPEVLBasedIVPHIRecipe(VPValue *StartIV, DebugLoc DL)
Definition VPlan.h:3567
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3591
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:3453
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPExpandSCEVRecipe.
Definition VPlan.h:3458
VPExpandSCEVRecipe(const SCEV *Expr)
Definition VPlan.h:3444
const SCEV * getSCEV() const
Definition VPlan.h:3470
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPExpandSCEVRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3449
~VPExpandSCEVRecipe() override=default
void execute(VPTransformState &State) override
Method for generating code, must not be called as this recipe is abstract.
Definition VPlan.h:3109
VPValue * getOperandOfResultType() const
Return the VPValue to use to infer the result type of the recipe.
Definition VPlan.h:3091
VPExpressionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3073
void decompose()
Insert the recipes of the expression back into the VPlan, directly before the current recipe.
~VPExpressionRecipe() override
Definition VPlan.h:3061
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool isSingleScalar() const
Returns true if the result of this VPExpressionRecipe is a single-scalar.
VPExpressionRecipe(VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1, VPWidenRecipe *Mul, VPWidenRecipe *Sub, VPReductionRecipe *Red)
Definition VPlan.h:3047
VPExpressionRecipe(VPWidenCastRecipe *Ext, VPReductionRecipe *Red)
Definition VPlan.h:3039
bool mayHaveSideEffects() const
Returns true if this expression contains recipes that may have side effects.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Compute the cost of this recipe either using a recipe's specialized implementation or using the legac...
bool mayReadOrWriteMemory() const
Returns true if this expression contains recipes that may read from or write to memory.
VPExpressionRecipe(VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1, VPWidenRecipe *Mul, VPReductionRecipe *Red)
Definition VPlan.h:3043
unsigned getVFScaleFactor() const
Definition VPlan.h:3103
VPExpressionRecipe(VPWidenRecipe *Mul, VPReductionRecipe *Red)
Definition VPlan.h:3041
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this header phi recipe.
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:1991
static bool classof(const VPValue *V)
Definition VPlan.h:2001
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override=0
Print the recipe.
virtual VPValue * getBackedgeValue()
Returns the incoming value from the loop backedge.
Definition VPlan.h:2032
void setBackedgeValue(VPValue *V)
Update the incoming value from the loop backedge.
Definition VPlan.h:2037
VPValue * getStartValue()
Returns the start value of the phi, if one is set.
Definition VPlan.h:2021
void setStartValue(VPValue *V)
Update the start value of the recipe.
Definition VPlan.h:2029
VPValue * getStartValue() const
Definition VPlan.h:2024
static bool classof(const VPRecipeBase *B)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:1997
void execute(VPTransformState &State) override=0
Generate the phi nodes.
virtual VPRecipeBase & getBackedgeRecipe()
Returns the backedge value as a recipe.
Definition VPlan.h:2041
VPHeaderPHIRecipe(unsigned char VPDefID, Instruction *UnderlyingInstr, VPValue *Start, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1986
~VPHeaderPHIRecipe() override=default
void execute(VPTransformState &State) override
Produce a vectorized histogram operation.
VP_CLASSOF_IMPL(VPDef::VPHistogramSC)
VPHistogramRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1700
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPHistogramRecipe.
VPValue * getMask() const
Return the mask operand if one was provided, or a null pointer if all lanes should be executed uncond...
Definition VPlan.h:1717
unsigned getOpcode() const
Definition VPlan.h:1713
VPHistogramRecipe(unsigned Opcode, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1694
~VPHistogramRecipe() override=default
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
A special type of VPBasicBlock that wraps an existing IR basic block.
Definition VPlan.h:3974
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPBasicBlock,...
Definition VPlan.cpp:461
BasicBlock * getIRBasicBlock() const
Definition VPlan.h:3998
static bool classof(const VPBlockBase *V)
Definition VPlan.h:3988
~VPIRBasicBlock() override=default
friend class VPlan
Definition VPlan.h:3975
VPIRBasicBlock * clone() override
Clone the current block and its recipes, without updating the operands of the cloned recipes.
Definition VPlan.cpp:486
Class to record and manage LLVM IR flags.
Definition VPlan.h:596
FastMathFlagsTy FMFs
Definition VPlan.h:660
bool flagsValidForOpcode(unsigned Opcode) const
Returns true if the set flags are valid for Opcode.
VPIRFlags(DisjointFlagsTy DisjointFlags)
Definition VPlan.h:709
VPIRFlags(WrapFlagsTy WrapFlags)
Definition VPlan.h:701
WrapFlagsTy WrapFlags
Definition VPlan.h:654
CmpInst::Predicate CmpPredicate
Definition VPlan.h:653
void printFlags(raw_ostream &O) const
GEPNoWrapFlags GEPFlags
Definition VPlan.h:658
bool hasFastMathFlags() const
Returns true if the recipe has fast-math flags.
Definition VPlan.h:818
LLVM_ABI_FOR_TEST FastMathFlags getFastMathFlags() const
TruncFlagsTy TruncFlags
Definition VPlan.h:655
CmpInst::Predicate getPredicate() const
Definition VPlan.h:800
bool hasNonNegFlag() const
Returns true if the recipe has non-negative flag.
Definition VPlan.h:823
void transferFlags(VPIRFlags &Other)
Definition VPlan.h:718
ExactFlagsTy ExactFlags
Definition VPlan.h:657
bool hasNoSignedWrap() const
Definition VPlan.h:842
void intersectFlags(const VPIRFlags &Other)
Only keep flags also present in Other.
bool isDisjoint() const
Definition VPlan.h:853
VPIRFlags(TruncFlagsTy TruncFlags)
Definition VPlan.h:704
VPIRFlags(FastMathFlags FMFs)
Definition VPlan.h:707
VPIRFlags(NonNegFlagsTy NonNegFlags)
Definition VPlan.h:712
VPIRFlags(CmpInst::Predicate Pred)
Definition VPlan.h:698
bool isNonNeg() const
Definition VPlan.h:825
GEPNoWrapFlags getGEPNoWrapFlags() const
Definition VPlan.h:812
bool hasPredicate() const
Returns true if the recipe has a comparison predicate.
Definition VPlan.h:815
DisjointFlagsTy DisjointFlags
Definition VPlan.h:656
unsigned AllFlags
Definition VPlan.h:661
void setPredicate(CmpInst::Predicate Pred)
Definition VPlan.h:806
bool hasNoUnsignedWrap() const
Definition VPlan.h:831
NonNegFlagsTy NonNegFlags
Definition VPlan.h:659
void dropPoisonGeneratingFlags()
Drop all poison-generating flags.
Definition VPlan.h:728
void applyFlags(Instruction &I) const
Apply the IR flags to I.
Definition VPlan.h:763
VPIRFlags(GEPNoWrapFlags GEPFlags)
Definition VPlan.h:715
VPIRFlags(Instruction &I)
Definition VPlan.h:667
Instruction & getInstruction() const
Definition VPlan.h:1382
bool onlyFirstPartUsed(const VPValue *Op) const override
Returns true if the VPUser only uses the first part of operand Op.
Definition VPlan.h:1396
~VPIRInstruction() override=default
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
void extractLastLaneOfFirstOperand(VPBuilder &Builder)
Update the recipe's first operand to the last lane of the operand using Builder.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition VPlan.h:1402
VPIRInstruction * clone() override
Clone the current recipe.
Definition VPlan.h:1369
static LLVM_ABI_FOR_TEST VPIRInstruction * create(Instruction &I)
Create a new VPIRPhi for I, if it is a PHINode, otherwise create a VPIRInstruction.
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPIRInstruction.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool usesScalars(const VPValue *Op) const override
Returns true if the VPUser uses scalars of operand Op.
Definition VPlan.h:1390
VPIRInstruction(Instruction &I)
VPIRInstruction::create() should be used to create VPIRInstructions, as subclasses may need to be cre...
Definition VPlan.h:1357
Helper to manage IR metadata for recipes.
Definition VPlan.h:938
VPIRMetadata & operator=(const VPIRMetadata &Other)=default
VPIRMetadata(Instruction &I)
Adds metadata that can be preserved from the original instruction I.
Definition VPlan.h:946
VPIRMetadata(const VPIRMetadata &Other)=default
Copy constructor for cloning.
void intersect(const VPIRMetadata &MD)
Intersect this VPIRMetadata object with MD, keeping only metadata nodes that are common to both.
VPIRMetadata()=default
void addMetadata(unsigned Kind, MDNode *Node)
Add metadata with kind Kind and Node.
Definition VPlan.h:961
void applyMetadata(Instruction &I) const
Add all metadata to I.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPInstruction.
Definition VPlan.h:1238
static bool classof(const VPUser *R)
Definition VPlan.h:1223
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:1205
VPInstructionWithType(unsigned Opcode, ArrayRef< VPValue * > Operands, Type *ResultTy, DebugLoc DL, const VPIRFlags &Flags, const VPIRMetadata &Metadata, const Twine &Name="")
Definition VPlan.h:1199
Type * getResultType() const
Definition VPlan.h:1244
VPInstructionWithType(unsigned Opcode, ArrayRef< VPValue * > Operands, Type *ResultTy, const VPIRFlags &Flags, DebugLoc DL, const Twine &Name="")
Definition VPlan.h:1193
VPInstruction * clone() override
Clone the current recipe.
Definition VPlan.h:1227
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the instruction.
This is a concrete Recipe that models a single VPlan-level instruction.
Definition VPlan.h:976
VPInstruction(unsigned Opcode, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Definition VPlan.h:1101
VPInstruction * clone() override
Clone the current recipe.
Definition VPlan.h:1112
@ ExtractLane
Extracts a single lane (first operand) from a set of vector operands.
Definition VPlan.h:1060
@ ComputeAnyOfResult
Compute the final result of an AnyOf reduction with select(cmp(),x,y), where one of (x,...
Definition VPlan.h:1014
@ WideIVStep
Scale the first operand (vector step) by the second operand (scalar-step).
Definition VPlan.h:1050
@ ResumeForEpilogue
Explicit user for the resume phi of the canonical induction in the main VPlan, used by the epilogue v...
Definition VPlan.h:1063
@ Unpack
Extracts all lanes from its (non-scalable) vector operand.
Definition VPlan.h:1011
@ FirstOrderRecurrenceSplice
Definition VPlan.h:982
@ ReductionStartVector
Start vector for reductions with 3 operands: the original start value, the identity value for the red...
Definition VPlan.h:1054
@ BuildVector
Creates a fixed-width vector containing all operands.
Definition VPlan.h:1006
@ BuildStructVector
Given operands of (the same) struct type, creates a struct of fixed-width vectors each containing a ...
Definition VPlan.h:1003
@ VScale
Returns the value for vscale.
Definition VPlan.h:1065
@ CanonicalIVIncrementForPart
Definition VPlan.h:996
@ CalculateTripCountMinusVF
Definition VPlan.h:994
bool hasResult() const
Definition VPlan.h:1140
StringRef getName() const
Returns the symbolic name assigned to the VPInstruction.
Definition VPlan.h:1180
unsigned getOpcode() const
Definition VPlan.h:1120
friend class VPlanSlp
Definition VPlan.h:977
virtual unsigned getNumStoreOperands() const =0
Returns the number of stored operands of this interleave group.
bool needsMaskForGaps() const
Return true if the access needs a mask because of the gaps.
Definition VPlan.h:2548
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:2554
bool onlyFirstLaneUsed(const VPValue *Op) const override=0
Returns true if the recipe only uses the first lane of operand Op.
static bool classof(const VPUser *U)
Definition VPlan.h:2530
VPInterleaveBase(const unsigned char SC, const InterleaveGroup< Instruction > *IG, ArrayRef< VPValue * > Operands, ArrayRef< VPValue * > StoredValues, VPValue *Mask, bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
Definition VPlan.h:2497
Instruction * getInsertPos() const
Definition VPlan.h:2552
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:2525
const InterleaveGroup< Instruction > * getInterleaveGroup() const
Definition VPlan.h:2550
VPValue * getMask() const
Return the mask used by this recipe.
Definition VPlan.h:2542
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
Definition VPlan.h:2571
VPInterleaveBase * clone() override=0
Clone the current recipe.
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition VPlan.h:2536
VPValue * getEVL() const
The VPValue of the explicit vector length.
Definition VPlan.h:2645
~VPInterleaveEVLRecipe() override=default
bool onlyFirstLaneUsed(const VPValue *Op) const override
The recipe only uses the first lane of the address, and EVL operand.
Definition VPlan.h:2657
unsigned getNumStoreOperands() const override
Returns the number of stored operands of this interleave group.
Definition VPlan.h:2664
VPInterleaveEVLRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2638
VPInterleaveEVLRecipe(VPInterleaveRecipe &R, VPValue &EVL, VPValue *Mask)
Definition VPlan.h:2625
VPInterleaveRecipe is a recipe for transforming an interleave group of load or stores into one wide l...
Definition VPlan.h:2582
unsigned getNumStoreOperands() const override
Returns the number of stored operands of this interleave group.
Definition VPlan.h:2615
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2609
~VPInterleaveRecipe() override=default
VPInterleaveRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2592
VPInterleaveRecipe(const InterleaveGroup< Instruction > *IG, VPValue *Addr, ArrayRef< VPValue * > StoredValues, VPValue *Mask, bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
Definition VPlan.h:2584
In what follows, the term "input IR" refers to code that is fed into the vectorizer whereas the term ...
VPPartialReductionRecipe(Instruction *ReductionInst, VPValue *Op0, VPValue *Op1, VPValue *Cond, unsigned VFScaleFactor)
Definition VPlan.h:2777
VPPartialReductionRecipe(unsigned Opcode, VPValue *Op0, VPValue *Op1, VPValue *Cond, unsigned ScaleFactor, Instruction *ReductionInst=nullptr)
Definition VPlan.h:2781
~VPPartialReductionRecipe() override=default
unsigned getVFScaleFactor() const
Get the factor that the VF of this recipe's output should be scaled by.
Definition VPlan.h:2818
void execute(VPTransformState &State) override
Generate the reduction in the loop.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPPartialReductionRecipe.
unsigned getOpcode() const
Get the binary op's opcode.
Definition VPlan.h:2815
VPPartialReductionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2799
Helper type to provide functions to access incoming values and blocks for phi-like recipes.
Definition VPlan.h:1255
virtual const VPRecipeBase * getAsRecipe() const =0
Return a VPRecipeBase* to the current object.
VPUser::const_operand_range incoming_values() const
Returns an iterator range over the incoming values.
Definition VPlan.h:1277
virtual unsigned getNumIncoming() const
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:1272
void removeIncomingValueFor(VPBlockBase *IncomingBlock) const
Removes the incoming value for IncomingBlock, which must be a predecessor.
const VPBasicBlock * getIncomingBlock(unsigned Idx) const
Returns the incoming block with index Idx.
Definition VPlan.h:3965
detail::zippy< llvm::detail::zip_first, VPUser::const_operand_range, const_incoming_blocks_range > incoming_values_and_blocks() const
Returns an iterator range over pairs of incoming values and corresponding incoming blocks.
Definition VPlan.h:1297
VPValue * getIncomingValue(unsigned Idx) const
Returns the incoming VPValue with index Idx.
Definition VPlan.h:1264
virtual ~VPPhiAccessors()=default
void printPhiOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the recipe.
iterator_range< mapped_iterator< detail::index_iterator, std::function< const VPBasicBlock *(size_t)> > > const_incoming_blocks_range
Definition VPlan.h:1282
const_incoming_blocks_range incoming_blocks() const
Returns an iterator range over the incoming blocks.
Definition VPlan.h:1286
~VPPredInstPHIRecipe() override=default
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition VPlan.h:3171
VPPredInstPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3147
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPPredInstPHIRecipe.
Definition VPlan.h:3158
VPPredInstPHIRecipe(VPValue *PredV, DebugLoc DL)
Construct a VPPredInstPHIRecipe given PredInst whose value needs a phi node after merging back from ...
Definition VPlan.h:3143
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition VPlan.h:386
bool mayReadFromMemory() const
Returns true if the recipe may read from memory.
bool mayReadOrWriteMemory() const
Returns true if the recipe may read from or write to memory.
Definition VPlan.h:473
VPRegionBlock * getRegion()
Definition VPlan.h:4126
void setDebugLoc(DebugLoc NewDL)
Set the recipe's debug location to NewDL.
Definition VPlan.h:484
bool mayWriteToMemory() const
Returns true if the recipe may write to memory.
~VPRecipeBase() override=default
VPBasicBlock * getParent()
Definition VPlan.h:407
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
Definition VPlan.h:478
virtual void execute(VPTransformState &State)=0
The method which generates the output IR instructions that correspond to this VPRecipe,...
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
static bool classof(const VPDef *D)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:453
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
virtual VPRecipeBase * clone()=0
Clone the current recipe.
friend class VPBlockUtils
Definition VPlan.h:388
const VPBasicBlock * getParent() const
Definition VPlan.h:408
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this recipe, taking into account if the cost computation should be skipped and the...
static bool classof(const VPUser *U)
Definition VPlan.h:458
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
void moveAfter(VPRecipeBase *MovePos)
Unlink this recipe from its current VPBasicBlock and insert it into the VPBasicBlock that MovePos liv...
VPRecipeBase(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:397
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2863
VPValue * getEVL() const
The VPValue of the explicit vector length.
Definition VPlan.h:2860
VPReductionEVLRecipe(VPReductionRecipe &R, VPValue &EVL, VPValue *CondOp, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2833
VPReductionEVLRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2844
~VPReductionEVLRecipe() override=default
bool isOrdered() const
Returns true, if the phi is part of an ordered reduction.
Definition VPlan.h:2394
VPReductionPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2363
unsigned getVFScaleFactor() const
Get the factor that the VF of this recipe's output should be scaled by.
Definition VPlan.h:2377
~VPReductionPHIRecipe() override=default
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2400
VPReductionPHIRecipe(PHINode *Phi, RecurKind Kind, VPValue &Start, bool IsInLoop=false, bool IsOrdered=false, unsigned VFScaleFactor=1)
Create a new VPReductionPHIRecipe for the reduction Phi.
Definition VPlan.h:2353
unsigned getNumIncoming() const override
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:2388
bool isInLoop() const
Returns true, if the phi is part of an in-loop reduction.
Definition VPlan.h:2397
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi/select nodes.
RecurKind getRecurrenceKind() const
Returns the recurrence kind of the reduction.
Definition VPlan.h:2391
A recipe to represent in-loop reduction operations, performing a reduction on a vector operand into a ...
Definition VPlan.h:2672
bool isConditional() const
Return true if the in-loop reduction is conditional.
Definition VPlan.h:2754
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:2716
VPReductionRecipe(const RecurKind RdxKind, FastMathFlags FMFs, VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp, bool IsOrdered, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2701
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:2732
VPValue * getVecOp() const
The VPValue of the vector value to be reduced.
Definition VPlan.h:2758
VPValue * getCondOp() const
The VPValue of the condition for the block.
Definition VPlan.h:2760
RecurKind getRecurrenceKind() const
Return the recurrence kind for the in-loop reduction.
Definition VPlan.h:2750
bool isOrdered() const
Return true if the in-loop reduction is ordered.
Definition VPlan.h:2752
~VPReductionRecipe() override=default
VPValue * getChainOp() const
The VPValue of the scalar Chain being accumulated.
Definition VPlan.h:2756
VPReductionRecipe(RecurKind RdxKind, FastMathFlags FMFs, Instruction *I, VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp, bool IsOrdered, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2694
VPReductionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2710
VPReductionRecipe(const unsigned char SC, RecurKind RdxKind, FastMathFlags FMFs, Instruction *I, ArrayRef< VPValue * > Operands, VPValue *CondOp, bool IsOrdered, DebugLoc DL)
Definition VPlan.h:2680
static bool classof(const VPUser *U)
Definition VPlan.h:2722
static bool classof(const VPValue *VPV)
Definition VPlan.h:2727
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition VPlan.h:4009
const VPBlockBase * getEntry() const
Definition VPlan.h:4045
Type * getCanonicalIVType()
Return the type of the canonical IV for loop regions.
Definition VPlan.h:4120
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
Definition VPlan.h:4077
~VPRegionBlock() override=default
void setExiting(VPBlockBase *ExitingBlock)
Set ExitingBlock as the exiting VPBlockBase of this VPRegionBlock.
Definition VPlan.h:4062
VPBlockBase * getExiting()
Definition VPlan.h:4058
VPCanonicalIVPHIRecipe * getCanonicalIV()
Returns the canonical induction recipe of the region.
Definition VPlan.h:4107
void setEntry(VPBlockBase *EntryBlock)
Set EntryBlock as the entry VPBlockBase of this VPRegionBlock.
Definition VPlan.h:4050
const Type * getCanonicalIVType() const
Definition VPlan.h:4121
const VPBlockBase * getExiting() const
Definition VPlan.h:4057
VPBlockBase * getEntry()
Definition VPlan.h:4046
const VPCanonicalIVPHIRecipe * getCanonicalIV() const
Definition VPlan.h:4115
VPBasicBlock * getPreheaderVPBB()
Returns the pre-header VPBasicBlock of the loop region.
Definition VPlan.h:4070
friend class VPlan
Definition VPlan.h:4010
static bool classof(const VPBlockBase *V)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:4041
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
Definition VPlan.h:2875
VPReplicateRecipe(Instruction *I, ArrayRef< VPValue * > Operands, bool IsSingleScalar, VPValue *Mask=nullptr, VPIRMetadata Metadata={})
Definition VPlan.h:2883
bool isSingleScalar() const
Definition VPlan.h:2920
~VPReplicateRecipe() override=default
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2925
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition VPlan.h:2932
bool isPredicated() const
Definition VPlan.h:2922
VPReplicateRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2895
unsigned getOpcode() const
Definition VPlan.h:2949
VPValue * getMask()
Return the mask of a predicated VPReplicateRecipe.
Definition VPlan.h:2944
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3766
VPValue * getStepValue() const
Definition VPlan.h:3763
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPScalarIVStepsRecipe.
Definition VPlan.h:3751
VPScalarIVStepsRecipe(const InductionDescriptor &IndDesc, VPValue *IV, VPValue *Step, VPValue *VF, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:3722
bool isPart0() const
Return true if this VPScalarIVStepsRecipe corresponds to part 0.
Definition VPlan.h:3743
VPScalarIVStepsRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3734
VPScalarIVStepsRecipe(VPValue *IV, VPValue *Step, VPValue *VF, Instruction::BinaryOps Opcode, FastMathFlags FMFs, DebugLoc DL)
Definition VPlan.h:3715
~VPScalarIVStepsRecipe() override=default
VPSingleDefRecipe is a base class for recipes for modeling a sequence of one or more output IR that define ...
Definition VPlan.h:517
VPSingleDefRecipe(const unsigned char SC, ArrayRef< VPValue * > Operands, Value *UV, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:523
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
Definition VPlan.h:582
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:527
const Instruction * getUnderlyingInstr() const
Definition VPlan.h:585
static bool classof(const VPUser *U)
Definition VPlan.h:574
LLVM_DUMP_METHOD void dump() const
Print this VPSingleDefRecipe to dbgs() (for debugging).
VPSingleDefRecipe * clone() override=0
Clone the current recipe.
VPSingleDefRecipe(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:519
This class can be used to assign names to VPValues.
Helper to access the operand that contains the unroll part for this recipe after unrolling.
Definition VPlan.h:926
VPValue * getUnrollPartOperand(const VPUser &U) const
Return the VPValue operand containing the unroll part or null if there is no such operand.
unsigned getUnrollPart(const VPUser &U) const
Return the unroll part.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition VPlanValue.h:199
void printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the operands to O.
Definition VPlan.cpp:1436
operand_range operands()
Definition VPlanValue.h:267
void setOperand(unsigned I, VPValue *New)
Definition VPlanValue.h:243
unsigned getNumOperands() const
Definition VPlanValue.h:237
operand_iterator op_end()
Definition VPlanValue.h:265
VPValue * getOperand(unsigned N) const
Definition VPlanValue.h:238
VPUser(ArrayRef< VPValue * > Operands)
Definition VPlanValue.h:218
iterator_range< const_operand_iterator > const_operand_range
Definition VPlanValue.h:261
iterator_range< operand_iterator > operand_range
Definition VPlanValue.h:260
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
Definition VPlanValue.h:48
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Definition VPlan.cpp:135
friend class VPExpressionRecipe
Definition VPlanValue.h:53
Value * getLiveInIRValue() const
Returns the underlying IR value, if this VPValue is defined outside the scope of VPlan.
Definition VPlanValue.h:176
friend class VPDef
Definition VPlanValue.h:49
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
Definition VPlanValue.h:85
VPValue(const unsigned char SC, Value *UV=nullptr, VPDef *Def=nullptr)
Definition VPlan.cpp:98
void setUnderlyingValue(Value *Val)
Definition VPlanValue.h:186
unsigned getNumUsers() const
Definition VPlanValue.h:113
bool isLiveIn() const
Returns true if this VPValue is a live-in, i.e. defined outside the VPlan.
Definition VPlanValue.h:171
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
bool onlyFirstPartUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition VPlan.h:1885
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition VPlan.h:1871
VPVectorEndPointerRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1892
const VPValue * getVFValue() const
Definition VPlan.h:1867
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPVectorEndPointerRecipe.
Definition VPlan.h:1878
VPVectorEndPointerRecipe(VPValue *Ptr, VPValue *VF, Type *IndexedTy, int64_t Stride, GEPNoWrapFlags GEPFlags, DebugLoc DL)
Definition VPlan.h:1856
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool isFirstPart() const
Return true if this VPVectorPointerRecipe corresponds to part 0.
Definition VPlan.h:1944
Type * getSourceElementType() const
Definition VPlan.h:1921
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
bool onlyFirstPartUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition VPlan.h:1930
VPVectorPointerRecipe(VPValue *Ptr, Type *SourceElementTy, GEPNoWrapFlags GEPFlags, DebugLoc DL)
Definition VPlan.h:1911
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition VPlan.h:1923
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPVectorPointerRecipe.
Definition VPlan.h:1947
VPVectorPointerRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1937
const_operand_range args() const
Definition VPlan.h:1675
VPWidenCallRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1656
VPWidenCallRecipe(Value *UV, Function *Variant, ArrayRef< VPValue * > CallArguments, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1643
operand_range args()
Definition VPlan.h:1674
Function * getCalledScalarFunction() const
Definition VPlan.h:1670
~VPWidenCallRecipe() override=default
void execute(VPTransformState &State) override
Generate a canonical vector induction variable of the vector loop, with start = {<Part*VF,...
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPWidenCanonicalIVRecipe() override=default
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCanonicalIVRecipe.
Definition VPlan.h:3626
VPWidenCanonicalIVRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3613
VPWidenCanonicalIVRecipe(VPCanonicalIVPHIRecipe *CanonicalIV)
Definition VPlan.h:3608
VPWidenCastRecipe is a recipe to create vector cast instructions.
Definition VPlan.h:1489
VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, CastInst &UI)
Definition VPlan.h:1497
Instruction::CastOps getOpcode() const
Definition VPlan.h:1540
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1505
Type * getResultType() const
Returns the result type of the cast.
Definition VPlan.h:1543
void execute(VPTransformState &State) override
Produce widened copies of the cast.
~VPWidenCastRecipe() override=default
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCastRecipe.
VPWidenCastRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1517
unsigned getOpcode() const
This recipe generates a GEP instruction.
Definition VPlan.h:1813
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:1834
Type * getSourceElementType() const
Definition VPlan.h:1818
VPWidenGEPRecipe(GetElementPtrInst *GEP, ArrayRef< VPValue * > Operands)
Definition VPlan.h:1794
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenGEPRecipe.
Definition VPlan.h:1821
VPWidenGEPRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1805
~VPWidenGEPRecipe() override=default
void execute(VPTransformState &State) override=0
Generate the phi nodes.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2111
static bool classof(const VPValue *V)
Definition VPlan.h:2065
void setStepValue(VPValue *V)
Update the step value of the recipe.
Definition VPlan.h:2081
VPValue * getBackedgeValue() override
Returns the incoming value from the loop backedge.
Definition VPlan.h:2096
unsigned getNumIncoming() const override
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:2089
PHINode * getPHINode() const
Definition VPlan.h:2091
VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start, VPValue *Step, const InductionDescriptor &IndDesc, DebugLoc DL)
Definition VPlan.h:2053
VPValue * getStepValue()
Returns the step value of the induction.
Definition VPlan.h:2077
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
Definition VPlan.h:2094
VPRecipeBase & getBackedgeRecipe() override
Returns the backedge value as a recipe.
Definition VPlan.h:2103
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:2060
static bool classof(const VPHeaderPHIRecipe *R)
Definition VPlan.h:2070
const VPValue * getVFValue() const
Definition VPlan.h:2084
const VPValue * getStepValue() const
Definition VPlan.h:2078
const TruncInst * getTruncInst() const
Definition VPlan.h:2189
void execute(VPTransformState &State) override
Generate the phi nodes.
Definition VPlan.h:2164
~VPWidenIntOrFpInductionRecipe() override=default
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step, VPValue *VF, const InductionDescriptor &IndDesc, TruncInst *Trunc, DebugLoc DL)
Definition VPlan.h:2140
VPWidenIntOrFpInductionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2156
TruncInst * getTruncInst()
Returns the first defined value as TruncInst, if it is one or nullptr otherwise.
Definition VPlan.h:2188
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step, VPValue *VF, const InductionDescriptor &IndDesc, DebugLoc DL)
Definition VPlan.h:2131
VPValue * getLastUnrolledPartOperand()
Returns the VPValue representing the value of this induction at the last unrolled part,...
Definition VPlan.h:2205
unsigned getNumIncoming() const override
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:2184
Type * getScalarType() const
Returns the scalar type of the induction.
Definition VPlan.h:2197
bool isCanonical() const
Returns true if the induction is canonical, i.e.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
VPWidenIntrinsicRecipe(Intrinsic::ID VectorIntrinsicID, ArrayRef< VPValue * > CallArguments, Type *Ty, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1573
Intrinsic::ID getVectorIntrinsicID() const
Return the ID of the intrinsic.
Definition VPlan.h:1608
bool mayReadFromMemory() const
Returns true if the intrinsic may read from memory.
Definition VPlan.h:1617
StringRef getIntrinsicName() const
Return the name of the intrinsic as a string.
VPWidenIntrinsicRecipe(CallInst &CI, Intrinsic::ID VectorIntrinsicID, ArrayRef< VPValue * > CallArguments, Type *Ty, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1564
bool mayHaveSideEffects() const
Returns true if the intrinsic may have side-effects.
Definition VPlan.h:1623
VPWidenIntrinsicRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1590
bool mayWriteToMemory() const
Returns true if the intrinsic may write to memory.
Definition VPlan.h:1620
~VPWidenIntrinsicRecipe() override=default
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Type * getResultType() const
Return the scalar return type of the intrinsic.
Definition VPlan.h:1611
void execute(VPTransformState &State) override
Produce a widened version of the vector intrinsic.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this vector intrinsic.
bool IsMasked
Whether the memory access is masked.
Definition VPlan.h:3195
bool Reverse
Whether the consecutive accessed addresses are in reverse order.
Definition VPlan.h:3192
bool isConsecutive() const
Return whether the loaded-from / stored-to addresses are consecutive.
Definition VPlan.h:3235
static bool classof(const VPUser *U)
Definition VPlan.h:3229
void execute(VPTransformState &State) override
Generate the wide load/store.
Definition VPlan.h:3258
Instruction & Ingredient
Definition VPlan.h:3183
VPWidenMemoryRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3218
Instruction & getIngredient() const
Definition VPlan.h:3266
bool Consecutive
Whether the accessed addresses are consecutive.
Definition VPlan.h:3189
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:3222
VPValue * getMask() const
Return the mask used by this recipe.
Definition VPlan.h:3249
Align Alignment
Alignment information for this memory access.
Definition VPlan.h:3186
VPWidenMemoryRecipe(const char unsigned SC, Instruction &I, std::initializer_list< VPValue * > Operands, bool Consecutive, bool Reverse, Align Alignment, const VPIRMetadata &Metadata, DebugLoc DL)
Definition VPlan.h:3205
bool isMasked() const
Returns true if the recipe is masked.
Definition VPlan.h:3245
void setMask(VPValue *Mask)
Definition VPlan.h:3197
Align getAlign() const
Returns the alignment of the memory access.
Definition VPlan.h:3255
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition VPlan.h:3242
bool isReverse() const
Return whether the consecutive loaded/stored addresses are in reverse order.
Definition VPlan.h:3239
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:2265
VPWidenPHIRecipe(PHINode *Phi, VPValue *Start=nullptr, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new VPWidenPHIRecipe for Phi with start value Start and debug location DL.
Definition VPlan.h:2270
VPWidenPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2277
~VPWidenPHIRecipe() override=default
void execute(VPTransformState &State) override
Generate the phi/select nodes.
VPWidenPointerInductionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2229
~VPWidenPointerInductionRecipe() override=default
bool onlyScalarsGenerated(bool IsScalable)
Returns true if only scalar values will be generated.
void execute(VPTransformState &State) override
Generate vector values for the pointer induction.
Definition VPlan.h:2239
VPWidenPointerInductionRecipe(PHINode *Phi, VPValue *Start, VPValue *Step, VPValue *NumUnrolledElems, const InductionDescriptor &IndDesc, bool IsScalarAfterVectorization, DebugLoc DL)
Create a new VPWidenPointerInductionRecipe for Phi with start value Start and the number of elements ...
Definition VPlan.h:2217
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPWidenRecipe is a recipe for producing a widened instruction using the opcode and operands of the re...
Definition VPlan.h:1446
VPWidenRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1462
VPWidenRecipe(Instruction &I, ArrayRef< VPValue * > Operands)
Definition VPlan.h:1456
VPWidenRecipe(unsigned Opcode, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags, const VPIRMetadata &Metadata, DebugLoc DL)
Definition VPlan.h:1450
~VPWidenRecipe() override=default
unsigned getOpcode() const
Definition VPlan.h:1479
Class that maps (parts of) an existing VPlan to trees of combined VPInstructions.
Definition VPlanSLP.h:74
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient outp...
Definition VPlan.h:4139
void printDOT(raw_ostream &O) const
Print this VPlan in DOT format to O.
Definition VPlan.cpp:1122
friend class VPSlotTracker
Definition VPlan.h:4141
std::string getName() const
Return a string with the name of the plan and the applicable VFs and UFs.
Definition VPlan.cpp:1098
bool hasVF(ElementCount VF) const
Definition VPlan.h:4345
LLVMContext & getContext() const
Definition VPlan.h:4333
VPBasicBlock * getEntry()
Definition VPlan.h:4233
VPValue & getVectorTripCount()
The vector trip count.
Definition VPlan.h:4324
void setName(const Twine &newName)
Definition VPlan.h:4383
bool hasScalableVF() const
Definition VPlan.h:4346
VPValue & getVFxUF()
Returns VF * UF of the vector loop region.
Definition VPlan.h:4331
VPValue & getVF()
Returns the VF of the vector loop region.
Definition VPlan.h:4327
VPValue * getTripCount() const
The trip count of the original loop.
Definition VPlan.h:4295
VPValue * getTrue()
Return a VPValue wrapping i1 true.
Definition VPlan.h:4402
VPValue * getOrCreateBackedgeTakenCount()
The backedge taken count of the original loop.
Definition VPlan.h:4316
iterator_range< SmallSetVector< ElementCount, 2 >::iterator > vectorFactors() const
Returns an iterator range over all VFs of the plan.
Definition VPlan.h:4352
VPlan(BasicBlock *ScalarHeaderBB, VPValue *TC)
Construct a VPlan with a new VPBasicBlock as entry, a VPIRBasicBlock wrapping ScalarHeaderBB and a tr...
Definition VPlan.h:4214
VPIRBasicBlock * getExitBlock(BasicBlock *IRBB) const
Return the VPIRBasicBlock corresponding to IRBB.
Definition VPlan.cpp:906
LLVM_ABI_FOR_TEST ~VPlan()
Definition VPlan.cpp:883
const VPValue & getVF() const
Definition VPlan.h:4328
bool isExitBlock(VPBlockBase *VPBB)
Returns true if VPBB is an exit block.
Definition VPlan.cpp:914
const VPBasicBlock * getEntry() const
Definition VPlan.h:4234
friend class VPlanPrinter
Definition VPlan.h:4140
VPValue * getConstantInt(const APInt &Val)
Return a VPValue wrapping a ConstantInt with the given APInt value.
Definition VPlan.h:4419
unsigned getUF() const
Definition VPlan.h:4365
VPRegionBlock * createReplicateRegion(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name="")
Create a new replicate region with Entry, Exiting and Name.
Definition VPlan.h:4479
VPIRBasicBlock * createEmptyVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock wrapping IRBB, but do not create VPIRInstructions wrapping the instructions i...
Definition VPlan.cpp:1236
bool hasUF(unsigned UF) const
Definition VPlan.h:4363
ArrayRef< VPIRBasicBlock * > getExitBlocks() const
Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of the original scalar loop.
Definition VPlan.h:4285
VPValue * getConstantInt(Type *Ty, uint64_t Val, bool IsSigned=false)
Return a VPValue wrapping a ConstantInt with the given type and value.
Definition VPlan.h:4408
VPValue * getBackedgeTakenCount() const
Definition VPlan.h:4321
void setVF(ElementCount VF)
Definition VPlan.h:4339
bool isUnrolled() const
Returns true if the VPlan already has been unrolled, i.e.
Definition VPlan.h:4378
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition VPlan.cpp:1027
bool hasEarlyExit() const
Returns true if the VPlan is based on a loop with an early exit.
Definition VPlan.h:4501
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this plan.
Definition VPlan.cpp:1009
const VPBasicBlock * getMiddleBlock() const
Definition VPlan.h:4271
void setTripCount(VPValue *NewTripCount)
Set the trip count assuming it is currently null; if it is not - use resetTripCount().
Definition VPlan.h:4302
void resetTripCount(VPValue *NewTripCount)
Resets the trip count for the VPlan.
Definition VPlan.h:4309
VPBasicBlock * getMiddleBlock()
Returns the 'middle' block of the plan, that is the block that selects whether to execute the scalar ...
Definition VPlan.h:4258
void setEntry(VPBasicBlock *VPBB)
Definition VPlan.h:4222
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
Definition VPlan.h:4457
LLVM_ABI_FOR_TEST VPIRBasicBlock * createVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock from IRBB containing VPIRInstructions for all instructions in IRBB,...
Definition VPlan.cpp:1242
VPValue * getFalse()
Return a VPValue wrapping i1 false.
Definition VPlan.h:4405
VPValue * getOrAddLiveIn(Value *V)
Gets the live-in VPValue for V or adds a new live-in (if none exists yet) for V.
Definition VPlan.h:4387
VPRegionBlock * createLoopRegion(const std::string &Name="", VPBlockBase *Entry=nullptr, VPBlockBase *Exiting=nullptr)
Create a new loop region with Name and entry and exiting blocks set to Entry and Exiting respectively...
Definition VPlan.h:4467
LLVM_DUMP_METHOD void dump() const
Dump the plan to stderr (for debugging).
Definition VPlan.cpp:1128
bool hasScalarVFOnly() const
Definition VPlan.h:4356
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
Definition VPlan.h:4276
void execute(VPTransformState *State)
Generate the IR code for this VPlan.
Definition VPlan.cpp:921
ArrayRef< VPValue * > getLiveIns() const
Return the list of live-in VPValues available in the VPlan.
Definition VPlan.h:4427
void print(raw_ostream &O) const
Print this VPlan to O.
Definition VPlan.cpp:1081
void addVF(ElementCount VF)
Definition VPlan.h:4337
VPIRBasicBlock * getScalarHeader() const
Return the VPIRBasicBlock wrapping the header of the scalar loop.
Definition VPlan.h:4281
VPValue * getLiveIn(Value *V) const
Return the live-in VPValue for V, if there is one or nullptr otherwise.
Definition VPlan.h:4424
VPValue * getConstantInt(unsigned BitWidth, uint64_t Val, bool IsSigned=false)
Return a VPValue wrapping a ConstantInt with the given bitwidth and value.
Definition VPlan.h:4413
void printLiveIns(raw_ostream &O) const
Print the live-ins of this VPlan to O.
Definition VPlan.cpp:1043
VPBasicBlock * getVectorPreheader()
Returns the preheader of the vector loop region, if one exists, or null otherwise.
Definition VPlan.h:4238
void setUF(unsigned UF)
Definition VPlan.h:4370
bool hasScalarTail() const
Returns true if the scalar tail may execute after the vector loop.
Definition VPlan.h:4511
VPlan * duplicate()
Clone the current VPlan, update all VPValues of the new VPlan and cloned recipes to refer to the clon...
Definition VPlan.cpp:1169
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
Increasing range of size_t indices.
Definition STLExtras.h:2425
typename base_list_type::const_reverse_iterator const_reverse_iterator
Definition ilist.h:124
typename base_list_type::reverse_iterator reverse_iterator
Definition ilist.h:123
typename base_list_type::const_iterator const_iterator
Definition ilist.h:122
An intrusive list with ownership and callbacks specified/controlled by ilist_traits,...
Definition ilist.h:328
A range adaptor for a pair of iterators.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
This file defines classes to implement an intrusive doubly linked list class (i.e.
This file defines the ilist_node class template, which is a convenient base class for creating classe...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI AttributeSet getFnAttributes(LLVMContext &C, ID id)
Return the function attributes for an intrinsic.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iterable types.
Definition STLExtras.h:829
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1751
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1725
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
Definition STLExtras.h:839
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2472
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI void getMetadataToPropagate(Instruction *Inst, SmallVectorImpl< std::pair< unsigned, MDNode * > > &Metadata)
Add metadata from Inst to Metadata, if it can be preserved after vectorization.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
auto cast_or_null(const Y &Val)
Definition Casting.h:714
MemoryEffectsBase< IRMemLocation > MemoryEffects
Summary of how a function affects memory in the program.
Definition ModRef.h:296
auto map_range(ContainerTy &&C, FuncTy F)
Definition STLExtras.h:364
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1732
auto reverse(ContainerTy &&C)
Definition STLExtras.h:406
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition STLExtras.h:323
@ Other
Any other memory.
Definition ModRef.h:68
RecurKind
These are the kinds of recurrences that we support.
@ Mul
Product of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the given range.
Definition STLExtras.h:1954
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
constexpr unsigned BitWidth
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given predicate occurs in a range.
Definition STLExtras.h:1961
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1897
DenseMap< Value *, VPValue * > Value2VPValueTy
Definition VPlanValue.h:192
std::unique_ptr< VPlan > VPlanPtr
Definition VPlan.h:76
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:869
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Support casting from VPRecipeBase -> VPPhiAccessors, by down-casting to the recipe types implementing...
Definition VPlan.h:3784
static VPPhiAccessors * doCastIfPossible(SrcTy f)
doCastIfPossible is used by dyn_cast<>.
Definition VPlan.h:3805
CastInfo< VPPhiAccessors, SrcTy > Self
Definition VPlan.h:3786
static VPPhiAccessors * doCast(SrcTy R)
doCast is used by cast<>.
Definition VPlan.h:3789
This struct provides a method for customizing the way a cast is performed.
Definition Casting.h:476
static bool isPossible(const VPRecipeBase *f)
Definition VPlan.h:3776
This struct provides a way to check if a given cast is possible.
Definition Casting.h:253
static bool isPossible(const SrcTy &f)
Definition Casting.h:254
Struct to hold various analysis needed for cost computations.
void execute(VPTransformState &State) override
Generate the phi nodes.
VPFirstOrderRecurrencePHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2308
VPFirstOrderRecurrencePHIRecipe(PHINode *Phi, VPValue &Start)
Definition VPlan.h:2303
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this first-order recurrence phi recipe.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2326
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
DisjointFlagsTy(bool IsDisjoint)
Definition VPlan.h:626
NonNegFlagsTy(bool IsNonNeg)
Definition VPlan.h:631
TruncFlagsTy(bool HasNUW, bool HasNSW)
Definition VPlan.h:621
WrapFlagsTy(bool HasNUW, bool HasNSW)
Definition VPlan.h:614
PHINode & getIRPhi()
Definition VPlan.h:1427
VPIRPhi(PHINode &PN)
Definition VPlan.h:1420
static bool classof(const VPRecipeBase *U)
Definition VPlan.h:1422
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:1438
static bool classof(const VPUser *U)
Definition VPlan.h:1315
VPPhi * clone() override
Clone the current recipe.
Definition VPlan.h:1330
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:1345
VPPhi(ArrayRef< VPValue * > Operands, DebugLoc DL, const Twine &Name="")
Definition VPlan.h:1312
static bool classof(const VPSingleDefRecipe *SDR)
Definition VPlan.h:1325
static bool classof(const VPValue *V)
Definition VPlan.h:1320
A pure-virtual common base class for recipes defining a single VPValue and using IR flags.
Definition VPlan.h:871
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:885
InstructionCost getCostForRecipeWithOpcode(unsigned Opcode, ElementCount VF, VPCostContext &Ctx) const
Compute the cost for this recipe for VF, using Opcode and Ctx.
VPRecipeWithIRFlags(const unsigned char SC, ArrayRef< VPValue * > Operands, Instruction &I)
Definition VPlan.h:876
VPRecipeWithIRFlags(const unsigned char SC, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:880
static bool classof(const VPValue *V)
Definition VPlan.h:905
static bool classof(const VPSingleDefRecipe *U)
Definition VPlan.h:912
void execute(VPTransformState &State) override=0
The method which generates the output IR instructions that correspond to this VPRecipe, thereby "executing" the VPlan.
VPRecipeWithIRFlags * clone() override=0
Clone the current recipe.
static bool classof(const VPUser *U)
Definition VPlan.h:900
VPRecipeWithIRFlags(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:872
VPTransformState holds information passed down when "executing" a VPlan, needed for generating the output IR.
void execute(VPTransformState &State) override
Generate the wide load or gather.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenLoadEVLRecipe.
VPValue * getEVL() const
Return the EVL operand.
Definition VPlan.h:3325
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPWidenLoadEVLRecipe(VPWidenLoadRecipe &L, VPValue *Addr, VPValue &EVL, VPValue *Mask)
Definition VPlan.h:3313
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3341
A recipe for widening load operations, using the address to load from and an optional mask.
Definition VPlan.h:3272
VP_CLASSOF_IMPL(VPDef::VPWidenLoadSC)
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3300
void execute(VPTransformState &State) override
Generate a wide load or gather.
VPWidenLoadRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3282
VPWidenLoadRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask, bool Consecutive, bool Reverse, Align Alignment, const VPIRMetadata &Metadata, DebugLoc DL)
Definition VPlan.h:3273
bool isInvariantCond() const
Definition VPlan.h:1763
VPWidenSelectRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1737
VPWidenSelectRecipe(SelectInst &I, ArrayRef< VPValue * > Operands)
Definition VPlan.h:1731
VPValue * getCond() const
Definition VPlan.h:1759
unsigned getOpcode() const
Definition VPlan.h:1757
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:1768
~VPWidenSelectRecipe() override=default
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition VPlan.h:3406
void execute(VPTransformState &State) override
Generate the wide store or scatter.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3425
VPWidenStoreEVLRecipe(VPWidenStoreRecipe &S, VPValue *Addr, VPValue &EVL, VPValue *Mask)
Definition VPlan.h:3395
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenStoreEVLRecipe.
VPValue * getEVL() const
Return the EVL operand.
Definition VPlan.h:3409
A recipe for widening store operations, using the stored value, the address to store to and an optional mask.
Definition VPlan.h:3352
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3382
VP_CLASSOF_IMPL(VPDef::VPWidenStoreSC)
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition VPlan.h:3370
VPWidenStoreRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3361
VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal, VPValue *Mask, bool Consecutive, bool Reverse, Align Alignment, const VPIRMetadata &Metadata, DebugLoc DL)
Definition VPlan.h:3353