LLVM 23.0.0git
VPlan.h
Go to the documentation of this file.
1//===- VPlan.h - Represent A Vectorizer Plan --------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file contains the declarations of the Vectorization Plan base classes:
11/// 1. VPBasicBlock and VPRegionBlock that inherit from a common pure virtual
12/// VPBlockBase, together implementing a Hierarchical CFG;
13/// 2. Pure virtual VPRecipeBase serving as the base class for recipes contained
14/// within VPBasicBlocks;
15/// 3. Pure virtual VPSingleDefRecipe serving as a base class for recipes that
16/// also inherit from VPValue.
17/// 4. VPInstruction, a concrete Recipe and VPUser modeling a single planned
18/// instruction;
19/// 5. The VPlan class holding a candidate for vectorization;
20/// These are documented in docs/VectorizationPlan.rst.
21//
22//===----------------------------------------------------------------------===//
23
24#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
25#define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
26
27#include "VPlanValue.h"
28#include "llvm/ADT/Bitfields.h"
29#include "llvm/ADT/MapVector.h"
32#include "llvm/ADT/Twine.h"
33#include "llvm/ADT/ilist.h"
34#include "llvm/ADT/ilist_node.h"
38#include "llvm/IR/DebugLoc.h"
39#include "llvm/IR/FMF.h"
40#include "llvm/IR/Operator.h"
43#include <cassert>
44#include <cstddef>
45#include <functional>
46#include <string>
47#include <utility>
48#include <variant>
49
50namespace llvm {
51
52class BasicBlock;
53class DominatorTree;
55class IRBuilderBase;
56struct VPTransformState;
57class raw_ostream;
59class SCEV;
60class Type;
61class VPBasicBlock;
62class VPBuilder;
63class VPDominatorTree;
64class VPRegionBlock;
65class VPlan;
66class VPLane;
68class VPlanSlp;
69class Value;
71
72struct VPCostContext;
73
74namespace Intrinsic {
75typedef unsigned ID;
76}
77
78using VPlanPtr = std::unique_ptr<VPlan>;
79
80/// \enum UncountableExitStyle
81/// Different methods of handling early exits.
82///
85 /// No side effects to worry about, so we can process any uncountable exits
86 /// in the loop and branch either to the middle block if the trip count was
87 /// reached, or an early exit block to determine which exit was taken.
89 /// All memory operations other than the load(s) required to determine whether
90 /// an uncountable exit occurred will be masked based on that condition. If an
91 /// uncountable exit is taken, then all lanes before the exiting lane will
92 /// complete, leaving just the final lane to execute in the scalar tail.
94};
95
96/// VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
97/// A VPBlockBase can be either a VPBasicBlock or a VPRegionBlock.
99 friend class VPBlockUtils;
100
101 const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).
102
103 /// An optional name for the block.
104 std::string Name;
105
106 /// The immediate VPRegionBlock which this VPBlockBase belongs to, or null if
107 /// it is a topmost VPBlockBase.
108 VPRegionBlock *Parent = nullptr;
109
110 /// List of predecessor blocks.
112
113 /// List of successor blocks.
115
116 /// VPlan containing the block. Can only be set on the entry block of the
117 /// plan.
118 VPlan *Plan = nullptr;
119
120 /// Add \p Successor as the last successor to this block.
121 void appendSuccessor(VPBlockBase *Successor) {
122 assert(Successor && "Cannot add nullptr successor!");
123 Successors.push_back(Successor);
124 }
125
126 /// Add \p Predecessor as the last predecessor to this block.
127 void appendPredecessor(VPBlockBase *Predecessor) {
128 assert(Predecessor && "Cannot add nullptr predecessor!");
129 Predecessors.push_back(Predecessor);
130 }
131
132 /// Remove \p Predecessor from the predecessors of this block.
133 void removePredecessor(VPBlockBase *Predecessor) {
134 auto Pos = find(Predecessors, Predecessor);
135 assert(Pos && "Predecessor does not exist");
136 Predecessors.erase(Pos);
137 }
138
139 /// Remove \p Successor from the successors of this block.
140 void removeSuccessor(VPBlockBase *Successor) {
141 auto Pos = find(Successors, Successor);
142 assert(Pos && "Successor does not exist");
143 Successors.erase(Pos);
144 }
145
146 /// This function replaces one predecessor with another, useful when
147 /// trying to replace an old block in the CFG with a new one.
148 void replacePredecessor(VPBlockBase *Old, VPBlockBase *New) {
149 auto I = find(Predecessors, Old);
150 assert(I != Predecessors.end());
151 assert(Old->getParent() == New->getParent() &&
152 "replaced predecessor must have the same parent");
153 *I = New;
154 }
155
156 /// This function replaces one successor with another, useful when
157 /// trying to replace an old block in the CFG with a new one.
158 void replaceSuccessor(VPBlockBase *Old, VPBlockBase *New) {
159 auto I = find(Successors, Old);
160 assert(I != Successors.end());
161 assert(Old->getParent() == New->getParent() &&
162 "replaced successor must have the same parent");
163 *I = New;
164 }
165
166protected:
167 VPBlockBase(const unsigned char SC, const std::string &N)
168 : SubclassID(SC), Name(N) {}
169
170public:
/// Identifiers for the concrete VPBlockBase subclasses that get instantiated.
/// The value is stored in each block's SubclassID field and drives concrete
/// type identification.
using VPBlockTy = enum {
  VPRegionBlockSC,
  VPBasicBlockSC,
  VPIRBasicBlockSC
};
176
178
179 virtual ~VPBlockBase() = default;
180
181 const std::string &getName() const { return Name; }
182
183 void setName(const Twine &newName) { Name = newName.str(); }
184
185 /// \return an ID for the concrete type of this object.
186 /// This is used to implement the classof checks. This should not be used
187 /// for any other purpose, as the values may change as LLVM evolves.
188 unsigned getVPBlockID() const { return SubclassID; }
189
190 VPRegionBlock *getParent() { return Parent; }
191 const VPRegionBlock *getParent() const { return Parent; }
192
193 /// \return A pointer to the plan containing the current block.
194 VPlan *getPlan();
195 const VPlan *getPlan() const;
196
197 /// Sets the pointer of the plan containing the block. The block must be the
198 /// entry block into the VPlan.
199 void setPlan(VPlan *ParentPlan);
200
201 void setParent(VPRegionBlock *P) { Parent = P; }
202
203 /// \return the VPBasicBlock that is the entry of this VPBlockBase,
204 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
205 /// VPBlockBase is a VPBasicBlock, it is returned.
206 const VPBasicBlock *getEntryBasicBlock() const;
207 VPBasicBlock *getEntryBasicBlock();
208
209 /// \return the VPBasicBlock that is the exiting block of this VPBlockBase,
210 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
211 /// VPBlockBase is a VPBasicBlock, it is returned.
212 const VPBasicBlock *getExitingBasicBlock() const;
213 VPBasicBlock *getExitingBasicBlock();
214
215 const VPBlocksTy &getSuccessors() const { return Successors; }
216 VPBlocksTy &getSuccessors() { return Successors; }
217
218 /// Returns true if this block has any successors.
219 bool hasSuccessors() const { return !Successors.empty(); }
220 /// Returns true if this block has any predecessors.
221 bool hasPredecessors() const { return !Predecessors.empty(); }
222
225
226 const VPBlocksTy &getPredecessors() const { return Predecessors; }
227 VPBlocksTy &getPredecessors() { return Predecessors; }
228
229 /// \return the successor of this VPBlockBase if it has a single successor.
230 /// Otherwise return a null pointer.
232 return (Successors.size() == 1 ? *Successors.begin() : nullptr);
233 }
234
235 /// \return the predecessor of this VPBlockBase if it has a single
236 /// predecessor. Otherwise return a null pointer.
238 return (Predecessors.size() == 1 ? *Predecessors.begin() : nullptr);
239 }
240
241 size_t getNumSuccessors() const { return Successors.size(); }
242 size_t getNumPredecessors() const { return Predecessors.size(); }
243
244 /// An Enclosing Block of a block B is any block containing B, including B
245 /// itself. \return the closest enclosing block starting from "this", which
246 /// has successors. \return the root enclosing block if all enclosing blocks
247 /// have no successors.
248 VPBlockBase *getEnclosingBlockWithSuccessors();
249
250 /// \return the closest enclosing block starting from "this", which has
251 /// predecessors. \return the root enclosing block if all enclosing blocks
252 /// have no predecessors.
253 VPBlockBase *getEnclosingBlockWithPredecessors();
254
255 /// \return the successors either attached directly to this VPBlockBase or, if
256 /// this VPBlockBase is the exit block of a VPRegionBlock and has no
257 /// successors of its own, search recursively for the first enclosing
258 /// VPRegionBlock that has successors and return them. If no such
259 /// VPRegionBlock exists, return the (empty) successors of the topmost
260 /// VPBlockBase reached.
262 return getEnclosingBlockWithSuccessors()->getSuccessors();
263 }
264
265 /// \return the hierarchical successor of this VPBlockBase if it has a single
266 /// hierarchical successor. Otherwise return a null pointer.
268 return getEnclosingBlockWithSuccessors()->getSingleSuccessor();
269 }
270
271 /// \return the predecessors either attached directly to this VPBlockBase or,
272 /// if this VPBlockBase is the entry block of a VPRegionBlock and has no
273 /// predecessors of its own, search recursively for the first enclosing
274 /// VPRegionBlock that has predecessors and return them. If no such
275 /// VPRegionBlock exists, return the (empty) predecessors of the topmost
276 /// VPBlockBase reached.
278 return getEnclosingBlockWithPredecessors()->getPredecessors();
279 }
280
281 /// \return the hierarchical predecessor of this VPBlockBase if it has a
282 /// single hierarchical predecessor. Otherwise return a null pointer.
286
287 /// Set a given VPBlockBase \p Successor as the single successor of this
288 /// VPBlockBase. This VPBlockBase is not added as predecessor of \p Successor.
289 /// This VPBlockBase must have no successors.
291 assert(Successors.empty() && "Setting one successor when others exist.");
292 assert(Successor->getParent() == getParent() &&
293 "connected blocks must have the same parent");
294 appendSuccessor(Successor);
295 }
296
297 /// Set two given VPBlockBases \p IfTrue and \p IfFalse to be the two
298 /// successors of this VPBlockBase. This VPBlockBase is not added as
299 /// predecessor of \p IfTrue or \p IfFalse. This VPBlockBase must have no
300 /// successors.
301 void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse) {
302 assert(Successors.empty() && "Setting two successors when others exist.");
303 appendSuccessor(IfTrue);
304 appendSuccessor(IfFalse);
305 }
306
307 /// Set each VPBasicBlock in \p NewPreds as predecessor of this VPBlockBase.
308 /// This VPBlockBase must have no predecessors. This VPBlockBase is not added
309 /// as successor of any VPBasicBlock in \p NewPreds.
311 assert(Predecessors.empty() && "Block predecessors already set.");
312 for (auto *Pred : NewPreds)
313 appendPredecessor(Pred);
314 }
315
316 /// Set each VPBasicBlock in \p NewSuccs as successor of this VPBlockBase.
317 /// This VPBlockBase must have no successors. This VPBlockBase is not added
318 /// as predecessor of any VPBasicBlock in \p NewSuccs.
320 assert(Successors.empty() && "Block successors already set.");
321 for (auto *Succ : NewSuccs)
322 appendSuccessor(Succ);
323 }
324
325 /// Remove all the predecessor of this block.
326 void clearPredecessors() { Predecessors.clear(); }
327
328 /// Remove all the successors of this block.
329 void clearSuccessors() { Successors.clear(); }
330
331 /// Swap predecessors of the block. The block must have exactly 2
332 /// predecessors.
334 assert(Predecessors.size() == 2 && "must have 2 predecessors to swap");
335 std::swap(Predecessors[0], Predecessors[1]);
336 }
337
338 /// Swap successors of the block. The block must have exactly 2 successors.
339 // TODO: This should be part of introducing conditional branch recipes rather
340 // than being independent.
342 assert(Successors.size() == 2 && "must have 2 successors to swap");
343 std::swap(Successors[0], Successors[1]);
344 }
345
346 /// Returns the index for \p Pred in the blocks predecessors list.
347 unsigned getIndexForPredecessor(const VPBlockBase *Pred) const {
348 assert(count(Predecessors, Pred) == 1 &&
349 "must have Pred exactly once in Predecessors");
350 return std::distance(Predecessors.begin(), find(Predecessors, Pred));
351 }
352
353 /// Returns the index for \p Succ in the blocks successor list.
354 unsigned getIndexForSuccessor(const VPBlockBase *Succ) const {
355 assert(count(Successors, Succ) == 1 &&
356 "must have Succ exactly once in Successors");
357 return std::distance(Successors.begin(), find(Successors, Succ));
358 }
359
360 /// The method which generates the output IR that correspond to this
361 /// VPBlockBase, thereby "executing" the VPlan.
362 virtual void execute(VPTransformState *State) = 0;
363
364 /// Return the cost of the block.
366
367#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
368 void printAsOperand(raw_ostream &OS, bool PrintType = false) const {
369 OS << getName();
370 }
371
372 /// Print plain-text dump of this VPBlockBase to \p O, prefixing all lines
373 /// with \p Indent. \p SlotTracker is used to print unnamed VPValue's using
374 /// consecutive numbers.
375 ///
376 /// Note that the numbering is applied to the whole VPlan, so printing
377 /// individual blocks is consistent with the whole VPlan printing.
378 virtual void print(raw_ostream &O, const Twine &Indent,
379 VPSlotTracker &SlotTracker) const = 0;
380
381 /// Print plain-text dump of this VPBlockBase to \p O.
382 void print(raw_ostream &O) const;
383
384 /// Print the successors of this block to \p O, prefixing all lines with \p
385 /// Indent.
386 void printSuccessors(raw_ostream &O, const Twine &Indent) const;
387
388 /// Dump this VPBlockBase to dbgs().
389 LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
390#endif
391
392 /// Clone the current block and its recipes without updating the operands of
393 /// the cloned recipes, including all blocks in the single-entry single-exit
394 /// region for VPRegionBlocks.
395 virtual VPBlockBase *clone() = 0;
396};
397
398/// VPRecipeBase is a base class modeling a sequence of one or more output IR
399/// instructions. VPRecipeBase owns the VPValues it defines through VPDef
400/// and is responsible for deleting its defined values. Single-value
401/// recipes must inherit from VPSingleDef instead of inheriting from both
402/// VPRecipeBase and VPValue separately.
404 : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
405 public VPDef,
406 public VPUser {
407 friend VPBasicBlock;
408 friend class VPBlockUtils;
409
410 /// Subclass identifier (for isa/dyn_cast).
411 const unsigned char SubclassID;
412
413 /// Each VPRecipe belongs to a single VPBasicBlock.
414 VPBasicBlock *Parent = nullptr;
415
416 /// The debug location for the recipe.
417 DebugLoc DL;
418
419public:
/// Identifiers for the concrete VPRecipeBase subclasses that get
/// instantiated. The value is stored in each recipe's SubclassID field and
/// drives concrete type identification. The numeric order is significant:
/// the phi-like recipes form one contiguous range and the header-phi recipes
/// a contiguous sub-range of it, delimited by the aliases at the end.
using VPRecipeTy = enum {
  VPBranchOnMaskSC,
  VPDerivedIVSC,
  VPExpandSCEVSC,
  VPExpressionSC,
  VPIRInstructionSC,
  VPInstructionSC,
  VPInterleaveEVLSC,
  VPInterleaveSC,
  VPReductionEVLSC,
  VPReductionSC,
  VPReplicateSC,
  VPScalarIVStepsSC,
  VPVectorPointerSC,
  VPVectorEndPointerSC,
  VPWidenCallSC,
  VPWidenCanonicalIVSC,
  VPWidenCastSC,
  VPWidenGEPSC,
  VPWidenIntrinsicSC,
  VPWidenLoadEVLSC,
  VPWidenLoadSC,
  VPWidenStoreEVLSC,
  VPWidenStoreSC,
  VPWidenSC,
  VPBlendSC,
  VPHistogramSC,
  // START: Phi-like recipes. Need to be kept together.
  VPWidenPHISC,
  VPPredInstPHISC,
  // START: SubclassID for recipes that inherit VPHeaderPHIRecipe.
  // VPHeaderPHIRecipe need to be kept together.
  VPCanonicalIVPHISC,
  VPCurrentIterationPHISC,
  VPActiveLaneMaskPHISC,
  VPFirstOrderRecurrencePHISC,
  VPWidenIntOrFpInductionSC,
  VPWidenPointerInductionSC,
  VPReductionPHISC,
  // END: SubclassID for recipes that inherit VPHeaderPHIRecipe
  // END: Phi-like recipes
  // Aliases marking the first/last entry of the two ranges above.
  VPFirstPHISC = VPWidenPHISC,
  VPFirstHeaderPHISC = VPCanonicalIVPHISC,
  VPLastHeaderPHISC = VPReductionPHISC,
  VPLastPHISC = VPReductionPHISC,
};
470
471 VPRecipeBase(const unsigned char SC, ArrayRef<VPValue *> Operands,
473 : VPDef(), VPUser(Operands), SubclassID(SC), DL(DL) {}
474
475 ~VPRecipeBase() override = default;
476
477 /// Clone the current recipe.
478 virtual VPRecipeBase *clone() = 0;
479
480 /// \return the VPBasicBlock which this VPRecipe belongs to.
481 VPBasicBlock *getParent() { return Parent; }
482 const VPBasicBlock *getParent() const { return Parent; }
483
484 /// \return the VPRegionBlock which the recipe belongs to.
485 VPRegionBlock *getRegion();
486 const VPRegionBlock *getRegion() const;
487
488 /// The method which generates the output IR instructions that correspond to
489 /// this VPRecipe, thereby "executing" the VPlan.
490 virtual void execute(VPTransformState &State) = 0;
491
492 /// Return the cost of this recipe, taking into account if the cost
493 /// computation should be skipped and the ForceTargetInstructionCost flag.
494 /// Also takes care of printing the cost for debugging.
496
497 /// Insert an unlinked recipe into a basic block immediately before
498 /// the specified recipe.
499 void insertBefore(VPRecipeBase *InsertPos);
500 /// Insert an unlinked recipe into \p BB immediately before the insertion
501 /// point \p IP;
502 void insertBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator IP);
503
504 /// Insert an unlinked Recipe into a basic block immediately after
505 /// the specified Recipe.
506 void insertAfter(VPRecipeBase *InsertPos);
507
508 /// Unlink this recipe from its current VPBasicBlock and insert it into
509 /// the VPBasicBlock that MovePos lives in, right after MovePos.
510 void moveAfter(VPRecipeBase *MovePos);
511
512 /// Unlink this recipe and insert into BB before I.
513 ///
514 /// \pre I is a valid iterator into BB.
515 void moveBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator I);
516
517 /// This method unlinks 'this' from the containing basic block, but does not
518 /// delete it.
519 void removeFromParent();
520
521 /// This method unlinks 'this' from the containing basic block and deletes it.
522 ///
523 /// \returns an iterator pointing to the element after the erased one
525
526 /// \return an ID for the concrete type of this object.
527 unsigned getVPRecipeID() const { return SubclassID; }
528
529 /// Method to support type inquiry through isa, cast, and dyn_cast.
530 static inline bool classof(const VPDef *D) {
531 // All VPDefs are also VPRecipeBases.
532 return true;
533 }
534
535 static inline bool classof(const VPUser *U) { return true; }
536
537 /// Returns true if the recipe may have side-effects.
538 bool mayHaveSideEffects() const;
539
540 /// Returns true for PHI-like recipes.
541 bool isPhi() const;
542
543 /// Returns true if the recipe may read from memory.
544 bool mayReadFromMemory() const;
545
546 /// Returns true if the recipe may write to memory.
547 bool mayWriteToMemory() const;
548
549 /// Returns true if the recipe may read from or write to memory.
550 bool mayReadOrWriteMemory() const {
552 }
553
554 /// Returns the debug location of the recipe.
555 DebugLoc getDebugLoc() const { return DL; }
556
557 /// Return true if the recipe is a scalar cast.
558 bool isScalarCast() const;
559
560 /// Set the recipe's debug location to \p NewDL.
561 void setDebugLoc(DebugLoc NewDL) { DL = NewDL; }
562
563#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
564 /// Dump the recipe to stderr (for debugging).
565 LLVM_ABI_FOR_TEST void dump() const;
566
567 /// Print the recipe, delegating to printRecipe().
568 void print(raw_ostream &O, const Twine &Indent,
570#endif
571
572protected:
573 /// Compute the cost of this recipe either using a recipe's specialized
574 /// implementation or using the legacy cost model and the underlying
575 /// instructions.
576 virtual InstructionCost computeCost(ElementCount VF,
577 VPCostContext &Ctx) const;
578
579#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
580 /// Each concrete VPRecipe prints itself, without printing common information,
581 /// like debug info or metadata.
582 virtual void printRecipe(raw_ostream &O, const Twine &Indent,
583 VPSlotTracker &SlotTracker) const = 0;
584#endif
585};
586
// Helper macro that stamps out the usual classof overloads for a recipe
// class identified by VPRecipeID. Overloads are provided for VPRecipeBase,
// VPValue (via its defining recipe), VPUser (via dyn_cast to VPRecipeBase)
// and VPSingleDefRecipe; the VPValue and VPUser forms yield false when no
// recipe can be obtained.
#define VP_CLASSOF_IMPL(VPRecipeID)                                            \
  static inline bool classof(const VPRecipeBase *R) {                          \
    return R->getVPRecipeID() == VPRecipeID;                                   \
  }                                                                            \
  static inline bool classof(const VPValue *V) {                               \
    if (auto *R = V->getDefiningRecipe())                                      \
      return R->getVPRecipeID() == VPRecipeID;                                 \
    return false;                                                              \
  }                                                                            \
  static inline bool classof(const VPUser *U) {                                \
    if (auto *R = dyn_cast<VPRecipeBase>(U))                                   \
      return R->getVPRecipeID() == VPRecipeID;                                 \
    return false;                                                              \
  }                                                                            \
  static inline bool classof(const VPSingleDefRecipe *R) {                     \
    return R->getVPRecipeID() == VPRecipeID;                                   \
  }
603
604/// VPSingleDefRecipe is a base class for recipes for modeling a sequence of one or
605/// more output IR that define a single result VPValue.
606/// Note that VPRecipeBase must be inherited from before VPValue.
608public:
609 VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
611 : VPRecipeBase(SC, Operands, DL), VPRecipeValue(this) {}
612
613 VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
615 : VPRecipeBase(SC, Operands, DL), VPRecipeValue(this, UV) {}
616
617 static inline bool classof(const VPRecipeBase *R) {
618 switch (R->getVPRecipeID()) {
619 case VPRecipeBase::VPDerivedIVSC:
620 case VPRecipeBase::VPExpandSCEVSC:
621 case VPRecipeBase::VPExpressionSC:
622 case VPRecipeBase::VPInstructionSC:
623 case VPRecipeBase::VPReductionEVLSC:
624 case VPRecipeBase::VPReductionSC:
625 case VPRecipeBase::VPReplicateSC:
626 case VPRecipeBase::VPScalarIVStepsSC:
627 case VPRecipeBase::VPVectorPointerSC:
628 case VPRecipeBase::VPVectorEndPointerSC:
629 case VPRecipeBase::VPWidenCallSC:
630 case VPRecipeBase::VPWidenCanonicalIVSC:
631 case VPRecipeBase::VPWidenCastSC:
632 case VPRecipeBase::VPWidenGEPSC:
633 case VPRecipeBase::VPWidenIntrinsicSC:
634 case VPRecipeBase::VPWidenSC:
635 case VPRecipeBase::VPBlendSC:
636 case VPRecipeBase::VPPredInstPHISC:
637 case VPRecipeBase::VPCanonicalIVPHISC:
638 case VPRecipeBase::VPCurrentIterationPHISC:
639 case VPRecipeBase::VPActiveLaneMaskPHISC:
640 case VPRecipeBase::VPFirstOrderRecurrencePHISC:
641 case VPRecipeBase::VPWidenPHISC:
642 case VPRecipeBase::VPWidenIntOrFpInductionSC:
643 case VPRecipeBase::VPWidenPointerInductionSC:
644 case VPRecipeBase::VPReductionPHISC:
645 return true;
646 case VPRecipeBase::VPBranchOnMaskSC:
647 case VPRecipeBase::VPInterleaveEVLSC:
648 case VPRecipeBase::VPInterleaveSC:
649 case VPRecipeBase::VPIRInstructionSC:
650 case VPRecipeBase::VPWidenLoadEVLSC:
651 case VPRecipeBase::VPWidenLoadSC:
652 case VPRecipeBase::VPWidenStoreEVLSC:
653 case VPRecipeBase::VPWidenStoreSC:
654 case VPRecipeBase::VPHistogramSC:
655 // TODO: Widened stores don't define a value, but widened loads do. Split
656 // the recipes to be able to make widened loads VPSingleDefRecipes.
657 return false;
658 }
659 llvm_unreachable("Unhandled VPRecipeID");
660 }
661
662 static inline bool classof(const VPValue *V) {
663 auto *R = V->getDefiningRecipe();
664 return R && classof(R);
665 }
666
667 static inline bool classof(const VPUser *U) {
668 auto *R = dyn_cast<VPRecipeBase>(U);
669 return R && classof(R);
670 }
671
672 VPSingleDefRecipe *clone() override = 0;
673
674 /// Returns the underlying instruction.
681
682#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
683 /// Print this VPSingleDefRecipe to dbgs() (for debugging).
685#endif
686};
687
688/// Class to record and manage LLVM IR flags.
/// Discriminator selecting which group of flags is currently stored.
enum class OperationType : unsigned char {
  Cmp,
  FCmp,
  OverflowingBinOp,
  Trunc,
  DisjointOp,
  PossiblyExactOp,
  GEPOp,
  FPMathOp,
  NonNegOp,
  ReductionOp,
  Other
};
704
705public:
706 struct WrapFlagsTy {
707 char HasNUW : 1;
708 char HasNSW : 1;
709
711 };
712
714 char HasNUW : 1;
715 char HasNSW : 1;
716
718 };
719
724
726 char NonNeg : 1;
727 NonNegFlagsTy(bool IsNonNeg) : NonNeg(IsNonNeg) {}
728 };
729
730private:
/// Single-bit storage for the "exact" flag.
struct ExactFlagsTy {
  char IsExact : 1;

  /// Initialise the flag from \p Exact.
  ExactFlagsTy(bool Exact) : IsExact(Exact) {}
};
735 struct FastMathFlagsTy {
736 char AllowReassoc : 1;
737 char NoNaNs : 1;
738 char NoInfs : 1;
739 char NoSignedZeros : 1;
740 char AllowReciprocal : 1;
741 char AllowContract : 1;
742 char ApproxFunc : 1;
743
744 LLVM_ABI_FOR_TEST FastMathFlagsTy(const FastMathFlags &FMF);
745 };
746 /// Holds both the predicate and fast-math flags for floating-point
747 /// comparisons.
748 struct FCmpFlagsTy {
749 uint8_t CmpPredStorage;
750 FastMathFlagsTy FMFs;
751 };
752 /// Holds reduction-specific flags: RecurKind, IsOrdered, IsInLoop, and FMFs.
753 struct ReductionFlagsTy {
754 // RecurKind has ~26 values, needs 5 bits but uses 6 bits to account for
755 // additional kinds.
756 unsigned char Kind : 6;
757 // TODO: Derive order/in-loop from plan and remove here.
758 unsigned char IsOrdered : 1;
759 unsigned char IsInLoop : 1;
760 FastMathFlagsTy FMFs;
761
762 ReductionFlagsTy(RecurKind Kind, bool IsOrdered, bool IsInLoop,
763 FastMathFlags FMFs)
764 : Kind(static_cast<unsigned char>(Kind)), IsOrdered(IsOrdered),
765 IsInLoop(IsInLoop), FMFs(FMFs) {}
766 };
767
768 OperationType OpType;
769
770 union {
775 ExactFlagsTy ExactFlags;
778 FastMathFlagsTy FMFs;
779 FCmpFlagsTy FCmpFlags;
780 ReductionFlagsTy ReductionFlags;
782 };
783
784public:
785 VPIRFlags() : OpType(OperationType::Other), AllFlags() {}
786
788 if (auto *FCmp = dyn_cast<FCmpInst>(&I)) {
789 OpType = OperationType::FCmp;
791 FCmp->getPredicate());
792 assert(getPredicate() == FCmp->getPredicate() && "predicate truncated");
793 FCmpFlags.FMFs = FCmp->getFastMathFlags();
794 } else if (auto *Op = dyn_cast<CmpInst>(&I)) {
795 OpType = OperationType::Cmp;
797 Op->getPredicate());
798 assert(getPredicate() == Op->getPredicate() && "predicate truncated");
799 } else if (auto *Op = dyn_cast<PossiblyDisjointInst>(&I)) {
800 OpType = OperationType::DisjointOp;
801 DisjointFlags.IsDisjoint = Op->isDisjoint();
802 } else if (auto *Op = dyn_cast<OverflowingBinaryOperator>(&I)) {
803 OpType = OperationType::OverflowingBinOp;
804 WrapFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
805 } else if (auto *Op = dyn_cast<TruncInst>(&I)) {
806 OpType = OperationType::Trunc;
807 TruncFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
808 } else if (auto *Op = dyn_cast<PossiblyExactOperator>(&I)) {
809 OpType = OperationType::PossiblyExactOp;
810 ExactFlags.IsExact = Op->isExact();
811 } else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
812 OpType = OperationType::GEPOp;
813 GEPFlagsStorage = GEP->getNoWrapFlags().getRaw();
814 assert(getGEPNoWrapFlags() == GEP->getNoWrapFlags() &&
815 "wrap flags truncated");
816 } else if (auto *PNNI = dyn_cast<PossiblyNonNegInst>(&I)) {
817 OpType = OperationType::NonNegOp;
818 NonNegFlags.NonNeg = PNNI->hasNonNeg();
819 } else if (auto *Op = dyn_cast<FPMathOperator>(&I)) {
820 OpType = OperationType::FPMathOp;
821 FMFs = Op->getFastMathFlags();
822 }
823 }
824
825 VPIRFlags(CmpInst::Predicate Pred) : OpType(OperationType::Cmp), AllFlags() {
827 assert(getPredicate() == Pred && "predicate truncated");
828 }
829
831 : OpType(OperationType::FCmp), AllFlags() {
833 assert(getPredicate() == Pred && "predicate truncated");
834 FCmpFlags.FMFs = FMFs;
835 }
836
838 : OpType(OperationType::OverflowingBinOp), AllFlags() {
839 this->WrapFlags = WrapFlags;
840 }
841
843 : OpType(OperationType::Trunc), AllFlags() {
844 this->TruncFlags = TruncFlags;
845 }
846
847 VPIRFlags(FastMathFlags FMFs) : OpType(OperationType::FPMathOp), AllFlags() {
848 this->FMFs = FMFs;
849 }
850
852 : OpType(OperationType::DisjointOp), AllFlags() {
853 this->DisjointFlags = DisjointFlags;
854 }
855
857 : OpType(OperationType::NonNegOp), AllFlags() {
858 this->NonNegFlags = NonNegFlags;
859 }
860
861 VPIRFlags(ExactFlagsTy ExactFlags)
862 : OpType(OperationType::PossiblyExactOp), AllFlags() {
863 this->ExactFlags = ExactFlags;
864 }
865
867 : OpType(OperationType::GEPOp), AllFlags() {
868 GEPFlagsStorage = GEPFlags.getRaw();
869 }
870
871 VPIRFlags(RecurKind Kind, bool IsOrdered, bool IsInLoop, FastMathFlags FMFs)
872 : OpType(OperationType::ReductionOp), AllFlags() {
873 ReductionFlags = ReductionFlagsTy(Kind, IsOrdered, IsInLoop, FMFs);
874 }
875
877 OpType = Other.OpType;
878 AllFlags[0] = Other.AllFlags[0];
879 AllFlags[1] = Other.AllFlags[1];
880 }
881
882 /// Only keep flags also present in \p Other. \p Other must have the same
883 /// OpType as the current object.
884 void intersectFlags(const VPIRFlags &Other);
885
886 /// Drop all poison-generating flags.
888 // NOTE: This needs to be kept in-sync with
889 // Instruction::dropPoisonGeneratingFlags.
890 switch (OpType) {
891 case OperationType::OverflowingBinOp:
892 WrapFlags.HasNUW = false;
893 WrapFlags.HasNSW = false;
894 break;
895 case OperationType::Trunc:
896 TruncFlags.HasNUW = false;
897 TruncFlags.HasNSW = false;
898 break;
899 case OperationType::DisjointOp:
900 DisjointFlags.IsDisjoint = false;
901 break;
902 case OperationType::PossiblyExactOp:
903 ExactFlags.IsExact = false;
904 break;
905 case OperationType::GEPOp:
906 GEPFlagsStorage = 0;
907 break;
908 case OperationType::FPMathOp:
909 case OperationType::FCmp:
910 case OperationType::ReductionOp:
911 getFMFsRef().NoNaNs = false;
912 getFMFsRef().NoInfs = false;
913 break;
914 case OperationType::NonNegOp:
915 NonNegFlags.NonNeg = false;
916 break;
917 case OperationType::Cmp:
918 case OperationType::Other:
919 break;
920 }
921 }
922
923 /// Apply the IR flags to \p I.
924 void applyFlags(Instruction &I) const {
925 switch (OpType) {
926 case OperationType::OverflowingBinOp:
927 I.setHasNoUnsignedWrap(WrapFlags.HasNUW);
928 I.setHasNoSignedWrap(WrapFlags.HasNSW);
929 break;
930 case OperationType::Trunc:
931 I.setHasNoUnsignedWrap(TruncFlags.HasNUW);
932 I.setHasNoSignedWrap(TruncFlags.HasNSW);
933 break;
934 case OperationType::DisjointOp:
935 cast<PossiblyDisjointInst>(&I)->setIsDisjoint(DisjointFlags.IsDisjoint);
936 break;
937 case OperationType::PossiblyExactOp:
938 I.setIsExact(ExactFlags.IsExact);
939 break;
940 case OperationType::GEPOp:
941 cast<GetElementPtrInst>(&I)->setNoWrapFlags(
943 break;
944 case OperationType::FPMathOp:
945 case OperationType::FCmp: {
946 const FastMathFlagsTy &F = getFMFsRef();
947 I.setHasAllowReassoc(F.AllowReassoc);
948 I.setHasNoNaNs(F.NoNaNs);
949 I.setHasNoInfs(F.NoInfs);
950 I.setHasNoSignedZeros(F.NoSignedZeros);
951 I.setHasAllowReciprocal(F.AllowReciprocal);
952 I.setHasAllowContract(F.AllowContract);
953 I.setHasApproxFunc(F.ApproxFunc);
954 break;
955 }
956 case OperationType::NonNegOp:
957 I.setNonNeg(NonNegFlags.NonNeg);
958 break;
959 case OperationType::ReductionOp:
960 llvm_unreachable("reduction ops should not use applyFlags");
961 case OperationType::Cmp:
962 case OperationType::Other:
963 break;
964 }
965 }
966
968 assert((OpType == OperationType::Cmp || OpType == OperationType::FCmp) &&
969 "recipe doesn't have a compare predicate");
970 uint8_t Storage = OpType == OperationType::FCmp ? FCmpFlags.CmpPredStorage
973 }
974
976 assert((OpType == OperationType::Cmp || OpType == OperationType::FCmp) &&
977 "recipe doesn't have a compare predicate");
978 if (OpType == OperationType::FCmp)
980 else
982 assert(getPredicate() == Pred && "predicate truncated");
983 }
984
988
989 /// Returns true if the recipe has a comparison predicate.
990 bool hasPredicate() const {
991 return OpType == OperationType::Cmp || OpType == OperationType::FCmp;
992 }
993
994 /// Returns true if the recipe has fast-math flags.
995 bool hasFastMathFlags() const {
996 return OpType == OperationType::FPMathOp || OpType == OperationType::FCmp ||
997 OpType == OperationType::ReductionOp;
998 }
999
1001
1002 /// Returns true if the recipe has non-negative flag.
1003 bool hasNonNegFlag() const { return OpType == OperationType::NonNegOp; }
1004
1005 bool isNonNeg() const {
1006 assert(OpType == OperationType::NonNegOp &&
1007 "recipe doesn't have a NNEG flag");
1008 return NonNegFlags.NonNeg;
1009 }
1010
1011 bool hasNoUnsignedWrap() const {
1012 switch (OpType) {
1013 case OperationType::OverflowingBinOp:
1014 return WrapFlags.HasNUW;
1015 case OperationType::Trunc:
1016 return TruncFlags.HasNUW;
1017 default:
1018 llvm_unreachable("recipe doesn't have a NUW flag");
1019 }
1020 }
1021
1022 bool hasNoSignedWrap() const {
1023 switch (OpType) {
1024 case OperationType::OverflowingBinOp:
1025 return WrapFlags.HasNSW;
1026 case OperationType::Trunc:
1027 return TruncFlags.HasNSW;
1028 default:
1029 llvm_unreachable("recipe doesn't have a NSW flag");
1030 }
1031 }
1032
1033 bool hasNoWrapFlags() const {
1034 switch (OpType) {
1035 case OperationType::OverflowingBinOp:
1036 case OperationType::Trunc:
1037 return true;
1038 default:
1039 return false;
1040 }
1041 }
1042
1044 return {hasNoUnsignedWrap(), hasNoSignedWrap()};
1045 }
1046
1047 bool isDisjoint() const {
1048 assert(OpType == OperationType::DisjointOp &&
1049 "recipe cannot have a disjoing flag");
1050 return DisjointFlags.IsDisjoint;
1051 }
1052
1054 assert(OpType == OperationType::ReductionOp &&
1055 "recipe doesn't have reduction flags");
1056 return static_cast<RecurKind>(ReductionFlags.Kind);
1057 }
1058
1059 bool isReductionOrdered() const {
1060 assert(OpType == OperationType::ReductionOp &&
1061 "recipe doesn't have reduction flags");
1062 return ReductionFlags.IsOrdered;
1063 }
1064
1065 bool isReductionInLoop() const {
1066 assert(OpType == OperationType::ReductionOp &&
1067 "recipe doesn't have reduction flags");
1068 return ReductionFlags.IsInLoop;
1069 }
1070
1071private:
1072 /// Get a reference to the fast-math flags for FPMathOp, FCmp or ReductionOp.
1073 FastMathFlagsTy &getFMFsRef() {
1074 if (OpType == OperationType::FCmp)
1075 return FCmpFlags.FMFs;
1076 if (OpType == OperationType::ReductionOp)
1077 return ReductionFlags.FMFs;
1078 return FMFs;
1079 }
1080 const FastMathFlagsTy &getFMFsRef() const {
1081 if (OpType == OperationType::FCmp)
1082 return FCmpFlags.FMFs;
1083 if (OpType == OperationType::ReductionOp)
1084 return ReductionFlags.FMFs;
1085 return FMFs;
1086 }
1087
1088public:
1089 /// Returns default flags for \p Opcode for opcodes that support it, asserts
1090 /// otherwise. Opcodes not supporting default flags include compares and
1091 /// ComputeReductionResult.
1092 static VPIRFlags getDefaultFlags(unsigned Opcode);
1093
1094#if !defined(NDEBUG)
1095 /// Returns true if the set flags are valid for \p Opcode.
1096 LLVM_ABI_FOR_TEST bool flagsValidForOpcode(unsigned Opcode) const;
1097
1098 /// Returns true if \p Opcode has its required flags set.
1099 LLVM_ABI_FOR_TEST bool hasRequiredFlagsForOpcode(unsigned Opcode) const;
1100#endif
1101
1102#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1103 void printFlags(raw_ostream &O) const;
1104#endif
1105};
1107
1108static_assert(sizeof(VPIRFlags) <= 3, "VPIRFlags should not grow");
1109
1110/// A pure-virtual common base class for recipes defining a single VPValue and
1111/// using IR flags.
1113 VPRecipeWithIRFlags(const unsigned char SC, ArrayRef<VPValue *> Operands,
1114 const VPIRFlags &Flags,
1116 : VPSingleDefRecipe(SC, Operands, DL), VPIRFlags(Flags) {}
1117
1118 static inline bool classof(const VPRecipeBase *R) {
1119 return R->getVPRecipeID() == VPRecipeBase::VPBlendSC ||
1120 R->getVPRecipeID() == VPRecipeBase::VPInstructionSC ||
1121 R->getVPRecipeID() == VPRecipeBase::VPWidenSC ||
1122 R->getVPRecipeID() == VPRecipeBase::VPWidenGEPSC ||
1123 R->getVPRecipeID() == VPRecipeBase::VPWidenCallSC ||
1124 R->getVPRecipeID() == VPRecipeBase::VPWidenCastSC ||
1125 R->getVPRecipeID() == VPRecipeBase::VPWidenIntrinsicSC ||
1126 R->getVPRecipeID() == VPRecipeBase::VPReductionSC ||
1127 R->getVPRecipeID() == VPRecipeBase::VPReductionEVLSC ||
1128 R->getVPRecipeID() == VPRecipeBase::VPReplicateSC ||
1129 R->getVPRecipeID() == VPRecipeBase::VPVectorEndPointerSC ||
1130 R->getVPRecipeID() == VPRecipeBase::VPVectorPointerSC;
1131 }
1132
1133 static inline bool classof(const VPUser *U) {
1134 auto *R = dyn_cast<VPRecipeBase>(U);
1135 return R && classof(R);
1136 }
1137
1138 static inline bool classof(const VPValue *V) {
1139 auto *R = V->getDefiningRecipe();
1140 return R && classof(R);
1141 }
1142
1144
1145 static inline bool classof(const VPSingleDefRecipe *R) {
1146 return classof(static_cast<const VPRecipeBase *>(R));
1147 }
1148
1149 void execute(VPTransformState &State) override = 0;
1150
1151 /// Compute the cost for this recipe for \p VF, using \p Opcode and \p Ctx.
1153 VPCostContext &Ctx) const;
1154};
1155
1156/// Helper to access the operand that contains the unroll part for this recipe
1157/// after unrolling.
1158template <unsigned PartOpIdx> class LLVM_ABI_FOR_TEST VPUnrollPartAccessor {
1159protected:
1160 /// Return the VPValue operand containing the unroll part or null if there is
1161 /// no such operand.
1162 VPValue *getUnrollPartOperand(const VPUser &U) const;
1163
1164 /// Return the unroll part.
1165 unsigned getUnrollPart(const VPUser &U) const;
1166};
1167
1168/// Helper to manage IR metadata for recipes. It filters out metadata that
1169/// cannot be propagated.
1172
1173public:
1174 VPIRMetadata() = default;
1175
/// Adds metadata that can be preserved from the original instruction
1177 /// \p I.
1179
1180 /// Copy constructor for cloning.
1182
1184
1185 /// Add all metadata to \p I.
1186 void applyMetadata(Instruction &I) const;
1187
1188 /// Set metadata with kind \p Kind to \p Node. If metadata with \p Kind
1189 /// already exists, it will be replaced. Otherwise, it will be added.
1190 void setMetadata(unsigned Kind, MDNode *Node) {
1191 auto It =
1192 llvm::find_if(Metadata, [Kind](const std::pair<unsigned, MDNode *> &P) {
1193 return P.first == Kind;
1194 });
1195 if (It != Metadata.end())
1196 It->second = Node;
1197 else
1198 Metadata.emplace_back(Kind, Node);
1199 }
1200
1201 /// Intersect this VPIRMetadata object with \p MD, keeping only metadata
1202 /// nodes that are common to both.
1203 void intersect(const VPIRMetadata &MD);
1204
1205 /// Get metadata of kind \p Kind. Returns nullptr if not found.
1206 MDNode *getMetadata(unsigned Kind) const {
1207 auto It =
1208 find_if(Metadata, [Kind](const auto &P) { return P.first == Kind; });
1209 return It != Metadata.end() ? It->second : nullptr;
1210 }
1211
1212#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1213 /// Print metadata with node IDs.
1214 void print(raw_ostream &O, VPSlotTracker &SlotTracker) const;
1215#endif
1216};
1217
1218/// This is a concrete Recipe that models a single VPlan-level instruction.
1219/// While as any Recipe it may generate a sequence of IR instructions when
1220/// executed, these instructions would always form a single-def expression as
1221/// the VPInstruction is also a single def-use vertex. Most VPInstruction
1222/// opcodes can take an optional mask. Masks may be assigned during
1223/// predication.
1225 public VPIRMetadata {
1226 friend class VPlanSlp;
1227
1228public:
/// VPlan opcodes, extending LLVM IR with idiomatic instructions.
1230 enum {
1232 Instruction::OtherOpsEnd + 1, // Combines the incoming and previous
1233 // values of a first-order recurrence.
1237 // Creates a mask where each lane is active (true) whilst the current
1238 // counter (first operand + index) is less than the second operand. i.e.
// mask[i] = icmp ult (op0 + i), op1
1240 // The size of the mask returned is VF * Multiplier (UF, third op).
1244 // Increment the canonical IV separately for each unrolled part.
1246 // Abstract instruction that compares two values and branches. This is
1247 // lowered to ICmp + BranchOnCond during VPlan to VPlan transformation.
1250 // Branch with 2 boolean condition operands and 3 successors. If condition
1251 // 0 is true, branches to successor 0; if condition 1 is true, branches to
1252 // successor 1; otherwise branches to successor 2. Expanded after region
1253 // dissolution into: (1) an OR of the two conditions branching to
1254 // middle.split or successor 2, and (2) middle.split branching to successor
1255 // 0 or successor 1 based on condition 0.
1258 /// Given operands of (the same) struct type, creates a struct of fixed-
1259 /// width vectors each containing a struct field of all operands. The
1260 /// number of operands matches the element count of every vector.
1262 /// Creates a fixed-width vector containing all operands. The number of
1263 /// operands matches the vector element count.
1265 /// Extracts all lanes from its (non-scalable) vector operand. This is an
1266 /// abstract VPInstruction whose single defined VPValue represents VF
1267 /// scalars extracted from a vector, to be replaced by VF ExtractElement
1268 /// VPInstructions.
1270 /// Compute the final result of a AnyOf reduction with select(cmp(),x,y),
1271 /// where one of (x,y) is loop invariant, and both x and y are integer type.
1274 // Extracts the last part of its operand. Removed during unrolling.
1276 // Extracts the last lane of its vector operand, per part.
1278 // Extracts the second-to-last lane from its operand or the second-to-last
1279 // part if it is scalar. In the latter case, the recipe will be removed
1280 // during unrolling.
1282 LogicalAnd, // Non-poison propagating logical And.
1283 LogicalOr, // Non-poison propagating logical Or.
1284 // Add an offset in bytes (second operand) to a base pointer (first
1285 // operand). Only generates scalar values (either for the first lane only or
1286 // for all lanes, depending on its uses).
1288 // Add a vector offset in bytes (second operand) to a scalar base pointer
1289 // (first operand).
1291 // Returns a scalar boolean value, which is true if any lane of its
1292 // (boolean) vector operands is true. It produces the reduced value across
1293 // all unrolled iterations. Unrolling will add all copies of its original
1294 // operand as additional operands. AnyOf is poison-safe as all operands
1295 // will be frozen.
1297 // Calculates the first active lane index of the vector predicate operands.
1298 // It produces the lane index across all unrolled iterations. Unrolling will
1299 // add all copies of its original operand as additional operands.
1300 // Implemented with @llvm.experimental.cttz.elts, but returns the expected
1301 // result even with operands that are all zeroes.
1303 // Calculates the last active lane index of the vector predicate operands.
1304 // The predicates must be prefix-masks (all 1s before all 0s). Used when
1305 // tail-folding to extract the correct live-out value from the last active
1306 // iteration. It produces the lane index across all unrolled iterations.
1307 // Unrolling will add all copies of its original operand as additional
1308 // operands.
1310 // Returns a reversed vector for the operand.
1312
1313 // The opcodes below are used for VPInstructionWithType.
1314 //
1315 /// Scale the first operand (vector step) by the second operand
1316 /// (scalar-step). Casts both operands to the result type if needed.
1318 /// Start vector for reductions with 3 operands: the original start value,
1319 /// the identity value for the reduction and an integer indicating the
1320 /// scaling factor.
1322 // Creates a step vector starting from 0 to VF with a step of 1.
1324 /// Extracts a single lane (first operand) from a set of vector operands.
1325 /// The lane specifies an index into a vector formed by combining all vector
1326 /// operands (all operands after the first one).
1328 /// Explicit user for the resume phi of the canonical induction in the main
1329 /// VPlan, used by the epilogue vector loop.
1331 /// Extracts the last active lane from a set of vectors. The first operand
1332 /// is the default value if no lanes in the masks are active. Conceptually,
1333 /// this concatenates all data vectors (odd operands), concatenates all
1334 /// masks (even operands -- ignoring the default value), and returns the
1335 /// last active value from the combined data vector using the combined mask.
1337
1338 /// Returns the value for vscale.
1340 /// Compute the exiting value of a wide induction after vectorization, that
1341 /// is the value of the last lane of the induction increment (i.e. its
1342 /// backedge value). Has the wide induction recipe as operand.
1346 };
1347
1348 /// Returns true if this VPInstruction generates scalar values for all lanes.
1349 /// Most VPInstructions generate a single value per part, either vector or
1350 /// scalar. VPReplicateRecipe takes care of generating multiple (scalar)
1351 /// values per all lanes, stemming from an original ingredient. This method
1352 /// identifies the (rare) cases of VPInstructions that do so as well, w/o an
1353 /// underlying ingredient.
1354 bool doesGeneratePerAllLanes() const;
1355
1356 /// Return the number of operands determined by the opcode of the
1357 /// VPInstruction, excluding mask. Returns -1u if the number of operands
1358 /// cannot be determined directly by the opcode.
1359 unsigned getNumOperandsForOpcode() const;
1360
1361private:
1362 typedef unsigned char OpcodeTy;
1363 OpcodeTy Opcode;
1364
1365 /// An optional name that can be used for the generated IR instruction.
1366 std::string Name;
1367
1368 /// Returns true if we can generate a scalar for the first lane only if
1369 /// needed.
1370 bool canGenerateScalarForFirstLane() const;
1371
1372 /// Utility methods serving execute(): generates a single vector instance of
/// the modeled instruction. \returns the generated value. In some cases an
1374 /// existing value is returned rather than a generated one.
1375 Value *generate(VPTransformState &State);
1376
1377 /// Returns true if the VPInstruction does not need masking.
1378 bool alwaysUnmasked() const {
1379 if (Opcode == VPInstruction::MaskedCond)
1380 return false;
1381
1382 // For now only VPInstructions with underlying values use masks.
1383 // TODO: provide masks to VPInstructions w/o underlying values.
1384 if (!getUnderlyingValue())
1385 return true;
1386
1387 return Opcode == Instruction::PHI || Opcode == Instruction::GetElementPtr;
1388 }
1389
1390public:
1391 VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands,
1392 const VPIRFlags &Flags = {}, const VPIRMetadata &MD = {},
1393 DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "");
1394
1395 VP_CLASSOF_IMPL(VPRecipeBase::VPInstructionSC)
1396
1397 VPInstruction *clone() override {
1398 auto *New = new VPInstruction(Opcode, operands(), *this, *this,
1399 getDebugLoc(), Name);
1400 if (getUnderlyingValue())
1401 New->setUnderlyingValue(getUnderlyingInstr());
1402 return New;
1403 }
1404
1405 unsigned getOpcode() const { return Opcode; }
1406
1407 /// Generate the instruction.
1408 /// TODO: We currently execute only per-part unless a specific instance is
1409 /// provided.
1410 void execute(VPTransformState &State) override;
1411
1412 /// Return the cost of this VPInstruction.
1413 InstructionCost computeCost(ElementCount VF,
1414 VPCostContext &Ctx) const override;
1415
1416#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1417 /// Print the VPInstruction to dbgs() (for debugging).
1418 LLVM_DUMP_METHOD void dump() const;
1419#endif
1420
1421 bool hasResult() const {
1422 // CallInst may or may not have a result, depending on the called function.
1423 // Conservatively return calls have results for now.
1424 switch (getOpcode()) {
1425 case Instruction::Ret:
1426 case Instruction::UncondBr:
1427 case Instruction::CondBr:
1428 case Instruction::Store:
1429 case Instruction::Switch:
1430 case Instruction::IndirectBr:
1431 case Instruction::Resume:
1432 case Instruction::CatchRet:
1433 case Instruction::Unreachable:
1434 case Instruction::Fence:
1435 case Instruction::AtomicRMW:
1439 return false;
1440 default:
1441 return true;
1442 }
1443 }
1444
1445 /// Returns true if the VPInstruction has a mask operand.
1446 bool isMasked() const {
1447 unsigned NumOpsForOpcode = getNumOperandsForOpcode();
1448 // VPInstructions without a fixed number of operands cannot be masked.
1449 if (NumOpsForOpcode == -1u)
1450 return false;
1451 return NumOpsForOpcode + 1 == getNumOperands();
1452 }
1453
1454 /// Returns the number of operands, excluding the mask if the VPInstruction is
1455 /// masked.
1456 unsigned getNumOperandsWithoutMask() const {
1457 return getNumOperands() - isMasked();
1458 }
1459
1460 /// Add mask \p Mask to an unmasked VPInstruction, if it needs masking.
1461 void addMask(VPValue *Mask) {
1462 assert(!isMasked() && "recipe is already masked");
1463 if (alwaysUnmasked())
1464 return;
1465 addOperand(Mask);
1466 }
1467
1468 /// Returns the mask for the VPInstruction. Returns nullptr for unmasked
1469 /// VPInstructions.
1470 VPValue *getMask() const {
1471 return isMasked() ? getOperand(getNumOperands() - 1) : nullptr;
1472 }
1473
1474 /// Returns an iterator range over the operands excluding the mask operand
1475 /// if present.
1482
1483 /// Returns true if the underlying opcode may read from or write to memory.
1484 bool opcodeMayReadOrWriteFromMemory() const;
1485
1486 /// Returns true if the recipe only uses the first lane of operand \p Op.
1487 bool usesFirstLaneOnly(const VPValue *Op) const override;
1488
1489 /// Returns true if the recipe only uses the first part of operand \p Op.
1490 bool usesFirstPartOnly(const VPValue *Op) const override;
1491
1492 /// Returns true if this VPInstruction produces a scalar value from a vector,
1493 /// e.g. by performing a reduction or extracting a lane.
1494 bool isVectorToScalar() const;
1495
1496 /// Returns true if this VPInstruction's operands are single scalars and the
1497 /// result is also a single scalar.
1498 bool isSingleScalar() const;
1499
1500 /// Returns the symbolic name assigned to the VPInstruction.
1501 StringRef getName() const { return Name; }
1502
1503 /// Set the symbolic name for the VPInstruction.
1504 void setName(StringRef NewName) { Name = NewName.str(); }
1505
1506protected:
1507#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1508 /// Print the VPInstruction to \p O.
1509 void printRecipe(raw_ostream &O, const Twine &Indent,
1510 VPSlotTracker &SlotTracker) const override;
1511#endif
1512};
1513
1514/// A specialization of VPInstruction augmenting it with a dedicated result
1515/// type, to be used when the opcode and operands of the VPInstruction don't
1516/// directly determine the result type. Note that there is no separate recipe ID
1517/// for VPInstructionWithType; it shares the same ID as VPInstruction and is
1518/// distinguished purely by the opcode.
1520 /// Scalar result type produced by the recipe.
1521 Type *ResultTy;
1522
1523public:
1525 Type *ResultTy, const VPIRFlags &Flags = {},
1526 const VPIRMetadata &Metadata = {},
1528 const Twine &Name = "")
1529 : VPInstruction(Opcode, Operands, Flags, Metadata, DL, Name),
1530 ResultTy(ResultTy) {}
1531
1532 static inline bool classof(const VPRecipeBase *R) {
1533 // VPInstructionWithType are VPInstructions with specific opcodes requiring
1534 // type information.
1535 if (R->isScalarCast())
1536 return true;
1537 auto *VPI = dyn_cast<VPInstruction>(R);
1538 if (!VPI)
1539 return false;
1540 switch (VPI->getOpcode()) {
1544 case Instruction::Load:
1545 return true;
1546 default:
1547 return false;
1548 }
1549 }
1550
1551 static inline bool classof(const VPUser *R) {
1553 }
1554
1555 VPInstruction *clone() override {
1556 auto *New =
1558 *this, *this, getDebugLoc(), getName());
1559 New->setUnderlyingValue(getUnderlyingValue());
1560 return New;
1561 }
1562
1563 void execute(VPTransformState &State) override;
1564
1565 /// Return the cost of this VPInstruction.
1567 VPCostContext &Ctx) const override {
1568 // TODO: Compute accurate cost after retiring the legacy cost model.
1569 return 0;
1570 }
1571
1572 Type *getResultType() const { return ResultTy; }
1573
1574protected:
1575#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1576 /// Print the recipe.
1577 void printRecipe(raw_ostream &O, const Twine &Indent,
1578 VPSlotTracker &SlotTracker) const override;
1579#endif
1580};
1581
1582/// Helper type to provide functions to access incoming values and blocks for
1583/// phi-like recipes.
1585protected:
1586 /// Return a VPRecipeBase* to the current object.
1587 virtual const VPRecipeBase *getAsRecipe() const = 0;
1588
1589public:
1590 virtual ~VPPhiAccessors() = default;
1591
1592 /// Returns the incoming VPValue with index \p Idx.
1593 VPValue *getIncomingValue(unsigned Idx) const {
1594 return getAsRecipe()->getOperand(Idx);
1595 }
1596
1597 /// Returns the incoming block with index \p Idx.
1598 const VPBasicBlock *getIncomingBlock(unsigned Idx) const;
1599
1600 /// Returns the incoming value for \p VPBB. \p VPBB must be an incoming block.
1601 VPValue *getIncomingValueForBlock(const VPBasicBlock *VPBB) const;
1602
1603 /// Sets the incoming value for \p VPBB to \p V. \p VPBB must be an incoming
1604 /// block.
1605 void setIncomingValueForBlock(const VPBasicBlock *VPBB, VPValue *V) const;
1606
1607 /// Returns the number of incoming values, also number of incoming blocks.
1608 virtual unsigned getNumIncoming() const {
1609 return getAsRecipe()->getNumOperands();
1610 }
1611
/// Returns an iterator range over the incoming values.
1614 return make_range(getAsRecipe()->op_begin(),
1615 getAsRecipe()->op_begin() + getNumIncoming());
1616 }
1617
1619 detail::index_iterator, std::function<const VPBasicBlock *(size_t)>>>;
1620
1621 /// Returns an iterator range over the incoming blocks.
1623 std::function<const VPBasicBlock *(size_t)> GetBlock = [this](size_t Idx) {
1624 return getIncomingBlock(Idx);
1625 };
1626 return map_range(index_range(0, getNumIncoming()), GetBlock);
1627 }
1628
1629 /// Returns an iterator range over pairs of incoming values and corresponding
1630 /// incoming blocks.
1636
1637 /// Removes the incoming value for \p IncomingBlock, which must be a
1638 /// predecessor.
1639 void removeIncomingValueFor(VPBlockBase *IncomingBlock) const;
1640
1641#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1642 /// Print the recipe.
1644#endif
1645};
1646
1649 const Twine &Name = "")
1650 : VPInstruction(Instruction::PHI, Operands, Flags, {}, DL, Name) {}
1651
1652 static inline bool classof(const VPUser *U) {
1653 auto *VPI = dyn_cast<VPInstruction>(U);
1654 return VPI && VPI->getOpcode() == Instruction::PHI;
1655 }
1656
1657 static inline bool classof(const VPValue *V) {
1658 auto *VPI = dyn_cast<VPInstruction>(V);
1659 return VPI && VPI->getOpcode() == Instruction::PHI;
1660 }
1661
1662 static inline bool classof(const VPSingleDefRecipe *SDR) {
1663 auto *VPI = dyn_cast<VPInstruction>(SDR);
1664 return VPI && VPI->getOpcode() == Instruction::PHI;
1665 }
1666
1667 VPPhi *clone() override {
1668 auto *PhiR = new VPPhi(operands(), *this, getDebugLoc(), getName());
1669 PhiR->setUnderlyingValue(getUnderlyingValue());
1670 return PhiR;
1671 }
1672
1673 void execute(VPTransformState &State) override;
1674
1675protected:
1676#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1677 /// Print the recipe.
1678 void printRecipe(raw_ostream &O, const Twine &Indent,
1679 VPSlotTracker &SlotTracker) const override;
1680#endif
1681
1682 const VPRecipeBase *getAsRecipe() const override { return this; }
1683};
1684
/// A recipe to wrap an original IR instruction not to be modified during
/// execution, except for PHIs. PHIs are modeled via the VPIRPhi subclass.
/// Except for PHIs, VPIRInstructions cannot have any operands.
1689 Instruction &I;
1690
1691protected:
1692 /// VPIRInstruction::create() should be used to create VPIRInstructions, as
1693 /// subclasses may need to be created, e.g. VPIRPhi.
1695 : VPRecipeBase(VPRecipeBase::VPIRInstructionSC, {}), I(I) {}
1696
1697public:
1698 ~VPIRInstruction() override = default;
1699
/// Create a new VPIRPhi for \p I, if it is a PHINode, otherwise create a
1701 /// VPIRInstruction.
1703
1704 VP_CLASSOF_IMPL(VPRecipeBase::VPIRInstructionSC)
1705
1707 auto *R = create(I);
1708 for (auto *Op : operands())
1709 R->addOperand(Op);
1710 return R;
1711 }
1712
1713 void execute(VPTransformState &State) override;
1714
1715 /// Return the cost of this VPIRInstruction.
1717 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
1718
1719 Instruction &getInstruction() const { return I; }
1720
1721 bool usesScalars(const VPValue *Op) const override {
1723 "Op must be an operand of the recipe");
1724 return true;
1725 }
1726
1727 bool usesFirstPartOnly(const VPValue *Op) const override {
1729 "Op must be an operand of the recipe");
1730 return true;
1731 }
1732
1733 bool usesFirstLaneOnly(const VPValue *Op) const override {
1735 "Op must be an operand of the recipe");
1736 return true;
1737 }
1738
1739protected:
1740#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1741 /// Print the recipe.
1742 void printRecipe(raw_ostream &O, const Twine &Indent,
1743 VPSlotTracker &SlotTracker) const override;
1744#endif
1745};
1746
1747/// An overlay for VPIRInstructions wrapping PHI nodes enabling convenient use
/// of cast/dyn_cast/isa and execute() implementation. A single VPValue operand is
1749/// allowed, and it is used to add a new incoming value for the single
1750/// predecessor VPBB.
1752 public VPPhiAccessors {
1754
1755 static inline bool classof(const VPRecipeBase *U) {
1756 auto *R = dyn_cast<VPIRInstruction>(U);
1757 return R && isa<PHINode>(R->getInstruction());
1758 }
1759
1760 static inline bool classof(const VPUser *U) {
1761 auto *R = dyn_cast<VPRecipeBase>(U);
1762 return R && classof(R);
1763 }
1764
1766
1767 void execute(VPTransformState &State) override;
1768
1769protected:
1770#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1771 /// Print the recipe.
1772 void printRecipe(raw_ostream &O, const Twine &Indent,
1773 VPSlotTracker &SlotTracker) const override;
1774#endif
1775
1776 const VPRecipeBase *getAsRecipe() const override { return this; }
1777};
1778
1779/// VPWidenRecipe is a recipe for producing a widened instruction using the
1780/// opcode and operands of the recipe. This recipe covers most of the
1781/// traditional vectorization cases where each recipe transforms into a
1782/// vectorized version of itself.
1784 public VPIRMetadata {
1785 unsigned Opcode;
1786
1787public:
1789 const VPIRFlags &Flags = {}, const VPIRMetadata &Metadata = {},
1790 DebugLoc DL = {})
1791 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenSC, Operands, Flags, DL),
1792 VPIRMetadata(Metadata), Opcode(I.getOpcode()) {
1793 setUnderlyingValue(&I);
1794 }
1795
1796 VPWidenRecipe(unsigned Opcode, ArrayRef<VPValue *> Operands,
1797 const VPIRFlags &Flags = {}, const VPIRMetadata &Metadata = {},
1798 DebugLoc DL = {})
1799 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenSC, Operands, Flags, DL),
1800 VPIRMetadata(Metadata), Opcode(Opcode) {}
1801
1802 ~VPWidenRecipe() override = default;
1803
1804 VPWidenRecipe *clone() override {
1805 if (auto *UV = getUnderlyingValue())
1806 return new VPWidenRecipe(*cast<Instruction>(UV), operands(), *this, *this,
1807 getDebugLoc());
1808 return new VPWidenRecipe(Opcode, operands(), *this, *this, getDebugLoc());
1809 }
1810
1811 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenSC)
1812
1813 /// Produce a widened instruction using the opcode and operands of the recipe,
1814 /// processing State.VF elements.
1815 void execute(VPTransformState &State) override;
1816
1817 /// Return the cost of this VPWidenRecipe.
1818 InstructionCost computeCost(ElementCount VF,
1819 VPCostContext &Ctx) const override;
1820
1821 unsigned getOpcode() const { return Opcode; }
1822
1823protected:
1824#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1825 /// Print the recipe.
1826 void printRecipe(raw_ostream &O, const Twine &Indent,
1827 VPSlotTracker &SlotTracker) const override;
1828#endif
1829
1830 /// Returns true if the recipe only uses the first lane of operand \p Op.
1831 bool usesFirstLaneOnly(const VPValue *Op) const override {
1833 "Op must be an operand of the recipe");
1834 return Opcode == Instruction::Select && Op == getOperand(0) &&
1835 Op->isDefinedOutsideLoopRegions();
1836 }
1837};
1838
1839/// VPWidenCastRecipe is a recipe to create vector cast instructions.
1841 /// Cast instruction opcode.
1842 Instruction::CastOps Opcode;
1843
1844 /// Result type for the cast.
1845 Type *ResultTy;
1846
1847public:
1849 CastInst *CI = nullptr, const VPIRFlags &Flags = {},
1850 const VPIRMetadata &Metadata = {},
1852 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenCastSC, Op, Flags, DL),
1853 VPIRMetadata(Metadata), Opcode(Opcode), ResultTy(ResultTy) {
1854 assert(flagsValidForOpcode(Opcode) &&
1855 "Set flags not supported for the provided opcode");
1857 "Opcode requires specific flags to be set");
1859 }
1860
1861 ~VPWidenCastRecipe() override = default;
1862
1864 return new VPWidenCastRecipe(Opcode, getOperand(0), ResultTy,
1866 *this, *this, getDebugLoc());
1867 }
1868
1869 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenCastSC)
1870
1871 /// Produce widened copies of the cast.
1872 LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
1873
1874 /// Return the cost of this VPWidenCastRecipe.
1876 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
1877
1878 Instruction::CastOps getOpcode() const { return Opcode; }
1879
1880 /// Returns the result type of the cast.
1881 Type *getResultType() const { return ResultTy; }
1882
1883protected:
1884#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1885 /// Print the recipe.
1886 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
1887 VPSlotTracker &SlotTracker) const override;
1888#endif
1889};
1890
1891/// A recipe for widening vector intrinsics.
1893 /// ID of the vector intrinsic to widen.
1894 Intrinsic::ID VectorIntrinsicID;
1895
1896 /// Scalar return type of the intrinsic.
1897 Type *ResultTy;
1898
1899 /// True if the intrinsic may read from memory.
1900 bool MayReadFromMemory;
1901
/// True if the intrinsic may write to memory.
1903 bool MayWriteToMemory;
1904
1905 /// True if the intrinsic may have side-effects.
1906 bool MayHaveSideEffects;
1907
1908public:
1910 ArrayRef<VPValue *> CallArguments, Type *Ty,
1911 const VPIRFlags &Flags = {},
1912 const VPIRMetadata &MD = {},
1914 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenIntrinsicSC, CallArguments,
1915 Flags, DL),
1916 VPIRMetadata(MD), VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty),
1917 MayReadFromMemory(CI.mayReadFromMemory()),
1918 MayWriteToMemory(CI.mayWriteToMemory()),
1919 MayHaveSideEffects(CI.mayHaveSideEffects()) {
1920 setUnderlyingValue(&CI);
1921 }
1922
1924 ArrayRef<VPValue *> CallArguments, Type *Ty,
1925 const VPIRFlags &Flags = {},
1926 const VPIRMetadata &Metadata = {},
1928 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenIntrinsicSC, CallArguments,
1929 Flags, DL),
1930 VPIRMetadata(Metadata), VectorIntrinsicID(VectorIntrinsicID),
1931 ResultTy(Ty) {
1932 LLVMContext &Ctx = Ty->getContext();
1933 AttributeSet Attrs = Intrinsic::getFnAttributes(Ctx, VectorIntrinsicID);
1934 MemoryEffects ME = Attrs.getMemoryEffects();
1935 MayReadFromMemory = !ME.onlyWritesMemory();
1936 MayWriteToMemory = !ME.onlyReadsMemory();
1937 MayHaveSideEffects = MayWriteToMemory ||
1938 !Attrs.hasAttribute(Attribute::NoUnwind) ||
1939 !Attrs.hasAttribute(Attribute::WillReturn);
1940 }
1941
1942 ~VPWidenIntrinsicRecipe() override = default;
1943
1945 if (Value *CI = getUnderlyingValue())
1946 return new VPWidenIntrinsicRecipe(*cast<CallInst>(CI), VectorIntrinsicID,
1947 operands(), ResultTy, *this, *this,
1948 getDebugLoc());
1949 return new VPWidenIntrinsicRecipe(VectorIntrinsicID, operands(), ResultTy,
1950 *this, *this, getDebugLoc());
1951 }
1952
1953 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenIntrinsicSC)
1954
1955 /// Produce a widened version of the vector intrinsic.
1956 LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
1957
1958 /// Return the cost of this vector intrinsic.
1960 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
1961
1962 /// Return the ID of the intrinsic.
1963 Intrinsic::ID getVectorIntrinsicID() const { return VectorIntrinsicID; }
1964
1965 /// Return the scalar return type of the intrinsic.
1966 Type *getResultType() const { return ResultTy; }
1967
1968 /// Return the name of the intrinsic as a string.
1970
1971 /// Returns true if the intrinsic may read from memory.
1972 bool mayReadFromMemory() const { return MayReadFromMemory; }
1973
1974 /// Returns true if the intrinsic may write to memory.
1975 bool mayWriteToMemory() const { return MayWriteToMemory; }
1976
1977 /// Returns true if the intrinsic may have side-effects.
1978 bool mayHaveSideEffects() const { return MayHaveSideEffects; }
1979
1980 LLVM_ABI_FOR_TEST bool usesFirstLaneOnly(const VPValue *Op) const override;
1981
1982protected:
1983#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1984 /// Print the recipe.
1985 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
1986 VPSlotTracker &SlotTracker) const override;
1987#endif
1988};
1989
1990/// A recipe for widening Call instructions using library calls.
1992 public VPIRMetadata {
1993 /// Variant stores a pointer to the chosen function. There is a 1:1 mapping
1994 /// between a given VF and the chosen vectorized variant, so there will be a
1995 /// different VPlan for each VF with a valid variant.
1996 Function *Variant;
1997
1998public:
2000 ArrayRef<VPValue *> CallArguments,
2001 const VPIRFlags &Flags = {},
2002 const VPIRMetadata &Metadata = {}, DebugLoc DL = {})
2003 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenCallSC, CallArguments, Flags,
2004 DL),
2005 VPIRMetadata(Metadata), Variant(Variant) {
2006 setUnderlyingValue(UV);
2007 assert(
2008 isa<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue()) &&
2009 "last operand must be the called function");
2010 }
2011
2012 ~VPWidenCallRecipe() override = default;
2013
2015 return new VPWidenCallRecipe(getUnderlyingValue(), Variant, operands(),
2016 *this, *this, getDebugLoc());
2017 }
2018
2019 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenCallSC)
2020
2021 /// Produce a widened version of the call instruction.
2022 void execute(VPTransformState &State) override;
2023
2024 /// Return the cost of this VPWidenCallRecipe.
2025 InstructionCost computeCost(ElementCount VF,
2026 VPCostContext &Ctx) const override;
2027
2031
2034
2035protected:
2036#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2037 /// Print the recipe.
2038 void printRecipe(raw_ostream &O, const Twine &Indent,
2039 VPSlotTracker &SlotTracker) const override;
2040#endif
2041};
2042
2043/// A recipe representing a sequence of load -> update -> store as part of
2044/// a histogram operation. This means there may be aliasing between vector
2045/// lanes, which is handled by the llvm.experimental.vector.histogram family
2046/// of intrinsics. The only update operations currently supported are
2047/// 'add' and 'sub' where the other term is loop-invariant.
2049 /// Opcode of the update operation, currently either add or sub.
2050 unsigned Opcode;
2051
2052public:
2053 VPHistogramRecipe(unsigned Opcode, ArrayRef<VPValue *> Operands,
2055 : VPRecipeBase(VPRecipeBase::VPHistogramSC, Operands, DL),
2056 Opcode(Opcode) {}
2057
2058 ~VPHistogramRecipe() override = default;
2059
2061 return new VPHistogramRecipe(Opcode, operands(), getDebugLoc());
2062 }
2063
// NOTE(review): the macro is defined outside this chunk; presumably it
// provides the classof() overloads for this recipe ID.
2064 VP_CLASSOF_IMPL(VPRecipeBase::VPHistogramSC)
2065
2066 /// Produce a vectorized histogram operation.
2067 void execute(VPTransformState &State) override;
2068
2069 /// Return the cost of this VPHistogramRecipe.
2071 VPCostContext &Ctx) const override;
2072
/// Returns the opcode of the update operation stored in this recipe.
2073 unsigned getOpcode() const { return Opcode; }
2074
2075 /// Returns the mask, which is the optional third operand. A null pointer is
2076 /// returned when no mask was provided, i.e. all lanes execute unconditionally.
2077 VPValue *getMask() const {
2078 return getNumOperands() != 3 ? nullptr : getOperand(2);
2079 }
2080
2081protected:
2082#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2083 /// Print the recipe
2084 void printRecipe(raw_ostream &O, const Twine &Indent,
2085 VPSlotTracker &SlotTracker) const override;
2086#endif
2087};
2088
2089/// A recipe for handling GEP instructions.
2091 Type *SourceElementTy;
2092
/// Returns true if the pointer operand (operand 0) is defined outside of
/// loop regions.
2093 bool isPointerLoopInvariant() const {
2094 return getOperand(0)->isDefinedOutsideLoopRegions();
2095 }
2096
/// Returns true if index \p I is defined outside of loop regions. Indices
/// follow the pointer operand, so index \p I is operand \p I + 1.
2097 bool isIndexLoopInvariant(unsigned I) const {
2098 return getOperand(I + 1)->isDefinedOutsideLoopRegions();
2099 }
2100
2101public:
2103 const VPIRFlags &Flags = {},
2105 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenGEPSC, Operands, Flags, DL),
2106 SourceElementTy(GEP->getSourceElementType()) {
2107 setUnderlyingValue(GEP);
2109 (void)Metadata;
2111 assert(Metadata.empty() && "unexpected metadata on GEP");
2112 }
2113
2114 ~VPWidenGEPRecipe() override = default;
2115
2118 operands(), *this, getDebugLoc());
2119 }
2120
2121 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenGEPSC)
2122
2123 /// This recipe generates a GEP instruction.
2124 unsigned getOpcode() const { return Instruction::GetElementPtr; }
2125
2126 /// Generate the gep nodes.
2127 void execute(VPTransformState &State) override;
2128
/// Returns the source element type, which the constructor takes from the
/// underlying GEP.
2129 Type *getSourceElementType() const { return SourceElementTy; }
2130
2131 /// Return the cost of this VPWidenGEPRecipe.
2133 VPCostContext &Ctx) const override {
2134 // TODO: Compute accurate cost after retiring the legacy cost model.
2135 return 0;
2136 }
2137
2138 /// Returns true if the recipe only uses the first lane of operand \p Op.
2139 bool usesFirstLaneOnly(const VPValue *Op) const override;
2140
2141protected:
2142#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2143 /// Print the recipe.
2144 void printRecipe(raw_ostream &O, const Twine &Indent,
2145 VPSlotTracker &SlotTracker) const override;
2146#endif
2147};
2148
2149/// A recipe to compute a pointer to the last element of each part of a widened
2150/// memory access for widened memory accesses of SourceElementTy. Used for
2151/// VPWidenMemoryRecipes or VPInterleaveRecipes that are reversed. An extra
2152/// Offset operand is added by convertToConcreteRecipes when UF = 1, and by the
2153/// unroller otherwise.
2155 Type *SourceElementTy;
2156
2157 /// The constant stride of the pointer computed by this recipe, expressed in
2158 /// units of SourceElementTy.
2159 int64_t Stride;
2160
2161public:
/// Create an end-pointer recipe. \p Ptr and \p VF become operands 0 and 1.
/// \p Stride is expressed in units of \p SourceElementTy and must be
/// negative (asserted below).
2162 VPVectorEndPointerRecipe(VPValue *Ptr, VPValue *VF, Type *SourceElementTy,
2163 int64_t Stride, GEPNoWrapFlags GEPFlags, DebugLoc DL)
2164 : VPRecipeWithIRFlags(VPRecipeBase::VPVectorEndPointerSC, {Ptr, VF},
2165 GEPFlags, DL),
2166 SourceElementTy(SourceElementTy), Stride(Stride) {
2167 assert(Stride < 0 && "Stride must be negative");
2168 }
2169
2170 VP_CLASSOF_IMPL(VPRecipeBase::VPVectorEndPointerSC)
2171
2172 Type *getSourceElementType() const { return SourceElementTy; }
2173 int64_t getStride() const { return Stride; }
2174 VPValue *getPointer() const { return getOperand(0); }
2175 VPValue *getVFValue() const { return getOperand(1); }
2177 return getNumOperands() == 3 ? getOperand(2) : nullptr;
2178 }
2179
2180 /// Adds the offset operand to the recipe.
2181 /// Offset = Stride * (VF - 1) + Part * Stride * VF.
2182 void materializeOffset(unsigned Part = 0);
2183
2184 void execute(VPTransformState &State) override;
2185
2186 bool usesFirstLaneOnly(const VPValue *Op) const override {
2188 "Op must be an operand of the recipe");
2189 return true;
2190 }
2191
2192 /// Return the cost of this VPVectorEndPointerRecipe.
2194 VPCostContext &Ctx) const override {
2195 // TODO: Compute accurate cost after retiring the legacy cost model.
2196 return 0;
2197 }
2198
2199 /// Returns true if the recipe only uses the first part of operand \p Op.
2200 bool usesFirstPartOnly(const VPValue *Op) const override {
2202 "Op must be an operand of the recipe");
2203 assert(getNumOperands() <= 2 && "must have at most two operands");
2204 return true;
2205 }
2206
2208 auto *VEPR = new VPVectorEndPointerRecipe(
2211 if (auto *Offset = getOffset())
2212 VEPR->addOperand(Offset);
2213 return VEPR;
2214 }
2215
2216protected:
2217#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2218 /// Print the recipe.
2219 void printRecipe(raw_ostream &O, const Twine &Indent,
2220 VPSlotTracker &SlotTracker) const override;
2221#endif
2222};
2223
2224/// A recipe to compute the pointers for widened memory accesses of \p
2225/// SourceElementTy. Unrolling adds an extra offset operand for unrolled parts >
2226/// 0 and it produces `GEP Ptr, Offset`. The offset for unrolled part 0 is 0.
2228 Type *SourceElementTy;
2229
2230public:
/// Create a vector-pointer recipe with \p Ptr as its only initial operand
/// and \p SourceElementTy as the element type of the access.
2231 VPVectorPointerRecipe(VPValue *Ptr, Type *SourceElementTy,
2232 GEPNoWrapFlags GEPFlags, DebugLoc DL)
2233 : VPRecipeWithIRFlags(VPRecipeBase::VPVectorPointerSC, Ptr, GEPFlags, DL),
2234 SourceElementTy(SourceElementTy) {}
2235
2236 VP_CLASSOF_IMPL(VPRecipeBase::VPVectorPointerSC)
2237
2239 return getNumOperands() == 2 ? getOperand(1) : nullptr;
2240 }
2241
2242 void execute(VPTransformState &State) override;
2243
2244 Type *getSourceElementType() const { return SourceElementTy; }
2245
2246 bool usesFirstLaneOnly(const VPValue *Op) const override {
2248 "Op must be an operand of the recipe");
2249 return true;
2250 }
2251
2252 /// Returns true if the recipe only uses the first part of operand \p Op.
2253 bool usesFirstPartOnly(const VPValue *Op) const override {
2255 "Op must be an operand of the recipe");
2256 assert(getNumOperands() <= 2 && "must have at most two operands");
2257 return true;
2258 }
2259
2261 auto *Clone = new VPVectorPointerRecipe(getOperand(0), SourceElementTy,
2263 if (auto *Off = getOffset())
2264 Clone->addOperand(Off);
2265 return Clone;
2266 }
2267
2268 /// Return the cost of this VPVectorPointerRecipe.
2270 VPCostContext &Ctx) const override {
2271 // TODO: Compute accurate cost after retiring the legacy cost model.
2272 return 0;
2273 }
2274
2275protected:
2276#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2277 /// Print the recipe.
2278 void printRecipe(raw_ostream &O, const Twine &Indent,
2279 VPSlotTracker &SlotTracker) const override;
2280#endif
2281};
2282
2283/// A pure virtual base class for all recipes modeling header phis, including
2284/// phis for first order recurrences, pointer inductions and reductions. The
2285/// start value is the first operand of the recipe and the incoming value from
2286/// the backedge is the second operand.
2287///
2288/// Inductions are modeled using the following sub-classes:
2289/// * VPCanonicalIVPHIRecipe: Canonical scalar induction of the vector loop,
2290/// starting at a specified value (zero for the main vector loop, the resume
2291/// value for the epilogue vector loop) and stepping by 1. The induction
2292/// controls exiting of the vector loop by comparing against the vector trip
2293/// count. Produces a single scalar PHI for the induction value per
2294/// iteration.
2295/// * VPWidenIntOrFpInductionRecipe: Generates vector values for integer and
2296/// floating point inductions with arbitrary start and step values. Produces
2297/// a vector PHI per-part.
2298/// * VPWidenPointerInductionRecipe: Generate vector and scalar values for a
2299/// pointer induction. Produces either a vector PHI per-part or scalar values
2300/// per-lane based on the canonical induction.
2301/// * VPFirstOrderRecurrencePHIRecipe
2302/// * VPReductionPHIRecipe
2303/// * VPActiveLaneMaskPHIRecipe
2304/// * VPEVLBasedIVPHIRecipe
2306 public VPPhiAccessors {
2307protected:
/// Construct a header phi with recipe ID \p VPRecipeID and underlying
/// instruction \p UnderlyingInstr. \p Start is passed on as the initial
/// operand, i.e. the start value returned by getStartValue().
2308 VPHeaderPHIRecipe(unsigned char VPRecipeID, Instruction *UnderlyingInstr,
2309 VPValue *Start, DebugLoc DL = DebugLoc::getUnknown())
2310 : VPSingleDefRecipe(VPRecipeID, Start, UnderlyingInstr, DL) {}
2311
2312 const VPRecipeBase *getAsRecipe() const override { return this; }
2313
2314public:
2315 ~VPHeaderPHIRecipe() override = default;
2316
2317 /// Method to support type inquiry through isa, cast, and dyn_cast.
2318 static inline bool classof(const VPRecipeBase *R) {
2319 return R->getVPRecipeID() >= VPRecipeBase::VPFirstHeaderPHISC &&
2320 R->getVPRecipeID() <= VPRecipeBase::VPLastHeaderPHISC;
2321 }
/// Type inquiry on a VPValue: true only if \p V is defined by a header phi
/// recipe. getDefiningRecipe() may yield null for a value that has no
/// defining recipe; such a value is never a header phi, so check for null
/// before classifying instead of handing a null pointer to isa<>.
2322 static inline bool classof(const VPValue *V) {
2323 auto *R = V->getDefiningRecipe();
return R && classof(R);
2324 }
2325 static inline bool classof(const VPSingleDefRecipe *R) {
2326 return isa<VPHeaderPHIRecipe>(static_cast<const VPRecipeBase *>(R));
2327 }
2328
2329 /// Generate the phi nodes.
2330 void execute(VPTransformState &State) override = 0;
2331
2332 /// Return the cost of this header phi recipe.
2334 VPCostContext &Ctx) const override;
2335
2336 /// Returns the start value of the phi, if one is set.
2338 return getNumOperands() == 0 ? nullptr : getOperand(0);
2339 }
2341 return getNumOperands() == 0 ? nullptr : getOperand(0);
2342 }
2343
2344 /// Update the start value of the recipe.
2346
2347 /// Returns the incoming value from the loop backedge.
2349 return getOperand(1);
2350 }
2351
2352 /// Update the incoming value from the loop backedge.
2354
2355 /// Returns the backedge value as a recipe. The backedge value is guaranteed
2356 /// to be a recipe.
2358 return *getBackedgeValue()->getDefiningRecipe();
2359 }
2360
2361protected:
2362#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2363 /// Print the recipe.
2364 void printRecipe(raw_ostream &O, const Twine &Indent,
2365 VPSlotTracker &SlotTracker) const override = 0;
2366#endif
2367};
2368
2369/// Base class for widened induction (VPWidenIntOrFpInductionRecipe and
2370/// VPWidenPointerInductionRecipe), providing shared functionality, including
2371/// retrieving the step value, induction descriptor and original phi node.
2373 const InductionDescriptor &IndDesc;
2374
2375public:
/// Create a widened induction recipe with recipe ID \p Kind for the phi
/// \p IV. \p Start is forwarded to the header-phi base and \p Step is added
/// as the next operand (operand 1, see getStepValue()). \p IndDesc is kept
/// by reference, so it has to outlive the recipe.
2376 VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start,
2377 VPValue *Step, const InductionDescriptor &IndDesc,
2378 DebugLoc DL)
2379 : VPHeaderPHIRecipe(Kind, IV, Start, DL), IndDesc(IndDesc) {
2380 addOperand(Step);
2381 }
2382
2383 static inline bool classof(const VPRecipeBase *R) {
2384 return R->getVPRecipeID() == VPRecipeBase::VPWidenIntOrFpInductionSC ||
2385 R->getVPRecipeID() == VPRecipeBase::VPWidenPointerInductionSC;
2386 }
2387
/// Type inquiry on a VPValue: true if \p V has a defining recipe and that
/// recipe is one of the widened induction recipes.
2388 static inline bool classof(const VPValue *V) {
2389 const auto *DefR = V->getDefiningRecipe();
2390 return DefR != nullptr && classof(DefR);
2391 }
2392
2393 static inline bool classof(const VPSingleDefRecipe *R) {
2394 return classof(static_cast<const VPRecipeBase *>(R));
2395 }
2396
2397 void execute(VPTransformState &State) override = 0;
2398
2399 /// Returns the start value of the induction.
2401
2402 /// Returns the step value of the induction.
2404 const VPValue *getStepValue() const { return getOperand(1); }
2405
2406 /// Update the step value of the recipe.
2407 void setStepValue(VPValue *V) { setOperand(1, V); }
2408
2410 const VPValue *getVFValue() const { return getOperand(2); }
2411
2412 /// Returns the number of incoming values, also number of incoming blocks.
2413 /// Note that at the moment, VPWidenPointerInductionRecipe only has a single
2414 /// incoming value, its start value.
2415 unsigned getNumIncoming() const override { return 1; }
2416
2417 /// Returns the underlying PHINode if one exists, or null otherwise.
2421
2422 /// Returns the induction descriptor for the recipe.
2423 const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }
2424
2426 // TODO: All operands of base recipe must exist and be at same index in
2427 // derived recipe.
2429 "VPWidenIntOrFpInductionRecipe generates its own backedge value");
2430 }
2431
2433 // TODO: All operands of base recipe must exist and be at same index in
2434 // derived recipe.
2436 "VPWidenIntOrFpInductionRecipe generates its own backedge value");
2437 }
2438
2439 /// Returns true if the recipe only uses the first lane of operand \p Op.
2440 bool usesFirstLaneOnly(const VPValue *Op) const override {
2442 "Op must be an operand of the recipe");
2443 // The recipe creates its own wide start value, so it only requests the
2444 // first lane of the operand.
2445 // TODO: Remove once creating the start value is modeled separately.
2446 return Op == getStartValue() || Op == getStepValue();
2447 }
2448};
2449
2450/// A recipe for handling phi nodes of integer and floating-point inductions,
2451/// producing their vector values. This is an abstract recipe and must be
2452/// converted to concrete recipes before executing.
2454 public VPIRFlags {
2455 TruncInst *Trunc;
2456
2457 // The constructors set up three operands (start, step and VF). If this
// recipe is unrolled it will have 2 additional operands, i.e. 5 in total.
2458 bool isUnrolled() const { return getNumOperands() == 5; }
2459
2460public:
2462 VPValue *VF, const InductionDescriptor &IndDesc,
2463 const VPIRFlags &Flags, DebugLoc DL)
2464 : VPWidenInductionRecipe(VPRecipeBase::VPWidenIntOrFpInductionSC, IV,
2465 Start, Step, IndDesc, DL),
2466 VPIRFlags(Flags), Trunc(nullptr) {
2467 addOperand(VF);
2468 }
2469
2471 VPValue *VF, const InductionDescriptor &IndDesc,
2472 TruncInst *Trunc, const VPIRFlags &Flags,
2473 DebugLoc DL)
2474 : VPWidenInductionRecipe(VPRecipeBase::VPWidenIntOrFpInductionSC, IV,
2475 Start, Step, IndDesc, DL),
2476 VPIRFlags(Flags), Trunc(Trunc) {
2477 addOperand(VF);
2479 (void)Metadata;
2480 if (Trunc)
2482 assert(Metadata.empty() && "unexpected metadata on Trunc");
2483 }
2484
2486
2492
2493 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenIntOrFpInductionSC)
2494
2495 void execute(VPTransformState &State) override {
2496 llvm_unreachable("cannot execute this recipe, should be expanded via "
2497 "expandVPWidenIntOrFpInductionRecipe");
2498 }
2499
2500 /// Returns the start value of the induction.
2502
2503 /// If the recipe has been unrolled, return the VPValue for the induction
2504 /// increment, otherwise return null.
2506 return isUnrolled() ? getOperand(getNumOperands() - 2) : nullptr;
2507 }
2508
2509 /// Returns the number of incoming values, also number of incoming blocks.
2510 /// Note that at the moment, VPWidenIntOrFpInductionRecipes only have a single
2511 /// incoming value, its start value.
2512 unsigned getNumIncoming() const override { return 1; }
2513
2514 /// Returns the TruncInst this recipe was constructed with, or nullptr if it
2515 /// was constructed without one.
2516 TruncInst *getTruncInst() { return Trunc; }
2517 const TruncInst *getTruncInst() const { return Trunc; }
2518
2519 /// Returns true if the induction is canonical, i.e. starting at 0 and
2520 /// incremented by UF * VF (= the original IV is incremented by 1) and has the
2521 /// same type as the canonical induction.
2522 bool isCanonical() const;
2523
2524 /// Returns the scalar type of the induction.
2526 return Trunc ? Trunc->getType() : getStartValue()->getType();
2527 }
2528
2529 /// Returns the VPValue representing the value of this induction at
2530 /// the last unrolled part, if it exists. Returns itself if unrolling did not
2531 /// take place.
2533 return isUnrolled() ? getOperand(getNumOperands() - 1) : this;
2534 }
2535
2536protected:
2537#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2538 /// Print the recipe.
2539 void printRecipe(raw_ostream &O, const Twine &Indent,
2540 VPSlotTracker &SlotTracker) const override;
2541#endif
2542};
2543
2545public:
2546 /// Create a new VPWidenPointerInductionRecipe for \p Phi with start value \p
2547 /// Start and the number of elements unrolled \p NumUnrolledElems, typically
2548 /// VF*UF.
2550 VPValue *NumUnrolledElems,
2551 const InductionDescriptor &IndDesc, DebugLoc DL)
2552 : VPWidenInductionRecipe(VPRecipeBase::VPWidenPointerInductionSC, Phi,
2553 Start, Step, IndDesc, DL) {
2554 addOperand(NumUnrolledElems);
2555 }
2556
2558
2564
2565 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenPointerInductionSC)
2566
2567 /// Not executable: this recipe must be expanded before execution.
2568 void execute(VPTransformState &State) override {
2569 llvm_unreachable("cannot execute this recipe, should be expanded via "
2570 "expandVPWidenPointerInduction");
2571 }
2572
2573 /// Returns true if only scalar values will be generated.
2574 bool onlyScalarsGenerated(bool IsScalable);
2575
2576protected:
2577#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2578 /// Print the recipe.
2579 void printRecipe(raw_ostream &O, const Twine &Indent,
2580 VPSlotTracker &SlotTracker) const override;
2581#endif
2582};
2583
2584/// A recipe for widened phis. Incoming values are operands of the recipe and
2585/// their operand index corresponds to the incoming predecessor block. If the
2586/// recipe is placed in an entry block to a (non-replicate) region, it must have
2587/// exactly 2 incoming values, the first from the predecessor of the region and
2588/// the second from the exiting block of the region.
2590 public VPPhiAccessors {
2591 /// Name to use for the generated IR instruction for the widened phi.
2592 std::string Name;
2593
2594public:
2595 /// Create a new VPWidenPHIRecipe for \p Phi with debug location \p DL. The
2596 /// recipe starts without operands; if \p Start is non-null it is added as
/// the first incoming value. \p Name is the name to use for the generated IR
/// instruction for the widened phi.
2597 VPWidenPHIRecipe(PHINode *Phi, VPValue *Start = nullptr,
2598 DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "")
2599 : VPSingleDefRecipe(VPRecipeBase::VPWidenPHISC, {}, Phi, DL),
2600 Name(Name.str()) {
2601 if (Start)
2602 addOperand(Start);
2603 }
2604
2606 auto *C =
2608 getOperand(0), getDebugLoc(), Name);
2610 C->addOperand(Op);
2611 return C;
2612 }
2613
2614 ~VPWidenPHIRecipe() override = default;
2615
2616 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenPHISC)
2617
2618 /// Generate the phi/select nodes.
2619 void execute(VPTransformState &State) override;
2620
2621 /// Return the cost of this VPWidenPHIRecipe.
2623 VPCostContext &Ctx) const override;
2624
2625protected:
2626#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2627 /// Print the recipe.
2628 void printRecipe(raw_ostream &O, const Twine &Indent,
2629 VPSlotTracker &SlotTracker) const override;
2630#endif
2631
2632 const VPRecipeBase *getAsRecipe() const override { return this; }
2633};
2634
2635/// A recipe for handling first-order recurrence phis. The start value is the
2636/// first operand of the recipe and the incoming value from the backedge is the
2637/// second operand.
2640 VPValue &BackedgeValue)
2641 : VPHeaderPHIRecipe(VPRecipeBase::VPFirstOrderRecurrencePHISC, Phi,
2642 &Start) {
2643 addOperand(&BackedgeValue);
2644 }
2645
2646 VP_CLASSOF_IMPL(VPRecipeBase::VPFirstOrderRecurrencePHISC)
2647
2652
2653 void execute(VPTransformState &State) override;
2654
2655 /// Return the cost of this first-order recurrence phi recipe.
2657 VPCostContext &Ctx) const override;
2658
2659 /// Returns true if the recipe only uses the first lane of operand \p Op.
2660 bool usesFirstLaneOnly(const VPValue *Op) const override {
2662 "Op must be an operand of the recipe");
2663 return Op == getStartValue();
2664 }
2665
2666protected:
2667#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2668 /// Print the recipe.
2669 void printRecipe(raw_ostream &O, const Twine &Indent,
2670 VPSlotTracker &SlotTracker) const override;
2671#endif
2672};
2673
2674/// Possible variants of a reduction.
2675
2676/// This reduction is ordered and in-loop.
2677struct RdxOrdered {};
2678/// This reduction is in-loop.
2679struct RdxInLoop {};
2680/// This reduction is unordered with the partial result scaled down by some
2681/// factor.
2684};
2685using ReductionStyle = std::variant<RdxOrdered, RdxInLoop, RdxUnordered>;
2686
/// Maps the \p InLoop and \p Ordered flags onto a ReductionStyle. An ordered
/// reduction is expected to be in-loop as well. \p ScaleFactor is only
/// recorded for reductions that are neither ordered nor in-loop.
2687inline ReductionStyle getReductionStyle(bool InLoop, bool Ordered,
2688 unsigned ScaleFactor) {
2689 assert((!Ordered || InLoop) && "Ordered implies in-loop");
2690 if (!Ordered && !InLoop)
2691 return RdxUnordered{/*VFScaleFactor=*/ScaleFactor};
2692 if (Ordered)
2693 return RdxOrdered{};
2694 return RdxInLoop{};
2695}
2696
2697/// A recipe for handling reduction phis. The start value is the first operand
2698/// of the recipe and the incoming value from the backedge is the second
2699/// operand.
2701 /// The recurrence kind of the reduction.
2702 const RecurKind Kind;
2703
2704 ReductionStyle Style;
2705
2706 /// The phi is part of a multi-use reduction (e.g., used in FindIV
2707 /// patterns for argmin/argmax).
2708 /// TODO: Also support cases where the phi itself has a single use, but its
2709 /// compare has multiple uses.
2710 bool HasUsesOutsideReductionChain;
2711
2712public:
2713 /// Create a new VPReductionPHIRecipe for the reduction \p Phi.
2715 VPValue &BackedgeValue, ReductionStyle Style,
2716 const VPIRFlags &Flags,
2717 bool HasUsesOutsideReductionChain = false)
2718 : VPHeaderPHIRecipe(VPRecipeBase::VPReductionPHISC, Phi, &Start),
2719 VPIRFlags(Flags), Kind(Kind), Style(Style),
2720 HasUsesOutsideReductionChain(HasUsesOutsideReductionChain) {
2721 addOperand(&BackedgeValue);
2722 }
2723
2724 ~VPReductionPHIRecipe() override = default;
2725
2727 return new VPReductionPHIRecipe(
2729 *getOperand(0), *getBackedgeValue(), Style, *this,
2730 HasUsesOutsideReductionChain);
2731 }
2732
2733 VP_CLASSOF_IMPL(VPRecipeBase::VPReductionPHISC)
2734
2735 /// Generate the phi/select nodes.
2736 void execute(VPTransformState &State) override;
2737
2738 /// Returns the factor by which the VF of this recipe's output is scaled
2739 /// down. Only the unordered style carries a factor; otherwise this is 1.
2740 unsigned getVFScaleFactor() const {
2741 const RdxUnordered *Unordered = std::get_if<RdxUnordered>(&Style);
2742 return Unordered == nullptr ? 1 : Unordered->VFScaleFactor;
2743 }
2744
2745 /// Set the VFScaleFactor for this reduction phi. Can only be set to a factor
2746 /// > 1.
2747 void setVFScaleFactor(unsigned ScaleFactor) {
2748 assert(ScaleFactor > 1 && "must set to scale factor > 1");
2749 Style = RdxUnordered{ScaleFactor};
2750 }
2751
2752 /// Returns the number of incoming values, also number of incoming blocks.
2753 /// A reduction phi has two incoming values: the start value and the value
2754 /// from the loop backedge.
2755 unsigned getNumIncoming() const override { return 2; }
2756
2757 /// Returns the recurrence kind of the reduction.
2758 RecurKind getRecurrenceKind() const { return Kind; }
2759
2760 /// Returns true, if the phi is part of an ordered reduction.
2761 bool isOrdered() const { return std::holds_alternative<RdxOrdered>(Style); }
2762
2763 /// Returns true for in-loop reductions; the ordered style counts as in-loop.
2764 bool isInLoop() const {
2765 const bool IsOrderedStyle = std::holds_alternative<RdxOrdered>(Style);
2766 return IsOrderedStyle || std::holds_alternative<RdxInLoop>(Style);
2767 }
2768
2769 /// Returns true if the reduction outputs a vector with a scaled down VF.
2770 bool isPartialReduction() const { return getVFScaleFactor() > 1; }
2771
2772 /// Returns true, if the phi is part of a multi-use reduction.
2774 return HasUsesOutsideReductionChain;
2775 }
2776
2777 /// Returns true if the recipe only uses the first lane of operand \p Op.
2778 bool usesFirstLaneOnly(const VPValue *Op) const override {
2780 "Op must be an operand of the recipe");
2781 return isOrdered() || isInLoop();
2782 }
2783
2784protected:
2785#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2786 /// Print the recipe.
2787 void printRecipe(raw_ostream &O, const Twine &Indent,
2788 VPSlotTracker &SlotTracker) const override;
2789#endif
2790};
2791
2792/// A recipe for vectorizing a phi-node as a sequence of mask-based select
2793/// instructions.
2795public:
2796 /// The blend operation is a User of the incoming values and of their
2797 /// respective masks, ordered [I0, M0, I1, M1, I2, M2, ...]. Note that M0 can
2798 /// be omitted (implied by passing an odd number of operands) in which case
2799 /// all other incoming values are merged into it.
2801 const VPIRFlags &Flags, DebugLoc DL)
2802 : VPRecipeWithIRFlags(VPRecipeBase::VPBlendSC, Operands, Flags, DL) {
2803 assert(Operands.size() >= 2 && "Expected at least two operands!");
2804 setUnderlyingValue(Phi);
2805 }
2806
2807 VPBlendRecipe *clone() override {
2809 operands(), *this, getDebugLoc());
2810 }
2811
2812 VP_CLASSOF_IMPL(VPRecipeBase::VPBlendSC)
2813
2814 /// A normalized blend is one that has an odd number of operands, whereby the
2815 /// first operand does not have an associated mask.
2816 bool isNormalized() const { return getNumOperands() % 2; }
2817
2818 /// Return the number of incoming values, taking into account when normalized
2819 /// the first incoming value will have no mask.
2820 unsigned getNumIncomingValues() const {
2821 return (getNumOperands() + isNormalized()) / 2;
2822 }
2823
2824 /// Returns the incoming value with index \p Idx. The first one is operand 0.
/// Later ones sit at 2 * Idx, shifted down by one when the blend is
/// normalized because the first incoming value then has no mask.
2825 VPValue *getIncomingValue(unsigned Idx) const {
2826 return getOperand(Idx == 0 ? 0 : Idx * 2 - isNormalized());
2827 }
2828
2829 /// Returns the mask with index \p Idx. Index 0 has a mask only when the
/// blend is not normalized; that mask is operand 1.
2830 VPValue *getMask(unsigned Idx) const {
2831 assert((Idx > 0 || !isNormalized()) && "First index has no mask!");
2832 return getOperand(Idx == 0 ? 1 : Idx * 2 + !isNormalized());
2833 }
2834
2835 /// Replaces the mask with index \p Idx by \p V. Index 0 has a mask only when
/// the blend is not normalized; that mask is operand 1.
2836 void setMask(unsigned Idx, VPValue *V) {
2837 assert((Idx > 0 || !isNormalized()) && "First index has no mask!");
2838 setOperand(Idx == 0 ? 1 : Idx * 2 + !isNormalized(), V);
2839 }
2840
2841 void execute(VPTransformState &State) override {
2842 llvm_unreachable("VPBlendRecipe should be expanded by simplifyBlends");
2843 }
2844
2845 /// Return the cost of this VPBlendRecipe.
2846 InstructionCost computeCost(ElementCount VF,
2847 VPCostContext &Ctx) const override;
2848
2849 /// Returns true if the recipe only uses the first lane of operand \p Op.
2850 bool usesFirstLaneOnly(const VPValue *Op) const override {
2852 "Op must be an operand of the recipe");
2853 // This recurses through Blend recipes only, so it must terminate at header
2854 // phis at the latest.
2855 return all_of(users(),
2856 [this](VPUser *U) { return U->usesFirstLaneOnly(this); });
2857 }
2858
2859protected:
2860#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2861 /// Print the recipe.
2862 void printRecipe(raw_ostream &O, const Twine &Indent,
2863 VPSlotTracker &SlotTracker) const override;
2864#endif
2865};
2866
2867/// A common base class for interleaved memory operations.
2868/// An Interleaved memory operation is a memory access method that combines
2869/// multiple strided loads/stores into a single wide load/store with shuffles.
2870/// The first operand is the start address. The optional operands are, in order,
2871/// the stored values and the mask.
2873 public VPIRMetadata {
2875
2876 /// Indicates if the interleave group is in a conditional block and requires a
2877 /// mask.
2878 bool HasMask = false;
2879
2880 /// Indicates if gaps between members of the group need to be masked out or if
2881 /// unused gaps can be loaded speculatively.
2882 bool NeedsMaskForGaps = false;
2883
2884protected:
2885 VPInterleaveBase(const unsigned char SC,
2887 ArrayRef<VPValue *> Operands,
2888 ArrayRef<VPValue *> StoredValues, VPValue *Mask,
2889 bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
2890 : VPRecipeBase(SC, Operands, DL), VPIRMetadata(MD), IG(IG),
2891 NeedsMaskForGaps(NeedsMaskForGaps) {
2892 // TODO: extend the masked interleaved-group support to reversed access.
2893 assert((!Mask || !IG->isReverse()) &&
2894 "Reversed masked interleave-group not supported.");
2895 if (StoredValues.empty()) {
2896 for (unsigned I = 0; I < IG->getFactor(); ++I)
2897 if (Instruction *Inst = IG->getMember(I)) {
2898 assert(!Inst->getType()->isVoidTy() && "must have result");
2899 new VPRecipeValue(this, Inst);
2900 }
2901 } else {
2902 for (auto *SV : StoredValues)
2903 addOperand(SV);
2904 }
2905 if (Mask) {
2906 HasMask = true;
2907 addOperand(Mask);
2908 }
2909 }
2910
2911public:
2912 VPInterleaveBase *clone() override = 0;
2913
2914 static inline bool classof(const VPRecipeBase *R) {
2915 return R->getVPRecipeID() == VPRecipeBase::VPInterleaveSC ||
2916 R->getVPRecipeID() == VPRecipeBase::VPInterleaveEVLSC;
2917 }
2918
2919 static inline bool classof(const VPUser *U) {
2920 auto *R = dyn_cast<VPRecipeBase>(U);
2921 return R && classof(R);
2922 }
2923
2924 /// Return the address accessed by this recipe.
2925 VPValue *getAddr() const {
2926 return getOperand(0); // Address is the 1st, mandatory operand.
2927 }
2928
2929 /// Return the mask used by this recipe. Note that a full mask is represented
2930 /// by a nullptr.
2931 VPValue *getMask() const {
2932 // Mask is optional and the last operand.
2933 return HasMask ? getOperand(getNumOperands() - 1) : nullptr;
2934 }
2935
2936 /// Return true if the access needs a mask because of the gaps.
2937 bool needsMaskForGaps() const { return NeedsMaskForGaps; }
2938
2940
2941 Instruction *getInsertPos() const { return IG->getInsertPos(); }
2942
2943 void execute(VPTransformState &State) override {
2944 llvm_unreachable("VPInterleaveBase should not be instantiated.");
2945 }
2946
2947 /// Return the cost of this recipe.
2948 InstructionCost computeCost(ElementCount VF,
2949 VPCostContext &Ctx) const override;
2950
2951 /// Returns true if the recipe only uses the first lane of operand \p Op.
2952 bool usesFirstLaneOnly(const VPValue *Op) const override = 0;
2953
2954 /// Returns the number of stored operands of this interleave group. Returns 0
2955 /// for load interleave groups.
2956 virtual unsigned getNumStoreOperands() const = 0;
2957
2958 /// Return the VPValues stored by this interleave group. If it is a load
2959 /// interleave group, return an empty ArrayRef.
2961 return {op_end() - (getNumStoreOperands() + (HasMask ? 1 : 0)),
2963 }
2964};
2965
2966/// VPInterleaveRecipe is a recipe for transforming an interleave group of load
2967/// or stores into one wide load/store and shuffles. The first operand of a
2968/// VPInterleave recipe is the address, followed by the stored values, followed
2969/// by an optional mask.
2971public:
2973 ArrayRef<VPValue *> StoredValues, VPValue *Mask,
2974 bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
2975 : VPInterleaveBase(VPRecipeBase::VPInterleaveSC, IG, Addr, StoredValues,
2976 Mask, NeedsMaskForGaps, MD, DL) {}
2977
2978 ~VPInterleaveRecipe() override = default;
2979
2983 needsMaskForGaps(), *this, getDebugLoc());
2984 }
2985
2986 VP_CLASSOF_IMPL(VPRecipeBase::VPInterleaveSC)
2987
2988 /// Generate the wide load or store, and shuffles.
2989 void execute(VPTransformState &State) override;
2990
2991 bool usesFirstLaneOnly(const VPValue *Op) const override {
2993 "Op must be an operand of the recipe");
2994 return Op == getAddr() && !llvm::is_contained(getStoredValues(), Op);
2995 }
2996
2997 unsigned getNumStoreOperands() const override {
2998 return getNumOperands() - (getMask() ? 2 : 1);
2999 }
3000
3001protected:
3002#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3003 /// Print the recipe.
3004 void printRecipe(raw_ostream &O, const Twine &Indent,
3005 VPSlotTracker &SlotTracker) const override;
3006#endif
3007};
3008
3009/// A recipe for interleaved memory operations with vector-predication
3010/// intrinsics. The first operand is the address, the second operand is the
3011/// explicit vector length. Stored values and mask are optional operands.
3013public:
3015 : VPInterleaveBase(VPRecipeBase::VPInterleaveEVLSC,
3016 R.getInterleaveGroup(), {R.getAddr(), &EVL},
3017 R.getStoredValues(), Mask, R.needsMaskForGaps(), R,
3018 R.getDebugLoc()) {
3019 assert(!getInterleaveGroup()->isReverse() &&
3020 "Reversed interleave-group with tail folding is not supported.");
3021 assert(!needsMaskForGaps() && "Interleaved access with gap mask is not "
3022 "supported for scalable vector.");
3023 }
3024
3025 ~VPInterleaveEVLRecipe() override = default;
3026
3028 llvm_unreachable("cloning not implemented yet");
3029 }
3030
3031 VP_CLASSOF_IMPL(VPRecipeBase::VPInterleaveEVLSC)
3032
3033 /// The VPValue of the explicit vector length.
3034 VPValue *getEVL() const { return getOperand(1); }
3035
3036 /// Generate the wide load or store, and shuffles.
3037 void execute(VPTransformState &State) override;
3038
3039 /// The recipe only uses the first lane of the address, and EVL operand.
3040 bool usesFirstLaneOnly(const VPValue *Op) const override {
3042 "Op must be an operand of the recipe");
3043 return (Op == getAddr() && !llvm::is_contained(getStoredValues(), Op)) ||
3044 Op == getEVL();
3045 }
3046
3047 unsigned getNumStoreOperands() const override {
3048 return getNumOperands() - (getMask() ? 3 : 2);
3049 }
3050
3051protected:
3052#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3053 /// Print the recipe.
3054 void printRecipe(raw_ostream &O, const Twine &Indent,
3055 VPSlotTracker &SlotTracker) const override;
3056#endif
3057};
3058
3059/// A recipe to represent inloop, ordered or partial reduction operations. It
3060/// performs a reduction on a vector operand into a scalar (vector in the case
3061/// of a partial reduction) value, and adds the result to a chain. The Operands
3062/// are {ChainOp, VecOp, [Condition]}.
3064
3065 /// The recurrence kind for the reduction in question.
3066 RecurKind RdxKind;
3067 /// Whether the reduction is conditional.
3068 bool IsConditional = false;
3069 ReductionStyle Style;
3070
3071protected:
3072 VPReductionRecipe(const unsigned char SC, RecurKind RdxKind,
3074 ArrayRef<VPValue *> Operands, VPValue *CondOp,
3075 ReductionStyle Style, DebugLoc DL)
3076 : VPRecipeWithIRFlags(SC, Operands, FMFs, DL), RdxKind(RdxKind),
3077 Style(Style) {
3078 if (CondOp) {
3079 IsConditional = true;
3080 addOperand(CondOp);
3081 }
3083 }
3084
3085public:
3087 VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
3089 : VPReductionRecipe(VPRecipeBase::VPReductionSC, RdxKind, FMFs, I,
3090 {ChainOp, VecOp}, CondOp, Style, DL) {}
3091
3093 VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
3095 : VPReductionRecipe(VPRecipeBase::VPReductionSC, RdxKind, FMFs, nullptr,
3096 {ChainOp, VecOp}, CondOp, Style, DL) {}
3097
3098 ~VPReductionRecipe() override = default;
3099
3101 return new VPReductionRecipe(RdxKind, getFastMathFlags(),
3103 getCondOp(), Style, getDebugLoc());
3104 }
3105
3106 static inline bool classof(const VPRecipeBase *R) {
3107 return R->getVPRecipeID() == VPRecipeBase::VPReductionSC ||
3108 R->getVPRecipeID() == VPRecipeBase::VPReductionEVLSC;
3109 }
3110
3111 static inline bool classof(const VPUser *U) {
3112 auto *R = dyn_cast<VPRecipeBase>(U);
3113 return R && classof(R);
3114 }
3115
3116 static inline bool classof(const VPValue *VPV) {
3117 const VPRecipeBase *R = VPV->getDefiningRecipe();
3118 return R && classof(R);
3119 }
3120
3121 static inline bool classof(const VPSingleDefRecipe *R) {
3122 return classof(static_cast<const VPRecipeBase *>(R));
3123 }
3124
3125 /// Generate the reduction in the loop.
3126 void execute(VPTransformState &State) override;
3127
3128 /// Return the cost of VPReductionRecipe.
3129 InstructionCost computeCost(ElementCount VF,
3130 VPCostContext &Ctx) const override;
3131
3132 /// Return the recurrence kind for the in-loop reduction.
3133 RecurKind getRecurrenceKind() const { return RdxKind; }
3134 /// Return true if the in-loop reduction is ordered.
3135 bool isOrdered() const { return std::holds_alternative<RdxOrdered>(Style); };
3136 /// Return true if the in-loop reduction is conditional.
3137 bool isConditional() const { return IsConditional; };
3138 /// Returns true if the reduction outputs a vector with a scaled down VF.
3139 bool isPartialReduction() const { return getVFScaleFactor() > 1; }
3140 /// Returns true if the reduction is in-loop.
3141 bool isInLoop() const {
3142 return std::holds_alternative<RdxInLoop>(Style) ||
3143 std::holds_alternative<RdxOrdered>(Style);
3144 }
3145 /// The VPValue of the scalar Chain being accumulated.
3146 VPValue *getChainOp() const { return getOperand(0); }
3147 /// The VPValue of the vector value to be reduced.
3148 VPValue *getVecOp() const { return getOperand(1); }
3149 /// The VPValue of the condition for the block.
3151 return isConditional() ? getOperand(getNumOperands() - 1) : nullptr;
3152 }
3153 /// Get the factor that the VF of this recipe's output should be scaled by, or
3154 /// 1 if it isn't scaled.
3155 unsigned getVFScaleFactor() const {
3156 auto *Partial = std::get_if<RdxUnordered>(&Style);
3157 return Partial ? Partial->VFScaleFactor : 1;
3158 }
3159
3160protected:
3161#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3162 /// Print the recipe.
3163 void printRecipe(raw_ostream &O, const Twine &Indent,
3164 VPSlotTracker &SlotTracker) const override;
3165#endif
3166};
3167
3168/// A recipe to represent inloop reduction operations with vector-predication
3169/// intrinsics, performing a reduction on a vector operand with the explicit
3170/// vector length (EVL) into a scalar value, and adding the result to a chain.
3171/// The Operands are {ChainOp, VecOp, EVL, [Condition]}.
3173public:
3176 : VPReductionRecipe(VPRecipeBase::VPReductionEVLSC, R.getRecurrenceKind(),
3177 R.getFastMathFlags(),
3179 {R.getChainOp(), R.getVecOp(), &EVL}, CondOp,
3180 getReductionStyle(/*InLoop=*/true, R.isOrdered(), 1),
3181 DL) {}
3182
3183 ~VPReductionEVLRecipe() override = default;
3184
3186 llvm_unreachable("cloning not implemented yet");
3187 }
3188
3189 VP_CLASSOF_IMPL(VPRecipeBase::VPReductionEVLSC)
3190
3191 /// Generate the reduction in the loop
3192 void execute(VPTransformState &State) override;
3193
3194 /// The VPValue of the explicit vector length.
3195 VPValue *getEVL() const { return getOperand(2); }
3196
3197 /// Returns true if the recipe only uses the first lane of operand \p Op.
3198 bool usesFirstLaneOnly(const VPValue *Op) const override {
3200 "Op must be an operand of the recipe");
3201 return Op == getEVL();
3202 }
3203
3204protected:
3205#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3206 /// Print the recipe.
3207 void printRecipe(raw_ostream &O, const Twine &Indent,
3208 VPSlotTracker &SlotTracker) const override;
3209#endif
3210};
3211
3212/// VPReplicateRecipe replicates a given instruction producing multiple scalar
3213/// copies of the original scalar type, one per lane, instead of producing a
3214/// single copy of widened type for all lanes. If the instruction is known to be
3215/// a single scalar, only one copy will be generated.
3217 public VPIRMetadata {
3218 /// Indicator if only a single replica is needed, instead of one per lane.
3219 bool IsSingleScalar;
3220
3221 /// Indicator if the replicas are also predicated.
3222 bool IsPredicated;
3223
3224public:
3226 bool IsSingleScalar, VPValue *Mask = nullptr,
3227 const VPIRFlags &Flags = {}, VPIRMetadata Metadata = {},
3228 DebugLoc DL = DebugLoc::getUnknown())
3229 : VPRecipeWithIRFlags(VPRecipeBase::VPReplicateSC, Operands, Flags, DL),
3230 VPIRMetadata(Metadata), IsSingleScalar(IsSingleScalar),
3231 IsPredicated(Mask) {
3232 setUnderlyingValue(I);
3233 if (Mask)
3234 addOperand(Mask);
3235 }
3236
3237 ~VPReplicateRecipe() override = default;
3238
3240 auto *Copy = new VPReplicateRecipe(
3241 getUnderlyingInstr(), operands(), IsSingleScalar,
3242 isPredicated() ? getMask() : nullptr, *this, *this, getDebugLoc());
3243 Copy->transferFlags(*this);
3244 return Copy;
3245 }
3246
3247 VP_CLASSOF_IMPL(VPRecipeBase::VPReplicateSC)
3248
3249 /// Generate replicas of the desired Ingredient. Replicas will be generated
3250 /// for all parts and lanes unless a specific part and lane are specified in
3251 /// the \p State.
3252 void execute(VPTransformState &State) override;
3253
3254 /// Return the cost of this VPReplicateRecipe.
3255 InstructionCost computeCost(ElementCount VF,
3256 VPCostContext &Ctx) const override;
3257
3258 bool isSingleScalar() const { return IsSingleScalar; }
3259
3260 bool isPredicated() const { return IsPredicated; }
3261
3262 /// Returns true if the recipe only uses the first lane of operand \p Op.
3263 bool usesFirstLaneOnly(const VPValue *Op) const override {
3265 "Op must be an operand of the recipe");
3266 return isSingleScalar();
3267 }
3268
3269 /// Returns true if the recipe uses scalars of operand \p Op.
3270 bool usesScalars(const VPValue *Op) const override {
3272 "Op must be an operand of the recipe");
3273 return true;
3274 }
3275
3276 /// Returns true if the recipe is used by a widened recipe via an intervening
3277 /// VPPredInstPHIRecipe. In this case, the scalar values should also be packed
3278 /// in a vector.
3279 bool shouldPack() const;
3280
3281 /// Return the mask of a predicated VPReplicateRecipe.
3283 assert(isPredicated() && "Trying to get the mask of a unpredicated recipe");
3284 return getOperand(getNumOperands() - 1);
3285 }
3286
3287 unsigned getOpcode() const { return getUnderlyingInstr()->getOpcode(); }
3288
3289protected:
3290#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3291 /// Print the recipe.
3292 void printRecipe(raw_ostream &O, const Twine &Indent,
3293 VPSlotTracker &SlotTracker) const override;
3294#endif
3295};
3296
3297/// A recipe for generating conditional branches on the bits of a mask.
3299public:
3301 : VPRecipeBase(VPRecipeBase::VPBranchOnMaskSC, {BlockInMask}, DL) {}
3302
3305 }
3306
3307 VP_CLASSOF_IMPL(VPRecipeBase::VPBranchOnMaskSC)
3308
3309 /// Generate the extraction of the appropriate bit from the block mask and the
3310 /// conditional branch.
3311 void execute(VPTransformState &State) override;
3312
3313 /// Return the cost of this VPBranchOnMaskRecipe.
3314 InstructionCost computeCost(ElementCount VF,
3315 VPCostContext &Ctx) const override;
3316
3317#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3318 /// Print the recipe.
3319 void printRecipe(raw_ostream &O, const Twine &Indent,
3320 VPSlotTracker &SlotTracker) const override {
3321 O << Indent << "BRANCH-ON-MASK ";
3323 }
3324#endif
3325
3326 /// Returns true if the recipe uses scalars of operand \p Op.
3327 bool usesScalars(const VPValue *Op) const override {
3329 "Op must be an operand of the recipe");
3330 return true;
3331 }
3332};
3333
3334/// A recipe to combine multiple recipes into a single 'expression' recipe,
3335/// which should be considered a single entity for cost-modeling and transforms.
3336/// The recipe needs to be 'decomposed', i.e. replaced by its individual
3337/// expression recipes, before execute. The individual expression recipes are
3338/// completely disconnected from the def-use graph of other recipes not part of
3339/// the expression. Def-use edges between pairs of expression recipes remain
3340/// intact, whereas every edge between an expression recipe and a recipe outside
3341/// the expression is elevated to connect the non-expression recipe with the
3342/// VPExpressionRecipe itself.
3343class VPExpressionRecipe : public VPSingleDefRecipe {
3344 /// Recipes included in this VPExpressionRecipe. This could contain
3345 /// duplicates.
3346 SmallVector<VPSingleDefRecipe *> ExpressionRecipes;
3347
3348 /// Temporary VPValues used for external operands of the expression, i.e.
3349 /// operands not defined by recipes in the expression.
3350 SmallVector<VPValue *> LiveInPlaceholders;
3351
3352 enum class ExpressionTypes {
3353 /// Represents an inloop extended reduction operation, performing a
3354 /// reduction on an extended vector operand into a scalar value, and adding
3355 /// the result to a chain.
3356 ExtendedReduction,
3357 /// Represent an inloop multiply-accumulate reduction, multiplying the
3358 /// extended vector operands, performing a reduction.add on the result, and
3359 /// adding the scalar result to a chain.
3360 ExtMulAccReduction,
3361 /// Represent an inloop multiply-accumulate reduction, multiplying the
3362 /// vector operands, performing a reduction.add on the result, and adding
3363 /// the scalar result to a chain.
3364 MulAccReduction,
3365 /// Represent an inloop multiply-accumulate reduction, multiplying the
3366 /// extended vector operands, negating the multiplication, performing a
3367 /// reduction.add on the result, and adding the scalar result to a chain.
3368 ExtNegatedMulAccReduction,
3369 };
3370
3371 /// Type of the expression.
3372 ExpressionTypes ExpressionType;
3373
3374 /// Construct a new VPExpressionRecipe by internalizing recipes in \p
3375 /// ExpressionRecipes. External operands (i.e. not defined by another recipe
3376 /// in the expression) are replaced by temporary VPValues and the original
3377 /// operands are transferred to the VPExpressionRecipe itself. Clone recipes
3378 /// as needed (excluding last) to ensure they are only used by other recipes
3379 /// in the expression.
3380 VPExpressionRecipe(ExpressionTypes ExpressionType,
3381 ArrayRef<VPSingleDefRecipe *> ExpressionRecipes);
3382
3383public:
3385 : VPExpressionRecipe(ExpressionTypes::ExtendedReduction, {Ext, Red}) {}
3387 : VPExpressionRecipe(ExpressionTypes::MulAccReduction, {Mul, Red}) {}
3390 : VPExpressionRecipe(ExpressionTypes::ExtMulAccReduction,
3391 {Ext0, Ext1, Mul, Red}) {}
3394 VPReductionRecipe *Red)
3395 : VPExpressionRecipe(ExpressionTypes::ExtNegatedMulAccReduction,
3396 {Ext0, Ext1, Mul, Sub, Red}) {
3397 assert(Mul->getOpcode() == Instruction::Mul && "Expected a mul");
3398 assert(Red->getRecurrenceKind() == RecurKind::Add &&
3399 "Expected an add reduction");
3400 assert(getNumOperands() >= 3 && "Expected at least three operands");
3401 [[maybe_unused]] auto *SubConst = dyn_cast<VPConstantInt>(getOperand(2));
3402 assert(SubConst && SubConst->isZero() &&
3403 Sub->getOpcode() == Instruction::Sub && "Expected a negating sub");
3404 }
3405
3407 SmallPtrSet<VPSingleDefRecipe *, 4> ExpressionRecipesSeen;
3408 for (auto *R : reverse(ExpressionRecipes)) {
3409 if (ExpressionRecipesSeen.insert(R).second)
3410 delete R;
3411 }
3412 for (VPValue *T : LiveInPlaceholders)
3413 delete T;
3414 }
3415
3416 VP_CLASSOF_IMPL(VPRecipeBase::VPExpressionSC)
3417
3418 VPExpressionRecipe *clone() override {
3419 assert(!ExpressionRecipes.empty() && "empty expressions should be removed");
3420 SmallVector<VPSingleDefRecipe *> NewExpressiondRecipes;
3421 for (auto *R : ExpressionRecipes)
3422 NewExpressiondRecipes.push_back(R->clone());
3423 for (auto *New : NewExpressiondRecipes) {
3424 for (const auto &[Idx, Old] : enumerate(ExpressionRecipes))
3425 New->replaceUsesOfWith(Old, NewExpressiondRecipes[Idx]);
3426 // Update placeholder operands in the cloned recipe to use the external
3427 // operands, to be internalized when the cloned expression is constructed.
3428 for (const auto &[Placeholder, OutsideOp] :
3429 zip(LiveInPlaceholders, operands()))
3430 New->replaceUsesOfWith(Placeholder, OutsideOp);
3431 }
3432 return new VPExpressionRecipe(ExpressionType, NewExpressiondRecipes);
3433 }
3434
3435 /// Return the VPValue to use to infer the result type of the recipe.
3437 unsigned OpIdx =
3438 cast<VPReductionRecipe>(ExpressionRecipes.back())->isConditional() ? 2
3439 : 1;
3440 return getOperand(getNumOperands() - OpIdx);
3441 }
3442
3443 /// Insert the recipes of the expression back into the VPlan, directly before
3444 /// the current recipe. Leaves the expression recipe empty, which must be
3445 /// removed before codegen.
3446 void decompose();
3447
3448 unsigned getVFScaleFactor() const {
3449 auto *PR = dyn_cast<VPReductionRecipe>(ExpressionRecipes.back());
3450 return PR ? PR->getVFScaleFactor() : 1;
3451 }
3452
3453 /// Method for generating code, must not be called as this recipe is abstract.
3454 void execute(VPTransformState &State) override {
3455 llvm_unreachable("recipe must be removed before execute");
3456 }
3457
3459 VPCostContext &Ctx) const override;
3460
3461 /// Returns true if this expression contains recipes that may read from or
3462 /// write to memory.
3463 bool mayReadOrWriteMemory() const;
3464
3465 /// Returns true if this expression contains recipes that may have side
3466 /// effects.
3467 bool mayHaveSideEffects() const;
3468
3469 /// Returns true if the result of this VPExpressionRecipe is a single-scalar.
3470 bool isSingleScalar() const;
3471
3472protected:
3473#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3474 /// Print the recipe.
3475 void printRecipe(raw_ostream &O, const Twine &Indent,
3476 VPSlotTracker &SlotTracker) const override;
3477#endif
3478};
3479
3480/// VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when
3481/// control converges back from a Branch-on-Mask. The phi nodes are needed in
3482/// order to merge values that are set under such a branch and feed their uses.
3483/// The phi nodes can be scalar or vector depending on the users of the value.
3484/// This recipe works in concert with VPBranchOnMaskRecipe.
3486public:
3487 /// Construct a VPPredInstPHIRecipe given \p PredV whose value needs phi
3488 /// nodes after merging back from a Branch-on-Mask.
3490 : VPSingleDefRecipe(VPRecipeBase::VPPredInstPHISC, PredV, DL) {}
3491 ~VPPredInstPHIRecipe() override = default;
3492
3494 return new VPPredInstPHIRecipe(getOperand(0), getDebugLoc());
3495 }
3496
3497 VP_CLASSOF_IMPL(VPRecipeBase::VPPredInstPHISC)
3498
3499 /// Generates phi nodes for live-outs (from a replicate region) as needed to
3500 /// retain SSA form.
3501 void execute(VPTransformState &State) override;
3502
3503 /// Return the cost of this VPPredInstPHIRecipe.
3505 VPCostContext &Ctx) const override {
3506 // TODO: Compute accurate cost after retiring the legacy cost model.
3507 return 0;
3508 }
3509
3510 /// Returns true if the recipe uses scalars of operand \p Op.
3511 bool usesScalars(const VPValue *Op) const override {
3513 "Op must be an operand of the recipe");
3514 return true;
3515 }
3516
3517protected:
3518#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3519 /// Print the recipe.
3520 void printRecipe(raw_ostream &O, const Twine &Indent,
3521 VPSlotTracker &SlotTracker) const override;
3522#endif
3523};
3524
3525/// A common base class for widening memory operations. An optional mask can be
3526/// provided as the last operand.
3528 public VPIRMetadata {
3529protected:
3531
3532 /// Alignment information for this memory access.
3534
3535 /// Whether the accessed addresses are consecutive.
3537
3538 /// Whether the consecutive accessed addresses are in reverse order.
3540
3541 /// Whether the memory access is masked.
3542 bool IsMasked = false;
3543
3544 void setMask(VPValue *Mask) {
3545 assert(!IsMasked && "cannot re-set mask");
3546 if (!Mask)
3547 return;
3548 addOperand(Mask);
3549 IsMasked = true;
3550 }
3551
3552 VPWidenMemoryRecipe(const char unsigned SC, Instruction &I,
3553 std::initializer_list<VPValue *> Operands,
3554 bool Consecutive, bool Reverse,
3555 const VPIRMetadata &Metadata, DebugLoc DL)
3556 : VPRecipeBase(SC, Operands, DL), VPIRMetadata(Metadata), Ingredient(I),
3558 Reverse(Reverse) {
3559 assert((Consecutive || !Reverse) && "Reverse implies consecutive");
3561 "Reversed acccess without VPVectorEndPointerRecipe address?");
3562 }
3563
3564public:
3566 llvm_unreachable("cloning not supported");
3567 }
3568
3569 static inline bool classof(const VPRecipeBase *R) {
3570 return R->getVPRecipeID() == VPRecipeBase::VPWidenLoadSC ||
3571 R->getVPRecipeID() == VPRecipeBase::VPWidenStoreSC ||
3572 R->getVPRecipeID() == VPRecipeBase::VPWidenLoadEVLSC ||
3573 R->getVPRecipeID() == VPRecipeBase::VPWidenStoreEVLSC;
3574 }
3575
3576 static inline bool classof(const VPUser *U) {
3577 auto *R = dyn_cast<VPRecipeBase>(U);
3578 return R && classof(R);
3579 }
3580
3581 /// Return whether the loaded-from / stored-to addresses are consecutive.
3582 bool isConsecutive() const { return Consecutive; }
3583
3584 /// Return whether the consecutive loaded/stored addresses are in reverse
3585 /// order.
3586 bool isReverse() const { return Reverse; }
3587
3588 /// Return the address accessed by this recipe.
3589 VPValue *getAddr() const { return getOperand(0); }
3590
3591 /// Returns true if the recipe is masked.
3592 bool isMasked() const { return IsMasked; }
3593
3594 /// Return the mask used by this recipe. Note that a full mask is represented
3595 /// by a nullptr.
3596 VPValue *getMask() const {
3597 // Mask is optional and therefore the last operand.
3598 return isMasked() ? getOperand(getNumOperands() - 1) : nullptr;
3599 }
3600
3601 /// Returns the alignment of the memory access.
3602 Align getAlign() const { return Alignment; }
3603
3604 /// Generate the wide load/store.
3605 void execute(VPTransformState &State) override {
3606 llvm_unreachable("VPWidenMemoryRecipe should not be instantiated.");
3607 }
3608
3609 /// Return the cost of this VPWidenMemoryRecipe.
3610 InstructionCost computeCost(ElementCount VF,
3611 VPCostContext &Ctx) const override;
3612
3614};
3615
3616/// A recipe for widening load operations, using the address to load from and an
3617/// optional mask.
3619 public VPRecipeValue {
3621 bool Consecutive, bool Reverse,
3622 const VPIRMetadata &Metadata, DebugLoc DL)
3623 : VPWidenMemoryRecipe(VPRecipeBase::VPWidenLoadSC, Load, {Addr},
3624 Consecutive, Reverse, Metadata, DL),
3625 VPRecipeValue(this, &Load) {
3626 setMask(Mask);
3627 }
3628
3631 getMask(), Consecutive, Reverse, *this,
3632 getDebugLoc());
3633 }
3634
3635 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenLoadSC);
3636
3637 /// Generate a wide load or gather.
3638 void execute(VPTransformState &State) override;
3639
3640 /// Returns true if the recipe only uses the first lane of operand \p Op.
3641 bool usesFirstLaneOnly(const VPValue *Op) const override {
3643 "Op must be an operand of the recipe");
3644 // Widened, consecutive loads operations only demand the first lane of
3645 // their address.
3646 return Op == getAddr() && isConsecutive();
3647 }
3648
3649protected:
3650#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3651 /// Print the recipe.
3652 void printRecipe(raw_ostream &O, const Twine &Indent,
3653 VPSlotTracker &SlotTracker) const override;
3654#endif
3655};
3656
3657/// A recipe for widening load operations with vector-predication intrinsics,
3658/// using the address to load from, the explicit vector length and an optional
3659/// mask.
3661 public VPRecipeValue {
3663 VPValue *Mask)
3664 : VPWidenMemoryRecipe(VPRecipeBase::VPWidenLoadEVLSC, L.getIngredient(),
3665 {Addr, &EVL}, L.isConsecutive(), L.isReverse(), L,
3666 L.getDebugLoc()),
3667 VPRecipeValue(this, &getIngredient()) {
3668 setMask(Mask);
3669 }
3670
3671 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenLoadEVLSC)
3672
3673 /// Return the EVL operand.
3674 VPValue *getEVL() const { return getOperand(1); }
3675
3676 /// Generate the wide load or gather.
3677 LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
3678
3679 /// Return the cost of this VPWidenLoadEVLRecipe.
3681 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
3682
3683 /// Returns true if the recipe only uses the first lane of operand \p Op.
3684 bool usesFirstLaneOnly(const VPValue *Op) const override {
3686 "Op must be an operand of the recipe");
3687 // Widened loads only demand the first lane of EVL and consecutive loads
3688 // only demand the first lane of their address.
3689 return Op == getEVL() || (Op == getAddr() && isConsecutive());
3690 }
3691
3692protected:
3693#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3694 /// Print the recipe.
3695 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
3696 VPSlotTracker &SlotTracker) const override;
3697#endif
3698};
3699
3700/// A recipe for widening store operations, using the stored value, the address
3701/// to store to and an optional mask.
3703 VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal,
3704 VPValue *Mask, bool Consecutive, bool Reverse,
3705 const VPIRMetadata &Metadata, DebugLoc DL)
3706 : VPWidenMemoryRecipe(VPRecipeBase::VPWidenStoreSC, Store,
3707 {Addr, StoredVal}, Consecutive, Reverse, Metadata,
3708 DL) {
3709 setMask(Mask);
3710 }
3711
3717
3718 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenStoreSC);
3719
3720 /// Return the value stored by this recipe.
3721 VPValue *getStoredValue() const { return getOperand(1); }
3722
3723 /// Generate a wide store or scatter.
3724 void execute(VPTransformState &State) override;
3725
3726 /// Returns true if the recipe only uses the first lane of operand \p Op.
3727 bool usesFirstLaneOnly(const VPValue *Op) const override {
3729 "Op must be an operand of the recipe");
3730 // Widened, consecutive stores only demand the first lane of their address,
3731 // unless the same operand is also stored.
3732 return Op == getAddr() && isConsecutive() && Op != getStoredValue();
3733 }
3734
3735protected:
3736#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3737 /// Print the recipe.
3738 void printRecipe(raw_ostream &O, const Twine &Indent,
3739 VPSlotTracker &SlotTracker) const override;
3740#endif
3741};
3742
3743/// A recipe for widening store operations with vector-predication intrinsics,
3744/// using the value to store, the address to store to, the explicit vector
3745/// length and an optional mask.
3748 VPValue *StoredVal, VPValue &EVL, VPValue *Mask)
3749 : VPWidenMemoryRecipe(VPRecipeBase::VPWidenStoreEVLSC, S.getIngredient(),
3750 {Addr, StoredVal, &EVL}, S.isConsecutive(),
3751 S.isReverse(), S, S.getDebugLoc()) {
3752 setMask(Mask);
3753 }
3754
3755 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenStoreEVLSC)
3756
3757 /// Return the value stored by this recipe.
3758 VPValue *getStoredValue() const { return getOperand(1); }
3759
3760 /// Return the EVL operand.
3761 VPValue *getEVL() const { return getOperand(2); }
3762
3763 /// Generate the wide store or scatter.
3764 LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
3765
3766 /// Return the cost of this VPWidenStoreEVLRecipe.
3768 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
3769
3770 /// Returns true if the recipe only uses the first lane of operand \p Op.
3771 bool usesFirstLaneOnly(const VPValue *Op) const override {
3773 "Op must be an operand of the recipe");
3774 if (Op == getEVL()) {
3775 assert(getStoredValue() != Op && "unexpected store of EVL");
3776 return true;
3777 }
3778 // Widened, consecutive memory operations only demand the first lane of
3779 // their address, unless the same operand is also stored. That latter can
3780 // happen with opaque pointers.
3781 return Op == getAddr() && isConsecutive() && Op != getStoredValue();
3782 }
3783
3784protected:
3785#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3786 /// Print the recipe.
3787 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
3788 VPSlotTracker &SlotTracker) const override;
3789#endif
3790};
3791
3792/// Recipe to expand a SCEV expression.
3794 const SCEV *Expr;
3795
3796public:
3798 : VPSingleDefRecipe(VPRecipeBase::VPExpandSCEVSC, {}), Expr(Expr) {}
3799
3800 ~VPExpandSCEVRecipe() override = default;
3801
3802 VPExpandSCEVRecipe *clone() override { return new VPExpandSCEVRecipe(Expr); }
3803
3804 VP_CLASSOF_IMPL(VPRecipeBase::VPExpandSCEVSC)
3805
3806 void execute(VPTransformState &State) override {
3807 llvm_unreachable("SCEV expressions must be expanded before final execute");
3808 }
3809
3810 /// Return the cost of this VPExpandSCEVRecipe.
3812 VPCostContext &Ctx) const override {
3813 // TODO: Compute accurate cost after retiring the legacy cost model.
3814 return 0;
3815 }
3816
3817 const SCEV *getSCEV() const { return Expr; }
3818
3819protected:
3820#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3821 /// Print the recipe.
3822 void printRecipe(raw_ostream &O, const Twine &Indent,
3823 VPSlotTracker &SlotTracker) const override;
3824#endif
3825};
3826
3827/// Canonical scalar induction phi of the vector loop, starting at the specified
3828/// start value (either 0 or the resume value when vectorizing the epilogue
3829/// loop). VPWidenCanonicalIVRecipe represents the vector version of the
3830/// canonical induction variable.
3832public:
3834 : VPHeaderPHIRecipe(VPRecipeBase::VPCanonicalIVPHISC, nullptr, StartV,
3835 DL) {}
3836
3837 ~VPCanonicalIVPHIRecipe() override = default;
3838
3841 R->addOperand(getBackedgeValue());
3842 return R;
3843 }
3844
3845 VP_CLASSOF_IMPL(VPRecipeBase::VPCanonicalIVPHISC)
3846
3847 void execute(VPTransformState &State) override {
3848 llvm_unreachable("cannot execute this recipe, should be replaced by a "
3849 "scalar phi recipe");
3850 }
3851
3852 /// Returns the start value of the canonical induction.
3854
3855 /// Returns the scalar type of the induction.
3856 Type *getScalarType() const { return getStartValue()->getType(); }
3857
3858 /// Returns true if the recipe only uses the first lane of operand \p Op.
3859 bool usesFirstLaneOnly(const VPValue *Op) const override {
3861 "Op must be an operand of the recipe");
3862 return true;
3863 }
3864
3865 /// Returns true if the recipe only uses the first part of operand \p Op.
3866 bool usesFirstPartOnly(const VPValue *Op) const override {
3868 "Op must be an operand of the recipe");
3869 return true;
3870 }
3871
3872 /// Return the cost of this VPCanonicalIVPHIRecipe.
3874 VPCostContext &Ctx) const override {
3875 // For now, match the behavior of the legacy cost model.
3876 return 0;
3877 }
3878
3879protected:
3880#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3881 /// Print the recipe.
3882 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
3883 VPSlotTracker &SlotTracker) const override;
3884#endif
3885};
3886
3887/// A recipe for generating the active lane mask for the vector loop that is
3888/// used to predicate the vector operations.
3890public:
3892 : VPHeaderPHIRecipe(VPRecipeBase::VPActiveLaneMaskPHISC, nullptr,
3893 StartMask, DL) {}
3894
3895 ~VPActiveLaneMaskPHIRecipe() override = default;
3896
3899 if (getNumOperands() == 2)
3900 R->addOperand(getOperand(1));
3901 return R;
3902 }
3903
3904 VP_CLASSOF_IMPL(VPRecipeBase::VPActiveLaneMaskPHISC)
3905
3906 /// Generate the active lane mask phi of the vector loop.
3907 void execute(VPTransformState &State) override;
3908
3909protected:
3910#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3911 /// Print the recipe.
3912 void printRecipe(raw_ostream &O, const Twine &Indent,
3913 VPSlotTracker &SlotTracker) const override;
3914#endif
3915};
3916
3917/// A recipe for generating the phi node tracking the current scalar iteration
3918/// index. It starts at the start value of the canonical induction and gets
3919/// incremented by the number of scalar iterations processed by the vector loop
3920/// iteration. The increment does not have to be loop invariant.
3922public:
3924 : VPHeaderPHIRecipe(VPRecipeBase::VPCurrentIterationPHISC, nullptr,
3925 StartIV, DL) {}
3926
3927 ~VPCurrentIterationPHIRecipe() override = default;
3928
3930 llvm_unreachable("cloning not implemented yet");
3931 }
3932
3933 VP_CLASSOF_IMPL(VPRecipeBase::VPCurrentIterationPHISC)
3934
3935 void execute(VPTransformState &State) override {
3936 llvm_unreachable("cannot execute this recipe, should be replaced by a "
3937 "scalar phi recipe");
3938 }
3939
3940 /// Return the cost of this VPCurrentIterationPHIRecipe.
3942 VPCostContext &Ctx) const override {
3943 // For now, match the behavior of the legacy cost model.
3944 return 0;
3945 }
3946
3947 /// Returns true if the recipe only uses the first lane of operand \p Op.
3948 bool usesFirstLaneOnly(const VPValue *Op) const override {
3950 "Op must be an operand of the recipe");
3951 return true;
3952 }
3953
3954protected:
3955#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3956 /// Print the recipe.
3957 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
3958 VPSlotTracker &SlotTracker) const override;
3959#endif
3960};
3961
3962/// A Recipe for widening the canonical induction variable of the vector loop.
3964 public VPUnrollPartAccessor<1> {
3965public:
3967 : VPSingleDefRecipe(VPRecipeBase::VPWidenCanonicalIVSC, {CanonicalIV}) {}
3968
3969 ~VPWidenCanonicalIVRecipe() override = default;
3970
3975
3976 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenCanonicalIVSC)
3977
3978 /// Generate a canonical vector induction variable of the vector loop, with
3979 /// start = {<Part*VF, Part*VF+1, ..., Part*VF+VF-1> for 0 <= Part < UF}, and
3980 /// step = <VF*UF, VF*UF, ..., VF*UF>.
3981 void execute(VPTransformState &State) override;
3982
3983 /// Return the cost of this VPWidenCanonicalIVRecipe.
3985 VPCostContext &Ctx) const override {
3986 // TODO: Compute accurate cost after retiring the legacy cost model.
3987 return 0;
3988 }
3989
3990protected:
3991#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3992 /// Print the recipe.
3993 void printRecipe(raw_ostream &O, const Twine &Indent,
3994 VPSlotTracker &SlotTracker) const override;
3995#endif
3996};
3997
3998 /// A recipe for converting the input value \p IV to the corresponding
3999/// value of an IV with different start and step values, using Start + IV *
4000/// Step.
4002 /// Kind of the induction.
4004 /// If not nullptr, the floating point induction binary operator. Must be set
4005 /// for floating point inductions.
4006 const FPMathOperator *FPBinOp;
4007
4008 /// Name to use for the generated IR instruction for the derived IV.
4009 std::string Name;
4010
4011public:
4013 VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step,
4014 const Twine &Name = "")
4016 IndDesc.getKind(),
4017 dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp()),
4018 Start, CanonicalIV, Step, Name) {}
4019
4021 const FPMathOperator *FPBinOp, VPIRValue *Start,
4022 VPValue *IV, VPValue *Step, const Twine &Name = "")
4023 : VPSingleDefRecipe(VPRecipeBase::VPDerivedIVSC, {Start, IV, Step}),
4024 Kind(Kind), FPBinOp(FPBinOp), Name(Name.str()) {}
4025
4026 ~VPDerivedIVRecipe() override = default;
4027
4029 return new VPDerivedIVRecipe(Kind, FPBinOp, getStartValue(), getOperand(1),
4030 getStepValue());
4031 }
4032
4033 VP_CLASSOF_IMPL(VPRecipeBase::VPDerivedIVSC)
4034
4035 /// Generate the transformed value of the induction at offset StartValue (1.
4036 /// operand) + IV (2. operand) * StepValue (3. operand).
4037 void execute(VPTransformState &State) override;
4038
4039 /// Return the cost of this VPDerivedIVRecipe.
4041 VPCostContext &Ctx) const override {
4042 // TODO: Compute accurate cost after retiring the legacy cost model.
4043 return 0;
4044 }
4045
4046 Type *getScalarType() const { return getStartValue()->getType(); }
4047
4049 VPValue *getStepValue() const { return getOperand(2); }
4050
4051 /// Returns true if the recipe only uses the first lane of operand \p Op.
4052 bool usesFirstLaneOnly(const VPValue *Op) const override {
4054 "Op must be an operand of the recipe");
4055 return true;
4056 }
4057
4058protected:
4059#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4060 /// Print the recipe.
4061 void printRecipe(raw_ostream &O, const Twine &Indent,
4062 VPSlotTracker &SlotTracker) const override;
4063#endif
4064};
4065
4066/// A recipe for handling phi nodes of integer and floating-point inductions,
4067/// producing their scalar values. Before unrolling by UF the recipe represents
4068/// the VF*UF scalar values to be produced, or UF scalar values if only first
4069/// lane is used, and has 3 operands: IV, step and VF. Unrolling adds one extra
4070/// operand StartIndex to all unroll parts except part 0, as the recipe
4071/// represents the VF scalar values (this number of values is taken from
4072/// State.VF rather than from the VF operand) starting at IV + StartIndex.
4074 Instruction::BinaryOps InductionOpcode;
4075
4076public:
4079 DebugLoc DL)
4080 : VPRecipeWithIRFlags(VPRecipeBase::VPScalarIVStepsSC, {IV, Step, VF},
4081 FMFs, DL),
4082 InductionOpcode(Opcode) {}
4083
4085 VPValue *Step, VPValue *VF,
4088 IV, Step, VF, IndDesc.getInductionOpcode(),
4089 dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp())
4090 ? IndDesc.getInductionBinOp()->getFastMathFlags()
4091 : FastMathFlags(),
4092 DL) {}
4093
4094 ~VPScalarIVStepsRecipe() override = default;
4095
4097 auto *NewR = new VPScalarIVStepsRecipe(getOperand(0), getOperand(1),
4098 getOperand(2), InductionOpcode,
4100 if (VPValue *StartIndex = getStartIndex())
4101 NewR->setStartIndex(StartIndex);
4102 return NewR;
4103 }
4104
4105 VP_CLASSOF_IMPL(VPRecipeBase::VPScalarIVStepsSC)
4106
4107 /// Generate the scalarized versions of the phi node as needed by their users.
4108 void execute(VPTransformState &State) override;
4109
4110 /// Return the cost of this VPScalarIVStepsRecipe.
4112 VPCostContext &Ctx) const override {
4113 // TODO: Compute accurate cost after retiring the legacy cost model.
4114 return 0;
4115 }
4116
4117 VPValue *getStepValue() const { return getOperand(1); }
4118
4119 /// Return the number of scalars to produce per unroll part, used to compute
4120 /// StartIndex during unrolling.
4121 VPValue *getVFValue() const { return getOperand(2); }
4122
4123 /// Return the StartIndex, or null if known to be zero, valid only after
4124 /// unrolling.
4126 return getNumOperands() == 4 ? getOperand(3) : nullptr;
4127 }
4128
4129 /// Set or add the StartIndex operand.
4130 void setStartIndex(VPValue *StartIndex) {
4131 if (getNumOperands() == 4)
4132 setOperand(3, StartIndex);
4133 else
4134 addOperand(StartIndex);
4135 }
4136
4137 /// Returns true if the recipe only uses the first lane of operand \p Op.
4138 bool usesFirstLaneOnly(const VPValue *Op) const override {
4140 "Op must be an operand of the recipe");
4141 return true;
4142 }
4143
4144 Instruction::BinaryOps getInductionOpcode() const { return InductionOpcode; }
4145
4146protected:
4147#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4148 /// Print the recipe.
4149 void printRecipe(raw_ostream &O, const Twine &Indent,
4150 VPSlotTracker &SlotTracker) const override;
4151#endif
4152};
4153
4154/// Casting from VPRecipeBase -> VPPhiAccessors is supported for all recipe
4155/// types implementing VPPhiAccessors. Used by isa<> & co.
4157 static inline bool isPossible(const VPRecipeBase *f) {
4158 // TODO: include VPPredInstPHIRecipe too, once it implements VPPhiAccessors.
4160 }
4161};
4162/// Support casting from VPRecipeBase -> VPPhiAccessors, by down-casting to the
4163/// recipe types implementing VPPhiAccessors. Used by cast<>, dyn_cast<> & co.
4164template <typename SrcTy>
4165struct CastInfoVPPhiAccessors : public CastIsPossible<VPPhiAccessors, SrcTy> {
4166
4168
4169 /// doCast is used by cast<>.
4170 static inline VPPhiAccessors *doCast(SrcTy R) {
4171 return const_cast<VPPhiAccessors *>([R]() -> const VPPhiAccessors * {
4172 switch (R->getVPRecipeID()) {
4173 case VPRecipeBase::VPInstructionSC:
4174 return cast<VPPhi>(R);
4175 case VPRecipeBase::VPIRInstructionSC:
4176 return cast<VPIRPhi>(R);
4177 case VPRecipeBase::VPWidenPHISC:
4178 return cast<VPWidenPHIRecipe>(R);
4179 default:
4180 return cast<VPHeaderPHIRecipe>(R);
4181 }
4182 }());
4183 }
4184
4185 /// doCastIfPossible is used by dyn_cast<>.
4186 static inline VPPhiAccessors *doCastIfPossible(SrcTy f) {
4187 if (!Self::isPossible(f))
4188 return nullptr;
4189 return doCast(f);
4190 }
4191};
4192template <>
4195template <>
4198
4199/// Casting from (const) VPRecipeBase -> (const) VPIRMetadata is supported for
4200/// all recipe types implementing VPIRMetadata. Used by isa<> & co.
4201namespace detail {
4202template <typename DstTy, typename RecipeBasePtrTy>
4203static inline auto castToVPIRMetadata(RecipeBasePtrTy R) -> DstTy {
4204 switch (R->getVPRecipeID()) {
4205 case VPRecipeBase::VPInstructionSC:
4206 return cast<VPInstruction>(R);
4207 case VPRecipeBase::VPWidenSC:
4208 return cast<VPWidenRecipe>(R);
4209 case VPRecipeBase::VPWidenCastSC:
4210 return cast<VPWidenCastRecipe>(R);
4211 case VPRecipeBase::VPWidenIntrinsicSC:
4213 case VPRecipeBase::VPWidenCallSC:
4214 return cast<VPWidenCallRecipe>(R);
4215 case VPRecipeBase::VPReplicateSC:
4216 return cast<VPReplicateRecipe>(R);
4217 case VPRecipeBase::VPInterleaveSC:
4218 case VPRecipeBase::VPInterleaveEVLSC:
4219 return cast<VPInterleaveBase>(R);
4220 case VPRecipeBase::VPWidenLoadSC:
4221 case VPRecipeBase::VPWidenLoadEVLSC:
4222 case VPRecipeBase::VPWidenStoreSC:
4223 case VPRecipeBase::VPWidenStoreEVLSC:
4224 return cast<VPWidenMemoryRecipe>(R);
4225 default:
4226 llvm_unreachable("invalid recipe for VPIRMetadata cast");
4227 }
4228}
4229} // namespace detail
4230
4231/// Support casting from VPRecipeBase -> VPIRMetadata, by down-casting to the
4232/// recipe types implementing VPIRMetadata. Used by cast<>, dyn_cast<> & co.
4233template <typename DstTy, typename SrcTy>
4234struct CastInfoVPIRMetadata : public CastIsPossible<DstTy, SrcTy> {
4235 static inline bool isPossible(SrcTy R) {
4236 // NOTE: Each recipe inheriting from VPIRMetadata must be listed here and
4237 // also handled in castToVPIRMetadata.
4242 R);
4243 }
4244
4245 using RetTy = DstTy *;
4246
4247 /// doCast is used by cast<>.
4248 static inline RetTy doCast(SrcTy R) {
4250 }
4251
4252 /// doCastIfPossible is used by dyn_cast<>.
4253 static inline RetTy doCastIfPossible(SrcTy R) {
4254 if (!isPossible(R))
4255 return nullptr;
4256 return doCast(R);
4257 }
4258};
4259template <>
4262template <>
4265
4266/// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It
4267/// holds a sequence of zero or more VPRecipe's each representing a sequence of
4268/// output IR instructions. All PHI-like recipes must come before any non-PHI recipes.
4269class LLVM_ABI_FOR_TEST VPBasicBlock : public VPBlockBase {
4270 friend class VPlan;
4271
4272 /// Use VPlan::createVPBasicBlock to create VPBasicBlocks.
4273 VPBasicBlock(const Twine &Name = "", VPRecipeBase *Recipe = nullptr)
4274 : VPBlockBase(VPBasicBlockSC, Name.str()) {
4275 if (Recipe)
4276 appendRecipe(Recipe);
4277 }
4278
4279public:
4281
4282protected:
4283 /// The VPRecipes held in the order of output instructions to generate.
4285
4286 VPBasicBlock(const unsigned char BlockSC, const Twine &Name = "")
4287 : VPBlockBase(BlockSC, Name.str()) {}
4288
4289public:
4290 ~VPBasicBlock() override {
4291 while (!Recipes.empty())
4292 Recipes.pop_back();
4293 }
4294
4295 /// Instruction iterators...
4300
4301 //===--------------------------------------------------------------------===//
4302 /// Recipe iterator methods
4303 ///
4304 inline iterator begin() { return Recipes.begin(); }
4305 inline const_iterator begin() const { return Recipes.begin(); }
4306 inline iterator end() { return Recipes.end(); }
4307 inline const_iterator end() const { return Recipes.end(); }
4308
4309 inline reverse_iterator rbegin() { return Recipes.rbegin(); }
4310 inline const_reverse_iterator rbegin() const { return Recipes.rbegin(); }
4311 inline reverse_iterator rend() { return Recipes.rend(); }
4312 inline const_reverse_iterator rend() const { return Recipes.rend(); }
4313
4314 inline size_t size() const { return Recipes.size(); }
4315 inline bool empty() const { return Recipes.empty(); }
4316 inline const VPRecipeBase &front() const { return Recipes.front(); }
4317 inline VPRecipeBase &front() { return Recipes.front(); }
4318 inline const VPRecipeBase &back() const { return Recipes.back(); }
4319 inline VPRecipeBase &back() { return Recipes.back(); }
4320
4321 /// Returns a reference to the list of recipes.
4323
4324 /// Returns a pointer to a member of the recipe list.
4325 static RecipeListTy VPBasicBlock::*getSublistAccess(VPRecipeBase *) {
4326 return &VPBasicBlock::Recipes;
4327 }
4328
4329 /// Method to support type inquiry through isa, cast, and dyn_cast.
4330 static inline bool classof(const VPBlockBase *V) {
4331 return V->getVPBlockID() == VPBlockBase::VPBasicBlockSC ||
4332 V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
4333 }
4334
4335 void insert(VPRecipeBase *Recipe, iterator InsertPt) {
4336 assert(Recipe && "No recipe to append.");
4337 assert(!Recipe->Parent && "Recipe already in VPlan");
4338 Recipe->Parent = this;
4339 Recipes.insert(InsertPt, Recipe);
4340 }
4341
4342 /// Augment the existing recipes of a VPBasicBlock with an additional
4343 /// \p Recipe as the last recipe.
4344 void appendRecipe(VPRecipeBase *Recipe) { insert(Recipe, end()); }
4345
4346 /// The method which generates the output IR instructions that correspond to
4347 /// this VPBasicBlock, thereby "executing" the VPlan.
4348 void execute(VPTransformState *State) override;
4349
4350 /// Return the cost of this VPBasicBlock.
4351 InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override;
4352
4353 /// Return the position of the first non-phi node recipe in the block.
4354 iterator getFirstNonPhi();
4355
4356 /// Returns an iterator range over the PHI-like recipes in the block.
4360
4361 /// Split current block at \p SplitAt by inserting a new block between the
4362 /// current block and its successors and moving all recipes starting at
4363 /// SplitAt to the new block. Returns the new block.
4364 VPBasicBlock *splitAt(iterator SplitAt);
4365
4366 VPRegionBlock *getEnclosingLoopRegion();
4367 const VPRegionBlock *getEnclosingLoopRegion() const;
4368
4369#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4370 /// Print this VPBasicBlock to \p O, prefixing all lines with \p Indent. \p
4371 /// SlotTracker is used to print unnamed VPValues using consecutive numbers.
4372 ///
4373 /// Note that the numbering is applied to the whole VPlan, so printing
4374 /// individual blocks is consistent with the whole VPlan printing.
4375 void print(raw_ostream &O, const Twine &Indent,
4376 VPSlotTracker &SlotTracker) const override;
4377 using VPBlockBase::print; // Get the print(raw_stream &O) version.
4378#endif
4379
4380 /// If the block has multiple successors, return the branch recipe terminating
4381 /// the block. If there are no or only a single successor, return nullptr;
4382 VPRecipeBase *getTerminator();
4383 const VPRecipeBase *getTerminator() const;
4384
4385 /// Returns true if the block is exiting its parent region.
4386 bool isExiting() const;
4387
4388 /// Clone the current block and its recipes, without updating the operands of
4389 /// the cloned recipes.
4390 VPBasicBlock *clone() override;
4391
4392 /// Returns the predecessor block at index \p Idx with the predecessors as per
4393 /// the corresponding plain CFG. If the block is an entry block to a region,
4394 /// the first predecessor is the single predecessor of a region, and the
4395 /// second predecessor is the exiting block of the region.
4396 const VPBasicBlock *getCFGPredecessor(unsigned Idx) const;
4397
4398protected:
4399 /// Execute the recipes in the IR basic block \p BB.
4400 void executeRecipes(VPTransformState *State, BasicBlock *BB);
4401
4402 /// Connect the VPBBs predecessors' in the VPlan CFG to the IR basic block
4403 /// generated for this VPBB.
4404 void connectToPredecessors(VPTransformState &State);
4405
4406private:
4407 /// Create an IR BasicBlock to hold the output instructions generated by this
4408 /// VPBasicBlock, and return it. Update the CFGState accordingly.
4409 BasicBlock *createEmptyBasicBlock(VPTransformState &State);
4410};
4411
4412inline const VPBasicBlock *
4414 return getAsRecipe()->getParent()->getCFGPredecessor(Idx);
4415}
4416
4417/// A special type of VPBasicBlock that wraps an existing IR basic block.
4418/// Recipes of the block get added before the first non-phi instruction in the
4419/// wrapped block.
4420/// Note: At the moment, VPIRBasicBlock can only be used to wrap VPlan's
4421/// preheader block.
4422class VPIRBasicBlock : public VPBasicBlock {
4423 friend class VPlan;
4424
4425 BasicBlock *IRBB;
4426
4427 /// Use VPlan::createVPIRBasicBlock to create VPIRBasicBlocks.
4428 VPIRBasicBlock(BasicBlock *IRBB)
4429 : VPBasicBlock(VPIRBasicBlockSC,
4430 (Twine("ir-bb<") + IRBB->getName() + Twine(">")).str()),
4431 IRBB(IRBB) {}
4432
4433public:
4434 ~VPIRBasicBlock() override = default;
4435
4436 static inline bool classof(const VPBlockBase *V) {
4437 return V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
4438 }
4439
4440 /// The method which generates the output IR instructions that correspond to
4441 /// this VPBasicBlock, thereby "executing" the VPlan.
4442 void execute(VPTransformState *State) override;
4443
4444 VPIRBasicBlock *clone() override;
4445
4446 BasicBlock *getIRBasicBlock() const { return IRBB; }
4447};
4448
4449/// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks
4450/// which form a Single-Entry-Single-Exiting subgraph of the output IR CFG.
4451/// A VPRegionBlock may indicate that its contents are to be replicated several
4452/// times. This is designed to support predicated scalarization, in which a
4453/// scalar if-then code structure needs to be generated VF * UF times. Having
4454/// this replication indicator helps to keep a single model for multiple
4455/// candidate VF's. The actual replication takes place only once the desired VF
4456/// and UF have been determined.
4457class LLVM_ABI_FOR_TEST VPRegionBlock : public VPBlockBase {
4458 friend class VPlan;
4459
4460 /// Hold the Single Entry of the SESE region modelled by the VPRegionBlock.
4461 VPBlockBase *Entry;
4462
4463 /// Hold the Single Exiting block of the SESE region modelled by the
4464 /// VPRegionBlock.
4465 VPBlockBase *Exiting;
4466
4467 /// An indicator whether this region is to generate multiple replicated
4468 /// instances of output IR corresponding to its VPBlockBases.
4469 bool IsReplicator;
4470
4471 /// Use VPlan::createVPRegionBlock to create VPRegionBlocks.
4472 VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting,
4473 const std::string &Name = "", bool IsReplicator = false)
4474 : VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exiting(Exiting),
4475 IsReplicator(IsReplicator) {
4476 if (Entry) {
4477 assert(!Entry->hasPredecessors() && "Entry block has predecessors.");
4478 assert(Exiting && "Must also pass Exiting if Entry is passed.");
4479 assert(!Exiting->hasSuccessors() && "Exit block has successors.");
4480 Entry->setParent(this);
4481 Exiting->setParent(this);
4482 }
4483 }
4484
4485public:
4486 ~VPRegionBlock() override = default;
4487
4488 /// Method to support type inquiry through isa, cast, and dyn_cast.
4489 static inline bool classof(const VPBlockBase *V) {
4490 return V->getVPBlockID() == VPBlockBase::VPRegionBlockSC;
4491 }
4492
4493 const VPBlockBase *getEntry() const { return Entry; }
4494 VPBlockBase *getEntry() { return Entry; }
4495
4496 /// Set \p EntryBlock as the entry VPBlockBase of this VPRegionBlock. \p
4497 /// EntryBlock must have no predecessors.
4498 void setEntry(VPBlockBase *EntryBlock) {
4499 assert(!EntryBlock->hasPredecessors() &&
4500 "Entry block cannot have predecessors.");
4501 Entry = EntryBlock;
4502 EntryBlock->setParent(this);
4503 }
4504
4505 const VPBlockBase *getExiting() const { return Exiting; }
4506 VPBlockBase *getExiting() { return Exiting; }
4507
4508 /// Set \p ExitingBlock as the exiting VPBlockBase of this VPRegionBlock. \p
4509 /// ExitingBlock must have no successors.
4510 void setExiting(VPBlockBase *ExitingBlock) {
4511 assert(!ExitingBlock->hasSuccessors() &&
4512 "Exit block cannot have successors.");
4513 Exiting = ExitingBlock;
4514 ExitingBlock->setParent(this);
4515 }
4516
4517 /// Returns the pre-header VPBasicBlock of the loop region.
4519 assert(!isReplicator() && "should only get pre-header of loop regions");
4520 return getSinglePredecessor()->getExitingBasicBlock();
4521 }
4522
4523 /// An indicator whether this region is to generate multiple replicated
4524 /// instances of output IR corresponding to its VPBlockBases.
4525 bool isReplicator() const { return IsReplicator; }
4526
4527 /// The method which generates the output IR instructions that correspond to
4528 /// this VPRegionBlock, thereby "executing" the VPlan.
4529 void execute(VPTransformState *State) override;
4530
4531 // Return the cost of this region.
4532 InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override;
4533
4534#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4535 /// Print this VPRegionBlock to \p O (recursively), prefixing all lines with
4536 /// \p Indent. \p SlotTracker is used to print unnamed VPValue's using
4537 /// consecutive numbers.
4538 ///
4539 /// Note that the numbering is applied to the whole VPlan, so printing
4540 /// individual regions is consistent with the whole VPlan printing.
4541 void print(raw_ostream &O, const Twine &Indent,
4542 VPSlotTracker &SlotTracker) const override;
4543 using VPBlockBase::print; // Get the print(raw_stream &O) version.
4544#endif
4545
4546 /// Clone all blocks in the single-entry single-exit region of the block and
4547 /// their recipes without updating the operands of the cloned recipes.
4548 VPRegionBlock *clone() override;
4549
4550 /// Remove the current region from its VPlan, connecting its predecessor to
4551 /// its entry, and its exiting block to its successor.
4552 void dissolveToCFGLoop();
4553
4554 /// Returns the canonical induction recipe of the region.
4556 VPBasicBlock *EntryVPBB = getEntryBasicBlock();
4557 if (EntryVPBB->empty()) {
4558 // VPlan native path. TODO: Unify both code paths.
4559 EntryVPBB = cast<VPBasicBlock>(EntryVPBB->getSingleSuccessor());
4560 }
4561 return cast<VPCanonicalIVPHIRecipe>(&*EntryVPBB->begin());
4562 }
4564 return const_cast<VPRegionBlock *>(this)->getCanonicalIV();
4565 }
4566
4567 /// Return the type of the canonical IV for loop regions.
4568 Type *getCanonicalIVType() { return getCanonicalIV()->getScalarType(); }
4569 const Type *getCanonicalIVType() const {
4570 return getCanonicalIV()->getScalarType();
4571 }
4572};
4573
4575 return getParent()->getParent();
4576}
4577
4579 return getParent()->getParent();
4580}
4581
4582 /// VPlan models a candidate for vectorization, encoding various decisions taken
4583/// to produce efficient output IR, including which branches, basic-blocks and
4584/// output IR instructions to generate, and their cost. VPlan holds a
4585/// Hierarchical-CFG of VPBasicBlocks and VPRegionBlocks rooted at an Entry
4586/// VPBasicBlock.
4587class VPlan {
4588 friend class VPlanPrinter;
4589 friend class VPSlotTracker;
4590
4591 /// VPBasicBlock corresponding to the original preheader. Used to place
4592 /// VPExpandSCEV recipes for expressions used during skeleton creation and the
4593 /// rest of VPlan execution.
4594 /// When this VPlan is used for the epilogue vector loop, the entry will be
4595 /// replaced by a new entry block created during skeleton creation.
4596 VPBasicBlock *Entry;
4597
4598 /// VPIRBasicBlock wrapping the header of the original scalar loop.
4599 VPIRBasicBlock *ScalarHeader;
4600
4601 /// Immutable list of VPIRBasicBlocks wrapping the exit blocks of the original
4602 /// scalar loop. Note that some exit blocks may be unreachable at the moment,
4603 /// e.g. if the scalar epilogue always executes.
4605
4606 /// Holds the VFs applicable to this VPlan.
4608
4609 /// Holds the UFs applicable to this VPlan. If empty, the VPlan is valid for
4610 /// any UF.
4612
4613 /// Holds the name of the VPlan, for printing.
4614 std::string Name;
4615
4616 /// Represents the trip count of the original loop, for folding
4617 /// the tail.
4618 VPValue *TripCount = nullptr;
4619
4620 /// Represents the backedge taken count of the original loop, for folding
4621 /// the tail. It equals TripCount - 1.
4622 VPSymbolicValue *BackedgeTakenCount = nullptr;
4623
4624 /// Represents the vector trip count.
4625 VPSymbolicValue VectorTripCount;
4626
4627 /// Represents the vectorization factor of the loop.
4628 VPSymbolicValue VF;
4629
4630 /// Represents the unroll factor of the loop.
4631 VPSymbolicValue UF;
4632
4633 /// Represents the loop-invariant VF * UF of the vector loop region.
4634 VPSymbolicValue VFxUF;
4635
4636 /// Contains all the external definitions created for this VPlan, as a mapping
4637 /// from IR Values to VPIRValues.
4639
4640 /// Blocks allocated and owned by the VPlan. They will be deleted once the
4641 /// VPlan is destroyed.
4642 SmallVector<VPBlockBase *> CreatedBlocks;
4643
4644 /// Construct a VPlan with \p Entry to the plan and with \p ScalarHeader
4645 /// wrapping the original header of the scalar loop.
4646 VPlan(VPBasicBlock *Entry, VPIRBasicBlock *ScalarHeader)
4647 : Entry(Entry), ScalarHeader(ScalarHeader) {
4648 Entry->setPlan(this);
4649 assert(ScalarHeader->getNumSuccessors() == 0 &&
4650 "scalar header must be a leaf node");
4651 }
4652
4653public:
4654 /// Construct a VPlan for \p L. This will create VPIRBasicBlocks wrapping the
4655 /// original preheader and scalar header of \p L, to be used as entry and
4656 /// scalar header blocks of the new VPlan.
4657 VPlan(Loop *L);
4658
4659 /// Construct a VPlan with a new VPBasicBlock as entry and a VPIRBasicBlock
4660 /// wrapping \p ScalarHeaderBB. The trip count is not set by this constructor.
4661 VPlan(BasicBlock *ScalarHeaderBB) {
4662 setEntry(createVPBasicBlock("preheader"));
4663 ScalarHeader = createVPIRBasicBlock(ScalarHeaderBB);
4664 }
4665
4667
4669 Entry = VPBB;
4670 VPBB->setPlan(this);
4671 }
4672
4673 /// Generate the IR code for this VPlan.
4674 void execute(VPTransformState *State);
4675
4676 /// Return the cost of this plan.
4678
4679 VPBasicBlock *getEntry() { return Entry; }
4680 const VPBasicBlock *getEntry() const { return Entry; }
4681
4682 /// Returns the preheader of the vector loop region, if one exists, or null
4683 /// otherwise.
4685 VPRegionBlock *VectorRegion = getVectorLoopRegion();
4686 return VectorRegion
4687 ? cast<VPBasicBlock>(VectorRegion->getSinglePredecessor())
4688 : nullptr;
4689 }
4690
4691 /// Returns the VPRegionBlock of the vector loop.
4694
4695 /// Returns the 'middle' block of the plan, that is the block that selects
4696 /// whether to execute the scalar tail loop or the exit block from the loop
4697 /// latch. If there is an early exit from the vector loop, the middle block
4698 /// conceptually has the early exit block as third successor, split across 2
4699 /// VPBBs. In that case, the second VPBB selects whether to execute the scalar
4700 /// tail loop or the exit block. If the scalar tail loop or exit block are
4701 /// known to always execute, the middle block may branch directly to that
4702 /// block. This function cannot be called once the vector loop region has been
4703 /// removed.
4705 VPRegionBlock *LoopRegion = getVectorLoopRegion();
4706 assert(
4707 LoopRegion &&
4708 "cannot call the function after vector loop region has been removed");
4709 // The middle block is always the last successor of the region.
4710 return cast<VPBasicBlock>(LoopRegion->getSuccessors().back());
4711 }
4712
4714 return const_cast<VPlan *>(this)->getMiddleBlock();
4715 }
4716
4717 /// Return the VPBasicBlock for the preheader of the scalar loop.
4719 return cast<VPBasicBlock>(getScalarHeader()->getSinglePredecessor());
4720 }
4721
4722 /// Return the VPIRBasicBlock wrapping the header of the scalar loop.
4723 VPIRBasicBlock *getScalarHeader() const { return ScalarHeader; }
4724
4725 /// Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of
4726 /// the original scalar loop.
4727 ArrayRef<VPIRBasicBlock *> getExitBlocks() const { return ExitBlocks; }
4728
4729 /// Return the VPIRBasicBlock corresponding to \p IRBB. \p IRBB must be an
4730 /// exit block.
4732
4733 /// Returns true if \p VPBB is an exit block.
4734 bool isExitBlock(VPBlockBase *VPBB);
4735
4736 /// The trip count of the original loop.
4738 assert(TripCount && "trip count needs to be set before accessing it");
4739 return TripCount;
4740 }
4741
4742 /// Set the trip count assuming it is currently null; if it is not - use
4743 /// resetTripCount().
4744 void setTripCount(VPValue *NewTripCount) {
4745 assert(!TripCount && NewTripCount && "TripCount should not be set yet.");
4746 TripCount = NewTripCount;
4747 }
4748
4749 /// Replace the already-set trip count of the VPlan with \p NewTripCount. The
4750 /// caller must have replaced all uses of the old trip count beforehand.
4751 void resetTripCount(VPValue *NewTripCount) {
4752 assert(TripCount && NewTripCount && TripCount->getNumUsers() == 0 &&
4753 "TripCount must be set and have no remaining users when resetting");
4754 TripCount = NewTripCount;
4755 }
4756
4757 /// The backedge taken count of the original loop.
4759 if (!BackedgeTakenCount)
4760 BackedgeTakenCount = new VPSymbolicValue();
4761 return BackedgeTakenCount;
4762 }
/// Returns the backedge taken count as currently stored, without creating it.
/// NOTE(review): presumably null until it has been created on demand - confirm.
4763 VPValue *getBackedgeTakenCount() const { return BackedgeTakenCount; }
4764
4765 /// The vector trip count.
4766 VPSymbolicValue &getVectorTripCount() { return VectorTripCount; }
4767
4768 /// Returns the symbolic value holding the VF of the vector loop region.
4769 VPSymbolicValue &getVF() { return VF; }
4770 const VPSymbolicValue &getVF() const { return VF; }
4771
4772 /// Returns the symbolic value holding the UF of the vector loop region.
4773 VPSymbolicValue &getUF() { return UF; }
4774
4775 /// Returns VF * UF of the vector loop region.
4776 VPSymbolicValue &getVFxUF() { return VFxUF; }
4777
4780 }
4781
4782 const DataLayout &getDataLayout() const {
4784 }
4785
/// Add \p VF to the VFs recorded in the plan.
4786 void addVF(ElementCount VF) { VFs.insert(VF); }
4787
4789 assert(hasVF(VF) && "Cannot set VF not already in plan");
4790 VFs.clear();
4791 VFs.insert(VF);
4792 }
4793
4794 /// Remove \p VF from the plan.
4796 assert(hasVF(VF) && "tried to remove VF not present in plan");
4797 VFs.remove(VF);
4798 }
4799
/// Returns true if \p VF is among the VFs currently recorded in the plan.
4800 bool hasVF(ElementCount VF) const { return VFs.count(VF); }
/// Returns true if at least one VF recorded in the plan is scalable.
4801 bool hasScalableVF() const {
4802 return any_of(VFs, [](ElementCount Count) { return Count.isScalable(); });
4803 }
4804
4805 /// Returns an iterator range over all VFs of the plan.
4808 return VFs;
4809 }
4810
/// Returns true if the plan records exactly one VF and that VF is scalar.
4811 bool hasScalarVFOnly() const {
4812 const bool OnlyScalar = VFs.size() == 1 && VFs[0].isScalar();
4813 assert(hasVF(ElementCount::getFixed(1)) == OnlyScalar &&
4814 "Plan with scalar VF should only have a single VF");
4815 return OnlyScalar;
4816 }
4817
/// Returns true if no UF has been recorded for the plan yet, or if \p UF is one
/// of the recorded UFs.
4818 bool hasUF(unsigned UF) const { return UFs.empty() || UFs.contains(UF); }
4819
4820 /// Returns the single concrete UF recorded in the plan after unrolling.
4821 unsigned getConcreteUF() const {
4822 assert(isUnrolled() && "Expected a single UF");
4823 return UFs[0];
4824 }
4825
/// Make \p UF the only UF recorded in the plan. \p UF must satisfy hasUF().
4826 void setUF(unsigned UF) {
4827 assert(hasUF(UF) && "Cannot set the UF not already in plan");
4828 UFs.clear();
4829 UFs.insert(UF);
4830 }
4831
4832 /// Returns true if the VPlan already has been unrolled, i.e. it has a single
4833 /// concrete UF.
4834 bool isUnrolled() const { return UFs.size() == 1; }
4835
4836 /// Return a string with the name of the plan and the applicable VFs and UFs.
4837 std::string getName() const;
4838
/// Set the name of the plan to the string form of \p newName.
4839 void setName(const Twine &newName) { Name = newName.str(); }
4840
4841 /// Gets the live-in VPIRValue for \p V or adds a new live-in (if none exists
4842 /// yet) for \p V.
4844 assert(V && "Trying to get or add the VPIRValue of a null Value");
4845 auto [It, Inserted] = LiveIns.try_emplace(V);
4846 if (Inserted) {
4847 if (auto *CI = dyn_cast<ConstantInt>(V))
4848 It->second = new VPConstantInt(CI);
4849 else
4850 It->second = new VPIRValue(V);
4851 }
4852
4853 assert(isa<VPIRValue>(It->second) &&
4854 "Only VPIRValues should be in mapping");
4855 return It->second;
4856 }
4858 assert(V && "Trying to get or add the VPIRValue of a null VPIRValue");
4859 return getOrAddLiveIn(V->getValue());
4860 }
4861
4862 /// Return a VPIRValue wrapping i1 true.
4863 VPIRValue *getTrue() { return getConstantInt(1, 1); }
4864
4865 /// Return a VPIRValue wrapping i1 false.
4866 VPIRValue *getFalse() { return getConstantInt(1, 0); }
4867
4868 /// Return a VPIRValue wrapping the null value of type \p Ty.
4869 VPIRValue *getZero(Type *Ty) { return getConstantInt(Ty, 0); }
4870
4871 /// Return a VPIRValue wrapping the AllOnes value of type \p Ty.
4873 return getConstantInt(APInt::getAllOnes(Ty->getIntegerBitWidth()));
4874 }
4875
4876 /// Return a VPIRValue wrapping a ConstantInt with the given type and value.
/// \p Val and \p IsSigned are forwarded unchanged to ConstantInt::get; the
/// resulting constant is looked up or registered via getOrAddLiveIn().
4877 VPIRValue *getConstantInt(Type *Ty, uint64_t Val, bool IsSigned = false) {
4878 return getOrAddLiveIn(ConstantInt::get(Ty, Val, IsSigned));
4879 }
4880
4881 /// Return a VPIRValue wrapping a ConstantInt with the given bitwidth and
4882 /// value.
4884 bool IsSigned = false) {
4885 return getConstantInt(APInt(BitWidth, Val, IsSigned));
4886 }
4887
4888 /// Return a VPIRValue wrapping a ConstantInt with the given APInt value.
4890 return getOrAddLiveIn(ConstantInt::get(getContext(), Val));
4891 }
4892
4893 /// Return the live-in VPIRValue for \p V, if there is one or nullptr
4894 /// otherwise.
4895 VPIRValue *getLiveIn(Value *V) const { return LiveIns.lookup(V); }
4896
4897 /// Return the list of live-in VPValues available in the VPlan.
4898 auto getLiveIns() const { return LiveIns.values(); }
4899
4900#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4901 /// Print the live-ins of this VPlan to \p O.
4902 void printLiveIns(raw_ostream &O) const;
4903
4904 /// Print this VPlan to \p O.
4905 LLVM_ABI_FOR_TEST void print(raw_ostream &O) const;
4906
4907 /// Print this VPlan in DOT format to \p O.
4908 LLVM_ABI_FOR_TEST void printDOT(raw_ostream &O) const;
4909
4910 /// Dump the plan to stderr (for debugging).
4911 LLVM_DUMP_METHOD void dump() const;
4912#endif
4913
4914 /// Clone the current VPlan, update all VPValues of the new VPlan and cloned
4915 /// recipes to refer to the clones, and return it.
4917
4918 /// Create a new VPBasicBlock with \p Name and containing \p Recipe if
4919 /// present. The returned block is owned by the VPlan and deleted once the
4920 /// VPlan is destroyed.
4922 VPRecipeBase *Recipe = nullptr) {
4923 auto *VPB = new VPBasicBlock(Name, Recipe);
4924 CreatedBlocks.push_back(VPB);
4925 return VPB;
4926 }
4927
4928 /// Allocate a loop region named \p Name whose entry and exiting blocks are
4929 /// \p Entry and \p Exiting (either may be left null). The region is recorded in
4930 /// CreatedBlocks; it is owned by the VPlan and deleted once the VPlan is destroyed.
4931 VPRegionBlock *createLoopRegion(const std::string &Name = "",
4932 VPBlockBase *Entry = nullptr,
4933 VPBlockBase *Exiting = nullptr) {
4934 auto *Region = new VPRegionBlock(Entry, Exiting, Name);
4935 CreatedBlocks.push_back(Region);
4936 return Region;
4937 }
4938
4939 /// Create a new replicate region with \p Entry, \p Exiting and \p Name. The
4940 /// returned block is owned by the VPlan and deleted once the VPlan is
4941 /// destroyed.
4943 const std::string &Name = "") {
4944 auto *VPB = new VPRegionBlock(Entry, Exiting, Name, true);
4945 CreatedBlocks.push_back(VPB);
4946 return VPB;
4947 }
4948
4949 /// Create a VPIRBasicBlock wrapping \p IRBB, but do not create
4950 /// VPIRInstructions wrapping the instructions in \p IRBB. The returned
4951 /// block is owned by the VPlan and deleted once the VPlan is destroyed.
4953
4954 /// Create a VPIRBasicBlock from \p IRBB containing VPIRInstructions for all
4955 /// instructions in \p IRBB, except its terminator which is managed by the
4956 /// successors of the block in VPlan. The returned block is owned by the VPlan
4957 /// and deleted once the VPlan is destroyed.
4959
4960 /// Returns true if the VPlan is based on a loop with an early exit. This holds
4961 /// when more than one exit block has predecessors, or when the only exit
4962 /// block has several predecessors (one for the exit via the latch and one
4963 /// via the other early exit).
4964 bool hasEarlyExit() const {
4965 auto HasPreds = [](VPIRBasicBlock *EB) { return EB->hasPredecessors(); };
4966 if (count_if(ExitBlocks, HasPreds) > 1)
4967 return true;
4968 return ExitBlocks.size() == 1 && ExitBlocks[0]->getNumPredecessors() > 1;
4969 }
4970
4971 /// Returns true if the scalar tail may execute after the vector loop. Note
4972 /// that this relies on unneeded branches to the scalar tail loop being
4973 /// removed.
4974 bool hasScalarTail() const {
4975 return !(!getScalarPreheader()->hasPredecessors() ||
4977 }
4978};
4979
4980#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Stream \p Plan to \p OS by calling VPlan::print() and return \p OS. Guarded
/// like print() itself, which is only declared when NDEBUG is unset or
/// LLVM_ENABLE_DUMP is defined.
4981inline raw_ostream &operator<<(raw_ostream &OS, const VPlan &Plan) {
4982 Plan.print(OS);
4983 return OS;
4984}
4985#endif
4986
4987} // end namespace llvm
4988
4989#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
static MCDisassembler::DecodeStatus addOperand(MCInst &Inst, const MCOperand &Opnd)
Rewrite undef for PHI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
This file implements methods to test, set and extract typed bits from packed unsigned integers.
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:661
#define LLVM_ABI_FOR_TEST
Definition Compiler.h:218
#define LLVM_PACKED_START
Definition Compiler.h:554
dxil translate DXIL Translate Metadata
Hexagon Common GEP
iv users
Definition IVUsers.cpp:48
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
static std::pair< Value *, APInt > getMask(Value *WideMask, unsigned Factor, ElementCount LeafValueEC)
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first DebugLoc that has line number information, given a range of instructions.
This file implements a map that provides insertion order iteration.
This file provides utility analysis objects describing memory locations.
#define T
MachineInstr unsigned OpIdx
#define P(N)
static StringRef getName(Value *V)
static bool mayHaveSideEffects(MachineInstr &MI)
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
static const BasicSubtargetSubTypeKV * find(StringRef S, ArrayRef< BasicSubtargetSubTypeKV > A)
Find KV in array using binary search.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:247
This file contains the declarations of the entities induced by Vectorization Plans,...
#define VP_CLASSOF_IMPL(VPRecipeID)
Definition VPlan.h:588
static const uint32_t IV[8]
Definition blake3_impl.h:83
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
bool empty() const
empty - Check if the array is empty.
Definition ArrayRef.h:137
LLVM Basic Block Representation.
Definition BasicBlock.h:62
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
This class represents a function call, abstracting a target machine's calling convention.
This is the base class for all instructions that perform data casts.
Definition InstrTypes.h:448
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
A debug info location.
Definition DebugLoc.h:123
static DebugLoc getUnknown()
Definition DebugLoc.h:161
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:159
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:309
Utility class for floating point operations which can have information about relaxed accuracy require...
Definition Operator.h:200
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:23
Represents flags for the getelementptr instruction/expression.
static GEPNoWrapFlags fromRaw(unsigned Flags)
unsigned getRaw() const
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
A struct for saving information about induction variables.
InductionKind
This enum represents the kinds of inductions that we support.
InnerLoopVectorizer vectorizes loops which contain only one basic block to a specified vectorization ...
The group of interleaved loads/stores sharing the same stride and close to each other.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
An instruction for reading from memory.
LoopVectorizationCostModel - estimates the expected speedups due to vectorization.
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
Metadata node.
Definition Metadata.h:1080
Root of the metadata hierarchy.
Definition Metadata.h:64
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
This class represents an analyzed expression in the program.
This class provides computation of slot numbers for LLVM Assembly writing.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:339
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
iterator erase(const_iterator CI)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
std::string str() const
str - Get the contents as an std::string.
Definition StringRef.h:222
This class represents a truncation of integer types.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
Definition Twine.cpp:17
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition Type.h:130
void execute(VPTransformState &State) override
Generate the active lane mask phi of the vector loop.
VPActiveLaneMaskPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3897
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPActiveLaneMaskPHIRecipe(VPValue *StartMask, DebugLoc DL)
Definition VPlan.h:3891
~VPActiveLaneMaskPHIRecipe() override=default
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition VPlan.h:4269
RecipeListTy::const_iterator const_iterator
Definition VPlan.h:4297
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
Definition VPlan.h:4344
RecipeListTy::const_reverse_iterator const_reverse_iterator
Definition VPlan.h:4299
RecipeListTy::iterator iterator
Instruction iterators...
Definition VPlan.h:4296
RecipeListTy & getRecipeList()
Returns a reference to the list of recipes.
Definition VPlan.h:4322
iplist< VPRecipeBase > RecipeListTy
Definition VPlan.h:4280
VPBasicBlock(const unsigned char BlockSC, const Twine &Name="")
Definition VPlan.h:4286
iterator end()
Definition VPlan.h:4306
iterator begin()
Recipe iterator methods.
Definition VPlan.h:4304
RecipeListTy::reverse_iterator reverse_iterator
Definition VPlan.h:4298
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
Definition VPlan.h:4357
const VPBasicBlock * getCFGPredecessor(unsigned Idx) const
Returns the predecessor block at index Idx with the predecessors as per the corresponding plain CFG.
Definition VPlan.cpp:785
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
Definition VPlan.cpp:232
~VPBasicBlock() override
Definition VPlan.h:4290
const_reverse_iterator rbegin() const
Definition VPlan.h:4310
reverse_iterator rend()
Definition VPlan.h:4311
RecipeListTy Recipes
The VPRecipes held in the order of output instructions to generate.
Definition VPlan.h:4284
VPRecipeBase & back()
Definition VPlan.h:4319
const VPRecipeBase & front() const
Definition VPlan.h:4316
const_iterator begin() const
Definition VPlan.h:4305
VPRecipeBase & front()
Definition VPlan.h:4317
const VPRecipeBase & back() const
Definition VPlan.h:4318
void insert(VPRecipeBase *Recipe, iterator InsertPt)
Definition VPlan.h:4335
bool empty() const
Definition VPlan.h:4315
const_iterator end() const
Definition VPlan.h:4307
static bool classof(const VPBlockBase *V)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:4330
static RecipeListTy VPBasicBlock::* getSublistAccess(VPRecipeBase *)
Returns a pointer to a member of the recipe list.
Definition VPlan.h:4325
reverse_iterator rbegin()
Definition VPlan.h:4309
friend class VPlan
Definition VPlan.h:4270
size_t size() const
Definition VPlan.h:4314
const_reverse_iterator rend() const
Definition VPlan.h:4312
VPValue * getIncomingValue(unsigned Idx) const
Return incoming value number Idx.
Definition VPlan.h:2825
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
Definition VPlan.h:2830
VPBlendRecipe(PHINode *Phi, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags, DebugLoc DL)
The blend operation is a User of the incoming values and of their respective masks,...
Definition VPlan.h:2800
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account when normalized the first incoming value wi...
Definition VPlan.h:2820
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:2841
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2850
VPBlendRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2807
void setMask(unsigned Idx, VPValue *V)
Set mask number Idx to V.
Definition VPlan.h:2836
bool isNormalized() const
A normalized blend is one that has an odd number of operands, whereby the first operand does not have...
Definition VPlan.h:2816
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition VPlan.h:98
void setSuccessors(ArrayRef< VPBlockBase * > NewSuccs)
Set each VPBasicBlock in NewSuccs as successor of this VPBlockBase.
Definition VPlan.h:319
VPRegionBlock * getParent()
Definition VPlan.h:190
VPBlocksTy & getPredecessors()
Definition VPlan.h:227
iterator_range< VPBlockBase ** > predecessors()
Definition VPlan.h:224
LLVM_DUMP_METHOD void dump() const
Dump this VPBlockBase to dbgs().
Definition VPlan.h:389
void setName(const Twine &newName)
Definition VPlan.h:183
size_t getNumSuccessors() const
Definition VPlan.h:241
iterator_range< VPBlockBase ** > successors()
Definition VPlan.h:223
virtual void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Print plain-text dump of this VPBlockBase to O, prefixing all lines with Indent.
bool hasPredecessors() const
Returns true if this block has any predecessors.
Definition VPlan.h:221
void swapSuccessors()
Swap successors of the block. The block must have exactly 2 successors.
Definition VPlan.h:341
void printSuccessors(raw_ostream &O, const Twine &Indent) const
Print the successors of this block to O, prefixing all lines with Indent.
Definition VPlan.cpp:666
SmallVectorImpl< VPBlockBase * > VPBlocksTy
Definition VPlan.h:177
virtual ~VPBlockBase()=default
const VPBlocksTy & getHierarchicalPredecessors()
Definition VPlan.h:277
unsigned getIndexForSuccessor(const VPBlockBase *Succ) const
Returns the index for Succ in the blocks successor list.
Definition VPlan.h:354
size_t getNumPredecessors() const
Definition VPlan.h:242
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
Definition VPlan.h:310
VPBlockBase * getEnclosingBlockWithPredecessors()
Definition VPlan.cpp:224
unsigned getIndexForPredecessor(const VPBlockBase *Pred) const
Returns the index for Pred in the blocks predecessors list.
Definition VPlan.h:347
bool hasSuccessors() const
Returns true if this block has any successors.
Definition VPlan.h:219
const VPBlocksTy & getPredecessors() const
Definition VPlan.h:226
virtual VPBlockBase * clone()=0
Clone the current block and its recipes without updating the operands of the cloned recipes,...
enum { VPRegionBlockSC, VPBasicBlockSC, VPIRBasicBlockSC } VPBlockTy
An enumeration for keeping track of the concrete subclass of VPBlockBase that are actually instantiat...
Definition VPlan.h:175
virtual InstructionCost cost(ElementCount VF, VPCostContext &Ctx)=0
Return the cost of the block.
void setPlan(VPlan *ParentPlan)
Sets the pointer of the plan containing the block.
Definition VPlan.cpp:196
const VPRegionBlock * getParent() const
Definition VPlan.h:191
const std::string & getName() const
Definition VPlan.h:181
void clearSuccessors()
Remove all the successors of this block.
Definition VPlan.h:329
VPBlockBase * getSingleHierarchicalSuccessor()
Definition VPlan.h:267
void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse)
Set two given VPBlockBases IfTrue and IfFalse to be the two successors of this VPBlockBase.
Definition VPlan.h:301
VPBlockBase * getSinglePredecessor() const
Definition VPlan.h:237
virtual void execute(VPTransformState *State)=0
The method which generates the output IR that corresponds to this VPBlockBase, thereby "executing" the...
const VPBlocksTy & getHierarchicalSuccessors()
Definition VPlan.h:261
void clearPredecessors()
Remove all the predecessors of this block.
Definition VPlan.h:326
friend class VPBlockUtils
Definition VPlan.h:99
unsigned getVPBlockID() const
Definition VPlan.h:188
void printAsOperand(raw_ostream &OS, bool PrintType=false) const
Definition VPlan.h:368
void swapPredecessors()
Swap predecessors of the block.
Definition VPlan.h:333
VPBlockBase(const unsigned char SC, const std::string &N)
Definition VPlan.h:167
VPBlocksTy & getSuccessors()
Definition VPlan.h:216
VPBlockBase * getEnclosingBlockWithSuccessors()
An Enclosing Block of a block B is any block containing B, including B itself.
Definition VPlan.cpp:216
const VPBasicBlock * getEntryBasicBlock() const
Definition VPlan.cpp:182
void setOneSuccessor(VPBlockBase *Successor)
Set a given VPBlockBase Successor as the single successor of this VPBlockBase.
Definition VPlan.h:290
void setParent(VPRegionBlock *P)
Definition VPlan.h:201
VPBlockBase * getSingleHierarchicalPredecessor()
Definition VPlan.h:283
VPBlockBase * getSingleSuccessor() const
Definition VPlan.h:231
const VPBlocksTy & getSuccessors() const
Definition VPlan.h:215
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlan.h:3319
VPBranchOnMaskRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3303
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition VPlan.h:3327
VPBranchOnMaskRecipe(VPValue *BlockInMask, DebugLoc DL)
Definition VPlan.h:3300
VPlan-based builder utility analogous to IRBuilder.
Canonical scalar induction phi of the vector loop.
Definition VPlan.h:3831
~VPCanonicalIVPHIRecipe() override=default
VPCanonicalIVPHIRecipe(VPIRValue *StartV, DebugLoc DL)
Definition VPlan.h:3833
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3859
VPCanonicalIVPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3839
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition VPlan.h:3866
VPIRValue * getStartValue() const
Returns the start value of the canonical induction.
Definition VPlan.h:3853
Type * getScalarType() const
Returns the scalar type of the induction.
Definition VPlan.h:3856
void execute(VPTransformState &State) override
Generate the phi nodes.
Definition VPlan.h:3847
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPCanonicalIVPHIRecipe.
Definition VPlan.h:3873
VPCurrentIterationPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3929
VPCurrentIterationPHIRecipe(VPValue *StartIV, DebugLoc DL)
Definition VPlan.h:3923
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPCurrentIterationPHIRecipe.
Definition VPlan.h:3941
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi nodes.
Definition VPlan.h:3935
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3948
~VPCurrentIterationPHIRecipe() override=default
void execute(VPTransformState &State) override
Generate the transformed value of the induction at offset StartValue (1.
VPIRValue * getStartValue() const
Definition VPlan.h:4048
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPDerivedIVRecipe.
Definition VPlan.h:4040
VPValue * getStepValue() const
Definition VPlan.h:4049
VPDerivedIVRecipe(const InductionDescriptor &IndDesc, VPIRValue *Start, VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step, const Twine &Name="")
Definition VPlan.h:4012
Type * getScalarType() const
Definition VPlan.h:4046
VPDerivedIVRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:4028
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPDerivedIVRecipe() override=default
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:4052
VPDerivedIVRecipe(InductionDescriptor::InductionKind Kind, const FPMathOperator *FPBinOp, VPIRValue *Start, VPValue *IV, VPValue *Step, const Twine &Name="")
Definition VPlan.h:4020
Template specialization of the standard LLVM dominator tree utility for VPBlockBases.
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:3806
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPExpandSCEVRecipe.
Definition VPlan.h:3811
VPExpandSCEVRecipe(const SCEV *Expr)
Definition VPlan.h:3797
const SCEV * getSCEV() const
Definition VPlan.h:3817
VPExpandSCEVRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3802
~VPExpandSCEVRecipe() override=default
void execute(VPTransformState &State) override
Method for generating code, must not be called as this recipe is abstract.
Definition VPlan.h:3454
VPValue * getOperandOfResultType() const
Return the VPValue to use to infer the result type of the recipe.
Definition VPlan.h:3436
VPExpressionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3418
void decompose()
Insert the recipes of the expression back into the VPlan, directly before the current recipe.
~VPExpressionRecipe() override
Definition VPlan.h:3406
bool isSingleScalar() const
Returns true if the result of this VPExpressionRecipe is a single-scalar.
VPExpressionRecipe(VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1, VPWidenRecipe *Mul, VPWidenRecipe *Sub, VPReductionRecipe *Red)
Definition VPlan.h:3392
VPExpressionRecipe(VPWidenCastRecipe *Ext, VPReductionRecipe *Red)
Definition VPlan.h:3384
bool mayHaveSideEffects() const
Returns true if this expression contains recipes that may have side effects.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Compute the cost of this recipe either using a recipe's specialized implementation or using the legac...
bool mayReadOrWriteMemory() const
Returns true if this expression contains recipes that may read from or write to memory.
VPExpressionRecipe(VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1, VPWidenRecipe *Mul, VPReductionRecipe *Red)
Definition VPlan.h:3388
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
unsigned getVFScaleFactor() const
Definition VPlan.h:3448
VPExpressionRecipe(VPWidenRecipe *Mul, VPReductionRecipe *Red)
Definition VPlan.h:3386
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this header phi recipe.
VPHeaderPHIRecipe(unsigned char VPRecipeID, Instruction *UnderlyingInstr, VPValue *Start, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2308
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:2312
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:2325
static bool classof(const VPValue *V)
Definition VPlan.h:2322
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override=0
Print the recipe.
virtual VPValue * getBackedgeValue()
Returns the incoming value from the loop backedge.
Definition VPlan.h:2348
void setBackedgeValue(VPValue *V)
Update the incoming value from the loop backedge.
Definition VPlan.h:2353
VPValue * getStartValue()
Returns the start value of the phi, if one is set.
Definition VPlan.h:2337
void setStartValue(VPValue *V)
Update the start value of the recipe.
Definition VPlan.h:2345
static bool classof(const VPRecipeBase *R)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:2318
VPValue * getStartValue() const
Definition VPlan.h:2340
void execute(VPTransformState &State) override=0
Generate the phi nodes.
virtual VPRecipeBase & getBackedgeRecipe()
Returns the backedge value as a recipe.
Definition VPlan.h:2357
~VPHeaderPHIRecipe() override=default
void execute(VPTransformState &State) override
Produce a vectorized histogram operation.
VPHistogramRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2060
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPHistogramRecipe.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getMask() const
Return the mask operand if one was provided, or a null pointer if all lanes should be executed uncond...
Definition VPlan.h:2077
unsigned getOpcode() const
Definition VPlan.h:2073
VPHistogramRecipe(unsigned Opcode, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2053
VP_CLASSOF_IMPL(VPRecipeBase::VPHistogramSC)
~VPHistogramRecipe() override=default
A special type of VPBasicBlock that wraps an existing IR basic block.
Definition VPlan.h:4422
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPBasicBlock,...
Definition VPlan.cpp:461
BasicBlock * getIRBasicBlock() const
Definition VPlan.h:4446
static bool classof(const VPBlockBase *V)
Definition VPlan.h:4436
~VPIRBasicBlock() override=default
friend class VPlan
Definition VPlan.h:4423
VPIRBasicBlock * clone() override
Clone the current block and its recipes, without updating the operands of the cloned recipes.
Definition VPlan.cpp:486
Class to record and manage LLVM IR flags.
Definition VPlan.h:690
FastMathFlagsTy FMFs
Definition VPlan.h:778
ReductionFlagsTy ReductionFlags
Definition VPlan.h:780
LLVM_ABI_FOR_TEST bool hasRequiredFlagsForOpcode(unsigned Opcode) const
Returns true if Opcode has its required flags set.
bool hasNoWrapFlags() const
Definition VPlan.h:1033
VPIRFlags(RecurKind Kind, bool IsOrdered, bool IsInLoop, FastMathFlags FMFs)
Definition VPlan.h:871
LLVM_ABI_FOR_TEST bool flagsValidForOpcode(unsigned Opcode) const
Returns true if the set flags are valid for Opcode.
static VPIRFlags getDefaultFlags(unsigned Opcode)
Returns default flags for Opcode for opcodes that support it, asserts otherwise.
VPIRFlags(DisjointFlagsTy DisjointFlags)
Definition VPlan.h:851
VPIRFlags(WrapFlagsTy WrapFlags)
Definition VPlan.h:837
WrapFlagsTy WrapFlags
Definition VPlan.h:772
void printFlags(raw_ostream &O) const
VPIRFlags(CmpInst::Predicate Pred, FastMathFlags FMFs)
Definition VPlan.h:830
bool hasFastMathFlags() const
Returns true if the recipe has fast-math flags.
Definition VPlan.h:995
LLVM_ABI_FOR_TEST FastMathFlags getFastMathFlags() const
bool isReductionOrdered() const
Definition VPlan.h:1059
TruncFlagsTy TruncFlags
Definition VPlan.h:773
CmpInst::Predicate getPredicate() const
Definition VPlan.h:967
WrapFlagsTy getNoWrapFlags() const
Definition VPlan.h:1043
uint8_t AllFlags[2]
Definition VPlan.h:781
bool hasNonNegFlag() const
Returns true if the recipe has non-negative flag.
Definition VPlan.h:1003
void transferFlags(VPIRFlags &Other)
Definition VPlan.h:876
ExactFlagsTy ExactFlags
Definition VPlan.h:775
bool hasNoSignedWrap() const
Definition VPlan.h:1022
void intersectFlags(const VPIRFlags &Other)
Only keep flags also present in Other.
bool isDisjoint() const
Definition VPlan.h:1047
VPIRFlags(TruncFlagsTy TruncFlags)
Definition VPlan.h:842
VPIRFlags(FastMathFlags FMFs)
Definition VPlan.h:847
VPIRFlags(NonNegFlagsTy NonNegFlags)
Definition VPlan.h:856
VPIRFlags(CmpInst::Predicate Pred)
Definition VPlan.h:825
uint8_t GEPFlagsStorage
Definition VPlan.h:776
VPIRFlags(ExactFlagsTy ExactFlags)
Definition VPlan.h:861
bool isNonNeg() const
Definition VPlan.h:1005
GEPNoWrapFlags getGEPNoWrapFlags() const
Definition VPlan.h:985
bool hasPredicate() const
Returns true if the recipe has a comparison predicate.
Definition VPlan.h:990
DisjointFlagsTy DisjointFlags
Definition VPlan.h:774
void setPredicate(CmpInst::Predicate Pred)
Definition VPlan.h:975
bool hasNoUnsignedWrap() const
Definition VPlan.h:1011
FCmpFlagsTy FCmpFlags
Definition VPlan.h:779
NonNegFlagsTy NonNegFlags
Definition VPlan.h:777
bool isReductionInLoop() const
Definition VPlan.h:1065
void dropPoisonGeneratingFlags()
Drop all poison-generating flags.
Definition VPlan.h:887
void applyFlags(Instruction &I) const
Apply the IR flags to I.
Definition VPlan.h:924
VPIRFlags(GEPNoWrapFlags GEPFlags)
Definition VPlan.h:866
uint8_t CmpPredStorage
Definition VPlan.h:771
RecurKind getRecurKind() const
Definition VPlan.h:1053
VPIRFlags(Instruction &I)
Definition VPlan.h:787
Instruction & getInstruction() const
Definition VPlan.h:1719
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first part of operand Op.
Definition VPlan.h:1727
~VPIRInstruction() override=default
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
VPIRInstruction * clone() override
Clone the current recipe.
Definition VPlan.h:1706
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition VPlan.h:1733
static LLVM_ABI_FOR_TEST VPIRInstruction * create(Instruction &I)
Create a new VPIRPhi for I, if it is a PHINode, otherwise create a VPIRInstruction.
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPIRInstruction.
bool usesScalars(const VPValue *Op) const override
Returns true if the VPUser uses scalars of operand Op.
Definition VPlan.h:1721
VPIRInstruction(Instruction &I)
VPIRInstruction::create() should be used to create VPIRInstructions, as subclasses may need to be cre...
Definition VPlan.h:1694
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Helper to manage IR metadata for recipes.
Definition VPlan.h:1170
VPIRMetadata & operator=(const VPIRMetadata &Other)=default
MDNode * getMetadata(unsigned Kind) const
Get metadata of kind Kind. Returns nullptr if not found.
Definition VPlan.h:1206
VPIRMetadata(Instruction &I)
Adds metadata that can be preserved from the original instruction I.
Definition VPlan.h:1178
VPIRMetadata(const VPIRMetadata &Other)=default
Copy constructor for cloning.
void intersect(const VPIRMetadata &MD)
Intersect this VPIRMetadata object with MD, keeping only metadata nodes that are common to both.
VPIRMetadata()=default
void print(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print metadata with node IDs.
void applyMetadata(Instruction &I) const
Add all metadata to I.
void setMetadata(unsigned Kind, MDNode *Node)
Set metadata with kind Kind to Node.
Definition VPlan.h:1190
VPInstructionWithType(unsigned Opcode, ArrayRef< VPValue * > Operands, Type *ResultTy, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Definition VPlan.h:1524
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPInstruction.
Definition VPlan.h:1566
static bool classof(const VPUser *R)
Definition VPlan.h:1551
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:1532
Type * getResultType() const
Definition VPlan.h:1572
VPInstruction * clone() override
Clone the current recipe.
Definition VPlan.h:1555
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the instruction.
This is a concrete Recipe that models a single VPlan-level instruction.
Definition VPlan.h:1225
unsigned getNumOperandsWithoutMask() const
Returns the number of operands, excluding the mask if the VPInstruction is masked.
Definition VPlan.h:1456
iterator_range< operand_iterator > operandsWithoutMask()
Returns an iterator range over the operands excluding the mask operand if present.
Definition VPlan.h:1476
VPInstruction * clone() override
Clone the current recipe.
Definition VPlan.h:1397
@ ExtractLastActive
Extracts the last active lane from a set of vectors.
Definition VPlan.h:1336
@ ExtractLane
Extracts a single lane (first operand) from a set of vector operands.
Definition VPlan.h:1327
@ ExitingIVValue
Compute the exiting value of a wide induction after vectorization, that is the value of the last lane...
Definition VPlan.h:1343
@ ComputeAnyOfResult
Compute the final result of an AnyOf reduction with select(cmp(),x,y), where one of (x,...
Definition VPlan.h:1272
@ WideIVStep
Scale the first operand (vector step) by the second operand (scalar-step).
Definition VPlan.h:1317
@ ResumeForEpilogue
Explicit user for the resume phi of the canonical induction in the main VPlan, used by the epilogue v...
Definition VPlan.h:1330
@ Unpack
Extracts all lanes from its (non-scalable) vector operand.
Definition VPlan.h:1269
@ ReductionStartVector
Start vector for reductions with 3 operands: the original start value, the identity value for the red...
Definition VPlan.h:1321
@ BuildVector
Creates a fixed-width vector containing all operands.
Definition VPlan.h:1264
@ BuildStructVector
Given operands of (the same) struct type, creates a struct of fixed- width vectors each containing a ...
Definition VPlan.h:1261
@ VScale
Returns the value for vscale.
Definition VPlan.h:1339
@ CanonicalIVIncrementForPart
Definition VPlan.h:1245
bool hasResult() const
Definition VPlan.h:1421
iterator_range< const_operand_iterator > operandsWithoutMask() const
Definition VPlan.h:1479
void addMask(VPValue *Mask)
Add mask Mask to an unmasked VPInstruction, if it needs masking.
Definition VPlan.h:1461
StringRef getName() const
Returns the symbolic name assigned to the VPInstruction.
Definition VPlan.h:1501
unsigned getOpcode() const
Definition VPlan.h:1405
VPInstruction(unsigned Opcode, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
void setName(StringRef NewName)
Set the symbolic name for the VPInstruction.
Definition VPlan.h:1504
friend class VPlanSlp
Definition VPlan.h:1226
VPValue * getMask() const
Returns the mask for the VPInstruction.
Definition VPlan.h:1470
unsigned getNumOperandsForOpcode() const
Return the number of operands determined by the opcode of the VPInstruction, excluding mask.
bool isMasked() const
Returns true if the VPInstruction has a mask operand.
Definition VPlan.h:1446
virtual unsigned getNumStoreOperands() const =0
Returns the number of stored operands of this interleave group.
bool usesFirstLaneOnly(const VPValue *Op) const override=0
Returns true if the recipe only uses the first lane of operand Op.
bool needsMaskForGaps() const
Return true if the access needs a mask because of the gaps.
Definition VPlan.h:2937
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:2943
static bool classof(const VPUser *U)
Definition VPlan.h:2919
VPInterleaveBase(const unsigned char SC, const InterleaveGroup< Instruction > *IG, ArrayRef< VPValue * > Operands, ArrayRef< VPValue * > StoredValues, VPValue *Mask, bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
Definition VPlan.h:2885
Instruction * getInsertPos() const
Definition VPlan.h:2941
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:2914
const InterleaveGroup< Instruction > * getInterleaveGroup() const
Definition VPlan.h:2939
VPValue * getMask() const
Return the mask used by this recipe.
Definition VPlan.h:2931
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
Definition VPlan.h:2960
VPInterleaveBase * clone() override=0
Clone the current recipe.
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition VPlan.h:2925
A recipe for interleaved memory operations with vector-predication intrinsics.
Definition VPlan.h:3012
bool usesFirstLaneOnly(const VPValue *Op) const override
The recipe only uses the first lane of the address, and EVL operand.
Definition VPlan.h:3040
VPValue * getEVL() const
The VPValue of the explicit vector length.
Definition VPlan.h:3034
~VPInterleaveEVLRecipe() override=default
unsigned getNumStoreOperands() const override
Returns the number of stored operands of this interleave group.
Definition VPlan.h:3047
VPInterleaveEVLRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3027
VPInterleaveEVLRecipe(VPInterleaveRecipe &R, VPValue &EVL, VPValue *Mask)
Definition VPlan.h:3014
VPInterleaveRecipe is a recipe for transforming an interleave group of load or stores into one wide l...
Definition VPlan.h:2970
unsigned getNumStoreOperands() const override
Returns the number of stored operands of this interleave group.
Definition VPlan.h:2997
~VPInterleaveRecipe() override=default
VPInterleaveRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2980
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2991
VPInterleaveRecipe(const InterleaveGroup< Instruction > *IG, VPValue *Addr, ArrayRef< VPValue * > StoredValues, VPValue *Mask, bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
Definition VPlan.h:2972
In what follows, the term "input IR" refers to code that is fed into the vectorizer whereas the term ...
Helper type to provide functions to access incoming values and blocks for phi-like recipes.
Definition VPlan.h:1584
virtual const VPRecipeBase * getAsRecipe() const =0
Return a VPRecipeBase* to the current object.
VPValue * getIncomingValueForBlock(const VPBasicBlock *VPBB) const
Returns the incoming value for VPBB. VPBB must be an incoming block.
VPUser::const_operand_range incoming_values() const
Returns an iterator range over the incoming values.
Definition VPlan.h:1613
virtual unsigned getNumIncoming() const
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:1608
void removeIncomingValueFor(VPBlockBase *IncomingBlock) const
Removes the incoming value for IncomingBlock, which must be a predecessor.
const VPBasicBlock * getIncomingBlock(unsigned Idx) const
Returns the incoming block with index Idx.
Definition VPlan.h:4413
detail::zippy< llvm::detail::zip_first, VPUser::const_operand_range, const_incoming_blocks_range > incoming_values_and_blocks() const
Returns an iterator range over pairs of incoming values and corresponding incoming blocks.
Definition VPlan.h:1633
VPValue * getIncomingValue(unsigned Idx) const
Returns the incoming VPValue with index Idx.
Definition VPlan.h:1593
virtual ~VPPhiAccessors()=default
void printPhiOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the recipe.
void setIncomingValueForBlock(const VPBasicBlock *VPBB, VPValue *V) const
Sets the incoming value for VPBB to V.
iterator_range< mapped_iterator< detail::index_iterator, std::function< const VPBasicBlock *(size_t)> > > const_incoming_blocks_range
Definition VPlan.h:1618
const_incoming_blocks_range incoming_blocks() const
Returns an iterator range over the incoming blocks.
Definition VPlan.h:1622
~VPPredInstPHIRecipe() override=default
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition VPlan.h:3511
VPPredInstPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3493
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPPredInstPHIRecipe.
Definition VPlan.h:3504
VPPredInstPHIRecipe(VPValue *PredV, DebugLoc DL)
Construct a VPPredInstPHIRecipe given PredInst whose value needs a phi node after merging back from ...
Definition VPlan.h:3489
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition VPlan.h:406
bool mayReadFromMemory() const
Returns true if the recipe may read from memory.
bool mayReadOrWriteMemory() const
Returns true if the recipe may read from or write to memory.
Definition VPlan.h:550
virtual void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Each concrete VPRecipe prints itself, without printing common information, like debug info or metadat...
VPRegionBlock * getRegion()
Definition VPlan.h:4574
void setDebugLoc(DebugLoc NewDL)
Set the recipe's debug location to NewDL.
Definition VPlan.h:561
bool mayWriteToMemory() const
Returns true if the recipe may write to memory.
~VPRecipeBase() override=default
VPBasicBlock * getParent()
Definition VPlan.h:481
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
Definition VPlan.h:555
virtual void execute(VPTransformState &State)=0
The method which generates the output IR instructions that correspond to this VPRecipe,...
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
static bool classof(const VPDef *D)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:530
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
virtual VPRecipeBase * clone()=0
Clone the current recipe.
friend class VPBlockUtils
Definition VPlan.h:408
const VPBasicBlock * getParent() const
Definition VPlan.h:482
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this recipe, taking into account if the cost computation should be skipped and the...
static bool classof(const VPUser *U)
Definition VPlan.h:535
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
unsigned getVPRecipeID() const
Definition VPlan.h:527
void moveAfter(VPRecipeBase *MovePos)
Unlink this recipe from its current VPBasicBlock and insert it into the VPBasicBlock that MovePos liv...
enum { VPBranchOnMaskSC, VPDerivedIVSC, VPExpandSCEVSC, VPExpressionSC, VPIRInstructionSC, VPInstructionSC, VPInterleaveEVLSC, VPInterleaveSC, VPReductionEVLSC, VPReductionSC, VPReplicateSC, VPScalarIVStepsSC, VPVectorPointerSC, VPVectorEndPointerSC, VPWidenCallSC, VPWidenCanonicalIVSC, VPWidenCastSC, VPWidenGEPSC, VPWidenIntrinsicSC, VPWidenLoadEVLSC, VPWidenLoadSC, VPWidenStoreEVLSC, VPWidenStoreSC, VPWidenSC, VPBlendSC, VPHistogramSC, VPWidenPHISC, VPPredInstPHISC, VPCanonicalIVPHISC, VPCurrentIterationPHISC, VPActiveLaneMaskPHISC, VPFirstOrderRecurrencePHISC, VPWidenIntOrFpInductionSC, VPWidenPointerInductionSC, VPReductionPHISC, VPFirstPHISC=VPWidenPHISC, VPFirstHeaderPHISC=VPCanonicalIVPHISC, VPLastHeaderPHISC=VPReductionPHISC, VPLastPHISC=VPReductionPHISC, } VPRecipeTy
An enumeration for keeping track of the concrete subclass of VPRecipeBase that is actually instantiat...
Definition VPlan.h:424
VPRecipeBase(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:471
A VPValue defined by a recipe that produces one or more values.
Definition VPlanValue.h:270
LLVM_ABI_FOR_TEST VPRecipeValue(VPRecipeBase *Def, Value *UV=nullptr)
Definition VPlan.cpp:143
friend class VPValue
Definition VPlanValue.h:271
VPValue * getEVL() const
The VPValue of the explicit vector length.
Definition VPlan.h:3195
VPReductionEVLRecipe(VPReductionRecipe &R, VPValue &EVL, VPValue *CondOp, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:3174
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3198
VPReductionEVLRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3185
~VPReductionEVLRecipe() override=default
bool isOrdered() const
Returns true if the phi is part of an ordered reduction.
Definition VPlan.h:2761
void setVFScaleFactor(unsigned ScaleFactor)
Set the VFScaleFactor for this reduction phi.
Definition VPlan.h:2747
VPReductionPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2726
unsigned getVFScaleFactor() const
Get the factor that the VF of this recipe's output should be scaled by, or 1 if it isn't scaled.
Definition VPlan.h:2740
~VPReductionPHIRecipe() override=default
bool hasUsesOutsideReductionChain() const
Returns true if the phi is part of a multi-use reduction.
Definition VPlan.h:2773
unsigned getNumIncoming() const override
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:2755
VPReductionPHIRecipe(PHINode *Phi, RecurKind Kind, VPValue &Start, VPValue &BackedgeValue, ReductionStyle Style, const VPIRFlags &Flags, bool HasUsesOutsideReductionChain=false)
Create a new VPReductionPHIRecipe for the reduction Phi.
Definition VPlan.h:2714
bool isInLoop() const
Returns true if the phi is part of an in-loop reduction.
Definition VPlan.h:2764
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2778
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi/select nodes.
bool isPartialReduction() const
Returns true if the reduction outputs a vector with a scaled down VF.
Definition VPlan.h:2770
RecurKind getRecurrenceKind() const
Returns the recurrence kind of the reduction.
Definition VPlan.h:2758
A recipe to represent inloop, ordered or partial reduction operations.
Definition VPlan.h:3063
VPReductionRecipe(const unsigned char SC, RecurKind RdxKind, FastMathFlags FMFs, Instruction *I, ArrayRef< VPValue * > Operands, VPValue *CondOp, ReductionStyle Style, DebugLoc DL)
Definition VPlan.h:3072
bool isConditional() const
Return true if the in-loop reduction is conditional.
Definition VPlan.h:3137
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:3106
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:3121
VPValue * getVecOp() const
The VPValue of the vector value to be reduced.
Definition VPlan.h:3148
VPValue * getCondOp() const
The VPValue of the condition for the block.
Definition VPlan.h:3150
RecurKind getRecurrenceKind() const
Return the recurrence kind for the in-loop reduction.
Definition VPlan.h:3133
VPReductionRecipe(RecurKind RdxKind, FastMathFlags FMFs, Instruction *I, VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp, ReductionStyle Style, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:3086
bool isOrdered() const
Return true if the in-loop reduction is ordered.
Definition VPlan.h:3135
VPReductionRecipe(const RecurKind RdxKind, FastMathFlags FMFs, VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp, ReductionStyle Style, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:3092
bool isPartialReduction() const
Returns true if the reduction outputs a vector with a scaled down VF.
Definition VPlan.h:3139
~VPReductionRecipe() override=default
VPValue * getChainOp() const
The VPValue of the scalar Chain being accumulated.
Definition VPlan.h:3146
bool isInLoop() const
Returns true if the reduction is in-loop.
Definition VPlan.h:3141
VPReductionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3100
static bool classof(const VPUser *U)
Definition VPlan.h:3111
static bool classof(const VPValue *VPV)
Definition VPlan.h:3116
unsigned getVFScaleFactor() const
Get the factor that the VF of this recipe's output should be scaled by, or 1 if it isn't scaled.
Definition VPlan.h:3155
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition VPlan.h:4457
const VPBlockBase * getEntry() const
Definition VPlan.h:4493
Type * getCanonicalIVType()
Return the type of the canonical IV for loop regions.
Definition VPlan.h:4568
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
Definition VPlan.h:4525
~VPRegionBlock() override=default
void setExiting(VPBlockBase *ExitingBlock)
Set ExitingBlock as the exiting VPBlockBase of this VPRegionBlock.
Definition VPlan.h:4510
VPBlockBase * getExiting()
Definition VPlan.h:4506
VPCanonicalIVPHIRecipe * getCanonicalIV()
Returns the canonical induction recipe of the region.
Definition VPlan.h:4555
void setEntry(VPBlockBase *EntryBlock)
Set EntryBlock as the entry VPBlockBase of this VPRegionBlock.
Definition VPlan.h:4498
const Type * getCanonicalIVType() const
Definition VPlan.h:4569
const VPBlockBase * getExiting() const
Definition VPlan.h:4505
VPBlockBase * getEntry()
Definition VPlan.h:4494
const VPCanonicalIVPHIRecipe * getCanonicalIV() const
Definition VPlan.h:4563
VPBasicBlock * getPreheaderVPBB()
Returns the pre-header VPBasicBlock of the loop region.
Definition VPlan.h:4518
friend class VPlan
Definition VPlan.h:4458
static bool classof(const VPBlockBase *V)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:4489
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
Definition VPlan.h:3217
bool isSingleScalar() const
Definition VPlan.h:3258
VPReplicateRecipe(Instruction *I, ArrayRef< VPValue * > Operands, bool IsSingleScalar, VPValue *Mask=nullptr, const VPIRFlags &Flags={}, VPIRMetadata Metadata={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:3225
~VPReplicateRecipe() override=default
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition VPlan.h:3270
bool isPredicated() const
Definition VPlan.h:3260
VPReplicateRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3239
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3263
unsigned getOpcode() const
Definition VPlan.h:3287
VPValue * getMask()
Return the mask of a predicated VPReplicateRecipe.
Definition VPlan.h:3282
Instruction::BinaryOps getInductionOpcode() const
Definition VPlan.h:4144
VPValue * getStepValue() const
Definition VPlan.h:4117
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPScalarIVStepsRecipe.
Definition VPlan.h:4111
VPScalarIVStepsRecipe(const InductionDescriptor &IndDesc, VPValue *IV, VPValue *Step, VPValue *VF, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:4084
void setStartIndex(VPValue *StartIndex)
Set or add the StartIndex operand.
Definition VPlan.h:4130
VPScalarIVStepsRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:4096
VPValue * getStartIndex() const
Return the StartIndex, or null if known to be zero, valid only after unrolling.
Definition VPlan.h:4125
VPValue * getVFValue() const
Return the number of scalars to produce per unroll part, used to compute StartIndex during unrolling.
Definition VPlan.h:4121
VPScalarIVStepsRecipe(VPValue *IV, VPValue *Step, VPValue *VF, Instruction::BinaryOps Opcode, FastMathFlags FMFs, DebugLoc DL)
Definition VPlan.h:4077
~VPScalarIVStepsRecipe() override=default
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:4138
VPSingleDefRecipe is a base class for recipes for modeling a sequence of one or more output IR that define ...
Definition VPlan.h:607
VPSingleDefRecipe(const unsigned char SC, ArrayRef< VPValue * > Operands, Value *UV, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:613
static bool classof(const VPValue *V)
Definition VPlan.h:662
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
Definition VPlan.h:675
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:617
const Instruction * getUnderlyingInstr() const
Definition VPlan.h:678
static bool classof(const VPUser *U)
Definition VPlan.h:667
LLVM_ABI_FOR_TEST LLVM_DUMP_METHOD void dump() const
Print this VPSingleDefRecipe to dbgs() (for debugging).
VPSingleDefRecipe * clone() override=0
Clone the current recipe.
VPSingleDefRecipe(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:609
This class can be used to assign names to VPValues.
Helper to access the operand that contains the unroll part for this recipe after unrolling.
Definition VPlan.h:1158
VPValue * getUnrollPartOperand(const VPUser &U) const
Return the VPValue operand containing the unroll part or null if there is no such operand.
unsigned getUnrollPart(const VPUser &U) const
Return the unroll part.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition VPlanValue.h:296
void printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the operands to O.
Definition VPlan.cpp:1474
operand_range operands()
Definition VPlanValue.h:364
void setOperand(unsigned I, VPValue *New)
Definition VPlanValue.h:340
unsigned getNumOperands() const
Definition VPlanValue.h:334
operand_iterator op_end()
Definition VPlanValue.h:362
operand_iterator op_begin()
Definition VPlanValue.h:360
VPValue * getOperand(unsigned N) const
Definition VPlanValue.h:335
VPUser(ArrayRef< VPValue * > Operands)
Definition VPlanValue.h:315
iterator_range< const_operand_iterator > const_operand_range
Definition VPlanValue.h:358
iterator_range< operand_iterator > operand_range
Definition VPlanValue.h:357
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
Definition VPlanValue.h:46
Value * getLiveInIRValue() const
Return the underlying IR value for a VPIRValue.
Definition VPlan.cpp:137
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Definition VPlan.cpp:127
friend class VPRecipeValue
Definition VPlanValue.h:49
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
Definition VPlanValue.h:70
void setUnderlyingValue(Value *Val)
Definition VPlanValue.h:196
unsigned getNumUsers() const
Definition VPlanValue.h:107
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition VPlan.h:2186
VPValue * getVFValue() const
Definition VPlan.h:2175
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Type * getSourceElementType() const
Definition VPlan.h:2172
int64_t getStride() const
Definition VPlan.h:2173
VPVectorEndPointerRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2207
VPValue * getOffset() const
Definition VPlan.h:2176
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition VPlan.h:2200
VPVectorEndPointerRecipe(VPValue *Ptr, VPValue *VF, Type *SourceElementTy, int64_t Stride, GEPNoWrapFlags GEPFlags, DebugLoc DL)
Definition VPlan.h:2162
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPVectorEndPointerRecipe.
Definition VPlan.h:2193
VPValue * getPointer() const
Definition VPlan.h:2174
void materializeOffset(unsigned Part=0)
Adds the offset operand to the recipe.
Type * getSourceElementType() const
Definition VPlan.h:2244
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition VPlan.h:2246
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition VPlan.h:2253
VPVectorPointerRecipe(VPValue *Ptr, Type *SourceElementTy, GEPNoWrapFlags GEPFlags, DebugLoc DL)
Definition VPlan.h:2231
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPVectorPointerRecipe.
Definition VPlan.h:2269
VPVectorPointerRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2260
A recipe for widening Call instructions using library calls.
Definition VPlan.h:1992
VPWidenCallRecipe(Value *UV, Function *Variant, ArrayRef< VPValue * > CallArguments, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL={})
Definition VPlan.h:1999
const_operand_range args() const
Definition VPlan.h:2033
VPWidenCallRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2014
operand_range args()
Definition VPlan.h:2032
Function * getCalledScalarFunction() const
Definition VPlan.h:2028
~VPWidenCallRecipe() override=default
void execute(VPTransformState &State) override
Generate a canonical vector induction variable of the vector loop, with start = {<Part*VF,...
~VPWidenCanonicalIVRecipe() override=default
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCanonicalIVRecipe.
Definition VPlan.h:3984
VPWidenCanonicalIVRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3971
VPWidenCanonicalIVRecipe(VPCanonicalIVPHIRecipe *CanonicalIV)
Definition VPlan.h:3966
VPWidenCastRecipe is a recipe to create vector cast instructions.
Definition VPlan.h:1840
Instruction::CastOps getOpcode() const
Definition VPlan.h:1878
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Type * getResultType() const
Returns the result type of the cast.
Definition VPlan.h:1881
LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override
Produce widened copies of the cast.
~VPWidenCastRecipe() override=default
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCastRecipe.
VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, CastInst *CI=nullptr, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1848
VPWidenCastRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1863
unsigned getOpcode() const
This recipe generates a GEP instruction.
Definition VPlan.h:2124
Type * getSourceElementType() const
Definition VPlan.h:2129
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenGEPRecipe.
Definition VPlan.h:2132
VPWidenGEPRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2116
~VPWidenGEPRecipe() override=default
VPWidenGEPRecipe(GetElementPtrInst *GEP, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2102
void execute(VPTransformState &State) override=0
Generate the phi nodes.
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2440
static bool classof(const VPValue *V)
Definition VPlan.h:2388
void setStepValue(VPValue *V)
Update the step value of the recipe.
Definition VPlan.h:2407
VPValue * getBackedgeValue() override
Returns the incoming value from the loop backedge.
Definition VPlan.h:2425
VPIRValue * getStartValue() const
Returns the start value of the induction.
Definition VPlan.h:2400
unsigned getNumIncoming() const override
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:2415
PHINode * getPHINode() const
Returns the underlying PHINode if one exists, or null otherwise.
Definition VPlan.h:2418
VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start, VPValue *Step, const InductionDescriptor &IndDesc, DebugLoc DL)
Definition VPlan.h:2376
VPValue * getStepValue()
Returns the step value of the induction.
Definition VPlan.h:2403
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
Definition VPlan.h:2423
VPRecipeBase & getBackedgeRecipe() override
Returns the backedge value as a recipe.
Definition VPlan.h:2432
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:2383
const VPValue * getVFValue() const
Definition VPlan.h:2410
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:2393
const VPValue * getStepValue() const
Definition VPlan.h:2404
VPIRValue * getStartValue() const
Returns the start value of the induction.
Definition VPlan.h:2501
const TruncInst * getTruncInst() const
Definition VPlan.h:2517
void execute(VPTransformState &State) override
Generate the phi nodes.
Definition VPlan.h:2495
~VPWidenIntOrFpInductionRecipe() override=default
VPValue * getSplatVFValue() const
If the recipe has been unrolled, return the VPValue for the induction increment, otherwise return nul...
Definition VPlan.h:2505
VPWidenIntOrFpInductionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2487
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPIRValue *Start, VPValue *Step, VPValue *VF, const InductionDescriptor &IndDesc, const VPIRFlags &Flags, DebugLoc DL)
Definition VPlan.h:2461
TruncInst * getTruncInst()
Returns the first defined value as TruncInst, if it is one or nullptr otherwise.
Definition VPlan.h:2516
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPIRValue *Start, VPValue *Step, VPValue *VF, const InductionDescriptor &IndDesc, TruncInst *Trunc, const VPIRFlags &Flags, DebugLoc DL)
Definition VPlan.h:2470
VPValue * getLastUnrolledPartOperand()
Returns the VPValue representing the value of this induction at the last unrolled part,...
Definition VPlan.h:2532
unsigned getNumIncoming() const override
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:2512
Type * getScalarType() const
Returns the scalar type of the induction.
Definition VPlan.h:2525
bool isCanonical() const
Returns true if the induction is canonical, i.e.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
A recipe for widening vector intrinsics.
Definition VPlan.h:1892
VPWidenIntrinsicRecipe(Intrinsic::ID VectorIntrinsicID, ArrayRef< VPValue * > CallArguments, Type *Ty, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1923
Intrinsic::ID getVectorIntrinsicID() const
Return the ID of the intrinsic.
Definition VPlan.h:1963
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool mayReadFromMemory() const
Returns true if the intrinsic may read from memory.
Definition VPlan.h:1972
StringRef getIntrinsicName() const
Return the name of the intrinsic as a string.
VPWidenIntrinsicRecipe(CallInst &CI, Intrinsic::ID VectorIntrinsicID, ArrayRef< VPValue * > CallArguments, Type *Ty, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1909
bool mayHaveSideEffects() const
Returns true if the intrinsic may have side-effects.
Definition VPlan.h:1978
VPWidenIntrinsicRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1944
bool mayWriteToMemory() const
Returns true if the intrinsic may write to memory.
Definition VPlan.h:1975
~VPWidenIntrinsicRecipe() override=default
LLVM_ABI_FOR_TEST bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Type * getResultType() const
Return the scalar return type of the intrinsic.
Definition VPlan.h:1966
LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override
Produce a widened version of the vector intrinsic.
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this vector intrinsic.
bool IsMasked
Whether the memory access is masked.
Definition VPlan.h:3542
bool Reverse
Whether the consecutive accessed addresses are in reverse order.
Definition VPlan.h:3539
bool isConsecutive() const
Return whether the loaded-from / stored-to addresses are consecutive.
Definition VPlan.h:3582
static bool classof(const VPUser *U)
Definition VPlan.h:3576
void execute(VPTransformState &State) override
Generate the wide load/store.
Definition VPlan.h:3605
Instruction & Ingredient
Definition VPlan.h:3530
VPWidenMemoryRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3565
Instruction & getIngredient() const
Definition VPlan.h:3613
bool Consecutive
Whether the accessed addresses are consecutive.
Definition VPlan.h:3536
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:3569
VPValue * getMask() const
Return the mask used by this recipe.
Definition VPlan.h:3596
Align Alignment
Alignment information for this memory access.
Definition VPlan.h:3533
bool isMasked() const
Returns true if the recipe is masked.
Definition VPlan.h:3592
VPWidenMemoryRecipe(const char unsigned SC, Instruction &I, std::initializer_list< VPValue * > Operands, bool Consecutive, bool Reverse, const VPIRMetadata &Metadata, DebugLoc DL)
Definition VPlan.h:3552
void setMask(VPValue *Mask)
Definition VPlan.h:3544
Align getAlign() const
Returns the alignment of the memory access.
Definition VPlan.h:3602
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition VPlan.h:3589
bool isReverse() const
Return whether the consecutive loaded/stored addresses are in reverse order.
Definition VPlan.h:3586
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:2632
VPWidenPHIRecipe(PHINode *Phi, VPValue *Start=nullptr, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new VPWidenPHIRecipe for Phi with start value Start and debug location DL.
Definition VPlan.h:2597
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenPHIRecipe.
VPWidenPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2605
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPWidenPHIRecipe() override=default
void execute(VPTransformState &State) override
Generate the phi/select nodes.
VPWidenPointerInductionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2559
~VPWidenPointerInductionRecipe() override=default
bool onlyScalarsGenerated(bool IsScalable)
Returns true if only scalar values will be generated.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate vector values for the pointer induction.
Definition VPlan.h:2568
VPWidenPointerInductionRecipe(PHINode *Phi, VPValue *Start, VPValue *Step, VPValue *NumUnrolledElems, const InductionDescriptor &IndDesc, DebugLoc DL)
Create a new VPWidenPointerInductionRecipe for Phi with start value Start and the number of elements ...
Definition VPlan.h:2549
VPWidenRecipe is a recipe for producing a widened instruction using the opcode and operands of the re...
Definition VPlan.h:1784
VPWidenRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1804
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:1831
VPWidenRecipe(Instruction &I, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL={})
Definition VPlan.h:1788
VPWidenRecipe(unsigned Opcode, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL={})
Definition VPlan.h:1796
~VPWidenRecipe() override=default
unsigned getOpcode() const
Definition VPlan.h:1821
Class that maps (parts of) an existing VPlan to trees of combined VPInstructions.
Definition VPlanSLP.h:74
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient outp...
Definition VPlan.h:4587
VPIRValue * getLiveIn(Value *V) const
Return the live-in VPIRValue for V, if there is one or nullptr otherwise.
Definition VPlan.h:4895
LLVM_ABI_FOR_TEST void printDOT(raw_ostream &O) const
Print this VPlan in DOT format to O.
Definition VPlan.cpp:1159
friend class VPSlotTracker
Definition VPlan.h:4589
std::string getName() const
Return a string with the name of the plan and the applicable VFs and UFs.
Definition VPlan.cpp:1135
bool hasVF(ElementCount VF) const
Definition VPlan.h:4800
const DataLayout & getDataLayout() const
Definition VPlan.h:4782
LLVMContext & getContext() const
Definition VPlan.h:4778
VPBasicBlock * getEntry()
Definition VPlan.h:4679
void setName(const Twine &newName)
Definition VPlan.h:4839
bool hasScalableVF() const
Definition VPlan.h:4801
VPValue * getTripCount() const
The trip count of the original loop.
Definition VPlan.h:4737
VPValue * getOrCreateBackedgeTakenCount()
The backedge taken count of the original loop.
Definition VPlan.h:4758
iterator_range< SmallSetVector< ElementCount, 2 >::iterator > vectorFactors() const
Returns an iterator range over all VFs of the plan.
Definition VPlan.h:4807
VPIRBasicBlock * getExitBlock(BasicBlock *IRBB) const
Return the VPIRBasicBlock corresponding to IRBB.
Definition VPlan.cpp:904
LLVM_ABI_FOR_TEST ~VPlan()
Definition VPlan.cpp:882
VPIRValue * getOrAddLiveIn(VPIRValue *V)
Definition VPlan.h:4857
bool isExitBlock(VPBlockBase *VPBB)
Returns true if VPBB is an exit block.
Definition VPlan.cpp:912
const VPBasicBlock * getEntry() const
Definition VPlan.h:4680
friend class VPlanPrinter
Definition VPlan.h:4588
VPIRValue * getFalse()
Return a VPIRValue wrapping i1 false.
Definition VPlan.h:4866
VPIRValue * getConstantInt(const APInt &Val)
Return a VPIRValue wrapping a ConstantInt with the given APInt value.
Definition VPlan.h:4889
VPSymbolicValue & getVFxUF()
Returns VF * UF of the vector loop region.
Definition VPlan.h:4776
VPIRValue * getAllOnesValue(Type *Ty)
Return a VPIRValue wrapping the AllOnes value of type Ty.
Definition VPlan.h:4872
VPRegionBlock * createReplicateRegion(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name="")
Create a new replicate region with Entry, Exiting and Name.
Definition VPlan.h:4942
VPIRBasicBlock * createEmptyVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock wrapping IRBB, but do not create VPIRInstructions wrapping the instructions i...
Definition VPlan.cpp:1271
auto getLiveIns() const
Return the list of live-in VPValues available in the VPlan.
Definition VPlan.h:4898
bool hasUF(unsigned UF) const
Definition VPlan.h:4818
ArrayRef< VPIRBasicBlock * > getExitBlocks() const
Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of the original scalar loop.
Definition VPlan.h:4727
VPSymbolicValue & getVectorTripCount()
The vector trip count.
Definition VPlan.h:4766
VPValue * getBackedgeTakenCount() const
Definition VPlan.h:4763
VPIRValue * getOrAddLiveIn(Value *V)
Gets the live-in VPIRValue for V or adds a new live-in (if none exists yet) for V.
Definition VPlan.h:4843
VPIRValue * getZero(Type *Ty)
Return a VPIRValue wrapping the null value of type Ty.
Definition VPlan.h:4869
void setVF(ElementCount VF)
Definition VPlan.h:4788
bool isUnrolled() const
Returns true if the VPlan already has been unrolled, i.e.
Definition VPlan.h:4834
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition VPlan.cpp:1058
bool hasEarlyExit() const
Returns true if the VPlan is based on a loop with an early exit.
Definition VPlan.h:4964
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this plan.
Definition VPlan.cpp:1040
unsigned getConcreteUF() const
Returns the concrete UF of the plan, after unrolling.
Definition VPlan.h:4821
VPIRValue * getConstantInt(unsigned BitWidth, uint64_t Val, bool IsSigned=false)
Return a VPIRValue wrapping a ConstantInt with the given bitwidth and value.
Definition VPlan.h:4883
const VPBasicBlock * getMiddleBlock() const
Definition VPlan.h:4713
void setTripCount(VPValue *NewTripCount)
Set the trip count assuming it is currently null; if it is not - use resetTripCount().
Definition VPlan.h:4744
void resetTripCount(VPValue *NewTripCount)
Resets the trip count for the VPlan.
Definition VPlan.h:4751
VPBasicBlock * getMiddleBlock()
Returns the 'middle' block of the plan, that is the block that selects whether to execute the scalar ...
Definition VPlan.h:4704
void setEntry(VPBasicBlock *VPBB)
Definition VPlan.h:4668
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
Definition VPlan.h:4921
LLVM_ABI_FOR_TEST VPIRBasicBlock * createVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock from IRBB containing VPIRInstructions for all instructions in IRBB,...
Definition VPlan.cpp:1277
void removeVF(ElementCount VF)
Remove VF from the plan.
Definition VPlan.h:4795
VPIRValue * getTrue()
Return a VPIRValue wrapping i1 true.
Definition VPlan.h:4863
VPRegionBlock * createLoopRegion(const std::string &Name="", VPBlockBase *Entry=nullptr, VPBlockBase *Exiting=nullptr)
Create a new loop region with Name and entry and exiting blocks set to Entry and Exiting respectively...
Definition VPlan.h:4931
LLVM_DUMP_METHOD void dump() const
Dump the plan to stderr (for debugging).
Definition VPlan.cpp:1165
VPSymbolicValue & getUF()
Returns the UF of the vector loop region.
Definition VPlan.h:4773
bool hasScalarVFOnly() const
Definition VPlan.h:4811
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
Definition VPlan.h:4718
void execute(VPTransformState *State)
Generate the IR code for this VPlan.
Definition VPlan.cpp:922
LLVM_ABI_FOR_TEST void print(raw_ostream &O) const
Print this VPlan to O.
Definition VPlan.cpp:1118
void addVF(ElementCount VF)
Definition VPlan.h:4786
VPIRBasicBlock * getScalarHeader() const
Return the VPIRBasicBlock wrapping the header of the scalar loop.
Definition VPlan.h:4723
void printLiveIns(raw_ostream &O) const
Print the live-ins of this VPlan to O.
Definition VPlan.cpp:1074
VPBasicBlock * getVectorPreheader()
Returns the preheader of the vector loop region, if one exists, or null otherwise.
Definition VPlan.h:4684
VPSymbolicValue & getVF()
Returns the VF of the vector loop region.
Definition VPlan.h:4769
void setUF(unsigned UF)
Definition VPlan.h:4826
const VPSymbolicValue & getVF() const
Definition VPlan.h:4770
bool hasScalarTail() const
Returns true if the scalar tail may execute after the vector loop.
Definition VPlan.h:4974
LLVM_ABI_FOR_TEST VPlan * duplicate()
Clone the current VPlan, update all VPValues of the new VPlan and cloned recipes to refer to the clon...
Definition VPlan.cpp:1206
VPlan(BasicBlock *ScalarHeaderBB)
Construct a VPlan with a new VPBasicBlock as entry, a VPIRBasicBlock wrapping ScalarHeaderBB and a tr...
Definition VPlan.h:4661
VPIRValue * getConstantInt(Type *Ty, uint64_t Val, bool IsSigned=false)
Return a VPIRValue wrapping a ConstantInt with the given type and value.
Definition VPlan.h:4877
LLVM Value Representation.
Definition Value.h:75
Increasing range of size_t indices.
Definition STLExtras.h:2507
typename base_list_type::const_reverse_iterator const_reverse_iterator
Definition ilist.h:124
typename base_list_type::reverse_iterator reverse_iterator
Definition ilist.h:123
typename base_list_type::const_iterator const_iterator
Definition ilist.h:122
An intrusive list with ownership and callbacks specified/controlled by ilist_traits,...
Definition ilist.h:328
A range adaptor for a pair of iterators.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
This file defines classes to implement an intrusive doubly linked list class (i.e.
This file defines the ilist_node class template, which is a convenient base class for creating classe...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI AttributeSet getFnAttributes(LLVMContext &C, ID id)
Return the function attributes for an intrinsic.
std::variant< std::monostate, Loc::Single, Loc::Multi, Loc::MMI, Loc::EntryValue > Variant
Alias for the std::variant specialization base class of DbgVariable.
Definition DwarfDebug.h:190
static auto castToVPIRMetadata(RecipeBasePtrTy R) -> DstTy
Definition VPlan.h:4203
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
@ Offset
Definition DWP.cpp:532
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iterable types.
Definition STLExtras.h:831
LLVM_PACKED_END
Definition VPlan.h:1108
auto cast_if_present(const Y &Val)
cast_if_present<X> - Functionally identical to cast, except that a null value is accepted.
Definition Casting.h:683
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1765
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
Definition STLExtras.h:841
ReductionStyle getReductionStyle(bool InLoop, bool Ordered, unsigned ScaleFactor)
Definition VPlan.h:2687
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2554
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI void getMetadataToPropagate(Instruction *Inst, SmallVectorImpl< std::pair< unsigned, MDNode * > > &Metadata)
Add metadata from Inst to Metadata, if it can be preserved after vectorization.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
auto cast_or_null(const Y &Val)
Definition Casting.h:714
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
MemoryEffectsBase< IRMemLocation > MemoryEffects
Summary of how a function affects memory in the program.
Definition ModRef.h:356
auto map_range(ContainerTy &&C, FuncTy F)
Return a range that applies F to the elements of C.
Definition STLExtras.h:366
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
auto reverse(ContainerTy &&C)
Definition STLExtras.h:408
UncountableExitStyle
Different methods of handling early exits.
Definition VPlan.h:83
@ MaskedHandleExitInScalarLoop
All memory operations other than the load(s) required to determine whether an uncountable exit occurr...
Definition VPlan.h:93
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition STLExtras.h:323
@ Other
Any other memory.
Definition ModRef.h:68
RecurKind
These are the kinds of recurrences that we support.
@ Mul
Product of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition STLExtras.h:2012
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
constexpr unsigned BitWidth
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:2019
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1772
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1947
std::variant< RdxOrdered, RdxInLoop, RdxUnordered > ReductionStyle
Definition VPlan.h:2685
std::unique_ptr< VPlan > VPlanPtr
Definition VPlan.h:78
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
static Bitfield::Type get(StorageType Packed)
Unpacks the field from the Packed value.
Definition Bitfields.h:207
static void set(StorageType &Packed, typename Bitfield::Type Value)
Sets the typed value in the provided Packed value.
Definition Bitfields.h:223
Support casting from VPRecipeBase -> VPIRMetadata, by down-casting to the recipe types implementing V...
Definition VPlan.h:4234
static RetTy doCast(SrcTy R)
doCast is used by cast<>.
Definition VPlan.h:4248
static RetTy doCastIfPossible(SrcTy R)
doCastIfPossible is used by dyn_cast<>.
Definition VPlan.h:4253
static bool isPossible(SrcTy R)
Definition VPlan.h:4235
Support casting from VPRecipeBase -> VPPhiAccessors, by down-casting to the recipe types implementing...
Definition VPlan.h:4165
static VPPhiAccessors * doCastIfPossible(SrcTy f)
doCastIfPossible is used by dyn_cast<>.
Definition VPlan.h:4186
CastInfo< VPPhiAccessors, SrcTy > Self
Definition VPlan.h:4167
static VPPhiAccessors * doCast(SrcTy R)
doCast is used by cast<>.
Definition VPlan.h:4170
This struct provides a method for customizing the way a cast is performed.
Definition Casting.h:476
static bool isPossible(const VPRecipeBase *f)
Definition VPlan.h:4157
This struct provides a way to check if a given cast is possible.
Definition Casting.h:253
static bool isPossible(const SrcTy &f)
Definition Casting.h:254
This reduction is in-loop.
Definition VPlan.h:2679
Possible variants of a reduction.
Definition VPlan.h:2677
This reduction is unordered with the partial result scaled down by some factor.
Definition VPlan.h:2682
unsigned VFScaleFactor
Definition VPlan.h:2683
A MapVector that performs no allocations if smaller than a certain size.
Definition MapVector.h:276
An overlay on VPIRValue for VPValues that wrap a ConstantInt.
Definition VPlanValue.h:225
Struct to hold various analysis needed for cost computations.
void execute(VPTransformState &State) override
Generate the phi nodes.
VPFirstOrderRecurrencePHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2648
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this first-order recurrence phi recipe.
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2660
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPFirstOrderRecurrencePHIRecipe(PHINode *Phi, VPValue &Start, VPValue &BackedgeValue)
Definition VPlan.h:2639
DisjointFlagsTy(bool IsDisjoint)
Definition VPlan.h:722
NonNegFlagsTy(bool IsNonNeg)
Definition VPlan.h:727
TruncFlagsTy(bool HasNUW, bool HasNSW)
Definition VPlan.h:717
WrapFlagsTy(bool HasNUW, bool HasNSW)
Definition VPlan.h:710
PHINode & getIRPhi()
Definition VPlan.h:1765
VPIRPhi(PHINode &PN)
Definition VPlan.h:1753
static bool classof(const VPRecipeBase *U)
Definition VPlan.h:1755
static bool classof(const VPUser *U)
Definition VPlan.h:1760
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:1776
A VPValue representing a live-in from the input IR or a constant.
Definition VPlanValue.h:207
Type * getType() const
Returns the type of the underlying IR value.
Definition VPlan.cpp:141
static bool classof(const VPUser *U)
Definition VPlan.h:1652
VPPhi(ArrayRef< VPValue * > Operands, const VPIRFlags &Flags, DebugLoc DL, const Twine &Name="")
Definition VPlan.h:1648
VPPhi * clone() override
Clone the current recipe.
Definition VPlan.h:1667
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:1682
static bool classof(const VPSingleDefRecipe *SDR)
Definition VPlan.h:1662
static bool classof(const VPValue *V)
Definition VPlan.h:1657
A pure-virtual common base class for recipes defining a single VPValue and using IR flags.
Definition VPlan.h:1112
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:1145
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:1118
InstructionCost getCostForRecipeWithOpcode(unsigned Opcode, ElementCount VF, VPCostContext &Ctx) const
Compute the cost for this recipe for VF, using Opcode and Ctx.
VPRecipeWithIRFlags(const unsigned char SC, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1113
static bool classof(const VPValue *V)
Definition VPlan.h:1138
void execute(VPTransformState &State) override=0
The method which generates the output IR instructions that correspond to this VPRecipe,...
VPRecipeWithIRFlags * clone() override=0
Clone the current recipe.
static bool classof(const VPUser *U)
Definition VPlan.h:1133
A symbolic live-in VPValue, used for values like vector trip count, VF, and VFxUF.
Definition VPlanValue.h:247
VPTransformState holds information passed down when "executing" a VPlan, needed for generating the ou...
A recipe for widening load operations with vector-predication intrinsics, using the address to load f...
Definition VPlan.h:3661
LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override
Generate the wide load or gather.
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenLoadEVLRecipe.
VPValue * getEVL() const
Return the EVL operand.
Definition VPlan.h:3674
VPWidenLoadEVLRecipe(VPWidenLoadRecipe &L, VPValue *Addr, VPValue &EVL, VPValue *Mask)
Definition VPlan.h:3662
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3684
A recipe for widening load operations, using the address to load from and an optional mask.
Definition VPlan.h:3619
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3641
void execute(VPTransformState &State) override
Generate a wide load or gather.
VPWidenLoadRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask, bool Consecutive, bool Reverse, const VPIRMetadata &Metadata, DebugLoc DL)
Definition VPlan.h:3620
VPWidenLoadRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3629
VP_CLASSOF_IMPL(VPRecipeBase::VPWidenLoadSC)
A recipe for widening store operations with vector-predication intrinsics, using the value to store,...
Definition VPlan.h:3746
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition VPlan.h:3758
LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override
Generate the wide store or scatter.
VPWidenStoreEVLRecipe(VPWidenStoreRecipe &S, VPValue *Addr, VPValue *StoredVal, VPValue &EVL, VPValue *Mask)
Definition VPlan.h:3747
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3771
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenStoreEVLRecipe.
VPValue * getEVL() const
Return the EVL operand.
Definition VPlan.h:3761
A recipe for widening store operations, using the stored value, the address to store to and an option...
Definition VPlan.h:3702
VP_CLASSOF_IMPL(VPRecipeBase::VPWidenStoreSC)
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition VPlan.h:3721
VPWidenStoreRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3712
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3727
VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal, VPValue *Mask, bool Consecutive, bool Reverse, const VPIRMetadata &Metadata, DebugLoc DL)
Definition VPlan.h:3703