LLVM 23.0.0git
VPlan.h
Go to the documentation of this file.
1//===- VPlan.h - Represent A Vectorizer Plan --------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file contains the declarations of the Vectorization Plan base classes:
11/// 1. VPBasicBlock and VPRegionBlock that inherit from a common pure virtual
12/// VPBlockBase, together implementing a Hierarchical CFG;
13/// 2. Pure virtual VPRecipeBase serving as the base class for recipes contained
14/// within VPBasicBlocks;
15/// 3. Pure virtual VPSingleDefRecipe serving as a base class for recipes that
16/// also inherit from VPValue.
17/// 4. VPInstruction, a concrete Recipe and VPUser modeling a single planned
18/// instruction;
19/// 5. The VPlan class holding a candidate for vectorization;
20/// These are documented in docs/VectorizationPlan.rst.
21//
22//===----------------------------------------------------------------------===//
23
24#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
25#define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
26
27#include "VPlanValue.h"
28#include "llvm/ADT/Bitfields.h"
29#include "llvm/ADT/MapVector.h"
32#include "llvm/ADT/Twine.h"
33#include "llvm/ADT/ilist.h"
34#include "llvm/ADT/ilist_node.h"
38#include "llvm/IR/DebugLoc.h"
39#include "llvm/IR/FMF.h"
40#include "llvm/IR/Operator.h"
43#include <cassert>
44#include <cstddef>
45#include <functional>
46#include <string>
47#include <utility>
48#include <variant>
49
50namespace llvm {
51
52class BasicBlock;
53class DominatorTree;
55class IRBuilderBase;
56struct VPTransformState;
57class raw_ostream;
59class SCEV;
60class SCEVPredicate;
61class Type;
62class VPBasicBlock;
63class VPBuilder;
64class VPDominatorTree;
65class VPRegionBlock;
66class VPlan;
67class VPLane;
69class Value;
71
72struct VPCostContext;
73
74using VPlanPtr = std::unique_ptr<VPlan>;
75
76/// \enum UncountableExitStyle
77/// Different methods of handling early exits.
78///
81 /// No side effects to worry about, so we can process any uncountable exits
82 /// in the loop and branch either to the middle block if the trip count was
83 /// reached, or an early exit block to determine which exit was taken.
85 /// All memory operations other than the load(s) required to determine whether
86 /// an uncountable exit occurred will be masked based on that condition. If an
87 /// uncountable exit is taken, then all lanes before the exiting lane will
88 /// complete, leaving just the final lane to execute in the scalar tail.
90};
91
92/// VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
93/// A VPBlockBase can be either a VPBasicBlock or a VPRegionBlock.
95 friend class VPBlockUtils;
96
97 const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).
98
99 /// An optional name for the block.
100 std::string Name;
101
102 /// The immediate VPRegionBlock which this VPBlockBase belongs to, or null if
103 /// it is a topmost VPBlockBase.
104 VPRegionBlock *Parent = nullptr;
105
106 /// List of predecessor blocks.
108
109 /// List of successor blocks.
111
112 /// VPlan containing the block. Can only be set on the entry block of the
113 /// plan.
114 VPlan *Plan = nullptr;
115
116 /// Add \p Successor as the last successor to this block.
117 void appendSuccessor(VPBlockBase *Successor) {
118 assert(Successor && "Cannot add nullptr successor!");
119 Successors.push_back(Successor);
120 }
121
122 /// Add \p Predecessor as the last predecessor to this block.
123 void appendPredecessor(VPBlockBase *Predecessor) {
124 assert(Predecessor && "Cannot add nullptr predecessor!");
125 Predecessors.push_back(Predecessor);
126 }
127
128 /// Remove \p Predecessor from the predecessors of this block.
129 void removePredecessor(VPBlockBase *Predecessor) {
130 auto Pos = find(Predecessors, Predecessor);
131 assert(Pos && "Predecessor does not exist");
132 Predecessors.erase(Pos);
133 }
134
135 /// Remove \p Successor from the successors of this block.
136 void removeSuccessor(VPBlockBase *Successor) {
137 auto Pos = find(Successors, Successor);
138 assert(Pos && "Successor does not exist");
139 Successors.erase(Pos);
140 }
141
142 /// This function replaces one predecessor with another, useful when
143 /// trying to replace an old block in the CFG with a new one.
144 void replacePredecessor(VPBlockBase *Old, VPBlockBase *New) {
145 auto I = find(Predecessors, Old);
146 assert(I != Predecessors.end());
147 assert(Old->getParent() == New->getParent() &&
148 "replaced predecessor must have the same parent");
149 *I = New;
150 }
151
152 /// This function replaces one successor with another, useful when
153 /// trying to replace an old block in the CFG with a new one.
154 void replaceSuccessor(VPBlockBase *Old, VPBlockBase *New) {
155 auto I = find(Successors, Old);
156 assert(I != Successors.end());
157 assert(Old->getParent() == New->getParent() &&
158 "replaced successor must have the same parent");
159 *I = New;
160 }
161
162protected:
163 VPBlockBase(const unsigned char SC, const std::string &N)
164 : SubclassID(SC), Name(N) {}
165
166public:
167 /// An enumeration for keeping track of the concrete subclass of VPBlockBase
168 /// that are actually instantiated. Values of this enumeration are kept in the
169 /// SubclassID field of the VPBlockBase objects. They are used for concrete
170 /// type identification.
171 using VPBlockTy = enum { VPRegionBlockSC, VPBasicBlockSC, VPIRBasicBlockSC };
172
174
175 virtual ~VPBlockBase() = default;
176
177 const std::string &getName() const { return Name; }
178
179 void setName(const Twine &newName) { Name = newName.str(); }
180
181 /// \return an ID for the concrete type of this object.
182 /// This is used to implement the classof checks. This should not be used
183 /// for any other purpose, as the values may change as LLVM evolves.
184 unsigned getVPBlockID() const { return SubclassID; }
185
186 VPRegionBlock *getParent() { return Parent; }
187 const VPRegionBlock *getParent() const { return Parent; }
188
189 /// \return A pointer to the plan containing the current block.
190 VPlan *getPlan();
191 const VPlan *getPlan() const;
192
193 /// Sets the pointer of the plan containing the block. The block must be the
194 /// entry block into the VPlan.
195 void setPlan(VPlan *ParentPlan);
196
197 void setParent(VPRegionBlock *P) { Parent = P; }
198
199 /// \return the VPBasicBlock that is the entry of this VPBlockBase,
200 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
201 /// VPBlockBase is a VPBasicBlock, it is returned.
202 const VPBasicBlock *getEntryBasicBlock() const;
203 VPBasicBlock *getEntryBasicBlock();
204
205 /// \return the VPBasicBlock that is the exiting block of this VPBlockBase,
206 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
207 /// VPBlockBase is a VPBasicBlock, it is returned.
208 const VPBasicBlock *getExitingBasicBlock() const;
209 VPBasicBlock *getExitingBasicBlock();
210
211 const VPBlocksTy &getSuccessors() const { return Successors; }
212 VPBlocksTy &getSuccessors() { return Successors; }
213
214 /// Returns true if this block has any successors.
215 bool hasSuccessors() const { return !Successors.empty(); }
216 /// Returns true if this block has any predecessors.
217 bool hasPredecessors() const { return !Predecessors.empty(); }
218
221
222 const VPBlocksTy &getPredecessors() const { return Predecessors; }
223 VPBlocksTy &getPredecessors() { return Predecessors; }
224
225 /// \return the successor of this VPBlockBase if it has a single successor.
226 /// Otherwise return a null pointer.
228 return (Successors.size() == 1 ? *Successors.begin() : nullptr);
229 }
230
231 /// \return the predecessor of this VPBlockBase if it has a single
232 /// predecessor. Otherwise return a null pointer.
234 return (Predecessors.size() == 1 ? *Predecessors.begin() : nullptr);
235 }
236
237 size_t getNumSuccessors() const { return Successors.size(); }
238 size_t getNumPredecessors() const { return Predecessors.size(); }
239
240 /// An Enclosing Block of a block B is any block containing B, including B
241 /// itself. \return the closest enclosing block starting from "this", which
242 /// has successors. \return the root enclosing block if all enclosing blocks
243 /// have no successors.
244 VPBlockBase *getEnclosingBlockWithSuccessors();
245
246 /// \return the closest enclosing block starting from "this", which has
247 /// predecessors. \return the root enclosing block if all enclosing blocks
248 /// have no predecessors.
249 VPBlockBase *getEnclosingBlockWithPredecessors();
250
251 /// \return the successors either attached directly to this VPBlockBase or, if
252 /// this VPBlockBase is the exit block of a VPRegionBlock and has no
253 /// successors of its own, search recursively for the first enclosing
254 /// VPRegionBlock that has successors and return them. If no such
255 /// VPRegionBlock exists, return the (empty) successors of the topmost
256 /// VPBlockBase reached.
258 return getEnclosingBlockWithSuccessors()->getSuccessors();
259 }
260
261 /// \return the hierarchical successor of this VPBlockBase if it has a single
262 /// hierarchical successor. Otherwise return a null pointer.
264 return getEnclosingBlockWithSuccessors()->getSingleSuccessor();
265 }
266
267 /// \return the predecessors either attached directly to this VPBlockBase or,
268 /// if this VPBlockBase is the entry block of a VPRegionBlock and has no
269 /// predecessors of its own, search recursively for the first enclosing
270 /// VPRegionBlock that has predecessors and return them. If no such
271 /// VPRegionBlock exists, return the (empty) predecessors of the topmost
272 /// VPBlockBase reached.
274 return getEnclosingBlockWithPredecessors()->getPredecessors();
275 }
276
277 /// \return the hierarchical predecessor of this VPBlockBase if it has a
278 /// single hierarchical predecessor. Otherwise return a null pointer.
282
283 /// Set a given VPBlockBase \p Successor as the single successor of this
284 /// VPBlockBase. This VPBlockBase is not added as predecessor of \p Successor.
285 /// This VPBlockBase must have no successors.
287 assert(Successors.empty() && "Setting one successor when others exist.");
288 assert(Successor->getParent() == getParent() &&
289 "connected blocks must have the same parent");
290 appendSuccessor(Successor);
291 }
292
293 /// Set two given VPBlockBases \p IfTrue and \p IfFalse to be the two
294 /// successors of this VPBlockBase. This VPBlockBase is not added as
295 /// predecessor of \p IfTrue or \p IfFalse. This VPBlockBase must have no
296 /// successors.
297 void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse) {
298 assert(Successors.empty() && "Setting two successors when others exist.");
299 appendSuccessor(IfTrue);
300 appendSuccessor(IfFalse);
301 }
302
303 /// Set each VPBasicBlock in \p NewPreds as predecessor of this VPBlockBase.
304 /// This VPBlockBase must have no predecessors. This VPBlockBase is not added
305 /// as successor of any VPBasicBlock in \p NewPreds.
307 assert(Predecessors.empty() && "Block predecessors already set.");
308 for (auto *Pred : NewPreds)
309 appendPredecessor(Pred);
310 }
311
312 /// Set each VPBasicBlock in \p NewSuccs as successor of this VPBlockBase.
313 /// This VPBlockBase must have no successors. This VPBlockBase is not added
314 /// as predecessor of any VPBasicBlock in \p NewSuccs.
316 assert(Successors.empty() && "Block successors already set.");
317 for (auto *Succ : NewSuccs)
318 appendSuccessor(Succ);
319 }
320
321 /// Remove all the predecessor of this block.
322 void clearPredecessors() { Predecessors.clear(); }
323
324 /// Remove all the successors of this block.
325 void clearSuccessors() { Successors.clear(); }
326
327 /// Swap predecessors of the block. The block must have exactly 2
328 /// predecessors.
330 assert(Predecessors.size() == 2 && "must have 2 predecessors to swap");
331 std::swap(Predecessors[0], Predecessors[1]);
332 }
333
334 /// Swap successors of the block. The block must have exactly 2 successors.
335 // TODO: This should be part of introducing conditional branch recipes rather
336 // than being independent.
338 assert(Successors.size() == 2 && "must have 2 successors to swap");
339 std::swap(Successors[0], Successors[1]);
340 }
341
342 /// Returns the index for \p Pred in the blocks predecessors list.
343 unsigned getIndexForPredecessor(const VPBlockBase *Pred) const {
344 assert(count(Predecessors, Pred) == 1 &&
345 "must have Pred exactly once in Predecessors");
346 return std::distance(Predecessors.begin(), find(Predecessors, Pred));
347 }
348
349 /// Returns the index for \p Succ in the blocks successor list.
350 unsigned getIndexForSuccessor(const VPBlockBase *Succ) const {
351 assert(count(Successors, Succ) == 1 &&
352 "must have Succ exactly once in Successors");
353 return std::distance(Successors.begin(), find(Successors, Succ));
354 }
355
356 /// The method which generates the output IR that correspond to this
357 /// VPBlockBase, thereby "executing" the VPlan.
358 virtual void execute(VPTransformState *State) = 0;
359
360 /// Return the cost of the block.
362
363#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
364 void printAsOperand(raw_ostream &OS, bool PrintType = false) const {
365 OS << getName();
366 }
367
368 /// Print plain-text dump of this VPBlockBase to \p O, prefixing all lines
369 /// with \p Indent. \p SlotTracker is used to print unnamed VPValue's using
370 /// consecutive numbers.
371 ///
372 /// Note that the numbering is applied to the whole VPlan, so printing
373 /// individual blocks is consistent with the whole VPlan printing.
374 virtual void print(raw_ostream &O, const Twine &Indent,
375 VPSlotTracker &SlotTracker) const = 0;
376
377 /// Print plain-text dump of this VPBlockBase to \p O.
378 void print(raw_ostream &O) const;
379
380 /// Print the successors of this block to \p O, prefixing all lines with \p
381 /// Indent.
382 void printSuccessors(raw_ostream &O, const Twine &Indent) const;
383
384 /// Dump this VPBlockBase to dbgs().
385 LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
386#endif
387
388 /// Clone the current block and its recipes without updating the operands of
389 /// the cloned recipes, including all blocks in the single-entry single-exit
390 /// region for VPRegionBlocks.
391 virtual VPBlockBase *clone() = 0;
392};
393
394/// VPRecipeBase is a base class modeling a sequence of one or more output IR
395/// instructions. VPRecipeBase owns the VPValues it defines through VPDef
396/// and is responsible for deleting its defined values. Single-value
397/// recipes must inherit from VPSingleDef instead of inheriting from both
398/// VPRecipeBase and VPValue separately.
400 : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
401 public VPDef,
402 public VPUser {
403 friend VPBasicBlock;
404 friend class VPBlockUtils;
405
406 /// Subclass identifier (for isa/dyn_cast).
407 const unsigned char SubclassID;
408
409 /// Each VPRecipe belongs to a single VPBasicBlock.
410 VPBasicBlock *Parent = nullptr;
411
412 /// The debug location for the recipe.
413 DebugLoc DL;
414
415public:
416 /// An enumeration for keeping track of the concrete subclass of VPRecipeBase
417 /// that is actually instantiated. Values of this enumeration are kept in the
418 /// SubclassID field of the VPRecipeBase objects. They are used for concrete
419 /// type identification.
420 using VPRecipeTy = enum {
421 VPBranchOnMaskSC,
422 VPDerivedIVSC,
423 VPExpandSCEVSC,
424 VPExpressionSC,
425 VPIRInstructionSC,
426 VPInstructionSC,
427 VPInterleaveEVLSC,
428 VPInterleaveSC,
429 VPReductionEVLSC,
430 VPReductionSC,
431 VPReplicateSC,
432 VPScalarIVStepsSC,
433 VPVectorPointerSC,
434 VPVectorEndPointerSC,
435 VPWidenCallSC,
436 VPWidenCanonicalIVSC,
437 VPWidenCastSC,
438 VPWidenGEPSC,
439 VPWidenIntrinsicSC,
440 VPWidenMemIntrinsicSC,
441 VPWidenLoadEVLSC,
442 VPWidenLoadSC,
443 VPWidenStoreEVLSC,
444 VPWidenStoreSC,
445 VPWidenSC,
446 VPBlendSC,
447 VPHistogramSC,
448 // START: Phi-like recipes. Need to be kept together.
449 VPWidenPHISC,
450 VPPredInstPHISC,
451 // START: SubclassID for recipes that inherit VPHeaderPHIRecipe.
452 // VPHeaderPHIRecipe need to be kept together.
453 VPCurrentIterationPHISC,
454 VPActiveLaneMaskPHISC,
455 VPFirstOrderRecurrencePHISC,
456 VPWidenIntOrFpInductionSC,
457 VPWidenPointerInductionSC,
458 VPReductionPHISC,
459 // END: SubclassID for recipes that inherit VPHeaderPHIRecipe
460 // END: Phi-like recipes
461 VPFirstPHISC = VPWidenPHISC,
462 VPFirstHeaderPHISC = VPCurrentIterationPHISC,
463 VPLastHeaderPHISC = VPReductionPHISC,
464 VPLastPHISC = VPReductionPHISC,
465 };
466
467 VPRecipeBase(const unsigned char SC, ArrayRef<VPValue *> Operands,
469 : VPDef(), VPUser(Operands), SubclassID(SC), DL(DL) {}
470
471 ~VPRecipeBase() override = default;
472
473 /// Clone the current recipe.
474 virtual VPRecipeBase *clone() = 0;
475
476 /// \return the VPBasicBlock which this VPRecipe belongs to.
477 VPBasicBlock *getParent() { return Parent; }
478 const VPBasicBlock *getParent() const { return Parent; }
479
480 /// \return the VPRegionBlock which the recipe belongs to.
481 VPRegionBlock *getRegion();
482 const VPRegionBlock *getRegion() const;
483
484 /// The method which generates the output IR instructions that correspond to
485 /// this VPRecipe, thereby "executing" the VPlan.
486 virtual void execute(VPTransformState &State) = 0;
487
488 /// Return the cost of this recipe, taking into account if the cost
489 /// computation should be skipped and the ForceTargetInstructionCost flag.
490 /// Also takes care of printing the cost for debugging.
492
493 /// Insert an unlinked recipe into a basic block immediately before
494 /// the specified recipe.
495 void insertBefore(VPRecipeBase *InsertPos);
496 /// Insert an unlinked recipe into \p BB immediately before the insertion
497 /// point \p IP;
498 void insertBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator IP);
499
500 /// Insert an unlinked Recipe into a basic block immediately after
501 /// the specified Recipe.
502 void insertAfter(VPRecipeBase *InsertPos);
503
504 /// Unlink this recipe from its current VPBasicBlock and insert it into
505 /// the VPBasicBlock that MovePos lives in, right after MovePos.
506 void moveAfter(VPRecipeBase *MovePos);
507
508 /// Unlink this recipe and insert into BB before I.
509 ///
510 /// \pre I is a valid iterator into BB.
511 void moveBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator I);
512
513 /// This method unlinks 'this' from the containing basic block, but does not
514 /// delete it.
515 void removeFromParent();
516
517 /// This method unlinks 'this' from the containing basic block and deletes it.
518 ///
519 /// \returns an iterator pointing to the element after the erased one
521
522 /// \return an ID for the concrete type of this object.
523 unsigned getVPRecipeID() const { return SubclassID; }
524
525 /// Method to support type inquiry through isa, cast, and dyn_cast.
526 static inline bool classof(const VPDef *D) {
527 // All VPDefs are also VPRecipeBases.
528 return true;
529 }
530
531 static inline bool classof(const VPUser *U) { return true; }
532
533 /// Returns true if the recipe may have side-effects.
534 bool mayHaveSideEffects() const;
535
536 /// Return true if we can safely execute this recipe unconditionally even if
537 /// it is masked originally.
538 bool isSafeToSpeculativelyExecute() const;
539
540 /// Returns true for PHI-like recipes.
541 bool isPhi() const;
542
543 /// Returns true if the recipe may read from memory.
544 bool mayReadFromMemory() const;
545
546 /// Returns true if the recipe may write to memory.
547 bool mayWriteToMemory() const;
548
549 /// Returns true if the recipe may read from or write to memory.
550 bool mayReadOrWriteMemory() const {
552 }
553
554 /// Returns the debug location of the recipe.
555 DebugLoc getDebugLoc() const { return DL; }
556
557 /// Set the recipe's debug location to \p NewDL.
558 void setDebugLoc(DebugLoc NewDL) { DL = NewDL; }
559
560#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
561 /// Dump the recipe to stderr (for debugging).
562 LLVM_ABI_FOR_TEST void dump() const;
563
564 /// Print the recipe, delegating to printRecipe().
565 void print(raw_ostream &O, const Twine &Indent,
567#endif
568
569protected:
570 /// Compute the cost of this recipe either using a recipe's specialized
571 /// implementation or using the legacy cost model and the underlying
572 /// instructions.
573 virtual InstructionCost computeCost(ElementCount VF,
574 VPCostContext &Ctx) const;
575
576#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
577 /// Each concrete VPRecipe prints itself, without printing common information,
578 /// like debug info or metadata.
579 virtual void printRecipe(raw_ostream &O, const Twine &Indent,
580 VPSlotTracker &SlotTracker) const = 0;
581#endif
582};
583
// Expands to the four classof() overloads a concrete recipe class needs, for
// queries starting from a VPRecipeBase, a VPValue, a VPUser or a
// VPSingleDefRecipe. Each overload matches when the recipe's ID equals
// VPRecipeID. The VPValue and VPUser overloads first look up the recipe and
// report false when that lookup yields null.
#define VP_CLASSOF_IMPL(VPRecipeID)                                            \
  static inline bool classof(const VPRecipeBase *R) {                          \
    return R->getVPRecipeID() == VPRecipeID;                                   \
  }                                                                            \
  static inline bool classof(const VPValue *V) {                               \
    if (auto *Def = V->getDefiningRecipe())                                    \
      return Def->getVPRecipeID() == VPRecipeID;                               \
    return false;                                                              \
  }                                                                            \
  static inline bool classof(const VPUser *U) {                                \
    if (auto *Recipe = dyn_cast<VPRecipeBase>(U))                              \
      return Recipe->getVPRecipeID() == VPRecipeID;                            \
    return false;                                                              \
  }                                                                            \
  static inline bool classof(const VPSingleDefRecipe *R) {                     \
    return R->getVPRecipeID() == VPRecipeID;                                   \
  }
600
601/// Compute the scalar result type for an IR \p Opcode given \p Operands.
602LLVM_ABI Type *computeScalarTypeForInstruction(unsigned Opcode,
603 ArrayRef<VPValue *> Operands);
604
605/// VPSingleDefRecipe is a base class for recipes that model a sequence of one
606/// or more output IR that define a single result VPValue. Note that
607/// VPSingleDefRecipe must inherit from VPRecipeBase before VPSingleDefValue.
609public:
610 VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
612 : VPRecipeBase(SC, Operands, DL), VPSingleDefValue(this) {}
613
614 VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
616 : VPRecipeBase(SC, Operands, DL), VPSingleDefValue(this, UV) {}
617
618 VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
619 Type *ResultTy, Value *UV = nullptr,
621 : VPRecipeBase(SC, Operands, DL), VPSingleDefValue(this, UV, ResultTy) {}
622
623 static inline bool classof(const VPRecipeBase *R) {
624 switch (R->getVPRecipeID()) {
625 case VPRecipeBase::VPDerivedIVSC:
626 case VPRecipeBase::VPExpandSCEVSC:
627 case VPRecipeBase::VPExpressionSC:
628 case VPRecipeBase::VPInstructionSC:
629 case VPRecipeBase::VPReductionEVLSC:
630 case VPRecipeBase::VPReductionSC:
631 case VPRecipeBase::VPReplicateSC:
632 case VPRecipeBase::VPScalarIVStepsSC:
633 case VPRecipeBase::VPVectorPointerSC:
634 case VPRecipeBase::VPVectorEndPointerSC:
635 case VPRecipeBase::VPWidenCallSC:
636 case VPRecipeBase::VPWidenCanonicalIVSC:
637 case VPRecipeBase::VPWidenCastSC:
638 case VPRecipeBase::VPWidenGEPSC:
639 case VPRecipeBase::VPWidenIntrinsicSC:
640 case VPRecipeBase::VPWidenMemIntrinsicSC:
641 case VPRecipeBase::VPWidenSC:
642 case VPRecipeBase::VPBlendSC:
643 case VPRecipeBase::VPPredInstPHISC:
644 case VPRecipeBase::VPCurrentIterationPHISC:
645 case VPRecipeBase::VPActiveLaneMaskPHISC:
646 case VPRecipeBase::VPFirstOrderRecurrencePHISC:
647 case VPRecipeBase::VPWidenPHISC:
648 case VPRecipeBase::VPWidenIntOrFpInductionSC:
649 case VPRecipeBase::VPWidenPointerInductionSC:
650 case VPRecipeBase::VPReductionPHISC:
651 case VPRecipeBase::VPWidenLoadEVLSC:
652 case VPRecipeBase::VPWidenLoadSC:
653 return true;
654 case VPRecipeBase::VPBranchOnMaskSC:
655 case VPRecipeBase::VPInterleaveEVLSC:
656 case VPRecipeBase::VPInterleaveSC:
657 case VPRecipeBase::VPIRInstructionSC:
658 case VPRecipeBase::VPWidenStoreEVLSC:
659 case VPRecipeBase::VPWidenStoreSC:
660 case VPRecipeBase::VPHistogramSC:
661 return false;
662 }
663 llvm_unreachable("Unhandled VPRecipeID");
664 }
665
666 static inline bool classof(const VPValue *V) {
667 auto *R = V->getDefiningRecipe();
668 return R && classof(R);
669 }
670
671 static inline bool classof(const VPUser *U) {
672 auto *R = dyn_cast<VPRecipeBase>(U);
673 return R && classof(R);
674 }
675
676 VPSingleDefRecipe *clone() override = 0;
677
678 /// Returns the underlying instruction.
685
686#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
687 /// Print this VPSingleDefRecipe to dbgs() (for debugging).
689#endif
690};
691
692/// Class to record and manage LLVM IR flags.
695 enum class OperationType : unsigned char {
696 Cmp,
697 FCmp,
698 OverflowingBinOp,
699 Trunc,
700 DisjointOp,
701 PossiblyExactOp,
702 GEPOp,
703 FPMathOp,
704 NonNegOp,
705 ReductionOp,
706 Other
707 };
708
709public:
710 struct WrapFlagsTy {
711 char HasNUW : 1;
712 char HasNSW : 1;
713
715 };
716
718 char HasNUW : 1;
719 char HasNSW : 1;
720
722 };
723
728
730 char NonNeg : 1;
731 NonNegFlagsTy(bool IsNonNeg) : NonNeg(IsNonNeg) {}
732 };
733
734private:
735 struct ExactFlagsTy {
736 char IsExact : 1;
737 ExactFlagsTy(bool Exact) : IsExact(Exact) {}
738 };
739 struct FastMathFlagsTy {
740 char AllowReassoc : 1;
741 char NoNaNs : 1;
742 char NoInfs : 1;
743 char NoSignedZeros : 1;
744 char AllowReciprocal : 1;
745 char AllowContract : 1;
746 char ApproxFunc : 1;
747
748 LLVM_ABI_FOR_TEST FastMathFlagsTy(const FastMathFlags &FMF);
749 };
750 /// Holds both the predicate and fast-math flags for floating-point
751 /// comparisons.
752 struct FCmpFlagsTy {
753 uint8_t CmpPredStorage;
754 FastMathFlagsTy FMFs;
755 };
756 /// Holds reduction-specific flags: RecurKind, IsOrdered, IsInLoop, and FMFs.
757 struct ReductionFlagsTy {
758 // RecurKind has ~26 values, needs 5 bits but uses 6 bits to account for
759 // additional kinds.
760 unsigned char Kind : 6;
761 // TODO: Derive order/in-loop from plan and remove here.
762 unsigned char IsOrdered : 1;
763 unsigned char IsInLoop : 1;
764 FastMathFlagsTy FMFs;
765
766 ReductionFlagsTy(RecurKind Kind, bool IsOrdered, bool IsInLoop,
767 FastMathFlags FMFs)
768 : Kind(static_cast<unsigned char>(Kind)), IsOrdered(IsOrdered),
769 IsInLoop(IsInLoop), FMFs(FMFs) {}
770 };
771
772 OperationType OpType;
773
774 union {
779 ExactFlagsTy ExactFlags;
782 FastMathFlagsTy FMFs;
783 FCmpFlagsTy FCmpFlags;
784 ReductionFlagsTy ReductionFlags;
786 };
787
788public:
789 VPIRFlags() : OpType(OperationType::Other), AllFlags() {}
790
792 if (auto *FCmp = dyn_cast<FCmpInst>(&I)) {
793 OpType = OperationType::FCmp;
795 FCmp->getPredicate());
796 assert(getPredicate() == FCmp->getPredicate() && "predicate truncated");
797 FCmpFlags.FMFs = FCmp->getFastMathFlags();
798 } else if (auto *Op = dyn_cast<CmpInst>(&I)) {
799 OpType = OperationType::Cmp;
801 Op->getPredicate());
802 assert(getPredicate() == Op->getPredicate() && "predicate truncated");
803 } else if (auto *Op = dyn_cast<PossiblyDisjointInst>(&I)) {
804 OpType = OperationType::DisjointOp;
805 DisjointFlags.IsDisjoint = Op->isDisjoint();
806 } else if (auto *Op = dyn_cast<OverflowingBinaryOperator>(&I)) {
807 OpType = OperationType::OverflowingBinOp;
808 WrapFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
809 } else if (auto *Op = dyn_cast<TruncInst>(&I)) {
810 OpType = OperationType::Trunc;
811 TruncFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
812 } else if (auto *Op = dyn_cast<PossiblyExactOperator>(&I)) {
813 OpType = OperationType::PossiblyExactOp;
814 ExactFlags.IsExact = Op->isExact();
815 } else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
816 OpType = OperationType::GEPOp;
817 GEPFlagsStorage = GEP->getNoWrapFlags().getRaw();
818 assert(getGEPNoWrapFlags() == GEP->getNoWrapFlags() &&
819 "wrap flags truncated");
820 } else if (auto *PNNI = dyn_cast<PossiblyNonNegInst>(&I)) {
821 OpType = OperationType::NonNegOp;
822 NonNegFlags.NonNeg = PNNI->hasNonNeg();
823 } else if (auto *Op = dyn_cast<FPMathOperator>(&I)) {
824 OpType = OperationType::FPMathOp;
825 FMFs = Op->getFastMathFlags();
826 }
827 }
828
829 VPIRFlags(CmpInst::Predicate Pred) : OpType(OperationType::Cmp), AllFlags() {
831 assert(getPredicate() == Pred && "predicate truncated");
832 }
833
835 : OpType(OperationType::FCmp), AllFlags() {
837 assert(getPredicate() == Pred && "predicate truncated");
838 FCmpFlags.FMFs = FMFs;
839 }
840
842 : OpType(OperationType::OverflowingBinOp), AllFlags() {
843 this->WrapFlags = WrapFlags;
844 }
845
847 : OpType(OperationType::Trunc), AllFlags() {
848 this->TruncFlags = TruncFlags;
849 }
850
851 VPIRFlags(FastMathFlags FMFs) : OpType(OperationType::FPMathOp), AllFlags() {
852 this->FMFs = FMFs;
853 }
854
856 : OpType(OperationType::DisjointOp), AllFlags() {
857 this->DisjointFlags = DisjointFlags;
858 }
859
861 : OpType(OperationType::NonNegOp), AllFlags() {
862 this->NonNegFlags = NonNegFlags;
863 }
864
865 VPIRFlags(ExactFlagsTy ExactFlags)
866 : OpType(OperationType::PossiblyExactOp), AllFlags() {
867 this->ExactFlags = ExactFlags;
868 }
869
871 : OpType(OperationType::GEPOp), AllFlags() {
872 GEPFlagsStorage = GEPFlags.getRaw();
873 }
874
875 VPIRFlags(RecurKind Kind, bool IsOrdered, bool IsInLoop, FastMathFlags FMFs)
876 : OpType(OperationType::ReductionOp), AllFlags() {
877 ReductionFlags = ReductionFlagsTy(Kind, IsOrdered, IsInLoop, FMFs);
878 }
879
881 OpType = Other.OpType;
882 AllFlags[0] = Other.AllFlags[0];
883 AllFlags[1] = Other.AllFlags[1];
884 }
885
886 /// Only keep flags also present in \p Other. \p Other must have the same
887 /// OpType as the current object.
888 void intersectFlags(const VPIRFlags &Other);
889
890 /// Drop all poison-generating flags.
892 // NOTE: This needs to be kept in-sync with
893 // Instruction::dropPoisonGeneratingFlags.
894 switch (OpType) {
895 case OperationType::OverflowingBinOp:
896 WrapFlags.HasNUW = false;
897 WrapFlags.HasNSW = false;
898 break;
899 case OperationType::Trunc:
900 TruncFlags.HasNUW = false;
901 TruncFlags.HasNSW = false;
902 break;
903 case OperationType::DisjointOp:
904 DisjointFlags.IsDisjoint = false;
905 break;
906 case OperationType::PossiblyExactOp:
907 ExactFlags.IsExact = false;
908 break;
909 case OperationType::GEPOp:
910 GEPFlagsStorage = 0;
911 break;
912 case OperationType::FPMathOp:
913 case OperationType::FCmp:
914 case OperationType::ReductionOp:
915 getFMFsRef().NoNaNs = false;
916 getFMFsRef().NoInfs = false;
917 break;
918 case OperationType::NonNegOp:
919 NonNegFlags.NonNeg = false;
920 break;
921 case OperationType::Cmp:
922 case OperationType::Other:
923 break;
924 }
925 }
926
927 /// Apply the IR flags to \p I.
928 void applyFlags(Instruction &I) const {
929 switch (OpType) {
930 case OperationType::OverflowingBinOp:
931 I.setHasNoUnsignedWrap(WrapFlags.HasNUW);
932 I.setHasNoSignedWrap(WrapFlags.HasNSW);
933 break;
934 case OperationType::Trunc:
935 I.setHasNoUnsignedWrap(TruncFlags.HasNUW);
936 I.setHasNoSignedWrap(TruncFlags.HasNSW);
937 break;
938 case OperationType::DisjointOp:
939 cast<PossiblyDisjointInst>(&I)->setIsDisjoint(DisjointFlags.IsDisjoint);
940 break;
941 case OperationType::PossiblyExactOp:
942 I.setIsExact(ExactFlags.IsExact);
943 break;
944 case OperationType::GEPOp:
945 cast<GetElementPtrInst>(&I)->setNoWrapFlags(
947 break;
948 case OperationType::FPMathOp:
949 case OperationType::FCmp: {
950 const FastMathFlagsTy &F = getFMFsRef();
951 I.setHasAllowReassoc(F.AllowReassoc);
952 I.setHasNoNaNs(F.NoNaNs);
953 I.setHasNoInfs(F.NoInfs);
954 I.setHasNoSignedZeros(F.NoSignedZeros);
955 I.setHasAllowReciprocal(F.AllowReciprocal);
956 I.setHasAllowContract(F.AllowContract);
957 I.setHasApproxFunc(F.ApproxFunc);
958 break;
959 }
960 case OperationType::NonNegOp:
961 I.setNonNeg(NonNegFlags.NonNeg);
962 break;
963 case OperationType::ReductionOp:
964 llvm_unreachable("reduction ops should not use applyFlags");
965 case OperationType::Cmp:
966 case OperationType::Other:
967 break;
968 }
969 }
970
972 assert((OpType == OperationType::Cmp || OpType == OperationType::FCmp) &&
973 "recipe doesn't have a compare predicate");
974 uint8_t Storage = OpType == OperationType::FCmp ? FCmpFlags.CmpPredStorage
977 }
978
980 assert((OpType == OperationType::Cmp || OpType == OperationType::FCmp) &&
981 "recipe doesn't have a compare predicate");
982 if (OpType == OperationType::FCmp)
984 else
986 assert(getPredicate() == Pred && "predicate truncated");
987 }
988
992
993 /// Returns true if the recipe has a comparison predicate.
994 bool hasPredicate() const {
995 return OpType == OperationType::Cmp || OpType == OperationType::FCmp;
996 }
997
998 /// Returns true if the recipe has fast-math flags.
999 bool hasFastMathFlags() const {
1000 return OpType == OperationType::FPMathOp || OpType == OperationType::FCmp ||
1001 OpType == OperationType::ReductionOp;
1002 }
1003
1005
1006 /// Returns true if the recipe has non-negative flag.
1007 bool hasNonNegFlag() const { return OpType == OperationType::NonNegOp; }
1008
1009 bool isNonNeg() const {
1010 assert(OpType == OperationType::NonNegOp &&
1011 "recipe doesn't have a NNEG flag");
1012 return NonNegFlags.NonNeg;
1013 }
1014
1015 bool hasNoUnsignedWrap() const {
1016 switch (OpType) {
1017 case OperationType::OverflowingBinOp:
1018 return WrapFlags.HasNUW;
1019 case OperationType::Trunc:
1020 return TruncFlags.HasNUW;
1021 default:
1022 llvm_unreachable("recipe doesn't have a NUW flag");
1023 }
1024 }
1025
1026 bool hasNoSignedWrap() const {
1027 switch (OpType) {
1028 case OperationType::OverflowingBinOp:
1029 return WrapFlags.HasNSW;
1030 case OperationType::Trunc:
1031 return TruncFlags.HasNSW;
1032 default:
1033 llvm_unreachable("recipe doesn't have a NSW flag");
1034 }
1035 }
1036
1037 bool hasNoWrapFlags() const {
1038 switch (OpType) {
1039 case OperationType::OverflowingBinOp:
1040 case OperationType::Trunc:
1041 return true;
1042 default:
1043 return false;
1044 }
1045 }
1046
1048 return {hasNoUnsignedWrap(), hasNoSignedWrap()};
1049 }
1050
1051 bool isDisjoint() const {
1052 assert(OpType == OperationType::DisjointOp &&
1053 "recipe cannot have a disjoing flag");
1054 return DisjointFlags.IsDisjoint;
1055 }
1056
1058 assert(OpType == OperationType::ReductionOp &&
1059 "recipe doesn't have reduction flags");
1060 return static_cast<RecurKind>(ReductionFlags.Kind);
1061 }
1062
1063 bool isReductionOrdered() const {
1064 assert(OpType == OperationType::ReductionOp &&
1065 "recipe doesn't have reduction flags");
1066 return ReductionFlags.IsOrdered;
1067 }
1068
1069 bool isReductionInLoop() const {
1070 assert(OpType == OperationType::ReductionOp &&
1071 "recipe doesn't have reduction flags");
1072 return ReductionFlags.IsInLoop;
1073 }
1074
1075private:
1076 /// Get a reference to the fast-math flags for FPMathOp, FCmp or ReductionOp.
1077 FastMathFlagsTy &getFMFsRef() {
1078 if (OpType == OperationType::FCmp)
1079 return FCmpFlags.FMFs;
1080 if (OpType == OperationType::ReductionOp)
1081 return ReductionFlags.FMFs;
1082 return FMFs;
1083 }
1084 const FastMathFlagsTy &getFMFsRef() const {
1085 if (OpType == OperationType::FCmp)
1086 return FCmpFlags.FMFs;
1087 if (OpType == OperationType::ReductionOp)
1088 return ReductionFlags.FMFs;
1089 return FMFs;
1090 }
1091
1092public:
1093 /// Returns default flags for \p Opcode for opcodes that support it, asserts
1094 /// otherwise. Opcodes not supporting default flags include compares and
1095 /// ComputeReductionResult.
1096 static VPIRFlags getDefaultFlags(unsigned Opcode);
1097
1098#if !defined(NDEBUG)
1099 /// Returns true if the set flags are valid for \p Opcode.
1100 LLVM_ABI_FOR_TEST bool flagsValidForOpcode(unsigned Opcode) const;
1101
1102 /// Returns true if \p Opcode has its required flags set.
1103 LLVM_ABI_FOR_TEST bool hasRequiredFlagsForOpcode(unsigned Opcode) const;
1104#endif
1105
1106#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1107 void printFlags(raw_ostream &O) const;
1108#endif
1109};
1111
1112static_assert(sizeof(VPIRFlags) <= 3, "VPIRFlags should not grow");
1113
1114/// A pure-virtual common base class for recipes defining a single VPValue and
1115/// using IR flags.
1117 VPRecipeWithIRFlags(const unsigned char SC, ArrayRef<VPValue *> Operands,
1118 const VPIRFlags &Flags,
1120 : VPSingleDefRecipe(SC, Operands, DL), VPIRFlags(Flags) {}
1121
1122 VPRecipeWithIRFlags(const unsigned char SC, ArrayRef<VPValue *> Operands,
1123 Type *ResultTy, const VPIRFlags &Flags,
1125 : VPSingleDefRecipe(SC, Operands, ResultTy, /*UV=*/nullptr, DL),
1126 VPIRFlags(Flags) {}
1127
1128 static inline bool classof(const VPRecipeBase *R) {
1129 return R->getVPRecipeID() == VPRecipeBase::VPBlendSC ||
1130 R->getVPRecipeID() == VPRecipeBase::VPInstructionSC ||
1131 R->getVPRecipeID() == VPRecipeBase::VPWidenSC ||
1132 R->getVPRecipeID() == VPRecipeBase::VPWidenGEPSC ||
1133 R->getVPRecipeID() == VPRecipeBase::VPWidenCallSC ||
1134 R->getVPRecipeID() == VPRecipeBase::VPWidenCastSC ||
1135 R->getVPRecipeID() == VPRecipeBase::VPWidenIntrinsicSC ||
1136 R->getVPRecipeID() == VPRecipeBase::VPWidenMemIntrinsicSC ||
1137 R->getVPRecipeID() == VPRecipeBase::VPReductionSC ||
1138 R->getVPRecipeID() == VPRecipeBase::VPReductionEVLSC ||
1139 R->getVPRecipeID() == VPRecipeBase::VPReplicateSC ||
1140 R->getVPRecipeID() == VPRecipeBase::VPVectorEndPointerSC ||
1141 R->getVPRecipeID() == VPRecipeBase::VPVectorPointerSC ||
1142 R->getVPRecipeID() == VPRecipeBase::VPWidenCanonicalIVSC;
1143 }
1144
1145 static inline bool classof(const VPUser *U) {
1146 auto *R = dyn_cast<VPRecipeBase>(U);
1147 return R && classof(R);
1148 }
1149
1150 static inline bool classof(const VPValue *V) {
1151 auto *R = V->getDefiningRecipe();
1152 return R && classof(R);
1153 }
1154
1156
1157 static inline bool classof(const VPSingleDefRecipe *R) {
1158 return classof(static_cast<const VPRecipeBase *>(R));
1159 }
1160
1161 void execute(VPTransformState &State) override = 0;
1162
1163 /// Compute the cost for this recipe for \p VF, using \p Opcode and \p Ctx.
1165 VPCostContext &Ctx) const;
1166};
1167
1168/// Helper to manage IR metadata for recipes. It filters out metadata that
1169/// cannot be propagated.
1172
1173public:
1174 VPIRMetadata() = default;
1175
1176 /// Adds metadata that can be preserved from the original instruction
1177 /// \p I.
1179
1180 /// Copy constructor for cloning.
1182
1184
1185 /// Add all metadata to \p I.
1186 void applyMetadata(Instruction &I) const;
1187
1188 /// Set metadata with kind \p Kind to \p Node. If metadata with \p Kind
1189 /// already exists, it will be replaced. Otherwise, it will be added.
1190 void setMetadata(unsigned Kind, MDNode *Node) {
1191 auto It =
1192 llvm::find_if(Metadata, [Kind](const std::pair<unsigned, MDNode *> &P) {
1193 return P.first == Kind;
1194 });
1195 if (It != Metadata.end())
1196 It->second = Node;
1197 else
1198 Metadata.emplace_back(Kind, Node);
1199 }
1200
1201 /// Intersect this VPIRMetadata object with \p MD, keeping only metadata
1202 /// nodes that are common to both.
1203 void intersect(const VPIRMetadata &MD);
1204
1205 /// Get metadata of kind \p Kind. Returns nullptr if not found.
1206 MDNode *getMetadata(unsigned Kind) const {
1207 auto It =
1208 find_if(Metadata, [Kind](const auto &P) { return P.first == Kind; });
1209 return It != Metadata.end() ? It->second : nullptr;
1210 }
1211
1212#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1213 /// Print metadata with node IDs.
1214 void print(raw_ostream &O, VPSlotTracker &SlotTracker) const;
1215#endif
1216};
1217
1218/// This is a concrete Recipe that models a single VPlan-level instruction.
1219/// While as any Recipe it may generate a sequence of IR instructions when
1220/// executed, these instructions would always form a single-def expression as
1221/// the VPInstruction is also a single def-use vertex. Most VPInstruction
1222/// opcodes can take an optional mask. Masks may be assigned during
1223/// predication.
1225 public VPIRMetadata {
1226public:
1227 /// VPlan opcodes, extending LLVM IR with idiomatic instructions.
1228 enum {
1230 Instruction::OtherOpsEnd + 1, // Combines the incoming and previous
1231 // values of a first-order recurrence.
1233 // Creates a mask where each lane is active (true) whilst the current
1234 // counter (first operand + index) is less than the second operand. i.e.
1235 // mask[i] = icmp ult (op0 + i), op1
1236 // The size of the mask returned is VF * Multiplier (UF, third op).
1239 // Represents the incoming loop-invariant alias-mask. All memory accesses
1240 // in the loop must stay within the active lanes.
1243 // Increment the canonical IV separately for each unrolled part.
1245 // Abstract instruction that compares two values and branches. This is
1246 // lowered to ICmp + BranchOnCond during VPlan to VPlan transformation.
1249 // Branch with 2 boolean condition operands and 3 successors. If condition
1250 // 0 is true, branches to successor 0; if condition 1 is true, branches to
1251 // successor 1; otherwise branches to successor 2. Expanded after region
1252 // dissolution into: (1) an OR of the two conditions branching to
1253 // middle.split or successor 2, and (2) middle.split branching to successor
1254 // 0 or successor 1 based on condition 0.
1257 /// Given operands of (the same) struct type, creates a struct of fixed-
1258 /// width vectors each containing a struct field of all operands. The
1259 /// number of operands matches the element count of every vector.
1261 /// Creates a fixed-width vector containing all operands. The number of
1262 /// operands matches the vector element count.
1264 /// Extracts all lanes from its (non-scalable) vector operand. This is an
1265 /// abstract VPInstruction whose single defined VPValue represents VF
1266 /// scalars extracted from a vector, to be replaced by VF ExtractElement
1267 /// VPInstructions.
1269 /// Reduce the operands to the final reduction result using the operation
1270 /// specified via the operation's VPIRFlags.
1272 // Extracts the last part of its operand. Removed during unrolling.
1274 // Extracts the last lane of its vector operand, per part.
1276 // Extracts the second-to-last lane from its operand or the second-to-last
1277 // part if it is scalar. In the latter case, the recipe will be removed
1278 // during unrolling.
1280 LogicalAnd, // Non-poison propagating logical And.
1281 LogicalOr, // Non-poison propagating logical Or.
1282 NumActiveLanes, // Counts the number of active lanes in a mask.
1283 // Add an offset in bytes (second operand) to a base pointer (first
1284 // operand). Only generates scalar values (either for the first lane only or
1285 // for all lanes, depending on its uses).
1287 // Add a vector offset in bytes (second operand) to a scalar base pointer
1288 // (first operand).
1290 // Returns a scalar boolean value, which is true if any lane of its
1291 // (boolean) vector operands is true. It produces the reduced value across
1292 // all unrolled iterations. Unrolling will add all copies of its original
1293 // operand as additional operands. AnyOf is poison-safe as all operands
1294 // will be frozen.
1296 // Calculates the first active lane index of the vector predicate operands.
1297 // It produces the lane index across all unrolled iterations. Unrolling will
1298 // add all copies of its original operand as additional operands.
1299 // Implemented with @llvm.experimental.cttz.elts, but returns the expected
1300 // result even with operands that are all zeroes.
1302 // Calculates the last active lane index of the vector predicate operands.
1303 // The predicates must be prefix-masks (all 1s before all 0s). Used when
1304 // tail-folding to extract the correct live-out value from the last active
1305 // iteration. It produces the lane index across all unrolled iterations.
1306 // Unrolling will add all copies of its original operand as additional
1307 // operands.
1309 // Returns a reversed vector for the operand.
1311 /// Start vector for reductions with 3 operands: the original start value,
1312 /// the identity value for the reduction and an integer indicating the
1313 /// scaling factor.
1315 /// Extracts a single lane (first operand) from a set of vector operands.
1316 /// The lane specifies an index into a vector formed by combining all vector
1317 /// operands (all operands after the first one).
1319 /// Explicit user for the resume phi of the canonical induction in the main
1320 /// VPlan, used by the epilogue vector loop.
1322 /// Extracts the last active lane from a set of vectors. The first operand
1323 /// is the default value if no lanes in the masks are active. Conceptually,
1324 /// this concatenates all data vectors (odd operands), concatenates all
1325 /// masks (even operands -- ignoring the default value), and returns the
1326 /// last active value from the combined data vector using the combined mask.
1328 /// Compute the exiting value of a wide induction after vectorization, that
1329 /// is the value of the last lane of the induction increment (i.e. its
1330 /// backedge value). Has the wide induction recipe as operand.
1333
1334 // The opcodes below are used for VPInstructionWithType.
1335 // NOTE: VPInstructionWithType classes are also used for:
1336 // 1. All CastInst variants - see createVPInstructionsForVPBB, and other
1337 // cases where createScalarCast, createScalarZExtOrTrunc and
1338 // createScalarSExtOrTrunc are invoked.
1339 // 2. Scalar load instructions - see createVPInstructionsForVPBB.
1340
1341 /// Scale the first operand (vector step) by the second operand
1342 /// (scalar-step). Casts both operands to the result type if needed.
1344 // Creates a step vector starting from 0 to VF with a step of 1.
1346 /// Returns the value for vscale.
1348
1350 };
1351
1352 /// Returns true if this VPInstruction generates scalar values for all lanes.
1353 /// Most VPInstructions generate a single value per part, either vector or
1354 /// scalar. VPReplicateRecipe takes care of generating multiple (scalar)
1355 /// values per all lanes, stemming from an original ingredient. This method
1356 /// identifies the (rare) cases of VPInstructions that do so as well, w/o an
1357 /// underlying ingredient.
1358 bool doesGeneratePerAllLanes() const;
1359
1360 /// Return the number of operands determined by the opcode of the
1361 /// VPInstruction, excluding mask. Returns -1u if the number of operands
1362 /// cannot be determined directly by the opcode.
1363 unsigned getNumOperandsForOpcode() const;
1364
1365private:
1366 typedef unsigned char OpcodeTy;
1367 OpcodeTy Opcode;
1368
1369 /// An optional name that can be used for the generated IR instruction.
1370 std::string Name;
1371
1372 /// Returns true if we can generate a scalar for the first lane only if
1373 /// needed.
1374 bool canGenerateScalarForFirstLane() const;
1375
1376 /// Utility methods serving execute(): generates a single vector instance of
1377 /// the modeled instruction. \returns the generated value. In some cases an
1378 /// existing value is returned rather than a generated one.
1379 Value *generate(VPTransformState &State);
1380
1381 /// Returns true if the VPInstruction does not need masking.
1382 bool alwaysUnmasked() const {
1383 if (Opcode == VPInstruction::MaskedCond)
1384 return false;
1385
1386 // For now only VPInstructions with underlying values use masks.
1387 // TODO: provide masks to VPInstructions w/o underlying values.
1388 if (!getUnderlyingValue())
1389 return true;
1390
1391 return Instruction::isCast(Opcode) || Opcode == Instruction::PHI ||
1392 Opcode == Instruction::GetElementPtr;
1393 }
1394
1395public:
1396 VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands,
1397 const VPIRFlags &Flags = {}, const VPIRMetadata &MD = {},
1398 DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "",
1399 Type *ResultTy = nullptr);
1400
1401 VP_CLASSOF_IMPL(VPRecipeBase::VPInstructionSC)
1402
1403 VPInstruction *clone() override {
1405 }
1406
1408 Type *ResultTy = nullptr) {
1409 auto *New = new VPInstruction(Opcode, NewOperands, *this, *this,
1410 getDebugLoc(), Name, ResultTy);
1411 if (getUnderlyingValue())
1412 New->setUnderlyingValue(getUnderlyingInstr());
1413 return New;
1414 }
1415
1416 unsigned getOpcode() const { return Opcode; }
1417
1418 /// Generate the instruction.
1419 /// TODO: We currently execute only per-part unless a specific instance is
1420 /// provided.
1421 void execute(VPTransformState &State) override;
1422
1423 /// Return the cost of this VPInstruction.
1424 InstructionCost computeCost(ElementCount VF,
1425 VPCostContext &Ctx) const override;
1426
1427#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1428 /// Print the VPInstruction to dbgs() (for debugging).
1429 LLVM_DUMP_METHOD void dump() const;
1430#endif
1431
1432 bool hasResult() const {
1433 // CallInst may or may not have a result, depending on the called function.
1434 // Conservatively return calls have results for now.
1435 switch (getOpcode()) {
1436 case Instruction::Ret:
1437 case Instruction::UncondBr:
1438 case Instruction::CondBr:
1439 case Instruction::Store:
1440 case Instruction::Switch:
1441 case Instruction::IndirectBr:
1442 case Instruction::Resume:
1443 case Instruction::CatchRet:
1444 case Instruction::Unreachable:
1445 case Instruction::Fence:
1446 case Instruction::AtomicRMW:
1450 return false;
1451 default:
1452 return true;
1453 }
1454 }
1455
1456 /// Returns true if the VPInstruction has a mask operand.
1457 bool isMasked() const {
1458 unsigned NumOpsForOpcode = getNumOperandsForOpcode();
1459 // VPInstructions without a fixed number of operands cannot be masked.
1460 if (NumOpsForOpcode == -1u)
1461 return false;
1462 return NumOpsForOpcode + 1 == getNumOperands();
1463 }
1464
1465 /// Returns the number of operands, excluding the mask if the VPInstruction is
1466 /// masked.
1467 unsigned getNumOperandsWithoutMask() const {
1468 return getNumOperands() - isMasked();
1469 }
1470
1471 /// Add mask \p Mask to an unmasked VPInstruction, if it needs masking.
1472 void addMask(VPValue *Mask) {
1473 assert(!isMasked() && "recipe is already masked");
1474 if (alwaysUnmasked())
1475 return;
1476 addOperand(Mask);
1477 }
1478
1479 /// Returns the mask for the VPInstruction. Returns nullptr for unmasked
1480 /// VPInstructions.
1481 VPValue *getMask() const {
1482 return isMasked() ? getOperand(getNumOperands() - 1) : nullptr;
1483 }
1484
1485 /// Returns an iterator range over the operands excluding the mask operand
1486 /// if present.
1493
1494 /// Returns true if the underlying opcode may read from or write to memory.
1495 bool opcodeMayReadOrWriteFromMemory() const;
1496
1497 /// Returns true if the recipe only uses the first lane of operand \p Op.
1498 bool usesFirstLaneOnly(const VPValue *Op) const override;
1499
1500 /// Returns true if the recipe only uses the first part of operand \p Op.
1501 bool usesFirstPartOnly(const VPValue *Op) const override;
1502
1503 /// Returns true if this VPInstruction produces a scalar value from a vector,
1504 /// e.g. by performing a reduction or extracting a lane.
1505 bool isVectorToScalar() const;
1506
1507 /// Returns true if this VPInstruction's operands are single scalars and the
1508 /// result is also a single scalar.
1509 bool isSingleScalar() const;
1510
1511 /// Returns the symbolic name assigned to the VPInstruction.
1512 StringRef getName() const { return Name; }
1513
1514 /// Set the symbolic name for the VPInstruction.
1515 void setName(StringRef NewName) { Name = NewName.str(); }
1516
1517protected:
1518#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1519 /// Print the VPInstruction to \p O.
1520 void printRecipe(raw_ostream &O, const Twine &Indent,
1521 VPSlotTracker &SlotTracker) const override;
1522#endif
1523};
1524
1525/// A specialization of VPInstruction augmenting it with a dedicated result
1526/// type, to be used when the opcode and operands of the VPInstruction don't
1527/// directly determine the result type. Note that there is no separate recipe ID
1528/// for VPInstructionWithType; it shares the same ID as VPInstruction and is
1529/// distinguished purely by the opcode.
1530/// TODO: Merge with VPInstruction, now that VPRecipeValue provides the type.
1532public:
1534 Type *ResultTy, const VPIRFlags &Flags = {},
1535 const VPIRMetadata &Metadata = {},
1537 const Twine &Name = "")
1538 : VPInstruction(Opcode, Operands, Flags, Metadata, DL, Name, ResultTy) {}
1539
1540 static inline bool classof(const VPRecipeBase *R) {
1541 // VPInstructionWithType are VPInstructions with specific opcodes requiring
1542 // type information.
1543 auto *VPI = dyn_cast<VPInstruction>(R);
1544 if (!VPI)
1545 return false;
1546 unsigned Opc = VPI->getOpcode();
1548 return true;
1549 switch (Opc) {
1553 case Instruction::Load:
1554 return true;
1555 default:
1556 return false;
1557 }
1558 }
1559
1560 static inline bool classof(const VPUser *R) {
1562 }
1563
1564 VPInstruction *clone() override {
1565 auto *New =
1567 *this, *this, getDebugLoc(), getName());
1568 New->setUnderlyingValue(getUnderlyingValue());
1569 return New;
1570 }
1571
1572 void execute(VPTransformState &State) override;
1573
1574 /// Return the cost of this VPInstruction.
1576 VPCostContext &Ctx) const override {
1577 // TODO: Compute accurate cost after retiring the legacy cost model.
1578 return 0;
1579 }
1580
1581 Type *getResultType() const { return getScalarType(); }
1582
1583protected:
1584#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1585 /// Print the recipe.
1586 void printRecipe(raw_ostream &O, const Twine &Indent,
1587 VPSlotTracker &SlotTracker) const override;
1588#endif
1589};
1590
1591/// Helper type to provide functions to access incoming values and blocks for
1592/// phi-like recipes.
1594protected:
1595 /// Return a VPRecipeBase* to the current object.
1596 virtual const VPRecipeBase *getAsRecipe() const = 0;
1597
1598public:
1599 virtual ~VPPhiAccessors() = default;
1600
1601 /// Returns the incoming VPValue with index \p Idx.
1602 VPValue *getIncomingValue(unsigned Idx) const {
1603 return getAsRecipe()->getOperand(Idx);
1604 }
1605
1606 /// Returns the incoming block with index \p Idx.
1607 const VPBasicBlock *getIncomingBlock(unsigned Idx) const;
1608
1609 /// Returns the incoming value for \p VPBB. \p VPBB must be an incoming block.
1610 VPValue *getIncomingValueForBlock(const VPBasicBlock *VPBB) const;
1611
1612 /// Sets the incoming value for \p VPBB to \p V. \p VPBB must be an incoming
1613 /// block.
1614 void setIncomingValueForBlock(const VPBasicBlock *VPBB, VPValue *V) const;
1615
1616 /// Returns the number of incoming values, also number of incoming blocks.
1617 virtual unsigned getNumIncoming() const {
1618 return getAsRecipe()->getNumOperands();
1619 }
1620
1621 /// Returns an iterator range over the incoming values.
1623 return make_range(getAsRecipe()->op_begin(),
1624 getAsRecipe()->op_begin() + getNumIncoming());
1625 }
1626
1628 detail::index_iterator, std::function<const VPBasicBlock *(size_t)>>>;
1629
1630 /// Returns an iterator range over the incoming blocks.
1632 std::function<const VPBasicBlock *(size_t)> GetBlock = [this](size_t Idx) {
1633 return getIncomingBlock(Idx);
1634 };
1635 return map_range(index_range(0, getNumIncoming()), GetBlock);
1636 }
1637
1638 /// Returns an iterator range over pairs of incoming values and corresponding
1639 /// incoming blocks.
1645
1646 /// Removes the incoming value for \p IncomingBlock, which must be a
1647 /// predecessor.
1648 void removeIncomingValueFor(VPBlockBase *IncomingBlock) const;
1649
1650#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1651 /// Print the recipe.
1653#endif
1654};
1655
1658 const Twine &Name = "", Type *ResultTy = nullptr)
1659 : VPInstruction(Instruction::PHI, Operands, Flags, {}, DL, Name,
1660 ResultTy) {}
1661
1662 static inline bool classof(const VPUser *U) {
1663 auto *VPI = dyn_cast<VPInstruction>(U);
1664 return VPI && VPI->getOpcode() == Instruction::PHI;
1665 }
1666
1667 static inline bool classof(const VPValue *V) {
1668 auto *VPI = dyn_cast<VPInstruction>(V);
1669 return VPI && VPI->getOpcode() == Instruction::PHI;
1670 }
1671
1672 static inline bool classof(const VPSingleDefRecipe *SDR) {
1673 auto *VPI = dyn_cast<VPInstruction>(SDR);
1674 return VPI && VPI->getOpcode() == Instruction::PHI;
1675 }
1676
1677 VPPhi *clone() override {
1678 auto *PhiR = new VPPhi(operands(), *this, getDebugLoc(), getName());
1679 PhiR->setUnderlyingValue(getUnderlyingValue());
1680 return PhiR;
1681 }
1682
1683 void execute(VPTransformState &State) override;
1684
1685protected:
1686#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1687 /// Print the recipe.
1688 void printRecipe(raw_ostream &O, const Twine &Indent,
1689 VPSlotTracker &SlotTracker) const override;
1690#endif
1691
1692 const VPRecipeBase *getAsRecipe() const override { return this; }
1693};
1694
1695 /// A recipe to wrap an original IR instruction not to be modified during
1696 /// execution, except for PHIs. PHIs are modeled via the VPIRPhi subclass.
1697 /// Except for PHIs, VPIRInstructions cannot have any operands.
1699 Instruction &I;
1700
1701protected:
1702 /// VPIRInstruction::create() should be used to create VPIRInstructions, as
1703 /// subclasses may need to be created, e.g. VPIRPhi.
1705 : VPRecipeBase(VPRecipeBase::VPIRInstructionSC, {}), I(I) {}
1706
1707public:
1708 ~VPIRInstruction() override = default;
1709
1710 /// Create a new VPIRPhi for \p I, if it is a PHINode, otherwise create a
1711 /// VPIRInstruction.
1713
1714 VP_CLASSOF_IMPL(VPRecipeBase::VPIRInstructionSC)
1715
1717 auto *R = create(I);
1718 for (auto *Op : operands())
1719 R->addOperand(Op);
1720 return R;
1721 }
1722
1723 void execute(VPTransformState &State) override;
1724
1725 /// Return the cost of this VPIRInstruction.
1727 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
1728
1729 Instruction &getInstruction() const { return I; }
1730
1731 bool usesScalars(const VPValue *Op) const override {
1733 "Op must be an operand of the recipe");
1734 return true;
1735 }
1736
1737 bool usesFirstPartOnly(const VPValue *Op) const override {
1739 "Op must be an operand of the recipe");
1740 return true;
1741 }
1742
1743 bool usesFirstLaneOnly(const VPValue *Op) const override {
1745 "Op must be an operand of the recipe");
1746 return true;
1747 }
1748
1749protected:
1750#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1751 /// Print the recipe.
1752 void printRecipe(raw_ostream &O, const Twine &Indent,
1753 VPSlotTracker &SlotTracker) const override;
1754#endif
1755};
1756
1757/// An overlay for VPIRInstructions wrapping PHI nodes enabling convenient use
1758 /// of cast/dyn_cast/isa and execute() implementation. A single VPValue operand is
1759/// allowed, and it is used to add a new incoming value for the single
1760/// predecessor VPBB.
1762 public VPPhiAccessors {
1764
1765 static inline bool classof(const VPRecipeBase *U) {
1766 auto *R = dyn_cast<VPIRInstruction>(U);
1767 return R && isa<PHINode>(R->getInstruction());
1768 }
1769
1770 static inline bool classof(const VPUser *U) {
1771 auto *R = dyn_cast<VPRecipeBase>(U);
1772 return R && classof(R);
1773 }
1774
1776
1777 void execute(VPTransformState &State) override;
1778
1779protected:
1780#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1781 /// Print the recipe.
1782 void printRecipe(raw_ostream &O, const Twine &Indent,
1783 VPSlotTracker &SlotTracker) const override;
1784#endif
1785
1786 const VPRecipeBase *getAsRecipe() const override { return this; }
1787};
1788
1789/// VPWidenRecipe is a recipe for producing a widened instruction using the
1790/// opcode and operands of the recipe. This recipe covers most of the
1791/// traditional vectorization cases where each recipe transforms into a
1792/// vectorized version of itself.
1794 public VPIRMetadata {
1795 unsigned Opcode;
1796
1797public:
1799 const VPIRFlags &Flags = {}, const VPIRMetadata &Metadata = {},
1800 DebugLoc DL = {})
1801 : VPWidenRecipe(I.getOpcode(), Operands, Flags, Metadata, DL) {
1802 setUnderlyingValue(&I);
1803 }
1804
1805 VPWidenRecipe(unsigned Opcode, ArrayRef<VPValue *> Operands,
1806 const VPIRFlags &Flags = {}, const VPIRMetadata &Metadata = {},
1807 DebugLoc DL = {})
1808 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenSC, Operands,
1809 computeScalarTypeForInstruction(Opcode, Operands),
1810 Flags, DL),
1811 VPIRMetadata(Metadata), Opcode(Opcode) {}
1812
1813 ~VPWidenRecipe() override = default;
1814
1816
1818 if (auto *UV = getUnderlyingValue())
1819 return new VPWidenRecipe(*cast<Instruction>(UV), NewOperands, *this,
1820 *this, getDebugLoc());
1821 return new VPWidenRecipe(Opcode, NewOperands, *this, *this, getDebugLoc());
1822 }
1823
1824 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenSC)
1825
1826 /// Produce a widened instruction using the opcode and operands of the recipe,
1827 /// processing State.VF elements.
1828 void execute(VPTransformState &State) override;
1829
1830 /// Return the cost of this VPWidenRecipe.
1831 InstructionCost computeCost(ElementCount VF,
1832 VPCostContext &Ctx) const override;
1833
1834 unsigned getOpcode() const { return Opcode; }
1835
1836protected:
1837#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1838 /// Print the recipe.
1839 void printRecipe(raw_ostream &O, const Twine &Indent,
1840 VPSlotTracker &SlotTracker) const override;
1841#endif
1842
1843 /// Returns true if the recipe only uses the first lane of operand \p Op.
1844 bool usesFirstLaneOnly(const VPValue *Op) const override {
1846 "Op must be an operand of the recipe");
1847 return Opcode == Instruction::Select && Op == getOperand(0) &&
1848 Op->isDefinedOutsideLoopRegions();
1849 }
1850};
1851
1852/// VPWidenCastRecipe is a recipe to create vector cast instructions.
1853/// TODO: Merge with VPWidenRecipe now that type is associated to every
1854/// VPRecipeValue.
1856 /// Cast instruction opcode.
1857 Instruction::CastOps Opcode;
1858
1859public:
1861 CastInst *CI = nullptr, const VPIRFlags &Flags = {},
1862 const VPIRMetadata &Metadata = {},
1864 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenCastSC, Op, ResultTy, Flags,
1865 DL),
1866 VPIRMetadata(Metadata), Opcode(Opcode) {
1867 assert(flagsValidForOpcode(Opcode) &&
1868 "Set flags not supported for the provided opcode");
1870 "Opcode requires specific flags to be set");
1872 }
1873
1874 ~VPWidenCastRecipe() override = default;
1875
1877 return new VPWidenCastRecipe(Opcode, getOperand(0), getScalarType(),
1879 *this, *this, getDebugLoc());
1880 }
1881
1882 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenCastSC)
1883
1884 /// Produce widened copies of the cast.
1885 LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
1886
1887 /// Return the cost of this VPWidenCastRecipe.
1889 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
1890
1891 Instruction::CastOps getOpcode() const { return Opcode; }
1892
1893protected:
1894#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1895 /// Print the recipe.
1896 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
1897 VPSlotTracker &SlotTracker) const override;
1898#endif
1899};
1900
1901/// A recipe for widening vector intrinsics.
1903 /// ID of the vector intrinsic to widen.
1904 Intrinsic::ID VectorIntrinsicID;
1905
1906 /// True if the intrinsic may read from memory.
1907 bool MayReadFromMemory;
1908
1909 /// True if the intrinsic may write to memory.
1910 bool MayWriteToMemory;
1911
1912 /// True if the intrinsic may have side-effects.
1913 bool MayHaveSideEffects;
1914
1915protected:
1916 VPWidenIntrinsicRecipe(const unsigned char SC,
1917 Intrinsic::ID VectorIntrinsicID,
1918 ArrayRef<VPValue *> CallArguments, Type *Ty,
1919 const VPIRFlags &Flags = {},
1920 const VPIRMetadata &MD = {},
1922 : VPRecipeWithIRFlags(SC, CallArguments, Ty, Flags, DL), VPIRMetadata(MD),
1923 VectorIntrinsicID(VectorIntrinsicID) {
1924 LLVMContext &Ctx = Ty->getContext();
1925 AttributeSet Attrs = Intrinsic::getFnAttributes(Ctx, VectorIntrinsicID);
1926 MemoryEffects ME = Attrs.getMemoryEffects();
1927 MayReadFromMemory = !ME.onlyWritesMemory();
1928 MayWriteToMemory = !ME.onlyReadsMemory();
1929 MayHaveSideEffects = MayWriteToMemory ||
1930 !Attrs.hasAttribute(Attribute::NoUnwind) ||
1931 !Attrs.hasAttribute(Attribute::WillReturn);
1932 }
1933
1934 /// Helper function to produce the widened intrinsic call.
1935 CallInst *createVectorCall(VPTransformState &State);
1936
1937public:
1939 ArrayRef<VPValue *> CallArguments, Type *Ty,
1940 const VPIRFlags &Flags = {},
1941 const VPIRMetadata &MD = {},
1943 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenIntrinsicSC, CallArguments, Ty,
1944 Flags, DL),
1945 VPIRMetadata(MD), VectorIntrinsicID(VectorIntrinsicID),
1946 MayReadFromMemory(CI.mayReadFromMemory()),
1947 MayWriteToMemory(CI.mayWriteToMemory()),
1948 MayHaveSideEffects(CI.mayHaveSideEffects()) {
1949 setUnderlyingValue(&CI);
1950 }
1951
1953 ArrayRef<VPValue *> CallArguments, Type *Ty,
1954 const VPIRFlags &Flags = {},
1955 const VPIRMetadata &Metadata = {},
1957 : VPWidenIntrinsicRecipe(VPRecipeBase::VPWidenIntrinsicSC,
1958 VectorIntrinsicID, CallArguments, Ty, Flags,
1959 Metadata, DL) {}
1960
1961 ~VPWidenIntrinsicRecipe() override = default;
1962
1964 if (Value *CI = getUnderlyingValue())
1965 return new VPWidenIntrinsicRecipe(*cast<CallInst>(CI), VectorIntrinsicID,
1966 operands(), getScalarType(), *this,
1967 *this, getDebugLoc());
1968 return new VPWidenIntrinsicRecipe(VectorIntrinsicID, operands(),
1969 getScalarType(), *this, *this,
1970 getDebugLoc());
1971 }
1972
/// Support isa/cast/dyn_cast from a VPRecipeBase. Two recipe IDs are
/// accepted: VPWidenIntrinsicSC and VPWidenMemIntrinsicSC, so recipes
/// constructed with the memory-intrinsic ID are also matched.
1973 static inline bool classof(const VPRecipeBase *R) {
1974 return R->getVPRecipeID() == VPRecipeBase::VPWidenIntrinsicSC ||
1975 R->getVPRecipeID() == VPRecipeBase::VPWidenMemIntrinsicSC;
1976 }
1977
/// Support isa/cast/dyn_cast from a VPUser: a user that is not a recipe is
/// rejected, any other user is checked through the VPRecipeBase overload.
1978 static inline bool classof(const VPUser *U) {
1979 if (const auto *Recipe = dyn_cast<VPRecipeBase>(U))
1980 return classof(Recipe);
 return false;
1981 }
1982
/// Support isa/cast/dyn_cast from a VPValue. A value whose defining recipe
/// is null is rejected; otherwise the defining recipe is checked through the
/// VPRecipeBase overload.
1983 static inline bool classof(const VPValue *V) {
1984 auto *R = V->getDefiningRecipe();
1985 return R && classof(R);
1986 }
1987
/// Support isa/cast/dyn_cast from a VPSingleDefRecipe. The explicit upcast
/// selects the VPRecipeBase overload, which performs the recipe-ID check.
1988 static inline bool classof(const VPSingleDefRecipe *R) {
1989 return classof(static_cast<const VPRecipeBase *>(R));
1990 }
1991
1992 /// Produce a widened version of the vector intrinsic.
1993 LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
1994
1995 /// Compute the cost of a vector intrinsic with \p ID and \p Operands.
1998 const VPRecipeWithIRFlags &R,
1999 ElementCount VF, VPCostContext &Ctx);
2000
2001 /// Return the cost of this vector intrinsic.
2003 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
2004
2005 /// Return the ID of the intrinsic.
2006 Intrinsic::ID getVectorIntrinsicID() const { return VectorIntrinsicID; }
2007
2008 /// Return the name of the intrinsic as a string.
2010
2011 /// Returns true if the intrinsic may read from memory.
2012 bool mayReadFromMemory() const { return MayReadFromMemory; }
2013
2014 /// Returns true if the intrinsic may write to memory.
2015 bool mayWriteToMemory() const { return MayWriteToMemory; }
2016
2017 /// Returns true if the intrinsic may have side-effects.
2018 bool mayHaveSideEffects() const { return MayHaveSideEffects; }
2019
2020 LLVM_ABI_FOR_TEST bool usesFirstLaneOnly(const VPValue *Op) const override;
2021
2022protected:
2023#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2024 /// Print the recipe.
2025 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
2026 VPSlotTracker &SlotTracker) const override;
2027#endif
2028};
2029
2030/// A recipe for widening vector memory intrinsics.
2032 /// Alignment information for this memory access.
2033 Align Alignment;
2034
2035public:
2036 // TODO: support StoreInst for strided store
2038 ArrayRef<VPValue *> CallArguments, Type *Ty,
2039 Align Alignment, const VPIRMetadata &MD = {},
2041 : VPWidenIntrinsicRecipe(VPRecipeBase::VPWidenMemIntrinsicSC,
2042 VectorIntrinsicID, CallArguments, Ty, {}, MD,
2043 DL),
2044 Alignment(Alignment) {
2045 assert(VectorIntrinsicID == Intrinsic::experimental_vp_strided_load &&
2046 "Unexpected intrinsic");
2047 }
2048
2049 ~VPWidenMemIntrinsicRecipe() override = default;
2050
2053 getScalarType(), Alignment, *this,
2054 getDebugLoc());
2055 }
2056
2057 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenMemIntrinsicSC)
2058
2059 /// Produce a widened version of the vector memory intrinsic.
2060 void execute(VPTransformState &State) override;
2061
2062 /// Helper function for computing the cost of vector memory intrinsic.
2064 bool IsMasked, Align Alignment,
2065 VPCostContext &Ctx);
2066
2067 /// Return the cost of this vector memory intrinsic.
2069 VPCostContext &Ctx) const override;
2070};
2071
2072/// A recipe for widening Call instructions using library calls.
2074 public VPIRMetadata {
2075 /// Variant stores a pointer to the chosen function. There is a 1:1 mapping
2076 /// between a given VF and the chosen vectorized variant, so there will be a
2077 /// different VPlan for each VF with a valid variant.
2078 Function *Variant;
2079
2080public:
2082 ArrayRef<VPValue *> CallArguments,
2083 const VPIRFlags &Flags = {},
2084 const VPIRMetadata &Metadata = {}, DebugLoc DL = {})
2085 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenCallSC, CallArguments,
2086 toScalarizedTy(Variant->getReturnType()), Flags,
2087 DL),
2088 VPIRMetadata(Metadata), Variant(Variant) {
2089 setUnderlyingValue(UV);
2090 assert(
2091 isa<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue()) &&
2092 "last operand must be the called function");
2093 assert(cast<Function>(CallArguments.back()->getLiveInIRValue())
2094 ->getReturnType() == getScalarType() &&
2095 "Scalar type must match return type of called scalar function");
2096 }
2097
2098 ~VPWidenCallRecipe() override = default;
2099
2101 return new VPWidenCallRecipe(getUnderlyingValue(), Variant, operands(),
2102 *this, *this, getDebugLoc());
2103 }
2104
2105 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenCallSC)
2106
2107 /// Produce a widened version of the call instruction.
2108 void execute(VPTransformState &State) override;
2109
2110 /// Return the cost of this VPWidenCallRecipe.
2111 InstructionCost computeCost(ElementCount VF,
2112 VPCostContext &Ctx) const override;
2113
2114 /// Return the cost of widening a call using the vector function \p Variant.
2115 static InstructionCost computeCallCost(Function *Variant, VPCostContext &Ctx);
2116
2120
2123
2124 /// Returns true if the recipe only uses the first lane of operand \p Op.
2125 bool usesFirstLaneOnly(const VPValue *Op) const override;
2126
2127protected:
2128#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2129 /// Print the recipe.
2130 void printRecipe(raw_ostream &O, const Twine &Indent,
2131 VPSlotTracker &SlotTracker) const override;
2132#endif
2133};
2134
2135/// A recipe representing a sequence of load -> update -> store as part of
2136/// a histogram operation. This means there may be aliasing between vector
2137/// lanes, which is handled by the llvm.experimental.vector.histogram family
2138/// of intrinsics. The only update operations currently supported are
2139/// 'add' and 'sub' where the other term is loop-invariant.
2141 /// Opcode of the update operation, currently either add or sub.
2142 unsigned Opcode;
2143
2144public:
2145 VPHistogramRecipe(unsigned Opcode, ArrayRef<VPValue *> Operands,
2146 const VPIRMetadata &Metadata = {},
2148 : VPRecipeBase(VPRecipeBase::VPHistogramSC, Operands, DL),
2149 VPIRMetadata(Metadata), Opcode(Opcode) {}
2150
2151 ~VPHistogramRecipe() override = default;
2152
2154 return new VPHistogramRecipe(Opcode, operands(), *this, getDebugLoc());
2155 }
2156
2157 VP_CLASSOF_IMPL(VPRecipeBase::VPHistogramSC);
2158
2159 /// Produce a vectorized histogram operation.
2160 void execute(VPTransformState &State) override;
2161
2162 /// Return the cost of this VPHistogramRecipe.
2164 VPCostContext &Ctx) const override;
2165
2166 unsigned getOpcode() const { return Opcode; }
2167
2168 /// The mask, when one was provided, is the third operand. Return it, or a
2169 /// null pointer when all lanes should be executed unconditionally.
2170 VPValue *getMask() const {
2171 if (getNumOperands() != 3)
 return nullptr;
 return getOperand(2);
2172 }
2173
2174protected:
2175#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2176 /// Print the recipe
2177 void printRecipe(raw_ostream &O, const Twine &Indent,
2178 VPSlotTracker &SlotTracker) const override;
2179#endif
2180};
2181
2182/// A recipe for handling GEP instructions.
2184 Type *SourceElementTy;
2185
/// Returns true if operand 0, which this recipe treats as the GEP's pointer
/// operand, reports being defined outside of loop regions.
2186 bool isPointerLoopInvariant() const {
2187 return getOperand(0)->isDefinedOutsideLoopRegions();
2188 }
2189
/// Returns true if the \p I-th index reports being defined outside of loop
/// regions. Index \p I is operand I + 1 because operand 0 is used as the
/// pointer.
2190 bool isIndexLoopInvariant(unsigned I) const {
2191 return getOperand(I + 1)->isDefinedOutsideLoopRegions();
2192 }
2193
2194public:
2195 VPWidenGEPRecipe(Type *SourceElementTy, ArrayRef<VPValue *> Operands,
2196 const VPIRFlags &Flags = {},
2198 GetElementPtrInst *UV = nullptr)
2199 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenGEPSC, Operands,
2200 Operands[0]->getScalarType(), Flags, DL),
2201 SourceElementTy(SourceElementTy) {
2202 if (UV) {
2203 setUnderlyingValue(UV);
2206 assert(Metadata.empty() && "unexpected metadata on GEP");
2207 }
2208 }
2209
2210 ~VPWidenGEPRecipe() override = default;
2211
2217
2218 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenGEPSC)
2219
2220 /// This recipe generates a GEP instruction.
2221 unsigned getOpcode() const { return Instruction::GetElementPtr; }
2222
2223 /// Generate the gep nodes.
2224 void execute(VPTransformState &State) override;
2225
2226 Type *getSourceElementType() const { return SourceElementTy; }
2227
2228 /// Return the cost of this VPWidenGEPRecipe.
2230 VPCostContext &Ctx) const override {
2231 // TODO: Compute accurate cost after retiring the legacy cost model.
2232 return 0;
2233 }
2234
2235 /// Returns true if the recipe only uses the first lane of operand \p Op.
2236 bool usesFirstLaneOnly(const VPValue *Op) const override;
2237
2238protected:
2239#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2240 /// Print the recipe.
2241 void printRecipe(raw_ostream &O, const Twine &Indent,
2242 VPSlotTracker &SlotTracker) const override;
2243#endif
2244};
2245
2246/// A recipe to compute a pointer to the last element of each part of a widened
2247/// memory access for widened memory accesses of SourceElementTy. Used for
2248/// VPWidenMemoryRecipes or VPInterleaveRecipes that are reversed. An extra
2249/// Offset operand is added by convertToConcreteRecipes when UF = 1, and by the
2250/// unroller otherwise.
2252 Type *SourceElementTy;
2253
2254 /// The constant stride of the pointer computed by this recipe, expressed in
2255 /// units of SourceElementTy.
2256 int64_t Stride;
2257
2258public:
/// Create a vector-end-pointer recipe with operands {\p Ptr, \p VF}.
/// \p Ptr's scalar type and \p GEPFlags are forwarded to the
/// VPRecipeWithIRFlags base (presumably as the result type and IR flags --
/// confirm against the base-class constructor). \p Stride is expressed in
/// units of \p SourceElementTy and must be negative.
2259 VPVectorEndPointerRecipe(VPValue *Ptr, VPValue *VF, Type *SourceElementTy,
2260 int64_t Stride, GEPNoWrapFlags GEPFlags, DebugLoc DL)
2261 : VPRecipeWithIRFlags(VPRecipeBase::VPVectorEndPointerSC, {Ptr, VF},
2262 Ptr->getScalarType(), GEPFlags, DL),
2263 SourceElementTy(SourceElementTy), Stride(Stride) {
 // Only checked in builds with assertions enabled.
2264 assert(Stride < 0 && "Stride must be negative");
2265 }
2266
2267 VP_CLASSOF_IMPL(VPRecipeBase::VPVectorEndPointerSC)
2268
2269 Type *getSourceElementType() const { return SourceElementTy; }
2270 int64_t getStride() const { return Stride; }
2271 VPValue *getPointer() const { return getOperand(0); }
2272 VPValue *getVFValue() const { return getOperand(1); }
2274 return getNumOperands() == 3 ? getOperand(2) : nullptr;
2275 }
2276
2277 /// Adds the offset operand to the recipe.
2278 /// Offset = Stride * (VF - 1) + Part * Stride * VF.
2279 void materializeOffset(unsigned Part = 0);
2280
2281 void execute(VPTransformState &State) override;
2282
2283 bool usesFirstLaneOnly(const VPValue *Op) const override {
2285 "Op must be an operand of the recipe");
2286 return true;
2287 }
2288
2289 /// Return the cost of this VPVectorEndPointerRecipe.
2291 VPCostContext &Ctx) const override {
2292 // TODO: Compute accurate cost after retiring the legacy cost model.
2293 return 0;
2294 }
2295
2296 /// Returns true if the recipe only uses the first part of operand \p Op.
2297 bool usesFirstPartOnly(const VPValue *Op) const override {
2299 "Op must be an operand of the recipe");
2300 assert(getNumOperands() <= 2 && "must have at most two operands");
2301 return true;
2302 }
2303
2305 auto *VEPR = new VPVectorEndPointerRecipe(
2308 if (auto *Offset = getOffset())
2309 VEPR->addOperand(Offset);
2310 return VEPR;
2311 }
2312
2313protected:
2314#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2315 /// Print the recipe.
2316 void printRecipe(raw_ostream &O, const Twine &Indent,
2317 VPSlotTracker &SlotTracker) const override;
2318#endif
2319};
2320
2321/// A recipe to compute the pointers for widened memory accesses of \p
2322/// SourceElementTy, with the \p Stride expressed in units of \p
2323/// SourceElementTy. Unrolling adds an extra \p VFxPart operand for unrolled
2324/// parts > 0 and it produces `GEP SourceElementTy Ptr, VFxPart * Stride`.
2326 Type *SourceElementTy;
2327
2328public:
/// Create a vector-pointer recipe with operands {\p Ptr, \p Stride}.
/// \p Ptr's scalar type and \p GEPFlags are forwarded to the
/// VPRecipeWithIRFlags base (presumably as the result type and IR flags --
/// confirm against the base-class constructor). \p SourceElementTy is stored
/// and returned by getSourceElementType().
2329 VPVectorPointerRecipe(VPValue *Ptr, Type *SourceElementTy, VPValue *Stride,
2330 GEPNoWrapFlags GEPFlags, DebugLoc DL)
2331 : VPRecipeWithIRFlags(VPRecipeBase::VPVectorPointerSC,
2332 ArrayRef<VPValue *>({Ptr, Stride}),
2333 Ptr->getScalarType(), GEPFlags, DL),
2334 SourceElementTy(SourceElementTy) {}
2335
2336 VP_CLASSOF_IMPL(VPRecipeBase::VPVectorPointerSC)
2337
2338 VPValue *getStride() const { return getOperand(1); }
2339
2341 return getNumOperands() > 2 ? getOperand(2) : nullptr;
2342 }
2343
2344 void execute(VPTransformState &State) override;
2345
2346 Type *getSourceElementType() const { return SourceElementTy; }
2347
2348 bool usesFirstLaneOnly(const VPValue *Op) const override {
2350 "Op must be an operand of the recipe");
2351 return true;
2352 }
2353
2354 /// Returns true if the recipe only uses the first part of operand \p Op.
2355 bool usesFirstPartOnly(const VPValue *Op) const override {
2357 "Op must be an operand of the recipe");
2358 assert(getNumOperands() <= 2 && "must have at most two operands");
2359 return true;
2360 }
2361
2363 auto *Clone =
2364 new VPVectorPointerRecipe(getOperand(0), SourceElementTy, getStride(),
2366 if (auto *VFxPart = getVFxPart())
2367 Clone->addOperand(VFxPart);
2368 return Clone;
2369 }
2370
2371 /// Return the cost of this VPVectorPointerRecipe.
2373 VPCostContext &Ctx) const override {
2374 // TODO: Compute accurate cost after retiring the legacy cost model.
2375 return 0;
2376 }
2377
2378protected:
2379#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2380 /// Print the recipe.
2381 void printRecipe(raw_ostream &O, const Twine &Indent,
2382 VPSlotTracker &SlotTracker) const override;
2383#endif
2384};
2385
2386/// A pure virtual base class for all recipes modeling header phis, including
2387/// phis for first order recurrences, pointer inductions and reductions. The
2388/// start value is the first operand of the recipe and the incoming value from
2389/// the backedge is the second operand.
2390///
2391/// Inductions are modeled using the following sub-classes:
2392/// * VPWidenIntOrFpInductionRecipe: Generates vector values for integer and
2393/// floating point inductions with arbitrary start and step values. Produces
2394/// a vector PHI per-part.
2395/// * VPWidenPointerInductionRecipe: Generate vector and scalar values for a
2396/// pointer induction. Produces either a vector PHI per-part or scalar values
2397/// per-lane based on the canonical induction.
2398/// * VPFirstOrderRecurrencePHIRecipe
2399/// * VPReductionPHIRecipe
2400/// * VPActiveLaneMaskPHIRecipe
2401/// * VPEVLBasedIVPHIRecipe
2402///
2403/// Note that the canonical IV is modeled as a VPRegionValue associated with
2404/// its loop region.
2406 public VPPhiAccessors {
2407protected:
/// Convenience constructor: delegates to the overload taking an explicit
/// result type, passing \p Start's scalar type. \p Start is dereferenced
/// here, so it must not be null.
2408 VPHeaderPHIRecipe(unsigned char VPRecipeID, Instruction *UnderlyingInstr,
2409 VPValue *Start, DebugLoc DL = DebugLoc::getUnknown())
2410 : VPHeaderPHIRecipe(VPRecipeID, UnderlyingInstr, Start,
2411 Start->getScalarType(), DL) {}
2412
/// Construct a header phi with recipe ID \p VPRecipeID. \p Start,
/// \p ResultTy, \p UnderlyingInstr and \p DL are forwarded unchanged to the
/// VPSingleDefRecipe base; \p Start is the only operand passed at this point.
2413 VPHeaderPHIRecipe(unsigned char VPRecipeID, Instruction *UnderlyingInstr,
2414 VPValue *Start, Type *ResultTy, DebugLoc DL)
2415 : VPSingleDefRecipe(VPRecipeID, Start, ResultTy, UnderlyingInstr, DL) {}
2416
2417 const VPRecipeBase *getAsRecipe() const override { return this; }
2418
2419public:
2420 ~VPHeaderPHIRecipe() override = default;
2421
2422 /// Method to support type inquiry through isa, cast, and dyn_cast.
/// A recipe is a header phi when its recipe ID lies in the inclusive range
/// [VPFirstHeaderPHISC, VPLastHeaderPHISC].
2423 static inline bool classof(const VPRecipeBase *R) {
2424 return R->getVPRecipeID() >= VPRecipeBase::VPFirstHeaderPHISC &&
2425 R->getVPRecipeID() <= VPRecipeBase::VPLastHeaderPHISC;
2426 }
/// Support isa/cast/dyn_cast from a VPValue: true if \p V is defined by a
/// header phi recipe. A value with no defining recipe is not a header phi.
2427 static inline bool classof(const VPValue *V) {
 // The other classof(const VPValue *) overloads in this file null-check
 // getDefiningRecipe(); do the same instead of handing a possibly-null
 // pointer to isa<>.
2428 auto *R = V->getDefiningRecipe();
 return R && classof(R);
2429 }
/// Support isa/cast/dyn_cast from a VPSingleDefRecipe. The recipe is upcast
/// to VPRecipeBase so that isa<> performs the recipe-ID range check.
2430 static inline bool classof(const VPSingleDefRecipe *R) {
2431 return isa<VPHeaderPHIRecipe>(static_cast<const VPRecipeBase *>(R));
2432 }
2433
2434 /// Generate the phi nodes.
2435 void execute(VPTransformState &State) override = 0;
2436
2437 /// Return the cost of this header phi recipe.
2439 VPCostContext &Ctx) const override;
2440
2441 /// Returns the start value of the phi, if one is set.
2443 return getNumOperands() == 0 ? nullptr : getOperand(0);
2444 }
2446 return getNumOperands() == 0 ? nullptr : getOperand(0);
2447 }
2448
2449 /// Update the start value of the recipe.
2451
2452 /// Returns the incoming value from the loop backedge.
2454 return getOperand(1);
2455 }
2456
2457 /// Update the incoming value from the loop backedge.
2459
2460 /// Returns the backedge value as a recipe. The backedge value is guaranteed
2461 /// to be a recipe.
2463 return *getBackedgeValue()->getDefiningRecipe();
2464 }
2465
2466protected:
2467#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2468 /// Print the recipe.
2469 void printRecipe(raw_ostream &O, const Twine &Indent,
2470 VPSlotTracker &SlotTracker) const override = 0;
2471#endif
2472};
2473
2474/// Base class for widened induction (VPWidenIntOrFpInductionRecipe and
2475/// VPWidenPointerInductionRecipe), providing shared functionality, including
2476/// retrieving the step value, induction descriptor and original phi node.
2478 InductionDescriptor IndDesc;
2479
2480public:
/// Convenience constructor: delegates to the overload taking an explicit
/// result type, passing \p Start's scalar type. \p Start is dereferenced
/// here, so it must not be null.
2481 VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start,
2482 VPValue *Step, const InductionDescriptor &IndDesc,
2483 DebugLoc DL)
2484 : VPWidenInductionRecipe(Kind, IV, Start, Step, IndDesc,
2485 Start->getScalarType(), DL) {}
2486
/// Construct a widened induction with recipe ID \p Kind for phi \p IV.
/// \p Start is passed to the VPHeaderPHIRecipe base and \p Step is appended
/// afterwards, so the step is read back as operand 1 by getStepValue().
/// A copy of \p IndDesc is stored in the recipe.
2487 VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start,
2488 VPValue *Step, const InductionDescriptor &IndDesc,
2489 Type *ResultTy, DebugLoc DL)
2490 : VPHeaderPHIRecipe(Kind, IV, Start, ResultTy, DL), IndDesc(IndDesc) {
2491 addOperand(Step);
2492 }
2493
2494 static inline bool classof(const VPRecipeBase *R) {
2495 return R->getVPRecipeID() == VPRecipeBase::VPWidenIntOrFpInductionSC ||
2496 R->getVPRecipeID() == VPRecipeBase::VPWidenPointerInductionSC;
2497 }
2498
2499 static inline bool classof(const VPValue *V) {
2500 auto *R = V->getDefiningRecipe();
2501 return R && classof(R);
2502 }
2503
2504 static inline bool classof(const VPSingleDefRecipe *R) {
2505 return classof(static_cast<const VPRecipeBase *>(R));
2506 }
2507
2508 void execute(VPTransformState &State) override = 0;
2509
2510 /// Returns the start value of the induction.
2512
2513 /// Returns the step value of the induction.
2515 const VPValue *getStepValue() const { return getOperand(1); }
2516
2517 /// Update the step value of the recipe.
2518 void setStepValue(VPValue *V) { setOperand(1, V); }
2519
/// Returns operand 2. This base class only adds the start and step operands;
/// the derived constructors visible in this file append operand 2 themselves
/// (VF for the int/fp induction, NumUnrolledElems for the pointer induction).
2521 const VPValue *getVFValue() const { return getOperand(2); }
2522
2523 /// Returns the number of incoming values, also number of incoming blocks.
2524 /// Note that at the moment, VPWidenPointerInductionRecipe only has a single
2525 /// incoming value, its start value.
2526 unsigned getNumIncoming() const override { return 1; }
2527
2528 /// Returns the underlying PHINode if one exists, or null otherwise.
2532
2533 /// Returns the induction descriptor for the recipe.
2534 const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }
2535
2536 /// Returns the SCEV predicates associated with this induction.
2538 return IndDesc.getNoWrapPredicates();
2539 }
2540
2542 // TODO: All operands of base recipe must exist and be at same index in
2543 // derived recipe.
2545 "VPWidenIntOrFpInductionRecipe generates its own backedge value");
2546 }
2547
2549 // TODO: All operands of base recipe must exist and be at same index in
2550 // derived recipe.
2552 "VPWidenIntOrFpInductionRecipe generates its own backedge value");
2553 }
2554
2555 /// Returns true if the recipe only uses the first lane of operand \p Op.
2556 bool usesFirstLaneOnly(const VPValue *Op) const override {
2558 "Op must be an operand of the recipe");
2559 // The recipe creates its own wide start value, so it only requests the
2560 // first lane of the operand.
2561 // TODO: Remove once creating the start value is modeled separately.
2562 return Op == getStartValue() || Op == getStepValue();
2563 }
2564};
2565
2566/// A recipe for handling phi nodes of integer and floating-point inductions,
2567/// producing their vector values. This is an abstract recipe and must be
2568/// converted to concrete recipes before executing.
2570 public VPIRFlags {
2571 TruncInst *Trunc;
2572
2573 // If this recipe is unrolled it will have 2 additional operands.
// The constructors leave the recipe with three operands (start, step, VF),
// so the unrolled form is recognized by an operand count of five.
2574 bool isUnrolled() const { return getNumOperands() == 5; }
2575
2576public:
2578 VPValue *VF, const InductionDescriptor &IndDesc,
2579 const VPIRFlags &Flags, DebugLoc DL)
2580 : VPWidenInductionRecipe(VPRecipeBase::VPWidenIntOrFpInductionSC, IV,
2581 Start, Step, IndDesc, DL),
2582 VPIRFlags(Flags), Trunc(nullptr) {
2583 addOperand(VF);
2584 }
2585
2587 VPValue *VF, const InductionDescriptor &IndDesc,
2588 TruncInst *Trunc, const VPIRFlags &Flags,
2589 DebugLoc DL)
2590 : VPWidenInductionRecipe(VPRecipeBase::VPWidenIntOrFpInductionSC, IV,
2591 Start, Step, IndDesc,
2592 Trunc ? Trunc->getType() : Start->getType(), DL),
2593 VPIRFlags(Flags), Trunc(Trunc) {
2594 addOperand(VF);
2596 if (Trunc)
2598 assert(Metadata.empty() && "unexpected metadata on Trunc");
2599 }
2600
2602
2608
2609 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenIntOrFpInductionSC)
2610
2611 void execute(VPTransformState &State) override {
2612 llvm_unreachable("cannot execute this recipe, should be expanded via "
2613 "expandVPWidenIntOrFpInductionRecipe");
2614 }
2615
2616 /// Returns the start value of the induction.
2618
2619 /// If the recipe has been unrolled, return the VPValue for the induction
2620 /// increment, otherwise return null.
2622 return isUnrolled() ? getOperand(getNumOperands() - 2) : nullptr;
2623 }
2624
2625 /// Returns the number of incoming values, also number of incoming blocks.
2626 /// Note that at the moment, VPWidenIntOrFpInductionRecipes only have a single
2627 /// incoming value, its start value.
2628 unsigned getNumIncoming() const override { return 1; }
2629
2630 /// Returns the first defined value as TruncInst, if it is one or nullptr
2631 /// otherwise.
2632 TruncInst *getTruncInst() { return Trunc; }
2633 const TruncInst *getTruncInst() const { return Trunc; }
2634
2635 /// Returns true if the induction is canonical, i.e. starting at 0 and
2636 /// incremented by UF * VF (= the original IV is incremented by 1) and has the
2637 /// same type as the canonical induction.
2638 bool isCanonical() const;
2639
2640 /// Returns the VPValue representing the value of this induction at
2641 /// the last unrolled part, if it exists. Returns itself if unrolling did not
2642 /// take place.
2644 return isUnrolled() ? getOperand(getNumOperands() - 1) : this;
2645 }
2646
2647protected:
2648#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2649 /// Print the recipe.
2650 void printRecipe(raw_ostream &O, const Twine &Indent,
2651 VPSlotTracker &SlotTracker) const override;
2652#endif
2653};
2654
2656public:
2657 /// Create a new VPWidenPointerInductionRecipe for \p Phi with start value \p
2658 /// Start and the number of elements unrolled \p NumUnrolledElems, typically
2659 /// VF*UF.
2661 VPValue *NumUnrolledElems,
2662 const InductionDescriptor &IndDesc, DebugLoc DL)
2663 : VPWidenInductionRecipe(VPRecipeBase::VPWidenPointerInductionSC, Phi,
2664 Start, Step, IndDesc, DL) {
2665 addOperand(NumUnrolledElems);
2666 }
2667
2669
2675
2676 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenPointerInductionSC)
2677
2678 /// This recipe cannot be executed directly: it has to be expanded (the
/// diagnostic below names expandVPWidenPointerInduction) before execution.
/// Reaching this override indicates a bug.
2679 void execute(VPTransformState &State) override {
2680 llvm_unreachable("cannot execute this recipe, should be expanded via "
2681 "expandVPWidenPointerInduction");
2682 }
2683
2684 /// Returns true if only scalar values will be generated.
2685 bool onlyScalarsGenerated(bool IsScalable);
2686
2687protected:
2688#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2689 /// Print the recipe.
2690 void printRecipe(raw_ostream &O, const Twine &Indent,
2691 VPSlotTracker &SlotTracker) const override;
2692#endif
2693};
2694
2695/// A recipe for widened phis. Incoming values are operands of the recipe and
2696/// their operand index corresponds to the incoming predecessor block. If the
2697/// recipe is placed in an entry block to a (non-replicate) region, it must have
2698/// exactly 2 incoming values, the first from the predecessor of the region and
2699/// the second from the exiting block of the region.
2701 public VPPhiAccessors {
2702 /// Name to use for the generated IR instruction for the widened phi.
2703 std::string Name;
2704
2705public:
2706 /// Create a new VPWidenPHIRecipe with incoming values \p IncomingValues,
2707 /// debug location \p DL and \p Name.
2709 DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "")
2710 : VPSingleDefRecipe(VPRecipeBase::VPWidenPHISC, IncomingValues,
2711 IncomingValues[0]->getScalarType(),
2712 /*UV=*/nullptr, DL),
2713 Name(Name.str()) {
2714 assert(all_of(IncomingValues,
2715 [this](VPValue *VPV) {
2716 return VPV->getScalarType() == getScalarType();
2717 }) &&
2718 "all incoming values must have the same type");
2719 }
2720
2722 return new VPWidenPHIRecipe(operands(), getDebugLoc(), Name);
2723 }
2724
2725 ~VPWidenPHIRecipe() override = default;
2726
2727 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenPHISC)
2728
2729 /// Generate the phi/select nodes.
2730 void execute(VPTransformState &State) override;
2731
2732 /// Return the cost of this VPWidenPHIRecipe.
2734 VPCostContext &Ctx) const override;
2735
2736protected:
2737#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2738 /// Print the recipe.
2739 void printRecipe(raw_ostream &O, const Twine &Indent,
2740 VPSlotTracker &SlotTracker) const override;
2741#endif
2742
2743 const VPRecipeBase *getAsRecipe() const override { return this; }
2744};
2745
2746/// A recipe for handling first-order recurrence phis. The start value is the
2747/// first operand of the recipe and the incoming value from the backedge is the
2748/// second operand.
2751 VPValue &BackedgeValue)
2752 : VPHeaderPHIRecipe(VPRecipeBase::VPFirstOrderRecurrencePHISC, Phi,
2753 &Start) {
2754 addOperand(&BackedgeValue);
2755 }
2756
2757 VP_CLASSOF_IMPL(VPRecipeBase::VPFirstOrderRecurrencePHISC)
2758
2763
2764 void execute(VPTransformState &State) override;
2765
2766 /// Return the cost of this first-order recurrence phi recipe.
2768 VPCostContext &Ctx) const override;
2769
2770 /// Returns true if the recipe only uses the first lane of operand \p Op.
2771 bool usesFirstLaneOnly(const VPValue *Op) const override {
2773 "Op must be an operand of the recipe");
2774 return Op == getStartValue();
2775 }
2776
2777protected:
2778#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2779 /// Print the recipe.
2780 void printRecipe(raw_ostream &O, const Twine &Indent,
2781 VPSlotTracker &SlotTracker) const override;
2782#endif
2783};
2784
2785/// Possible variants of a reduction.
2786
2787/// This reduction is ordered and in-loop.
2788struct RdxOrdered {};
2789/// This reduction is in-loop.
2790struct RdxInLoop {};
2791/// This reduction is unordered with the partial result scaled down by some
2792/// factor.
2795};
2796using ReductionStyle = std::variant<RdxOrdered, RdxInLoop, RdxUnordered>;
2797
/// Translate the (\p InLoop, \p Ordered) flags into a ReductionStyle.
/// Ordered reductions must also be in-loop. \p ScaleFactor is only used when
/// the reduction is neither ordered nor in-loop, i.e. for the unordered style.
2798inline ReductionStyle getReductionStyle(bool InLoop, bool Ordered,
2799 unsigned ScaleFactor) {
2800 assert((!Ordered || InLoop) && "Ordered implies in-loop");
 // Start from the unordered style and override it for the in-loop variants;
 // Ordered takes precedence over InLoop.
2801 ReductionStyle Result = RdxUnordered{/*VFScaleFactor=*/ScaleFactor};
2802 if (Ordered)
2803 Result = RdxOrdered{};
2804 else if (InLoop)
2805 Result = RdxInLoop{};
 return Result;
2806}
2807
2808/// A recipe for handling reduction phis. The start value is the first operand
2809/// of the recipe and the incoming value from the backedge is the second
2810/// operand.
2812 /// The recurrence kind of the reduction.
2813 const RecurKind Kind;
2814
2815 ReductionStyle Style;
2816
2817 /// The phi is part of a multi-use reduction (e.g., used in FindIV
2818 /// patterns for argmin/argmax).
2819 /// TODO: Also support cases where the phi itself has a single use, but its
2820 /// compare has multiple uses.
2821 bool HasUsesOutsideReductionChain;
2822
2823public:
2824 /// Create a new VPReductionPHIRecipe for the reduction \p Phi.
2826 VPValue &BackedgeValue, ReductionStyle Style,
2827 const VPIRFlags &Flags,
2828 bool HasUsesOutsideReductionChain = false)
2829 : VPHeaderPHIRecipe(VPRecipeBase::VPReductionPHISC, Phi, &Start),
2830 VPIRFlags(Flags), Kind(Kind), Style(Style),
2831 HasUsesOutsideReductionChain(HasUsesOutsideReductionChain) {
2832 addOperand(&BackedgeValue);
2833 }
2834
2835 ~VPReductionPHIRecipe() override = default;
2836
2838 VPValue *BackedgeValue) {
2839 return new VPReductionPHIRecipe(
2841 *Start, *BackedgeValue, Style, *this, HasUsesOutsideReductionChain);
2842 }
2843
2847
2848 VP_CLASSOF_IMPL(VPRecipeBase::VPReductionPHISC)
2849
2850 /// Generate the phi/select nodes.
2851 void execute(VPTransformState &State) override;
2852
2853 /// Return the VF scale factor stored in an unordered reduction style. Any
2854 /// other style means the recipe's output is not scaled, so 1 is returned.
2855 unsigned getVFScaleFactor() const {
2856 if (const RdxUnordered *Unordered = std::get_if<RdxUnordered>(&Style))
2857 return Unordered->VFScaleFactor;
 return 1;
2858 }
2859
2860 /// Set the VFScaleFactor for this reduction phi. Can only be set to a factor
2861 /// > 1.
2862 void setVFScaleFactor(unsigned ScaleFactor) {
2863 assert(ScaleFactor > 1 && "must set to scale factor > 1");
2864 Style = RdxUnordered{ScaleFactor};
2865 }
2866
2867 /// Returns the number of incoming values, also number of incoming blocks.
2868 /// A reduction phi has two incoming values: the start value and the value
2869 /// from the loop backedge.
2870 unsigned getNumIncoming() const override { return 2; }
2871
2872 /// Returns the recurrence kind of the reduction.
2873 RecurKind getRecurrenceKind() const { return Kind; }
2874
2875 /// Returns true, if the phi is part of an ordered reduction.
2876 bool isOrdered() const { return std::holds_alternative<RdxOrdered>(Style); }
2877
2878 /// Returns true if the phi is part of an in-loop reduction. Ordered
/// reductions count as in-loop as well.
2879 bool isInLoop() const {
2880 if (std::holds_alternative<RdxInLoop>(Style))
 return true;
2881 return std::holds_alternative<RdxOrdered>(Style);
2882 }
2883
2884 /// Returns true if the reduction outputs a vector with a scaled down VF.
2885 bool isPartialReduction() const { return getVFScaleFactor() > 1; }
2886
2887 /// Returns true, if the phi is part of a multi-use reduction.
2889 return HasUsesOutsideReductionChain;
2890 }
2891
2892 /// Returns true if the recipe only uses the first lane of operand \p Op.
2893 bool usesFirstLaneOnly(const VPValue *Op) const override {
2895 "Op must be an operand of the recipe");
2896 return isOrdered() || isInLoop();
2897 }
2898
2899protected:
2900#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2901 /// Print the recipe.
2902 void printRecipe(raw_ostream &O, const Twine &Indent,
2903 VPSlotTracker &SlotTracker) const override;
2904#endif
2905};
2906
2907/// A recipe for vectorizing a phi-node as a sequence of mask-based select
2908/// instructions.
2910public:
2911 /// The blend operation is a User of the incoming values and of their
2912 /// respective masks, ordered [I0, M0, I1, M1, I2, M2, ...]. Note that M0 can
2913 /// be omitted (implied by passing an odd number of operands) in which case
2914 /// all other incoming values are merged into it.
2916 const VPIRFlags &Flags, DebugLoc DL)
2917 : VPRecipeWithIRFlags(VPRecipeBase::VPBlendSC, Operands,
2918 Operands[0]->getScalarType(), Flags, DL) {
2919 assert(Operands.size() >= 2 && "Expected at least two operands!");
2921 [this](unsigned I) {
2922 return getIncomingValue(I)->getScalarType() ==
2923 getScalarType();
2924 }) &&
2925 "all incoming values must have the same type");
2927 [this](unsigned I) {
2928 return getMask(I)->getScalarType()->isIntegerTy(1);
2929 }) &&
2930 "masks must be a bool");
2931 setUnderlyingValue(Phi);
2932 }
2933
2935
2938 NewOperands, *this, getDebugLoc());
2939 }
2940
2941 VP_CLASSOF_IMPL(VPRecipeBase::VPBlendSC)
2942
2943 /// A normalized blend is one that has an odd number of operands, whereby the
2944 /// first operand does not have an associated mask.
2945 bool isNormalized() const { return getNumOperands() % 2; }
2946
2947 /// Return the number of incoming values, taking into account when normalized
2948 /// the first incoming value will have no mask.
2949 unsigned getNumIncomingValues() const {
2950 return (getNumOperands() + isNormalized()) / 2;
2951 }
2952
2953 /// Return incoming value number \p Idx.
2954 VPValue *getIncomingValue(unsigned Idx) const {
2955 return Idx == 0 ? getOperand(0) : getOperand(Idx * 2 - isNormalized());
2956 }
2957
2958 /// Return mask number \p Idx.
2959 VPValue *getMask(unsigned Idx) const {
2960 assert((Idx > 0 || !isNormalized()) && "First index has no mask!");
2961 return Idx == 0 ? getOperand(1) : getOperand(Idx * 2 + !isNormalized());
2962 }
2963
2964 /// Set mask number \p Idx to \p V.
2965 void setMask(unsigned Idx, VPValue *V) {
2966 assert((Idx > 0 || !isNormalized()) && "First index has no mask!");
2967 Idx == 0 ? setOperand(1, V) : setOperand(Idx * 2 + !isNormalized(), V);
2968 }
2969
2970 void execute(VPTransformState &State) override {
2971 llvm_unreachable("VPBlendRecipe should be expanded by simplifyBlends");
2972 }
2973
2974 /// Return the cost of this VPBlendRecipe.
2975 InstructionCost computeCost(ElementCount VF,
2976 VPCostContext &Ctx) const override;
2977
2978 /// Returns true if the recipe only uses the first lane of operand \p Op.
2979 bool usesFirstLaneOnly(const VPValue *Op) const override;
2980
2981protected:
2982#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2983 /// Print the recipe.
2984 void printRecipe(raw_ostream &O, const Twine &Indent,
2985 VPSlotTracker &SlotTracker) const override;
2986#endif
2987};
2988
2989/// A common base class for interleaved memory operations.
2990/// An Interleaved memory operation is a memory access method that combines
2991/// multiple strided loads/stores into a single wide load/store with shuffles.
2992/// The first operand is the start address. The optional operands are, in order,
2993/// the stored values and the mask.
2995 public VPIRMetadata {
2997
2998 /// Indicates if the interleave group is in a conditional block and requires a
2999 /// mask.
3000 bool HasMask = false;
3001
3002 /// Indicates if gaps between members of the group need to be masked out or if
3003 /// unused gaps can be loaded speculatively.
3004 bool NeedsMaskForGaps = false;
3005
3006protected:
3007 VPInterleaveBase(const unsigned char SC,
3009 ArrayRef<VPValue *> Operands,
3010 ArrayRef<VPValue *> StoredValues, VPValue *Mask,
3011 bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
3012 : VPRecipeBase(SC, Operands, DL), VPIRMetadata(MD), IG(IG),
3013 NeedsMaskForGaps(NeedsMaskForGaps) {
3014 // TODO: extend the masked interleaved-group support to reversed access.
3015 assert((!Mask || !IG->isReverse()) &&
3016 "Reversed masked interleave-group not supported.");
3017 if (StoredValues.empty()) {
3018 for (Instruction *Inst : IG->members()) {
3019 assert(!Inst->getType()->isVoidTy() && "must have result");
3020 new VPMultiDefValue(this, Inst, Inst->getType());
3021 }
3022 } else {
3023 for (auto *SV : StoredValues)
3024 addOperand(SV);
3025 }
3026 if (Mask) {
3027 HasMask = true;
3028 addOperand(Mask);
3029 }
3030 }
3031
3032public:
3033 VPInterleaveBase *clone() override = 0;
3034
3035 static inline bool classof(const VPRecipeBase *R) {
3036 return R->getVPRecipeID() == VPRecipeBase::VPInterleaveSC ||
3037 R->getVPRecipeID() == VPRecipeBase::VPInterleaveEVLSC;
3038 }
3039
3040 static inline bool classof(const VPUser *U) {
3041 auto *R = dyn_cast<VPRecipeBase>(U);
3042 return R && classof(R);
3043 }
3044
3045 /// Return the address accessed by this recipe.
3046 VPValue *getAddr() const {
3047 return getOperand(0); // Address is the 1st, mandatory operand.
3048 }
3049
3050 /// Return the mask used by this recipe. Note that a full mask is represented
3051 /// by a nullptr.
3052 VPValue *getMask() const {
3053 // Mask is optional and the last operand.
3054 return HasMask ? getOperand(getNumOperands() - 1) : nullptr;
3055 }
3056
3057 /// Return true if the access needs a mask because of the gaps.
3058 bool needsMaskForGaps() const { return NeedsMaskForGaps; }
3059
3061
3062 Instruction *getInsertPos() const { return IG->getInsertPos(); }
3063
3064 void execute(VPTransformState &State) override {
3065 llvm_unreachable("VPInterleaveBase should not be instantiated.");
3066 }
3067
3068 /// Return the cost of this recipe.
3069 InstructionCost computeCost(ElementCount VF,
3070 VPCostContext &Ctx) const override;
3071
3072 /// Returns true if the recipe only uses the first lane of operand \p Op.
3073 bool usesFirstLaneOnly(const VPValue *Op) const override = 0;
3074
3075 /// Returns the number of stored operands of this interleave group. Returns 0
3076 /// for load interleave groups.
3077 virtual unsigned getNumStoreOperands() const = 0;
3078
3079 /// Return the VPValues stored by this interleave group. If it is a load
3080 /// interleave group, return an empty ArrayRef.
3082 return {op_end() - (getNumStoreOperands() + (HasMask ? 1 : 0)),
3084 }
3085};
3086
3087/// VPInterleaveRecipe is a recipe for transforming an interleave group of load
3088/// or stores into one wide load/store and shuffles. The first operand of a
3089/// VPInterleave recipe is the address, followed by the stored values, followed
3090/// by an optional mask.
3092public:
3094 ArrayRef<VPValue *> StoredValues, VPValue *Mask,
3095 bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
3096 : VPInterleaveBase(VPRecipeBase::VPInterleaveSC, IG, Addr, StoredValues,
3097 Mask, NeedsMaskForGaps, MD, DL) {}
3098
3099 ~VPInterleaveRecipe() override = default;
3100
3104 needsMaskForGaps(), *this, getDebugLoc());
3105 }
3106
3107 VP_CLASSOF_IMPL(VPRecipeBase::VPInterleaveSC)
3108
3109 /// Generate the wide load or store, and shuffles.
3110 void execute(VPTransformState &State) override;
3111
3112 bool usesFirstLaneOnly(const VPValue *Op) const override {
3114 "Op must be an operand of the recipe");
3115 return Op == getAddr() && !llvm::is_contained(getStoredValues(), Op);
3116 }
3117
3118 unsigned getNumStoreOperands() const override {
3119 return getNumOperands() - (getMask() ? 2 : 1);
3120 }
3121
3122protected:
3123#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3124 /// Print the recipe.
3125 void printRecipe(raw_ostream &O, const Twine &Indent,
3126 VPSlotTracker &SlotTracker) const override;
3127#endif
3128};
3129
3130/// A recipe for interleaved memory operations with vector-predication
3131/// intrinsics. The first operand is the address, the second operand is the
3132/// explicit vector length. Stored values and mask are optional operands.
3134public:
3136 : VPInterleaveBase(VPRecipeBase::VPInterleaveEVLSC,
3137 R.getInterleaveGroup(), {R.getAddr(), &EVL},
3138 R.getStoredValues(), Mask, R.needsMaskForGaps(), R,
3139 R.getDebugLoc()) {
3140 assert(!getInterleaveGroup()->isReverse() &&
3141 "Reversed interleave-group with tail folding is not supported.");
3142 assert(!needsMaskForGaps() && "Interleaved access with gap mask is not "
3143 "supported for scalable vector.");
3144 }
3145
3146 ~VPInterleaveEVLRecipe() override = default;
3147
3149 llvm_unreachable("cloning not implemented yet");
3150 }
3151
3152 VP_CLASSOF_IMPL(VPRecipeBase::VPInterleaveEVLSC)
3153
3154 /// The VPValue of the explicit vector length.
3155 VPValue *getEVL() const { return getOperand(1); }
3156
3157 /// Generate the wide load or store, and shuffles.
3158 void execute(VPTransformState &State) override;
3159
3160 /// The recipe only uses the first lane of the address, and EVL operand.
3161 bool usesFirstLaneOnly(const VPValue *Op) const override {
3163 "Op must be an operand of the recipe");
3164 return (Op == getAddr() && !llvm::is_contained(getStoredValues(), Op)) ||
3165 Op == getEVL();
3166 }
3167
3168 unsigned getNumStoreOperands() const override {
3169 return getNumOperands() - (getMask() ? 3 : 2);
3170 }
3171
3172protected:
3173#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3174 /// Print the recipe.
3175 void printRecipe(raw_ostream &O, const Twine &Indent,
3176 VPSlotTracker &SlotTracker) const override;
3177#endif
3178};
3179
3180/// A recipe to represent inloop, ordered or partial reduction operations. It
3181/// performs a reduction on a vector operand into a scalar (vector in the case
3182/// of a partial reduction) value, and adds the result to a chain. The Operands
3183/// are {ChainOp, VecOp, [Condition]}.
3185
3186 /// The recurrence kind for the reduction in question.
3187 RecurKind RdxKind;
3188 /// Whether the reduction is conditional.
3189 bool IsConditional = false;
3190 ReductionStyle Style;
3191
3192protected:
3193 VPReductionRecipe(const unsigned char SC, RecurKind RdxKind,
3195 ArrayRef<VPValue *> Operands, VPValue *CondOp,
3196 ReductionStyle Style, DebugLoc DL)
3197 : VPRecipeWithIRFlags(SC, Operands, Operands[0]->getScalarType(), FMFs,
3198 DL),
3199 RdxKind(RdxKind), Style(Style) {
3200 assert(all_of(Operands,
3201 [this](VPValue *VPV) {
3202 return VPV->getScalarType() == getScalarType() ||
3203 (isa<VPInstruction>(VPV) &&
3204 cast<VPInstruction>(VPV)->getOpcode() ==
3206 }) &&
3207 "all incoming values must have the same type");
3208 if (CondOp) {
3209 assert(CondOp->getScalarType()->isIntegerTy(1) &&
3210 "CondOp must be a bool");
3211 IsConditional = true;
3212 addOperand(CondOp);
3213 }
3215 }
3216
3217public:
3219 VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
3221 : VPReductionRecipe(VPRecipeBase::VPReductionSC, RdxKind, FMFs, I,
3222 {ChainOp, VecOp}, CondOp, Style, DL) {}
3223
3225 VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
3227 : VPReductionRecipe(VPRecipeBase::VPReductionSC, RdxKind, FMFs, nullptr,
3228 {ChainOp, VecOp}, CondOp, Style, DL) {}
3229
3230 ~VPReductionRecipe() override = default;
3231
3233 return new VPReductionRecipe(RdxKind, getFastMathFlags(),
3235 getCondOp(), Style, getDebugLoc());
3236 }
3237
3238 static inline bool classof(const VPRecipeBase *R) {
3239 return R->getVPRecipeID() == VPRecipeBase::VPReductionSC ||
3240 R->getVPRecipeID() == VPRecipeBase::VPReductionEVLSC;
3241 }
3242
3243 static inline bool classof(const VPUser *U) {
3244 auto *R = dyn_cast<VPRecipeBase>(U);
3245 return R && classof(R);
3246 }
3247
3248 static inline bool classof(const VPValue *VPV) {
3249 const VPRecipeBase *R = VPV->getDefiningRecipe();
3250 return R && classof(R);
3251 }
3252
3253 static inline bool classof(const VPSingleDefRecipe *R) {
3254 return classof(static_cast<const VPRecipeBase *>(R));
3255 }
3256
3257 /// Generate the reduction in the loop.
3258 void execute(VPTransformState &State) override;
3259
3260 /// Return the cost of VPReductionRecipe.
3261 InstructionCost computeCost(ElementCount VF,
3262 VPCostContext &Ctx) const override;
3263
3264 /// Return the recurrence kind for the in-loop reduction.
3265 RecurKind getRecurrenceKind() const { return RdxKind; }
3266 /// Return true if the in-loop reduction is ordered.
3267 bool isOrdered() const { return std::holds_alternative<RdxOrdered>(Style); };
3268 /// Return true if the in-loop reduction is conditional.
3269 bool isConditional() const { return IsConditional; };
3270 /// Returns true if the reduction outputs a vector with a scaled down VF.
3271 bool isPartialReduction() const { return getVFScaleFactor() > 1; }
3272 /// Returns true if the reduction is in-loop.
3273 bool isInLoop() const {
3274 return std::holds_alternative<RdxInLoop>(Style) ||
3275 std::holds_alternative<RdxOrdered>(Style);
3276 }
3277 /// The VPValue of the scalar Chain being accumulated.
3278 VPValue *getChainOp() const { return getOperand(0); }
3279 /// The VPValue of the vector value to be reduced.
3280 VPValue *getVecOp() const { return getOperand(1); }
3281 /// The VPValue of the condition for the block.
3283 return isConditional() ? getOperand(getNumOperands() - 1) : nullptr;
3284 }
3285 /// Get the factor that the VF of this recipe's output should be scaled by, or
3286 /// 1 if it isn't scaled.
3287 unsigned getVFScaleFactor() const {
3288 auto *Partial = std::get_if<RdxUnordered>(&Style);
3289 return Partial ? Partial->VFScaleFactor : 1;
3290 }
3291
3292protected:
3293#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3294 /// Print the recipe.
3295 void printRecipe(raw_ostream &O, const Twine &Indent,
3296 VPSlotTracker &SlotTracker) const override;
3297#endif
3298};
3299
3300/// A recipe to represent inloop reduction operations with vector-predication
3301/// intrinsics, performing a reduction on a vector operand with the explicit
3302/// vector length (EVL) into a scalar value, and adding the result to a chain.
3303/// The Operands are {ChainOp, VecOp, EVL, [Condition]}.
3305public:
3308 : VPReductionRecipe(VPRecipeBase::VPReductionEVLSC, R.getRecurrenceKind(),
3309 R.getFastMathFlags(),
3311 {R.getChainOp(), R.getVecOp(), &EVL}, CondOp,
3312 getReductionStyle(/*InLoop=*/true, R.isOrdered(), 1),
3313 DL) {}
3314
3315 ~VPReductionEVLRecipe() override = default;
3316
3318 llvm_unreachable("cloning not implemented yet");
3319 }
3320
3321 VP_CLASSOF_IMPL(VPRecipeBase::VPReductionEVLSC)
3322
3323 /// Generate the reduction in the loop
3324 void execute(VPTransformState &State) override;
3325
3326 /// The VPValue of the explicit vector length.
3327 VPValue *getEVL() const { return getOperand(2); }
3328
3329 /// Returns true if the recipe only uses the first lane of operand \p Op.
3330 bool usesFirstLaneOnly(const VPValue *Op) const override {
3332 "Op must be an operand of the recipe");
3333 return Op == getEVL();
3334 }
3335
3336protected:
3337#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3338 /// Print the recipe.
3339 void printRecipe(raw_ostream &O, const Twine &Indent,
3340 VPSlotTracker &SlotTracker) const override;
3341#endif
3342};
3343
3344/// VPReplicateRecipe replicates a given instruction producing multiple scalar
3345/// copies of the original scalar type, one per lane, instead of producing a
3346/// single copy of widened type for all lanes. If the instruction is known to be
3347/// a single scalar, only one copy will be generated.
3349 public VPIRMetadata {
3350 /// Indicator if only a single replica per lane is needed.
3351 bool IsSingleScalar;
3352
3353 /// Indicator if the replicas are also predicated.
3354 bool IsPredicated;
3355
3356public:
3358 bool IsSingleScalar, VPValue *Mask = nullptr,
3359 const VPIRFlags &Flags = {}, VPIRMetadata Metadata = {},
3360 DebugLoc DL = DebugLoc::getUnknown())
3361 : VPRecipeWithIRFlags(VPRecipeBase::VPReplicateSC, Operands,
3362 computeScalarType(I, Operands), Flags, DL),
3363 VPIRMetadata(Metadata), IsSingleScalar(IsSingleScalar),
3364 IsPredicated(Mask) {
3365 setUnderlyingValue(I);
3366 if (Mask)
3367 addOperand(Mask);
3368 }
3369
3370 ~VPReplicateRecipe() override = default;
3371
3372 /// Compute the scalar result type for a VPReplicateRecipe wrapping \p I with
3373 /// \p Operands (excluding any predicate mask).
3374 static Type *computeScalarType(const Instruction *I,
3375 ArrayRef<VPValue *> Operands);
3376
3378
3380 auto *Copy = new VPReplicateRecipe(
3381 getUnderlyingInstr(), NewOperands, IsSingleScalar,
3382 isPredicated() ? getMask() : nullptr, *this, *this, getDebugLoc());
3383 Copy->transferFlags(*this);
3384 return Copy;
3385 }
3386
3387 VP_CLASSOF_IMPL(VPRecipeBase::VPReplicateSC)
3388
3389 /// Generate replicas of the desired Ingredient. Replicas will be generated
3390 /// for all parts and lanes unless a specific part and lane are specified in
3391 /// the \p State.
3392 void execute(VPTransformState &State) override;
3393
3394 /// Return the cost of this VPReplicateRecipe.
3395 InstructionCost computeCost(ElementCount VF,
3396 VPCostContext &Ctx) const override;
3397
3398 /// Return the cost of scalarizing a call to \p CalledFn with argument
3399 /// operands \p ArgOps for a given \p VF.
3400 static InstructionCost computeCallCost(Function *CalledFn, Type *ResultTy,
3402 bool IsSingleScalar, ElementCount VF,
3403 VPCostContext &Ctx);
3404
3405 bool isSingleScalar() const { return IsSingleScalar; }
3406
3407 bool isPredicated() const { return IsPredicated; }
3408
3409 /// Returns true if the recipe only uses the first lane of operand \p Op.
3410 bool usesFirstLaneOnly(const VPValue *Op) const override {
3412 "Op must be an operand of the recipe");
3413 return isSingleScalar();
3414 }
3415
3416 /// Returns true if the recipe uses scalars of operand \p Op.
3417 bool usesScalars(const VPValue *Op) const override {
3419 "Op must be an operand of the recipe");
3420 return true;
3421 }
3422
3423 /// Return the mask of a predicated VPReplicateRecipe.
3425 assert(isPredicated() && "Trying to get the mask of a unpredicated recipe");
3426 return getOperand(getNumOperands() - 1);
3427 }
3428
3429 unsigned getOpcode() const { return getUnderlyingInstr()->getOpcode(); }
3430
3431protected:
3432#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3433 /// Print the recipe.
3434 void printRecipe(raw_ostream &O, const Twine &Indent,
3435 VPSlotTracker &SlotTracker) const override;
3436#endif
3437};
3438
3439/// A recipe for generating conditional branches on the bits of a mask.
3441public:
3443 : VPRecipeBase(VPRecipeBase::VPBranchOnMaskSC, {BlockInMask}, DL) {}
3444
3447 }
3448
3449 VP_CLASSOF_IMPL(VPRecipeBase::VPBranchOnMaskSC)
3450
3451 /// Generate the extraction of the appropriate bit from the block mask and the
3452 /// conditional branch.
3453 void execute(VPTransformState &State) override;
3454
3455 /// Return the cost of this VPBranchOnMaskRecipe.
3456 InstructionCost computeCost(ElementCount VF,
3457 VPCostContext &Ctx) const override;
3458
3459#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3460 /// Print the recipe.
3461 void printRecipe(raw_ostream &O, const Twine &Indent,
3462 VPSlotTracker &SlotTracker) const override {
3463 O << Indent << "BRANCH-ON-MASK ";
3465 }
3466#endif
3467
3468 /// Returns true if the recipe uses scalars of operand \p Op.
3469 bool usesScalars(const VPValue *Op) const override {
3471 "Op must be an operand of the recipe");
3472 return true;
3473 }
3474};
3475
3476/// A recipe to combine multiple recipes into a single 'expression' recipe,
3477/// which should be considered a single entity for cost-modeling and transforms.
3478/// The recipe needs to be 'decomposed', i.e. replaced by its individual
3479/// expression recipes, before execute. The individual expression recipes are
3480/// completely disconnected from the def-use graph of other recipes not part of
3481/// the expression. Def-use edges between pairs of expression recipes remain
3482/// intact, whereas every edge between an expression recipe and a recipe outside
3483/// the expression is elevated to connect the non-expression recipe with the
3484/// VPExpressionRecipe itself.
3485class VPExpressionRecipe : public VPSingleDefRecipe {
3486 /// Recipes included in this VPExpressionRecipe. This could contain
3487 /// duplicates.
3488 SmallVector<VPSingleDefRecipe *> ExpressionRecipes;
3489
3490 /// Temporary VPValues used for external operands of the expression, i.e.
3491 /// operands not defined by recipes in the expression.
3492 SmallVector<VPValue *> LiveInPlaceholders;
3493
3494 enum class ExpressionTypes {
3495 /// Represents an inloop extended reduction operation, performing a
3496 /// reduction on an extended vector operand into a scalar value, and adding
3497 /// the result to a chain.
3498 ExtendedReduction,
3499 /// Represents an inloop extended reduction operation, which is negated,
3500 /// then reduced before adding the result to a chain.
3501 NegatedExtendedReduction,
3502 /// Represent an inloop multiply-accumulate reduction, multiplying the
3503 /// extended vector operands, performing a reduction.add on the result, and
3504 /// adding the scalar result to a chain.
3505 ExtMulAccReduction,
3506 /// Represent an inloop multiply-accumulate reduction, multiplying the
3507 /// vector operands, performing a reduction.add on the result, and adding
3508 /// the scalar result to a chain.
3509 MulAccReduction,
3510 /// Represent an inloop multiply-accumulate reduction, multiplying the
3511 /// extended vector operands, negating the multiplication, performing a
3512 /// reduction.add on the result, and adding the scalar result to a chain.
3513 ExtNegatedMulAccReduction,
3514 };
3515
3516 /// Type of the expression.
3517 ExpressionTypes ExpressionType;
3518
3519 /// Construct a new VPExpressionRecipe by internalizing recipes in \p
3520 /// ExpressionRecipes. External operands (i.e. not defined by another recipe
3521 /// in the expression) are replaced by temporary VPValues and the original
3522 /// operands are transferred to the VPExpressionRecipe itself. Clone recipes
3523 /// as needed (excluding last) to ensure they are only used by other recipes
3524 /// in the expression.
3525 VPExpressionRecipe(ExpressionTypes ExpressionType,
3526 ArrayRef<VPSingleDefRecipe *> ExpressionRecipes);
3527
3528public:
3530 : VPExpressionRecipe(ExpressionTypes::ExtendedReduction, {Ext, Red}) {}
3532 VPReductionRecipe *Red)
3533 : VPExpressionRecipe(ExpressionTypes::NegatedExtendedReduction,
3534 {Ext, Neg, Red}) {
3535 assert((Red->getRecurrenceKind() == RecurKind::Add ||
3536 Red->getRecurrenceKind() == RecurKind::FAdd) &&
3537 "Expected an add reduction");
3538 if (Neg->getOpcode() == Instruction::Sub) {
3539 [[maybe_unused]] auto *SubConst = dyn_cast<VPConstantInt>(getOperand(1));
3540 assert(SubConst && SubConst->isZero() && "Expected a negating sub");
3541 } else
3542 assert(Neg->getOpcode() == Instruction::FNeg && "Unexpected opcode");
3543 }
3545 : VPExpressionRecipe(ExpressionTypes::MulAccReduction, {Mul, Red}) {}
3548 : VPExpressionRecipe(ExpressionTypes::ExtMulAccReduction,
3549 {Ext0, Ext1, Mul, Red}) {}
3552 VPReductionRecipe *Red)
3553 : VPExpressionRecipe(ExpressionTypes::ExtNegatedMulAccReduction,
3554 {Ext0, Ext1, Mul, Neg, Red}) {
3555 assert((Mul->getOpcode() == Instruction::Mul ||
3556 Mul->getOpcode() == Instruction::FMul) &&
3557 "Expected a mul");
3558 assert((Red->getRecurrenceKind() == RecurKind::Add ||
3559 Red->getRecurrenceKind() == RecurKind::FAdd) &&
3560 "Expected an add reduction");
3561 assert(getNumOperands() >= 3 && "Expected at least three operands");
3562 if (Neg->getOpcode() == Instruction::Sub) {
3563 [[maybe_unused]] auto *SubConst = dyn_cast<VPConstantInt>(getOperand(2));
3564 assert(SubConst && SubConst->isZero() &&
3565 Neg->getOpcode() == Instruction::Sub && "Expected a negating sub");
3566 } else
3567 assert(Neg->getOpcode() == Instruction::FNeg && "Unexpected opcode");
3568 }
3569
3571 SmallPtrSet<VPSingleDefRecipe *, 4> ExpressionRecipesSeen;
3572 for (auto *R : reverse(ExpressionRecipes)) {
3573 if (ExpressionRecipesSeen.insert(R).second)
3574 delete R;
3575 }
3576 for (VPValue *T : LiveInPlaceholders)
3577 delete T;
3578 }
3579
3580 VP_CLASSOF_IMPL(VPRecipeBase::VPExpressionSC)
3581
3582 VPExpressionRecipe *clone() override {
3583 assert(!ExpressionRecipes.empty() && "empty expressions should be removed");
3584 SmallVector<VPSingleDefRecipe *> NewExpressiondRecipes;
3585 for (auto *R : ExpressionRecipes)
3586 NewExpressiondRecipes.push_back(R->clone());
3587 for (auto *New : NewExpressiondRecipes) {
3588 for (const auto &[Idx, Old] : enumerate(ExpressionRecipes))
3589 New->replaceUsesOfWith(Old, NewExpressiondRecipes[Idx]);
3590 // Update placeholder operands in the cloned recipe to use the external
3591 // operands, to be internalized when the cloned expression is constructed.
3592 for (const auto &[Placeholder, OutsideOp] :
3593 zip(LiveInPlaceholders, operands()))
3594 New->replaceUsesOfWith(Placeholder, OutsideOp);
3595 }
3596 return new VPExpressionRecipe(ExpressionType, NewExpressiondRecipes);
3597 }
3598
3599 /// Return the VPValue to use to infer the result type of the recipe.
3601 unsigned OpIdx =
3602 cast<VPReductionRecipe>(ExpressionRecipes.back())->isConditional() ? 2
3603 : 1;
3604 return getOperand(getNumOperands() - OpIdx);
3605 }
3606
3607 /// Insert the recipes of the expression back into the VPlan, directly before
3608 /// the current recipe. Leaves the expression recipe empty, which must be
3609 /// removed before codegen.
3610 void decompose();
3611
3612 unsigned getVFScaleFactor() const {
3613 auto *PR = dyn_cast<VPReductionRecipe>(ExpressionRecipes.back());
3614 return PR ? PR->getVFScaleFactor() : 1;
3615 }
3616
3617 /// Method for generating code, must not be called as this recipe is abstract.
3618 void execute(VPTransformState &State) override {
3619 llvm_unreachable("recipe must be removed before execute");
3620 }
3621
3623 VPCostContext &Ctx) const override;
3624
3625 /// Returns true if this expression contains recipes that may read from or
3626 /// write to memory.
3627 bool mayReadOrWriteMemory() const;
3628
3629 /// Returns true if this expression contains recipes that may have side
3630 /// effects.
3631 bool mayHaveSideEffects() const;
3632
3633 /// Returns true if this VPExpressionRecipe produces a single scalar.
3634 bool isVectorToScalar() const;
3635
3636protected:
3637#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3638 /// Print the recipe.
3639 void printRecipe(raw_ostream &O, const Twine &Indent,
3640 VPSlotTracker &SlotTracker) const override;
3641#endif
3642};
3643
3644/// VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when
3645/// control converges back from a Branch-on-Mask. The phi nodes are needed in
3646/// order to merge values that are set under such a branch and feed their uses.
3647/// The phi nodes can be scalar or vector depending on the users of the value.
3648/// This recipe works in concert with VPBranchOnMaskRecipe.
3650public:
3651 /// Construct a VPPredInstPHIRecipe given \p PredV whose value needs a phi
3652 /// node after merging back from a Branch-on-Mask.
3654 : VPSingleDefRecipe(VPRecipeBase::VPPredInstPHISC, PredV,
3655 PredV->getScalarType(), /*UV=*/nullptr, DL) {}
3656 ~VPPredInstPHIRecipe() override = default;
3657
3659 return new VPPredInstPHIRecipe(getOperand(0), getDebugLoc());
3660 }
3661
3662 VP_CLASSOF_IMPL(VPRecipeBase::VPPredInstPHISC)
3663
3664 /// Generates phi nodes for live-outs (from a replicate region) as needed to
3665 /// retain SSA form.
3666 void execute(VPTransformState &State) override;
3667
3668 /// Return the cost of this VPPredInstPHIRecipe.
3670 VPCostContext &Ctx) const override {
3671 // TODO: Compute accurate cost after retiring the legacy cost model.
3672 return 0;
3673 }
3674
3675protected:
3676#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3677 /// Print the recipe.
3678 void printRecipe(raw_ostream &O, const Twine &Indent,
3679 VPSlotTracker &SlotTracker) const override;
3680#endif
3681};
3682
3683/// A common mixin class for widening memory operations. An optional mask can be
3684/// provided as the last operand.
3686protected:
3688
3689 /// Alignment information for this memory access.
3691
3692 /// Whether the accessed addresses are consecutive.
3694
3695 /// Whether the memory access is masked.
3696 bool IsMasked = false;
3697
3698 void setMask(VPValue *Mask) {
3699 assert(!IsMasked && "cannot re-set mask");
3700 if (!Mask)
3701 return;
3702 getAsRecipe()->addOperand(Mask);
3703 IsMasked = true;
3704 }
3705
3710
3711public:
3712 virtual ~VPWidenMemoryRecipe() = default;
3713
3714 /// Return a VPRecipeBase* to the current object.
3716 virtual const VPRecipeBase *getAsRecipe() const = 0;
3717
3718 /// Return whether the loaded-from / stored-to addresses are consecutive.
3719 bool isConsecutive() const { return Consecutive; }
3720
3721 /// Return the address accessed by this recipe.
3722 VPValue *getAddr() const { return getAsRecipe()->getOperand(0); }
3723
3724 /// Returns true if the recipe is masked.
3725 bool isMasked() const { return IsMasked; }
3726
3727 /// Return the mask used by this recipe. Note that a full mask is represented
3728 /// by a nullptr.
3729 VPValue *getMask() const {
3730 // Mask is optional and therefore the last operand.
3731 const VPRecipeBase *R = getAsRecipe();
3732 return isMasked() ? R->getOperand(R->getNumOperands() - 1) : nullptr;
3733 }
3734
3735 /// Returns the alignment of the memory access.
3736 Align getAlign() const { return Alignment; }
3737
3738 /// Return the cost of this VPWidenMemoryRecipe.
3739 InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const;
3740
3742};
3743
3744/// A recipe for widening load operations, using the address to load from and an
3745/// optional mask.
3747 public VPWidenMemoryRecipe {
3749 bool Consecutive, const VPIRMetadata &Metadata, DebugLoc DL)
3750 : VPSingleDefRecipe(VPRecipeBase::VPWidenLoadSC, {Addr}, Load.getType(),
3751 &Load, DL),
3752 VPWidenMemoryRecipe(Load, Consecutive, Metadata) {
3753 setMask(Mask);
3754 }
3755
3758 getMask(), Consecutive, *this, getDebugLoc());
3759 }
3760
3761 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenLoadSC);
3762
3763 /// Generate a wide load or gather.
3764 void execute(VPTransformState &State) override;
3765
3766 /// Return the cost of this VPWidenLoadRecipe.
3768 VPCostContext &Ctx) const override {
3769 return VPWidenMemoryRecipe::computeCost(VF, Ctx);
3770 }
3771
3772 /// Returns true if the recipe only uses the first lane of operand \p Op.
3773 bool usesFirstLaneOnly(const VPValue *Op) const override {
3775 "Op must be an operand of the recipe");
3776 // Widened, consecutive load operations only demand the first lane of
3777 // their address.
3778 return Op == getAddr() && isConsecutive();
3779 }
3780
3781protected:
3782 VPRecipeBase *getAsRecipe() override { return this; }
3783 const VPRecipeBase *getAsRecipe() const override { return this; }
3784
3785#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3786 /// Print the recipe.
3787 void printRecipe(raw_ostream &O, const Twine &Indent,
3788 VPSlotTracker &SlotTracker) const override;
3789#endif
3790};
3791
3792/// A recipe for widening load operations with vector-predication intrinsics,
3793/// using the address to load from, the explicit vector length and an optional
3794/// mask.
3796 public VPWidenMemoryRecipe {
3798 VPValue *Mask)
3799 : VPSingleDefRecipe(VPRecipeBase::VPWidenLoadEVLSC, {Addr, &EVL},
3800 L.getIngredient().getType(), &L.getIngredient(),
3801 L.getDebugLoc()),
3802 VPWidenMemoryRecipe(L.getIngredient(), L.isConsecutive(), L) {
3803 setMask(Mask);
3804 }
3805
3807 llvm_unreachable("cloning not supported");
3808 }
3809
3810 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenLoadEVLSC)
3811
3812 /// Return the EVL operand.
3813 VPValue *getEVL() const { return getOperand(1); }
3814
3815 /// Generate the wide load or gather.
3816 LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
3817
3818 /// Return the cost of this VPWidenLoadEVLRecipe.
3820 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
3821
3822 /// Returns true if the recipe only uses the first lane of operand \p Op.
3823 bool usesFirstLaneOnly(const VPValue *Op) const override {
3825 "Op must be an operand of the recipe");
3826 // Widened loads only demand the first lane of EVL and consecutive loads
3827 // only demand the first lane of their address.
3828 return Op == getEVL() || (Op == getAddr() && isConsecutive());
3829 }
3830
3831protected:
3832 VPRecipeBase *getAsRecipe() override { return this; }
3833 const VPRecipeBase *getAsRecipe() const override { return this; }
3834
3835#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3836 /// Print the recipe.
3837 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
3838 VPSlotTracker &SlotTracker) const override;
3839#endif
3840};
3841
3842/// A recipe for widening store operations, using the stored value, the address
3843/// to store to and an optional mask.
3845 public VPWidenMemoryRecipe {
3846 VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal,
3847 VPValue *Mask, bool Consecutive,
3848 const VPIRMetadata &Metadata, DebugLoc DL)
3849 : VPRecipeBase(VPRecipeBase::VPWidenStoreSC, {Addr, StoredVal}, DL),
3850 VPWidenMemoryRecipe(Store, Consecutive, Metadata) {
3851 setMask(Mask);
3852 }
3853
3857 *this, getDebugLoc());
3858 }
3859
3860 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenStoreSC);
3861
3862 /// Return the value stored by this recipe.
3863 VPValue *getStoredValue() const { return getOperand(1); }
3864
3865 /// Generate a wide store or scatter.
3866 void execute(VPTransformState &State) override;
3867
3868 /// Return the cost of this VPWidenStoreRecipe.
3870 VPCostContext &Ctx) const override {
3871 return VPWidenMemoryRecipe::computeCost(VF, Ctx);
3872 }
3873
3874 /// Returns true if the recipe only uses the first lane of operand \p Op.
3875 bool usesFirstLaneOnly(const VPValue *Op) const override {
3877 "Op must be an operand of the recipe");
3878 // Widened, consecutive stores only demand the first lane of their address,
3879 // unless the same operand is also stored.
3880 return Op == getAddr() && isConsecutive() && Op != getStoredValue();
3881 }
3882
3883protected:
3884 VPRecipeBase *getAsRecipe() override { return this; }
3885 const VPRecipeBase *getAsRecipe() const override { return this; }
3886
3887#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3888 /// Print the recipe.
3889 void printRecipe(raw_ostream &O, const Twine &Indent,
3890 VPSlotTracker &SlotTracker) const override;
3891#endif
3892};
3893
3894/// A recipe for widening store operations with vector-predication intrinsics,
3895/// using the value to store, the address to store to, the explicit vector
3896/// length and an optional mask.
3898 public VPWidenMemoryRecipe {
3900 VPValue *StoredVal, VPValue &EVL, VPValue *Mask)
3901 : VPRecipeBase(VPRecipeBase::VPWidenStoreEVLSC, {Addr, StoredVal, &EVL},
3902 S.getDebugLoc()),
3903 VPWidenMemoryRecipe(S.getIngredient(), S.isConsecutive(), S) {
3904 setMask(Mask);
3905 }
3906
3908 llvm_unreachable("cloning not supported");
3909 }
3910
3911 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenStoreEVLSC)
3912
3913 /// Return the value stored by this recipe.
3914 VPValue *getStoredValue() const { return getOperand(1); }
3915
3916 /// Return the EVL operand.
3917 VPValue *getEVL() const { return getOperand(2); }
3918
3919 /// Generate the wide store or scatter.
3920 LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
3921
3922 /// Return the cost of this VPWidenStoreEVLRecipe.
3924 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
3925
3926 /// Returns true if the recipe only uses the first lane of operand \p Op.
3927 bool usesFirstLaneOnly(const VPValue *Op) const override {
3929 "Op must be an operand of the recipe");
3930 if (Op == getEVL()) {
3931 assert(getStoredValue() != Op && "unexpected store of EVL");
3932 return true;
3933 }
3934 // Widened, consecutive memory operations only demand the first lane of
3935 // their address, unless the same operand is also stored. The latter can
3936 // happen with opaque pointers.
3937 return Op == getAddr() && isConsecutive() && Op != getStoredValue();
3938 }
3939
3940protected:
3941 VPRecipeBase *getAsRecipe() override { return this; }
3942 const VPRecipeBase *getAsRecipe() const override { return this; }
3943
3944#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3945 /// Print the recipe.
3946 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
3947 VPSlotTracker &SlotTracker) const override;
3948#endif
3949};
3950
3951/// Recipe to expand a SCEV expression.
3953 const SCEV *Expr;
3954
3955public:
3956 VPExpandSCEVRecipe(const SCEV *Expr);
3957
3958 ~VPExpandSCEVRecipe() override = default;
3959
3960 VPExpandSCEVRecipe *clone() override { return new VPExpandSCEVRecipe(Expr); }
3961
3962 VP_CLASSOF_IMPL(VPRecipeBase::VPExpandSCEVSC)
3963
3964 void execute(VPTransformState &State) override {
3965 llvm_unreachable("SCEV expressions must be expanded before final execute");
3966 }
3967
3968 /// Return the cost of this VPExpandSCEVRecipe.
3970 VPCostContext &Ctx) const override {
3971 // TODO: Compute accurate cost after retiring the legacy cost model.
3972 return 0;
3973 }
3974
3975 const SCEV *getSCEV() const { return Expr; }
3976
3977protected:
3978#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3979 /// Print the recipe.
3980 void printRecipe(raw_ostream &O, const Twine &Indent,
3981 VPSlotTracker &SlotTracker) const override;
3982#endif
3983};
3984
3985/// A recipe for generating the active lane mask for the vector loop that is
3986/// used to predicate the vector operations.
3988public:
3990 : VPHeaderPHIRecipe(VPRecipeBase::VPActiveLaneMaskPHISC, nullptr,
3991 StartMask, DL) {}
3992
3993 ~VPActiveLaneMaskPHIRecipe() override = default;
3994
3997 if (getNumOperands() == 2)
3998 R->addOperand(getOperand(1));
3999 return R;
4000 }
4001
4002 VP_CLASSOF_IMPL(VPRecipeBase::VPActiveLaneMaskPHISC)
4003
4004 /// Generate the active lane mask phi of the vector loop.
4005 void execute(VPTransformState &State) override;
4006
4007protected:
4008#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4009 /// Print the recipe.
4010 void printRecipe(raw_ostream &O, const Twine &Indent,
4011 VPSlotTracker &SlotTracker) const override;
4012#endif
4013};
4014
4015/// A recipe for generating the phi node tracking the current scalar iteration
4016/// index. It starts at the start value of the canonical induction and gets
4017/// incremented by the number of scalar iterations processed by the vector loop
4018/// iteration. The increment does not have to be loop invariant.
4020public:
4022 : VPHeaderPHIRecipe(VPRecipeBase::VPCurrentIterationPHISC, nullptr,
4023 StartIV, DL) {}
4024
4025 ~VPCurrentIterationPHIRecipe() override = default;
4026
4028 llvm_unreachable("cloning not implemented yet");
4029 }
4030
4031 VP_CLASSOF_IMPL(VPRecipeBase::VPCurrentIterationPHISC)
4032
4033 void execute(VPTransformState &State) override {
4034 llvm_unreachable("cannot execute this recipe, should be replaced by a "
4035 "scalar phi recipe");
4036 }
4037
4038 /// Return the cost of this VPCurrentIterationPHIRecipe.
4040 VPCostContext &Ctx) const override {
4041 // For now, match the behavior of the legacy cost model.
4042 return 0;
4043 }
4044
4045 /// Returns true if the recipe only uses the first lane of operand \p Op.
4046 bool usesFirstLaneOnly(const VPValue *Op) const override {
4048 "Op must be an operand of the recipe");
4049 return true;
4050 }
4051
4052protected:
4053#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4054 /// Print the recipe.
4055 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
4056 VPSlotTracker &SlotTracker) const override;
4057#endif
4058};
4059
4060/// A Recipe for widening the canonical induction variable of the vector loop.
4061/// First operand is the canonical IV recipe, a second step operand (VF * Part)
4062/// is added during unrolling.
4064public:
4066 const VPIRFlags::WrapFlagsTy &Flags = {false, false})
4067 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenCanonicalIVSC, CanonicalIV,
4068 CanonicalIV->getType(), Flags) {}
4069
4070 ~VPWidenCanonicalIVRecipe() override = default;
4071
4073 auto *WideCanIV =
4075 if (VPValue *Step = getStepValue())
4076 WideCanIV->addOperand(Step);
4077 return WideCanIV;
4078 }
4079
4080 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenCanonicalIVSC)
4081
4082 void execute(VPTransformState &State) override {
4083 llvm_unreachable("Expected prior expansion of WidenCanonicalIV recipes");
4084 }
4085
4086 /// Return the cost of this VPWidenCanonicalIVRecipe.
4088 VPCostContext &Ctx) const override {
4089 // TODO: Compute accurate cost after retiring the legacy cost model.
4090 return 0;
4091 }
4092
4093 /// Return the canonical IV being widened.
4097
4099 return getNumOperands() == 2 ? getOperand(1) : nullptr;
4100 }
4101
4102protected:
4103#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4104 /// Print the recipe.
4105 void printRecipe(raw_ostream &O, const Twine &Indent,
4106 VPSlotTracker &SlotTracker) const override;
4107#endif
4108};
4109
4110 /// A recipe for converting the input value \p IV to the corresponding
4111 /// value of an IV with different start and step values, using Start + IV *
4112 /// Step.
4114 /// Kind of the induction.
4116 /// If not nullptr, the floating point induction binary operator. Must be set
4117 /// for floating point inductions.
4118 const FPMathOperator *FPBinOp;
4119
4120public:
4122 VPValue *CanonicalIV, VPValue *Step)
4124 IndDesc.getKind(),
4125 dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp()),
4126 Start, CanonicalIV, Step) {}
4127
4129 const FPMathOperator *FPBinOp, VPIRValue *Start,
4130 VPValue *IV, VPValue *Step)
4131 : VPSingleDefRecipe(VPRecipeBase::VPDerivedIVSC, {Start, IV, Step},
4132 Start->getScalarType(), nullptr),
4133 Kind(Kind), FPBinOp(FPBinOp) {}
4134
4135 ~VPDerivedIVRecipe() override = default;
4136
4138 return new VPDerivedIVRecipe(Kind, FPBinOp, getStartValue(), getOperand(1),
4139 getStepValue());
4140 }
4141
4142 VP_CLASSOF_IMPL(VPRecipeBase::VPDerivedIVSC)
4143
4144 void execute(VPTransformState &State) override {
4145 llvm_unreachable("Expected prior expansion of this recipe");
4146 }
4147
4148 /// Return the cost of this VPDerivedIVRecipe.
4150 VPCostContext &Ctx) const override {
4151 // TODO: Compute accurate cost after retiring the legacy cost model.
4152 return 0;
4153 }
4154
4156 VPValue *getIndex() const { return getOperand(1); }
4157 VPValue *getStepValue() const { return getOperand(2); }
4158 const FPMathOperator *getFPBinOp() const { return FPBinOp; }
4160
4161 /// Returns true if the recipe only uses the first lane of operand \p Op.
4162 bool usesFirstLaneOnly(const VPValue *Op) const override {
4164 "Op must be an operand of the recipe");
4165 return true;
4166 }
4167
4168protected:
4169#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4170 /// Print the recipe.
4171 void printRecipe(raw_ostream &O, const Twine &Indent,
4172 VPSlotTracker &SlotTracker) const override;
4173#endif
4174};
4175
4176/// A recipe for handling phi nodes of integer and floating-point inductions,
4177/// producing their scalar values. Before unrolling by UF the recipe represents
4178/// the VF*UF scalar values to be produced, or UF scalar values if only first
4179/// lane is used, and has 3 operands: IV, step and VF. Unrolling adds one extra
4180/// operand StartIndex to all unroll parts except part 0, as the recipe
4181/// represents the VF scalar values (this number of values is taken from
4182/// State.VF rather than from the VF operand) starting at IV + StartIndex.
4184 Instruction::BinaryOps InductionOpcode;
4185
4186public:
4189 DebugLoc DL)
4190 : VPRecipeWithIRFlags(VPRecipeBase::VPScalarIVStepsSC, {IV, Step, VF},
4191 IV->getScalarType(), FMFs, DL),
4192 InductionOpcode(Opcode) {}
4193
4195 VPValue *Step, VPValue *VF,
4198 IV, Step, VF, IndDesc.getInductionOpcode(),
4199 dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp())
4200 ? IndDesc.getInductionBinOp()->getFastMathFlags()
4201 : FastMathFlags(),
4202 DL) {}
4203
4204 ~VPScalarIVStepsRecipe() override = default;
4205
4207 auto *NewR = new VPScalarIVStepsRecipe(getOperand(0), getOperand(1),
4208 getOperand(2), InductionOpcode,
4210 if (VPValue *StartIndex = getStartIndex())
4211 NewR->setStartIndex(StartIndex);
4212 return NewR;
4213 }
4214
4215 VP_CLASSOF_IMPL(VPRecipeBase::VPScalarIVStepsSC)
4216
4217 /// Generate the scalarized versions of the phi node as needed by their users.
4218 void execute(VPTransformState &State) override;
4219
4220 /// Return the cost of this VPScalarIVStepsRecipe.
4222 VPCostContext &Ctx) const override {
4223 // TODO: Compute accurate cost after retiring the legacy cost model.
4224 return 0;
4225 }
4226
4227 VPValue *getStepValue() const { return getOperand(1); }
4228
4229 /// Return the number of scalars to produce per unroll part, used to compute
4230 /// StartIndex during unrolling.
4231 VPValue *getVFValue() const { return getOperand(2); }
4232
4233 /// Return the StartIndex, or null if known to be zero, valid only after
4234 /// unrolling.
4236 return getNumOperands() == 4 ? getOperand(3) : nullptr;
4237 }
4238
4239 /// Set or add the StartIndex operand.
4240 void setStartIndex(VPValue *StartIndex) {
4241 if (getNumOperands() == 4)
4242 setOperand(3, StartIndex);
4243 else
4244 addOperand(StartIndex);
4245 }
4246
4247 /// Returns true if the recipe only uses the first lane of operand \p Op.
4248 bool usesFirstLaneOnly(const VPValue *Op) const override {
4250 "Op must be an operand of the recipe");
4251 return true;
4252 }
4253
4254 Instruction::BinaryOps getInductionOpcode() const { return InductionOpcode; }
4255
4256protected:
4257#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4258 /// Print the recipe.
4259 void printRecipe(raw_ostream &O, const Twine &Indent,
4260 VPSlotTracker &SlotTracker) const override;
4261#endif
4262};
4263
4264/// CastInfo helper for casting from VPRecipeBase to a mixin class that is not
4265/// part of the VPRecipeBase class hierarchy (e.g. VPPhiAccessors,
4266/// VPIRMetadata).
4267namespace vpdetail {
4268template <typename VPMixin, typename... RecipeTys>
4270 : public DefaultDoCastIfPossible<VPMixin *, VPRecipeBase *,
4271 CastInfoMixinImpl<VPMixin, RecipeTys...>> {
4272 static_assert((std::is_base_of_v<VPMixin, RecipeTys> && ...),
4273 "Each type in RecipeTys must derive from VPMixin");
4274
4275 /// Used by isa.
4276 static bool isPossible(VPRecipeBase *R) { return isa<RecipeTys...>(R); }
4277
4278 /// Used by cast. Tries a dyn_cast of \p R to each type in RecipeTys and
/// returns the first non-null result converted to VPMixin *. Asserts if \p R
/// is none of the listed recipe types.
4279 static VPMixin *doCast(VPRecipeBase *R) {
4280 VPMixin *Out = nullptr;
// The fold over || short-circuits, so Out keeps the first successful
// dyn_cast and later recipe types are not tried.
4281 ((Out = dyn_cast<RecipeTys>(R)) || ...);
4282 assert(Out && "Illegal recipe for cast");
4283 return Out;
4284 }
4285 static VPMixin *castFailed() { return nullptr; }
4286};
4287} // namespace vpdetail
4288
4289/// Support casting from VPRecipeBase -> VPPhiAccessors.
4290template <>
4294
4295template <>
4300template <>
4302 : public ForwardToPointerCast<VPPhiAccessors, VPRecipeBase *,
4303 CastInfo<VPPhiAccessors, VPRecipeBase *>> {};
4304
4305/// Support casting from VPRecipeBase / VPUser -> VPWidenMemoryRecipe.
4306template <>
4311template <>
4316
4317/// Support casting from VPRecipeBase -> VPIRMetadata.
4318template <>
4324
4325template <>
4330template <>
4332 : public ForwardToPointerCast<VPIRMetadata, VPRecipeBase *,
4333 CastInfo<VPIRMetadata, VPRecipeBase *>> {};
4334
4335/// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It
4336/// holds a sequence of zero or more VPRecipe's each representing a sequence of
4337/// output IR instructions. All PHI-like recipes must come before any non-PHI recipes.
4338class LLVM_ABI_FOR_TEST VPBasicBlock : public VPBlockBase {
4339 friend class VPlan;
4340
4341 /// Use VPlan::createVPBasicBlock to create VPBasicBlocks.
4342 VPBasicBlock(const Twine &Name = "", VPRecipeBase *Recipe = nullptr)
4343 : VPBlockBase(VPBasicBlockSC, Name.str()) {
4344 if (Recipe)
4345 appendRecipe(Recipe);
4346 }
4347
4348public:
4350
4351protected:
4352 /// The VPRecipes held in the order of output instructions to generate.
4354
4355 VPBasicBlock(const unsigned char BlockSC, const Twine &Name = "")
4356 : VPBlockBase(BlockSC, Name.str()) {}
4357
4358public:
4359 ~VPBasicBlock() override {
4360 while (!Recipes.empty())
4361 Recipes.pop_back();
4362 }
4363
4364 /// Instruction iterators...
4369
4370 //===--------------------------------------------------------------------===//
4371 /// Recipe iterator methods
4372 ///
4373 inline iterator begin() { return Recipes.begin(); }
4374 inline const_iterator begin() const { return Recipes.begin(); }
4375 inline iterator end() { return Recipes.end(); }
4376 inline const_iterator end() const { return Recipes.end(); }
4377
4378 inline reverse_iterator rbegin() { return Recipes.rbegin(); }
4379 inline const_reverse_iterator rbegin() const { return Recipes.rbegin(); }
4380 inline reverse_iterator rend() { return Recipes.rend(); }
4381 inline const_reverse_iterator rend() const { return Recipes.rend(); }
4382
4383 inline size_t size() const { return Recipes.size(); }
4384 inline bool empty() const { return Recipes.empty(); }
4385 inline const VPRecipeBase &front() const { return Recipes.front(); }
4386 inline VPRecipeBase &front() { return Recipes.front(); }
4387 inline const VPRecipeBase &back() const { return Recipes.back(); }
4388 inline VPRecipeBase &back() { return Recipes.back(); }
4389
4390 /// Returns a reference to the list of recipes.
4392
4393 /// Returns a pointer-to-member designating the recipe list of a
/// VPBasicBlock. The VPRecipeBase * parameter is unnamed and unused.
4394 static RecipeListTy VPBasicBlock::*getSublistAccess(VPRecipeBase *) {
4395 return &VPBasicBlock::Recipes;
4396 }
4397
4398 /// Method to support type inquiry through isa, cast, and dyn_cast.
4399 static inline bool classof(const VPBlockBase *V) {
4400 return V->getVPBlockID() == VPBlockBase::VPBasicBlockSC ||
4401 V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
4402 }
4403
/// Insert \p Recipe into this block's recipe list at \p InsertPt and make this
/// block its parent. Asserts that \p Recipe is non-null and does not already
/// have a parent block.
4404 void insert(VPRecipeBase *Recipe, iterator InsertPt) {
4405 assert(Recipe && "No recipe to append.");
4406 assert(!Recipe->Parent && "Recipe already in VPlan");
4407 Recipe->Parent = this;
4408 Recipes.insert(InsertPt, Recipe);
4409 }
4410
4411 /// Augment the existing recipes of a VPBasicBlock with an additional
4412 /// \p Recipe as the last recipe.
4413 void appendRecipe(VPRecipeBase *Recipe) { insert(Recipe, end()); }
4414
4415 /// The method which generates the output IR instructions that correspond to
4416 /// this VPBasicBlock, thereby "executing" the VPlan.
4417 void execute(VPTransformState *State) override;
4418
4419 /// Return the cost of this VPBasicBlock.
4420 InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override;
4421
4422 /// Return the position of the first non-phi node recipe in the block.
4423 iterator getFirstNonPhi();
4424
4425 /// Returns an iterator range over the PHI-like recipes in the block.
4429
4430 /// Split current block at \p SplitAt by inserting a new block between the
4431 /// current block and its successors and moving all recipes starting at
4432 /// SplitAt to the new block. Returns the new block.
4433 VPBasicBlock *splitAt(iterator SplitAt);
4434
4435 VPRegionBlock *getEnclosingLoopRegion();
4436 const VPRegionBlock *getEnclosingLoopRegion() const;
4437
4438#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4439 /// Print this VPBasicBlock to \p O, prefixing all lines with \p Indent. \p
4440 /// SlotTracker is used to print unnamed VPValue's using consecutive numbers.
4441 ///
4442 /// Note that the numbering is applied to the whole VPlan, so printing
4443 /// individual blocks is consistent with the whole VPlan printing.
4444 void print(raw_ostream &O, const Twine &Indent,
4445 VPSlotTracker &SlotTracker) const override;
4446 using VPBlockBase::print; // Get the print(raw_ostream &O) version.
4447#endif
4448
4449 /// If the block has multiple successors, return the branch recipe terminating
4450 /// the block. If it has no successor or only a single one, return nullptr.
4451 VPRecipeBase *getTerminator();
4452 const VPRecipeBase *getTerminator() const;
4453
4454 /// Returns true if the block is exiting its parent region.
4455 bool isExiting() const;
4456
4457 /// Clone the current block and its recipes, without updating the operands of
4458 /// the cloned recipes.
4459 VPBasicBlock *clone() override;
4460
4461 /// Returns the predecessor block at index \p Idx with the predecessors as per
4462 /// the corresponding plain CFG. If the block is an entry block to a region,
4463 /// the first predecessor is the single predecessor of a region, and the
4464 /// second predecessor is the exiting block of the region.
4465 const VPBasicBlock *getCFGPredecessor(unsigned Idx) const;
4466
4467protected:
4468 /// Execute the recipes in the IR basic block \p BB.
4469 void executeRecipes(VPTransformState *State, BasicBlock *BB);
4470
4471 /// Connect the VPBBs predecessors' in the VPlan CFG to the IR basic block
4472 /// generated for this VPBB.
4473 void connectToPredecessors(VPTransformState &State);
4474
4475private:
4476 /// Create an IR BasicBlock to hold the output instructions generated by this
4477 /// VPBasicBlock, and return it. Update the CFGState accordingly.
4478 BasicBlock *createEmptyBasicBlock(VPTransformState &State);
4479};
4480
4481inline const VPBasicBlock *
4483 return getAsRecipe()->getParent()->getCFGPredecessor(Idx);
4484}
4485
4486 /// A special type of VPBasicBlock that wraps an existing IR basic block.
4487 /// Recipes of the block get added before the first non-phi instruction in the
4488 /// wrapped block.
4489 /// Note: VPlan creates VPIRBasicBlocks to wrap blocks of the original loop,
4490 /// e.g. its preheader, its scalar header and its exit blocks.
4491 class VPIRBasicBlock : public VPBasicBlock {
4492 friend class VPlan;
4493
/// The IR basic block wrapped by this VPIRBasicBlock.
4494 BasicBlock *IRBB;
4495
4496 /// Use VPlan::createVPIRBasicBlock to create VPIRBasicBlocks.
/// The block is named "ir-bb<NAME>", where NAME is the name of \p IRBB.
4497 VPIRBasicBlock(BasicBlock *IRBB)
4498 : VPBasicBlock(VPIRBasicBlockSC,
4499 (Twine("ir-bb<") + IRBB->getName() + Twine(">")).str()),
4500 IRBB(IRBB) {}
4501
4502public:
4503 ~VPIRBasicBlock() override = default;
4504
/// Method to support type inquiry through isa, cast, and dyn_cast.
4505 static inline bool classof(const VPBlockBase *V) {
4506 return V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
4507 }
4508
4509 /// The method which generates the output IR instructions that correspond to
4510 /// this VPBasicBlock, thereby "executing" the VPlan.
4511 void execute(VPTransformState *State) override;
4512
4513 VPIRBasicBlock *clone() override;
4514
/// Return the IR basic block wrapped by this VPIRBasicBlock.
4515 BasicBlock *getIRBasicBlock() const { return IRBB; }
4516};
4517
4518/// Track information about the canonical IV value of a region.
4519/// TODO: Have it also track the canonical IV increment, subject of NUW flag.
4521 /// VPRegionValue for the canonical IV, whose allocation is managed by
4522 /// VPCanonicalIVInfo.
4523 std::unique_ptr<VPRegionValue> CanIV;
4524
4525 /// Whether the increment of the canonical IV may unsigned wrap or not.
4526 bool HasNUW = true;
4527
4528public:
4530 : CanIV(std::make_unique<VPRegionValue>(Ty, DL, Region)) {}
4531
4532 VPRegionValue *getRegionValue() { return CanIV.get(); }
4533 const VPRegionValue *getRegionValue() const { return CanIV.get(); }
4534
4535 bool hasNUW() const { return HasNUW; }
4536
4537 void clearNUW() { HasNUW = false; }
4538};
4539
4540/// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks
4541/// which form a Single-Entry-Single-Exiting subgraph of the output IR CFG.
4542/// A VPRegionBlock may indicate that its contents are to be replicated several
4543/// times. This is designed to support predicated scalarization, in which a
4544/// scalar if-then code structure needs to be generated VF * UF times. Having
4545/// this replication indicator helps to keep a single model for multiple
4546/// candidate VF's. The actual replication takes place only once the desired VF
4547/// and UF have been determined.
4548class LLVM_ABI_FOR_TEST VPRegionBlock : public VPBlockBase {
4549 friend class VPlan;
4550
4551 /// Hold the Single Entry of the SESE region modelled by the VPRegionBlock.
4552 VPBlockBase *Entry;
4553
4554 /// Hold the Single Exiting block of the SESE region modelled by the
4555 /// VPRegionBlock.
4556 VPBlockBase *Exiting;
4557
4558 /// Holds the Canonical IV of the loop region along with additional
4559 /// information. If CanIVInfo is nullptr, the region is a replicating region.
4560 /// Loop regions retain their canonical IVs until they are dissolved, even if
4561 /// the canonical IV has no users.
4562 std::unique_ptr<VPCanonicalIVInfo> CanIVInfo;
4563
4564 /// Use VPlan::createLoopRegion() and VPlan::createReplicateRegion() to create
4565 /// VPRegionBlocks.
4566 VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting,
4567 const std::string &Name = "")
4568 : VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exiting(Exiting) {
4569 if (Entry) {
4570 assert(!Entry->hasPredecessors() && "Entry block has predecessors.");
4571 assert(Exiting && "Must also pass Exiting if Entry is passed.");
4572 assert(!Exiting->hasSuccessors() && "Exit block has successors.");
4573 Entry->setParent(this);
4574 Exiting->setParent(this);
4575 }
4576 }
4577
/// Construct a loop region: delegates to the (Entry, Exiting, Name)
/// constructor and then creates the canonical IV info from \p CanIVTy and
/// \p DL, so that isReplicator() returns false for this region.
4578 VPRegionBlock(Type *CanIVTy, DebugLoc DL, VPBlockBase *Entry,
4579 VPBlockBase *Exiting, const std::string &Name = "")
4580 : VPRegionBlock(Entry, Exiting, Name) {
4581 CanIVInfo = std::make_unique<VPCanonicalIVInfo>(CanIVTy, DL, this);
4582 }
4583
4584public:
4585 ~VPRegionBlock() override = default;
4586
4587 /// Method to support type inquiry through isa, cast, and dyn_cast.
4588 static inline bool classof(const VPBlockBase *V) {
4589 return V->getVPBlockID() == VPBlockBase::VPRegionBlockSC;
4590 }
4591
4592 const VPBlockBase *getEntry() const { return Entry; }
4593 VPBlockBase *getEntry() { return Entry; }
4594
4595 /// Set \p EntryBlock as the entry VPBlockBase of this VPRegionBlock. \p
4596 /// EntryBlock must have no predecessors.
4597 void setEntry(VPBlockBase *EntryBlock) {
4598 assert(!EntryBlock->hasPredecessors() &&
4599 "Entry block cannot have predecessors.");
4600 Entry = EntryBlock;
4601 EntryBlock->setParent(this);
4602 }
4603
4604 const VPBlockBase *getExiting() const { return Exiting; }
4605 VPBlockBase *getExiting() { return Exiting; }
4606
4607 /// Set \p ExitingBlock as the exiting VPBlockBase of this VPRegionBlock. \p
4608 /// ExitingBlock must have no successors.
4609 void setExiting(VPBlockBase *ExitingBlock) {
4610 assert(!ExitingBlock->hasSuccessors() &&
4611 "Exit block cannot have successors.");
4612 Exiting = ExitingBlock;
4613 ExitingBlock->setParent(this);
4614 }
4615
4616 /// Returns the pre-header VPBasicBlock of the loop region.
4618 assert(!isReplicator() && "should only get pre-header of loop regions");
4619 return getSinglePredecessor()->getExitingBasicBlock();
4620 }
4621
4622 /// An indicator whether this region is to generate multiple replicated
4623 /// instances of output IR corresponding to its VPBlockBases.
4624 bool isReplicator() const { return !CanIVInfo; }
4625
4626 /// The method which generates the output IR instructions that correspond to
4627 /// this VPRegionBlock, thereby "executing" the VPlan.
4628 void execute(VPTransformState *State) override;
4629
4630 /// Return the cost of this region.
4631 InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override;
4632
4633#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4634 /// Print this VPRegionBlock to \p O (recursively), prefixing all lines with
4635 /// \p Indent. \p SlotTracker is used to print unnamed VPValue's using
4636 /// consecutive numbers.
4637 ///
4638 /// Note that the numbering is applied to the whole VPlan, so printing
4639 /// individual regions is consistent with the whole VPlan printing.
4640 void print(raw_ostream &O, const Twine &Indent,
4641 VPSlotTracker &SlotTracker) const override;
4642 using VPBlockBase::print; // Get the print(raw_ostream &O) version.
4643#endif
4644
4645 /// Clone all blocks in the single-entry single-exit region of the block and
4646 /// their recipes without updating the operands of the cloned recipes.
4647 VPRegionBlock *clone() override;
4648
4649 /// Remove the current region from its VPlan, connecting its predecessor to
4650 /// its entry, and its exiting block to its successor.
4651 void dissolveToCFGLoop();
4652
4653 /// Get the canonical IV increment instruction if it exists. Otherwise, create
4654 /// a new increment before the terminator and return it. The canonical IV
4655 /// increment is subject to DCE if unused, unlike the canonical IV itself.
4656 VPInstruction *getOrCreateCanonicalIVIncrement();
4657
4658 /// Return the canonical induction variable of the region, null for
4659 /// replicating regions.
4661 return CanIVInfo ? CanIVInfo->getRegionValue() : nullptr;
4662 }
4664 return CanIVInfo ? CanIVInfo->getRegionValue() : nullptr;
4665 }
4666
4667 /// Return the type of the canonical IV for loop regions.
4669 return CanIVInfo->getRegionValue()->getType();
4670 }
4671
4672 /// Indicates if NUW is set for the canonical IV increment, for loop regions.
4673 bool hasCanonicalIVNUW() const { return CanIVInfo->hasNUW(); }
4674
4675 /// Unsets NUW for the canonical IV increment \p Increment, for loop regions.
4677 assert(Increment && "Must provide increment to clear");
4678 Increment->dropPoisonGeneratingFlags();
4679 CanIVInfo->clearNUW();
4680 }
4681};
4682
4684 return getParent()->getParent();
4685}
4686
4688 return getParent()->getParent();
4689}
4690
4691 /// VPlan models a candidate for vectorization, encoding various decisions taken
4692 /// to produce efficient output IR, including which branches, basic-blocks and
4693 /// output IR instructions to generate, and their cost. VPlan holds a
4694 /// Hierarchical-CFG of VPBasicBlocks and VPRegionBlocks rooted at an Entry
4695 /// VPBasicBlock.
4696class VPlan {
4697 friend class VPlanPrinter;
4698 friend class VPSlotTracker;
4699
4700 /// VPBasicBlock corresponding to the original preheader. Used to place
4701 /// VPExpandSCEV recipes for expressions used during skeleton creation and the
4702 /// rest of VPlan execution.
4703 /// When this VPlan is used for the epilogue vector loop, the entry will be
4704 /// replaced by a new entry block created during skeleton creation.
4705 VPBasicBlock *Entry;
4706
4707 /// VPIRBasicBlock wrapping the header of the original scalar loop.
4708 VPIRBasicBlock *ScalarHeader;
4709
4710 /// Immutable list of VPIRBasicBlocks wrapping the exit blocks of the original
4711 /// scalar loop. Note that some exit blocks may be unreachable at the moment,
4712 /// e.g. if the scalar epilogue always executes.
4714
4715 /// Holds the VFs applicable to this VPlan.
4717
4718 /// Holds the UFs applicable to this VPlan. If empty, the VPlan is valid for
4719 /// any UF.
4721
4722 /// Holds the name of the VPlan, for printing.
4723 std::string Name;
4724
4725 /// Represents the trip count of the original loop, for folding
4726 /// the tail.
4727 VPValue *TripCount = nullptr;
4728
4729 /// Represents the backedge taken count of the original loop, for folding
4730 /// the tail. It equals TripCount - 1.
4731 VPSymbolicValue *BackedgeTakenCount = nullptr;
4732
4733 /// Represents the vector trip count.
4734 VPSymbolicValue VectorTripCount;
4735
4736 /// Represents the vectorization factor of the loop.
4737 VPSymbolicValue VF;
4738
4739 /// Represents the unroll factor of the loop.
4740 VPSymbolicValue UF;
4741
4742 /// Represents the loop-invariant VF * UF of the vector loop region.
4743 VPSymbolicValue VFxUF;
4744
4745 /// Contains all the external definitions created for this VPlan, as a mapping
4746 /// from IR Values to VPIRValues.
4748
4749 /// Blocks allocated and owned by the VPlan. They will be deleted once the
4750 /// VPlan is destroyed.
4751 SmallVector<VPBlockBase *> CreatedBlocks;
4752
4753 /// Construct a VPlan with \p Entry to the plan and with \p ScalarHeader
4754 /// wrapping the original header of the scalar loop. The vector loop will have
4755 /// index type \p IdxTy.
4756 VPlan(VPBasicBlock *Entry, VPIRBasicBlock *ScalarHeader, Type *IdxTy)
// VectorTripCount, VF, UF and VFxUF are all created with type IdxTy.
4757 : Entry(Entry), ScalarHeader(ScalarHeader), VectorTripCount(IdxTy),
4758 VF(IdxTy), UF(IdxTy), VFxUF(IdxTy) {
// Record this plan as the plan containing the entry block.
4759 Entry->setPlan(this);
// The scalar header is expected to have no successors at construction time.
4760 assert(ScalarHeader->getNumSuccessors() == 0 &&
4761 "scalar header must be a leaf node");
4762 }
4763
4764public:
4765 /// Construct a VPlan for \p L. This will create VPIRBasicBlocks wrapping the
4766 /// original preheader and scalar header of \p L, to be used as entry and
4767 /// scalar header blocks of the new VPlan. The vector loop will have index
4768 /// type \p IdxTy.
4769 VPlan(Loop *L, Type *IdxTy);
4770
4771 /// Construct a VPlan with a new VPBasicBlock as entry, a VPIRBasicBlock
4772 /// wrapping \p ScalarHeaderBB and vector loop index of type \p IdxTy.
4773 VPlan(BasicBlock *ScalarHeaderBB, Type *IdxTy)
// VectorTripCount, VF, UF and VFxUF are all created with type IdxTy.
4774 : VectorTripCount(IdxTy), VF(IdxTy), UF(IdxTy), VFxUF(IdxTy) {
// Install a new VPBasicBlock named "preheader" as the entry block; setEntry
// also points the block back at this plan.
4775 setEntry(createVPBasicBlock("preheader"));
// Wrap ScalarHeaderBB in a VPIRBasicBlock and use it as the scalar header.
4776 ScalarHeader = createVPIRBasicBlock(ScalarHeaderBB);
4777 }
4778
4780
4782 Entry = VPBB;
4783 VPBB->setPlan(this);
4784 }
4785
4786 /// Generate the IR code for this VPlan.
4787 void execute(VPTransformState *State);
4788
4789 /// Return the cost of this plan.
4791
/// Return the entry block of the plan (mutable and const overloads).
4792 VPBasicBlock *getEntry() { return Entry; }
4793 const VPBasicBlock *getEntry() const { return Entry; }
4794
4795 /// Returns the preheader of the vector loop region, if one exists, or null
4796 /// otherwise.
4798 const VPRegionBlock *VectorRegion = getVectorLoopRegion();
4799 return VectorRegion
4800 ? cast<VPBasicBlock>(VectorRegion->getSinglePredecessor())
4801 : nullptr;
4802 }
4803
4804 /// Returns the VPRegionBlock of the vector loop.
4807
4808 /// Returns true if this VPlan is for an outer loop, i.e., its vector
4809 /// loop region contains a nested loop region.
4810 LLVM_ABI_FOR_TEST bool isOuterLoop() const;
4811
4812 /// Returns the 'middle' block of the plan, that is the block that selects
4813 /// whether to execute the scalar tail loop or the exit block from the loop
4814 /// latch. If there is an early exit from the vector loop, the middle block
4815 /// conceptually has the early exit block as third successor, split across 2
4816 /// VPBBs. In that case, the second VPBB selects whether to execute the scalar
4817 /// tail loop or the exit block. If the scalar tail loop or exit block are
4818 /// known to always execute, the middle block may branch directly to that
4819 /// block. This function cannot be called once the vector loop region has been
4820 /// removed.
4822 VPRegionBlock *LoopRegion = getVectorLoopRegion();
4823 assert(
4824 LoopRegion &&
4825 "cannot call the function after vector loop region has been removed");
4826 // The middle block is always the last successor of the region.
4827 return cast<VPBasicBlock>(LoopRegion->getSuccessors().back());
4828 }
4829
4831 return const_cast<VPlan *>(this)->getMiddleBlock();
4832 }
4833
4834 /// Return the VPBasicBlock for the preheader of the scalar loop.
4837 getScalarHeader()->getSinglePredecessor());
4838 }
4839
4840 /// Return the VPIRBasicBlock wrapping the header of the scalar loop.
4841 VPIRBasicBlock *getScalarHeader() const { return ScalarHeader; }
4842
4843 /// Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of
4844 /// the original scalar loop.
4845 ArrayRef<VPIRBasicBlock *> getExitBlocks() const { return ExitBlocks; }
4846
4847 /// Return the VPIRBasicBlock corresponding to \p IRBB. \p IRBB must be an
4848 /// exit block.
4850
4851 /// Returns true if \p VPBB is an exit block.
4852 bool isExitBlock(VPBlockBase *VPBB);
4853
4854 /// The trip count of the original loop.
4856 assert(TripCount && "trip count needs to be set before accessing it");
4857 return TripCount;
4858 }
4859
4860 /// Set the trip count assuming it is currently null; if it is not - use
4861 /// resetTripCount().
4862 void setTripCount(VPValue *NewTripCount) {
// Two conditions are checked: no trip count may be set yet, and the new trip
// count must be non-null.
4863 assert(!TripCount && NewTripCount && "TripCount should not be set yet.");
4864 TripCount = NewTripCount;
4865 }
4866
4867 /// Resets the trip count for the VPlan. The caller must make sure all uses of
4868 /// the original trip count have been replaced.
4869 void resetTripCount(VPValue *NewTripCount) {
// Besides requiring an existing trip count and a non-null replacement, the
// assertion also checks that the old trip count has no remaining users; the
// assertion message only mentions the first of these conditions.
4870 assert(TripCount && NewTripCount && TripCount->getNumUsers() == 0 &&
4871 "TripCount must be set when resetting");
4872 TripCount = NewTripCount;
4873 }
4874
4875 /// The backedge taken count of the original loop.
4877 // BTC shares the canonical IV type with VectorTripCount.
4878 if (!BackedgeTakenCount)
4879 BackedgeTakenCount = new VPSymbolicValue(VectorTripCount.getType());
4880 return BackedgeTakenCount;
4881 }
/// Const overload: return the backedge taken count as currently stored,
/// without creating it. BackedgeTakenCount is initialized to nullptr, so the
/// result is null if it has not been created yet.
4882 VPValue *getBackedgeTakenCount() const { return BackedgeTakenCount; }
4883
4884 /// The vector trip count.
4885 VPSymbolicValue &getVectorTripCount() { return VectorTripCount; }
4886
4887 /// Returns the VF of the vector loop region.
// Mutable and const access to the symbolic value representing the VF of this
// plan. (No ';' after an inline member function body.)
4888 VPSymbolicValue &getVF() { return VF; }
4889 const VPSymbolicValue &getVF() const { return VF; }
4890
4891 /// Returns the UF of the vector loop region.
// Mutable access to the symbolic value representing the UF of this plan.
// (No ';' after an inline member function body.)
4892 VPSymbolicValue &getUF() { return UF; }
4893
4894 /// Returns VF * UF of the vector loop region.
4895 VPSymbolicValue &getVFxUF() { return VFxUF; }
4896
4899 }
4900
4901 const DataLayout &getDataLayout() const {
4903 }
4904
/// Add \p VF to the VFs applicable to this plan.
4905 void addVF(ElementCount VF) { VFs.insert(VF); }
4906
4908 assert(hasVF(VF) && "Cannot set VF not already in plan");
4909 VFs.clear();
4910 VFs.insert(VF);
4911 }
4912
4913 /// Remove \p VF from the plan.
4915 assert(hasVF(VF) && "tried to remove VF not present in plan");
4916 VFs.remove(VF);
4917 }
4918
/// Returns true if \p VF is one of the VFs of this plan.
4919 bool hasVF(ElementCount VF) const { return VFs.count(VF); }
/// Returns true if at least one of the VFs of this plan is scalable.
4920 bool hasScalableVF() const {
4921 return any_of(VFs, [](ElementCount VF) { return VF.isScalable(); });
4922 }
4923
4924 /// Returns an iterator range over all VFs of the plan.
4927 return VFs;
4928 }
4929
4930 /// Returns the single VF of the plan, asserting that the plan has exactly
4931 /// one VF.
4933 assert(VFs.size() == 1 && "expected plan with single VF");
4934 return VFs[0];
4935 }
4936
/// Returns true if the plan has exactly one VF and that VF is scalar.
4937 bool hasScalarVFOnly() const {
4938 bool HasScalarVFOnly = VFs.size() == 1 && VFs[0].isScalar();
// Consistency check: the fixed VF of 1 is in the plan exactly when the plan
// has a single, scalar VF.
4939 assert(HasScalarVFOnly == hasVF(ElementCount::getFixed(1)) &&
4940 "Plan with scalar VF should only have a single VF");
4941 return HasScalarVFOnly;
4942 }
4943
/// Returns true if \p UF is applicable to this plan: either no UFs are
/// recorded (the plan is then valid for any UF) or \p UF is among them.
4944 bool hasUF(unsigned UF) const { return UFs.empty() || UFs.contains(UF); }
4945
4946 /// Returns the concrete UF of the plan, after unrolling.
4947 unsigned getConcreteUF() const {
4948 assert(UFs.size() == 1 && "Expected a single UF");
4949 return UFs[0];
4950 }
4951
/// Restrict the plan to the single unroll factor \p UF.
4952 void setUF(unsigned UF) {
// \p UF must already be applicable; note that hasUF() also accepts any UF
// while no UFs are recorded.
4953 assert(hasUF(UF) && "Cannot set the UF not already in plan");
// Replace whatever was recorded with just \p UF.
4954 UFs.clear();
4955 UFs.insert(UF);
4956 }
4957
4958 /// Returns true if the VPlan already has been unrolled, i.e. it has a single
4959 /// concrete UF.
4960 bool isUnrolled() const { return UFs.size() == 1; }
4961
4962 /// Return a string with the name of the plan and the applicable VFs and UFs.
4963 std::string getName() const;
4964
/// Set the name of the plan to \p newName.
4965 void setName(const Twine &newName) { Name = newName.str(); }
4966
4967 /// Gets the live-in VPIRValue for \p V or adds a new live-in (if none exists
4968 /// yet) for \p V.
4970 assert(V && "Trying to get or add the VPIRValue of a null Value");
4971 auto [It, Inserted] = LiveIns.try_emplace(V);
4972 if (Inserted) {
4973 if (auto *CI = dyn_cast<ConstantInt>(V))
4974 It->second = new VPConstantInt(CI);
4975 else
4976 It->second = new VPIRValue(V);
4977 }
4978
4979 assert(isa<VPIRValue>(It->second) &&
4980 "Only VPIRValues should be in mapping");
4981 return It->second;
4982 }
4984 assert(V && "Trying to get or add the VPIRValue of a null VPIRValue");
4985 return getOrAddLiveIn(V->getValue());
4986 }
4987
4988 /// Return a VPIRValue wrapping i1 true.
4989 VPIRValue *getTrue() { return getConstantInt(1, 1); }
4990
4991 /// Return a VPIRValue wrapping i1 false.
4992 VPIRValue *getFalse() { return getConstantInt(1, 0); }
4993
4994 /// Return a VPIRValue wrapping the null value of type \p Ty.
4995 VPIRValue *getZero(Type *Ty) { return getConstantInt(Ty, 0); }
4996
4997 /// Return a VPIRValue wrapping the AllOnes value of type \p Ty.
4999 return getConstantInt(APInt::getAllOnes(Ty->getIntegerBitWidth()));
5000 }
5001
5002 /// Return a VPIRValue wrapping a ConstantInt with the given type and value.
5003 VPIRValue *getConstantInt(Type *Ty, uint64_t Val, bool IsSigned = false) {
5004 return getOrAddLiveIn(ConstantInt::get(Ty, Val, IsSigned));
5005 }
5006
5007 /// Return a VPIRValue wrapping a ConstantInt with the given bitwidth and
5008 /// value.
5010 bool IsSigned = false) {
5011 return getConstantInt(APInt(BitWidth, Val, IsSigned));
5012 }
5013
5014 /// Return a VPIRValue wrapping a ConstantInt with the given APInt value.
5016 return getOrAddLiveIn(ConstantInt::get(getContext(), Val));
5017 }
5018
5019 /// Return the live-in VPIRValue for \p V, if there is one or nullptr
5020 /// otherwise.
5021 VPIRValue *getLiveIn(Value *V) const { return LiveIns.lookup(V); }
5022
5023 /// Return the list of live-in VPValues available in the VPlan.
5024 auto getLiveIns() const { return LiveIns.values(); }
5025
5026#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
5027 /// Print the live-ins of this VPlan to \p O.
5028 void printLiveIns(raw_ostream &O) const;
5029
5030 /// Print this VPlan to \p O.
5031 LLVM_ABI_FOR_TEST void print(raw_ostream &O) const;
5032
5033 /// Print this VPlan in DOT format to \p O.
5034 LLVM_ABI_FOR_TEST void printDOT(raw_ostream &O) const;
5035
5036 /// Dump the plan to stderr (for debugging).
5037 LLVM_DUMP_METHOD void dump() const;
5038#endif
5039
5040 /// Clone the current VPlan, update all VPValues of the new VPlan and cloned
5041 /// recipes to refer to the clones, and return it.
5043
5044 /// Create a new VPBasicBlock with \p Name and containing \p Recipe if
5045 /// present. The returned block is owned by the VPlan and deleted once the
5046 /// VPlan is destroyed.
5048 VPRecipeBase *Recipe = nullptr) {
5049 auto *VPB = new VPBasicBlock(Name, Recipe);
5050 CreatedBlocks.push_back(VPB);
5051 return VPB;
5052 }
5053
5054 /// Create a new loop region with a canonical IV using \p CanIVTy and
5055 /// \p DL. Use \p Name as the region's name and set entry and exiting blocks
5056 /// to \p Entry and \p Exiting respectively, if provided. The returned block
5057 /// is owned by the VPlan and deleted once the VPlan is destroyed.
5059 const std::string &Name = "",
5060 VPBlockBase *Entry = nullptr,
5061 VPBlockBase *Exiting = nullptr) {
5062 auto *VPB = new VPRegionBlock(CanIVTy, DL, Entry, Exiting, Name);
5063 CreatedBlocks.push_back(VPB);
5064 return VPB;
5065 }
5066
5067 /// Create a new replicate region with \p Entry, \p Exiting and \p Name. The
5068 /// returned block is owned by the VPlan and deleted once the VPlan is
5069 /// destroyed.
5071 const std::string &Name = "") {
5072 auto *VPB = new VPRegionBlock(Entry, Exiting, Name);
5073 CreatedBlocks.push_back(VPB);
5074 return VPB;
5075 }
5076
5077 /// Create a VPIRBasicBlock wrapping \p IRBB, but do not create
5078 /// VPIRInstructions wrapping the instructions in \p IRBB. The returned
5079 /// block is owned by the VPlan and deleted once the VPlan is destroyed.
5081
5082 /// Create a VPIRBasicBlock from \p IRBB containing VPIRInstructions for all
5083 /// instructions in \p IRBB, except its terminator which is managed by the
5084 /// successors of the block in VPlan. The returned block is owned by the VPlan
5085 /// and deleted once the VPlan is destroyed.
5087
5088 /// Returns true if the VPlan is based on a loop with an early exit. That is
5089 /// the case if the VPlan has either more than one exit block or a single exit
5090 /// block with multiple predecessors (one for the exit via the latch and one
5091 /// via the other early exit).
5092 bool hasEarlyExit() const {
// Case 1: more than one exit block has at least one predecessor.
5093 return count_if(ExitBlocks,
5094 [](VPIRBasicBlock *EB) { return EB->hasPredecessors(); }) >
5095 1 ||
// Case 2: there is exactly one exit block and it has several predecessors.
5096 (ExitBlocks.size() == 1 && ExitBlocks[0]->getNumPredecessors() > 1);
5097 }
5098
5099 /// Returns true if the scalar tail may execute after the vector loop, i.e.
5100 /// if the middle block is a predecessor of the scalar preheader. Note that
5101 /// this relies on unneeded branches to the scalar tail loop being removed.
5102 bool hasScalarTail() const {
5103 auto *ScalarPH = getScalarPreheader();
// If there is no scalar preheader the result is false; otherwise check
// whether the middle block is among the scalar preheader's predecessors.
5104 return ScalarPH &&
5105 is_contained(ScalarPH->getPredecessors(), getMiddleBlock());
5106 }
5107
5108 /// The type of the canonical induction variable of the vector loop.
// The type is read from the symbolic VF value; the inline constructors of
// this class create VF, UF, VFxUF and VectorTripCount from the same IdxTy.
5109 Type *getIndexType() const { return VF.getType(); }
5110};
5111
5112#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Stream \p Plan to \p OS by calling VPlan::print and return \p OS. Only
/// declared when NDEBUG is not defined or LLVM_ENABLE_DUMP is defined.
5113 inline raw_ostream &operator<<(raw_ostream &OS, const VPlan &Plan) {
5114 Plan.print(OS);
5115 return OS;
5116 }
5117#endif
5118
5119} // end namespace llvm
5120
5121#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
static MCDisassembler::DecodeStatus addOperand(MCInst &Inst, const MCOperand &Opnd)
Rewrite undef for PHI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
This file implements methods to test, set and extract typed bits from packed unsigned integers.
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define LLVM_ABI
Definition Compiler.h:213
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:661
#define LLVM_ABI_FOR_TEST
Definition Compiler.h:218
#define LLVM_PACKED_START
Definition Compiler.h:554
dxil translate DXIL Translate Metadata
Hexagon Common GEP
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
static Value * getOpcode(Value &V, Type &Ty, InstrumentationConfig &IConf, InstrumentorIRBuilderTy &IIRB)
static std::pair< Value *, APInt > getMask(Value *WideMask, unsigned Factor, ElementCount LeafValueEC)
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first DebugLoc that has line number information, given a range of instructions.
This file implements a map that provides insertion order iteration.
This file provides utility analysis objects describing memory locations.
#define T
MachineInstr unsigned OpIdx
#define P(N)
static StringRef getName(Value *V)
static bool mayHaveSideEffects(MachineInstr &MI)
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
static SymbolRef::Type getType(const Symbol *Sym)
Definition TapiFile.cpp:39
static const BasicSubtargetSubTypeKV * find(StringRef S, ArrayRef< BasicSubtargetSubTypeKV > A)
Find KV in array using binary search.
This file contains the declarations of the entities induced by Vectorization Plans,...
#define VP_CLASSOF_IMPL(VPRecipeID)
Definition VPlan.h:585
static const uint32_t IV[8]
Definition blake3_impl.h:83
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
const T & back() const
Get the last element.
Definition ArrayRef.h:150
bool empty() const
Check if the array is empty.
Definition ArrayRef.h:136
LLVM Basic Block Representation.
Definition BasicBlock.h:62
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
This class represents a function call, abstracting a target machine's calling convention.
This is the base class for all instructions that perform data casts.
Definition InstrTypes.h:512
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:740
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
A debug info location.
Definition DebugLoc.h:124
static DebugLoc getUnknown()
Definition DebugLoc.h:151
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:155
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:309
Utility class for floating point operations which can have information about relaxed accuracy require...
Definition Operator.h:202
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:23
Represents flags for the getelementptr instruction/expression.
static GEPNoWrapFlags fromRaw(unsigned Flags)
unsigned getRaw() const
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
A struct for saving information about induction variables.
InductionKind
This enum represents the kinds of inductions that we support.
InnerLoopVectorizer vectorizes loops which contain only one basic block to a specified vectorization ...
bool isCast() const
The group of interleaved loads/stores sharing the same stride and close to each other.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
An instruction for reading from memory.
LoopVectorizationCostModel - estimates the expected speedups due to vectorization.
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
Metadata node.
Definition Metadata.h:1075
Root of the metadata hierarchy.
Definition Metadata.h:64
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
This class represents an assumption made using SCEV expressions which can be checked at run-time.
This class represents an analyzed expression in the program.
This class provides computation of slot numbers for LLVM Assembly writing.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:339
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
iterator erase(const_iterator CI)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
std::string str() const
Get the contents as an std::string.
Definition StringRef.h:222
This class represents a truncation of integer types.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
Definition Twine.cpp:17
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
void execute(VPTransformState &State) override
Generate the active lane mask phi of the vector loop.
VPActiveLaneMaskPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3995
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPActiveLaneMaskPHIRecipe(VPValue *StartMask, DebugLoc DL)
Definition VPlan.h:3989
~VPActiveLaneMaskPHIRecipe() override=default
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition VPlan.h:4338
RecipeListTy::const_iterator const_iterator
Definition VPlan.h:4366
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
Definition VPlan.h:4413
RecipeListTy::const_reverse_iterator const_reverse_iterator
Definition VPlan.h:4368
RecipeListTy::iterator iterator
Instruction iterators...
Definition VPlan.h:4365
RecipeListTy & getRecipeList()
Returns a reference to the list of recipes.
Definition VPlan.h:4391
iplist< VPRecipeBase > RecipeListTy
Definition VPlan.h:4349
VPBasicBlock(const unsigned char BlockSC, const Twine &Name="")
Definition VPlan.h:4355
iterator end()
Definition VPlan.h:4375
iterator begin()
Recipe iterator methods.
Definition VPlan.h:4373
RecipeListTy::reverse_iterator reverse_iterator
Definition VPlan.h:4367
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
Definition VPlan.h:4426
const VPBasicBlock * getCFGPredecessor(unsigned Idx) const
Returns the predecessor block at index Idx with the predecessors as per the corresponding plain CFG.
Definition VPlan.cpp:763
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
Definition VPlan.cpp:266
~VPBasicBlock() override
Definition VPlan.h:4359
const_reverse_iterator rbegin() const
Definition VPlan.h:4379
reverse_iterator rend()
Definition VPlan.h:4380
RecipeListTy Recipes
The VPRecipes held in the order of output instructions to generate.
Definition VPlan.h:4353
VPRecipeBase & back()
Definition VPlan.h:4388
const VPRecipeBase & front() const
Definition VPlan.h:4385
const_iterator begin() const
Definition VPlan.h:4374
VPRecipeBase & front()
Definition VPlan.h:4386
const VPRecipeBase & back() const
Definition VPlan.h:4387
void insert(VPRecipeBase *Recipe, iterator InsertPt)
Definition VPlan.h:4404
bool empty() const
Definition VPlan.h:4384
const_iterator end() const
Definition VPlan.h:4376
static bool classof(const VPBlockBase *V)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:4399
static RecipeListTy VPBasicBlock::* getSublistAccess(VPRecipeBase *)
Returns a pointer to a member of the recipe list.
Definition VPlan.h:4394
reverse_iterator rbegin()
Definition VPlan.h:4378
friend class VPlan
Definition VPlan.h:4339
size_t size() const
Definition VPlan.h:4383
const_reverse_iterator rend() const
Definition VPlan.h:4381
VPValue * getIncomingValue(unsigned Idx) const
Return incoming value number Idx.
Definition VPlan.h:2954
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
Definition VPlan.h:2959
VPBlendRecipe(PHINode *Phi, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags, DebugLoc DL)
The blend operation is a User of the incoming values and of their respective masks,...
Definition VPlan.h:2915
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account when normalized the first incoming value wi...
Definition VPlan.h:2949
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:2970
VPBlendRecipe * cloneWithOperands(ArrayRef< VPValue * > NewOperands)
Definition VPlan.h:2936
VPBlendRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2934
void setMask(unsigned Idx, VPValue *V)
Set mask number Idx to V.
Definition VPlan.h:2965
bool isNormalized() const
A normalized blend is one that has an odd number of operands, whereby the first operand does not have...
Definition VPlan.h:2945
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition VPlan.h:94
void setSuccessors(ArrayRef< VPBlockBase * > NewSuccs)
Set each VPBasicBlock in NewSuccs as successor of this VPBlockBase.
Definition VPlan.h:315
VPRegionBlock * getParent()
Definition VPlan.h:186
VPBlocksTy & getPredecessors()
Definition VPlan.h:223
iterator_range< VPBlockBase ** > predecessors()
Definition VPlan.h:220
LLVM_DUMP_METHOD void dump() const
Dump this VPBlockBase to dbgs().
Definition VPlan.h:385
void setName(const Twine &newName)
Definition VPlan.h:179
size_t getNumSuccessors() const
Definition VPlan.h:237
iterator_range< VPBlockBase ** > successors()
Definition VPlan.h:219
virtual void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Print plain-text dump of this VPBlockBase to O, prefixing all lines with Indent.
bool hasPredecessors() const
Returns true if this block has any predecessors.
Definition VPlan.h:217
void swapSuccessors()
Swap successors of the block. The block must have exactly 2 successors.
Definition VPlan.h:337
void printSuccessors(raw_ostream &O, const Twine &Indent) const
Print the successors of this block to O, prefixing all lines with Indent.
Definition VPlan.cpp:661
SmallVectorImpl< VPBlockBase * > VPBlocksTy
Definition VPlan.h:173
virtual ~VPBlockBase()=default
const VPBlocksTy & getHierarchicalPredecessors()
Definition VPlan.h:273
unsigned getIndexForSuccessor(const VPBlockBase *Succ) const
Returns the index for Succ in the blocks successor list.
Definition VPlan.h:350
size_t getNumPredecessors() const
Definition VPlan.h:238
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
Definition VPlan.h:306
VPBlockBase * getEnclosingBlockWithPredecessors()
Definition VPlan.cpp:258
unsigned getIndexForPredecessor(const VPBlockBase *Pred) const
Returns the index for Pred in the blocks predecessors list.
Definition VPlan.h:343
bool hasSuccessors() const
Returns true if this block has any successors.
Definition VPlan.h:215
const VPBlocksTy & getPredecessors() const
Definition VPlan.h:222
virtual VPBlockBase * clone()=0
Clone the current block and its recipes without updating the operands of the cloned recipes,...
enum { VPRegionBlockSC, VPBasicBlockSC, VPIRBasicBlockSC } VPBlockTy
An enumeration for keeping track of the concrete subclass of VPBlockBase that are actually instantiat...
Definition VPlan.h:171
virtual InstructionCost cost(ElementCount VF, VPCostContext &Ctx)=0
Return the cost of the block.
void setPlan(VPlan *ParentPlan)
Sets the pointer of the plan containing the block.
Definition VPlan.cpp:230
const VPRegionBlock * getParent() const
Definition VPlan.h:187
const std::string & getName() const
Definition VPlan.h:177
void clearSuccessors()
Remove all the successors of this block.
Definition VPlan.h:325
VPBlockBase * getSingleHierarchicalSuccessor()
Definition VPlan.h:263
void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse)
Set two given VPBlockBases IfTrue and IfFalse to be the two successors of this VPBlockBase.
Definition VPlan.h:297
VPBlockBase * getSinglePredecessor() const
Definition VPlan.h:233
virtual void execute(VPTransformState *State)=0
The method which generates the output IR that correspond to this VPBlockBase, thereby "executing" the...
const VPBlocksTy & getHierarchicalSuccessors()
Definition VPlan.h:257
void clearPredecessors()
Remove all the predecessor of this block.
Definition VPlan.h:322
friend class VPBlockUtils
Definition VPlan.h:95
unsigned getVPBlockID() const
Definition VPlan.h:184
void printAsOperand(raw_ostream &OS, bool PrintType=false) const
Definition VPlan.h:364
void swapPredecessors()
Swap predecessors of the block.
Definition VPlan.h:329
VPBlockBase(const unsigned char SC, const std::string &N)
Definition VPlan.h:163
VPBlocksTy & getSuccessors()
Definition VPlan.h:212
VPBlockBase * getEnclosingBlockWithSuccessors()
An Enclosing Block of a block B is any block containing B, including B itself.
Definition VPlan.cpp:250
void setOneSuccessor(VPBlockBase *Successor)
Set a given VPBlockBase Successor as the single successor of this VPBlockBase.
Definition VPlan.h:286
void setParent(VPRegionBlock *P)
Definition VPlan.h:197
VPBlockBase * getSingleHierarchicalPredecessor()
Definition VPlan.h:279
VPBlockBase * getSingleSuccessor() const
Definition VPlan.h:227
const VPBlocksTy & getSuccessors() const
Definition VPlan.h:211
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlan.h:3461
VPBranchOnMaskRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3445
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition VPlan.h:3469
VPBranchOnMaskRecipe(VPValue *BlockInMask, DebugLoc DL)
Definition VPlan.h:3442
VPlan-based builder utility analogous to IRBuilder.
VPRegionValue * getRegionValue()
Definition VPlan.h:4532
VPCanonicalIVInfo(Type *Ty, DebugLoc DL, VPRegionBlock *Region)
Definition VPlan.h:4529
const VPRegionValue * getRegionValue() const
Definition VPlan.h:4533
bool hasNUW() const
Definition VPlan.h:4535
VPCurrentIterationPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:4027
VPCurrentIterationPHIRecipe(VPValue *StartIV, DebugLoc DL)
Definition VPlan.h:4021
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPCurrentIterationPHIRecipe.
Definition VPlan.h:4039
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi nodes.
Definition VPlan.h:4033
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:4046
~VPCurrentIterationPHIRecipe() override=default
InductionDescriptor::InductionKind getInductionKind() const
Definition VPlan.h:4159
VPValue * getIndex() const
Definition VPlan.h:4156
const FPMathOperator * getFPBinOp() const
Definition VPlan.h:4158
VPIRValue * getStartValue() const
Definition VPlan.h:4155
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPDerivedIVRecipe.
Definition VPlan.h:4149
VPValue * getStepValue() const
Definition VPlan.h:4157
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:4144
VPDerivedIVRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:4137
VPDerivedIVRecipe(InductionDescriptor::InductionKind Kind, const FPMathOperator *FPBinOp, VPIRValue *Start, VPValue *IV, VPValue *Step)
Definition VPlan.h:4128
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPDerivedIVRecipe() override=default
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:4162
VPDerivedIVRecipe(const InductionDescriptor &IndDesc, VPIRValue *Start, VPValue *CanonicalIV, VPValue *Step)
Definition VPlan.h:4121
Template specialization of the standard LLVM dominator tree utility for VPBlockBases.
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:3964
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPExpandSCEVRecipe.
Definition VPlan.h:3969
VPExpandSCEVRecipe(const SCEV *Expr)
const SCEV * getSCEV() const
Definition VPlan.h:3975
VPExpandSCEVRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3960
~VPExpandSCEVRecipe() override=default
void execute(VPTransformState &State) override
Method for generating code, must not be called as this recipe is abstract.
Definition VPlan.h:3618
bool isVectorToScalar() const
Returns true if this VPExpressionRecipe produces a single scalar.
VPValue * getOperandOfResultType() const
Return the VPValue to use to infer the result type of the recipe.
Definition VPlan.h:3600
VPExpressionRecipe(VPWidenCastRecipe *Ext, VPWidenRecipe *Neg, VPReductionRecipe *Red)
Definition VPlan.h:3531
VPExpressionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3582
void decompose()
Insert the recipes of the expression back into the VPlan, directly before the current recipe.
~VPExpressionRecipe() override
Definition VPlan.h:3570
VPExpressionRecipe(VPWidenCastRecipe *Ext, VPReductionRecipe *Red)
Definition VPlan.h:3529
bool mayHaveSideEffects() const
Returns true if this expression contains recipes that may have side effects.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Compute the cost of this recipe either using a recipe's specialized implementation or using the legac...
bool mayReadOrWriteMemory() const
Returns true if this expression contains recipes that may read from or write to memory.
VPExpressionRecipe(VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1, VPWidenRecipe *Mul, VPReductionRecipe *Red)
Definition VPlan.h:3546
VPExpressionRecipe(VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1, VPWidenRecipe *Mul, VPWidenRecipe *Neg, VPReductionRecipe *Red)
Definition VPlan.h:3550
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
unsigned getVFScaleFactor() const
Definition VPlan.h:3612
VPExpressionRecipe(VPWidenRecipe *Mul, VPReductionRecipe *Red)
Definition VPlan.h:3544
A pure virtual base class for all recipes modeling header phis, including phis for first order recurr...
Definition VPlan.h:2406
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this header phi recipe.
VPHeaderPHIRecipe(unsigned char VPRecipeID, Instruction *UnderlyingInstr, VPValue *Start, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2408
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:2417
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:2430
static bool classof(const VPValue *V)
Definition VPlan.h:2427
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override=0
Print the recipe.
virtual VPValue * getBackedgeValue()
Returns the incoming value from the loop backedge.
Definition VPlan.h:2453
VPHeaderPHIRecipe(unsigned char VPRecipeID, Instruction *UnderlyingInstr, VPValue *Start, Type *ResultTy, DebugLoc DL)
Definition VPlan.h:2413
void setBackedgeValue(VPValue *V)
Update the incoming value from the loop backedge.
Definition VPlan.h:2458
VPValue * getStartValue()
Returns the start value of the phi, if one is set.
Definition VPlan.h:2442
void setStartValue(VPValue *V)
Update the start value of the recipe.
Definition VPlan.h:2450
static bool classof(const VPRecipeBase *R)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:2423
VPValue * getStartValue() const
Definition VPlan.h:2445
void execute(VPTransformState &State) override=0
Generate the phi nodes.
virtual VPRecipeBase & getBackedgeRecipe()
Returns the backedge value as a recipe.
Definition VPlan.h:2462
~VPHeaderPHIRecipe() override=default
A recipe representing a sequence of load -> update -> store as part of a histogram operation.
Definition VPlan.h:2140
void execute(VPTransformState &State) override
Produce a vectorized histogram operation.
VPHistogramRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2153
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPHistogramRecipe.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getMask() const
Return the mask operand if one was provided, or a null pointer if all lanes should be executed uncond...
Definition VPlan.h:2170
unsigned getOpcode() const
Definition VPlan.h:2166
VP_CLASSOF_IMPL(VPRecipeBase::VPHistogramSC)
~VPHistogramRecipe() override=default
VPHistogramRecipe(unsigned Opcode, ArrayRef< VPValue * > Operands, const VPIRMetadata &Metadata={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2145
A special type of VPBasicBlock that wraps an existing IR basic block.
Definition VPlan.h:4491
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPBasicBlock,...
Definition VPlan.cpp:473
BasicBlock * getIRBasicBlock() const
Definition VPlan.h:4515
static bool classof(const VPBlockBase *V)
Definition VPlan.h:4505
~VPIRBasicBlock() override=default
friend class VPlan
Definition VPlan.h:4492
VPIRBasicBlock * clone() override
Clone the current block and its recipes, without updating the operands of the cloned recipes.
Definition VPlan.cpp:498
Class to record and manage LLVM IR flags.
Definition VPlan.h:694
FastMathFlagsTy FMFs
Definition VPlan.h:782
ReductionFlagsTy ReductionFlags
Definition VPlan.h:784
LLVM_ABI_FOR_TEST bool hasRequiredFlagsForOpcode(unsigned Opcode) const
Returns true if Opcode has its required flags set.
bool hasNoWrapFlags() const
Definition VPlan.h:1037
VPIRFlags(RecurKind Kind, bool IsOrdered, bool IsInLoop, FastMathFlags FMFs)
Definition VPlan.h:875
LLVM_ABI_FOR_TEST bool flagsValidForOpcode(unsigned Opcode) const
Returns true if the set flags are valid for Opcode.
static VPIRFlags getDefaultFlags(unsigned Opcode)
Returns default flags for Opcode for opcodes that support it, asserts otherwise.
VPIRFlags(DisjointFlagsTy DisjointFlags)
Definition VPlan.h:855
VPIRFlags(WrapFlagsTy WrapFlags)
Definition VPlan.h:841
WrapFlagsTy WrapFlags
Definition VPlan.h:776
void printFlags(raw_ostream &O) const
VPIRFlags(CmpInst::Predicate Pred, FastMathFlags FMFs)
Definition VPlan.h:834
bool hasFastMathFlags() const
Returns true if the recipe has fast-math flags.
Definition VPlan.h:999
LLVM_ABI_FOR_TEST FastMathFlags getFastMathFlags() const
bool isReductionOrdered() const
Definition VPlan.h:1063
TruncFlagsTy TruncFlags
Definition VPlan.h:777
CmpInst::Predicate getPredicate() const
Definition VPlan.h:971
WrapFlagsTy getNoWrapFlags() const
Definition VPlan.h:1047
uint8_t AllFlags[2]
Definition VPlan.h:785
bool hasNonNegFlag() const
Returns true if the recipe has non-negative flag.
Definition VPlan.h:1007
void transferFlags(VPIRFlags &Other)
Definition VPlan.h:880
ExactFlagsTy ExactFlags
Definition VPlan.h:779
bool hasNoSignedWrap() const
Definition VPlan.h:1026
void intersectFlags(const VPIRFlags &Other)
Only keep flags also present in Other.
bool isDisjoint() const
Definition VPlan.h:1051
VPIRFlags(TruncFlagsTy TruncFlags)
Definition VPlan.h:846
VPIRFlags(FastMathFlags FMFs)
Definition VPlan.h:851
VPIRFlags(NonNegFlagsTy NonNegFlags)
Definition VPlan.h:860
VPIRFlags(CmpInst::Predicate Pred)
Definition VPlan.h:829
uint8_t GEPFlagsStorage
Definition VPlan.h:780
VPIRFlags(ExactFlagsTy ExactFlags)
Definition VPlan.h:865
bool isNonNeg() const
Definition VPlan.h:1009
GEPNoWrapFlags getGEPNoWrapFlags() const
Definition VPlan.h:989
bool hasPredicate() const
Returns true if the recipe has a comparison predicate.
Definition VPlan.h:994
DisjointFlagsTy DisjointFlags
Definition VPlan.h:778
void setPredicate(CmpInst::Predicate Pred)
Definition VPlan.h:979
bool hasNoUnsignedWrap() const
Definition VPlan.h:1015
FCmpFlagsTy FCmpFlags
Definition VPlan.h:783
NonNegFlagsTy NonNegFlags
Definition VPlan.h:781
bool isReductionInLoop() const
Definition VPlan.h:1069
void dropPoisonGeneratingFlags()
Drop all poison-generating flags.
Definition VPlan.h:891
void applyFlags(Instruction &I) const
Apply the IR flags to I.
Definition VPlan.h:928
VPIRFlags(GEPNoWrapFlags GEPFlags)
Definition VPlan.h:870
uint8_t CmpPredStorage
Definition VPlan.h:775
RecurKind getRecurKind() const
Definition VPlan.h:1057
VPIRFlags(Instruction &I)
Definition VPlan.h:791
Instruction & getInstruction() const
Definition VPlan.h:1729
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first part of operand Op.
Definition VPlan.h:1737
~VPIRInstruction() override=default
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
VPIRInstruction * clone() override
Clone the current recipe.
Definition VPlan.h:1716
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition VPlan.h:1743
static LLVM_ABI_FOR_TEST VPIRInstruction * create(Instruction &I)
Create a new VPIRPhi for I, if it is a PHINode, otherwise create a VPIRInstruction.
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPIRInstruction.
bool usesScalars(const VPValue *Op) const override
Returns true if the VPUser uses scalars of operand Op.
Definition VPlan.h:1731
VPIRInstruction(Instruction &I)
VPIRInstruction::create() should be used to create VPIRInstructions, as subclasses may need to be cre...
Definition VPlan.h:1704
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Helper to manage IR metadata for recipes.
Definition VPlan.h:1170
VPIRMetadata & operator=(const VPIRMetadata &Other)=default
MDNode * getMetadata(unsigned Kind) const
Get metadata of kind Kind. Returns nullptr if not found.
Definition VPlan.h:1206
VPIRMetadata(Instruction &I)
Adds metadata that can be preserved from the original instruction I.
Definition VPlan.h:1178
VPIRMetadata(const VPIRMetadata &Other)=default
Copy constructor for cloning.
void intersect(const VPIRMetadata &MD)
Intersect this VPIRMetadata object with MD, keeping only metadata nodes that are common to both.
VPIRMetadata()=default
void print(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print metadata with node IDs.
void applyMetadata(Instruction &I) const
Add all metadata to I.
void setMetadata(unsigned Kind, MDNode *Node)
Set metadata with kind Kind to Node.
Definition VPlan.h:1190
VPInstructionWithType(unsigned Opcode, ArrayRef< VPValue * > Operands, Type *ResultTy, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Definition VPlan.h:1533
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPInstruction.
Definition VPlan.h:1575
static bool classof(const VPUser *R)
Definition VPlan.h:1560
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:1540
Type * getResultType() const
Definition VPlan.h:1581
VPInstruction * clone() override
Clone the current recipe.
Definition VPlan.h:1564
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the instruction.
This is a concrete Recipe that models a single VPlan-level instruction.
Definition VPlan.h:1225
VPInstruction(unsigned Opcode, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", Type *ResultTy=nullptr)
unsigned getNumOperandsWithoutMask() const
Returns the number of operands, excluding the mask if the VPInstruction is masked.
Definition VPlan.h:1467
iterator_range< operand_iterator > operandsWithoutMask()
Returns an iterator range over the operands excluding the mask operand if present.
Definition VPlan.h:1487
VPInstruction * clone() override
Clone the current recipe.
Definition VPlan.h:1403
@ ExtractLastActive
Extracts the last active lane from a set of vectors.
Definition VPlan.h:1327
@ ExtractLane
Extracts a single lane (first operand) from a set of vector operands.
Definition VPlan.h:1318
@ ExitingIVValue
Compute the exiting value of a wide induction after vectorization, that is the value of the last lane...
Definition VPlan.h:1331
@ WideIVStep
Scale the first operand (vector step) by the second operand (scalar-step).
Definition VPlan.h:1343
@ ResumeForEpilogue
Explicit user for the resume phi of the canonical induction in the main VPlan, used by the epilogue v...
Definition VPlan.h:1321
@ Unpack
Extracts all lanes from its (non-scalable) vector operand.
Definition VPlan.h:1268
@ ReductionStartVector
Start vector for reductions with 3 operands: the original start value, the identity value for the red...
Definition VPlan.h:1314
@ BuildVector
Creates a fixed-width vector containing all operands.
Definition VPlan.h:1263
@ BuildStructVector
Given operands of (the same) struct type, creates a struct of fixed-width vectors each containing a ...
Definition VPlan.h:1260
@ VScale
Returns the value for vscale.
Definition VPlan.h:1347
@ CanonicalIVIncrementForPart
Definition VPlan.h:1244
@ ComputeReductionResult
Reduce the operands to the final reduction result using the operation specified via the operation's V...
Definition VPlan.h:1271
bool hasResult() const
Definition VPlan.h:1432
iterator_range< const_operand_iterator > operandsWithoutMask() const
Definition VPlan.h:1490
void addMask(VPValue *Mask)
Add mask Mask to an unmasked VPInstruction, if it needs masking.
Definition VPlan.h:1472
StringRef getName() const
Returns the symbolic name assigned to the VPInstruction.
Definition VPlan.h:1512
unsigned getOpcode() const
Definition VPlan.h:1416
void setName(StringRef NewName)
Set the symbolic name for the VPInstruction.
Definition VPlan.h:1515
VPValue * getMask() const
Returns the mask for the VPInstruction.
Definition VPlan.h:1481
VPInstruction * cloneWithOperands(ArrayRef< VPValue * > NewOperands, Type *ResultTy=nullptr)
Definition VPlan.h:1407
unsigned getNumOperandsForOpcode() const
Return the number of operands determined by the opcode of the VPInstruction, excluding mask.
bool isMasked() const
Returns true if the VPInstruction has a mask operand.
Definition VPlan.h:1457
A common base class for interleaved memory operations.
Definition VPlan.h:2995
virtual unsigned getNumStoreOperands() const =0
Returns the number of stored operands of this interleave group.
bool usesFirstLaneOnly(const VPValue *Op) const override=0
Returns true if the recipe only uses the first lane of operand Op.
bool needsMaskForGaps() const
Return true if the access needs a mask because of the gaps.
Definition VPlan.h:3058
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:3064
static bool classof(const VPUser *U)
Definition VPlan.h:3040
VPInterleaveBase(const unsigned char SC, const InterleaveGroup< Instruction > *IG, ArrayRef< VPValue * > Operands, ArrayRef< VPValue * > StoredValues, VPValue *Mask, bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
Definition VPlan.h:3007
Instruction * getInsertPos() const
Definition VPlan.h:3062
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:3035
const InterleaveGroup< Instruction > * getInterleaveGroup() const
Definition VPlan.h:3060
VPValue * getMask() const
Return the mask used by this recipe.
Definition VPlan.h:3052
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
Definition VPlan.h:3081
VPInterleaveBase * clone() override=0
Clone the current recipe.
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition VPlan.h:3046
bool usesFirstLaneOnly(const VPValue *Op) const override
The recipe only uses the first lane of the address, and EVL operand.
Definition VPlan.h:3161
VPValue * getEVL() const
The VPValue of the explicit vector length.
Definition VPlan.h:3155
~VPInterleaveEVLRecipe() override=default
unsigned getNumStoreOperands() const override
Returns the number of stored operands of this interleave group.
Definition VPlan.h:3168
VPInterleaveEVLRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3148
VPInterleaveEVLRecipe(VPInterleaveRecipe &R, VPValue &EVL, VPValue *Mask)
Definition VPlan.h:3135
VPInterleaveRecipe is a recipe for transforming an interleave group of load or stores into one wide l...
Definition VPlan.h:3091
unsigned getNumStoreOperands() const override
Returns the number of stored operands of this interleave group.
Definition VPlan.h:3118
~VPInterleaveRecipe() override=default
VPInterleaveRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3101
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3112
VPInterleaveRecipe(const InterleaveGroup< Instruction > *IG, VPValue *Addr, ArrayRef< VPValue * > StoredValues, VPValue *Mask, bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
Definition VPlan.h:3093
In what follows, the term "input IR" refers to code that is fed into the vectorizer whereas the term ...
A VPRecipeValue defined by a multi-def recipe, stores a pointer to it.
Definition VPlanValue.h:364
Helper type to provide functions to access incoming values and blocks for phi-like recipes.
Definition VPlan.h:1593
virtual const VPRecipeBase * getAsRecipe() const =0
Return a VPRecipeBase* to the current object.
VPValue * getIncomingValueForBlock(const VPBasicBlock *VPBB) const
Returns the incoming value for VPBB. VPBB must be an incoming block.
VPUser::const_operand_range incoming_values() const
Returns an iterator range over the incoming values.
Definition VPlan.h:1622
virtual unsigned getNumIncoming() const
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:1617
void removeIncomingValueFor(VPBlockBase *IncomingBlock) const
Removes the incoming value for IncomingBlock, which must be a predecessor.
const VPBasicBlock * getIncomingBlock(unsigned Idx) const
Returns the incoming block with index Idx.
Definition VPlan.h:4482
detail::zippy< llvm::detail::zip_first, VPUser::const_operand_range, const_incoming_blocks_range > incoming_values_and_blocks() const
Returns an iterator range over pairs of incoming values and corresponding incoming blocks.
Definition VPlan.h:1642
VPValue * getIncomingValue(unsigned Idx) const
Returns the incoming VPValue with index Idx.
Definition VPlan.h:1602
virtual ~VPPhiAccessors()=default
void printPhiOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the recipe.
void setIncomingValueForBlock(const VPBasicBlock *VPBB, VPValue *V) const
Sets the incoming value for VPBB to V.
iterator_range< mapped_iterator< detail::index_iterator, std::function< const VPBasicBlock *(size_t)> > > const_incoming_blocks_range
Definition VPlan.h:1627
const_incoming_blocks_range incoming_blocks() const
Returns an iterator range over the incoming blocks.
Definition VPlan.h:1631
~VPPredInstPHIRecipe() override=default
VPPredInstPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3658
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPPredInstPHIRecipe.
Definition VPlan.h:3669
VPPredInstPHIRecipe(VPValue *PredV, DebugLoc DL)
Construct a VPPredInstPHIRecipe given PredInst whose value needs a phi node after merging back from ...
Definition VPlan.h:3653
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition VPlan.h:402
bool mayReadFromMemory() const
Returns true if the recipe may read from memory.
bool mayReadOrWriteMemory() const
Returns true if the recipe may read from or write to memory.
Definition VPlan.h:550
virtual void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Each concrete VPRecipe prints itself, without printing common information, like debug info or metadat...
VPRegionBlock * getRegion()
Definition VPlan.h:4683
enum { VPBranchOnMaskSC, VPDerivedIVSC, VPExpandSCEVSC, VPExpressionSC, VPIRInstructionSC, VPInstructionSC, VPInterleaveEVLSC, VPInterleaveSC, VPReductionEVLSC, VPReductionSC, VPReplicateSC, VPScalarIVStepsSC, VPVectorPointerSC, VPVectorEndPointerSC, VPWidenCallSC, VPWidenCanonicalIVSC, VPWidenCastSC, VPWidenGEPSC, VPWidenIntrinsicSC, VPWidenMemIntrinsicSC, VPWidenLoadEVLSC, VPWidenLoadSC, VPWidenStoreEVLSC, VPWidenStoreSC, VPWidenSC, VPBlendSC, VPHistogramSC, VPWidenPHISC, VPPredInstPHISC, VPCurrentIterationPHISC, VPActiveLaneMaskPHISC, VPFirstOrderRecurrencePHISC, VPWidenIntOrFpInductionSC, VPWidenPointerInductionSC, VPReductionPHISC, VPFirstPHISC=VPWidenPHISC, VPFirstHeaderPHISC=VPCurrentIterationPHISC, VPLastHeaderPHISC=VPReductionPHISC, VPLastPHISC=VPReductionPHISC, } VPRecipeTy
An enumeration for keeping track of the concrete subclass of VPRecipeBase that is actually instantiat...
Definition VPlan.h:420
void setDebugLoc(DebugLoc NewDL)
Set the recipe's debug location to NewDL.
Definition VPlan.h:558
bool mayWriteToMemory() const
Returns true if the recipe may write to memory.
~VPRecipeBase() override=default
VPBasicBlock * getParent()
Definition VPlan.h:477
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
Definition VPlan.h:555
virtual void execute(VPTransformState &State)=0
The method which generates the output IR instructions that correspond to this VPRecipe,...
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
static bool classof(const VPDef *D)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:526
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
virtual VPRecipeBase * clone()=0
Clone the current recipe.
friend class VPBlockUtils
Definition VPlan.h:404
const VPBasicBlock * getParent() const
Definition VPlan.h:478
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this recipe, taking into account if the cost computation should be skipped and the...
static bool classof(const VPUser *U)
Definition VPlan.h:531
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
unsigned getVPRecipeID() const
Definition VPlan.h:523
void moveAfter(VPRecipeBase *MovePos)
Unlink this recipe from its current VPBasicBlock and insert it into the VPBasicBlock that MovePos liv...
VPRecipeBase(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:467
Type * getScalarType() const
Returns the scalar type of this VPRecipeValue.
Definition VPlanValue.h:337
VPValue * getEVL() const
The VPValue of the explicit vector length.
Definition VPlan.h:3327
VPReductionEVLRecipe(VPReductionRecipe &R, VPValue &EVL, VPValue *CondOp, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:3306
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3330
VPReductionEVLRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3317
~VPReductionEVLRecipe() override=default
bool isOrdered() const
Returns true, if the phi is part of an ordered reduction.
Definition VPlan.h:2876
void setVFScaleFactor(unsigned ScaleFactor)
Set the VFScaleFactor for this reduction phi.
Definition VPlan.h:2862
VPReductionPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2844
unsigned getVFScaleFactor() const
Get the factor that the VF of this recipe's output should be scaled by, or 1 if it isn't scaled.
Definition VPlan.h:2855
~VPReductionPHIRecipe() override=default
bool hasUsesOutsideReductionChain() const
Returns true, if the phi is part of a multi-use reduction.
Definition VPlan.h:2888
unsigned getNumIncoming() const override
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:2870
VPReductionPHIRecipe(PHINode *Phi, RecurKind Kind, VPValue &Start, VPValue &BackedgeValue, ReductionStyle Style, const VPIRFlags &Flags, bool HasUsesOutsideReductionChain=false)
Create a new VPReductionPHIRecipe for the reduction Phi.
Definition VPlan.h:2825
bool isInLoop() const
Returns true if the phi is part of an in-loop reduction.
Definition VPlan.h:2879
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2893
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi/select nodes.
VPReductionPHIRecipe * cloneWithOperands(VPValue *Start, VPValue *BackedgeValue)
Definition VPlan.h:2837
bool isPartialReduction() const
Returns true if the reduction outputs a vector with a scaled down VF.
Definition VPlan.h:2885
RecurKind getRecurrenceKind() const
Returns the recurrence kind of the reduction.
Definition VPlan.h:2873
A recipe to represent in-loop, ordered or partial reduction operations.
Definition VPlan.h:3184
VPReductionRecipe(const unsigned char SC, RecurKind RdxKind, FastMathFlags FMFs, Instruction *I, ArrayRef< VPValue * > Operands, VPValue *CondOp, ReductionStyle Style, DebugLoc DL)
Definition VPlan.h:3193
bool isConditional() const
Return true if the in-loop reduction is conditional.
Definition VPlan.h:3269
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:3238
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:3253
VPValue * getVecOp() const
The VPValue of the vector value to be reduced.
Definition VPlan.h:3280
VPValue * getCondOp() const
The VPValue of the condition for the block.
Definition VPlan.h:3282
RecurKind getRecurrenceKind() const
Return the recurrence kind for the in-loop reduction.
Definition VPlan.h:3265
VPReductionRecipe(RecurKind RdxKind, FastMathFlags FMFs, Instruction *I, VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp, ReductionStyle Style, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:3218
bool isOrdered() const
Return true if the in-loop reduction is ordered.
Definition VPlan.h:3267
VPReductionRecipe(const RecurKind RdxKind, FastMathFlags FMFs, VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp, ReductionStyle Style, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:3224
bool isPartialReduction() const
Returns true if the reduction outputs a vector with a scaled down VF.
Definition VPlan.h:3271
~VPReductionRecipe() override=default
VPValue * getChainOp() const
The VPValue of the scalar Chain being accumulated.
Definition VPlan.h:3278
bool isInLoop() const
Returns true if the reduction is in-loop.
Definition VPlan.h:3273
VPReductionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3232
static bool classof(const VPUser *U)
Definition VPlan.h:3243
static bool classof(const VPValue *VPV)
Definition VPlan.h:3248
unsigned getVFScaleFactor() const
Get the factor that the VF of this recipe's output should be scaled by, or 1 if it isn't scaled.
Definition VPlan.h:3287
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition VPlan.h:4548
const VPBlockBase * getEntry() const
Definition VPlan.h:4592
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
Definition VPlan.h:4624
~VPRegionBlock() override=default
void setExiting(VPBlockBase *ExitingBlock)
Set ExitingBlock as the exiting VPBlockBase of this VPRegionBlock.
Definition VPlan.h:4609
VPBlockBase * getExiting()
Definition VPlan.h:4605
const VPRegionValue * getCanonicalIV() const
Definition VPlan.h:4663
void setEntry(VPBlockBase *EntryBlock)
Set EntryBlock as the entry VPBlockBase of this VPRegionBlock.
Definition VPlan.h:4597
Type * getCanonicalIVType() const
Return the type of the canonical IV for loop regions.
Definition VPlan.h:4668
bool hasCanonicalIVNUW() const
Indicates if NUW is set for the canonical IV increment, for loop regions.
Definition VPlan.h:4673
void clearCanonicalIVNUW(VPInstruction *Increment)
Unsets NUW for the canonical IV increment Increment, for loop regions.
Definition VPlan.h:4676
VPRegionValue * getCanonicalIV()
Return the canonical induction variable of the region, null for replicating regions.
Definition VPlan.h:4660
const VPBlockBase * getExiting() const
Definition VPlan.h:4604
VPBlockBase * getEntry()
Definition VPlan.h:4593
VPBasicBlock * getPreheaderVPBB()
Returns the pre-header VPBasicBlock of the loop region.
Definition VPlan.h:4617
friend class VPlan
Definition VPlan.h:4549
static bool classof(const VPBlockBase *V)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:4588
VPValues defined by a VPRegionBlock, like the canonical IV.
Definition VPlanValue.h:215
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
Definition VPlan.h:3349
bool isSingleScalar() const
Definition VPlan.h:3405
VPReplicateRecipe(Instruction *I, ArrayRef< VPValue * > Operands, bool IsSingleScalar, VPValue *Mask=nullptr, const VPIRFlags &Flags={}, VPIRMetadata Metadata={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:3357
~VPReplicateRecipe() override=default
static Type * computeScalarType(const Instruction *I, ArrayRef< VPValue * > Operands)
Compute the scalar result type for a VPReplicateRecipe wrapping I with Operands (excluding any predic...
VPReplicateRecipe * cloneWithOperands(ArrayRef< VPValue * > NewOperands)
Definition VPlan.h:3379
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition VPlan.h:3417
bool isPredicated() const
Definition VPlan.h:3407
VPReplicateRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3377
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3410
unsigned getOpcode() const
Definition VPlan.h:3429
VPValue * getMask()
Return the mask of a predicated VPReplicateRecipe.
Definition VPlan.h:3424
Instruction::BinaryOps getInductionOpcode() const
Definition VPlan.h:4254
VPValue * getStepValue() const
Definition VPlan.h:4227
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPScalarIVStepsRecipe.
Definition VPlan.h:4221
VPScalarIVStepsRecipe(const InductionDescriptor &IndDesc, VPValue *IV, VPValue *Step, VPValue *VF, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:4194
void setStartIndex(VPValue *StartIndex)
Set or add the StartIndex operand.
Definition VPlan.h:4240
VPScalarIVStepsRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:4206
VPValue * getStartIndex() const
Return the StartIndex, or null if known to be zero, valid only after unrolling.
Definition VPlan.h:4235
VPValue * getVFValue() const
Return the number of scalars to produce per unroll part, used to compute StartIndex during unrolling.
Definition VPlan.h:4231
VPScalarIVStepsRecipe(VPValue *IV, VPValue *Step, VPValue *VF, Instruction::BinaryOps Opcode, FastMathFlags FMFs, DebugLoc DL)
Definition VPlan.h:4187
~VPScalarIVStepsRecipe() override=default
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:4248
VPSingleDefRecipe is a base class for recipes that model a sequence of one or more output IR that def...
Definition VPlan.h:608
VPSingleDefRecipe(const unsigned char SC, ArrayRef< VPValue * > Operands, Value *UV, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:614
static bool classof(const VPValue *V)
Definition VPlan.h:666
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
Definition VPlan.h:679
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:623
const Instruction * getUnderlyingInstr() const
Definition VPlan.h:682
VPSingleDefRecipe(const unsigned char SC, ArrayRef< VPValue * > Operands, Type *ResultTy, Value *UV=nullptr, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:618
static bool classof(const VPUser *U)
Definition VPlan.h:671
LLVM_ABI_FOR_TEST LLVM_DUMP_METHOD void dump() const
Print this VPSingleDefRecipe to dbgs() (for debugging).
VPSingleDefRecipe * clone() override=0
Clone the current recipe.
VPSingleDefRecipe(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:610
LLVM_ABI_FOR_TEST VPSingleDefValue(VPSingleDefRecipe *Def, Value *UV=nullptr, Type *Ty=nullptr)
Construct a VPSingleDefValue. Must only be used by VPSingleDefRecipe.
Definition VPlan.cpp:169
This class can be used to assign names to VPValues.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition VPlanValue.h:384
void printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the operands to O.
Definition VPlan.cpp:1527
operand_range operands()
Definition VPlanValue.h:455
void setOperand(unsigned I, VPValue *New)
Definition VPlanValue.h:428
unsigned getNumOperands() const
Definition VPlanValue.h:422
operand_iterator op_end()
Definition VPlanValue.h:453
operand_iterator op_begin()
Definition VPlanValue.h:451
VPValue * getOperand(unsigned N) const
Definition VPlanValue.h:423
VPUser(ArrayRef< VPValue * > Operands)
Definition VPlanValue.h:403
iterator_range< const_operand_iterator > const_operand_range
Definition VPlanValue.h:449
iterator_range< operand_iterator > operand_range
Definition VPlanValue.h:448
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
Definition VPlanValue.h:50
Type * getScalarType() const
Returns the scalar type of this VPValue, dispatching based on the concrete subclass.
Definition VPlan.cpp:149
Value * getLiveInIRValue() const
Return the underlying IR value for a VPIRValue.
Definition VPlan.cpp:143
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Definition VPlan.cpp:130
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
Definition VPlanValue.h:75
void setUnderlyingValue(Value *Val)
Definition VPlanValue.h:208
unsigned getNumUsers() const
Definition VPlanValue.h:115
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition VPlan.h:2283
VPValue * getVFValue() const
Definition VPlan.h:2272
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Type * getSourceElementType() const
Definition VPlan.h:2269
int64_t getStride() const
Definition VPlan.h:2270
VPVectorEndPointerRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2304
VPValue * getOffset() const
Definition VPlan.h:2273
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition VPlan.h:2297
VPVectorEndPointerRecipe(VPValue *Ptr, VPValue *VF, Type *SourceElementTy, int64_t Stride, GEPNoWrapFlags GEPFlags, DebugLoc DL)
Definition VPlan.h:2259
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPVectorEndPointerRecipe.
Definition VPlan.h:2290
VPValue * getPointer() const
Definition VPlan.h:2271
void materializeOffset(unsigned Part=0)
Adds the offset operand to the recipe.
VPValue * getStride() const
Definition VPlan.h:2338
Type * getSourceElementType() const
Definition VPlan.h:2346
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition VPlan.h:2348
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition VPlan.h:2355
VPVectorPointerRecipe(VPValue *Ptr, Type *SourceElementTy, VPValue *Stride, GEPNoWrapFlags GEPFlags, DebugLoc DL)
Definition VPlan.h:2329
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPVectorPointerRecipe.
Definition VPlan.h:2372
VPVectorPointerRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2362
VPValue * getVFxPart() const
Definition VPlan.h:2340
A recipe for widening Call instructions using library calls.
Definition VPlan.h:2074
VPWidenCallRecipe(Value *UV, Function *Variant, ArrayRef< VPValue * > CallArguments, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL={})
Definition VPlan.h:2081
const_operand_range args() const
Definition VPlan.h:2122
VPWidenCallRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2100
operand_range args()
Definition VPlan.h:2121
Function * getCalledScalarFunction() const
Definition VPlan.h:2117
~VPWidenCallRecipe() override=default
VPWidenCanonicalIVRecipe(VPRegionValue *CanonicalIV, const VPIRFlags::WrapFlagsTy &Flags={false, false})
Definition VPlan.h:4065
~VPWidenCanonicalIVRecipe() override=default
VPValue * getStepValue() const
Definition VPlan.h:4098
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCanonicalIVRecipe.
Definition VPlan.h:4087
VPRegionValue * getCanonicalIV() const
Return the canonical IV being widened.
Definition VPlan.h:4094
VPWidenCanonicalIVRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:4072
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:4082
VPWidenCastRecipe is a recipe to create vector cast instructions.
Definition VPlan.h:1855
Instruction::CastOps getOpcode() const
Definition VPlan.h:1891
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override
Produce widened copies of the cast.
~VPWidenCastRecipe() override=default
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCastRecipe.
VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, CastInst *CI=nullptr, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1860
VPWidenCastRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1876
unsigned getOpcode() const
This recipe generates a GEP instruction.
Definition VPlan.h:2221
Type * getSourceElementType() const
Definition VPlan.h:2226
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenGEPRecipe.
Definition VPlan.h:2229
VPWidenGEPRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2212
~VPWidenGEPRecipe() override=default
VPWidenGEPRecipe(Type *SourceElementTy, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, DebugLoc DL=DebugLoc::getUnknown(), GetElementPtrInst *UV=nullptr)
Definition VPlan.h:2195
void execute(VPTransformState &State) override=0
Generate the phi nodes.
ArrayRef< const SCEVPredicate * > getNoWrapPredicates() const
Returns the SCEV predicates associated with this induction.
Definition VPlan.h:2537
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2556
static bool classof(const VPValue *V)
Definition VPlan.h:2499
void setStepValue(VPValue *V)
Update the step value of the recipe.
Definition VPlan.h:2518
VPValue * getBackedgeValue() override
Returns the incoming value from the loop backedge.
Definition VPlan.h:2541
VPIRValue * getStartValue() const
Returns the start value of the induction.
Definition VPlan.h:2511
unsigned getNumIncoming() const override
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:2526
PHINode * getPHINode() const
Returns the underlying PHINode if one exists, or null otherwise.
Definition VPlan.h:2529
VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start, VPValue *Step, const InductionDescriptor &IndDesc, DebugLoc DL)
Definition VPlan.h:2481
VPValue * getStepValue()
Returns the step value of the induction.
Definition VPlan.h:2514
VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start, VPValue *Step, const InductionDescriptor &IndDesc, Type *ResultTy, DebugLoc DL)
Definition VPlan.h:2487
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
Definition VPlan.h:2534
VPRecipeBase & getBackedgeRecipe() override
Returns the backedge value as a recipe.
Definition VPlan.h:2548
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:2494
const VPValue * getVFValue() const
Definition VPlan.h:2521
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:2504
const VPValue * getStepValue() const
Definition VPlan.h:2515
VPIRValue * getStartValue() const
Returns the start value of the induction.
Definition VPlan.h:2617
const TruncInst * getTruncInst() const
Definition VPlan.h:2633
void execute(VPTransformState &State) override
Generate the phi nodes.
Definition VPlan.h:2611
~VPWidenIntOrFpInductionRecipe() override=default
VPValue * getSplatVFValue() const
If the recipe has been unrolled, return the VPValue for the induction increment, otherwise return nul...
Definition VPlan.h:2621
VPWidenIntOrFpInductionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2603
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPIRValue *Start, VPValue *Step, VPValue *VF, const InductionDescriptor &IndDesc, const VPIRFlags &Flags, DebugLoc DL)
Definition VPlan.h:2577
TruncInst * getTruncInst()
Returns the first defined value as TruncInst, if it is one or nullptr otherwise.
Definition VPlan.h:2632
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPIRValue *Start, VPValue *Step, VPValue *VF, const InductionDescriptor &IndDesc, TruncInst *Trunc, const VPIRFlags &Flags, DebugLoc DL)
Definition VPlan.h:2586
VPValue * getLastUnrolledPartOperand()
Returns the VPValue representing the value of this induction at the last unrolled part,...
Definition VPlan.h:2643
unsigned getNumIncoming() const override
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:2628
bool isCanonical() const
Returns true if the induction is canonical, i.e.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
A recipe for widening vector intrinsics.
Definition VPlan.h:1902
VPWidenIntrinsicRecipe(Intrinsic::ID VectorIntrinsicID, ArrayRef< VPValue * > CallArguments, Type *Ty, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1952
CallInst * createVectorCall(VPTransformState &State)
Helper function to produce the widened intrinsic call.
Intrinsic::ID getVectorIntrinsicID() const
Return the ID of the intrinsic.
Definition VPlan.h:2006
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool mayReadFromMemory() const
Returns true if the intrinsic may read from memory.
Definition VPlan.h:2012
StringRef getIntrinsicName() const
Return the name of the intrinsic as a string.
static InstructionCost computeCallCost(Intrinsic::ID ID, ArrayRef< const VPValue * > Operands, const VPRecipeWithIRFlags &R, ElementCount VF, VPCostContext &Ctx)
Compute the cost of a vector intrinsic with ID and Operands.
VPWidenIntrinsicRecipe(CallInst &CI, Intrinsic::ID VectorIntrinsicID, ArrayRef< VPValue * > CallArguments, Type *Ty, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1938
bool mayHaveSideEffects() const
Returns true if the intrinsic may have side-effects.
Definition VPlan.h:2018
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:1988
static bool classof(const VPValue *V)
Definition VPlan.h:1983
VPWidenIntrinsicRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1963
VPWidenIntrinsicRecipe(const unsigned char SC, Intrinsic::ID VectorIntrinsicID, ArrayRef< VPValue * > CallArguments, Type *Ty, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1916
bool mayWriteToMemory() const
Returns true if the intrinsic may write to memory.
Definition VPlan.h:2015
~VPWidenIntrinsicRecipe() override=default
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:1973
LLVM_ABI_FOR_TEST bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override
Produce a widened version of the vector intrinsic.
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this vector intrinsic.
static bool classof(const VPUser *U)
Definition VPlan.h:1978
static InstructionCost computeMemIntrinsicCost(Intrinsic::ID IID, Type *Ty, bool IsMasked, Align Alignment, VPCostContext &Ctx)
Helper function for computing the cost of vector memory intrinsic.
void execute(VPTransformState &State) override
Produce a widened version of the vector memory intrinsic.
~VPWidenMemIntrinsicRecipe() override=default
VPWidenMemIntrinsicRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2051
VPWidenMemIntrinsicRecipe(Intrinsic::ID VectorIntrinsicID, ArrayRef< VPValue * > CallArguments, Type *Ty, Align Alignment, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2037
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this vector memory intrinsic.
A common mixin class for widening memory operations.
Definition VPlan.h:3685
bool IsMasked
Whether the memory access is masked.
Definition VPlan.h:3696
bool isConsecutive() const
Return whether the loaded-from / stored-to addresses are consecutive.
Definition VPlan.h:3719
virtual ~VPWidenMemoryRecipe()=default
Instruction & Ingredient
Definition VPlan.h:3687
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const
Return the cost of this VPWidenMemoryRecipe.
Instruction & getIngredient() const
Definition VPlan.h:3741
bool Consecutive
Whether the accessed addresses are consecutive.
Definition VPlan.h:3693
virtual const VPRecipeBase * getAsRecipe() const =0
VPValue * getMask() const
Return the mask used by this recipe.
Definition VPlan.h:3729
Align Alignment
Alignment information for this memory access.
Definition VPlan.h:3690
VPWidenMemoryRecipe(Instruction &I, bool Consecutive, const VPIRMetadata &Metadata)
Definition VPlan.h:3706
virtual VPRecipeBase * getAsRecipe()=0
Return a VPRecipeBase* to the current object.
bool isMasked() const
Returns true if the recipe is masked.
Definition VPlan.h:3725
void setMask(VPValue *Mask)
Definition VPlan.h:3698
Align getAlign() const
Returns the alignment of the memory access.
Definition VPlan.h:3736
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition VPlan.h:3722
A recipe for widened phis.
Definition VPlan.h:2701
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:2743
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenPHIRecipe.
VPWidenPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2721
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPWidenPHIRecipe() override=default
VPWidenPHIRecipe(ArrayRef< VPValue * > IncomingValues, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new VPWidenPHIRecipe with incoming values IncomingValues, debug location DL and Name.
Definition VPlan.h:2708
void execute(VPTransformState &State) override
Generate the phi/select nodes.
VPWidenPointerInductionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2670
~VPWidenPointerInductionRecipe() override=default
bool onlyScalarsGenerated(bool IsScalable)
Returns true if only scalar values will be generated.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate vector values for the pointer induction.
Definition VPlan.h:2679
VPWidenPointerInductionRecipe(PHINode *Phi, VPValue *Start, VPValue *Step, VPValue *NumUnrolledElems, const InductionDescriptor &IndDesc, DebugLoc DL)
Create a new VPWidenPointerInductionRecipe for Phi with start value Start and the number of elements ...
Definition VPlan.h:2660
VPWidenRecipe is a recipe for producing a widened instruction using the opcode and operands of the re...
Definition VPlan.h:1794
VPWidenRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1815
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:1844
VPWidenRecipe(Instruction &I, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL={})
Definition VPlan.h:1798
VPWidenRecipe(unsigned Opcode, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL={})
Definition VPlan.h:1805
~VPWidenRecipe() override=default
VPWidenRecipe * cloneWithOperands(ArrayRef< VPValue * > NewOperands)
Definition VPlan.h:1817
unsigned getOpcode() const
Definition VPlan.h:1834
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient outp...
Definition VPlan.h:4696
VPIRValue * getLiveIn(Value *V) const
Return the live-in VPIRValue for V, if there is one or nullptr otherwise.
Definition VPlan.h:5021
LLVM_ABI_FOR_TEST void printDOT(raw_ostream &O) const
Print this VPlan in DOT format to O.
Definition VPlan.cpp:1177
friend class VPSlotTracker
Definition VPlan.h:4698
std::string getName() const
Return a string with the name of the plan and the applicable VFs and UFs.
Definition VPlan.cpp:1153
bool hasVF(ElementCount VF) const
Definition VPlan.h:4919
ElementCount getSingleVF() const
Returns the single VF of the plan, asserting that the plan has exactly one VF.
Definition VPlan.h:4932
const DataLayout & getDataLayout() const
Definition VPlan.h:4901
LLVMContext & getContext() const
Definition VPlan.h:4897
VPBasicBlock * getEntry()
Definition VPlan.h:4792
Type * getIndexType() const
The type of the canonical induction variable of the vector loop.
Definition VPlan.h:5109
void setName(const Twine &newName)
Definition VPlan.h:4965
bool hasScalableVF() const
Definition VPlan.h:4920
VPValue * getTripCount() const
The trip count of the original loop.
Definition VPlan.h:4855
VPValue * getOrCreateBackedgeTakenCount()
The backedge taken count of the original loop.
Definition VPlan.h:4876
iterator_range< SmallSetVector< ElementCount, 2 >::iterator > vectorFactors() const
Returns an iterator range over all VFs of the plan.
Definition VPlan.h:4926
VPIRBasicBlock * getExitBlock(BasicBlock *IRBB) const
Return the VPIRBasicBlock corresponding to IRBB.
Definition VPlan.cpp:902
LLVM_ABI_FOR_TEST ~VPlan()
Definition VPlan.cpp:885
VPIRValue * getOrAddLiveIn(VPIRValue *V)
Definition VPlan.h:4983
bool isExitBlock(VPBlockBase *VPBB)
Returns true if VPBB is an exit block.
Definition VPlan.cpp:910
const VPBasicBlock * getEntry() const
Definition VPlan.h:4793
friend class VPlanPrinter
Definition VPlan.h:4697
VPIRValue * getFalse()
Return a VPIRValue wrapping i1 false.
Definition VPlan.h:4992
VPIRValue * getConstantInt(const APInt &Val)
Return a VPIRValue wrapping a ConstantInt with the given APInt value.
Definition VPlan.h:5015
VPSymbolicValue & getVFxUF()
Returns VF * UF of the vector loop region.
Definition VPlan.h:4895
VPIRValue * getAllOnesValue(Type *Ty)
Return a VPIRValue wrapping the AllOnes value of type Ty.
Definition VPlan.h:4998
VPRegionBlock * createReplicateRegion(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name="")
Create a new replicate region with Entry, Exiting and Name.
Definition VPlan.h:5070
VPIRBasicBlock * createEmptyVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock wrapping IRBB, but do not create VPIRInstructions wrapping the instructions i...
Definition VPlan.cpp:1312
auto getLiveIns() const
Return the list of live-in VPValues available in the VPlan.
Definition VPlan.h:5024
bool hasUF(unsigned UF) const
Definition VPlan.h:4944
ArrayRef< VPIRBasicBlock * > getExitBlocks() const
Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of the original scalar loop.
Definition VPlan.h:4845
VPlan(BasicBlock *ScalarHeaderBB, Type *IdxTy)
Construct a VPlan with a new VPBasicBlock as entry, a VPIRBasicBlock wrapping ScalarHeaderBB and vect...
Definition VPlan.h:4773
VPSymbolicValue & getVectorTripCount()
The vector trip count.
Definition VPlan.h:4885
VPValue * getBackedgeTakenCount() const
Definition VPlan.h:4882
VPIRValue * getOrAddLiveIn(Value *V)
Gets the live-in VPIRValue for V or adds a new live-in (if none exists yet) for V.
Definition VPlan.h:4969
VPRegionBlock * createLoopRegion(Type *CanIVTy, DebugLoc DL, const std::string &Name="", VPBlockBase *Entry=nullptr, VPBlockBase *Exiting=nullptr)
Create a new loop region with a canonical IV using CanIVTy and DL.
Definition VPlan.h:5058
VPIRValue * getZero(Type *Ty)
Return a VPIRValue wrapping the null value of type Ty.
Definition VPlan.h:4995
void setVF(ElementCount VF)
Definition VPlan.h:4907
bool isUnrolled() const
Returns true if the VPlan already has been unrolled, i.e.
Definition VPlan.h:4960
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition VPlan.cpp:1068
bool hasEarlyExit() const
Returns true if the VPlan is based on a loop with an early exit.
Definition VPlan.h:5092
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this plan.
Definition VPlan.cpp:1050
LLVM_ABI_FOR_TEST bool isOuterLoop() const
Returns true if this VPlan is for an outer loop, i.e., its vector loop region contains a nested loop ...
Definition VPlan.cpp:1083
unsigned getConcreteUF() const
Returns the concrete UF of the plan, after unrolling.
Definition VPlan.h:4947
VPIRValue * getConstantInt(unsigned BitWidth, uint64_t Val, bool IsSigned=false)
Return a VPIRValue wrapping a ConstantInt with the given bitwidth and value.
Definition VPlan.h:5009
const VPBasicBlock * getMiddleBlock() const
Definition VPlan.h:4830
void setTripCount(VPValue *NewTripCount)
Set the trip count assuming it is currently null; if it is not - use resetTripCount().
Definition VPlan.h:4862
void resetTripCount(VPValue *NewTripCount)
Resets the trip count for the VPlan.
Definition VPlan.h:4869
VPBasicBlock * getMiddleBlock()
Returns the 'middle' block of the plan, that is the block that selects whether to execute the scalar ...
Definition VPlan.h:4821
void setEntry(VPBasicBlock *VPBB)
Definition VPlan.h:4781
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
Definition VPlan.h:5047
LLVM_ABI_FOR_TEST VPIRBasicBlock * createVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock from IRBB containing VPIRInstructions for all instructions in IRBB,...
Definition VPlan.cpp:1318
void removeVF(ElementCount VF)
Remove VF from the plan.
Definition VPlan.h:4914
VPIRValue * getTrue()
Return a VPIRValue wrapping i1 true.
Definition VPlan.h:4989
VPBasicBlock * getVectorPreheader() const
Returns the preheader of the vector loop region, if one exists, or null otherwise.
Definition VPlan.h:4797
LLVM_DUMP_METHOD void dump() const
Dump the plan to stderr (for debugging).
Definition VPlan.cpp:1183
VPSymbolicValue & getUF()
Returns the UF of the vector loop region.
Definition VPlan.h:4892
bool hasScalarVFOnly() const
Definition VPlan.h:4937
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
Definition VPlan.h:4835
void execute(VPTransformState *State)
Generate the IR code for this VPlan.
Definition VPlan.cpp:920
LLVM_ABI_FOR_TEST void print(raw_ostream &O) const
Print this VPlan to O.
Definition VPlan.cpp:1136
void addVF(ElementCount VF)
Definition VPlan.h:4905
VPIRBasicBlock * getScalarHeader() const
Return the VPIRBasicBlock wrapping the header of the scalar loop.
Definition VPlan.h:4841
void printLiveIns(raw_ostream &O) const
Print the live-ins of this VPlan to O.
Definition VPlan.cpp:1092
VPSymbolicValue & getVF()
Returns the VF of the vector loop region.
Definition VPlan.h:4888
void setUF(unsigned UF)
Definition VPlan.h:4952
const VPSymbolicValue & getVF() const
Definition VPlan.h:4889
bool hasScalarTail() const
Returns true if the scalar tail may execute after the vector loop, i.e.
Definition VPlan.h:5102
LLVM_ABI_FOR_TEST VPlan * duplicate()
Clone the current VPlan, update all VPValues of the new VPlan and cloned recipes to refer to the clon...
Definition VPlan.cpp:1224
VPIRValue * getConstantInt(Type *Ty, uint64_t Val, bool IsSigned=false)
Return a VPIRValue wrapping a ConstantInt with the given type and value.
Definition VPlan.h:5003
LLVM Value Representation.
Definition Value.h:75
Increasing range of size_t indices.
Definition STLExtras.h:2506
typename base_list_type::const_reverse_iterator const_reverse_iterator
Definition ilist.h:124
typename base_list_type::reverse_iterator reverse_iterator
Definition ilist.h:123
typename base_list_type::const_iterator const_iterator
Definition ilist.h:122
An intrusive list with ownership and callbacks specified/controlled by ilist_traits,...
Definition ilist.h:328
A range adaptor for a pair of iterators.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
This file defines classes to implement an intrusive doubly linked list class (i.e.
This file defines the ilist_node class template, which is a convenient base class for creating classe...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
LLVM_ABI AttributeSet getFnAttributes(LLVMContext &C, ID id)
Return the function attributes for an intrinsic.
std::variant< std::monostate, Loc::Single, Loc::Multi, Loc::MMI, Loc::EntryValue > Variant
Alias for the std::variant specialization base class of DbgVariable.
Definition DwarfDebug.h:190
CastInfo helper for casting from VPRecipeBase to a mixin class that is not part of the VPRecipeBase c...
Definition VPlan.h:4267
This is an optimization pass for GlobalISel generic memory operations.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
@ Offset
Definition DWP.cpp:558
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iterable types.
Definition STLExtras.h:830
LLVM_PACKED_END
Definition VPlan.h:1112
auto cast_if_present(const Y &Val)
cast_if_present<X> - Functionally identical to cast, except that a null value is accepted.
Definition Casting.h:683
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1764
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
Definition STLExtras.h:840
ReductionStyle getReductionStyle(bool InLoop, bool Ordered, unsigned ScaleFactor)
Definition VPlan.h:2798
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2553
Type * toScalarizedTy(Type *Ty)
A helper for converting vectorized types to scalarized (non-vector) types.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI void getMetadataToPropagate(Instruction *Inst, SmallVectorImpl< std::pair< unsigned, MDNode * > > &Metadata)
Add metadata from Inst to Metadata, if it can be preserved after vectorization.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
auto cast_or_null(const Y &Val)
Definition Casting.h:714
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
MemoryEffectsBase< IRMemLocation > MemoryEffects
Summary of how a function affects memory in the program.
Definition ModRef.h:356
LLVM_ABI bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
auto map_range(ContainerTy &&C, FuncTy F)
Return a range that applies F to the elements of C.
Definition STLExtras.h:365
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
auto reverse(ContainerTy &&C)
Definition STLExtras.h:407
UncountableExitStyle
Different methods of handling early exits.
Definition VPlan.h:79
@ MaskedHandleExitInScalarLoop
All memory operations other than the load(s) required to determine whether an uncountable exit occurr...
Definition VPlan.h:89
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
LLVM_ABI Type * computeScalarTypeForInstruction(unsigned Opcode, ArrayRef< VPValue * > Operands)
Compute the scalar result type for an IR Opcode given Operands.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition STLExtras.h:322
@ Other
Any other memory.
Definition ModRef.h:68
RecurKind
These are the kinds of recurrences that we support.
@ Mul
Product of integers.
@ Add
Sum of integers.
@ FAdd
Sum of floats.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition STLExtras.h:2011
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned BitWidth
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:2018
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1771
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1946
std::variant< RdxOrdered, RdxInLoop, RdxUnordered > ReductionStyle
Definition VPlan.h:2796
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
@ Increment
Incrementally increasing token ID.
Definition AllocToken.h:26
std::unique_ptr< VPlan > VPlanPtr
Definition VPlan.h:74
Implement std::hash so that hash_code can be used in STL containers.
Definition BitVector.h:861
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:863
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
static Bitfield::Type get(StorageType Packed)
Unpacks the field from the Packed value.
Definition Bitfields.h:207
static void set(StorageType &Packed, typename Bitfield::Type Value)
Sets the typed value in the provided Packed value.
Definition Bitfields.h:223
This struct provides a method for customizing the way a cast is performed.
Definition Casting.h:476
Provides a cast trait that strips const from types to make it easier to implement a const-version of ...
Definition Casting.h:388
This cast trait just provides the default implementation of doCastIfPossible to make CastInfo special...
Definition Casting.h:309
Provides a cast trait that uses a defined pointer to pointer cast as a base for reference-to-referenc...
Definition Casting.h:423
This reduction is in-loop.
Definition VPlan.h:2790
Possible variants of a reduction.
Definition VPlan.h:2788
This reduction is unordered with the partial result scaled down by some factor.
Definition VPlan.h:2793
unsigned VFScaleFactor
Definition VPlan.h:2794
A MapVector that performs no allocations if smaller than a certain size.
Definition MapVector.h:334
An overlay on VPIRValue for VPValues that wrap a ConstantInt.
Definition VPlanValue.h:264
Struct to hold various analyses needed for cost computations.
void execute(VPTransformState &State) override
Generate the phi nodes.
VPFirstOrderRecurrencePHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2759
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this first-order recurrence phi recipe.
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2771
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPFirstOrderRecurrencePHIRecipe(PHINode *Phi, VPValue &Start, VPValue &BackedgeValue)
Definition VPlan.h:2750
DisjointFlagsTy(bool IsDisjoint)
Definition VPlan.h:726
NonNegFlagsTy(bool IsNonNeg)
Definition VPlan.h:731
TruncFlagsTy(bool HasNUW, bool HasNSW)
Definition VPlan.h:721
WrapFlagsTy(bool HasNUW, bool HasNSW)
Definition VPlan.h:714
An overlay for VPIRInstructions wrapping PHI nodes enabling convenient use of cast/dyn_cast/isa and exec...
Definition VPlan.h:1762
PHINode & getIRPhi()
Definition VPlan.h:1775
VPIRPhi(PHINode &PN)
Definition VPlan.h:1763
static bool classof(const VPRecipeBase *U)
Definition VPlan.h:1765
static bool classof(const VPUser *U)
Definition VPlan.h:1770
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:1786
A VPValue representing a live-in from the input IR or a constant.
Definition VPlanValue.h:246
static bool classof(const VPUser *U)
Definition VPlan.h:1662
VPPhi * clone() override
Clone the current recipe.
Definition VPlan.h:1677
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:1692
static bool classof(const VPSingleDefRecipe *SDR)
Definition VPlan.h:1672
static bool classof(const VPValue *V)
Definition VPlan.h:1667
VPPhi(ArrayRef< VPValue * > Operands, const VPIRFlags &Flags, DebugLoc DL, const Twine &Name="", Type *ResultTy=nullptr)
Definition VPlan.h:1657
A pure-virtual common base class for recipes defining a single VPValue and using IR flags.
Definition VPlan.h:1116
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:1157
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:1128
InstructionCost getCostForRecipeWithOpcode(unsigned Opcode, ElementCount VF, VPCostContext &Ctx) const
Compute the cost for this recipe for VF, using Opcode and Ctx.
VPRecipeWithIRFlags(const unsigned char SC, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1117
static bool classof(const VPValue *V)
Definition VPlan.h:1150
void execute(VPTransformState &State) override=0
The method which generates the output IR instructions that correspond to this VPRecipe,...
VPRecipeWithIRFlags(const unsigned char SC, ArrayRef< VPValue * > Operands, Type *ResultTy, const VPIRFlags &Flags, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1122
VPRecipeWithIRFlags * clone() override=0
Clone the current recipe.
static bool classof(const VPUser *U)
Definition VPlan.h:1145
A symbolic live-in VPValue, used for values like vector trip count, VF, and VFxUF.
Definition VPlanValue.h:286
VPTransformState holds information passed down when "executing" a VPlan, needed for generating the ou...
A recipe for widening load operations with vector-predication intrinsics, using the address to load f...
Definition VPlan.h:3796
const VPRecipeBase * getAsRecipe() const override
Definition VPlan.h:3833
LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override
Generate the wide load or gather.
VPWidenLoadEVLRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3806
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPRecipeBase * getAsRecipe() override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:3832
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenLoadEVLRecipe.
VPValue * getEVL() const
Return the EVL operand.
Definition VPlan.h:3813
VPWidenLoadEVLRecipe(VPWidenLoadRecipe &L, VPValue *Addr, VPValue &EVL, VPValue *Mask)
Definition VPlan.h:3797
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3823
A recipe for widening load operations, using the address to load from and an optional mask.
Definition VPlan.h:3747
VPWidenLoadRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask, bool Consecutive, const VPIRMetadata &Metadata, DebugLoc DL)
Definition VPlan.h:3748
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3773
const VPRecipeBase * getAsRecipe() const override
Definition VPlan.h:3783
void execute(VPTransformState &State) override
Generate a wide load or gather.
VPWidenLoadRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3756
VP_CLASSOF_IMPL(VPRecipeBase::VPWidenLoadSC)
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenLoadRecipe.
Definition VPlan.h:3767
VPRecipeBase * getAsRecipe() override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:3782
A recipe for widening store operations with vector-predication intrinsics, using the value to store,...
Definition VPlan.h:3898
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition VPlan.h:3914
LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override
Generate the wide store or scatter.
VPWidenStoreEVLRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3907
const VPRecipeBase * getAsRecipe() const override
Definition VPlan.h:3942
VPWidenStoreEVLRecipe(VPWidenStoreRecipe &S, VPValue *Addr, VPValue *StoredVal, VPValue &EVL, VPValue *Mask)
Definition VPlan.h:3899
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3927
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenStoreEVLRecipe.
VPRecipeBase * getAsRecipe() override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:3941
VPValue * getEVL() const
Return the EVL operand.
Definition VPlan.h:3917
A recipe for widening store operations, using the stored value, the address to store to and an option...
Definition VPlan.h:3845
VPRecipeBase * getAsRecipe() override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:3884
VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal, VPValue *Mask, bool Consecutive, const VPIRMetadata &Metadata, DebugLoc DL)
Definition VPlan.h:3846
VP_CLASSOF_IMPL(VPRecipeBase::VPWidenStoreSC)
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition VPlan.h:3863
VPWidenStoreRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3854
const VPRecipeBase * getAsRecipe() const override
Definition VPlan.h:3885
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenStoreRecipe.
Definition VPlan.h:3869
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3875
static VPMixin * castFailed()
Definition VPlan.h:4285
static bool isPossible(VPRecipeBase *R)
Used by isa.
Definition VPlan.h:4276
static VPMixin * doCast(VPRecipeBase *R)
Used by cast.
Definition VPlan.h:4279