LLVM 17.0.0git
VPlan.h
Go to the documentation of this file.
1//===- VPlan.h - Represent A Vectorizer Plan --------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file contains the declarations of the Vectorization Plan base classes:
11/// 1. VPBasicBlock and VPRegionBlock that inherit from a common pure virtual
12/// VPBlockBase, together implementing a Hierarchical CFG;
13/// 2. Pure virtual VPRecipeBase serving as the base class for recipes contained
14/// within VPBasicBlocks;
15/// 3. VPInstruction, a concrete Recipe and VPUser modeling a single planned
16/// instruction;
17/// 4. The VPlan class holding a candidate for vectorization;
18/// 5. The VPlanPrinter class providing a way to print a plan in dot format;
19/// These are documented in docs/VectorizationPlan.rst.
20//
21//===----------------------------------------------------------------------===//
22
23#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
24#define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
25
26#include "VPlanValue.h"
27#include "llvm/ADT/DenseMap.h"
29#include "llvm/ADT/MapVector.h"
33#include "llvm/ADT/Twine.h"
34#include "llvm/ADT/ilist.h"
35#include "llvm/ADT/ilist_node.h"
38#include "llvm/IR/DebugLoc.h"
39#include "llvm/IR/FMF.h"
41#include <algorithm>
42#include <cassert>
43#include <cstddef>
44#include <string>
45
46namespace llvm {
47
48class BasicBlock;
49class DominatorTree;
50class InductionDescriptor;
51class InnerLoopVectorizer;
52class IRBuilderBase;
53class LoopInfo;
54class PredicateScalarEvolution;
55class raw_ostream;
56class RecurrenceDescriptor;
57class SCEV;
58class Type;
59class VPBasicBlock;
60class VPRegionBlock;
61class VPlan;
62class VPReplicateRecipe;
63class VPlanSlp;
64class Value;
65
66namespace Intrinsic {
67typedef unsigned ID;
68}
69
70/// Returns a calculation for the total number of elements for a given \p VF.
71/// For fixed width vectors this value is a constant, whereas for scalable
72/// vectors it is an expression determined at runtime.
73Value *getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF);
74
75/// Return a value for Step multiplied by VF.
76Value *createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF,
77 int64_t Step);
78
79const SCEV *createTripCountSCEV(Type *IdxTy, PredicatedScalarEvolution &PSE);
80
81/// A range of powers-of-2 vectorization factors with fixed start and
82/// adjustable end. The range includes start and excludes end, e.g.,:
83/// [1, 9) = {1, 2, 4, 8}
84struct VFRange {
85 // A power of 2.
87
88 // Need not be a power of 2. If End <= Start range is empty.
90
91 bool isEmpty() const {
93 }
94
96 : Start(Start), End(End) {
98 "Both Start and End should have the same scalable flag");
100 "Expected Start to be a power of 2");
101 }
102};
103
104using VPlanPtr = std::unique_ptr<VPlan>;
105
106/// In what follows, the term "input IR" refers to code that is fed into the
107/// vectorizer whereas the term "output IR" refers to code that is generated by
108/// the vectorizer.
109
110/// VPLane provides a way to access lanes in both fixed width and scalable
111/// vectors, where for the latter the lane index sometimes needs calculating
112/// as a runtime expression.
113class VPLane {
114public:
115 /// Kind describes how to interpret Lane.
116 enum class Kind : uint8_t {
117 /// For First, Lane is the index into the first N elements of a
118 /// fixed-vector <N x <ElTy>> or a scalable vector <vscale x N x <ElTy>>.
119 First,
120 /// For ScalableLast, Lane is the offset from the start of the last
121 /// N-element subvector in a scalable vector <vscale x N x <ElTy>>. For
122 /// example, a Lane of 0 corresponds to lane `(vscale - 1) * N`, a Lane of
123 /// 1 corresponds to `((vscale - 1) * N) + 1`, etc.
125 };
126
127private:
128 /// in [0..VF)
129 unsigned Lane;
130
131 /// Indicates how the Lane should be interpreted, as described above.
132 Kind LaneKind;
133
134public:
135 VPLane(unsigned Lane, Kind LaneKind) : Lane(Lane), LaneKind(LaneKind) {}
136
138
140 unsigned LaneOffset = VF.getKnownMinValue() - 1;
141 Kind LaneKind;
142 if (VF.isScalable())
143 // In this case 'LaneOffset' refers to the offset from the start of the
144 // last subvector with VF.getKnownMinValue() elements.
146 else
147 LaneKind = VPLane::Kind::First;
148 return VPLane(LaneOffset, LaneKind);
149 }
150
151 /// Returns a compile-time known value for the lane index and asserts if the
152 /// lane can only be calculated at runtime.
153 unsigned getKnownLane() const {
154 assert(LaneKind == Kind::First);
155 return Lane;
156 }
157
158 /// Returns an expression describing the lane index that can be used at
159 /// runtime.
160 Value *getAsRuntimeExpr(IRBuilderBase &Builder, const ElementCount &VF) const;
161
162 /// Returns the Kind of lane offset.
163 Kind getKind() const { return LaneKind; }
164
165 /// Returns true if this is the first lane of the whole vector.
166 bool isFirstLane() const { return Lane == 0 && LaneKind == Kind::First; }
167
168 /// Maps the lane to a cache index based on \p VF.
169 unsigned mapToCacheIndex(const ElementCount &VF) const {
170 switch (LaneKind) {
172 assert(VF.isScalable() && Lane < VF.getKnownMinValue());
173 return VF.getKnownMinValue() + Lane;
174 default:
175 assert(Lane < VF.getKnownMinValue());
176 return Lane;
177 }
178 }
179
180 /// Returns the maxmimum number of lanes that we are able to consider
181 /// caching for \p VF.
182 static unsigned getNumCachedLanes(const ElementCount &VF) {
183 return VF.getKnownMinValue() * (VF.isScalable() ? 2 : 1);
184 }
185};
186
187/// VPIteration represents a single point in the iteration space of the output
188/// (vectorized and/or unrolled) IR loop.
190 /// in [0..UF)
191 unsigned Part;
192
194
195 VPIteration(unsigned Part, unsigned Lane,
197 : Part(Part), Lane(Lane, Kind) {}
198
199 VPIteration(unsigned Part, const VPLane &Lane) : Part(Part), Lane(Lane) {}
200
201 bool isFirstIteration() const { return Part == 0 && Lane.isFirstLane(); }
202};
203
204/// VPTransformState holds information passed down when "executing" a VPlan,
205/// needed for generating the output IR.
210 : VF(VF), UF(UF), LI(LI), DT(DT), Builder(Builder), ILV(ILV), Plan(Plan),
211 LVer(nullptr) {}
212
213 /// The chosen Vectorization and Unroll Factors of the loop being vectorized.
215 unsigned UF;
216
217 /// Hold the indices to generate specific scalar instructions. Null indicates
218 /// that all instances are to be generated, using either scalar or vector
219 /// instructions.
220 std::optional<VPIteration> Instance;
221
222 struct DataState {
223 /// A type for vectorized values in the new loop. Each value from the
224 /// original loop, when vectorized, is represented by UF vector values in
225 /// the new unrolled loop, where UF is the unroll factor.
227
229
233
234 /// Get the generated Value for a given VPValue and a given Part. Note that
235 /// as some Defs are still created by ILV and managed in its ValueMap, this
236 /// method will delegate the call to ILV in such cases in order to provide
237 /// callers a consistent API.
238 /// \see set.
239 Value *get(VPValue *Def, unsigned Part);
240
241 /// Get the generated Value for a given VPValue and given Part and Lane.
242 Value *get(VPValue *Def, const VPIteration &Instance);
243
244 bool hasVectorValue(VPValue *Def, unsigned Part) {
245 auto I = Data.PerPartOutput.find(Def);
246 return I != Data.PerPartOutput.end() && Part < I->second.size() &&
247 I->second[Part];
248 }
249
250 bool hasAnyVectorValue(VPValue *Def) const {
251 return Data.PerPartOutput.contains(Def);
252 }
253
255 auto I = Data.PerPartScalars.find(Def);
256 if (I == Data.PerPartScalars.end())
257 return false;
258 unsigned CacheIdx = Instance.Lane.mapToCacheIndex(VF);
259 return Instance.Part < I->second.size() &&
260 CacheIdx < I->second[Instance.Part].size() &&
261 I->second[Instance.Part][CacheIdx];
262 }
263
264 /// Set the generated Value for a given VPValue and a given Part.
265 void set(VPValue *Def, Value *V, unsigned Part) {
266 if (!Data.PerPartOutput.count(Def)) {
268 Data.PerPartOutput[Def] = Entry;
269 }
270 Data.PerPartOutput[Def][Part] = V;
271 }
272 /// Reset an existing vector value for \p Def and a given \p Part.
273 void reset(VPValue *Def, Value *V, unsigned Part) {
274 auto Iter = Data.PerPartOutput.find(Def);
275 assert(Iter != Data.PerPartOutput.end() &&
276 "need to overwrite existing value");
277 Iter->second[Part] = V;
278 }
279
280 /// Set the generated scalar \p V for \p Def and the given \p Instance.
281 void set(VPValue *Def, Value *V, const VPIteration &Instance) {
282 auto Iter = Data.PerPartScalars.insert({Def, {}});
283 auto &PerPartVec = Iter.first->second;
284 while (PerPartVec.size() <= Instance.Part)
285 PerPartVec.emplace_back();
286 auto &Scalars = PerPartVec[Instance.Part];
287 unsigned CacheIdx = Instance.Lane.mapToCacheIndex(VF);
288 while (Scalars.size() <= CacheIdx)
289 Scalars.push_back(nullptr);
290 assert(!Scalars[CacheIdx] && "should overwrite existing value");
291 Scalars[CacheIdx] = V;
292 }
293
294 /// Reset an existing scalar value for \p Def and a given \p Instance.
295 void reset(VPValue *Def, Value *V, const VPIteration &Instance) {
296 auto Iter = Data.PerPartScalars.find(Def);
297 assert(Iter != Data.PerPartScalars.end() &&
298 "need to overwrite existing value");
299 assert(Instance.Part < Iter->second.size() &&
300 "need to overwrite existing value");
301 unsigned CacheIdx = Instance.Lane.mapToCacheIndex(VF);
302 assert(CacheIdx < Iter->second[Instance.Part].size() &&
303 "need to overwrite existing value");
304 Iter->second[Instance.Part][CacheIdx] = V;
305 }
306
307 /// Add additional metadata to \p To that was not present on \p Orig.
308 ///
309 /// Currently this is used to add the noalias annotations based on the
310 /// inserted memchecks. Use this for instructions that are *cloned* into the
311 /// vector loop.
312 void addNewMetadata(Instruction *To, const Instruction *Orig);
313
314 /// Add metadata from one instruction to another.
315 ///
316 /// This includes both the original MDs from \p From and additional ones (\see
317 /// addNewMetadata). Use this for *newly created* instructions in the vector
318 /// loop.
320
321 /// Similar to the previous function but it adds the metadata to a
322 /// vector of instructions.
324
325 /// Set the debug location in the builder using the debug location in \p V.
326 void setDebugLocFromInst(const Value *V);
327
328 /// Hold state information used when constructing the CFG of the output IR,
329 /// traversing the VPBasicBlocks and generating corresponding IR BasicBlocks.
330 struct CFGState {
331 /// The previous VPBasicBlock visited. Initially set to null.
333
334 /// The previous IR BasicBlock created or used. Initially set to the new
335 /// header BasicBlock.
336 BasicBlock *PrevBB = nullptr;
337
338 /// The last IR BasicBlock in the output IR. Set to the exit block of the
339 /// vector loop.
340 BasicBlock *ExitBB = nullptr;
341
342 /// A mapping of each VPBasicBlock to the corresponding BasicBlock. In case
343 /// of replication, maps the BasicBlock of the last replica created.
345
346 CFGState() = default;
347
348 /// Returns the BasicBlock* mapped to the pre-header of the loop region
349 /// containing \p R.
352
353 /// Hold a pointer to LoopInfo to register new basic blocks in the loop.
355
356 /// Hold a pointer to Dominator Tree to register new basic blocks in the loop.
358
359 /// Hold a reference to the IRBuilder used to generate output IR code.
361
363
364 /// Hold the canonical scalar IV of the vector loop (start=0, step=VF*UF).
365 Value *CanonicalIV = nullptr;
366
367 /// Hold a pointer to InnerLoopVectorizer to reuse its IR generation methods.
369
370 /// Pointer to the VPlan code is generated for.
372
373 /// Holds recipes that may generate a poison value that is used after
374 /// vectorization, even when their operands are not poison.
376
377 /// The loop object for the current parent region, or nullptr.
379
380 /// LoopVersioning. It's only set up (non-null) if memchecks were
381 /// used.
382 ///
383 /// This is currently only used to add no-alias metadata based on the
384 /// memchecks. The actually versioning is performed manually.
385 std::unique_ptr<LoopVersioning> LVer;
386};
387
388/// VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
389/// A VPBlockBase can be either a VPBasicBlock or a VPRegionBlock.
391 friend class VPBlockUtils;
392
393 const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).
394
395 /// An optional name for the block.
396 std::string Name;
397
398 /// The immediate VPRegionBlock which this VPBlockBase belongs to, or null if
399 /// it is a topmost VPBlockBase.
400 VPRegionBlock *Parent = nullptr;
401
402 /// List of predecessor blocks.
404
405 /// List of successor blocks.
407
408 /// VPlan containing the block. Can only be set on the entry block of the
409 /// plan.
410 VPlan *Plan = nullptr;
411
412 /// Add \p Successor as the last successor to this block.
413 void appendSuccessor(VPBlockBase *Successor) {
414 assert(Successor && "Cannot add nullptr successor!");
415 Successors.push_back(Successor);
416 }
417
418 /// Add \p Predecessor as the last predecessor to this block.
419 void appendPredecessor(VPBlockBase *Predecessor) {
420 assert(Predecessor && "Cannot add nullptr predecessor!");
421 Predecessors.push_back(Predecessor);
422 }
423
424 /// Remove \p Predecessor from the predecessors of this block.
425 void removePredecessor(VPBlockBase *Predecessor) {
426 auto Pos = find(Predecessors, Predecessor);
427 assert(Pos && "Predecessor does not exist");
428 Predecessors.erase(Pos);
429 }
430
431 /// Remove \p Successor from the successors of this block.
432 void removeSuccessor(VPBlockBase *Successor) {
433 auto Pos = find(Successors, Successor);
434 assert(Pos && "Successor does not exist");
435 Successors.erase(Pos);
436 }
437
438protected:
439 VPBlockBase(const unsigned char SC, const std::string &N)
440 : SubclassID(SC), Name(N) {}
441
442public:
443 /// An enumeration for keeping track of the concrete subclass of VPBlockBase
444 /// that are actually instantiated. Values of this enumeration are kept in the
445 /// SubclassID field of the VPBlockBase objects. They are used for concrete
446 /// type identification.
447 using VPBlockTy = enum { VPBasicBlockSC, VPRegionBlockSC };
448
450
451 virtual ~VPBlockBase() = default;
452
453 const std::string &getName() const { return Name; }
454
455 void setName(const Twine &newName) { Name = newName.str(); }
456
457 /// \return an ID for the concrete type of this object.
458 /// This is used to implement the classof checks. This should not be used
459 /// for any other purpose, as the values may change as LLVM evolves.
460 unsigned getVPBlockID() const { return SubclassID; }
461
462 VPRegionBlock *getParent() { return Parent; }
463 const VPRegionBlock *getParent() const { return Parent; }
464
465 /// \return A pointer to the plan containing the current block.
466 VPlan *getPlan();
467 const VPlan *getPlan() const;
468
469 /// Sets the pointer of the plan containing the block. The block must be the
470 /// entry block into the VPlan.
471 void setPlan(VPlan *ParentPlan);
472
473 void setParent(VPRegionBlock *P) { Parent = P; }
474
475 /// \return the VPBasicBlock that is the entry of this VPBlockBase,
476 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
477 /// VPBlockBase is a VPBasicBlock, it is returned.
478 const VPBasicBlock *getEntryBasicBlock() const;
480
481 /// \return the VPBasicBlock that is the exiting this VPBlockBase,
482 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
483 /// VPBlockBase is a VPBasicBlock, it is returned.
484 const VPBasicBlock *getExitingBasicBlock() const;
486
487 const VPBlocksTy &getSuccessors() const { return Successors; }
488 VPBlocksTy &getSuccessors() { return Successors; }
489
491
492 const VPBlocksTy &getPredecessors() const { return Predecessors; }
493 VPBlocksTy &getPredecessors() { return Predecessors; }
494
495 /// \return the successor of this VPBlockBase if it has a single successor.
496 /// Otherwise return a null pointer.
498 return (Successors.size() == 1 ? *Successors.begin() : nullptr);
499 }
500
501 /// \return the predecessor of this VPBlockBase if it has a single
502 /// predecessor. Otherwise return a null pointer.
504 return (Predecessors.size() == 1 ? *Predecessors.begin() : nullptr);
505 }
506
507 size_t getNumSuccessors() const { return Successors.size(); }
508 size_t getNumPredecessors() const { return Predecessors.size(); }
509
510 /// An Enclosing Block of a block B is any block containing B, including B
511 /// itself. \return the closest enclosing block starting from "this", which
512 /// has successors. \return the root enclosing block if all enclosing blocks
513 /// have no successors.
515
516 /// \return the closest enclosing block starting from "this", which has
517 /// predecessors. \return the root enclosing block if all enclosing blocks
518 /// have no predecessors.
520
521 /// \return the successors either attached directly to this VPBlockBase or, if
522 /// this VPBlockBase is the exit block of a VPRegionBlock and has no
523 /// successors of its own, search recursively for the first enclosing
524 /// VPRegionBlock that has successors and return them. If no such
525 /// VPRegionBlock exists, return the (empty) successors of the topmost
526 /// VPBlockBase reached.
529 }
530
531 /// \return the hierarchical successor of this VPBlockBase if it has a single
532 /// hierarchical successor. Otherwise return a null pointer.
535 }
536
537 /// \return the predecessors either attached directly to this VPBlockBase or,
538 /// if this VPBlockBase is the entry block of a VPRegionBlock and has no
539 /// predecessors of its own, search recursively for the first enclosing
540 /// VPRegionBlock that has predecessors and return them. If no such
541 /// VPRegionBlock exists, return the (empty) predecessors of the topmost
542 /// VPBlockBase reached.
545 }
546
547 /// \return the hierarchical predecessor of this VPBlockBase if it has a
548 /// single hierarchical predecessor. Otherwise return a null pointer.
551 }
552
553 /// Set a given VPBlockBase \p Successor as the single successor of this
554 /// VPBlockBase. This VPBlockBase is not added as predecessor of \p Successor.
555 /// This VPBlockBase must have no successors.
557 assert(Successors.empty() && "Setting one successor when others exist.");
558 appendSuccessor(Successor);
559 }
560
561 /// Set two given VPBlockBases \p IfTrue and \p IfFalse to be the two
562 /// successors of this VPBlockBase. This VPBlockBase is not added as
563 /// predecessor of \p IfTrue or \p IfFalse. This VPBlockBase must have no
564 /// successors.
565 void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse) {
566 assert(Successors.empty() && "Setting two successors when others exist.");
567 appendSuccessor(IfTrue);
568 appendSuccessor(IfFalse);
569 }
570
571 /// Set each VPBasicBlock in \p NewPreds as predecessor of this VPBlockBase.
572 /// This VPBlockBase must have no predecessors. This VPBlockBase is not added
573 /// as successor of any VPBasicBlock in \p NewPreds.
575 assert(Predecessors.empty() && "Block predecessors already set.");
576 for (auto *Pred : NewPreds)
577 appendPredecessor(Pred);
578 }
579
580 /// Remove all the predecessor of this block.
581 void clearPredecessors() { Predecessors.clear(); }
582
583 /// Remove all the successors of this block.
584 void clearSuccessors() { Successors.clear(); }
585
586 /// The method which generates the output IR that correspond to this
587 /// VPBlockBase, thereby "executing" the VPlan.
588 virtual void execute(VPTransformState *State) = 0;
589
590 /// Delete all blocks reachable from a given VPBlockBase, inclusive.
591 static void deleteCFG(VPBlockBase *Entry);
592
593 /// Return true if it is legal to hoist instructions into this block.
595 // There are currently no constraints that prevent an instruction to be
596 // hoisted into a VPBlockBase.
597 return true;
598 }
599
600 /// Replace all operands of VPUsers in the block with \p NewValue and also
601 /// replaces all uses of VPValues defined in the block with NewValue.
602 virtual void dropAllReferences(VPValue *NewValue) = 0;
603
604#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
605 void printAsOperand(raw_ostream &OS, bool PrintType) const {
606 OS << getName();
607 }
608
609 /// Print plain-text dump of this VPBlockBase to \p O, prefixing all lines
610 /// with \p Indent. \p SlotTracker is used to print unnamed VPValue's using
611 /// consequtive numbers.
612 ///
613 /// Note that the numbering is applied to the whole VPlan, so printing
614 /// individual blocks is consistent with the whole VPlan printing.
615 virtual void print(raw_ostream &O, const Twine &Indent,
616 VPSlotTracker &SlotTracker) const = 0;
617
618 /// Print plain-text dump of this VPlan to \p O.
619 void print(raw_ostream &O) const {
621 print(O, "", SlotTracker);
622 }
623
624 /// Print the successors of this block to \p O, prefixing all lines with \p
625 /// Indent.
626 void printSuccessors(raw_ostream &O, const Twine &Indent) const;
627
628 /// Dump this VPBlockBase to dbgs().
629 LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
630#endif
631};
632
633/// A value that is used outside the VPlan. The operand of the user needs to be
634/// added to the associated LCSSA phi node.
635class VPLiveOut : public VPUser {
636 PHINode *Phi;
637
638public:
640 : VPUser({Op}, VPUser::VPUserID::LiveOut), Phi(Phi) {}
641
642 /// Fixup the wrapped LCSSA phi node in the unique exit block. This simply
643 /// means we need to add the appropriate incoming value from the middle
644 /// block as exiting edges from the scalar epilogue loop (if present) are
645 /// already in place, and we exit the vector loop exclusively to the middle
646 /// block.
647 void fixPhi(VPlan &Plan, VPTransformState &State);
648
649 /// Returns true if the VPLiveOut uses scalars of operand \p Op.
650 bool usesScalars(const VPValue *Op) const override {
652 "Op must be an operand of the recipe");
653 return true;
654 }
655
656 PHINode *getPhi() const { return Phi; }
657};
658
659/// VPRecipeBase is a base class modeling a sequence of one or more output IR
660/// instructions. VPRecipeBase owns the the VPValues it defines through VPDef
661/// and is responsible for deleting its defined values. Single-value
662/// VPRecipeBases that also inherit from VPValue must make sure to inherit from
663/// VPRecipeBase before VPValue.
664class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
665 public VPDef,
666 public VPUser {
667 friend VPBasicBlock;
668 friend class VPBlockUtils;
669
670 /// Each VPRecipe belongs to a single VPBasicBlock.
671 VPBasicBlock *Parent = nullptr;
672
673public:
676
677 template <typename IterT>
680 virtual ~VPRecipeBase() = default;
681
682 /// \return the VPBasicBlock which this VPRecipe belongs to.
683 VPBasicBlock *getParent() { return Parent; }
684 const VPBasicBlock *getParent() const { return Parent; }
685
686 /// The method which generates the output IR instructions that correspond to
687 /// this VPRecipe, thereby "executing" the VPlan.
688 virtual void execute(VPTransformState &State) = 0;
689
690 /// Insert an unlinked recipe into a basic block immediately before
691 /// the specified recipe.
692 void insertBefore(VPRecipeBase *InsertPos);
693 /// Insert an unlinked recipe into \p BB immediately before the insertion
694 /// point \p IP;
696
697 /// Insert an unlinked Recipe into a basic block immediately after
698 /// the specified Recipe.
699 void insertAfter(VPRecipeBase *InsertPos);
700
701 /// Unlink this recipe from its current VPBasicBlock and insert it into
702 /// the VPBasicBlock that MovePos lives in, right after MovePos.
703 void moveAfter(VPRecipeBase *MovePos);
704
705 /// Unlink this recipe and insert into BB before I.
706 ///
707 /// \pre I is a valid iterator into BB.
709
710 /// This method unlinks 'this' from the containing basic block, but does not
711 /// delete it.
712 void removeFromParent();
713
714 /// This method unlinks 'this' from the containing basic block and deletes it.
715 ///
716 /// \returns an iterator pointing to the element after the erased one
718
719 /// Returns the underlying instruction, if the recipe is a VPValue or nullptr
720 /// otherwise.
722 return cast<Instruction>(getVPSingleValue()->getUnderlyingValue());
723 }
725 return cast<Instruction>(getVPSingleValue()->getUnderlyingValue());
726 }
727
728 /// Method to support type inquiry through isa, cast, and dyn_cast.
729 static inline bool classof(const VPDef *D) {
730 // All VPDefs are also VPRecipeBases.
731 return true;
732 }
733
734 static inline bool classof(const VPUser *U) {
735 return U->getVPUserID() == VPUser::VPUserID::Recipe;
736 }
737
738 /// Returns true if the recipe may have side-effects.
739 bool mayHaveSideEffects() const;
740
741 /// Returns true for PHI-like recipes.
742 bool isPhi() const {
743 return getVPDefID() >= VPFirstPHISC && getVPDefID() <= VPLastPHISC;
744 }
745
746 /// Returns true if the recipe may read from memory.
747 bool mayReadFromMemory() const;
748
749 /// Returns true if the recipe may write to memory.
750 bool mayWriteToMemory() const;
751
752 /// Returns true if the recipe may read from or write to memory.
753 bool mayReadOrWriteMemory() const {
755 }
756};
757
758// Helper macro to define common classof implementations for recipes.
759#define VP_CLASSOF_IMPL(VPDefID) \
760 static inline bool classof(const VPDef *D) { \
761 return D->getVPDefID() == VPDefID; \
762 } \
763 static inline bool classof(const VPValue *V) { \
764 auto *R = V->getDefiningRecipe(); \
765 return R && R->getVPDefID() == VPDefID; \
766 } \
767 static inline bool classof(const VPUser *U) { \
768 auto *R = dyn_cast<VPRecipeBase>(U); \
769 return R && R->getVPDefID() == VPDefID; \
770 } \
771 static inline bool classof(const VPRecipeBase *R) { \
772 return R->getVPDefID() == VPDefID; \
773 }
774
775/// This is a concrete Recipe that models a single VPlan-level instruction.
776/// While as any Recipe it may generate a sequence of IR instructions when
777/// executed, these instructions would always form a single-def expression as
778/// the VPInstruction is also a single def-use vertex.
779class VPInstruction : public VPRecipeBase, public VPValue {
780 friend class VPlanSlp;
781
782public:
783 /// VPlan opcodes, extending LLVM IR with idiomatics instructions.
784 enum {
786 Instruction::OtherOpsEnd + 1, // Combines the incoming and previous
787 // values of a first-order recurrence.
796 // The next two are similar to the above, but instead increment the
797 // canonical IV separately for each unrolled part.
802 };
803
804private:
805 typedef unsigned char OpcodeTy;
806 OpcodeTy Opcode;
807 FastMathFlags FMF;
808 DebugLoc DL;
809
810 /// An optional name that can be used for the generated IR instruction.
811 const std::string Name;
812
813 /// Utility method serving execute(): generates a single instance of the
814 /// modeled instruction.
815 void generateInstruction(VPTransformState &State, unsigned Part);
816
817protected:
819
820public:
822 const Twine &Name = "")
823 : VPRecipeBase(VPDef::VPInstructionSC, Operands), VPValue(this),
824 Opcode(Opcode), DL(DL), Name(Name.str()) {}
825
826 VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
827 DebugLoc DL = {}, const Twine &Name = "")
829
830 VP_CLASSOF_IMPL(VPDef::VPInstructionSC)
831
834 return new VPInstruction(Opcode, Operands, DL, Name);
835 }
836
837 unsigned getOpcode() const { return Opcode; }
838
839 /// Generate the instruction.
840 /// TODO: We currently execute only per-part unless a specific instance is
841 /// provided.
842 void execute(VPTransformState &State) override;
843
844#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
845 /// Print the VPInstruction to \p O.
846 void print(raw_ostream &O, const Twine &Indent,
847 VPSlotTracker &SlotTracker) const override;
848
849 /// Print the VPInstruction to dbgs() (for debugging).
850 LLVM_DUMP_METHOD void dump() const;
851#endif
852
853 /// Return true if this instruction may modify memory.
854 bool mayWriteToMemory() const {
855 // TODO: we can use attributes of the called function to rule out memory
856 // modifications.
857 return Opcode == Instruction::Store || Opcode == Instruction::Call ||
858 Opcode == Instruction::Invoke || Opcode == SLPStore;
859 }
860
861 bool hasResult() const {
862 // CallInst may or may not have a result, depending on the called function.
863 // Conservatively return calls have results for now.
864 switch (getOpcode()) {
865 case Instruction::Ret:
866 case Instruction::Br:
867 case Instruction::Store:
868 case Instruction::Switch:
869 case Instruction::IndirectBr:
870 case Instruction::Resume:
871 case Instruction::CatchRet:
872 case Instruction::Unreachable:
873 case Instruction::Fence:
874 case Instruction::AtomicRMW:
877 return false;
878 default:
879 return true;
880 }
881 }
882
883 /// Set the fast-math flags.
884 void setFastMathFlags(FastMathFlags FMFNew);
885
886 /// Returns true if the recipe only uses the first lane of operand \p Op.
887 bool onlyFirstLaneUsed(const VPValue *Op) const override {
889 "Op must be an operand of the recipe");
890 if (getOperand(0) != Op)
891 return false;
892 switch (getOpcode()) {
893 default:
894 return false;
902 return true;
903 };
904 llvm_unreachable("switch should return");
905 }
906};
907
908/// VPWidenRecipe is a recipe for producing a copy of vector type its
909/// ingredient. This recipe covers most of the traditional vectorization cases
910/// where each ingredient transforms into a vectorized version of itself.
911class VPWidenRecipe : public VPRecipeBase, public VPValue {
912public:
913 template <typename IterT>
915 : VPRecipeBase(VPDef::VPWidenSC, Operands), VPValue(this, &I) {}
916
917 ~VPWidenRecipe() override = default;
918
919 VP_CLASSOF_IMPL(VPDef::VPWidenSC)
920
921 /// Produce widened copies of all Ingredients.
922 void execute(VPTransformState &State) override;
923
924#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
925 /// Print the recipe.
926 void print(raw_ostream &O, const Twine &Indent,
927 VPSlotTracker &SlotTracker) const override;
928#endif
929};
930
931/// A recipe for widening Call instructions.
932class VPWidenCallRecipe : public VPRecipeBase, public VPValue {
933 /// ID of the vector intrinsic to call when widening the call. If set the
934 /// Intrinsic::not_intrinsic, a library call will be used instead.
935 Intrinsic::ID VectorIntrinsicID;
936 /// If this recipe represents a library call, Variant stores a pointer to
937 /// the chosen function. There is a 1:1 mapping between a given VF and the
938 /// chosen vectorized variant, so there will be a different vplan for each
939 /// VF with a valid variant.
940 Function *Variant;
941
942public:
943 template <typename IterT>
945 Intrinsic::ID VectorIntrinsicID,
946 Function *Variant = nullptr)
947 : VPRecipeBase(VPDef::VPWidenCallSC, CallArguments), VPValue(this, &I),
948 VectorIntrinsicID(VectorIntrinsicID), Variant(Variant) {}
949
950 ~VPWidenCallRecipe() override = default;
951
952 VP_CLASSOF_IMPL(VPDef::VPWidenCallSC)
953
954 /// Produce a widened version of the call instruction.
955 void execute(VPTransformState &State) override;
956
957#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
958 /// Print the recipe.
959 void print(raw_ostream &O, const Twine &Indent,
960 VPSlotTracker &SlotTracker) const override;
961#endif
962};
963
964/// A recipe for widening select instructions.
965struct VPWidenSelectRecipe : public VPRecipeBase, public VPValue {
966 template <typename IterT>
968 : VPRecipeBase(VPDef::VPWidenSelectSC, Operands), VPValue(this, &I) {}
969
970 ~VPWidenSelectRecipe() override = default;
971
972 VP_CLASSOF_IMPL(VPDef::VPWidenSelectSC)
973
974 /// Produce a widened version of the select instruction.
975 void execute(VPTransformState &State) override;
976
977#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
978 /// Print the recipe.
979 void print(raw_ostream &O, const Twine &Indent,
980 VPSlotTracker &SlotTracker) const override;
981#endif
982
983 VPValue *getCond() const {
984 return getOperand(0);
985 }
986
987 bool isInvariantCond() const {
989 }
990};
991
992/// A recipe for handling GEP instructions.
993class VPWidenGEPRecipe : public VPRecipeBase, public VPValue {
994 bool isPointerLoopInvariant() const {
996 }
997
998 bool isIndexLoopInvariant(unsigned I) const {
1000 }
1001
1002 bool areAllOperandsInvariant() const {
1003 return all_of(operands(), [](VPValue *Op) {
1004 return Op->isDefinedOutsideVectorRegions();
1005 });
1006 }
1007
1008public:
1009 template <typename IterT>
1011 : VPRecipeBase(VPDef::VPWidenGEPSC, Operands), VPValue(this, GEP) {}
1012
1013 ~VPWidenGEPRecipe() override = default;
1014
1015 VP_CLASSOF_IMPL(VPDef::VPWidenGEPSC)
1016
1017 /// Generate the gep nodes.
1018 void execute(VPTransformState &State) override;
1019
1020#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1021 /// Print the recipe.
1022 void print(raw_ostream &O, const Twine &Indent,
1023 VPSlotTracker &SlotTracker) const override;
1024#endif
1025};
1026
1027/// A pure virtual base class for all recipes modeling header phis, including
1028/// phis for first order recurrences, pointer inductions and reductions. The
1029/// start value is the first operand of the recipe and the incoming value from
1030/// the backedge is the second operand.
1031///
1032/// Inductions are modeled using the following sub-classes:
1033/// * VPCanonicalIVPHIRecipe: Canonical scalar induction of the vector loop,
1034/// starting at a specified value (zero for the main vector loop, the resume
1035/// value for the epilogue vector loop) and stepping by 1. The induction
1036/// controls exiting of the vector loop by comparing against the vector trip
1037/// count. Produces a single scalar PHI for the induction value per
1038/// iteration.
1039/// * VPWidenIntOrFpInductionRecipe: Generates vector values for integer and
1040/// floating point inductions with arbitrary start and step values. Produces
1041/// a vector PHI per-part.
1042/// * VPDerivedIVRecipe: Converts the canonical IV value to the corresponding
1043/// value of an IV with different start and step values. Produces a single
1044/// scalar value per iteration
1045/// * VPScalarIVStepsRecipe: Generates scalar values per-lane based on a
1046/// canonical or derived induction.
1047/// * VPWidenPointerInductionRecipe: Generate vector and scalar values for a
1048/// pointer induction. Produces either a vector PHI per-part or scalar values
1049/// per-lane based on the canonical induction.
1051protected:
1052 VPHeaderPHIRecipe(unsigned char VPDefID, Instruction *UnderlyingInstr,
1053 VPValue *Start = nullptr)
1054 : VPRecipeBase(VPDefID, {}), VPValue(this, UnderlyingInstr) {
1055 if (Start)
1056 addOperand(Start);
1057 }
1058
1059public:
1060 ~VPHeaderPHIRecipe() override = default;
1061
1062 /// Method to support type inquiry through isa, cast, and dyn_cast.
1063 static inline bool classof(const VPRecipeBase *B) {
1064 return B->getVPDefID() >= VPDef::VPFirstHeaderPHISC &&
1065 B->getVPDefID() <= VPDef::VPLastHeaderPHISC;
1066 }
1067 static inline bool classof(const VPValue *V) {
1068 auto *B = V->getDefiningRecipe();
1069 return B && B->getVPDefID() >= VPRecipeBase::VPFirstHeaderPHISC &&
1070 B->getVPDefID() <= VPRecipeBase::VPLastHeaderPHISC;
1071 }
1072
1073 /// Generate the phi nodes.
1074 void execute(VPTransformState &State) override = 0;
1075
1076#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1077 /// Print the recipe.
1078 void print(raw_ostream &O, const Twine &Indent,
1079 VPSlotTracker &SlotTracker) const override = 0;
1080#endif
1081
1082 /// Returns the start value of the phi, if one is set.
1084 return getNumOperands() == 0 ? nullptr : getOperand(0);
1085 }
1087 return getNumOperands() == 0 ? nullptr : getOperand(0);
1088 }
1089
1090 /// Update the start value of the recipe.
1092
1093 /// Returns the incoming value from the loop backedge.
1095 return getOperand(1);
1096 }
1097
1098 /// Returns the backedge value as a recipe. The backedge value is guaranteed
1099 /// to be a recipe.
1102 }
1103};
1104
1105/// A recipe for handling phi nodes of integer and floating-point inductions,
1106/// producing their vector values.
1108 PHINode *IV;
1109 TruncInst *Trunc;
1110 const InductionDescriptor &IndDesc;
1111 bool NeedsVectorIV;
1112
1113public:
1115 const InductionDescriptor &IndDesc,
1116 bool NeedsVectorIV)
1117 : VPHeaderPHIRecipe(VPDef::VPWidenIntOrFpInductionSC, IV, Start), IV(IV),
1118 Trunc(nullptr), IndDesc(IndDesc), NeedsVectorIV(NeedsVectorIV) {
1119 addOperand(Step);
1120 }
1121
1123 const InductionDescriptor &IndDesc,
1124 TruncInst *Trunc, bool NeedsVectorIV)
1125 : VPHeaderPHIRecipe(VPDef::VPWidenIntOrFpInductionSC, Trunc, Start),
1126 IV(IV), Trunc(Trunc), IndDesc(IndDesc), NeedsVectorIV(NeedsVectorIV) {
1127 addOperand(Step);
1128 }
1129
1131
1132 VP_CLASSOF_IMPL(VPDef::VPWidenIntOrFpInductionSC)
1133
1134 /// Generate the vectorized and scalarized versions of the phi node as
1135 /// needed by their users.
1136 void execute(VPTransformState &State) override;
1137
1138#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1139 /// Print the recipe.
1140 void print(raw_ostream &O, const Twine &Indent,
1141 VPSlotTracker &SlotTracker) const override;
1142#endif
1143
1145 // TODO: All operands of base recipe must exist and be at same index in
1146 // derived recipe.
1148 "VPWidenIntOrFpInductionRecipe generates its own backedge value");
1149 }
1150
1152 // TODO: All operands of base recipe must exist and be at same index in
1153 // derived recipe.
1155 "VPWidenIntOrFpInductionRecipe generates its own backedge value");
1156 }
1157
1158 /// Returns the step value of the induction.
1160 const VPValue *getStepValue() const { return getOperand(1); }
1161
1162 /// Returns the first defined value as TruncInst, if it is one or nullptr
1163 /// otherwise.
1164 TruncInst *getTruncInst() { return Trunc; }
1165 const TruncInst *getTruncInst() const { return Trunc; }
1166
1167 PHINode *getPHINode() { return IV; }
1168
1169 /// Returns the induction descriptor for the recipe.
1170 const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }
1171
1172 /// Returns true if the induction is canonical, i.e. starting at 0 and
1173 /// incremented by UF * VF (= the original IV is incremented by 1).
1174 bool isCanonical() const;
1175
1176 /// Returns the scalar type of the induction.
1177 const Type *getScalarType() const {
1178 return Trunc ? Trunc->getType() : IV->getType();
1179 }
1180
1181 /// Returns true if a vector phi needs to be created for the induction.
1182 bool needsVectorIV() const { return NeedsVectorIV; }
1183};
1184
1186 const InductionDescriptor &IndDesc;
1187
1188 bool IsScalarAfterVectorization;
1189
1190public:
1191 /// Create a new VPWidenPointerInductionRecipe for \p Phi with start value \p
1192 /// Start.
1194 const InductionDescriptor &IndDesc,
1195 bool IsScalarAfterVectorization)
1196 : VPHeaderPHIRecipe(VPDef::VPWidenPointerInductionSC, Phi),
1197 IndDesc(IndDesc),
1198 IsScalarAfterVectorization(IsScalarAfterVectorization) {
1199 addOperand(Start);
1200 addOperand(Step);
1201 }
1202
1204
1205 VP_CLASSOF_IMPL(VPDef::VPWidenPointerInductionSC)
1206
1207 /// Generate vector values for the pointer induction.
1208 void execute(VPTransformState &State) override;
1209
1210 /// Returns true if only scalar values will be generated.
1212
1213 /// Returns the induction descriptor for the recipe.
1214 const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }
1215
1216#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1217 /// Print the recipe.
1218 void print(raw_ostream &O, const Twine &Indent,
1219 VPSlotTracker &SlotTracker) const override;
1220#endif
1221};
1222
1223/// A recipe for handling header phis that are widened in the vector loop.
1224/// In the VPlan native path, all incoming VPValues & VPBasicBlock pairs are
1225/// managed in the recipe directly.
1227 /// List of incoming blocks. Only used in the VPlan native path.
1228 SmallVector<VPBasicBlock *, 2> IncomingBlocks;
1229
1230public:
1231 /// Create a new VPWidenPHIRecipe for \p Phi with start value \p Start.
1232 VPWidenPHIRecipe(PHINode *Phi, VPValue *Start = nullptr)
1233 : VPHeaderPHIRecipe(VPDef::VPWidenPHISC, Phi) {
1234 if (Start)
1235 addOperand(Start);
1236 }
1237
1238 ~VPWidenPHIRecipe() override = default;
1239
1240 VP_CLASSOF_IMPL(VPDef::VPWidenPHISC)
1241
1242 /// Generate the phi/select nodes.
1243 void execute(VPTransformState &State) override;
1244
1245#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1246 /// Print the recipe.
1247 void print(raw_ostream &O, const Twine &Indent,
1248 VPSlotTracker &SlotTracker) const override;
1249#endif
1250
1251 /// Adds a pair (\p IncomingV, \p IncomingBlock) to the phi.
1252 void addIncoming(VPValue *IncomingV, VPBasicBlock *IncomingBlock) {
1253 addOperand(IncomingV);
1254 IncomingBlocks.push_back(IncomingBlock);
1255 }
1256
1257 /// Returns the \p I th incoming VPBasicBlock.
1258 VPBasicBlock *getIncomingBlock(unsigned I) { return IncomingBlocks[I]; }
1259
1260 /// Returns the \p I th incoming VPValue.
1261 VPValue *getIncomingValue(unsigned I) { return getOperand(I); }
1262};
1263
1264/// A recipe for handling first-order recurrence phis. The start value is the
1265/// first operand of the recipe and the incoming value from the backedge is the
1266/// second operand.
1269 : VPHeaderPHIRecipe(VPDef::VPFirstOrderRecurrencePHISC, Phi, &Start) {}
1270
1271 VP_CLASSOF_IMPL(VPDef::VPFirstOrderRecurrencePHISC)
1272
1274 return R->getVPDefID() == VPDef::VPFirstOrderRecurrencePHISC;
1275 }
1276
1277 void execute(VPTransformState &State) override;
1278
1279#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1280 /// Print the recipe.
1281 void print(raw_ostream &O, const Twine &Indent,
1282 VPSlotTracker &SlotTracker) const override;
1283#endif
1284};
1285
1286/// A recipe for handling reduction phis. The start value is the first operand
1287/// of the recipe and the incoming value from the backedge is the second
1288/// operand.
1290 /// Descriptor for the reduction.
1291 const RecurrenceDescriptor &RdxDesc;
1292
1293 /// The phi is part of an in-loop reduction.
1294 bool IsInLoop;
1295
1296 /// The phi is part of an ordered reduction. Requires IsInLoop to be true.
1297 bool IsOrdered;
1298
1299public:
1300 /// Create a new VPReductionPHIRecipe for the reduction \p Phi described by \p
1301 /// RdxDesc.
1303 VPValue &Start, bool IsInLoop = false,
1304 bool IsOrdered = false)
1305 : VPHeaderPHIRecipe(VPDef::VPReductionPHISC, Phi, &Start),
1306 RdxDesc(RdxDesc), IsInLoop(IsInLoop), IsOrdered(IsOrdered) {
1307 assert((!IsOrdered || IsInLoop) && "IsOrdered requires IsInLoop");
1308 }
1309
1310 ~VPReductionPHIRecipe() override = default;
1311
1312 VP_CLASSOF_IMPL(VPDef::VPReductionPHISC)
1313
1315 return R->getVPDefID() == VPDef::VPReductionPHISC;
1316 }
1317
1318 /// Generate the phi/select nodes.
1319 void execute(VPTransformState &State) override;
1320
1321#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1322 /// Print the recipe.
1323 void print(raw_ostream &O, const Twine &Indent,
1324 VPSlotTracker &SlotTracker) const override;
1325#endif
1326
1328 return RdxDesc;
1329 }
1330
1331 /// Returns true, if the phi is part of an ordered reduction.
1332 bool isOrdered() const { return IsOrdered; }
1333
1334 /// Returns true, if the phi is part of an in-loop reduction.
1335 bool isInLoop() const { return IsInLoop; }
1336};
1337
1338/// A recipe for vectorizing a phi-node as a sequence of mask-based select
1339/// instructions.
1340class VPBlendRecipe : public VPRecipeBase, public VPValue {
1341 PHINode *Phi;
1342
1343public:
1344 /// The blend operation is a User of the incoming values and of their
1345 /// respective masks, ordered [I0, M0, I1, M1, ...]. Note that a single value
1346 /// might be incoming with a full mask for which there is no VPValue.
1348 : VPRecipeBase(VPDef::VPBlendSC, Operands), VPValue(this, Phi), Phi(Phi) {
1349 assert(Operands.size() > 0 &&
1350 ((Operands.size() == 1) || (Operands.size() % 2 == 0)) &&
1351 "Expected either a single incoming value or a positive even number "
1352 "of operands");
1353 }
1354
1355 VP_CLASSOF_IMPL(VPDef::VPBlendSC)
1356
1357 /// Return the number of incoming values, taking into account that a single
1358 /// incoming value has no mask.
1359 unsigned getNumIncomingValues() const { return (getNumOperands() + 1) / 2; }
1360
1361 /// Return incoming value number \p Idx.
1362 VPValue *getIncomingValue(unsigned Idx) const { return getOperand(Idx * 2); }
1363
1364 /// Return mask number \p Idx.
1365 VPValue *getMask(unsigned Idx) const { return getOperand(Idx * 2 + 1); }
1366
1367 /// Generate the phi/select nodes.
1368 void execute(VPTransformState &State) override;
1369
1370#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1371 /// Print the recipe.
1372 void print(raw_ostream &O, const Twine &Indent,
1373 VPSlotTracker &SlotTracker) const override;
1374#endif
1375
1376 /// Returns true if the recipe only uses the first lane of operand \p Op.
1377 bool onlyFirstLaneUsed(const VPValue *Op) const override {
1378 assert(is_contained(operands(), Op) &&
1379 "Op must be an operand of the recipe");
1380 // Recursing through Blend recipes only, must terminate at header phi's the
1381 // latest.
1382 return all_of(users(),
1383 [this](VPUser *U) { return U->onlyFirstLaneUsed(this); });
1384 }
1385};
1386
1387/// VPInterleaveRecipe is a recipe for transforming an interleave group of load
1388/// or stores into one wide load/store and shuffles. The first operand of a
1389/// VPInterleave recipe is the address, followed by the stored values, followed
1390/// by an optional mask.
1393
1394 bool HasMask = false;
1395
1396public:
1398 ArrayRef<VPValue *> StoredValues, VPValue *Mask)
1399 : VPRecipeBase(VPDef::VPInterleaveSC, {Addr}), IG(IG) {
1400 for (unsigned i = 0; i < IG->getFactor(); ++i)
1401 if (Instruction *I = IG->getMember(i)) {
1402 if (I->getType()->isVoidTy())
1403 continue;
1404 new VPValue(I, this);
1405 }
1406
1407 for (auto *SV : StoredValues)
1408 addOperand(SV);
1409 if (Mask) {
1410 HasMask = true;
1411 addOperand(Mask);
1412 }
1413 }
1414 ~VPInterleaveRecipe() override = default;
1415
1416 VP_CLASSOF_IMPL(VPDef::VPInterleaveSC)
1417
1418 /// Return the address accessed by this recipe.
1419 VPValue *getAddr() const {
1420 return getOperand(0); // Address is the 1st, mandatory operand.
1421 }
1422
1423 /// Return the mask used by this recipe. Note that a full mask is represented
1424 /// by a nullptr.
1425 VPValue *getMask() const {
1426 // Mask is optional and therefore the last, currently 2nd operand.
1427 return HasMask ? getOperand(getNumOperands() - 1) : nullptr;
1428 }
1429
1430 /// Return the VPValues stored by this interleave group. If it is a load
1431 /// interleave group, return an empty ArrayRef.
1433 // The first operand is the address, followed by the stored values, followed
1434 // by an optional mask.
1437 }
1438
1439 /// Generate the wide load or store, and shuffles.
1440 void execute(VPTransformState &State) override;
1441
1442#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1443 /// Print the recipe.
1444 void print(raw_ostream &O, const Twine &Indent,
1445 VPSlotTracker &SlotTracker) const override;
1446#endif
1447
1449
1450 /// Returns the number of stored operands of this interleave group. Returns 0
1451 /// for load interleave groups.
1452 unsigned getNumStoreOperands() const {
1453 return getNumOperands() - (HasMask ? 2 : 1);
1454 }
1455
1456 /// The recipe only uses the first lane of the address.
1457 bool onlyFirstLaneUsed(const VPValue *Op) const override {
1458 assert(is_contained(operands(), Op) &&
1459 "Op must be an operand of the recipe");
1460 return Op == getAddr() && !llvm::is_contained(getStoredValues(), Op);
1461 }
1462};
1463
1464/// A recipe to represent inloop reduction operations, performing a reduction on
1465/// a vector operand into a scalar value, and adding the result to a chain.
1466/// The Operands are {ChainOp, VecOp, [Condition]}.
1468 /// The recurrence decriptor for the reduction in question.
1469 const RecurrenceDescriptor *RdxDesc;
1470 /// Pointer to the TTI, needed to create the target reduction
1471 const TargetTransformInfo *TTI;
1472
1473public:
1475 VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
1476 const TargetTransformInfo *TTI)
1477 : VPRecipeBase(VPDef::VPReductionSC, {ChainOp, VecOp}), VPValue(this, I),
1478 RdxDesc(R), TTI(TTI) {
1479 if (CondOp)
1480 addOperand(CondOp);
1481 }
1482
1483 ~VPReductionRecipe() override = default;
1484
1485 VP_CLASSOF_IMPL(VPDef::VPReductionSC)
1486
1487 /// Generate the reduction in the loop
1488 void execute(VPTransformState &State) override;
1489
1490#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1491 /// Print the recipe.
1492 void print(raw_ostream &O, const Twine &Indent,
1493 VPSlotTracker &SlotTracker) const override;
1494#endif
1495
1496 /// The VPValue of the scalar Chain being accumulated.
1497 VPValue *getChainOp() const { return getOperand(0); }
1498 /// The VPValue of the vector value to be reduced.
1499 VPValue *getVecOp() const { return getOperand(1); }
1500 /// The VPValue of the condition for the block.
1502 return getNumOperands() > 2 ? getOperand(2) : nullptr;
1503 }
1504};
1505
1506/// VPReplicateRecipe replicates a given instruction producing multiple scalar
1507/// copies of the original scalar type, one per lane, instead of producing a
1508/// single copy of widened type for all lanes. If the instruction is known to be
1509/// uniform only one copy, per lane zero, will be generated.
1511 /// Indicator if only a single replica per lane is needed.
1512 bool IsUniform;
1513
1514 /// Indicator if the replicas are also predicated.
1515 bool IsPredicated;
1516
1517public:
1518 template <typename IterT>
1520 bool IsUniform, VPValue *Mask = nullptr)
1521 : VPRecipeBase(VPDef::VPReplicateSC, Operands), VPValue(this, I),
1522 IsUniform(IsUniform), IsPredicated(Mask) {
1523 if (Mask)
1524 addOperand(Mask);
1525 }
1526
1527 ~VPReplicateRecipe() override = default;
1528
1529 VP_CLASSOF_IMPL(VPDef::VPReplicateSC)
1530
1531 /// Generate replicas of the desired Ingredient. Replicas will be generated
1532 /// for all parts and lanes unless a specific part and lane are specified in
1533 /// the \p State.
1534 void execute(VPTransformState &State) override;
1535
1536#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1537 /// Print the recipe.
1538 void print(raw_ostream &O, const Twine &Indent,
1539 VPSlotTracker &SlotTracker) const override;
1540#endif
1541
1542 bool isUniform() const { return IsUniform; }
1543
1544 bool isPredicated() const { return IsPredicated; }
1545
1546 /// Returns true if the recipe only uses the first lane of operand \p Op.
1547 bool onlyFirstLaneUsed(const VPValue *Op) const override {
1548 assert(is_contained(operands(), Op) &&
1549 "Op must be an operand of the recipe");
1550 return isUniform();
1551 }
1552
1553 /// Returns true if the recipe uses scalars of operand \p Op.
1554 bool usesScalars(const VPValue *Op) const override {
1555 assert(is_contained(operands(), Op) &&
1556 "Op must be an operand of the recipe");
1557 return true;
1558 }
1559
1560 /// Returns true if the recipe is used by a widened recipe via an intervening
1561 /// VPPredInstPHIRecipe. In this case, the scalar values should also be packed
1562 /// in a vector.
1563 bool shouldPack() const;
1564
1565 /// Return the mask of a predicated VPReplicateRecipe.
1567 assert(isPredicated() && "Trying to get the mask of a unpredicated recipe");
1568 return getOperand(getNumOperands() - 1);
1569 }
1570};
1571
1572/// A recipe for generating conditional branches on the bits of a mask.
1574public:
1576 : VPRecipeBase(VPDef::VPBranchOnMaskSC, {}) {
1577 if (BlockInMask) // nullptr means all-one mask.
1578 addOperand(BlockInMask);
1579 }
1580
1581 VP_CLASSOF_IMPL(VPDef::VPBranchOnMaskSC)
1582
1583 /// Generate the extraction of the appropriate bit from the block mask and the
1584 /// conditional branch.
1585 void execute(VPTransformState &State) override;
1586
1587#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1588 /// Print the recipe.
1589 void print(raw_ostream &O, const Twine &Indent,
1590 VPSlotTracker &SlotTracker) const override {
1591 O << Indent << "BRANCH-ON-MASK ";
1592 if (VPValue *Mask = getMask())
1593 Mask->printAsOperand(O, SlotTracker);
1594 else
1595 O << " All-One";
1596 }
1597#endif
1598
1599 /// Return the mask used by this recipe. Note that a full mask is represented
1600 /// by a nullptr.
1601 VPValue *getMask() const {
1602 assert(getNumOperands() <= 1 && "should have either 0 or 1 operands");
1603 // Mask is optional.
1604 return getNumOperands() == 1 ? getOperand(0) : nullptr;
1605 }
1606
1607 /// Returns true if the recipe uses scalars of operand \p Op.
1608 bool usesScalars(const VPValue *Op) const override {
1609 assert(is_contained(operands(), Op) &&
1610 "Op must be an operand of the recipe");
1611 return true;
1612 }
1613};
1614
1615/// VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when
1616/// control converges back from a Branch-on-Mask. The phi nodes are needed in
1617/// order to merge values that are set under such a branch and feed their uses.
1618/// The phi nodes can be scalar or vector depending on the users of the value.
1619/// This recipe works in concert with VPBranchOnMaskRecipe.
1621public:
1622 /// Construct a VPPredInstPHIRecipe given \p PredInst whose value needs a phi
1623 /// nodes after merging back from a Branch-on-Mask.
1625 : VPRecipeBase(VPDef::VPPredInstPHISC, PredV), VPValue(this) {}
1626 ~VPPredInstPHIRecipe() override = default;
1627
1628 VP_CLASSOF_IMPL(VPDef::VPPredInstPHISC)
1629
1630 /// Generates phi nodes for live-outs as needed to retain SSA form.
1631 void execute(VPTransformState &State) override;
1632
1633#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1634 /// Print the recipe.
1635 void print(raw_ostream &O, const Twine &Indent,
1636 VPSlotTracker &SlotTracker) const override;
1637#endif
1638
1639 /// Returns true if the recipe uses scalars of operand \p Op.
1640 bool usesScalars(const VPValue *Op) const override {
1641 assert(is_contained(operands(), Op) &&
1642 "Op must be an operand of the recipe");
1643 return true;
1644 }
1645};
1646
1647/// A Recipe for widening load/store operations.
1648/// The recipe uses the following VPValues:
1649/// - For load: Address, optional mask
1650/// - For store: Address, stored value, optional mask
1651/// TODO: We currently execute only per-part unless a specific instance is
1652/// provided.
1654 Instruction &Ingredient;
1655
1656 // Whether the loaded-from / stored-to addresses are consecutive.
1657 bool Consecutive;
1658
1659 // Whether the consecutive loaded/stored addresses are in reverse order.
1660 bool Reverse;
1661
1662 void setMask(VPValue *Mask) {
1663 if (!Mask)
1664 return;
1665 addOperand(Mask);
1666 }
1667
1668 bool isMasked() const {
1669 return isStore() ? getNumOperands() == 3 : getNumOperands() == 2;
1670 }
1671
1672public:
1674 bool Consecutive, bool Reverse)
1675 : VPRecipeBase(VPDef::VPWidenMemoryInstructionSC, {Addr}),
1676 Ingredient(Load), Consecutive(Consecutive), Reverse(Reverse) {
1677 assert((Consecutive || !Reverse) && "Reverse implies consecutive");
1678 new VPValue(this, &Load);
1679 setMask(Mask);
1680 }
1681
1683 VPValue *StoredValue, VPValue *Mask,
1684 bool Consecutive, bool Reverse)
1685 : VPRecipeBase(VPDef::VPWidenMemoryInstructionSC, {Addr, StoredValue}),
1686 Ingredient(Store), Consecutive(Consecutive), Reverse(Reverse) {
1687 assert((Consecutive || !Reverse) && "Reverse implies consecutive");
1688 setMask(Mask);
1689 }
1690
1691 VP_CLASSOF_IMPL(VPDef::VPWidenMemoryInstructionSC)
1692
1693 /// Return the address accessed by this recipe.
1694 VPValue *getAddr() const {
1695 return getOperand(0); // Address is the 1st, mandatory operand.
1696 }
1697
1698 /// Return the mask used by this recipe. Note that a full mask is represented
1699 /// by a nullptr.
1700 VPValue *getMask() const {
1701 // Mask is optional and therefore the last operand.
1702 return isMasked() ? getOperand(getNumOperands() - 1) : nullptr;
1703 }
1704
1705 /// Returns true if this recipe is a store.
1706 bool isStore() const { return isa<StoreInst>(Ingredient); }
1707
1708 /// Return the address accessed by this recipe.
1710 assert(isStore() && "Stored value only available for store instructions");
1711 return getOperand(1); // Stored value is the 2nd, mandatory operand.
1712 }
1713
1714 // Return whether the loaded-from / stored-to addresses are consecutive.
1715 bool isConsecutive() const { return Consecutive; }
1716
1717 // Return whether the consecutive loaded/stored addresses are in reverse
1718 // order.
1719 bool isReverse() const { return Reverse; }
1720
1721 /// Generate the wide load/store.
1722 void execute(VPTransformState &State) override;
1723
1724#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1725 /// Print the recipe.
1726 void print(raw_ostream &O, const Twine &Indent,
1727 VPSlotTracker &SlotTracker) const override;
1728#endif
1729
1730 /// Returns true if the recipe only uses the first lane of operand \p Op.
1731 bool onlyFirstLaneUsed(const VPValue *Op) const override {
1732 assert(is_contained(operands(), Op) &&
1733 "Op must be an operand of the recipe");
1734
1735 // Widened, consecutive memory operations only demand the first lane of
1736 // their address, unless the same operand is also stored. That latter can
1737 // happen with opaque pointers.
1738 return Op == getAddr() && isConsecutive() &&
1739 (!isStore() || Op != getStoredValue());
1740 }
1741
1742 Instruction &getIngredient() const { return Ingredient; }
1743};
1744
1745/// Recipe to expand a SCEV expression.
1747 const SCEV *Expr;
1748 ScalarEvolution &SE;
1749
1750public:
1752 : VPRecipeBase(VPDef::VPExpandSCEVSC, {}), VPValue(this), Expr(Expr),
1753 SE(SE) {}
1754
1755 ~VPExpandSCEVRecipe() override = default;
1756
1757 VP_CLASSOF_IMPL(VPDef::VPExpandSCEVSC)
1758
1759 /// Generate a canonical vector induction variable of the vector loop, with
1760 void execute(VPTransformState &State) override;
1761
1762#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1763 /// Print the recipe.
1764 void print(raw_ostream &O, const Twine &Indent,
1765 VPSlotTracker &SlotTracker) const override;
1766#endif
1767
1768 const SCEV *getSCEV() const { return Expr; }
1769};
1770
1771/// Canonical scalar induction phi of the vector loop. Starting at the specified
1772/// start value (either 0 or the resume value when vectorizing the epilogue
1773/// loop). VPWidenCanonicalIVRecipe represents the vector version of the
1774/// canonical induction variable.
1776 DebugLoc DL;
1777
1778public:
1780 : VPHeaderPHIRecipe(VPDef::VPCanonicalIVPHISC, nullptr, StartV), DL(DL) {}
1781
1782 ~VPCanonicalIVPHIRecipe() override = default;
1783
1784 VP_CLASSOF_IMPL(VPDef::VPCanonicalIVPHISC)
1785
1787 return D->getVPDefID() == VPDef::VPCanonicalIVPHISC;
1788 }
1789
1790 /// Generate the canonical scalar induction phi of the vector loop.
1791 void execute(VPTransformState &State) override;
1792
1793#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1794 /// Print the recipe.
1795 void print(raw_ostream &O, const Twine &Indent,
1796 VPSlotTracker &SlotTracker) const override;
1797#endif
1798
1799 /// Returns the scalar type of the induction.
1800 const Type *getScalarType() const {
1801 return getOperand(0)->getLiveInIRValue()->getType();
1802 }
1803
1804 /// Returns true if the recipe only uses the first lane of operand \p Op.
1805 bool onlyFirstLaneUsed(const VPValue *Op) const override {
1806 assert(is_contained(operands(), Op) &&
1807 "Op must be an operand of the recipe");
1808 return true;
1809 }
1810
1811 /// Check if the induction described by \p ID is canonical, i.e. has the same
1812 /// start, step (of 1), and type as the canonical IV.
1813 bool isCanonical(const InductionDescriptor &ID, Type *Ty) const;
1814};
1815
1816/// A recipe for generating the active lane mask for the vector loop that is
1817/// used to predicate the vector operations.
1818/// TODO: It would be good to use the existing VPWidenPHIRecipe instead and
1819/// remove VPActiveLaneMaskPHIRecipe.
1821 DebugLoc DL;
1822
1823public:
1825 : VPHeaderPHIRecipe(VPDef::VPActiveLaneMaskPHISC, nullptr, StartMask),
1826 DL(DL) {}
1827
1828 ~VPActiveLaneMaskPHIRecipe() override = default;
1829
1830 VP_CLASSOF_IMPL(VPDef::VPActiveLaneMaskPHISC)
1831
1833 return D->getVPDefID() == VPDef::VPActiveLaneMaskPHISC;
1834 }
1835
1836 /// Generate the active lane mask phi of the vector loop.
1837 void execute(VPTransformState &State) override;
1838
1839#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1840 /// Print the recipe.
1841 void print(raw_ostream &O, const Twine &Indent,
1842 VPSlotTracker &SlotTracker) const override;
1843#endif
1844};
1845
1846/// A Recipe for widening the canonical induction variable of the vector loop.
1848public:
1850 : VPRecipeBase(VPDef::VPWidenCanonicalIVSC, {CanonicalIV}),
1851 VPValue(this) {}
1852
1853 ~VPWidenCanonicalIVRecipe() override = default;
1854
1855 VP_CLASSOF_IMPL(VPDef::VPWidenCanonicalIVSC)
1856
1857 /// Generate a canonical vector induction variable of the vector loop, with
1858 /// start = {<Part*VF, Part*VF+1, ..., Part*VF+VF-1> for 0 <= Part < UF}, and
1859 /// step = <VF*UF, VF*UF, ..., VF*UF>.
1860 void execute(VPTransformState &State) override;
1861
1862#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1863 /// Print the recipe.
1864 void print(raw_ostream &O, const Twine &Indent,
1865 VPSlotTracker &SlotTracker) const override;
1866#endif
1867
1868 /// Returns the scalar type of the induction.
1869 const Type *getScalarType() const {
1870 return cast<VPCanonicalIVPHIRecipe>(getOperand(0)->getDefiningRecipe())
1871 ->getScalarType();
1872 }
1873};
1874
1875/// A recipe for converting the canonical IV value to the corresponding value of
1876/// an IV with different start and step values, using Start + CanonicalIV *
1877/// Step.
1879 /// The type of the result value. It may be smaller than the type of the
1880 /// induction and in this case it will get truncated to ResultTy.
1881 Type *ResultTy;
1882
1883 /// Induction descriptor for the induction the canonical IV is transformed to.
1884 const InductionDescriptor &IndDesc;
1885
1886public:
1888 VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step,
1889 Type *ResultTy)
1890 : VPRecipeBase(VPDef::VPDerivedIVSC, {Start, CanonicalIV, Step}),
1891 VPValue(this), ResultTy(ResultTy), IndDesc(IndDesc) {}
1892
1893 ~VPDerivedIVRecipe() override = default;
1894
1895 VP_CLASSOF_IMPL(VPDef::VPDerivedIVSC)
1896
1897 /// Generate the transformed value of the induction at offset StartValue (1.
1898 /// operand) + IV (2. operand) * StepValue (3, operand).
1899 void execute(VPTransformState &State) override;
1900
1901#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1902 /// Print the recipe.
1903 void print(raw_ostream &O, const Twine &Indent,
1904 VPSlotTracker &SlotTracker) const override;
1905#endif
1906
1907 VPValue *getStartValue() const { return getOperand(0); }
1908 VPValue *getCanonicalIV() const { return getOperand(1); }
1909 VPValue *getStepValue() const { return getOperand(2); }
1910
1911 /// Returns true if the recipe only uses the first lane of operand \p Op.
1912 bool onlyFirstLaneUsed(const VPValue *Op) const override {
1913 assert(is_contained(operands(), Op) &&
1914 "Op must be an operand of the recipe");
1915 return true;
1916 }
1917};
1918
1919/// A recipe for handling phi nodes of integer and floating-point inductions,
1920/// producing their scalar values.
1922 const InductionDescriptor &IndDesc;
1923
1924public:
1926 VPValue *Step)
1927 : VPRecipeBase(VPDef::VPScalarIVStepsSC, {IV, Step}), VPValue(this),
1928 IndDesc(IndDesc) {}
1929
1930 ~VPScalarIVStepsRecipe() override = default;
1931
1932 VP_CLASSOF_IMPL(VPDef::VPScalarIVStepsSC)
1933
1934 /// Generate the scalarized versions of the phi node as needed by their users.
1935 void execute(VPTransformState &State) override;
1936
1937#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1938 /// Print the recipe.
1939 void print(raw_ostream &O, const Twine &Indent,
1940 VPSlotTracker &SlotTracker) const override;
1941#endif
1942
1943 VPValue *getStepValue() const { return getOperand(1); }
1944
1945 /// Returns true if the recipe only uses the first lane of operand \p Op.
1946 bool onlyFirstLaneUsed(const VPValue *Op) const override {
1947 assert(is_contained(operands(), Op) &&
1948 "Op must be an operand of the recipe");
1949 return true;
1950 }
1951};
1952
1953/// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It
1954/// holds a sequence of zero or more VPRecipe's each representing a sequence of
1955/// output IR instructions. All PHI-like recipes must come before any non-PHI recipes.
1957public:
1959
1960private:
1961 /// The VPRecipes held in the order of output instructions to generate.
1962 RecipeListTy Recipes;
1963
1964public:
1965 VPBasicBlock(const Twine &Name = "", VPRecipeBase *Recipe = nullptr)
1966 : VPBlockBase(VPBasicBlockSC, Name.str()) {
1967 if (Recipe)
1968 appendRecipe(Recipe);
1969 }
1970
1971 ~VPBasicBlock() override {
1972 while (!Recipes.empty())
1973 Recipes.pop_back();
1974 }
1975
1976 /// Instruction iterators...
1981
1982 //===--------------------------------------------------------------------===//
1983 /// Recipe iterator methods
1984 ///
1985 inline iterator begin() { return Recipes.begin(); }
1986 inline const_iterator begin() const { return Recipes.begin(); }
1987 inline iterator end() { return Recipes.end(); }
1988 inline const_iterator end() const { return Recipes.end(); }
1989
1990 inline reverse_iterator rbegin() { return Recipes.rbegin(); }
1991 inline const_reverse_iterator rbegin() const { return Recipes.rbegin(); }
1992 inline reverse_iterator rend() { return Recipes.rend(); }
1993 inline const_reverse_iterator rend() const { return Recipes.rend(); }
1994
1995 inline size_t size() const { return Recipes.size(); }
1996 inline bool empty() const { return Recipes.empty(); }
1997 inline const VPRecipeBase &front() const { return Recipes.front(); }
1998 inline VPRecipeBase &front() { return Recipes.front(); }
1999 inline const VPRecipeBase &back() const { return Recipes.back(); }
2000 inline VPRecipeBase &back() { return Recipes.back(); }
2001
2002 /// Returns a reference to the list of recipes.
2003 RecipeListTy &getRecipeList() { return Recipes; }
2004
2005 /// Returns a pointer to a member of the recipe list.
2007 return &VPBasicBlock::Recipes;
2008 }
2009
2010 /// Method to support type inquiry through isa, cast, and dyn_cast.
2011 static inline bool classof(const VPBlockBase *V) {
2012 return V->getVPBlockID() == VPBlockBase::VPBasicBlockSC;
2013 }
2014
2015 void insert(VPRecipeBase *Recipe, iterator InsertPt) {
2016 assert(Recipe && "No recipe to append.");
2017 assert(!Recipe->Parent && "Recipe already in VPlan");
2018 Recipe->Parent = this;
2019 Recipes.insert(InsertPt, Recipe);
2020 }
2021
2022 /// Augment the existing recipes of a VPBasicBlock with an additional
2023 /// \p Recipe as the last recipe.
2024 void appendRecipe(VPRecipeBase *Recipe) { insert(Recipe, end()); }
2025
2026 /// The method which generates the output IR instructions that correspond to
2027 /// this VPBasicBlock, thereby "executing" the VPlan.
2028 void execute(VPTransformState *State) override;
2029
2030 /// Return the position of the first non-phi node recipe in the block.
2032
2033 /// Returns an iterator range over the PHI-like recipes in the block.
2035 return make_range(begin(), getFirstNonPhi());
2036 }
2037
2038 void dropAllReferences(VPValue *NewValue) override;
2039
2040 /// Split current block at \p SplitAt by inserting a new block between the
2041 /// current block and its successors and moving all recipes starting at
2042 /// SplitAt to the new block. Returns the new block.
2043 VPBasicBlock *splitAt(iterator SplitAt);
2044
2046
2047#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2048 /// Print this VPBsicBlock to \p O, prefixing all lines with \p Indent. \p
2049 /// SlotTracker is used to print unnamed VPValue's using consequtive numbers.
2050 ///
2051 /// Note that the numbering is applied to the whole VPlan, so printing
2052 /// individual blocks is consistent with the whole VPlan printing.
2053 void print(raw_ostream &O, const Twine &Indent,
2054 VPSlotTracker &SlotTracker) const override;
2055 using VPBlockBase::print; // Get the print(raw_stream &O) version.
2056#endif
2057
2058 /// If the block has multiple successors, return the branch recipe terminating
2059 /// the block. If there are no or only a single successor, return nullptr;
2061 const VPRecipeBase *getTerminator() const;
2062
2063 /// Returns true if the block is exiting it's parent region.
2064 bool isExiting() const;
2065
2066private:
2067 /// Create an IR BasicBlock to hold the output instructions generated by this
2068 /// VPBasicBlock, and return it. Update the CFGState accordingly.
2069 BasicBlock *createEmptyBasicBlock(VPTransformState::CFGState &CFG);
2070};
2071
2072/// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks
2073/// which form a Single-Entry-Single-Exiting subgraph of the output IR CFG.
2074/// A VPRegionBlock may indicate that its contents are to be replicated several
2075/// times. This is designed to support predicated scalarization, in which a
2076/// scalar if-then code structure needs to be generated VF * UF times. Having
2077/// this replication indicator helps to keep a single model for multiple
2078/// candidate VF's. The actual replication takes place only once the desired VF
2079/// and UF have been determined.
2081 /// Hold the Single Entry of the SESE region modelled by the VPRegionBlock.
2082 VPBlockBase *Entry;
2083
2084 /// Hold the Single Exiting block of the SESE region modelled by the
2085 /// VPRegionBlock.
2086 VPBlockBase *Exiting;
2087
2088 /// An indicator whether this region is to generate multiple replicated
2089 /// instances of output IR corresponding to its VPBlockBases.
2090 bool IsReplicator;
2091
2092public:
2094 const std::string &Name = "", bool IsReplicator = false)
2095 : VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exiting(Exiting),
2096 IsReplicator(IsReplicator) {
2097 assert(Entry->getPredecessors().empty() && "Entry block has predecessors.");
2098 assert(Exiting->getSuccessors().empty() && "Exit block has successors.");
2099 Entry->setParent(this);
2100 Exiting->setParent(this);
2101 }
2102 VPRegionBlock(const std::string &Name = "", bool IsReplicator = false)
2103 : VPBlockBase(VPRegionBlockSC, Name), Entry(nullptr), Exiting(nullptr),
2104 IsReplicator(IsReplicator) {}
2105
2106 ~VPRegionBlock() override {
2107 if (Entry) {
2108 VPValue DummyValue;
2109 Entry->dropAllReferences(&DummyValue);
2110 deleteCFG(Entry);
2111 }
2112 }
2113
2114 /// Method to support type inquiry through isa, cast, and dyn_cast.
2115 static inline bool classof(const VPBlockBase *V) {
2116 return V->getVPBlockID() == VPBlockBase::VPRegionBlockSC;
2117 }
2118
2119 const VPBlockBase *getEntry() const { return Entry; }
2120 VPBlockBase *getEntry() { return Entry; }
2121
2122 /// Set \p EntryBlock as the entry VPBlockBase of this VPRegionBlock. \p
2123 /// EntryBlock must have no predecessors.
2124 void setEntry(VPBlockBase *EntryBlock) {
2125 assert(EntryBlock->getPredecessors().empty() &&
2126 "Entry block cannot have predecessors.");
2127 Entry = EntryBlock;
2128 EntryBlock->setParent(this);
2129 }
2130
2131 const VPBlockBase *getExiting() const { return Exiting; }
2132 VPBlockBase *getExiting() { return Exiting; }
2133
2134 /// Set \p ExitingBlock as the exiting VPBlockBase of this VPRegionBlock. \p
2135 /// ExitingBlock must have no successors.
2136 void setExiting(VPBlockBase *ExitingBlock) {
2137 assert(ExitingBlock->getSuccessors().empty() &&
2138 "Exit block cannot have successors.");
2139 Exiting = ExitingBlock;
2140 ExitingBlock->setParent(this);
2141 }
2142
2143 /// Returns the pre-header VPBasicBlock of the loop region.
2145 assert(!isReplicator() && "should only get pre-header of loop regions");
2147 }
2148
2149 /// An indicator whether this region is to generate multiple replicated
2150 /// instances of output IR corresponding to its VPBlockBases.
2151 bool isReplicator() const { return IsReplicator; }
2152
2153 /// The method which generates the output IR instructions that correspond to
2154 /// this VPRegionBlock, thereby "executing" the VPlan.
2155 void execute(VPTransformState *State) override;
2156
2157 void dropAllReferences(VPValue *NewValue) override;
2158
2159#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2160 /// Print this VPRegionBlock to \p O (recursively), prefixing all lines with
2161 /// \p Indent. \p SlotTracker is used to print unnamed VPValue's using
2162 /// consequtive numbers.
2163 ///
2164 /// Note that the numbering is applied to the whole VPlan, so printing
2165 /// individual regions is consistent with the whole VPlan printing.
2166 void print(raw_ostream &O, const Twine &Indent,
2167 VPSlotTracker &SlotTracker) const override;
2168 using VPBlockBase::print; // Get the print(raw_stream &O) version.
2169#endif
2170};
2171
2172/// VPlan models a candidate for vectorization, encoding various decisions take
2173/// to produce efficient output IR, including which branches, basic-blocks and
2174/// output IR instructions to generate, and their cost. VPlan holds a
2175/// Hierarchical-CFG of VPBasicBlocks and VPRegionBlocks rooted at an Entry
2176/// VPBlock.
2177class VPlan {
2178 friend class VPlanPrinter;
2179 friend class VPSlotTracker;
2180
2181 /// Hold the single entry to the Hierarchical CFG of the VPlan.
2182 VPBlockBase *Entry;
2183
2184 /// Holds the VFs applicable to this VPlan.
2186
2187 /// Holds the UFs applicable to this VPlan. If empty, the VPlan is valid for
2188 /// any UF.
2190
2191 /// Holds the name of the VPlan, for printing.
2192 std::string Name;
2193
2194 /// Holds all the external definitions created for this VPlan. External
2195 /// definitions must be immutable and hold a pointer to their underlying IR.
2196 DenseMap<Value *, VPValue *> VPExternalDefs;
2197
2198 /// Represents the trip count of the original loop, for folding
2199 /// the tail.
2200 VPValue *TripCount = nullptr;
2201
2202 /// Represents the backedge taken count of the original loop, for folding
2203 /// the tail. It equals TripCount - 1.
2204 VPValue *BackedgeTakenCount = nullptr;
2205
2206 /// Represents the vector trip count.
2207 VPValue VectorTripCount;
2208
2209 /// Holds a mapping between Values and their corresponding VPValue inside
2210 /// VPlan.
2211 Value2VPValueTy Value2VPValue;
2212
2213 /// Contains all VPValues that been allocated by addVPValue directly and need
2214 /// to be free when the plan's destructor is called.
2215 SmallVector<VPValue *, 16> VPValuesToFree;
2216
2217 /// Indicates whether it is safe use the Value2VPValue mapping or if the
2218 /// mapping cannot be used any longer, because it is stale.
2219 bool Value2VPValueEnabled = true;
2220
2221 /// Values used outside the plan.
2223
2224public:
2225 VPlan(VPBlockBase *Entry = nullptr) : Entry(Entry) {
2226 if (Entry)
2227 Entry->setPlan(this);
2228 }
2229
2230 ~VPlan();
2231
2232 /// Prepare the plan for execution, setting up the required live-in values.
2233 void prepareToExecute(Value *TripCount, Value *VectorTripCount,
2234 Value *CanonicalIVStartValue, VPTransformState &State,
2235 bool IsEpilogueVectorization);
2236
2237 /// Generate the IR code for this VPlan.
2238 void execute(VPTransformState *State);
2239
2240 VPBlockBase *getEntry() { return Entry; }
2241 const VPBlockBase *getEntry() const { return Entry; }
2242
2244 Entry = Block;
2245 Block->setPlan(this);
2246 return Entry;
2247 }
2248
2249 /// The trip count of the original loop.
2251 if (!TripCount)
2252 TripCount = new VPValue();
2253 return TripCount;
2254 }
2255
2256 /// The backedge taken count of the original loop.
2258 if (!BackedgeTakenCount)
2259 BackedgeTakenCount = new VPValue();
2260 return BackedgeTakenCount;
2261 }
2262
2263 /// The vector trip count.
2264 VPValue &getVectorTripCount() { return VectorTripCount; }
2265
2266 /// Mark the plan to indicate that using Value2VPValue is not safe any
2267 /// longer, because it may be stale.
2268 void disableValue2VPValue() { Value2VPValueEnabled = false; }
2269
2270 void addVF(ElementCount VF) { VFs.insert(VF); }
2271
2273 assert(hasVF(VF) && "Cannot set VF not already in plan");
2274 VFs.clear();
2275 VFs.insert(VF);
2276 }
2277
2278 bool hasVF(ElementCount VF) { return VFs.count(VF); }
2279
2280 bool hasScalarVFOnly() const { return VFs.size() == 1 && VFs[0].isScalar(); }
2281
2282 bool hasUF(unsigned UF) const { return UFs.empty() || UFs.contains(UF); }
2283
2284 void setUF(unsigned UF) {
2285 assert(hasUF(UF) && "Cannot set the UF not already in plan");
2286 UFs.clear();
2287 UFs.insert(UF);
2288 }
2289
2290 /// Return a string with the name of the plan and the applicable VFs and UFs.
2291 std::string getName() const;
2292
2293 void setName(const Twine &newName) { Name = newName.str(); }
2294
2295 /// Get the existing or add a new external definition for \p V.
2297 auto I = VPExternalDefs.insert({V, nullptr});
2298 if (I.second)
2299 I.first->second = new VPValue(V);
2300 return I.first->second;
2301 }
2302
2304 assert(Value2VPValueEnabled &&
2305 "IR value to VPValue mapping may be out of date!");
2306 assert(V && "Trying to add a null Value to VPlan");
2307 assert(!Value2VPValue.count(V) && "Value already exists in VPlan");
2308 VPValue *VPV = new VPValue(V);
2309 Value2VPValue[V] = VPV;
2310 VPValuesToFree.push_back(VPV);
2311 }
2312
2313 void addVPValue(Value *V, VPValue *VPV) {
2314 assert(Value2VPValueEnabled && "Value2VPValue mapping may be out of date!");
2315 assert(V && "Trying to add a null Value to VPlan");
2316 assert(!Value2VPValue.count(V) && "Value already exists in VPlan");
2317 Value2VPValue[V] = VPV;
2318 }
2319
2320 /// Returns the VPValue for \p V. \p OverrideAllowed can be used to disable
2321 /// checking whether it is safe to query VPValues using IR Values.
2322 VPValue *getVPValue(Value *V, bool OverrideAllowed = false) {
2323 assert((OverrideAllowed || isa<Constant>(V) || Value2VPValueEnabled) &&
2324 "Value2VPValue mapping may be out of date!");
2325 assert(V && "Trying to get the VPValue of a null Value");
2326 assert(Value2VPValue.count(V) && "Value does not exist in VPlan");
2327 return Value2VPValue[V];
2328 }
2329
2330 /// Gets the VPValue or adds a new one (if none exists yet) for \p V. \p
2331 /// OverrideAllowed can be used to disable checking whether it is safe to
2332 /// query VPValues using IR Values.
2333 VPValue *getOrAddVPValue(Value *V, bool OverrideAllowed = false) {
2334 assert((OverrideAllowed || isa<Constant>(V) || Value2VPValueEnabled) &&
2335 "Value2VPValue mapping may be out of date!");
2336 assert(V && "Trying to get or add the VPValue of a null Value");
2337 if (!Value2VPValue.count(V))
2338 addVPValue(V);
2339 return getVPValue(V);
2340 }
2341
2343 assert(Value2VPValueEnabled &&
2344 "IR value to VPValue mapping may be out of date!");
2345 Value2VPValue.erase(V);
2346 }
2347
2348#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2349 /// Print this VPlan to \p O.
2350 void print(raw_ostream &O) const;
2351
2352 /// Print this VPlan in DOT format to \p O.
2353 void printDOT(raw_ostream &O) const;
2354
2355 /// Dump the plan to stderr (for debugging).
2356 LLVM_DUMP_METHOD void dump() const;
2357#endif
2358
2359 /// Returns a range mapping the values the range \p Operands to their
2360 /// corresponding VPValues.
2361 iterator_range<mapped_iterator<Use *, std::function<VPValue *(Value *)>>>
2363 std::function<VPValue *(Value *)> Fn = [this](Value *Op) {
2364 return getOrAddVPValue(Op);
2365 };
2366 return map_range(Operands, Fn);
2367 }
2368
2369 /// Returns the VPRegionBlock of the vector loop.
2371 return cast<VPRegionBlock>(getEntry()->getSingleSuccessor());
2372 }
2374 return cast<VPRegionBlock>(getEntry()->getSingleSuccessor());
2375 }
2376
2377 /// Returns the canonical induction recipe of the vector loop.
2380 if (EntryVPBB->empty()) {
2381 // VPlan native path.
2382 EntryVPBB = cast<VPBasicBlock>(EntryVPBB->getSingleSuccessor());
2383 }
2384 return cast<VPCanonicalIVPHIRecipe>(&*EntryVPBB->begin());
2385 }
2386
2387 /// Find and return the VPActiveLaneMaskPHIRecipe from the header - there
2388 /// be only one at most. If there isn't one, then return nullptr.
2390
2391 void addLiveOut(PHINode *PN, VPValue *V);
2392
2394 for (auto &KV : LiveOuts)
2395 delete KV.second;
2396 LiveOuts.clear();
2397 }
2398
2400 delete LiveOuts[PN];
2401 LiveOuts.erase(PN);
2402 }
2403
2405 return LiveOuts;
2406 }
2407
2408private:
2409 /// Add to the given dominator tree the header block and every new basic block
2410 /// that was created between it and the latch block, inclusive.
2411 static void updateDominatorTree(DominatorTree *DT, BasicBlock *LoopLatchBB,
2412 BasicBlock *LoopPreHeaderBB,
2413 BasicBlock *LoopExitBB);
2414};
2415
2416#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2417/// VPlanPrinter prints a given VPlan to a given output stream. The printing is
2418/// indented and follows the dot format.
2420 raw_ostream &OS;
2421 const VPlan &Plan;
2422 unsigned Depth = 0;
2423 unsigned TabWidth = 2;
2424 std::string Indent;
2425 unsigned BID = 0;
2427
2429
2430 /// Handle indentation.
2431 void bumpIndent(int b) { Indent = std::string((Depth += b) * TabWidth, ' '); }
2432
2433 /// Print a given \p Block of the Plan.
2434 void dumpBlock(const VPBlockBase *Block);
2435
2436 /// Print the information related to the CFG edges going out of a given
2437 /// \p Block, followed by printing the successor blocks themselves.
2438 void dumpEdges(const VPBlockBase *Block);
2439
2440 /// Print a given \p BasicBlock, including its VPRecipes, followed by printing
2441 /// its successor blocks.
2442 void dumpBasicBlock(const VPBasicBlock *BasicBlock);
2443
2444 /// Print a given \p Region of the Plan.
2445 void dumpRegion(const VPRegionBlock *Region);
2446
2447 unsigned getOrCreateBID(const VPBlockBase *Block) {
2448 return BlockID.count(Block) ? BlockID[Block] : BlockID[Block] = BID++;
2449 }
2450
2451 Twine getOrCreateName(const VPBlockBase *Block);
2452
2453 Twine getUID(const VPBlockBase *Block);
2454
2455 /// Print the information related to a CFG edge between two VPBlockBases.
2456 void drawEdge(const VPBlockBase *From, const VPBlockBase *To, bool Hidden,
2457 const Twine &Label);
2458
2459public:
2461 : OS(O), Plan(P), SlotTracker(&P) {}
2462
2463 LLVM_DUMP_METHOD void dump();
2464};
2465
2467 const Value *V;
2468
2469 VPlanIngredient(const Value *V) : V(V) {}
2470
2471 void print(raw_ostream &O) const;
2472};
2473
2475 I.print(OS);
2476 return OS;
2477}
2478
2480 Plan.print(OS);
2481 return OS;
2482}
2483#endif
2484
2485//===----------------------------------------------------------------------===//
2486// VPlan Utilities
2487//===----------------------------------------------------------------------===//
2488
2489/// Class that provides utilities for VPBlockBases in VPlan.
2491public:
2492 VPBlockUtils() = delete;
2493
2494 /// Insert disconnected VPBlockBase \p NewBlock after \p BlockPtr. Add \p
2495 /// NewBlock as successor of \p BlockPtr and \p BlockPtr as predecessor of \p
2496 /// NewBlock, and propagate \p BlockPtr parent to \p NewBlock. \p BlockPtr's
2497 /// successors are moved from \p BlockPtr to \p NewBlock. \p NewBlock must
2498 /// have neither successors nor predecessors.
2499 static void insertBlockAfter(VPBlockBase *NewBlock, VPBlockBase *BlockPtr) {
2500 assert(NewBlock->getSuccessors().empty() &&
2501 NewBlock->getPredecessors().empty() &&
2502 "Can't insert new block with predecessors or successors.");
2503 NewBlock->setParent(BlockPtr->getParent());
2504 SmallVector<VPBlockBase *> Succs(BlockPtr->successors());
2505 for (VPBlockBase *Succ : Succs) {
2506 disconnectBlocks(BlockPtr, Succ);
2507 connectBlocks(NewBlock, Succ);
2508 }
2509 connectBlocks(BlockPtr, NewBlock);
2510 }
2511
2512 /// Insert disconnected VPBlockBases \p IfTrue and \p IfFalse after \p
2513 /// BlockPtr. Add \p IfTrue and \p IfFalse as succesors of \p BlockPtr and \p
2514 /// BlockPtr as predecessor of \p IfTrue and \p IfFalse. Propagate \p BlockPtr
2515 /// parent to \p IfTrue and \p IfFalse. \p BlockPtr must have no successors
2516 /// and \p IfTrue and \p IfFalse must have neither successors nor
2517 /// predecessors.
2518 static void insertTwoBlocksAfter(VPBlockBase *IfTrue, VPBlockBase *IfFalse,
2519 VPBlockBase *BlockPtr) {
2520 assert(IfTrue->getSuccessors().empty() &&
2521 "Can't insert IfTrue with successors.");
2522 assert(IfFalse->getSuccessors().empty() &&
2523 "Can't insert IfFalse with successors.");
2524 BlockPtr->setTwoSuccessors(IfTrue, IfFalse);
2525 IfTrue->setPredecessors({BlockPtr});
2526 IfFalse->setPredecessors({BlockPtr});
2527 IfTrue->setParent(BlockPtr->getParent());
2528 IfFalse->setParent(BlockPtr->getParent());
2529 }
2530
2531 /// Connect VPBlockBases \p From and \p To bi-directionally. Append \p To to
2532 /// the successors of \p From and \p From to the predecessors of \p To. Both
2533 /// VPBlockBases must have the same parent, which can be null. Both
2534 /// VPBlockBases can be already connected to other VPBlockBases.
2536 assert((From->getParent() == To->getParent()) &&
2537 "Can't connect two block with different parents");
2538 assert(From->getNumSuccessors() < 2 &&
2539 "Blocks can't have more than two successors.");
2540 From->appendSuccessor(To);
2541 To->appendPredecessor(From);
2542 }
2543
2544 /// Disconnect VPBlockBases \p From and \p To bi-directionally. Remove \p To
2545 /// from the successors of \p From and \p From from the predecessors of \p To.
2547 assert(To && "Successor to disconnect is null.");
2548 From->removeSuccessor(To);
2549 To->removePredecessor(From);
2550 }
2551
2552 /// Return an iterator range over \p Range which only includes \p BlockTy
2553 /// blocks. The accesses are casted to \p BlockTy.
2554 template <typename BlockTy, typename T>
2555 static auto blocksOnly(const T &Range) {
2556 // Create BaseTy with correct const-ness based on BlockTy.
2557 using BaseTy = std::conditional_t<std::is_const<BlockTy>::value,
2558 const VPBlockBase, VPBlockBase>;
2559
2560 // We need to first create an iterator range over (const) BlocktTy & instead
2561 // of (const) BlockTy * for filter_range to work properly.
2562 auto Mapped =
2563 map_range(Range, [](BaseTy *Block) -> BaseTy & { return *Block; });
2565 Mapped, [](BaseTy &Block) { return isa<BlockTy>(&Block); });
2566 return map_range(Filter, [](BaseTy &Block) -> BlockTy * {
2567 return cast<BlockTy>(&Block);
2568 });
2569 }
2570};
2571
2574 InterleaveGroupMap;
2575
2576 /// Type for mapping of instruction based interleave groups to VPInstruction
2577 /// interleave groups
2580
2581 /// Recursively \p Region and populate VPlan based interleave groups based on
2582 /// \p IAI.
2583 void visitRegion(VPRegionBlock *Region, Old2NewTy &Old2New,
2585 /// Recursively traverse \p Block and populate VPlan based interleave groups
2586 /// based on \p IAI.
2587 void visitBlock(VPBlockBase *Block, Old2NewTy &Old2New,
2589
2590public:
2592
2595 // Avoid releasing a pointer twice.
2596 for (auto &I : InterleaveGroupMap)
2597 DelSet.insert(I.second);
2598 for (auto *Ptr : DelSet)
2599 delete Ptr;
2600 }
2601
2602 /// Get the interleave group that \p Instr belongs to.
2603 ///
2604 /// \returns nullptr if doesn't have such group.
2607 return InterleaveGroupMap.lookup(Instr);
2608 }
2609};
2610
2611/// Class that maps (parts of) an existing VPlan to trees of combined
2612/// VPInstructions.
2614 enum class OpMode { Failed, Load, Opcode };
2615
2616 /// A DenseMapInfo implementation for using SmallVector<VPValue *, 4> as
2617 /// DenseMap keys.
2618 struct BundleDenseMapInfo {
2619 static SmallVector<VPValue *, 4> getEmptyKey() {
2620 return {reinterpret_cast<VPValue *>(-1)};
2621 }
2622
2623 static SmallVector<VPValue *, 4> getTombstoneKey() {
2624 return {reinterpret_cast<VPValue *>(-2)};
2625 }
2626
2627 static unsigned getHashValue(const SmallVector<VPValue *, 4> &V) {
2628 return static_cast<unsigned>(hash_combine_range(V.begin(), V.end()));
2629 }
2630
2631 static bool isEqual(const SmallVector<VPValue *, 4> &LHS,
2633 return LHS == RHS;
2634 }
2635 };
2636
2637 /// Mapping of values in the original VPlan to a combined VPInstruction.
2639 BundleToCombined;
2640
2642
2643 /// Basic block to operate on. For now, only instructions in a single BB are
2644 /// considered.
2645 const VPBasicBlock &BB;
2646
2647 /// Indicates whether we managed to combine all visited instructions or not.
2648 bool CompletelySLP = true;
2649
2650 /// Width of the widest combined bundle in bits.
2651 unsigned WidestBundleBits = 0;
2652
2653 using MultiNodeOpTy =
2654 typename std::pair<VPInstruction *, SmallVector<VPValue *, 4>>;
2655
2656 // Input operand bundles for the current multi node. Each multi node operand
2657 // bundle contains values not matching the multi node's opcode. They will
2658 // be reordered in reorderMultiNodeOps, once we completed building a
2659 // multi node.
2660 SmallVector<MultiNodeOpTy, 4> MultiNodeOps;
2661
2662 /// Indicates whether we are building a multi node currently.
2663 bool MultiNodeActive = false;
2664
2665 /// Check if we can vectorize Operands together.
2666 bool areVectorizable(ArrayRef<VPValue *> Operands) const;
2667
2668 /// Add combined instruction \p New for the bundle \p Operands.
2669 void addCombined(ArrayRef<VPValue *> Operands, VPInstruction *New);
2670
2671 /// Indicate we hit a bundle we failed to combine. Returns nullptr for now.
2672 VPInstruction *markFailed();
2673
2674 /// Reorder operands in the multi node to maximize sequential memory access
2675 /// and commutative operations.
2676 SmallVector<MultiNodeOpTy, 4> reorderMultiNodeOps();
2677
2678 /// Choose the best candidate to use for the lane after \p Last. The set of
2679 /// candidates to choose from are values with an opcode matching \p Last's
2680 /// or loads consecutive to \p Last.
2681 std::pair<OpMode, VPValue *> getBest(OpMode Mode, VPValue *Last,
2682 SmallPtrSetImpl<VPValue *> &Candidates,
2684
2685#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2686 /// Print bundle \p Values to dbgs().
2687 void dumpBundle(ArrayRef<VPValue *> Values);
2688#endif
2689
2690public:
2691 VPlanSlp(VPInterleavedAccessInfo &IAI, VPBasicBlock &BB) : IAI(IAI), BB(BB) {}
2692
2693 ~VPlanSlp() = default;
2694
2695 /// Tries to build an SLP tree rooted at \p Operands and returns a
2696 /// VPInstruction combining \p Operands, if they can be combined.
2698
2699 /// Return the width of the widest combined bundle in bits.
2700 unsigned getWidestBundleBits() const { return WidestBundleBits; }
2701
2702 /// Return true if all visited instruction can be combined.
2703 bool isCompletelySLP() const { return CompletelySLP; }
2704};
2705
2706namespace vputils {
2707
2708/// Returns true if only the first lane of \p Def is used.
2709bool onlyFirstLaneUsed(VPValue *Def);
2710
2711/// Get or create a VPValue that corresponds to the expansion of \p Expr. If \p
2712/// Expr is a SCEVConstant or SCEVUnknown, return a VPValue wrapping the live-in
2713/// value. Otherwise return a VPExpandSCEVRecipe to expand \p Expr. If \p Plan's
2714/// pre-header already contains a recipe expanding \p Expr, return it. If not,
2715/// create a new one.
2717 ScalarEvolution &SE);
2718
2719/// Returns true if \p VPV is uniform after vectorization.
2721 // A value defined outside the vector region must be uniform after
2722 // vectorization inside a vector region.
2724 return true;
2725 VPRecipeBase *Def = VPV->getDefiningRecipe();
2726 assert(Def && "Must have definition for value defined inside vector region");
2727 if (auto Rep = dyn_cast<VPReplicateRecipe>(Def))
2728 return Rep->isUniform();
2729 return false;
2730}
2731} // end namespace vputils
2732
2733} // end namespace llvm
2734
2735#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
aarch64 promote const
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
always inline
BlockVerifier::State From
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
RelocType Type
Definition: COFFYAML.cpp:390
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:492
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines the DenseMap class.
This file builds on the ADT/GraphTraits.h file to build generic depth first graph iterator.
uint64_t Addr
std::string Name
Flatten the CFG
Hexagon Common GEP
std::pair< BasicBlock *, unsigned > BlockTy
A pair of (basic block, score).
#define I(x, y, z)
Definition: MD5.cpp:58
mir Rename Register Operands
This file implements a map that provides insertion order iteration.
#define P(N)
static cl::opt< RegAllocEvictionAdvisorAnalysis::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Development, "development", "for training")))
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
raw_pwrite_stream & OS
This file implements the SmallBitVector class.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file contains the declarations of the entities induced by Vectorization Plans,...
#define VP_CLASSOF_IMPL(VPDefID)
Definition: VPlan.h:759
Value * RHS
Value * LHS
static const uint32_t IV[8]
Definition: blake3_impl.h:77
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition: ArrayRef.h:193
LLVM Basic Block Representation.
Definition: BasicBlock.h:56
This class represents a function call, abstracting a target machine's calling convention.
A debug info location.
Definition: DebugLoc.h:33
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:202
bool erase(const KeyT &Val)
Definition: DenseMap.h:315
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:151
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:220
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
Convenience struct for specifying and reasoning about fast-math flags.
Definition: FMF.h:21
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:940
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:94
A struct for saving information about induction variables.
InnerLoopVectorizer vectorizes loops which contain only one basic block to a specified vectorization ...
The group of interleaved loads/stores sharing the same stride and close to each other.
Definition: VectorUtils.h:639
uint32_t getFactor() const
Definition: VectorUtils.h:655
InstTy * getMember(uint32_t Index) const
Get the member with the given index Index.
Definition: VectorUtils.h:709
Drive the analysis of interleaved memory accesses in the loop.
Definition: VectorUtils.h:781
An instruction for reading from memory.
Definition: Instructions.h:177
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:547
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:37
VectorType::iterator erase(typename VectorType::iterator Iterator)
Remove the element given by Iterator.
Definition: MapVector.h:173
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Definition: IVDescriptors.h:69
This class represents an analyzed expression in the program.
The main scalar evolution driver.
This class represents the LLVM 'select' instruction.
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:88
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:219
bool contains(const key_type &key) const
Check if the SetVector contains the given key.
Definition: SetVector.h:213
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:152
void clear()
Completely clear the SetVector.
Definition: SetVector.h:224
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:83
This class provides computation of slot numbers for LLVM Assembly writing.
Definition: AsmWriter.cpp:678
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:344
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:312
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
iterator erase(const_iterator CI)
Definition: SmallVector.h:741
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
An instruction for storing to memory.
Definition: Instructions.h:301
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
This class represents a truncation of integer types.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
std::string str() const
Return the twine contents as a std::string.
Definition: Twine.cpp:17
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:350
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
A recipe for generating the active lane mask for the vector loop that is used to predicate the vector...
Definition: VPlan.h:1820
void execute(VPTransformState &State) override
Generate the active lane mask phi of the vector loop.
static bool classof(const VPHeaderPHIRecipe *D)
Definition: VPlan.h:1832
VPActiveLaneMaskPHIRecipe(VPValue *StartMask, DebugLoc DL)
Definition: VPlan.h:1824
~VPActiveLaneMaskPHIRecipe() override=default
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition: VPlan.h:1956
RecipeListTy::const_iterator const_iterator
Definition: VPlan.h:1978
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
Definition: VPlan.h:2024
RecipeListTy::const_reverse_iterator const_reverse_iterator
Definition: VPlan.h:1980
RecipeListTy::iterator iterator
Instruction iterators...
Definition: VPlan.h:1977
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPBasicBlock,...
Definition: VPlan.cpp:325
RecipeListTy & getRecipeList()
Returns a reference to the list of recipes.
Definition: VPlan.h:2003
iterator end()
Definition: VPlan.h:1987
VPBasicBlock(const Twine &Name="", VPRecipeBase *Recipe=nullptr)
Definition: VPlan.h:1965
iterator begin()
Recipe iterator methods.
Definition: VPlan.h:1985
RecipeListTy::reverse_iterator reverse_iterator
Definition: VPlan.h:1979
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
Definition: VPlan.h:2034
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
Definition: VPlan.cpp:207
~VPBasicBlock() override
Definition: VPlan.h:1971
VPRegionBlock * getEnclosingLoopRegion()
Definition: VPlan.cpp:427
void dropAllReferences(VPValue *NewValue) override
Replace all operands of VPUsers in the block with NewValue and also replaces all uses of VPValues def...
Definition: VPlan.cpp:392
const_reverse_iterator rbegin() const
Definition: VPlan.h:1991
reverse_iterator rend()
Definition: VPlan.h:1992
VPBasicBlock * splitAt(iterator SplitAt)
Split current block at SplitAt by inserting a new block between the current block and its successors ...
Definition: VPlan.cpp:402
VPRecipeBase & back()
Definition: VPlan.h:2000
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print this VPBsicBlock to O, prefixing all lines with Indent.
Definition: VPlan.cpp:495
const VPRecipeBase & front() const
Definition: VPlan.h:1997
const_iterator begin() const
Definition: VPlan.h:1986
VPRecipeBase & front()
Definition: VPlan.h:1998
bool isExiting() const
Returns true if the block is exiting it's parent region.
Definition: VPlan.cpp:478
VPRecipeBase * getTerminator()
If the block has multiple successors, return the branch recipe terminating the block.
Definition: VPlan.cpp:466
const VPRecipeBase & back() const
Definition: VPlan.h:1999
void insert(VPRecipeBase *Recipe, iterator InsertPt)
Definition: VPlan.h:2015
bool empty() const
Definition: VPlan.h:1996
const_iterator end() const
Definition: VPlan.h:1988
static bool classof(const VPBlockBase *V)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition: VPlan.h:2011
static RecipeListTy VPBasicBlock::* getSublistAccess(VPRecipeBase *)
Returns a pointer to a member of the recipe list.
Definition: VPlan.h:2006
reverse_iterator rbegin()
Definition: VPlan.h:1990
size_t size() const
Definition: VPlan.h:1995
const_reverse_iterator rend() const
Definition: VPlan.h:1993
A recipe for vectorizing a phi-node as a sequence of mask-based select instructions.
Definition: VPlan.h:1340
VPBlendRecipe(PHINode *Phi, ArrayRef< VPValue * > Operands)
The blend operation is a User of the incoming values and of their respective masks,...
Definition: VPlan.h:1347
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:1377
VPValue * getIncomingValue(unsigned Idx) const
Return incoming value number Idx.
Definition: VPlan.h:1362
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
Definition: VPlan.h:1365
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account that a single incoming value has no mask.
Definition: VPlan.h:1359
void execute(VPTransformState &State) override
Generate the phi/select nodes.
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition: VPlan.h:390
VPRegionBlock * getParent()
Definition: VPlan.h:462
VPBlocksTy & getPredecessors()
Definition: VPlan.h:493
const VPBasicBlock * getExitingBasicBlock() const
Definition: VPlan.cpp:172
LLVM_DUMP_METHOD void dump() const
Dump this VPBlockBase to dbgs().
Definition: VPlan.h:629
void setName(const Twine &newName)
Definition: VPlan.h:455
size_t getNumSuccessors() const
Definition: VPlan.h:507
iterator_range< VPBlockBase ** > successors()
Definition: VPlan.h:490
virtual void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Print plain-text dump of this VPBlockBase to O, prefixing all lines with Indent.
void printSuccessors(raw_ostream &O, const Twine &Indent) const
Print the successors of this block to O, prefixing all lines with Indent.
Definition: VPlan.cpp:483
bool isLegalToHoistInto()
Return true if it is legal to hoist instructions into this block.
Definition: VPlan.h:594
virtual ~VPBlockBase()=default
void print(raw_ostream &O) const
Print plain-text dump of this VPlan to O.
Definition: VPlan.h:619
const VPBlocksTy & getHierarchicalPredecessors()
Definition: VPlan.h:543
size_t getNumPredecessors() const
Definition: VPlan.h:508
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
Definition: VPlan.h:574
VPBlockBase * getEnclosingBlockWithPredecessors()
Definition: VPlan.cpp:194
const VPBlocksTy & getPredecessors() const
Definition: VPlan.h:492
static void deleteCFG(VPBlockBase *Entry)
Delete all blocks reachable from a given VPBlockBase, inclusive.
Definition: VPlan.cpp:202
VPlan * getPlan()
Definition: VPlan.cpp:146
void setPlan(VPlan *ParentPlan)
Sets the pointer of the plan containing the block.
Definition: VPlan.cpp:165
const VPRegionBlock * getParent() const
Definition: VPlan.h:463
void printAsOperand(raw_ostream &OS, bool PrintType) const
Definition: VPlan.h:605
const std::string & getName() const
Definition: VPlan.h:453
void clearSuccessors()
Remove all the successors of this block.
Definition: VPlan.h:584
VPBlockBase * getSingleHierarchicalSuccessor()
Definition: VPlan.h:533
void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse)
Set two given VPBlockBases IfTrue and IfFalse to be the two successors of this VPBlockBase.
Definition: VPlan.h:565
VPBlockBase * getSinglePredecessor() const
Definition: VPlan.h:503
virtual void execute(VPTransformState *State)=0
The method which generates the output IR that correspond to this VPBlockBase, thereby "executing" the...
const VPBlocksTy & getHierarchicalSuccessors()
Definition: VPlan.h:527
void clearPredecessors()
Remove all the predecessor of this block.
Definition: VPlan.h:581
enum { VPBasicBlockSC, VPRegionBlockSC } VPBlockTy
An enumeration for keeping track of the concrete subclass of VPBlockBase that are actually instantiat...
Definition: VPlan.h:447
unsigned getVPBlockID() const
Definition: VPlan.h:460
VPBlockBase(const unsigned char SC, const std::string &N)
Definition: VPlan.h:439
VPBlocksTy & getSuccessors()
Definition: VPlan.h:488
VPBlockBase * getEnclosingBlockWithSuccessors()
An Enclosing Block of a block B is any block containing B, including B itself.
Definition: VPlan.cpp:186
const VPBasicBlock * getEntryBasicBlock() const
Definition: VPlan.cpp:151
void setOneSuccessor(VPBlockBase *Successor)
Set a given VPBlockBase Successor as the single successor of this VPBlockBase.
Definition: VPlan.h:556
void setParent(VPRegionBlock *P)
Definition: VPlan.h:473
virtual void dropAllReferences(VPValue *NewValue)=0
Replace all operands of VPUsers in the block with NewValue and also replaces all uses of VPValues def...
VPBlockBase * getSingleHierarchicalPredecessor()
Definition: VPlan.h:549
VPBlockBase * getSingleSuccessor() const
Definition: VPlan.h:497
const VPBlocksTy & getSuccessors() const
Definition: VPlan.h:487
Class that provides utilities for VPBlockBases in VPlan.
Definition: VPlan.h:2490
static auto blocksOnly(const T &Range)
Return an iterator range over Range which only includes BlockTy blocks.
Definition: VPlan.h:2555
static void insertBlockAfter(VPBlockBase *NewBlock, VPBlockBase *BlockPtr)
Insert disconnected VPBlockBase NewBlock after BlockPtr.
Definition: VPlan.h:2499
static void insertTwoBlocksAfter(VPBlockBase *IfTrue, VPBlockBase *IfFalse, VPBlockBase *BlockPtr)
Insert disconnected VPBlockBases IfTrue and IfFalse after BlockPtr.
Definition: VPlan.h:2518
static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To)
Disconnect VPBlockBases From and To bi-directionally.
Definition: VPlan.h:2546
static void connectBlocks(VPBlockBase *From, VPBlockBase *To)
Connect VPBlockBases From and To bi-directionally.
Definition: VPlan.h:2535
A recipe for generating conditional branches on the bits of a mask.
Definition: VPlan.h:1573
VPValue * getMask() const
Return the mask used by this recipe.
Definition: VPlan.h:1601
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition: VPlan.h:1589
VPBranchOnMaskRecipe(VPValue *BlockInMask)
Definition: VPlan.h:1575
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition: VPlan.h:1608
void execute(VPTransformState &State) override
Generate the extraction of the appropriate bit from the block mask and the conditional branch.
Canonical scalar induction phi of the vector loop.
Definition: VPlan.h:1775
~VPCanonicalIVPHIRecipe() override=default
static bool classof(const VPHeaderPHIRecipe *D)
Definition: VPlan.h:1786
VPCanonicalIVPHIRecipe(VPValue *StartV, DebugLoc DL)
Definition: VPlan.h:1779
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:1805
void execute(VPTransformState &State) override
Generate the canonical scalar induction phi of the vector loop.
bool isCanonical(const InductionDescriptor &ID, Type *Ty) const
Check if the induction described by ID is canonical, i.e.
const Type * getScalarType() const
Returns the scalar type of the induction.
Definition: VPlan.h:1800
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
This class augments a recipe with a set of VPValues defined by the recipe.
Definition: VPlanValue.h:303
VPValue * getVPSingleValue()
Returns the only VPValue defined by the VPDef.
Definition: VPlanValue.h:384
unsigned getVPDefID() const
Definition: VPlanValue.h:416
A recipe for converting the canonical IV value to the corresponding value of an IV with different sta...
Definition: VPlan.h:1878
void execute(VPTransformState &State) override
Generate the transformed value of the induction at offset StartValue (1.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPDerivedIVRecipe(const InductionDescriptor &IndDesc, VPValue *Start, VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step, Type *ResultTy)
Definition: VPlan.h:1887
VPValue * getStepValue() const
Definition: VPlan.h:1909
VPValue * getCanonicalIV() const
Definition: VPlan.h:1908
~VPDerivedIVRecipe() override=default
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:1912
VPValue * getStartValue() const
Definition: VPlan.h:1907
Recipe to expand a SCEV expression.
Definition: VPlan.h:1746
VPExpandSCEVRecipe(const SCEV *Expr, ScalarEvolution &SE)
Definition: VPlan.h:1751
const SCEV * getSCEV() const
Definition: VPlan.h:1768
void execute(VPTransformState &State) override
Generate a canonical vector induction variable of the vector loop, with.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPExpandSCEVRecipe() override=default
A pure virtual base class for all recipes modeling header phis, including phis for first order recurr...
Definition: VPlan.h:1050
static bool classof(const VPValue *V)
Definition: VPlan.h:1067
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override=0
Print the recipe.
virtual VPValue * getBackedgeValue()
Returns the incoming value from the loop backedge.
Definition: VPlan.h:1094
VPValue * getStartValue()
Returns the start value of the phi, if one is set.
Definition: VPlan.h:1083
void setStartValue(VPValue *V)
Update the start value of the recipe.
Definition: VPlan.h:1091
VPValue * getStartValue() const
Definition: VPlan.h:1086
static bool classof(const VPRecipeBase *B)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition: VPlan.h:1063
void execute(VPTransformState &State) override=0
Generate the phi nodes.
virtual VPRecipeBase & getBackedgeRecipe()
Returns the backedge value as a recipe.
Definition: VPlan.h:1100
VPHeaderPHIRecipe(unsigned char VPDefID, Instruction *UnderlyingInstr, VPValue *Start=nullptr)
Definition: VPlan.h:1052
~VPHeaderPHIRecipe() override=default
This is a concrete Recipe that models a single VPlan-level instruction.
Definition: VPlan.h:779
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:887
VPInstruction * clone() const
Definition: VPlan.h:832
VPInstruction(unsigned Opcode, ArrayRef< VPValue * > Operands, DebugLoc DL, const Twine &Name="")
Definition: VPlan.h:821
@ CanonicalIVIncrementForPartNUW
Definition: VPlan.h:799
@ FirstOrderRecurrenceSplice
Definition: VPlan.h:785
@ CanonicalIVIncrementNUW
Definition: VPlan.h:795
@ CanonicalIVIncrementForPart
Definition: VPlan.h:798
@ CalculateTripCountMinusVF
Definition: VPlan.h:793
bool hasResult() const
Definition: VPlan.h:861
void setUnderlyingInstr(Instruction *I)
Definition: VPlan.h:818
LLVM_DUMP_METHOD void dump() const
Print the VPInstruction to dbgs() (for debugging).
unsigned getOpcode() const
Definition: VPlan.h:837
VPInstruction(unsigned Opcode, std::initializer_list< VPValue * > Operands, DebugLoc DL={}, const Twine &Name="")
Definition: VPlan.h:826
void setFastMathFlags(FastMathFlags FMFNew)
Set the fast-math flags.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the VPInstruction to O.
bool mayWriteToMemory() const
Return true if this instruction may modify memory.
Definition: VPlan.h:854
void execute(VPTransformState &State) override
Generate the instruction.
VPInterleaveRecipe is a recipe for transforming an interleave group of load or stores into one wide l...
Definition: VPlan.h:1391
bool onlyFirstLaneUsed(const VPValue *Op) const override
The recipe only uses the first lane of the address.
Definition: VPlan.h:1457
~VPInterleaveRecipe() override=default
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition: VPlan.h:1419
VPValue * getMask() const
Return the mask used by this recipe.
Definition: VPlan.h:1425
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the wide load or store, and shuffles.
VPInterleaveRecipe(const InterleaveGroup< Instruction > *IG, VPValue *Addr, ArrayRef< VPValue * > StoredValues, VPValue *Mask)
Definition: VPlan.h:1397
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
Definition: VPlan.h:1432
const InterleaveGroup< Instruction > * getInterleaveGroup()
Definition: VPlan.h:1448
unsigned getNumStoreOperands() const
Returns the number of stored operands of this interleave group.
Definition: VPlan.h:1452
InterleaveGroup< VPInstruction > * getInterleaveGroup(VPInstruction *Instr) const
Get the interleave group that Instr belongs to.
Definition: VPlan.h:2606
In what follows, the term "input IR" refers to code that is fed into the vectorizer whereas the term ...
Definition: VPlan.h:113
static VPLane getLastLaneForVF(const ElementCount &VF)
Definition: VPlan.h:139
static unsigned getNumCachedLanes(const ElementCount &VF)
Returns the maxmimum number of lanes that we are able to consider caching for VF.
Definition: VPlan.h:182
Value * getAsRuntimeExpr(IRBuilderBase &Builder, const ElementCount &VF) const
Returns an expression describing the lane index that can be used at runtime.
Definition: VPlan.cpp:66
VPLane(unsigned Lane, Kind LaneKind)
Definition: VPlan.h:135
Kind getKind() const
Returns the Kind of lane offset.
Definition: VPlan.h:163
bool isFirstLane() const
Returns true if this is the first lane of the whole vector.
Definition: VPlan.h:166
unsigned getKnownLane() const
Returns a compile-time known value for the lane index and asserts if the lane can only be calculated ...
Definition: VPlan.h:153
static VPLane getFirstLane()
Definition: VPlan.h:137
Kind
Kind describes how to interpret Lane.
Definition: VPlan.h:116
@ ScalableLast
For ScalableLast, Lane is the offset from the start of the last N-element subvector in a scalable vec...
@ First
For First, Lane is the index into the first N elements of a fixed-vector <N x <ElTy>> or a scalable v...
unsigned mapToCacheIndex(const ElementCount &VF) const
Maps the lane to a cache index based on VF.
Definition: VPlan.h:169
A value that is used outside the VPlan.
Definition: VPlan.h:635
VPLiveOut(PHINode *Phi, VPValue *Op)
Definition: VPlan.h:639
bool usesScalars(const VPValue *Op) const override
Returns true if the VPLiveOut uses scalars of operand Op.
Definition: VPlan.h:650
PHINode * getPhi() const
Definition: VPlan.h:656
void fixPhi(VPlan &Plan, VPTransformState &State)
Fixup the wrapped LCSSA phi node in the unique exit block.
VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when control converges back from ...
Definition: VPlan.h:1620
~VPPredInstPHIRecipe() override=default
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition: VPlan.h:1640
VPPredInstPHIRecipe(VPValue *PredV)
Construct a VPPredInstPHIRecipe given PredInst whose value needs a phi nodes after merging back from ...
Definition: VPlan.h:1624
void execute(VPTransformState &State) override
Generates phi nodes for live-outs as needed to retain SSA form.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition: VPlan.h:666
bool mayReadFromMemory() const
Returns true if the recipe may read from memory.
VPRecipeBase(const unsigned char SC, ArrayRef< VPValue * > Operands)
Definition: VPlan.h:674
bool mayReadOrWriteMemory() const
Returns true if the recipe may read from or write to memory.
Definition: VPlan.h:753
bool mayHaveSideEffects() const
Returns true if the recipe may have side-effects.
Instruction * getUnderlyingInstr()
Returns the underlying instruction, if the recipe is a VPValue or nullptr otherwise.
Definition: VPlan.h:721
bool mayWriteToMemory() const
Returns true if the recipe may write to memory.
virtual ~VPRecipeBase()=default
VPBasicBlock * getParent()
Definition: VPlan.h:683
virtual void execute(VPTransformState &State)=0
The method which generates the output IR instructions that correspond to this VPRecipe,...
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
static bool classof(const VPDef *D)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition: VPlan.h:729
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
VPRecipeBase(const unsigned char SC, iterator_range< IterT > Operands)
Definition: VPlan.h:678
const VPBasicBlock * getParent() const
Definition: VPlan.h:684
const Instruction * getUnderlyingInstr() const
Definition: VPlan.h:724
static bool classof(const VPUser *U)
Definition: VPlan.h:734
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
bool isPhi() const
Returns true for PHI-like recipes.
Definition: VPlan.h:742
void moveAfter(VPRecipeBase *MovePos)
Unlink this recipe from its current VPBasicBlock and insert it into the VPBasicBlock that MovePos liv...
A recipe for handling reduction phis.
Definition: VPlan.h:1289
VPReductionPHIRecipe(PHINode *Phi, const RecurrenceDescriptor &RdxDesc, VPValue &Start, bool IsInLoop=false, bool IsOrdered=false)
Create a new VPReductionPHIRecipe for the reduction Phi described by RdxDesc.
Definition: VPlan.h:1302
bool isOrdered() const
Returns true, if the phi is part of an ordered reduction.
Definition: VPlan.h:1332
~VPReductionPHIRecipe() override=default
bool isInLoop() const
Returns true, if the phi is part of an in-loop reduction.
Definition: VPlan.h:1335
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi/select nodes.
static bool classof(const VPHeaderPHIRecipe *R)
Definition: VPlan.h:1314
const RecurrenceDescriptor & getRecurrenceDescriptor() const
Definition: VPlan.h:1327
A recipe to represent inloop reduction operations, performing a reduction on a vector operand into a ...
Definition: VPlan.h:1467
VPValue * getVecOp() const
The VPValue of the vector value to be reduced.
Definition: VPlan.h:1499
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getCondOp() const
The VPValue of the condition for the block.
Definition: VPlan.h:1501
~VPReductionRecipe() override=default
VPValue * getChainOp() const
The VPValue of the scalar Chain being accumulated.
Definition: VPlan.h:1497
VPReductionRecipe(const RecurrenceDescriptor *R, Instruction *I, VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp, const TargetTransformInfo *TTI)
Definition: VPlan.h:1474
void execute(VPTransformState &State) override
Generate the reduction in the loop.
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition: VPlan.h:2080
const VPBlockBase * getEntry() const
Definition: VPlan.h:2119
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
Definition: VPlan.h:2151
void dropAllReferences(VPValue *NewValue) override
Replace all operands of VPUsers in the block with NewValue and also replaces all uses of VPValues def...
Definition: VPlan.cpp:509
void setExiting(VPBlockBase *ExitingBlock)
Set ExitingBlock as the exiting VPBlockBase of this VPRegionBlock.
Definition: VPlan.h:2136
VPBlockBase * getExiting()
Definition: VPlan.h:2132
void setEntry(VPBlockBase *EntryBlock)
Set EntryBlock as the entry VPBlockBase of this VPRegionBlock.
Definition: VPlan.h:2124
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print this VPRegionBlock to O (recursively), prefixing all lines with Indent.
Definition: VPlan.cpp:568
VPRegionBlock(const std::string &Name="", bool IsReplicator=false)
Definition: VPlan.h:2102
VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name="", bool IsReplicator=false)
Definition: VPlan.h:2093
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPRegionBlock,...
Definition: VPlan.cpp:516
const VPBlockBase * getExiting() const
Definition: VPlan.h:2131
VPBlockBase * getEntry()
Definition: VPlan.h:2120
VPBasicBlock * getPreheaderVPBB()
Returns the pre-header VPBasicBlock of the loop region.
Definition: VPlan.h:2144
~VPRegionBlock() override
Definition: VPlan.h:2106
static bool classof(const VPBlockBase *V)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition: VPlan.h:2115
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
Definition: VPlan.h:1510
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate replicas of the desired Ingredient.
~VPReplicateRecipe() override=default
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:1547
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition: VPlan.h:1554
bool isUniform() const
Definition: VPlan.h:1542
bool isPredicated() const
Definition: VPlan.h:1544
VPReplicateRecipe(Instruction *I, iterator_range< IterT > Operands, bool IsUniform, VPValue *Mask=nullptr)
Definition: VPlan.h:1519
VPValue * getMask()
Return the mask of a predicated VPReplicateRecipe.
Definition: VPlan.h:1566
bool shouldPack() const
Returns true if the recipe is used by a widened recipe via an intervening VPPredInstPHIRecipe.
A recipe for handling phi nodes of integer and floating-point inductions, producing their scalar valu...
Definition: VPlan.h:1921
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:1946
VPValue * getStepValue() const
Definition: VPlan.h:1943
VPScalarIVStepsRecipe(const InductionDescriptor &IndDesc, VPValue *IV, VPValue *Step)
Definition: VPlan.h:1925
~VPScalarIVStepsRecipe() override=default
void execute(VPTransformState &State) override
Generate the scalarized versions of the phi node as needed by their users.
This class can be used to assign consecutive numbers to all VPValues in a VPlan and allows querying t...
Definition: VPlanValue.h:434
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition: VPlanValue.h:201
operand_range operands()
Definition: VPlanValue.h:276
void setOperand(unsigned I, VPValue *New)
Definition: VPlanValue.h:256
unsigned getNumOperands() const
Definition: VPlanValue.h:250
operand_iterator op_begin()
Definition: VPlanValue.h:272
VPValue * getOperand(unsigned N) const
Definition: VPlanValue.h:251
VPUserID
Subclass identifier (for isa/dyn_cast).
Definition: VPlanValue.h:204
void addOperand(VPValue *Operand)
Definition: VPlanValue.h:245
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Definition: VPlan.cpp:116
friend class VPInstruction
Definition: VPlanValue.h:47
void setUnderlyingValue(Value *Val)
Definition: VPlanValue.h:77
Value * getLiveInIRValue()
Returns the underlying IR value, if this VPValue is defined outside the scope of VPlan.
Definition: VPlanValue.h:177
user_range users()
Definition: VPlanValue.h:147
bool isDefinedOutsideVectorRegions() const
Returns true if the VPValue is defined outside any vector regions, i.e.
Definition: VPlanValue.h:191
A recipe for widening Call instructions.
Definition: VPlan.h:932
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Produce a widened version of the call instruction.
VPWidenCallRecipe(CallInst &I, iterator_range< IterT > CallArguments, Intrinsic::ID VectorIntrinsicID, Function *Variant=nullptr)
Definition: VPlan.h:944
~VPWidenCallRecipe() override=default
A Recipe for widening the canonical induction variable of the vector loop.
Definition: VPlan.h:1847
void execute(VPTransformState &State) override
Generate a canonical vector induction variable of the vector loop, with start = {<Part*VF,...
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPWidenCanonicalIVRecipe() override=default
VPWidenCanonicalIVRecipe(VPCanonicalIVPHIRecipe *CanonicalIV)
Definition: VPlan.h:1849
const Type * getScalarType() const
Returns the scalar type of the induction.
Definition: VPlan.h:1869
A recipe for handling GEP instructions.
Definition: VPlan.h:993
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the gep nodes.
~VPWidenGEPRecipe() override=default
VPWidenGEPRecipe(GetElementPtrInst *GEP, iterator_range< IterT > Operands)
Definition: VPlan.h:1010
A recipe for handling phi nodes of integer and floating-point inductions, producing their vector valu...
Definition: VPlan.h:1107
const TruncInst * getTruncInst() const
Definition: VPlan.h:1165
VPRecipeBase & getBackedgeRecipe() override
Returns the backedge value as a recipe.
Definition: VPlan.h:1151
~VPWidenIntOrFpInductionRecipe() override=default
TruncInst * getTruncInst()
Returns the first defined value as TruncInst, if it is one or nullptr otherwise.
Definition: VPlan.h:1164
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step, const InductionDescriptor &IndDesc, bool NeedsVectorIV)
Definition: VPlan.h:1114
void execute(VPTransformState &State) override
Generate the vectorized and scalarized versions of the phi node as needed by their users.
VPValue * getStepValue()
Returns the step value of the induction.
Definition: VPlan.h:1159
const Type * getScalarType() const
Returns the scalar type of the induction.
Definition: VPlan.h:1177
bool needsVectorIV() const
Returns true if a vector phi needs to be created for the induction.
Definition: VPlan.h:1182
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step, const InductionDescriptor &IndDesc, TruncInst *Trunc, bool NeedsVectorIV)
Definition: VPlan.h:1122
const VPValue * getStepValue() const
Definition: VPlan.h:1160
VPValue * getBackedgeValue() override
Returns the incoming value from the loop backedge.
Definition: VPlan.h:1144
bool isCanonical() const
Returns true if the induction is canonical, i.e.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
Definition: VPlan.h:1170
A Recipe for widening load/store operations.
Definition: VPlan.h:1653
VPValue * getMask() const
Return the mask used by this recipe.
Definition: VPlan.h:1700
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition: VPlan.h:1694
Instruction & getIngredient() const
Definition: VPlan.h:1742
VPWidenMemoryInstructionRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredValue, VPValue *Mask, bool Consecutive, bool Reverse)
Definition: VPlan.h:1682
void execute(VPTransformState &State) override
Generate the wide load/store.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPWidenMemoryInstructionRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask, bool Consecutive, bool Reverse)
Definition: VPlan.h:1673
VPValue * getStoredValue() const
Return the address accessed by this recipe.
Definition: VPlan.h:1709
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:1731
bool isStore() const
Returns true if this recipe is a store.
Definition: VPlan.h:1706
A recipe for handling header phis that are widened in the vector loop.
Definition: VPlan.h:1226
void addIncoming(VPValue *IncomingV, VPBasicBlock *IncomingBlock)
Adds a pair (IncomingV, IncomingBlock) to the phi.
Definition: VPlan.h:1252
VPValue * getIncomingValue(unsigned I)
Returns the I th incoming VPValue.
Definition: VPlan.h:1261
VPWidenPHIRecipe(PHINode *Phi, VPValue *Start=nullptr)
Create a new VPWidenPHIRecipe for Phi with start value Start.
Definition: VPlan.h:1232
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPWidenPHIRecipe() override=default
VPBasicBlock * getIncomingBlock(unsigned I)
Returns the I th incoming VPBasicBlock.
Definition: VPlan.h:1258
void execute(VPTransformState &State) override
Generate the phi/select nodes.
bool onlyScalarsGenerated(ElementCount VF)
Returns true if only scalar values will be generated.
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
Definition: VPlan.h:1214
~VPWidenPointerInductionRecipe() override=default
void execute(VPTransformState &State) override
Generate vector values for the pointer induction.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPWidenPointerInductionRecipe(PHINode *Phi, VPValue *Start, VPValue *Step, const InductionDescriptor &IndDesc, bool IsScalarAfterVectorization)
Create a new VPWidenPointerInductionRecipe for Phi with start value Start.
Definition: VPlan.h:1193
VPWidenRecipe is a recipe for producing a copy of vector type its ingredient.
Definition: VPlan.h:911
void execute(VPTransformState &State) override
Produce widened copies of all Ingredients.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPWidenRecipe() override=default
VPWidenRecipe(Instruction &I, iterator_range< IterT > Operands)
Definition: VPlan.h:914
VPlanPrinter prints a given VPlan to a given output stream.
Definition: VPlan.h:2419
VPlanPrinter(raw_ostream &O, const VPlan &P)
Definition: VPlan.h:2460
LLVM_DUMP_METHOD void dump()
Definition: VPlan.cpp:884
Class that maps (parts of) an existing VPlan to trees of combined VPInstructions.
Definition: VPlan.h:2613
VPInstruction * buildGraph(ArrayRef< VPValue * > Operands)
Tries to build an SLP tree rooted at Operands and returns a VPInstruction combining Operands,...
Definition: VPlanSLP.cpp:359
bool isCompletelySLP() const
Return true if all visited instruction can be combined.
Definition: VPlan.h:2703
~VPlanSlp()=default
VPlanSlp(VPInterleavedAccessInfo &IAI, VPBasicBlock &BB)
Definition: VPlan.h:2691
unsigned getWidestBundleBits() const
Return the width of the widest combined bundle in bits.
Definition: VPlan.h:2700
VPlan models a candidate for vectorization, encoding various decisions take to produce efficient outp...
Definition: VPlan.h:2177
void printDOT(raw_ostream &O) const
Print this VPlan in DOT format to O.
Definition: VPlan.cpp:818
std::string getName() const
Return a string with the name of the plan and the applicable VFs and UFs.
Definition: VPlan.cpp:794
void addVPValue(Value *V, VPValue *VPV)
Definition: VPlan.h:2313
VPValue & getVectorTripCount()
The vector trip count.
Definition: VPlan.h:2264
void setName(const Twine &newName)
Definition: VPlan.h:2293
void removeVPValueFor(Value *V)
Definition: VPlan.h:2342
VPValue * getOrCreateBackedgeTakenCount()
The backedge taken count of the original loop.
Definition: VPlan.h:2257
void removeLiveOut(PHINode *PN)
Definition: VPlan.h:2399
VPBlockBase * getEntry()
Definition: VPlan.h:2240
void clearLiveOuts()
Definition: VPlan.h:2393
VPValue * getOrCreateTripCount()
The trip count of the original loop.
Definition: VPlan.h:2250
void addLiveOut(PHINode *PN, VPValue *V)
Definition: VPlan.cpp:827
void prepareToExecute(Value *TripCount, Value *VectorTripCount, Value *CanonicalIVStartValue, VPTransformState &State, bool IsEpilogueVectorization)
Prepare the plan for execution, setting up the required live-in values.
Definition: VPlan.cpp:611
VPValue * getOrAddExternalDef(Value *V)
Get the existing or add a new external definition for V.
Definition: VPlan.h:2296
VPValue * getOrAddVPValue(Value *V, bool OverrideAllowed=false)
Gets the VPValue or adds a new one (if none exists yet) for V.
Definition: VPlan.h:2333
VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition: VPlan.h:2370
const VPRegionBlock * getVectorLoopRegion() const
Definition: VPlan.h:2373
bool hasVF(ElementCount VF)
Definition: VPlan.h:2278
bool hasUF(unsigned UF) const
Definition: VPlan.h:2282
VPlan(VPBlockBase *Entry=nullptr)
Definition: VPlan.h:2225
void setVF(ElementCount VF)
Definition: VPlan.h:2272
void addVPValue(Value *V)
Definition: VPlan.h:2303
VPValue * getVPValue(Value *V, bool OverrideAllowed=false)
Returns the VPValue for V.
Definition: VPlan.h:2322
void disableValue2VPValue()
Mark the plan to indicate that using Value2VPValue is not safe any longer, because it may be stale.
Definition: VPlan.h:2268
const VPBlockBase * getEntry() const
Definition: VPlan.h:2241
LLVM_DUMP_METHOD void dump() const
Dump the plan to stderr (for debugging).
Definition: VPlan.cpp:824
bool hasScalarVFOnly() const
Definition: VPlan.h:2280
iterator_range< mapped_iterator< Use *, std::function< VPValue *(Value *)> > > mapToVPValues(User::op_range Operands)
Returns a range mapping the values the range Operands to their corresponding VPValues.
Definition: VPlan.h:2362
void execute(VPTransformState *State)
Generate the IR code for this VPlan.
Definition: VPlan.cpp:665
VPCanonicalIVPHIRecipe * getCanonicalIV()
Returns the canonical induction recipe of the vector loop.
Definition: VPlan.h:2378
const MapVector< PHINode *, VPLiveOut * > & getLiveOuts() const
Definition: VPlan.h:2404
VPActiveLaneMaskPHIRecipe * getActiveLaneMaskPhi()
Find and return the VPActiveLaneMaskPHIRecipe from the header - there be only one at most.
Definition: VPlan.cpp:602
void print(raw_ostream &O) const
Print this VPlan to O.
Definition: VPlan.cpp:746
void addVF(ElementCount VF)
Definition: VPlan.h:2270
VPBlockBase * setEntry(VPBlockBase *Block)
Definition: VPlan.h:2243
void setUF(unsigned UF)
Definition: VPlan.h:2284
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:166
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:163
Iterator for intrusive lists based on ilist_node.
An ilist node that can access its parent list.
Definition: ilist_node.h:257
base_list_type::const_reverse_iterator const_reverse_iterator
Definition: ilist.h:182
void pop_back()
Definition: ilist.h:319
base_list_type::reverse_iterator reverse_iterator
Definition: ilist.h:180
base_list_type::const_iterator const_iterator
Definition: ilist.h:179
base_list_type::iterator iterator
Definition: ilist.h:178
iterator insert(iterator where, pointer New)
Definition: ilist.h:229
A range adaptor for a pair of iterators.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
This file defines classes to implement an intrusive doubly linked list class (i.e.
This file defines the ilist_node class template, which is a convenient base class for creating classe...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ BasicBlock
Various leaf nodes.
Definition: ISDOpcodes.h:71
VPValue * getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr, ScalarEvolution &SE)
Get or create a VPValue that corresponds to the expansion of Expr.
Definition: VPlan.cpp:1134
bool isUniformAfterVectorization(VPValue *VPV)
Returns true if VPV is uniform after vectorization.
Definition: VPlan.h:2720
bool onlyFirstLaneUsed(VPValue *Def)
Returns true if only the first lane of Def is used.
Definition: VPlan.cpp:1129
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1839
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1819
bool isEqual(const GCNRPTracker::LiveRegSet &S1, const GCNRPTracker::LiveRegSet &S2)
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition: Error.h:198
Value * getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF)
Return the runtime value for VF.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
auto map_range(ContainerTy &&C, FuncTy F)
Definition: STLExtras.h:461
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:292
std::unique_ptr< VPlan > VPlanPtr
Definition: VPlan.h:104
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
iterator_range< filter_iterator< detail::IterOfRange< RangeT >, PredicateT > > make_filter_range(RangeT &&Range, PredicateT Pred)
Convenience function that takes a range of elements and a predicate, and return a new filter_iterator...
Definition: STLExtras.h:664
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition: APFixedPoint.h:292
Value * createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF, int64_t Step)
Return a value for Step multiplied by VF.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1976
const SCEV * createTripCountSCEV(Type *IdxTy, PredicatedScalarEvolution &PSE)
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
Definition: Hashing.h:491
#define N
A range of powers-of-2 vectorization factors with fixed start and adjustable end.
Definition: VPlan.h:84
const ElementCount Start
Definition: VPlan.h:86
ElementCount End
Definition: VPlan.h:89
bool isEmpty() const
Definition: VPlan.h:91
VFRange(const ElementCount &Start, const ElementCount &End)
Definition: VPlan.h:95
A recipe for handling first-order recurrence phis.
Definition: VPlan.h:1267
void execute(VPTransformState &State) override
Generate the phi nodes.
VPFirstOrderRecurrencePHIRecipe(PHINode *Phi, VPValue &Start)
Definition: VPlan.h:1268
static bool classof(const VPHeaderPHIRecipe *R)
Definition: VPlan.h:1273
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPIteration represents a single point in the iteration space of the output (vectorized and/or unrolle...
Definition: VPlan.h:189
VPIteration(unsigned Part, const VPLane &Lane)
Definition: VPlan.h:199
unsigned Part
in [0..UF)
Definition: VPlan.h:191
VPLane Lane
Definition: VPlan.h:193
VPIteration(unsigned Part, unsigned Lane, VPLane::Kind Kind=VPLane::Kind::First)
Definition: VPlan.h:195
bool isFirstIteration() const
Definition: VPlan.h:201
Hold state information used when constructing the CFG of the output IR, traversing the VPBasicBlocks ...
Definition: VPlan.h:330
BasicBlock * PrevBB
The previous IR BasicBlock created or used.
Definition: VPlan.h:336
SmallDenseMap< VPBasicBlock *, BasicBlock * > VPBB2IRBB
A mapping of each VPBasicBlock to the corresponding BasicBlock.
Definition: VPlan.h:344
VPBasicBlock * PrevVPBB
The previous VPBasicBlock visited. Initially set to null.
Definition: VPlan.h:332
BasicBlock * ExitBB
The last IR BasicBlock in the output IR.
Definition: VPlan.h:340
BasicBlock * getPreheaderBBFor(VPRecipeBase *R)
Returns the BasicBlock* mapped to the pre-header of the loop region containing R.
Definition: VPlan.cpp:235
SmallVector< Value *, 2 > PerPartValuesTy
A type for vectorized values in the new loop.
Definition: VPlan.h:226
DenseMap< VPValue *, ScalarsPerPartValuesTy > PerPartScalars
Definition: VPlan.h:231
DenseMap< VPValue *, PerPartValuesTy > PerPartOutput
Definition: VPlan.h:228
VPTransformState holds information passed down when "executing" a VPlan, needed for generating the ou...
Definition: VPlan.h:206
VPValue2ValueTy VPValue2Value
Definition: VPlan.h:362
LoopInfo * LI
Hold a pointer to LoopInfo to register new basic blocks in the loop.
Definition: VPlan.h:354
VPTransformState(ElementCount VF, unsigned UF, LoopInfo *LI, DominatorTree *DT, IRBuilderBase &Builder, InnerLoopVectorizer *ILV, VPlan *Plan)
Definition: VPlan.h:207
void addMetadata(Instruction *To, Instruction *From)
Add metadata from one instruction to another.
Definition: VPlan.cpp:248
Value * get(VPValue *Def, unsigned Part)
Get the generated Value for a given VPValue and a given Part.
struct llvm::VPTransformState::DataState Data
void setDebugLocFromInst(const Value *V)
Set the debug location in the builder using the debug location in V.
Definition: VPlan.cpp:260
void reset(VPValue *Def, Value *V, unsigned Part)
Reset an existing vector value for Def and a given Part.
Definition: VPlan.h:273
struct llvm::VPTransformState::CFGState CFG
void reset(VPValue *Def, Value *V, const VPIteration &Instance)
Reset an existing scalar value for Def and a given Instance.
Definition: VPlan.h:295
void addNewMetadata(Instruction *To, const Instruction *Orig)
Add additional metadata to To that was not present on Orig.
Definition: VPlan.cpp:240
bool hasAnyVectorValue(VPValue *Def) const
Definition: VPlan.h:250
void set(VPValue *Def, Value *V, const VPIteration &Instance)
Set the generated scalar V for Def and the given Instance.
Definition: VPlan.h:281
std::optional< VPIteration > Instance
Hold the indices to generate specific scalar instructions.
Definition: VPlan.h:220
IRBuilderBase & Builder
Hold a reference to the IRBuilder used to generate output IR code.
Definition: VPlan.h:360
SmallPtrSet< VPRecipeBase *, 16 > MayGeneratePoisonRecipes
Holds recipes that may generate a poison value that is used after vectorization, even when their oper...
Definition: VPlan.h:375
DominatorTree * DT
Hold a pointer to Dominator Tree to register new basic blocks in the loop.
Definition: VPlan.h:357
bool hasScalarValue(VPValue *Def, VPIteration Instance)
Definition: VPlan.h:254
VPlan * Plan
Pointer to the VPlan code is generated for.
Definition: VPlan.h:371
InnerLoopVectorizer * ILV
Hold a pointer to InnerLoopVectorizer to reuse its IR generation methods.
Definition: VPlan.h:368
std::unique_ptr< LoopVersioning > LVer
LoopVersioning.
Definition: VPlan.h:385
Value * CanonicalIV
Hold the canonical scalar IV of the vector loop (start=0, step=VF*UF).
Definition: VPlan.h:365
bool hasVectorValue(VPValue *Def, unsigned Part)
Definition: VPlan.h:244
ElementCount VF
The chosen Vectorization and Unroll Factors of the loop being vectorized.
Definition: VPlan.h:214
Loop * CurrentVectorLoop
The loop object for the current parent region, or nullptr.
Definition: VPlan.h:378
void set(VPValue *Def, Value *V, unsigned Part)
Set the generated Value for a given VPValue and a given Part.
Definition: VPlan.h:265
A recipe for widening select instructions.
Definition: VPlan.h:965
bool isInvariantCond() const
Definition: VPlan.h:987
VPWidenSelectRecipe(SelectInst &I, iterator_range< IterT > Operands)
Definition: VPlan.h:967
VPValue * getCond() const
Definition: VPlan.h:983
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Produce a widened version of the select instruction.
~VPWidenSelectRecipe() override=default
VPlanIngredient(const Value *V)
Definition: VPlan.h:2469
const Value * V
Definition: VPlan.h:2467
void print(raw_ostream &O) const
Definition: VPlan.cpp:993