LLVM 22.0.0git
VPlanPredicator.cpp
Go to the documentation of this file.
1//===-- VPlanPredicator.cpp - VPlan predicator ----------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements predication for VPlans.
11///
12//===----------------------------------------------------------------------===//
13
14#include "VPRecipeBuilder.h"
15#include "VPlan.h"
16#include "VPlanCFG.h"
17#include "VPlanPatternMatch.h"
18#include "VPlanTransforms.h"
19#include "VPlanUtils.h"
21
22using namespace llvm;
23using namespace VPlanPatternMatch;
24
25namespace {
26class VPPredicator {
27 /// Builder to construct recipes to compute masks.
28 VPBuilder Builder;
29
30 /// When we if-convert we need to create edge masks. We have to cache values
31 /// so that we don't end up with exponential recursion/IR.
32 using EdgeMaskCacheTy =
33 DenseMap<std::pair<const VPBasicBlock *, const VPBasicBlock *>,
34 VPValue *>;
35 using BlockMaskCacheTy = DenseMap<VPBasicBlock *, VPValue *>;
36 EdgeMaskCacheTy EdgeMaskCache;
37
38 BlockMaskCacheTy BlockMaskCache;
39
40 /// Create an edge mask for every destination of cases and/or default.
41 void createSwitchEdgeMasks(VPInstruction *SI);
42
43 /// Computes and return the predicate of the edge between \p Src and \p Dst,
44 /// possibly inserting new recipes at \p Dst (using Builder's insertion point)
45 VPValue *createEdgeMask(VPBasicBlock *Src, VPBasicBlock *Dst);
46
47 /// Returns the *entry* mask for \p VPBB.
48 VPValue *getBlockInMask(VPBasicBlock *VPBB) const {
49 return BlockMaskCache.lookup(VPBB);
50 }
51
52 /// Record \p Mask as the *entry* mask of \p VPBB, which is expected to not
53 /// already have a mask.
54 void setBlockInMask(VPBasicBlock *VPBB, VPValue *Mask) {
55 // TODO: Include the masks as operands in the predicated VPlan directly to
56 // avoid keeping the map of masks beyond the predication transform.
57 assert(!getBlockInMask(VPBB) && "Mask already set");
58 BlockMaskCache[VPBB] = Mask;
59 }
60
61 /// Record \p Mask as the mask of the edge from \p Src to \p Dst. The edge is
62 /// expected to not have a mask already.
63 VPValue *setEdgeMask(const VPBasicBlock *Src, const VPBasicBlock *Dst,
64 VPValue *Mask) {
65 assert(Src != Dst && "Src and Dst must be different");
66 assert(!getEdgeMask(Src, Dst) && "Mask already set");
67 return EdgeMaskCache[{Src, Dst}] = Mask;
68 }
69
70public:
71 /// Returns the precomputed predicate of the edge from \p Src to \p Dst.
72 VPValue *getEdgeMask(const VPBasicBlock *Src, const VPBasicBlock *Dst) const {
73 return EdgeMaskCache.lookup({Src, Dst});
74 }
75
76 /// Compute and return the mask for the vector loop header block.
77 void createHeaderMask(VPBasicBlock *HeaderVPBB, bool FoldTail);
78
79 /// Compute and return the predicate of \p VPBB, assuming that the header
80 /// block of the loop is set to True, or to the loop mask when tail folding.
81 VPValue *createBlockInMask(VPBasicBlock *VPBB);
82
83 /// Convert phi recipes in \p VPBB to VPBlendRecipes.
84 void convertPhisToBlends(VPBasicBlock *VPBB);
85
86 const BlockMaskCacheTy getBlockMaskCache() const { return BlockMaskCache; }
87};
88} // namespace
89
90VPValue *VPPredicator::createEdgeMask(VPBasicBlock *Src, VPBasicBlock *Dst) {
91 assert(is_contained(Dst->getPredecessors(), Src) && "Invalid edge");
92
93 // Look for cached value.
94 VPValue *EdgeMask = getEdgeMask(Src, Dst);
95 if (EdgeMask)
96 return EdgeMask;
97
98 VPValue *SrcMask = getBlockInMask(Src);
99
100 // If there's a single successor, there's no terminator recipe.
101 if (Src->getNumSuccessors() == 1)
102 return setEdgeMask(Src, Dst, SrcMask);
103
104 auto *Term = cast<VPInstruction>(Src->getTerminator());
105 if (Term->getOpcode() == Instruction::Switch) {
106 createSwitchEdgeMasks(Term);
107 return getEdgeMask(Src, Dst);
108 }
109
110 assert(Term->getOpcode() == VPInstruction::BranchOnCond &&
111 "Unsupported terminator");
112 if (Src->getSuccessors()[0] == Src->getSuccessors()[1])
113 return setEdgeMask(Src, Dst, SrcMask);
114
115 EdgeMask = Term->getOperand(0);
116 assert(EdgeMask && "No Edge Mask found for condition");
117
118 if (Src->getSuccessors()[0] != Dst)
119 EdgeMask = Builder.createNot(EdgeMask, Term->getDebugLoc());
120
121 if (SrcMask) { // Otherwise block in-mask is all-one, no need to AND.
122 // The bitwise 'And' of SrcMask and EdgeMask introduces new UB if SrcMask
123 // is false and EdgeMask is poison. Avoid that by using 'LogicalAnd'
124 // instead which generates 'select i1 SrcMask, i1 EdgeMask, i1 false'.
125 EdgeMask = Builder.createLogicalAnd(SrcMask, EdgeMask, Term->getDebugLoc());
126 }
127
128 return setEdgeMask(Src, Dst, EdgeMask);
129}
130
131VPValue *VPPredicator::createBlockInMask(VPBasicBlock *VPBB) {
132 // Start inserting after the block's phis, which be replaced by blends later.
133 Builder.setInsertPoint(VPBB, VPBB->getFirstNonPhi());
134 // All-one mask is modelled as no-mask following the convention for masked
135 // load/store/gather/scatter. Initialize BlockMask to no-mask.
136 VPValue *BlockMask = nullptr;
137 // This is the block mask. We OR all unique incoming edges.
138 for (auto *Predecessor : SetVector<VPBlockBase *>(
139 VPBB->getPredecessors().begin(), VPBB->getPredecessors().end())) {
140 VPValue *EdgeMask = createEdgeMask(cast<VPBasicBlock>(Predecessor), VPBB);
141 if (!EdgeMask) { // Mask of predecessor is all-one so mask of block is
142 // too.
143 setBlockInMask(VPBB, EdgeMask);
144 return EdgeMask;
145 }
146
147 if (!BlockMask) { // BlockMask has its initial nullptr value.
148 BlockMask = EdgeMask;
149 continue;
150 }
151
152 BlockMask = Builder.createOr(BlockMask, EdgeMask, {});
153 }
154
155 setBlockInMask(VPBB, BlockMask);
156 return BlockMask;
157}
158
159void VPPredicator::createHeaderMask(VPBasicBlock *HeaderVPBB, bool FoldTail) {
160 if (!FoldTail) {
161 setBlockInMask(HeaderVPBB, nullptr);
162 return;
163 }
164
165 // Introduce the early-exit compare IV <= BTC to form header block mask.
166 // This is used instead of IV < TC because TC may wrap, unlike BTC. Start by
167 // constructing the desired canonical IV in the header block as its first
168 // non-phi instructions.
169
170 auto &Plan = *HeaderVPBB->getPlan();
171 auto *IV = new VPWidenCanonicalIVRecipe(Plan.getCanonicalIV());
172 Builder.setInsertPoint(HeaderVPBB, HeaderVPBB->getFirstNonPhi());
173 Builder.insert(IV);
174
175 VPValue *BTC = Plan.getOrCreateBackedgeTakenCount();
176 VPValue *BlockMask = Builder.createICmp(CmpInst::ICMP_ULE, IV, BTC);
177 setBlockInMask(HeaderVPBB, BlockMask);
178}
179
180void VPPredicator::createSwitchEdgeMasks(VPInstruction *SI) {
181 VPBasicBlock *Src = SI->getParent();
182
183 // Create masks where SI is a switch. We create masks for all edges from SI's
184 // parent block at the same time. This is more efficient, as we can create and
185 // collect compares for all cases once.
186 VPValue *Cond = SI->getOperand(0);
187 VPBasicBlock *DefaultDst = cast<VPBasicBlock>(Src->getSuccessors()[0]);
188 MapVector<VPBasicBlock *, SmallVector<VPValue *>> Dst2Compares;
189 for (const auto &[Idx, Succ] : enumerate(drop_begin(Src->getSuccessors()))) {
190 VPBasicBlock *Dst = cast<VPBasicBlock>(Succ);
191 assert(!getEdgeMask(Src, Dst) && "Edge masks already created");
192 // Cases whose destination is the same as default are redundant and can
193 // be ignored - they will get there anyhow.
194 if (Dst == DefaultDst)
195 continue;
196 auto &Compares = Dst2Compares[Dst];
197 VPValue *V = SI->getOperand(Idx + 1);
198 Compares.push_back(Builder.createICmp(CmpInst::ICMP_EQ, Cond, V));
199 }
200
201 // We need to handle 2 separate cases below for all entries in Dst2Compares,
202 // which excludes destinations matching the default destination.
203 VPValue *SrcMask = getBlockInMask(Src);
204 VPValue *DefaultMask = nullptr;
205 for (const auto &[Dst, Conds] : Dst2Compares) {
206 // 1. Dst is not the default destination. Dst is reached if any of the
207 // cases with destination == Dst are taken. Join the conditions for each
208 // case whose destination == Dst using an OR.
209 VPValue *Mask = Conds[0];
210 for (VPValue *V : drop_begin(Conds))
211 Mask = Builder.createOr(Mask, V);
212 if (SrcMask)
213 Mask = Builder.createLogicalAnd(SrcMask, Mask);
214 setEdgeMask(Src, Dst, Mask);
215
216 // 2. Create the mask for the default destination, which is reached if
217 // none of the cases with destination != default destination are taken.
218 // Join the conditions for each case where the destination is != Dst using
219 // an OR and negate it.
220 DefaultMask = DefaultMask ? Builder.createOr(DefaultMask, Mask) : Mask;
221 }
222
223 if (DefaultMask) {
224 DefaultMask = Builder.createNot(DefaultMask);
225 if (SrcMask)
226 DefaultMask = Builder.createLogicalAnd(SrcMask, DefaultMask);
227 }
228 setEdgeMask(Src, DefaultDst, DefaultMask);
229}
230
231void VPPredicator::convertPhisToBlends(VPBasicBlock *VPBB) {
233 for (VPRecipeBase &R : VPBB->phis())
234 Phis.push_back(cast<VPPhi>(&R));
235 for (VPPhi *PhiR : Phis) {
236 // The non-header Phi is converted into a Blend recipe below,
237 // so we don't have to worry about the insertion order and we can just use
238 // the builder. At this point we generate the predication tree. There may
239 // be duplications since this is a simple recursive scan, but future
240 // optimizations will clean it up.
241
242 SmallVector<VPValue *, 2> OperandsWithMask;
243 for (const auto &[InVPV, InVPBB] : PhiR->incoming_values_and_blocks()) {
244 OperandsWithMask.push_back(InVPV);
245 VPValue *EdgeMask = getEdgeMask(InVPBB, VPBB);
246 if (!EdgeMask) {
247 assert(all_equal(PhiR->incoming_values()) &&
248 "Distinct incoming values with one having a full mask");
249 break;
250 }
251
252 OperandsWithMask.push_back(EdgeMask);
253 }
254 PHINode *IRPhi = cast_or_null<PHINode>(PhiR->getUnderlyingValue());
255 auto *Blend =
256 new VPBlendRecipe(IRPhi, OperandsWithMask, PhiR->getDebugLoc());
257 Builder.insert(Blend);
258 PhiR->replaceAllUsesWith(Blend);
259 PhiR->eraseFromParent();
260 }
261}
262
263DenseMap<VPBasicBlock *, VPValue *>
265 VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
266 // Scan the body of the loop in a topological order to visit each basic block
267 // after having visited its predecessor basic blocks.
268 VPBasicBlock *Header = LoopRegion->getEntryBasicBlock();
270 Header);
271 VPPredicator Predicator;
272 for (VPBlockBase *VPB : RPOT) {
273 // Non-outer regions with VPBBs only are supported at the moment.
274 auto *VPBB = cast<VPBasicBlock>(VPB);
275 // Introduce the mask for VPBB, which may introduce needed edge masks, and
276 // convert all phi recipes of VPBB to blend recipes unless VPBB is the
277 // header.
278 if (VPBB == Header) {
279 Predicator.createHeaderMask(Header, FoldTail);
280 continue;
281 }
282
283 Predicator.createBlockInMask(VPBB);
284 Predicator.convertPhisToBlends(VPBB);
285 }
286
287 // Linearize the blocks of the loop into one serial chain.
288 VPBlockBase *PrevVPBB = nullptr;
290 auto Successors = to_vector(VPBB->getSuccessors());
291 if (Successors.size() > 1)
293
294 // Flatten the CFG in the loop. To do so, first disconnect VPBB from its
295 // successors. Then connect VPBB to the previously visited VPBB.
296 for (auto *Succ : Successors)
298 if (PrevVPBB)
299 VPBlockUtils::connectBlocks(PrevVPBB, VPBB);
300
301 PrevVPBB = VPBB;
302 }
303 return Predicator.getBlockMaskCache();
304}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Early If Predicator
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
const SmallVectorImpl< MachineOperand > & Cond
This file provides utility VPlan to VPlan transformations.
This file contains the declarations of the Vectorization Plan base classes:
static const uint32_t IV[8]
Definition blake3_impl.h:83
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:704
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition DenseMap.h:187
void push_back(const T &Elt)
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition VPlan.h:3754
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
Definition VPlan.h:3842
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
Definition VPlan.cpp:246
VPRecipeBase * getTerminator()
If the block has multiple successors, return the branch recipe terminating the block.
Definition VPlan.cpp:680
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition VPlan.h:81
const VPBlocksTy & getPredecessors() const
Definition VPlan.h:204
VPlan * getPlan()
Definition VPlan.cpp:165
const VPBasicBlock * getEntryBasicBlock() const
Definition VPlan.cpp:170
const VPBlocksTy & getSuccessors() const
Definition VPlan.h:198
static auto blocksOnly(const T &Range)
Return an iterator range over Range which only includes BlockTy blocks.
Definition VPlanUtils.h:217
static void connectBlocks(VPBlockBase *From, VPBlockBase *To, unsigned PredIdx=-1u, unsigned SuccIdx=-1u)
Connect VPBlockBases From and To bi-directionally.
Definition VPlanUtils.h:176
static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To)
Disconnect VPBlockBases From and To bi-directionally.
Definition VPlanUtils.h:195
VPInstruction * createOr(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createNot(VPValue *Operand, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createLogicalAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
void insert(VPRecipeBase *R)
Insert R at the current insertion point.
VPInstruction * createICmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new ICmp VPInstruction with predicate Pred and operands A and B.
void setInsertPoint(VPBasicBlock *TheBB)
This specifies that created VPInstructions should be appended to the end of the specified block.
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition VPlan.h:3942
VPlan models a candidate for vectorization, encoding various decisions take to produce efficient outp...
Definition VPlan.h:4045
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition VPlan.cpp:1050
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:330
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2474
auto cast_or_null(const Y &Val)
Definition Casting.h:720
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:565
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1899
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list.
Definition STLExtras.h:2110
static DenseMap< VPBasicBlock *, VPValue * > introduceMasksAndLinearize(VPlan &Plan, bool FoldTail)
Predicate and linearize the control-flow in the only loop region of Plan.