LLVM 22.0.0git
VPlanUnroll.cpp
Go to the documentation of this file.
1//===-- VPlanUnroll.cpp - VPlan unroller ----------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements explicit unrolling for VPlans.
11///
12//===----------------------------------------------------------------------===//
13
#include "VPRecipeBuilder.h"
#include "VPlan.h"
#include "VPlanAnalysis.h"
#include "VPlanCFG.h"
#include "VPlanHelpers.h"
#include "VPlanPatternMatch.h"
#include "VPlanTransforms.h"
#include "VPlanUtils.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/IR/Intrinsics.h"
27
28using namespace llvm;
29using namespace llvm::VPlanPatternMatch;
30
31namespace {
32
33/// Helper to hold state needed for unrolling. It holds the Plan to unroll by
34/// UF. It also holds copies of VPValues across UF-1 unroll parts to facilitate
35/// the unrolling transformation, where the original VPValues are retained for
36/// part zero.
37class UnrollState {
38 /// Plan to unroll.
39 VPlan &Plan;
40 /// Unroll factor to unroll by.
41 const unsigned UF;
42 /// Analysis for types.
43 VPTypeAnalysis TypeInfo;
44
45 /// Unrolling may create recipes that should not be unrolled themselves.
46 /// Those are tracked in ToSkip.
47 SmallPtrSet<VPRecipeBase *, 8> ToSkip;
48
49 // Associate with each VPValue of part 0 its unrolled instances of parts 1,
50 // ..., UF-1.
51 DenseMap<VPValue *, SmallVector<VPValue *>> VPV2Parts;
52
53 /// Unroll replicate region \p VPR by cloning the region UF - 1 times.
54 void unrollReplicateRegionByUF(VPRegionBlock *VPR);
55
56 /// Unroll recipe \p R by cloning it UF - 1 times, unless it is uniform across
57 /// all parts.
58 void unrollRecipeByUF(VPRecipeBase &R);
59
60 /// Unroll header phi recipe \p R. How exactly the recipe gets unrolled
61 /// depends on the concrete header phi. Inserts newly created recipes at \p
62 /// InsertPtForPhi.
63 void unrollHeaderPHIByUF(VPHeaderPHIRecipe *R,
64 VPBasicBlock::iterator InsertPtForPhi);
65
66 /// Unroll a widen induction recipe \p IV. This introduces recipes to compute
67 /// the induction steps for each part.
68 void unrollWidenInductionByUF(VPWidenInductionRecipe *IV,
69 VPBasicBlock::iterator InsertPtForPhi);
70
71 VPValue *getConstantInt(unsigned Part) {
72 Type *CanIVIntTy = Plan.getVectorLoopRegion()->getCanonicalIVType();
73 return Plan.getConstantInt(CanIVIntTy, Part);
74 }
75
76public:
77 UnrollState(VPlan &Plan, unsigned UF) : Plan(Plan), UF(UF), TypeInfo(Plan) {}
78
79 void unrollBlock(VPBlockBase *VPB);
80
81 VPValue *getValueForPart(VPValue *V, unsigned Part) {
82 if (Part == 0 || V->isLiveIn())
83 return V;
84 assert((VPV2Parts.contains(V) && VPV2Parts[V].size() >= Part) &&
85 "accessed value does not exist");
86 return VPV2Parts[V][Part - 1];
87 }
88
89 /// Given a single original recipe \p OrigR (of part zero), and its copy \p
90 /// CopyR for part \p Part, map every VPValue defined by \p OrigR to its
91 /// corresponding VPValue defined by \p CopyR.
92 void addRecipeForPart(VPRecipeBase *OrigR, VPRecipeBase *CopyR,
93 unsigned Part) {
94 for (const auto &[Idx, VPV] : enumerate(OrigR->definedValues())) {
95 const auto &[V, _] = VPV2Parts.try_emplace(VPV);
96 assert(V->second.size() == Part - 1 && "earlier parts not set");
97 V->second.push_back(CopyR->getVPValue(Idx));
98 }
99 }
100
101 /// Given a uniform recipe \p R, add it for all parts.
102 void addUniformForAllParts(VPSingleDefRecipe *R) {
103 const auto &[V, Inserted] = VPV2Parts.try_emplace(R);
104 assert(Inserted && "uniform value already added");
105 for (unsigned Part = 0; Part != UF; ++Part)
106 V->second.push_back(R);
107 }
108
109 bool contains(VPValue *VPV) const { return VPV2Parts.contains(VPV); }
110
111 /// Update \p R's operand at \p OpIdx with its corresponding VPValue for part
112 /// \p P.
113 void remapOperand(VPRecipeBase *R, unsigned OpIdx, unsigned Part) {
114 auto *Op = R->getOperand(OpIdx);
115 R->setOperand(OpIdx, getValueForPart(Op, Part));
116 }
117
118 /// Update \p R's operands with their corresponding VPValues for part \p P.
119 void remapOperands(VPRecipeBase *R, unsigned Part) {
120 for (const auto &[OpIdx, Op] : enumerate(R->operands()))
121 R->setOperand(OpIdx, getValueForPart(Op, Part));
122 }
123};
124} // namespace
125
126void UnrollState::unrollReplicateRegionByUF(VPRegionBlock *VPR) {
127 VPBlockBase *InsertPt = VPR->getSingleSuccessor();
128 for (unsigned Part = 1; Part != UF; ++Part) {
129 auto *Copy = VPR->clone();
130 VPBlockUtils::insertBlockBefore(Copy, InsertPt);
131
132 auto PartI = vp_depth_first_shallow(Copy->getEntry());
133 auto Part0 = vp_depth_first_shallow(VPR->getEntry());
134 for (const auto &[PartIVPBB, Part0VPBB] :
137 for (const auto &[PartIR, Part0R] : zip(*PartIVPBB, *Part0VPBB)) {
138 remapOperands(&PartIR, Part);
139 if (auto *ScalarIVSteps = dyn_cast<VPScalarIVStepsRecipe>(&PartIR)) {
140 ScalarIVSteps->addOperand(getConstantInt(Part));
141 }
142
143 addRecipeForPart(&Part0R, &PartIR, Part);
144 }
145 }
146 }
147}
148
149void UnrollState::unrollWidenInductionByUF(
150 VPWidenInductionRecipe *IV, VPBasicBlock::iterator InsertPtForPhi) {
151 VPBasicBlock *PH = cast<VPBasicBlock>(
152 IV->getParent()->getEnclosingLoopRegion()->getSinglePredecessor());
153 Type *IVTy = TypeInfo.inferScalarType(IV);
154 auto &ID = IV->getInductionDescriptor();
155 VPIRFlags Flags;
156 if (isa_and_present<FPMathOperator>(ID.getInductionBinOp()))
157 Flags = ID.getInductionBinOp()->getFastMathFlags();
158
159 VPValue *ScalarStep = IV->getStepValue();
160 VPBuilder Builder(PH);
161 Type *VectorStepTy =
162 IVTy->isPointerTy() ? TypeInfo.inferScalarType(ScalarStep) : IVTy;
163 VPInstruction *VectorStep = Builder.createNaryOp(
164 VPInstruction::WideIVStep, {&Plan.getVF(), ScalarStep}, VectorStepTy,
165 Flags, IV->getDebugLoc());
166
167 ToSkip.insert(VectorStep);
168
169 // Now create recipes to compute the induction steps for part 1 .. UF. Part 0
170 // remains the header phi. Parts > 0 are computed by adding Step to the
171 // previous part. The header phi recipe will get 2 new operands: the step
172 // value for a single part and the last part, used to compute the backedge
173 // value during VPWidenInductionRecipe::execute.
174 // %Part.0 = VPWidenInductionRecipe %Start, %ScalarStep, %VectorStep, %Part.3
175 // %Part.1 = %Part.0 + %VectorStep
176 // %Part.2 = %Part.1 + %VectorStep
177 // %Part.3 = %Part.2 + %VectorStep
178 //
179 // The newly added recipes are added to ToSkip to avoid interleaving them
180 // again.
181 VPValue *Prev = IV;
182 Builder.setInsertPoint(IV->getParent(), InsertPtForPhi);
183 unsigned AddOpc;
184 if (IVTy->isPointerTy())
186 else if (IVTy->isFloatingPointTy())
187 AddOpc = ID.getInductionOpcode();
188 else
189 AddOpc = Instruction::Add;
190 for (unsigned Part = 1; Part != UF; ++Part) {
191 std::string Name =
192 Part > 1 ? "step.add." + std::to_string(Part) : "step.add";
193
194 VPInstruction *Add = Builder.createNaryOp(AddOpc,
195 {
196 Prev,
197 VectorStep,
198 },
199 Flags, IV->getDebugLoc(), Name);
200 ToSkip.insert(Add);
201 addRecipeForPart(IV, Add, Part);
202 Prev = Add;
203 }
204 IV->addOperand(VectorStep);
205 IV->addOperand(Prev);
206}
207
208void UnrollState::unrollHeaderPHIByUF(VPHeaderPHIRecipe *R,
209 VPBasicBlock::iterator InsertPtForPhi) {
210 // First-order recurrences pass a single vector or scalar through their header
211 // phis, irrespective of interleaving.
213 return;
214
215 // Generate step vectors for each unrolled part.
216 if (auto *IV = dyn_cast<VPWidenInductionRecipe>(R)) {
217 unrollWidenInductionByUF(IV, InsertPtForPhi);
218 return;
219 }
220
221 auto *RdxPhi = dyn_cast<VPReductionPHIRecipe>(R);
222 if (RdxPhi && RdxPhi->isOrdered())
223 return;
224
225 auto InsertPt = std::next(R->getIterator());
226 for (unsigned Part = 1; Part != UF; ++Part) {
227 VPRecipeBase *Copy = R->clone();
228 Copy->insertBefore(*R->getParent(), InsertPt);
229 addRecipeForPart(R, Copy, Part);
230 if (RdxPhi) {
231 // If the start value is a ReductionStartVector, use the identity value
232 // (second operand) for unrolled parts. If the scaling factor is > 1,
233 // create a new ReductionStartVector with the scale factor and both
234 // operands set to the identity value.
235 if (auto *VPI = dyn_cast<VPInstruction>(RdxPhi->getStartValue())) {
236 assert(VPI->getOpcode() == VPInstruction::ReductionStartVector &&
237 "unexpected start VPInstruction");
238 if (Part != 1)
239 continue;
240 VPValue *StartV;
241 if (match(VPI->getOperand(2), m_One())) {
242 StartV = VPI->getOperand(1);
243 } else {
244 auto *C = VPI->clone();
245 C->setOperand(0, C->getOperand(1));
246 C->insertAfter(VPI);
247 StartV = C;
248 }
249 for (unsigned Part = 1; Part != UF; ++Part)
250 VPV2Parts[VPI][Part - 1] = StartV;
251 }
252 Copy->addOperand(getConstantInt(Part));
253 } else {
255 "unexpected header phi recipe not needing unrolled part");
256 }
257 }
258}
259
260/// Handle non-header-phi recipes.
261void UnrollState::unrollRecipeByUF(VPRecipeBase &R) {
263 return;
264
265 if (auto *VPI = dyn_cast<VPInstruction>(&R)) {
267 addUniformForAllParts(VPI);
268 return;
269 }
270 }
271 if (auto *RepR = dyn_cast<VPReplicateRecipe>(&R)) {
272 if (isa<StoreInst>(RepR->getUnderlyingValue()) &&
273 RepR->getOperand(1)->isDefinedOutsideLoopRegions()) {
274 // Stores to an invariant address only need to store the last part.
275 remapOperands(&R, UF - 1);
276 return;
277 }
278 if (match(RepR,
280 addUniformForAllParts(RepR);
281 return;
282 }
283 }
284
285 // Unroll non-uniform recipes.
286 auto InsertPt = std::next(R.getIterator());
287 VPBasicBlock &VPBB = *R.getParent();
288 for (unsigned Part = 1; Part != UF; ++Part) {
289 VPRecipeBase *Copy = R.clone();
290 Copy->insertBefore(VPBB, InsertPt);
291 addRecipeForPart(&R, Copy, Part);
292
293 VPValue *Op;
295 m_VPValue(), m_VPValue(Op)))) {
296 Copy->setOperand(0, getValueForPart(Op, Part - 1));
297 Copy->setOperand(1, getValueForPart(Op, Part));
298 continue;
299 }
300 if (auto *VPR = dyn_cast<VPVectorPointerRecipe>(&R)) {
301 VPBuilder Builder(VPR);
302 const DataLayout &DL =
304 Type *IndexTy = DL.getIndexType(TypeInfo.inferScalarType(VPR));
305 Type *VFTy = TypeInfo.inferScalarType(&Plan.getVF());
306 VPValue *VF = Builder.createScalarZExtOrTrunc(
307 &Plan.getVF(), IndexTy, VFTy, DebugLoc::getUnknown());
308 // VFxUF does not wrap, so VF * Part also cannot wrap.
309 VPValue *VFxPart = Builder.createOverflowingOp(
310 Instruction::Mul, {VF, Plan.getConstantInt(IndexTy, Part)},
311 {true, true});
312 Copy->setOperand(0, VPR->getOperand(0));
313 Copy->addOperand(VFxPart);
314 continue;
315 }
316 if (auto *Red = dyn_cast<VPReductionRecipe>(&R)) {
317 auto *Phi = dyn_cast<VPReductionPHIRecipe>(R.getOperand(0));
318 if (Phi && Phi->isOrdered()) {
319 auto &Parts = VPV2Parts[Phi];
320 if (Part == 1) {
321 Parts.clear();
322 Parts.push_back(Red);
323 }
324 Parts.push_back(Copy->getVPSingleValue());
325 Phi->setOperand(1, Copy->getVPSingleValue());
326 }
327 }
328 remapOperands(Copy, Part);
329
330 // Add operand indicating the part to generate code for, to recipes still
331 // requiring it.
332 if (isa<VPScalarIVStepsRecipe, VPWidenCanonicalIVRecipe,
333 VPVectorEndPointerRecipe>(Copy) ||
334 match(Copy,
336 Copy->addOperand(getConstantInt(Part));
337
339 Copy->setOperand(0, R.getOperand(0));
340 }
341}
342
343void UnrollState::unrollBlock(VPBlockBase *VPB) {
344 auto *VPR = dyn_cast<VPRegionBlock>(VPB);
345 if (VPR) {
346 if (VPR->isReplicator())
347 return unrollReplicateRegionByUF(VPR);
348
349 // Traverse blocks in region in RPO to ensure defs are visited before uses
350 // across blocks.
351 ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>>
352 RPOT(VPR->getEntry());
353 for (VPBlockBase *VPB : RPOT)
354 unrollBlock(VPB);
355 return;
356 }
357
358 // VPB is a VPBasicBlock; unroll it, i.e., unroll its recipes.
359 auto *VPBB = cast<VPBasicBlock>(VPB);
360 auto InsertPtForPhi = VPBB->getFirstNonPhi();
361 for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
362 if (ToSkip.contains(&R) || isa<VPIRInstruction>(&R))
363 continue;
364
365 // Add all VPValues for all parts to AnyOf, FirstActiveLaneMask and
366 // Compute*Result which combine all parts to compute the final value.
367 VPValue *Op1;
369 match(&R, m_FirstActiveLane(m_VPValue(Op1))) ||
370 match(&R, m_LastActiveLane(m_VPValue(Op1))) ||
372 m_VPValue(), m_VPValue(), m_VPValue(Op1))) ||
374 m_VPValue(), m_VPValue(Op1))) ||
376 m_VPValue(), m_VPValue(), m_VPValue(), m_VPValue(Op1)))) {
377 addUniformForAllParts(cast<VPInstruction>(&R));
378 for (unsigned Part = 1; Part != UF; ++Part)
379 R.addOperand(getValueForPart(Op1, Part));
380 continue;
381 }
382 VPValue *Op0;
383 if (match(&R, m_ExtractLane(m_VPValue(Op0), m_VPValue(Op1)))) {
384 addUniformForAllParts(cast<VPInstruction>(&R));
385 for (unsigned Part = 1; Part != UF; ++Part)
386 R.addOperand(getValueForPart(Op1, Part));
387 continue;
388 }
389
390 if (Plan.hasScalarVFOnly()) {
391 if (match(&R, m_ExtractLastPart(m_VPValue(Op0))) ||
393 auto *I = cast<VPInstruction>(&R);
394 bool IsPenultimatePart =
396 unsigned PartIdx = IsPenultimatePart ? UF - 2 : UF - 1;
397 // For scalar VF, directly use the scalar part value.
398 I->replaceAllUsesWith(getValueForPart(Op0, PartIdx));
399 continue;
400 }
401 }
402 // For vector VF, the penultimate element is always extracted from the last part.
405 addUniformForAllParts(cast<VPSingleDefRecipe>(&R));
406 R.setOperand(0, getValueForPart(Op0, UF - 1));
407 continue;
408 }
409
410 auto *SingleDef = dyn_cast<VPSingleDefRecipe>(&R);
411 if (SingleDef && vputils::isUniformAcrossVFsAndUFs(SingleDef)) {
412 addUniformForAllParts(SingleDef);
413 continue;
414 }
415
416 if (auto *H = dyn_cast<VPHeaderPHIRecipe>(&R)) {
417 unrollHeaderPHIByUF(H, InsertPtForPhi);
418 continue;
419 }
420
421 unrollRecipeByUF(R);
422 }
423}
424
425void VPlanTransforms::unrollByUF(VPlan &Plan, unsigned UF) {
426 assert(UF > 0 && "Unroll factor must be positive");
427 Plan.setUF(UF);
428 auto Cleanup = make_scope_exit([&Plan]() {
429 auto Iter = vp_depth_first_deep(Plan.getEntry());
430 // Remove recipes that are redundant after unrolling.
432 for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
433 auto *VPI = dyn_cast<VPInstruction>(&R);
434 if (VPI &&
435 VPI->getOpcode() == VPInstruction::CanonicalIVIncrementForPart &&
436 VPI->getNumOperands() == 1) {
437 VPI->replaceAllUsesWith(VPI->getOperand(0));
438 VPI->eraseFromParent();
439 }
440 }
441 }
442 });
443 if (UF == 1) {
444 return;
445 }
446
447 UnrollState Unroller(Plan, UF);
448
449 // Iterate over all blocks in the plan starting from Entry, and unroll
450 // recipes inside them. This includes the vector preheader and middle blocks,
451 // which may set up or post-process per-part values.
453 Plan.getEntry());
454 for (VPBlockBase *VPB : RPOT)
455 Unroller.unrollBlock(VPB);
456
457 unsigned Part = 1;
458 // Remap operands of cloned header phis to update backedge values. The header
459 // phis cloned during unrolling are just after the header phi for part 0.
460 // Reset Part to 1 when reaching the first (part 0) recipe of a block.
461 for (VPRecipeBase &H :
463 // The second operand of Fixed Order Recurrence phi's, feeding the spliced
464 // value across the backedge, needs to remap to the last part of the spliced
465 // value.
467 Unroller.remapOperand(&H, 1, UF - 1);
468 continue;
469 }
470 if (Unroller.contains(H.getVPSingleValue())) {
471 Part = 1;
472 continue;
473 }
474 Unroller.remapOperands(&H, Part);
475 Part++;
476 }
477
479}
480
481/// Create a single-scalar clone of \p DefR (must be a VPReplicateRecipe or
482/// VPInstruction) for lane \p Lane. Use \p Def2LaneDefs to look up scalar
483/// definitions for operands of \DefR.
484static VPValue *
485cloneForLane(VPlan &Plan, VPBuilder &Builder, Type *IdxTy,
486 VPSingleDefRecipe *DefR, VPLane Lane,
487 const DenseMap<VPValue *, SmallVector<VPValue *>> &Def2LaneDefs) {
488 VPValue *Op;
490 auto LaneDefs = Def2LaneDefs.find(Op);
491 if (LaneDefs != Def2LaneDefs.end())
492 return LaneDefs->second[Lane.getKnownLane()];
493
494 VPValue *Idx = Plan.getConstantInt(IdxTy, Lane.getKnownLane());
495 return Builder.createNaryOp(Instruction::ExtractElement, {Op, Idx});
496 }
497
498 // Collect the operands at Lane, creating extracts as needed.
500 for (VPValue *Op : DefR->operands()) {
501 // If Op is a definition that has been unrolled, directly use the clone for
502 // the corresponding lane.
503 auto LaneDefs = Def2LaneDefs.find(Op);
504 if (LaneDefs != Def2LaneDefs.end()) {
505 NewOps.push_back(LaneDefs->second[Lane.getKnownLane()]);
506 continue;
507 }
508 if (Lane.getKind() == VPLane::Kind::ScalableLast) {
509 // Look through mandatory Unpack.
510 [[maybe_unused]] bool Matched =
512 assert(Matched && "original op must have been Unpack");
513 auto *ExtractPart =
514 Builder.createNaryOp(VPInstruction::ExtractLastPart, {Op});
515 NewOps.push_back(
516 Builder.createNaryOp(VPInstruction::ExtractLastLane, {ExtractPart}));
517 continue;
518 }
520 NewOps.push_back(Op);
521 continue;
522 }
523
524 // Look through buildvector to avoid unnecessary extracts.
525 if (match(Op, m_BuildVector())) {
526 NewOps.push_back(
527 cast<VPInstruction>(Op)->getOperand(Lane.getKnownLane()));
528 continue;
529 }
530 VPValue *Idx = Plan.getConstantInt(IdxTy, Lane.getKnownLane());
531 VPValue *Ext = Builder.createNaryOp(Instruction::ExtractElement, {Op, Idx});
532 NewOps.push_back(Ext);
533 }
534
536 if (auto *RepR = dyn_cast<VPReplicateRecipe>(DefR)) {
537 // TODO: have cloning of replicate recipes also provide the desired result
538 // coupled with setting its operands to NewOps (deriving IsSingleScalar and
539 // Mask from the operands?)
540 New = new VPReplicateRecipe(RepR->getUnderlyingInstr(), NewOps,
541 /*IsSingleScalar=*/true, /*Mask=*/nullptr,
542 *RepR, *RepR, RepR->getDebugLoc());
543 } else {
545 "DefR must be a VPReplicateRecipe or VPInstruction");
546 New = DefR->clone();
547 for (const auto &[Idx, Op] : enumerate(NewOps)) {
548 New->setOperand(Idx, Op);
549 }
550 }
551 New->insertBefore(DefR);
552 return New;
553}
554
556 Type *IdxTy = IntegerType::get(
558
559 // Visit all VPBBs outside the loop region and directly inside the top-level
560 // loop region.
561 auto VPBBsOutsideLoopRegion = VPBlockUtils::blocksOnly<VPBasicBlock>(
563 auto VPBBsInsideLoopRegion = VPBlockUtils::blocksOnly<VPBasicBlock>(
565 auto VPBBsToUnroll =
566 concat<VPBasicBlock *>(VPBBsOutsideLoopRegion, VPBBsInsideLoopRegion);
567 // A mapping of current VPValue definitions to collections of new VPValues
568 // defined per lane. Serves to hook-up potential users of current VPValue
569 // definition that are replicated-per-VF later.
571 // The removal of current recipes being replaced by new ones needs to be
572 // delayed after Def2LaneDefs is no longer in use.
574 for (VPBasicBlock *VPBB : VPBBsToUnroll) {
575 for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
578 cast<VPReplicateRecipe>(&R)->isSingleScalar()) ||
579 (isa<VPInstruction>(&R) &&
580 !cast<VPInstruction>(&R)->doesGeneratePerAllLanes() &&
582 continue;
583
584 auto *DefR = cast<VPSingleDefRecipe>(&R);
585 VPBuilder Builder(DefR);
586 if (DefR->getNumUsers() == 0) {
587 // Create single-scalar version of DefR for all lanes.
588 for (unsigned I = 0; I != VF.getKnownMinValue(); ++I)
589 cloneForLane(Plan, Builder, IdxTy, DefR, VPLane(I), Def2LaneDefs);
590 DefR->eraseFromParent();
591 continue;
592 }
593 /// Create single-scalar version of DefR for all lanes.
594 SmallVector<VPValue *> LaneDefs;
595 for (unsigned I = 0; I != VF.getKnownMinValue(); ++I)
596 LaneDefs.push_back(
597 cloneForLane(Plan, Builder, IdxTy, DefR, VPLane(I), Def2LaneDefs));
598
599 Def2LaneDefs[DefR] = LaneDefs;
600 /// Users that only demand the first lane can use the definition for lane
601 /// 0.
602 DefR->replaceUsesWithIf(LaneDefs[0], [DefR](VPUser &U, unsigned) {
603 return U.usesFirstLaneOnly(DefR);
604 });
605
606 // Update each build vector user that currently has DefR as its only
607 // operand, to have all LaneDefs as its operands.
608 for (VPUser *U : to_vector(DefR->users())) {
609 auto *VPI = dyn_cast<VPInstruction>(U);
610 if (!VPI || (VPI->getOpcode() != VPInstruction::BuildVector &&
611 VPI->getOpcode() != VPInstruction::BuildStructVector))
612 continue;
613 assert(VPI->getNumOperands() == 1 &&
614 "Build(Struct)Vector must have a single operand before "
615 "replicating by VF");
616 VPI->setOperand(0, LaneDefs[0]);
617 for (VPValue *LaneDef : drop_begin(LaneDefs))
618 VPI->addOperand(LaneDef);
619 }
620 ToRemove.push_back(DefR);
621 }
622 }
623 for (auto *R : reverse(ToRemove))
624 R->eraseFromParent();
625}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
ReachingDefInfo InstSet & ToRemove
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static const HTTPClientCleanup Cleanup
#define _
#define I(x, y, z)
Definition MD5.cpp:57
#define H(x, y, z)
Definition MD5.cpp:56
MachineInstr unsigned OpIdx
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
This file contains some templates that are useful if you are working with the STL at all.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:480
This file defines the make_scope_exit function, which executes user-defined cleanup logic at scope ex...
static ConstantInt * getConstantInt(Value *V, const DataLayout &DL)
Extract ConstantInt from value, looking through IntToPtr and PointerNullValue.
This file contains the declarations of different VPlan-related auxiliary helpers.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:247
This file provides utility VPlan to VPlan transformations.
static VPValue * cloneForLane(VPlan &Plan, VPBuilder &Builder, Type *IdxTy, VPSingleDefRecipe *DefR, VPLane Lane, const DenseMap< VPValue *, SmallVector< VPValue * > > &Def2LaneDefs)
Create a single-scalar clone of DefR (must be a VPReplicateRecipe or VPInstruction) for lane Lane.
static void remapOperands(VPBlockBase *Entry, VPBlockBase *NewEntry, DenseMap< VPValue *, VPValue * > &Old2NewVPValues)
Definition VPlan.cpp:1115
This file contains the declarations of the Vectorization Plan base classes:
static const uint32_t IV[8]
Definition blake3_impl.h:83
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
static DebugLoc getUnknown()
Definition DebugLoc.h:161
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:318
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:267
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition Type.h:184
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition VPlan.h:3980
RecipeListTy::iterator iterator
Instruction iterators...
Definition VPlan.h:4007
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
Definition VPlan.h:4068
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
Definition VPlan.cpp:216
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition VPlan.h:81
const VPBasicBlock * getEntryBasicBlock() const
Definition VPlan.cpp:166
VPBlockBase * getSingleSuccessor() const
Definition VPlan.h:209
static auto blocksOnly(const T &Range)
Return an iterator range over Range which only includes BlockTy blocks.
Definition VPlanUtils.h:211
static void insertBlockBefore(VPBlockBase *NewBlock, VPBlockBase *BlockPtr)
Insert disconnected block NewBlock before Blockptr.
Definition VPlanUtils.h:132
VPlan-based builder utility analogous to IRBuilder.
ArrayRef< VPValue * > definedValues()
Returns an ArrayRef of the values defined by the VPDef.
Definition VPlanValue.h:421
VPValue * getVPValue(unsigned I)
Returns the VPValue with index I defined by the VPDef.
Definition VPlanValue.h:411
BasicBlock * getIRBasicBlock() const
Definition VPlan.h:4157
@ WideIVStep
Scale the first operand (vector step) by the second operand (scalar-step).
Definition VPlan.h:1119
@ Unpack
Extracts all lanes from its (non-scalable) vector operand.
Definition VPlan.h:1073
@ ReductionStartVector
Start vector for reductions with 3 operands: the original start value, the identity value for the red...
Definition VPlan.h:1123
@ BuildVector
Creates a fixed-width vector containing all operands.
Definition VPlan.h:1068
@ BuildStructVector
Given operands of (the same) struct type, creates a struct of fixed- width vectors each containing a ...
Definition VPlan.h:1065
@ CanonicalIVIncrementForPart
Definition VPlan.h:1056
In what follows, the term "input IR" refers to code that is fed into the vectorizer whereas the term ...
Kind getKind() const
Returns the Kind of lane offset.
unsigned getKnownLane() const
Returns a compile-time known value for the lane index and asserts if the lane can only be calculated ...
@ ScalableLast
For ScalableLast, Lane is the offset from the start of the last N-element subvector in a scalable vec...
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition VPlan.h:387
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition VPlan.h:4168
VPRegionBlock * clone() override
Clone all blocks in the single-entry single-exit region of the block and their recipes without updati...
Definition VPlan.cpp:728
const VPBlockBase * getEntry() const
Definition VPlan.h:4204
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
Definition VPlan.h:4236
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
Definition VPlan.h:2951
VPSingleDef is a base class for recipes for modeling a sequence of one or more output IR that define ...
Definition VPlan.h:531
VPSingleDefRecipe * clone() override=0
Clone the current recipe.
Type * inferScalarType(const VPValue *V)
Infer the type of V. Returns the scalar type of V.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition VPlanValue.h:202
operand_range operands()
Definition VPlanValue.h:270
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
Definition VPlanValue.h:46
VPlan models a candidate for vectorization, encoding various decisions take to produce efficient outp...
Definition VPlan.h:4298
VPBasicBlock * getEntry()
Definition VPlan.h:4387
VPValue & getVF()
Returns the VF of the vector loop region.
Definition VPlan.h:4481
VPValue * getConstantInt(Type *Ty, uint64_t Val, bool IsSigned=false)
Return a VPValue wrapping a ConstantInt with the given type and value.
Definition VPlan.h:4561
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition VPlan.cpp:1011
bool hasScalarVFOnly() const
Definition VPlan.h:4510
VPIRBasicBlock * getScalarHeader() const
Return the VPIRBasicBlock wrapping the header of the scalar loop.
Definition VPlan.h:4435
void setUF(unsigned UF)
Definition VPlan.h:4524
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
bool match(Val *V, const Pattern &P)
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
VPInstruction_match< VPInstruction::ExtractLastLane, VPInstruction_match< VPInstruction::ExtractLastPart, Op0_t > > m_ExtractLastLaneOfLastPart(const Op0_t &Op0)
VPInstruction_match< VPInstruction::LastActiveLane, Op0_t > m_LastActiveLane(const Op0_t &Op0)
VPInstruction_match< VPInstruction::BranchOnCount > m_BranchOnCount()
VPInstruction_match< VPInstruction::ExtractLastPart, Op0_t > m_ExtractLastPart(const Op0_t &Op0)
class_match< VPValue > m_VPValue()
Match an arbitrary VPValue and ignore it.
VPInstruction_match< VPInstruction::BuildVector > m_BuildVector()
BuildVector is matches only its opcode, w/o matching its operands as the number of operands is not fi...
VPInstruction_match< VPInstruction::ExtractPenultimateElement, Op0_t > m_ExtractPenultimateElement(const Op0_t &Op0)
VPInstruction_match< VPInstruction::FirstActiveLane, Op0_t > m_FirstActiveLane(const Op0_t &Op0)
bind_ty< VPInstruction > m_VPInstruction(VPInstruction *&V)
Match a VPInstruction, capturing if we match.
VPInstruction_match< VPInstruction::BranchOnCond > m_BranchOnCond()
VPInstruction_match< VPInstruction::ExtractLane, Op0_t, Op1_t > m_ExtractLane(const Op0_t &Op0, const Op1_t &Op1)
NodeAddr< PhiNode * > Phi
Definition RDFGraph.h:390
bool isSingleScalar(const VPValue *VPV)
Returns true if VPV is a single scalar, either because it produces the same value for all lanes or on...
bool isUniformAcrossVFsAndUFs(VPValue *V)
Checks if V is uniform across all VF lanes and UF parts.
bool onlyFirstPartUsed(const VPValue *Def)
Returns true if only the first part of Def is used.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition STLExtras.h:829
detail::scope_exit< std::decay_t< Callable > > make_scope_exit(Callable &&F)
Definition ScopeExit.h:59
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2494
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:632
iterator_range< df_iterator< VPBlockShallowTraversalWrapper< VPBlockBase * > > > vp_depth_first_shallow(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order.
Definition VPlanCFG.h:216
iterator_range< df_iterator< VPBlockDeepTraversalWrapper< VPBlockBase * > > > vp_depth_first_deep(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order while traversing t...
Definition VPlanCFG.h:243
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
Definition STLExtras.h:1150
auto reverse(ContainerTy &&C)
Definition STLExtras.h:406
bool isa_and_present(const Y &Val)
isa_and_present<X> - Functionally identical to isa, except that a null value is accepted.
Definition Casting.h:669
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
@ Add
Sum of integers.
DWARFExpression::Operation Op
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
static void unrollByUF(VPlan &Plan, unsigned UF)
Explicitly unroll Plan by UF.
static void removeDeadRecipes(VPlan &Plan)
Remove dead recipes from Plan.
static void replicateByVF(VPlan &Plan, ElementCount VF)
Replace each replicating VPReplicateRecipe and VPInstruction outside of any replicate region in Plan ...