LLVM 22.0.0git
VPlanUnroll.cpp
Go to the documentation of this file.
//===-- VPlanUnroll.cpp - VPlan unroller ----------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements explicit unrolling for VPlans.
///
//===----------------------------------------------------------------------===//
13
#include "VPRecipeBuilder.h"
#include "VPlan.h"
#include "VPlanAnalysis.h"
#include "VPlanCFG.h"
#include "VPlanHelpers.h"
#include "VPlanPatternMatch.h"
#include "VPlanTransforms.h"
#include "VPlanUtils.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/IR/Intrinsics.h"

using namespace llvm;
using namespace llvm::VPlanPatternMatch;
30
31namespace {
32
33/// Helper to hold state needed for unrolling. It holds the Plan to unroll by
34/// UF. It also holds copies of VPValues across UF-1 unroll parts to facilitate
35/// the unrolling transformation, where the original VPValues are retained for
36/// part zero.
37class UnrollState {
38 /// Plan to unroll.
39 VPlan &Plan;
40 /// Unroll factor to unroll by.
41 const unsigned UF;
42 /// Analysis for types.
43 VPTypeAnalysis TypeInfo;
44
45 /// Unrolling may create recipes that should not be unrolled themselves.
46 /// Those are tracked in ToSkip.
47 SmallPtrSet<VPRecipeBase *, 8> ToSkip;
48
49 // Associate with each VPValue of part 0 its unrolled instances of parts 1,
50 // ..., UF-1.
51 DenseMap<VPValue *, SmallVector<VPValue *>> VPV2Parts;
52
53 /// Unroll replicate region \p VPR by cloning the region UF - 1 times.
54 void unrollReplicateRegionByUF(VPRegionBlock *VPR);
55
56 /// Unroll recipe \p R by cloning it UF - 1 times, unless it is uniform across
57 /// all parts.
58 void unrollRecipeByUF(VPRecipeBase &R);
59
60 /// Unroll header phi recipe \p R. How exactly the recipe gets unrolled
61 /// depends on the concrete header phi. Inserts newly created recipes at \p
62 /// InsertPtForPhi.
63 void unrollHeaderPHIByUF(VPHeaderPHIRecipe *R,
64 VPBasicBlock::iterator InsertPtForPhi);
65
66 /// Unroll a widen induction recipe \p IV. This introduces recipes to compute
67 /// the induction steps for each part.
68 void unrollWidenInductionByUF(VPWidenInductionRecipe *IV,
69 VPBasicBlock::iterator InsertPtForPhi);
70
71 VPValue *getConstantVPV(unsigned Part) {
72 Type *CanIVIntTy =
73 Plan.getVectorLoopRegion()->getCanonicalIV()->getScalarType();
74 return Plan.getOrAddLiveIn(ConstantInt::get(CanIVIntTy, Part));
75 }
76
77public:
78 UnrollState(VPlan &Plan, unsigned UF) : Plan(Plan), UF(UF), TypeInfo(Plan) {}
79
80 void unrollBlock(VPBlockBase *VPB);
81
82 VPValue *getValueForPart(VPValue *V, unsigned Part) {
83 if (Part == 0 || V->isLiveIn())
84 return V;
85 assert((VPV2Parts.contains(V) && VPV2Parts[V].size() >= Part) &&
86 "accessed value does not exist");
87 return VPV2Parts[V][Part - 1];
88 }
89
90 /// Given a single original recipe \p OrigR (of part zero), and its copy \p
91 /// CopyR for part \p Part, map every VPValue defined by \p OrigR to its
92 /// corresponding VPValue defined by \p CopyR.
93 void addRecipeForPart(VPRecipeBase *OrigR, VPRecipeBase *CopyR,
94 unsigned Part) {
95 for (const auto &[Idx, VPV] : enumerate(OrigR->definedValues())) {
96 const auto &[V, _] = VPV2Parts.try_emplace(VPV);
97 assert(V->second.size() == Part - 1 && "earlier parts not set");
98 V->second.push_back(CopyR->getVPValue(Idx));
99 }
100 }
101
102 /// Given a uniform recipe \p R, add it for all parts.
103 void addUniformForAllParts(VPSingleDefRecipe *R) {
104 const auto &[V, Inserted] = VPV2Parts.try_emplace(R);
105 assert(Inserted && "uniform value already added");
106 for (unsigned Part = 0; Part != UF; ++Part)
107 V->second.push_back(R);
108 }
109
110 bool contains(VPValue *VPV) const { return VPV2Parts.contains(VPV); }
111
112 /// Update \p R's operand at \p OpIdx with its corresponding VPValue for part
113 /// \p P.
114 void remapOperand(VPRecipeBase *R, unsigned OpIdx, unsigned Part) {
115 auto *Op = R->getOperand(OpIdx);
116 R->setOperand(OpIdx, getValueForPart(Op, Part));
117 }
118
119 /// Update \p R's operands with their corresponding VPValues for part \p P.
120 void remapOperands(VPRecipeBase *R, unsigned Part) {
121 for (const auto &[OpIdx, Op] : enumerate(R->operands()))
122 R->setOperand(OpIdx, getValueForPart(Op, Part));
123 }
124};
125} // namespace
126
127void UnrollState::unrollReplicateRegionByUF(VPRegionBlock *VPR) {
128 VPBlockBase *InsertPt = VPR->getSingleSuccessor();
129 for (unsigned Part = 1; Part != UF; ++Part) {
130 auto *Copy = VPR->clone();
131 VPBlockUtils::insertBlockBefore(Copy, InsertPt);
132
133 auto PartI = vp_depth_first_shallow(Copy->getEntry());
134 auto Part0 = vp_depth_first_shallow(VPR->getEntry());
135 for (const auto &[PartIVPBB, Part0VPBB] :
138 for (const auto &[PartIR, Part0R] : zip(*PartIVPBB, *Part0VPBB)) {
139 remapOperands(&PartIR, Part);
140 if (auto *ScalarIVSteps = dyn_cast<VPScalarIVStepsRecipe>(&PartIR)) {
141 ScalarIVSteps->addOperand(getConstantVPV(Part));
142 }
143
144 addRecipeForPart(&Part0R, &PartIR, Part);
145 }
146 }
147 }
148}
149
150void UnrollState::unrollWidenInductionByUF(
151 VPWidenInductionRecipe *IV, VPBasicBlock::iterator InsertPtForPhi) {
152 VPBasicBlock *PH = cast<VPBasicBlock>(
153 IV->getParent()->getEnclosingLoopRegion()->getSinglePredecessor());
154 Type *IVTy = TypeInfo.inferScalarType(IV);
155 auto &ID = IV->getInductionDescriptor();
156 VPIRFlags Flags;
157 if (isa_and_present<FPMathOperator>(ID.getInductionBinOp()))
158 Flags = ID.getInductionBinOp()->getFastMathFlags();
159
160 VPValue *ScalarStep = IV->getStepValue();
161 VPBuilder Builder(PH);
162 Type *VectorStepTy =
163 IVTy->isPointerTy() ? TypeInfo.inferScalarType(ScalarStep) : IVTy;
164 VPInstruction *VectorStep = Builder.createNaryOp(
165 VPInstruction::WideIVStep, {&Plan.getVF(), ScalarStep}, VectorStepTy,
166 Flags, IV->getDebugLoc());
167
168 ToSkip.insert(VectorStep);
169
170 // Now create recipes to compute the induction steps for part 1 .. UF. Part 0
171 // remains the header phi. Parts > 0 are computed by adding Step to the
172 // previous part. The header phi recipe will get 2 new operands: the step
173 // value for a single part and the last part, used to compute the backedge
174 // value during VPWidenInductionRecipe::execute.
175 // %Part.0 = VPWidenInductionRecipe %Start, %ScalarStep, %VectorStep, %Part.3
176 // %Part.1 = %Part.0 + %VectorStep
177 // %Part.2 = %Part.1 + %VectorStep
178 // %Part.3 = %Part.2 + %VectorStep
179 //
180 // The newly added recipes are added to ToSkip to avoid interleaving them
181 // again.
182 VPValue *Prev = IV;
183 Builder.setInsertPoint(IV->getParent(), InsertPtForPhi);
184 unsigned AddOpc;
185 if (IVTy->isPointerTy())
187 else if (IVTy->isFloatingPointTy())
188 AddOpc = ID.getInductionOpcode();
189 else
190 AddOpc = Instruction::Add;
191 for (unsigned Part = 1; Part != UF; ++Part) {
192 std::string Name =
193 Part > 1 ? "step.add." + std::to_string(Part) : "step.add";
194
195 VPInstruction *Add = Builder.createNaryOp(AddOpc,
196 {
197 Prev,
198 VectorStep,
199 },
200 Flags, IV->getDebugLoc(), Name);
201 ToSkip.insert(Add);
202 addRecipeForPart(IV, Add, Part);
203 Prev = Add;
204 }
205 IV->addOperand(VectorStep);
206 IV->addOperand(Prev);
207}
208
209void UnrollState::unrollHeaderPHIByUF(VPHeaderPHIRecipe *R,
210 VPBasicBlock::iterator InsertPtForPhi) {
211 // First-order recurrences pass a single vector or scalar through their header
212 // phis, irrespective of interleaving.
214 return;
215
216 // Generate step vectors for each unrolled part.
217 if (auto *IV = dyn_cast<VPWidenInductionRecipe>(R)) {
218 unrollWidenInductionByUF(IV, InsertPtForPhi);
219 return;
220 }
221
222 auto *RdxPhi = dyn_cast<VPReductionPHIRecipe>(R);
223 if (RdxPhi && RdxPhi->isOrdered())
224 return;
225
226 auto InsertPt = std::next(R->getIterator());
227 for (unsigned Part = 1; Part != UF; ++Part) {
228 VPRecipeBase *Copy = R->clone();
229 Copy->insertBefore(*R->getParent(), InsertPt);
230 addRecipeForPart(R, Copy, Part);
231 if (RdxPhi) {
232 // If the start value is a ReductionStartVector, use the identity value
233 // (second operand) for unrolled parts. If the scaling factor is > 1,
234 // create a new ReductionStartVector with the scale factor and both
235 // operands set to the identity value.
236 if (auto *VPI = dyn_cast<VPInstruction>(RdxPhi->getStartValue())) {
237 assert(VPI->getOpcode() == VPInstruction::ReductionStartVector &&
238 "unexpected start VPInstruction");
239 if (Part != 1)
240 continue;
241 VPValue *StartV;
242 if (match(VPI->getOperand(2), m_One())) {
243 StartV = VPI->getOperand(1);
244 } else {
245 auto *C = VPI->clone();
246 C->setOperand(0, C->getOperand(1));
247 C->insertAfter(VPI);
248 StartV = C;
249 }
250 for (unsigned Part = 1; Part != UF; ++Part)
251 VPV2Parts[VPI][Part - 1] = StartV;
252 }
253 Copy->addOperand(getConstantVPV(Part));
254 } else {
256 "unexpected header phi recipe not needing unrolled part");
257 }
258 }
259}
260
261/// Handle non-header-phi recipes.
262void UnrollState::unrollRecipeByUF(VPRecipeBase &R) {
264 return;
265
266 if (auto *VPI = dyn_cast<VPInstruction>(&R)) {
268 addUniformForAllParts(VPI);
269 return;
270 }
271 }
272 if (auto *RepR = dyn_cast<VPReplicateRecipe>(&R)) {
273 if (isa<StoreInst>(RepR->getUnderlyingValue()) &&
274 RepR->getOperand(1)->isDefinedOutsideLoopRegions()) {
275 // Stores to an invariant address only need to store the last part.
276 remapOperands(&R, UF - 1);
277 return;
278 }
279 if (auto *II = dyn_cast<IntrinsicInst>(RepR->getUnderlyingValue())) {
280 if (II->getIntrinsicID() == Intrinsic::experimental_noalias_scope_decl) {
281 addUniformForAllParts(RepR);
282 return;
283 }
284 }
285 }
286
287 // Unroll non-uniform recipes.
288 auto InsertPt = std::next(R.getIterator());
289 VPBasicBlock &VPBB = *R.getParent();
290 for (unsigned Part = 1; Part != UF; ++Part) {
291 VPRecipeBase *Copy = R.clone();
292 Copy->insertBefore(VPBB, InsertPt);
293 addRecipeForPart(&R, Copy, Part);
294
295 VPValue *Op;
297 m_VPValue(), m_VPValue(Op)))) {
298 Copy->setOperand(0, getValueForPart(Op, Part - 1));
299 Copy->setOperand(1, getValueForPart(Op, Part));
300 continue;
301 }
302 if (auto *Red = dyn_cast<VPReductionRecipe>(&R)) {
303 auto *Phi = dyn_cast<VPReductionPHIRecipe>(R.getOperand(0));
304 if (Phi && Phi->isOrdered()) {
305 auto &Parts = VPV2Parts[Phi];
306 if (Part == 1) {
307 Parts.clear();
308 Parts.push_back(Red);
309 }
310 Parts.push_back(Copy->getVPSingleValue());
311 Phi->setOperand(1, Copy->getVPSingleValue());
312 }
313 }
314 remapOperands(Copy, Part);
315
316 // Add operand indicating the part to generate code for, to recipes still
317 // requiring it.
318 if (isa<VPScalarIVStepsRecipe, VPWidenCanonicalIVRecipe,
319 VPVectorPointerRecipe, VPVectorEndPointerRecipe>(Copy) ||
320 match(Copy,
322 Copy->addOperand(getConstantVPV(Part));
323
325 Copy->setOperand(0, R.getOperand(0));
326 }
327}
328
329void UnrollState::unrollBlock(VPBlockBase *VPB) {
330 auto *VPR = dyn_cast<VPRegionBlock>(VPB);
331 if (VPR) {
332 if (VPR->isReplicator())
333 return unrollReplicateRegionByUF(VPR);
334
335 // Traverse blocks in region in RPO to ensure defs are visited before uses
336 // across blocks.
337 ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>>
338 RPOT(VPR->getEntry());
339 for (VPBlockBase *VPB : RPOT)
340 unrollBlock(VPB);
341 return;
342 }
343
344 // VPB is a VPBasicBlock; unroll it, i.e., unroll its recipes.
345 auto *VPBB = cast<VPBasicBlock>(VPB);
346 auto InsertPtForPhi = VPBB->getFirstNonPhi();
347 for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
348 if (ToSkip.contains(&R) || isa<VPIRInstruction>(&R))
349 continue;
350
351 // Add all VPValues for all parts to AnyOf, FirstActiveLaneMask and
352 // Compute*Result which combine all parts to compute the final value.
353 VPValue *Op1;
355 match(&R, m_FirstActiveLane(m_VPValue(Op1))) ||
357 m_VPValue(), m_VPValue(), m_VPValue(Op1))) ||
359 m_VPValue(), m_VPValue(Op1))) ||
361 m_VPValue(), m_VPValue(), m_VPValue(), m_VPValue(Op1)))) {
362 addUniformForAllParts(cast<VPInstruction>(&R));
363 for (unsigned Part = 1; Part != UF; ++Part)
364 R.addOperand(getValueForPart(Op1, Part));
365 continue;
366 }
367 VPValue *Op0;
369 m_VPValue(Op0), m_VPValue(Op1)))) {
370 addUniformForAllParts(cast<VPInstruction>(&R));
371 for (unsigned Part = 1; Part != UF; ++Part)
372 R.addOperand(getValueForPart(Op1, Part));
373 continue;
374 }
375 if (match(&R, m_ExtractLastElement(m_VPValue(Op0))) ||
377 m_VPValue(Op0)))) {
378 addUniformForAllParts(cast<VPSingleDefRecipe>(&R));
379 if (Plan.hasScalarVFOnly()) {
380 auto *I = cast<VPInstruction>(&R);
381 // Extracting from end with VF = 1 implies retrieving the last or
382 // penultimate scalar part (UF-1 or UF-2).
383 unsigned Offset =
384 I->getOpcode() == VPInstruction::ExtractLastElement ? 1 : 2;
385 I->replaceAllUsesWith(getValueForPart(Op0, UF - Offset));
386 R.eraseFromParent();
387 } else {
388 // Otherwise we extract from the last part.
389 remapOperands(&R, UF - 1);
390 }
391 continue;
392 }
393
394 auto *SingleDef = dyn_cast<VPSingleDefRecipe>(&R);
395 if (SingleDef && vputils::isUniformAcrossVFsAndUFs(SingleDef)) {
396 addUniformForAllParts(SingleDef);
397 continue;
398 }
399
400 if (auto *H = dyn_cast<VPHeaderPHIRecipe>(&R)) {
401 unrollHeaderPHIByUF(H, InsertPtForPhi);
402 continue;
403 }
404
405 unrollRecipeByUF(R);
406 }
407}
408
409void VPlanTransforms::unrollByUF(VPlan &Plan, unsigned UF) {
410 assert(UF > 0 && "Unroll factor must be positive");
411 Plan.setUF(UF);
412 auto Cleanup = make_scope_exit([&Plan]() {
413 auto Iter = vp_depth_first_deep(Plan.getEntry());
414 // Remove recipes that are redundant after unrolling.
416 for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
417 auto *VPI = dyn_cast<VPInstruction>(&R);
418 if (VPI &&
419 VPI->getOpcode() == VPInstruction::CanonicalIVIncrementForPart &&
420 VPI->getNumOperands() == 1) {
421 VPI->replaceAllUsesWith(VPI->getOperand(0));
422 VPI->eraseFromParent();
423 }
424 }
425 }
426 });
427 if (UF == 1) {
428 return;
429 }
430
431 UnrollState Unroller(Plan, UF);
432
433 // Iterate over all blocks in the plan starting from Entry, and unroll
434 // recipes inside them. This includes the vector preheader and middle blocks,
435 // which may set up or post-process per-part values.
437 Plan.getEntry());
438 for (VPBlockBase *VPB : RPOT)
439 Unroller.unrollBlock(VPB);
440
441 unsigned Part = 1;
442 // Remap operands of cloned header phis to update backedge values. The header
443 // phis cloned during unrolling are just after the header phi for part 0.
444 // Reset Part to 1 when reaching the first (part 0) recipe of a block.
445 for (VPRecipeBase &H :
447 // The second operand of Fixed Order Recurrence phi's, feeding the spliced
448 // value across the backedge, needs to remap to the last part of the spliced
449 // value.
451 Unroller.remapOperand(&H, 1, UF - 1);
452 continue;
453 }
454 if (Unroller.contains(H.getVPSingleValue())) {
455 Part = 1;
456 continue;
457 }
458 Unroller.remapOperands(&H, Part);
459 Part++;
460 }
461
463}
464
465/// Create a single-scalar clone of \p DefR (must be a VPReplicateRecipe or
466/// VPInstruction) for lane \p Lane. Use \p Def2LaneDefs to look up scalar
467/// definitions for operands of \DefR.
468static VPValue *
469cloneForLane(VPlan &Plan, VPBuilder &Builder, Type *IdxTy,
470 VPRecipeWithIRFlags *DefR, VPLane Lane,
471 const DenseMap<VPValue *, SmallVector<VPValue *>> &Def2LaneDefs) {
472 VPValue *Op;
474 auto LaneDefs = Def2LaneDefs.find(Op);
475 if (LaneDefs != Def2LaneDefs.end())
476 return LaneDefs->second[Lane.getKnownLane()];
477
478 VPValue *Idx =
479 Plan.getOrAddLiveIn(ConstantInt::get(IdxTy, Lane.getKnownLane()));
480 return Builder.createNaryOp(Instruction::ExtractElement, {Op, Idx});
481 }
482
483 // Collect the operands at Lane, creating extracts as needed.
485 for (VPValue *Op : DefR->operands()) {
486 // If Op is a definition that has been unrolled, directly use the clone for
487 // the corresponding lane.
488 auto LaneDefs = Def2LaneDefs.find(Op);
489 if (LaneDefs != Def2LaneDefs.end()) {
490 NewOps.push_back(LaneDefs->second[Lane.getKnownLane()]);
491 continue;
492 }
493 if (Lane.getKind() == VPLane::Kind::ScalableLast) {
494 // Look through mandatory Unpack.
495 [[maybe_unused]] bool Matched =
497 assert(Matched && "original op must have been Unpack");
498 NewOps.push_back(
499 Builder.createNaryOp(VPInstruction::ExtractLastElement, {Op}));
500 continue;
501 }
503 NewOps.push_back(Op);
504 continue;
505 }
506
507 // Look through buildvector to avoid unnecessary extracts.
508 if (match(Op, m_BuildVector())) {
509 NewOps.push_back(
510 cast<VPInstruction>(Op)->getOperand(Lane.getKnownLane()));
511 continue;
512 }
513 VPValue *Idx =
514 Plan.getOrAddLiveIn(ConstantInt::get(IdxTy, Lane.getKnownLane()));
515 VPValue *Ext = Builder.createNaryOp(Instruction::ExtractElement, {Op, Idx});
516 NewOps.push_back(Ext);
517 }
518
520 if (auto *RepR = dyn_cast<VPReplicateRecipe>(DefR)) {
521 // TODO: have cloning of replicate recipes also provide the desired result
522 // coupled with setting its operands to NewOps (deriving IsSingleScalar and
523 // Mask from the operands?)
524 New =
525 new VPReplicateRecipe(RepR->getUnderlyingInstr(), NewOps,
526 /*IsSingleScalar=*/true, /*Mask=*/nullptr, *RepR);
527 } else {
529 "DefR must be a VPReplicateRecipe or VPInstruction");
530 New = DefR->clone();
531 for (const auto &[Idx, Op] : enumerate(NewOps)) {
532 New->setOperand(Idx, Op);
533 }
534 }
535 New->transferFlags(*DefR);
536 New->insertBefore(DefR);
537 return New;
538}
539
541 Type *IdxTy = IntegerType::get(
543
544 // Visit all VPBBs outside the loop region and directly inside the top-level
545 // loop region.
546 auto VPBBsOutsideLoopRegion = VPBlockUtils::blocksOnly<VPBasicBlock>(
548 auto VPBBsInsideLoopRegion = VPBlockUtils::blocksOnly<VPBasicBlock>(
550 auto VPBBsToUnroll =
551 concat<VPBasicBlock *>(VPBBsOutsideLoopRegion, VPBBsInsideLoopRegion);
552 // A mapping of current VPValue definitions to collections of new VPValues
553 // defined per lane. Serves to hook-up potential users of current VPValue
554 // definition that are replicated-per-VF later.
556 // The removal of current recipes being replaced by new ones needs to be
557 // delayed after Def2LaneDefs is no longer in use.
559 for (VPBasicBlock *VPBB : VPBBsToUnroll) {
560 for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
563 cast<VPReplicateRecipe>(&R)->isSingleScalar()) ||
564 (isa<VPInstruction>(&R) &&
565 !cast<VPInstruction>(&R)->doesGeneratePerAllLanes() &&
567 continue;
568
569 auto *DefR = cast<VPRecipeWithIRFlags>(&R);
570 VPBuilder Builder(DefR);
571 if (DefR->getNumUsers() == 0) {
572 // Create single-scalar version of DefR for all lanes.
573 for (unsigned I = 0; I != VF.getKnownMinValue(); ++I)
574 cloneForLane(Plan, Builder, IdxTy, DefR, VPLane(I), Def2LaneDefs);
575 DefR->eraseFromParent();
576 continue;
577 }
578 /// Create single-scalar version of DefR for all lanes.
579 SmallVector<VPValue *> LaneDefs;
580 for (unsigned I = 0; I != VF.getKnownMinValue(); ++I)
581 LaneDefs.push_back(
582 cloneForLane(Plan, Builder, IdxTy, DefR, VPLane(I), Def2LaneDefs));
583
584 Def2LaneDefs[DefR] = LaneDefs;
585 /// Users that only demand the first lane can use the definition for lane
586 /// 0.
587 DefR->replaceUsesWithIf(LaneDefs[0], [DefR](VPUser &U, unsigned) {
588 return U.onlyFirstLaneUsed(DefR);
589 });
590
591 // Update each build vector user that currently has DefR as its only
592 // operand, to have all LaneDefs as its operands.
593 for (VPUser *U : to_vector(DefR->users())) {
594 auto *VPI = dyn_cast<VPInstruction>(U);
595 if (!VPI || (VPI->getOpcode() != VPInstruction::BuildVector &&
596 VPI->getOpcode() != VPInstruction::BuildStructVector))
597 continue;
598 assert(VPI->getNumOperands() == 1 &&
599 "Build(Struct)Vector must have a single operand before "
600 "replicating by VF");
601 VPI->setOperand(0, LaneDefs[0]);
602 for (VPValue *LaneDef : drop_begin(LaneDefs))
603 VPI->addOperand(LaneDef);
604 }
605 ToRemove.push_back(DefR);
606 }
607 }
608 for (auto *R : reverse(ToRemove))
609 R->eraseFromParent();
610}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
ReachingDefInfo InstSet & ToRemove
static const HTTPClientCleanup Cleanup
#define _
#define I(x, y, z)
Definition MD5.cpp:58
#define H(x, y, z)
Definition MD5.cpp:57
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
This file contains some templates that are useful if you are working with the STL at all.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:480
This file defines the make_scope_exit function, which executes user-defined cleanup logic at scope ex...
This file contains the declarations of different VPlan-related auxiliary helpers.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:247
This file provides utility VPlan to VPlan transformations.
static VPValue * cloneForLane(VPlan &Plan, VPBuilder &Builder, Type *IdxTy, VPRecipeWithIRFlags *DefR, VPLane Lane, const DenseMap< VPValue *, SmallVector< VPValue * > > &Def2LaneDefs)
Create a single-scalar clone of DefR (must be a VPReplicateRecipe or VPInstruction) for lane Lane.
static void remapOperands(VPBlockBase *Entry, VPBlockBase *NewEntry, DenseMap< VPValue *, VPValue * > &Old2NewVPValues)
Definition VPlan.cpp:1153
This file contains the declarations of the Vectorization Plan base classes:
static const uint32_t IV[8]
Definition blake3_impl.h:83
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:319
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:267
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition Type.h:184
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition VPlan.h:3800
RecipeListTy::iterator iterator
Instruction iterators...
Definition VPlan.h:3827
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
Definition VPlan.h:3888
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
Definition VPlan.cpp:246
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition VPlan.h:80
const VPBasicBlock * getEntryBasicBlock() const
Definition VPlan.cpp:170
VPBlockBase * getSingleSuccessor() const
Definition VPlan.h:208
static auto blocksOnly(const T &Range)
Return an iterator range over Range which only includes BlockTy blocks.
Definition VPlanUtils.h:238
static void insertBlockBefore(VPBlockBase *NewBlock, VPBlockBase *BlockPtr)
Insert disconnected block NewBlock before Blockptr.
Definition VPlanUtils.h:159
VPlan-based builder utility analogous to IRBuilder.
ArrayRef< VPValue * > definedValues()
Returns an ArrayRef of the values defined by the VPDef.
Definition VPlanValue.h:419
VPValue * getVPValue(unsigned I)
Returns the VPValue with index I defined by the VPDef.
Definition VPlanValue.h:409
BasicBlock * getIRBasicBlock() const
Definition VPlan.h:3977
@ WideIVStep
Scale the first operand (vector step) by the second operand (scalar-step).
Definition VPlan.h:1053
@ Unpack
Extracts all lanes from its (non-scalable) vector operand.
Definition VPlan.h:1014
@ ReductionStartVector
Start vector for reductions with 3 operands: the original start value, the identity value for the red...
Definition VPlan.h:1057
@ BuildVector
Creates a fixed-width vector containing all operands.
Definition VPlan.h:1009
@ BuildStructVector
Given operands of (the same) struct type, creates a struct of fixed- width vectors each containing a ...
Definition VPlan.h:1006
@ CanonicalIVIncrementForPart
Definition VPlan.h:999
In what follows, the term "input IR" refers to code that is fed into the vectorizer whereas the term ...
Kind getKind() const
Returns the Kind of lane offset.
unsigned getKnownLane() const
Returns a compile-time known value for the lane index and asserts if the lane can only be calculated ...
@ ScalableLast
For ScalableLast, Lane is the offset from the start of the last N-element subvector in a scalable vec...
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition VPlan.h:386
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition VPlan.h:3988
VPRegionBlock * clone() override
Clone all blocks in the single-entry single-exit region of the block and their recipes without updati...
Definition VPlan.cpp:769
const VPBlockBase * getEntry() const
Definition VPlan.h:4024
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
Definition VPlan.h:4056
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
Definition VPlan.h:2868
Type * inferScalarType(const VPValue *V)
Infer the type of V. Returns the scalar type of V.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition VPlanValue.h:199
operand_range operands()
Definition VPlanValue.h:267
VPlan models a candidate for vectorization, encoding various decisions take to produce efficient outp...
Definition VPlan.h:4112
VPBasicBlock * getEntry()
Definition VPlan.h:4211
VPValue & getVF()
Returns the VF of the vector loop region.
Definition VPlan.h:4305
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition VPlan.cpp:1049
VPValue * getOrAddLiveIn(Value *V)
Gets the live-in VPValue for V or adds a new live-in (if none exists yet) for V.
Definition VPlan.h:4365
bool hasScalarVFOnly() const
Definition VPlan.h:4334
VPIRBasicBlock * getScalarHeader() const
Return the VPIRBasicBlock wrapping the header of the scalar loop.
Definition VPlan.h:4259
void setUF(unsigned UF)
Definition VPlan.h:4348
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:166
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
bool match(Val *V, const Pattern &P)
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
VPInstruction_match< VPInstruction::ExtractLastElement, Op0_t > m_ExtractLastElement(const Op0_t &Op0)
VPInstruction_match< VPInstruction::BranchOnCount > m_BranchOnCount()
class_match< VPValue > m_VPValue()
Match an arbitrary VPValue and ignore it.
VPInstruction_match< VPInstruction::BuildVector > m_BuildVector()
BuildVector is matches only its opcode, w/o matching its operands as the number of operands is not fi...
VPInstruction_match< VPInstruction::FirstActiveLane, Op0_t > m_FirstActiveLane(const Op0_t &Op0)
bind_ty< VPInstruction > m_VPInstruction(VPInstruction *&V)
Match a VPInstruction, capturing if we match.
VPInstruction_match< VPInstruction::BranchOnCond > m_BranchOnCond()
NodeAddr< PhiNode * > Phi
Definition RDFGraph.h:390
bool isSingleScalar(const VPValue *VPV)
Returns true if VPV is a single scalar, either because it produces the same value for all lanes or on...
Definition VPlanUtils.h:44
bool isUniformAcrossVFsAndUFs(VPValue *V)
Checks if V is uniform across all VF lanes and UF parts.
bool onlyFirstPartUsed(const VPValue *Def)
Returns true if only the first part of Def is used.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
@ Offset
Definition DWP.cpp:477
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition STLExtras.h:829
detail::scope_exit< std::decay_t< Callable > > make_scope_exit(Callable &&F)
Definition ScopeExit.h:59
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2472
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:632
iterator_range< df_iterator< VPBlockShallowTraversalWrapper< VPBlockBase * > > > vp_depth_first_shallow(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order.
Definition VPlanCFG.h:216
iterator_range< df_iterator< VPBlockDeepTraversalWrapper< VPBlockBase * > > > vp_depth_first_deep(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order while traversing t...
Definition VPlanCFG.h:243
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
Definition STLExtras.h:1150
auto reverse(ContainerTy &&C)
Definition STLExtras.h:406
bool isa_and_present(const Y &Val)
isa_and_present<X> - Functionally identical to isa, except that a null value is accepted.
Definition Casting.h:669
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
@ Add
Sum of integers.
DWARFExpression::Operation Op
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
A pure-virtual common base class for recipes defining a single VPValue and using IR flags.
Definition VPlan.h:871
virtual VPRecipeWithIRFlags * clone() override=0
Clone the current recipe.
static void unrollByUF(VPlan &Plan, unsigned UF)
Explicitly unroll Plan by UF.
static void removeDeadRecipes(VPlan &Plan)
Remove dead recipes from Plan.
static void replicateByVF(VPlan &Plan, ElementCount VF)
Replace each replicating VPReplicateRecipe and VPInstruction outside of any replicate region in Plan ...