LLVM 19.0.0git
VPlanRecipes.cpp
Go to the documentation of this file.
1//===- VPlanRecipes.cpp - Implementations for VPlan recipes ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file contains implementations for different VPlan recipes.
11///
12//===----------------------------------------------------------------------===//
13
14#include "VPlan.h"
15#include "VPlanAnalysis.h"
16#include "llvm/ADT/STLExtras.h"
18#include "llvm/ADT/Twine.h"
20#include "llvm/IR/BasicBlock.h"
21#include "llvm/IR/IRBuilder.h"
22#include "llvm/IR/Instruction.h"
24#include "llvm/IR/Type.h"
25#include "llvm/IR/Value.h"
28#include "llvm/Support/Debug.h"
33#include <cassert>
34
35using namespace llvm;
36
38
39namespace llvm {
41}
42
43#define LV_NAME "loop-vectorize"
44#define DEBUG_TYPE LV_NAME
45
47 switch (getVPDefID()) {
48 case VPInterleaveSC:
49 return cast<VPInterleaveRecipe>(this)->getNumStoreOperands() > 0;
50 case VPWidenStoreEVLSC:
51 case VPWidenStoreSC:
52 return true;
53 case VPReplicateSC:
54 return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
55 ->mayWriteToMemory();
56 case VPWidenCallSC:
57 return !cast<VPWidenCallRecipe>(this)
58 ->getCalledScalarFunction()
59 ->onlyReadsMemory();
60 case VPBranchOnMaskSC:
61 case VPScalarIVStepsSC:
62 case VPPredInstPHISC:
63 return false;
64 case VPBlendSC:
65 case VPReductionSC:
66 case VPWidenCanonicalIVSC:
67 case VPWidenCastSC:
68 case VPWidenGEPSC:
69 case VPWidenIntOrFpInductionSC:
70 case VPWidenLoadEVLSC:
71 case VPWidenLoadSC:
72 case VPWidenPHISC:
73 case VPWidenSC:
74 case VPWidenSelectSC: {
75 const Instruction *I =
76 dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
77 (void)I;
78 assert((!I || !I->mayWriteToMemory()) &&
79 "underlying instruction may write to memory");
80 return false;
81 }
82 default:
83 return true;
84 }
85}
86
88 switch (getVPDefID()) {
89 case VPWidenLoadEVLSC:
90 case VPWidenLoadSC:
91 return true;
92 case VPReplicateSC:
93 return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
94 ->mayReadFromMemory();
95 case VPWidenCallSC:
96 return !cast<VPWidenCallRecipe>(this)
97 ->getCalledScalarFunction()
98 ->onlyWritesMemory();
99 case VPBranchOnMaskSC:
100 case VPPredInstPHISC:
101 case VPScalarIVStepsSC:
102 case VPWidenStoreEVLSC:
103 case VPWidenStoreSC:
104 return false;
105 case VPBlendSC:
106 case VPReductionSC:
107 case VPWidenCanonicalIVSC:
108 case VPWidenCastSC:
109 case VPWidenGEPSC:
110 case VPWidenIntOrFpInductionSC:
111 case VPWidenPHISC:
112 case VPWidenSC:
113 case VPWidenSelectSC: {
114 const Instruction *I =
115 dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
116 (void)I;
117 assert((!I || !I->mayReadFromMemory()) &&
118 "underlying instruction may read from memory");
119 return false;
120 }
121 default:
122 return true;
123 }
124}
125
127 switch (getVPDefID()) {
128 case VPDerivedIVSC:
129 case VPPredInstPHISC:
130 case VPScalarCastSC:
131 return false;
132 case VPInstructionSC:
133 switch (cast<VPInstruction>(this)->getOpcode()) {
134 case Instruction::Or:
135 case Instruction::ICmp:
136 case Instruction::Select:
141 return false;
142 default:
143 return true;
144 }
145 case VPWidenCallSC: {
146 Function *Fn = cast<VPWidenCallRecipe>(this)->getCalledScalarFunction();
147 return mayWriteToMemory() || !Fn->doesNotThrow() || !Fn->willReturn();
148 }
149 case VPBlendSC:
150 case VPReductionSC:
151 case VPScalarIVStepsSC:
152 case VPWidenCanonicalIVSC:
153 case VPWidenCastSC:
154 case VPWidenGEPSC:
155 case VPWidenIntOrFpInductionSC:
156 case VPWidenPHISC:
157 case VPWidenPointerInductionSC:
158 case VPWidenSC:
159 case VPWidenSelectSC: {
160 const Instruction *I =
161 dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
162 (void)I;
163 assert((!I || !I->mayHaveSideEffects()) &&
164 "underlying instruction has side-effects");
165 return false;
166 }
167 case VPInterleaveSC:
168 return mayWriteToMemory();
169 case VPWidenLoadEVLSC:
170 case VPWidenLoadSC:
171 case VPWidenStoreEVLSC:
172 case VPWidenStoreSC:
173 assert(
174 cast<VPWidenMemoryRecipe>(this)->getIngredient().mayHaveSideEffects() ==
176 "mayHaveSideffects result for ingredient differs from this "
177 "implementation");
178 return mayWriteToMemory();
179 case VPReplicateSC: {
180 auto *R = cast<VPReplicateRecipe>(this);
181 return R->getUnderlyingInstr()->mayHaveSideEffects();
182 }
183 default:
184 return true;
185 }
186}
187
189 auto Lane = VPLane::getLastLaneForVF(State.VF);
190 VPValue *ExitValue = getOperand(0);
192 Lane = VPLane::getFirstLane();
193 VPBasicBlock *MiddleVPBB =
194 cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSingleSuccessor());
195 assert(MiddleVPBB->getNumSuccessors() == 0 &&
196 "the middle block must not have any successors");
197 BasicBlock *MiddleBB = State.CFG.VPBB2IRBB[MiddleVPBB];
198 Phi->addIncoming(State.get(ExitValue, VPIteration(State.UF - 1, Lane)),
199 MiddleBB);
200}
201
202#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
204 O << "Live-out ";
206 O << " = ";
208 O << "\n";
209}
210#endif
211
213 assert(!Parent && "Recipe already in some VPBasicBlock");
214 assert(InsertPos->getParent() &&
215 "Insertion position not in any VPBasicBlock");
216 InsertPos->getParent()->insert(this, InsertPos->getIterator());
217}
218
221 assert(!Parent && "Recipe already in some VPBasicBlock");
222 assert(I == BB.end() || I->getParent() == &BB);
223 BB.insert(this, I);
224}
225
227 assert(!Parent && "Recipe already in some VPBasicBlock");
228 assert(InsertPos->getParent() &&
229 "Insertion position not in any VPBasicBlock");
230 InsertPos->getParent()->insert(this, std::next(InsertPos->getIterator()));
231}
232
234 assert(getParent() && "Recipe not in any VPBasicBlock");
236 Parent = nullptr;
237}
238
240 assert(getParent() && "Recipe not in any VPBasicBlock");
242}
243
246 insertAfter(InsertPos);
247}
248
252 insertBefore(BB, I);
253}
254
256 assert(OpType == OperationType::FPMathOp &&
257 "recipe doesn't have fast math flags");
258 FastMathFlags Res;
259 Res.setAllowReassoc(FMFs.AllowReassoc);
260 Res.setNoNaNs(FMFs.NoNaNs);
261 Res.setNoInfs(FMFs.NoInfs);
262 Res.setNoSignedZeros(FMFs.NoSignedZeros);
263 Res.setAllowReciprocal(FMFs.AllowReciprocal);
264 Res.setAllowContract(FMFs.AllowContract);
265 Res.setApproxFunc(FMFs.ApproxFunc);
266 return Res;
267}
268
271 const Twine &Name)
272 : VPRecipeWithIRFlags(VPDef::VPInstructionSC, ArrayRef<VPValue *>({A, B}),
273 Pred, DL),
274 Opcode(Opcode), Name(Name.str()) {
275 assert(Opcode == Instruction::ICmp &&
276 "only ICmp predicates supported at the moment");
277}
278
280 std::initializer_list<VPValue *> Operands,
281 FastMathFlags FMFs, DebugLoc DL, const Twine &Name)
282 : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, FMFs, DL),
283 Opcode(Opcode), Name(Name.str()) {
284 // Make sure the VPInstruction is a floating-point operation.
285 assert(isFPMathOp() && "this op can't take fast-math flags");
286}
287
288bool VPInstruction::doesGeneratePerAllLanes() const {
289 return Opcode == VPInstruction::PtrAdd && !vputils::onlyFirstLaneUsed(this);
290}
291
292bool VPInstruction::canGenerateScalarForFirstLane() const {
294 return true;
295
296 switch (Opcode) {
304 return true;
305 default:
306 return false;
307 }
308}
309
310Value *VPInstruction::generatePerLane(VPTransformState &State,
311 const VPIteration &Lane) {
312 IRBuilderBase &Builder = State.Builder;
313
315 "only PtrAdd opcodes are supported for now");
316 return Builder.CreatePtrAdd(State.get(getOperand(0), Lane),
317 State.get(getOperand(1), Lane), Name);
318}
319
320Value *VPInstruction::generatePerPart(VPTransformState &State, unsigned Part) {
321 IRBuilderBase &Builder = State.Builder;
322
324 bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
325 if (Part != 0 && vputils::onlyFirstPartUsed(this))
326 return State.get(this, 0, OnlyFirstLaneUsed);
327
328 Value *A = State.get(getOperand(0), Part, OnlyFirstLaneUsed);
329 Value *B = State.get(getOperand(1), Part, OnlyFirstLaneUsed);
330 auto *Res =
331 Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B, Name);
332 if (auto *I = dyn_cast<Instruction>(Res))
333 setFlags(I);
334 return Res;
335 }
336
337 switch (getOpcode()) {
338 case VPInstruction::Not: {
339 Value *A = State.get(getOperand(0), Part);
340 return Builder.CreateNot(A, Name);
341 }
342 case Instruction::ICmp: {
343 Value *A = State.get(getOperand(0), Part);
344 Value *B = State.get(getOperand(1), Part);
345 return Builder.CreateCmp(getPredicate(), A, B, Name);
346 }
347 case Instruction::Select: {
348 Value *Cond = State.get(getOperand(0), Part);
349 Value *Op1 = State.get(getOperand(1), Part);
350 Value *Op2 = State.get(getOperand(2), Part);
351 return Builder.CreateSelect(Cond, Op1, Op2, Name);
352 }
354 // Get first lane of vector induction variable.
355 Value *VIVElem0 = State.get(getOperand(0), VPIteration(Part, 0));
356 // Get the original loop tripcount.
357 Value *ScalarTC = State.get(getOperand(1), VPIteration(Part, 0));
358
359 // If this part of the active lane mask is scalar, generate the CMP directly
360 // to avoid unnecessary extracts.
361 if (State.VF.isScalar())
362 return Builder.CreateCmp(CmpInst::Predicate::ICMP_ULT, VIVElem0, ScalarTC,
363 Name);
364
365 auto *Int1Ty = Type::getInt1Ty(Builder.getContext());
366 auto *PredTy = VectorType::get(Int1Ty, State.VF);
367 return Builder.CreateIntrinsic(Intrinsic::get_active_lane_mask,
368 {PredTy, ScalarTC->getType()},
369 {VIVElem0, ScalarTC}, nullptr, Name);
370 }
372 // Generate code to combine the previous and current values in vector v3.
373 //
374 // vector.ph:
375 // v_init = vector(..., ..., ..., a[-1])
376 // br vector.body
377 //
378 // vector.body
379 // i = phi [0, vector.ph], [i+4, vector.body]
380 // v1 = phi [v_init, vector.ph], [v2, vector.body]
381 // v2 = a[i, i+1, i+2, i+3];
382 // v3 = vector(v1(3), v2(0, 1, 2))
383
384 // For the first part, use the recurrence phi (v1), otherwise v2.
385 auto *V1 = State.get(getOperand(0), 0);
386 Value *PartMinus1 = Part == 0 ? V1 : State.get(getOperand(1), Part - 1);
387 if (!PartMinus1->getType()->isVectorTy())
388 return PartMinus1;
389 Value *V2 = State.get(getOperand(1), Part);
390 return Builder.CreateVectorSplice(PartMinus1, V2, -1, Name);
391 }
393 if (Part != 0)
394 return State.get(this, 0, /*IsScalar*/ true);
395
396 Value *ScalarTC = State.get(getOperand(0), {0, 0});
397 Value *Step =
398 createStepForVF(Builder, ScalarTC->getType(), State.VF, State.UF);
399 Value *Sub = Builder.CreateSub(ScalarTC, Step);
400 Value *Cmp = Builder.CreateICmp(CmpInst::Predicate::ICMP_UGT, ScalarTC, Step);
401 Value *Zero = ConstantInt::get(ScalarTC->getType(), 0);
402 return Builder.CreateSelect(Cmp, Sub, Zero);
403 }
405 // Compute EVL
406 auto GetEVL = [=](VPTransformState &State, Value *AVL) {
407 assert(AVL->getType()->isIntegerTy() &&
408 "Requested vector length should be an integer.");
409
410 // TODO: Add support for MaxSafeDist for correct loop emission.
411 assert(State.VF.isScalable() && "Expected scalable vector factor.");
412 Value *VFArg = State.Builder.getInt32(State.VF.getKnownMinValue());
413
414 Value *EVL = State.Builder.CreateIntrinsic(
415 State.Builder.getInt32Ty(), Intrinsic::experimental_get_vector_length,
416 {AVL, VFArg, State.Builder.getTrue()});
417 return EVL;
418 };
419 // TODO: Restructure this code with an explicit remainder loop, vsetvli can
420 // be outside of the main loop.
421 assert(Part == 0 && "No unrolling expected for predicated vectorization.");
422 // Compute VTC - IV as the AVL (requested vector length).
423 Value *Index = State.get(getOperand(0), VPIteration(0, 0));
424 Value *TripCount = State.get(getOperand(1), VPIteration(0, 0));
425 Value *AVL = State.Builder.CreateSub(TripCount, Index);
426 Value *EVL = GetEVL(State, AVL);
427 return EVL;
428 }
430 auto *IV = State.get(getOperand(0), VPIteration(0, 0));
431 if (Part == 0)
432 return IV;
433
434 // The canonical IV is incremented by the vectorization factor (num of SIMD
435 // elements) times the unroll part.
436 Value *Step = createStepForVF(Builder, IV->getType(), State.VF, Part);
437 return Builder.CreateAdd(IV, Step, Name, hasNoUnsignedWrap(),
439 }
441 if (Part != 0)
442 return nullptr;
443
444 Value *Cond = State.get(getOperand(0), VPIteration(Part, 0));
445 VPRegionBlock *ParentRegion = getParent()->getParent();
446 VPBasicBlock *Header = ParentRegion->getEntryBasicBlock();
447
448 // Replace the temporary unreachable terminator with a new conditional
449 // branch, hooking it up to backward destination for exiting blocks now and
450 // to forward destination(s) later when they are created.
451 BranchInst *CondBr =
452 Builder.CreateCondBr(Cond, Builder.GetInsertBlock(), nullptr);
453
454 if (getParent()->isExiting())
455 CondBr->setSuccessor(1, State.CFG.VPBB2IRBB[Header]);
456
457 CondBr->setSuccessor(0, nullptr);
459 return CondBr;
460 }
462 if (Part != 0)
463 return nullptr;
464 // First create the compare.
465 Value *IV = State.get(getOperand(0), Part, /*IsScalar*/ true);
466 Value *TC = State.get(getOperand(1), Part, /*IsScalar*/ true);
467 Value *Cond = Builder.CreateICmpEQ(IV, TC);
468
469 // Now create the branch.
470 auto *Plan = getParent()->getPlan();
471 VPRegionBlock *TopRegion = Plan->getVectorLoopRegion();
472 VPBasicBlock *Header = TopRegion->getEntry()->getEntryBasicBlock();
473
474 // Replace the temporary unreachable terminator with a new conditional
475 // branch, hooking it up to backward destination (the header) now and to the
476 // forward destination (the exit/middle block) later when it is created.
477 // Note that CreateCondBr expects a valid BB as first argument, so we need
478 // to set it to nullptr later.
479 BranchInst *CondBr = Builder.CreateCondBr(Cond, Builder.GetInsertBlock(),
480 State.CFG.VPBB2IRBB[Header]);
481 CondBr->setSuccessor(0, nullptr);
483 return CondBr;
484 }
486 if (Part != 0)
487 return State.get(this, 0, /*IsScalar*/ true);
488
489 // FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary
490 // and will be removed by breaking up the recipe further.
491 auto *PhiR = cast<VPReductionPHIRecipe>(getOperand(0));
492 auto *OrigPhi = cast<PHINode>(PhiR->getUnderlyingValue());
493 // Get its reduction variable descriptor.
494 const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
495
496 RecurKind RK = RdxDesc.getRecurrenceKind();
497
498 VPValue *LoopExitingDef = getOperand(1);
499 Type *PhiTy = OrigPhi->getType();
500 VectorParts RdxParts(State.UF);
501 for (unsigned Part = 0; Part < State.UF; ++Part)
502 RdxParts[Part] = State.get(LoopExitingDef, Part, PhiR->isInLoop());
503
504 // If the vector reduction can be performed in a smaller type, we truncate
505 // then extend the loop exit value to enable InstCombine to evaluate the
506 // entire expression in the smaller type.
507 // TODO: Handle this in truncateToMinBW.
508 if (State.VF.isVector() && PhiTy != RdxDesc.getRecurrenceType()) {
509 Type *RdxVecTy = VectorType::get(RdxDesc.getRecurrenceType(), State.VF);
510 for (unsigned Part = 0; Part < State.UF; ++Part)
511 RdxParts[Part] = Builder.CreateTrunc(RdxParts[Part], RdxVecTy);
512 }
513 // Reduce all of the unrolled parts into a single vector.
514 Value *ReducedPartRdx = RdxParts[0];
515 unsigned Op = RecurrenceDescriptor::getOpcode(RK);
517 Op = Instruction::Or;
518
519 if (PhiR->isOrdered()) {
520 ReducedPartRdx = RdxParts[State.UF - 1];
521 } else {
522 // Floating-point operations should have some FMF to enable the reduction.
524 Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
525 for (unsigned Part = 1; Part < State.UF; ++Part) {
526 Value *RdxPart = RdxParts[Part];
527 if (Op != Instruction::ICmp && Op != Instruction::FCmp)
528 ReducedPartRdx = Builder.CreateBinOp(
529 (Instruction::BinaryOps)Op, RdxPart, ReducedPartRdx, "bin.rdx");
530 else
531 ReducedPartRdx = createMinMaxOp(Builder, RK, ReducedPartRdx, RdxPart);
532 }
533 }
534
535 // Create the reduction after the loop. Note that inloop reductions create
536 // the target reduction in the loop using a Reduction recipe.
537 if ((State.VF.isVector() ||
539 !PhiR->isInLoop()) {
540 ReducedPartRdx =
541 createTargetReduction(Builder, RdxDesc, ReducedPartRdx, OrigPhi);
542 // If the reduction can be performed in a smaller type, we need to extend
543 // the reduction to the wider type before we branch to the original loop.
544 if (PhiTy != RdxDesc.getRecurrenceType())
545 ReducedPartRdx = RdxDesc.isSigned()
546 ? Builder.CreateSExt(ReducedPartRdx, PhiTy)
547 : Builder.CreateZExt(ReducedPartRdx, PhiTy);
548 }
549
550 // If there were stores of the reduction value to a uniform memory address
551 // inside the loop, create the final store here.
552 if (StoreInst *SI = RdxDesc.IntermediateStore) {
553 auto *NewSI = Builder.CreateAlignedStore(
554 ReducedPartRdx, SI->getPointerOperand(), SI->getAlign());
555 propagateMetadata(NewSI, SI);
556 }
557
558 return ReducedPartRdx;
559 }
562 "can only generate first lane for PtrAdd");
563 Value *Ptr = State.get(getOperand(0), Part, /* IsScalar */ true);
564 Value *Addend = State.get(getOperand(1), Part, /* IsScalar */ true);
565 return Builder.CreatePtrAdd(Ptr, Addend, Name);
566 }
567 default:
568 llvm_unreachable("Unsupported opcode for instruction");
569 }
570}
571
572#if !defined(NDEBUG)
573bool VPInstruction::isFPMathOp() const {
574 // Inspired by FPMathOperator::classof. Notable differences are that we don't
575 // support Call, PHI and Select opcodes here yet.
576 return Opcode == Instruction::FAdd || Opcode == Instruction::FMul ||
577 Opcode == Instruction::FNeg || Opcode == Instruction::FSub ||
578 Opcode == Instruction::FDiv || Opcode == Instruction::FRem ||
579 Opcode == Instruction::FCmp || Opcode == Instruction::Select;
580}
581#endif
582
584 assert(!State.Instance && "VPInstruction executing an Instance");
586 assert((hasFastMathFlags() == isFPMathOp() ||
587 getOpcode() == Instruction::Select) &&
588 "Recipe not a FPMathOp but has fast-math flags?");
589 if (hasFastMathFlags())
592 bool GeneratesPerFirstLaneOnly =
593 canGenerateScalarForFirstLane() &&
596 bool GeneratesPerAllLanes = doesGeneratePerAllLanes();
597 for (unsigned Part = 0; Part < State.UF; ++Part) {
598 if (GeneratesPerAllLanes) {
599 for (unsigned Lane = 0, NumLanes = State.VF.getKnownMinValue();
600 Lane != NumLanes; ++Lane) {
601 Value *GeneratedValue = generatePerLane(State, VPIteration(Part, Lane));
602 assert(GeneratedValue && "generatePerLane must produce a value");
603 State.set(this, GeneratedValue, VPIteration(Part, Lane));
604 }
605 continue;
606 }
607
608 Value *GeneratedValue = generatePerPart(State, Part);
609 if (!hasResult())
610 continue;
611 assert(GeneratedValue && "generatePerPart must produce a value");
612 assert((GeneratedValue->getType()->isVectorTy() ==
613 !GeneratesPerFirstLaneOnly ||
614 State.VF.isScalar()) &&
615 "scalar value but not only first lane defined");
616 State.set(this, GeneratedValue, Part,
617 /*IsScalar*/ GeneratesPerFirstLaneOnly);
618 }
619}
620
622 assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
624 return vputils::onlyFirstLaneUsed(this);
625
626 switch (getOpcode()) {
627 default:
628 return false;
629 case Instruction::ICmp:
631 // TODO: Cover additional opcodes.
632 return vputils::onlyFirstLaneUsed(this);
638 return true;
639 };
640 llvm_unreachable("switch should return");
641}
642
643#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
645 VPSlotTracker SlotTracker(getParent()->getPlan());
646 print(dbgs(), "", SlotTracker);
647}
648
650 VPSlotTracker &SlotTracker) const {
651 O << Indent << "EMIT ";
652
653 if (hasResult()) {
655 O << " = ";
656 }
657
658 switch (getOpcode()) {
660 O << "not";
661 break;
663 O << "combined load";
664 break;
666 O << "combined store";
667 break;
669 O << "active lane mask";
670 break;
672 O << "EXPLICIT-VECTOR-LENGTH";
673 break;
675 O << "first-order splice";
676 break;
678 O << "branch-on-cond";
679 break;
681 O << "TC > VF ? TC - VF : 0";
682 break;
684 O << "VF * Part +";
685 break;
687 O << "branch-on-count";
688 break;
690 O << "compute-reduction-result";
691 break;
693 O << "ptradd";
694 break;
695 default:
697 }
698
699 printFlags(O);
701
702 if (auto DL = getDebugLoc()) {
703 O << ", !dbg ";
704 DL.print(O);
705 }
706}
707#endif
708
710 assert(State.VF.isVector() && "not widening");
711 Function *CalledScalarFn = getCalledScalarFunction();
712 assert(!isDbgInfoIntrinsic(CalledScalarFn->getIntrinsicID()) &&
713 "DbgInfoIntrinsic should have been dropped during VPlan construction");
715
716 bool UseIntrinsic = VectorIntrinsicID != Intrinsic::not_intrinsic;
717 FunctionType *VFTy = nullptr;
718 if (Variant)
719 VFTy = Variant->getFunctionType();
720 for (unsigned Part = 0; Part < State.UF; ++Part) {
721 SmallVector<Type *, 2> TysForDecl;
722 // Add return type if intrinsic is overloaded on it.
723 if (UseIntrinsic &&
724 isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, -1))
725 TysForDecl.push_back(VectorType::get(
726 CalledScalarFn->getReturnType()->getScalarType(), State.VF));
728 for (const auto &I : enumerate(arg_operands())) {
729 // Some intrinsics have a scalar argument - don't replace it with a
730 // vector.
731 Value *Arg;
732 if (UseIntrinsic &&
733 isVectorIntrinsicWithScalarOpAtArg(VectorIntrinsicID, I.index()))
734 Arg = State.get(I.value(), VPIteration(0, 0));
735 // Some vectorized function variants may also take a scalar argument,
736 // e.g. linear parameters for pointers. This needs to be the scalar value
737 // from the start of the respective part when interleaving.
738 else if (VFTy && !VFTy->getParamType(I.index())->isVectorTy())
739 Arg = State.get(I.value(), VPIteration(Part, 0));
740 else
741 Arg = State.get(I.value(), Part);
742 if (UseIntrinsic &&
743 isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, I.index()))
744 TysForDecl.push_back(Arg->getType());
745 Args.push_back(Arg);
746 }
747
748 Function *VectorF;
749 if (UseIntrinsic) {
750 // Use vector version of the intrinsic.
751 Module *M = State.Builder.GetInsertBlock()->getModule();
752 VectorF = Intrinsic::getDeclaration(M, VectorIntrinsicID, TysForDecl);
753 assert(VectorF && "Can't retrieve vector intrinsic.");
754 } else {
755#ifndef NDEBUG
756 assert(Variant != nullptr && "Can't create vector function.");
757#endif
758 VectorF = Variant;
759 }
760
761 auto *CI = cast_or_null<CallInst>(getUnderlyingInstr());
763 if (CI)
764 CI->getOperandBundlesAsDefs(OpBundles);
765
766 CallInst *V = State.Builder.CreateCall(VectorF, Args, OpBundles);
767
768 if (isa<FPMathOperator>(V))
769 V->copyFastMathFlags(CI);
770
771 if (!V->getType()->isVoidTy())
772 State.set(this, V, Part);
773 State.addMetadata(V, CI);
774 }
775}
776
777#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
779 VPSlotTracker &SlotTracker) const {
780 O << Indent << "WIDEN-CALL ";
781
782 Function *CalledFn = getCalledScalarFunction();
783 if (CalledFn->getReturnType()->isVoidTy())
784 O << "void ";
785 else {
787 O << " = ";
788 }
789
790 O << "call @" << CalledFn->getName() << "(";
792 Op->printAsOperand(O, SlotTracker);
793 });
794 O << ")";
795
796 if (VectorIntrinsicID)
797 O << " (using vector intrinsic)";
798 else {
799 O << " (using library function";
800 if (Variant->hasName())
801 O << ": " << Variant->getName();
802 O << ")";
803 }
804}
805
807 VPSlotTracker &SlotTracker) const {
808 O << Indent << "WIDEN-SELECT ";
810 O << " = select ";
812 O << ", ";
814 O << ", ";
816 O << (isInvariantCond() ? " (condition is loop invariant)" : "");
817}
818#endif
819
822
823 // The condition can be loop invariant but still defined inside the
824 // loop. This means that we can't just use the original 'cond' value.
825 // We have to take the 'vectorized' value and pick the first lane.
826 // Instcombine will make this a no-op.
827 auto *InvarCond =
828 isInvariantCond() ? State.get(getCond(), VPIteration(0, 0)) : nullptr;
829
830 for (unsigned Part = 0; Part < State.UF; ++Part) {
831 Value *Cond = InvarCond ? InvarCond : State.get(getCond(), Part);
832 Value *Op0 = State.get(getOperand(1), Part);
833 Value *Op1 = State.get(getOperand(2), Part);
834 Value *Sel = State.Builder.CreateSelect(Cond, Op0, Op1);
835 State.set(this, Sel, Part);
836 State.addMetadata(Sel, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
837 }
838}
839
// Snapshot the IR-level fast-math flags into the recipe's own compact
// representation, so the recipe does not need to keep the originating IR
// instruction alive to remember its FMF. The inverse conversion is performed
// by getFastMathFlags().
VPRecipeWithIRFlags::FastMathFlagsTy::FastMathFlagsTy(
    const FastMathFlags &FMF) {
  // One field per individual flag; copied verbatim from the accessors.
  AllowReassoc = FMF.allowReassoc();
  NoNaNs = FMF.noNaNs();
  NoInfs = FMF.noInfs();
  NoSignedZeros = FMF.noSignedZeros();
  AllowReciprocal = FMF.allowReciprocal();
  AllowContract = FMF.allowContract();
  ApproxFunc = FMF.approxFunc();
}
850
851#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
853 switch (OpType) {
854 case OperationType::Cmp:
856 break;
857 case OperationType::DisjointOp:
859 O << " disjoint";
860 break;
861 case OperationType::PossiblyExactOp:
862 if (ExactFlags.IsExact)
863 O << " exact";
864 break;
865 case OperationType::OverflowingBinOp:
866 if (WrapFlags.HasNUW)
867 O << " nuw";
868 if (WrapFlags.HasNSW)
869 O << " nsw";
870 break;
871 case OperationType::FPMathOp:
873 break;
874 case OperationType::GEPOp:
876 O << " inbounds";
877 break;
878 case OperationType::NonNegOp:
879 if (NonNegFlags.NonNeg)
880 O << " nneg";
881 break;
882 case OperationType::Other:
883 break;
884 }
885 if (getNumOperands() > 0)
886 O << " ";
887}
888#endif
889
892 auto &Builder = State.Builder;
893 switch (Opcode) {
894 case Instruction::Call:
895 case Instruction::Br:
896 case Instruction::PHI:
897 case Instruction::GetElementPtr:
898 case Instruction::Select:
899 llvm_unreachable("This instruction is handled by a different recipe.");
900 case Instruction::UDiv:
901 case Instruction::SDiv:
902 case Instruction::SRem:
903 case Instruction::URem:
904 case Instruction::Add:
905 case Instruction::FAdd:
906 case Instruction::Sub:
907 case Instruction::FSub:
908 case Instruction::FNeg:
909 case Instruction::Mul:
910 case Instruction::FMul:
911 case Instruction::FDiv:
912 case Instruction::FRem:
913 case Instruction::Shl:
914 case Instruction::LShr:
915 case Instruction::AShr:
916 case Instruction::And:
917 case Instruction::Or:
918 case Instruction::Xor: {
919 // Just widen unops and binops.
920 for (unsigned Part = 0; Part < State.UF; ++Part) {
922 for (VPValue *VPOp : operands())
923 Ops.push_back(State.get(VPOp, Part));
924
925 Value *V = Builder.CreateNAryOp(Opcode, Ops);
926
927 if (auto *VecOp = dyn_cast<Instruction>(V))
928 setFlags(VecOp);
929
930 // Use this vector value for all users of the original instruction.
931 State.set(this, V, Part);
932 State.addMetadata(V, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
933 }
934
935 break;
936 }
937 case Instruction::Freeze: {
938 for (unsigned Part = 0; Part < State.UF; ++Part) {
939 Value *Op = State.get(getOperand(0), Part);
940
941 Value *Freeze = Builder.CreateFreeze(Op);
942 State.set(this, Freeze, Part);
943 }
944 break;
945 }
946 case Instruction::ICmp:
947 case Instruction::FCmp: {
948 // Widen compares. Generate vector compares.
949 bool FCmp = Opcode == Instruction::FCmp;
950 for (unsigned Part = 0; Part < State.UF; ++Part) {
951 Value *A = State.get(getOperand(0), Part);
952 Value *B = State.get(getOperand(1), Part);
953 Value *C = nullptr;
954 if (FCmp) {
955 // Propagate fast math flags.
956 IRBuilder<>::FastMathFlagGuard FMFG(Builder);
957 if (auto *I = dyn_cast_or_null<Instruction>(getUnderlyingValue()))
958 Builder.setFastMathFlags(I->getFastMathFlags());
959 C = Builder.CreateFCmp(getPredicate(), A, B);
960 } else {
961 C = Builder.CreateICmp(getPredicate(), A, B);
962 }
963 State.set(this, C, Part);
964 State.addMetadata(C, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
965 }
966
967 break;
968 }
969 default:
970 // This instruction is not vectorized by simple widening.
971 LLVM_DEBUG(dbgs() << "LV: Found an unhandled opcode : "
972 << Instruction::getOpcodeName(Opcode));
973 llvm_unreachable("Unhandled instruction!");
974 } // end of switch.
975
976#if !defined(NDEBUG)
977 // Verify that VPlan type inference results agree with the type of the
978 // generated values.
979 for (unsigned Part = 0; Part < State.UF; ++Part) {
981 State.VF) == State.get(this, Part)->getType() &&
982 "inferred type and type from generated instructions do not match");
983 }
984#endif
985}
986
987#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
989 VPSlotTracker &SlotTracker) const {
990 O << Indent << "WIDEN ";
992 O << " = " << Instruction::getOpcodeName(Opcode);
993 printFlags(O);
995}
996#endif
997
1000 auto &Builder = State.Builder;
1001 /// Vectorize casts.
1002 assert(State.VF.isVector() && "Not vectorizing?");
1003 Type *DestTy = VectorType::get(getResultType(), State.VF);
1004 VPValue *Op = getOperand(0);
1005 for (unsigned Part = 0; Part < State.UF; ++Part) {
1006 if (Part > 0 && Op->isLiveIn()) {
1007 // FIXME: Remove once explicit unrolling is implemented using VPlan.
1008 State.set(this, State.get(this, 0), Part);
1009 continue;
1010 }
1011 Value *A = State.get(Op, Part);
1012 Value *Cast = Builder.CreateCast(Instruction::CastOps(Opcode), A, DestTy);
1013 State.set(this, Cast, Part);
1014 State.addMetadata(Cast, cast_or_null<Instruction>(getUnderlyingValue()));
1015 }
1016}
1017
1018#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1020 VPSlotTracker &SlotTracker) const {
1021 O << Indent << "WIDEN-CAST ";
1023 O << " = " << Instruction::getOpcodeName(Opcode) << " ";
1024 printFlags(O);
1026 O << " to " << *getResultType();
1027}
1028#endif
1029
1030/// This function adds
1031/// (StartIdx * Step, (StartIdx + 1) * Step, (StartIdx + 2) * Step, ...)
/// to each vector element of Val. The sequence starts at StartIdx.
1033/// \p Opcode is relevant for FP induction variable.
1034static Value *getStepVector(Value *Val, Value *StartIdx, Value *Step,
1036 IRBuilderBase &Builder) {
1037 assert(VF.isVector() && "only vector VFs are supported");
1038
1039 // Create and check the types.
1040 auto *ValVTy = cast<VectorType>(Val->getType());
1041 ElementCount VLen = ValVTy->getElementCount();
1042
1043 Type *STy = Val->getType()->getScalarType();
1044 assert((STy->isIntegerTy() || STy->isFloatingPointTy()) &&
1045 "Induction Step must be an integer or FP");
1046 assert(Step->getType() == STy && "Step has wrong type");
1047
1049
1050 // Create a vector of consecutive numbers from zero to VF.
1051 VectorType *InitVecValVTy = ValVTy;
1052 if (STy->isFloatingPointTy()) {
1053 Type *InitVecValSTy =
1055 InitVecValVTy = VectorType::get(InitVecValSTy, VLen);
1056 }
1057 Value *InitVec = Builder.CreateStepVector(InitVecValVTy);
1058
1059 // Splat the StartIdx
1060 Value *StartIdxSplat = Builder.CreateVectorSplat(VLen, StartIdx);
1061
1062 if (STy->isIntegerTy()) {
1063 InitVec = Builder.CreateAdd(InitVec, StartIdxSplat);
1064 Step = Builder.CreateVectorSplat(VLen, Step);
1065 assert(Step->getType() == Val->getType() && "Invalid step vec");
1066 // FIXME: The newly created binary instructions should contain nsw/nuw
1067 // flags, which can be found from the original scalar operations.
1068 Step = Builder.CreateMul(InitVec, Step);
1069 return Builder.CreateAdd(Val, Step, "induction");
1070 }
1071
1072 // Floating point induction.
1073 assert((BinOp == Instruction::FAdd || BinOp == Instruction::FSub) &&
1074 "Binary Opcode should be specified for FP induction");
1075 InitVec = Builder.CreateUIToFP(InitVec, ValVTy);
1076 InitVec = Builder.CreateFAdd(InitVec, StartIdxSplat);
1077
1078 Step = Builder.CreateVectorSplat(VLen, Step);
1079 Value *MulOp = Builder.CreateFMul(InitVec, Step);
1080 return Builder.CreateBinOp(BinOp, Val, MulOp, "induction");
1081}
1082
1083/// A helper function that returns an integer or floating-point constant with
1084/// value C.
1086 return Ty->isIntegerTy() ? ConstantInt::getSigned(Ty, C)
1087 : ConstantFP::get(Ty, C);
1088}
1089
1091 ElementCount VF) {
1092 assert(FTy->isFloatingPointTy() && "Expected floating point type!");
1093 Type *IntTy = IntegerType::get(FTy->getContext(), FTy->getScalarSizeInBits());
1094 Value *RuntimeVF = getRuntimeVF(B, IntTy, VF);
1095 return B.CreateUIToFP(RuntimeVF, FTy);
1096}
1097
1099 assert(!State.Instance && "Int or FP induction being replicated.");
1100
1101 Value *Start = getStartValue()->getLiveInIRValue();
1103 TruncInst *Trunc = getTruncInst();
1104 IRBuilderBase &Builder = State.Builder;
1105 assert(IV->getType() == ID.getStartValue()->getType() && "Types must match");
1106 assert(State.VF.isVector() && "must have vector VF");
1107
1108 // The value from the original loop to which we are mapping the new induction
1109 // variable.
1110 Instruction *EntryVal = Trunc ? cast<Instruction>(Trunc) : IV;
1111
1112 // Fast-math-flags propagate from the original induction instruction.
1113 IRBuilder<>::FastMathFlagGuard FMFG(Builder);
1114 if (ID.getInductionBinOp() && isa<FPMathOperator>(ID.getInductionBinOp()))
1115 Builder.setFastMathFlags(ID.getInductionBinOp()->getFastMathFlags());
1116
1117 // Now do the actual transformations, and start with fetching the step value.
1118 Value *Step = State.get(getStepValue(), VPIteration(0, 0));
1119
1120 assert((isa<PHINode>(EntryVal) || isa<TruncInst>(EntryVal)) &&
1121 "Expected either an induction phi-node or a truncate of it!");
1122
1123 // Construct the initial value of the vector IV in the vector loop preheader
1124 auto CurrIP = Builder.saveIP();
1125 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
1126 Builder.SetInsertPoint(VectorPH->getTerminator());
1127 if (isa<TruncInst>(EntryVal)) {
1128 assert(Start->getType()->isIntegerTy() &&
1129 "Truncation requires an integer type");
1130 auto *TruncType = cast<IntegerType>(EntryVal->getType());
1131 Step = Builder.CreateTrunc(Step, TruncType);
1132 Start = Builder.CreateCast(Instruction::Trunc, Start, TruncType);
1133 }
1134
1135 Value *Zero = getSignedIntOrFpConstant(Start->getType(), 0);
1136 Value *SplatStart = Builder.CreateVectorSplat(State.VF, Start);
1137 Value *SteppedStart = getStepVector(
1138 SplatStart, Zero, Step, ID.getInductionOpcode(), State.VF, State.Builder);
1139
1140 // We create vector phi nodes for both integer and floating-point induction
1141 // variables. Here, we determine the kind of arithmetic we will perform.
1144 if (Step->getType()->isIntegerTy()) {
1145 AddOp = Instruction::Add;
1146 MulOp = Instruction::Mul;
1147 } else {
1148 AddOp = ID.getInductionOpcode();
1149 MulOp = Instruction::FMul;
1150 }
1151
1152 // Multiply the vectorization factor by the step using integer or
1153 // floating-point arithmetic as appropriate.
1154 Type *StepType = Step->getType();
1155 Value *RuntimeVF;
1156 if (Step->getType()->isFloatingPointTy())
1157 RuntimeVF = getRuntimeVFAsFloat(Builder, StepType, State.VF);
1158 else
1159 RuntimeVF = getRuntimeVF(Builder, StepType, State.VF);
1160 Value *Mul = Builder.CreateBinOp(MulOp, Step, RuntimeVF);
1161
1162 // Create a vector splat to use in the induction update.
1163 //
1164 // FIXME: If the step is non-constant, we create the vector splat with
1165 // IRBuilder. IRBuilder can constant-fold the multiply, but it doesn't
1166 // handle a constant vector splat.
1167 Value *SplatVF = isa<Constant>(Mul)
1168 ? ConstantVector::getSplat(State.VF, cast<Constant>(Mul))
1169 : Builder.CreateVectorSplat(State.VF, Mul);
1170 Builder.restoreIP(CurrIP);
1171
1172 // We may need to add the step a number of times, depending on the unroll
1173 // factor. The last of those goes into the PHI.
1174 PHINode *VecInd = PHINode::Create(SteppedStart->getType(), 2, "vec.ind");
1175 VecInd->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
1176 VecInd->setDebugLoc(EntryVal->getDebugLoc());
1177 Instruction *LastInduction = VecInd;
1178 for (unsigned Part = 0; Part < State.UF; ++Part) {
1179 State.set(this, LastInduction, Part);
1180
1181 if (isa<TruncInst>(EntryVal))
1182 State.addMetadata(LastInduction, EntryVal);
1183
1184 LastInduction = cast<Instruction>(
1185 Builder.CreateBinOp(AddOp, LastInduction, SplatVF, "step.add"));
1186 LastInduction->setDebugLoc(EntryVal->getDebugLoc());
1187 }
1188
1189 LastInduction->setName("vec.ind.next");
1190 VecInd->addIncoming(SteppedStart, VectorPH);
1191 // Add induction update using an incorrect block temporarily. The phi node
1192 // will be fixed after VPlan execution. Note that at this point the latch
1193 // block cannot be used, as it does not exist yet.
1194 // TODO: Model increment value in VPlan, by turning the recipe into a
1195 // multi-def and a subclass of VPHeaderPHIRecipe.
1196 VecInd->addIncoming(LastInduction, VectorPH);
1197}
1198
1199#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1201 VPSlotTracker &SlotTracker) const {
1202 O << Indent << "WIDEN-INDUCTION";
1203 if (getTruncInst()) {
1204 O << "\\l\"";
1205 O << " +\n" << Indent << "\" " << VPlanIngredient(IV) << "\\l\"";
1206 O << " +\n" << Indent << "\" ";
1208 } else
1209 O << " " << VPlanIngredient(IV);
1210
1211 O << ", ";
1213}
1214#endif
1215
1217 // The step may be defined by a recipe in the preheader (e.g. if it requires
1218 // SCEV expansion), but for the canonical induction the step is required to be
1219 // 1, which is represented as live-in.
1221 return false;
1222 auto *StepC = dyn_cast<ConstantInt>(getStepValue()->getLiveInIRValue());
1223 auto *StartC = dyn_cast<ConstantInt>(getStartValue()->getLiveInIRValue());
1224 auto *CanIV = cast<VPCanonicalIVPHIRecipe>(&*getParent()->begin());
1225 return StartC && StartC->isZero() && StepC && StepC->isOne() &&
1226 getScalarType() == CanIV->getScalarType();
1227}
1228
1229#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1231 VPSlotTracker &SlotTracker) const {
1232 O << Indent;
1234 O << Indent << "= DERIVED-IV ";
1236 O << " + ";
1238 O << " * ";
1240}
1241#endif
1242
1244 // Fast-math-flags propagate from the original induction instruction.
1246 if (hasFastMathFlags())
1248
1249 /// Compute scalar induction steps. \p ScalarIV is the scalar induction
1250 /// variable on which to base the steps, \p Step is the size of the step.
1251
1252 Value *BaseIV = State.get(getOperand(0), VPIteration(0, 0));
1253 Value *Step = State.get(getStepValue(), VPIteration(0, 0));
1254 IRBuilderBase &Builder = State.Builder;
1255
1256 // Ensure step has the same type as that of scalar IV.
1257 Type *BaseIVTy = BaseIV->getType()->getScalarType();
1258 assert(BaseIVTy == Step->getType() && "Types of BaseIV and Step must match!");
1259
1260 // We build scalar steps for both integer and floating-point induction
1261 // variables. Here, we determine the kind of arithmetic we will perform.
1264 if (BaseIVTy->isIntegerTy()) {
1265 AddOp = Instruction::Add;
1266 MulOp = Instruction::Mul;
1267 } else {
1268 AddOp = InductionOpcode;
1269 MulOp = Instruction::FMul;
1270 }
1271
1272 // Determine the number of scalars we need to generate for each unroll
1273 // iteration.
1274 bool FirstLaneOnly = vputils::onlyFirstLaneUsed(this);
1275 // Compute the scalar steps and save the results in State.
1276 Type *IntStepTy =
1277 IntegerType::get(BaseIVTy->getContext(), BaseIVTy->getScalarSizeInBits());
1278 Type *VecIVTy = nullptr;
1279 Value *UnitStepVec = nullptr, *SplatStep = nullptr, *SplatIV = nullptr;
1280 if (!FirstLaneOnly && State.VF.isScalable()) {
1281 VecIVTy = VectorType::get(BaseIVTy, State.VF);
1282 UnitStepVec =
1283 Builder.CreateStepVector(VectorType::get(IntStepTy, State.VF));
1284 SplatStep = Builder.CreateVectorSplat(State.VF, Step);
1285 SplatIV = Builder.CreateVectorSplat(State.VF, BaseIV);
1286 }
1287
1288 unsigned StartPart = 0;
1289 unsigned EndPart = State.UF;
1290 unsigned StartLane = 0;
1291 unsigned EndLane = FirstLaneOnly ? 1 : State.VF.getKnownMinValue();
1292 if (State.Instance) {
1293 StartPart = State.Instance->Part;
1294 EndPart = StartPart + 1;
1295 StartLane = State.Instance->Lane.getKnownLane();
1296 EndLane = StartLane + 1;
1297 }
1298 for (unsigned Part = StartPart; Part < EndPart; ++Part) {
1299 Value *StartIdx0 = createStepForVF(Builder, IntStepTy, State.VF, Part);
1300
1301 if (!FirstLaneOnly && State.VF.isScalable()) {
1302 auto *SplatStartIdx = Builder.CreateVectorSplat(State.VF, StartIdx0);
1303 auto *InitVec = Builder.CreateAdd(SplatStartIdx, UnitStepVec);
1304 if (BaseIVTy->isFloatingPointTy())
1305 InitVec = Builder.CreateSIToFP(InitVec, VecIVTy);
1306 auto *Mul = Builder.CreateBinOp(MulOp, InitVec, SplatStep);
1307 auto *Add = Builder.CreateBinOp(AddOp, SplatIV, Mul);
1308 State.set(this, Add, Part);
1309 // It's useful to record the lane values too for the known minimum number
1310 // of elements so we do those below. This improves the code quality when
1311 // trying to extract the first element, for example.
1312 }
1313
1314 if (BaseIVTy->isFloatingPointTy())
1315 StartIdx0 = Builder.CreateSIToFP(StartIdx0, BaseIVTy);
1316
1317 for (unsigned Lane = StartLane; Lane < EndLane; ++Lane) {
1318 Value *StartIdx = Builder.CreateBinOp(
1319 AddOp, StartIdx0, getSignedIntOrFpConstant(BaseIVTy, Lane));
1320 // The step returned by `createStepForVF` is a runtime-evaluated value
1321 // when VF is scalable. Otherwise, it should be folded into a Constant.
1322 assert((State.VF.isScalable() || isa<Constant>(StartIdx)) &&
1323 "Expected StartIdx to be folded to a constant when VF is not "
1324 "scalable");
1325 auto *Mul = Builder.CreateBinOp(MulOp, StartIdx, Step);
1326 auto *Add = Builder.CreateBinOp(AddOp, BaseIV, Mul);
1327 State.set(this, Add, VPIteration(Part, Lane));
1328 }
1329 }
1330}
1331
1332#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1334 VPSlotTracker &SlotTracker) const {
1335 O << Indent;
1337 O << " = SCALAR-STEPS ";
1339}
1340#endif
1341
1343 assert(State.VF.isVector() && "not widening");
1344 auto *GEP = cast<GetElementPtrInst>(getUnderlyingInstr());
1345 // Construct a vector GEP by widening the operands of the scalar GEP as
1346 // necessary. We mark the vector GEP 'inbounds' if appropriate. A GEP
1347 // results in a vector of pointers when at least one operand of the GEP
1348 // is vector-typed. Thus, to keep the representation compact, we only use
1349 // vector-typed operands for loop-varying values.
1350
1351 if (areAllOperandsInvariant()) {
1352 // If we are vectorizing, but the GEP has only loop-invariant operands,
1353 // the GEP we build (by only using vector-typed operands for
1354 // loop-varying values) would be a scalar pointer. Thus, to ensure we
1355 // produce a vector of pointers, we need to either arbitrarily pick an
1356 // operand to broadcast, or broadcast a clone of the original GEP.
1357 // Here, we broadcast a clone of the original.
1358 //
1359 // TODO: If at some point we decide to scalarize instructions having
1360 // loop-invariant operands, this special case will no longer be
1361 // required. We would add the scalarization decision to
1362 // collectLoopScalars() and teach getVectorValue() to broadcast
1363 // the lane-zero scalar value.
1365 for (unsigned I = 0, E = getNumOperands(); I != E; I++)
1366 Ops.push_back(State.get(getOperand(I), VPIteration(0, 0)));
1367
1368 auto *NewGEP =
1369 State.Builder.CreateGEP(GEP->getSourceElementType(), Ops[0],
1370 ArrayRef(Ops).drop_front(), "", isInBounds());
1371 for (unsigned Part = 0; Part < State.UF; ++Part) {
1372 Value *EntryPart = State.Builder.CreateVectorSplat(State.VF, NewGEP);
1373 State.set(this, EntryPart, Part);
1374 State.addMetadata(EntryPart, GEP);
1375 }
1376 } else {
1377 // If the GEP has at least one loop-varying operand, we are sure to
1378 // produce a vector of pointers. But if we are only unrolling, we want
1379 // to produce a scalar GEP for each unroll part. Thus, the GEP we
1380 // produce with the code below will be scalar (if VF == 1) or vector
1381 // (otherwise). Note that for the unroll-only case, we still maintain
1382 // values in the vector mapping with initVector, as we do for other
1383 // instructions.
1384 for (unsigned Part = 0; Part < State.UF; ++Part) {
1385 // The pointer operand of the new GEP. If it's loop-invariant, we
1386 // won't broadcast it.
1387 auto *Ptr = isPointerLoopInvariant()
1388 ? State.get(getOperand(0), VPIteration(0, 0))
1389 : State.get(getOperand(0), Part);
1390
1391 // Collect all the indices for the new GEP. If any index is
1392 // loop-invariant, we won't broadcast it.
1394 for (unsigned I = 1, E = getNumOperands(); I < E; I++) {
1395 VPValue *Operand = getOperand(I);
1396 if (isIndexLoopInvariant(I - 1))
1397 Indices.push_back(State.get(Operand, VPIteration(0, 0)));
1398 else
1399 Indices.push_back(State.get(Operand, Part));
1400 }
1401
1402 // Create the new GEP. Note that this GEP may be a scalar if VF == 1,
1403 // but it should be a vector, otherwise.
1404 auto *NewGEP = State.Builder.CreateGEP(GEP->getSourceElementType(), Ptr,
1405 Indices, "", isInBounds());
1406 assert((State.VF.isScalar() || NewGEP->getType()->isVectorTy()) &&
1407 "NewGEP is not a pointer vector");
1408 State.set(this, NewGEP, Part);
1409 State.addMetadata(NewGEP, GEP);
1410 }
1411 }
1412}
1413
1414#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1416 VPSlotTracker &SlotTracker) const {
1417 O << Indent << "WIDEN-GEP ";
1418 O << (isPointerLoopInvariant() ? "Inv" : "Var");
1419 for (size_t I = 0; I < getNumOperands() - 1; ++I)
1420 O << "[" << (isIndexLoopInvariant(I) ? "Inv" : "Var") << "]";
1421
1422 O << " ";
1424 O << " = getelementptr";
1425 printFlags(O);
1427}
1428#endif
1429
1430void VPVectorPointerRecipe ::execute(VPTransformState &State) {
1431 auto &Builder = State.Builder;
1433 for (unsigned Part = 0; Part < State.UF; ++Part) {
1434 // Calculate the pointer for the specific unroll-part.
1435 Value *PartPtr = nullptr;
1436 // Use i32 for the gep index type when the value is constant,
1437 // or query DataLayout for a more suitable index type otherwise.
1438 const DataLayout &DL =
1439 Builder.GetInsertBlock()->getModule()->getDataLayout();
1440 Type *IndexTy = State.VF.isScalable() && (IsReverse || Part > 0)
1441 ? DL.getIndexType(IndexedTy->getPointerTo())
1442 : Builder.getInt32Ty();
1443 Value *Ptr = State.get(getOperand(0), VPIteration(0, 0));
1444 bool InBounds = isInBounds();
1445 if (IsReverse) {
1446 // If the address is consecutive but reversed, then the
1447 // wide store needs to start at the last vector element.
1448 // RunTimeVF = VScale * VF.getKnownMinValue()
1449 // For fixed-width VScale is 1, then RunTimeVF = VF.getKnownMinValue()
1450 Value *RunTimeVF = getRuntimeVF(Builder, IndexTy, State.VF);
1451 // NumElt = -Part * RunTimeVF
1452 Value *NumElt = Builder.CreateMul(
1453 ConstantInt::get(IndexTy, -(int64_t)Part), RunTimeVF);
1454 // LastLane = 1 - RunTimeVF
1455 Value *LastLane =
1456 Builder.CreateSub(ConstantInt::get(IndexTy, 1), RunTimeVF);
1457 PartPtr = Builder.CreateGEP(IndexedTy, Ptr, NumElt, "", InBounds);
1458 PartPtr = Builder.CreateGEP(IndexedTy, PartPtr, LastLane, "", InBounds);
1459 } else {
1460 Value *Increment = createStepForVF(Builder, IndexTy, State.VF, Part);
1461 PartPtr = Builder.CreateGEP(IndexedTy, Ptr, Increment, "", InBounds);
1462 }
1463
1464 State.set(this, PartPtr, Part, /*IsScalar*/ true);
1465 }
1466}
1467
1468#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1470 VPSlotTracker &SlotTracker) const {
1471 O << Indent;
1473 O << " = vector-pointer ";
1474 if (IsReverse)
1475 O << "(reverse) ";
1476
1478}
1479#endif
1480
1483 // We know that all PHIs in non-header blocks are converted into
1484 // selects, so we don't have to worry about the insertion order and we
1485 // can just use the builder.
1486 // At this point we generate the predication tree. There may be
1487 // duplications since this is a simple recursive scan, but future
1488 // optimizations will clean it up.
1489
1490 unsigned NumIncoming = getNumIncomingValues();
1491
1492 // Generate a sequence of selects of the form:
1493 // SELECT(Mask3, In3,
1494 // SELECT(Mask2, In2,
1495 // SELECT(Mask1, In1,
1496 // In0)))
1497 // Note that Mask0 is never used: lanes for which no path reaches this phi and
1498 // are essentially undef are taken from In0.
1499 VectorParts Entry(State.UF);
1500 for (unsigned In = 0; In < NumIncoming; ++In) {
1501 for (unsigned Part = 0; Part < State.UF; ++Part) {
1502 // We might have single edge PHIs (blocks) - use an identity
1503 // 'select' for the first PHI operand.
1504 Value *In0 = State.get(getIncomingValue(In), Part);
1505 if (In == 0)
1506 Entry[Part] = In0; // Initialize with the first incoming value.
1507 else {
1508 // Select between the current value and the previous incoming edge
1509 // based on the incoming mask.
1510 Value *Cond = State.get(getMask(In), Part);
1511 Entry[Part] =
1512 State.Builder.CreateSelect(Cond, In0, Entry[Part], "predphi");
1513 }
1514 }
1515 }
1516 for (unsigned Part = 0; Part < State.UF; ++Part)
1517 State.set(this, Entry[Part], Part);
1518}
1519
1520#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1522 VPSlotTracker &SlotTracker) const {
1523 O << Indent << "BLEND ";
1525 O << " =";
1526 if (getNumIncomingValues() == 1) {
1527 // Not a User of any mask: not really blending, this is a
1528 // single-predecessor phi.
1529 O << " ";
1531 } else {
1532 for (unsigned I = 0, E = getNumIncomingValues(); I < E; ++I) {
1533 O << " ";
1535 if (I == 0)
1536 continue;
1537 O << "/";
1539 }
1540 }
1541}
1542#endif
1543
1545 assert(!State.Instance && "Reduction being replicated.");
1546 Value *PrevInChain = State.get(getChainOp(), 0, /*IsScalar*/ true);
1547 RecurKind Kind = RdxDesc.getRecurrenceKind();
1548 // Propagate the fast-math flags carried by the underlying instruction.
1550 State.Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
1551 for (unsigned Part = 0; Part < State.UF; ++Part) {
1552 Value *NewVecOp = State.get(getVecOp(), Part);
1553 if (VPValue *Cond = getCondOp()) {
1554 Value *NewCond = State.get(Cond, Part, State.VF.isScalar());
1555 VectorType *VecTy = dyn_cast<VectorType>(NewVecOp->getType());
1556 Type *ElementTy = VecTy ? VecTy->getElementType() : NewVecOp->getType();
1557 Value *Iden = RdxDesc.getRecurrenceIdentity(Kind, ElementTy,
1558 RdxDesc.getFastMathFlags());
1559 if (State.VF.isVector()) {
1560 Iden = State.Builder.CreateVectorSplat(VecTy->getElementCount(), Iden);
1561 }
1562
1563 Value *Select = State.Builder.CreateSelect(NewCond, NewVecOp, Iden);
1564 NewVecOp = Select;
1565 }
1566 Value *NewRed;
1567 Value *NextInChain;
1568 if (IsOrdered) {
1569 if (State.VF.isVector())
1570 NewRed = createOrderedReduction(State.Builder, RdxDesc, NewVecOp,
1571 PrevInChain);
1572 else
1573 NewRed = State.Builder.CreateBinOp(
1574 (Instruction::BinaryOps)RdxDesc.getOpcode(Kind), PrevInChain,
1575 NewVecOp);
1576 PrevInChain = NewRed;
1577 } else {
1578 PrevInChain = State.get(getChainOp(), Part, /*IsScalar*/ true);
1579 NewRed = createTargetReduction(State.Builder, RdxDesc, NewVecOp);
1580 }
1582 NextInChain = createMinMaxOp(State.Builder, RdxDesc.getRecurrenceKind(),
1583 NewRed, PrevInChain);
1584 } else if (IsOrdered)
1585 NextInChain = NewRed;
1586 else
1587 NextInChain = State.Builder.CreateBinOp(
1588 (Instruction::BinaryOps)RdxDesc.getOpcode(Kind), NewRed, PrevInChain);
1589 State.set(this, NextInChain, Part, /*IsScalar*/ true);
1590 }
1591}
1592
1593#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1595 VPSlotTracker &SlotTracker) const {
1596 O << Indent << "REDUCE ";
1598 O << " = ";
1600 O << " +";
1601 if (isa<FPMathOperator>(getUnderlyingInstr()))
1603 O << " reduce." << Instruction::getOpcodeName(RdxDesc.getOpcode()) << " (";
1605 if (getCondOp()) {
1606 O << ", ";
1608 }
1609 O << ")";
1610 if (RdxDesc.IntermediateStore)
1611 O << " (with final reduction value stored in invariant address sank "
1612 "outside of loop)";
1613}
1614#endif
1615
1617 // Find if the recipe is used by a widened recipe via an intervening
1618 // VPPredInstPHIRecipe. In this case, also pack the scalar values in a vector.
1619 return any_of(users(), [](const VPUser *U) {
1620 if (auto *PredR = dyn_cast<VPPredInstPHIRecipe>(U))
1621 return any_of(PredR->users(), [PredR](const VPUser *U) {
1622 return !U->usesScalars(PredR);
1623 });
1624 return false;
1625 });
1626}
1627
1628#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1630 VPSlotTracker &SlotTracker) const {
1631 O << Indent << (IsUniform ? "CLONE " : "REPLICATE ");
1632
1633 if (!getUnderlyingInstr()->getType()->isVoidTy()) {
1635 O << " = ";
1636 }
1637 if (auto *CB = dyn_cast<CallBase>(getUnderlyingInstr())) {
1638 O << "call";
1639 printFlags(O);
1640 O << "@" << CB->getCalledFunction()->getName() << "(";
1642 O, [&O, &SlotTracker](VPValue *Op) {
1643 Op->printAsOperand(O, SlotTracker);
1644 });
1645 O << ")";
1646 } else {
1648 printFlags(O);
1650 }
1651
1652 if (shouldPack())
1653 O << " (S->V)";
1654}
1655#endif
1656
1657/// Checks if \p C is uniform across all VFs and UFs. It is considered as such
1658/// if it is either defined outside the vector region or its operand is known to
1659/// be uniform across all VFs and UFs (e.g. VPDerivedIV or VPCanonicalIVPHI).
1660/// TODO: Uniformity should be associated with a VPValue and there should be a
1661/// generic way to check.
1663 return C->isDefinedOutsideVectorRegions() ||
1664 isa<VPDerivedIVRecipe>(C->getOperand(0)) ||
1665 isa<VPCanonicalIVPHIRecipe>(C->getOperand(0));
1666}
1667
1668Value *VPScalarCastRecipe ::generate(VPTransformState &State, unsigned Part) {
1670 "Codegen only implemented for first lane.");
1671 switch (Opcode) {
1672 case Instruction::SExt:
1673 case Instruction::ZExt:
1674 case Instruction::Trunc: {
1675 // Note: SExt/ZExt not used yet.
1676 Value *Op = State.get(getOperand(0), VPIteration(Part, 0));
1677 return State.Builder.CreateCast(Instruction::CastOps(Opcode), Op, ResultTy);
1678 }
1679 default:
1680 llvm_unreachable("opcode not implemented yet");
1681 }
1682}
1683
1684void VPScalarCastRecipe ::execute(VPTransformState &State) {
1685 bool IsUniformAcrossVFsAndUFs = isUniformAcrossVFsAndUFs(this);
1686 for (unsigned Part = 0; Part != State.UF; ++Part) {
1687 Value *Res;
1688 // Only generate a single instance, if the recipe is uniform across UFs and
1689 // VFs.
1690 if (Part > 0 && IsUniformAcrossVFsAndUFs)
1691 Res = State.get(this, VPIteration(0, 0));
1692 else
1693 Res = generate(State, Part);
1694 State.set(this, Res, VPIteration(Part, 0));
1695 }
1696}
1697
1698#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1699void VPScalarCastRecipe ::print(raw_ostream &O, const Twine &Indent,
1700 VPSlotTracker &SlotTracker) const {
1701 O << Indent << "SCALAR-CAST ";
1702 printAsOperand(O, SlotTracker);
1703 O << " = " << Instruction::getOpcodeName(Opcode) << " ";
1704 printOperands(O, SlotTracker);
1705 O << " to " << *ResultTy;
1706}
1707#endif
1708
1710 assert(State.Instance && "Branch on Mask works only on single instance.");
1711
1712 unsigned Part = State.Instance->Part;
1713 unsigned Lane = State.Instance->Lane.getKnownLane();
1714
1715 Value *ConditionBit = nullptr;
1716 VPValue *BlockInMask = getMask();
1717 if (BlockInMask) {
1718 ConditionBit = State.get(BlockInMask, Part);
1719 if (ConditionBit->getType()->isVectorTy())
1720 ConditionBit = State.Builder.CreateExtractElement(
1721 ConditionBit, State.Builder.getInt32(Lane));
1722 } else // Block in mask is all-one.
1723 ConditionBit = State.Builder.getTrue();
1724
1725 // Replace the temporary unreachable terminator with a new conditional branch,
1726 // whose two destinations will be set later when they are created.
1727 auto *CurrentTerminator = State.CFG.PrevBB->getTerminator();
1728 assert(isa<UnreachableInst>(CurrentTerminator) &&
1729 "Expected to replace unreachable terminator with conditional branch.");
1730 auto *CondBr = BranchInst::Create(State.CFG.PrevBB, nullptr, ConditionBit);
1731 CondBr->setSuccessor(0, nullptr);
1732 ReplaceInstWithInst(CurrentTerminator, CondBr);
1733}
1734
1736 assert(State.Instance && "Predicated instruction PHI works per instance.");
1737 Instruction *ScalarPredInst =
1738 cast<Instruction>(State.get(getOperand(0), *State.Instance));
1739 BasicBlock *PredicatedBB = ScalarPredInst->getParent();
1740 BasicBlock *PredicatingBB = PredicatedBB->getSinglePredecessor();
1741 assert(PredicatingBB && "Predicated block has no single predecessor.");
1742 assert(isa<VPReplicateRecipe>(getOperand(0)) &&
1743 "operand must be VPReplicateRecipe");
1744
1745 // By current pack/unpack logic we need to generate only a single phi node: if
1746 // a vector value for the predicated instruction exists at this point it means
1747 // the instruction has vector users only, and a phi for the vector value is
1748 // needed. In this case the recipe of the predicated instruction is marked to
1749 // also do that packing, thereby "hoisting" the insert-element sequence.
1750 // Otherwise, a phi node for the scalar value is needed.
1751 unsigned Part = State.Instance->Part;
1752 if (State.hasVectorValue(getOperand(0), Part)) {
1753 Value *VectorValue = State.get(getOperand(0), Part);
1754 InsertElementInst *IEI = cast<InsertElementInst>(VectorValue);
1755 PHINode *VPhi = State.Builder.CreatePHI(IEI->getType(), 2);
1756 VPhi->addIncoming(IEI->getOperand(0), PredicatingBB); // Unmodified vector.
1757 VPhi->addIncoming(IEI, PredicatedBB); // New vector with inserted element.
1758 if (State.hasVectorValue(this, Part))
1759 State.reset(this, VPhi, Part);
1760 else
1761 State.set(this, VPhi, Part);
1762 // NOTE: Currently we need to update the value of the operand, so the next
1763 // predicated iteration inserts its generated value in the correct vector.
1764 State.reset(getOperand(0), VPhi, Part);
1765 } else {
1766 Type *PredInstType = getOperand(0)->getUnderlyingValue()->getType();
1767 PHINode *Phi = State.Builder.CreatePHI(PredInstType, 2);
1768 Phi->addIncoming(PoisonValue::get(ScalarPredInst->getType()),
1769 PredicatingBB);
1770 Phi->addIncoming(ScalarPredInst, PredicatedBB);
1771 if (State.hasScalarValue(this, *State.Instance))
1772 State.reset(this, Phi, *State.Instance);
1773 else
1774 State.set(this, Phi, *State.Instance);
1775 // NOTE: Currently we need to update the value of the operand, so the next
1776 // predicated iteration inserts its generated value in the correct vector.
1777 State.reset(getOperand(0), Phi, *State.Instance);
1778 }
1779}
1780
1781#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1783 VPSlotTracker &SlotTracker) const {
1784 O << Indent << "PHI-PREDICATED-INSTRUCTION ";
1786 O << " = ";
1788}
1789
1791 VPSlotTracker &SlotTracker) const {
1792 O << Indent << "WIDEN ";
1794 O << " = load ";
1796}
1797
1799 VPSlotTracker &SlotTracker) const {
1800 O << Indent << "WIDEN ";
1802 O << " = vp.load ";
1804}
1805
1807 VPSlotTracker &SlotTracker) const {
1808 O << Indent << "WIDEN store ";
1810}
1811
1813 VPSlotTracker &SlotTracker) const {
1814 O << Indent << "WIDEN vp.store ";
1816}
1817#endif
1818
1820 Value *Start = getStartValue()->getLiveInIRValue();
1821 PHINode *EntryPart = PHINode::Create(Start->getType(), 2, "index");
1822 EntryPart->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
1823
1824 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
1825 EntryPart->addIncoming(Start, VectorPH);
1826 EntryPart->setDebugLoc(getDebugLoc());
1827 for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
1828 State.set(this, EntryPart, Part, /*IsScalar*/ true);
1829}
1830
1831#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1833 VPSlotTracker &SlotTracker) const {
1834 O << Indent << "EMIT ";
1836 O << " = CANONICAL-INDUCTION ";
1838}
1839#endif
1840
1843 VPValue *Step) const {
1844 // Must be an integer induction.
1846 return false;
1847 // Start must match the start value of this canonical induction.
1848 if (Start != getStartValue())
1849 return false;
1850
1851 // If the step is defined by a recipe, it is not a ConstantInt.
1852 if (Step->getDefiningRecipe())
1853 return false;
1854
1855 ConstantInt *StepC = dyn_cast<ConstantInt>(Step->getLiveInIRValue());
1856 return StepC && StepC->isOne();
1857}
1858
1860 return IsScalarAfterVectorization &&
1861 (!IsScalable || vputils::onlyFirstLaneUsed(this));
1862}
1863
1864#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1866 VPSlotTracker &SlotTracker) const {
1867 O << Indent << "EMIT ";
1869 O << " = WIDEN-POINTER-INDUCTION ";
1871 O << ", " << *IndDesc.getStep();
1872}
1873#endif
1874
1876 assert(!State.Instance && "cannot be used in per-lane");
1877 const DataLayout &DL = State.CFG.PrevBB->getModule()->getDataLayout();
1878 SCEVExpander Exp(SE, DL, "induction");
1879
1880 Value *Res = Exp.expandCodeFor(Expr, Expr->getType(),
1881 &*State.Builder.GetInsertPoint());
1882 assert(!State.ExpandedSCEVs.contains(Expr) &&
1883 "Same SCEV expanded multiple times");
1884 State.ExpandedSCEVs[Expr] = Res;
1885 for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
1886 State.set(this, Res, {Part, 0});
1887}
1888
1889#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1891 VPSlotTracker &SlotTracker) const {
1892 O << Indent << "EMIT ";
1894 O << " = EXPAND SCEV " << *Expr;
1895}
1896#endif
1897
1899 Value *CanonicalIV = State.get(getOperand(0), 0, /*IsScalar*/ true);
1900 Type *STy = CanonicalIV->getType();
1901 IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
1902 ElementCount VF = State.VF;
1903 Value *VStart = VF.isScalar()
1904 ? CanonicalIV
1905 : Builder.CreateVectorSplat(VF, CanonicalIV, "broadcast");
1906 for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
1907 Value *VStep = createStepForVF(Builder, STy, VF, Part);
1908 if (VF.isVector()) {
1909 VStep = Builder.CreateVectorSplat(VF, VStep);
1910 VStep =
1911 Builder.CreateAdd(VStep, Builder.CreateStepVector(VStep->getType()));
1912 }
1913 Value *CanonicalVectorIV = Builder.CreateAdd(VStart, VStep, "vec.iv");
1914 State.set(this, CanonicalVectorIV, Part);
1915 }
1916}
1917
1918#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1920 VPSlotTracker &SlotTracker) const {
1921 O << Indent << "EMIT ";
1923 O << " = WIDEN-CANONICAL-INDUCTION ";
1925}
1926#endif
1927
1929 auto &Builder = State.Builder;
1930 // Create a vector from the initial value.
1931 auto *VectorInit = getStartValue()->getLiveInIRValue();
1932
1933 Type *VecTy = State.VF.isScalar()
1934 ? VectorInit->getType()
1935 : VectorType::get(VectorInit->getType(), State.VF);
1936
1937 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
1938 if (State.VF.isVector()) {
1939 auto *IdxTy = Builder.getInt32Ty();
1940 auto *One = ConstantInt::get(IdxTy, 1);
1941 IRBuilder<>::InsertPointGuard Guard(Builder);
1942 Builder.SetInsertPoint(VectorPH->getTerminator());
1943 auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF);
1944 auto *LastIdx = Builder.CreateSub(RuntimeVF, One);
1945 VectorInit = Builder.CreateInsertElement(
1946 PoisonValue::get(VecTy), VectorInit, LastIdx, "vector.recur.init");
1947 }
1948
1949 // Create a phi node for the new recurrence.
1950 PHINode *EntryPart = PHINode::Create(VecTy, 2, "vector.recur");
1951 EntryPart->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
1952 EntryPart->addIncoming(VectorInit, VectorPH);
1953 State.set(this, EntryPart, 0);
1954}
1955
1956#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1958 VPSlotTracker &SlotTracker) const {
1959 O << Indent << "FIRST-ORDER-RECURRENCE-PHI ";
1961 O << " = phi ";
1963}
1964#endif
1965
1967 auto &Builder = State.Builder;
1968
1969 // Reductions do not have to start at zero. They can start with
1970 // any loop invariant values.
1971 VPValue *StartVPV = getStartValue();
1972 Value *StartV = StartVPV->getLiveInIRValue();
1973
1974 // In order to support recurrences we need to be able to vectorize Phi nodes.
1975 // Phi nodes have cycles, so we need to vectorize them in two stages. This is
1976 // stage #1: We create a new vector PHI node with no incoming edges. We'll use
1977 // this value when we vectorize all of the instructions that use the PHI.
1978 bool ScalarPHI = State.VF.isScalar() || IsInLoop;
1979 Type *VecTy = ScalarPHI ? StartV->getType()
1980 : VectorType::get(StartV->getType(), State.VF);
1981
1982 BasicBlock *HeaderBB = State.CFG.PrevBB;
1983 assert(State.CurrentVectorLoop->getHeader() == HeaderBB &&
1984 "recipe must be in the vector loop header");
1985 unsigned LastPartForNewPhi = isOrdered() ? 1 : State.UF;
1986 for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
1987 Instruction *EntryPart = PHINode::Create(VecTy, 2, "vec.phi");
1988 EntryPart->insertBefore(HeaderBB->getFirstInsertionPt());
1989 State.set(this, EntryPart, Part, IsInLoop);
1990 }
1991
1992 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
1993
1994 Value *Iden = nullptr;
1995 RecurKind RK = RdxDesc.getRecurrenceKind();
1998 // MinMax and AnyOf reductions have the start value as their identity.
1999 if (ScalarPHI) {
2000 Iden = StartV;
2001 } else {
2002 IRBuilderBase::InsertPointGuard IPBuilder(Builder);
2003 Builder.SetInsertPoint(VectorPH->getTerminator());
2004 StartV = Iden =
2005 Builder.CreateVectorSplat(State.VF, StartV, "minmax.ident");
2006 }
2007 } else {
2008 Iden = RdxDesc.getRecurrenceIdentity(RK, VecTy->getScalarType(),
2009 RdxDesc.getFastMathFlags());
2010
2011 if (!ScalarPHI) {
2012 Iden = Builder.CreateVectorSplat(State.VF, Iden);
2013 IRBuilderBase::InsertPointGuard IPBuilder(Builder);
2014 Builder.SetInsertPoint(VectorPH->getTerminator());
2015 Constant *Zero = Builder.getInt32(0);
2016 StartV = Builder.CreateInsertElement(Iden, StartV, Zero);
2017 }
2018 }
2019
2020 for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
2021 Value *EntryPart = State.get(this, Part, IsInLoop);
2022 // Make sure to add the reduction start value only to the
2023 // first unroll part.
2024 Value *StartVal = (Part == 0) ? StartV : Iden;
2025 cast<PHINode>(EntryPart)->addIncoming(StartVal, VectorPH);
2026 }
2027}
2028
2029#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2031 VPSlotTracker &SlotTracker) const {
2032 O << Indent << "WIDEN-REDUCTION-PHI ";
2033
2035 O << " = phi ";
2037}
2038#endif
2039
2042 "Non-native vplans are not expected to have VPWidenPHIRecipes.");
2043
2044 Value *Op0 = State.get(getOperand(0), 0);
2045 Type *VecTy = Op0->getType();
2046 Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, "vec.phi");
2047 State.set(this, VecPhi, 0);
2048}
2049
2050#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2052 VPSlotTracker &SlotTracker) const {
2053 O << Indent << "WIDEN-PHI ";
2054
2055 auto *OriginalPhi = cast<PHINode>(getUnderlyingValue());
2056 // Unless all incoming values are modeled in VPlan print the original PHI
2057 // directly.
2058 // TODO: Remove once all VPWidenPHIRecipe instances keep all relevant incoming
2059 // values as VPValues.
2060 if (getNumOperands() != OriginalPhi->getNumOperands()) {
2061 O << VPlanIngredient(OriginalPhi);
2062 return;
2063 }
2064
2066 O << " = phi ";
2068}
2069#endif
2070
2071// TODO: It would be good to use the existing VPWidenPHIRecipe instead and
2072// remove VPActiveLaneMaskPHIRecipe.
2074 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
2075 for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
2076 Value *StartMask = State.get(getOperand(0), Part);
2077 PHINode *EntryPart =
2078 State.Builder.CreatePHI(StartMask->getType(), 2, "active.lane.mask");
2079 EntryPart->addIncoming(StartMask, VectorPH);
2080 EntryPart->setDebugLoc(getDebugLoc());
2081 State.set(this, EntryPart, Part);
2082 }
2083}
2084
2085#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2087 VPSlotTracker &SlotTracker) const {
2088 O << Indent << "ACTIVE-LANE-MASK-PHI ";
2089
2091 O << " = phi ";
2093}
2094#endif
2095
2097 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
2098 assert(State.UF == 1 && "Expected unroll factor 1 for VP vectorization.");
2099 Value *Start = State.get(getOperand(0), VPIteration(0, 0));
2100 PHINode *EntryPart =
2101 State.Builder.CreatePHI(Start->getType(), 2, "evl.based.iv");
2102 EntryPart->addIncoming(Start, VectorPH);
2103 EntryPart->setDebugLoc(getDebugLoc());
2104 State.set(this, EntryPart, 0, /*IsScalar=*/true);
2105}
2106
2107#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2109 VPSlotTracker &SlotTracker) const {
2110 O << Indent << "EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI ";
2111
2113 O << " = phi ";
2115}
2116#endif
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
amdgpu AMDGPU Register Bank Select
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
#define LLVM_DEBUG(X)
Definition: Debug.h:101
std::string Name
Hexagon Common GEP
#define I(x, y, z)
Definition: MD5.cpp:58
mir Rename Register Operands
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first found DebugLoc that has a DILocation, given a range of instructions.
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
static Value * getStepVector(Value *Val, Value *StartIdx, Value *Step, Instruction::BinaryOps BinOp, ElementCount VF, IRBuilderBase &Builder)
This function adds (StartIdx * Step, (StartIdx + 1) * Step, (StartIdx + 2) * Step,...
static bool isUniformAcrossVFsAndUFs(VPScalarCastRecipe *C)
Checks if C is uniform across all VFs and UFs.
static Constant * getSignedIntOrFpConstant(Type *Ty, int64_t C)
A helper function that returns an integer or floating-point constant with value C.
static Value * getRuntimeVFAsFloat(IRBuilderBase &B, Type *FTy, ElementCount VF)
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition: VPlanSLP.cpp:191
This file contains the declarations of the Vectorization Plan base classes:
static const uint32_t IV[8]
Definition: blake3_impl.h:78
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:409
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:452
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:221
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:289
Conditional or Unconditional Branch instruction.
static BranchInst * Create(BasicBlock *IfTrue, BasicBlock::iterator InsertBefore)
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
This class represents a function call, abstracting a target machine's calling convention.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:993
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:1016
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:1018
static StringRef getPredicateName(Predicate P)
This is the shared class of boolean and integer constants.
Definition: Constants.h:80
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
Definition: Constants.h:211
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
Definition: Constants.h:123
static Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
Definition: Constants.cpp:1449
This is an important base class in LLVM.
Definition: Constant.h:41
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
A debug info location.
Definition: DebugLoc.h:33
constexpr bool isVector() const
One or more elements.
Definition: TypeSize.h:323
constexpr bool isScalar() const
Exactly one element.
Definition: TypeSize.h:319
Convenience struct for specifying and reasoning about fast-math flags.
Definition: FMF.h:20
void setAllowContract(bool B=true)
Definition: FMF.h:91
bool noSignedZeros() const
Definition: FMF.h:68
bool noInfs() const
Definition: FMF.h:67
void setAllowReciprocal(bool B=true)
Definition: FMF.h:88
bool allowReciprocal() const
Definition: FMF.h:69
void print(raw_ostream &O) const
Print fast-math flags to O.
Definition: Operator.cpp:259
void setNoSignedZeros(bool B=true)
Definition: FMF.h:85
bool allowReassoc() const
Flag queries.
Definition: FMF.h:65
bool approxFunc() const
Definition: FMF.h:71
void setNoNaNs(bool B=true)
Definition: FMF.h:79
void setAllowReassoc(bool B=true)
Flag setters.
Definition: FMF.h:76
bool noNaNs() const
Definition: FMF.h:66
void setApproxFunc(bool B=true)
Definition: FMF.h:94
void setNoInfs(bool B=true)
Definition: FMF.h:82
bool allowContract() const
Definition: FMF.h:70
Class to represent function types.
Definition: DerivedTypes.h:103
Type * getParamType(unsigned i) const
Parameter type accessors.
Definition: DerivedTypes.h:135
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:202
bool willReturn() const
Determine if the function will return.
Definition: Function.h:643
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
Definition: Function.h:232
bool doesNotThrow() const
Determine if the function cannot unwind.
Definition: Function.h:576
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:207
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:94
Value * CreateFCmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2361
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2472
Value * CreatePtrAdd(Value *Ptr, Value *Offset, const Twine &Name="", bool IsInBounds=false)
Definition: IRBuilder.h:1978
Value * CreateSIToFP(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2094
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2460
Value * CreateFAdd(Value *L, Value *R, const Twine &Name="", MDNode *FPMD=nullptr)
Definition: IRBuilder.h:1533
Value * CreateVectorSplice(Value *V1, Value *V2, int64_t Imm, const Twine &Name="")
Return a vector splice intrinsic if using scalable vectors, otherwise return a shufflevector.
Definition: IRBuilder.cpp:1185
Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains.
Definition: IRBuilder.cpp:1212
ConstantInt * getTrue()
Get the constant value for i1 true.
Definition: IRBuilder.h:466
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, Instruction *FMFSource=nullptr, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition: IRBuilder.cpp:932
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.cpp:1110
BasicBlock::iterator GetInsertPoint() const
Definition: IRBuilder.h:175
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2033
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition: IRBuilder.h:2535
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Definition: IRBuilder.h:526
Value * CreateUIToFP(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2081
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:174
void setFastMathFlags(FastMathFlags NewFMF)
Set the fast-math flags to be used with generated fp-math operators.
Definition: IRBuilder.h:311
InsertPoint saveIP() const
Returns the current insert point.
Definition: IRBuilder.h:277
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:486
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2366
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition: IRBuilder.h:2397
Value * CreateNot(Value *V, const Twine &Name="")
Definition: IRBuilder.h:1749
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2241
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1344
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition: IRBuilder.h:1120
Value * CreateNAryOp(unsigned Opc, ArrayRef< Value * > Ops, const Twine &Name="", MDNode *FPMathTag=nullptr)
Create either a UnaryOperator or BinaryOperator depending on Opc.
Definition: IRBuilder.cpp:1005
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2021
LLVMContext & getContext() const
Definition: IRBuilder.h:176
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1327
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2007
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1666
Value * CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2161
void restoreIP(InsertPoint IP)
Sets the current insert point to a previously-saved location.
Definition: IRBuilder.h:289
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:180
StoreInst * CreateAlignedStore(Value *Val, Value *Ptr, MaybeAlign Align, bool isVolatile=false)
Definition: IRBuilder.h:1826
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2412
Value * CreateGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="", bool IsInBounds=false)
Definition: IRBuilder.h:1866
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2351
Value * CreateFMul(Value *L, Value *R, const Twine &Name="", MDNode *FPMD=nullptr)
Definition: IRBuilder.h:1587
Value * CreateStepVector(Type *DstType, const Twine &Name="")
Creates a vector of type DstType with the linear sequence <0, 1, ...>
Definition: IRBuilder.cpp:109
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1361
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2666
A struct for saving information about induction variables.
const SCEV * getStep() const
InductionKind
This enum represents the kinds of inductions that we support.
@ IK_IntInduction
Integer induction variable. Step = C.
This instruction inserts a single (scalar) element into a VectorType value.
VectorType * getType() const
Overload to return most specific vector type.
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:454
bool isBinaryOp() const
Definition: Instruction.h:257
const BasicBlock * getParent() const
Definition: Instruction.h:152
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
const char * getOpcodeName() const
Definition: Instruction.h:254
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:451
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:278
BlockT * getHeader() const
void print(raw_ostream &OS, const SlotIndexes *=nullptr, bool IsStandalone=true) const
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.h:293
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr, BasicBlock::iterator InsertBefore)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1827
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Definition: IVDescriptors.h:71
FastMathFlags getFastMathFlags() const
static unsigned getOpcode(RecurKind Kind)
Returns the opcode corresponding to the RecurrenceKind.
unsigned getOpcode() const
Type * getRecurrenceType() const
Returns the type of the recurrence.
static bool isAnyOfRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
bool isSigned() const
Returns true if all source operands of the recurrence are SExtInsts.
RecurKind getRecurrenceKind() const
Value * getRecurrenceIdentity(RecurKind K, Type *Tp, FastMathFlags FMF) const
Returns identity corresponding to the RecurrenceKind.
StoreInst * IntermediateStore
Reductions may store temporary or final result to an invariant address.
static bool isMinMaxRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is any min/max kind.
This class uses information about analyze scalars to rewrite expressions in canonical form.
Type * getType() const
Return the LLVM type of this SCEV expression.
This class provides computation of slot numbers for LLVM Assembly writing.
Definition: AsmWriter.cpp:693
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
An instruction for storing to memory.
Definition: Instructions.h:317
This class represents a truncation of integer types.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:265
static IntegerType * getInt1Ty(LLVMContext &C)
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:129
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition: Type.h:185
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
bool isVoidTy() const
Return true if this is 'void'.
Definition: Type.h:140
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:348
Value * getOperand(unsigned i) const
Definition: User.h:169
void execute(VPTransformState &State) override
Generate the active lane mask phi of the vector loop.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition: VPlan.h:2825
RecipeListTy & getRecipeList()
Returns a reference to the list of recipes.
Definition: VPlan.h:2872
iterator end()
Definition: VPlan.h:2856
void insert(VPRecipeBase *Recipe, iterator InsertPt)
Definition: VPlan.h:2884
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getIncomingValue(unsigned Idx) const
Return incoming value number Idx.
Definition: VPlan.h:1971
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
Definition: VPlan.h:1976
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account that the first incoming value has no mask.
Definition: VPlan.h:1968
void execute(VPTransformState &State) override
Generate the phi/select nodes.
VPRegionBlock * getParent()
Definition: VPlan.h:489
size_t getNumSuccessors() const
Definition: VPlan.h:534
VPlan * getPlan()
Definition: VPlan.cpp:148
const VPBasicBlock * getEntryBasicBlock() const
Definition: VPlan.cpp:153
VPBlockBase * getSingleSuccessor() const
Definition: VPlan.h:524
VPValue * getMask() const
Return the mask used by this recipe.
Definition: VPlan.h:2247
void execute(VPTransformState &State) override
Generate the extraction of the appropriate bit from the block mask and the conditional branch.
void execute(VPTransformState &State) override
Generate the canonical scalar induction phi of the vector loop.
bool isCanonical(InductionDescriptor::InductionKind Kind, VPValue *Start, VPValue *Step) const
Check if the induction described by Kind, /p Start and Step is canonical, i.e.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
This class augments a recipe with a set of VPValues defined by the recipe.
Definition: VPlanValue.h:313
VPValue * getVPSingleValue()
Returns the only VPValue defined by the VPDef.
Definition: VPlanValue.h:401
VPValue * getVPValue(unsigned I)
Returns the VPValue with index I defined by the VPDef.
Definition: VPlanValue.h:413
unsigned getVPDefID() const
Definition: VPlanValue.h:433
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getStepValue() const
Definition: VPlan.h:2763
VPValue * getStartValue() const
Definition: VPlan.h:2762
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate phi for handling IV based on EVL over iterations correctly.
void execute(VPTransformState &State) override
Generate a canonical vector induction variable of the vector loop, with.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getStartValue()
Returns the start value of the phi, if one is set.
Definition: VPlan.h:1667
@ FirstOrderRecurrenceSplice
Definition: VPlan.h:1165
@ CanonicalIVIncrementForPart
Definition: VPlan.h:1175
@ CalculateTripCountMinusVF
Definition: VPlan.h:1173
bool hasResult() const
Definition: VPlan.h:1283
LLVM_DUMP_METHOD void dump() const
Print the VPInstruction to dbgs() (for debugging).
unsigned getOpcode() const
Definition: VPlan.h:1259
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the VPInstruction to O.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
void execute(VPTransformState &State) override
Generate the instruction.
static VPLane getLastLaneForVF(const ElementCount &VF)
Definition: VPlan.h:169
static VPLane getFirstLane()
Definition: VPlan.h:167
void print(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the VPLiveOut to O.
PHINode * getPhi() const
Definition: VPlan.h:694
void fixPhi(VPlan &Plan, VPTransformState &State)
Fixup the wrapped LCSSA phi node in the unique exit block.
void execute(VPTransformState &State) override
Generates phi nodes for live-outs as needed to retain SSA form.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition: VPlan.h:709
bool mayReadFromMemory() const
Returns true if the recipe may read from memory.
bool mayHaveSideEffects() const
Returns true if the recipe may have side-effects.
bool mayWriteToMemory() const
Returns true if the recipe may write to memory.
VPBasicBlock * getParent()
Definition: VPlan.h:734
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
Definition: VPlan.h:800
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
void moveAfter(VPRecipeBase *MovePos)
Unlink this recipe from its current VPBasicBlock and insert it into the VPBasicBlock that MovePos liv...
Class to record LLVM IR flag for a recipe along with it.
Definition: VPlan.h:897
ExactFlagsTy ExactFlags
Definition: VPlan.h:953
FastMathFlagsTy FMFs
Definition: VPlan.h:956
NonNegFlagsTy NonNegFlags
Definition: VPlan.h:955
void setFlags(Instruction *I) const
Set the IR flags for I.
Definition: VPlan.h:1082
bool isInBounds() const
Definition: VPlan.h:1121
GEPFlagsTy GEPFlags
Definition: VPlan.h:954
bool hasFastMathFlags() const
Returns true if the recipe has fast-math flags.
Definition: VPlan.h:1128
DisjointFlagsTy DisjointFlags
Definition: VPlan.h:952
WrapFlagsTy WrapFlags
Definition: VPlan.h:951
bool hasNoUnsignedWrap() const
Definition: VPlan.h:1132
void printFlags(raw_ostream &O) const
CmpInst::Predicate getPredicate() const
Definition: VPlan.h:1115
bool hasNoSignedWrap() const
Definition: VPlan.h:1138
FastMathFlags getFastMathFlags() const
bool isOrdered() const
Returns true, if the phi is part of an ordered reduction.
Definition: VPlan.h:1940
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi/select nodes.
VPValue * getVecOp() const
The VPValue of the vector value to be reduced.
Definition: VPlan.h:2131
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getCondOp() const
The VPValue of the condition for the block.
Definition: VPlan.h:2133
VPValue * getChainOp() const
The VPValue of the scalar Chain being accumulated.
Definition: VPlan.h:2129
void execute(VPTransformState &State) override
Generate the reduction in the loop.
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition: VPlan.h:2958
const VPBlockBase * getEntry() const
Definition: VPlan.h:2997
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
unsigned getOpcode() const
Definition: VPlan.h:2211
bool shouldPack() const
Returns true if the recipe is used by a widened recipe via an intervening VPPredInstPHIRecipe.
VPScalarCastRecipe is a recipe to create scalar cast instructions.
Definition: VPlan.h:1410
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getStepValue() const
Definition: VPlan.h:2812
void execute(VPTransformState &State) override
Generate the scalarized versions of the phi node as needed by their users.
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
Definition: VPlan.h:888
This class can be used to assign names to VPValues.
Definition: VPlanValue.h:454
Type * inferScalarType(const VPValue *V)
Infer the type of V. Returns the scalar type of V.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition: VPlanValue.h:203
void printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the operands to O.
Definition: VPlan.cpp:1306
operand_range operands()
Definition: VPlanValue.h:278
unsigned getNumOperands() const
Definition: VPlanValue.h:252
operand_iterator op_begin()
Definition: VPlanValue.h:274
VPValue * getOperand(unsigned N) const
Definition: VPlanValue.h:253
Value * getUnderlyingValue()
Return the underlying Value attached to this VPValue.
Definition: VPlanValue.h:77
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Definition: VPlan.cpp:118
void printAsOperand(raw_ostream &OS, VPSlotTracker &Tracker) const
Definition: VPlan.cpp:1302
friend class VPInstruction
Definition: VPlanValue.h:47
Value * getLiveInIRValue()
Returns the underlying IR value, if this VPValue is defined outside the scope of VPlan.
Definition: VPlanValue.h:173
user_range users()
Definition: VPlanValue.h:133
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Function * getCalledScalarFunction() const
Definition: VPlan.h:1483
void execute(VPTransformState &State) override
Produce a widened version of the call instruction.
operand_range arg_operands()
Definition: VPlan.h:1487
void execute(VPTransformState &State) override
Generate a canonical vector induction variable of the vector loop, with start = {<Part*VF,...
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Type * getResultType() const
Returns the result type of the cast.
Definition: VPlan.h:1406
void execute(VPTransformState &State) override
Produce widened copies of the cast.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the gep nodes.
TruncInst * getTruncInst()
Returns the first defined value as TruncInst, if it is one or nullptr otherwise.
Definition: VPlan.h:1751
void execute(VPTransformState &State) override
Generate the vectorized and scalarized versions of the phi node as needed by their users.
VPValue * getStepValue()
Returns the step value of the induction.
Definition: VPlan.h:1746
Type * getScalarType() const
Returns the scalar type of the induction.
Definition: VPlan.h:1765
bool isCanonical() const
Returns true if the induction is canonical, i.e.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
Definition: VPlan.h:1757
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi/select nodes.
bool onlyScalarsGenerated(bool IsScalable)
Returns true if only scalar values will be generated.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Produce widened copies of all Ingredients.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPlan models a candidate for vectorization, encoding various decisions take to produce efficient outp...
Definition: VPlan.h:3059
VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition: VPlan.h:3246
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:377
void printAsOperand(raw_ostream &O, bool PrintType=true, const Module *M=nullptr) const
Print the name of this Value out to the specified raw_ostream.
Definition: AsmWriter.cpp:5079
bool hasName() const
Definition: Value.h:261
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
Base class of all SIMD vector types.
Definition: DerivedTypes.h:403
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
Definition: DerivedTypes.h:641
static VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
Definition: Type.cpp:676
Type * getElementType() const
Definition: DerivedTypes.h:436
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:171
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
self_iterator getIterator()
Definition: ilist_node.h:109
iterator erase(iterator where)
Definition: ilist.h:204
pointer remove(iterator &IT)
Definition: ilist.h:188
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1471
bool isUniformAfterVectorization(VPValue *VPV)
Returns true if VPV is uniform after vectorization.
Definition: VPlan.h:3606
bool onlyFirstPartUsed(const VPValue *Def)
Returns true if only the first part of Def is used.
Definition: VPlan.cpp:1454
bool onlyFirstLaneUsed(const VPValue *Def)
Returns true if only the first lane of Def is used.
Definition: VPlan.cpp:1449
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void ReplaceInstWithInst(BasicBlock *BB, BasicBlock::iterator &BI, Instruction *I)
Replace the instruction specified by BI with the instruction specified by I.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are are tuples (A,...
Definition: STLExtras.h:2406
bool isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, int OpdIdx)
Identifies if the vector form of the intrinsic is overloaded on the type of the operand at index OpdI...
Value * getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF)
Return the runtime value for VF.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void interleaveComma(const Container &c, StreamT &os, UnaryFunctor each_fn)
Definition: STLExtras.h:2165
Instruction * propagateMetadata(Instruction *I, ArrayRef< Value * > VL)
Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath, MD_nontemporal,...
Value * createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left, Value *Right)
Returns a Min/Max operation corresponding to MinMaxRecurrenceKind.
Definition: LoopUtils.cpp:1037
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
cl::opt< bool > EnableVPlanNativePath("enable-vplan-native-path", cl::Hidden, cl::desc("Enable VPlan-native vectorization path with " "support for outer loop vectorization."))
Definition: VPlan.cpp:53
static bool isDbgInfoIntrinsic(Intrinsic::ID ID)
Check if ID corresponds to a debug info intrinsic.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
Value * createOrderedReduction(IRBuilderBase &B, const RecurrenceDescriptor &Desc, Value *Src, Value *Start)
Create an ordered reduction intrinsic using the given recurrence descriptor Desc.
Definition: LoopUtils.cpp:1211
RecurKind
These are the kinds of recurrences that we support.
Definition: IVDescriptors.h:34
@ Mul
Product of integers.
@ Add
Sum of integers.
Value * createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF, int64_t Step)
Return a value for Step multiplied by VF.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1879
bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx)
Identifies if the vector form of the intrinsic has a scalar operand.
Value * createTargetReduction(IRBuilderBase &B, const RecurrenceDescriptor &Desc, Value *Src, PHINode *OrigPhi=nullptr)
Create a generic target reduction using a recurrence descriptor Desc The target is queried to determi...
Definition: LoopUtils.cpp:1195
void execute(VPTransformState &State) override
Generate the phi nodes.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPIteration represents a single point in the iteration space of the output (vectorized and/or unrolle...
Definition: VPlan.h:219
BasicBlock * PrevBB
The previous IR BasicBlock created or used.
Definition: VPlan.h:365
SmallDenseMap< VPBasicBlock *, BasicBlock * > VPBB2IRBB
A mapping of each VPBasicBlock to the corresponding BasicBlock.
Definition: VPlan.h:373
BasicBlock * getPreheaderBBFor(VPRecipeBase *R)
Returns the BasicBlock* mapped to the pre-header of the loop region containing R.
Definition: VPlan.cpp:348
VPTransformState holds information passed down when "executing" a VPlan, needed for generating the ou...
Definition: VPlan.h:236
Value * get(VPValue *Def, unsigned Part, bool IsScalar=false)
Get the generated vector Value for a given VPValue Def and a given Part if IsScalar is false,...
Definition: VPlan.cpp:247
DenseMap< const SCEV *, Value * > ExpandedSCEVs
Map SCEVs to their expanded values.
Definition: VPlan.h:409
VPTypeAnalysis TypeAnalysis
VPlan-based type analysis.
Definition: VPlan.h:412
void addMetadata(Value *To, Instruction *From)
Add metadata from one instruction to another.
Definition: VPlan.cpp:361
void reset(VPValue *Def, Value *V, unsigned Part)
Reset an existing vector value for Def and a given Part.
Definition: VPlan.h:303
struct llvm::VPTransformState::CFGState CFG
void set(VPValue *Def, Value *V, unsigned Part, bool IsScalar=false)
Set the generated vector Value for a given VPValue and a given Part, if IsScalar is false.
Definition: VPlan.h:288
std::optional< VPIteration > Instance
Hold the indices to generate specific scalar instructions.
Definition: VPlan.h:248
IRBuilderBase & Builder
Hold a reference to the IRBuilder used to generate output IR code.
Definition: VPlan.h:389
bool hasScalarValue(VPValue *Def, VPIteration Instance)
Definition: VPlan.h:276
bool hasVectorValue(VPValue *Def, unsigned Part)
Definition: VPlan.h:270
ElementCount VF
The chosen Vectorization and Unroll Factors of the loop being vectorized.
Definition: VPlan.h:242
Loop * CurrentVectorLoop
The loop object for the current parent region, or nullptr.
Definition: VPlan.h:398
void setDebugLocFrom(DebugLoc DL)
Set the debug location in the builder using the debug location DL.
Definition: VPlan.cpp:372
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool isInvariantCond() const
Definition: VPlan.h:1530
VPValue * getCond() const
Definition: VPlan.h:1526
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Produce a widened version of the select instruction.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.