LLVM 18.0.0git
VPlanRecipes.cpp
Go to the documentation of this file.
1//===- VPlanRecipes.cpp - Implementations for VPlan recipes ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file contains implementations for different VPlan recipes.
11///
12//===----------------------------------------------------------------------===//
13
14#include "VPlan.h"
15#include "llvm/ADT/STLExtras.h"
17#include "llvm/ADT/Twine.h"
19#include "llvm/IR/BasicBlock.h"
20#include "llvm/IR/IRBuilder.h"
21#include "llvm/IR/Instruction.h"
23#include "llvm/IR/Type.h"
24#include "llvm/IR/Value.h"
27#include "llvm/Support/Debug.h"
31#include <cassert>
32
33using namespace llvm;
34
36
37namespace llvm {
39}
40
41#define LV_NAME "loop-vectorize"
42#define DEBUG_TYPE LV_NAME
43
45 switch (getVPDefID()) {
46 case VPWidenMemoryInstructionSC: {
47 return cast<VPWidenMemoryInstructionRecipe>(this)->isStore();
48 }
49 case VPReplicateSC:
50 case VPWidenCallSC:
51 return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
52 ->mayWriteToMemory();
53 case VPBranchOnMaskSC:
54 case VPScalarIVStepsSC:
55 case VPPredInstPHISC:
56 return false;
57 case VPBlendSC:
58 case VPReductionSC:
59 case VPWidenCanonicalIVSC:
60 case VPWidenCastSC:
61 case VPWidenGEPSC:
62 case VPWidenIntOrFpInductionSC:
63 case VPWidenPHISC:
64 case VPWidenSC:
65 case VPWidenSelectSC: {
66 const Instruction *I =
67 dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
68 (void)I;
69 assert((!I || !I->mayWriteToMemory()) &&
70 "underlying instruction may write to memory");
71 return false;
72 }
73 default:
74 return true;
75 }
76}
77
79 switch (getVPDefID()) {
80 case VPWidenMemoryInstructionSC: {
81 return !cast<VPWidenMemoryInstructionRecipe>(this)->isStore();
82 }
83 case VPReplicateSC:
84 case VPWidenCallSC:
85 return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
86 ->mayReadFromMemory();
87 case VPBranchOnMaskSC:
88 case VPScalarIVStepsSC:
89 case VPPredInstPHISC:
90 return false;
91 case VPBlendSC:
92 case VPReductionSC:
93 case VPWidenCanonicalIVSC:
94 case VPWidenCastSC:
95 case VPWidenGEPSC:
96 case VPWidenIntOrFpInductionSC:
97 case VPWidenPHISC:
98 case VPWidenSC:
99 case VPWidenSelectSC: {
100 const Instruction *I =
101 dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
102 (void)I;
103 assert((!I || !I->mayReadFromMemory()) &&
104 "underlying instruction may read from memory");
105 return false;
106 }
107 default:
108 return true;
109 }
110}
111
113 switch (getVPDefID()) {
114 case VPDerivedIVSC:
115 case VPPredInstPHISC:
116 return false;
117 case VPInstructionSC:
118 switch (cast<VPInstruction>(this)->getOpcode()) {
119 case Instruction::ICmp:
124 return false;
125 default:
126 return true;
127 }
128 case VPWidenCallSC:
129 return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
130 ->mayHaveSideEffects();
131 case VPBlendSC:
132 case VPReductionSC:
133 case VPScalarIVStepsSC:
134 case VPWidenCanonicalIVSC:
135 case VPWidenCastSC:
136 case VPWidenGEPSC:
137 case VPWidenIntOrFpInductionSC:
138 case VPWidenPHISC:
139 case VPWidenPointerInductionSC:
140 case VPWidenSC:
141 case VPWidenSelectSC: {
142 const Instruction *I =
143 dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
144 (void)I;
145 assert((!I || !I->mayHaveSideEffects()) &&
146 "underlying instruction has side-effects");
147 return false;
148 }
149 case VPWidenMemoryInstructionSC:
150 assert(cast<VPWidenMemoryInstructionRecipe>(this)
151 ->getIngredient()
153 "mayHaveSideffects result for ingredient differs from this "
154 "implementation");
155 return mayWriteToMemory();
156 case VPReplicateSC: {
157 auto *R = cast<VPReplicateRecipe>(this);
158 return R->getUnderlyingInstr()->mayHaveSideEffects();
159 }
160 default:
161 return true;
162 }
163}
164
166 auto Lane = VPLane::getLastLaneForVF(State.VF);
167 VPValue *ExitValue = getOperand(0);
169 Lane = VPLane::getFirstLane();
170 VPBasicBlock *MiddleVPBB =
171 cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSingleSuccessor());
172 assert(MiddleVPBB->getNumSuccessors() == 0 &&
173 "the middle block must not have any successors");
174 BasicBlock *MiddleBB = State.CFG.VPBB2IRBB[MiddleVPBB];
175 Phi->addIncoming(State.get(ExitValue, VPIteration(State.UF - 1, Lane)),
176 MiddleBB);
177}
178
179#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
181 O << "Live-out ";
183 O << " = ";
185 O << "\n";
186}
187#endif
188
190 assert(!Parent && "Recipe already in some VPBasicBlock");
191 assert(InsertPos->getParent() &&
192 "Insertion position not in any VPBasicBlock");
193 Parent = InsertPos->getParent();
194 Parent->getRecipeList().insert(InsertPos->getIterator(), this);
195}
196
199 assert(!Parent && "Recipe already in some VPBasicBlock");
200 assert(I == BB.end() || I->getParent() == &BB);
201 Parent = &BB;
202 BB.getRecipeList().insert(I, this);
203}
204
206 assert(!Parent && "Recipe already in some VPBasicBlock");
207 assert(InsertPos->getParent() &&
208 "Insertion position not in any VPBasicBlock");
209 Parent = InsertPos->getParent();
210 Parent->getRecipeList().insertAfter(InsertPos->getIterator(), this);
211}
212
214 assert(getParent() && "Recipe not in any VPBasicBlock");
216 Parent = nullptr;
217}
218
220 assert(getParent() && "Recipe not in any VPBasicBlock");
222}
223
226 insertAfter(InsertPos);
227}
228
232 insertBefore(BB, I);
233}
234
236 assert(OpType == OperationType::FPMathOp &&
237 "recipe doesn't have fast math flags");
238 FastMathFlags Res;
239 Res.setAllowReassoc(FMFs.AllowReassoc);
240 Res.setNoNaNs(FMFs.NoNaNs);
241 Res.setNoInfs(FMFs.NoInfs);
242 Res.setNoSignedZeros(FMFs.NoSignedZeros);
243 Res.setAllowReciprocal(FMFs.AllowReciprocal);
244 Res.setAllowContract(FMFs.AllowContract);
245 Res.setApproxFunc(FMFs.ApproxFunc);
246 return Res;
247}
248
251 const Twine &Name)
252 : VPRecipeWithIRFlags(VPDef::VPInstructionSC, ArrayRef<VPValue *>({A, B}),
253 Pred, DL),
254 VPValue(this), Opcode(Opcode), Name(Name.str()) {
255 assert(Opcode == Instruction::ICmp &&
256 "only ICmp predicates supported at the moment");
257}
258
260 std::initializer_list<VPValue *> Operands,
261 FastMathFlags FMFs, DebugLoc DL, const Twine &Name)
262 : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, FMFs, DL),
263 VPValue(this), Opcode(Opcode), Name(Name.str()) {
264 // Make sure the VPInstruction is a floating-point operation.
265 assert(isFPMathOp() && "this op can't take fast-math flags");
266}
267
/// Generates the IR for unroll part \p Part of this VPInstruction and returns
/// the created value. The work is selected by getOpcode(); an opcode without
/// a matching branch hits llvm_unreachable. The two branch-creating paths
/// near the end return nullptr for every part other than 0.
// NOTE(review): this listing lacks several lines of the function (the
// declaration of `Builder`, the condition that opens the first braced
// region, a number of `case` labels and some trailing call arguments). The
// comments added here describe only the statements that are visible.
268Value *VPInstruction::generateInstruction(VPTransformState &State,
269 unsigned Part) {
271 Builder.SetCurrentDebugLocation(getDebugLoc());
272
// Two-operand path: both operands are fetched for this part and combined with
// the binary operator obtained by casting getOpcode(). Presumably guarded by
// a condition that is not shown here -- TODO confirm.
274 Value *A = State.get(getOperand(0), Part);
275 Value *B = State.get(getOperand(1), Part);
276 return Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B, Name);
277 }
278
279 switch (getOpcode()) {
280 case VPInstruction::Not: {
281 Value *A = State.get(getOperand(0), Part);
282 return Builder.CreateNot(A, Name);
283 }
284 case Instruction::ICmp: {
285 Value *A = State.get(getOperand(0), Part);
286 Value *B = State.get(getOperand(1), Part);
287 return Builder.CreateCmp(getPredicate(), A, B, Name);
288 }
289 case Instruction::Select: {
290 Value *Cond = State.get(getOperand(0), Part);
291 Value *Op1 = State.get(getOperand(1), Part);
292 Value *Op2 = State.get(getOperand(2), Part);
293 return Builder.CreateSelect(Cond, Op1, Op2, Name);
294 }
// NOTE(review): the case label for the following branch is missing from this
// listing. The branch emits the get_active_lane_mask intrinsic, with an i1
// vector of State.VF elements as the result type.
296 // Get first lane of vector induction variable.
297 Value *VIVElem0 = State.get(getOperand(0), VPIteration(Part, 0));
298 // Get the original loop tripcount.
299 Value *ScalarTC = State.get(getOperand(1), VPIteration(Part, 0));
300
301 auto *Int1Ty = Type::getInt1Ty(Builder.getContext());
302 auto *PredTy = VectorType::get(Int1Ty, State.VF);
303 return Builder.CreateIntrinsic(Intrinsic::get_active_lane_mask,
304 {PredTy, ScalarTC->getType()},
305 {VIVElem0, ScalarTC}, nullptr, Name);
306 }
// NOTE(review): the case label for the following branch is missing from this
// listing.
308 // Generate code to combine the previous and current values in vector v3.
309 //
310 // vector.ph:
311 // v_init = vector(..., ..., ..., a[-1])
312 // br vector.body
313 //
314 // vector.body
315 // i = phi [0, vector.ph], [i+4, vector.body]
316 // v1 = phi [v_init, vector.ph], [v2, vector.body]
317 // v2 = a[i, i+1, i+2, i+3];
318 // v3 = vector(v1(3), v2(0, 1, 2))
319
320 // For the first part, use the recurrence phi (v1), otherwise v2.
321 auto *V1 = State.get(getOperand(0), 0);
322 Value *PartMinus1 = Part == 0 ? V1 : State.get(getOperand(1), Part - 1);
// A non-vector value is returned unchanged; no splice is created for it.
323 if (!PartMinus1->getType()->isVectorTy())
324 return PartMinus1;
325 Value *V2 = State.get(getOperand(1), Part);
326 return Builder.CreateVectorSplice(PartMinus1, V2, -1, Name);
327 }
// NOTE(review): the case label for the following branch is missing from this
// listing. The branch yields ScalarTC - Step when ScalarTC is unsigned-greater
// than Step and zero otherwise, where Step is createStepForVF(..., State.VF,
// State.UF).
329 Value *ScalarTC = State.get(getOperand(0), {0, 0});
330 Value *Step =
331 createStepForVF(Builder, ScalarTC->getType(), State.VF, State.UF);
332 Value *Sub = Builder.CreateSub(ScalarTC, Step);
333 Value *Cmp = Builder.CreateICmp(CmpInst::Predicate::ICMP_UGT, ScalarTC, Step);
334 Value *Zero = ConstantInt::get(ScalarTC->getType(), 0);
335 return Builder.CreateSelect(Cmp, Sub, Zero);
336 }
// NOTE(review): the case label for the following branch is missing from this
// listing. The add is created for part 0 only.
338 if (Part == 0) {
339 auto *Phi = State.get(getOperand(0), 0);
340 // The loop step is equal to the vectorization factor (num of SIMD
341 // elements) times the unroll factor (num of SIMD instructions).
342 Value *Step =
343 createStepForVF(Builder, Phi->getType(), State.VF, State.UF);
// NOTE(review): the remaining argument(s) of this call are missing from the
// listing.
344 return Builder.CreateAdd(Phi, Step, Name, hasNoUnsignedWrap(),
346 }
// Every other part reuses the value recorded in State for part 0.
347 return State.get(this, 0);
348 }
349
// NOTE(review): the case label for the following branch is missing from this
// listing. Part 0 returns the first-lane value of operand 0 directly; other
// parts add createStepForVF(..., State.VF, Part) to it.
351 auto *IV = State.get(getOperand(0), VPIteration(0, 0));
352 if (Part == 0)
353 return IV;
354
355 // The canonical IV is incremented by the vectorization factor (num of SIMD
356 // elements) times the unroll part.
357 Value *Step = createStepForVF(Builder, IV->getType(), State.VF, Part);
// NOTE(review): the remaining argument(s) of this call are missing from the
// listing.
358 return Builder.CreateAdd(IV, Step, Name, hasNoUnsignedWrap(),
360 }
// NOTE(review): the case label for the following branch is missing from this
// listing. The conditional branch is created once, for part 0.
362 if (Part != 0)
363 return nullptr;
364
365 Value *Cond = State.get(getOperand(0), VPIteration(Part, 0));
366 VPRegionBlock *ParentRegion = getParent()->getParent();
367 VPBasicBlock *Header = ParentRegion->getEntryBasicBlock();
368
369 // Replace the temporary unreachable terminator with a new conditional
370 // branch, hooking it up to backward destination for exiting blocks now and
371 // to forward destination(s) later when they are created.
372 BranchInst *CondBr =
373 Builder.CreateCondBr(Cond, Builder.GetInsertBlock(), nullptr);
374
375 if (getParent()->isExiting())
376 CondBr->setSuccessor(1, State.CFG.VPBB2IRBB[Header]);
377
// The insert block passed to CreateCondBr above was only a placeholder for
// successor 0; it is cleared here.
378 CondBr->setSuccessor(0, nullptr);
379 Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
380 return CondBr;
381 }
// NOTE(review): the case label for the following branch is missing from this
// listing. The compare and branch are created once, for part 0.
383 if (Part != 0)
384 return nullptr;
385 // First create the compare.
386 Value *IV = State.get(getOperand(0), Part);
387 Value *TC = State.get(getOperand(1), Part);
388 Value *Cond = Builder.CreateICmpEQ(IV, TC);
389
390 // Now create the branch.
391 auto *Plan = getParent()->getPlan();
392 VPRegionBlock *TopRegion = Plan->getVectorLoopRegion();
393 VPBasicBlock *Header = TopRegion->getEntry()->getEntryBasicBlock();
394
395 // Replace the temporary unreachable terminator with a new conditional
396 // branch, hooking it up to backward destination (the header) now and to the
397 // forward destination (the exit/middle block) later when it is created.
398 // Note that CreateCondBr expects a valid BB as first argument, so we need
399 // to set it to nullptr later.
400 BranchInst *CondBr = Builder.CreateCondBr(Cond, Builder.GetInsertBlock(),
401 State.CFG.VPBB2IRBB[Header]);
402 CondBr->setSuccessor(0, nullptr);
403 Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
404 return CondBr;
405 }
406 default:
407 llvm_unreachable("Unsupported opcode for instruction");
408 }
409}
410
411#if !defined(NDEBUG)
412bool VPInstruction::isFPMathOp() const {
413 // Inspired by FPMathOperator::classof. Notable differences are that we don't
414 // support Call, PHI and Select opcodes here yet.
415 return Opcode == Instruction::FAdd || Opcode == Instruction::FMul ||
416 Opcode == Instruction::FNeg || Opcode == Instruction::FSub ||
417 Opcode == Instruction::FDiv || Opcode == Instruction::FRem ||
418 Opcode == Instruction::FCmp || Opcode == Instruction::Select;
419}
420#endif
421
423 assert(!State.Instance && "VPInstruction executing an Instance");
425 assert((hasFastMathFlags() == isFPMathOp() ||
426 getOpcode() == Instruction::Select) &&
427 "Recipe not a FPMathOp but has fast-math flags?");
428 if (hasFastMathFlags())
430 for (unsigned Part = 0; Part < State.UF; ++Part) {
431 Value *GeneratedValue = generateInstruction(State, Part);
432 if (!hasResult())
433 continue;
434 assert(GeneratedValue && "generateInstruction must produce a value");
435 State.set(this, GeneratedValue, Part);
436 }
437}
438
439#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
441 VPSlotTracker SlotTracker(getParent()->getPlan());
442 print(dbgs(), "", SlotTracker);
443}
444
446 VPSlotTracker &SlotTracker) const {
447 O << Indent << "EMIT ";
448
449 if (hasResult()) {
451 O << " = ";
452 }
453
454 switch (getOpcode()) {
456 O << "not";
457 break;
459 O << "combined load";
460 break;
462 O << "combined store";
463 break;
465 O << "active lane mask";
466 break;
468 O << "first-order splice";
469 break;
471 O << "VF * UF +";
472 break;
474 O << "branch-on-cond";
475 break;
477 O << "TC > VF ? TC - VF : 0";
478 break;
480 O << "VF * Part +";
481 break;
483 O << "branch-on-count";
484 break;
485 default:
487 }
488
489 printFlags(O);
491
492 if (auto DL = getDebugLoc()) {
493 O << ", !dbg ";
494 DL.print(O);
495 }
496}
497#endif
498
500 assert(State.VF.isVector() && "not widening");
501 auto &CI = *cast<CallInst>(getUnderlyingInstr());
502 assert(!isa<DbgInfoIntrinsic>(CI) &&
503 "DbgInfoIntrinsic should have been dropped during VPlan construction");
504 State.setDebugLocFrom(CI.getDebugLoc());
505
506 for (unsigned Part = 0; Part < State.UF; ++Part) {
507 SmallVector<Type *, 2> TysForDecl;
508 // Add return type if intrinsic is overloaded on it.
509 if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, -1)) {
510 TysForDecl.push_back(
511 VectorType::get(CI.getType()->getScalarType(), State.VF));
512 }
514 for (const auto &I : enumerate(operands())) {
515 // Some intrinsics have a scalar argument - don't replace it with a
516 // vector.
517 Value *Arg;
518 if (VectorIntrinsicID == Intrinsic::not_intrinsic ||
519 !isVectorIntrinsicWithScalarOpAtArg(VectorIntrinsicID, I.index()))
520 Arg = State.get(I.value(), Part);
521 else
522 Arg = State.get(I.value(), VPIteration(0, 0));
523 if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, I.index()))
524 TysForDecl.push_back(Arg->getType());
525 Args.push_back(Arg);
526 }
527
528 Function *VectorF;
529 if (VectorIntrinsicID != Intrinsic::not_intrinsic) {
530 // Use vector version of the intrinsic.
531 Module *M = State.Builder.GetInsertBlock()->getModule();
532 VectorF = Intrinsic::getDeclaration(M, VectorIntrinsicID, TysForDecl);
533 assert(VectorF && "Can't retrieve vector intrinsic.");
534 } else {
535#ifndef NDEBUG
536 assert(Variant != nullptr && "Can't create vector function.");
537#endif
538 VectorF = Variant;
539 }
540
542 CI.getOperandBundlesAsDefs(OpBundles);
543 CallInst *V = State.Builder.CreateCall(VectorF, Args, OpBundles);
544
545 if (isa<FPMathOperator>(V))
546 V->copyFastMathFlags(&CI);
547
548 State.set(this, V, Part);
549 State.addMetadata(V, &CI);
550 }
551}
552
553#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
555 VPSlotTracker &SlotTracker) const {
556 O << Indent << "WIDEN-CALL ";
557
558 auto *CI = cast<CallInst>(getUnderlyingInstr());
559 if (CI->getType()->isVoidTy())
560 O << "void ";
561 else {
563 O << " = ";
564 }
565
566 O << "call @" << CI->getCalledFunction()->getName() << "(";
568 O << ")";
569
570 if (VectorIntrinsicID)
571 O << " (using vector intrinsic)";
572 else {
573 O << " (using library function";
574 if (Variant->hasName())
575 O << ": " << Variant->getName();
576 O << ")";
577 }
578}
579
581 VPSlotTracker &SlotTracker) const {
582 O << Indent << "WIDEN-SELECT ";
584 O << " = select ";
586 O << ", ";
588 O << ", ";
590 O << (isInvariantCond() ? " (condition is loop invariant)" : "");
591}
592#endif
593
596
597 // The condition can be loop invariant but still defined inside the
598 // loop. This means that we can't just use the original 'cond' value.
599 // We have to take the 'vectorized' value and pick the first lane.
600 // Instcombine will make this a no-op.
601 auto *InvarCond =
602 isInvariantCond() ? State.get(getCond(), VPIteration(0, 0)) : nullptr;
603
604 for (unsigned Part = 0; Part < State.UF; ++Part) {
605 Value *Cond = InvarCond ? InvarCond : State.get(getCond(), Part);
606 Value *Op0 = State.get(getOperand(1), Part);
607 Value *Op1 = State.get(getOperand(2), Part);
608 Value *Sel = State.Builder.CreateSelect(Cond, Op0, Op1);
609 State.set(this, Sel, Part);
610 State.addMetadata(Sel, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
611 }
612}
613
614VPRecipeWithIRFlags::FastMathFlagsTy::FastMathFlagsTy(
615 const FastMathFlags &FMF) {
616 AllowReassoc = FMF.allowReassoc();
617 NoNaNs = FMF.noNaNs();
618 NoInfs = FMF.noInfs();
619 NoSignedZeros = FMF.noSignedZeros();
620 AllowReciprocal = FMF.allowReciprocal();
621 AllowContract = FMF.allowContract();
622 ApproxFunc = FMF.approxFunc();
623}
624
625#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
627 switch (OpType) {
628 case OperationType::Cmp:
630 break;
631 case OperationType::PossiblyExactOp:
632 if (ExactFlags.IsExact)
633 O << " exact";
634 break;
635 case OperationType::OverflowingBinOp:
636 if (WrapFlags.HasNUW)
637 O << " nuw";
638 if (WrapFlags.HasNSW)
639 O << " nsw";
640 break;
641 case OperationType::FPMathOp:
643 break;
644 case OperationType::GEPOp:
645 if (GEPFlags.IsInBounds)
646 O << " inbounds";
647 break;
648 case OperationType::Other:
649 break;
650 }
651 if (getNumOperands() > 0)
652 O << " ";
653}
654#endif
655
658 auto &Builder = State.Builder;
659 switch (Opcode) {
660 case Instruction::Call:
661 case Instruction::Br:
662 case Instruction::PHI:
663 case Instruction::GetElementPtr:
664 case Instruction::Select:
665 llvm_unreachable("This instruction is handled by a different recipe.");
666 case Instruction::UDiv:
667 case Instruction::SDiv:
668 case Instruction::SRem:
669 case Instruction::URem:
670 case Instruction::Add:
671 case Instruction::FAdd:
672 case Instruction::Sub:
673 case Instruction::FSub:
674 case Instruction::FNeg:
675 case Instruction::Mul:
676 case Instruction::FMul:
677 case Instruction::FDiv:
678 case Instruction::FRem:
679 case Instruction::Shl:
680 case Instruction::LShr:
681 case Instruction::AShr:
682 case Instruction::And:
683 case Instruction::Or:
684 case Instruction::Xor: {
685 // Just widen unops and binops.
686 for (unsigned Part = 0; Part < State.UF; ++Part) {
688 for (VPValue *VPOp : operands())
689 Ops.push_back(State.get(VPOp, Part));
690
691 Value *V = Builder.CreateNAryOp(Opcode, Ops);
692
693 if (auto *VecOp = dyn_cast<Instruction>(V))
694 setFlags(VecOp);
695
696 // Use this vector value for all users of the original instruction.
697 State.set(this, V, Part);
698 State.addMetadata(V, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
699 }
700
701 break;
702 }
703 case Instruction::Freeze: {
704 for (unsigned Part = 0; Part < State.UF; ++Part) {
705 Value *Op = State.get(getOperand(0), Part);
706
707 Value *Freeze = Builder.CreateFreeze(Op);
708 State.set(this, Freeze, Part);
709 }
710 break;
711 }
712 case Instruction::ICmp:
713 case Instruction::FCmp: {
714 // Widen compares. Generate vector compares.
715 bool FCmp = Opcode == Instruction::FCmp;
716 for (unsigned Part = 0; Part < State.UF; ++Part) {
717 Value *A = State.get(getOperand(0), Part);
718 Value *B = State.get(getOperand(1), Part);
719 Value *C = nullptr;
720 if (FCmp) {
721 // Propagate fast math flags.
723 if (auto *I = dyn_cast_or_null<Instruction>(getUnderlyingValue()))
724 Builder.setFastMathFlags(I->getFastMathFlags());
725 C = Builder.CreateFCmp(getPredicate(), A, B);
726 } else {
727 C = Builder.CreateICmp(getPredicate(), A, B);
728 }
729 State.set(this, C, Part);
730 State.addMetadata(C, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
731 }
732
733 break;
734 }
735 default:
736 // This instruction is not vectorized by simple widening.
737 LLVM_DEBUG(dbgs() << "LV: Found an unhandled opcode : "
738 << Instruction::getOpcodeName(Opcode));
739 llvm_unreachable("Unhandled instruction!");
740 } // end of switch.
741}
742#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
744 VPSlotTracker &SlotTracker) const {
745 O << Indent << "WIDEN ";
747 O << " = " << Instruction::getOpcodeName(Opcode);
748 printFlags(O);
750}
751#endif
752
755 auto &Builder = State.Builder;
756 /// Vectorize casts.
757 assert(State.VF.isVector() && "Not vectorizing?");
758 Type *DestTy = VectorType::get(getResultType(), State.VF);
759
760 for (unsigned Part = 0; Part < State.UF; ++Part) {
761 Value *A = State.get(getOperand(0), Part);
762 Value *Cast = Builder.CreateCast(Instruction::CastOps(Opcode), A, DestTy);
763 State.set(this, Cast, Part);
764 State.addMetadata(Cast, cast_or_null<Instruction>(getUnderlyingValue()));
765 }
766}
767
768#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
770 VPSlotTracker &SlotTracker) const {
771 O << Indent << "WIDEN-CAST ";
773 O << " = " << Instruction::getOpcodeName(Opcode) << " ";
775 O << " to " << *getResultType();
776}
777#endif
778
779/// Adds the sequence
780/// (StartIdx * Step, (StartIdx + 1) * Step, (StartIdx + 2) * Step, ...)
781/// element-wise to the vector \p Val. The sequence starts at \p StartIdx.
782/// \p BinOp is only relevant for floating-point inductions, where it must be
/// FAdd or FSub and is the operation used to combine \p Val with the scaled
/// sequence. Integer inductions always use add/mul.
/// \returns the instruction named "induction" that produces the result.
// NOTE(review): the parameter line that declares `BinOp` and `VF` (both used
// in the body) is missing from this listing, as are two interior lines.
783static Value *getStepVector(Value *Val, Value *StartIdx, Value *Step,
785 IRBuilderBase &Builder) {
786 assert(VF.isVector() && "only vector VFs are supported");
787
788 // Create and check the types.
789 auto *ValVTy = cast<VectorType>(Val->getType());
790 ElementCount VLen = ValVTy->getElementCount();
791
792 Type *STy = Val->getType()->getScalarType();
793 assert((STy->isIntegerTy() || STy->isFloatingPointTy()) &&
794 "Induction Step must be an integer or FP");
795 assert(Step->getType() == STy && "Step has wrong type");
796
798
799 // Create a vector of consecutive numbers from zero to VF.
800 VectorType *InitVecValVTy = ValVTy;
// For a floating-point element type the step vector is created with a
// different element type and converted with CreateUIToFP further down.
// NOTE(review): the initializer of InitVecValSTy is missing from this listing.
801 if (STy->isFloatingPointTy()) {
802 Type *InitVecValSTy =
804 InitVecValVTy = VectorType::get(InitVecValSTy, VLen);
805 }
806 Value *InitVec = Builder.CreateStepVector(InitVecValVTy);
807
808 // Splat the StartIdx
809 Value *StartIdxSplat = Builder.CreateVectorSplat(VLen, StartIdx);
810
// Integer induction: Val + (InitVec + splat(StartIdx)) * splat(Step).
811 if (STy->isIntegerTy()) {
812 InitVec = Builder.CreateAdd(InitVec, StartIdxSplat);
813 Step = Builder.CreateVectorSplat(VLen, Step);
814 assert(Step->getType() == Val->getType() && "Invalid step vec");
815 // FIXME: The newly created binary instructions should contain nsw/nuw
816 // flags, which can be found from the original scalar operations.
817 Step = Builder.CreateMul(InitVec, Step);
818 return Builder.CreateAdd(Val, Step, "induction");
819 }
820
821 // Floating point induction.
// Computes Val BinOp ((uitofp(InitVec) + splat(StartIdx)) * splat(Step)).
822 assert((BinOp == Instruction::FAdd || BinOp == Instruction::FSub) &&
823 "Binary Opcode should be specified for FP induction");
824 InitVec = Builder.CreateUIToFP(InitVec, ValVTy);
825 InitVec = Builder.CreateFAdd(InitVec, StartIdxSplat);
826
827 Step = Builder.CreateVectorSplat(VLen, Step);
828 Value *MulOp = Builder.CreateFMul(InitVec, Step);
829 return Builder.CreateBinOp(BinOp, Val, MulOp, "induction");
830}
831
832/// A helper function that returns an integer or floating-point constant with
833/// value C.
835 return Ty->isIntegerTy() ? ConstantInt::getSigned(Ty, C)
836 : ConstantFP::get(Ty, C);
837}
838
840 ElementCount VF) {
841 assert(FTy->isFloatingPointTy() && "Expected floating point type!");
842 Type *IntTy = IntegerType::get(FTy->getContext(), FTy->getScalarSizeInBits());
843 Value *RuntimeVF = getRuntimeVF(B, IntTy, VF);
844 return B.CreateUIToFP(RuntimeVF, FTy);
845}
846
848 assert(!State.Instance && "Int or FP induction being replicated.");
849
852 TruncInst *Trunc = getTruncInst();
854 assert(IV->getType() == ID.getStartValue()->getType() && "Types must match");
855 assert(State.VF.isVector() && "must have vector VF");
856
857 // The value from the original loop to which we are mapping the new induction
858 // variable.
859 Instruction *EntryVal = Trunc ? cast<Instruction>(Trunc) : IV;
860
861 // Fast-math-flags propagate from the original induction instruction.
863 if (ID.getInductionBinOp() && isa<FPMathOperator>(ID.getInductionBinOp()))
864 Builder.setFastMathFlags(ID.getInductionBinOp()->getFastMathFlags());
865
866 // Now do the actual transformations, and start with fetching the step value.
867 Value *Step = State.get(getStepValue(), VPIteration(0, 0));
868
869 assert((isa<PHINode>(EntryVal) || isa<TruncInst>(EntryVal)) &&
870 "Expected either an induction phi-node or a truncate of it!");
871
872 // Construct the initial value of the vector IV in the vector loop preheader
873 auto CurrIP = Builder.saveIP();
874 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
875 Builder.SetInsertPoint(VectorPH->getTerminator());
876 if (isa<TruncInst>(EntryVal)) {
877 assert(Start->getType()->isIntegerTy() &&
878 "Truncation requires an integer type");
879 auto *TruncType = cast<IntegerType>(EntryVal->getType());
880 Step = Builder.CreateTrunc(Step, TruncType);
881 Start = Builder.CreateCast(Instruction::Trunc, Start, TruncType);
882 }
883
884 Value *Zero = getSignedIntOrFpConstant(Start->getType(), 0);
885 Value *SplatStart = Builder.CreateVectorSplat(State.VF, Start);
886 Value *SteppedStart = getStepVector(
887 SplatStart, Zero, Step, ID.getInductionOpcode(), State.VF, State.Builder);
888
889 // We create vector phi nodes for both integer and floating-point induction
890 // variables. Here, we determine the kind of arithmetic we will perform.
893 if (Step->getType()->isIntegerTy()) {
894 AddOp = Instruction::Add;
895 MulOp = Instruction::Mul;
896 } else {
897 AddOp = ID.getInductionOpcode();
898 MulOp = Instruction::FMul;
899 }
900
901 // Multiply the vectorization factor by the step using integer or
902 // floating-point arithmetic as appropriate.
903 Type *StepType = Step->getType();
904 Value *RuntimeVF;
905 if (Step->getType()->isFloatingPointTy())
906 RuntimeVF = getRuntimeVFAsFloat(Builder, StepType, State.VF);
907 else
908 RuntimeVF = getRuntimeVF(Builder, StepType, State.VF);
909 Value *Mul = Builder.CreateBinOp(MulOp, Step, RuntimeVF);
910
911 // Create a vector splat to use in the induction update.
912 //
913 // FIXME: If the step is non-constant, we create the vector splat with
914 // IRBuilder. IRBuilder can constant-fold the multiply, but it doesn't
915 // handle a constant vector splat.
916 Value *SplatVF = isa<Constant>(Mul)
917 ? ConstantVector::getSplat(State.VF, cast<Constant>(Mul))
918 : Builder.CreateVectorSplat(State.VF, Mul);
919 Builder.restoreIP(CurrIP);
920
921 // We may need to add the step a number of times, depending on the unroll
922 // factor. The last of those goes into the PHI.
923 PHINode *VecInd = PHINode::Create(SteppedStart->getType(), 2, "vec.ind");
924 VecInd->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
925 VecInd->setDebugLoc(EntryVal->getDebugLoc());
926 Instruction *LastInduction = VecInd;
927 for (unsigned Part = 0; Part < State.UF; ++Part) {
928 State.set(this, LastInduction, Part);
929
930 if (isa<TruncInst>(EntryVal))
931 State.addMetadata(LastInduction, EntryVal);
932
933 LastInduction = cast<Instruction>(
934 Builder.CreateBinOp(AddOp, LastInduction, SplatVF, "step.add"));
935 LastInduction->setDebugLoc(EntryVal->getDebugLoc());
936 }
937
938 LastInduction->setName("vec.ind.next");
939 VecInd->addIncoming(SteppedStart, VectorPH);
940 // Add induction update using an incorrect block temporarily. The phi node
941 // will be fixed after VPlan execution. Note that at this point the latch
942 // block cannot be used, as it does not exist yet.
943 // TODO: Model increment value in VPlan, by turning the recipe into a
944 // multi-def and a subclass of VPHeaderPHIRecipe.
945 VecInd->addIncoming(LastInduction, VectorPH);
946}
947
948#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
950 VPSlotTracker &SlotTracker) const {
951 O << Indent << "WIDEN-INDUCTION";
952 if (getTruncInst()) {
953 O << "\\l\"";
954 O << " +\n" << Indent << "\" " << VPlanIngredient(IV) << "\\l\"";
955 O << " +\n" << Indent << "\" ";
957 } else
958 O << " " << VPlanIngredient(IV);
959
960 O << ", ";
962}
963#endif
964
966 // The step may be defined by a recipe in the preheader (e.g. if it requires
967 // SCEV expansion), but for the canonical induction the step is required to be
968 // 1, which is represented as live-in.
970 return false;
971 auto *StepC = dyn_cast<ConstantInt>(getStepValue()->getLiveInIRValue());
972 auto *StartC = dyn_cast<ConstantInt>(getStartValue()->getLiveInIRValue());
973 return StartC && StartC->isZero() && StepC && StepC->isOne();
974}
975
976#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
978 VPSlotTracker &SlotTracker) const {
979 O << Indent;
981 O << Indent << "= DERIVED-IV ";
983 O << " + ";
985 O << " * ";
987
988 if (TruncResultTy)
989 O << " (truncated to " << *TruncResultTy << ")";
990}
991#endif
992
994 // Fast-math-flags propagate from the original induction instruction.
996 if (hasFastMathFlags())
998
999 /// Compute scalar induction steps. \p ScalarIV is the scalar induction
1000 /// variable on which to base the steps, \p Step is the size of the step.
1001
1002 Value *BaseIV = State.get(getOperand(0), VPIteration(0, 0));
1003 Value *Step = State.get(getStepValue(), VPIteration(0, 0));
1004 IRBuilderBase &Builder = State.Builder;
1005
1006 // Ensure step has the same type as that of scalar IV.
1007 Type *BaseIVTy = BaseIV->getType()->getScalarType();
1008 if (BaseIVTy != Step->getType()) {
1009 // TODO: Also use VPDerivedIVRecipe when only the step needs truncating, to
1010 // avoid separate truncate here.
1011 assert(Step->getType()->isIntegerTy() &&
1012 "Truncation requires an integer step");
1013 Step = State.Builder.CreateTrunc(Step, BaseIVTy);
1014 }
1015
1016 // We build scalar steps for both integer and floating-point induction
1017 // variables. Here, we determine the kind of arithmetic we will perform.
1020 if (BaseIVTy->isIntegerTy()) {
1021 AddOp = Instruction::Add;
1022 MulOp = Instruction::Mul;
1023 } else {
1024 AddOp = InductionOpcode;
1025 MulOp = Instruction::FMul;
1026 }
1027
1028 // Determine the number of scalars we need to generate for each unroll
1029 // iteration.
1030 bool FirstLaneOnly = vputils::onlyFirstLaneUsed(this);
1031 // Compute the scalar steps and save the results in State.
1032 Type *IntStepTy =
1033 IntegerType::get(BaseIVTy->getContext(), BaseIVTy->getScalarSizeInBits());
1034 Type *VecIVTy = nullptr;
1035 Value *UnitStepVec = nullptr, *SplatStep = nullptr, *SplatIV = nullptr;
1036 if (!FirstLaneOnly && State.VF.isScalable()) {
1037 VecIVTy = VectorType::get(BaseIVTy, State.VF);
1038 UnitStepVec =
1039 Builder.CreateStepVector(VectorType::get(IntStepTy, State.VF));
1040 SplatStep = Builder.CreateVectorSplat(State.VF, Step);
1041 SplatIV = Builder.CreateVectorSplat(State.VF, BaseIV);
1042 }
1043
1044 unsigned StartPart = 0;
1045 unsigned EndPart = State.UF;
1046 unsigned StartLane = 0;
1047 unsigned EndLane = FirstLaneOnly ? 1 : State.VF.getKnownMinValue();
1048 if (State.Instance) {
1049 StartPart = State.Instance->Part;
1050 EndPart = StartPart + 1;
1051 StartLane = State.Instance->Lane.getKnownLane();
1052 EndLane = StartLane + 1;
1053 }
1054 for (unsigned Part = StartPart; Part < EndPart; ++Part) {
1055 Value *StartIdx0 = createStepForVF(Builder, IntStepTy, State.VF, Part);
1056
1057 if (!FirstLaneOnly && State.VF.isScalable()) {
1058 auto *SplatStartIdx = Builder.CreateVectorSplat(State.VF, StartIdx0);
1059 auto *InitVec = Builder.CreateAdd(SplatStartIdx, UnitStepVec);
1060 if (BaseIVTy->isFloatingPointTy())
1061 InitVec = Builder.CreateSIToFP(InitVec, VecIVTy);
1062 auto *Mul = Builder.CreateBinOp(MulOp, InitVec, SplatStep);
1063 auto *Add = Builder.CreateBinOp(AddOp, SplatIV, Mul);
1064 State.set(this, Add, Part);
1065 // It's useful to record the lane values too for the known minimum number
1066 // of elements so we do those below. This improves the code quality when
1067 // trying to extract the first element, for example.
1068 }
1069
1070 if (BaseIVTy->isFloatingPointTy())
1071 StartIdx0 = Builder.CreateSIToFP(StartIdx0, BaseIVTy);
1072
1073 for (unsigned Lane = StartLane; Lane < EndLane; ++Lane) {
1074 Value *StartIdx = Builder.CreateBinOp(
1075 AddOp, StartIdx0, getSignedIntOrFpConstant(BaseIVTy, Lane));
1076 // The step returned by `createStepForVF` is a runtime-evaluated value
1077 // when VF is scalable. Otherwise, it should be folded into a Constant.
1078 assert((State.VF.isScalable() || isa<Constant>(StartIdx)) &&
1079 "Expected StartIdx to be folded to a constant when VF is not "
1080 "scalable");
1081 auto *Mul = Builder.CreateBinOp(MulOp, StartIdx, Step);
1082 auto *Add = Builder.CreateBinOp(AddOp, BaseIV, Mul);
1083 State.set(this, Add, VPIteration(Part, Lane));
1084 }
1085 }
1086}
1087
1088#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1090 VPSlotTracker &SlotTracker) const {
// Debug/dump-only printer: emits the indent followed by the
// " = SCALAR-STEPS " tag.
// NOTE(review): listing lines 1089, 1092 and 1094 are absent from this
// extract; presumably they hold the start of the signature and the operand
// printing -- confirm against the real file.
1091 O << Indent;
1093 O << " = SCALAR-STEPS ";
1095}
1096#endif
1097
// Body of the widened-GEP code generator: for every unroll part it records in
// State a GEP derived from the underlying scalar GetElementPtrInst.
//  * All operands invariant: one scalar GEP is built from the lane-(0,0)
//    values of the operands and then splatted to VF lanes for each part.
//  * Otherwise: a GEP is built per part, using the lane-(0,0) scalar for
//    loop-invariant operands and the per-part value for the others.
// NOTE(review): the function header (listing line 1098) and the declarations
// of `Ops` (1120) and `Indices` (1149) are missing from this extract;
// presumably this is VPWidenGEPRecipe::execute(VPTransformState &) -- confirm.
1099 assert(State.VF.isVector() && "not widening");
1100 auto *GEP = cast<GetElementPtrInst>(getUnderlyingInstr());
1101 // Construct a vector GEP by widening the operands of the scalar GEP as
1102 // necessary. We mark the vector GEP 'inbounds' if appropriate. A GEP
1103 // results in a vector of pointers when at least one operand of the GEP
1104 // is vector-typed. Thus, to keep the representation compact, we only use
1105 // vector-typed operands for loop-varying values.
1106
1107 if (areAllOperandsInvariant()) {
1108 // If we are vectorizing, but the GEP has only loop-invariant operands,
1109 // the GEP we build (by only using vector-typed operands for
1110 // loop-varying values) would be a scalar pointer. Thus, to ensure we
1111 // produce a vector of pointers, we need to either arbitrarily pick an
1112 // operand to broadcast, or broadcast a clone of the original GEP.
1113 // Here, we broadcast a clone of the original.
1114 //
1115 // TODO: If at some point we decide to scalarize instructions having
1116 // loop-invariant operands, this special case will no longer be
1117 // required. We would add the scalarization decision to
1118 // collectLoopScalars() and teach getVectorValue() to broadcast
1119 // the lane-zero scalar value.
1121 for (unsigned I = 0, E = getNumOperands(); I != E; I++)
1122 Ops.push_back(State.get(getOperand(I), VPIteration(0, 0)));
1123
// Ops[0] is the pointer operand; the remaining entries are the indices.
1124 auto *NewGEP =
1125 State.Builder.CreateGEP(GEP->getSourceElementType(), Ops[0],
1126 ArrayRef(Ops).drop_front(), "", isInBounds());
// The same scalar GEP is splatted once per unroll part.
1127 for (unsigned Part = 0; Part < State.UF; ++Part) {
1128 Value *EntryPart = State.Builder.CreateVectorSplat(State.VF, NewGEP);
1129 State.set(this, EntryPart, Part);
1130 State.addMetadata(EntryPart, GEP);
1131 }
1132 } else {
1133 // If the GEP has at least one loop-varying operand, we are sure to
1134 // produce a vector of pointers. But if we are only unrolling, we want
1135 // to produce a scalar GEP for each unroll part. Thus, the GEP we
1136 // produce with the code below will be scalar (if VF == 1) or vector
1137 // (otherwise). Note that for the unroll-only case, we still maintain
1138 // values in the vector mapping with initVector, as we do for other
1139 // instructions.
1140 for (unsigned Part = 0; Part < State.UF; ++Part) {
1141 // The pointer operand of the new GEP. If it's loop-invariant, we
1142 // won't broadcast it.
1143 auto *Ptr = isPointerLoopInvariant()
1144 ? State.get(getOperand(0), VPIteration(0, 0))
1145 : State.get(getOperand(0), Part);
1146
1147 // Collect all the indices for the new GEP. If any index is
1148 // loop-invariant, we won't broadcast it.
// Operand I of the recipe corresponds to index I - 1 of the GEP, since
// operand 0 is the pointer.
1150 for (unsigned I = 1, E = getNumOperands(); I < E; I++) {
1151 VPValue *Operand = getOperand(I);
1152 if (isIndexLoopInvariant(I - 1))
1153 Indices.push_back(State.get(Operand, VPIteration(0, 0)));
1154 else
1155 Indices.push_back(State.get(Operand, Part));
1156 }
1157
1158 // Create the new GEP. Note that this GEP may be a scalar if VF == 1,
1159 // but it should be a vector, otherwise.
1160 auto *NewGEP = State.Builder.CreateGEP(GEP->getSourceElementType(), Ptr,
1161 Indices, "", isInBounds());
1162 assert((State.VF.isScalar() || NewGEP->getType()->isVectorTy()) &&
1163 "NewGEP is not a pointer vector");
1164 State.set(this, NewGEP, Part);
1165 State.addMetadata(NewGEP, GEP);
1166 }
1167 }
1168}
1169
1170#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1172 VPSlotTracker &SlotTracker) const {
// Debug/dump-only printer: emits "WIDEN-GEP ", then "Inv"/"Var" for the
// pointer operand and a bracketed "Inv"/"Var" for each of the
// getNumOperands() - 1 indices, followed by " = getelementptr" and the flags.
// NOTE(review): listing lines 1171, 1179 and 1182 are missing from this
// extract (presumably the signature start and the operand printing).
1173 O << Indent << "WIDEN-GEP ";
1174 O << (isPointerLoopInvariant() ? "Inv" : "Var");
1175 for (size_t I = 0; I < getNumOperands() - 1; ++I)
1176 O << "[" << (isIndexLoopInvariant(I) ? "Inv" : "Var") << "]";
1177
1178 O << " ";
1180 O << " = getelementptr";
1181 printFlags(O);
1183}
1184#endif
1185
// Body of the blend code generator: for each unroll part, folds the incoming
// values into a chain of selects and records the final value in State.
// NOTE(review): the function header (listing lines 1186-1187) is missing from
// this extract; presumably VPBlendRecipe::execute(VPTransformState &).
1188 // We know that all PHIs in non-header blocks are converted into
1189 // selects, so we don't have to worry about the insertion order and we
1190 // can just use the builder.
1191 // At this point we generate the predication tree. There may be
1192 // duplications since this is a simple recursive scan, but future
1193 // optimizations will clean it up.
1194
1195 unsigned NumIncoming = getNumIncomingValues();
1196
1197 // Generate a sequence of selects of the form:
1198 // SELECT(Mask3, In3,
1199 // SELECT(Mask2, In2,
1200 // SELECT(Mask1, In1,
1201 // In0)))
1202 // Note that Mask0 is never used: lanes for which no path reaches this phi and
1203 // are essentially undef are taken from In0.
// Entry[Part] accumulates the select chain built so far for that part.
1204 VectorParts Entry(State.UF);
1205 for (unsigned In = 0; In < NumIncoming; ++In) {
1206 for (unsigned Part = 0; Part < State.UF; ++Part) {
1207 // We might have single edge PHIs (blocks) - use an identity
1208 // 'select' for the first PHI operand.
// Despite its name, In0 holds the value of incoming edge `In`, not only
// of edge 0.
1209 Value *In0 = State.get(getIncomingValue(In), Part);
1210 if (In == 0)
1211 Entry[Part] = In0; // Initialize with the first incoming value.
1212 else {
1213 // Select between the current value and the previous incoming edge
1214 // based on the incoming mask.
1215 Value *Cond = State.get(getMask(In), Part);
1216 Entry[Part] =
1217 State.Builder.CreateSelect(Cond, In0, Entry[Part], "predphi");
1218 }
1219 }
1220 }
// Publish the final select (or the single incoming value) for every part.
1221 for (unsigned Part = 0; Part < State.UF; ++Part)
1222 State.set(this, Entry[Part], Part);
1223}
1224
1225#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1227 VPSlotTracker &SlotTracker) const {
// Debug/dump-only printer: emits "BLEND ", " =", and then either one
// incoming entry (single-predecessor case) or, for each incoming value, a
// space and a "/" separator.
// NOTE(review): listing lines 1226, 1229, 1235, 1239 and 1241 are missing from
// this extract; presumably they print the result, the incoming value and the
// mask on either side of the "/" -- confirm against the real file.
1228 O << Indent << "BLEND ";
1230 O << " =";
1231 if (getNumIncomingValues() == 1) {
1232 // Not a User of any mask: not really blending, this is a
1233 // single-predecessor phi.
1234 O << " ";
1236 } else {
1237 for (unsigned I = 0, E = getNumIncomingValues(); I < E; ++I) {
1238 O << " ";
1240 O << "/";
1242 }
1243 }
1244}
1245
1247 VPSlotTracker &SlotTracker) const {
// Debug/dump-only printer: emits "REDUCE ", " = ", " +", then
// " reduce.<opcode name> (" using the opcode from RdxDesc, an optional
// ", "-separated entry when getCondOp() is non-null, and ")". A trailing note
// is added when RdxDesc.IntermediateStore is set.
// NOTE(review): listing lines 1246, 1249, 1251, 1254, 1256 and 1259 are
// missing from this extract. In particular the statement controlled by the
// `if (isa<FPMathOperator>...)` below is line 1254, NOT the visible line 1255
// that follows it here -- do not read the two visible lines as one statement.
1248 O << Indent << "REDUCE ";
1250 O << " = ";
1252 O << " +";
1253 if (isa<FPMathOperator>(getUnderlyingInstr()))
1255 O << " reduce." << Instruction::getOpcodeName(RdxDesc.getOpcode()) << " (";
1257 if (getCondOp()) {
1258 O << ", ";
1260 }
1261 O << ")";
1262 if (RdxDesc.IntermediateStore)
1263 O << " (with final reduction value stored in invariant address sank "
1264 "outside of loop)";
1265}
1266#endif
1267
// Returns true iff at least one user of this recipe is a VPPredInstPHIRecipe
// that itself has at least one user for which usesScalars(PredR) is false.
// Users that are not VPPredInstPHIRecipes never cause a true result.
// NOTE(review): the function header (listing line 1268) is missing from this
// extract; presumably `bool VPReplicateRecipe::shouldPack() const`.
1269 // Find if the recipe is used by a widened recipe via an intervening
1270 // VPPredInstPHIRecipe. In this case, also pack the scalar values in a vector.
1271 return any_of(users(), [](const VPUser *U) {
1272 if (auto *PredR = dyn_cast<VPPredInstPHIRecipe>(U))
1273 return any_of(PredR->users(), [PredR](const VPUser *U) {
1274 return !U->usesScalars(PredR);
1275 });
1276 return false;
1277 });
1278}
1279
1280#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1282 VPSlotTracker &SlotTracker) const {
// Debug/dump-only printer: emits "CLONE " when IsUniform is set and
// "REPLICATE " otherwise, a " = " for non-void underlying instructions, then
// either a "call@<callee>(...)" form for CallBase instructions or a generic
// form with the flags, and finally " (S->V)" when shouldPack() is true.
// NOTE(review): listing lines 1281, 1286, 1293, 1299 and 1301 are missing from
// this extract (presumably the signature start, result/operand printing and
// the interleave call that owns the lambda below).
1283 O << Indent << (IsUniform ? "CLONE " : "REPLICATE ");
1284
1285 if (!getUnderlyingInstr()->getType()->isVoidTy()) {
1287 O << " = ";
1288 }
1289 if (auto *CB = dyn_cast<CallBase>(getUnderlyingInstr())) {
1290 O << "call";
1291 printFlags(O);
// NOTE(review): the result of getCalledFunction() is dereferenced without a
// null check; verify that calls without a known callee cannot reach here.
1292 O << "@" << CB->getCalledFunction()->getName() << "(";
1294 O, [&O, &SlotTracker](VPValue *Op) {
1295 Op->printAsOperand(O, SlotTracker);
1296 });
1297 O << ")";
1298 } else {
1300 printFlags(O);
1302 }
1303
1304 if (shouldPack())
1305 O << " (S->V)";
1306}
1307#endif
1308
// Body of the branch-on-mask code generator. For the single (Part, Lane)
// instance in State.Instance it computes a scalar condition -- the mask value,
// with lane `Lane` extracted when the mask is a vector, or constant true when
// there is no mask -- and replaces the unreachable terminator of
// State.CFG.PrevBB with a conditional branch on that condition.
// NOTE(review): the function header (listing line 1309) is missing from this
// extract; presumably VPBranchOnMaskRecipe::execute(VPTransformState &).
1310 assert(State.Instance && "Branch on Mask works only on single instance.");
1311
1312 unsigned Part = State.Instance->Part;
1313 unsigned Lane = State.Instance->Lane.getKnownLane();
1314
1315 Value *ConditionBit = nullptr;
1316 VPValue *BlockInMask = getMask();
1317 if (BlockInMask) {
1318 ConditionBit = State.get(BlockInMask, Part);
1319 if (ConditionBit->getType()->isVectorTy())
1320 ConditionBit = State.Builder.CreateExtractElement(
1321 ConditionBit, State.Builder.getInt32(Lane));
1322 } else // Block in mask is all-one.
1323 ConditionBit = State.Builder.getTrue();
1324
1325 // Replace the temporary unreachable terminator with a new conditional branch,
1326 // whose two destinations will be set later when they are created.
1327 auto *CurrentTerminator = State.CFG.PrevBB->getTerminator();
1328 assert(isa<UnreachableInst>(CurrentTerminator) &&
1329 "Expected to replace unreachable terminator with conditional branch.");
// PrevBB is passed only as a placeholder first successor; it is cleared again
// on the next line so that both successors start out null.
1330 auto *CondBr = BranchInst::Create(State.CFG.PrevBB, nullptr, ConditionBit);
1331 CondBr->setSuccessor(0, nullptr);
1332 ReplaceInstWithInst(CurrentTerminator, CondBr);
1333}
1334
// Body of the predicated-instruction PHI code generator. For the current
// State.Instance it creates one two-input phi merging the value produced in
// the predicated block with the value arriving from that block's single
// predecessor:
//  * if operand 0 has a vector value for this part, the phi merges the
//    insertelement result with the vector it was inserted into;
//  * otherwise the phi merges the scalar instruction with poison.
// In both cases State is updated for this recipe and for operand 0.
// NOTE(review): the function header (listing line 1335) is missing from this
// extract; presumably VPPredInstPHIRecipe::execute(VPTransformState &).
1336 assert(State.Instance && "Predicated instruction PHI works per instance.");
1337 Instruction *ScalarPredInst =
1338 cast<Instruction>(State.get(getOperand(0), *State.Instance));
1339 BasicBlock *PredicatedBB = ScalarPredInst->getParent();
1340 BasicBlock *PredicatingBB = PredicatedBB->getSinglePredecessor();
1341 assert(PredicatingBB && "Predicated block has no single predecessor.");
1342 assert(isa<VPReplicateRecipe>(getOperand(0)) &&
1343 "operand must be VPReplicateRecipe");
1344
1345 // By current pack/unpack logic we need to generate only a single phi node: if
1346 // a vector value for the predicated instruction exists at this point it means
1347 // the instruction has vector users only, and a phi for the vector value is
1348 // needed. In this case the recipe of the predicated instruction is marked to
1349 // also do that packing, thereby "hoisting" the insert-element sequence.
1350 // Otherwise, a phi node for the scalar value is needed.
1351 unsigned Part = State.Instance->Part;
1352 if (State.hasVectorValue(getOperand(0), Part)) {
1353 Value *VectorValue = State.get(getOperand(0), Part);
1354 InsertElementInst *IEI = cast<InsertElementInst>(VectorValue);
1355 PHINode *VPhi = State.Builder.CreatePHI(IEI->getType(), 2);
1356 VPhi->addIncoming(IEI->getOperand(0), PredicatingBB); // Unmodified vector.
1357 VPhi->addIncoming(IEI, PredicatedBB); // New vector with inserted element.
// Use reset() when a value is already recorded for this recipe/part and
// set() when this is the first one.
1358 if (State.hasVectorValue(this, Part))
1359 State.reset(this, VPhi, Part);
1360 else
1361 State.set(this, VPhi, Part);
1362 // NOTE: Currently we need to update the value of the operand, so the next
1363 // predicated iteration inserts its generated value in the correct vector.
1364 State.reset(getOperand(0), VPhi, Part);
1365 } else {
1366 Type *PredInstType = getOperand(0)->getUnderlyingValue()->getType();
1367 PHINode *Phi = State.Builder.CreatePHI(PredInstType, 2);
// On the edge that bypasses the predicated block the value is poison.
1368 Phi->addIncoming(PoisonValue::get(ScalarPredInst->getType()),
1369 PredicatingBB);
1370 Phi->addIncoming(ScalarPredInst, PredicatedBB);
1371 if (State.hasScalarValue(this, *State.Instance))
1372 State.reset(this, Phi, *State.Instance);
1373 else
1374 State.set(this, Phi, *State.Instance);
1375 // NOTE: Currently we need to update the value of the operand, so the next
1376 // predicated iteration inserts its generated value in the correct vector.
1377 State.reset(getOperand(0), Phi, *State.Instance);
1378 }
1379}
1380
1381#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1383 VPSlotTracker &SlotTracker) const {
// Debug/dump-only printer: emits "PHI-PREDICATED-INSTRUCTION " and " = ".
// NOTE(review): listing lines 1382, 1385 and 1387 are missing from this
// extract (presumably the signature start and result/operand printing).
1384 O << Indent << "PHI-PREDICATED-INSTRUCTION ";
1386 O << " = ";
1388}
1389
1391 VPSlotTracker &SlotTracker) const {
// Debug/dump-only printer: emits "WIDEN ", a " = " only when the recipe is
// not a store, and then the opcode name of Ingredient followed by a space.
// NOTE(review): listing lines 1390, 1395 and 1400 are missing from this
// extract (presumably the signature start and result/operand printing).
1392 O << Indent << "WIDEN ";
1393
1394 if (!isStore()) {
1396 O << " = ";
1397 }
1398 O << Instruction::getOpcodeName(Ingredient.getOpcode()) << " ";
1399
1401}
1402#endif
1403
// Body of the canonical-IV phi code generator: creates a phi named "index"
// with the type of the live-in start value, inserts it at the first insertion
// point of State.CFG.PrevBB, adds the start value as incoming from the
// preheader block, and records that single phi for every unroll part.
// NOTE(review): the function header (listing line 1404) is missing from this
// extract; presumably VPCanonicalIVPHIRecipe::execute(VPTransformState &).
1405 Value *Start = getStartValue()->getLiveInIRValue();
1406 PHINode *EntryPart = PHINode::Create(Start->getType(), 2, "index");
1407 EntryPart->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
1408
1409 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
1410 EntryPart->addIncoming(Start, VectorPH);
1411 EntryPart->setDebugLoc(getDebugLoc());
// All parts share the same phi; no second incoming value is added here.
1412 for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
1413 State.set(this, EntryPart, Part);
1414}
1415
1416#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1418 VPSlotTracker &SlotTracker) const {
// Debug/dump-only printer: emits "EMIT " and " = CANONICAL-INDUCTION".
// NOTE(review): listing lines 1417 and 1420 are missing from this extract
// (presumably the signature start and the result printing).
1419 O << Indent << "EMIT ";
1421 O << " = CANONICAL-INDUCTION";
1422}
1423#endif
1424
1427 Type *Ty) const {
// From the visible checks: returns true only if Start is this recipe's start
// value, Step has no defining recipe, and Step's live-in IR value is a
// ConstantInt equal to one.
// NOTE(review): listing lines 1425-1426 (start of the signature) and 1429
// (the `if` condition guarding the first `return false`) are missing from
// this extract; per the comment below, that condition checks the type and
// that the induction is an integer induction -- confirm against the real file.
1428 // The types must match and it must be an integer induction.
1430 return false;
1431 // Start must match the start value of this canonical induction.
1432 if (Start != getStartValue())
1433 return false;
1434
1435 // If the step is defined by a recipe, it is not a ConstantInt.
1436 if (Step->getDefiningRecipe())
1437 return false;
1438
1439 ConstantInt *StepC = dyn_cast<ConstantInt>(Step->getLiveInIRValue());
1440 return StepC && StepC->isOne();
1441}
1442
// Returns true when IsScalarAfterVectorization is set and either VF is not
// scalable or only the first lane of this recipe is used.
// NOTE(review): the function header (listing line 1443) is missing from this
// extract; it evidently takes an ElementCount-like `VF` -- presumably
// VPWidenPointerInductionRecipe::onlyScalarsGenerated(ElementCount VF).
1444 return IsScalarAfterVectorization &&
1445 (!VF.isScalable() || vputils::onlyFirstLaneUsed(this));
1446}
1447
1448#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1450 VPSlotTracker &SlotTracker) const {
// Debug/dump-only printer: emits "EMIT ", " = WIDEN-POINTER-INDUCTION ", and
// ", " followed by the step stored in IndDesc.
// NOTE(review): listing lines 1449, 1452 and 1454 are missing from this
// extract (presumably the signature start and result/operand printing).
1451 O << Indent << "EMIT ";
1453 O << " = WIDEN-POINTER-INDUCTION ";
1455 O << ", " << *IndDesc.getStep();
1456}
1457#endif
1458
// Body of the SCEV-expansion code generator: expands `Expr` into IR at the
// builder's current insert point using a SCEVExpander named "induction",
// records the result in State.ExpandedSCEVs (asserting it was not already
// there), and stores it as the lane-0 value of every unroll part.
// NOTE(review): the function header (listing line 1459) is missing from this
// extract; presumably VPExpandSCEVRecipe::execute(VPTransformState &).
1460 assert(!State.Instance && "cannot be used in per-lane");
1461 const DataLayout &DL = State.CFG.PrevBB->getModule()->getDataLayout();
1462 SCEVExpander Exp(SE, DL, "induction");
1463
1464 Value *Res = Exp.expandCodeFor(Expr, Expr->getType(),
1465 &*State.Builder.GetInsertPoint());
1466 assert(!State.ExpandedSCEVs.contains(Expr) &&
1467 "Same SCEV expanded multiple times");
1468 State.ExpandedSCEVs[Expr] = Res;
// {Part, 0} addresses lane 0 of the given part.
1469 for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
1470 State.set(this, Res, {Part, 0});
1471}
1472
1473#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1475 VPSlotTracker &SlotTracker) const {
// Debug/dump-only printer: emits "EMIT " and " = EXPAND SCEV " followed by
// the SCEV expression itself.
// NOTE(review): listing lines 1474 and 1477 are missing from this extract
// (presumably the signature start and the result printing).
1476 O << Indent << "EMIT ";
1478 O << " = EXPAND SCEV " << *Expr;
1479}
1480#endif
1481
// Body of the widened canonical-IV code generator. It takes part 0 of
// operand 0 as the canonical IV (broadcast to VF lanes when VF is a vector),
// and for each unroll part adds a per-part step obtained from
// createStepForVF; for vector VFs that step is splatted and a step vector is
// added to it. The sum is recorded for the part under the name "vec.iv".
// NOTE(review): the function header (listing line 1482) and the declaration
// of `Builder` (1485) are missing from this extract; presumably
// VPWidenCanonicalIVRecipe::execute(VPTransformState &).
1483 Value *CanonicalIV = State.get(getOperand(0), 0);
1484 Type *STy = CanonicalIV->getType();
1486 ElementCount VF = State.VF;
1487 Value *VStart = VF.isScalar()
1488 ? CanonicalIV
1489 : Builder.CreateVectorSplat(VF, CanonicalIV, "broadcast");
1490 for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
1491 Value *VStep = createStepForVF(Builder, STy, VF, Part);
1492 if (VF.isVector()) {
1493 VStep = Builder.CreateVectorSplat(VF, VStep);
1494 VStep =
1495 Builder.CreateAdd(VStep, Builder.CreateStepVector(VStep->getType()));
1496 }
1497 Value *CanonicalVectorIV = Builder.CreateAdd(VStart, VStep, "vec.iv");
1498 State.set(this, CanonicalVectorIV, Part);
1499 }
1500}
1501
1502#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1504 VPSlotTracker &SlotTracker) const {
// Debug/dump-only printer: emits "EMIT " and " = WIDEN-CANONICAL-INDUCTION ".
// NOTE(review): listing lines 1503, 1506 and 1508 are missing from this
// extract (presumably the signature start and result/operand printing).
1505 O << Indent << "EMIT ";
1507 O << " = WIDEN-CANONICAL-INDUCTION ";
1509}
1510#endif
1511
// Body of the first-order-recurrence phi code generator. For vector VFs the
// live-in start value is inserted, in the preheader, into the last lane
// (runtime VF - 1) of a poison vector; for scalar VF it is used as-is. A phi
// named "vector.recur" is then created at the first insertion point of
// State.CFG.PrevBB with that value incoming from the preheader, and recorded
// for part 0 only.
// NOTE(review): the function header (listing line 1512) and listing line 1525
// are missing from this extract; presumably
// VPFirstOrderRecurrencePHIRecipe::execute(VPTransformState &).
1513 auto &Builder = State.Builder;
1514 // Create a vector from the initial value.
1515 auto *VectorInit = getStartValue()->getLiveInIRValue();
1516
1517 Type *VecTy = State.VF.isScalar()
1518 ? VectorInit->getType()
1519 : VectorType::get(VectorInit->getType(), State.VF);
1520
1521 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
1522 if (State.VF.isVector()) {
1523 auto *IdxTy = Builder.getInt32Ty();
1524 auto *One = ConstantInt::get(IdxTy, 1);
// The builder's insert point is moved to the preheader terminator here.
// NOTE(review): the missing line 1525 may save/restore the insert point --
// confirm against the real file.
1526 Builder.SetInsertPoint(VectorPH->getTerminator());
1527 auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF);
1528 auto *LastIdx = Builder.CreateSub(RuntimeVF, One);
1529 VectorInit = Builder.CreateInsertElement(
1530 PoisonValue::get(VecTy), VectorInit, LastIdx, "vector.recur.init");
1531 }
1532
1533 // Create a phi node for the new recurrence.
1534 PHINode *EntryPart = PHINode::Create(VecTy, 2, "vector.recur");
1535 EntryPart->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
1536 EntryPart->addIncoming(VectorInit, VectorPH);
1537 State.set(this, EntryPart, 0);
1538}
1539
1540#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1542 VPSlotTracker &SlotTracker) const {
// Debug/dump-only printer: emits "FIRST-ORDER-RECURRENCE-PHI " and " = phi ".
// NOTE(review): listing lines 1541, 1544 and 1546 are missing from this
// extract (presumably the signature start and result/operand printing).
1543 O << Indent << "FIRST-ORDER-RECURRENCE-PHI ";
1545 O << " = phi ";
1547}
1548#endif
1549
// Body of the reduction phi code generator. It creates one "vec.phi" per
// unroll part (only one when isOrdered() is true) in the loop header, then
// adds the incoming value from the preheader: part 0 receives the start value
// and every other part receives the identity value. The phi is scalar-typed
// when VF is scalar or IsInLoop is set, and vector-typed otherwise.
// NOTE(review): the function header (listing line 1550) and listing lines
// 1581-1582, 1587 and 1598 are missing from this extract. Lines 1581-1582
// hold the `if` whose `else` is visible below; per the visible comment it
// selects MinMax and AnyOf reductions -- confirm against the real file.
1551 PHINode *PN = cast<PHINode>(getUnderlyingValue());
1552 auto &Builder = State.Builder;
1553
1554 // In order to support recurrences we need to be able to vectorize Phi nodes.
1555 // Phi nodes have cycles, so we need to vectorize them in two stages. This is
1556 // stage #1: We create a new vector PHI node with no incoming edges. We'll use
1557 // this value when we vectorize all of the instructions that use the PHI.
1558 bool ScalarPHI = State.VF.isScalar() || IsInLoop;
1559 Type *VecTy =
1560 ScalarPHI ? PN->getType() : VectorType::get(PN->getType(), State.VF);
1561
1562 BasicBlock *HeaderBB = State.CFG.PrevBB;
1563 assert(State.CurrentVectorLoop->getHeader() == HeaderBB &&
1564 "recipe must be in the vector loop header");
1565 unsigned LastPartForNewPhi = isOrdered() ? 1 : State.UF;
1566 for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
1567 Instruction *EntryPart = PHINode::Create(VecTy, 2, "vec.phi");
1568 EntryPart->insertBefore(HeaderBB->getFirstInsertionPt());
1569 State.set(this, EntryPart, Part);
1570 }
1571
1572 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
1573
1574 // Reductions do not have to start at zero. They can start with
1575 // any loop invariant values.
1576 VPValue *StartVPV = getStartValue();
1577 Value *StartV = StartVPV->getLiveInIRValue();
1578
1579 Value *Iden = nullptr;
1580 RecurKind RK = RdxDesc.getRecurrenceKind();
1583 // MinMax and AnyOf reductions have the start value as their identity.
1584 if (ScalarPHI) {
1585 Iden = StartV;
1586 } else {
// Vector case: the start value is splatted in the preheader and serves as
// both the start and the identity vector.
1588 Builder.SetInsertPoint(VectorPH->getTerminator());
1589 StartV = Iden =
1590 Builder.CreateVectorSplat(State.VF, StartV, "minmax.ident");
1591 }
1592 } else {
1593 Iden = RdxDesc.getRecurrenceIdentity(RK, VecTy->getScalarType(),
1594 RdxDesc.getFastMathFlags());
1595
1596 if (!ScalarPHI) {
// Vector case: splat the identity, then place the start value in lane 0 of
// that identity vector (built in the preheader).
1597 Iden = Builder.CreateVectorSplat(State.VF, Iden);
1599 Builder.SetInsertPoint(VectorPH->getTerminator());
1600 Constant *Zero = Builder.getInt32(0);
1601 StartV = Builder.CreateInsertElement(Iden, StartV, Zero);
1602 }
1603 }
1604
1605 for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
1606 Value *EntryPart = State.get(this, Part);
1607 // Make sure to add the reduction start value only to the
1608 // first unroll part.
1609 Value *StartVal = (Part == 0) ? StartV : Iden;
1610 cast<PHINode>(EntryPart)->addIncoming(StartVal, VectorPH);
1611 }
1612}
1613
1614#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1616 VPSlotTracker &SlotTracker) const {
// Debug/dump-only printer: emits "WIDEN-REDUCTION-PHI " and " = phi ".
// NOTE(review): listing lines 1615, 1619 and 1621 are missing from this
// extract (presumably the signature start and result/operand printing).
1617 O << Indent << "WIDEN-REDUCTION-PHI ";
1618
1620 O << " = phi ";
1622}
1623#endif
1624
1627 "Non-native vplans are not expected to have VPWidenPHIRecipes.");
1628
// Creates an empty two-input phi named "vec.phi" whose type is that of part 0
// of operand 0, and records it for part 0. No incoming values are added here.
// NOTE(review): the function header and the start of the assert above
// (listing lines 1625-1626) are missing from this extract; presumably
// VPWidenPHIRecipe::execute(VPTransformState &).
1629 Value *Op0 = State.get(getOperand(0), 0);
1630 Type *VecTy = Op0->getType();
1631 Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, "vec.phi");
1632 State.set(this, VecPhi, 0);
1633}
1634
1635#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1637 VPSlotTracker &SlotTracker) const {
// Debug/dump-only printer: emits "WIDEN-PHI ", then either the original IR
// phi (when the recipe's operand count differs from the original phi's) or
// a " = phi " form.
// NOTE(review): listing lines 1636, 1650 and 1652 are missing from this
// extract (presumably the signature start and result/operand printing).
1638 O << Indent << "WIDEN-PHI ";
1639
1640 auto *OriginalPhi = cast<PHINode>(getUnderlyingValue());
1641 // Unless all incoming values are modeled in VPlan print the original PHI
1642 // directly.
1643 // TODO: Remove once all VPWidenPHIRecipe instances keep all relevant incoming
1644 // values as VPValues.
1645 if (getNumOperands() != OriginalPhi->getNumOperands()) {
1646 O << VPlanIngredient(OriginalPhi);
1647 return;
1648 }
1649
1651 O << " = phi ";
1653}
1654#endif
1655
1656// TODO: It would be good to use the existing VPWidenPHIRecipe instead and
1657// remove VPActiveLaneMaskPHIRecipe.
// Body of the active-lane-mask phi code generator: for each unroll part it
// creates a phi named "active.lane.mask" with the type of that part's start
// mask (operand 0), adds the start mask as incoming from the preheader block,
// copies the recipe's debug location, and records the phi for the part.
// NOTE(review): the function header (listing line 1658) is missing from this
// extract; presumably VPActiveLaneMaskPHIRecipe::execute(VPTransformState &).
1659 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
1660 for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
1661 Value *StartMask = State.get(getOperand(0), Part);
1662 PHINode *EntryPart =
1663 State.Builder.CreatePHI(StartMask->getType(), 2, "active.lane.mask");
1664 EntryPart->addIncoming(StartMask, VectorPH);
1665 EntryPart->setDebugLoc(getDebugLoc());
1666 State.set(this, EntryPart, Part);
1667 }
1668}
1669
1670#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1672 VPSlotTracker &SlotTracker) const {
// Debug/dump-only printer: emits "ACTIVE-LANE-MASK-PHI " and " = phi ".
// NOTE(review): listing lines 1671, 1675 and 1677 are missing from this
// extract (presumably the signature start and result/operand printing).
1673 O << Indent << "ACTIVE-LANE-MASK-PHI ";
1674
1676 O << " = phi ";
1678}
1679#endif
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
assume Assume Builder
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_DEBUG(X)
Definition: Debug.h:101
std::string Name
Hexagon Common GEP
#define I(x, y, z)
Definition: MD5.cpp:58
mir Rename Register Operands
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
static Value * getStepVector(Value *Val, Value *StartIdx, Value *Step, Instruction::BinaryOps BinOp, ElementCount VF, IRBuilderBase &Builder)
This function adds (StartIdx * Step, (StartIdx + 1) * Step, (StartIdx + 2) * Step,...
static Constant * getSignedIntOrFpConstant(Type *Ty, int64_t C)
A helper function that returns an integer or floating-point constant with value C.
static Value * getRuntimeVFAsFloat(IRBuilderBase &B, Type *FTy, ElementCount VF)
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition: VPlanSLP.cpp:191
This file contains the declarations of the Vectorization Plan base classes:
static const uint32_t IV[8]
Definition: blake3_impl.h:78
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
LLVM Basic Block Representation.
Definition: BasicBlock.h:56
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:257
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:296
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:127
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:145
Conditional or Unconditional Branch instruction.
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
This class represents a function call, abstracting a target machine's calling convention.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:711
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:734
static StringRef getPredicateName(Predicate P)
static Constant * get(Type *Ty, double V)
This returns a ConstantFP, or a vector containing a splat of a ConstantFP, for the specified value in...
Definition: Constants.cpp:927
This is the shared class of boolean and integer constants.
Definition: Constants.h:78
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
Definition: Constants.h:203
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:888
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
Definition: Constants.h:114
static Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
Definition: Constants.cpp:1385
This is an important base class in LLVM.
Definition: Constant.h:41
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
A debug info location.
Definition: DebugLoc.h:33
constexpr bool isVector() const
One or more elements.
Definition: TypeSize.h:306
constexpr bool isScalar() const
Exactly one element.
Definition: TypeSize.h:302
Convenience struct for specifying and reasoning about fast-math flags.
Definition: FMF.h:20
void setAllowContract(bool B=true)
Definition: FMF.h:91
bool noSignedZeros() const
Definition: FMF.h:68
bool noInfs() const
Definition: FMF.h:67
void setAllowReciprocal(bool B=true)
Definition: FMF.h:88
bool allowReciprocal() const
Definition: FMF.h:69
void print(raw_ostream &O) const
Print fast-math flags to O.
Definition: Operator.cpp:233
void setNoSignedZeros(bool B=true)
Definition: FMF.h:85
bool allowReassoc() const
Flag queries.
Definition: FMF.h:65
bool approxFunc() const
Definition: FMF.h:71
void setNoNaNs(bool B=true)
Definition: FMF.h:79
void setAllowReassoc(bool B=true)
Flag setters.
Definition: FMF.h:76
bool noNaNs() const
Definition: FMF.h:66
void setApproxFunc(bool B=true)
Definition: FMF.h:94
void setNoInfs(bool B=true)
Definition: FMF.h:82
bool allowContract() const
Definition: FMF.h:70
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:94
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1993
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2419
Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains.
Definition: IRBuilder.cpp:1223
ConstantInt * getTrue()
Get the constant value for i1 true.
Definition: IRBuilder.h:452
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.cpp:1119
BasicBlock::iterator GetInsertPoint() const
Definition: IRBuilder.h:175
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:174
void setFastMathFlags(FastMathFlags NewFMF)
Set the fast-math flags to be used with generated fp-math operators.
Definition: IRBuilder.h:297
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:472
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition: IRBuilder.h:2356
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2371
Value * CreateGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="", bool IsInBounds=false)
Definition: IRBuilder.h:1862
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2625
A struct for saving information about induction variables.
const SCEV * getStep() const
InductionKind
This enum represents the kinds of inductions that we support.
@ IK_IntInduction
Integer induction variable. Step = C.
This instruction inserts a single (scalar) element into a VectorType value.
VectorType * getType() const
Overload to return most specific vector type.
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
Definition: Instruction.cpp:89
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:392
bool isBinaryOp() const
Definition: Instruction.h:200
const BasicBlock * getParent() const
Definition: Instruction.h:90
FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
const char * getOpcodeName() const
Definition: Instruction.h:197
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:195
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:389
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:279
BlockT * getHeader() const
void print(raw_ostream &OS, const SlotIndexes *=nullptr, bool IsStandalone=true) const
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.h:254
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Definition: Constants.cpp:1743
FastMathFlags getFastMathFlags() const
static unsigned getOpcode(RecurKind Kind)
Returns the opcode corresponding to the RecurrenceKind.
static bool isAnyOfRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
RecurKind getRecurrenceKind() const
Value * getRecurrenceIdentity(RecurKind K, Type *Tp, FastMathFlags FMF) const
Returns identity corresponding to the RecurrenceKind.
StoreInst * IntermediateStore
Reductions may store temporary or final result to an invariant address.
static bool isMinMaxRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is any min/max kind.
This class uses information about analyze scalars to rewrite expressions in canonical form.
Type * getType() const
Return the LLVM type of this SCEV expression.
This class provides computation of slot numbers for LLVM Assembly writing.
Definition: AsmWriter.cpp:677
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
This class represents a truncation of integer types.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:265
static IntegerType * getInt1Ty(LLVMContext &C)
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:129
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition: Type.h:185
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
bool isVoidTy() const
Return true if this is 'void'.
Definition: Type.h:140
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:348
Value * getOperand(unsigned i) const
Definition: User.h:169
void execute(VPTransformState &State) override
Generate the active lane mask phi of the vector loop.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition: VPlan.h:2253
RecipeListTy & getRecipeList()
Returns a reference to the list of recipes.
Definition: VPlan.h:2300
iterator end()
Definition: VPlan.h:2284
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getIncomingValue(unsigned Idx) const
Return incoming value number Idx.
Definition: VPlan.h:1642
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
Definition: VPlan.h:1645
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account that a single incoming value has no mask.
Definition: VPlan.h:1639
void execute(VPTransformState &State) override
Generate the phi/select nodes.
VPRegionBlock * getParent()
Definition: VPlan.h:492
size_t getNumSuccessors() const
Definition: VPlan.h:537
VPlan * getPlan()
Definition: VPlan.cpp:147
const VPBasicBlock * getEntryBasicBlock() const
Definition: VPlan.cpp:152
VPBlockBase * getSingleSuccessor() const
Definition: VPlan.h:527
VPValue * getMask() const
Return the mask used by this recipe.
Definition: VPlan.h:1886
void execute(VPTransformState &State) override
Generate the extraction of the appropriate bit from the block mask and the conditional branch.
bool isCanonical(InductionDescriptor::InductionKind Kind, VPValue *Start, VPValue *Step, Type *Ty) const
Check if the induction described by Kind, Start and Step is canonical, i.e.
void execute(VPTransformState &State) override
Generate the canonical scalar induction phi of the vector loop.
const Type * getScalarType() const
Returns the scalar type of the induction.
Definition: VPlan.h:2083
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
This class augments a recipe with a set of VPValues defined by the recipe.
Definition: VPlanValue.h:306
VPValue * getVPSingleValue()
Returns the only VPValue defined by the VPDef.
Definition: VPlanValue.h:388
VPValue * getVPValue(unsigned I)
Returns the VPValue with index I defined by the VPDef.
Definition: VPlanValue.h:400
unsigned getVPDefID() const
Definition: VPlanValue.h:420
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getStepValue() const
Definition: VPlan.h:2197
VPValue * getCanonicalIV() const
Definition: VPlan.h:2196
VPValue * getStartValue() const
Definition: VPlan.h:2195
void execute(VPTransformState &State) override
Generate a canonical vector induction variable of the vector loop, with.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getStartValue()
Returns the start value of the phi, if one is set.
Definition: VPlan.h:1369
bool hasResult() const
Definition: VPlan.h:1109
@ FirstOrderRecurrenceSplice
Definition: VPlan.h:1024
@ CanonicalIVIncrementForPart
Definition: VPlan.h:1035
@ CalculateTripCountMinusVF
Definition: VPlan.h:1031
LLVM_DUMP_METHOD void dump() const
Print the VPInstruction to dbgs() (for debugging).
unsigned getOpcode() const
Definition: VPlan.h:1085
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the VPInstruction to O.
void execute(VPTransformState &State) override
Generate the instruction.
static VPLane getLastLaneForVF(const ElementCount &VF)
Definition: VPlan.h:166
static VPLane getFirstLane()
Definition: VPlan.h:164
void print(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the VPLiveOut to O.
PHINode * getPhi() const
Definition: VPlan.h:692
void fixPhi(VPlan &Plan, VPTransformState &State)
Fixup the wrapped LCSSA phi node in the unique exit block.
void execute(VPTransformState &State) override
Generates phi nodes for live-outs as needed to retain SSA form.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition: VPlan.h:707
bool mayReadFromMemory() const
Returns true if the recipe may read from memory.
bool mayHaveSideEffects() const
Returns true if the recipe may have side-effects.
Instruction * getUnderlyingInstr()
Returns the underlying instruction, if the recipe is a VPValue or nullptr otherwise.
Definition: VPlan.h:767
bool mayWriteToMemory() const
Returns true if the recipe may write to memory.
VPBasicBlock * getParent()
Definition: VPlan.h:729
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
Definition: VPlan.h:804
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
void moveAfter(VPRecipeBase *MovePos)
Unlink this recipe from its current VPBasicBlock and insert it into the VPBasicBlock that MovePos liv...
Class to record LLVM IR flag for a recipe along with it.
Definition: VPlan.h:825
ExactFlagsTy ExactFlags
Definition: VPlan.h:867
FastMathFlagsTy FMFs
Definition: VPlan.h:869
void setFlags(Instruction *I) const
Set the IR flags for I.
Definition: VPlan.h:953
bool isInBounds() const
Definition: VPlan.h:986
GEPFlagsTy GEPFlags
Definition: VPlan.h:868
bool hasFastMathFlags() const
Returns true if the recipe has fast-math flags.
Definition: VPlan.h:993
WrapFlagsTy WrapFlags
Definition: VPlan.h:866
bool hasNoUnsignedWrap() const
Definition: VPlan.h:997
void printFlags(raw_ostream &O) const
CmpInst::Predicate getPredicate() const
Definition: VPlan.h:980
bool hasNoSignedWrap() const
Definition: VPlan.h:1003
FastMathFlags getFastMathFlags() const
bool isOrdered() const
Returns true if the phi is part of an ordered reduction.
Definition: VPlan.h:1613
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi/select nodes.
VPValue * getVecOp() const
The VPValue of the vector value to be reduced.
Definition: VPlan.h:1784
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getCondOp() const
The VPValue of the condition for the block.
Definition: VPlan.h:1786
VPValue * getChainOp() const
The VPValue of the scalar Chain being accumulated.
Definition: VPlan.h:1782
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition: VPlan.h:2377
const VPBlockBase * getEntry() const
Definition: VPlan.h:2416
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool shouldPack() const
Returns true if the recipe is used by a widened recipe via an intervening VPPredInstPHIRecipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getStepValue() const
Definition: VPlan.h:2240
void execute(VPTransformState &State) override
Generate the scalarized versions of the phi node as needed by their users.
This class can be used to assign consecutive numbers to all VPValues in a VPlan and allows querying t...
Definition: VPlanValue.h:438
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition: VPlanValue.h:204
void printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the operands to O.
Definition: VPlan.cpp:1140
operand_range operands()
Definition: VPlanValue.h:279
unsigned getNumOperands() const
Definition: VPlanValue.h:253
operand_iterator op_begin()
Definition: VPlanValue.h:275
VPValue * getOperand(unsigned N) const
Definition: VPlanValue.h:254
Value * getUnderlyingValue()
Return the underlying Value attached to this VPValue.
Definition: VPlanValue.h:84
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Definition: VPlan.cpp:117
void printAsOperand(raw_ostream &OS, VPSlotTracker &Tracker) const
Definition: VPlan.cpp:1125
friend class VPInstruction
Definition: VPlanValue.h:47
Value * getLiveInIRValue()
Returns the underlying IR value, if this VPValue is defined outside the scope of VPlan.
Definition: VPlanValue.h:180
user_range users()
Definition: VPlanValue.h:147
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Produce a widened version of the call instruction.
void execute(VPTransformState &State) override
Generate a canonical vector induction variable of the vector loop, with start = {<Part*VF,...
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Type * getResultType() const
Returns the result type of the cast.
Definition: VPlan.h:1212
void execute(VPTransformState &State) override
Produce widened copies of the cast.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the gep nodes.
TruncInst * getTruncInst()
Returns the first defined value as TruncInst, if it is one or nullptr otherwise.
Definition: VPlan.h:1448
void execute(VPTransformState &State) override
Generate the vectorized and scalarized versions of the phi node as needed by their users.
VPValue * getStepValue()
Returns the step value of the induction.
Definition: VPlan.h:1443
bool isCanonical() const
Returns true if the induction is canonical, i.e.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
Definition: VPlan.h:1454
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool isStore() const
Returns true if this recipe is a store.
Definition: VPlan.h:1991
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi/select nodes.
bool onlyScalarsGenerated(ElementCount VF)
Returns true if only scalar values will be generated.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Produce widened copies of all Ingredients.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient outp...
Definition: VPlan.h:2474
VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition: VPlan.h:2668
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:378
void printAsOperand(raw_ostream &O, bool PrintType=true, const Module *M=nullptr) const
Print the name of this Value out to the specified raw_ostream.
Definition: AsmWriter.cpp:4777
bool hasName() const
Definition: Value.h:261
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
static VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
Definition: Type.cpp:677
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:166
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:163
Iterator for intrusive lists based on ilist_node.
self_iterator getIterator()
Definition: ilist_node.h:82
iterator erase(iterator where)
Definition: ilist.h:204
pointer remove(iterator &IT)
Definition: ilist.h:188
iterator insertAfter(iterator where, pointer New)
Definition: ilist.h:174
iterator insert(iterator where, pointer New)
Definition: ilist.h:165
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1422
NodeAddr< PhiNode * > Phi
Definition: RDFGraph.h:390
bool isUniformAfterVectorization(VPValue *VPV)
Returns true if VPV is uniform after vectorization.
Definition: VPlan.h:3021
bool onlyFirstLaneUsed(VPValue *Def)
Returns true if only the first lane of Def is used.
Definition: VPlan.cpp:1223
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void ReplaceInstWithInst(BasicBlock *BB, BasicBlock::iterator &BI, Instruction *I)
Replace the instruction specified by BI with the instruction specified by I.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A,...
Definition: STLExtras.h:2338
bool isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, int OpdIdx)
Identifies if the vector form of the intrinsic is overloaded on the type of the operand at index OpdI...
Value * getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF)
Return the runtime value for VF.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void interleaveComma(const Container &c, StreamT &os, UnaryFunctor each_fn)
Definition: STLExtras.h:2121
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1734
cl::opt< bool > EnableVPlanNativePath("enable-vplan-native-path", cl::Hidden, cl::desc("Enable VPlan-native vectorization path with " "support for outer loop vectorization."))
Definition: VPlan.cpp:52
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
RecurKind
These are the kinds of recurrences that we support.
Definition: IVDescriptors.h:35
@ Mul
Product of integers.
@ Add
Sum of integers.
Value * createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF, int64_t Step)
Return a value for Step multiplied by VF.
bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx)
Identifies if the vector form of the intrinsic has a scalar operand.
void execute(VPTransformState &State) override
Generate the phi nodes.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPIteration represents a single point in the iteration space of the output (vectorized and/or unrolle...
Definition: VPlan.h:216
BasicBlock * PrevBB
The previous IR BasicBlock created or used.
Definition: VPlan.h:366
SmallDenseMap< VPBasicBlock *, BasicBlock * > VPBB2IRBB
A mapping of each VPBasicBlock to the corresponding BasicBlock.
Definition: VPlan.h:374
BasicBlock * getPreheaderBBFor(VPRecipeBase *R)
Returns the BasicBlock* mapped to the pre-header of the loop region containing R.
Definition: VPlan.cpp:330
VPTransformState holds information passed down when "executing" a VPlan, needed for generating the ou...
Definition: VPlan.h:233
DenseMap< const SCEV *, Value * > ExpandedSCEVs
Map SCEVs to their expanded values.
Definition: VPlan.h:415
void addMetadata(Instruction *To, Instruction *From)
Add metadata from one instruction to another.
Definition: VPlan.cpp:343
Value * get(VPValue *Def, unsigned Part)
Get the generated Value for a given VPValue and a given Part.
Definition: VPlan.cpp:238
void reset(VPValue *Def, Value *V, unsigned Part)
Reset an existing vector value for Def and a given Part.
Definition: VPlan.h:300
struct llvm::VPTransformState::CFGState CFG
std::optional< VPIteration > Instance
Hold the indices to generate specific scalar instructions.
Definition: VPlan.h:247
IRBuilderBase & Builder
Hold a reference to the IRBuilder used to generate output IR code.
Definition: VPlan.h:390
bool hasScalarValue(VPValue *Def, VPIteration Instance)
Definition: VPlan.h:281
bool hasVectorValue(VPValue *Def, unsigned Part)
Definition: VPlan.h:271
ElementCount VF
The chosen Vectorization and Unroll Factors of the loop being vectorized.
Definition: VPlan.h:241
Loop * CurrentVectorLoop
The loop object for the current parent region, or nullptr.
Definition: VPlan.h:404
void setDebugLocFrom(DebugLoc DL)
Set the debug location in the builder using the debug location DL.
Definition: VPlan.cpp:363
void set(VPValue *Def, Value *V, unsigned Part)
Set the generated Value for a given VPValue and a given Part.
Definition: VPlan.h:292
bool isInvariantCond() const
Definition: VPlan.h:1272
VPValue * getCond() const
Definition: VPlan.h:1268
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Produce a widened version of the select instruction.