LLVM 17.0.0git
VPlanRecipes.cpp
Go to the documentation of this file.
1//===- VPlanRecipes.cpp - Implementations for VPlan recipes ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file contains implementations for different VPlan recipes.
11///
12//===----------------------------------------------------------------------===//
13
14#include "VPlan.h"
15#include "llvm/ADT/STLExtras.h"
17#include "llvm/ADT/Twine.h"
19#include "llvm/IR/BasicBlock.h"
20#include "llvm/IR/IRBuilder.h"
21#include "llvm/IR/Instruction.h"
23#include "llvm/IR/Type.h"
24#include "llvm/IR/Value.h"
27#include "llvm/Support/Debug.h"
31#include <cassert>
32
33using namespace llvm;
34
36
37namespace llvm {
39}
40
41#define LV_NAME "loop-vectorize"
42#define DEBUG_TYPE LV_NAME
43
// NOTE(review): the line holding this function's signature (listing line 44)
// is absent from this extract; judging by the returns and the assert text
// below, it answers "may this recipe write to memory?" -- confirm upstream.
// Recipe kinds not listed here fall through to the default and report true.
45 switch (getVPDefID()) {
46 case VPWidenMemoryInstructionSC: {
// For a widened memory recipe the answer is whether it is a store.
47 return cast<VPWidenMemoryInstructionRecipe>(this)->isStore();
48 }
49 case VPReplicateSC:
50 case VPWidenCallSC:
// Defer to the underlying IR instruction's own answer.
51 return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
52 ->mayWriteToMemory();
53 case VPBranchOnMaskSC:
54 case VPScalarIVStepsSC:
55 case VPPredInstPHISC:
56 return false;
57 case VPWidenIntOrFpInductionSC:
58 case VPWidenCanonicalIVSC:
59 case VPWidenPHISC:
60 case VPBlendSC:
61 case VPWidenSC:
62 case VPWidenGEPSC:
63 case VPReductionSC:
64 case VPWidenSelectSC: {
// These kinds always report false; the assert only cross-checks that answer
// against the underlying instruction when one exists (it may be null).
65 const Instruction *I =
66 dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
// Silences the unused-variable warning when asserts are compiled out.
67 (void)I;
68 assert((!I || !I->mayWriteToMemory()) &&
69 "underlying instruction may write to memory");
70 return false;
71 }
72 default:
73 return true;
74 }
75}
76
// NOTE(review): the signature line (listing line 77) is absent from this
// extract; judging by the returns and the assert text below, this answers
// "may this recipe read from memory?" -- confirm upstream.
// Recipe kinds not listed here fall through to the default and report true.
78 switch (getVPDefID()) {
79 case VPWidenMemoryInstructionSC: {
// For a widened memory recipe the answer is "it is not a store".
80 return !cast<VPWidenMemoryInstructionRecipe>(this)->isStore();
81 }
82 case VPReplicateSC:
83 case VPWidenCallSC:
// Defer to the underlying IR instruction's own answer.
84 return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
85 ->mayReadFromMemory();
86 case VPBranchOnMaskSC:
87 case VPScalarIVStepsSC:
88 case VPPredInstPHISC:
89 return false;
90 case VPWidenIntOrFpInductionSC:
91 case VPWidenCanonicalIVSC:
92 case VPWidenPHISC:
93 case VPBlendSC:
94 case VPWidenSC:
95 case VPWidenGEPSC:
96 case VPReductionSC:
97 case VPWidenSelectSC: {
// These kinds always report false; the assert only cross-checks that answer
// against the underlying instruction when one exists (it may be null).
98 const Instruction *I =
99 dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
// Silences the unused-variable warning when asserts are compiled out.
100 (void)I;
101 assert((!I || !I->mayReadFromMemory()) &&
102 "underlying instruction may read from memory");
103 return false;
104 }
105 default:
106 return true;
107 }
108}
109
// NOTE(review): the signature line (listing line 110) is absent from this
// extract; judging by the returns and the assert text below, this answers
// "may this recipe have side effects?" -- confirm upstream.
// Recipe kinds not listed here fall through to the default and report true.
111 switch (getVPDefID()) {
112 case VPDerivedIVSC:
113 case VPPredInstPHISC:
114 return false;
115 case VPWidenCallSC:
// Defer to the underlying IR call instruction.
116 return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
117 ->mayHaveSideEffects();
118 case VPWidenIntOrFpInductionSC:
119 case VPWidenPointerInductionSC:
120 case VPWidenCanonicalIVSC:
121 case VPWidenPHISC:
122 case VPBlendSC:
123 case VPWidenSC:
124 case VPWidenGEPSC:
125 case VPReductionSC:
126 case VPWidenSelectSC:
127 case VPScalarIVStepsSC: {
// These kinds always report false; the assert only cross-checks that answer
// against the underlying instruction when one exists (it may be null).
128 const Instruction *I =
129 dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
// Silences the unused-variable warning when asserts are compiled out.
130 (void)I;
131 assert((!I || !I->mayHaveSideEffects()) &&
132 "underlying instruction has side-effects");
133 return false;
134 }
135 case VPWidenMemoryInstructionSC:
// NOTE(review): listing line 138, which carries the asserted condition, is
// absent from this extract, so the assert below is incomplete as shown.
136 assert(cast<VPWidenMemoryInstructionRecipe>(this)
137 ->getIngredient()
139 "mayHaveSideffects result for ingredient differs from this "
140 "implementation");
// The result for a widened memory recipe is the same as mayWriteToMemory().
141 return mayWriteToMemory();
142 case VPReplicateSC: {
143 auto *R = cast<VPReplicateRecipe>(this);
144 return R->getUnderlyingInstr()->mayHaveSideEffects();
145 }
146 default:
147 return true;
148 }
149}
150
// NOTE(review): the signature (listing line 151) and listing line 154 are
// absent from this extract. Line 154 presumably holds the condition under
// which the lane is switched to the first lane below -- confirm upstream.
// The visible code adds one incoming value to Phi: the exit value from the
// last unroll part (UF - 1) at the chosen lane, arriving from the builder's
// current insert block.
152 auto Lane = VPLane::getLastLaneForVF(State.VF);
153 VPValue *ExitValue = getOperand(0);
155 Lane = VPLane::getFirstLane();
156 Phi->addIncoming(State.get(ExitValue, VPIteration(State.UF - 1, Lane)),
157 State.Builder.GetInsertBlock());
158}
159
// NOTE(review): the signature line (listing line 160) is absent from this
// extract. The body inserts this recipe immediately before InsertPos in
// InsertPos's parent block and adopts that block as Parent.
// Preconditions (asserted): this recipe has no parent yet, and InsertPos
// already belongs to a VPBasicBlock.
161 assert(!Parent && "Recipe already in some VPBasicBlock");
162 assert(InsertPos->getParent() &&
163 "Insertion position not in any VPBasicBlock");
164 Parent = InsertPos->getParent();
165 Parent->getRecipeList().insert(InsertPos->getIterator(), this);
166}
167
// NOTE(review): the signature lines (listing lines 168-169) are absent from
// this extract. The body inserts this recipe into block BB at iterator I and
// sets Parent to BB.
// Preconditions (asserted): this recipe has no parent yet, and I is either
// BB.end() or points at a recipe whose parent is BB.
170 assert(!Parent && "Recipe already in some VPBasicBlock");
171 assert(I == BB.end() || I->getParent() == &BB);
172 Parent = &BB;
173 BB.getRecipeList().insert(I, this);
174}
175
// NOTE(review): the signature line (listing line 176) is absent from this
// extract. The body inserts this recipe immediately after InsertPos in
// InsertPos's parent block and adopts that block as Parent.
// Preconditions (asserted): this recipe has no parent yet, and InsertPos
// already belongs to a VPBasicBlock.
177 assert(!Parent && "Recipe already in some VPBasicBlock");
178 assert(InsertPos->getParent() &&
179 "Insertion position not in any VPBasicBlock");
180 Parent = InsertPos->getParent();
181 Parent->getRecipeList().insertAfter(InsertPos->getIterator(), this);
182}
183
// NOTE(review): the signature (listing line 184) and listing line 186 are
// absent from this extract; line 186 presumably performs the actual unlinking
// from the parent's recipe list -- confirm upstream.
// Precondition (asserted): the recipe currently has a parent block.
185 assert(getParent() && "Recipe not in any VPBasicBlock");
// After this call the recipe no longer records a parent block.
187 Parent = nullptr;
188}
189
// NOTE(review): the signature (listing line 190) and the statement on listing
// line 192 are absent from this extract; only the precondition is visible.
// Precondition (asserted): the recipe currently has a parent block.
191 assert(getParent() && "Recipe not in any VPBasicBlock");
193}
194
// NOTE(review): listing lines 195-196 (the signature and presumably a removal
// from the current parent) are absent from this extract -- confirm upstream.
// The visible tail re-inserts this recipe directly after InsertPos.
197 insertAfter(InsertPos);
198}
199
// NOTE(review): listing lines 200-202 (the signature and presumably a removal
// from the current parent) are absent from this extract -- confirm upstream.
// The visible tail re-inserts this recipe into BB at iterator I.
203 insertBefore(BB, I);
204}
205
// Emits the IR for a single unroll part (Part) of this VPInstruction and, for
// value-producing opcodes, records the result with State.set(this, ..., Part).
// Opcodes that are not handled reach llvm_unreachable in the default case.
// NOTE(review): several lines of this function are absent from this extract
// (listing-number gaps at 208, 211, 227, 242, 256, 281, 292-293, 296, 311-313,
// 327 and 348), including the declaration of the local Builder and most case
// labels; consult the upstream file before editing.
206void VPInstruction::generateInstruction(VPTransformState &State,
207 unsigned Part) {
209 Builder.SetCurrentDebugLocation(DL);
210
// NOTE(review): the condition opening this block (listing line 211) is
// missing; the body below handles opcodes that are IR binary operators.
212 Value *A = State.get(getOperand(0), Part);
213 Value *B = State.get(getOperand(1), Part);
214 Value *V =
215 Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B, Name);
216 State.set(this, V, Part);
217 return;
218 }
219
220 switch (getOpcode()) {
221 case VPInstruction::Not: {
222 Value *A = State.get(getOperand(0), Part);
223 Value *V = Builder.CreateNot(A, Name);
224 State.set(this, V, Part);
225 break;
226 }
// NOTE(review): case label missing (listing line 227). Emits an unsigned
// less-or-equal compare of operand 0 against operand 1.
228 Value *IV = State.get(getOperand(0), Part);
229 Value *TC = State.get(getOperand(1), Part);
230 Value *V = Builder.CreateICmpULE(IV, TC, Name);
231 State.set(this, V, Part);
232 break;
233 }
234 case Instruction::Select: {
235 Value *Cond = State.get(getOperand(0), Part);
236 Value *Op1 = State.get(getOperand(1), Part);
237 Value *Op2 = State.get(getOperand(2), Part);
238 Value *V = Builder.CreateSelect(Cond, Op1, Op2, Name);
239 State.set(this, V, Part);
240 break;
241 }
// NOTE(review): case label missing (listing line 242). Emits a call to the
// get_active_lane_mask intrinsic producing an i1 vector of State.VF elements.
243 // Get first lane of vector induction variable.
244 Value *VIVElem0 = State.get(getOperand(0), VPIteration(Part, 0));
245 // Get the original loop tripcount.
246 Value *ScalarTC = State.get(getOperand(1), Part);
247
248 auto *Int1Ty = Type::getInt1Ty(Builder.getContext());
249 auto *PredTy = VectorType::get(Int1Ty, State.VF);
250 Instruction *Call = Builder.CreateIntrinsic(
251 Intrinsic::get_active_lane_mask, {PredTy, ScalarTC->getType()},
252 {VIVElem0, ScalarTC}, nullptr, Name);
253 State.set(this, Call, Part);
254 break;
255 }
// NOTE(review): case label missing (listing line 256).
257 // Generate code to combine the previous and current values in vector v3.
258 //
259 // vector.ph:
260 // v_init = vector(..., ..., ..., a[-1])
261 // br vector.body
262 //
263 // vector.body
264 // i = phi [0, vector.ph], [i+4, vector.body]
265 // v1 = phi [v_init, vector.ph], [v2, vector.body]
266 // v2 = a[i, i+1, i+2, i+3];
267 // v3 = vector(v1(3), v2(0, 1, 2))
268
269 // For the first part, use the recurrence phi (v1), otherwise v2.
270 auto *V1 = State.get(getOperand(0), 0);
271 Value *PartMinus1 = Part == 0 ? V1 : State.get(getOperand(1), Part - 1);
// A non-vector previous value is forwarded unchanged; otherwise the previous
// and current vectors are spliced with offset -1.
272 if (!PartMinus1->getType()->isVectorTy()) {
273 State.set(this, PartMinus1, Part);
274 } else {
275 Value *V2 = State.get(getOperand(1), Part);
276 State.set(this, Builder.CreateVectorSplice(PartMinus1, V2, -1, Name),
277 Part);
278 }
279 break;
280 }
// NOTE(review): case label missing (listing line 281). Computes
// TC > Step ? TC - Step : 0, i.e. a subtraction clamped at zero, where Step
// comes from createStepForVF(..., State.VF, State.UF).
282 Value *ScalarTC = State.get(getOperand(0), Part);
283 Value *Step =
284 createStepForVF(Builder, ScalarTC->getType(), State.VF, State.UF);
285 Value *Sub = Builder.CreateSub(ScalarTC, Step);
286 Value *Cmp = Builder.CreateICmp(CmpInst::Predicate::ICMP_UGT, ScalarTC, Step);
287 Value *Zero = ConstantInt::get(ScalarTC->getType(), 0);
288 Value *Sel = Builder.CreateSelect(Cmp, Sub, Zero);
289 State.set(this, Sel, Part);
290 break;
291 }
// NOTE(review): case labels and the definition of IsNUW (listing lines
// 292-293 and 296) are missing from this extract.
294 Value *Next = nullptr;
295 if (Part == 0) {
297 auto *Phi = State.get(getOperand(0), 0);
298 // The loop step is equal to the vectorization factor (num of SIMD
299 // elements) times the unroll factor (num of SIMD instructions).
300 Value *Step =
301 createStepForVF(Builder, Phi->getType(), State.VF, State.UF);
302 Next = Builder.CreateAdd(Phi, Step, Name, IsNUW, false);
303 } else {
// Later parts reuse the value generated for part 0.
304 Next = State.get(this, 0);
305 }
306
307 State.set(this, Next, Part);
308 break;
309 }
310
// NOTE(review): case labels and the definition of IsNUW (listing lines
// 311-313) are missing from this extract.
314 auto *IV = State.get(getOperand(0), VPIteration(0, 0));
// Part 0 uses the IV itself; later parts add a per-part step to it.
315 if (Part == 0) {
316 State.set(this, IV, Part);
317 break;
318 }
319
320 // The canonical IV is incremented by the vectorization factor (num of SIMD
321 // elements) times the unroll part.
322 Value *Step = createStepForVF(Builder, IV->getType(), State.VF, Part);
323 Value *Next = Builder.CreateAdd(IV, Step, Name, IsNUW, false);
324 State.set(this, Next, Part);
325 break;
326 }
// NOTE(review): case label missing (listing line 327). The branch is emitted
// once, for part 0 only; no value is recorded in State.
328 if (Part != 0)
329 break;
330
331 Value *Cond = State.get(getOperand(0), VPIteration(Part, 0));
332 VPRegionBlock *ParentRegion = getParent()->getParent();
333 VPBasicBlock *Header = ParentRegion->getEntryBasicBlock();
334
335 // Replace the temporary unreachable terminator with a new conditional
336 // branch, hooking it up to backward destination for exiting blocks now and
337 // to forward destination(s) later when they are created.
338 BranchInst *CondBr =
339 Builder.CreateCondBr(Cond, Builder.GetInsertBlock(), nullptr);
340
341 if (getParent()->isExiting())
342 CondBr->setSuccessor(1, State.CFG.VPBB2IRBB[Header]);
343
// Clear the placeholder first successor passed to CreateCondBr above, then
// drop the block's previous terminator.
344 CondBr->setSuccessor(0, nullptr);
345 Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
346 break;
347 }
// NOTE(review): case label missing (listing line 348). As above, the branch
// is emitted once, for part 0 only.
349 if (Part != 0)
350 break;
351 // First create the compare.
352 Value *IV = State.get(getOperand(0), Part);
353 Value *TC = State.get(getOperand(1), Part);
354 Value *Cond = Builder.CreateICmpEQ(IV, TC);
355
356 // Now create the branch.
357 auto *Plan = getParent()->getPlan();
358 VPRegionBlock *TopRegion = Plan->getVectorLoopRegion();
359 VPBasicBlock *Header = TopRegion->getEntry()->getEntryBasicBlock();
360
361 // Replace the temporary unreachable terminator with a new conditional
362 // branch, hooking it up to backward destination (the header) now and to the
363 // forward destination (the exit/middle block) later when it is created.
364 // Note that CreateCondBr expects a valid BB as first argument, so we need
365 // to set it to nullptr later.
366 BranchInst *CondBr = Builder.CreateCondBr(Cond, Builder.GetInsertBlock(),
367 State.CFG.VPBB2IRBB[Header]);
368 CondBr->setSuccessor(0, nullptr);
369 Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
370 break;
371 }
372 default:
373 llvm_unreachable("Unsupported opcode for instruction");
374 }
375}
376
// NOTE(review): the signature (listing line 377) and listing line 379 are
// absent from this extract; line 379 presumably scopes the fast-math-flag
// change made below -- confirm upstream.
// Installs this recipe's fast-math flags (FMF) on the builder and then
// generates code for every unroll part. Must not be run for a single
// instance (asserted).
378 assert(!State.Instance && "VPInstruction executing an Instance");
380 State.Builder.setFastMathFlags(FMF);
381 for (unsigned Part = 0; Part < State.UF; ++Part)
382 generateInstruction(State, Part);
383}
384
385#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// NOTE(review): the signature line (listing line 386) is absent from this
// extract. Debug helper: prints this recipe to dbgs() with an empty indent,
// using a slot tracker built from the enclosing plan.
387 VPSlotTracker SlotTracker(getParent()->getPlan());
388 print(dbgs(), "", SlotTracker);
389}
390
392 VPSlotTracker &SlotTracker) const {
// Prints an "EMIT" line: optional result, an opcode mnemonic, the fast-math
// flags, each operand, and the debug location when one is set.
// NOTE(review): the first signature line (391), the result-printing line
// (396), every case label in the switch, and the default-case statement (441)
// are absent from this extract.
393 O << Indent << "EMIT ";
394
395 if (hasResult()) {
397 O << " = ";
398 }
399
400 switch (getOpcode()) {
402 O << "not";
403 break;
405 O << "icmp ule";
406 break;
408 O << "combined load";
409 break;
411 O << "combined store";
412 break;
414 O << "active lane mask";
415 break;
417 O << "first-order splice";
418 break;
420 O << "VF * UF + ";
421 break;
423 O << "VF * UF +(nuw) ";
424 break;
426 O << "branch-on-cond";
427 break;
429 O << "TC > VF ? TC - VF : 0";
430 break;
432 O << "VF * Part + ";
433 break;
435 O << "VF * Part +(nuw) ";
436 break;
438 O << "branch-on-count ";
439 break;
440 default:
442 }
443
444 O << FMF;
445
446 for (const VPValue *Operand : operands()) {
447 O << " ";
448 Operand->printAsOperand(O, SlotTracker);
449 }
450
451 if (DL) {
452 O << ", !dbg ";
453 DL.print(O);
454 }
455}
456#endif
457
// NOTE(review): the signature line (listing line 458) is absent from this
// extract. Stores FMFNew as this recipe's fast-math flags; only legal when the
// opcode is one of the floating-point operations listed in the assert.
459 // Make sure the VPInstruction is a floating-point operation.
460 assert((Opcode == Instruction::FAdd || Opcode == Instruction::FMul ||
461 Opcode == Instruction::FNeg || Opcode == Instruction::FSub ||
462 Opcode == Instruction::FDiv || Opcode == Instruction::FRem ||
463 Opcode == Instruction::FCmp) &&
464 "this op can't take fast-math flags");
465 FMF = FMFNew;
466}
467
// NOTE(review): the signature (listing line 468) and the declarations of Args
// (line 476) and OpBundles (line 507) are absent from this extract.
// For every unroll part, emits a call to the vector counterpart of the
// underlying scalar call: either a vector intrinsic declaration or the
// pre-selected function Variant.
469 auto &CI = *cast<CallInst>(getUnderlyingInstr());
470 assert(!isa<DbgInfoIntrinsic>(CI) &&
471 "DbgInfoIntrinsic should have been dropped during VPlan construction");
472 State.setDebugLocFromInst(&CI);
473
474 for (unsigned Part = 0; Part < State.UF; ++Part) {
// Overload types for the intrinsic declaration; slot 0 starts as the scalar
// return type and is widened below when vectorizing.
475 SmallVector<Type *, 2> TysForDecl = {CI.getType()};
477 for (const auto &I : enumerate(operands())) {
478 // Some intrinsics have a scalar argument - don't replace it with a
479 // vector.
480 Value *Arg;
481 if (VectorIntrinsicID == Intrinsic::not_intrinsic ||
482 !isVectorIntrinsicWithScalarOpAtArg(VectorIntrinsicID, I.index()))
483 Arg = State.get(I.value(), Part);
484 else
485 Arg = State.get(I.value(), VPIteration(0, 0));
486 if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, I.index()))
487 TysForDecl.push_back(Arg->getType());
488 Args.push_back(Arg);
489 }
490
491 Function *VectorF;
492 if (VectorIntrinsicID != Intrinsic::not_intrinsic) {
493 // Use vector version of the intrinsic.
494 if (State.VF.isVector())
495 TysForDecl[0] =
496 VectorType::get(CI.getType()->getScalarType(), State.VF);
497 Module *M = State.Builder.GetInsertBlock()->getModule();
498 VectorF = Intrinsic::getDeclaration(M, VectorIntrinsicID, TysForDecl);
499 assert(VectorF && "Can't retrieve vector intrinsic.");
500 } else {
501#ifndef NDEBUG
502 assert(Variant != nullptr && "Can't create vector function.");
503#endif
504 VectorF = Variant;
505 }
506
// Carry over the original call's operand bundles to the new call.
508 CI.getOperandBundlesAsDefs(OpBundles);
509 CallInst *V = State.Builder.CreateCall(VectorF, Args, OpBundles);
510
511 if (isa<FPMathOperator>(V))
512 V->copyFastMathFlags(&CI);
513
514 State.set(this, V, Part);
515 State.addMetadata(V, &CI);
516 }
517}
518
519#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
521 VPSlotTracker &SlotTracker) const {
// Prints a "WIDEN-CALL" line naming the called function and whether a vector
// intrinsic or a library function (Variant) is used.
// NOTE(review): the first signature line (520) and the lines that print the
// result (528) and the operands (533) are absent from this extract.
522 O << Indent << "WIDEN-CALL ";
523
524 auto *CI = cast<CallInst>(getUnderlyingInstr());
525 if (CI->getType()->isVoidTy())
526 O << "void ";
527 else {
529 O << " = ";
530 }
531
532 O << "call @" << CI->getCalledFunction()->getName() << "(";
534 O << ")";
535
536 if (VectorIntrinsicID)
537 O << " (using vector intrinsic)";
538 else {
539 O << " (using library function";
540 if (Variant->hasName())
541 O << ": " << Variant->getName();
542 O << ")";
543 }
544}
545
547 VPSlotTracker &SlotTracker) const {
// Prints a "WIDEN-SELECT" line and flags a loop-invariant condition.
// NOTE(review): the first signature line (546) and the lines that print the
// result and the three operands (549, 551, 553, 555) are absent from this
// extract; only the separators between them remain.
548 O << Indent << "WIDEN-SELECT ";
550 O << " = select ";
552 O << ", ";
554 O << ", ";
556 O << (isInvariantCond() ? " (condition is loop invariant)" : "");
557}
558#endif
559
// NOTE(review): the signature line (listing line 560) is absent from this
// extract. For every unroll part, emits a select on operands 1 and 2 and
// records it, copying metadata from the underlying scalar select.
561 auto &I = *cast<SelectInst>(getUnderlyingInstr());
562 State.setDebugLocFromInst(&I);
563
564 // The condition can be loop invariant but still defined inside the
565 // loop. This means that we can't just use the original 'cond' value.
566 // We have to take the 'vectorized' value and pick the first lane.
567 // Instcombine will make this a no-op.
568 auto *InvarCond =
569 isInvariantCond() ? State.get(getCond(), VPIteration(0, 0)) : nullptr;
570
571 for (unsigned Part = 0; Part < State.UF; ++Part) {
// Use the single invariant condition when available, else this part's value.
572 Value *Cond = InvarCond ? InvarCond : State.get(getCond(), Part);
573 Value *Op0 = State.get(getOperand(1), Part);
574 Value *Op1 = State.get(getOperand(2), Part);
575 Value *Sel = State.Builder.CreateSelect(Cond, Op0, Op1);
576 State.set(this, Sel, Part);
577 State.addMetadata(Sel, &I);
578 }
579}
580
// NOTE(review): the signature (listing line 581), the declaration of Ops
// (line 614) and listing line 662 are absent from this extract.
// Widens the underlying scalar instruction: for each unroll part it emits the
// same opcode over the operands' per-part values. Unary/binary operators,
// freeze, compares and casts are handled here; call, br, phi, GEP and select
// are rejected as belonging to other recipes.
582 auto &I = *cast<Instruction>(getUnderlyingValue());
583 auto &Builder = State.Builder;
584 switch (I.getOpcode()) {
585 case Instruction::Call:
586 case Instruction::Br:
587 case Instruction::PHI:
588 case Instruction::GetElementPtr:
589 case Instruction::Select:
590 llvm_unreachable("This instruction is handled by a different recipe.");
591 case Instruction::UDiv:
592 case Instruction::SDiv:
593 case Instruction::SRem:
594 case Instruction::URem:
595 case Instruction::Add:
596 case Instruction::FAdd:
597 case Instruction::Sub:
598 case Instruction::FSub:
599 case Instruction::FNeg:
600 case Instruction::Mul:
601 case Instruction::FMul:
602 case Instruction::FDiv:
603 case Instruction::FRem:
604 case Instruction::Shl:
605 case Instruction::LShr:
606 case Instruction::AShr:
607 case Instruction::And:
608 case Instruction::Or:
609 case Instruction::Xor: {
610 // Just widen unops and binops.
611 State.setDebugLocFromInst(&I);
612
613 for (unsigned Part = 0; Part < State.UF; ++Part) {
615 for (VPValue *VPOp : operands())
616 Ops.push_back(State.get(VPOp, Part));
617
618 Value *V = Builder.CreateNAryOp(I.getOpcode(), Ops);
619
// The builder may return a non-instruction value, so IR flags are copied only
// when the result is an Instruction.
620 if (auto *VecOp = dyn_cast<Instruction>(V)) {
621 VecOp->copyIRFlags(&I);
622
623 // If the instruction is vectorized and was in a basic block that needed
624 // predication, we can't propagate poison-generating flags (nuw/nsw,
625 // exact, etc.). The control flow has been linearized and the
626 // instruction is no longer guarded by the predicate, which could cause
627 // the flag properties to no longer hold.
628 if (State.MayGeneratePoisonRecipes.contains(this))
629 VecOp->dropPoisonGeneratingFlags();
630 }
631
632 // Use this vector value for all users of the original instruction.
633 State.set(this, V, Part);
634 State.addMetadata(V, &I);
635 }
636
637 break;
638 }
639 case Instruction::Freeze: {
640 State.setDebugLocFromInst(&I);
641
642 for (unsigned Part = 0; Part < State.UF; ++Part) {
643 Value *Op = State.get(getOperand(0), Part);
644
645 Value *Freeze = Builder.CreateFreeze(Op);
646 State.set(this, Freeze, Part);
647 }
648 break;
649 }
650 case Instruction::ICmp:
651 case Instruction::FCmp: {
652 // Widen compares. Generate vector compares.
653 bool FCmp = (I.getOpcode() == Instruction::FCmp);
654 auto *Cmp = cast<CmpInst>(&I);
655 State.setDebugLocFromInst(Cmp);
656 for (unsigned Part = 0; Part < State.UF; ++Part) {
657 Value *A = State.get(getOperand(0), Part);
658 Value *B = State.get(getOperand(1), Part);
659 Value *C = nullptr;
660 if (FCmp) {
661 // Propagate fast math flags.
// NOTE(review): listing line 662 is missing here; it presumably scopes the
// builder's fast-math-flag change made on the next line -- confirm upstream.
663 Builder.setFastMathFlags(Cmp->getFastMathFlags());
664 C = Builder.CreateFCmp(Cmp->getPredicate(), A, B);
665 } else {
666 C = Builder.CreateICmp(Cmp->getPredicate(), A, B);
667 }
668 State.set(this, C, Part);
669 State.addMetadata(C, &I);
670 }
671
672 break;
673 }
674
675 case Instruction::ZExt:
676 case Instruction::SExt:
677 case Instruction::FPToUI:
678 case Instruction::FPToSI:
679 case Instruction::FPExt:
680 case Instruction::PtrToInt:
681 case Instruction::IntToPtr:
682 case Instruction::SIToFP:
683 case Instruction::UIToFP:
684 case Instruction::Trunc:
685 case Instruction::FPTrunc:
686 case Instruction::BitCast: {
687 auto *CI = cast<CastInst>(&I);
688 State.setDebugLocFromInst(CI);
689
690 /// Vectorize casts.
// The destination type stays scalar when VF is scalar (unroll-only) and is a
// vector of the cast's result type otherwise.
691 Type *DestTy = (State.VF.isScalar())
692 ? CI->getType()
693 : VectorType::get(CI->getType(), State.VF);
694
695 for (unsigned Part = 0; Part < State.UF; ++Part) {
696 Value *A = State.get(getOperand(0), Part);
697 Value *Cast = Builder.CreateCast(CI->getOpcode(), A, DestTy);
698 State.set(this, Cast, Part);
699 State.addMetadata(Cast, &I);
700 }
701 break;
702 }
703 default:
704 // This instruction is not vectorized by simple widening.
705 LLVM_DEBUG(dbgs() << "LV: Found an unhandled instruction: " << I);
706 llvm_unreachable("Unhandled instruction!");
707 } // end of switch.
708}
709#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
711 VPSlotTracker &SlotTracker) const {
// Prints a "WIDEN" line with the underlying instruction's opcode name and,
// for compares, the predicate.
// NOTE(review): the first signature line (710) and the lines that print the
// result (713) and the operands (718) are absent from this extract.
712 O << Indent << "WIDEN ";
714 const Instruction *UI = getUnderlyingInstr();
715 O << " = " << UI->getOpcodeName() << " ";
716 if (auto *Cmp = dyn_cast<CmpInst>(UI))
717 O << Cmp->getPredicate() << " ";
719}
720
722 VPSlotTracker &SlotTracker) const {
// Prints a "WIDEN-INDUCTION" line for the induction IV; when a truncate
// instruction is attached the output is split over several quoted,
// "\l"-terminated segments.
// NOTE(review): the first signature line (721) and listing lines 728 and 733
// (presumably printing the truncate and the trailing operand) are absent from
// this extract.
723 O << Indent << "WIDEN-INDUCTION";
724 if (getTruncInst()) {
725 O << "\\l\"";
726 O << " +\n" << Indent << "\" " << VPlanIngredient(IV) << "\\l\"";
727 O << " +\n" << Indent << "\" ";
729 } else
730 O << " " << VPlanIngredient(IV);
731
732 O << ", ";
734}
735#endif
736
// NOTE(review): the signature line (listing line 737) is absent from this
// extract. Returns true only when the start value is the constant integer
// zero and the induction step is a SCEV constant equal to one; a non-constant
// start or step yields false because the dyn_casts return null.
738 auto *StartC = dyn_cast<ConstantInt>(getStartValue()->getLiveInIRValue());
739 auto *StepC = dyn_cast<SCEVConstant>(getInductionDescriptor().getStep());
740 return StartC && StartC->isZero() && StepC && StepC->isOne();
741}
742
743#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
745 VPSlotTracker &SlotTracker) const {
// Prints a "= DERIVED-IV <a> + <b> * <c>" line and notes a truncation when
// the step's type differs from ResultTy.
// NOTE(review): the first signature line (744) and the lines that print the
// result and the three operands (747, 749, 751, 753) are absent from this
// extract; only the separators between them remain.
746 O << Indent;
748 O << Indent << "= DERIVED-IV ";
750 O << " + ";
752 O << " * ";
754
755 if (IndDesc.getStep()->getType() != ResultTy)
756 O << " (truncated to " << *ResultTy << ")";
757}
758#endif
759
760#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
762 VPSlotTracker &SlotTracker) const {
// Prints a "= SCALAR-STEPS" line.
// NOTE(review): the first signature line (761) and the lines that print the
// result (764) and the operands (766) are absent from this extract.
763 O << Indent;
765 O << Indent << "= SCALAR-STEPS ";
767}
768#endif
769
// NOTE(review): the signature (listing line 770) and the declaration of
// Indices (line 814) are absent from this extract.
// Emits, for every unroll part, the widened form of the underlying GEP:
// either a splat of a scalar clone (all operands invariant while vectorizing)
// or a new GEP built from per-part operand values.
771 auto *GEP = cast<GetElementPtrInst>(getUnderlyingInstr());
772 // Construct a vector GEP by widening the operands of the scalar GEP as
773 // necessary. We mark the vector GEP 'inbounds' if appropriate. A GEP
774 // results in a vector of pointers when at least one operand of the GEP
775 // is vector-typed. Thus, to keep the representation compact, we only use
776 // vector-typed operands for loop-varying values.
777
778 if (State.VF.isVector() && areAllOperandsInvariant()) {
779 // If we are vectorizing, but the GEP has only loop-invariant operands,
780 // the GEP we build (by only using vector-typed operands for
781 // loop-varying values) would be a scalar pointer. Thus, to ensure we
782 // produce a vector of pointers, we need to either arbitrarily pick an
783 // operand to broadcast, or broadcast a clone of the original GEP.
784 // Here, we broadcast a clone of the original.
785 //
786 // TODO: If at some point we decide to scalarize instructions having
787 // loop-invariant operands, this special case will no longer be
788 // required. We would add the scalarization decision to
789 // collectLoopScalars() and teach getVectorValue() to broadcast
790 // the lane-zero scalar value.
// A single clone is inserted once; each part gets its own splat of it.
791 auto *Clone = State.Builder.Insert(GEP->clone());
792 for (unsigned Part = 0; Part < State.UF; ++Part) {
793 Value *EntryPart = State.Builder.CreateVectorSplat(State.VF, Clone);
794 State.set(this, EntryPart, Part);
795 State.addMetadata(EntryPart, GEP);
796 }
797 } else {
798 // If the GEP has at least one loop-varying operand, we are sure to
799 // produce a vector of pointers. But if we are only unrolling, we want
800 // to produce a scalar GEP for each unroll part. Thus, the GEP we
801 // produce with the code below will be scalar (if VF == 1) or vector
802 // (otherwise). Note that for the unroll-only case, we still maintain
803 // values in the vector mapping with initVector, as we do for other
804 // instructions.
805 for (unsigned Part = 0; Part < State.UF; ++Part) {
806 // The pointer operand of the new GEP. If it's loop-invariant, we
807 // won't broadcast it.
808 auto *Ptr = isPointerLoopInvariant()
809 ? State.get(getOperand(0), VPIteration(0, 0))
810 : State.get(getOperand(0), Part);
811
812 // Collect all the indices for the new GEP. If any index is
813 // loop-invariant, we won't broadcast it.
// Operand 0 is the pointer, so operand I corresponds to index I - 1.
815 for (unsigned I = 1, E = getNumOperands(); I < E; I++) {
816 VPValue *Operand = getOperand(I);
817 if (isIndexLoopInvariant(I - 1))
818 Indices.push_back(State.get(Operand, VPIteration(0, 0)));
819 else
820 Indices.push_back(State.get(Operand, Part));
821 }
822
823 // If the GEP instruction is vectorized and was in a basic block that
824 // needed predication, we can't propagate the poison-generating 'inbounds'
825 // flag. The control flow has been linearized and the GEP is no longer
826 // guarded by the predicate, which could cause the 'inbounds' properties to
827 // no longer hold.
828 bool IsInBounds =
829 GEP->isInBounds() && State.MayGeneratePoisonRecipes.count(this) == 0;
830
831 // Create the new GEP. Note that this GEP may be a scalar if VF == 1,
832 // but it should be a vector, otherwise.
833 auto *NewGEP = State.Builder.CreateGEP(GEP->getSourceElementType(), Ptr,
834 Indices, "", IsInBounds);
835 assert((State.VF.isScalar() || NewGEP->getType()->isVectorTy()) &&
836 "NewGEP is not a pointer vector");
837 State.set(this, NewGEP, Part);
838 State.addMetadata(NewGEP, GEP);
839 }
840 }
841}
842
843#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
845 VPSlotTracker &SlotTracker) const {
// Prints a "WIDEN-GEP" line tagged with "Inv"/"Var" for the pointer and, in
// brackets, for each index operand (getNumOperands() - 1 of them).
// NOTE(review): the first signature line (844) and the lines that print the
// result (852) and the operands (854) are absent from this extract.
846 O << Indent << "WIDEN-GEP ";
847 O << (isPointerLoopInvariant() ? "Inv" : "Var");
848 for (size_t I = 0; I < getNumOperands() - 1; ++I)
849 O << "[" << (isIndexLoopInvariant(I) ? "Inv" : "Var") << "]";
850
851 O << " ";
853 O << " = getelementptr ";
855}
856#endif
857
// NOTE(review): the signature line (listing line 858) is absent from this
// extract. Lowers the blend into a chain of selects, one chain per unroll
// part, and records the final value of each chain for this recipe.
859 State.setDebugLocFromInst(Phi);
860 // We know that all PHIs in non-header blocks are converted into
861 // selects, so we don't have to worry about the insertion order and we
862 // can just use the builder.
863 // At this point we generate the predication tree. There may be
864 // duplications since this is a simple recursive scan, but future
865 // optimizations will clean it up.
866
867 unsigned NumIncoming = getNumIncomingValues();
868
869 // Generate a sequence of selects of the form:
870 // SELECT(Mask3, In3,
871 // SELECT(Mask2, In2,
872 // SELECT(Mask1, In1,
873 // In0)))
874 // Note that Mask0 is never used: lanes for which no path reaches this phi and
875 // are essentially undef are taken from In0.
// Entry[Part] holds the select chain built so far for that unroll part.
876 VectorParts Entry(State.UF);
877 for (unsigned In = 0; In < NumIncoming; ++In) {
878 for (unsigned Part = 0; Part < State.UF; ++Part) {
879 // We might have single edge PHIs (blocks) - use an identity
880 // 'select' for the first PHI operand.
881 Value *In0 = State.get(getIncomingValue(In), Part);
882 if (In == 0)
883 Entry[Part] = In0; // Initialize with the first incoming value.
884 else {
885 // Select between the current value and the previous incoming edge
886 // based on the incoming mask.
887 Value *Cond = State.get(getMask(In), Part);
888 Entry[Part] =
889 State.Builder.CreateSelect(Cond, In0, Entry[Part], "predphi");
890 }
891 }
892 }
893 for (unsigned Part = 0; Part < State.UF; ++Part)
894 State.set(this, Entry[Part], Part);
895}
896
897#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
899 VPSlotTracker &SlotTracker) const {
// Prints a "BLEND <phi> =" line followed by either the single incoming value
// or, per incoming value, an entry containing a "/" separator.
// NOTE(review): the first signature line (898) and the lines that print the
// incoming values and masks (907, 911, 913) are absent from this extract.
900 O << Indent << "BLEND ";
901 Phi->printAsOperand(O, false);
902 O << " =";
903 if (getNumIncomingValues() == 1) {
904 // Not a User of any mask: not really blending, this is a
905 // single-predecessor phi.
906 O << " ";
908 } else {
909 for (unsigned I = 0, E = getNumIncomingValues(); I < E; ++I) {
910 O << " ";
912 O << "/";
914 }
915 }
916}
917
919 VPSlotTracker &SlotTracker) const {
// Prints a "REDUCE" line naming the reduction opcode, with an optional
// condition operand and a note when the descriptor has an intermediate store.
// NOTE(review): the first signature line (918) and listing lines 921, 923,
// 926, 928 and 931 are absent from this extract. In particular, the statement
// controlled by the `if` on line 925 is the missing line 926, NOT the
// "reduce." output that appears directly below it here.
920 O << Indent << "REDUCE ";
922 O << " = ";
924 O << " +";
925 if (isa<FPMathOperator>(getUnderlyingInstr()))
927 O << " reduce." << Instruction::getOpcodeName(RdxDesc->getOpcode()) << " (";
929 if (getCondOp()) {
930 O << ", ";
932 }
933 O << ")";
934 if (RdxDesc->IntermediateStore)
935 O << " (with final reduction value stored in invariant address sank "
936 "outside of loop)";
937}
938#endif
939
// NOTE(review): the signature line (listing line 940) is absent from this
// extract. Returns true when some user of this recipe is a
// VPPredInstPHIRecipe that itself has at least one user which does not use
// only scalars of that phi; users of any other kind do not count.
941 // Find if the recipe is used by a widened recipe via an intervening
942 // VPPredInstPHIRecipe. In this case, also pack the scalar values in a vector.
943 return any_of(users(), [](const VPUser *U) {
944 if (auto *PredR = dyn_cast<VPPredInstPHIRecipe>(U))
945 return any_of(PredR->users(), [PredR](const VPUser *U) {
946 return !U->usesScalars(PredR);
947 });
948 return false;
949 });
950}
951
952#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
954 VPSlotTracker &SlotTracker) const {
// Prints a "CLONE" (uniform) or "REPLICATE" line; calls are printed with the
// callee name and operands, and " (S->V)" is appended when shouldPack() holds.
// NOTE(review): the first signature line (953) and listing lines 958, 963 and
// 969-970 (result printing, the call that takes the lambda below, and the
// non-call branch body) are absent from this extract.
955 O << Indent << (IsUniform ? "CLONE " : "REPLICATE ");
956
957 if (!getUnderlyingInstr()->getType()->isVoidTy()) {
959 O << " = ";
960 }
961 if (auto *CB = dyn_cast<CallBase>(getUnderlyingInstr())) {
962 O << "call @" << CB->getCalledFunction()->getName() << "(";
964 O, [&O, &SlotTracker](VPValue *Op) {
965 Op->printAsOperand(O, SlotTracker);
966 });
967 O << ")";
968 } else {
971 }
972
973 if (shouldPack())
974 O << " (S->V)";
975}
976#endif
977
// NOTE(review): the signature line (listing line 978) is absent from this
// extract. Replaces the unreachable terminator of State.CFG.PrevBB with a
// conditional branch on this instance's mask bit; both successors are left
// unset for later wiring. Requires a single (part, lane) instance.
979 assert(State.Instance && "Branch on Mask works only on single instance.");
980
981 unsigned Part = State.Instance->Part;
982 unsigned Lane = State.Instance->Lane.getKnownLane();
983
984 Value *ConditionBit = nullptr;
985 VPValue *BlockInMask = getMask();
986 if (BlockInMask) {
987 ConditionBit = State.get(BlockInMask, Part);
// A vector mask is narrowed to the bit for this instance's lane.
988 if (ConditionBit->getType()->isVectorTy())
989 ConditionBit = State.Builder.CreateExtractElement(
990 ConditionBit, State.Builder.getInt32(Lane));
991 } else // Block in mask is all-one.
992 ConditionBit = State.Builder.getTrue();
993
994 // Replace the temporary unreachable terminator with a new conditional branch,
995 // whose two destinations will be set later when they are created.
996 auto *CurrentTerminator = State.CFG.PrevBB->getTerminator();
997 assert(isa<UnreachableInst>(CurrentTerminator) &&
998 "Expected to replace unreachable terminator with conditional branch.");
// PrevBB is passed only as a placeholder first successor and is cleared on
// the next line.
999 auto *CondBr = BranchInst::Create(State.CFG.PrevBB, nullptr, ConditionBit);
1000 CondBr->setSuccessor(0, nullptr);
1001 ReplaceInstWithInst(CurrentTerminator, CondBr);
1002}
1003
// NOTE(review): the signature line (listing line 1004) is absent from this
// extract. Creates the phi that merges a predicated instruction's value with
// the value on the path that skipped it: a vector phi when operand 0 already
// has a vector value for this part, a scalar phi otherwise. Requires a single
// (part, lane) instance.
1005 assert(State.Instance && "Predicated instruction PHI works per instance.");
1006 Instruction *ScalarPredInst =
1007 cast<Instruction>(State.get(getOperand(0), *State.Instance));
1008 BasicBlock *PredicatedBB = ScalarPredInst->getParent();
1009 BasicBlock *PredicatingBB = PredicatedBB->getSinglePredecessor();
1010 assert(PredicatingBB && "Predicated block has no single predecessor.");
1011 assert(isa<VPReplicateRecipe>(getOperand(0)) &&
1012 "operand must be VPReplicateRecipe");
1013
1014 // By current pack/unpack logic we need to generate only a single phi node: if
1015 // a vector value for the predicated instruction exists at this point it means
1016 // the instruction has vector users only, and a phi for the vector value is
1017 // needed. In this case the recipe of the predicated instruction is marked to
1018 // also do that packing, thereby "hoisting" the insert-element sequence.
1019 // Otherwise, a phi node for the scalar value is needed.
1020 unsigned Part = State.Instance->Part;
1021 if (State.hasVectorValue(getOperand(0), Part)) {
1022 Value *VectorValue = State.get(getOperand(0), Part);
1023 InsertElementInst *IEI = cast<InsertElementInst>(VectorValue);
1024 PHINode *VPhi = State.Builder.CreatePHI(IEI->getType(), 2);
1025 VPhi->addIncoming(IEI->getOperand(0), PredicatingBB); // Unmodified vector.
1026 VPhi->addIncoming(IEI, PredicatedBB); // New vector with inserted element.
// reset() is used when a value is already recorded for this recipe and part,
// set() when it is the first one.
1027 if (State.hasVectorValue(this, Part))
1028 State.reset(this, VPhi, Part);
1029 else
1030 State.set(this, VPhi, Part);
1031 // NOTE: Currently we need to update the value of the operand, so the next
1032 // predicated iteration inserts its generated value in the correct vector.
1033 State.reset(getOperand(0), VPhi, Part);
1034 } else {
1035 Type *PredInstType = getOperand(0)->getUnderlyingValue()->getType();
1036 PHINode *Phi = State.Builder.CreatePHI(PredInstType, 2);
// The path that skipped the predicated block contributes poison.
1037 Phi->addIncoming(PoisonValue::get(ScalarPredInst->getType()),
1038 PredicatingBB);
1039 Phi->addIncoming(ScalarPredInst, PredicatedBB);
1040 if (State.hasScalarValue(this, *State.Instance))
1041 State.reset(this, Phi, *State.Instance);
1042 else
1043 State.set(this, Phi, *State.Instance);
1044 // NOTE: Currently we need to update the value of the operand, so the next
1045 // predicated iteration inserts its generated value in the correct vector.
1046 State.reset(getOperand(0), Phi, *State.Instance);
1047 }
1048}
1049
1050#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1052 VPSlotTracker &SlotTracker) const {
// Prints a "PHI-PREDICATED-INSTRUCTION" line.
// NOTE(review): the first signature line (1051) and the lines that print the
// result (1054) and the operands (1056) are absent from this extract.
1053 O << Indent << "PHI-PREDICATED-INSTRUCTION ";
1055 O << " = ";
1057}
1058
1060 VPSlotTracker &SlotTracker) const {
// Prints a "WIDEN" line with the ingredient's opcode name; a result is
// printed only when the recipe is not a store.
// NOTE(review): the first signature line (1059) and the lines that print the
// result (1064) and the operands (1069) are absent from this extract.
1061 O << Indent << "WIDEN ";
1062
1063 if (!isStore()) {
1065 O << " = ";
1066 }
1067 O << Instruction::getOpcodeName(Ingredient.getOpcode()) << " ";
1068
1070}
1071#endif
1072
// NOTE(review): the signature line (listing line 1073) is absent from this
// extract. Creates a phi named "index" at the first insertion point of
// State.CFG.PrevBB, seeds it with the start value from the preheader, and
// registers that same phi for every unroll part. Only the preheader incoming
// value is added here; the second reserved slot is not filled in this block.
1074 Value *Start = getStartValue()->getLiveInIRValue();
1075 PHINode *EntryPart = PHINode::Create(
1076 Start->getType(), 2, "index", &*State.CFG.PrevBB->getFirstInsertionPt());
1077
1078 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
1079 EntryPart->addIncoming(Start, VectorPH);
1080 EntryPart->setDebugLoc(DL);
1081 for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
1082 State.set(this, EntryPart, Part);
1083}
1084
1085#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Debug/dump-only printer: writes Indent, "EMIT " and " = CANONICAL-INDUCTION"
// to O. NOTE(review): listing lines 1086 and 1089 (first signature line and
// one statement) are not visible here.
1087 VPSlotTracker &SlotTracker) const {
1088 O << Indent << "EMIT ";
1090 O << " = CANONICAL-INDUCTION";
1091}
1092#endif
1093
// Returns true only when the induction described by ID matches this canonical
// induction: Ty equals getScalarType(), both start values are the same IR
// value, and ID is an integer induction with a constant step equal to one.
// NOTE(review): the first signature line (listing line 1094) is not visible.
1095 Type *Ty) const {
1096 if (Ty != getScalarType())
1097 return false;
1098 // The start value of ID must match the start value of this canonical
1099 // induction.
1100 if (getStartValue()->getLiveInIRValue() != ID.getStartValue())
1101 return false;
1102
1103 ConstantInt *Step = ID.getConstIntStepValue();
1104 // ID must also be incremented by one. IK_IntInduction always increments the
1105 // induction by Step, but the binary op may not be set.
// Step is null-checked before isOne() is called on it.
1106 return ID.getKind() == InductionDescriptor::IK_IntInduction && Step &&
1107 Step->isOne();
1108}
1109
// Returns true when IsScalarAfterVectorization is set and either VF is not
// scalable or only the first lane of this recipe is used.
// NOTE(review): the signature line (listing line 1110) is not visible here.
1111 return IsScalarAfterVectorization &&
1112 (!VF.isScalable() || vputils::onlyFirstLaneUsed(this));
1113}
1114
1115#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Debug/dump-only printer: writes Indent, "EMIT ", " = WIDEN-POINTER-INDUCTION "
// and finally ", " followed by the induction descriptor's step.
// NOTE(review): listing lines 1116, 1119 and 1121 are not visible here.
1117 VPSlotTracker &SlotTracker) const {
1118 O << Indent << "EMIT ";
1120 O << " = WIDEN-POINTER-INDUCTION ";
1122 O << ", " << *IndDesc.getStep();
1123}
1124#endif
1125
// Body of a code-generation routine whose signature line (listing line 1126)
// is not visible here. NOTE(review): presumably the execute() of the
// SCEV-expansion recipe, given the "EXPAND SCEV" printer that follows; confirm.
//
// Expands Expr into IR once, at the builder's current insert point, and
// registers the resulting value for every unroll part.
// Must not run while a specific instance (State.Instance) is set.
1127 assert(!State.Instance && "cannot be used in per-lane");
1128 const DataLayout &DL = State.CFG.PrevBB->getModule()->getDataLayout();
1129 SCEVExpander Exp(SE, DL, "induction");
1130
// The expanded value has the SCEV expression's own type.
1131 Value *Res = Exp.expandCodeFor(Expr, Expr->getType(),
1132 &*State.Builder.GetInsertPoint());
1133
// The same expanded value is shared by all parts.
1134 for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
1135 State.set(this, Res, Part);
1136}
1137
1138#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Debug/dump-only printer: writes Indent, "EMIT " and " = EXPAND SCEV "
// followed by the expression itself.
// NOTE(review): listing lines 1139 and 1142 are not visible here.
1140 VPSlotTracker &SlotTracker) const {
1141 O << Indent << "EMIT ";
1143 O << " = EXPAND SCEV " << *Expr;
1144}
1145#endif
1146
// Body of a code-generation routine whose signature line (listing line 1147)
// is not visible here. NOTE(review): presumably the execute() of the widened
// canonical IV recipe, given the "WIDEN-CANONICAL-INDUCTION" printer that
// follows; confirm. The declaration of Builder (listing line 1150) is also
// missing from this listing.
//
// For each unroll part, produces the value named "vec.iv" as the (splatted)
// canonical IV plus a per-part step.
// The canonical IV is read from part 0 of operand 0.
1148 Value *CanonicalIV = State.get(getOperand(0), 0);
1149 Type *STy = CanonicalIV->getType();
1151 ElementCount VF = State.VF;
// Scalar VF uses the IV directly; otherwise it is broadcast to a vector.
1152 Value *VStart = VF.isScalar()
1153 ? CanonicalIV
1154 : Builder.CreateVectorSplat(VF, CanonicalIV, "broadcast");
1155 for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
// Step for this part, derived from VF and the part index.
1156 Value *VStep = createStepForVF(Builder, STy, VF, Part);
1157 if (VF.isVector()) {
// Broadcast the part step, then add a step vector to it.
1158 VStep = Builder.CreateVectorSplat(VF, VStep);
1159 VStep =
1160 Builder.CreateAdd(VStep, Builder.CreateStepVector(VStep->getType()));
1161 }
1162 Value *CanonicalVectorIV = Builder.CreateAdd(VStart, VStep, "vec.iv");
1163 State.set(this, CanonicalVectorIV, Part);
1164 }
1165}
1166
1167#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Debug/dump-only printer: writes Indent, "EMIT " and
// " = WIDEN-CANONICAL-INDUCTION " to O.
// NOTE(review): listing lines 1168, 1171 and 1173 are not visible here.
1169 VPSlotTracker &SlotTracker) const {
1170 O << Indent << "EMIT ";
1172 O << " = WIDEN-CANONICAL-INDUCTION ";
1174}
1175#endif
1176
// Body of a code-generation routine whose signature line (listing line 1177)
// is not visible here. NOTE(review): presumably the execute() of the
// first-order recurrence phi recipe, given the "FIRST-ORDER-RECURRENCE-PHI"
// printer that follows; confirm. Listing line 1190 is also missing.
//
// Creates the PHI named "vector.recur" at the first insertion point of
// State.CFG.PrevBB, with the initial value as its incoming value from the
// preheader block, and registers it for part 0 only.
1178 auto &Builder = State.Builder;
1179 // Create a vector from the initial value.
1180 auto *VectorInit = getStartValue()->getLiveInIRValue();
1181
// The PHI type is the start value's type for scalar VF, else a vector of it.
1182 Type *VecTy = State.VF.isScalar()
1183 ? VectorInit->getType()
1184 : VectorType::get(VectorInit->getType(), State.VF);
1185
1186 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
1187 if (State.VF.isVector()) {
1188 auto *IdxTy = Builder.getInt32Ty();
1189 auto *One = ConstantInt::get(IdxTy, 1);
// Emitted before the preheader's terminator: the scalar start value is
// inserted into lane (RuntimeVF - 1) of a poison vector.
1191 Builder.SetInsertPoint(VectorPH->getTerminator());
1192 auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF);
1193 auto *LastIdx = Builder.CreateSub(RuntimeVF, One);
1194 VectorInit = Builder.CreateInsertElement(
1195 PoisonValue::get(VecTy), VectorInit, LastIdx, "vector.recur.init");
1196 }
1197
1198 // Create a phi node for the new recurrence.
1199 PHINode *EntryPart = PHINode::Create(
1200 VecTy, 2, "vector.recur", &*State.CFG.PrevBB->getFirstInsertionPt());
1201 EntryPart->addIncoming(VectorInit, VectorPH);
1202 State.set(this, EntryPart, 0);
1203}
1204
1205#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Debug/dump-only printer: writes Indent, "FIRST-ORDER-RECURRENCE-PHI " and
// " = phi " to O.
// NOTE(review): listing lines 1206, 1209 and 1211 are not visible here.
1207 VPSlotTracker &SlotTracker) const {
1208 O << Indent << "FIRST-ORDER-RECURRENCE-PHI ";
1210 O << " = phi ";
1212}
1213#endif
1214
// Body of a code-generation routine whose signature line (listing line 1215)
// is not visible here. NOTE(review): presumably the execute() of the reduction
// phi recipe, given the "WIDEN-REDUCTION-PHI" printer that follows; confirm.
// Listing lines 1246-1247, 1252 and 1263 are also missing, so the condition
// that opens the min/max branch below cannot be seen.
//
// Creates the "vec.phi" PHIs in the loop header and then adds their incoming
// value from the preheader block: the start value for part 0 and the identity
// value for all other parts.
1216 PHINode *PN = cast<PHINode>(getUnderlyingValue());
1217 auto &Builder = State.Builder;
1218
1219 // In order to support recurrences we need to be able to vectorize Phi nodes.
1220 // Phi nodes have cycles, so we need to vectorize them in two stages. This is
1221 // stage #1: We create a new vector PHI node with no incoming edges. We'll use
1222 // this value when we vectorize all of the instructions that use the PHI.
// The PHI keeps the original scalar type when VF is scalar or IsInLoop is set.
1223 bool ScalarPHI = State.VF.isScalar() || IsInLoop;
1224 Type *VecTy =
1225 ScalarPHI ? PN->getType() : VectorType::get(PN->getType(), State.VF);
1226
1227 BasicBlock *HeaderBB = State.CFG.PrevBB;
1228 assert(State.CurrentVectorLoop->getHeader() == HeaderBB &&
1229 "recipe must be in the vector loop header");
// Ordered reductions get a single PHI; otherwise one PHI per unroll part.
1230 unsigned LastPartForNewPhi = isOrdered() ? 1 : State.UF;
1231 for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
1232 Value *EntryPart =
1233 PHINode::Create(VecTy, 2, "vec.phi", &*HeaderBB->getFirstInsertionPt());
1234 State.set(this, EntryPart, Part);
1235 }
1236
1237 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
1238
1239 // Reductions do not have to start at zero. They can start with
1240 // any loop invariant values.
1241 VPValue *StartVPV = getStartValue();
1242 Value *StartV = StartVPV->getLiveInIRValue();
1243
1244 Value *Iden = nullptr;
1245 RecurKind RK = RdxDesc.getRecurrenceKind();
1248 // MinMax reductions have the start value as their identity.
1249 if (ScalarPHI) {
1250 Iden = StartV;
1251 } else {
// Vector case: the start value is broadcast in the preheader and serves as
// both the start and the identity.
1253 Builder.SetInsertPoint(VectorPH->getTerminator());
1254 StartV = Iden =
1255 Builder.CreateVectorSplat(State.VF, StartV, "minmax.ident");
1256 }
1257 } else {
1258 Iden = RdxDesc.getRecurrenceIdentity(RK, VecTy->getScalarType(),
1259 RdxDesc.getFastMathFlags());
1260
1261 if (!ScalarPHI) {
// Vector case: broadcast the identity, then place the start value in lane 0.
1262 Iden = Builder.CreateVectorSplat(State.VF, Iden);
1264 Builder.SetInsertPoint(VectorPH->getTerminator());
1265 Constant *Zero = Builder.getInt32(0);
1266 StartV = Builder.CreateInsertElement(Iden, StartV, Zero);
1267 }
1268 }
1269
1270 for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
1271 Value *EntryPart = State.get(this, Part);
1272 // Make sure to add the reduction start value only to the
1273 // first unroll part.
1274 Value *StartVal = (Part == 0) ? StartV : Iden;
1275 cast<PHINode>(EntryPart)->addIncoming(StartVal, VectorPH);
1276 }
1277}
1278
1279#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Debug/dump-only printer: writes Indent, "WIDEN-REDUCTION-PHI " and
// " = phi " to O.
// NOTE(review): listing lines 1280, 1284 and 1286 are not visible here.
1281 VPSlotTracker &SlotTracker) const {
1282 O << Indent << "WIDEN-REDUCTION-PHI ";
1283
1285 O << " = phi ";
1287}
1288#endif
1289
// Body of a code-generation routine whose signature and the opening of its
// leading assertion (listing lines 1290-1291) are not visible here; only the
// assertion message remains. Listing line 1306 (the right-hand side of the
// comparison below) is missing as well.
// NOTE(review): presumably the execute() of VPWidenPHIRecipe, as named in the
// assertion message; confirm against the full file.
//
// Creates an operand-less PHI named "vec.phi" whose type is taken from part 0
// of the selected start operand, and registers it for part 0.
1292 "Non-native vplans are not expected to have VPWidenPHIRecipes.");
1293
1294 // Currently we enter here in the VPlan-native path for non-induction
1295 // PHIs where all control flow is uniform. We simply widen these PHIs.
1296 // Create a vector phi with no operands - the vector phi operands will be
1297 // set at the end of vector code generation.
1298 VPBasicBlock *Parent = getParent();
1299 VPRegionBlock *LoopRegion = Parent->getEnclosingLoopRegion();
// Operand 0 is used unless the header-block scan below selects another index.
1300 unsigned StartIdx = 0;
1301 // For phis in header blocks of loop regions, use the index of the value
1302 // coming from the preheader.
1303 if (LoopRegion->getEntryBasicBlock() == Parent) {
1304 for (unsigned I = 0; I < getNumOperands(); ++I) {
1305 if (getIncomingBlock(I) ==
1307 StartIdx = I;
1308 }
1309 }
1310 Value *Op0 = State.get(getOperand(StartIdx), 0);
1311 Type *VecTy = Op0->getType();
1312 Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, "vec.phi");
1313 State.set(this, VecPhi, 0);
1314}
1315
1316#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Debug/dump-only printer: writes Indent and "WIDEN-PHI ", then either the
// original PHI (when the operand counts differ) or " = phi ".
// NOTE(review): listing lines 1317, 1331 and 1333 are not visible here.
1318 VPSlotTracker &SlotTracker) const {
1319 O << Indent << "WIDEN-PHI ";
1320
1321 auto *OriginalPhi = cast<PHINode>(getUnderlyingValue());
1322 // Unless all incoming values are modeled in VPlan print the original PHI
1323 // directly.
1324 // TODO: Remove once all VPWidenPHIRecipe instances keep all relevant incoming
1325 // values as VPValues.
1326 if (getNumOperands() != OriginalPhi->getNumOperands()) {
1327 O << VPlanIngredient(OriginalPhi);
1328 return;
1329 }
1330
1332 O << " = phi ";
1334}
1335#endif
1336
1337// TODO: It would be good to use the existing VPWidenPHIRecipe instead and
1338// remove VPActiveLaneMaskPHIRecipe.
// Body of a code-generation routine whose signature line (listing line 1339)
// is not visible here. NOTE(review): presumably
// VPActiveLaneMaskPHIRecipe::execute, per the TODO above; confirm.
//
// For every unroll part, creates a PHI named "active.lane.mask" typed like
// that part's start mask (operand 0), adds the start mask as the incoming
// value from the preheader block, sets its debug location to DL and registers
// it for the part.
1340 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
1341 for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
1342 Value *StartMask = State.get(getOperand(0), Part);
1343 PHINode *EntryPart =
1344 State.Builder.CreatePHI(StartMask->getType(), 2, "active.lane.mask");
1345 EntryPart->addIncoming(StartMask, VectorPH);
1346 EntryPart->setDebugLoc(DL);
1347 State.set(this, EntryPart, Part);
1348 }
1349}
1350
1351#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Debug/dump-only printer: writes Indent, "ACTIVE-LANE-MASK-PHI " and
// " = phi " to O.
// NOTE(review): listing lines 1352, 1356 and 1358 are not visible here.
1353 VPSlotTracker &SlotTracker) const {
1354 O << Indent << "ACTIVE-LANE-MASK-PHI ";
1355
1357 O << " = phi ";
1359}
1360#endif
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
assume Assume Builder
SmallVector< MachineOperand, 4 > Cond
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_DEBUG(X)
Definition: Debug.h:101
Hexagon Common GEP
#define I(x, y, z)
Definition: MD5.cpp:58
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition: VPlanSLP.cpp:191
This file contains the declarations of the Vectorization Plan base classes:
static const uint32_t IV[8]
Definition: blake3_impl.h:77
LLVM Basic Block Representation.
Definition: BasicBlock.h:56
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:245
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:284
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:127
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:146
Conditional or Unconditional Branch instruction.
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
This class represents a function call, abstracting a target machine's calling convention.
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:741
This is the shared class of boolean and integer constants.
Definition: Constants.h:78
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
Definition: Constants.h:199
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:888
This is an important base class in LLVM.
Definition: Constant.h:41
A parsed version of the target data layout string, and methods for querying it.
Definition: DataLayout.h:110
void print(raw_ostream &OS) const
prints source location /path/to/file.exe:line:col @[inlined at]
Definition: DebugLoc.cpp:143
constexpr bool isVector() const
One or more elements.
Definition: TypeSize.h:306
constexpr bool isScalar() const
Exactly one element.
Definition: TypeSize.h:302
Convenience struct for specifying and reasoning about fast-math flags.
Definition: FMF.h:21
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:94
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2349
Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains.
Definition: IRBuilder.cpp:1259
ConstantInt * getTrue()
Get the constant value for i1 true.
Definition: IRBuilder.h:452
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.cpp:1136
BasicBlock::iterator GetInsertPoint() const
Definition: IRBuilder.h:175
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:174
void setFastMathFlags(FastMathFlags NewFMF)
Set the fast-math flags to be used with generated fp-math operators.
Definition: IRBuilder.h:297
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:472
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition: IRBuilder.h:2286
InstTy * Insert(InstTy *I, const Twine &Name="") const
Insert and return the specified instruction.
Definition: IRBuilder.h:145
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2301
Value * CreateGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="", bool IsInBounds=false)
Definition: IRBuilder.h:1789
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2558
A struct for saving information about induction variables.
const SCEV * getStep() const
@ IK_IntInduction
Integer induction variable. Step = C.
This instruction inserts a single (scalar) element into a VectorType value.
VectorType * getType() const
Overload to return most specific vector type.
bool isBinaryOp() const
Definition: Instruction.h:173
const BasicBlock * getParent() const
Definition: Instruction.h:90
FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
const char * getOpcodeName() const
Definition: Instruction.h:170
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:168
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:355
BlockT * getHeader() const
Definition: LoopInfo.h:105
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:398
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Definition: Constants.cpp:1758
FastMathFlags getFastMathFlags() const
static unsigned getOpcode(RecurKind Kind)
Returns the opcode corresponding to the RecurrenceKind.
RecurKind getRecurrenceKind() const
Value * getRecurrenceIdentity(RecurKind K, Type *Tp, FastMathFlags FMF) const
Returns identity corresponding to the RecurrenceKind.
StoreInst * IntermediateStore
Reductions may store temporary or final result to an invariant address.
static bool isMinMaxRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is any min/max kind.
static bool isSelectCmpRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
This class uses information about analyzed scalars to rewrite expressions in canonical form.
Type * getType() const
Return the LLVM type of this SCEV expression.
This class provides computation of slot numbers for LLVM Assembly writing.
Definition: AsmWriter.cpp:678
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:267
static IntegerType * getInt1Ty(LLVMContext &C)
bool isVoidTy() const
Return true if this is 'void'.
Definition: Type.h:140
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:350
Value * getOperand(unsigned i) const
Definition: User.h:169
void execute(VPTransformState &State) override
Generate the active lane mask phi of the vector loop.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition: VPlan.h:1956
RecipeListTy & getRecipeList()
Returns a reference to the list of recipes.
Definition: VPlan.h:2003
iterator end()
Definition: VPlan.h:1987
VPRegionBlock * getEnclosingLoopRegion()
Definition: VPlan.cpp:427
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getIncomingValue(unsigned Idx) const
Return incoming value number Idx.
Definition: VPlan.h:1362
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
Definition: VPlan.h:1365
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account that a single incoming value has no mask.
Definition: VPlan.h:1359
void execute(VPTransformState &State) override
Generate the phi/select nodes.
VPRegionBlock * getParent()
Definition: VPlan.h:462
const VPBasicBlock * getExitingBasicBlock() const
Definition: VPlan.cpp:172
VPlan * getPlan()
Definition: VPlan.cpp:146
VPBlockBase * getSinglePredecessor() const
Definition: VPlan.h:503
const VPBasicBlock * getEntryBasicBlock() const
Definition: VPlan.cpp:151
VPValue * getMask() const
Return the mask used by this recipe.
Definition: VPlan.h:1601
void execute(VPTransformState &State) override
Generate the extraction of the appropriate bit from the block mask and the conditional branch.
void execute(VPTransformState &State) override
Generate the canonical scalar induction phi of the vector loop.
bool isCanonical(const InductionDescriptor &ID, Type *Ty) const
Check if the induction described by ID is canonical, i.e.
const Type * getScalarType() const
Returns the scalar type of the induction.
Definition: VPlan.h:1800
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getVPSingleValue()
Returns the only VPValue defined by the VPDef.
Definition: VPlanValue.h:384
VPValue * getVPValue(unsigned I)
Returns the VPValue with index I defined by the VPDef.
Definition: VPlanValue.h:396
unsigned getVPDefID() const
Definition: VPlanValue.h:416
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getStepValue() const
Definition: VPlan.h:1909
VPValue * getCanonicalIV() const
Definition: VPlan.h:1908
VPValue * getStartValue() const
Definition: VPlan.h:1907
void execute(VPTransformState &State) override
Generate a canonical vector induction variable of the vector loop, with.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getStartValue()
Returns the start value of the phi, if one is set.
Definition: VPlan.h:1083
@ CanonicalIVIncrementForPartNUW
Definition: VPlan.h:799
@ FirstOrderRecurrenceSplice
Definition: VPlan.h:785
@ CanonicalIVIncrementNUW
Definition: VPlan.h:795
@ CanonicalIVIncrementForPart
Definition: VPlan.h:798
@ CalculateTripCountMinusVF
Definition: VPlan.h:793
bool hasResult() const
Definition: VPlan.h:861
LLVM_DUMP_METHOD void dump() const
Print the VPInstruction to dbgs() (for debugging).
unsigned getOpcode() const
Definition: VPlan.h:837
void setFastMathFlags(FastMathFlags FMFNew)
Set the fast-math flags.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the VPInstruction to O.
void execute(VPTransformState &State) override
Generate the instruction.
static VPLane getLastLaneForVF(const ElementCount &VF)
Definition: VPlan.h:139
static VPLane getFirstLane()
Definition: VPlan.h:137
void fixPhi(VPlan &Plan, VPTransformState &State)
Fixup the wrapped LCSSA phi node in the unique exit block.
void execute(VPTransformState &State) override
Generates phi nodes for live-outs as needed to retain SSA form.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition: VPlan.h:666
bool mayReadFromMemory() const
Returns true if the recipe may read from memory.
bool mayHaveSideEffects() const
Returns true if the recipe may have side-effects.
Instruction * getUnderlyingInstr()
Returns the underlying instruction, if the recipe is a VPValue or nullptr otherwise.
Definition: VPlan.h:721
bool mayWriteToMemory() const
Returns true if the recipe may write to memory.
VPBasicBlock * getParent()
Definition: VPlan.h:683
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
void moveAfter(VPRecipeBase *MovePos)
Unlink this recipe from its current VPBasicBlock and insert it into the VPBasicBlock that MovePos liv...
bool isOrdered() const
Returns true, if the phi is part of an ordered reduction.
Definition: VPlan.h:1332
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi/select nodes.
VPValue * getVecOp() const
The VPValue of the vector value to be reduced.
Definition: VPlan.h:1499
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getCondOp() const
The VPValue of the condition for the block.
Definition: VPlan.h:1501
VPValue * getChainOp() const
The VPValue of the scalar Chain being accumulated.
Definition: VPlan.h:1497
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition: VPlan.h:2080
const VPBlockBase * getEntry() const
Definition: VPlan.h:2119
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool shouldPack() const
Returns true if the recipe is used by a widened recipe via an intervening VPPredInstPHIRecipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
This class can be used to assign consecutive numbers to all VPValues in a VPlan and allows querying t...
Definition: VPlanValue.h:434
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition: VPlanValue.h:201
void printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the operands to O.
Definition: VPlan.cpp:1045
operand_range operands()
Definition: VPlanValue.h:276
unsigned getNumOperands() const
Definition: VPlanValue.h:250
operand_iterator op_begin()
Definition: VPlanValue.h:272
VPValue * getOperand(unsigned N) const
Definition: VPlanValue.h:251
Value * getUnderlyingValue()
Return the underlying Value attached to this VPValue.
Definition: VPlanValue.h:84
void printAsOperand(raw_ostream &OS, VPSlotTracker &Tracker) const
Definition: VPlan.cpp:1030
Value * getLiveInIRValue()
Returns the underlying IR value, if this VPValue is defined outside the scope of VPlan.
Definition: VPlanValue.h:177
user_range users()
Definition: VPlanValue.h:147
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Produce a widened version of the call instruction.
void execute(VPTransformState &State) override
Generate a canonical vector induction variable of the vector loop, with start = {<Part*VF,...
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the gep nodes.
TruncInst * getTruncInst()
Returns the first defined value as TruncInst, if it is one or nullptr otherwise.
Definition: VPlan.h:1164
VPValue * getStepValue()
Returns the step value of the induction.
Definition: VPlan.h:1159
bool isCanonical() const
Returns true if the induction is canonical, i.e.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
Definition: VPlan.h:1170
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool isStore() const
Returns true if this recipe is a store.
Definition: VPlan.h:1706
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPBasicBlock * getIncomingBlock(unsigned I)
Returns the I th incoming VPBasicBlock.
Definition: VPlan.h:1258
void execute(VPTransformState &State) override
Generate the phi/select nodes.
bool onlyScalarsGenerated(ElementCount VF)
Returns true if only scalar values will be generated.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Produce widened copies of all Ingredients.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient outp...
Definition: VPlan.h:2177
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
void printAsOperand(raw_ostream &O, bool PrintType=true, const Module *M=nullptr) const
Print the name of this Value out to the specified raw_ostream.
Definition: AsmWriter.cpp:4781
bool hasName() const
Definition: Value.h:261
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:308
static VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
Definition: Type.cpp:688
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:166
Iterator for intrusive lists based on ilist_node.
self_iterator getIterator()
Definition: ilist_node.h:82
iterator erase(iterator where)
Definition: ilist.h:268
pointer remove(iterator &IT)
Definition: ilist.h:252
iterator insertAfter(iterator where, pointer New)
Definition: ilist.h:238
iterator insert(iterator where, pointer New)
Definition: ilist.h:229
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1506
bool isUniformAfterVectorization(VPValue *VPV)
Returns true if VPV is uniform after vectorization.
Definition: VPlan.h:2720
bool onlyFirstLaneUsed(VPValue *Def)
Returns true if only the first lane of Def is used.
Definition: VPlan.cpp:1129
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void ReplaceInstWithInst(BasicBlock *BB, BasicBlock::iterator &BI, Instruction *I)
Replace the instruction specified by BI with the instruction specified by I.
bool isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, unsigned OpdIdx)
Identifies if the vector form of the intrinsic has a operand that has an overloaded type.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A,...
Definition: STLExtras.h:2387
Value * getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF)
Return the runtime value for VF.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void interleaveComma(const Container &c, StreamT &os, UnaryFunctor each_fn)
Definition: STLExtras.h:2176
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1789
cl::opt< bool > EnableVPlanNativePath("enable-vplan-native-path", cl::Hidden, cl::desc("Enable VPlan-native vectorization path with " "support for outer loop vectorization."))
Definition: VPlan.cpp:51
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
RecurKind
These are the kinds of recurrences that we support.
Definition: IVDescriptors.h:35
Value * createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF, int64_t Step)
Return a value for Step multiplied by VF.
bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx)
Identifies if the vector form of the intrinsic has a scalar operand.
void execute(VPTransformState &State) override
Generate the phi nodes.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPIteration represents a single point in the iteration space of the output (vectorized and/or unrolle...
Definition: VPlan.h:189
BasicBlock * PrevBB
The previous IR BasicBlock created or used.
Definition: VPlan.h:336
SmallDenseMap< VPBasicBlock *, BasicBlock * > VPBB2IRBB
A mapping of each VPBasicBlock to the corresponding BasicBlock.
Definition: VPlan.h:344
BasicBlock * getPreheaderBBFor(VPRecipeBase *R)
Returns the BasicBlock* mapped to the pre-header of the loop region containing R.
Definition: VPlan.cpp:235
VPTransformState holds information passed down when "executing" a VPlan, needed for generating the ou...
Definition: VPlan.h:206
void addMetadata(Instruction *To, Instruction *From)
Add metadata from one instruction to another.
Definition: VPlan.cpp:248
Value * get(VPValue *Def, unsigned Part)
Get the generated Value for a given VPValue and a given Part.
void setDebugLocFromInst(const Value *V)
Set the debug location in the builder using the debug location in V.
Definition: VPlan.cpp:260
void reset(VPValue *Def, Value *V, unsigned Part)
Reset an existing vector value for Def and a given Part.
Definition: VPlan.h:273
struct llvm::VPTransformState::CFGState CFG
std::optional< VPIteration > Instance
Hold the indices to generate specific scalar instructions.
Definition: VPlan.h:220
IRBuilderBase & Builder
Hold a reference to the IRBuilder used to generate output IR code.
Definition: VPlan.h:360
SmallPtrSet< VPRecipeBase *, 16 > MayGeneratePoisonRecipes
Holds recipes that may generate a poison value that is used after vectorization, even when their oper...
Definition: VPlan.h:375
bool hasScalarValue(VPValue *Def, VPIteration Instance)
Definition: VPlan.h:254
bool hasVectorValue(VPValue *Def, unsigned Part)
Definition: VPlan.h:244
ElementCount VF
The chosen Vectorization and Unroll Factors of the loop being vectorized.
Definition: VPlan.h:214
Loop * CurrentVectorLoop
The loop object for the current parent region, or nullptr.
Definition: VPlan.h:378
void set(VPValue *Def, Value *V, unsigned Part)
Set the generated Value for a given VPValue and a given Part.
Definition: VPlan.h:265
bool isInvariantCond() const
Definition: VPlan.h:987
VPValue * getCond() const
Definition: VPlan.h:983
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Produce a widened version of the select instruction.