LLVM 19.0.0git
VPlanRecipes.cpp
Go to the documentation of this file.
1//===- VPlanRecipes.cpp - Implementations for VPlan recipes ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file contains implementations for different VPlan recipes.
11///
12//===----------------------------------------------------------------------===//
13
14#include "VPlan.h"
15#include "VPlanAnalysis.h"
16#include "llvm/ADT/STLExtras.h"
18#include "llvm/ADT/Twine.h"
20#include "llvm/IR/BasicBlock.h"
21#include "llvm/IR/IRBuilder.h"
22#include "llvm/IR/Instruction.h"
24#include "llvm/IR/Type.h"
25#include "llvm/IR/Value.h"
28#include "llvm/Support/Debug.h"
33#include <cassert>
34
35using namespace llvm;
36
38
39namespace llvm {
41}
42
43#define LV_NAME "loop-vectorize"
44#define DEBUG_TYPE LV_NAME
45
47 switch (getVPDefID()) {
48 case VPInterleaveSC:
49 return cast<VPInterleaveRecipe>(this)->getNumStoreOperands() > 0;
50 case VPWidenStoreEVLSC:
51 case VPWidenStoreSC:
52 return true;
53 case VPReplicateSC:
54 return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
55 ->mayWriteToMemory();
56 case VPWidenCallSC:
57 return !cast<VPWidenCallRecipe>(this)
58 ->getCalledScalarFunction()
59 ->onlyReadsMemory();
60 case VPBranchOnMaskSC:
61 case VPScalarIVStepsSC:
62 case VPPredInstPHISC:
63 return false;
64 case VPBlendSC:
65 case VPReductionSC:
66 case VPWidenCanonicalIVSC:
67 case VPWidenCastSC:
68 case VPWidenGEPSC:
69 case VPWidenIntOrFpInductionSC:
70 case VPWidenLoadEVLSC:
71 case VPWidenLoadSC:
72 case VPWidenPHISC:
73 case VPWidenSC:
74 case VPWidenSelectSC: {
75 const Instruction *I =
76 dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
77 (void)I;
78 assert((!I || !I->mayWriteToMemory()) &&
79 "underlying instruction may write to memory");
80 return false;
81 }
82 default:
83 return true;
84 }
85}
86
88 switch (getVPDefID()) {
89 case VPWidenLoadEVLSC:
90 case VPWidenLoadSC:
91 return true;
92 case VPReplicateSC:
93 return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
94 ->mayReadFromMemory();
95 case VPWidenCallSC:
96 return !cast<VPWidenCallRecipe>(this)
97 ->getCalledScalarFunction()
98 ->onlyWritesMemory();
99 case VPBranchOnMaskSC:
100 case VPPredInstPHISC:
101 case VPScalarIVStepsSC:
102 case VPWidenStoreEVLSC:
103 case VPWidenStoreSC:
104 return false;
105 case VPBlendSC:
106 case VPReductionSC:
107 case VPWidenCanonicalIVSC:
108 case VPWidenCastSC:
109 case VPWidenGEPSC:
110 case VPWidenIntOrFpInductionSC:
111 case VPWidenPHISC:
112 case VPWidenSC:
113 case VPWidenSelectSC: {
114 const Instruction *I =
115 dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
116 (void)I;
117 assert((!I || !I->mayReadFromMemory()) &&
118 "underlying instruction may read from memory");
119 return false;
120 }
121 default:
122 return true;
123 }
124}
125
127 switch (getVPDefID()) {
128 case VPDerivedIVSC:
129 case VPPredInstPHISC:
130 case VPScalarCastSC:
131 return false;
132 case VPInstructionSC:
133 switch (cast<VPInstruction>(this)->getOpcode()) {
134 case Instruction::Or:
135 case Instruction::ICmp:
136 case Instruction::Select:
141 return false;
142 default:
143 return true;
144 }
145 case VPWidenCallSC: {
146 Function *Fn = cast<VPWidenCallRecipe>(this)->getCalledScalarFunction();
147 return mayWriteToMemory() || !Fn->doesNotThrow() || !Fn->willReturn();
148 }
149 case VPBlendSC:
150 case VPReductionSC:
151 case VPScalarIVStepsSC:
152 case VPWidenCanonicalIVSC:
153 case VPWidenCastSC:
154 case VPWidenGEPSC:
155 case VPWidenIntOrFpInductionSC:
156 case VPWidenPHISC:
157 case VPWidenPointerInductionSC:
158 case VPWidenSC:
159 case VPWidenSelectSC: {
160 const Instruction *I =
161 dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
162 (void)I;
163 assert((!I || !I->mayHaveSideEffects()) &&
164 "underlying instruction has side-effects");
165 return false;
166 }
167 case VPInterleaveSC:
168 return mayWriteToMemory();
169 case VPWidenLoadEVLSC:
170 case VPWidenLoadSC:
171 case VPWidenStoreEVLSC:
172 case VPWidenStoreSC:
173 assert(
174 cast<VPWidenMemoryRecipe>(this)->getIngredient().mayHaveSideEffects() ==
176 "mayHaveSideffects result for ingredient differs from this "
177 "implementation");
178 return mayWriteToMemory();
179 case VPReplicateSC: {
180 auto *R = cast<VPReplicateRecipe>(this);
181 return R->getUnderlyingInstr()->mayHaveSideEffects();
182 }
183 default:
184 return true;
185 }
186}
187
189 auto Lane = VPLane::getLastLaneForVF(State.VF);
190 VPValue *ExitValue = getOperand(0);
192 Lane = VPLane::getFirstLane();
193 VPBasicBlock *MiddleVPBB =
194 cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSingleSuccessor());
195 assert(MiddleVPBB->getNumSuccessors() == 0 &&
196 "the middle block must not have any successors");
197 BasicBlock *MiddleBB = State.CFG.VPBB2IRBB[MiddleVPBB];
198 Phi->addIncoming(State.get(ExitValue, VPIteration(State.UF - 1, Lane)),
199 MiddleBB);
200}
201
202#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
204 O << "Live-out ";
206 O << " = ";
208 O << "\n";
209}
210#endif
211
213 assert(!Parent && "Recipe already in some VPBasicBlock");
214 assert(InsertPos->getParent() &&
215 "Insertion position not in any VPBasicBlock");
216 InsertPos->getParent()->insert(this, InsertPos->getIterator());
217}
218
221 assert(!Parent && "Recipe already in some VPBasicBlock");
222 assert(I == BB.end() || I->getParent() == &BB);
223 BB.insert(this, I);
224}
225
227 assert(!Parent && "Recipe already in some VPBasicBlock");
228 assert(InsertPos->getParent() &&
229 "Insertion position not in any VPBasicBlock");
230 InsertPos->getParent()->insert(this, std::next(InsertPos->getIterator()));
231}
232
234 assert(getParent() && "Recipe not in any VPBasicBlock");
236 Parent = nullptr;
237}
238
240 assert(getParent() && "Recipe not in any VPBasicBlock");
242}
243
246 insertAfter(InsertPos);
247}
248
252 insertBefore(BB, I);
253}
254
256 assert(OpType == OperationType::FPMathOp &&
257 "recipe doesn't have fast math flags");
258 FastMathFlags Res;
259 Res.setAllowReassoc(FMFs.AllowReassoc);
260 Res.setNoNaNs(FMFs.NoNaNs);
261 Res.setNoInfs(FMFs.NoInfs);
262 Res.setNoSignedZeros(FMFs.NoSignedZeros);
263 Res.setAllowReciprocal(FMFs.AllowReciprocal);
264 Res.setAllowContract(FMFs.AllowContract);
265 Res.setApproxFunc(FMFs.ApproxFunc);
266 return Res;
267}
268
271 const Twine &Name)
272 : VPRecipeWithIRFlags(VPDef::VPInstructionSC, ArrayRef<VPValue *>({A, B}),
273 Pred, DL),
274 Opcode(Opcode), Name(Name.str()) {
275 assert(Opcode == Instruction::ICmp &&
276 "only ICmp predicates supported at the moment");
277}
278
280 std::initializer_list<VPValue *> Operands,
281 FastMathFlags FMFs, DebugLoc DL, const Twine &Name)
282 : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, FMFs, DL),
283 Opcode(Opcode), Name(Name.str()) {
284 // Make sure the VPInstruction is a floating-point operation.
285 assert(isFPMathOp() && "this op can't take fast-math flags");
286}
287
288bool VPInstruction::doesGeneratePerAllLanes() const {
289 return Opcode == VPInstruction::PtrAdd && !vputils::onlyFirstLaneUsed(this);
290}
291
292bool VPInstruction::canGenerateScalarForFirstLane() const {
294 return true;
295
296 switch (Opcode) {
304 return true;
305 default:
306 return false;
307 }
308}
309
310Value *VPInstruction::generatePerLane(VPTransformState &State,
311 const VPIteration &Lane) {
312 IRBuilderBase &Builder = State.Builder;
313
315 "only PtrAdd opcodes are supported for now");
316 return Builder.CreatePtrAdd(State.get(getOperand(0), Lane),
317 State.get(getOperand(1), Lane), Name);
318}
319
320Value *VPInstruction::generatePerPart(VPTransformState &State, unsigned Part) {
321 IRBuilderBase &Builder = State.Builder;
322
324 bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
325 if (Part != 0 && vputils::onlyFirstPartUsed(this))
326 return State.get(this, 0, OnlyFirstLaneUsed);
327
328 Value *A = State.get(getOperand(0), Part, OnlyFirstLaneUsed);
329 Value *B = State.get(getOperand(1), Part, OnlyFirstLaneUsed);
330 auto *Res =
331 Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B, Name);
332 if (auto *I = dyn_cast<Instruction>(Res))
333 setFlags(I);
334 return Res;
335 }
336
337 switch (getOpcode()) {
338 case VPInstruction::Not: {
339 Value *A = State.get(getOperand(0), Part);
340 return Builder.CreateNot(A, Name);
341 }
342 case Instruction::ICmp: {
343 Value *A = State.get(getOperand(0), Part);
344 Value *B = State.get(getOperand(1), Part);
345 return Builder.CreateCmp(getPredicate(), A, B, Name);
346 }
347 case Instruction::Select: {
348 Value *Cond = State.get(getOperand(0), Part);
349 Value *Op1 = State.get(getOperand(1), Part);
350 Value *Op2 = State.get(getOperand(2), Part);
351 return Builder.CreateSelect(Cond, Op1, Op2, Name);
352 }
354 // Get first lane of vector induction variable.
355 Value *VIVElem0 = State.get(getOperand(0), VPIteration(Part, 0));
356 // Get the original loop tripcount.
357 Value *ScalarTC = State.get(getOperand(1), VPIteration(Part, 0));
358
359 // If this part of the active lane mask is scalar, generate the CMP directly
360 // to avoid unnecessary extracts.
361 if (State.VF.isScalar())
362 return Builder.CreateCmp(CmpInst::Predicate::ICMP_ULT, VIVElem0, ScalarTC,
363 Name);
364
365 auto *Int1Ty = Type::getInt1Ty(Builder.getContext());
366 auto *PredTy = VectorType::get(Int1Ty, State.VF);
367 return Builder.CreateIntrinsic(Intrinsic::get_active_lane_mask,
368 {PredTy, ScalarTC->getType()},
369 {VIVElem0, ScalarTC}, nullptr, Name);
370 }
372 // Generate code to combine the previous and current values in vector v3.
373 //
374 // vector.ph:
375 // v_init = vector(..., ..., ..., a[-1])
376 // br vector.body
377 //
378 // vector.body
379 // i = phi [0, vector.ph], [i+4, vector.body]
380 // v1 = phi [v_init, vector.ph], [v2, vector.body]
381 // v2 = a[i, i+1, i+2, i+3];
382 // v3 = vector(v1(3), v2(0, 1, 2))
383
384 // For the first part, use the recurrence phi (v1), otherwise v2.
385 auto *V1 = State.get(getOperand(0), 0);
386 Value *PartMinus1 = Part == 0 ? V1 : State.get(getOperand(1), Part - 1);
387 if (!PartMinus1->getType()->isVectorTy())
388 return PartMinus1;
389 Value *V2 = State.get(getOperand(1), Part);
390 return Builder.CreateVectorSplice(PartMinus1, V2, -1, Name);
391 }
393 if (Part != 0)
394 return State.get(this, 0, /*IsScalar*/ true);
395
396 Value *ScalarTC = State.get(getOperand(0), {0, 0});
397 Value *Step =
398 createStepForVF(Builder, ScalarTC->getType(), State.VF, State.UF);
399 Value *Sub = Builder.CreateSub(ScalarTC, Step);
400 Value *Cmp = Builder.CreateICmp(CmpInst::Predicate::ICMP_UGT, ScalarTC, Step);
401 Value *Zero = ConstantInt::get(ScalarTC->getType(), 0);
402 return Builder.CreateSelect(Cmp, Sub, Zero);
403 }
405 // Compute EVL
406 auto GetEVL = [=](VPTransformState &State, Value *AVL) {
407 assert(AVL->getType()->isIntegerTy() &&
408 "Requested vector length should be an integer.");
409
410 // TODO: Add support for MaxSafeDist for correct loop emission.
411 assert(State.VF.isScalable() && "Expected scalable vector factor.");
412 Value *VFArg = State.Builder.getInt32(State.VF.getKnownMinValue());
413
414 Value *EVL = State.Builder.CreateIntrinsic(
415 State.Builder.getInt32Ty(), Intrinsic::experimental_get_vector_length,
416 {AVL, VFArg, State.Builder.getTrue()});
417 return EVL;
418 };
419 // TODO: Restructure this code with an explicit remainder loop, vsetvli can
420 // be outside of the main loop.
421 assert(Part == 0 && "No unrolling expected for predicated vectorization.");
422 // Compute VTC - IV as the AVL (requested vector length).
423 Value *Index = State.get(getOperand(0), VPIteration(0, 0));
424 Value *TripCount = State.get(getOperand(1), VPIteration(0, 0));
425 Value *AVL = State.Builder.CreateSub(TripCount, Index);
426 Value *EVL = GetEVL(State, AVL);
427 return EVL;
428 }
430 auto *IV = State.get(getOperand(0), VPIteration(0, 0));
431 if (Part == 0)
432 return IV;
433
434 // The canonical IV is incremented by the vectorization factor (num of SIMD
435 // elements) times the unroll part.
436 Value *Step = createStepForVF(Builder, IV->getType(), State.VF, Part);
437 return Builder.CreateAdd(IV, Step, Name, hasNoUnsignedWrap(),
439 }
441 if (Part != 0)
442 return nullptr;
443
444 Value *Cond = State.get(getOperand(0), VPIteration(Part, 0));
445 VPRegionBlock *ParentRegion = getParent()->getParent();
446 VPBasicBlock *Header = ParentRegion->getEntryBasicBlock();
447
448 // Replace the temporary unreachable terminator with a new conditional
449 // branch, hooking it up to backward destination for exiting blocks now and
450 // to forward destination(s) later when they are created.
451 BranchInst *CondBr =
452 Builder.CreateCondBr(Cond, Builder.GetInsertBlock(), nullptr);
453
454 if (getParent()->isExiting())
455 CondBr->setSuccessor(1, State.CFG.VPBB2IRBB[Header]);
456
457 CondBr->setSuccessor(0, nullptr);
459 return CondBr;
460 }
462 if (Part != 0)
463 return nullptr;
464 // First create the compare.
465 Value *IV = State.get(getOperand(0), Part, /*IsScalar*/ true);
466 Value *TC = State.get(getOperand(1), Part, /*IsScalar*/ true);
467 Value *Cond = Builder.CreateICmpEQ(IV, TC);
468
469 // Now create the branch.
470 auto *Plan = getParent()->getPlan();
471 VPRegionBlock *TopRegion = Plan->getVectorLoopRegion();
472 VPBasicBlock *Header = TopRegion->getEntry()->getEntryBasicBlock();
473
474 // Replace the temporary unreachable terminator with a new conditional
475 // branch, hooking it up to backward destination (the header) now and to the
476 // forward destination (the exit/middle block) later when it is created.
477 // Note that CreateCondBr expects a valid BB as first argument, so we need
478 // to set it to nullptr later.
479 BranchInst *CondBr = Builder.CreateCondBr(Cond, Builder.GetInsertBlock(),
480 State.CFG.VPBB2IRBB[Header]);
481 CondBr->setSuccessor(0, nullptr);
483 return CondBr;
484 }
486 if (Part != 0)
487 return State.get(this, 0, /*IsScalar*/ true);
488
489 // FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary
490 // and will be removed by breaking up the recipe further.
491 auto *PhiR = cast<VPReductionPHIRecipe>(getOperand(0));
492 auto *OrigPhi = cast<PHINode>(PhiR->getUnderlyingValue());
493 // Get its reduction variable descriptor.
494 const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
495
496 RecurKind RK = RdxDesc.getRecurrenceKind();
497
498 VPValue *LoopExitingDef = getOperand(1);
499 Type *PhiTy = OrigPhi->getType();
500 VectorParts RdxParts(State.UF);
501 for (unsigned Part = 0; Part < State.UF; ++Part)
502 RdxParts[Part] = State.get(LoopExitingDef, Part, PhiR->isInLoop());
503
504 // If the vector reduction can be performed in a smaller type, we truncate
505 // then extend the loop exit value to enable InstCombine to evaluate the
506 // entire expression in the smaller type.
507 // TODO: Handle this in truncateToMinBW.
508 if (State.VF.isVector() && PhiTy != RdxDesc.getRecurrenceType()) {
509 Type *RdxVecTy = VectorType::get(RdxDesc.getRecurrenceType(), State.VF);
510 for (unsigned Part = 0; Part < State.UF; ++Part)
511 RdxParts[Part] = Builder.CreateTrunc(RdxParts[Part], RdxVecTy);
512 }
513 // Reduce all of the unrolled parts into a single vector.
514 Value *ReducedPartRdx = RdxParts[0];
515 unsigned Op = RecurrenceDescriptor::getOpcode(RK);
517 Op = Instruction::Or;
518
519 if (PhiR->isOrdered()) {
520 ReducedPartRdx = RdxParts[State.UF - 1];
521 } else {
522 // Floating-point operations should have some FMF to enable the reduction.
524 Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
525 for (unsigned Part = 1; Part < State.UF; ++Part) {
526 Value *RdxPart = RdxParts[Part];
527 if (Op != Instruction::ICmp && Op != Instruction::FCmp)
528 ReducedPartRdx = Builder.CreateBinOp(
529 (Instruction::BinaryOps)Op, RdxPart, ReducedPartRdx, "bin.rdx");
530 else
531 ReducedPartRdx = createMinMaxOp(Builder, RK, ReducedPartRdx, RdxPart);
532 }
533 }
534
535 // Create the reduction after the loop. Note that inloop reductions create
536 // the target reduction in the loop using a Reduction recipe.
537 if ((State.VF.isVector() ||
539 !PhiR->isInLoop()) {
540 ReducedPartRdx =
541 createTargetReduction(Builder, RdxDesc, ReducedPartRdx, OrigPhi);
542 // If the reduction can be performed in a smaller type, we need to extend
543 // the reduction to the wider type before we branch to the original loop.
544 if (PhiTy != RdxDesc.getRecurrenceType())
545 ReducedPartRdx = RdxDesc.isSigned()
546 ? Builder.CreateSExt(ReducedPartRdx, PhiTy)
547 : Builder.CreateZExt(ReducedPartRdx, PhiTy);
548 }
549
550 // If there were stores of the reduction value to a uniform memory address
551 // inside the loop, create the final store here.
552 if (StoreInst *SI = RdxDesc.IntermediateStore) {
553 auto *NewSI = Builder.CreateAlignedStore(
554 ReducedPartRdx, SI->getPointerOperand(), SI->getAlign());
555 propagateMetadata(NewSI, SI);
556 }
557
558 return ReducedPartRdx;
559 }
562 "can only generate first lane for PtrAdd");
563 Value *Ptr = State.get(getOperand(0), Part, /* IsScalar */ true);
564 Value *Addend = State.get(getOperand(1), Part, /* IsScalar */ true);
565 return Builder.CreatePtrAdd(Ptr, Addend, Name);
566 }
567 default:
568 llvm_unreachable("Unsupported opcode for instruction");
569 }
570}
571
572#if !defined(NDEBUG)
573bool VPInstruction::isFPMathOp() const {
574 // Inspired by FPMathOperator::classof. Notable differences are that we don't
575 // support Call, PHI and Select opcodes here yet.
576 return Opcode == Instruction::FAdd || Opcode == Instruction::FMul ||
577 Opcode == Instruction::FNeg || Opcode == Instruction::FSub ||
578 Opcode == Instruction::FDiv || Opcode == Instruction::FRem ||
579 Opcode == Instruction::FCmp || Opcode == Instruction::Select;
580}
581#endif
582
584 assert(!State.Instance && "VPInstruction executing an Instance");
586 assert((hasFastMathFlags() == isFPMathOp() ||
587 getOpcode() == Instruction::Select) &&
588 "Recipe not a FPMathOp but has fast-math flags?");
589 if (hasFastMathFlags())
592 bool GeneratesPerFirstLaneOnly =
593 canGenerateScalarForFirstLane() &&
596 bool GeneratesPerAllLanes = doesGeneratePerAllLanes();
597 for (unsigned Part = 0; Part < State.UF; ++Part) {
598 if (GeneratesPerAllLanes) {
599 for (unsigned Lane = 0, NumLanes = State.VF.getKnownMinValue();
600 Lane != NumLanes; ++Lane) {
601 Value *GeneratedValue = generatePerLane(State, VPIteration(Part, Lane));
602 assert(GeneratedValue && "generatePerLane must produce a value");
603 State.set(this, GeneratedValue, VPIteration(Part, Lane));
604 }
605 continue;
606 }
607
608 Value *GeneratedValue = generatePerPart(State, Part);
609 if (!hasResult())
610 continue;
611 assert(GeneratedValue && "generatePerPart must produce a value");
612 assert((GeneratedValue->getType()->isVectorTy() ==
613 !GeneratesPerFirstLaneOnly ||
614 State.VF.isScalar()) &&
615 "scalar value but not only first lane defined");
616 State.set(this, GeneratedValue, Part,
617 /*IsScalar*/ GeneratesPerFirstLaneOnly);
618 }
619}
620
622 assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
624 return vputils::onlyFirstLaneUsed(this);
625
626 switch (getOpcode()) {
627 default:
628 return false;
629 case Instruction::ICmp:
631 // TODO: Cover additional opcodes.
632 return vputils::onlyFirstLaneUsed(this);
638 return true;
639 };
640 llvm_unreachable("switch should return");
641}
642
643#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
645 VPSlotTracker SlotTracker(getParent()->getPlan());
646 print(dbgs(), "", SlotTracker);
647}
648
650 VPSlotTracker &SlotTracker) const {
651 O << Indent << "EMIT ";
652
653 if (hasResult()) {
655 O << " = ";
656 }
657
658 switch (getOpcode()) {
660 O << "not";
661 break;
663 O << "combined load";
664 break;
666 O << "combined store";
667 break;
669 O << "active lane mask";
670 break;
672 O << "EXPLICIT-VECTOR-LENGTH";
673 break;
675 O << "first-order splice";
676 break;
678 O << "branch-on-cond";
679 break;
681 O << "TC > VF ? TC - VF : 0";
682 break;
684 O << "VF * Part +";
685 break;
687 O << "branch-on-count";
688 break;
690 O << "compute-reduction-result";
691 break;
693 O << "ptradd";
694 break;
695 default:
697 }
698
699 printFlags(O);
701
702 if (auto DL = getDebugLoc()) {
703 O << ", !dbg ";
704 DL.print(O);
705 }
706}
707#endif
708
710 assert(State.VF.isVector() && "not widening");
711 Function *CalledScalarFn = getCalledScalarFunction();
712 assert(!isDbgInfoIntrinsic(CalledScalarFn->getIntrinsicID()) &&
713 "DbgInfoIntrinsic should have been dropped during VPlan construction");
715
716 bool UseIntrinsic = VectorIntrinsicID != Intrinsic::not_intrinsic;
717 FunctionType *VFTy = nullptr;
718 if (Variant)
719 VFTy = Variant->getFunctionType();
720 for (unsigned Part = 0; Part < State.UF; ++Part) {
721 SmallVector<Type *, 2> TysForDecl;
722 // Add return type if intrinsic is overloaded on it.
723 if (UseIntrinsic &&
724 isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, -1))
725 TysForDecl.push_back(VectorType::get(
726 CalledScalarFn->getReturnType()->getScalarType(), State.VF));
728 for (const auto &I : enumerate(arg_operands())) {
729 // Some intrinsics have a scalar argument - don't replace it with a
730 // vector.
731 Value *Arg;
732 if (UseIntrinsic &&
733 isVectorIntrinsicWithScalarOpAtArg(VectorIntrinsicID, I.index()))
734 Arg = State.get(I.value(), VPIteration(0, 0));
735 // Some vectorized function variants may also take a scalar argument,
736 // e.g. linear parameters for pointers. This needs to be the scalar value
737 // from the start of the respective part when interleaving.
738 else if (VFTy && !VFTy->getParamType(I.index())->isVectorTy())
739 Arg = State.get(I.value(), VPIteration(Part, 0));
740 else
741 Arg = State.get(I.value(), Part);
742 if (UseIntrinsic &&
743 isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, I.index()))
744 TysForDecl.push_back(Arg->getType());
745 Args.push_back(Arg);
746 }
747
748 Function *VectorF;
749 if (UseIntrinsic) {
750 // Use vector version of the intrinsic.
751 Module *M = State.Builder.GetInsertBlock()->getModule();
752 VectorF = Intrinsic::getDeclaration(M, VectorIntrinsicID, TysForDecl);
753 assert(VectorF && "Can't retrieve vector intrinsic.");
754 } else {
755#ifndef NDEBUG
756 assert(Variant != nullptr && "Can't create vector function.");
757#endif
758 VectorF = Variant;
759 }
760
761 auto *CI = cast_or_null<CallInst>(getUnderlyingInstr());
763 if (CI)
764 CI->getOperandBundlesAsDefs(OpBundles);
765
766 CallInst *V = State.Builder.CreateCall(VectorF, Args, OpBundles);
767
768 if (isa<FPMathOperator>(V))
769 V->copyFastMathFlags(CI);
770
771 if (!V->getType()->isVoidTy())
772 State.set(this, V, Part);
773 State.addMetadata(V, CI);
774 }
775}
776
777#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
779 VPSlotTracker &SlotTracker) const {
780 O << Indent << "WIDEN-CALL ";
781
782 Function *CalledFn = getCalledScalarFunction();
783 if (CalledFn->getReturnType()->isVoidTy())
784 O << "void ";
785 else {
787 O << " = ";
788 }
789
790 O << "call @" << CalledFn->getName() << "(";
792 Op->printAsOperand(O, SlotTracker);
793 });
794 O << ")";
795
796 if (VectorIntrinsicID)
797 O << " (using vector intrinsic)";
798 else {
799 O << " (using library function";
800 if (Variant->hasName())
801 O << ": " << Variant->getName();
802 O << ")";
803 }
804}
805
807 VPSlotTracker &SlotTracker) const {
808 O << Indent << "WIDEN-SELECT ";
810 O << " = select ";
812 O << ", ";
814 O << ", ";
816 O << (isInvariantCond() ? " (condition is loop invariant)" : "");
817}
818#endif
819
822
823 // The condition can be loop invariant but still defined inside the
824 // loop. This means that we can't just use the original 'cond' value.
825 // We have to take the 'vectorized' value and pick the first lane.
826 // Instcombine will make this a no-op.
827 auto *InvarCond =
828 isInvariantCond() ? State.get(getCond(), VPIteration(0, 0)) : nullptr;
829
830 for (unsigned Part = 0; Part < State.UF; ++Part) {
831 Value *Cond = InvarCond ? InvarCond : State.get(getCond(), Part);
832 Value *Op0 = State.get(getOperand(1), Part);
833 Value *Op1 = State.get(getOperand(2), Part);
834 Value *Sel = State.Builder.CreateSelect(Cond, Op0, Op1);
835 State.set(this, Sel, Part);
836 State.addMetadata(Sel, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
837 }
838}
839
// Snapshot the IR-level fast-math flags into the recipe's own compact
// representation, so the recipe does not need to keep the originating IR
// instruction alive to remember its FMF. The inverse conversion is performed
// by getFastMathFlags().
VPRecipeWithIRFlags::FastMathFlagsTy::FastMathFlagsTy(
    const FastMathFlags &FMF) {
  // One field per individual flag; copied verbatim from the accessors.
  AllowReassoc = FMF.allowReassoc();
  NoNaNs = FMF.noNaNs();
  NoInfs = FMF.noInfs();
  NoSignedZeros = FMF.noSignedZeros();
  AllowReciprocal = FMF.allowReciprocal();
  AllowContract = FMF.allowContract();
  ApproxFunc = FMF.approxFunc();
}
850
851#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
853 switch (OpType) {
854 case OperationType::Cmp:
856 break;
857 case OperationType::DisjointOp:
859 O << " disjoint";
860 break;
861 case OperationType::PossiblyExactOp:
862 if (ExactFlags.IsExact)
863 O << " exact";
864 break;
865 case OperationType::OverflowingBinOp:
866 if (WrapFlags.HasNUW)
867 O << " nuw";
868 if (WrapFlags.HasNSW)
869 O << " nsw";
870 break;
871 case OperationType::FPMathOp:
873 break;
874 case OperationType::GEPOp:
876 O << " inbounds";
877 break;
878 case OperationType::NonNegOp:
879 if (NonNegFlags.NonNeg)
880 O << " nneg";
881 break;
882 case OperationType::Other:
883 break;
884 }
885 if (getNumOperands() > 0)
886 O << " ";
887}
888#endif
889
892 auto &Builder = State.Builder;
893 switch (Opcode) {
894 case Instruction::Call:
895 case Instruction::Br:
896 case Instruction::PHI:
897 case Instruction::GetElementPtr:
898 case Instruction::Select:
899 llvm_unreachable("This instruction is handled by a different recipe.");
900 case Instruction::UDiv:
901 case Instruction::SDiv:
902 case Instruction::SRem:
903 case Instruction::URem:
904 case Instruction::Add:
905 case Instruction::FAdd:
906 case Instruction::Sub:
907 case Instruction::FSub:
908 case Instruction::FNeg:
909 case Instruction::Mul:
910 case Instruction::FMul:
911 case Instruction::FDiv:
912 case Instruction::FRem:
913 case Instruction::Shl:
914 case Instruction::LShr:
915 case Instruction::AShr:
916 case Instruction::And:
917 case Instruction::Or:
918 case Instruction::Xor: {
919 // Just widen unops and binops.
920 for (unsigned Part = 0; Part < State.UF; ++Part) {
922 for (VPValue *VPOp : operands())
923 Ops.push_back(State.get(VPOp, Part));
924
925 Value *V = Builder.CreateNAryOp(Opcode, Ops);
926
927 if (auto *VecOp = dyn_cast<Instruction>(V))
928 setFlags(VecOp);
929
930 // Use this vector value for all users of the original instruction.
931 State.set(this, V, Part);
932 State.addMetadata(V, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
933 }
934
935 break;
936 }
937 case Instruction::Freeze: {
938 for (unsigned Part = 0; Part < State.UF; ++Part) {
939 Value *Op = State.get(getOperand(0), Part);
940
941 Value *Freeze = Builder.CreateFreeze(Op);
942 State.set(this, Freeze, Part);
943 }
944 break;
945 }
946 case Instruction::ICmp:
947 case Instruction::FCmp: {
948 // Widen compares. Generate vector compares.
949 bool FCmp = Opcode == Instruction::FCmp;
950 for (unsigned Part = 0; Part < State.UF; ++Part) {
951 Value *A = State.get(getOperand(0), Part);
952 Value *B = State.get(getOperand(1), Part);
953 Value *C = nullptr;
954 if (FCmp) {
955 // Propagate fast math flags.
956 IRBuilder<>::FastMathFlagGuard FMFG(Builder);
957 if (auto *I = dyn_cast_or_null<Instruction>(getUnderlyingValue()))
958 Builder.setFastMathFlags(I->getFastMathFlags());
959 C = Builder.CreateFCmp(getPredicate(), A, B);
960 } else {
961 C = Builder.CreateICmp(getPredicate(), A, B);
962 }
963 State.set(this, C, Part);
964 State.addMetadata(C, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
965 }
966
967 break;
968 }
969 default:
970 // This instruction is not vectorized by simple widening.
971 LLVM_DEBUG(dbgs() << "LV: Found an unhandled opcode : "
972 << Instruction::getOpcodeName(Opcode));
973 llvm_unreachable("Unhandled instruction!");
974 } // end of switch.
975
976#if !defined(NDEBUG)
977 // Verify that VPlan type inference results agree with the type of the
978 // generated values.
979 for (unsigned Part = 0; Part < State.UF; ++Part) {
981 State.VF) == State.get(this, Part)->getType() &&
982 "inferred type and type from generated instructions do not match");
983 }
984#endif
985}
986
987#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
989 VPSlotTracker &SlotTracker) const {
990 O << Indent << "WIDEN ";
992 O << " = " << Instruction::getOpcodeName(Opcode);
993 printFlags(O);
995}
996#endif
997
1000 auto &Builder = State.Builder;
1001 /// Vectorize casts.
1002 assert(State.VF.isVector() && "Not vectorizing?");
1003 Type *DestTy = VectorType::get(getResultType(), State.VF);
1004 VPValue *Op = getOperand(0);
1005 for (unsigned Part = 0; Part < State.UF; ++Part) {
1006 if (Part > 0 && Op->isLiveIn()) {
1007 // FIXME: Remove once explicit unrolling is implemented using VPlan.
1008 State.set(this, State.get(this, 0), Part);
1009 continue;
1010 }
1011 Value *A = State.get(Op, Part);
1012 Value *Cast = Builder.CreateCast(Instruction::CastOps(Opcode), A, DestTy);
1013 State.set(this, Cast, Part);
1014 State.addMetadata(Cast, cast_or_null<Instruction>(getUnderlyingValue()));
1015 }
1016}
1017
1018#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1020 VPSlotTracker &SlotTracker) const {
1021 O << Indent << "WIDEN-CAST ";
1023 O << " = " << Instruction::getOpcodeName(Opcode) << " ";
1024 printFlags(O);
1026 O << " to " << *getResultType();
1027}
1028#endif
1029
1030/// This function adds
1031/// (StartIdx * Step, (StartIdx + 1) * Step, (StartIdx + 2) * Step, ...)
/// to each vector element of Val. The sequence starts at StartIdx.
1033/// \p Opcode is relevant for FP induction variable.
1034static Value *getStepVector(Value *Val, Value *StartIdx, Value *Step,
1036 IRBuilderBase &Builder) {
1037 assert(VF.isVector() && "only vector VFs are supported");
1038
1039 // Create and check the types.
1040 auto *ValVTy = cast<VectorType>(Val->getType());
1041 ElementCount VLen = ValVTy->getElementCount();
1042
1043 Type *STy = Val->getType()->getScalarType();
1044 assert((STy->isIntegerTy() || STy->isFloatingPointTy()) &&
1045 "Induction Step must be an integer or FP");
1046 assert(Step->getType() == STy && "Step has wrong type");
1047
1049
1050 // Create a vector of consecutive numbers from zero to VF.
1051 VectorType *InitVecValVTy = ValVTy;
1052 if (STy->isFloatingPointTy()) {
1053 Type *InitVecValSTy =
1055 InitVecValVTy = VectorType::get(InitVecValSTy, VLen);
1056 }
1057 Value *InitVec = Builder.CreateStepVector(InitVecValVTy);
1058
1059 // Splat the StartIdx
1060 Value *StartIdxSplat = Builder.CreateVectorSplat(VLen, StartIdx);
1061
1062 if (STy->isIntegerTy()) {
1063 InitVec = Builder.CreateAdd(InitVec, StartIdxSplat);
1064 Step = Builder.CreateVectorSplat(VLen, Step);
1065 assert(Step->getType() == Val->getType() && "Invalid step vec");
1066 // FIXME: The newly created binary instructions should contain nsw/nuw
1067 // flags, which can be found from the original scalar operations.
1068 Step = Builder.CreateMul(InitVec, Step);
1069 return Builder.CreateAdd(Val, Step, "induction");
1070 }
1071
1072 // Floating point induction.
1073 assert((BinOp == Instruction::FAdd || BinOp == Instruction::FSub) &&
1074 "Binary Opcode should be specified for FP induction");
1075 InitVec = Builder.CreateUIToFP(InitVec, ValVTy);
1076 InitVec = Builder.CreateFAdd(InitVec, StartIdxSplat);
1077
1078 Step = Builder.CreateVectorSplat(VLen, Step);
1079 Value *MulOp = Builder.CreateFMul(InitVec, Step);
1080 return Builder.CreateBinOp(BinOp, Val, MulOp, "induction");
1081}
1082
1083/// A helper function that returns an integer or floating-point constant with
1084/// value C.
1086 return Ty->isIntegerTy() ? ConstantInt::getSigned(Ty, C)
1087 : ConstantFP::get(Ty, C);
1088}
1089
1091 ElementCount VF) {
1092 assert(FTy->isFloatingPointTy() && "Expected floating point type!");
1093 Type *IntTy = IntegerType::get(FTy->getContext(), FTy->getScalarSizeInBits());
1094 Value *RuntimeVF = getRuntimeVF(B, IntTy, VF);
1095 return B.CreateUIToFP(RuntimeVF, FTy);
1096}
1097
1099 assert(!State.Instance && "Int or FP induction being replicated.");
1100
1101 Value *Start = getStartValue()->getLiveInIRValue();
1103 TruncInst *Trunc = getTruncInst();
1104 IRBuilderBase &Builder = State.Builder;
1105 assert(IV->getType() == ID.getStartValue()->getType() && "Types must match");
1106 assert(State.VF.isVector() && "must have vector VF");
1107
1108 // The value from the original loop to which we are mapping the new induction
1109 // variable.
1110 Instruction *EntryVal = Trunc ? cast<Instruction>(Trunc) : IV;
1111
1112 // Fast-math-flags propagate from the original induction instruction.
1113 IRBuilder<>::FastMathFlagGuard FMFG(Builder);
1114 if (ID.getInductionBinOp() && isa<FPMathOperator>(ID.getInductionBinOp()))
1115 Builder.setFastMathFlags(ID.getInductionBinOp()->getFastMathFlags());
1116
1117 // Now do the actual transformations, and start with fetching the step value.
1118 Value *Step = State.get(getStepValue(), VPIteration(0, 0));
1119
1120 assert((isa<PHINode>(EntryVal) || isa<TruncInst>(EntryVal)) &&
1121 "Expected either an induction phi-node or a truncate of it!");
1122
1123 // Construct the initial value of the vector IV in the vector loop preheader
1124 auto CurrIP = Builder.saveIP();
1125 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
1126 Builder.SetInsertPoint(VectorPH->getTerminator());
1127 if (isa<TruncInst>(EntryVal)) {
1128 assert(Start->getType()->isIntegerTy() &&
1129 "Truncation requires an integer type");
1130 auto *TruncType = cast<IntegerType>(EntryVal->getType());
1131 Step = Builder.CreateTrunc(Step, TruncType);
1132 Start = Builder.CreateCast(Instruction::Trunc, Start, TruncType);
1133 }
1134
1135 Value *Zero = getSignedIntOrFpConstant(Start->getType(), 0);
1136 Value *SplatStart = Builder.CreateVectorSplat(State.VF, Start);
1137 Value *SteppedStart = getStepVector(
1138 SplatStart, Zero, Step, ID.getInductionOpcode(), State.VF, State.Builder);
1139
1140 // We create vector phi nodes for both integer and floating-point induction
1141 // variables. Here, we determine the kind of arithmetic we will perform.
1144 if (Step->getType()->isIntegerTy()) {
1145 AddOp = Instruction::Add;
1146 MulOp = Instruction::Mul;
1147 } else {
1148 AddOp = ID.getInductionOpcode();
1149 MulOp = Instruction::FMul;
1150 }
1151
1152 // Multiply the vectorization factor by the step using integer or
1153 // floating-point arithmetic as appropriate.
1154 Type *StepType = Step->getType();
1155 Value *RuntimeVF;
1156 if (Step->getType()->isFloatingPointTy())
1157 RuntimeVF = getRuntimeVFAsFloat(Builder, StepType, State.VF);
1158 else
1159 RuntimeVF = getRuntimeVF(Builder, StepType, State.VF);
1160 Value *Mul = Builder.CreateBinOp(MulOp, Step, RuntimeVF);
1161
1162 // Create a vector splat to use in the induction update.
1163 //
1164 // FIXME: If the step is non-constant, we create the vector splat with
1165 // IRBuilder. IRBuilder can constant-fold the multiply, but it doesn't
1166 // handle a constant vector splat.
1167 Value *SplatVF = isa<Constant>(Mul)
1168 ? ConstantVector::getSplat(State.VF, cast<Constant>(Mul))
1169 : Builder.CreateVectorSplat(State.VF, Mul);
1170 Builder.restoreIP(CurrIP);
1171
1172 // We may need to add the step a number of times, depending on the unroll
1173 // factor. The last of those goes into the PHI.
1174 PHINode *VecInd = PHINode::Create(SteppedStart->getType(), 2, "vec.ind");
1175 VecInd->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
1176 VecInd->setDebugLoc(EntryVal->getDebugLoc());
1177 Instruction *LastInduction = VecInd;
1178 for (unsigned Part = 0; Part < State.UF; ++Part) {
1179 State.set(this, LastInduction, Part);
1180
1181 if (isa<TruncInst>(EntryVal))
1182 State.addMetadata(LastInduction, EntryVal);
1183
1184 LastInduction = cast<Instruction>(
1185 Builder.CreateBinOp(AddOp, LastInduction, SplatVF, "step.add"));
1186 LastInduction->setDebugLoc(EntryVal->getDebugLoc());
1187 }
1188
1189 LastInduction->setName("vec.ind.next");
1190 VecInd->addIncoming(SteppedStart, VectorPH);
1191 // Add induction update using an incorrect block temporarily. The phi node
1192 // will be fixed after VPlan execution. Note that at this point the latch
1193 // block cannot be used, as it does not exist yet.
1194 // TODO: Model increment value in VPlan, by turning the recipe into a
1195 // multi-def and a subclass of VPHeaderPHIRecipe.
1196 VecInd->addIncoming(LastInduction, VectorPH);
1197}
1198
1199#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1201 VPSlotTracker &SlotTracker) const {
1202 O << Indent << "WIDEN-INDUCTION";
1203 if (getTruncInst()) {
1204 O << "\\l\"";
1205 O << " +\n" << Indent << "\" " << VPlanIngredient(IV) << "\\l\"";
1206 O << " +\n" << Indent << "\" ";
1208 } else
1209 O << " " << VPlanIngredient(IV);
1210
1211 O << ", ";
1213}
1214#endif
1215
1217 // The step may be defined by a recipe in the preheader (e.g. if it requires
1218 // SCEV expansion), but for the canonical induction the step is required to be
1219 // 1, which is represented as live-in.
1221 return false;
1222 auto *StepC = dyn_cast<ConstantInt>(getStepValue()->getLiveInIRValue());
1223 auto *StartC = dyn_cast<ConstantInt>(getStartValue()->getLiveInIRValue());
1224 auto *CanIV = cast<VPCanonicalIVPHIRecipe>(&*getParent()->begin());
1225 return StartC && StartC->isZero() && StepC && StepC->isOne() &&
1226 getScalarType() == CanIV->getScalarType();
1227}
1228
1229#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1231 VPSlotTracker &SlotTracker) const {
1232 O << Indent;
1234 O << Indent << "= DERIVED-IV ";
1236 O << " + ";
1238 O << " * ";
1240}
1241#endif
1242
1244 // Fast-math-flags propagate from the original induction instruction.
1246 if (hasFastMathFlags())
1248
1249 /// Compute scalar induction steps. \p ScalarIV is the scalar induction
1250 /// variable on which to base the steps, \p Step is the size of the step.
1251
1252 Value *BaseIV = State.get(getOperand(0), VPIteration(0, 0));
1253 Value *Step = State.get(getStepValue(), VPIteration(0, 0));
1254 IRBuilderBase &Builder = State.Builder;
1255
1256 // Ensure step has the same type as that of scalar IV.
1257 Type *BaseIVTy = BaseIV->getType()->getScalarType();
1258 assert(BaseIVTy == Step->getType() && "Types of BaseIV and Step must match!");
1259
1260 // We build scalar steps for both integer and floating-point induction
1261 // variables. Here, we determine the kind of arithmetic we will perform.
1264 if (BaseIVTy->isIntegerTy()) {
1265 AddOp = Instruction::Add;
1266 MulOp = Instruction::Mul;
1267 } else {
1268 AddOp = InductionOpcode;
1269 MulOp = Instruction::FMul;
1270 }
1271
1272 // Determine the number of scalars we need to generate for each unroll
1273 // iteration.
1274 bool FirstLaneOnly = vputils::onlyFirstLaneUsed(this);
1275 // Compute the scalar steps and save the results in State.
1276 Type *IntStepTy =
1277 IntegerType::get(BaseIVTy->getContext(), BaseIVTy->getScalarSizeInBits());
1278 Type *VecIVTy = nullptr;
1279 Value *UnitStepVec = nullptr, *SplatStep = nullptr, *SplatIV = nullptr;
1280 if (!FirstLaneOnly && State.VF.isScalable()) {
1281 VecIVTy = VectorType::get(BaseIVTy, State.VF);
1282 UnitStepVec =
1283 Builder.CreateStepVector(VectorType::get(IntStepTy, State.VF));
1284 SplatStep = Builder.CreateVectorSplat(State.VF, Step);
1285 SplatIV = Builder.CreateVectorSplat(State.VF, BaseIV);
1286 }
1287
1288 unsigned StartPart = 0;
1289 unsigned EndPart = State.UF;
1290 unsigned StartLane = 0;
1291 unsigned EndLane = FirstLaneOnly ? 1 : State.VF.getKnownMinValue();
1292 if (State.Instance) {
1293 StartPart = State.Instance->Part;
1294 EndPart = StartPart + 1;
1295 StartLane = State.Instance->Lane.getKnownLane();
1296 EndLane = StartLane + 1;
1297 }
1298 for (unsigned Part = StartPart; Part < EndPart; ++Part) {
1299 Value *StartIdx0 = createStepForVF(Builder, IntStepTy, State.VF, Part);
1300
1301 if (!FirstLaneOnly && State.VF.isScalable()) {
1302 auto *SplatStartIdx = Builder.CreateVectorSplat(State.VF, StartIdx0);
1303 auto *InitVec = Builder.CreateAdd(SplatStartIdx, UnitStepVec);
1304 if (BaseIVTy->isFloatingPointTy())
1305 InitVec = Builder.CreateSIToFP(InitVec, VecIVTy);
1306 auto *Mul = Builder.CreateBinOp(MulOp, InitVec, SplatStep);
1307 auto *Add = Builder.CreateBinOp(AddOp, SplatIV, Mul);
1308 State.set(this, Add, Part);
1309 // It's useful to record the lane values too for the known minimum number
1310 // of elements so we do those below. This improves the code quality when
1311 // trying to extract the first element, for example.
1312 }
1313
1314 if (BaseIVTy->isFloatingPointTy())
1315 StartIdx0 = Builder.CreateSIToFP(StartIdx0, BaseIVTy);
1316
1317 for (unsigned Lane = StartLane; Lane < EndLane; ++Lane) {
1318 Value *StartIdx = Builder.CreateBinOp(
1319 AddOp, StartIdx0, getSignedIntOrFpConstant(BaseIVTy, Lane));
1320 // The step returned by `createStepForVF` is a runtime-evaluated value
1321 // when VF is scalable. Otherwise, it should be folded into a Constant.
1322 assert((State.VF.isScalable() || isa<Constant>(StartIdx)) &&
1323 "Expected StartIdx to be folded to a constant when VF is not "
1324 "scalable");
1325 auto *Mul = Builder.CreateBinOp(MulOp, StartIdx, Step);
1326 auto *Add = Builder.CreateBinOp(AddOp, BaseIV, Mul);
1327 State.set(this, Add, VPIteration(Part, Lane));
1328 }
1329 }
1330}
1331
1332#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1334 VPSlotTracker &SlotTracker) const {
1335 O << Indent;
1337 O << " = SCALAR-STEPS ";
1339}
1340#endif
1341
1343 assert(State.VF.isVector() && "not widening");
1344 auto *GEP = cast<GetElementPtrInst>(getUnderlyingInstr());
1345 // Construct a vector GEP by widening the operands of the scalar GEP as
1346 // necessary. We mark the vector GEP 'inbounds' if appropriate. A GEP
1347 // results in a vector of pointers when at least one operand of the GEP
1348 // is vector-typed. Thus, to keep the representation compact, we only use
1349 // vector-typed operands for loop-varying values.
1350
1351 if (areAllOperandsInvariant()) {
1352 // If we are vectorizing, but the GEP has only loop-invariant operands,
1353 // the GEP we build (by only using vector-typed operands for
1354 // loop-varying values) would be a scalar pointer. Thus, to ensure we
1355 // produce a vector of pointers, we need to either arbitrarily pick an
1356 // operand to broadcast, or broadcast a clone of the original GEP.
1357 // Here, we broadcast a clone of the original.
1358 //
1359 // TODO: If at some point we decide to scalarize instructions having
1360 // loop-invariant operands, this special case will no longer be
1361 // required. We would add the scalarization decision to
1362 // collectLoopScalars() and teach getVectorValue() to broadcast
1363 // the lane-zero scalar value.
1365 for (unsigned I = 0, E = getNumOperands(); I != E; I++)
1366 Ops.push_back(State.get(getOperand(I), VPIteration(0, 0)));
1367
1368 auto *NewGEP =
1369 State.Builder.CreateGEP(GEP->getSourceElementType(), Ops[0],
1370 ArrayRef(Ops).drop_front(), "", isInBounds());
1371 for (unsigned Part = 0; Part < State.UF; ++Part) {
1372 Value *EntryPart = State.Builder.CreateVectorSplat(State.VF, NewGEP);
1373 State.set(this, EntryPart, Part);
1374 State.addMetadata(EntryPart, GEP);
1375 }
1376 } else {
1377 // If the GEP has at least one loop-varying operand, we are sure to
1378 // produce a vector of pointers. But if we are only unrolling, we want
1379 // to produce a scalar GEP for each unroll part. Thus, the GEP we
1380 // produce with the code below will be scalar (if VF == 1) or vector
1381 // (otherwise). Note that for the unroll-only case, we still maintain
1382 // values in the vector mapping with initVector, as we do for other
1383 // instructions.
1384 for (unsigned Part = 0; Part < State.UF; ++Part) {
1385 // The pointer operand of the new GEP. If it's loop-invariant, we
1386 // won't broadcast it.
1387 auto *Ptr = isPointerLoopInvariant()
1388 ? State.get(getOperand(0), VPIteration(0, 0))
1389 : State.get(getOperand(0), Part);
1390
1391 // Collect all the indices for the new GEP. If any index is
1392 // loop-invariant, we won't broadcast it.
1394 for (unsigned I = 1, E = getNumOperands(); I < E; I++) {
1395 VPValue *Operand = getOperand(I);
1396 if (isIndexLoopInvariant(I - 1))
1397 Indices.push_back(State.get(Operand, VPIteration(0, 0)));
1398 else
1399 Indices.push_back(State.get(Operand, Part));
1400 }
1401
1402 // Create the new GEP. Note that this GEP may be a scalar if VF == 1,
1403 // but it should be a vector, otherwise.
1404 auto *NewGEP = State.Builder.CreateGEP(GEP->getSourceElementType(), Ptr,
1405 Indices, "", isInBounds());
1406 assert((State.VF.isScalar() || NewGEP->getType()->isVectorTy()) &&
1407 "NewGEP is not a pointer vector");
1408 State.set(this, NewGEP, Part);
1409 State.addMetadata(NewGEP, GEP);
1410 }
1411 }
1412}
1413
1414#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1416 VPSlotTracker &SlotTracker) const {
1417 O << Indent << "WIDEN-GEP ";
1418 O << (isPointerLoopInvariant() ? "Inv" : "Var");
1419 for (size_t I = 0; I < getNumOperands() - 1; ++I)
1420 O << "[" << (isIndexLoopInvariant(I) ? "Inv" : "Var") << "]";
1421
1422 O << " ";
1424 O << " = getelementptr";
1425 printFlags(O);
1427}
1428#endif
1429
1430void VPVectorPointerRecipe ::execute(VPTransformState &State) {
1431 auto &Builder = State.Builder;
1433 for (unsigned Part = 0; Part < State.UF; ++Part) {
1434 // Calculate the pointer for the specific unroll-part.
1435 Value *PartPtr = nullptr;
1436 // Use i32 for the gep index type when the value is constant,
1437 // or query DataLayout for a more suitable index type otherwise.
1438 const DataLayout &DL =
1439 Builder.GetInsertBlock()->getModule()->getDataLayout();
1440 Type *IndexTy = State.VF.isScalable() && (IsReverse || Part > 0)
1441 ? DL.getIndexType(IndexedTy->getPointerTo())
1442 : Builder.getInt32Ty();
1443 Value *Ptr = State.get(getOperand(0), VPIteration(0, 0));
1444 bool InBounds = isInBounds();
1445 if (IsReverse) {
1446 // If the address is consecutive but reversed, then the
1447 // wide store needs to start at the last vector element.
1448 // RunTimeVF = VScale * VF.getKnownMinValue()
1449 // For fixed-width VScale is 1, then RunTimeVF = VF.getKnownMinValue()
1450 Value *RunTimeVF = getRuntimeVF(Builder, IndexTy, State.VF);
1451 // NumElt = -Part * RunTimeVF
1452 Value *NumElt = Builder.CreateMul(
1453 ConstantInt::get(IndexTy, -(int64_t)Part), RunTimeVF);
1454 // LastLane = 1 - RunTimeVF
1455 Value *LastLane =
1456 Builder.CreateSub(ConstantInt::get(IndexTy, 1), RunTimeVF);
1457 PartPtr = Builder.CreateGEP(IndexedTy, Ptr, NumElt, "", InBounds);
1458 PartPtr = Builder.CreateGEP(IndexedTy, PartPtr, LastLane, "", InBounds);
1459 } else {
1460 Value *Increment = createStepForVF(Builder, IndexTy, State.VF, Part);
1461 PartPtr = Builder.CreateGEP(IndexedTy, Ptr, Increment, "", InBounds);
1462 }
1463
1464 State.set(this, PartPtr, Part, /*IsScalar*/ true);
1465 }
1466}
1467
1468#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1470 VPSlotTracker &SlotTracker) const {
1471 O << Indent;
1473 O << " = vector-pointer ";
1474 if (IsReverse)
1475 O << "(reverse) ";
1476
1478}
1479#endif
1480
1483 // We know that all PHIs in non-header blocks are converted into
1484 // selects, so we don't have to worry about the insertion order and we
1485 // can just use the builder.
1486 // At this point we generate the predication tree. There may be
1487 // duplications since this is a simple recursive scan, but future
1488 // optimizations will clean it up.
1489
1490 unsigned NumIncoming = getNumIncomingValues();
1491
1492 // Generate a sequence of selects of the form:
1493 // SELECT(Mask3, In3,
1494 // SELECT(Mask2, In2,
1495 // SELECT(Mask1, In1,
1496 // In0)))
1497 // Note that Mask0 is never used: lanes for which no path reaches this phi and
1498 // are essentially undef are taken from In0.
1499 VectorParts Entry(State.UF);
1500 for (unsigned In = 0; In < NumIncoming; ++In) {
1501 for (unsigned Part = 0; Part < State.UF; ++Part) {
1502 // We might have single edge PHIs (blocks) - use an identity
1503 // 'select' for the first PHI operand.
1504 Value *In0 = State.get(getIncomingValue(In), Part);
1505 if (In == 0)
1506 Entry[Part] = In0; // Initialize with the first incoming value.
1507 else {
1508 // Select between the current value and the previous incoming edge
1509 // based on the incoming mask.
1510 Value *Cond = State.get(getMask(In), Part);
1511 Entry[Part] =
1512 State.Builder.CreateSelect(Cond, In0, Entry[Part], "predphi");
1513 }
1514 }
1515 }
1516 for (unsigned Part = 0; Part < State.UF; ++Part)
1517 State.set(this, Entry[Part], Part);
1518}
1519
1520#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1522 VPSlotTracker &SlotTracker) const {
1523 O << Indent << "BLEND ";
1525 O << " =";
1526 if (getNumIncomingValues() == 1) {
1527 // Not a User of any mask: not really blending, this is a
1528 // single-predecessor phi.
1529 O << " ";
1531 } else {
1532 for (unsigned I = 0, E = getNumIncomingValues(); I < E; ++I) {
1533 O << " ";
1535 if (I == 0)
1536 continue;
1537 O << "/";
1539 }
1540 }
1541}
1542#endif
1543
1545 assert(!State.Instance && "Reduction being replicated.");
1546 Value *PrevInChain = State.get(getChainOp(), 0, /*IsScalar*/ true);
1547 RecurKind Kind = RdxDesc.getRecurrenceKind();
1548 // Propagate the fast-math flags carried by the underlying instruction.
1550 State.Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
1551 for (unsigned Part = 0; Part < State.UF; ++Part) {
1552 Value *NewVecOp = State.get(getVecOp(), Part);
1553 if (VPValue *Cond = getCondOp()) {
1554 Value *NewCond = State.get(Cond, Part, State.VF.isScalar());
1555 VectorType *VecTy = dyn_cast<VectorType>(NewVecOp->getType());
1556 Type *ElementTy = VecTy ? VecTy->getElementType() : NewVecOp->getType();
1557 Value *Iden = RdxDesc.getRecurrenceIdentity(Kind, ElementTy,
1558 RdxDesc.getFastMathFlags());
1559 if (State.VF.isVector()) {
1560 Iden = State.Builder.CreateVectorSplat(VecTy->getElementCount(), Iden);
1561 }
1562
1563 Value *Select = State.Builder.CreateSelect(NewCond, NewVecOp, Iden);
1564 NewVecOp = Select;
1565 }
1566 Value *NewRed;
1567 Value *NextInChain;
1568 if (IsOrdered) {
1569 if (State.VF.isVector())
1570 NewRed = createOrderedReduction(State.Builder, RdxDesc, NewVecOp,
1571 PrevInChain);
1572 else
1573 NewRed = State.Builder.CreateBinOp(
1574 (Instruction::BinaryOps)RdxDesc.getOpcode(Kind), PrevInChain,
1575 NewVecOp);
1576 PrevInChain = NewRed;
1577 } else {
1578 PrevInChain = State.get(getChainOp(), Part, /*IsScalar*/ true);
1579 NewRed = createTargetReduction(State.Builder, RdxDesc, NewVecOp);
1580 }
1582 NextInChain = createMinMaxOp(State.Builder, RdxDesc.getRecurrenceKind(),
1583 NewRed, PrevInChain);
1584 } else if (IsOrdered)
1585 NextInChain = NewRed;
1586 else
1587 NextInChain = State.Builder.CreateBinOp(
1588 (Instruction::BinaryOps)RdxDesc.getOpcode(Kind), NewRed, PrevInChain);
1589 State.set(this, NextInChain, Part, /*IsScalar*/ true);
1590 }
1591}
1592
1593#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1595 VPSlotTracker &SlotTracker) const {
1596 O << Indent << "REDUCE ";
1598 O << " = ";
1600 O << " +";
1601 if (isa<FPMathOperator>(getUnderlyingInstr()))
1603 O << " reduce." << Instruction::getOpcodeName(RdxDesc.getOpcode()) << " (";
1605 if (getCondOp()) {
1606 O << ", ";
1608 }
1609 O << ")";
1610 if (RdxDesc.IntermediateStore)
1611 O << " (with final reduction value stored in invariant address sank "
1612 "outside of loop)";
1613}
1614#endif
1615
1617 // Find if the recipe is used by a widened recipe via an intervening
1618 // VPPredInstPHIRecipe. In this case, also pack the scalar values in a vector.
1619 return any_of(users(), [](const VPUser *U) {
1620 if (auto *PredR = dyn_cast<VPPredInstPHIRecipe>(U))
1621 return any_of(PredR->users(), [PredR](const VPUser *U) {
1622 return !U->usesScalars(PredR);
1623 });
1624 return false;
1625 });
1626}
1627
1628#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1630 VPSlotTracker &SlotTracker) const {
1631 O << Indent << (IsUniform ? "CLONE " : "REPLICATE ");
1632
1633 if (!getUnderlyingInstr()->getType()->isVoidTy()) {
1635 O << " = ";
1636 }
1637 if (auto *CB = dyn_cast<CallBase>(getUnderlyingInstr())) {
1638 O << "call";
1639 printFlags(O);
1640 O << "@" << CB->getCalledFunction()->getName() << "(";
1642 O, [&O, &SlotTracker](VPValue *Op) {
1643 Op->printAsOperand(O, SlotTracker);
1644 });
1645 O << ")";
1646 } else {
1648 printFlags(O);
1650 }
1651
1652 if (shouldPack())
1653 O << " (S->V)";
1654}
1655#endif
1656
1657/// Checks if \p C is uniform across all VFs and UFs. It is considered as such
1658/// if it is either defined outside the vector region or its operand is known to
1659/// be uniform across all VFs and UFs (e.g. VPDerivedIV or VPCanonicalIVPHI).
1660/// TODO: Uniformity should be associated with a VPValue and there should be a
1661/// generic way to check.
1663 return C->isDefinedOutsideVectorRegions() ||
1664 isa<VPDerivedIVRecipe>(C->getOperand(0)) ||
1665 isa<VPCanonicalIVPHIRecipe>(C->getOperand(0));
1666}
1667
1668Value *VPScalarCastRecipe ::generate(VPTransformState &State, unsigned Part) {
1670 "Codegen only implemented for first lane.");
1671 switch (Opcode) {
1672 case Instruction::SExt:
1673 case Instruction::ZExt:
1674 case Instruction::Trunc: {
1675 // Note: SExt/ZExt not used yet.
1676 Value *Op = State.get(getOperand(0), VPIteration(Part, 0));
1677 return State.Builder.CreateCast(Instruction::CastOps(Opcode), Op, ResultTy);
1678 }
1679 default:
1680 llvm_unreachable("opcode not implemented yet");
1681 }
1682}
1683
1684void VPScalarCastRecipe ::execute(VPTransformState &State) {
1685 bool IsUniformAcrossVFsAndUFs = isUniformAcrossVFsAndUFs(this);
1686 for (unsigned Part = 0; Part != State.UF; ++Part) {
1687 Value *Res;
1688 // Only generate a single instance, if the recipe is uniform across UFs and
1689 // VFs.
1690 if (Part > 0 && IsUniformAcrossVFsAndUFs)
1691 Res = State.get(this, VPIteration(0, 0));
1692 else
1693 Res = generate(State, Part);
1694 State.set(this, Res, VPIteration(Part, 0));
1695 }
1696}
1697
1698#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1699void VPScalarCastRecipe ::print(raw_ostream &O, const Twine &Indent,
1700 VPSlotTracker &SlotTracker) const {
1701 O << Indent << "SCALAR-CAST ";
1702 printAsOperand(O, SlotTracker);
1703 O << " = " << Instruction::getOpcodeName(Opcode) << " ";
1704 printOperands(O, SlotTracker);
1705 O << " to " << *ResultTy;
1706}
1707#endif
1708
1710 assert(State.Instance && "Branch on Mask works only on single instance.");
1711
1712 unsigned Part = State.Instance->Part;
1713 unsigned Lane = State.Instance->Lane.getKnownLane();
1714
1715 Value *ConditionBit = nullptr;
1716 VPValue *BlockInMask = getMask();
1717 if (BlockInMask) {
1718 ConditionBit = State.get(BlockInMask, Part);
1719 if (ConditionBit->getType()->isVectorTy())
1720 ConditionBit = State.Builder.CreateExtractElement(
1721 ConditionBit, State.Builder.getInt32(Lane));
1722 } else // Block in mask is all-one.
1723 ConditionBit = State.Builder.getTrue();
1724
1725 // Replace the temporary unreachable terminator with a new conditional branch,
1726 // whose two destinations will be set later when they are created.
1727 auto *CurrentTerminator = State.CFG.PrevBB->getTerminator();
1728 assert(isa<UnreachableInst>(CurrentTerminator) &&
1729 "Expected to replace unreachable terminator with conditional branch.");
1730 auto *CondBr = BranchInst::Create(State.CFG.PrevBB, nullptr, ConditionBit);
1731 CondBr->setSuccessor(0, nullptr);
1732 ReplaceInstWithInst(CurrentTerminator, CondBr);
1733}
1734
1736 assert(State.Instance && "Predicated instruction PHI works per instance.");
1737 Instruction *ScalarPredInst =
1738 cast<Instruction>(State.get(getOperand(0), *State.Instance));
1739 BasicBlock *PredicatedBB = ScalarPredInst->getParent();
1740 BasicBlock *PredicatingBB = PredicatedBB->getSinglePredecessor();
1741 assert(PredicatingBB && "Predicated block has no single predecessor.");
1742 assert(isa<VPReplicateRecipe>(getOperand(0)) &&
1743 "operand must be VPReplicateRecipe");
1744
1745 // By current pack/unpack logic we need to generate only a single phi node: if
1746 // a vector value for the predicated instruction exists at this point it means
1747 // the instruction has vector users only, and a phi for the vector value is
1748 // needed. In this case the recipe of the predicated instruction is marked to
1749 // also do that packing, thereby "hoisting" the insert-element sequence.
1750 // Otherwise, a phi node for the scalar value is needed.
1751 unsigned Part = State.Instance->Part;
1752 if (State.hasVectorValue(getOperand(0), Part)) {
1753 Value *VectorValue = State.get(getOperand(0), Part);
1754 InsertElementInst *IEI = cast<InsertElementInst>(VectorValue);
1755 PHINode *VPhi = State.Builder.CreatePHI(IEI->getType(), 2);
1756 VPhi->addIncoming(IEI->getOperand(0), PredicatingBB); // Unmodified vector.
1757 VPhi->addIncoming(IEI, PredicatedBB); // New vector with inserted element.
1758 if (State.hasVectorValue(this, Part))
1759 State.reset(this, VPhi, Part);
1760 else
1761 State.set(this, VPhi, Part);
1762 // NOTE: Currently we need to update the value of the operand, so the next
1763 // predicated iteration inserts its generated value in the correct vector.
1764 State.reset(getOperand(0), VPhi, Part);
1765 } else {
1766 Type *PredInstType = getOperand(0)->getUnderlyingValue()->getType();
1767 PHINode *Phi = State.Builder.CreatePHI(PredInstType, 2);
1768 Phi->addIncoming(PoisonValue::get(ScalarPredInst->getType()),
1769 PredicatingBB);
1770 Phi->addIncoming(ScalarPredInst, PredicatedBB);
1771 if (State.hasScalarValue(this, *State.Instance))
1772 State.reset(this, Phi, *State.Instance);
1773 else
1774 State.set(this, Phi, *State.Instance);
1775 // NOTE: Currently we need to update the value of the operand, so the next
1776 // predicated iteration inserts its generated value in the correct vector.
1777 State.reset(getOperand(0), Phi, *State.Instance);
1778 }
1779}
1780
1781#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1783 VPSlotTracker &SlotTracker) const {
1784 O << Indent << "PHI-PREDICATED-INSTRUCTION ";
1786 O << " = ";
1788}
1789
1791 VPSlotTracker &SlotTracker) const {
1792 O << Indent << "WIDEN ";
1794 O << " = load ";
1796}
1797
1799 VPSlotTracker &SlotTracker) const {
1800 O << Indent << "WIDEN ";
1802 O << " = vp.load ";
1804}
1805
1807 VPSlotTracker &SlotTracker) const {
1808 O << Indent << "WIDEN store ";
1810}
1811
1813 VPSlotTracker &SlotTracker) const {
1814 O << Indent << "WIDEN vp.store ";
1816}
1817#endif
1818
1820 Value *Start = getStartValue()->getLiveInIRValue();
1821 PHINode *EntryPart = PHINode::Create(Start->getType(), 2, "index");
1822 EntryPart->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
1823
1824 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
1825 EntryPart->addIncoming(Start, VectorPH);
1826 EntryPart->setDebugLoc(getDebugLoc());
1827 for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
1828 State.set(this, EntryPart, Part, /*IsScalar*/ true);
1829}
1830
1831#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1833 VPSlotTracker &SlotTracker) const {
1834 O << Indent << "EMIT ";
1836 O << " = CANONICAL-INDUCTION ";
1838}
1839#endif
1840
1843 VPValue *Step) const {
1844 // Must be an integer induction.
1846 return false;
1847 // Start must match the start value of this canonical induction.
1848 if (Start != getStartValue())
1849 return false;
1850
1851 // If the step is defined by a recipe, it is not a ConstantInt.
1852 if (Step->getDefiningRecipe())
1853 return false;
1854
1855 ConstantInt *StepC = dyn_cast<ConstantInt>(Step->getLiveInIRValue());
1856 return StepC && StepC->isOne();
1857}
1858
1860 return IsScalarAfterVectorization &&
1861 (!IsScalable || vputils::onlyFirstLaneUsed(this));
1862}
1863
1864#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1866 VPSlotTracker &SlotTracker) const {
1867 O << Indent << "EMIT ";
1869 O << " = WIDEN-POINTER-INDUCTION ";
1871 O << ", " << *IndDesc.getStep();
1872}
1873#endif
1874
1876 assert(!State.Instance && "cannot be used in per-lane");
1877 const DataLayout &DL = State.CFG.PrevBB->getModule()->getDataLayout();
1878 SCEVExpander Exp(SE, DL, "induction");
1879
1880 Value *Res = Exp.expandCodeFor(Expr, Expr->getType(),
1881 &*State.Builder.GetInsertPoint());
1882 assert(!State.ExpandedSCEVs.contains(Expr) &&
1883 "Same SCEV expanded multiple times");
1884 State.ExpandedSCEVs[Expr] = Res;
1885 for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
1886 State.set(this, Res, {Part, 0});
1887}
1888
1889#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1891 VPSlotTracker &SlotTracker) const {
1892 O << Indent << "EMIT ";
1894 O << " = EXPAND SCEV " << *Expr;
1895}
1896#endif
1897
1899 Value *CanonicalIV = State.get(getOperand(0), 0, /*IsScalar*/ true);
1900 Type *STy = CanonicalIV->getType();
1901 IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
1902 ElementCount VF = State.VF;
1903 Value *VStart = VF.isScalar()
1904 ? CanonicalIV
1905 : Builder.CreateVectorSplat(VF, CanonicalIV, "broadcast");
1906 for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
1907 Value *VStep = createStepForVF(Builder, STy, VF, Part);
1908 if (VF.isVector()) {
1909 VStep = Builder.CreateVectorSplat(VF, VStep);
1910 VStep =
1911 Builder.CreateAdd(VStep, Builder.CreateStepVector(VStep->getType()));
1912 }
1913 Value *CanonicalVectorIV = Builder.CreateAdd(VStart, VStep, "vec.iv");
1914 State.set(this, CanonicalVectorIV, Part);
1915 }
1916}
1917
1918#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1920 VPSlotTracker &SlotTracker) const {
1921 O << Indent << "EMIT ";
1923 O << " = WIDEN-CANONICAL-INDUCTION ";
1925}
1926#endif
1927
1929 auto &Builder = State.Builder;
1930 // Create a vector from the initial value.
1931 auto *VectorInit = getStartValue()->getLiveInIRValue();
1932
1933 Type *VecTy = State.VF.isScalar()
1934 ? VectorInit->getType()
1935 : VectorType::get(VectorInit->getType(), State.VF);
1936
1937 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
1938 if (State.VF.isVector()) {
1939 auto *IdxTy = Builder.getInt32Ty();
1940 auto *One = ConstantInt::get(IdxTy, 1);
1941 IRBuilder<>::InsertPointGuard Guard(Builder);
1942 Builder.SetInsertPoint(VectorPH->getTerminator());
1943 auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF);
1944 auto *LastIdx = Builder.CreateSub(RuntimeVF, One);
1945 VectorInit = Builder.CreateInsertElement(
1946 PoisonValue::get(VecTy), VectorInit, LastIdx, "vector.recur.init");
1947 }
1948
1949 // Create a phi node for the new recurrence.
1950 PHINode *EntryPart = PHINode::Create(VecTy, 2, "vector.recur");
1951 EntryPart->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
1952 EntryPart->addIncoming(VectorInit, VectorPH);
1953 State.set(this, EntryPart, 0);
1954}
1955
1956#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1958 VPSlotTracker &SlotTracker) const {
1959 O << Indent << "FIRST-ORDER-RECURRENCE-PHI ";
1961 O << " = phi ";
1963}
1964#endif
1965
1967 auto &Builder = State.Builder;
1968
1969 // Reductions do not have to start at zero. They can start with
1970 // any loop invariant values.
1971 VPValue *StartVPV = getStartValue();
1972 Value *StartV = StartVPV->getLiveInIRValue();
1973
1974 // In order to support recurrences we need to be able to vectorize Phi nodes.
1975 // Phi nodes have cycles, so we need to vectorize them in two stages. This is
1976 // stage #1: We create a new vector PHI node with no incoming edges. We'll use
1977 // this value when we vectorize all of the instructions that use the PHI.
1978 bool ScalarPHI = State.VF.isScalar() || IsInLoop;
1979 Type *VecTy = ScalarPHI ? StartV->getType()
1980 : VectorType::get(StartV->getType(), State.VF);
1981
1982 BasicBlock *HeaderBB = State.CFG.PrevBB;
1983 assert(State.CurrentVectorLoop->getHeader() == HeaderBB &&
1984 "recipe must be in the vector loop header");
1985 unsigned LastPartForNewPhi = isOrdered() ? 1 : State.UF;
1986 for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
1987 Instruction *EntryPart = PHINode::Create(VecTy, 2, "vec.phi");
1988 EntryPart->insertBefore(HeaderBB->getFirstInsertionPt());
1989 State.set(this, EntryPart, Part, IsInLoop);
1990 }
1991
1992 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
1993
1994 Value *Iden = nullptr;
1995 RecurKind RK = RdxDesc.getRecurrenceKind();
1998 // MinMax and AnyOf reductions have the start value as their identity.
1999 if (ScalarPHI) {
2000 Iden = StartV;
2001 } else {
2002 IRBuilderBase::InsertPointGuard IPBuilder(Builder);
2003 Builder.SetInsertPoint(VectorPH->getTerminator());
2004 StartV = Iden =
2005 Builder.CreateVectorSplat(State.VF, StartV, "minmax.ident");
2006 }
2007 } else {
2008 Iden = RdxDesc.getRecurrenceIdentity(RK, VecTy->getScalarType(),
2009 RdxDesc.getFastMathFlags());
2010
2011 if (!ScalarPHI) {
2012 Iden = Builder.CreateVectorSplat(State.VF, Iden);
2013 IRBuilderBase::InsertPointGuard IPBuilder(Builder);
2014 Builder.SetInsertPoint(VectorPH->getTerminator());
2015 Constant *Zero = Builder.getInt32(0);
2016 StartV = Builder.CreateInsertElement(Iden, StartV, Zero);
2017 }
2018 }
2019
2020 for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
2021 Value *EntryPart = State.get(this, Part, IsInLoop);
2022 // Make sure to add the reduction start value only to the
2023 // first unroll part.
2024 Value *StartVal = (Part == 0) ? StartV : Iden;
2025 cast<PHINode>(EntryPart)->addIncoming(StartVal, VectorPH);
2026 }
2027}
2028
2029#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2031 VPSlotTracker &SlotTracker) const {
2032 O << Indent << "WIDEN-REDUCTION-PHI ";
2033
2035 O << " = phi ";
2037}
2038#endif
2039
2042 "Non-native vplans are not expected to have VPWidenPHIRecipes.");
2043
2044 Value *Op0 = State.get(getOperand(0), 0);
2045 Type *VecTy = Op0->getType();
2046 Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, "vec.phi");
2047 State.set(this, VecPhi, 0);
2048}
2049
2050#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2052 VPSlotTracker &SlotTracker) const {
2053 O << Indent << "WIDEN-PHI ";
2054
2055 auto *OriginalPhi = cast<PHINode>(getUnderlyingValue());
2056 // Unless all incoming values are modeled in VPlan print the original PHI
2057 // directly.
2058 // TODO: Remove once all VPWidenPHIRecipe instances keep all relevant incoming
2059 // values as VPValues.
2060 if (getNumOperands() != OriginalPhi->getNumOperands()) {
2061 O << VPlanIngredient(OriginalPhi);
2062 return;
2063 }
2064
2066 O << " = phi ";
2068}
2069#endif
2070
2071// TODO: It would be good to use the existing VPWidenPHIRecipe instead and
2072// remove VPActiveLaneMaskPHIRecipe.
2074 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
2075 for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
2076 Value *StartMask = State.get(getOperand(0), Part);
2077 PHINode *EntryPart =
2078 State.Builder.CreatePHI(StartMask->getType(), 2, "active.lane.mask");
2079 EntryPart->addIncoming(StartMask, VectorPH);
2080 EntryPart->setDebugLoc(getDebugLoc());
2081 State.set(this, EntryPart, Part);
2082 }
2083}
2084
2085#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2087 VPSlotTracker &SlotTracker) const {
2088 O << Indent << "ACTIVE-LANE-MASK-PHI ";
2089
2091 O << " = phi ";
2093}
2094#endif
2095
2097 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
2098 assert(State.UF == 1 && "Expected unroll factor 1 for VP vectorization.");
2099 Value *Start = State.get(getOperand(0), VPIteration(0, 0));
2100 PHINode *EntryPart =
2101 State.Builder.CreatePHI(Start->getType(), 2, "evl.based.iv");
2102 EntryPart->addIncoming(Start, VectorPH);
2103 EntryPart->setDebugLoc(getDebugLoc());
2104 State.set(this, EntryPart, 0, /*IsScalar=*/true);
2105}
2106
2107#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2109 VPSlotTracker &SlotTracker) const {
2110 O << Indent << "EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI ";
2111
2113 O << " = phi ";
2115}
2116#endif
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
amdgpu AMDGPU Register Bank Select
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
#define LLVM_DEBUG(X)
Definition: Debug.h:101
std::string Name
Hexagon Common GEP
#define I(x, y, z)
Definition: MD5.cpp:58
mir Rename Register Operands
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first found DebugLoc that has a DILocation, given a range of instructions.
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
static Value * getStepVector(Value *Val, Value *StartIdx, Value *Step, Instruction::BinaryOps BinOp, ElementCount VF, IRBuilderBase &Builder)
This function adds (StartIdx * Step, (StartIdx + 1) * Step, (StartIdx + 2) * Step,...
static bool isUniformAcrossVFsAndUFs(VPScalarCastRecipe *C)
Checks if C is uniform across all VFs and UFs.
static Constant * getSignedIntOrFpConstant(Type *Ty, int64_t C)
A helper function that returns an integer or floating-point constant with value C.
static Value * getRuntimeVFAsFloat(IRBuilderBase &B, Type *FTy, ElementCount VF)
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition: VPlanSLP.cpp:191
This file contains the declarations of the Vectorization Plan base classes:
static const uint32_t IV[8]
Definition: blake3_impl.h:78
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:409
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:452
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:221
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:289
Conditional or Unconditional Branch instruction.
static BranchInst * Create(BasicBlock *IfTrue, BasicBlock::iterator InsertBefore)
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
This class represents a function call, abstracting a target machine's calling convention.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:993
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:1016
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:1018
static StringRef getPredicateName(Predicate P)
This is the shared class of boolean and integer constants.
Definition: Constants.h:80
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
Definition: Constants.h:211
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
Definition: Constants.h:123
static Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
Definition: Constants.cpp:1449
This is an important base class in LLVM.
Definition: Constant.h:41
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
A debug info location.
Definition: DebugLoc.h:33
constexpr bool isVector() const
One or more elements.
Definition: TypeSize.h:323
constexpr bool isScalar() const
Exactly one element.
Definition: TypeSize.h:319
Convenience struct for specifying and reasoning about fast-math flags.
Definition: FMF.h:20
void setAllowContract(bool B=true)
Definition: FMF.h:91
bool noSignedZeros() const
Definition: FMF.h:68
bool noInfs() const
Definition: FMF.h:67
void setAllowReciprocal(bool B=true)
Definition: FMF.h:88
bool allowReciprocal() const
Definition: FMF.h:69
void print(raw_ostream &O) const
Print fast-math flags to O.
Definition: Operator.cpp:259
void setNoSignedZeros(bool B=true)
Definition: FMF.h:85
bool allowReassoc() const
Flag queries.
Definition: FMF.h:65
bool approxFunc() const
Definition: FMF.h:71
void setNoNaNs(bool B=true)
Definition: FMF.h:79
void setAllowReassoc(bool B=true)
Flag setters.
Definition: FMF.h:76
bool noNaNs() const
Definition: FMF.h:66
void setApproxFunc(bool B=true)
Definition: FMF.h:94
void setNoInfs(bool B=true)
Definition: FMF.h:82
bool allowContract() const
Definition: FMF.h:70
Class to represent function types.
Definition: DerivedTypes.h:103
Type * getParamType(unsigned i) const
Parameter type accessors.
Definition: DerivedTypes.h:135
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:202
bool willReturn() const
Determine if the function will return.
Definition: Function.h:643
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
Definition: Function.h:232
bool doesNotThrow() const
Determine if the function cannot unwind.
Definition: Function.h:576
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:207
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:94
Value * CreateFCmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2361
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2472
Value * CreatePtrAdd(Value *Ptr, Value *Offset, const Twine &Name="", bool IsInBounds=false)
Definition: IRBuilder.h:1978
Value * CreateSIToFP(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2094
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2460
Value * CreateFAdd(Value *L, Value *R, const Twine &Name="", MDNode *FPMD=nullptr)
Definition: IRBuilder.h:1533
Value * CreateVectorSplice(Value *V1, Value *V2, int64_t Imm, const Twine &Name="")
Return a vector splice intrinsic if using scalable vectors, otherwise return a shufflevector.
Definition: IRBuilder.cpp:1185
Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains.
Definition: IRBuilder.cpp:1212
ConstantInt * getTrue()
Get the constant value for i1 true.
Definition: IRBuilder.h:466
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, Instruction *FMFSource=nullptr, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition: IRBuilder.cpp:932
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.cpp:1110
BasicBlock::iterator GetInsertPoint() const
Definition: IRBuilder.h:175
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2033
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition: IRBuilder.h:2535
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Definition: IRBuilder.h:526
Value * CreateUIToFP(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2081
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:174
void setFastMathFlags(FastMathFlags NewFMF)
Set the fast-math flags to be used with generated fp-math operators.
Definition: IRBuilder.h:311
InsertPoint saveIP() const
Returns the current insert point.
Definition: IRBuilder.h:277
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:486
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2366
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition: IRBuilder.h:2397
Value * CreateNot(Value *V, const Twine &Name="")
Definition: IRBuilder.h:1749
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2241
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1344
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition: IRBuilder.h:1120
Value * CreateNAryOp(unsigned Opc, ArrayRef< Value * > Ops, const Twine &Name="", MDNode *FPMathTag=nullptr)
Create either a UnaryOperator or BinaryOperator depending on Opc.
Definition: IRBuilder.cpp:1005
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2021
LLVMContext & getContext() const
Definition: IRBuilder.h:176
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1327
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2007
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1666
Value * CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2161
void restoreIP(InsertPoint IP)
Sets the current insert point to a previously-saved location.
Definition: IRBuilder.h:289
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:180
StoreInst * CreateAlignedStore(Value *Val, Value *Ptr, MaybeAlign Align, bool isVolatile=false)
Definition: IRBuilder.h:1826
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2412
Value * CreateGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="", bool IsInBounds=false)
Definition: IRBuilder.h:1866
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2351
Value * CreateFMul(Value *L, Value *R, const Twine &Name="", MDNode *FPMD=nullptr)
Definition: IRBuilder.h:1587
Value * CreateStepVector(Type *DstType, const Twine &Name="")
Creates a vector of type DstType with the linear sequence <0, 1, ...>
Definition: IRBuilder.cpp:109
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1361
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2666
A struct for saving information about induction variables.
const SCEV * getStep() const
InductionKind
This enum represents the kinds of inductions that we support.
@ IK_IntInduction
Integer induction variable. Step = C.
This instruction inserts a single (scalar) element into a VectorType value.
VectorType * getType() const
Overload to return most specific vector type.
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:454
bool isBinaryOp() const
Definition: Instruction.h:257
const BasicBlock * getParent() const
Definition: Instruction.h:152
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
const char * getOpcodeName() const
Definition: Instruction.h:254
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:451
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:278
BlockT * getHeader() const
void print(raw_ostream &OS, const SlotIndexes *=nullptr, bool IsStandalone=true) const
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.h:293
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr, BasicBlock::iterator InsertBefore)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1827
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Definition: IVDescriptors.h:71
FastMathFlags getFastMathFlags() const
static unsigned getOpcode(RecurKind Kind)
Returns the opcode corresponding to the RecurrenceKind.
unsigned getOpcode() const
Type * getRecurrenceType() const
Returns the type of the recurrence.
static bool isAnyOfRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
bool isSigned() const
Returns true if all source operands of the recurrence are SExtInsts.
RecurKind getRecurrenceKind() const
Value * getRecurrenceIdentity(RecurKind K, Type *Tp, FastMathFlags FMF) const
Returns identity corresponding to the RecurrenceKind.
StoreInst * IntermediateStore
Reductions may store temporary or final result to an invariant address.
static bool isMinMaxRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is any min/max kind.
This class uses information about analyze scalars to rewrite expressions in canonical form.
Type * getType() const
Return the LLVM type of this SCEV expression.
This class provides computation of slot numbers for LLVM Assembly writing.
Definition: AsmWriter.cpp:693
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
An instruction for storing to memory.
Definition: Instructions.h:317
This class represents a truncation of integer types.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:265
static IntegerType * getInt1Ty(LLVMContext &C)
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:129
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition: Type.h:185
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
bool isVoidTy() const
Return true if this is 'void'.
Definition: Type.h:140
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:348
Value * getOperand(unsigned i) const
Definition: User.h:169
void execute(VPTransformState &State) override
Generate the active lane mask phi of the vector loop.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition: VPlan.h:2825
RecipeListTy & getRecipeList()
Returns a reference to the list of recipes.
Definition: VPlan.h:2872
iterator end()
Definition: VPlan.h:2856
void insert(VPRecipeBase *Recipe, iterator InsertPt)
Definition: VPlan.h:2884
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getIncomingValue(unsigned Idx) const
Return incoming value number Idx.
Definition: VPlan.h:1971
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
Definition: VPlan.h:1976
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account that the first incoming value has no mask.
Definition: VPlan.h:1968
void execute(VPTransformState &State) override
Generate the phi/select nodes.
VPRegionBlock * getParent()
Definition: VPlan.h:489
size_t getNumSuccessors() const
Definition: VPlan.h:534
VPlan * getPlan()
Definition: VPlan.cpp:148
const VPBasicBlock * getEntryBasicBlock() const
Definition: VPlan.cpp:153
VPBlockBase * getSingleSuccessor() const
Definition: VPlan.h:524
VPValue * getMask() const
Return the mask used by this recipe.
Definition: VPlan.h:2247
void execute(VPTransformState &State) override
Generate the extraction of the appropriate bit from the block mask and the conditional branch.
void execute(VPTransformState &State) override
Generate the canonical scalar induction phi of the vector loop.
bool isCanonical(InductionDescriptor::InductionKind Kind, VPValue *Start, VPValue *Step) const
Check if the induction described by Kind, /p Start and Step is canonical, i.e.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
This class augments a recipe with a set of VPValues defined by the recipe.
Definition: VPlanValue.h:313
VPValue * getVPSingleValue()
Returns the only VPValue defined by the VPDef.
Definition: VPlanValue.h:401
VPValue * getVPValue(unsigned I)
Returns the VPValue with index I defined by the VPDef.
Definition: VPlanValue.h:413
unsigned getVPDefID() const
Definition: VPlanValue.h:433
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getStepValue() const
Definition: VPlan.h:2763
VPValue * getStartValue() const
Definition: VPlan.h:2762
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate phi for handling IV based on EVL over iterations correctly.
void execute(VPTransformState &State) override
Generate a canonical vector induction variable of the vector loop, with.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getStartValue()
Returns the start value of the phi, if one is set.
Definition: VPlan.h:1667
@ FirstOrderRecurrenceSplice
Definition: VPlan.h:1165
@ CanonicalIVIncrementForPart
Definition: VPlan.h:1175
@ CalculateTripCountMinusVF
Definition: VPlan.h:1173
bool hasResult() const
Definition: VPlan.h:1283
LLVM_DUMP_METHOD void dump() const
Print the VPInstruction to dbgs() (for debugging).
unsigned getOpcode() const
Definition: VPlan.h:1259
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the VPInstruction to O.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
void execute(VPTransformState &State) override
Generate the instruction.
static VPLane getLastLaneForVF(const ElementCount &VF)
Definition: VPlan.h:169
static VPLane getFirstLane()
Definition: VPlan.h:167
void print(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the VPLiveOut to O.
PHINode * getPhi() const
Definition: VPlan.h:694
void fixPhi(VPlan &Plan, VPTransformState &State)
Fixup the wrapped LCSSA phi node in the unique exit block.
void execute(VPTransformState &State) override
Generates phi nodes for live-outs as needed to retain SSA form.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition: VPlan.h:709
bool mayReadFromMemory() const
Returns true if the recipe may read from memory.
bool mayHaveSideEffects() const
Returns true if the recipe may have side-effects.
bool mayWriteToMemory() const
Returns true if the recipe may write to memory.
VPBasicBlock * getParent()
Definition: VPlan.h:734
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
Definition: VPlan.h:800
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
void moveAfter(VPRecipeBase *MovePos)
Unlink this recipe from its current VPBasicBlock and insert it into the VPBasicBlock that MovePos liv...
Class to record LLVM IR flag for a recipe along with it.
Definition: VPlan.h:897
ExactFlagsTy ExactFlags
Definition: VPlan.h:953
FastMathFlagsTy FMFs
Definition: VPlan.h:956
NonNegFlagsTy NonNegFlags
Definition: VPlan.h:955
void setFlags(Instruction *I) const
Set the IR flags for I.
Definition: VPlan.h:1082
bool isInBounds() const
Definition: VPlan.h:1121
GEPFlagsTy GEPFlags
Definition: VPlan.h:954
bool hasFastMathFlags() const
Returns true if the recipe has fast-math flags.
Definition: VPlan.h:1128
DisjointFlagsTy DisjointFlags
Definition: VPlan.h:952
WrapFlagsTy WrapFlags
Definition: VPlan.h:951
bool hasNoUnsignedWrap() const
Definition: VPlan.h:1132
void printFlags(raw_ostream &O) const
CmpInst::Predicate getPredicate() const
Definition: VPlan.h:1115
bool hasNoSignedWrap() const
Definition: VPlan.h:1138
FastMathFlags getFastMathFlags() const
bool isOrdered() const
Returns true, if the phi is part of an ordered reduction.
Definition: VPlan.h:1940
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi/select nodes.
VPValue * getVecOp() const
The VPValue of the vector value to be reduced.
Definition: VPlan.h:2131
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getCondOp() const
The VPValue of the condition for the block.
Definition: VPlan.h:2133
VPValue * getChainOp() const
The VPValue of the scalar Chain being accumulated.
Definition: VPlan.h:2129
void execute(VPTransformState &State) override
Generate the reduction in the loop.
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition: VPlan.h:2958
const VPBlockBase * getEntry() const
Definition: VPlan.h:2997
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
unsigned getOpcode() const
Definition: VPlan.h:2211
bool shouldPack() const
Returns true if the recipe is used by a widened recipe via an intervening VPPredInstPHIRecipe.
VPScalarCastRecipe is a recipe to create scalar cast instructions.
Definition: VPlan.h:1410
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getStepValue() const
Definition: VPlan.h:2812
void execute(VPTransformState &State) override
Generate the scalarized versions of the phi node as needed by their users.
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
Definition: VPlan.h:888
This class can be used to assign names to VPValues.
Definition: VPlanValue.h:454
Type * inferScalarType(const VPValue *V)
Infer the type of V. Returns the scalar type of V.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition: VPlanValue.h:203
void printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the operands to O.
Definition: VPlan.cpp:1306
operand_range operands()
Definition: VPlanValue.h:278
unsigned getNumOperands() const
Definition: VPlanValue.h:252
operand_iterator op_begin()
Definition: VPlanValue.h:274
VPValue * getOperand(unsigned N) const
Definition: VPlanValue.h:253
Value * getUnderlyingValue()
Return the underlying Value attached to this VPValue.
Definition: VPlanValue.h:77
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Definition: VPlan.cpp:118
void printAsOperand(raw_ostream &OS, VPSlotTracker &Tracker) const
Definition: VPlan.cpp:1302
friend class VPInstruction
Definition: VPlanValue.h:47
Value * getLiveInIRValue()
Returns the underlying IR value, if this VPValue is defined outside the scope of VPlan.
Definition: VPlanValue.h:173
user_range users()
Definition: VPlanValue.h:133
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Function * getCalledScalarFunction() const
Definition: VPlan.h:1483
void execute(VPTransformState &State) override
Produce a widened version of the call instruction.
operand_range arg_operands()
Definition: VPlan.h:1487
void execute(VPTransformState &State) override
Generate a canonical vector induction variable of the vector loop, with start = {<Part*VF,...
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Type * getResultType() const
Returns the result type of the cast.
Definition: VPlan.h:1406
void execute(VPTransformState &State) override
Produce widened copies of the cast.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the gep nodes.
TruncInst * getTruncInst()
Returns the first defined value as TruncInst, if it is one or nullptr otherwise.
Definition: VPlan.h:1751
void execute(VPTransformState &State) override
Generate the vectorized and scalarized versions of the phi node as needed by their users.
VPValue * getStepValue()
Returns the step value of the induction.
Definition: VPlan.h:1746
Type * getScalarType() const
Returns the scalar type of the induction.
Definition: VPlan.h:1765
bool isCanonical() const
Returns true if the induction is canonical, i.e.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
Definition: VPlan.h:1757
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi/select nodes.
bool onlyScalarsGenerated(bool IsScalable)
Returns true if only scalar values will be generated.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Produce widened copies of all Ingredients.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPlan models a candidate for vectorization, encoding various decisions take to produce efficient outp...
Definition: VPlan.h:3059
VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition: VPlan.h:3246
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:377
void printAsOperand(raw_ostream &O, bool PrintType=true, const Module *M=nullptr) const
Print the name of this Value out to the specified raw_ostream.
Definition: AsmWriter.cpp:5079
bool hasName() const
Definition: Value.h:261
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
Base class of all SIMD vector types.
Definition: DerivedTypes.h:403
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
Definition: DerivedTypes.h:641
static VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
Definition: Type.cpp:676
Type * getElementType() const
Definition: DerivedTypes.h:436
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:171
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
self_iterator getIterator()
Definition: ilist_node.h:109
iterator erase(iterator where)
Definition: ilist.h:204
pointer remove(iterator &IT)
Definition: ilist.h:188
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1471
bool isUniformAfterVectorization(VPValue *VPV)
Returns true if VPV is uniform after vectorization.
Definition: VPlan.h:3606
bool onlyFirstPartUsed(const VPValue *Def)
Returns true if only the first part of Def is used.
Definition: VPlan.cpp:1454
bool onlyFirstLaneUsed(const VPValue *Def)
Returns true if only the first lane of Def is used.
Definition: VPlan.cpp:1449
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void ReplaceInstWithInst(BasicBlock *BB, BasicBlock::iterator &BI, Instruction *I)
Replace the instruction specified by BI with the instruction specified by I.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are are tuples (A,...
Definition: STLExtras.h:2406
bool isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, int OpdIdx)
Identifies if the vector form of the intrinsic is overloaded on the type of the operand at index OpdI...
Value * getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF)
Return the runtime value for VF.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void interleaveComma(const Container &c, StreamT &os, UnaryFunctor each_fn)
Definition: STLExtras.h:2165
Instruction * propagateMetadata(Instruction *I, ArrayRef< Value * > VL)
Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath, MD_nontemporal,...
Value * createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left, Value *Right)
Returns a Min/Max operation corresponding to MinMaxRecurrenceKind.
Definition: LoopUtils.cpp:1037
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
cl::opt< bool > EnableVPlanNativePath("enable-vplan-native-path", cl::Hidden, cl::desc("Enable VPlan-native vectorization path with " "support for outer loop vectorization."))
Definition: VPlan.cpp:53
static bool isDbgInfoIntrinsic(Intrinsic::ID ID)
Check if ID corresponds to a debug info intrinsic.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
Value * createOrderedReduction(IRBuilderBase &B, const RecurrenceDescriptor &Desc, Value *Src, Value *Start)
Create an ordered reduction intrinsic using the given recurrence descriptor Desc.
Definition: LoopUtils.cpp:1211
RecurKind
These are the kinds of recurrences that we support.
Definition: IVDescriptors.h:34
@ Mul
Product of integers.
@ Add
Sum of integers.
Value * createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF, int64_t Step)
Return a value for Step multiplied by VF.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1879
bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx)
Identifies if the vector form of the intrinsic has a scalar operand.
Value * createTargetReduction(IRBuilderBase &B, const RecurrenceDescriptor &Desc, Value *Src, PHINode *OrigPhi=nullptr)
Create a generic target reduction using a recurrence descriptor Desc The target is queried to determi...
Definition: LoopUtils.cpp:1195
void execute(VPTransformState &State) override
Generate the phi nodes.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPIteration represents a single point in the iteration space of the output (vectorized and/or unrolle...
Definition: VPlan.h:219
BasicBlock * PrevBB
The previous IR BasicBlock created or used.
Definition: VPlan.h:365
SmallDenseMap< VPBasicBlock *, BasicBlock * > VPBB2IRBB
A mapping of each VPBasicBlock to the corresponding BasicBlock.
Definition: VPlan.h:373
BasicBlock * getPreheaderBBFor(VPRecipeBase *R)
Returns the BasicBlock* mapped to the pre-header of the loop region containing R.
Definition: VPlan.cpp:348
VPTransformState holds information passed down when "executing" a VPlan, needed for generating the ou...
Definition: VPlan.h:236
Value * get(VPValue *Def, unsigned Part, bool IsScalar=false)
Get the generated vector Value for a given VPValue Def and a given Part if IsScalar is false,...
Definition: VPlan.cpp:247
DenseMap< const SCEV *, Value * > ExpandedSCEVs
Map SCEVs to their expanded values.
Definition: VPlan.h:409
VPTypeAnalysis TypeAnalysis
VPlan-based type analysis.
Definition: VPlan.h:412
void addMetadata(Value *To, Instruction *From)
Add metadata from one instruction to another.
Definition: VPlan.cpp:361
void reset(VPValue *Def, Value *V, unsigned Part)
Reset an existing vector value for Def and a given Part.
Definition: VPlan.h:303
struct llvm::VPTransformState::CFGState CFG
void set(VPValue *Def, Value *V, unsigned Part, bool IsScalar=false)
Set the generated vector Value for a given VPValue and a given Part, if IsScalar is false.
Definition: VPlan.h:288
std::optional< VPIteration > Instance
Hold the indices to generate specific scalar instructions.
Definition: VPlan.h:248
IRBuilderBase & Builder
Hold a reference to the IRBuilder used to generate output IR code.
Definition: VPlan.h:389
bool hasScalarValue(VPValue *Def, VPIteration Instance)
Definition: VPlan.h:276
bool hasVectorValue(VPValue *Def, unsigned Part)
Definition: VPlan.h:270
ElementCount VF
The chosen Vectorization and Unroll Factors of the loop being vectorized.
Definition: VPlan.h:242
Loop * CurrentVectorLoop
The loop object for the current parent region, or nullptr.
Definition: VPlan.h:398
void setDebugLocFrom(DebugLoc DL)
Set the debug location in the builder using the debug location DL.
Definition: VPlan.cpp:372
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool isInvariantCond() const
Definition: VPlan.h:1530
VPValue * getCond() const
Definition: VPlan.h:1526
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Produce a widened version of the select instruction.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.