#define LV_NAME "loop-vectorize"
#define DEBUG_TYPE LV_NAME
  case VPInterleaveEVLSC:
  // ...
  case VPWidenStoreEVLSC:
  // ...
    return !cast<VPWidenCallRecipe>(this)
                ->getCalledScalarFunction()
                ->onlyReadsMemory();
  case VPWidenIntrinsicSC:
  // ...
  case VPCanonicalIVPHISC:
  case VPBranchOnMaskSC:
  // ...
  case VPFirstOrderRecurrencePHISC:
  case VPReductionPHISC:
  case VPScalarIVStepsSC:
  // ...
  case VPReductionEVLSC:
  // ...
  case VPVectorPointerSC:
  case VPWidenCanonicalIVSC:
  // ...
  case VPWidenIntOrFpInductionSC:
  case VPWidenLoadEVLSC:
  // ...
  case VPWidenPointerInductionSC:
  // ...
  case VPWidenSelectSC: {
    // ...
    assert((!I || !I->mayWriteToMemory()) &&
           "underlying instruction may write to memory");
 
 
  case VPInstructionSC:
  // ...
  case VPWidenLoadEVLSC:
  // ...
    return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
        ->mayReadFromMemory();
  // ...
    return !cast<VPWidenCallRecipe>(this)
                ->getCalledScalarFunction()
                ->onlyWritesMemory();
  case VPWidenIntrinsicSC:
  // ...
  case VPBranchOnMaskSC:
  // ...
  case VPFirstOrderRecurrencePHISC:
  case VPPredInstPHISC:
  case VPScalarIVStepsSC:
  case VPWidenStoreEVLSC:
  // ...
  case VPReductionEVLSC:
  // ...
  case VPVectorPointerSC:
  case VPWidenCanonicalIVSC:
  // ...
  case VPWidenIntOrFpInductionSC:
  // ...
  case VPWidenPointerInductionSC:
  // ...
  case VPWidenSelectSC: {
    // ...
    assert((!I || !I->mayReadFromMemory()) &&
           "underlying instruction may read from memory");
 
 
  case VPFirstOrderRecurrencePHISC:
  case VPPredInstPHISC:
  case VPVectorEndPointerSC:
  // ...
  case VPInstructionSC: {
  // ...
  case VPWidenCallSC: {
  // ...
  case VPWidenIntrinsicSC:
  // ...
  case VPReductionEVLSC:
  case VPPartialReductionSC:
  // ...
  case VPScalarIVStepsSC:
  case VPVectorPointerSC:
  case VPWidenCanonicalIVSC:
  // ...
  case VPWidenIntOrFpInductionSC:
  // ...
  case VPWidenPointerInductionSC:
  // ...
  case VPWidenSelectSC: {
    // ...
    assert((!I || !I->mayHaveSideEffects()) &&
           "underlying instruction has side-effects");
  // ...
  case VPInterleaveEVLSC:
  // ...
  case VPWidenLoadEVLSC:
  // ...
  case VPWidenStoreEVLSC:
  // ...
        "mayHaveSideEffects result for ingredient differs from this "
  // ...
  case VPReplicateSC: {
    // ...
    return R->getUnderlyingInstr()->mayHaveSideEffects();
 
 
  assert(!Parent && "Recipe already in some VPBasicBlock");
  assert(InsertPos->getParent() &&
         "Insertion position not in any VPBasicBlock");
  // ...
  assert(!Parent && "Recipe already in some VPBasicBlock");
  // ...
  assert(!Parent && "Recipe already in some VPBasicBlock");
  assert(InsertPos->getParent() &&
         "Insertion position not in any VPBasicBlock");
 
 
    UI = IG->getInsertPos();
  // ...
    UI = &WidenMem->getIngredient();
  // ...
  if (UI && Ctx.skipCostComputation(UI, VF.isVector())) {
  // ...
    dbgs() << "Cost of " << RecipeCost << " for VF " << VF << ": ";
 
 
  std::optional<unsigned> Opcode;
  // ...
    auto *PhiType = Ctx.Types.inferScalarType(getChainOp());
    auto *InputType = Ctx.Types.inferScalarType(getVecOp());
    return Ctx.TTI.getPartialReductionCost(getOpcode(), InputType, InputType,
  // ...
  Type *InputTypeA = nullptr, *InputTypeB = nullptr;
  // ...
    if (WidenCastR->getOpcode() == Instruction::CastOps::ZExt)
    // ...
    if (WidenCastR->getOpcode() == Instruction::CastOps::SExt)
  // ...
    Opcode = Widen->getOpcode();
    // ...
    InputTypeA = Ctx.Types.inferScalarType(ExtAR ? ExtAR->getOperand(0)
                                                 : Widen->getOperand(0));
    InputTypeB = Ctx.Types.inferScalarType(ExtBR ? ExtBR->getOperand(0)
                                                 : Widen->getOperand(1));
    ExtAType = GetExtendKind(ExtAR);
    ExtBType = GetExtendKind(ExtBR);
    // ...
      InputTypeB = InputTypeA;
  // ...
    InputTypeA = Ctx.Types.inferScalarType(OpR->getOperand(0));
    ExtAType = GetExtendKind(OpR);
    // ...
      InputTypeA = Ctx.Types.inferScalarType(RedPhiOp1R->getOperand(0));
      ExtAType = GetExtendKind(RedPhiOp1R);
  // ...
    return Reduction->computeCost(VF, Ctx);
  // ...
  auto *PhiType = Ctx.Types.inferScalarType(getOperand(1));
  return Ctx.TTI.getPartialReductionCost(getOpcode(), InputTypeA, InputTypeB,
                                         PhiType, VF, ExtAType, ExtBType,
                                         Opcode, Ctx.CostKind);
 
 
  auto &Builder = State.Builder;
  // ...
         "Unhandled partial reduction opcode");
  // ...
  assert(PhiVal && BinOpVal && "Phi and Mul must be set");
  // ...
      Builder.CreateIntrinsic(RetTy, Intrinsic::vector_partial_reduce_add,
                              {PhiVal, BinOpVal}, nullptr, "partial.reduce");
 
 
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  // ...
  O << Indent << "PARTIAL-REDUCE ";
 
 
  assert(OpType == Other.OpType && "OpType must match");
  // ...
  case OperationType::OverflowingBinOp:
    WrapFlags.HasNUW &= Other.WrapFlags.HasNUW;
    WrapFlags.HasNSW &= Other.WrapFlags.HasNSW;
  // ...
  case OperationType::Trunc:
  // ...
  case OperationType::DisjointOp:
  // ...
  case OperationType::PossiblyExactOp:
    ExactFlags.IsExact &= Other.ExactFlags.IsExact;
  // ...
  case OperationType::GEPOp:
  // ...
  case OperationType::FPMathOp:
    FMFs.NoNaNs &= Other.FMFs.NoNaNs;
    FMFs.NoInfs &= Other.FMFs.NoInfs;
  // ...
  case OperationType::NonNegOp:
  // ...
  case OperationType::Cmp:
  // ...
  case OperationType::Other:
  // ...

  assert(OpType == OperationType::FPMathOp &&
         "recipe doesn't have fast math flags");
 
 
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// ...
template <unsigned PartOpIdx>
// ...
  if (U.getNumOperands() == PartOpIdx + 1)
    return U.getOperand(PartOpIdx);
// ...
template <unsigned PartOpIdx>
// ...
         "Set flags not supported for the provided opcode");
  assert((getNumOperandsForOpcode(Opcode) == -1u ||
          // ...
         "number of operands does not match opcode");
 
 
unsigned VPInstruction::getNumOperandsForOpcode(unsigned Opcode) {
  // ...
  case Instruction::Alloca:
  case Instruction::ExtractValue:
  case Instruction::Freeze:
  case Instruction::Load:
  // ...
  case Instruction::ICmp:
  case Instruction::FCmp:
  case Instruction::Store:
  // ...
  case Instruction::Select:
  // ...
  case Instruction::Call:
  case Instruction::GetElementPtr:
  case Instruction::PHI:
  case Instruction::Switch:
  // ...

bool VPInstruction::canGenerateScalarForFirstLane() const {
  // ...
  case Instruction::Freeze:
  case Instruction::ICmp:
  case Instruction::PHI:
  case Instruction::Select:
 
  BasicBlock *SecondIRSucc = State.CFG.VPBB2IRBB.lookup(SecondVPSucc);
  // ...
  BranchInst *CondBr = State.Builder.CreateCondBr(Cond, IRBB, SecondIRSucc);
 
 
  IRBuilderBase &Builder = State.Builder;
  // ...
  case Instruction::ExtractElement: {
    // ...
      unsigned IdxToExtract =
  // ...
  case Instruction::Freeze: {
  // ...
  case Instruction::FCmp:
  case Instruction::ICmp: {
  // ...
  case Instruction::PHI: {
  // ...
  case Instruction::Select: {
  // ...
                                   {VIVElem0, ScalarTC}, nullptr, Name);
  // ...
    if (!V1->getType()->isVectorTy())
  // ...
           "Requested vector length should be an integer.");
  // ...
        {AVL, VFArg, State.Builder.getTrue()});
  // ...
    assert(Part != 0 && "Must have a positive part");
  // ...
    for (unsigned FieldIndex = 0; FieldIndex != StructTy->getNumElements();
  // ...
    IRBuilderBase::FastMathFlagGuard FMFG(Builder);
  // ...
    RecurKind RK = PhiR->getRecurrenceKind();
    // ...
           "Unexpected reduction kind");
    assert(!PhiR->isInLoop() &&
           "In-loop FindLastIV reduction is not supported yet");
  // ...
    for (unsigned Part = 1; Part < UF; ++Part)
      ReducedPartRdx = createMinMaxOp(Builder, MinMaxKind, ReducedPartRdx,
  // ...
    RecurKind RK = PhiR->getRecurrenceKind();
    // ...
           "should be handled by ComputeFindIVResult");
  // ...
    for (unsigned Part = 0; Part < UF; ++Part)
      RdxParts[Part] = State.get(getOperand(1 + Part), PhiR->isInLoop());
  // ...
    IRBuilderBase::FastMathFlagGuard FMFG(Builder);
  // ...
    Value *ReducedPartRdx = RdxParts[0];
    if (PhiR->isOrdered()) {
      ReducedPartRdx = RdxParts[UF - 1];
    // ...
      for (unsigned Part = 1; Part < UF; ++Part) {
        Value *RdxPart = RdxParts[Part];
        // ...
          ReducedPartRdx = createMinMaxOp(Builder, RK, ReducedPartRdx, RdxPart);
        // ...
            Opcode = Instruction::Add;
        // ...
              Builder.CreateBinOp(Opcode, RdxPart, ReducedPartRdx, "bin.rdx");
    // ...
    if (State.VF.isVector() && !PhiR->isInLoop()) {
    // ...
    return ReducedPartRdx;
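// Illustrative sketch (hypothetical names, UF=3): for an unordered add
// reduction the loop above folds the per-part results pairwise before the
// final horizontal reduction:
//   %bin.rdx  = add <4 x i32> %rdx.part1, %rdx.part0
//   %bin.rdx2 = add <4 x i32> %rdx.part2, %bin.rdx
// An ordered (in-order FP) reduction instead takes RdxParts[UF - 1] directly.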
 
           "invalid offset to extract from");
    // ...
      assert(Offset <= 1 && "invalid offset to extract from");
  // ...
           "can only generate first lane for PtrAdd");
  // ...
      Res = Builder.CreateOr(Res, Builder.CreateFreeze(State.get(Op)));
    return State.VF.isScalar() ? Res : Builder.CreateOrReduce(Res);
  // ...
    Value *Res = nullptr;
    // ...
          Builder.CreateMul(RuntimeVF, ConstantInt::get(IdxTy, Idx - 1));
      Value *VectorIdx = Idx == 1
                             // ...
                             : Builder.CreateSub(LaneToExtract, VectorStart);
      Value *Ext = State.VF.isScalar()
                       // ...
                       : Builder.CreateExtractElement(
      // ...
        Value *Cmp = Builder.CreateICmpUGE(LaneToExtract, VectorStart);
        Res = Builder.CreateSelect(Cmp, Ext, Res);
  // ...
    Value *Res = nullptr;
    for (int Idx = LastOpIdx; Idx >= 0; --Idx) {
      Value *TrailingZeros =
 
  Type *ScalarTy = Ctx.Types.inferScalarType(this);
  // ...
  case Instruction::FNeg:
    return Ctx.TTI.getArithmeticInstrCost(Opcode, ResultTy, Ctx.CostKind);
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::SRem:
  case Instruction::URem:
  case Instruction::Add:
  case Instruction::FAdd:
  case Instruction::Sub:
  case Instruction::FSub:
  case Instruction::Mul:
  case Instruction::FMul:
  case Instruction::FDiv:
  case Instruction::FRem:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor: {
    // ...
      RHSInfo = Ctx.getOperandInfo(RHS);
    // ...
    return Ctx.TTI.getArithmeticInstrCost(
        Opcode, ResultTy, Ctx.CostKind,
        {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},
        RHSInfo, Operands, CtxI, &Ctx.TLI);
  }
  case Instruction::Freeze:
    // ...
    return Ctx.TTI.getArithmeticInstrCost(Instruction::Mul, ResultTy,
  // ...
  case Instruction::ExtractValue:
    return Ctx.TTI.getInsertExtractValueCost(Instruction::ExtractValue,
  // ...
  case Instruction::ICmp:
  case Instruction::FCmp: {
    // ...
    return Ctx.TTI.getCmpSelInstrCost(
        // ...
        Ctx.CostKind, {TTI::OK_AnyValue, TTI::OP_None},
        {TTI::OK_AnyValue, TTI::OP_None}, CtxI);
  }
  // ...
           "Should only generate a vector value or single scalar, not scalars "
  // ...
  case Instruction::Select: {
    // ...
    auto *CondTy = Ctx.Types.inferScalarType(getOperand(0));
    auto *VecTy = Ctx.Types.inferScalarType(getOperand(1));
    // ...
    return Ctx.TTI.getCmpSelInstrCost(Instruction::Select, VecTy, CondTy, Pred,
  // ...
  case Instruction::ExtractElement:
    // ...
    return Ctx.TTI.getVectorInstrCost(Instruction::ExtractElement, VecTy,
  // ...
    auto *VecTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
    return Ctx.TTI.getArithmeticReductionCost(
  // ...
      return Ctx.TTI.getCmpSelInstrCost(Instruction::ICmp, ScalarTy,
  // ...
                                  {PredTy, Type::getInt1Ty(Ctx.LLVMCtx)});
    return Ctx.TTI.getIntrinsicInstrCost(Attrs, Ctx.CostKind);
  // ...
    Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
    // ...
    unsigned Multiplier =
    // ...
    return Ctx.TTI.getIntrinsicInstrCost(Attrs, Ctx.CostKind);
  // ...
                                  I32Ty, {Arg0Ty, I32Ty, I1Ty});
    return Ctx.TTI.getIntrinsicInstrCost(Attrs, Ctx.CostKind);
  // ...
    return Ctx.TTI.getIndexedVectorInstrCostFromEnd(Instruction::ExtractElement,
                                                    VecTy, Ctx.CostKind, 0);
  // ...
         "unexpected VPInstruction with underlying value");
 
 
         getOpcode() == Instruction::ExtractElement ||
  // ...
  case Instruction::PHI:
  // ...
  assert(!State.Lane && "VPInstruction executing a Lane");
  // ...
         "Set flags not supported for the provided opcode");
  // ...
  Value *GeneratedValue = generate(State);
  // ...
  assert(GeneratedValue && "generate must produce a value");
  bool GeneratesPerFirstLaneOnly = canGenerateScalarForFirstLane() &&
  // ...
           !GeneratesPerFirstLaneOnly) ||
          State.VF.isScalar()) &&
         "scalar value but not only first lane defined");
  State.set(this, GeneratedValue,
            // ...
            GeneratesPerFirstLaneOnly);
 
 
  case Instruction::ExtractElement:
  case Instruction::Freeze:
  case Instruction::FCmp:
  case Instruction::ICmp:
  case Instruction::Select:
  case Instruction::PHI:
  // ...
  case Instruction::ExtractElement:
  // ...
  case Instruction::PHI:
  // ...
  case Instruction::FCmp:
  case Instruction::ICmp:
  case Instruction::Select:
  case Instruction::Or:
  case Instruction::Freeze:
  // ...
  case Instruction::FCmp:
  case Instruction::ICmp:
  case Instruction::Select:
 
 
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  // ...
  O << Indent << "EMIT" << (isSingleScalar() ? "-SCALAR" : "") << " ";
  // ...
    O << "combined load";
  // ...
    O << "combined store";
  // ...
    O << "active lane mask";
  // ...
    O << "EXPLICIT-VECTOR-LENGTH";
  // ...
    O << "first-order splice";
  // ...
    O << "branch-on-cond";
  // ...
    O << "TC > VF ? TC - VF : 0";
  // ...
    O << "branch-on-count";
  // ...
    O << "buildstructvector";
  // ...
    O << "extract-lane";
  // ...
    O << "extract-last-element";
  // ...
    O << "extract-last-lane-per-part";
  // ...
    O << "extract-penultimate-element";
  // ...
    O << "compute-anyof-result";
  // ...
    O << "compute-find-iv-result";
  // ...
    O << "compute-reduction-result";
  // ...
    O << "first-active-lane";
  // ...
    O << "reduction-start-vector";
  // ...
    O << "resume-for-epilogue";
 
 
    State.set(this, Cast, VPLane(0));
  // ...
    Value *VScale = State.Builder.CreateVScale(ResultTy);
    State.set(this, VScale, true);
  // ...

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  // ...
  O << Indent << "EMIT" << (isSingleScalar() ? "-SCALAR" : "") << " ";
  // ...
    O << "wide-iv-step ";
  // ...
    O << "step-vector " << *ResultTy;
  // ...
    O << "vscale " << *ResultTy;
  // ...
    O << " to " << *ResultTy;
 
 
  PHINode *NewPhi = State.Builder.CreatePHI(
      State.TypeAnalysis.inferScalarType(this), 2, getName());
  // ...
  for (unsigned Idx = 0; Idx != NumIncoming; ++Idx) {
  // ...
  State.set(this, NewPhi, VPLane(0));

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  // ...
  O << Indent << "EMIT" << (isSingleScalar() ? "-SCALAR" : "") << " ";
  // ...
         "PHINodes must be handled by VPIRPhi");
  // ...
  State.Builder.SetInsertPoint(I.getParent(), std::next(I.getIterator()));
 
 
         "can only update exiting operands to phi nodes");

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  // ...
  O << Indent << "IR " << I;
  // ...
    auto *PredVPBB = Pred->getExitingBasicBlock();
    BasicBlock *PredBB = State.CFG.VPBB2IRBB[PredVPBB];
    // ...
    if (Phi->getBasicBlockIndex(PredBB) == -1)
      Phi->addIncoming(V, PredBB);
    else
      Phi->setIncomingValueForBlock(PredBB, V);
  // ...
  State.Builder.SetInsertPoint(Phi->getParent(), std::next(Phi->getIterator()));
  // ...
  assert(R->getNumOperands() == R->getParent()->getNumPredecessors() &&
         "Number of phi operands must match number of predecessors");
  unsigned Position = R->getParent()->getIndexForPredecessor(IncomingBlock);
  R->removeOperand(Position);
 
 
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// ...
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  // ...
    O << " (extra operand" << (getNumOperands() > 1 ? "s" : "") << ": ";
  // ...
                      std::get<1>(Op)->printAsOperand(O);
  // ...
    Metadata.emplace_back(LLVMContext::MD_alias_scope, AliasScopeMD);
  // ...
    Metadata.emplace_back(LLVMContext::MD_noalias, NoAliasMD);
  // ...
  for (const auto &[Kind, Node] : Metadata)
    I.setMetadata(Kind, Node);
 
 
  for (const auto &[KindA, MDA] : Metadata) {
    for (const auto &[KindB, MDB] : Other.Metadata) {
      if (KindA == KindB && MDA == MDB) {
  // ...
  Metadata = std::move(MetadataIntersection);
  // ...
  assert(State.VF.isVector() && "not widening");
  assert(Variant != nullptr && "Can't create vector function.");
  // ...
      Arg = State.get(I.value(), VPLane(0));
    // ...
    Args.push_back(Arg);
  // ...
    CI->getOperandBundlesAsDefs(OpBundles);
  // ...
  CallInst *V = State.Builder.CreateCall(Variant, Args, OpBundles);
  // ...
  V->setCallingConv(Variant->getCallingConv());
  // ...
  if (!V->getType()->isVoidTy())
  // ...
  return Ctx.TTI.getCallInstrCost(nullptr, Variant->getReturnType(),
                                  Variant->getFunctionType()->params(),
 
 
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  // ...
  O << Indent << "WIDEN-CALL ";
  // ...
  O << " @" << CalledFn->getName() << "(";
  // ...
  O << " (using library function";
  if (Variant->hasName())
    O << ": " << Variant->getName();
  // ...
  assert(State.VF.isVector() && "not widening");
  // ...
      Arg = State.get(I.value(), VPLane(0));
    // ...
    Args.push_back(Arg);
  // ...
  Module *M = State.Builder.GetInsertBlock()->getModule();
  // ...
         "Can't retrieve vector intrinsic or vector-predication intrinsics.");
  // ...
    CI->getOperandBundlesAsDefs(OpBundles);
  // ...
  CallInst *V = State.Builder.CreateCall(VectorF, Args, OpBundles);
  // ...
  if (!V->getType()->isVoidTy())
  // ...
  for (const auto &[Idx, Op] : enumerate(Operands)) {
    auto *V = Op->getUnderlyingValue();
    // ...
        Arguments.push_back(UI->getArgOperand(Idx));
  // ...
  Type *ScalarRetTy = Ctx.Types.inferScalarType(&R);
  // ...
                           : Ctx.Types.inferScalarType(Op));
  // ...
      R.hasFastMathFlags() ? R.getFastMathFlags() : FastMathFlags();
  // ...
  return Ctx.TTI.getIntrinsicInstrCost(CostAttrs, Ctx.CostKind);
 
 
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  // ...
  O << Indent << "WIDEN-INTRINSIC ";
  if (ResultTy->isVoidTy()) {
  // ...
  Value *Mask = nullptr;
  // ...
    Mask = State.get(VPMask);
  // ...
        Builder.CreateVectorSplat(VTy->getElementCount(), Builder.getInt1(1));
  // ...
  if (Opcode == Instruction::Sub)
    IncAmt = Builder.CreateNeg(IncAmt);
  else
    assert(Opcode == Instruction::Add && "only add or sub supported for now");
  // ...
  State.Builder.CreateIntrinsic(Intrinsic::experimental_vector_histogram_add,
  // ...
  Type *IncTy = Ctx.Types.inferScalarType(IncAmt);
  // ...
      Ctx.TTI.getArithmeticInstrCost(Instruction::Mul, VTy, Ctx.CostKind);
  // ...
                              {PtrTy, IncTy, MaskTy});
  // ...
  return Ctx.TTI.getIntrinsicInstrCost(ICA, Ctx.CostKind) + MulCost +
         Ctx.TTI.getArithmeticInstrCost(Opcode, VTy, Ctx.CostKind);

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  // ...
  O << Indent << "WIDEN-HISTOGRAM buckets: ";
  // ...
  if (Opcode == Instruction::Sub)
  // ...
    assert(Opcode == Instruction::Add);
 
 
  O << Indent << "WIDEN-SELECT ";
  // ...
  Value *Sel = State.Builder.CreateSelect(Cond, Op0, Op1);
  State.set(this, Sel);
  // ...
  Type *ScalarTy = Ctx.Types.inferScalarType(this);
  Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
  // ...
    const auto [Op1VK, Op1VP] = Ctx.getOperandInfo(Op0);
    const auto [Op2VK, Op2VP] = Ctx.getOperandInfo(Op1);
    // ...
               [](VPValue *Op) { return Op->getUnderlyingValue(); }))
      Operands.append(SI->op_begin(), SI->op_end());
    return Ctx.TTI.getArithmeticInstrCost(
        IsLogicalOr ? Instruction::Or : Instruction::And, VectorTy,
        Ctx.CostKind, {Op1VK, Op1VP}, {Op2VK, Op2VP}, Operands, SI);
  // ...
    Pred = Cmp->getPredicate();
  return Ctx.TTI.getCmpSelInstrCost(
      Instruction::Select, VectorTy, CondTy, Pred, Ctx.CostKind,
      {TTI::OK_AnyValue, TTI::OP_None}, {TTI::OK_AnyValue, TTI::OP_None}, SI);

VPIRFlags::FastMathFlagsTy::FastMathFlagsTy(const FastMathFlags &FMF) {
 
  case OperationType::OverflowingBinOp:
    return Opcode == Instruction::Add || Opcode == Instruction::Sub ||
           Opcode == Instruction::Mul ||
           Opcode == VPInstruction::CanonicalIVIncrementForPart;
  case OperationType::Trunc:
    return Opcode == Instruction::Trunc;
  case OperationType::DisjointOp:
    return Opcode == Instruction::Or;
  case OperationType::PossiblyExactOp:
    return Opcode == Instruction::AShr;
  case OperationType::GEPOp:
    return Opcode == Instruction::GetElementPtr ||
  // ...
  case OperationType::FPMathOp:
    return Opcode == Instruction::FAdd || Opcode == Instruction::FMul ||
           Opcode == Instruction::FSub || Opcode == Instruction::FNeg ||
           Opcode == Instruction::FDiv || Opcode == Instruction::FRem ||
           Opcode == Instruction::FPExt || Opcode == Instruction::FPTrunc ||
           Opcode == Instruction::FCmp || Opcode == Instruction::Select ||
  // ...
  case OperationType::NonNegOp:
    return Opcode == Instruction::ZExt || Opcode == Instruction::UIToFP;
  case OperationType::Cmp:
    return Opcode == Instruction::FCmp || Opcode == Instruction::ICmp;
  case OperationType::Other:
  // ...

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  // ...
  case OperationType::Cmp:
  // ...
  case OperationType::DisjointOp:
  // ...
  case OperationType::PossiblyExactOp:
  // ...
  case OperationType::OverflowingBinOp:
  // ...
  case OperationType::Trunc:
  // ...
  case OperationType::FPMathOp:
  // ...
  case OperationType::GEPOp:
    // ...
    else if (GEPFlags.hasNoUnsignedSignedWrap())
  // ...
  case OperationType::NonNegOp:
  // ...
  case OperationType::Other:
 
 
  auto &Builder = State.Builder;
  // ...
  case Instruction::Call:
  case Instruction::Br:
  case Instruction::PHI:
  case Instruction::GetElementPtr:
  case Instruction::Select:
  // ...
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::SRem:
  case Instruction::URem:
  case Instruction::Add:
  case Instruction::FAdd:
  case Instruction::Sub:
  case Instruction::FSub:
  case Instruction::FNeg:
  case Instruction::Mul:
  case Instruction::FMul:
  case Instruction::FDiv:
  case Instruction::FRem:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor: {
    // ...
      Ops.push_back(State.get(VPOp));
    // ...
    Value *V = Builder.CreateNAryOp(Opcode, Ops);
  // ...
  case Instruction::ExtractValue: {
    // ...
    Value *Extract = Builder.CreateExtractValue(Op, CI->getZExtValue());
    State.set(this, Extract);
  // ...
  case Instruction::Freeze: {
    // ...
    Value *Freeze = Builder.CreateFreeze(Op);
    State.set(this, Freeze);
  // ...
  case Instruction::ICmp:
  case Instruction::FCmp: {
    // ...
    bool FCmp = Opcode == Instruction::FCmp;
    // ...
      C = Builder.CreateFCmpFMF(
  // ...
             State.get(this)->getType() &&
         "inferred type and type from generated instructions do not match");
 
 
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::SRem:
  case Instruction::URem:
  // ...
  case Instruction::FNeg:
  case Instruction::Add:
  case Instruction::FAdd:
  case Instruction::Sub:
  case Instruction::FSub:
  case Instruction::Mul:
  case Instruction::FMul:
  case Instruction::FDiv:
  case Instruction::FRem:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::Freeze:
  case Instruction::ExtractValue:
  case Instruction::ICmp:
  case Instruction::FCmp:

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  // ...
  O << Indent << "WIDEN ";
 
 
  auto &Builder = State.Builder;
  // ...
  assert(State.VF.isVector() && "Not vectorizing?");
  // ...
  State.set(this, Cast);
  // ...
    if (WidenMemoryRecipe == nullptr)
    // ...
    if (!WidenMemoryRecipe->isConsecutive())
    // ...
    if (WidenMemoryRecipe->isReverse())
    // ...
    if (WidenMemoryRecipe->isMasked())
  // ...
  if ((Opcode == Instruction::Trunc || Opcode == Instruction::FPTrunc) &&
  // ...
      CCH = ComputeCCH(StoreRecipe);
  else if (Opcode == Instruction::ZExt || Opcode == Instruction::SExt ||
           Opcode == Instruction::FPExt) {
  // ...
  return Ctx.TTI.getCastInstrCost(
      Opcode, DestTy, SrcTy, CCH, Ctx.CostKind,

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  // ...
  O << Indent << "WIDEN-CAST ";
  // ...
  return Ctx.TTI.getCFInstrCost(Instruction::PHI, Ctx.CostKind);
  // ...
                           : ConstantFP::get(Ty, C);

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  // ...
  O << " = WIDEN-INDUCTION ";
  // ...
    O << " (truncated to " << *TI->getType() << ")";
 
 
  return StartC && StartC->isZero() && StepC && StepC->isOne() &&

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  // ...
  O << " = DERIVED-IV ";
 
 
  assert(BaseIVTy == Step->getType() && "Types of BaseIV and Step must match!");
  // ...
    AddOp = Instruction::Add;
    MulOp = Instruction::Mul;
  // ...
    AddOp = InductionOpcode;
    MulOp = Instruction::FMul;
  // ...
  Type *VecIVTy = nullptr;
  Value *UnitStepVec = nullptr, *SplatStep = nullptr, *SplatIV = nullptr;
  if (!FirstLaneOnly && State.VF.isScalable()) {
    // ...
    SplatStep = Builder.CreateVectorSplat(State.VF, Step);
    SplatIV = Builder.CreateVectorSplat(State.VF, BaseIV);
  // ...
  unsigned StartLane = 0;
  unsigned EndLane = FirstLaneOnly ? 1 : State.VF.getKnownMinValue();
  // ...
    StartLane = State.Lane->getKnownLane();
    EndLane = StartLane + 1;
  // ...
    StartIdx0 = ConstantInt::get(IntStepTy, 0);
  // ...
          Builder.CreateMul(StartIdx0, ConstantInt::get(StartIdx0->getType(),
  // ...
    StartIdx0 = Builder.CreateSExtOrTrunc(StartIdx0, IntStepTy);
  // ...
  if (!FirstLaneOnly && State.VF.isScalable()) {
    auto *SplatStartIdx = Builder.CreateVectorSplat(State.VF, StartIdx0);
    auto *InitVec = Builder.CreateAdd(SplatStartIdx, UnitStepVec);
    // ...
      InitVec = Builder.CreateSIToFP(InitVec, VecIVTy);
    auto *Mul = Builder.CreateBinOp(MulOp, InitVec, SplatStep);
    auto *Add = Builder.CreateBinOp(AddOp, SplatIV, Mul);
    State.set(this, Add);
  // ...
    StartIdx0 = Builder.CreateSIToFP(StartIdx0, BaseIVTy);
  // ...
  for (unsigned Lane = StartLane; Lane < EndLane; ++Lane) {
    Value *StartIdx = Builder.CreateBinOp(
    // ...
           "Expected StartIdx to be folded to a constant when VF is not "
    // ...
    auto *Mul = Builder.CreateBinOp(MulOp, StartIdx, Step);
    auto *Add = Builder.CreateBinOp(AddOp, BaseIV, Mul);
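// Worked example (illustrative, not from the source): with fixed VF=4 and
// unroll part 0, StartIdx0 folds to 0, so lane 2 gets
//   %mul = mul i64 2, %step
//   %add = add i64 %base.iv, %mul
// i.e. each lane L receives BaseIV + (StartIdx0 + L) * Step.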
 
 
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  // ...
  O << " = SCALAR-STEPS ";
 
 
  assert(State.VF.isVector() && "not widening");
  // ...
  if (areAllOperandsInvariant()) {
    // ...
    Value *Splat = State.Builder.CreateVectorSplat(State.VF, NewGEP);
    State.set(this, Splat);
  // ...
    auto *Ptr = State.get(getOperand(0), isPointerLoopInvariant());
    // ...
      Indices.push_back(State.get(Operand, isIndexLoopInvariant(I - 1)));
    // ...
    assert((State.VF.isScalar() || NewGEP->getType()->isVectorTy()) &&
           "NewGEP is not a pointer vector");
    State.set(this, NewGEP);

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  // ...
  O << Indent << "WIDEN-GEP ";
  O << (isPointerLoopInvariant() ? "Inv" : "Var");
  // ...
    O << "[" << (isIndexLoopInvariant(I) ? "Inv" : "Var") << "]";
  // ...
  O << " = getelementptr";
  // ...
  const DataLayout &DL = Builder.GetInsertBlock()->getDataLayout();
  return !IsUnitStride || (IsScalable && (IsReverse || CurrentPart > 0))
             ? DL.getIndexType(Builder.getPtrTy(0))
             : Builder.getInt32Ty();
 
 
  auto &Builder = State.Builder;
  // ...
  bool IsUnitStride = Stride == 1 || Stride == -1;
  // ...
                                IsUnitStride, CurrentPart, Builder);
  // ...
  if (IndexTy != RunTimeVF->getType())
    RunTimeVF = Builder.CreateZExtOrTrunc(RunTimeVF, IndexTy);
  // ...
  Value *NumElt = Builder.CreateMul(
      ConstantInt::get(IndexTy, Stride * (int64_t)CurrentPart), RunTimeVF);
  // ...
  Value *LastLane = Builder.CreateSub(RunTimeVF, ConstantInt::get(IndexTy, 1));
  // ...
    LastLane = Builder.CreateMul(ConstantInt::get(IndexTy, Stride), LastLane);
  // ...
  ResultPtr = Builder.CreateGEP(IndexedTy, ResultPtr, LastLane, "",
  // ...
  State.set(this, ResultPtr, /*IsScalar*/ true);

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  // ...
  O << " = vector-end-pointer";
 
 
  auto &Builder = State.Builder;
  // ...
                                 true, CurrentPart, Builder);
  // ...
  State.set(this, ResultPtr, /*IsScalar*/ true);

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  // ...
  O << " = vector-pointer ";
 
 
    return Ctx.TTI.getCFInstrCost(Instruction::PHI, Ctx.CostKind);
  // ...
  Type *ResultTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
  // ...
         Ctx.TTI.getCmpSelInstrCost(Instruction::Select, ResultTy, CmpTy,

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  // ...
  O << Indent << "BLEND ";
 
 
  assert(!State.Lane && "Reduction being replicated.");
  // ...
         "In-loop AnyOf reductions aren't currently supported");
  // ...
    Value *NewCond = State.get(Cond, State.VF.isScalar());
    // ...
    if (State.VF.isVector())
      Start = State.Builder.CreateVectorSplat(VecTy->getElementCount(), Start);
    // ...
    Value *Select = State.Builder.CreateSelect(NewCond, NewVecOp, Start);
  // ...
    if (State.VF.isVector())
    // ...
      NewRed = State.Builder.CreateBinOp(
          // ...
          PrevInChain, NewVecOp);
    PrevInChain = NewRed;
    NextInChain = NewRed;
  // ...
      NextInChain = createMinMaxOp(State.Builder, Kind, NewRed, PrevInChain);
    else
      NextInChain = State.Builder.CreateBinOp(
          // ...
          PrevInChain, NewRed);
  State.set(this, NextInChain, /*IsScalar*/ true);
  // ...
  assert(!State.Lane && "Reduction being replicated.");
  // ...
  auto &Builder = State.Builder;
  // ...
    Mask = State.get(CondOp);
  else
    Mask = Builder.CreateVectorSplat(State.VF, Builder.getTrue());
  // ...
      NewRed = Builder.CreateBinOp(
  // ...
  State.set(this, NewRed, /*IsScalar*/ true);
  // ...
  Type *ElementTy = Ctx.Types.inferScalarType(this);
  // ...
  std::optional<FastMathFlags> OptionalFMF =
  // ...
      "Any-of reduction not implemented in VPlan-based cost model currently.");
  // ...
    return Ctx.TTI.getMinMaxReductionCost(Id, VectorTy, FMFs, Ctx.CostKind);
  // ...
  return Ctx.TTI.getArithmeticReductionCost(Opcode, VectorTy, OptionalFMF,
 
 
    ExpressionTypes ExpressionType,
    // ...
      ExpressionRecipes(ExpressionRecipes), ExpressionType(ExpressionType) {
  assert(!ExpressionRecipes.empty() && "Nothing to combine?");
  // ...
      "expression cannot contain recipes with side-effects");
  // ...
  for (auto *R : ExpressionRecipes)
    ExpressionRecipesAsSetOfUsers.insert(R);
  // ...
    if (R != ExpressionRecipes.back() &&
        any_of(R->users(), [&ExpressionRecipesAsSetOfUsers](VPUser *U) {
          return !ExpressionRecipesAsSetOfUsers.contains(U);
  // ...
      R->replaceUsesWithIf(CopyForExtUsers, [&ExpressionRecipesAsSetOfUsers](
        // ...
        return !ExpressionRecipesAsSetOfUsers.contains(&U);
  // ...
      R->removeFromParent();
  // ...
  for (auto *R : ExpressionRecipes) {
    for (const auto &[Idx, Op] : enumerate(R->operands())) {
      auto *Def = Op->getDefiningRecipe();
      if (Def && ExpressionRecipesAsSetOfUsers.contains(Def))
      // ...
      LiveInPlaceholders.push_back(new VPValue());
  // ...
  for (auto *R : ExpressionRecipes)
    for (auto const &[LiveIn, Tmp] : zip(operands(), LiveInPlaceholders))
      R->replaceUsesOfWith(LiveIn, Tmp);
  // ...
  for (auto *R : ExpressionRecipes)
    // ...
    if (!R->getParent())
      R->insertBefore(this);
  // ...
    LiveInPlaceholders[Idx]->replaceAllUsesWith(Op);
  // ...
  ExpressionRecipes.clear();
 
 
  Type *RedTy = Ctx.Types.inferScalarType(this);
  // ...
         "VPExpressionRecipe only supports integer types currently.");
  // ...
  switch (ExpressionType) {
  case ExpressionTypes::ExtendedReduction: {
    // ...
               ? Ctx.TTI.getPartialReductionCost(
                     Opcode, Ctx.Types.inferScalarType(getOperand(0)), nullptr,
                     // ...
               : Ctx.TTI.getExtendedReductionCost(
                     Opcode, ExtR->getOpcode() == Instruction::ZExt, RedTy,
                     SrcVecTy, std::nullopt, Ctx.CostKind);
  }
  case ExpressionTypes::MulAccReduction:
    return Ctx.TTI.getMulAccReductionCost(false, Opcode, RedTy, SrcVecTy,
  // ...
  case ExpressionTypes::ExtNegatedMulAccReduction:
    assert(Opcode == Instruction::Add && "Unexpected opcode");
    Opcode = Instruction::Sub;
    // ...
  case ExpressionTypes::ExtMulAccReduction: {
    // ...
      return Ctx.TTI.getPartialReductionCost(
          Opcode, Ctx.Types.inferScalarType(getOperand(0)),
          Ctx.Types.inferScalarType(getOperand(1)), RedTy, VF,
          // ...
              Ext0R->getOpcode()),
          // ...
              Ext1R->getOpcode()),
          Mul->getOpcode(), Ctx.CostKind);
    // ...
    return Ctx.TTI.getMulAccReductionCost(
        // ...
        Opcode, RedTy, SrcVecTy, Ctx.CostKind);
  // ...
    return R->mayReadFromMemory() || R->mayWriteToMemory();
  // ...
      "expression cannot contain recipes with side-effects");
 
 
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  // ...
  O << Indent << "EXPRESSION ";
  // ...
  switch (ExpressionType) {
  case ExpressionTypes::ExtendedReduction: {
    // ...
    O << " + " << (IsPartialReduction ? "partial." : "") << "reduce.";
    // ...
      << *Ext0->getResultType();
    if (Red->isConditional()) {
  // ...
  case ExpressionTypes::ExtNegatedMulAccReduction: {
    // ...
    O << " + " << (IsPartialReduction ? "partial." : "") << "reduce.";
    // ...
      << *Ext0->getResultType() << "), (";
    // ...
      << *Ext1->getResultType() << ")";
    if (Red->isConditional()) {
  // ...
  case ExpressionTypes::MulAccReduction:
  case ExpressionTypes::ExtMulAccReduction: {
    // ...
    O << " + " << (IsPartialReduction ? "partial." : "") << "reduce.";
    // ...
    bool IsExtended = ExpressionType == ExpressionTypes::ExtMulAccReduction;
    // ...
                                               : ExpressionRecipes[0]);
    // ...
        << *Ext0->getResultType() << "), (";
    // ...
        << *Ext1->getResultType() << ")";
    if (Red->isConditional()) {
  // ...
  O << Indent << "REDUCE ";
  // ...
  O << Indent << "REDUCE ";
 
 
  assert((!Instr->getType()->isAggregateType() ||
  // ...
         "Expected vectorizable or non-aggregate type.");
  // ...
  bool IsVoidRetTy = Instr->getType()->isVoidTy();
  // ...
    Cloned->setName(Instr->getName() + ".cloned");
    Type *ResultTy = State.TypeAnalysis.inferScalarType(RepRecipe);
    // ...
    if (ResultTy != Cloned->getType())
  // ...
    State.setDebugLocFrom(DL);
  // ...
    auto InputLane = Lane;
    // ...
    Cloned->setOperand(I.index(), State.get(Operand, InputLane));
  // ...
  State.Builder.Insert(Cloned);
  // ...
  State.set(RepRecipe, Cloned, Lane);
  // ...
    State.AC->registerAssumption(II);
  // ...
              [](VPValue *Op) { return Op->isDefinedOutsideLoopRegions(); })) &&
      "Expected a recipe is either within a region or all of its operands "
      "are defined outside the vectorized region.");
  // ...
    assert(IsSingleScalar && "VPReplicateRecipes outside replicate regions "
                             "must have already been unrolled");
  // ...
         "uniform recipe shouldn't be predicated");
  assert(!State.VF.isScalable() && "Can't scalarize a scalable vector");
  // ...
        State.Lane->isFirstLane()
  // ...
    State.set(this, State.packScalarIntoVectorizedValue(this, WideValue,
  // ...
  auto *PtrR = Ptr->getDefiningRecipe();
  // ...
                      Instruction::GetElementPtr) ||
  // ...
    if (!Opd->isDefinedOutsideLoopRegions() &&
  // ...
  while (!WorkList.empty()) {
    // ...
    if (!Cur || !Seen.insert(Cur).second)
    // ...
                           return Seen.contains(
                               Blend->getIncomingValue(I)->getDefiningRecipe());
    // ...
    for (VPUser *U : Cur->users()) {
      // ...
        if (InterleaveR->getAddr() == Cur)
        // ...
        if (RepR->getOpcode() == Instruction::Load &&
            RepR->getOperand(0) == Cur)
        // ...
        if (RepR->getOpcode() == Instruction::Store &&
            RepR->getOperand(1) == Cur)
        // ...
        if (MemR->getAddr() == Cur && MemR->isConsecutive())
 
 
  Ctx.SkipCostComputation.insert(UI);
  // ...
  case Instruction::GetElementPtr:
  // ...
  case Instruction::Call: {
    // ...
    for (const VPValue *ArgOp : ArgOps)
      Tys.push_back(Ctx.Types.inferScalarType(ArgOp));
    // ...
    if (CalledFn->isIntrinsic())
      // ...
      switch (CalledFn->getIntrinsicID()) {
      case Intrinsic::assume:
      case Intrinsic::lifetime_end:
      case Intrinsic::lifetime_start:
      case Intrinsic::sideeffect:
      case Intrinsic::pseudoprobe:
      case Intrinsic::experimental_noalias_scope_decl: {
        // ...
               "scalarizing intrinsic should be free");
    // ...
    Type *ResultTy = Ctx.Types.inferScalarType(this);
    // ...
        Ctx.TTI.getCallInstrCost(CalledFn, ResultTy, Tys, Ctx.CostKind);
    // ...
      if (CalledFn->isIntrinsic())
        ScalarCallCost = std::min(
      // ...
      return ScalarCallCost;
    // ...
           Ctx.getScalarizationOverhead(ResultTy, ArgOps, VF);
  }
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::FAdd:
  case Instruction::FSub:
  case Instruction::Mul:
  case Instruction::FMul:
  case Instruction::FDiv:
  case Instruction::FRem:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::ICmp:
  case Instruction::FCmp:
  // ...
  case Instruction::SDiv:
  case Instruction::UDiv:
  case Instruction::SRem:
  case Instruction::URem: {
    // ...
                 Ctx.getScalarizationOverhead(Ctx.Types.inferScalarType(this),
    // ...
                  Ctx.TTI.getCFInstrCost(Instruction::PHI, Ctx.CostKind);
  // ...
  case Instruction::Load:
  case Instruction::Store: {
    // ...
    bool IsLoad = UI->getOpcode() == Instruction::Load;
    // ...
    Type *ValTy = Ctx.Types.inferScalarType(IsLoad ? this : getOperand(0));
    Type *ScalarPtrTy = Ctx.Types.inferScalarType(PtrOp);
    // ...
        UI->getOpcode(), ValTy, Alignment, AS, Ctx.CostKind, OpInfo);
    // ...
    bool PreferVectorizedAddressing = Ctx.TTI.prefersVectorizedAddressing();
    bool UsedByLoadStoreAddress =
    // ...
        ScalarMemOpCost + Ctx.TTI.getAddressComputationCost(
                              PtrTy, UsedByLoadStoreAddress ? nullptr : &Ctx.SE,
                              PtrSCEV, Ctx.CostKind);
    // ...
    if (!UsedByLoadStoreAddress) {
      bool EfficientVectorLoadStore =
          Ctx.TTI.supportsEfficientVectorElementLoadStore();
      if (!(IsLoad && !PreferVectorizedAddressing) &&
          !(!IsLoad && EfficientVectorLoadStore))
      // ...
      if (!EfficientVectorLoadStore)
        ResultTy = Ctx.Types.inferScalarType(this);
    // ...
           Ctx.getScalarizationOverhead(ResultTy, OpsToScalarize, VF, true);
  }
  // ...
  return Ctx.getLegacyCost(UI, VF);
 
 
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  // ...
  O << Indent << (IsSingleScalar ? "CLONE " : "REPLICATE ");
  // ...
    O << "@" << CB->getCalledFunction()->getName() << "(";
  // ...
  assert(State.Lane && "Branch on Mask works only on single instance.");
  // ...
  Value *ConditionBit = State.get(BlockInMask, *State.Lane);
  // ...
  auto *CurrentTerminator = State.CFG.PrevBB->getTerminator();
  // ...
         "Expected to replace unreachable terminator with conditional branch.");
  // ...
      State.Builder.CreateCondBr(ConditionBit, State.CFG.PrevBB, nullptr);
  CondBr->setSuccessor(0, nullptr);
  CurrentTerminator->eraseFromParent();
  // ...
  assert(State.Lane && "Predicated instruction PHI works per instance.");
  // ...
  assert(PredicatingBB && "Predicated block has no single predecessor.");
  // ...
         "operand must be VPReplicateRecipe");
  // ...
           "Packed operands must generate an insertelement or insertvalue");
  // ...
      for (unsigned I = 0; I < StructTy->getNumContainedTypes() - 1; I++)
  // ...
    PHINode *VPhi = State.Builder.CreatePHI(VecI->getType(), 2);
    VPhi->addIncoming(VecI->getOperand(0), PredicatingBB);
    // ...
    if (State.hasVectorValue(this))
      State.reset(this, VPhi);
    else
      State.set(this, VPhi);
  // ...
    Type *PredInstType = State.TypeAnalysis.inferScalarType(getOperand(0));
    PHINode *Phi = State.Builder.CreatePHI(PredInstType, 2);
    // ...
    Phi->addIncoming(ScalarPredInst, PredicatedBB);
    if (State.hasScalarValue(this, *State.Lane))
      State.reset(this, Phi, *State.Lane);
    else
      State.set(this, Phi, *State.Lane);
    // ...
    State.reset(getOperand(0), Phi, *State.Lane);
 
 
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  // ...
  O << Indent << "PHI-PREDICATED-INSTRUCTION ";
  // ...
                    ->getAddressSpace();
  // ...
                        : Instruction::Store;
  // ...
           "Inconsecutive memory access should not have the order.");
  // ...
    return Ctx.TTI.getAddressComputationCost(PtrTy, nullptr, nullptr,
  // ...
        Ctx.TTI.getMaskedMemoryOpCost(Opcode, Ty, Alignment, AS, Ctx.CostKind);
  // ...
    Cost += Ctx.TTI.getMemoryOpCost(Opcode, Ty, Alignment, AS, Ctx.CostKind,
  // ...
  return Cost += Ctx.TTI.getShuffleCost(
 
 
  auto &Builder = State.Builder;
  Value *Mask = nullptr;
  if (auto *VPMask = getMask()) {
    // ...
    Mask = State.get(VPMask);
    // ...
      Mask = Builder.CreateVectorReverse(Mask, "reverse");
  // ...
    NewLI = Builder.CreateMaskedGather(DataTy, Addr, Alignment, Mask, nullptr,
                                       "wide.masked.gather");
  // ...
        Builder.CreateMaskedLoad(DataTy, Addr, Alignment, Mask,
  // ...
    NewLI = Builder.CreateAlignedLoad(DataTy, Addr, Alignment, "wide.load");
  // ...
    NewLI = Builder.CreateVectorReverse(NewLI, "reverse");
  State.set(this, NewLI);

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  // ...
  O << Indent << "WIDEN ";

/// Use all-true mask for reverse rather than actual mask, as it avoids a
/// dependence w/o affecting the result.
static Instruction *createReverseEVL(IRBuilderBase &Builder, Value *Operand,
                                     Value *EVL, const Twine &Name) {
  // ...
  Value *AllTrueMask =
      Builder.CreateVectorSplat(ValTy->getElementCount(), Builder.getTrue());
  return Builder.CreateIntrinsic(ValTy, Intrinsic::experimental_vp_reverse,
                                 {Operand, AllTrueMask, EVL}, nullptr, Name);
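// Illustrative sketch (hypothetical operand names) of the IR produced by the
// CreateIntrinsic call above for a scalable 4 x i32 value:
//   %rev = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(
//       <vscale x 4 x i32> %v, <vscale x 4 x i1> splat (i1 true), i32 %evl)
// Only the first %evl lanes participate in the reversal.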
 
 
  auto &Builder = State.Builder;
  // ...
  Value *Mask = nullptr;
  // ...
    Mask = State.get(VPMask);
  // ...
    Mask = Builder.CreateVectorSplat(State.VF, Builder.getTrue());
  // ...
        Builder.CreateIntrinsic(DataTy, Intrinsic::vp_gather, {Addr, Mask, EVL},
                                nullptr, "wide.masked.gather");
  // ...
    NewLI = Builder.CreateIntrinsic(DataTy, Intrinsic::vp_load,
                                    {Addr, Mask, EVL}, nullptr, "vp.op.load");
  // ...
  State.set(this, Res);
  // ...
                    ->getAddressSpace();
  // ...
      Instruction::Load, Ty, Alignment, AS, Ctx.CostKind);
  // ...
  return Cost + Ctx.TTI.getShuffleCost(

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  // ...
  O << Indent << "WIDEN ";
 
 
  auto &Builder = State.Builder;
  // ...
  Value *Mask = nullptr;
  if (auto *VPMask = getMask()) {
    // ...
    Mask = State.get(VPMask);
    // ...
      Mask = Builder.CreateVectorReverse(Mask, "reverse");
  // ...
  Value *StoredVal = State.get(StoredVPValue);
  // ...
    StoredVal = Builder.CreateVectorReverse(StoredVal, "reverse");
  // ...
    NewSI = Builder.CreateMaskedScatter(StoredVal, Addr, Alignment, Mask);
  // ...
    NewSI = Builder.CreateMaskedStore(StoredVal, Addr, Alignment, Mask);
  // ...
    NewSI = Builder.CreateAlignedStore(StoredVal, Addr, Alignment);

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  // ...
  O << Indent << "WIDEN store ";
  // ...
  auto &Builder = State.Builder;
  // ...
  Value *StoredVal = State.get(StoredValue);
  // ...
  Value *Mask = nullptr;
  // ...
    Mask = State.get(VPMask);
  // ...
    Mask = Builder.CreateVectorSplat(State.VF, Builder.getTrue());
  // ...
  if (CreateScatter) {
    // ...
                                    Intrinsic::vp_scatter,
                                    {StoredVal, Addr, Mask, EVL});
  // ...
                                    Intrinsic::vp_store,
                                    {StoredVal, Addr, Mask, EVL});
  // ...
                    ->getAddressSpace();
  // ...
      Instruction::Store, Ty, Alignment, AS, Ctx.CostKind);
  // ...
  return Cost + Ctx.TTI.getShuffleCost(

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  // ...
  O << Indent << "WIDEN vp.store ";
 
 
  auto VF = DstVTy->getElementCount();
  // ...
  assert(VF == SrcVecTy->getElementCount() && "Vector dimensions do not match");
  Type *SrcElemTy = SrcVecTy->getElementType();
  Type *DstElemTy = DstVTy->getElementType();
  assert((DL.getTypeSizeInBits(SrcElemTy) == DL.getTypeSizeInBits(DstElemTy)) &&
         "Vector elements must have same size");
  // ...
    return Builder.CreateBitOrPointerCast(V, DstVTy);
  // ...
         "Only one type should be a pointer type");
  // ...
         "Only one type should be a floating point type");
  // ...
  Value *CastVal = Builder.CreateBitOrPointerCast(V, VecIntTy);
  return Builder.CreateBitOrPointerCast(CastVal, DstVTy);

static Value *interleaveVectors(IRBuilderBase &Builder, ArrayRef<Value *> Vals,
                                const Twine &Name) {
  unsigned Factor = Vals.size();
  assert(Factor > 1 && "Tried to interleave invalid number of vectors");
  // ...
  for (Value *Val : Vals)
    assert(Val->getType() == VecTy && "Tried to interleave mismatched types");
  // ...
  if (VecTy->isScalableTy()) {
    assert(Factor <= 8 && "Unsupported interleave factor for scalable vectors");
    return Builder.CreateVectorInterleave(Vals, Name);
  }
  // ...
  const unsigned NumElts = VecTy->getElementCount().getFixedValue();
  return Builder.CreateShuffleVector(
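// Illustrative sketch (not from the source): for fixed-width vectors the
// CreateShuffleVector call above interleaves via a single shuffle mask. With
// Factor = 2 and NumElts = 4, two concatenated <4 x i32> vectors
//   A = <a0 a1 a2 a3>, B = <b0 b1 b2 b3>
// are combined with mask <0, 4, 1, 5, 2, 6, 3, 7>, yielding
//   <a0 b0 a1 b1 a2 b2 a3 b3>.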
 
 
  assert(!State.Lane && "Interleave group being replicated.");
  // ...
         "Masking gaps for scalable vectors is not yet supported.");
  // ...
  unsigned InterleaveFactor = Group->getFactor();
  // ...
  auto CreateGroupMask = [&BlockInMask, &State,
                          &InterleaveFactor](Value *MaskForGaps) -> Value * {
    if (State.VF.isScalable()) {
      assert(!MaskForGaps && "Interleaved groups with gaps are not supported.");
      assert(InterleaveFactor <= 8 &&
             "Unsupported deinterleave factor for scalable vectors");
      auto *ResBlockInMask = State.get(BlockInMask);
      // ...
    Value *ResBlockInMask = State.get(BlockInMask);
    Value *ShuffledMask = State.Builder.CreateShuffleVector(
        // ...
        "interleaved.mask");
    return MaskForGaps ? State.Builder.CreateBinOp(Instruction::And,
                                                   ShuffledMask, MaskForGaps)
  // ...
  const DataLayout &DL = Instr->getDataLayout();
  // ...
    Value *MaskForGaps = nullptr;
    // ...
      assert(MaskForGaps && "Mask for Gaps is required but it is null");
    // ...
    if (BlockInMask || MaskForGaps) {
      Value *GroupMask = CreateGroupMask(MaskForGaps);
      // ...
      NewLoad = State.Builder.CreateMaskedLoad(VecTy, ResAddr,
                                               // ...
                                               PoisonVec, "wide.masked.vec");
    // ...
      NewLoad = State.Builder.CreateAlignedLoad(VecTy, ResAddr,
    // ...
    if (VecTy->isScalableTy()) {
      // ...
      assert(InterleaveFactor <= 8 &&
             "Unsupported deinterleave factor for scalable vectors");
      NewLoad = State.Builder.CreateIntrinsic(
          // ...
          nullptr, "strided.vec");
    // ...
    auto CreateStridedVector = [&InterleaveFactor, &State,
                                &NewLoad](unsigned Index) -> Value * {
      assert(Index < InterleaveFactor && "Illegal group index");
      if (State.VF.isScalable())
        return State.Builder.CreateExtractValue(NewLoad, Index);
      // ...
      return State.Builder.CreateShuffleVector(NewLoad, StrideMask,
    // ...
    for (unsigned I = 0, J = 0; I < InterleaveFactor; ++I) {
      // ...
      Value *StridedVec = CreateStridedVector(I);
      // ...
      if (Member->getType() != ScalarTy) {
      // ...
        StridedVec = State.Builder.CreateVectorReverse(StridedVec, "reverse");
      // ...
      State.set(VPDefs[J], StridedVec);
  // ...
  Value *MaskForGaps =
  // ...
         "Mismatch between NeedsMaskForGaps and MaskForGaps");
  // ...
  unsigned StoredIdx = 0;
  for (unsigned i = 0; i < InterleaveFactor; i++) {
    // ...
           "Fail to get a member from an interleaved store group");
    // ...
    Value *StoredVec = State.get(StoredValues[StoredIdx]);
    // ...
      StoredVec = State.Builder.CreateVectorReverse(StoredVec, "reverse");
    // ...
    if (StoredVec->getType() != SubVT)
  // ...
  if (BlockInMask || MaskForGaps) {
    Value *GroupMask = CreateGroupMask(MaskForGaps);
    NewStoreInstr = State.Builder.CreateMaskedStore(
        IVec, ResAddr, Group->getAlign(), GroupMask);
  // ...
        State.Builder.CreateAlignedStore(IVec, ResAddr, Group->getAlign());

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  // ...
  O << Indent << "INTERLEAVE-GROUP with factor " << IG->getFactor() << " at ";
  IG->getInsertPos()->printAsOperand(O, false);
  // ...
  for (unsigned i = 0; i < IG->getFactor(); ++i) {
    if (!IG->getMember(i))
    // ...
      O << "\n" << Indent << "  store ";
      // ...
      O << " to index " << i;
    // ...
      O << "\n" << Indent << "  ";
      // ...
      O << " = load from index " << i;
 
 
  assert(!State.Lane && "Interleave group being replicated.");
  assert(State.VF.isScalable() &&
         "Only support scalable VF for EVL tail-folding.");
  // ...
         "Masking gaps for scalable vectors is not yet supported.");
  // ...
  unsigned InterleaveFactor = Group->getFactor();
  assert(InterleaveFactor <= 8 &&
         "Unsupported deinterleave/interleave factor for scalable vectors");
  // ...
  Value *InterleaveEVL = State.Builder.CreateMul(
      EVL, ConstantInt::get(EVL->getType(), InterleaveFactor), "interleave.evl",
  // ...
  Value *GroupMask = nullptr;
  // ...
        State.Builder.CreateVectorSplat(WideVF, State.Builder.getTrue());
  // ...
    CallInst *NewLoad = State.Builder.CreateIntrinsic(
        VecTy, Intrinsic::vp_load, {ResAddr, GroupMask, InterleaveEVL}, nullptr,
    // ...
    NewLoad = State.Builder.CreateIntrinsic(
        // ...
        nullptr, "strided.vec");
    // ...
    const DataLayout &DL = Instr->getDataLayout();
    for (unsigned I = 0, J = 0; I < InterleaveFactor; ++I) {
      // ...
      Value *StridedVec = State.Builder.CreateExtractValue(NewLoad, I);
      // ...
      if (Member->getType() != ScalarTy) {
  // ...
  const DataLayout &DL = Instr->getDataLayout();
  for (unsigned I = 0, StoredIdx = 0; I < InterleaveFactor; I++) {
    // ...
    Value *StoredVec = State.get(StoredValues[StoredIdx]);
    // ...
    if (StoredVec->getType() != SubVT)
  // ...
      State.Builder.CreateIntrinsic(Type::getVoidTy(Ctx), Intrinsic::vp_store,
                                    {IVec, ResAddr, GroupMask, InterleaveEVL});

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  // ...
  O << Indent << "INTERLEAVE-GROUP with factor " << IG->getFactor() << " at ";
  IG->getInsertPos()->printAsOperand(O, false);
  // ...
  for (unsigned i = 0; i < IG->getFactor(); ++i) {
    if (!IG->getMember(i))
    // ...
      O << "\n" << Indent << "  vp.store ";
      // ...
      O << " to index " << i;
    // ...
      O << "\n" << Indent << "  ";
      // ...
      O << " = vp.load from index " << i;
 
 
  unsigned InsertPosIdx = 0;
  for (unsigned Idx = 0; IG->getFactor(); ++Idx)
    if (auto *Member = IG->getMember(Idx)) {
      if (Member == InsertPos)
  // ...
  Type *ValTy = Ctx.Types.inferScalarType(
  // ...
                    ->getAddressSpace();
  // ...
  unsigned InterleaveFactor = IG->getFactor();
  // ...
  for (unsigned IF = 0; IF < InterleaveFactor; IF++)
    if (IG->getMember(IF))
  // ...
      InsertPos->getOpcode(), WideVecTy, IG->getFactor(), Indices,
      IG->getAlign(), AS, Ctx.CostKind, getMask(), NeedsMaskForGaps);
  // ...
  if (!IG->isReverse())
  // ...
  return Cost + IG->getNumMembers() *
  // ...
                                           VectorTy, VectorTy, {}, Ctx.CostKind,
 
 
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  // ...
  O << Indent << "EMIT ";
  // ...
  O << " = CANONICAL-INDUCTION ";
  // ...
  return IsScalarAfterVectorization &&
  // ...

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  // ...
         "unexpected number of operands");
  O << Indent << "EMIT ";
  // ...
  O << " = WIDEN-POINTER-INDUCTION ";
  // ...
  O << Indent << "EMIT ";
  // ...
  O << " = EXPAND SCEV " << *Expr;
  // ...
  IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
  // ...
                      : Builder.CreateVectorSplat(VF, CanonicalIV, "broadcast");
  // ...
    VStep = Builder.CreateVectorSplat(VF, VStep);
    // ...
        Builder.CreateAdd(VStep, Builder.CreateStepVector(VStep->getType()));
  // ...
  Value *CanonicalVectorIV = Builder.CreateAdd(VStart, VStep, "vec.iv");
  State.set(this, CanonicalVectorIV);

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  // ...
  O << Indent << "EMIT ";
  // ...
  O << " = WIDEN-CANONICAL-INDUCTION ";
 
 
  auto &Builder = State.Builder;
  // ...
  Type *VecTy = State.VF.isScalar()
                    ? VectorInit->getType()
                    : VectorType::get(VectorInit->getType(), State.VF);
  // ...
  BasicBlock *VectorPH =
      State.CFG.VPBB2IRBB.at(getParent()->getCFGPredecessor(0));
  if (State.VF.isVector()) {
    // ...
    auto *One = ConstantInt::get(IdxTy, 1);
    // ...
    // Place the recurrence's start value into the last vector lane.
    auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF);
    auto *LastIdx = Builder.CreateSub(RuntimeVF, One);
    VectorInit = Builder.CreateInsertElement(/* ... */);
  }
  // ...
  Phi->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
  Phi->addIncoming(VectorInit, VectorPH);
  State.set(this, Phi);
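// A minimal sketch of the resulting IR (illustrative names): only the last
// lane of the start vector is meaningful; the remaining lanes stay poison
// and are filled by the splice with the previous iteration's value:
//   %last.idx = sub i32 %runtime.vf, 1
//   %recur.init = insertelement <4 x i32> poison, i32 %start, i32 %last.idx
//   %vector.recur = phi <4 x i32> [ %recur.init, %vector.ph ],
//                                 [ %cur.value, %vector.body ]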
 
 
    return Ctx.TTI.getCFInstrCost(Instruction::PHI, Ctx.CostKind);
 
 
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  O << Indent << "FIRST-ORDER-RECURRENCE-PHI ";
 
 
  BasicBlock *VectorPH =
      State.CFG.VPBB2IRBB.at(getParent()->getCFGPredecessor(0));
  // In-loop (and scalar-VF) reductions keep a scalar phi for the chain.
  bool ScalarPHI = State.VF.isScalar() || IsInLoop;
  Value *StartV = State.get(StartVPV, ScalarPHI);
  // ...
  assert(State.CurrentParentLoop->getHeader() == HeaderBB &&
         "recipe must be in the vector loop header");
  // ...
  State.set(this, Phi, IsInLoop);
  // ...
  Phi->addIncoming(StartV, VectorPH);
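// A minimal sketch (illustrative names): out-of-loop reductions get a phi as
// wide as the vectorized reduction, fed by the start vector, e.g.:
//   %vec.phi = phi <4 x i32> [ %start.vec, %vector.ph ],
//                            [ %red.next, %vector.body ]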
 
 
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  O << Indent << "WIDEN-REDUCTION-PHI ";
  // ...
  if (VFScaleFactor != 1)
    O << " (VF scaled by 1/" << VFScaleFactor << ")";
 
 
  Instruction *VecPhi = State.Builder.CreatePHI(VecTy, 2, Name);
  State.set(this, VecPhi);
 
 
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  O << Indent << "WIDEN-PHI ";
 
 
  BasicBlock *VectorPH =
      State.CFG.VPBB2IRBB.at(getParent()->getCFGPredecessor(0));
  // ...
  PHINode *Phi =
      State.Builder.CreatePHI(StartMask->getType(), 2, "active.lane.mask");
  Phi->addIncoming(StartMask, VectorPH);
  State.set(this, Phi);
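// A minimal sketch of the resulting IR (illustrative names):
//   %active.lane.mask = phi <vscale x 4 x i1>
//       [ %entry.mask, %vector.ph ], [ %next.mask, %vector.body ]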
 
 
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  O << Indent << "ACTIVE-LANE-MASK-PHI ";
 
 
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  O << Indent << "EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI ";
 
 
 
 