1//===-- RISCVTargetTransformInfo.cpp - RISC-V specific TTI ----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
11#include "llvm/ADT/STLExtras.h"
17#include <cmath>
18#include <optional>
19using namespace llvm;
20
21#define DEBUG_TYPE "riscvtti"
22
24 "riscv-v-register-bit-width-lmul",
26 "The LMUL to use for getRegisterBitWidth queries. Affects LMUL used "
27 "by autovectorized code. Fractional LMULs are not supported."),
29
31 "riscv-v-slp-max-vf",
33 "Overrides result used for getMaximumVF query which is used "
34 "exclusively by SLP vectorizer."),
36
38RISCVTTIImpl::getRISCVInstructionCost(ArrayRef<unsigned> OpCodes, MVT VT,
40 // Check if the type is valid for all CostKind
41 if (!VT.isVector())
43 size_t NumInstr = OpCodes.size();
45 return NumInstr;
46 InstructionCost LMULCost = TLI->getLMULCost(VT);
48 return LMULCost * NumInstr;
50 for (auto Op : OpCodes) {
51 switch (Op) {
52 case RISCV::VRGATHER_VI:
53 Cost += TLI->getVRGatherVICost(VT);
54 break;
55 case RISCV::VRGATHER_VV:
56 Cost += TLI->getVRGatherVVCost(VT);
57 break;
58 case RISCV::VSLIDEUP_VI:
59 case RISCV::VSLIDEDOWN_VI:
60 Cost += TLI->getVSlideVICost(VT);
61 break;
62 case RISCV::VSLIDEUP_VX:
63 case RISCV::VSLIDEDOWN_VX:
64 Cost += TLI->getVSlideVXCost(VT);
65 break;
66 case RISCV::VREDMAX_VS:
67 case RISCV::VREDMIN_VS:
68 case RISCV::VREDMAXU_VS:
69 case RISCV::VREDMINU_VS:
70 case RISCV::VREDSUM_VS:
71 case RISCV::VREDAND_VS:
72 case RISCV::VREDOR_VS:
73 case RISCV::VREDXOR_VS:
74 case RISCV::VFREDMAX_VS:
75 case RISCV::VFREDMIN_VS:
76 case RISCV::VFREDUSUM_VS: {
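      // The cost below models an unordered reduction as a log2(VL)-deep
      // reduction tree; e.g. (illustrative) an estimated VL of 8 contributes
      // log2(8) == 3.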
77 unsigned VL = VT.getVectorMinNumElements();
78 if (!VT.isFixedLengthVector())
79 VL *= *getVScaleForTuning();
80 Cost += Log2_32_Ceil(VL);
81 break;
82 }
83 case RISCV::VFREDOSUM_VS: {
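      // The ordered FP reduction is inherently serial, so its cost below is
      // linear in the estimated VL.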
84 unsigned VL = VT.getVectorMinNumElements();
85 if (!VT.isFixedLengthVector())
86 VL *= *getVScaleForTuning();
87 Cost += VL;
88 break;
89 }
90 case RISCV::VMV_X_S:
91 case RISCV::VMV_S_X:
92 case RISCV::VFMV_F_S:
93 case RISCV::VFMV_S_F:
94 case RISCV::VMOR_MM:
95 case RISCV::VMXOR_MM:
96 case RISCV::VMAND_MM:
97 case RISCV::VMANDN_MM:
98 case RISCV::VMNAND_MM:
99 case RISCV::VCPOP_M:
100 Cost += 1;
101 break;
102 default:
103 Cost += LMULCost;
104 }
105 }
106 return Cost;
107}
108
111 assert(Ty->isIntegerTy() &&
112 "getIntImmCost can only estimate cost of materialising integers");
113
114 // We have a Zero register, so 0 is always free.
115 if (Imm == 0)
116 return TTI::TCC_Free;
117
118 // Otherwise, we check how many instructions it will take to materialise.
119 const DataLayout &DL = getDataLayout();
120 return RISCVMatInt::getIntMatCost(Imm, DL.getTypeSizeInBits(Ty), *getST());
121}
122
123// Look for patterns of shift followed by AND that can be turned into a pair of
124// shifts. We won't need to materialize an immediate for the AND so these can
125// be considered free.
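// For example (illustrative, XLEN=64):
//   %s = shl i64 %x, 8
//   %a = and i64 %s, 65280   ; 0xFF00 is a shifted mask with 8 trailing zeros
// can be lowered as srli (slli %x, 56), 48, so the AND immediate is free.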
126static bool canUseShiftPair(Instruction *Inst, const APInt &Imm) {
127 uint64_t Mask = Imm.getZExtValue();
128 auto *BO = dyn_cast<BinaryOperator>(Inst->getOperand(0));
129 if (!BO || !BO->hasOneUse())
130 return false;
131
132 if (BO->getOpcode() != Instruction::Shl)
133 return false;
134
135 if (!isa<ConstantInt>(BO->getOperand(1)))
136 return false;
137
138 unsigned ShAmt = cast<ConstantInt>(BO->getOperand(1))->getZExtValue();
139 // (and (shl x, c2), c1) will be matched to (srli (slli x, c2+c3), c3) if c1
140 // is a mask shifted by c2 bits with c3 leading zeros.
141 if (isShiftedMask_64(Mask)) {
142 unsigned Trailing = llvm::countr_zero(Mask);
143 if (ShAmt == Trailing)
144 return true;
145 }
146
147 return false;
148}
149
151 const APInt &Imm, Type *Ty,
153 Instruction *Inst) {
154 assert(Ty->isIntegerTy() &&
155 "getIntImmCost can only estimate cost of materialising integers");
156
157 // We have a Zero register, so 0 is always free.
158 if (Imm == 0)
159 return TTI::TCC_Free;
160
161 // Some instructions in RISC-V can take a 12-bit immediate. Some of these are
162 // commutative, in others the immediate comes from a specific argument index.
163 bool Takes12BitImm = false;
164 unsigned ImmArgIdx = ~0U;
165
166 switch (Opcode) {
167 case Instruction::GetElementPtr:
168 // Never hoist any arguments to a GetElementPtr. CodeGenPrepare will
169 // split up large offsets in GEP into better parts than ConstantHoisting
170 // can.
171 return TTI::TCC_Free;
172 case Instruction::Store:
173 // If the address is a constant, use the materialization cost.
174 if (Idx == 1)
175 return getIntImmCost(Imm, Ty, CostKind);
176 return TTI::TCC_Free;
177 case Instruction::Load:
178 // If the address is a constant, use the materialization cost.
179 return getIntImmCost(Imm, Ty, CostKind);
180 case Instruction::And:
181 // zext.h
182 if (Imm == UINT64_C(0xffff) && ST->hasStdExtZbb())
183 return TTI::TCC_Free;
184 // zext.w
185 if (Imm == UINT64_C(0xffffffff) && ST->hasStdExtZba())
186 return TTI::TCC_Free;
187 // bclri
188 if (ST->hasStdExtZbs() && (~Imm).isPowerOf2())
189 return TTI::TCC_Free;
190 if (Inst && Idx == 1 && Imm.getBitWidth() <= ST->getXLen() &&
191 canUseShiftPair(Inst, Imm))
192 return TTI::TCC_Free;
193 Takes12BitImm = true;
194 break;
195 case Instruction::Add:
196 Takes12BitImm = true;
197 break;
198 case Instruction::Or:
199 case Instruction::Xor:
200 // bseti/binvi
201 if (ST->hasStdExtZbs() && Imm.isPowerOf2())
202 return TTI::TCC_Free;
203 Takes12BitImm = true;
204 break;
205 case Instruction::Mul:
206 // Power of 2 is a shift. Negated power of 2 is a shift and a negate.
207 if (Imm.isPowerOf2() || Imm.isNegatedPowerOf2())
208 return TTI::TCC_Free;
209 // One more or less than a power of 2 can use SLLI+ADD/SUB.
210 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2())
211 return TTI::TCC_Free;
212 // FIXME: There is no MULI instruction.
213 Takes12BitImm = true;
214 break;
215 case Instruction::Sub:
216 case Instruction::Shl:
217 case Instruction::LShr:
218 case Instruction::AShr:
219 Takes12BitImm = true;
220 ImmArgIdx = 1;
221 break;
222 default:
223 break;
224 }
225
226 if (Takes12BitImm) {
227 // Check immediate is the correct argument...
228 if (Instruction::isCommutative(Opcode) || Idx == ImmArgIdx) {
229 // ... and fits into the 12-bit immediate.
230 if (Imm.getSignificantBits() <= 64 &&
231 getTLI()->isLegalAddImmediate(Imm.getSExtValue())) {
232 return TTI::TCC_Free;
233 }
234 }
235
236 // Otherwise, use the full materialisation cost.
237 return getIntImmCost(Imm, Ty, CostKind);
238 }
239
240 // By default, prevent hoisting.
241 return TTI::TCC_Free;
242}
243
246 const APInt &Imm, Type *Ty,
248 // Prevent hoisting in unknown cases.
249 return TTI::TCC_Free;
250}
251
252bool RISCVTTIImpl::hasActiveVectorLength(unsigned, Type *DataTy, Align) const {
253 return ST->hasVInstructions();
254}
255
258 assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
259 return ST->hasStdExtZbb() || ST->hasVendorXCVbitmanip()
262}
263
265 // Currently, the ExpandReductions pass can't expand scalable-vector
266 // reductions, but we still request expansion as RVV doesn't support certain
267 // reductions and the SelectionDAG can't legalize them either.
268 switch (II->getIntrinsicID()) {
269 default:
270 return false;
271 // These reductions have no equivalent in RVV
272 case Intrinsic::vector_reduce_mul:
273 case Intrinsic::vector_reduce_fmul:
274 return true;
275 }
276}
277
278std::optional<unsigned> RISCVTTIImpl::getMaxVScale() const {
279 if (ST->hasVInstructions())
281 return BaseT::getMaxVScale();
282}
283
284std::optional<unsigned> RISCVTTIImpl::getVScaleForTuning() const {
285 if (ST->hasVInstructions())
286 if (unsigned MinVLen = ST->getRealMinVLen();
287 MinVLen >= RISCV::RVVBitsPerBlock)
288 return MinVLen / RISCV::RVVBitsPerBlock;
290}
291
294 unsigned LMUL =
295 llvm::bit_floor(std::clamp<unsigned>(RVVRegisterWidthLMUL, 1, 8));
296 switch (K) {
298 return TypeSize::getFixed(ST->getXLen());
300 return TypeSize::getFixed(
301 ST->useRVVForFixedLengthVectors() ? LMUL * ST->getRealMinVLen() : 0);
304 (ST->hasVInstructions() &&
307 : 0);
308 }
309
310 llvm_unreachable("Unsupported register kind");
311}
312
314RISCVTTIImpl::getConstantPoolLoadCost(Type *Ty, TTI::TargetCostKind CostKind) {
315 // Add a cost of address generation + the cost of the load. The address
316 // is expected to be a PC relative offset to a constant pool entry
317 // using auipc/addi.
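 // Example sequence (illustrative):
 // .Lpcrel_hi0:
 //   auipc a0, %pcrel_hi(.LCPI0_0)
 //   addi  a0, a0, %pcrel_lo(.Lpcrel_hi0)
 //   <memory op using (a0)>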
318 return 2 + getMemoryOpCost(Instruction::Load, Ty, DL.getABITypeAlign(Ty),
319 /*AddressSpace=*/0, CostKind);
320}
321
323 LLVMContext &C) {
324 assert((DataVT.getScalarSizeInBits() != 8 ||
325 DataVT.getVectorNumElements() <= 256) && "unhandled case in lowering");
326 MVT IndexVT = DataVT.changeTypeToInteger();
327 if (IndexVT.getScalarType().bitsGT(ST.getXLenVT()))
328 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
329 return cast<VectorType>(EVT(IndexVT).getTypeForEVT(C));
330}
331
333 VectorType *Tp, ArrayRef<int> Mask,
335 int Index, VectorType *SubTp,
337 const Instruction *CxtI) {
338 Kind = improveShuffleKindFromMask(Kind, Mask, Tp, Index, SubTp);
339
340 std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Tp);
341
342 // First, handle cases where having a fixed length vector enables us to
343 // give a more accurate cost than falling back to generic scalable codegen.
344 // TODO: Each of these cases hints at a modeling gap around scalable vectors.
345 if (isa<FixedVectorType>(Tp)) {
346 switch (Kind) {
347 default:
348 break;
350 if (Mask.size() >= 2 && LT.second.isFixedLengthVector()) {
351 MVT EltTp = LT.second.getVectorElementType();
352 // If the size of the element is < ELEN then shuffles of interleaves and
353 // deinterleaves of 2 vectors can be lowered into the following
354 // sequences
355 if (EltTp.getScalarSizeInBits() < ST->getELen()) {
356 // Example sequence:
357 // vsetivli zero, 4, e8, mf4, ta, ma (ignored)
358 // vwaddu.vv v10, v8, v9
359 // li a0, -1 (ignored)
360 // vwmaccu.vx v10, a0, v9
361 if (ShuffleVectorInst::isInterleaveMask(Mask, 2, Mask.size()))
362 return 2 * LT.first * TLI->getLMULCost(LT.second);
363
364 if (Mask[0] == 0 || Mask[0] == 1) {
365 auto DeinterleaveMask = createStrideMask(Mask[0], 2, Mask.size());
366 // Example sequence:
367 // vnsrl.wi v10, v8, 0
368 if (equal(DeinterleaveMask, Mask))
369 return LT.first * getRISCVInstructionCost(RISCV::VNSRL_WI,
370 LT.second, CostKind);
371 }
372 }
373 }
374 // vrgather + cost of generating the mask constant.
375 // We model this for an unknown mask with a single vrgather.
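 // Example sequence (illustrative):
 //   vle8.v      v12, (a0)   ; index vector loaded from the constant pool
 //   vrgather.vv v10, v8, v12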
376 if (LT.second.isFixedLengthVector() && LT.first == 1 &&
377 (LT.second.getScalarSizeInBits() != 8 ||
378 LT.second.getVectorNumElements() <= 256)) {
379 VectorType *IdxTy = getVRGatherIndexType(LT.second, *ST, Tp->getContext());
380 InstructionCost IndexCost = getConstantPoolLoadCost(IdxTy, CostKind);
381 return IndexCost +
382 getRISCVInstructionCost(RISCV::VRGATHER_VV, LT.second, CostKind);
383 }
384 [[fallthrough]];
385 }
388 // 2 x (vrgather + cost of generating the mask constant) + cost of mask
389 // register for the second vrgather. We model this for an unknown
390 // (shuffle) mask.
391 if (LT.second.isFixedLengthVector() && LT.first == 1 &&
392 (LT.second.getScalarSizeInBits() != 8 ||
393 LT.second.getVectorNumElements() <= 256)) {
394 auto &C = Tp->getContext();
395 auto EC = Tp->getElementCount();
396 VectorType *IdxTy = getVRGatherIndexType(LT.second, *ST, C);
398 InstructionCost IndexCost = getConstantPoolLoadCost(IdxTy, CostKind);
399 InstructionCost MaskCost = getConstantPoolLoadCost(MaskTy, CostKind);
400 return 2 * IndexCost +
401 getRISCVInstructionCost({RISCV::VRGATHER_VV, RISCV::VRGATHER_VV},
402 LT.second, CostKind) +
403 MaskCost;
404 }
405 [[fallthrough]];
406 }
407 case TTI::SK_Select: {
408 // We are going to permute multiple sources and the result will be in
409 // multiple destinations. We provide an accurate cost only for splits where
410 // the element type remains the same.
411 if (!Mask.empty() && LT.first.isValid() && LT.first != 1 &&
412 LT.second.isFixedLengthVector() &&
413 LT.second.getVectorElementType().getSizeInBits() ==
415 LT.second.getVectorNumElements() <
416 cast<FixedVectorType>(Tp)->getNumElements() &&
417 divideCeil(Mask.size(),
418 cast<FixedVectorType>(Tp)->getNumElements()) ==
419 static_cast<unsigned>(*LT.first.getValue())) {
420 unsigned NumRegs = *LT.first.getValue();
421 unsigned VF = cast<FixedVectorType>(Tp)->getNumElements();
422 unsigned SubVF = PowerOf2Ceil(VF / NumRegs);
423 auto *SubVecTy = FixedVectorType::get(Tp->getElementType(), SubVF);
424
426 for (unsigned I = 0; I < NumRegs; ++I) {
427 bool IsSingleVector = true;
428 SmallVector<int> SubMask(SubVF, PoisonMaskElem);
429 transform(Mask.slice(I * SubVF,
430 I == NumRegs - 1 ? Mask.size() % SubVF : SubVF),
431 SubMask.begin(), [&](int I) {
432 bool SingleSubVector = I / VF == 0;
433 IsSingleVector &= SingleSubVector;
434 return (SingleSubVector ? 0 : 1) * SubVF + I % VF;
435 });
438 SubVecTy, SubMask, CostKind, 0, nullptr);
439 return Cost;
440 }
441 }
442 break;
443 }
444 }
445 };
446
447 // Handle scalable vectors (and fixed vectors legalized to scalable vectors).
448 switch (Kind) {
449 default:
450 // Fallthrough to generic handling.
451 // TODO: Most of these cases will return getInvalid in generic code, and
452 // must be implemented here.
453 break;
455 // Extract at zero is always a subregister extract
456 if (Index == 0)
457 return TTI::TCC_Free;
458
459 // If we're extracting a subvector of at most m1 size at a sub-register
460 // boundary - which unfortunately we need exact vlen to identify - this is
461 // a subregister extract at worst and thus won't require a vslidedown.
462 // TODO: Extend for aligned m2, m4 subvector extracts
463 // TODO: Extend for misaligned (but contained) extracts
464 // TODO: Extend for scalable subvector types
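 // For example (illustrative), with an exact VLEN of 128 extracting a
 // <2 x i64> subvector at index 2 lands on an m1 register boundary and is
 // treated as free below.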
465 if (std::pair<InstructionCost, MVT> SubLT = getTypeLegalizationCost(SubTp);
466 SubLT.second.isValid() && SubLT.second.isFixedLengthVector()) {
467 const unsigned MinVLen = ST->getRealMinVLen();
468 const unsigned MaxVLen = ST->getRealMaxVLen();
469 if (MinVLen == MaxVLen &&
470 SubLT.second.getScalarSizeInBits() * Index % MinVLen == 0 &&
471 SubLT.second.getSizeInBits() <= MinVLen)
472 return TTI::TCC_Free;
473 }
474
475 // Example sequence:
476 // vsetivli zero, 4, e8, mf2, tu, ma (ignored)
477 // vslidedown.vi v8, v9, 2
478 return LT.first *
479 getRISCVInstructionCost(RISCV::VSLIDEDOWN_VI, LT.second, CostKind);
481 // Example sequence:
482 // vsetivli zero, 4, e8, mf2, tu, ma (ignored)
483 // vslideup.vi v8, v9, 2
484 return LT.first *
485 getRISCVInstructionCost(RISCV::VSLIDEUP_VI, LT.second, CostKind);
486 case TTI::SK_Select: {
487 // Example sequence:
488 // li a0, 90
489 // vsetivli zero, 8, e8, mf2, ta, ma (ignored)
490 // vmv.s.x v0, a0
491 // vmerge.vvm v8, v9, v8, v0
492 // We use 2 for the cost of the mask materialization as this is the true
493 // cost for small masks and most shuffles are small. At worst, this cost
494 // should be a very small constant for the constant pool load. As such,
495 // we may bias towards large selects slightly more than truly warranted.
496 return LT.first *
497 (1 + getRISCVInstructionCost({RISCV::VMV_S_X, RISCV::VMERGE_VVM},
498 LT.second, CostKind));
499 }
500 case TTI::SK_Broadcast: {
501 bool HasScalar = (Args.size() > 0) && (Operator::getOpcode(Args[0]) ==
502 Instruction::InsertElement);
503 if (LT.second.getScalarSizeInBits() == 1) {
504 if (HasScalar) {
505 // Example sequence:
506 // andi a0, a0, 1
507 // vsetivli zero, 2, e8, mf8, ta, ma (ignored)
508 // vmv.v.x v8, a0
509 // vmsne.vi v0, v8, 0
510 return LT.first *
511 (1 + getRISCVInstructionCost({RISCV::VMV_V_X, RISCV::VMSNE_VI},
512 LT.second, CostKind));
513 }
514 // Example sequence:
515 // vsetivli zero, 2, e8, mf8, ta, mu (ignored)
516 // vmv.v.i v8, 0
517 // vmerge.vim v8, v8, 1, v0
518 // vmv.x.s a0, v8
519 // andi a0, a0, 1
520 // vmv.v.x v8, a0
521 // vmsne.vi v0, v8, 0
522
523 return LT.first *
524 (1 + getRISCVInstructionCost({RISCV::VMV_V_I, RISCV::VMERGE_VIM,
525 RISCV::VMV_X_S, RISCV::VMV_V_X,
526 RISCV::VMSNE_VI},
527 LT.second, CostKind));
528 }
529
530 if (HasScalar) {
531 // Example sequence:
532 // vmv.v.x v8, a0
533 return LT.first *
534 getRISCVInstructionCost(RISCV::VMV_V_X, LT.second, CostKind);
535 }
536
537 // Example sequence:
538 // vrgather.vi v9, v8, 0
539 return LT.first *
540 getRISCVInstructionCost(RISCV::VRGATHER_VI, LT.second, CostKind);
541 }
542 case TTI::SK_Splice: {
543 // vslidedown+vslideup.
544 // TODO: Multiplying by LT.first implies this legalizes into multiple copies
545 // of similar code, but I think we expand through memory.
546 unsigned Opcodes[2] = {RISCV::VSLIDEDOWN_VX, RISCV::VSLIDEUP_VX};
547 if (Index >= 0 && Index < 32)
548 Opcodes[0] = RISCV::VSLIDEDOWN_VI;
549 else if (Index < 0 && Index > -32)
550 Opcodes[1] = RISCV::VSLIDEUP_VI;
551 return LT.first * getRISCVInstructionCost(Opcodes, LT.second, CostKind);
552 }
553 case TTI::SK_Reverse: {
554 // TODO: Cases to improve here:
555 // * Illegal vector types
556 // * i64 on RV32
557 // * i1 vector
558 // At low LMUL, most of the cost is producing the vrgather index register.
559 // At high LMUL, the cost of the vrgather itself will dominate.
560 // Example sequence:
561 // csrr a0, vlenb
562 // srli a0, a0, 3
563 // addi a0, a0, -1
564 // vsetvli a1, zero, e8, mf8, ta, mu (ignored)
565 // vid.v v9
566 // vrsub.vx v10, v9, a0
567 // vrgather.vv v9, v8, v10
568 InstructionCost LenCost = 3;
569 if (LT.second.isFixedLengthVector())
570 // vrsub.vi has a 5 bit immediate field, otherwise an li suffices
571 LenCost = isInt<5>(LT.second.getVectorNumElements() - 1) ? 0 : 1;
572 unsigned Opcodes[] = {RISCV::VID_V, RISCV::VRSUB_VX, RISCV::VRGATHER_VV};
573 if (LT.second.isFixedLengthVector() &&
574 isInt<5>(LT.second.getVectorNumElements() - 1))
575 Opcodes[1] = RISCV::VRSUB_VI;
576 InstructionCost GatherCost =
577 getRISCVInstructionCost(Opcodes, LT.second, CostKind);
578 // Mask (i1) vectors additionally require an extend and a truncate
579 InstructionCost ExtendCost = Tp->getElementType()->isIntegerTy(1) ? 3 : 0;
580 return LT.first * (LenCost + GatherCost + ExtendCost);
581 }
582 }
583 return BaseT::getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp);
584}
585
587RISCVTTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
588 unsigned AddressSpace,
590 if (!isLegalMaskedLoadStore(Src, Alignment) ||
592 return BaseT::getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
593 CostKind);
594
595 return getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind);
596}
597
599 unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
600 Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
601 bool UseMaskForCond, bool UseMaskForGaps) {
602 if (isa<ScalableVectorType>(VecTy))
604 auto *FVTy = cast<FixedVectorType>(VecTy);
605 InstructionCost MemCost =
606 getMemoryOpCost(Opcode, VecTy, Alignment, AddressSpace, CostKind);
607 unsigned VF = FVTy->getNumElements() / Factor;
608
609 // The interleaved memory access pass will lower interleaved memory ops (i.e.
610 // a load or store combined with a specific shuffle) to vlseg/vsseg
611 // intrinsics. In those cases we can treat it as if it's just one (legal)
612 // memory op.
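 // For example (illustrative), a Factor=2 load of <8 x i32> feeding two
 // deinterleaving shuffles becomes a single vlseg2e32.v and is costed as one
 // legal memory operation.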
613 if (!UseMaskForCond && !UseMaskForGaps &&
614 Factor <= TLI->getMaxSupportedInterleaveFactor()) {
615 std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(FVTy);
616 // Need to make sure the type hasn't been scalarized
617 if (LT.second.isFixedLengthVector()) {
618 auto *LegalFVTy = FixedVectorType::get(FVTy->getElementType(),
619 LT.second.getVectorNumElements());
620 // FIXME: We use the memory op cost of the *legalized* type here, because
621 // its getMemoryOpCost returns a really expensive cost for types like
622 // <6 x i8>, which show up when doing interleaves of Factor=3 etc.
623 // Should the memory op cost of these be cheaper?
624 if (TLI->isLegalInterleavedAccessType(LegalFVTy, Factor, Alignment,
625 AddressSpace, DL)) {
626 InstructionCost LegalMemCost = getMemoryOpCost(
627 Opcode, LegalFVTy, Alignment, AddressSpace, CostKind);
628 return LT.first + LegalMemCost;
629 }
630 }
631 }
632
633 // An interleaved load will look like this for Factor=3:
634 // %wide.vec = load <12 x i32>, ptr %3, align 4
635 // %strided.vec = shufflevector %wide.vec, poison, <4 x i32> <stride mask>
636 // %strided.vec1 = shufflevector %wide.vec, poison, <4 x i32> <stride mask>
637 // %strided.vec2 = shufflevector %wide.vec, poison, <4 x i32> <stride mask>
638 if (Opcode == Instruction::Load) {
639 InstructionCost Cost = MemCost;
640 for (unsigned Index : Indices) {
641 FixedVectorType *SubVecTy =
642 FixedVectorType::get(FVTy->getElementType(), VF * Factor);
643 auto Mask = createStrideMask(Index, Factor, VF);
644 InstructionCost ShuffleCost =
646 CostKind, 0, nullptr, {});
647 Cost += ShuffleCost;
648 }
649 return Cost;
650 }
651
652 // TODO: Model for NF > 2
653 // We'll need to enhance getShuffleCost to model shuffles that are just
654 // inserts and extracts into subvectors, since they won't have the full cost
655 // of a vrgather.
656 // An interleaved store for 3 vectors of 4 lanes will look like
657 // %11 = shufflevector <4 x i32> %4, <4 x i32> %6, <8 x i32> <0...7>
658 // %12 = shufflevector <4 x i32> %9, <4 x i32> poison, <8 x i32> <0...3>
659 // %13 = shufflevector <8 x i32> %11, <8 x i32> %12, <12 x i32> <0...11>
660 // %interleaved.vec = shufflevector %13, poison, <12 x i32> <interleave mask>
661 // store <12 x i32> %interleaved.vec, ptr %10, align 4
662 if (Factor != 2)
663 return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
664 Alignment, AddressSpace, CostKind,
665 UseMaskForCond, UseMaskForGaps);
666
667 assert(Opcode == Instruction::Store && "Opcode must be a store");
668 // For an interleaving store of 2 vectors, we perform one large interleaving
669 // shuffle that goes into the wide store
670 auto Mask = createInterleaveMask(VF, Factor);
671 InstructionCost ShuffleCost =
673 CostKind, 0, nullptr, {});
674 return MemCost + ShuffleCost;
675}
676
678 unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
679 Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) {
681 return BaseT::getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
682 Alignment, CostKind, I);
683
684 if ((Opcode == Instruction::Load &&
685 !isLegalMaskedGather(DataTy, Align(Alignment))) ||
686 (Opcode == Instruction::Store &&
687 !isLegalMaskedScatter(DataTy, Align(Alignment))))
688 return BaseT::getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
689 Alignment, CostKind, I);
690
691 // Cost is proportional to the number of memory operations implied. For
692 // scalable vectors, we use an estimate on that number since we don't
693 // know exactly what VL will be.
694 auto &VTy = *cast<VectorType>(DataTy);
695 InstructionCost MemOpCost =
696 getMemoryOpCost(Opcode, VTy.getElementType(), Alignment, 0, CostKind,
697 {TTI::OK_AnyValue, TTI::OP_None}, I);
698 unsigned NumLoads = getEstimatedVLFor(&VTy);
699 return NumLoads * MemOpCost;
700}
701
703 unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
704 Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) {
705 if (((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
706 !isLegalStridedLoadStore(DataTy, Alignment)) ||
707 (Opcode != Instruction::Load && Opcode != Instruction::Store))
708 return BaseT::getStridedMemoryOpCost(Opcode, DataTy, Ptr, VariableMask,
709 Alignment, CostKind, I);
710
712 return TTI::TCC_Basic;
713
714 // Cost is proportional to the number of memory operations implied. For
715 // scalable vectors, we use an estimate on that number since we don't
716 // know exactly what VL will be.
717 auto &VTy = *cast<VectorType>(DataTy);
718 InstructionCost MemOpCost =
719 getMemoryOpCost(Opcode, VTy.getElementType(), Alignment, 0, CostKind,
720 {TTI::OK_AnyValue, TTI::OP_None}, I);
721 unsigned NumLoads = getEstimatedVLFor(&VTy);
722 return NumLoads * MemOpCost;
723}
724
725// Currently, these represent both throughput and codesize costs
726// for the respective intrinsics. The costs in this table are simply
727// instruction counts with the following adjustments made:
728// * One vsetvli is considered free.
730 {Intrinsic::floor, MVT::f32, 9},
731 {Intrinsic::floor, MVT::f64, 9},
732 {Intrinsic::ceil, MVT::f32, 9},
733 {Intrinsic::ceil, MVT::f64, 9},
734 {Intrinsic::trunc, MVT::f32, 7},
735 {Intrinsic::trunc, MVT::f64, 7},
736 {Intrinsic::round, MVT::f32, 9},
737 {Intrinsic::round, MVT::f64, 9},
738 {Intrinsic::roundeven, MVT::f32, 9},
739 {Intrinsic::roundeven, MVT::f64, 9},
740 {Intrinsic::rint, MVT::f32, 7},
741 {Intrinsic::rint, MVT::f64, 7},
742 {Intrinsic::lrint, MVT::i32, 1},
743 {Intrinsic::lrint, MVT::i64, 1},
744 {Intrinsic::llrint, MVT::i64, 1},
745 {Intrinsic::nearbyint, MVT::f32, 9},
746 {Intrinsic::nearbyint, MVT::f64, 9},
747 {Intrinsic::bswap, MVT::i16, 3},
748 {Intrinsic::bswap, MVT::i32, 12},
749 {Intrinsic::bswap, MVT::i64, 31},
750 {Intrinsic::vp_bswap, MVT::i16, 3},
751 {Intrinsic::vp_bswap, MVT::i32, 12},
752 {Intrinsic::vp_bswap, MVT::i64, 31},
753 {Intrinsic::vp_fshl, MVT::i8, 7},
754 {Intrinsic::vp_fshl, MVT::i16, 7},
755 {Intrinsic::vp_fshl, MVT::i32, 7},
756 {Intrinsic::vp_fshl, MVT::i64, 7},
757 {Intrinsic::vp_fshr, MVT::i8, 7},
758 {Intrinsic::vp_fshr, MVT::i16, 7},
759 {Intrinsic::vp_fshr, MVT::i32, 7},
760 {Intrinsic::vp_fshr, MVT::i64, 7},
761 {Intrinsic::bitreverse, MVT::i8, 17},
762 {Intrinsic::bitreverse, MVT::i16, 24},
763 {Intrinsic::bitreverse, MVT::i32, 33},
764 {Intrinsic::bitreverse, MVT::i64, 52},
765 {Intrinsic::vp_bitreverse, MVT::i8, 17},
766 {Intrinsic::vp_bitreverse, MVT::i16, 24},
767 {Intrinsic::vp_bitreverse, MVT::i32, 33},
768 {Intrinsic::vp_bitreverse, MVT::i64, 52},
769 {Intrinsic::ctpop, MVT::i8, 12},
770 {Intrinsic::ctpop, MVT::i16, 19},
771 {Intrinsic::ctpop, MVT::i32, 20},
772 {Intrinsic::ctpop, MVT::i64, 21},
773 {Intrinsic::vp_ctpop, MVT::i8, 12},
774 {Intrinsic::vp_ctpop, MVT::i16, 19},
775 {Intrinsic::vp_ctpop, MVT::i32, 20},
776 {Intrinsic::vp_ctpop, MVT::i64, 21},
777 {Intrinsic::vp_ctlz, MVT::i8, 19},
778 {Intrinsic::vp_ctlz, MVT::i16, 28},
779 {Intrinsic::vp_ctlz, MVT::i32, 31},
780 {Intrinsic::vp_ctlz, MVT::i64, 35},
781 {Intrinsic::vp_cttz, MVT::i8, 16},
782 {Intrinsic::vp_cttz, MVT::i16, 23},
783 {Intrinsic::vp_cttz, MVT::i32, 24},
784 {Intrinsic::vp_cttz, MVT::i64, 25},
785};
786
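// Map a VP intrinsic ID to its corresponding ISD::VP_* opcode using the table
// in llvm/IR/VPIntrinsics.def; returns ISD::DELETED_NODE when no mapping
// exists.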
788 switch (ID) {
789#define HELPER_MAP_VPID_TO_VPSD(VPID, VPSD) \
790 case Intrinsic::VPID: \
791 return ISD::VPSD;
792#include "llvm/IR/VPIntrinsics.def"
793#undef HELPER_MAP_VPID_TO_VPSD
794 }
795 return ISD::DELETED_NODE;
796}
797
801 auto *RetTy = ICA.getReturnType();
802 switch (ICA.getID()) {
803 case Intrinsic::ceil:
804 case Intrinsic::floor:
805 case Intrinsic::trunc:
806 case Intrinsic::rint:
807 case Intrinsic::lrint:
808 case Intrinsic::llrint:
809 case Intrinsic::round:
810 case Intrinsic::roundeven: {
811 // These all use the same code.
813 if (!LT.second.isVector() && TLI->isOperationCustom(ISD::FCEIL, LT.second))
814 return LT.first * 8;
815 break;
816 }
817 case Intrinsic::umin:
818 case Intrinsic::umax:
819 case Intrinsic::smin:
820 case Intrinsic::smax: {
822 if (LT.second.isScalarInteger() && ST->hasStdExtZbb())
823 return LT.first;
824
825 if (ST->hasVInstructions() && LT.second.isVector()) {
826 unsigned Op;
827 switch (ICA.getID()) {
828 case Intrinsic::umin:
829 Op = RISCV::VMINU_VV;
830 break;
831 case Intrinsic::umax:
832 Op = RISCV::VMAXU_VV;
833 break;
834 case Intrinsic::smin:
835 Op = RISCV::VMIN_VV;
836 break;
837 case Intrinsic::smax:
838 Op = RISCV::VMAX_VV;
839 break;
840 }
841 return LT.first * getRISCVInstructionCost(Op, LT.second, CostKind);
842 }
843 break;
844 }
845 case Intrinsic::sadd_sat:
846 case Intrinsic::ssub_sat:
847 case Intrinsic::uadd_sat:
848 case Intrinsic::usub_sat:
849 case Intrinsic::fabs:
850 case Intrinsic::sqrt: {
852 if (ST->hasVInstructions() && LT.second.isVector())
853 return LT.first;
854 break;
855 }
856 case Intrinsic::ctpop: {
858 if (ST->hasVInstructions() && ST->hasStdExtZvbb() && LT.second.isVector())
859 return LT.first;
860 break;
861 }
862 case Intrinsic::abs: {
864 if (ST->hasVInstructions() && LT.second.isVector()) {
865 // vrsub.vi v10, v8, 0
866 // vmax.vv v8, v8, v10
867 return LT.first * 2;
868 }
869 break;
870 }
871 case Intrinsic::get_active_lane_mask: {
872 if (ST->hasVInstructions()) {
873 Type *ExpRetTy = VectorType::get(
874 ICA.getArgTypes()[0], cast<VectorType>(RetTy)->getElementCount());
875 auto LT = getTypeLegalizationCost(ExpRetTy);
876
877 // vid.v v8 // considered hoisted
878 // vsaddu.vx v8, v8, a0
879 // vmsltu.vx v0, v8, a1
880 return LT.first *
881 getRISCVInstructionCost({RISCV::VSADDU_VX, RISCV::VMSLTU_VX},
882 LT.second, CostKind);
883 }
884 break;
885 }
886 // TODO: add more intrinsics
887 case Intrinsic::experimental_stepvector: {
889 // Legalisation of illegal types involves an `index' instruction plus
890 // (LT.first - 1) vector adds.
891 if (ST->hasVInstructions())
892 return getRISCVInstructionCost(RISCV::VID_V, LT.second, CostKind) +
893 (LT.first - 1) *
894 getRISCVInstructionCost(RISCV::VADD_VX, LT.second, CostKind);
895 return 1 + (LT.first - 1);
896 }
897 case Intrinsic::vp_rint: {
898 // RISC-V target uses at least 5 instructions to lower rounding intrinsics.
899 unsigned Cost = 5;
901 if (TLI->isOperationCustom(ISD::VP_FRINT, LT.second))
902 return Cost * LT.first;
903 break;
904 }
905 case Intrinsic::vp_nearbyint: {
906 // One more read and one write of fflags than vp_rint.
907 unsigned Cost = 7;
909 if (TLI->isOperationCustom(ISD::VP_FRINT, LT.second))
910 return Cost * LT.first;
911 break;
912 }
913 case Intrinsic::vp_ceil:
914 case Intrinsic::vp_floor:
915 case Intrinsic::vp_round:
916 case Intrinsic::vp_roundeven:
917 case Intrinsic::vp_roundtozero: {
918 // Rounding with static rounding mode needs two more instructions to
919 // swap/write FRM than vp_rint.
920 unsigned Cost = 7;
922 unsigned VPISD = getISDForVPIntrinsicID(ICA.getID());
923 if (TLI->isOperationCustom(VPISD, LT.second))
924 return Cost * LT.first;
925 break;
926 }
927 }
928
929 if (ST->hasVInstructions() && RetTy->isVectorTy()) {
930 if (auto LT = getTypeLegalizationCost(RetTy);
931 LT.second.isVector()) {
932 MVT EltTy = LT.second.getVectorElementType();
933 if (const auto *Entry = CostTableLookup(VectorIntrinsicCostTable,
934 ICA.getID(), EltTy))
935 return LT.first * Entry->Cost;
936 }
937 }
938
940}
941
943 Type *Src,
946 const Instruction *I) {
947 bool IsVectorType = isa<VectorType>(Dst) && isa<VectorType>(Src);
948 if (!IsVectorType)
949 return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
950
951 bool IsTypeLegal = isTypeLegal(Src) && isTypeLegal(Dst) &&
952 (Src->getScalarSizeInBits() <= ST->getELen()) &&
953 (Dst->getScalarSizeInBits() <= ST->getELen());
954
955 // FIXME: Need to compute legalizing cost for illegal types.
956 if (!IsTypeLegal)
957 return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
958
959 std::pair<InstructionCost, MVT> SrcLT = getTypeLegalizationCost(Src);
960 std::pair<InstructionCost, MVT> DstLT = getTypeLegalizationCost(Dst);
961
962 int ISD = TLI->InstructionOpcodeToISD(Opcode);
963 assert(ISD && "Invalid opcode");
964
965 int PowDiff = (int)Log2_32(Dst->getScalarSizeInBits()) -
966 (int)Log2_32(Src->getScalarSizeInBits());
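 // PowDiff is the log2 ratio of destination to source element width, e.g.
 // i8 -> i32 gives PowDiff == 2 and selects vsext.vf4/vzext.vf4 below.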
967 switch (ISD) {
968 case ISD::SIGN_EXTEND:
969 case ISD::ZERO_EXTEND: {
970 const unsigned SrcEltSize = Src->getScalarSizeInBits();
971 if (SrcEltSize == 1) {
972 // We do not use vsext/vzext to extend from a mask vector.
973 // Instead we use the following instructions to extend from a mask vector:
974 // vmv.v.i v8, 0
975 // vmerge.vim v8, v8, -1, v0
976 return getRISCVInstructionCost({RISCV::VMV_V_I, RISCV::VMERGE_VIM},
977 DstLT.second, CostKind);
978 }
979 if ((PowDiff < 1) || (PowDiff > 3))
980 return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
981 unsigned SExtOp[] = {RISCV::VSEXT_VF2, RISCV::VSEXT_VF4, RISCV::VSEXT_VF8};
982 unsigned ZExtOp[] = {RISCV::VZEXT_VF2, RISCV::VZEXT_VF4, RISCV::VZEXT_VF8};
983 unsigned Op =
984 (ISD == ISD::SIGN_EXTEND) ? SExtOp[PowDiff - 1] : ZExtOp[PowDiff - 1];
985 return getRISCVInstructionCost(Op, DstLT.second, CostKind);
986 }
987 case ISD::TRUNCATE:
988 if (Dst->getScalarSizeInBits() == 1) {
989 // We do not use a sequence of vncvt instructions to truncate to a mask
990 // vector, so we cannot use PowDiff to calculate the cost.
991 // Instead we use the following instructions to truncate to a mask vector:
992 // vand.vi v8, v8, 1
993 // vmsne.vi v0, v8, 0
994 return getRISCVInstructionCost({RISCV::VAND_VI, RISCV::VMSNE_VI},
995 SrcLT.second, CostKind);
996 }
997 [[fallthrough]];
998 case ISD::FP_EXTEND:
999 case ISD::FP_ROUND: {
1000 // Counts of narrow/widen instructions.
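 // For example (illustrative), truncating i64 elements to i8 takes three
 // narrowing steps (i64 -> i32 -> i16 -> i8), so three vnsrl.wi instructions
 // are costed.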
1001 unsigned SrcEltSize = Src->getScalarSizeInBits();
1002 unsigned DstEltSize = Dst->getScalarSizeInBits();
1003
1004 unsigned Op = (ISD == ISD::TRUNCATE) ? RISCV::VNSRL_WI
1005 : (ISD == ISD::FP_EXTEND) ? RISCV::VFWCVT_F_F_V
1006 : RISCV::VFNCVT_F_F_W;
1008 for (; SrcEltSize != DstEltSize;) {
1009 MVT ElementMVT = (ISD == ISD::TRUNCATE)
1010 ? MVT::getIntegerVT(DstEltSize)
1011 : MVT::getFloatingPointVT(DstEltSize);
1012 MVT DstMVT = DstLT.second.changeVectorElementType(ElementMVT);
1013 DstEltSize =
1014 (DstEltSize > SrcEltSize) ? DstEltSize >> 1 : DstEltSize << 1;
1015 Cost += getRISCVInstructionCost(Op, DstMVT, CostKind);
1016 }
1017 return Cost;
1018 }
1019 case ISD::FP_TO_SINT:
1020 case ISD::FP_TO_UINT:
1021 case ISD::SINT_TO_FP:
1022 case ISD::UINT_TO_FP:
1023 if (Src->getScalarSizeInBits() == 1 || Dst->getScalarSizeInBits() == 1) {
1024 // The cost of converting from or to a mask vector is different from the
1025 // other cases, so we cannot use PowDiff to calculate it.
1026 // For mask vector to fp, we should use the following instructions:
1027 // vmv.v.i v8, 0
1028 // vmerge.vim v8, v8, -1, v0
1029 // vfcvt.f.x.v v8, v8
1030
1031 // And for fp vector to mask, we use:
1032 // vfncvt.rtz.x.f.w v9, v8
1033 // vand.vi v8, v9, 1
1034 // vmsne.vi v0, v8, 0
1035 return 3;
1036 }
1037 if (std::abs(PowDiff) <= 1)
1038 return 1;
1039 // Backend could lower (v[sz]ext i8 to double) to vfcvt(v[sz]ext.f8 i8),
1040 // so it only needs two conversions.
1041 if (Src->isIntOrIntVectorTy())
1042 return 2;
1043 // Counts of narrow/widen instructions.
1044 return std::abs(PowDiff);
1045 }
1046 return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
1047}
1048
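// Return the estimated number of lanes processed for a vector type: the exact
// element count for fixed vectors, or the known-minimum element count scaled
// by the vscale used for tuning for scalable vectors (e.g., illustratively,
// <vscale x 4 x i32> with a tuning vscale of 2 is estimated as VL = 8).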
1049unsigned RISCVTTIImpl::getEstimatedVLFor(VectorType *Ty) {
1050 if (isa<ScalableVectorType>(Ty)) {
1051 const unsigned EltSize = DL.getTypeSizeInBits(Ty->getElementType());
1052 const unsigned MinSize = DL.getTypeSizeInBits(Ty).getKnownMinValue();
1053 const unsigned VectorBits = *getVScaleForTuning() * RISCV::RVVBitsPerBlock;
1054 return RISCVTargetLowering::computeVLMAX(VectorBits, EltSize, MinSize);
1055 }
1056 return cast<FixedVectorType>(Ty)->getNumElements();
1057}
1058
1061 FastMathFlags FMF,
1063 if (isa<FixedVectorType>(Ty) && !ST->useRVVForFixedLengthVectors())
1064 return BaseT::getMinMaxReductionCost(IID, Ty, FMF, CostKind);
1065
1066 // Skip if scalar size of Ty is bigger than ELEN.
1067 if (Ty->getScalarSizeInBits() > ST->getELen())
1068 return BaseT::getMinMaxReductionCost(IID, Ty, FMF, CostKind);
1069
1070 std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
1071 if (Ty->getElementType()->isIntegerTy(1)) {
1072 // SelectionDAGBuilder does following transforms:
1073 // vector_reduce_{smin,umax}(<n x i1>) --> vector_reduce_or(<n x i1>)
1074 // vector_reduce_{smax,umin}(<n x i1>) --> vector_reduce_and(<n x i1>)
1075 if (IID == Intrinsic::umax || IID == Intrinsic::smin)
1076 return getArithmeticReductionCost(Instruction::Or, Ty, FMF, CostKind);
1077 else
1078 return getArithmeticReductionCost(Instruction::And, Ty, FMF, CostKind);
1079 }
1080
1081 if (IID == Intrinsic::maximum || IID == Intrinsic::minimum) {
1083 InstructionCost ExtraCost = 0;
1084 switch (IID) {
1085 case Intrinsic::maximum:
1086 if (FMF.noNaNs()) {
1087 Opcodes = {RISCV::VFREDMAX_VS, RISCV::VFMV_F_S};
1088 } else {
1089 Opcodes = {RISCV::VMFNE_VV, RISCV::VCPOP_M, RISCV::VFREDMAX_VS,
1090 RISCV::VFMV_F_S};
1091 // Cost of the canonical NaN + branch
1092 // lui a0, 523264
1093 // fmv.w.x fa0, a0
1094 Type *DstTy = Ty->getScalarType();
1095 const unsigned EltTyBits = DstTy->getScalarSizeInBits();
1096 Type *SrcTy = IntegerType::getIntNTy(DstTy->getContext(), EltTyBits);
1097 ExtraCost = 1 +
1098 getCastInstrCost(Instruction::UIToFP, DstTy, SrcTy,
1100 getCFInstrCost(Instruction::Br, CostKind);
1101 }
1102 break;
1103
1104 case Intrinsic::minimum:
1105 if (FMF.noNaNs()) {
1106 Opcodes = {RISCV::VFREDMIN_VS, RISCV::VFMV_F_S};
1107 } else {
1108 Opcodes = {RISCV::VMFNE_VV, RISCV::VCPOP_M, RISCV::VFREDMIN_VS,
1109 RISCV::VFMV_F_S};
1110 // Cost of the canonical NaN + branch
1111 // lui a0, 523264
1112 // fmv.w.x fa0, a0
1113 Type *DstTy = Ty->getScalarType();
1114 const unsigned EltTyBits = DL.getTypeSizeInBits(DstTy);
1115 Type *SrcTy = IntegerType::getIntNTy(DstTy->getContext(), EltTyBits);
1116 ExtraCost = 1 +
1117 getCastInstrCost(Instruction::UIToFP, DstTy, SrcTy,
1119 getCFInstrCost(Instruction::Br, CostKind);
1120 }
1121 break;
1122 }
1123 return ExtraCost + getRISCVInstructionCost(Opcodes, LT.second, CostKind);
1124 }
1125
1126 // An IR reduction is composed of two vmv instructions and one RVV reduction instruction.
1127 unsigned SplitOp;
1129 switch (IID) {
1130 default:
1131 llvm_unreachable("Unsupported intrinsic");
1132 case Intrinsic::smax:
1133 SplitOp = RISCV::VMAX_VV;
1134 Opcodes = {RISCV::VMV_S_X, RISCV::VREDMAX_VS, RISCV::VMV_X_S};
1135 break;
1136 case Intrinsic::smin:
1137 SplitOp = RISCV::VMIN_VV;
1138 Opcodes = {RISCV::VMV_S_X, RISCV::VREDMIN_VS, RISCV::VMV_X_S};
1139 break;
1140 case Intrinsic::umax:
1141 SplitOp = RISCV::VMAXU_VV;
1142 Opcodes = {RISCV::VMV_S_X, RISCV::VREDMAXU_VS, RISCV::VMV_X_S};
1143 break;
1144 case Intrinsic::umin:
1145 SplitOp = RISCV::VMINU_VV;
1146 Opcodes = {RISCV::VMV_S_X, RISCV::VREDMINU_VS, RISCV::VMV_X_S};
1147 break;
1148 case Intrinsic::maxnum:
1149 SplitOp = RISCV::VFMAX_VV;
1150 Opcodes = {RISCV::VFMV_S_F, RISCV::VFREDMAX_VS, RISCV::VFMV_F_S};
1151 break;
1152 case Intrinsic::minnum:
1153 SplitOp = RISCV::VFMIN_VV;
1154 Opcodes = {RISCV::VFMV_S_F, RISCV::VFREDMIN_VS, RISCV::VFMV_F_S};
1155 break;
1156 }
1157 // Add a cost for data larger than LMUL8
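 // e.g. (illustrative) a type that legalizes into two parts (LT.first == 2)
 // pays for one extra SplitOp before the final reduction sequence.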
1158 InstructionCost SplitCost =
1159 (LT.first > 1) ? (LT.first - 1) *
1160 getRISCVInstructionCost(SplitOp, LT.second, CostKind)
1161 : 0;
1162 return SplitCost + getRISCVInstructionCost(Opcodes, LT.second, CostKind);
1163}
1164
1167 std::optional<FastMathFlags> FMF,
1169 if (isa<FixedVectorType>(Ty) && !ST->useRVVForFixedLengthVectors())
1170 return BaseT::getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);
1171
1172 // Skip if scalar size of Ty is bigger than ELEN.
1173 if (Ty->getScalarSizeInBits() > ST->getELen())
1174 return BaseT::getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);
1175
1176 int ISD = TLI->InstructionOpcodeToISD(Opcode);
1177 assert(ISD && "Invalid opcode");
1178
1179 if (ISD != ISD::ADD && ISD != ISD::OR && ISD != ISD::XOR && ISD != ISD::AND &&
1180 ISD != ISD::FADD)
1181 return BaseT::getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);
1182
1183 std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
1185 Type *ElementTy = Ty->getElementType();
1186 if (ElementTy->isIntegerTy(1)) {
1187 if (ISD == ISD::AND) {
1188 // Example sequences:
1189 // vsetvli a0, zero, e8, mf8, ta, ma
1190 // vmnot.m v8, v0
1191 // vcpop.m a0, v8
1192 // seqz a0, a0
1193 Opcodes = {RISCV::VMNAND_MM, RISCV::VCPOP_M};
1194 return (LT.first - 1) +
1195 getRISCVInstructionCost(Opcodes, LT.second, CostKind) +
1196 getCmpSelInstrCost(Instruction::ICmp, ElementTy, ElementTy,
1198 } else {
1199 // Example sequences:
1200 // vsetvli a0, zero, e8, mf8, ta, ma
1201 // vcpop.m a0, v0
1202 // snez a0, a0
1203 Opcodes = {RISCV::VCPOP_M};
1204 return (LT.first - 1) +
1205 getRISCVInstructionCost(Opcodes, LT.second, CostKind) +
1206 getCmpSelInstrCost(Instruction::ICmp, ElementTy, ElementTy,
1208 }
1209 }
1210
1211 // An IR reduction is composed of two vmv instructions and one RVV reduction instruction.
1213 Opcodes.push_back(RISCV::VFMV_S_F);
1214 for (unsigned i = 0; i < LT.first.getValue(); i++)
1215 Opcodes.push_back(RISCV::VFREDOSUM_VS);
1216 Opcodes.push_back(RISCV::VFMV_F_S);
1217 return getRISCVInstructionCost(Opcodes, LT.second, CostKind);
1218 }
1219 unsigned SplitOp;
1220 switch (ISD) {
1221 case ISD::ADD:
1222 SplitOp = RISCV::VADD_VV;
1223 Opcodes = {RISCV::VMV_S_X, RISCV::VREDSUM_VS, RISCV::VMV_X_S};
1224 break;
1225 case ISD::OR:
1226 SplitOp = RISCV::VOR_VV;
1227 Opcodes = {RISCV::VMV_S_X, RISCV::VREDOR_VS, RISCV::VMV_X_S};
1228 break;
1229 case ISD::XOR:
1230 SplitOp = RISCV::VXOR_VV;
1231 Opcodes = {RISCV::VMV_S_X, RISCV::VREDXOR_VS, RISCV::VMV_X_S};
1232 break;
1233 case ISD::AND:
1234 SplitOp = RISCV::VAND_VV;
1235 Opcodes = {RISCV::VMV_S_X, RISCV::VREDAND_VS, RISCV::VMV_X_S};
1236 break;
1237 case ISD::FADD:
1238 SplitOp = RISCV::VFADD_VV;
1239 Opcodes = {RISCV::VFMV_S_F, RISCV::VFREDUSUM_VS, RISCV::VFMV_F_S};
1240 break;
1241 }
1242 // Add a cost for data larger than LMUL8
1243 InstructionCost SplitCost =
1244 (LT.first > 1) ? (LT.first - 1) *
1245 getRISCVInstructionCost(SplitOp, LT.second, CostKind)
1246 : 0;
1247 return SplitCost + getRISCVInstructionCost(Opcodes, LT.second, CostKind);
1248}
1249
1251 unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *ValTy,
1253 if (isa<FixedVectorType>(ValTy) && !ST->useRVVForFixedLengthVectors())
1254 return BaseT::getExtendedReductionCost(Opcode, IsUnsigned, ResTy, ValTy,
1255 FMF, CostKind);
1256
1257 // Skip if scalar size of ResTy is bigger than ELEN.
1258 if (ResTy->getScalarSizeInBits() > ST->getELen())
1259 return BaseT::getExtendedReductionCost(Opcode, IsUnsigned, ResTy, ValTy,
1260 FMF, CostKind);
1261
1262 if (Opcode != Instruction::Add && Opcode != Instruction::FAdd)
1263 return BaseT::getExtendedReductionCost(Opcode, IsUnsigned, ResTy, ValTy,
1264 FMF, CostKind);
1265
1266 std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(ValTy);
1267
1268 if (ResTy->getScalarSizeInBits() != 2 * LT.second.getScalarSizeInBits())
1269 return BaseT::getExtendedReductionCost(Opcode, IsUnsigned, ResTy, ValTy,
1270 FMF, CostKind);
1271
1272 return (LT.first - 1) +
1273 getArithmeticReductionCost(Opcode, ValTy, FMF, CostKind);
1274}
1275
1277 TTI::OperandValueInfo OpInfo,
1279 assert(OpInfo.isConstant() && "non constant operand?");
1280 if (!isa<VectorType>(Ty))
1281 // FIXME: We need to account for immediate materialization here, but doing
1282 // a decent job requires more knowledge about the immediate than we
1283 // currently have here.
1284 return 0;
1285
1286 if (OpInfo.isUniform())
1287 // vmv.v.i, vmv.v.x, or vfmv.v.f
1288 // We ignore the cost of the scalar constant materialization to be consistent
1289 // with how we treat scalar constants themselves just above.
1290 return 1;
1291
1292 return getConstantPoolLoadCost(Ty, CostKind);
1293}
1294
1295
1297 MaybeAlign Alignment,
1298 unsigned AddressSpace,
1300 TTI::OperandValueInfo OpInfo,
1301 const Instruction *I) {
1302 EVT VT = TLI->getValueType(DL, Src, true);
1303 // Type legalization can't handle structs
1304 if (VT == MVT::Other)
1305 return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
1306 CostKind, OpInfo, I);
1307
1309 if (Opcode == Instruction::Store && OpInfo.isConstant())
1310 Cost += getStoreImmCost(Src, OpInfo, CostKind);
1311 InstructionCost BaseCost =
1312 BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
1313 CostKind, OpInfo, I);
1314 // Assume memory op costs scale with the number of vector registers
1315 // possibly accessed by the instruction. Note that BasicTTI already
1316 // handles the LT.first term for us.
1317 if (std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Src);
1318 LT.second.isVector() && CostKind != TTI::TCK_CodeSize)
1319 BaseCost *= TLI->getLMULCost(LT.second);
1320 return Cost + BaseCost;
1321
1322}
1323
1325 Type *CondTy,
1326 CmpInst::Predicate VecPred,
1328 const Instruction *I) {
1330 return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind,
1331 I);
1332
1333 if (isa<FixedVectorType>(ValTy) && !ST->useRVVForFixedLengthVectors())
1334 return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind,
1335 I);
1336
1337 // Skip if scalar size of ValTy is bigger than ELEN.
1338 if (ValTy->isVectorTy() && ValTy->getScalarSizeInBits() > ST->getELen())
1339 return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind,
1340 I);
1341
1342 std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(ValTy);
1343 if (Opcode == Instruction::Select && ValTy->isVectorTy()) {
1344 if (CondTy->isVectorTy()) {
1345 if (ValTy->getScalarSizeInBits() == 1) {
1346 // vmandn.mm v8, v8, v9
1347 // vmand.mm v9, v0, v9
1348 // vmor.mm v0, v9, v8
1349 return LT.first *
1350 getRISCVInstructionCost(
1351 {RISCV::VMANDN_MM, RISCV::VMAND_MM, RISCV::VMOR_MM},
1352 LT.second, CostKind);
1353 }
1354 // vselect and max/min are supported natively.
1355 return LT.first *
1356 getRISCVInstructionCost(RISCV::VMERGE_VVM, LT.second, CostKind);
1357 }
1358
1359 if (ValTy->getScalarSizeInBits() == 1) {
1360 // vmv.v.x v9, a0
1361 // vmsne.vi v9, v9, 0
1362 // vmandn.mm v8, v8, v9
1363 // vmand.mm v9, v0, v9
1364 // vmor.mm v0, v9, v8
1365 MVT InterimVT = LT.second.changeVectorElementType(MVT::i8);
1366 return LT.first *
1367 getRISCVInstructionCost({RISCV::VMV_V_X, RISCV::VMSNE_VI},
1368 InterimVT, CostKind) +
1369 LT.first * getRISCVInstructionCost(
1370 {RISCV::VMANDN_MM, RISCV::VMAND_MM, RISCV::VMOR_MM},
1371 LT.second, CostKind);
1372 }
1373
1374 // vmv.v.x v10, a0
1375 // vmsne.vi v0, v10, 0
1376 // vmerge.vvm v8, v9, v8, v0
1377 return LT.first * getRISCVInstructionCost(
1378 {RISCV::VMV_V_X, RISCV::VMSNE_VI, RISCV::VMERGE_VVM},
1379 LT.second, CostKind);
1380 }
1381
1382 if ((Opcode == Instruction::ICmp) && ValTy->isVectorTy() &&
1383 CmpInst::isIntPredicate(VecPred)) {
1384 // Use VMSLT_VV to represent VMSEQ, VMSNE, VMSLTU, VMSLEU, VMSLT, VMSLE
1385 // provided they incur the same cost across all implementations
1386 return LT.first *
1387 getRISCVInstructionCost(RISCV::VMSLT_VV, LT.second, CostKind);
1388 }
1389
1390 if ((Opcode == Instruction::FCmp) && ValTy->isVectorTy() &&
1391 CmpInst::isFPPredicate(VecPred)) {
1392
1393 // Use VMXOR_MM and VMXNOR_MM to generate an all-true/all-false mask
1394 if ((VecPred == CmpInst::FCMP_FALSE) || (VecPred == CmpInst::FCMP_TRUE))
1395 return getRISCVInstructionCost(RISCV::VMXOR_MM, LT.second, CostKind);
1396
1397 // If we do not support the input floating point vector type, use the base
1398 // one which will calculate as:
1399 // ScalarizeCost + Num * Cost for fixed vector,
1400 // InvalidCost for scalable vector.
1401 if ((ValTy->getScalarSizeInBits() == 16 && !ST->hasVInstructionsF16()) ||
1402 (ValTy->getScalarSizeInBits() == 32 && !ST->hasVInstructionsF32()) ||
1403 (ValTy->getScalarSizeInBits() == 64 && !ST->hasVInstructionsF64()))
1404 return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind,
1405 I);
1406
1407 // Assuming vector fp compare and mask instructions are all the same cost
1408 // until a need arises to differentiate them.
1409 switch (VecPred) {
1410 case CmpInst::FCMP_ONE: // vmflt.vv + vmflt.vv + vmor.mm
1411 case CmpInst::FCMP_ORD: // vmfeq.vv + vmfeq.vv + vmand.mm
1412 case CmpInst::FCMP_UNO: // vmfne.vv + vmfne.vv + vmor.mm
1413 case CmpInst::FCMP_UEQ: // vmflt.vv + vmflt.vv + vmnor.mm
1414 return LT.first * getRISCVInstructionCost(
1415 {RISCV::VMFLT_VV, RISCV::VMFLT_VV, RISCV::VMOR_MM},
1416 LT.second, CostKind);
1417
1418 case CmpInst::FCMP_UGT: // vmfle.vv + vmnot.m
1419 case CmpInst::FCMP_UGE: // vmflt.vv + vmnot.m
1420 case CmpInst::FCMP_ULT: // vmfle.vv + vmnot.m
1421 case CmpInst::FCMP_ULE: // vmflt.vv + vmnot.m
1422 return LT.first *
1423 getRISCVInstructionCost({RISCV::VMFLT_VV, RISCV::VMNAND_MM},
1424 LT.second, CostKind);
1425
1426 case CmpInst::FCMP_OEQ: // vmfeq.vv
1427 case CmpInst::FCMP_OGT: // vmflt.vv
1428 case CmpInst::FCMP_OGE: // vmfle.vv
1429 case CmpInst::FCMP_OLT: // vmflt.vv
1430 case CmpInst::FCMP_OLE: // vmfle.vv
1431 case CmpInst::FCMP_UNE: // vmfne.vv
1432 return LT.first *
1433 getRISCVInstructionCost(RISCV::VMFLT_VV, LT.second, CostKind);
1434 default:
1435 break;
1436 }
1437 }
1438
1439 // TODO: Add cost for scalar type.
1440
1441 return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
1442}
1443
1446 const Instruction *I) {
1448 return Opcode == Instruction::PHI ? 0 : 1;
1449 // Branches are assumed to be predicted.
1450 return 0;
1451}
1452
1455 unsigned Index, Value *Op0,
1456 Value *Op1) {
1457 assert(Val->isVectorTy() && "This must be a vector type");
1458
1459 if (Opcode != Instruction::ExtractElement &&
1460 Opcode != Instruction::InsertElement)
1461 return BaseT::getVectorInstrCost(Opcode, Val, CostKind, Index, Op0, Op1);
1462
1463 // Legalize the type.
1464 std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Val);
1465
1466 // This type is legalized to a scalar type.
1467 if (!LT.second.isVector()) {
1468 auto *FixedVecTy = cast<FixedVectorType>(Val);
1469 // If Index is a known constant, cost is zero.
1470 if (Index != -1U)
1471 return 0;
1472 // Extract/InsertElement with non-constant index is very costly when
1473 // scalarized; estimate cost of loads/stores sequence via the stack:
1474 // ExtractElement cost: store vector to stack, load scalar;
1475 // InsertElement cost: store vector to stack, store scalar, load vector.
1476 Type *ElemTy = FixedVecTy->getElementType();
1477 auto NumElems = FixedVecTy->getNumElements();
1478 auto Align = DL.getPrefTypeAlign(ElemTy);
1479 InstructionCost LoadCost =
1480 getMemoryOpCost(Instruction::Load, ElemTy, Align, 0, CostKind);
1481 InstructionCost StoreCost =
1482 getMemoryOpCost(Instruction::Store, ElemTy, Align, 0, CostKind);
1483 return Opcode == Instruction::ExtractElement
1484 ? StoreCost * NumElems + LoadCost
1485 : (StoreCost + LoadCost) * NumElems + StoreCost;
1486 }
1487
1488 // For unsupported scalable vector.
1489 if (LT.second.isScalableVector() && !LT.first.isValid())
1490 return LT.first;
1491
1492 if (!isTypeLegal(Val))
1493 return BaseT::getVectorInstrCost(Opcode, Val, CostKind, Index, Op0, Op1);
1494
1495 // Mask vector extract/insert is expanded via e8.
1496 if (Val->getScalarSizeInBits() == 1) {
1497 VectorType *WideTy =
1499 cast<VectorType>(Val)->getElementCount());
1500 if (Opcode == Instruction::ExtractElement) {
1501 InstructionCost ExtendCost
1502 = getCastInstrCost(Instruction::ZExt, WideTy, Val,
1504 InstructionCost ExtractCost
1505 = getVectorInstrCost(Opcode, WideTy, CostKind, Index, nullptr, nullptr);
1506 return ExtendCost + ExtractCost;
1507 }
1508 InstructionCost ExtendCost
1509 = getCastInstrCost(Instruction::ZExt, WideTy, Val,
1511 InstructionCost InsertCost
1512 = getVectorInstrCost(Opcode, WideTy, CostKind, Index, nullptr, nullptr);
1513 InstructionCost TruncCost
1514 = getCastInstrCost(Instruction::Trunc, Val, WideTy,
1516 return ExtendCost + InsertCost + TruncCost;
1517 }
1518
1519
1520 // In RVV, we could use vslidedown + vmv.x.s to extract an element from a
1521 // vector and vslideup + vmv.s.x to insert an element into a vector.
1522 unsigned BaseCost = 1;
1523 // For insertelement we also need to add 1 to the index to form the input of vslideup.
1524 unsigned SlideCost = Opcode == Instruction::InsertElement ? 2 : 1;
1525
1526 if (Index != -1U) {
1527 // The type may be split. For fixed-width vectors we can normalize the
1528 // index to the new type.
1529 if (LT.second.isFixedLengthVector()) {
1530 unsigned Width = LT.second.getVectorNumElements();
1531 Index = Index % Width;
1532 }
1533
1534 // We could extract/insert the first element without vslidedown/vslideup.
1535 if (Index == 0)
1536 SlideCost = 0;
1537 else if (Opcode == Instruction::InsertElement)
1538 SlideCost = 1; // With a constant index, we do not need to use addi.
1539 }
1540
1541 // Extracting an i64 on a target with XLEN=32 needs more instructions.
1542 if (Val->getScalarType()->isIntegerTy() &&
1543 ST->getXLen() < Val->getScalarSizeInBits()) {
1544 // For extractelement, we need the following instructions:
1545 // vsetivli zero, 1, e64, m1, ta, mu (not count)
1546 // vslidedown.vx v8, v8, a0
1547 // vmv.x.s a0, v8
1548 // li a1, 32
1549 // vsrl.vx v8, v8, a1
1550 // vmv.x.s a1, v8
1551
1552 // For insertelement, we need the following instructions:
1553 // vsetivli zero, 2, e32, m4, ta, mu (not count)
1554 // vmv.v.i v12, 0
1555 // vslide1up.vx v16, v12, a1
1556 // vslide1up.vx v12, v16, a0
1557 // addi a0, a2, 1
1558 // vsetvli zero, a0, e64, m4, tu, mu (not count)
1559 // vslideup.vx v8, v12, a2
1560
1561 // TODO: should we count these special vsetvlis?
1562 BaseCost = Opcode == Instruction::InsertElement ? 3 : 4;
1563 }
1564 return BaseCost + SlideCost;
1565}
1566
1568 unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
1570 ArrayRef<const Value *> Args, const Instruction *CxtI) {
1571
1572 // TODO: Handle more cost kinds.
1574 return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info,
1575 Args, CxtI);
1576
1577 if (isa<FixedVectorType>(Ty) && !ST->useRVVForFixedLengthVectors())
1578 return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info,
1579 Args, CxtI);
1580
1581 // Skip if scalar size of Ty is bigger than ELEN.
1582 if (isa<VectorType>(Ty) && Ty->getScalarSizeInBits() > ST->getELen())
1583 return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info,
1584 Args, CxtI);
1585
1586 // Legalize the type.
1587 std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
1588
1589 // TODO: Handle scalar type.
1590 if (!LT.second.isVector())
1591 return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info,
1592 Args, CxtI);
1593
1594
1595 auto getConstantMatCost =
1596 [&](unsigned Operand, TTI::OperandValueInfo OpInfo) -> InstructionCost {
1597 if (OpInfo.isUniform() && TLI->canSplatOperand(Opcode, Operand))
1598 // Two sub-cases:
1599 // * Has a 5 bit immediate operand which can be splatted.
1600 // * Has a larger immediate which must be materialized in a scalar register.
1601 // We return 0 for both as we currently ignore the cost of materializing
1602 // scalar constants in GPRs.
1603 return 0;
1604
1605 return getConstantPoolLoadCost(Ty, CostKind);
1606 };
1607
1608 // Add the cost of materializing any constant vectors required.
1609 InstructionCost ConstantMatCost = 0;
1610 if (Op1Info.isConstant())
1611 ConstantMatCost += getConstantMatCost(0, Op1Info);
1612 if (Op2Info.isConstant())
1613 ConstantMatCost += getConstantMatCost(1, Op2Info);
1614
1615 switch (TLI->InstructionOpcodeToISD(Opcode)) {
1616 case ISD::ADD:
1617 case ISD::SUB:
1618 case ISD::AND:
1619 case ISD::OR:
1620 case ISD::XOR:
1621 case ISD::SHL:
1622 case ISD::SRL:
1623 case ISD::SRA:
1624 case ISD::MUL:
1625 case ISD::MULHS:
1626 case ISD::MULHU:
1627 case ISD::FADD:
1628 case ISD::FSUB:
1629 case ISD::FMUL:
1630 case ISD::FNEG: {
1631 return ConstantMatCost + TLI->getLMULCost(LT.second) * LT.first * 1;
1632 }
1633 default:
1634 return ConstantMatCost +
1635 BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info,
1636 Args, CxtI);
1637 }
1638}
1639
1640// TODO: Deduplicate from TargetTransformInfoImplCRTPBase.
1642 ArrayRef<const Value *> Ptrs, const Value *Base,
1643 const TTI::PointersChainInfo &Info, Type *AccessTy,
1646 // In the basic model we take into account GEP instructions only
1647 // (although here can come alloca instruction, a value, constants and/or
1648 // constant expressions, PHIs, bitcasts ... whatever allowed to be used as a
1649 // pointer). Typically, if Base is not a GEP-instruction and all the
1650 // pointers are relative to the same base address, all the rest are
1651 // either GEP instructions, PHIs, bitcasts or constants. When we have same
1652 // base, we just calculate the cost of each non-Base GEP as an ADD operation
1653 // if any of their indices is non-constant.
1654 // If there are no known dependencies between the pointers, the cost is
1655 // calculated as a sum of the costs of the GEP instructions.
1656 for (auto [I, V] : enumerate(Ptrs)) {
1657 const auto *GEP = dyn_cast<GetElementPtrInst>(V);
1658 if (!GEP)
1659 continue;
1660 if (Info.isSameBase() && V != Base) {
1661 if (GEP->hasAllConstantIndices())
1662 continue;
1663 // If the chain is unit-stride and BaseReg + stride*i is a legal
1664 // addressing mode, then presume the base GEP is sitting around in a
1665 // register somewhere and check if we can fold the offset relative to
1666 // it.
1667 unsigned Stride = DL.getTypeStoreSize(AccessTy);
1668 if (Info.isUnitStride() &&
1669 isLegalAddressingMode(AccessTy,
1670 /* BaseGV */ nullptr,
1671 /* BaseOffset */ Stride * I,
1672 /* HasBaseReg */ true,
1673 /* Scale */ 0,
1674 GEP->getType()->getPointerAddressSpace()))
1675 continue;
1676 Cost += getArithmeticInstrCost(Instruction::Add, GEP->getType(), CostKind,
1677 {TTI::OK_AnyValue, TTI::OP_None},
1678 {TTI::OK_AnyValue, TTI::OP_None},
1679 std::nullopt);
1680 } else {
1681 SmallVector<const Value *> Indices(GEP->indices());
1682 Cost += getGEPCost(GEP->getSourceElementType(), GEP->getPointerOperand(),
1683 Indices, AccessTy, CostKind);
1684 }
1685 }
1686 return Cost;
1687}
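To make the unit-stride fold above concrete, here is a standalone illustration (the helper name and the explicit bounds are assumptions, not code from this file): RV32I/RV64I loads and stores carry a 12-bit signed immediate, so a constant offset of Stride * I can fold into the base register while it stays inside that range; beyond it, the GEP is charged as the ADD above.

#include <cstdint>

// Illustration only: does a constant byte offset fit the simm12 field of a
// RISC-V load/store, so that no separate address computation is needed?
bool offsetLikelyFolds(uint64_t Stride, uint64_t Index) {
  int64_t Offset = static_cast<int64_t>(Stride * Index);
  return Offset >= -2048 && Offset <= 2047;
}
// For an i64 chain (Stride = 8): indices 0..255 fold (offsets up to 2040),
// index 256 gives offset 2048 and falls back to an explicit add.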
1688
1689void RISCVTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
1690 TTI::UnrollingPreferences &UP,
1691 OptimizationRemarkEmitter *ORE) {
1692 // TODO: More tuning on benchmarks and metrics with changes as needed
1693 // should be applied to all of the settings below to improve performance.
1694
1695
1696 if (ST->enableDefaultUnroll())
1697 return BasicTTIImplBase::getUnrollingPreferences(L, SE, UP, ORE);
1698
1699 // Enable upper-bound unrolling universally, not dependent on the conditions
1700 // below.
1701 UP.UpperBound = true;
1702
1703 // Disable loop unrolling for Oz and Os.
1704 UP.OptSizeThreshold = 0;
1705 UP.PartialOptSizeThreshold = 0;
1706 if (L->getHeader()->getParent()->hasOptSize())
1707 return;
1708
1709 SmallVector<BasicBlock *, 4> ExitingBlocks;
1710 L->getExitingBlocks(ExitingBlocks);
1711 LLVM_DEBUG(dbgs() << "Loop has:\n"
1712 << "Blocks: " << L->getNumBlocks() << "\n"
1713 << "Exit blocks: " << ExitingBlocks.size() << "\n");
1714
1715 // Allow at most one exit other than the latch. This acts as an early
1716 // exit, mirroring the profitability calculation of the runtime unroller.
1717 if (ExitingBlocks.size() > 2)
1718 return;
1719
1720 // Limit the CFG of the loop body for targets with a branch predictor.
1721 // Allowing 4 blocks permits if-then-else diamonds in the body.
1722 if (L->getNumBlocks() > 4)
1723 return;
1724
1725 // Don't unroll vectorized loops, including the remainder loop
1726 if (getBooleanLoopAttribute(L, "llvm.loop.isvectorized"))
1727 return;
1728
1729 // Scan the loop: don't unroll loops with calls as this could prevent
1730 // inlining.
1731 InstructionCost Cost = 0;
1732 for (auto *BB : L->getBlocks()) {
1733 for (auto &I : *BB) {
1734 // Initial setting - Don't unroll loops containing vectorized
1735 // instructions.
1736 if (I.getType()->isVectorTy())
1737 return;
1738
1739 if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
1740 if (const Function *F = cast<CallBase>(I).getCalledFunction()) {
1741 if (!isLoweredToCall(F))
1742 continue;
1743 }
1744 return;
1745 }
1746
1747 SmallVector<const Value *> Operands(I.operand_values());
1748 Cost += getInstructionCost(&I, Operands,
1749 TargetTransformInfo::TCK_SizeAndLatency);
1750 }
1751 }
1752
1753 LLVM_DEBUG(dbgs() << "Cost of loop: " << Cost << "\n");
1754
1755 UP.Partial = true;
1756 UP.Runtime = true;
1757 UP.UnrollRemainder = true;
1758 UP.UnrollAndJam = true;
1759 UP.UnrollAndJamInnerLoopThreshold = 60;
1760
1761 // Forcing unrolling of small loops can be very useful because of the
1762 // branch-taken cost of the backedge.
1763 if (Cost < 12)
1764 UP.Force = true;
1765}
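For illustration only, the kind of loop these preferences are aimed at looks like the made-up function below: a single small block with no calls and no vector instructions, so its TCK_SizeAndLatency cost stays under the force threshold of 12 and runtime unrolling mostly removes taken backedge branches.

// Hypothetical example, not from any benchmark: short call-free scalar body
// with a runtime trip count.
void scale_accumulate(float *X, const float *Y, float A, int N) {
  for (int I = 0; I < N; ++I)
    X[I] += A * Y[I];
}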
1766
1767void RISCVTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
1768 TTI::PeelingPreferences &PP) {
1769 BaseT::getPeelingPreferences(L, SE, PP);
1770}
1771
1772unsigned RISCVTTIImpl::getRegUsageForType(Type *Ty) {
1773 TypeSize Size = DL.getTypeSizeInBits(Ty);
1774 if (Ty->isVectorTy()) {
1775 if (Size.isScalable() && ST->hasVInstructions())
1776 return divideCeil(Size.getKnownMinValue(), RISCV::RVVBitsPerBlock);
1777
1778 if (ST->useRVVForFixedLengthVectors())
1779 return divideCeil(Size, ST->getRealMinVLen());
1780 }
1781
1782 return BaseT::getRegUsageForType(Ty);
1783}
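A worked instance of the scalable branch above, assuming RISCV::RVVBitsPerBlock is 64: <vscale x 4 x i64> has a known minimum size of 4 * 64 = 256 bits, so the estimate is ceil(256 / 64) = 4 registers, i.e. an LMUL=4 register group. The helper below merely restates that arithmetic and is not part of the file.

#include <cstdint>

// ceil(KnownMinBits / RVVBitsPerBlock), mirroring the divideCeil call above.
unsigned rvvRegEstimate(uint64_t KnownMinBits) {
  const uint64_t RVVBitsPerBlock = 64; // assumed value of RISCV::RVVBitsPerBlock
  return static_cast<unsigned>((KnownMinBits + RVVBitsPerBlock - 1) /
                               RVVBitsPerBlock);
}
// rvvRegEstimate(256) == 4 for <vscale x 4 x i64>.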
1784
1785unsigned RISCVTTIImpl::getMaximumVF(unsigned ElemWidth, unsigned Opcode) const {
1786 if (SLPMaxVF.getNumOccurrences())
1787 return SLPMaxVF;
1788
1789 // Return how many elements can fit in getRegisterBitWidth. This is the
1790 // same routine as used in the LoopVectorizer. We should probably be
1791 // accounting for whether we actually have instructions with the right
1792 // lane type, but we don't have enough information to do that without
1793 // some additional plumbing which hasn't been justified yet.
1794 TypeSize RegWidth =
1795 getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector);
1796 // If no vector registers, or absurd element widths, disable
1797 // vectorization by returning 1.
1798 return std::max<unsigned>(1U, RegWidth.getFixedValue() / ElemWidth);
1799}
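As a hedged numeric example (all figures are assumptions): with riscv-v-register-bit-width-lmul left at its default of 2 and a minimum VLEN of 128, the fixed-width register query yields 256 bits, so 32-bit elements give an SLP maximum VF of max(1, 256 / 32) = 8, while an element wider than the register clamps to 1.

#include <algorithm>

// Restates the clamp above: report VF = 1 (no SLP vectorization) instead of 0
// when the element does not fit.
unsigned slpMaxVF(unsigned RegWidthBits, unsigned ElemWidthBits) {
  return std::max(1u, RegWidthBits / ElemWidthBits);
}
// slpMaxVF(256, 32) == 8;  slpMaxVF(256, 512) == 1.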
1800
1801bool RISCVTTIImpl::isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
1802 const TargetTransformInfo::LSRCost &C2) {
1803 // The RISC-V-specific part here is that the instruction count gets first priority.
1804 return std::tie(C1.Insns, C1.NumRegs, C1.AddRecCost,
1805 C1.NumIVMuls, C1.NumBaseAdds,
1806 C1.ScaleCost, C1.ImmCost, C1.SetupCost) <
1807 std::tie(C2.Insns, C2.NumRegs, C2.AddRecCost,
1808 C2.NumIVMuls, C2.NumBaseAdds,
1809 C2.ScaleCost, C2.ImmCost, C2.SetupCost);
1810}
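The comparison above is lexicographic, so a formula that needs fewer instructions wins before any other field is consulted. A reduced sketch of the same std::tie pattern, with made-up numbers:

#include <tuple>

// The first differing field decides the whole comparison.
bool fewerInsnsWins(unsigned Insns1, unsigned Regs1,
                    unsigned Insns2, unsigned Regs2) {
  return std::tie(Insns1, Regs1) < std::tie(Insns2, Regs2);
}
// fewerInsnsWins(3, 9, 4, 2) == true: 3 < 4 settles it even though 9 > 2.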
1811
1812bool RISCVTTIImpl::isLegalMaskedCompressStore(Type *DataTy, Align Alignment) {
1813 auto *VTy = dyn_cast<VectorType>(DataTy);
1814 if (!VTy || VTy->isScalableTy())
1815 return false;
1816
1817 if (!isLegalMaskedLoadStore(DataTy, Alignment))
1818 return false;
1819 return true;
1820}
1821
1822bool RISCVTTIImpl::areInlineCompatible(const Function *Caller,
1823 const Function *Callee) const {
1824 const TargetMachine &TM = getTLI()->getTargetMachine();
1825
1826 const FeatureBitset &CallerBits =
1827 TM.getSubtargetImpl(*Caller)->getFeatureBits();
1828 const FeatureBitset &CalleeBits =
1829 TM.getSubtargetImpl(*Callee)->getFeatureBits();
1830
1831 // Inline a callee if its target-features are a subset of the caller's
1832 // target-features.
1833 return (CallerBits & CalleeBits) == CalleeBits;
1834}
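A small illustration of the subset rule above, using std::bitset in place of LLVM's FeatureBitset and made-up feature positions:

#include <bitset>

// The callee may be inlined only if every feature it was compiled with is
// also enabled in the caller.
bool calleeIsSubsetOfCaller(const std::bitset<64> &CallerBits,
                            const std::bitset<64> &CalleeBits) {
  return (CallerBits & CalleeBits) == CalleeBits;
}
// Caller {M, A, V}, callee {M, A}    -> true, safe to inline.
// Caller {M, A},    callee {M, A, V} -> false, the callee requires V.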
This file provides a helper that implements much of the TTI interface in terms of the target-independ...
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
Cost tables and simple lookup functions.
return RetTy
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
uint64_t Size
Hexagon Common GEP
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
mir Rename Register Operands
static const Function * getCalledFunction(const Value *V, bool &IsNoBuiltin)
if(VerifyEach)
const char LLVMTargetMachineRef TM
static cl::opt< unsigned > SLPMaxVF("riscv-v-slp-max-vf", cl::desc("Overrides result used for getMaximumVF query which is used " "exclusively by SLP vectorizer."), cl::Hidden)
static cl::opt< unsigned > RVVRegisterWidthLMUL("riscv-v-register-bit-width-lmul", cl::desc("The LMUL to use for getRegisterBitWidth queries. Affects LMUL used " "by autovectorized code. Fractional LMULs are not supported."), cl::init(2), cl::Hidden)
static VectorType * getVRGatherIndexType(MVT DataVT, const RISCVSubtarget &ST, LLVMContext &C)
static const CostTblEntry VectorIntrinsicCostTable[]
static bool canUseShiftPair(Instruction *Inst, const APInt &Imm)
static unsigned getISDForVPIntrinsicID(Intrinsic::ID ID)
This file defines a TargetTransformInfo::Concept conforming object specific to the RISC-V target mach...
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file describes how to lower LLVM code to machine code.
This pass exposes codegen information to IR-level passes.
Class for arbitrary precision integers.
Definition: APInt.h:76
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
Get intrinsic cost based on arguments.
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE)
Definition: BasicTTIImpl.h:582
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *DataTy, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind)
std::optional< unsigned > getVScaleForTuning() const
Definition: BasicTTIImpl.h:756
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Op0, Value *Op1)
InstructionCost getStridedMemoryOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I)
std::optional< unsigned > getMaxVScale() const
Definition: BasicTTIImpl.h:755
TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind, ArrayRef< int > Mask, VectorType *Ty, int &Index, VectorType *&SubTy) const
Definition: BasicTTIImpl.h:969
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind)
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind)
Try to calculate op costs for min/max reduction operations.
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args=std::nullopt, const Instruction *CxtI=nullptr)
InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value * > Operands, Type *AccessType, TTI::TargetCostKind CostKind)
Definition: BasicTTIImpl.h:438
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP)
Definition: BasicTTIImpl.h:654
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args=std::nullopt, const Instruction *CxtI=nullptr)
Definition: BasicTTIImpl.h:891
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind)
std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const
Estimate the cost of type-legalization and the legalized type.
Definition: BasicTTIImpl.h:855
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I=nullptr, int64_t ScalableOffset=0)
Definition: BasicTTIImpl.h:339
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:993
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
Definition: InstrTypes.h:996
@ FCMP_TRUE
1 1 1 1 Always true (always folded)
Definition: InstrTypes.h:1010
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition: InstrTypes.h:999
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition: InstrTypes.h:1008
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition: InstrTypes.h:997
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition: InstrTypes.h:998
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition: InstrTypes.h:1007
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition: InstrTypes.h:1001
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
Definition: InstrTypes.h:1004
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition: InstrTypes.h:1005
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition: InstrTypes.h:1000
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
Definition: InstrTypes.h:1002
@ ICMP_EQ
equal
Definition: InstrTypes.h:1014
@ ICMP_NE
not equal
Definition: InstrTypes.h:1015
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
Definition: InstrTypes.h:1009
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition: InstrTypes.h:1006
@ FCMP_FALSE
0 0 0 0 Always false (always folded)
Definition: InstrTypes.h:995
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition: InstrTypes.h:1003
bool isFPPredicate() const
Definition: InstrTypes.h:1122
bool isIntPredicate() const
Definition: InstrTypes.h:1123
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
Align getABITypeAlign(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
Definition: DataLayout.cpp:865
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
Definition: DataLayout.h:672
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
Definition: DataLayout.h:472
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:874
Convenience struct for specifying and reasoning about fast-math flags.
Definition: FMF.h:20
bool noNaNs() const
Definition: FMF.h:66
Container class for subtarget features.
Class to represent fixed width SIMD vectors.
Definition: DerivedTypes.h:539
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:692
static InstructionCost getInvalid(CostType Val=0)
bool isCommutative() const LLVM_READONLY
Return true if the instruction is commutative:
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:278
const SmallVectorImpl< Type * > & getArgTypes() const
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:47
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:54
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:44
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
uint64_t getScalarSizeInBits() const
MVT changeVectorElementType(MVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
MVT changeTypeToInteger()
Return the type converted to an equivalently sized integer or vector with integer element type.
bool bitsGT(MVT VT) const
Return true if this has more bits than VT.
bool isFixedLengthVector() const
static MVT getIntegerVT(unsigned BitWidth)
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
unsigned getOpcode() const
Return the opcode for this Instruction or ConstantExpr.
Definition: Operator.h:41
The optimization diagnostic interface.
bool hasVInstructionsF64() const
unsigned getRealMinVLen() const
bool useRVVForFixedLengthVectors() const
unsigned getXLen() const
bool hasVInstructionsF16() const
bool hasVInstructions() const
unsigned getRealMaxVLen() const
bool hasVInstructionsF32() const
unsigned getELen() const
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Op0, Value *Op1)
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I)
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP)
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args=std::nullopt, const Instruction *CxtI=nullptr)
bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2)
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind)
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind)
unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind)
bool isLegalMaskedLoadStore(Type *DataType, Align Alignment)
InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr)
bool areInlineCompatible(const Function *Caller, const Function *Callee) const
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args=std::nullopt, const Instruction *CxtI=nullptr)
InstructionCost getStridedMemoryOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I)
std::optional< unsigned > getVScaleForTuning() const
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *ValTy, FastMathFlags FMF, TTI::TargetCostKind CostKind)
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind)
std::optional< unsigned > getMaxVScale() const
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE)
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getPointersChainCost(ArrayRef< const Value * > Ptrs, const Value *Base, const TTI::PointersChainInfo &Info, Type *AccessTy, TTI::TargetCostKind CostKind)
TargetTransformInfo::PopcntSupportKind getPopcntSupport(unsigned TyWidth)
bool shouldExpandReduction(const IntrinsicInst *II) const
InstructionCost getStoreImmCost(Type *VecTy, TTI::OperandValueInfo OpInfo, TTI::TargetCostKind CostKind)
Return the cost of materializing an immediate for a value operand of a store instruction.
bool isLegalMaskedCompressStore(Type *DataTy, Align Alignment)
bool isLegalStridedLoadStore(Type *DataType, Align Alignment)
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
unsigned getRegUsageForType(Type *Ty)
bool isLegalMaskedGather(Type *DataType, Align Alignment)
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpdInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
bool isLegalMaskedScatter(Type *DataType, Align Alignment)
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const
InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind)
bool hasActiveVectorLength(unsigned Opcode, Type *DataType, Align Alignment) const
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)
InstructionCost getVRGatherVVCost(MVT VT) const
Return the cost of a vrgather.vv instruction for the type VT.
bool canSplatOperand(Instruction *I, int Operand) const
Return true if the (vector) instruction I will be lowered to an instruction with a scalar splat opera...
InstructionCost getVRGatherVICost(MVT VT) const
Return the cost of a vrgather.vi (or vx) instruction for the type VT.
static unsigned computeVLMAX(unsigned VectorBits, unsigned EltSize, unsigned MinSize)
InstructionCost getLMULCost(MVT VT) const
Return the cost of LMUL for linear operations.
InstructionCost getVSlideVICost(MVT VT) const
Return the cost of a vslidedown.vi or vslideup.vi instruction for the type VT.
InstructionCost getVSlideVXCost(MVT VT) const
Return the cost of a vslidedown.vx or vslideup.vx instruction for the type VT.
bool isLegalInterleavedAccessType(VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace, const DataLayout &) const
Returns whether or not generating an interleaved load/store intrinsic for this type will be legal.
The main scalar evolution driver.
static bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
size_t size() const
Definition: SmallVector.h:91
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
int InstructionOpcodeToISD(unsigned Opcode) const
Get the ISD node that corresponds to the Instruction class opcode.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
const TargetMachine & getTargetMachine() const
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:76
const DataLayout & getDataLayout() const
bool isLoweredToCall(const Function *F) const
InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TTI::TargetCostKind CostKind)
TargetCostKind
The kind of cost model.
@ TCK_RecipThroughput
Reciprocal throughput.
@ TCK_CodeSize
Instruction code size.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
@ TCK_Latency
The latency of instruction.
static bool requiresOrderedReduction(std::optional< FastMathFlags > FMF)
A helper function to determine the type of reduction algorithm used for a given Opcode and set of Fas...
PopcntSupportKind
Flags indicating the kind of support for population count.
@ TCC_Free
Expected to fold away in lowering.
@ TCC_Basic
The cost of a typical 'add' instruction.
ShuffleKind
The various kinds of shuffle patterns for vector queries.
@ SK_InsertSubvector
InsertSubvector. Index indicates start offset.
@ SK_Select
Selects elements from the corresponding lane of either source operand.
@ SK_PermuteSingleSrc
Shuffle elements of single source vector with any shuffle mask.
@ SK_Transpose
Transpose two vectors.
@ SK_Splice
Concatenates elements from the first input vector with elements of the second input vector.
@ SK_Broadcast
Broadcast element 0 to all other elements.
@ SK_PermuteTwoSrc
Merge elements from two source vectors into one with any shuffle mask.
@ SK_Reverse
Reverse the order of the vector.
@ SK_ExtractSubvector
ExtractSubvector Index indicates start offset.
CastContextHint
Represents a hint about the context in which a cast is used.
@ None
The cast is not used with a load/store of any kind.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:330
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
Definition: TypeSize.h:333
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:265
static IntegerType * getInt1Ty(LLVMContext &C)
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:129
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:348
Value * getOperand(unsigned i) const
Definition: User.h:169
LLVM Value Representation.
Definition: Value.h:74
Base class of all SIMD vector types.
Definition: DerivedTypes.h:403
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
Definition: DerivedTypes.h:641
static VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
Definition: Type.cpp:676
Type * getElementType() const
Definition: DerivedTypes.h:436
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:187
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition: ISDOpcodes.h:44
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:239
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:790
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:390
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:774
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:930
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:651
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:705
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:780
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:888
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:836
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:680
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:869
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:786
int getIntMatCost(const APInt &Val, unsigned Size, const MCSubtargetInfo &STI, bool CompressionCost)
static constexpr unsigned RVVBitsPerBlock
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
Definition: MathExtras.h:337
const CostTblEntryT< CostType > * CostTableLookup(ArrayRef< CostTblEntryT< CostType > > Tbl, int ISD, MVT Ty)
Find in cost table.
Definition: CostTable.h:35
bool getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name)
Returns true if Name is applied to TheLoop and enabled.
Definition: LoopInfo.cpp:1085
uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition: MathExtras.h:428
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A,...
Definition: STLExtras.h:2406
AddressSpace
Definition: NVPTXBaseInfo.h:21
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition: MathExtras.h:372
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition: bit.h:215
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition: MathExtras.h:269
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition: STLExtras.h:1928
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:324
llvm::SmallVector< int, 16 > createStrideMask(unsigned Start, unsigned Stride, unsigned VF)
Create a stride shuffle mask.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:275
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
constexpr int PoisonMaskElem
llvm::SmallVector< int, 16 > createInterleaveMask(unsigned VF, unsigned NumVecs)
Create an interleave shuffle mask.
DWARFExpression::Operation Op
InstructionCost Cost
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
Definition: STLExtras.h:2025
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition: bit.h:327
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Cost Table Entry.
Definition: CostTable.h:25
Extended Value Type.
Definition: ValueTypes.h:34
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
unsigned Insns
TODO: Some of these could be merged.
Describe known properties for a set of pointers.
Parameters that control the generic loop unrolling transformation.
bool UpperBound
Allow using trip count upper bound to unroll loops.
bool Force
Apply loop unroll on any kind of loop (mainly to loops that fail runtime unrolling).
unsigned PartialOptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size, like OptSizeThreshold,...
unsigned UnrollAndJamInnerLoopThreshold
Threshold for unroll and jam, for inner loop size.
bool UnrollAndJam
Allow unroll and jam. Used to enable unroll and jam for the target.
bool UnrollRemainder
Allow unrolling of all the iterations of the runtime loop remainder.
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number ...
bool Partial
Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminat...
unsigned OptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size (set to UINT_MAX to disable).