#include "llvm/IR/IntrinsicsARM.h"
// ...

#define DEBUG_TYPE "armtti"

// ...
      cl::desc("Enable the generation of masked loads and stores"));
// ...
      cl::desc("Disable the generation of low-overhead loops"));
// ...
      cl::desc("Enable the generation of WLS loops"));
// ...
      cl::desc("Enable the widening of global strings to alignment boundaries"));
 
// Static helper simplifyNeonVld1: fold a NEON vld1 intrinsic with a suitably
// aligned pointer into a plain aligned load.
// ...
  unsigned Alignment = IntrAlign->getLimitedValue() < MemAlign
                           ? MemAlign
                           : IntrAlign->getLimitedValue();
// ...
  return Builder.CreateAlignedLoad(II.getType(), II.getArgOperand(0),
                                   Align(Alignment));
 
 
// ARMTTIImpl::areInlineCompatible: check that caller and callee subtarget
// features are compatible before inlining.
  const FeatureBitset &CallerBits =
      TM.getSubtargetImpl(*Caller)->getFeatureBits();
  const FeatureBitset &CalleeBits =
      TM.getSubtargetImpl(*Callee)->getFeatureBits();

  // Features outside the allowed list must match exactly.
  bool MatchExact = (CallerBits & ~InlineFeaturesAllowed) ==
                    (CalleeBits & ~InlineFeaturesAllowed);
  // For features in the allowed list, the callee's features must be a subset
  // of the caller's.
  bool MatchSubset = ((CallerBits & CalleeBits) & InlineFeaturesAllowed) ==
                     (CalleeBits & InlineFeaturesAllowed);
  return MatchExact && MatchSubset;
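
// Illustrative sketch (not part of the file): the exact/subset check above on
// plain 8-bit masks. Suppose bit 0 is inline-allowed and bit 1 is not; a
// caller with both features may then inline a callee that only has bit 1:
//
//   uint8_t Allowed    = 0b01;   // stands in for InlineFeaturesAllowed
//   uint8_t CallerBits = 0b11, CalleeBits = 0b10;
//   bool MatchExact  = (CallerBits & ~Allowed) == (CalleeBits & ~Allowed); // true
//   bool MatchSubset = ((CallerBits & CalleeBits) & Allowed) ==
//                      (CalleeBits & Allowed);                             // true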
 
 
// ARMTTIImpl::getPreferredAddressingMode:
  if (ST->hasMVEIntegerOps())
    return TTI::AMK_PostIndexed;

  if (L->getHeader()->getParent()->hasOptSize())
    return TTI::AMK_None;

  if (ST->isMClass() && ST->isThumb2() &&
      L->getNumBlocks() == 1)
    return TTI::AMK_PreIndexed;
  // ...
 
 
std::optional<Instruction *>
ARMTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
  // ...
  switch (II.getIntrinsicID()) {
  case Intrinsic::arm_neon_vld1: {
    // ...
  }

  case Intrinsic::arm_neon_vld2:
  case Intrinsic::arm_neon_vld3:
  case Intrinsic::arm_neon_vld4:
  case Intrinsic::arm_neon_vld2lane:
  case Intrinsic::arm_neon_vld3lane:
  case Intrinsic::arm_neon_vld4lane:
  case Intrinsic::arm_neon_vst1:
  case Intrinsic::arm_neon_vst2:
  case Intrinsic::arm_neon_vst3:
  case Intrinsic::arm_neon_vst4:
  case Intrinsic::arm_neon_vst2lane:
  case Intrinsic::arm_neon_vst3lane:
  case Intrinsic::arm_neon_vst4lane: {
    // ...
    // The alignment is the last argument of these intrinsics.
    unsigned AlignArg = II.arg_size() - 1;
    Value *AlignArgOp = II.getArgOperand(AlignArg);
    // ...
  }

  case Intrinsic::arm_neon_vld1x2:
  case Intrinsic::arm_neon_vld1x3:
  case Intrinsic::arm_neon_vld1x4:
  case Intrinsic::arm_neon_vst1x2:
  case Intrinsic::arm_neon_vst1x3:
  case Intrinsic::arm_neon_vst1x4: {
    // ...
    Align OldAlign = II.getParamAlign(0).valueOrOne();
    if (NewAlign > OldAlign)
      II.addParamAttr(0,
                      Attribute::getWithAlignment(II.getContext(), NewAlign));
    // ...
  }

  case Intrinsic::arm_mve_pred_i2v: {
    Value *Arg = II.getArgOperand(0);
    // ...
        if (CI->getValue().trunc(16).isAllOnes()) {
    // ...
  }

  case Intrinsic::arm_mve_pred_v2i: {
    Value *Arg = II.getArgOperand(0);
    // ...
    if (II.getMetadata(LLVMContext::MD_range))
      // ...
    if (auto CurrentRange = II.getRange()) {
      // ...
      if (Range == CurrentRange)
        // ...
    }
    // ...
    II.addRetAttr(Attribute::NoUndef);
    // ...
  }

  case Intrinsic::arm_mve_vadc:
  case Intrinsic::arm_mve_vadc_predicated: {
    unsigned CarryOp =
        (II.getIntrinsicID() == Intrinsic::arm_mve_vadc_predicated) ? 3 : 2;
    assert(II.getArgOperand(CarryOp)->getType()->getScalarSizeInBits() == 32 &&
           "Bad type for intrinsic!");
    // ...
  }

  case Intrinsic::arm_mve_vmldava: {
    Instruction *I = cast<Instruction>(&II);
    if (I->hasOneUse()) {
      // ...
        Value *OpX = I->getOperand(4);
        Value *OpY = I->getOperand(5);
        // ...
                                       {I->getOperand(0), I->getOperand(1),
                                        I->getOperand(2), OpZ, OpX, OpY});
      // ...
    }
    // ...
  }
  }
  // ...
 
 
// ARMTTIImpl::simplifyDemandedVectorEltsIntrinsic (start of signature elided):
        SimplifyAndSetOp) const {
  // ...
  // Simplify the vector operand of a narrowing top/bottom instruction based on
  // the demanded lanes.
  auto SimplifyNarrowInstrTopBottom = [&](unsigned TopOpc) {
    // ...
    SimplifyAndSetOp(&II, 0, OrigDemandedElts & DemandedElts, UndefElts);
    // ...
  };

  switch (II.getIntrinsicID()) {
  // ...
  case Intrinsic::arm_mve_vcvt_narrow:
    SimplifyNarrowInstrTopBottom(2);
    break;
  case Intrinsic::arm_mve_vqmovn:
    SimplifyNarrowInstrTopBottom(4);
    break;
  case Intrinsic::arm_mve_vshrn:
    SimplifyNarrowInstrTopBottom(7);
    break;
  }
 
 
// ARMTTIImpl::getIntImmCost: cost of materializing the immediate Imm of type Ty.
  assert(Ty->isIntegerTy());

  unsigned Bits = Ty->getPrimitiveSizeInBits();
  if (Bits == 0 || Imm.getActiveBits() >= 64)
    return 4;

  int64_t SImmVal = Imm.getSExtValue();
  uint64_t ZImmVal = Imm.getZExtValue();
  if (!ST->isThumb()) {
    if ((SImmVal >= 0 && SImmVal < 65536) ||
        ARM_AM::getSOImmVal(ZImmVal) != -1 ||
        ARM_AM::getSOImmVal(~ZImmVal) != -1)
      return 1;
    return ST->hasV6T2Ops() ? 2 : 3;
  }
  if (ST->isThumb2()) {
    if ((SImmVal >= 0 && SImmVal < 65536) ||
        ARM_AM::getT2SOImmVal(ZImmVal) != -1 ||
        ARM_AM::getT2SOImmVal(~ZImmVal) != -1)
      return 1;
    return ST->hasV6T2Ops() ? 2 : 3;
  }
  // Thumb1, any i8 imm cost 1.
  if (Bits == 8 || (SImmVal >= 0 && SImmVal < 256))
    return 1;
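
// Reading the costs above: an immediate that fits a single mov/mvn or shifter
// operand costs 1; otherwise materialization costs 2 with a movw/movt pair
// (v6t2 and later) or 3 via a constant-pool load on older cores.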
 
 
  // ...
  if (Imm.isNonNegative() && Imm.getLimitedValue() < 256)
    // ...

  // ...
      C->getValue() == Imm && Imm.isNegative() && Imm.isNegatedPowerOf2()) {
    // ...
    auto isSSatMin = [&](Value *MinInst) {
      // ...
        Value *MinLHS, *MinRHS;
      // ...
    };
    // ...
  }

  // ...
  if (Imm.getBitWidth() != 64 ||
      // ...
 
 
// ARMTTIImpl::getIntImmCostInst:
  // Division by a constant can be turned into multiplication, but only if we
  // know it's constant. So it's not so much that the immediate is cheap (it's
  // not), but that the alternative is worse.
  if ((Opcode == Instruction::SDiv || Opcode == Instruction::UDiv ||
       Opcode == Instruction::SRem || Opcode == Instruction::URem) &&
      Idx == 1)
    return 0;

  // Leave any gep offsets alone, as they are handled by the addressing modes.
  if (Opcode == Instruction::GetElementPtr && Idx != 0)
    return 0;

  if (Opcode == Instruction::And) {
    // UXTB/UXTH
    if (Imm == 255 || Imm == 65535)
      return 0;
    // Conversion to BIC is free, and means we can use ~Imm instead.
    return std::min(getIntImmCost(Imm, Ty, CostKind),
                    getIntImmCost(~Imm, Ty, CostKind));
  }

  if (Opcode == Instruction::Add)
    // Conversion to SUB is free, and means we can use -Imm instead.
    return std::min(getIntImmCost(Imm, Ty, CostKind),
                    getIntImmCost(-Imm, Ty, CostKind));

  if (Opcode == Instruction::ICmp && Imm.isNegative() &&
      Ty->getIntegerBitWidth() == 32) {
    int64_t NegImm = -Imm.getSExtValue();
    if (ST->isThumb2() && NegImm < 1<<12)
      // icmp X, #-C -> cmn X, #C
      return 0;
    if (ST->isThumb() && NegImm < 1<<8)
      // icmp X, #-C -> adds X, #C
      return 0;
  }

  // xor a, -1 can always become mvn.
  if (Opcode == Instruction::Xor && Imm.isAllOnes())
    return 0;

  if (Inst && ((ST->hasV6Ops() && !ST->isThumb()) || ST->isThumb2()) &&
      Ty->getIntegerBitWidth() <= 32) {
    // ... (SSAT/USAT saturation patterns make the immediate free)
  }

  if (Inst && Opcode == Instruction::ICmp && Idx == 1 && Imm.isAllOnes()) {
    // ...
  }
 
 
// ARMTTIImpl::getCFInstrCost (fragment):
  // ...
      (ST->hasNEON() || ST->hasMVEIntegerOps())) {
  // ...

// ARMTTIImpl::getCastInstrCost:
  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  // ...
      return Cost == 0 ? 0 : 1;
  // ...

  auto IsLegalFPType = [this](EVT VT) {
    EVT EltVT = VT.getScalarType();
    return (EltVT == MVT::f32 && ST->hasVFP2Base()) ||
           (EltVT == MVT::f64 && ST->hasFP64()) ||
           (EltVT == MVT::f16 && ST->hasFullFP16());
  };

  EVT SrcTy = TLI->getValueType(DL, Src);
  EVT DstTy = TLI->getValueType(DL, Dst);

  if (!SrcTy.isSimple() || !DstTy.isSimple())
    // ...

  // Extending/truncating MVE integer casts, and legal MVE FP extends/truncs,
  // are folded into the cost of the surrounding operations:
  if ((ST->hasMVEIntegerOps() &&
       (Opcode == Instruction::Trunc || Opcode == Instruction::ZExt ||
        Opcode == Instruction::SExt)) ||
      (ST->hasMVEFloatOps() &&
       (Opcode == Instruction::FPExt || Opcode == Instruction::FPTrunc) &&
       IsLegalFPType(SrcTy) && IsLegalFPType(DstTy)))
    // ...
             ST->getMVEVectorCostFactor(CostKind);
  // ...

    if (const auto *Entry = ConvertCostTableLookup(
            LoadConversionTbl, ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT()))
      return AdjustCost(Entry->Cost);
    // ...

    if (SrcTy.isVector() && ST->hasMVEIntegerOps()) {
      if (const auto *Entry =
              // ... (MVE load-conversion table lookup)
        return Entry->Cost * ST->getMVEVectorCostFactor(CostKind);
    }
    // ... (FP-extending load table)
        {ISD::FP_EXTEND, MVT::v4f32, MVT::v4f16, 1},
        {ISD::FP_EXTEND, MVT::v8f32, MVT::v8f16, 3},
    // ...
    if (SrcTy.isVector() && ST->hasMVEFloatOps()) {
      if (const auto *Entry =
              // ...
        return Entry->Cost * ST->getMVEVectorCostFactor(CostKind);
    }
  // ... (the same pattern for truncating stores)
    if (SrcTy.isVector() && ST->hasMVEIntegerOps()) {
      if (const auto *Entry =
              // ...
        return Entry->Cost * ST->getMVEVectorCostFactor(CostKind);
    }
    // ...
    if (SrcTy.isVector() && ST->hasMVEFloatOps()) {
      if (const auto *Entry =
              // ...
        return Entry->Cost * ST->getMVEVectorCostFactor(CostKind);
    }

  // NEON operations such as vaddl can fold an extend of their operands:
  if (/* ... */
      I && I->hasOneUse() && ST->hasNEON() && SrcTy.isVector()) {
    // ...
      { ISD::ADD, MVT::v4i32, MVT::v4i16, 0 },
      { ISD::ADD, MVT::v8i16, MVT::v8i8,  0 },
      // ...
      { ISD::SUB, MVT::v4i32, MVT::v4i16, 0 },
      { ISD::SUB, MVT::v8i16, MVT::v8i8,  0 },
      // ...
      { ISD::MUL, MVT::v4i32, MVT::v4i16, 0 },
      { ISD::MUL, MVT::v8i16, MVT::v8i8,  0 },
      // ...
      { ISD::SHL, MVT::v4i32, MVT::v4i16, 0 },
      { ISD::SHL, MVT::v8i16, MVT::v8i8,  0 },
    // ...
    int UserISD = TLI->InstructionOpcodeToISD(User->getOpcode());
    // ...
                                             SrcTy.getSimpleVT())) {
      return AdjustCost(Entry->Cost);
    }
  }

  // Single to/from double precision conversions:
  if (Src->isVectorTy() && ST->hasNEON() &&
      // ...
       (ISD == ISD::FP_EXTEND && SrcTy.getScalarType() == MVT::f32 &&
        // ...
        {ISD::FP_EXTEND, MVT::v2f32, 2},
        {ISD::FP_EXTEND, MVT::v4f32, 4}};
    // ...
      return AdjustCost(LT.first * Entry->Cost);
  // ...

  if (SrcTy.isVector() && ST->hasNEON()) {
    // ... (NEON vector conversion table lookup)
                                                   SrcTy.getSimpleVT()))
      return AdjustCost(Entry->Cost);
  }
  // ...

  if (SrcTy.isFloatingPoint() && ST->hasNEON()) {
    // ... (scalar float conversion table lookup)
                                                   SrcTy.getSimpleVT()))
      return AdjustCost(Entry->Cost);
  }
  // ...

  if (SrcTy.isInteger() && ST->hasNEON()) {
    // ...
                                                   SrcTy.getSimpleVT()))
      return AdjustCost(Entry->Cost);
  }
  // ...

  if (SrcTy.isVector() && ST->hasMVEIntegerOps()) {
    // ...
                                                   SrcTy.getSimpleVT()))
      return Entry->Cost * ST->getMVEVectorCostFactor(CostKind);
  }
  // ...

    unsigned Lanes = 1;
    if (SrcTy.isFixedLengthVector())
      Lanes = SrcTy.getVectorNumElements();

    if (IsLegalFPType(SrcTy) && IsLegalFPType(DstTy))
      // ...
      return Lanes * CallCost;
  // ...

  if (/* ... */
      SrcTy.isFixedLengthVector()) {
    // ...
    if ((SrcTy.getScalarType() == MVT::i8 ||
         SrcTy.getScalarType() == MVT::i16 ||
         SrcTy.getScalarType() == MVT::i32) &&
        SrcTy.getSizeInBits() > 128 &&
        // ...
      return SrcTy.getVectorNumElements() * 2;
  }
  // ...

  if (SrcTy.isInteger()) {
    // ...
                                                   SrcTy.getSimpleVT()))
      return AdjustCost(Entry->Cost);
  }

  int BaseCost = ST->hasMVEIntegerOps() && Src->isVectorTy()
                     ? ST->getMVEVectorCostFactor(CostKind)
                     : 1;
  // ...
 
 
// ARMTTIImpl::getVectorInstrCost:
InstructionCost ARMTTIImpl::getVectorInstrCost(/* ... */,
                                               unsigned Index, const Value *Op0,
                                               const Value *Op1) const {
  // ...
  if (ST->hasSlowLoadDSubregister() && Opcode == Instruction::InsertElement &&
      ValTy->isVectorTy() && ValTy->getScalarSizeInBits() <= 32)
    // ...

  if (ST->hasNEON() && (Opcode == Instruction::InsertElement ||
                        Opcode == Instruction::ExtractElement)) {
    // ...
    if (ValTy->isVectorTy() &&
        ValTy->getScalarSizeInBits() <= 32)
      return std::max<InstructionCost>(
          // ...
    // ...
  }

  if (ST->hasMVEIntegerOps() && (Opcode == Instruction::InsertElement ||
                                 Opcode == Instruction::ExtractElement)) {
    // ...
    std::pair<InstructionCost, MVT> LT =
        getTypeLegalizationCost(ValTy);
    return LT.first * (ValTy->getScalarType()->isIntegerTy() ? 4 : 1);
  }
  // ...
 
 
// ARMTTIImpl::getCmpSelInstrCost:
  int ISD = TLI->InstructionOpcodeToISD(Opcode);

  // Thumb scalar code size cost for select:
  if (/* ... */
      ST->isThumb() && !ValTy->isVectorTy()) {
    // ...
    if (TLI->getValueType(DL, ValTy, true) == MVT::Other)
      // ...
    // ...
    if (ValTy->isIntegerTy(1))
      // ...
  }
  // ...

  if ((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) && Sel &&
      // ...

  // If a select is fed by a compare, try to cost the pair as the equivalent
  // min/max/abs intrinsic instead:
  if (Sel && ValTy->isVectorTy() &&
      (ValTy->isIntOrIntVectorTy() || ValTy->isFPOrFPVectorTy())) {
    const Value *LHS, *RHS;
    // ...
      IID = Intrinsic::abs;
    // ...
      IID = Intrinsic::smin;
    // ...
      IID = Intrinsic::smax;
    // ...
      IID = Intrinsic::umin;
    // ...
      IID = Intrinsic::umax;
    // ...
      IID = Intrinsic::minnum;
    // ...
      IID = Intrinsic::maxnum;
    // ...
  }

  // On NEON a vector select gets lowered to vbsl:
  if (ST->hasNEON() && ValTy->isVectorTy() && ISD == ISD::SELECT && CondTy) {
    // ...
      { ISD::SELECT, MVT::v4i1, MVT::v4i64, 4*4 + 1*2 + 1 },
    // ...
    EVT SelCondTy = TLI->getValueType(DL, CondTy);
    EVT SelValTy = TLI->getValueType(DL, ValTy);
    // ...
  }

  if (ST->hasMVEIntegerOps() && ValTy->isVectorTy() &&
      (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
      // ...
    // ...
    if (Opcode == Instruction::FCmp && !ST->hasMVEFloatOps()) {
      // ...
    }
    // ...
    int BaseCost = ST->getMVEVectorCostFactor(CostKind);
    // ...
    if (LT.second.isVector() && LT.second.getVectorNumElements() > 2) {
      // ...
        return LT.first * BaseCost +
               // ...
    }
    // ...
  }
  // ...

  if (ST->hasMVEIntegerOps() && ValTy->isVectorTy())
    BaseCost = ST->getMVEVectorCostFactor(CostKind);
  // ...
 
 
// ARMTTIImpl::getAddressComputationCost:
  // Address computations in vectorized code with non-consecutive addresses
  // will likely result in more instructions compared to scalar code where the
  // computation can more often be merged into the index mode.
  unsigned NumVectorInstToHideOverhead = 10;
  int MaxMergeDistance = 64;

  if (ST->hasNEON()) {
    // ...
      return NumVectorInstToHideOverhead;
    // ...
  }
 
 
// ARMTTIImpl::isProfitableLSRChainElement:
  // ...
    switch (II->getIntrinsicID()) {
    case Intrinsic::arm_mve_vctp8:
    case Intrinsic::arm_mve_vctp16:
    case Intrinsic::arm_mve_vctp32:
    case Intrinsic::arm_mve_vctp64:
      return true;
    default:
      break;
    }
  // ...
 
 
// ARMTTIImpl::isLegalMaskedLoad:
  // ...
    // Don't support v2i1 yet.
    if (VecTy->getNumElements() == 2)
      return false;

    // We don't support extending fp types.
    // ...
    if (VecWidth != 128 && VecTy->getElementType()->isFloatingPointTy())
      return false;
  // ...

  return (EltWidth == 32 && Alignment >= 4) ||
         (EltWidth == 16 && Alignment >= 2) || (EltWidth == 8);
 
 
// ARMTTIImpl::isLegalMaskedGather:
  // ...
  unsigned EltWidth = Ty->getScalarSizeInBits();
  return ((EltWidth == 32 && Alignment >= 4) ||
          (EltWidth == 16 && Alignment >= 2) || EltWidth == 8);
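
// Worked example: under this rule an i32 gather with alignment >= 4 is legal,
// as is an i16 gather with alignment >= 2 or any i8 gather; an i16 gather
// with only byte alignment is rejected.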
 
 
// ARMTTIImpl::getNumMemOps: given a memcpy/memset/memmove intrinsic, return
// the number of memory operations performed.
  unsigned DstAddrSpace = ~0u;
  unsigned SrcAddrSpace = ~0u;
  const Function *F = I->getParent()->getParent();

  if (const auto *MC = dyn_cast<MemTransferInst>(I)) {
    // ...
    const unsigned Size = C->getValue().getZExtValue();
    const Align DstAlign = MC->getDestAlign().valueOrOne();
    const Align SrcAlign = MC->getSourceAlign().valueOrOne();
    // ...
    DstAddrSpace = MC->getDestAddressSpace();
    SrcAddrSpace = MC->getSourceAddressSpace();
  } else if (const auto *MS = dyn_cast<MemSetInst>(I)) {
    // ...
    const unsigned Size = C->getValue().getZExtValue();
    const Align DstAlign = MS->getDestAlign().valueOrOne();
    // ...
    DstAddrSpace = MS->getDestAddressSpace();
  }
  // ...

  unsigned Limit, Factor = 2;
  switch(I->getIntrinsicID()) {
    case Intrinsic::memcpy:
      Limit = TLI->getMaxStoresPerMemcpy(F->hasMinSize());
      break;
    case Intrinsic::memmove:
      Limit = TLI->getMaxStoresPerMemmove(F->hasMinSize());
      break;
    case Intrinsic::memset:
      Limit = TLI->getMaxStoresPerMemset(F->hasMinSize());
      Factor = 1;
      break;
    default:
      llvm_unreachable("Expected a memcpy/move or memset!");
  }
  // ...

  std::vector<EVT> MemOps;
  // ...
  if (getTLI()->findOptimalMemOpLowering(C, MemOps, Limit, MOp, DstAddrSpace,
                                         SrcAddrSpace, F->getAttributes()))
    return MemOps.size() * Factor;
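
// Note: each entry in MemOps is one optimal memory operation, and Factor
// accounts for memcpy/memmove needing a load and a store per operation,
// while memset needs only stores.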
 
 
 1245         "Expected the Mask to match the return size if given");
 
 1247         "Expected the same scalar types");
 
 1252  if (IsExtractSubvector)
 
 1254  if (ST->hasNEON()) {
 
 1271      if (
const auto *Entry =
 
 1273        return LT.first * Entry->Cost;
 
 1292      if (
const auto *Entry =
 
 1294        return LT.first * Entry->Cost;
 
 1318        return LT.first * Entry->Cost;
 
 1321  if (ST->hasMVEIntegerOps()) {
 
 1334        return LT.first * Entry->Cost * ST->getMVEVectorCostFactor(
CostKind);
 
 1337    if (!Mask.empty()) {
 
 1344          (LT.second.getScalarSizeInBits() == 8 ||
 
 1345           LT.second.getScalarSizeInBits() == 16 ||
 
 1346           LT.second.getScalarSizeInBits() == 32) &&
 
 1347          LT.second.getSizeInBits() == 128 &&
 
 1348          ((TLI->getMaxSupportedInterleaveFactor() >= 2 &&
 
 1350           (TLI->getMaxSupportedInterleaveFactor() == 4 &&
 
 1352        return ST->getMVEVectorCostFactor(
CostKind) *
 
 1353               std::max<InstructionCost>(1, LT.first / 4);
 
 1360          (LT.second.getScalarSizeInBits() == 8 ||
 
 1361           LT.second.getScalarSizeInBits() == 16 ||
 
 1362           LT.second.getScalarSizeInBits() == 32) &&
 
 1363          LT.second.getSizeInBits() == 128 &&
 
 1364          ((TLI->getMaxSupportedInterleaveFactor() >= 2 &&
 
 1366                Mask, 2, SrcTy->getElementCount().getKnownMinValue() * 2)) ||
 
 1367           (TLI->getMaxSupportedInterleaveFactor() == 4 &&
 
 1369                Mask, 4, SrcTy->getElementCount().getKnownMinValue() * 2))))
 
 1370        return ST->getMVEVectorCostFactor(
CostKind) * LT.first;
 
 1372      if (LT.second.isVector() &&
 
 1373          Mask.size() <= LT.second.getVectorNumElements() &&
 
 1376        return ST->getMVEVectorCostFactor(
CostKind) * LT.first;
 
 1381  if (IsExtractSubvector)
 
 1383  int BaseCost = ST->hasMVEIntegerOps() && SrcTy->isVectorTy()
 
 1384                     ? ST->getMVEVectorCostFactor(
CostKind)
 
 
// ARMTTIImpl::getArithmeticInstrCost:
  int ISDOpcode = TLI->InstructionOpcodeToISD(Opcode);
  // ...
    switch (ISDOpcode) {
    // ...
    }
  // ...

  if (ST->hasNEON()) {
    const unsigned FunctionCallDivCost = 20;
    const unsigned ReciprocalDivCost = 10;
    static const CostTblEntry CostTbl[] = {
      // Division costs: a vectorized division becomes a function call, so it
      // is made very expensive.
      // Double-register types:
      { ISD::SDIV, MVT::v1i64, 1 * FunctionCallDivCost},
      { ISD::UDIV, MVT::v1i64, 1 * FunctionCallDivCost},
      { ISD::SREM, MVT::v1i64, 1 * FunctionCallDivCost},
      { ISD::UREM, MVT::v1i64, 1 * FunctionCallDivCost},
      { ISD::SDIV, MVT::v2i32, 2 * FunctionCallDivCost},
      { ISD::UDIV, MVT::v2i32, 2 * FunctionCallDivCost},
      { ISD::SREM, MVT::v2i32, 2 * FunctionCallDivCost},
      { ISD::UREM, MVT::v2i32, 2 * FunctionCallDivCost},
      { ISD::SDIV, MVT::v4i16,     ReciprocalDivCost},
      { ISD::UDIV, MVT::v4i16,     ReciprocalDivCost},
      { ISD::SREM, MVT::v4i16, 4 * FunctionCallDivCost},
      { ISD::UREM, MVT::v4i16, 4 * FunctionCallDivCost},
      { ISD::SDIV, MVT::v8i8,      ReciprocalDivCost},
      { ISD::UDIV, MVT::v8i8,      ReciprocalDivCost},
      { ISD::SREM, MVT::v8i8,  8 * FunctionCallDivCost},
      { ISD::UREM, MVT::v8i8,  8 * FunctionCallDivCost},
      // Quad-register types:
      { ISD::SDIV, MVT::v2i64, 2 * FunctionCallDivCost},
      { ISD::UDIV, MVT::v2i64, 2 * FunctionCallDivCost},
      { ISD::SREM, MVT::v2i64, 2 * FunctionCallDivCost},
      { ISD::UREM, MVT::v2i64, 2 * FunctionCallDivCost},
      { ISD::SDIV, MVT::v4i32, 4 * FunctionCallDivCost},
      { ISD::UDIV, MVT::v4i32, 4 * FunctionCallDivCost},
      { ISD::SREM, MVT::v4i32, 4 * FunctionCallDivCost},
      { ISD::UREM, MVT::v4i32, 4 * FunctionCallDivCost},
      { ISD::SDIV, MVT::v8i16, 8 * FunctionCallDivCost},
      { ISD::UDIV, MVT::v8i16, 8 * FunctionCallDivCost},
      { ISD::SREM, MVT::v8i16, 8 * FunctionCallDivCost},
      { ISD::UREM, MVT::v8i16, 8 * FunctionCallDivCost},
      { ISD::SDIV, MVT::v16i8, 16 * FunctionCallDivCost},
      { ISD::UDIV, MVT::v16i8, 16 * FunctionCallDivCost},
      { ISD::SREM, MVT::v16i8, 16 * FunctionCallDivCost},
      { ISD::UREM, MVT::v16i8, 16 * FunctionCallDivCost},
      // ...
    };

    if (const auto *Entry = CostTableLookup(CostTbl, ISDOpcode, LT.second))
      return LT.first * Entry->Cost;

    InstructionCost Cost = BaseT::getArithmeticInstrCost(
        Opcode, Ty, CostKind, Op1Info, Op2Info);
    // ...
  }
 
  auto LooksLikeAFreeShift = [&]() {
    if (ST->isThumb1Only() || Ty->isVectorTy())
      return false;
    // ...
    // A shift folded into one of these users is free:
    switch (cast<Instruction>(CxtI->user_back())->getOpcode()) {
    case Instruction::Add:
    case Instruction::Sub:
    case Instruction::And:
    case Instruction::Xor:
    case Instruction::Or:
    case Instruction::ICmp:
      return true;
    default:
      return false;
    }
  };
  if (LooksLikeAFreeShift())
    return 0;
  // ...

  // Check for a scalar mul of matching sign/zero extends from half width that
  // can use the DSP multiply-accumulate instructions:
  auto MulInDSPMLALPattern = [&](const Instruction *I, unsigned Opcode,
                                 Type *Ty) {
    // ...
    if (Opcode != Instruction::Mul)
      return false;

    if (Ty->isVectorTy())
      return false;

    auto ValueOpcodesEqual = [](const Value *LHS, const Value *RHS) -> bool {
      // ...
    };
    auto IsExtInst = [](const Value *V) -> bool {
      // ...
    };
    auto IsExtensionFromHalf = [](const Value *V) -> bool {
      // ...
    };
    // ...
    Value *Op0 = BinOp->getOperand(0);
    Value *Op1 = BinOp->getOperand(1);
    if (IsExtInst(Op0) && IsExtInst(Op1) && ValueOpcodesEqual(Op0, Op1)) {
      // ...
      if (!I->getType()->isIntegerTy(32) || !IsExtensionFromHalf(Op0) ||
          !IsExtensionFromHalf(Op1))
        return false;
      // ...
      for (auto *U : I->users())
        // ...
    }
    // ...
  };

  if (MulInDSPMLALPattern(CxtI, Opcode, Ty))
    return 0;
  // ...

  if (ST->hasMVEIntegerOps() && Ty->isVectorTy())
    BaseCost = ST->getMVEVectorCostFactor(CostKind);
  // ...
  if (TLI->isOperationLegalOrCustomOrPromote(ISDOpcode, LT.second))
    return LT.first * BaseCost;

  // Else this is expand, assume that we need to scalarize this op.
  if (auto *VTy = dyn_cast<FixedVectorType>(Ty)) {
    unsigned Num = VTy->getNumElements();
    // ...
  }
 
 
// ARMTTIImpl::getMemoryOpCost:
  // ...
  if (TLI->getValueType(DL, Src, true) == MVT::Other)
    // ...

  // Unaligned NEON vector loads/stores are inefficient (vst1/vld1 rather than
  // vldr/vstr):
  if (ST->hasNEON() && Src->isVectorTy() && Alignment != Align(16) &&
      // ...
    return LT.first * 4;
  // ...

  // A widening/narrowing MVE load/store paired with a conversion costs one
  // memory operation:
  if (/* ... */
      ((Opcode == Instruction::Load && I->hasOneUse() &&
        // ...
        Opcode == Instruction::Load
            ? (*I->user_begin())->getType()
            : // ...
    // ...
      return ST->getMVEVectorCostFactor(CostKind);
  // ...

  int BaseCost = ST->hasMVEIntegerOps() && Src->isVectorTy()
                     ? ST->getMVEVectorCostFactor(CostKind)
                     : 1;
  // ...
 
 
// ARMTTIImpl::getMaskedMemoryOpCost:
  if (ST->hasMVEIntegerOps()) {
    if (Opcode == Instruction::Load &&
        // ...
      return ST->getMVEVectorCostFactor(CostKind);
    if (Opcode == Instruction::Store &&
        // ...
      return ST->getMVEVectorCostFactor(CostKind);
  }
  // ...
 
 
// ARMTTIImpl::getInterleavedMemoryOpCost:
InstructionCost ARMTTIImpl::getInterleavedMemoryOpCost(
    unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
    Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
    bool UseMaskForCond, bool UseMaskForGaps) const {
  assert(Factor >= 2 && "Invalid interleave factor");
  // ...

  bool EltIs64Bits = DL.getTypeSizeInBits(VecTy->getScalarType()) == 64;

  if (Factor <= TLI->getMaxSupportedInterleaveFactor() && !EltIs64Bits &&
      !UseMaskForCond && !UseMaskForGaps) {
    // ...
    unsigned BaseCost =
        ST->hasMVEIntegerOps() ? ST->getMVEVectorCostFactor(CostKind) : 1;
    if (NumElts % Factor == 0 &&
        TLI->isLegalInterleavedAccessType(Factor, SubVecTy, Alignment, DL))
      return Factor * BaseCost * TLI->getNumInterleavedAccesses(SubVecTy, DL);
    // ...

    // Some smaller-than-legal interleaved patterns are cheap, as we can make
    // use of vmovn or vrev patterns:
    if (ST->hasMVEIntegerOps() && Factor == 2 && NumElts / Factor > 2 &&
        // ...
        DL.getTypeSizeInBits(SubVecTy).getFixedValue() <= 64)
      return 2 * BaseCost;
  }

  return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
                                           Alignment, AddressSpace, CostKind,
                                           UseMaskForCond, UseMaskForGaps);
 
 
// ARMTTIImpl::getGatherScatterOpCost:
InstructionCost ARMTTIImpl::getGatherScatterOpCost(
    unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
    Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) const {
  // ...
  unsigned NumElems = VTy->getNumElements();
  unsigned EltSize = VTy->getScalarSizeInBits();
  // ...

  InstructionCost VectorCost =
      NumElems * LT.first * ST->getMVEVectorCostFactor(CostKind);
  // The scalarization cost should be a reasonable estimate of the cost of the
  // gather/scatter being scalarized:
  InstructionCost ScalarCost =
      NumElems * LT.first + (VariableMask ? NumElems * 5 : 0) +
      // ...

  if (EltSize < 8 || Alignment < EltSize / 8)
    return ScalarCost;
  // ...

  unsigned ExtSize = EltSize;
  if (I != nullptr) {
    // If the load is extended by a single user, use the extended type:
    if ((I->getOpcode() == Instruction::Load ||
         // ...
      const User *Us = *I->users().begin();
      // ...
        // Only allow valid type combinations.
        if (((TypeSize == 32 && (EltSize == 8 || EltSize == 16)) ||
             (TypeSize == 16 && EltSize == 8)) &&
            // ...
    // If the store is truncated from a wider type, use that wider type:
    if ((I->getOpcode() == Instruction::Store ||
         // ...
      unsigned TypeSize = T->getOperand(0)->getType()->getScalarSizeInBits();
      if (((EltSize == 16 && TypeSize == 32) ||
           // ...
  }
  // ...

  if (ExtSize * NumElems != 128 || NumElems < 4)
    return ScalarCost;
  // ...
  if (ExtSize != 8 && ExtSize != 16)
    return ScalarCost;
  // ...

  if (const auto *BC = dyn_cast<BitCastInst>(Ptr))
    Ptr = BC->getOperand(0);
  if (const auto *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
    if (GEP->getNumOperands() != 2)
      return ScalarCost;
    unsigned Scale = DL.getTypeAllocSize(GEP->getResultElementType());
    // Scale needs to be correct (which is only relevant for i16s).
    if (Scale != 1 && Scale * 8 != ExtSize)
      return ScalarCost;
    // ...
      if (ZExt->getOperand(0)->getType()->getScalarSizeInBits() <= ExtSize)
        // ...
  }
 
 
// ARMTTIImpl::getArithmeticReductionCost:
InstructionCost
ARMTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
                                       std::optional<FastMathFlags> FMF,
                                       TTI::TargetCostKind CostKind) const {
  EVT ValVT = TLI->getValueType(DL, ValTy);
  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  // ...

  // FP reductions: split larger-than-legal vectors with vadd until they fit a
  // register, then reduce:
  if (/* ... */
      ((EltSize == 32 && ST->hasVFP2Base()) ||
       (EltSize == 64 && ST->hasFP64()) ||
       (EltSize == 16 && ST->hasFullFP16()))) {
    // ...
    unsigned VecLimit = ST->hasMVEFloatOps() ? 128 : (ST->hasNEON() ? 64 : -1);
    while (/* ... */
           NumElts * EltSize > VecLimit) {
      // ...
      VecCost += ST->getMVEVectorCostFactor(CostKind) * 2;
      // ...
      ExtractCost = NumElts / 2;
    // ...
    return VecCost + ExtractCost +
           // ...
  }

  // Integer add reductions:
  if (/* ... */
      (EltSize == 64 || EltSize == 32 || EltSize == 16 || EltSize == 8)) {
    unsigned VecLimit =
        ST->hasMVEIntegerOps() ? 128 : (ST->hasNEON() ? 64 : -1);
    // ...
    while (isPowerOf2_32(NumElts) && NumElts * EltSize > VecLimit) {
      // ...
    }
    // ...
        NumElts * EltSize == 64) {
      // ...
      VecCost += ST->getMVEVectorCostFactor(CostKind) +
                 // ...
    }
    // ...
    return VecCost + ExtractCost +
           /* ... */ (
                               Opcode, ValTy->getElementType(), CostKind);
  }
  // ...
    return Entry->Cost * ST->getMVEVectorCostFactor(CostKind) * LT.first;
 
 
// ARMTTIImpl::getExtendedReductionCost:
InstructionCost ARMTTIImpl::getExtendedReductionCost(
    unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *ValTy,
    std::optional<FastMathFlags> FMF, TTI::TargetCostKind CostKind) const {
  EVT ValVT = TLI->getValueType(DL, ValTy);
  EVT ResVT = TLI->getValueType(DL, ResTy);

  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  // ...
    // The legal cases are:
    //   VADDV  u/s 8/16/32
    //   VADDLV u/s 32
    if (/* ... */
        ((LT.second == MVT::v16i8 && RevVTSize <= 32) ||
         (LT.second == MVT::v8i16 && RevVTSize <= 32) ||
         (LT.second == MVT::v4i32 && RevVTSize <= 64)))
      return ST->getMVEVectorCostFactor(CostKind) * LT.first;
  // ...
 
 
// ARMTTIImpl::getMulAccReductionCost:
  if (RedOpcode != Instruction::Add)
    return InstructionCost::getInvalid();
  EVT ValVT = TLI->getValueType(DL, ValTy);
  EVT ResVT = TLI->getValueType(DL, ResTy);
  // ...
    // The legal cases are:
    //   VMLAV  u/s 8/16/32
    //   VMLALV u/s 16/32
    if (/* ... */
        ((LT.second == MVT::v16i8 && RevVTSize <= 32) ||
         (LT.second == MVT::v8i16 && RevVTSize <= 64) ||
         (LT.second == MVT::v4i32 && RevVTSize <= 64)))
      return ST->getMVEVectorCostFactor(CostKind) * LT.first;
  // ...
 
 
// ARMTTIImpl::getMinMaxReductionCost:
  EVT ValVT = TLI->getValueType(DL, Ty);
  // ...

  if ((IID == Intrinsic::minnum || IID == Intrinsic::maxnum) &&
      // ...
    unsigned VecLimit = ST->hasMVEFloatOps() ? 128 : (ST->hasNEON() ? 64 : -1);
    // ...
    while (isPowerOf2_32(NumElts) && NumElts * EltSize > VecLimit) {
      // ...
      VecCost += ST->getMVEVectorCostFactor(CostKind) * 2;
      // ...
    }
    // ...
                                {Ty->getElementType(), Ty->getElementType()},
    // ...
    return VecCost + ExtractCost +
           // ...
  }

  if (IID == Intrinsic::smin || IID == Intrinsic::smax ||
      IID == Intrinsic::umin || IID == Intrinsic::umax) {
    // ... (VMINV/VMAXV-style table lookup)
      return Entry->Cost * ST->getMVEVectorCostFactor(CostKind) * LT.first;
  }
  // ...
 
 
// ARMTTIImpl::getIntrinsicInstrCost (Opc is the intrinsic ID, ICA.getID()):
  switch (Opc) {
  case Intrinsic::get_active_lane_mask:
    // ...
    if (ST->hasMVEIntegerOps())
      // ...
    // ...
  case Intrinsic::sadd_sat:
  case Intrinsic::ssub_sat:
  case Intrinsic::uadd_sat:
  case Intrinsic::usub_sat: {
    bool IsAdd = (Opc == Intrinsic::sadd_sat || Opc == Intrinsic::uadd_sat);
    bool IsSigned = (Opc == Intrinsic::sadd_sat || Opc == Intrinsic::ssub_sat);
    // ...
      if (IsSigned && ST->hasDSP() && ITy->getBitWidth() == 32)
        return 1; // qadd / qsub
      if (ST->hasDSP() && (ITy->getBitWidth() == 8 || ITy->getBitWidth() == 16))
        return 2; // qadd16 / qsub16 etc., plus possible extends
    // ...
    if (!ST->hasMVEIntegerOps())
      break;
    // ...
    if (LT.second == MVT::v4i32 || LT.second == MVT::v8i16 ||
        LT.second == MVT::v16i8) {
      // ...
      return LT.first * ST->getMVEVectorCostFactor(CostKind) * Instrs;
    }
    break;
  }
  case Intrinsic::abs:
  case Intrinsic::smin:
  case Intrinsic::smax:
  case Intrinsic::umin:
  case Intrinsic::umax: {
    if (!ST->hasMVEIntegerOps())
      break;
    // ...
    if (LT.second == MVT::v4i32 || LT.second == MVT::v8i16 ||
        LT.second == MVT::v16i8)
      return LT.first * ST->getMVEVectorCostFactor(CostKind);
    break;
  }
  case Intrinsic::minnum:
  case Intrinsic::maxnum: {
    if (!ST->hasMVEFloatOps())
      break;
    // ...
    if (LT.second == MVT::v4f32 || LT.second == MVT::v8f16)
      return LT.first * ST->getMVEVectorCostFactor(CostKind);
    break;
  }
  case Intrinsic::fptosi_sat:
  case Intrinsic::fptoui_sat: {
    // ...
    bool IsSigned = Opc == Intrinsic::fptosi_sat;
    // ...
    // A scalar saturating conversion to i32 is a single instruction:
    if ((ST->hasVFP2Base() && LT.second == MVT::f32 && MTy == MVT::i32) ||
        (ST->hasFP64() && LT.second == MVT::f64 && MTy == MVT::i32) ||
        (ST->hasFullFP16() && LT.second == MVT::f16 && MTy == MVT::i32))
      // ...
    // MVE vector conversions:
    if (ST->hasMVEFloatOps() &&
        (LT.second == MVT::v4f32 || LT.second == MVT::v8f16) &&
        // ...
      return LT.first * ST->getMVEVectorCostFactor(CostKind);
    // Otherwise, model the saturate as a conversion plus min/max clamps on a
    // legal integer type:
    if (((ST->hasVFP2Base() && LT.second == MVT::f32) ||
         (ST->hasFP64() && LT.second == MVT::f64) ||
         (ST->hasFullFP16() && LT.second == MVT::f16) ||
         (ST->hasMVEFloatOps() &&
          (LT.second == MVT::v4f32 || LT.second == MVT::v8f16))) &&
        // ...
      Type *LegalTy = Type::getIntNTy(/* ... */,
                                      LT.second.getScalarSizeInBits());
      InstructionCost Cost =
          LT.second.isVector() ? ST->getMVEVectorCostFactor(CostKind) : 1;
      // ...
                                     LegalTy, {LegalTy, LegalTy});
      // ...
                                     LegalTy, {LegalTy, LegalTy});
      // ...
      return LT.first * Cost;
    }
    break;
  }
  }
  // ...
 
 
// ARMTTIImpl::isLoweredToCall:
  if (!F->isIntrinsic())
    return BaseT::isLoweredToCall(F);

  // Assume all Arm-specific intrinsics map to an instruction.
  if (F->getName().starts_with("llvm.arm"))
    return false;

  switch (F->getIntrinsicID()) {
  default:
    break;
  case Intrinsic::powi:
  case Intrinsic::sin:
  case Intrinsic::cos:
  case Intrinsic::sincos:
  case Intrinsic::pow:
  case Intrinsic::log:
  case Intrinsic::log10:
  case Intrinsic::log2:
  case Intrinsic::exp:
  case Intrinsic::exp2:
    return true;
  case Intrinsic::sqrt:
  case Intrinsic::fabs:
  case Intrinsic::copysign:
  case Intrinsic::floor:
  case Intrinsic::ceil:
  case Intrinsic::trunc:
  case Intrinsic::rint:
  case Intrinsic::nearbyint:
  case Intrinsic::round:
  case Intrinsic::canonicalize:
  case Intrinsic::lround:
  case Intrinsic::llround:
  case Intrinsic::lrint:
  case Intrinsic::llrint:
    if (F->getReturnType()->isDoubleTy() && !ST->hasFP64())
      return true;
    if (F->getReturnType()->isHalfTy() && !ST->hasFullFP16())
      return true;
    // ...
    return !ST->hasFPARMv8Base() && !ST->hasVFP2Base();
  case Intrinsic::masked_store:
  case Intrinsic::masked_load:
  case Intrinsic::masked_gather:
  case Intrinsic::masked_scatter:
    return !ST->hasMVEIntegerOps();
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::usub_with_overflow:
  case Intrinsic::sadd_sat:
  case Intrinsic::uadd_sat:
  case Intrinsic::ssub_sat:
  case Intrinsic::usub_sat:
    return false;
  }
  // ...
 
 
// ARMTTIImpl::maybeLoweredToCall:
  unsigned ISD = TLI->InstructionOpcodeToISD(I.getOpcode());
  EVT VT = TLI->getValueType(DL, I.getType(), true);
  // ...

  // Memory intrinsics may be lowered to library calls:
      switch(II->getIntrinsicID()) {
        case Intrinsic::memcpy:
        case Intrinsic::memset:
        case Intrinsic::memmove:
          // ...
      }
  // ...

  switch (I.getOpcode()) {
  // ...
  case Instruction::FPToSI:
  case Instruction::FPToUI:
  case Instruction::SIToFP:
  case Instruction::UIToFP:
  case Instruction::FPTrunc:
  case Instruction::FPExt:
    return !ST->hasFPARMv8Base();
  // ...
  }
  // ...

  // Assume all other non-float operations are supported.
  if (TLI->useSoftFloat()) {
    switch (I.getOpcode()) {
    default:
      return true;
    case Instruction::Alloca:
    case Instruction::Load:
    case Instruction::Store:
    case Instruction::Select:
    case Instruction::PHI:
      return false;
    }
  }
  // ...

  if (I.getType()->isDoubleTy() && !ST->hasFP64())
    return true;

  if (I.getType()->isHalfTy() && !ST->hasFullFP16())
    return true;
 
 
// ARMTTIImpl::isHardwareLoopProfitable:
  // ...
  const SCEV *TripCountSCEV =
      SE.getAddExpr(BackedgeTakenCount,
                    SE.getOne(BackedgeTakenCount->getType()));

  if (SE.getUnsignedRangeMax(TripCountSCEV).getBitWidth() > 32) {
    LLVM_DEBUG(dbgs() << "ARMHWLoops: Trip count does not fit into 32bits\n");
    return false;
  }
  // ...

  // Calls are only allowed when they are one of these loop intrinsics:
      switch (Call->getIntrinsicID()) {
      default:
        break;
      case Intrinsic::start_loop_iterations:
      case Intrinsic::test_start_loop_iterations:
      case Intrinsic::loop_decrement:
      case Intrinsic::loop_decrement_reg:
        return true;
      }
  // ...

  bool IsTailPredLoop = false;
  auto ScanLoop = [&](Loop *L) {
    for (auto *BB : L->getBlocks()) {
      for (auto &I : *BB) {
        // ...
          IsTailPredLoop |=
              II->getIntrinsicID() == Intrinsic::get_active_lane_mask ||
              II->getIntrinsicID() == Intrinsic::arm_mve_vctp8 ||
              II->getIntrinsicID() == Intrinsic::arm_mve_vctp16 ||
              II->getIntrinsicID() == Intrinsic::arm_mve_vctp32 ||
              II->getIntrinsicID() == Intrinsic::arm_mve_vctp64;
        // ...
      }
    }
    return true;
  };

  // Visit inner loops.
  for (auto *Inner : *L)
    if (!ScanLoop(Inner))
      return false;
 
 
// Tail-predication legality checks (fragment): min/max intrinsics are counted
// against the compare budget:
    if ((II->getIntrinsicID() == Intrinsic::smin ||
         II->getIntrinsicID() == Intrinsic::smax ||
         II->getIntrinsicID() == Intrinsic::umin ||
         II->getIntrinsicID() == Intrinsic::umax) &&
        // ...

// canTailPredicateLoop:
  // ...
  LLVM_DEBUG(dbgs() << "Tail-predication: checking allowed instructions\n");
  // ...
  bool ReductionsDisabled =
      // ...

  for (auto *I : LiveOuts) {
    if (!I->getType()->isIntegerTy() && !I->getType()->isFloatTy() &&
        !I->getType()->isHalfTy()) {
      LLVM_DEBUG(dbgs() << "Don't tail-predicate loop with non-integer/float "
                           "live-out value\n");
      return false;
    }
    if (ReductionsDisabled) {
      // ...
    }
  }
  // ...

    for (Instruction &I : BB->instructionsWithoutDebug()) {
      // ...
      if (T->getScalarSizeInBits() > 32) {
        // ...
      }
      // ...
        if (NextStride == 1) {
          // ...
        } else if (NextStride == -1 ||
                   /* ... */) {
          LLVM_DEBUG(dbgs()
                     << "Consecutive strides of 2 found, vld2/vstr2 can't "
                        "be tail-predicated\n.");
          return false;
        }
        // ...
            const SCEV *Step = AR->getStepRecurrence(*PSE.getSE());
            // ...
            LLVM_DEBUG(dbgs() << /* ... */
                                 "tail-predicate\n.");
            return false;
      // ...
    }

  LLVM_DEBUG(dbgs() << "tail-predication: all instructions allowed!\n");
  return true;
 
 
// ARMTTIImpl::preferPredicateOverEpilogue:
  if (!ST->hasMVEIntegerOps())
    return false;
  // ...

  if (L->getNumBlocks() > 1) {
    LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: not a single block "
                         /* ... */);
    return false;
  }

  assert(L->isInnermost() && "preferPredicateOverEpilogue: inner-loop expected");
  // ...
    LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: hardware-loop is not "
                         /* ... */);
  // ...
    LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: hardware-loop is not "
                         /* ... */);
  // ...
    LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: hardware-loop is not "
                         /* ... */);
 
// ARMTTIImpl::getUnrollingPreferences:
  // ... (detect get_active_lane_mask calls in the loop)
        return isa<IntrinsicInst>(I) &&
               cast<IntrinsicInst>(I).getIntrinsicID() ==
                   Intrinsic::get_active_lane_mask;
  // ...

  // Only currently enable these preferences for M-Class cores.
  if (!ST->isMClass())
    // ...

  // Disable loop unrolling for Oz and Os.
  // ...
  if (L->getHeader()->getParent()->hasOptSize())
    return;

  SmallVector<BasicBlock *, 4> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  LLVM_DEBUG(dbgs() << "Loop has:\n"
                    << "Blocks: " << L->getNumBlocks() << "\n"
                    << "Exit blocks: " << ExitingBlocks.size() << "\n");
  // ...

  if (ExitingBlocks.size() > 2)
    return;

  // Limit the CFG of the loop body for targets with a branch predictor.
  if (ST->hasBranchPredictor() && L->getNumBlocks() > 4)
    return;
  // ...

  // Scan the loop; don't unroll vectorized loops or loops with calls:
  for (auto *BB : L->getBlocks()) {
    for (auto &I : *BB) {
      // ...
      if (I.getType()->isVectorTy())
        return;
      // ...
    }
  }
  // ...

  if (ST->isThumb1Only()) {
    unsigned ExitingValues = 0;
    SmallVector<BasicBlock *, 4> ExitBlocks;
    L->getExitBlocks(ExitBlocks);
    for (auto *Exit : ExitBlocks) {
      // Count the live-out values; PHIs with a single GEP incoming value are
      // considered free:
      unsigned LiveOuts = count_if(Exit->phis(), [](auto &PH) {
        return PH.getNumOperands() != 1 ||
               !isa<GetElementPtrInst>(PH.getOperand(0));
      });
      ExitingValues = ExitingValues < LiveOuts ? LiveOuts : ExitingValues;
    }
    // ...
  }
  // ...

      auto *Outer = L->getOutermostLoop();
      if ((L != Outer && Outer != L->getParentLoop()) ||
          // ...
 
 
// ARMTTIImpl::preferInLoopReduction:
  if (!ST->hasMVEIntegerOps())
    return false;
  // ...
  unsigned ScalarBits = Ty->getScalarSizeInBits();
  // ...
    return ScalarBits <= 64;
  // ...

// ARMTTIImpl::preferPredicatedReductionSelect:
  if (!ST->hasMVEIntegerOps())
    return false;
  return true;
 
 
// ARMTTIImpl::getScalingFactorCost:
InstructionCost ARMTTIImpl::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                                 StackOffset BaseOffset,
                                                 bool HasBaseReg, int64_t Scale,
                                                 unsigned AddrSpace) const {
  // ...
      return AM.Scale < 0 ? 1 : 0; // positive offsets execute faster
  // ...

// ARMTTIImpl::hasArmWideBranch:
  if (Thumb) {
    // B.W is available in Thumb2 and v8-M Baseline.
    return ST->isThumb2() || ST->hasV8MBaselineOps();
  } else {
    // ...
    return ST->hasARMOps();
  }
 
 
// Static helper for isProfitableToSinkOperands: an extend must double the
// width of its operand:
    return Ext->getType()->getScalarSizeInBits() ==
           2 * Ext->getOperand(0)->getType()->getScalarSizeInBits();
// ...

// ARMTTIImpl::isProfitableToSinkOperands:
  if (!I->getType()->isVectorTy())
    return false;

  if (ST->hasNEON()) {
    switch (I->getOpcode()) {
    case Instruction::Sub:
    case Instruction::Add: {
      // ...
      Ops.push_back(&I->getOperandUse(0));
      Ops.push_back(&I->getOperandUse(1));
      return true;
    }
    default:
      return false;
    }
  }

  if (!ST->hasMVEIntegerOps())
    return false;

  // An FMul used only as the second operand of an FSub becomes a vfms:
  auto IsFMSMul = [&](Instruction *I) {
    if (!I->hasOneUse())
      return false;
    auto *Sub = cast<Instruction>(*I->users().begin());
    return Sub->getOpcode() == Instruction::FSub && Sub->getOperand(1) == I;
  };
  // ...

  // Which operands of which instructions can profitably be sunk:
  auto IsSinker = [&](Instruction *I, int Operand) {
    switch (I->getOpcode()) {
    case Instruction::Add:
    case Instruction::Mul:
    case Instruction::FAdd:
    case Instruction::ICmp:
    case Instruction::FCmp:
      return true;
    case Instruction::FMul:
      return !IsFMSMul(I);
    case Instruction::Sub:
    case Instruction::FSub:
    case Instruction::Shl:
    case Instruction::LShr:
    case Instruction::AShr:
      return Operand == 1;
    case Instruction::Call:
      if (auto *II = dyn_cast<IntrinsicInst>(I)) {
        switch (II->getIntrinsicID()) {
        case Intrinsic::fma:
          // ... (like FMul: not when it would form a vfms)
        case Intrinsic::sadd_sat:
        case Intrinsic::uadd_sat:
        case Intrinsic::arm_mve_add_predicated:
        case Intrinsic::arm_mve_mul_predicated:
        case Intrinsic::arm_mve_qadd_predicated:
        case Intrinsic::arm_mve_vhadd:
        case Intrinsic::arm_mve_hadd_predicated:
        case Intrinsic::arm_mve_vqdmull:
        case Intrinsic::arm_mve_vqdmull_predicated:
        case Intrinsic::arm_mve_vqdmulh:
        case Intrinsic::arm_mve_qdmulh_predicated:
        case Intrinsic::arm_mve_vqrdmulh:
        case Intrinsic::arm_mve_qrdmulh_predicated:
        case Intrinsic::arm_mve_fma_predicated:
          return true;
        case Intrinsic::ssub_sat:
        case Intrinsic::usub_sat:
        case Intrinsic::arm_mve_sub_predicated:
        case Intrinsic::arm_mve_qsub_predicated:
        case Intrinsic::arm_mve_hsub_predicated:
        case Intrinsic::arm_mve_vhsub:
          return Operand == 1;
        default:
          return false;
        }
      }
      return false;
    default:
      return false;
    }
  };
  // ...

    if (Shuffle->getOpcode() == Instruction::BitCast)
      // ...
    // ...
    if (!IsSinker(I, OpIdx.index()))
      continue;
    // ...
    // All uses of the splat shuffle must themselves be sinkable:
    for (Use &U : Op->uses()) {
      Instruction *Insn = cast<Instruction>(U.getUser());
      if (!IsSinker(Insn, U.getOperandNo()))
        return false;
    }
    // ...
      Ops.push_back(&Op->getOperandUse(0));
  // ...
 
 
// ARMTTIImpl::getNumBytesToPadGlobalArray:
  // ...
  unsigned NumBytesToPad = 4 - (Size % 4);
  unsigned NewSize = Size + NumBytesToPad;

  // Don't pad if the padded size would exceed the maximum inlined
  // memory-intrinsic size:
  if (NewSize > MaxMemIntrinsicSize)
    return 0;

  return NumBytesToPad;
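
// Worked example: for Size == 10, NumBytesToPad == 4 - (10 % 4) == 2 and
// NewSize == 12, i.e. the array is padded up to the next 4-byte boundary,
// unless that would exceed MaxMemIntrinsicSize.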
 
 
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
 
This file implements a class to represent arbitrary precision integral constant values and operations...
 
cl::opt< unsigned > MVEMaxSupportedInterleaveFactor("mve-max-interleave-factor", cl::Hidden, cl::desc("Maximum interleave factor for MVE VLDn to generate."), cl::init(2))
 
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
 
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
 
Cost tables and simple lookup functions.
 
This file provides the interface for the instcombine pass implementation.
 
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
 
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
 
static cl::opt< unsigned > UnrollCount("unroll-count", cl::Hidden, cl::desc("Use this unroll count for all loops including those with " "unroll_count pragma values, for testing purposes"))
 
This file defines the LoopVectorizationLegality class.
 
static const Function * getCalledFunction(const Value *V)
 
MachineInstr unsigned OpIdx
 
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
 
uint64_t IntrinsicInst * II
 
This file defines the SmallVector class.
 
Class for arbitrary precision integers.
 
unsigned getBitWidth() const
Return the number of bits in the APInt.
 
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
 
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
 
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
 
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
 
InstructionCost getAddressComputationCost(Type *Val, ScalarEvolution *SE, const SCEV *Ptr, TTI::TargetCostKind CostKind) const override
 
TailFoldingStyle getPreferredTailFoldingStyle(bool IVUpdateMayOverflow=true) const override
 
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
 
InstructionCost getMemcpyCost(const Instruction *I) const override
 
bool maybeLoweredToCall(Instruction &I) const
 
bool preferInLoopReduction(RecurKind Kind, Type *Ty) const override
 
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
 
InstructionCost getMulAccReductionCost(bool IsUnsigned, unsigned RedOpcode, Type *ResTy, VectorType *ValTy, TTI::TargetCostKind CostKind) const override
 
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override
 
InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const override
 
bool hasArmWideBranch(bool Thumb) const override
 
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
 
int getNumMemOps(const IntrinsicInst *I) const
Given a memcpy/memset/memmove instruction, return the number of memory operations performed,...
 
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
 
InstructionCost getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty) const override
 
bool isLoweredToCall(const Function *F) const override
 
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *ValTy, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
 
bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const override
Check if sinking I's operands to I's basic block is profitable, because the operands can be folded in...
 
bool isLegalMaskedStore(Type *DataTy, Align Alignment, unsigned AddressSpace) const override
 
uint64_t getMaxMemIntrinsicInlineSizeThreshold() const override
 
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
 
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind) const override
 
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1) const override
 
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
 
std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const override
 
bool isLegalMaskedLoad(Type *DataTy, Align Alignment, unsigned AddressSpace) const override
 
InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr) const override
 
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
 
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
 
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override
 
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
 
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const override
 
TTI::AddressingModeKind getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const override
 
bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) const override
 
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
 
bool areInlineCompatible(const Function *Caller, const Function *Callee) const override
 
bool preferPredicatedReductionSelect() const override
 
bool isLegalMaskedGather(Type *Ty, Align Alignment) const override
 
unsigned getNumBytesToPadGlobalArray(unsigned Size, Type *ArrayType) const override
 
bool isProfitableLSRChainElement(Instruction *I) const override
 
bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo) const override
 
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
 
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, StackOffset BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) const override
getScalingFactorCost - Return the cost of the scaling used in addressing mode represented by AM.
 
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
 
Class to represent array types.
 
A cache of @llvm.assume calls within a function.
 
static LLVM_ABI Attribute getWithAlignment(LLVMContext &Context, Align Alignment)
Return a uniquified Attribute object that has the specific alignment set.
 
LLVM Basic Block Representation.
 
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override
 
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1) const override
 
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
 
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const override
 
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
 
InstructionCost getScalarizationOverhead(VectorType *InTy, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={}) const override
 
TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind, ArrayRef< int > Mask, VectorType *SrcTy, int &Index, VectorType *&SubTy) const
 
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I=nullptr, int64_t ScalableOffset=0) const override
 
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
 
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
 
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
 
InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind) const override
 
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
 
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
 
InstructionCost getMulAccReductionCost(bool IsUnsigned, unsigned RedOpcode, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind) const override
 
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
 
std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const
 
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
 
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
 
InstructionCost getAddressComputationCost(Type *PtrTy, ScalarEvolution *, const SCEV *, TTI::TargetCostKind) const override
 
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
 
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
 
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *DataTy, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind) const override
 
static LLVM_ABI BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), InsertPosition InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
 
static Type * makeCmpResultType(Type *opnd_type)
Create a result type for fcmp/icmp.
 
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
 
@ ICMP_SLE
signed less or equal
 
@ ICMP_SGT
signed greater than
 
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
 
This is the shared class of boolean and integer constants.
 
const APInt & getValue() const
Return the constant as an APInt value reference.
 
This class represents a range of values.
 
This is an important base class in LLVM.
 
A parsed version of the target data layout string in and methods for querying it.
 
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
 
Convenience struct for specifying and reasoning about fast-math flags.
 
Container class for subtarget features.
 
Class to represent fixed width SIMD vectors.
 
unsigned getNumElements() const
 
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
 
LLVM_ABI Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains.
 
ConstantInt * getTrue()
Get the constant value for i1 true.
 
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
 
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
 
The core instruction combiner logic.
 
const DataLayout & getDataLayout() const
 
virtual Instruction * eraseInstFromFunction(Instruction &I)=0
Combiner aware instruction erasure.
 
IRBuilder< TargetFolder, IRBuilderCallbackInserter > BuilderTy
An IRBuilder that automatically inserts new instructions into the worklist.
 
DominatorTree & getDominatorTree() const
 
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
 
virtual bool SimplifyDemandedBits(Instruction *I, unsigned OpNo, const APInt &DemandedMask, KnownBits &Known, const SimplifyQuery &Q, unsigned Depth=0)=0
 
Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)
Replace operand of instruction and add old operand to the worklist.
 
AssumptionCache & getAssumptionCache() const
 
static InstructionCost getInvalid(CostType Val=0)
 
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
 
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
 
const SmallVectorImpl< Type * > & getArgTypes() const
 
Type * getReturnType() const
 
Intrinsic::ID getID() const
 
A wrapper class for inspecting calls to intrinsic functions.
 
This is an important class for using LLVM in a threaded context.
 
Drive the analysis of memory accesses in the loop.
 
const PredicatedScalarEvolution & getPSE() const
Used to add runtime SCEV checks.
 
LoopVectorizationLegality checks if it is legal to vectorize a loop, and to what vectorization factor...
 
LoopInfo * getLoopInfo() const
 
DominatorTree * getDominatorTree() const
 
AssumptionCache * getAssumptionCache() const
 
const LoopAccessInfo * getLAI() const
 
ScalarEvolution * getScalarEvolution() const
 
Represents a single loop in the control flow graph.
 
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
 
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
 
This class represents an analyzed expression in the program.
 
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
 
The main scalar evolution driver.
 
LLVM_ABI const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
 
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
 
const SCEV * getOne(Type *Ty)
Return a SCEV for the constant 1 of a specific type.
 
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
 
LLVM_ABI bool hasLoopInvariantBackedgeTakenCount(const Loop *L)
Return true if the specified loop has an analyzable loop-invariant backedge-taken count.
 
APInt getUnsignedRangeMax(const SCEV *S)
Determine the max of the unsigned range for a particular SCEV.
 
LLVM_ABI const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
 
static LLVM_ABI bool isDeInterleaveMaskOfFactor(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
Check if the mask is a DE-interleave mask of the given factor Factor like: <Index,...
 
static LLVM_ABI bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
 
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
 
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
 
StackOffset holds a fixed and a scalable offset in bytes.
 
static StackOffset getScalable(int64_t Scalable)
 
static StackOffset getFixed(int64_t Fixed)
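 
A small sketch of combining the two kinds of offset; the byte values are arbitrary, and the arithmetic assumes StackOffset's operator+:
StackOffset Off = StackOffset::getFixed(16) + StackOffset::getScalable(8);
int64_t FixedPart = Off.getFixed();       // 16 bytes
int64_t ScalablePart = Off.getScalable(); // 8 bytes, scaled by vscale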
 
Provides information about what library functions are available for the current target.
 
Primary interface to the complete machine description for the target machine.
 
This class represents a truncation of integer types.
 
The instances of the Type class are immutable: once they are created, they are never changed.
 
bool isVectorTy() const
True if this is an instance of VectorType.
 
bool isArrayTy() const
True if this is an instance of ArrayType.
 
LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
 
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
 
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
 
Type * getArrayElementType() const
 
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
 
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
 
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
 
LLVM_ABI Type * getWithNewBitWidth(unsigned NewBitWidth) const
Given an integer or vector type, change the lane bitwidth to NewBitwidth, whilst keeping the old number of lanes.
 
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
 
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
 
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
 
bool isIntegerTy() const
True if this is an instance of IntegerType.
 
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
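 
A brief sketch combining a few of these Type queries (Ty is a hypothetical Type *):
// Widen each i8 lane of an integer (vector) type to i16 lanes.
if (Ty->isIntOrIntVectorTy() && Ty->getScalarSizeInBits() == 8) {
  Type *WideTy = Ty->getWithNewBitWidth(16); // lane count is preserved
  (void)WideTy;
}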
 
A Use represents the edge between a Value definition and its users.
 
const Use & getOperandUse(unsigned i) const
 
Value * getOperand(unsigned i) const
 
LLVM Value Representation.
 
Type * getType() const
All values are typed, get the type of this value.
 
user_iterator user_begin()
 
bool hasOneUse() const
Return true if there is exactly one use of this value.
 
LLVM_ABI bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
 
Base class of all SIMD vector types.
 
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector.
 
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
 
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
 
int getSOImmVal(unsigned Arg)
getSOImmVal - Given a 32-bit immediate, if it is something that can fit into a shifter_operand immediate operand, return the 32-bit encoding for it. If not, return -1.
 
bool isThumbImmShiftedVal(unsigned V)
isThumbImmShiftedVal - Return true if the specified value can be obtained by left shifting an 8-bit immediate by some amount.
 
int getT2SOImmVal(unsigned Arg)
getT2SOImmVal - Given a 32-bit immediate, if it is something that can fit into a Thumb-2 shifter_operand immediate operand, return the 12-bit encoding for it. If not, return -1.
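 
A hedged sketch of the usual encodability checks (the constant is an arbitrary example):
unsigned Imm = 0xFF00;
bool EncodableARM = ARM_AM::getSOImmVal(Imm) != -1;   // ARM shifter_operand
bool EncodableT2 = ARM_AM::getT2SOImmVal(Imm) != -1;  // Thumb-2 modified immediate
bool EncodableT1 = ARM_AM::isThumbImmShiftedVal(Imm); // shifted 8-bit immediate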
 
@ C
The default llvm calling convention, compatible with C.
 
ISD namespace - This namespace contains an enum which represents all of the SelectionDAG node types and value types.
 
@ ADD
Simple integer binary arithmetic operators.
 
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter) to floating point.
 
@ FADD
Simple binary floating point operators.
 
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
 
@ SIGN_EXTEND
Conversion operators.
 
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
 
@ SHL
Shift and rotation operations.
 
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
 
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
 
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
 
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
 
@ AND
Bitwise operators - logical and, logical or, logical xor.
 
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the destination VT.
 
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
 
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
 
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
 
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
 
bool match(Val *V, const Pattern &P)
 
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
 
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
 
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
 
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
 
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
 
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
 
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
 
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
 
auto m_Undef()
Match an arbitrary undef constant.
 
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
 
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
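 
A minimal sketch of the PatternMatch idiom these matchers share, assuming V is a Value * in scope:
using namespace llvm::PatternMatch;
Value *X = nullptr;
// Capture the narrow source of a zext or sext feeding V.
if (match(V, m_ZExtOrSExt(m_Value(X)))) {
  // X is the pre-extension operand.
}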
 
@ ForceEnabledNoReductions
 
initializer< Ty > init(const Ty &Val)
 
This is an optimization pass for GlobalISel generic memory operations.
 
const CostTblEntryT< CostType > * CostTableLookup(ArrayRef< CostTblEntryT< CostType > > Tbl, int ISD, MVT Ty)
Find in cost table.
 
LLVM_ABI bool getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name)
Returns true if Name is applied to TheLoop and enabled.
 
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B, C, ...), where A is the 0-based index and B, C, ... are the corresponding values from the input ranges.
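 
For example (Mask is a hypothetical container):
for (auto [Idx, Elt] : enumerate(Mask)) {
  // Idx is the 0-based position, Elt the element at that position.
}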
 
TypeConversionCostTblEntryT< unsigned > TypeConversionCostTblEntry
 
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
 
@ Runtime
Detect stack use after return if not disabled at runtime (ASAN_OPTIONS=detect_stack_use_after_return).
 
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
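 
A short illustrative use, paired with getLoadStoreType listed further below (I is a hypothetical Value *):
// Returns nullptr for anything that is not a load or store.
if (const Value *Ptr = getLoadStorePointerOperand(I)) {
  Type *AccessTy = getLoadStoreType(I); // the loaded or stored type
  (void)Ptr; (void)AccessTy;
}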
 
auto dyn_cast_or_null(const Y &Val)
 
Align getKnownAlignment(Value *V, const DataLayout &DL, const Instruction *CxtI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr)
Try to infer an alignment for the specified pointer.
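 
Sketch, assuming DL, AC and DT are the usual analyses and Ptr and I are in scope (names illustrative):
Align A = getKnownAlignment(Ptr, DL, /*CxtI=*/&I, &AC, &DT);
if (A >= Align(16)) {
  // Ptr is provably 16-byte aligned at I.
}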
 
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
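 
For instance (Mask is a hypothetical shuffle mask):
bool HasUndefLane = any_of(Mask, [](int M) { return M < 0; });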
 
LLVM_ABI SmallVector< Instruction *, 8 > findDefsUsedOutsideOfLoop(Loop *L)
Returns the instructions that use values defined in the loop.
 
SelectPatternFlavor
Specific patterns of select instructions we can match.
 
@ SPF_ABS
Absolute value.
 
@ SPF_FMAXNUM
Floating point maxnum.
 
@ SPF_UMIN
Unsigned minimum.
 
@ SPF_UMAX
Unsigned maximum.
 
@ SPF_SMAX
Signed maximum.
 
@ SPF_FMINNUM
Floating point minnum.
 
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
 
LLVM_ABI SelectPatternResult matchSelectPattern(Value *V, Value *&LHS, Value *&RHS, Instruction::CastOps *CastOp=nullptr, unsigned Depth=0)
Pattern match integer [SU]MIN, [SU]MAX and ABS idioms, returning the kind and providing the out parameter results if we successfully match.
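 
A hedged sketch of classifying a select as min/max/abs (V is a hypothetical Value *):
Value *LHS, *RHS;
SelectPatternResult SPR = matchSelectPattern(V, LHS, RHS);
if (SPR.Flavor == SPF_SMAX) {
  // V computes the signed maximum of LHS and RHS.
}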
 
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
 
LLVM_ABI std::optional< int64_t > getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const DenseMap< Value *, const SCEV * > &StridesMap=DenseMap< Value *, const SCEV * >(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of the access type size.
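 
Sketch, assuming PSE, AccessTy, Ptr and L are in scope:
std::optional<int64_t> Stride = getPtrStride(PSE, AccessTy, Ptr, L);
bool Consecutive = Stride && *Stride == 1; // unit stride, in access-type units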
 
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
 
RecurKind
These are the kinds of recurrences that we support.
 
@ Sub
Subtraction of integers.
 
DWARFExpression::Operation Op
 
CostTblEntryT< unsigned > CostTblEntry
 
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given predicate occurs in a range.
 
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
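 
The usual idiom combining isa, dyn_cast and cast (sketch; V is any Value *):
if (auto *LI = dyn_cast<LoadInst>(V)) {
  (void)LI; // non-null only when V really is a LoadInst
} else if (isa<StoreInst>(V)) {
  auto *SI = cast<StoreInst>(V); // checked cast: asserts on a type mismatch
  (void)SI;
}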
 
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
 
bool isVREVMask(ArrayRef< int > M, EVT VT, unsigned BlockSize)
isVREVMask - Check if a vector shuffle corresponds to a VREV instruction with the specified blocksize.
 
@ DataWithoutLaneMask
Same as Data, but avoids using the get.active.lane.mask intrinsic to calculate the mask and instead implements it with an icmp.
 
@ Data
Use predicate only to mask operations on data in the loop.
 
const TypeConversionCostTblEntryT< CostType > * ConvertCostTableLookup(ArrayRef< TypeConversionCostTblEntryT< CostType > > Tbl, int ISD, MVT Dst, MVT Src)
Find in type conversion cost table.
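 
An illustrative lookup against a made-up table (the entry and its cost are hypothetical, not taken from this file):
static const TypeConversionCostTblEntry ConversionTbl[] = {
    {ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 1}, // hypothetical cost
};
if (const auto *Entry = ConvertCostTableLookup(ConversionTbl, ISD::SIGN_EXTEND,
                                               MVT::v4i32, MVT::v4i16))
  return Entry->Cost;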
 
This struct is a compact representation of a valid (non-zero power of two) alignment.
 
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
 
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
 
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
 
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
 
uint64_t getScalarSizeInBits() const
 
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
 
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
 
EVT getVectorElementType() const
Given a vector type, return the type of each element.
 
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
 
bool isInteger() const
Return true if this is an integer or a vector integer type.
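 
A short sketch tying these EVT queries together (VT is a hypothetical value type):
if (VT.isSimple() && VT.isInteger()) {
  uint64_t EltBits = VT.getScalarSizeInBits(); // e.g. 32 for v4i32
  MVT SimpleVT = VT.getSimpleVT();             // safe: isSimple() was checked
  (void)EltBits; (void)SimpleVT;
}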
 
Attributes of a target dependent hardware loop.
 
LLVM_ABI bool canAnalyze(LoopInfo &LI)
 
LLVM_ABI bool isHardwareLoopCandidate(ScalarEvolution &SE, LoopInfo &LI, DominatorTree &DT, bool ForceNestedLoop=false, bool ForceHardwareLoopPHI=false)
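 
A hedged sketch of the candidacy check, assuming SE, LI and DT analyses and a Loop *L are available:
HardwareLoopInfo HWLoopInfo(L);
if (!HWLoopInfo.canAnalyze(LI))
  return false;
// Both Force* parameters default to false.
return HWLoopInfo.isHardwareLoopCandidate(SE, LI, DT);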
 
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
 
static MemOp Set(uint64_t Size, bool DstAlignCanChange, Align DstAlign, bool IsZeroMemset, bool IsVolatile)
 
static MemOp Copy(uint64_t Size, bool DstAlignCanChange, Align DstAlign, Align SrcAlign, bool IsVolatile, bool MemcpyStrSrc=false)
 
SelectPatternFlavor Flavor
 
LoopVectorizationLegality * LVL
 
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*vscale.