#include "llvm/IR/IntrinsicsARM.h"

#define DEBUG_TYPE "armtti"
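// Command-line options controlling ARM-specific heuristics follow: masked
// load/store generation, low-overhead-loop generation, WLS loop generation,
// widening of global strings, and a small-loop unroll threshold. Only the
// cl::desc strings of these declarations are visible in this excerpt.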
    cl::desc("Enable the generation of masked loads and stores"));

    cl::desc("Disable the generation of low-overhead loops"));

    cl::desc("Enable the generation of WLS loops"));

    cl::desc("Enable the widening of global strings to alignment boundaries"));

        "Threshold for forced unrolling of small loops in Arm architecture"));
  unsigned Alignment = IntrAlign->getLimitedValue() < MemAlign
                           ? MemAlign
                           : IntrAlign->getLimitedValue();

  return Builder.CreateAlignedLoad(II.getType(), II.getArgOperand(0),
                                   Align(Alignment));
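// areInlineCompatible: feature-bit compatibility check used when deciding
// whether a callee can be inlined into a caller. Bits outside
// InlineFeaturesAllowed must match exactly; bits inside it only require the
// callee's features to be a subset of the caller's.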
  bool MatchExact = (CallerBits & ~InlineFeaturesAllowed) ==
                    (CalleeBits & ~InlineFeaturesAllowed);

  bool MatchSubset = ((CallerBits & CalleeBits) & InlineFeaturesAllowed) ==
                     (CalleeBits & InlineFeaturesAllowed);
  return MatchExact && MatchSubset;

  if (ST->hasMVEIntegerOps())

  if (L->getHeader()->getParent()->hasOptSize())

  if (ST->isMClass() && ST->isThumb2() &&
      L->getNumBlocks() == 1)
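// instCombineIntrinsic: target-specific InstCombine folds for ARM NEON and
// MVE intrinsics. A NEON vld1 with a sufficiently aligned pointer becomes a
// plain aligned load, vldN/vstN intrinsics get their alignment argument
// raised to the known pointer alignment, MVE predicate conversions are
// simplified, and VADC/VMLDAVA operands are reassociated where possible.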
std::optional<Instruction *>
ARMTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {

  case Intrinsic::arm_neon_vld1: {

  case Intrinsic::arm_neon_vld2:
  case Intrinsic::arm_neon_vld3:
  case Intrinsic::arm_neon_vld4:
  case Intrinsic::arm_neon_vld2lane:
  case Intrinsic::arm_neon_vld3lane:
  case Intrinsic::arm_neon_vld4lane:
  case Intrinsic::arm_neon_vst1:
  case Intrinsic::arm_neon_vst2:
  case Intrinsic::arm_neon_vst3:
  case Intrinsic::arm_neon_vst4:
  case Intrinsic::arm_neon_vst2lane:
  case Intrinsic::arm_neon_vst3lane:
  case Intrinsic::arm_neon_vst4lane: {
    unsigned AlignArg = II.arg_size() - 1;
    Value *AlignArgOp = II.getArgOperand(AlignArg);
  case Intrinsic::arm_neon_vld1x2:
  case Intrinsic::arm_neon_vld1x3:
  case Intrinsic::arm_neon_vld1x4:
  case Intrinsic::arm_neon_vst1x2:
  case Intrinsic::arm_neon_vst1x3:
  case Intrinsic::arm_neon_vst1x4: {
    Align OldAlign = II.getParamAlign(0).valueOrOne();
    if (NewAlign > OldAlign)

  case Intrinsic::arm_mve_pred_i2v: {
    Value *Arg = II.getArgOperand(0);

    if (CI->getValue().trunc(16).isAllOnes()) {

  case Intrinsic::arm_mve_pred_v2i: {
    Value *Arg = II.getArgOperand(0);

    if (II.getMetadata(LLVMContext::MD_range))

    if (auto CurrentRange = II.getRange()) {
      if (Range == CurrentRange)

    II.addRetAttr(Attribute::NoUndef);
  case Intrinsic::arm_mve_vadc:
  case Intrinsic::arm_mve_vadc_predicated: {
    unsigned CarryOp =
        (II.getIntrinsicID() == Intrinsic::arm_mve_vadc_predicated) ? 3 : 2;
    assert(II.getArgOperand(CarryOp)->getType()->getScalarSizeInBits() == 32 &&
           "Bad type for intrinsic!");

  case Intrinsic::arm_mve_vmldava: {
    if (I->hasOneUse()) {

      Value *OpX = I->getOperand(4);
      Value *OpY = I->getOperand(5);

          {I->getOperand(0), I->getOperand(1),
           I->getOperand(2), OpZ, OpX, OpY});
        SimplifyAndSetOp) const {

  auto SimplifyNarrowInstrTopBottom = [&](unsigned TopOpc) {

    SimplifyAndSetOp(&II, 0, OrigDemandedElts & DemandedElts, UndefElts);

  switch (II.getIntrinsicID()) {

  case Intrinsic::arm_mve_vcvt_narrow:
    SimplifyNarrowInstrTopBottom(2);
    break;
  case Intrinsic::arm_mve_vqmovn:
    SimplifyNarrowInstrTopBottom(4);
    break;
  case Intrinsic::arm_mve_vshrn:
    SimplifyNarrowInstrTopBottom(7);
    break;
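// getIntImmCost: price an integer immediate by the number of instructions
// needed to materialise it. Values encodable in a single move (e.g. 0..65535
// with MOVW) are cheap; otherwise the cost is 2 (a MOVW/MOVT pair) with
// v6t2, or 3 (a constant-pool load) without it.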
  assert(Ty->isIntegerTy());

  unsigned Bits = Ty->getPrimitiveSizeInBits();
  if (Bits == 0 || Imm.getActiveBits() >= 64)

  int64_t SImmVal = Imm.getSExtValue();
  uint64_t ZImmVal = Imm.getZExtValue();
  if (!ST->isThumb()) {
    if ((SImmVal >= 0 && SImmVal < 65536) ||
      return ST->hasV6T2Ops() ? 2 : 3;

  if (ST->isThumb2()) {
    if ((SImmVal >= 0 && SImmVal < 65536) ||
      return ST->hasV6T2Ops() ? 2 : 3;

  if (Bits == 8 || (SImmVal >= 0 && SImmVal < 256))

  if (Imm.isNonNegative() && Imm.getLimitedValue() < 256)

      C->getValue() == Imm && Imm.isNegative() && Imm.isNegatedPowerOf2()) {

    auto isSSatMin = [&](Value *MinInst) {

      Value *MinLHS, *MinRHS;

  if (Imm.getBitWidth() != 64 ||

  if ((Opcode == Instruction::SDiv || Opcode == Instruction::UDiv ||
       Opcode == Instruction::SRem || Opcode == Instruction::URem) &&

  if (Opcode == Instruction::GetElementPtr && Idx != 0)

  if (Opcode == Instruction::And) {
    if (Imm == 255 || Imm == 65535)

  if (Opcode == Instruction::Add)

  if (Opcode == Instruction::ICmp && Imm.isNegative() &&
      Ty->getIntegerBitWidth() == 32) {
    int64_t NegImm = -Imm.getSExtValue();
    if (ST->isThumb2() && NegImm < 1<<12)

    if (ST->isThumb() && NegImm < 1<<8)

  if (Opcode == Instruction::Xor && Imm.isAllOnes())

  if (Inst && ((ST->hasV6Ops() && !ST->isThumb()) || ST->isThumb2()) &&
      Ty->getIntegerBitWidth() <= 32) {

  if (Inst && Opcode == Instruction::ICmp && Idx == 1 && Imm.isAllOnes()) {

      (ST->hasNEON() || ST->hasMVEIntegerOps())) {
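// getCastInstrCost: conversion costs come from per-ISA lookup tables, chosen
// by ISD opcode and source/destination MVT. MVE results are scaled by the
// MVE vector cost factor, and an extend whose single user is an
// add/sub/mul/shl is costed at 0 because it folds into the using
// instruction (see the cost-0 table entries below).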
  int ISD = TLI->InstructionOpcodeToISD(Opcode);

    return Cost == 0 ? 0 : 1;

  auto IsLegalFPType = [this](EVT VT) {

    return (EltVT == MVT::f32 && ST->hasVFP2Base()) ||
           (EltVT == MVT::f64 && ST->hasFP64()) ||
           (EltVT == MVT::f16 && ST->hasFullFP16());

  EVT SrcTy = TLI->getValueType(DL, Src);
  EVT DstTy = TLI->getValueType(DL, Dst);

  if (!SrcTy.isSimple() || !DstTy.isSimple())

  if ((ST->hasMVEIntegerOps() &&
       (Opcode == Instruction::Trunc || Opcode == Instruction::ZExt ||
        Opcode == Instruction::SExt)) ||
      (ST->hasMVEFloatOps() &&
       (Opcode == Instruction::FPExt || Opcode == Instruction::FPTrunc) &&
       IsLegalFPType(SrcTy) && IsLegalFPType(DstTy)))

        ST->getMVEVectorCostFactor(CostKind);

            LoadConversionTbl, ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT()))
      return AdjustCost(Entry->Cost);
  if (SrcTy.isVector() && ST->hasMVEIntegerOps()) {
    if (const auto *Entry =
      return Entry->Cost * ST->getMVEVectorCostFactor(CostKind);

      {ISD::FP_EXTEND, MVT::v4f32, MVT::v4f16, 1},
      {ISD::FP_EXTEND, MVT::v8f32, MVT::v8f16, 3},

  if (SrcTy.isVector() && ST->hasMVEFloatOps()) {
    if (const auto *Entry =
      return Entry->Cost * ST->getMVEVectorCostFactor(CostKind);

  if (SrcTy.isVector() && ST->hasMVEIntegerOps()) {
    if (const auto *Entry =
      return Entry->Cost * ST->getMVEVectorCostFactor(CostKind);

  if (SrcTy.isVector() && ST->hasMVEFloatOps()) {
    if (const auto *Entry =
      return Entry->Cost * ST->getMVEVectorCostFactor(CostKind);
  if (I && I->hasOneUse() && ST->hasNEON() && SrcTy.isVector()) {

        { ISD::ADD, MVT::v4i32, MVT::v4i16, 0 },
        { ISD::ADD, MVT::v8i16, MVT::v8i8, 0 },

        { ISD::SUB, MVT::v4i32, MVT::v4i16, 0 },
        { ISD::SUB, MVT::v8i16, MVT::v8i8, 0 },

        { ISD::MUL, MVT::v4i32, MVT::v4i16, 0 },
        { ISD::MUL, MVT::v8i16, MVT::v8i8, 0 },

        { ISD::SHL, MVT::v4i32, MVT::v4i16, 0 },
        { ISD::SHL, MVT::v8i16, MVT::v8i8, 0 },

    int UserISD = TLI->InstructionOpcodeToISD(User->getOpcode());

                                      SrcTy.getSimpleVT())) {
      return AdjustCost(Entry->Cost);

  if (Src->isVectorTy() && ST->hasNEON() &&
      (ISD == ISD::FP_EXTEND && SrcTy.getScalarType() == MVT::f32 &&

      {ISD::FP_EXTEND, MVT::v2f32, 2},
      {ISD::FP_EXTEND, MVT::v4f32, 4}};

    return AdjustCost(LT.first * Entry->Cost);
  if (SrcTy.isVector() && ST->hasNEON()) {

                                      SrcTy.getSimpleVT()))
      return AdjustCost(Entry->Cost);

  if (SrcTy.isFloatingPoint() && ST->hasNEON()) {

                                      SrcTy.getSimpleVT()))
      return AdjustCost(Entry->Cost);

  if (SrcTy.isInteger() && ST->hasNEON()) {

                                      SrcTy.getSimpleVT()))
      return AdjustCost(Entry->Cost);

  if (SrcTy.isVector() && ST->hasMVEIntegerOps()) {

                                      SrcTy.getSimpleVT()))
      return Entry->Cost * ST->getMVEVectorCostFactor(CostKind);

    if (SrcTy.isFixedLengthVector())
      Lanes = SrcTy.getVectorNumElements();

    if (IsLegalFPType(SrcTy) && IsLegalFPType(DstTy))

    return Lanes * CallCost;

      SrcTy.isFixedLengthVector()) {

    if ((SrcTy.getScalarType() == MVT::i8 ||
         SrcTy.getScalarType() == MVT::i16 ||
         SrcTy.getScalarType() == MVT::i32) &&
        SrcTy.getSizeInBits() > 128 &&
      return SrcTy.getVectorNumElements() * 2;

  if (SrcTy.isInteger()) {

                                      SrcTy.getSimpleVT()))
      return AdjustCost(Entry->Cost);

  int BaseCost = ST->hasMVEIntegerOps() && Src->isVectorTy()
                     ? ST->getMVEVectorCostFactor(CostKind)
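// getVectorInstrCost: lane insert/extract. Cores with slow D-subregister
// loads pay extra for InsertElement into small-element vectors; NEON
// insert/extract of <=32-bit elements is costed against cross
// register-bank moves, and on MVE an integer lane move costs roughly four
// times a floating-point one.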
                                               unsigned Index, const Value *Op0,
                                               const Value *Op1) const {

  if (ST->hasSlowLoadDSubregister() && Opcode == Instruction::InsertElement &&
      ValTy->isVectorTy() && ValTy->getScalarSizeInBits() <= 32)

  if (ST->hasNEON() && (Opcode == Instruction::InsertElement ||
                        Opcode == Instruction::ExtractElement)) {

    if (ValTy->isVectorTy() &&
        ValTy->getScalarSizeInBits() <= 32)
      return std::max<InstructionCost>(

  if (ST->hasMVEIntegerOps() && (Opcode == Instruction::InsertElement ||
                                 Opcode == Instruction::ExtractElement)) {

    std::pair<InstructionCost, MVT> LT =
    return LT.first * (ValTy->getScalarType()->isIntegerTy() ? 4 : 1);
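// getCmpSelInstrCost: compares and selects. Thumb1 scalar compares of
// illegal types are pessimised, vector selects on NEON use a small
// per-type table, and min/max/abs select patterns are re-costed as the
// matching intrinsic (smin/smax/umin/umax/minnum/maxnum/abs). MVE compares
// and selects are scaled by the MVE vector cost factor.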
  int ISD = TLI->InstructionOpcodeToISD(Opcode);

      ST->isThumb() && !ValTy->isVectorTy()) {

    if (TLI->getValueType(DL, ValTy, true) == MVT::Other)

  if (ValTy->isIntegerTy(1))

  if ((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) && Sel &&

  if (Sel && ValTy->isVectorTy() &&
      (ValTy->isIntOrIntVectorTy() || ValTy->isFPOrFPVectorTy())) {
    const Value *LHS, *RHS;

      IID = Intrinsic::abs;

      IID = Intrinsic::smin;

      IID = Intrinsic::smax;

      IID = Intrinsic::umin;

      IID = Intrinsic::umax;

      IID = Intrinsic::minnum;

      IID = Intrinsic::maxnum;

  if (ST->hasNEON() && ValTy->isVectorTy() && ISD == ISD::SELECT && CondTy) {

        { ISD::SELECT, MVT::v4i1, MVT::v4i64, 4*4 + 1*2 + 1 },

      EVT SelCondTy = TLI->getValueType(DL, CondTy);
      EVT SelValTy = TLI->getValueType(DL, ValTy);

  if (ST->hasMVEIntegerOps() && ValTy->isVectorTy() &&
      (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&

    if (Opcode == Instruction::FCmp && !ST->hasMVEFloatOps()) {

    int BaseCost = ST->getMVEVectorCostFactor(CostKind);

    if (LT.second.isVector() && LT.second.getVectorNumElements() > 2) {

      return LT.first * BaseCost +

  if (ST->hasMVEIntegerOps() && ValTy->isVectorTy())
    BaseCost = ST->getMVEVectorCostFactor(CostKind);

  unsigned NumVectorInstToHideOverhead = 10;
  int MaxMergeDistance = 64;

  if (ST->hasNEON()) {

    return NumVectorInstToHideOverhead;
    switch (II->getIntrinsicID()) {
    case Intrinsic::arm_mve_vctp8:
    case Intrinsic::arm_mve_vctp16:
    case Intrinsic::arm_mve_vctp32:
    case Intrinsic::arm_mve_vctp64:
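// isLegalMaskedLoad/Store: MVE masked memory operations are legal for
// 128-bit vectors whose element width is 8, or 16/32 with at least natural
// element alignment; tiny 2-element vectors and non-128-bit FP vectors are
// rejected.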
  if (VecTy->getNumElements() == 2)

  if (VecWidth != 128 && VecTy->getElementType()->isFloatingPointTy())

  return (EltWidth == 32 && Alignment >= 4) ||
         (EltWidth == 16 && Alignment >= 2) || (EltWidth == 8);

  unsigned EltWidth = Ty->getScalarSizeInBits();
  return ((EltWidth == 32 && Alignment >= 4) ||
          (EltWidth == 16 && Alignment >= 2) || EltWidth == 8);
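// getNumMemOps: for a memcpy/memmove/memset with a constant length, ask the
// lowering (findOptimalMemOpLowering) how many load/store operations it
// would emit under the per-target store limits and report that count;
// Factor (2 by default) accounts for a copy needing both a load and a
// store.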
  unsigned DstAddrSpace = ~0u;
  unsigned SrcAddrSpace = ~0u;
  const Function *F = I->getParent()->getParent();

    const unsigned Size = C->getValue().getZExtValue();
    const Align DstAlign = MC->getDestAlign().valueOrOne();
    const Align SrcAlign = MC->getSourceAlign().valueOrOne();

    DstAddrSpace = MC->getDestAddressSpace();
    SrcAddrSpace = MC->getSourceAddressSpace();

    const unsigned Size = C->getValue().getZExtValue();
    const Align DstAlign = MS->getDestAlign().valueOrOne();

    DstAddrSpace = MS->getDestAddressSpace();

  unsigned Limit, Factor = 2;
  switch (I->getIntrinsicID()) {
  case Intrinsic::memcpy:
    Limit = TLI->getMaxStoresPerMemcpy(F->hasMinSize());
    break;
  case Intrinsic::memmove:
    Limit = TLI->getMaxStoresPerMemmove(F->hasMinSize());
    break;
  case Intrinsic::memset:
    Limit = TLI->getMaxStoresPerMemset(F->hasMinSize());
    break;
  std::vector<EVT> MemOps;

  if (getTLI()->findOptimalMemOpLowering(C, MemOps, Limit, MOp, DstAddrSpace,
                                         SrcAddrSpace, F->getAttributes()))
    return MemOps.size() * Factor;
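// getShuffleCost: NEON shuffles are priced from per-kind cost tables; on
// MVE, 128-bit interleave/de-interleave masks with 8/16/32-bit elements
// that match the supported interleave factors (2 or 4), plus masks the
// backend recognises such as VREV patterns, get the MVE vector cost factor.
// Everything else falls back to the generic scalarised estimate.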
1251 "Expected the Mask to match the return size if given");
1253 "Expected the same scalar types");
1258 if (IsExtractSubvector)
1260 if (ST->hasNEON()) {
1277 if (
const auto *Entry =
1279 return LT.first * Entry->Cost;
1298 if (
const auto *Entry =
1300 return LT.first * Entry->Cost;
1324 return LT.first * Entry->Cost;
1327 if (ST->hasMVEIntegerOps()) {
1340 return LT.first * Entry->Cost * ST->getMVEVectorCostFactor(
CostKind);
1343 if (!Mask.empty()) {
1350 (LT.second.getScalarSizeInBits() == 8 ||
1351 LT.second.getScalarSizeInBits() == 16 ||
1352 LT.second.getScalarSizeInBits() == 32) &&
1353 LT.second.getSizeInBits() == 128 &&
1354 ((TLI->getMaxSupportedInterleaveFactor() >= 2 &&
1356 (TLI->getMaxSupportedInterleaveFactor() == 4 &&
1358 return ST->getMVEVectorCostFactor(
CostKind) *
1359 std::max<InstructionCost>(1, LT.first / 4);
1366 (LT.second.getScalarSizeInBits() == 8 ||
1367 LT.second.getScalarSizeInBits() == 16 ||
1368 LT.second.getScalarSizeInBits() == 32) &&
1369 LT.second.getSizeInBits() == 128 &&
1370 ((TLI->getMaxSupportedInterleaveFactor() >= 2 &&
1372 Mask, 2, SrcTy->getElementCount().getKnownMinValue() * 2)) ||
1373 (TLI->getMaxSupportedInterleaveFactor() == 4 &&
1375 Mask, 4, SrcTy->getElementCount().getKnownMinValue() * 2))))
1376 return ST->getMVEVectorCostFactor(
CostKind) * LT.first;
1378 if (LT.second.isVector() &&
1379 Mask.size() <= LT.second.getVectorNumElements() &&
1382 return ST->getMVEVectorCostFactor(
CostKind) * LT.first;
1387 if (IsExtractSubvector)
1389 int BaseCost = ST->hasMVEIntegerOps() && SrcTy->isVectorTy()
1390 ? ST->getMVEVectorCostFactor(
CostKind)
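// getArithmeticInstrCost: NEON has no vector integer divide or remainder,
// so SDIV/UDIV/SREM/UREM are costed as one library call per lane
// (FunctionCallDivCost), except for small element types where a
// reciprocal-estimate sequence (ReciprocalDivCost) is cheaper; see the
// table below.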
  int ISDOpcode = TLI->InstructionOpcodeToISD(Opcode);

  switch (ISDOpcode) {

  if (ST->hasNEON()) {
    const unsigned FunctionCallDivCost = 20;
    const unsigned ReciprocalDivCost = 10;

        {ISD::SDIV, MVT::v1i64, 1 * FunctionCallDivCost},
        {ISD::UDIV, MVT::v1i64, 1 * FunctionCallDivCost},
        {ISD::SREM, MVT::v1i64, 1 * FunctionCallDivCost},
        {ISD::UREM, MVT::v1i64, 1 * FunctionCallDivCost},
        {ISD::SDIV, MVT::v2i32, 2 * FunctionCallDivCost},
        {ISD::UDIV, MVT::v2i32, 2 * FunctionCallDivCost},
        {ISD::SREM, MVT::v2i32, 2 * FunctionCallDivCost},
        {ISD::UREM, MVT::v2i32, 2 * FunctionCallDivCost},
        {ISD::SDIV, MVT::v4i16, ReciprocalDivCost},
        {ISD::UDIV, MVT::v4i16, ReciprocalDivCost},
        {ISD::SREM, MVT::v4i16, 4 * FunctionCallDivCost},
        {ISD::UREM, MVT::v4i16, 4 * FunctionCallDivCost},
        {ISD::SDIV, MVT::v8i8, ReciprocalDivCost},
        {ISD::UDIV, MVT::v8i8, ReciprocalDivCost},
        {ISD::SREM, MVT::v8i8, 8 * FunctionCallDivCost},
        {ISD::UREM, MVT::v8i8, 8 * FunctionCallDivCost},
        {ISD::SDIV, MVT::v2i64, 2 * FunctionCallDivCost},
        {ISD::UDIV, MVT::v2i64, 2 * FunctionCallDivCost},
        {ISD::SREM, MVT::v2i64, 2 * FunctionCallDivCost},
        {ISD::UREM, MVT::v2i64, 2 * FunctionCallDivCost},
        {ISD::SDIV, MVT::v4i32, 4 * FunctionCallDivCost},
        {ISD::UDIV, MVT::v4i32, 4 * FunctionCallDivCost},
        {ISD::SREM, MVT::v4i32, 4 * FunctionCallDivCost},
        {ISD::UREM, MVT::v4i32, 4 * FunctionCallDivCost},
        {ISD::SDIV, MVT::v8i16, 8 * FunctionCallDivCost},
        {ISD::UDIV, MVT::v8i16, 8 * FunctionCallDivCost},
        {ISD::SREM, MVT::v8i16, 8 * FunctionCallDivCost},
        {ISD::UREM, MVT::v8i16, 8 * FunctionCallDivCost},
        {ISD::SDIV, MVT::v16i8, 16 * FunctionCallDivCost},
        {ISD::UDIV, MVT::v16i8, 16 * FunctionCallDivCost},
        {ISD::SREM, MVT::v16i8, 16 * FunctionCallDivCost},
        {ISD::UREM, MVT::v16i8, 16 * FunctionCallDivCost},

    if (const auto *Entry = CostTableLookup(CostTbl, ISDOpcode, LT.second))
      return LT.first * Entry->Cost;

      Opcode, Ty, CostKind, Op1Info, Op2Info);
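// LooksLikeAFreeShift: on ARM and Thumb2 (but not Thumb1 or for vectors), a
// shift whose only user is an ALU instruction (add/sub/and/xor/or/icmp) can
// be folded into that instruction's shifter operand and is treated as free.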
  auto LooksLikeAFreeShift = [&]() {
    if (ST->isThumb1Only() || Ty->isVectorTy())

    case Instruction::Add:
    case Instruction::Sub:
    case Instruction::And:
    case Instruction::Xor:
    case Instruction::Or:
    case Instruction::ICmp:

  if (LooksLikeAFreeShift())

  auto MulInDSPMLALPattern = [&](const Instruction *I, unsigned Opcode,

    if (Opcode != Instruction::Mul)

    if (Ty->isVectorTy())

    auto ValueOpcodesEqual = [](const Value *LHS, const Value *RHS) -> bool {

    auto IsExtInst = [](const Value *V) -> bool {

    auto IsExtensionFromHalf = [](const Value *V) -> bool {

      Value *Op0 = BinOp->getOperand(0);
      Value *Op1 = BinOp->getOperand(1);
      if (IsExtInst(Op0) && IsExtInst(Op1) && ValueOpcodesEqual(Op0, Op1)) {

        if (!I->getType()->isIntegerTy(32) || !IsExtensionFromHalf(Op0) ||
            !IsExtensionFromHalf(Op1))

        for (auto *U : I->users())

  if (MulInDSPMLALPattern(CxtI, Opcode, Ty))

  if (ST->hasMVEIntegerOps() && Ty->isVectorTy())
    BaseCost = ST->getMVEVectorCostFactor(CostKind);

  if (TLI->isOperationLegalOrCustomOrPromote(ISDOpcode, LT.second))
    return LT.first * BaseCost;

    unsigned Num = VTy->getNumElements();
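// getMemoryOpCost: loads/stores of types the target cannot handle natively,
// or NEON vector accesses that are not 16-byte aligned, cost extra; an MVE
// extending load or truncating store that matches a single instruction is
// charged just the MVE vector cost factor.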
  if (TLI->getValueType(DL, Src, true) == MVT::Other)

  if (ST->hasNEON() && Src->isVectorTy() && Alignment != Align(16) &&

    return LT.first * 4;

      ((Opcode == Instruction::Load && I->hasOneUse() &&

        Opcode == Instruction::Load
            ? (*I->user_begin())->getType()

      return ST->getMVEVectorCostFactor(CostKind);

  int BaseCost = ST->hasMVEIntegerOps() && Src->isVectorTy()
                     ? ST->getMVEVectorCostFactor(CostKind)

  switch (MICA.getID()) {
  case Intrinsic::masked_scatter:
  case Intrinsic::masked_gather:

  case Intrinsic::masked_load:
  case Intrinsic::masked_store:

  unsigned IID = MICA.getID();

  if (ST->hasMVEIntegerOps()) {
    if (IID == Intrinsic::masked_load &&
      return ST->getMVEVectorCostFactor(CostKind);

    if (IID == Intrinsic::masked_store &&
      return ST->getMVEVectorCostFactor(CostKind);
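// getInterleavedMemoryOpCost: a strided access whose factor is within the
// maximum supported interleave factor, with non-64-bit elements and no
// masking, maps directly to the interleaving load/store forms, so it is
// charged Factor * BaseCost per interleaved access; otherwise the generic
// implementation is used.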
    bool UseMaskForCond, bool UseMaskForGaps) const {
  assert(Factor >= 2 && "Invalid interleave factor");

  bool EltIs64Bits = DL.getTypeSizeInBits(VecTy->getScalarType()) == 64;

  if (Factor <= TLI->getMaxSupportedInterleaveFactor() && !EltIs64Bits &&
      !UseMaskForCond && !UseMaskForGaps) {

        ST->hasMVEIntegerOps() ? ST->getMVEVectorCostFactor(CostKind) : 1;
    if (NumElts % Factor == 0 &&
        TLI->isLegalInterleavedAccessType(Factor, SubVecTy, Alignment, DL))
      return Factor * BaseCost * TLI->getNumInterleavedAccesses(SubVecTy, DL);

    if (ST->hasMVEIntegerOps() && Factor == 2 && NumElts / Factor > 2 &&
        DL.getTypeSizeInBits(SubVecTy).getFixedValue() <= 64)
      return 2 * BaseCost;

                                           UseMaskForCond, UseMaskForGaps);
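// getGatherScatterOpCost: MVE gathers/scatters are cheap only when the
// element size and alignment are supported and any extension around the
// access is one the widening gather / narrowing scatter forms can absorb;
// otherwise the estimate is essentially scalarised (about one operation per
// lane, plus a penalty when the mask is variable).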
  unsigned NumElems = VTy->getNumElements();
  unsigned EltSize = VTy->getScalarSizeInBits();

      NumElems * LT.first * ST->getMVEVectorCostFactor(CostKind);

      NumElems * LT.first + (VariableMask ? NumElems * 5 : 0) +

  if (EltSize < 8 || Alignment < EltSize / 8)

  unsigned ExtSize = EltSize;

  if ((I->getOpcode() == Instruction::Load ||

    const User *Us = *I->users().begin();

      if (((TypeSize == 32 && (EltSize == 8 || EltSize == 16)) ||
           (TypeSize == 16 && EltSize == 8)) &&

  if ((I->getOpcode() == Instruction::Store ||

    unsigned TypeSize = T->getOperand(0)->getType()->getScalarSizeInBits();
    if (((EltSize == 16 && TypeSize == 32) ||

  if (ExtSize * NumElems != 128 || NumElems < 4)

  if (ExtSize != 8 && ExtSize != 16)

    Ptr = BC->getOperand(0);

    if (GEP->getNumOperands() != 2)

    unsigned Scale = DL.getTypeAllocSize(GEP->getResultElementType());

    if (Scale != 1 && Scale * 8 != ExtSize)

    if (ZExt->getOperand(0)->getType()->getScalarSizeInBits() <= ExtSize)
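// Reduction costs: vectors wider than the target can handle (128 bits with
// MVE, 64 with NEON) are modelled as being halved repeatedly, paying a
// vector operation per step, with the final horizontal step and scalar
// extract added on top. The extended and multiply-accumulate reduction
// variants are charged the MVE vector cost factor for the legal 128-bit
// types.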
                                             std::optional<FastMathFlags> FMF,

  EVT ValVT = TLI->getValueType(DL, ValTy);
  int ISD = TLI->InstructionOpcodeToISD(Opcode);

      ((EltSize == 32 && ST->hasVFP2Base()) ||
       (EltSize == 64 && ST->hasFP64()) ||
       (EltSize == 16 && ST->hasFullFP16()))) {

    unsigned VecLimit = ST->hasMVEFloatOps() ? 128 : (ST->hasNEON() ? 64 : -1);

        NumElts * EltSize > VecLimit) {

      VecCost += ST->getMVEVectorCostFactor(CostKind) * 2;

      ExtractCost = NumElts / 2;

    return VecCost + ExtractCost +

      (EltSize == 64 || EltSize == 32 || EltSize == 16 || EltSize == 8)) {

        ST->hasMVEIntegerOps() ? 128 : (ST->hasNEON() ? 64 : -1);

    while (isPowerOf2_32(NumElts) && NumElts * EltSize > VecLimit) {

        NumElts * EltSize == 64) {

      VecCost += ST->getMVEVectorCostFactor(CostKind) +

    return VecCost + ExtractCost +
        Opcode, ValTy->getElementType(), CostKind);

      return Entry->Cost * ST->getMVEVectorCostFactor(CostKind) * LT.first;

    unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *ValTy,

  EVT ValVT = TLI->getValueType(DL, ValTy);
  EVT ResVT = TLI->getValueType(DL, ResTy);

  int ISD = TLI->InstructionOpcodeToISD(Opcode);

      ((LT.second == MVT::v16i8 && RevVTSize <= 32) ||
       (LT.second == MVT::v8i16 && RevVTSize <= 32) ||
       (LT.second == MVT::v4i32 && RevVTSize <= 64)))
    return ST->getMVEVectorCostFactor(CostKind) * LT.first;

  if (RedOpcode != Instruction::Add)

  EVT ValVT = TLI->getValueType(DL, ValTy);
  EVT ResVT = TLI->getValueType(DL, ResTy);

      ((LT.second == MVT::v16i8 && RevVTSize <= 32) ||
       (LT.second == MVT::v8i16 && RevVTSize <= 64) ||
       (LT.second == MVT::v4i32 && RevVTSize <= 64)))
    return ST->getMVEVectorCostFactor(CostKind) * LT.first;

  EVT ValVT = TLI->getValueType(DL, Ty);

  if ((IID == Intrinsic::minnum || IID == Intrinsic::maxnum) &&

  unsigned VecLimit = ST->hasMVEFloatOps() ? 128 : (ST->hasNEON() ? 64 : -1);

    while (isPowerOf2_32(NumElts) && NumElts * EltSize > VecLimit) {

      VecCost += ST->getMVEVectorCostFactor(CostKind) * 2;

      {Ty->getElementType(), Ty->getElementType()},

    return VecCost + ExtractCost +

  if (IID == Intrinsic::smin || IID == Intrinsic::smax ||
      IID == Intrinsic::umin || IID == Intrinsic::umax) {

      return Entry->Cost * ST->getMVEVectorCostFactor(CostKind) * LT.first;
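// getIntrinsicInstrCost: saturating add/sub is a single instruction on DSP
// cores for signed 32-bit and for 8/16-bit operands, and a single MVE
// instruction for the legal 128-bit vector types; abs/min/max and the FP
// minnum/maxnum intrinsics are likewise one MVE instruction per legal
// vector, and fptosi/fptoui.sat is cheap when the conversion plus
// saturation fits the native float-to-int behaviour.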
  case Intrinsic::get_active_lane_mask:

    if (ST->hasMVEIntegerOps())

  case Intrinsic::sadd_sat:
  case Intrinsic::ssub_sat:
  case Intrinsic::uadd_sat:
  case Intrinsic::usub_sat: {
    bool IsAdd = (Opc == Intrinsic::sadd_sat || Opc == Intrinsic::uadd_sat);
    bool IsSigned = (Opc == Intrinsic::sadd_sat || Opc == Intrinsic::ssub_sat);

    if (IsSigned && ST->hasDSP() && ITy->getBitWidth() == 32)

    if (ST->hasDSP() && (ITy->getBitWidth() == 8 || ITy->getBitWidth() == 16))
    if (!ST->hasMVEIntegerOps())

    if (LT.second == MVT::v4i32 || LT.second == MVT::v8i16 ||
        LT.second == MVT::v16i8) {

      return LT.first * ST->getMVEVectorCostFactor(CostKind) * Instrs;

  case Intrinsic::abs:
  case Intrinsic::smin:
  case Intrinsic::smax:
  case Intrinsic::umin:
  case Intrinsic::umax: {
    if (!ST->hasMVEIntegerOps())

    if (LT.second == MVT::v4i32 || LT.second == MVT::v8i16 ||
        LT.second == MVT::v16i8)
      return LT.first * ST->getMVEVectorCostFactor(CostKind);

  case Intrinsic::minnum:
  case Intrinsic::maxnum: {
    if (!ST->hasMVEFloatOps())

    if (LT.second == MVT::v4f32 || LT.second == MVT::v8f16)
      return LT.first * ST->getMVEVectorCostFactor(CostKind);

  case Intrinsic::fptosi_sat:
  case Intrinsic::fptoui_sat: {

    bool IsSigned = Opc == Intrinsic::fptosi_sat;

    if ((ST->hasVFP2Base() && LT.second == MVT::f32 && MTy == MVT::i32) ||
        (ST->hasFP64() && LT.second == MVT::f64 && MTy == MVT::i32) ||
        (ST->hasFullFP16() && LT.second == MVT::f16 && MTy == MVT::i32))

    if (ST->hasMVEFloatOps() &&
        (LT.second == MVT::v4f32 || LT.second == MVT::v8f16) &&
      return LT.first * ST->getMVEVectorCostFactor(CostKind);

    if (((ST->hasVFP2Base() && LT.second == MVT::f32) ||
         (ST->hasFP64() && LT.second == MVT::f64) ||
         (ST->hasFullFP16() && LT.second == MVT::f16) ||
         (ST->hasMVEFloatOps() &&
          (LT.second == MVT::v4f32 || LT.second == MVT::v8f16))) &&

                                      LT.second.getScalarSizeInBits());

          LT.second.isVector() ? ST->getMVEVectorCostFactor(CostKind) : 1;

                                     LegalTy, {LegalTy, LegalTy});

                                     LegalTy, {LegalTy, LegalTy});

      return LT.first * Cost;
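// isLoweredToCall: report which calls actually turn into real calls at
// lowering. Most llvm.arm.* intrinsics expand inline; the libm-style
// intrinsics (sin, cos, pow, exp, log, the rounding family, lround/lrint)
// become libcalls when the return type needs FP support the subtarget
// lacks, masked load/store/gather/scatter need MVE, and the overflow and
// saturating arithmetic intrinsics listed here are handled inline.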
  if (!F->isIntrinsic())

  if (F->getName().starts_with("llvm.arm"))

  switch (F->getIntrinsicID()) {

  case Intrinsic::powi:
  case Intrinsic::sin:
  case Intrinsic::cos:
  case Intrinsic::sincos:
  case Intrinsic::pow:
  case Intrinsic::log:
  case Intrinsic::log10:
  case Intrinsic::log2:
  case Intrinsic::exp:
  case Intrinsic::exp2:

  case Intrinsic::sqrt:
  case Intrinsic::fabs:
  case Intrinsic::copysign:
  case Intrinsic::floor:
  case Intrinsic::ceil:
  case Intrinsic::trunc:
  case Intrinsic::rint:
  case Intrinsic::nearbyint:
  case Intrinsic::round:
  case Intrinsic::canonicalize:
  case Intrinsic::lround:
  case Intrinsic::llround:
  case Intrinsic::lrint:
  case Intrinsic::llrint:
    if (F->getReturnType()->isDoubleTy() && !ST->hasFP64())

    if (F->getReturnType()->isHalfTy() && !ST->hasFullFP16())

    return !ST->hasFPARMv8Base() && !ST->hasVFP2Base();
  case Intrinsic::masked_store:
  case Intrinsic::masked_load:
  case Intrinsic::masked_gather:
  case Intrinsic::masked_scatter:
    return !ST->hasMVEIntegerOps();
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::usub_with_overflow:
  case Intrinsic::sadd_sat:
  case Intrinsic::uadd_sat:
  case Intrinsic::ssub_sat:
  case Intrinsic::usub_sat:

  unsigned ISD = TLI->InstructionOpcodeToISD(I.getOpcode());
  EVT VT = TLI->getValueType(DL, I.getType(), true);

    switch (II->getIntrinsicID()) {
    case Intrinsic::memcpy:
    case Intrinsic::memset:
    case Intrinsic::memmove:

  switch (I.getOpcode()) {

  case Instruction::FPToSI:
  case Instruction::FPToUI:
  case Instruction::SIToFP:
  case Instruction::UIToFP:
  case Instruction::FPTrunc:
  case Instruction::FPExt:
    return !ST->hasFPARMv8Base();

  if (TLI->useSoftFloat()) {
    switch (I.getOpcode()) {

    case Instruction::Alloca:
    case Instruction::Load:
    case Instruction::Store:
    case Instruction::Select:
    case Instruction::PHI:

  if (I.getType()->isDoubleTy() && !ST->hasFP64())

  if (I.getType()->isHalfTy() && !ST->hasFullFP16())
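// isHardwareLoopProfitable: low-overhead hardware loops require a trip
// count that fits in 32 bits and a loop body with nothing that gets lowered
// to a real call and no pre-existing hardware-loop intrinsics
// (start_loop_iterations / loop_decrement and friends).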
  const SCEV *TripCountSCEV =

    LLVM_DEBUG(dbgs() << "ARMHWLoops: Trip count does not fit into 32bits\n");

      switch (Call->getIntrinsicID()) {

      case Intrinsic::start_loop_iterations:
      case Intrinsic::test_start_loop_iterations:
      case Intrinsic::loop_decrement:
      case Intrinsic::loop_decrement_reg:

  bool IsTailPredLoop = false;
  auto ScanLoop = [&](Loop *L) {
    for (auto *BB : L->getBlocks()) {
      for (auto &I : *BB) {

            II->getIntrinsicID() == Intrinsic::get_active_lane_mask ||
            II->getIntrinsicID() == Intrinsic::arm_mve_vctp8 ||
            II->getIntrinsicID() == Intrinsic::arm_mve_vctp16 ||
            II->getIntrinsicID() == Intrinsic::arm_mve_vctp32 ||
            II->getIntrinsicID() == Intrinsic::arm_mve_vctp64;

  for (auto *Inner : *L)
    if (!ScanLoop(Inner))

      if ((II->getIntrinsicID() == Intrinsic::smin ||
           II->getIntrinsicID() == Intrinsic::smax ||
           II->getIntrinsicID() == Intrinsic::umin ||
           II->getIntrinsicID() == Intrinsic::umax) &&

  LLVM_DEBUG(dbgs() << "Tail-predication: checking allowed instructions\n");

  bool ReductionsDisabled =

  for (auto *I : LiveOuts) {
    if (!I->getType()->isIntegerTy() && !I->getType()->isFloatTy() &&
        !I->getType()->isHalfTy()) {
      LLVM_DEBUG(dbgs() << "Don't tail-predicate loop with non-integer/float "
                           "live-out value\n");

    if (ReductionsDisabled) {

    for (Instruction &I : BB->instructionsWithoutDebug()) {

      if (T->getScalarSizeInBits() > 32) {

      int64_t NextStride =

      if (NextStride == 1) {

      } else if (NextStride == -1 ||

                   << "Consecutive strides of 2 found, vld2/vstr2 can't "
                      "be tail-predicated\n.");

          const SCEV *Step = AR->getStepRecurrence(*PSE.getSE());

                 "tail-predicate\n.");

  LLVM_DEBUG(dbgs() << "tail-predication: all instructions allowed!\n");
  if (!ST->hasMVEIntegerOps())

  if (L->getNumBlocks() > 1) {
    LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: not a single block "

  assert(L->isInnermost() && "preferPredicateOverEpilogue: inner-loop expected");

    LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: hardware-loop is not "

    LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: hardware-loop is not "

    LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: hardware-loop is not "

    return isa<IntrinsicInst>(I) &&
           cast<IntrinsicInst>(I).getIntrinsicID() ==
               Intrinsic::get_active_lane_mask;
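// getUnrollingPreferences: runtime unrolling is only considered on M-class
// cores, for small loops (at most two exiting blocks, few blocks overall)
// that are not being optimised for size and contain no vector code; on
// Thumb1 the number of live-out values further limits the unroll count.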
  if (!ST->isMClass())

  if (L->getHeader()->getParent()->hasOptSize())

  L->getExitingBlocks(ExitingBlocks);

                    << "Blocks: " << L->getNumBlocks() << "\n"
                    << "Exit blocks: " << ExitingBlocks.size() << "\n");

  if (ExitingBlocks.size() > 2)

  if (ST->hasBranchPredictor() && L->getNumBlocks() > 4)

  for (auto *BB : L->getBlocks()) {
    for (auto &I : *BB) {

      if (I.getType()->isVectorTy())

  if (ST->isThumb1Only()) {
    unsigned ExitingValues = 0;

    L->getExitBlocks(ExitBlocks);
    for (auto *Exit : ExitBlocks) {

      unsigned LiveOuts = count_if(Exit->phis(), [](auto &PH) {
        return PH.getNumOperands() != 1 ||
               !isa<GetElementPtrInst>(PH.getOperand(0));

      ExitingValues = ExitingValues < LiveOuts ? LiveOuts : ExitingValues;

  auto *Outer = L->getOutermostLoop();
  if ((L != Outer && Outer != L->getParentLoop()) ||

  if (!ST->hasMVEIntegerOps())

  unsigned ScalarBits = Ty->getScalarSizeInBits();

  return ScalarBits <= 64;

  if (!ST->hasMVEIntegerOps())

                                                 bool HasBaseReg, int64_t Scale,
                                                 unsigned AddrSpace) const {

    return AM.Scale < 0 ? 1 : 0;

  return ST->isThumb2() || ST->hasV8MBaselineOps();

  return ST->hasARMOps();

    return Ext->getType()->getScalarSizeInBits() ==
           2 * Ext->getOperand(0)->getType()->getScalarSizeInBits();
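// isProfitableToSinkOperands: sink operands (typically splats/shuffles of a
// scalar) into the block of the vector instruction that consumes them, so
// instruction selection can fold them as the scalar operand of NEON/MVE
// instructions (vdup-style forms and the predicated MVE intrinsics listed
// below); for non-commutative operations only the second operand qualifies.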
  if (!I->getType()->isVectorTy())

  if (ST->hasNEON()) {
    switch (I->getOpcode()) {
    case Instruction::Sub:
    case Instruction::Add: {

      Ops.push_back(&I->getOperandUse(0));
      Ops.push_back(&I->getOperandUse(1));

  if (!ST->hasMVEIntegerOps())

    if (!I->hasOneUse())

    return Sub->getOpcode() == Instruction::FSub && Sub->getOperand(1) == I;

    switch (I->getOpcode()) {
    case Instruction::Add:
    case Instruction::Mul:
    case Instruction::FAdd:
    case Instruction::ICmp:
    case Instruction::FCmp:

    case Instruction::FMul:
      return !IsFMSMul(I);
    case Instruction::Sub:
    case Instruction::FSub:
    case Instruction::Shl:
    case Instruction::LShr:
    case Instruction::AShr:
      return Operand == 1;
    case Instruction::Call:

        switch (II->getIntrinsicID()) {
        case Intrinsic::fma:

        case Intrinsic::sadd_sat:
        case Intrinsic::uadd_sat:
        case Intrinsic::arm_mve_add_predicated:
        case Intrinsic::arm_mve_mul_predicated:
        case Intrinsic::arm_mve_qadd_predicated:
        case Intrinsic::arm_mve_vhadd:
        case Intrinsic::arm_mve_hadd_predicated:
        case Intrinsic::arm_mve_vqdmull:
        case Intrinsic::arm_mve_vqdmull_predicated:
        case Intrinsic::arm_mve_vqdmulh:
        case Intrinsic::arm_mve_qdmulh_predicated:
        case Intrinsic::arm_mve_vqrdmulh:
        case Intrinsic::arm_mve_qrdmulh_predicated:
        case Intrinsic::arm_mve_fma_predicated:

        case Intrinsic::ssub_sat:
        case Intrinsic::usub_sat:
        case Intrinsic::arm_mve_sub_predicated:
        case Intrinsic::arm_mve_qsub_predicated:
        case Intrinsic::arm_mve_hsub_predicated:
        case Intrinsic::arm_mve_vhsub:
          return Operand == 1;

      if (Shuffle->getOpcode() == Instruction::BitCast)

      if (!IsSinker(I, OpIdx.index()))

      for (Use &U : Op->uses()) {

        if (!IsSinker(Insn, U.getOperandNo()))

      Ops.push_back(&Op->getOperandUse(0));

  unsigned NumBytesToPad = 4 - (Size % 4);
  unsigned NewSize = Size + NumBytesToPad;

  if (NewSize > MaxMemIntrinsicSize)

  return NumBytesToPad;