#include "llvm/IR/IntrinsicsARM.h"

#define DEBUG_TYPE "armtti"

    cl::desc("Enable the generation of masked loads and stores"));

    cl::desc("Disable the generation of low-overhead loops"));

    cl::desc("Enable the generation of WLS loops"));

    cl::desc("Enable the widening of global strings to alignment boundaries"));

        "Threshold for forced unrolling of small loops in Arm architecture"));
  unsigned Alignment = IntrAlign->getLimitedValue() < MemAlign
                           ? MemAlign
                           : IntrAlign->getLimitedValue();

  return Builder.CreateAlignedLoad(II.getType(), II.getArgOperand(0),
  bool MatchExact = (CallerBits & ~InlineFeaturesAllowed) ==
                    (CalleeBits & ~InlineFeaturesAllowed);
  bool MatchSubset = ((CallerBits & CalleeBits) & InlineFeaturesAllowed) ==
                     (CalleeBits & InlineFeaturesAllowed);

  if (!MatchExact || !MatchSubset) {
    dbgs() << "=== Inline compatibility debug ===\n";
    dbgs() << "Caller: " << Caller->getName() << "\n";
    dbgs() << "Callee: " << Callee->getName() << "\n";
    dbgs() << "Only-in-caller bit count: " << ExtraInCaller.count() << "\n";
    dbgs() << "Only-in-callee bit count: " << MissingInCaller.count() << "\n";

    dbgs() << "Only-in-caller feature indices [";
    for (size_t I = 0, E = ExtraInCaller.size(); I < E; ++I) {
      if (ExtraInCaller.test(I)) {

    dbgs() << "Only-in-callee feature indices [";
    for (size_t I = 0, E = MissingInCaller.size(); I < E; ++I) {
      if (MissingInCaller.test(I)) {

    dbgs() << "MatchExact=" << (MatchExact ? "true" : "false")
           << " MatchSubset=" << (MatchSubset ? "true" : "false") << "\n";

  return MatchExact && MatchSubset;
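  // A reading of the two conditions above (a sketch from the visible code,
  // not the full function): MatchExact requires caller and callee to agree on
  // every feature bit outside InlineFeaturesAllowed, while MatchSubset
  // additionally requires every "allowed" feature used by the callee to also
  // be present in the caller; inlining is permitted only when both hold.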
  if (ST->hasMVEIntegerOps())

  if (L->getHeader()->getParent()->hasOptSize())

  if (ST->isMClass() && ST->isThumb2() && L->getNumBlocks() == 1)
std::optional<Instruction *>

  case Intrinsic::arm_neon_vld1: {

  case Intrinsic::arm_neon_vld2:
  case Intrinsic::arm_neon_vld3:
  case Intrinsic::arm_neon_vld4:
  case Intrinsic::arm_neon_vld2lane:
  case Intrinsic::arm_neon_vld3lane:
  case Intrinsic::arm_neon_vld4lane:
  case Intrinsic::arm_neon_vst1:
  case Intrinsic::arm_neon_vst2:
  case Intrinsic::arm_neon_vst3:
  case Intrinsic::arm_neon_vst4:
  case Intrinsic::arm_neon_vst2lane:
  case Intrinsic::arm_neon_vst3lane:
  case Intrinsic::arm_neon_vst4lane: {
    unsigned AlignArg = II.arg_size() - 1;
    Value *AlignArgOp = II.getArgOperand(AlignArg);

  case Intrinsic::arm_neon_vld1x2:
  case Intrinsic::arm_neon_vld1x3:
  case Intrinsic::arm_neon_vld1x4:
  case Intrinsic::arm_neon_vst1x2:
  case Intrinsic::arm_neon_vst1x3:
  case Intrinsic::arm_neon_vst1x4: {
    Align OldAlign = II.getParamAlign(0).valueOrOne();
    if (NewAlign > OldAlign)
  case Intrinsic::arm_mve_pred_i2v: {
    Value *Arg = II.getArgOperand(0);

    if (CI->getValue().trunc(16).isAllOnes()) {

  case Intrinsic::arm_mve_pred_v2i: {
    Value *Arg = II.getArgOperand(0);

    if (II.getMetadata(LLVMContext::MD_range))

    if (auto CurrentRange = II.getRange()) {
      if (Range == CurrentRange)

    II.addRetAttr(Attribute::NoUndef);
  case Intrinsic::arm_mve_vadc:
  case Intrinsic::arm_mve_vadc_predicated: {
    unsigned CarryOp =
        (II.getIntrinsicID() == Intrinsic::arm_mve_vadc_predicated) ? 3 : 2;
    assert(II.getArgOperand(CarryOp)->getType()->getScalarSizeInBits() == 32 &&
           "Bad type for intrinsic!");
  case Intrinsic::arm_mve_vmldava: {
    if (I->hasOneUse()) {
      Value *OpX = I->getOperand(4);
      Value *OpY = I->getOperand(5);

          {I->getOperand(0), I->getOperand(1),
           I->getOperand(2), OpZ, OpX, OpY});
                                         SimplifyAndSetOp) const {

  auto SimplifyNarrowInstrTopBottom = [&](unsigned TopOpc) {

    SimplifyAndSetOp(&II, 0, OrigDemandedElts & DemandedElts, UndefElts);

  switch (II.getIntrinsicID()) {

  case Intrinsic::arm_mve_vcvt_narrow:
    SimplifyNarrowInstrTopBottom(2);
  case Intrinsic::arm_mve_vqmovn:
    SimplifyNarrowInstrTopBottom(4);
  case Intrinsic::arm_mve_vshrn:
    SimplifyNarrowInstrTopBottom(7);
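  // A rough note on the lambda used above: the MVE narrowing intrinsics
  // (vcvt_narrow, vqmovn, vshrn) write only the top or the bottom half of the
  // destination lanes, selected by the top/bottom operand whose index is
  // passed to SimplifyNarrowInstrTopBottom, so only the source elements that
  // feed the demanded half need to be kept.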
  assert(Ty->isIntegerTy());

  unsigned Bits = Ty->getPrimitiveSizeInBits();
  if (Bits == 0 || Imm.getActiveBits() >= 64)

  int64_t SImmVal = Imm.getSExtValue();
  uint64_t ZImmVal = Imm.getZExtValue();
  if (!ST->isThumb()) {
    if ((SImmVal >= 0 && SImmVal < 65536) ||
    return ST->hasV6T2Ops() ? 2 : 3;

  if (ST->isThumb2()) {
    if ((SImmVal >= 0 && SImmVal < 65536) ||
    return ST->hasV6T2Ops() ? 2 : 3;

  if (Bits == 8 || (SImmVal >= 0 && SImmVal < 256))

  if (Imm.isNonNegative() && Imm.getLimitedValue() < 256)

      C->getValue() == Imm && Imm.isNegative() && Imm.isNegatedPowerOf2()) {

  auto isSSatMin = [&](Value *MinInst) {
    Value *MinLHS, *MinRHS;

  if (Imm.getBitWidth() != 64 ||
  if ((Opcode == Instruction::SDiv || Opcode == Instruction::UDiv ||
       Opcode == Instruction::SRem || Opcode == Instruction::URem) &&

  if (Opcode == Instruction::GetElementPtr && Idx != 0)

  if (Opcode == Instruction::And) {
    if (Imm == 255 || Imm == 65535)

  if (Opcode == Instruction::Add)

  if (Opcode == Instruction::ICmp && Imm.isNegative() &&
      Ty->getIntegerBitWidth() == 32) {
    int64_t NegImm = -Imm.getSExtValue();
    if (ST->isThumb2() && NegImm < 1 << 12)
    if (ST->isThumb() && NegImm < 1 << 8)

  if (Opcode == Instruction::Xor && Imm.isAllOnes())

  if (Inst && ((ST->hasV6Ops() && !ST->isThumb()) || ST->isThumb2()) &&
      Ty->getIntegerBitWidth() <= 32) {

  if (Inst && Opcode == Instruction::ICmp && Idx == 1 && Imm.isAllOnes()) {

      (ST->hasNEON() || ST->hasMVEIntegerOps())) {
  int ISD = TLI->InstructionOpcodeToISD(Opcode);

    return Cost == 0 ? 0 : 1;

  auto IsLegalFPType = [this](EVT VT) {
    return (EltVT == MVT::f32 && ST->hasVFP2Base()) ||
           (EltVT == MVT::f64 && ST->hasFP64()) ||
           (EltVT == MVT::f16 && ST->hasFullFP16());

  EVT SrcTy = TLI->getValueType(DL, Src);
  EVT DstTy = TLI->getValueType(DL, Dst);

  if (!SrcTy.isSimple() || !DstTy.isSimple())

  if ((ST->hasMVEIntegerOps() &&
       (Opcode == Instruction::Trunc || Opcode == Instruction::ZExt ||
        Opcode == Instruction::SExt)) ||
      (ST->hasMVEFloatOps() &&
       (Opcode == Instruction::FPExt || Opcode == Instruction::FPTrunc) &&
       IsLegalFPType(SrcTy) && IsLegalFPType(DstTy)))
        ST->getMVEVectorCostFactor(CostKind);

            LoadConversionTbl, ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT()))
      return AdjustCost(Entry->Cost);
  if (SrcTy.isVector() && ST->hasMVEIntegerOps()) {
    if (const auto *Entry =
      return Entry->Cost * ST->getMVEVectorCostFactor(CostKind);

  if (SrcTy.isVector() && ST->hasMVEFloatOps()) {
    if (const auto *Entry =
      return Entry->Cost * ST->getMVEVectorCostFactor(CostKind);

  if (SrcTy.isVector() && ST->hasMVEIntegerOps()) {
    if (const auto *Entry =
      return Entry->Cost * ST->getMVEVectorCostFactor(CostKind);

  if (SrcTy.isVector() && ST->hasMVEFloatOps()) {
    if (const auto *Entry =
      return Entry->Cost * ST->getMVEVectorCostFactor(CostKind);
      I && I->hasOneUse() && ST->hasNEON() && SrcTy.isVector()) {

        {ISD::ADD, MVT::v4i32, MVT::v4i16, 0},
        {ISD::ADD, MVT::v8i16, MVT::v8i8, 0},
        {ISD::SUB, MVT::v4i32, MVT::v4i16, 0},
        {ISD::SUB, MVT::v8i16, MVT::v8i8, 0},
        {ISD::MUL, MVT::v4i32, MVT::v4i16, 0},
        {ISD::MUL, MVT::v8i16, MVT::v8i8, 0},
        {ISD::SHL, MVT::v4i32, MVT::v4i16, 0},
        {ISD::SHL, MVT::v8i16, MVT::v8i8, 0},

    int UserISD = TLI->InstructionOpcodeToISD(User->getOpcode());

                                               SrcTy.getSimpleVT())) {
      return AdjustCost(Entry->Cost);

  if (Src->isVectorTy() && ST->hasNEON() &&

      return AdjustCost(LT.first * Entry->Cost);
  if (SrcTy.isVector() && ST->hasNEON()) {
                                                   SrcTy.getSimpleVT()))
      return AdjustCost(Entry->Cost);

  if (SrcTy.isFloatingPoint() && ST->hasNEON()) {
                                                   SrcTy.getSimpleVT()))
      return AdjustCost(Entry->Cost);

  if (SrcTy.isInteger() && ST->hasNEON()) {
                                                   SrcTy.getSimpleVT()))
      return AdjustCost(Entry->Cost);

  if (SrcTy.isVector() && ST->hasMVEIntegerOps()) {
                                                   SrcTy.getSimpleVT()))
      return Entry->Cost * ST->getMVEVectorCostFactor(CostKind);

    if (SrcTy.isFixedLengthVector())
      Lanes = SrcTy.getVectorNumElements();

    if (IsLegalFPType(SrcTy) && IsLegalFPType(DstTy))

    return Lanes * CallCost;

      SrcTy.isFixedLengthVector()) {

    if ((SrcTy.getScalarType() == MVT::i8 ||
         SrcTy.getScalarType() == MVT::i16 ||
         SrcTy.getScalarType() == MVT::i32) &&
        SrcTy.getSizeInBits() > 128 &&
      return SrcTy.getVectorNumElements() * 2;

  if (SrcTy.isInteger()) {
                                                   SrcTy.getSimpleVT()))
      return AdjustCost(Entry->Cost);

  int BaseCost = ST->hasMVEIntegerOps() && Src->isVectorTy()
                     ? ST->getMVEVectorCostFactor(CostKind)
  if (ST->hasSlowLoadDSubregister() && Opcode == Instruction::InsertElement &&
      ValTy->isVectorTy() && ValTy->getScalarSizeInBits() <= 32)

  if (ST->hasNEON() && (Opcode == Instruction::InsertElement ||
                        Opcode == Instruction::ExtractElement)) {

    if (ValTy->isVectorTy() && ValTy->getScalarSizeInBits() <= 32)
      return std::max<InstructionCost>(

  if (ST->hasMVEIntegerOps() && (Opcode == Instruction::InsertElement ||
                                 Opcode == Instruction::ExtractElement)) {
    std::pair<InstructionCost, MVT> LT =
    return LT.first * (ValTy->getScalarType()->isIntegerTy() ? 4 : 1);
  int ISD = TLI->InstructionOpcodeToISD(Opcode);

      ST->isThumb() && !ValTy->isVectorTy()) {
    if (TLI->getValueType(DL, ValTy, true) == MVT::Other)

    if (ValTy->isIntegerTy(1))

  if ((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) && Sel &&

  if (Sel && ValTy->isVectorTy() &&
      (ValTy->isIntOrIntVectorTy() || ValTy->isFPOrFPVectorTy())) {
    const Value *LHS, *RHS;
      IID = Intrinsic::abs;
      IID = Intrinsic::smin;
      IID = Intrinsic::smax;
      IID = Intrinsic::umin;
      IID = Intrinsic::umax;
      IID = Intrinsic::minnum;
      IID = Intrinsic::maxnum;

  if (ST->hasNEON() && ValTy->isVectorTy() && ISD == ISD::SELECT && CondTy) {
        {ISD::SELECT, MVT::v4i1, MVT::v4i64, 4 * 4 + 1 * 2 + 1},
    EVT SelCondTy = TLI->getValueType(DL, CondTy);
    EVT SelValTy = TLI->getValueType(DL, ValTy);

  if (ST->hasMVEIntegerOps() && ValTy->isVectorTy() &&
      (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&

    if (Opcode == Instruction::FCmp && !ST->hasMVEFloatOps()) {

    int BaseCost = ST->getMVEVectorCostFactor(CostKind);

    if (LT.second.isVector() && LT.second.getVectorNumElements() > 2) {
      return LT.first * BaseCost +

  if (ST->hasMVEIntegerOps() && ValTy->isVectorTy())
    BaseCost = ST->getMVEVectorCostFactor(CostKind);
  unsigned NumVectorInstToHideOverhead = 10;
  int MaxMergeDistance = 64;

  if (ST->hasNEON()) {
      return NumVectorInstToHideOverhead;

    switch (II->getIntrinsicID()) {
    case Intrinsic::arm_mve_vctp8:
    case Intrinsic::arm_mve_vctp16:
    case Intrinsic::arm_mve_vctp32:
    case Intrinsic::arm_mve_vctp64:
  if (VecTy->getNumElements() == 2)

  if (VecWidth != 128 && VecTy->getElementType()->isFloatingPointTy())

  return (EltWidth == 32 && Alignment >= 4) ||
         (EltWidth == 16 && Alignment >= 2) || (EltWidth == 8);

  unsigned EltWidth = Ty->getScalarSizeInBits();
  return ((EltWidth == 32 && Alignment >= 4) ||
          (EltWidth == 16 && Alignment >= 2) || EltWidth == 8);
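  // As the two returns above show, masked and gather/scatter accesses are
  // only treated as legal when the element is naturally aligned: 32-bit
  // elements need at least 4-byte alignment, 16-bit elements at least 2-byte
  // alignment, and 8-bit elements are always acceptable.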
  unsigned DstAddrSpace = ~0u;
  unsigned SrcAddrSpace = ~0u;
  const Function *F = I->getParent()->getParent();

    const unsigned Size = C->getValue().getZExtValue();
    const Align DstAlign = MC->getDestAlign().valueOrOne();
    const Align SrcAlign = MC->getSourceAlign().valueOrOne();
    DstAddrSpace = MC->getDestAddressSpace();
    SrcAddrSpace = MC->getSourceAddressSpace();

    const unsigned Size = C->getValue().getZExtValue();
    const Align DstAlign = MS->getDestAlign().valueOrOne();
    DstAddrSpace = MS->getDestAddressSpace();

  unsigned Limit, Factor = 2;
  switch (I->getIntrinsicID()) {
  case Intrinsic::memcpy:
    Limit = TLI->getMaxStoresPerMemcpy(F->hasMinSize());
  case Intrinsic::memmove:
    Limit = TLI->getMaxStoresPerMemmove(F->hasMinSize());
  case Intrinsic::memset:
    Limit = TLI->getMaxStoresPerMemset(F->hasMinSize());

  std::vector<EVT> MemOps;
  if (getTLI()->findOptimalMemOpLowering(C, MemOps, Limit, MOp, DstAddrSpace,
                                         SrcAddrSpace, F->getAttributes(),
    return MemOps.size() * Factor;
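  // A rough reading of getNumMemOps: the intrinsic is decomposed into the
  // list of load/store types the lowering would emit (MemOps), and the result
  // is MemOps.size() scaled by Factor, which is 2 for memcpy/memmove because
  // each chunk needs both a load and a store; memset only stores, so it is
  // cheaper per chunk.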
1302 "Expected the Mask to match the return size if given");
1304 "Expected the same scalar types");
1309 if (IsExtractSubvector)
1311 if (ST->hasNEON()) {
1328 if (
const auto *Entry =
1330 return LT.first * Entry->Cost;
1349 if (
const auto *Entry =
1351 return LT.first * Entry->Cost;
1375 return LT.first * Entry->Cost;
1378 if (ST->hasMVEIntegerOps()) {
1391 return LT.first * Entry->Cost * ST->getMVEVectorCostFactor(
CostKind);
1394 if (!Mask.empty()) {
1401 (LT.second.getScalarSizeInBits() == 8 ||
1402 LT.second.getScalarSizeInBits() == 16 ||
1403 LT.second.getScalarSizeInBits() == 32) &&
1404 LT.second.getSizeInBits() == 128 &&
1405 ((TLI->getMaxSupportedInterleaveFactor() >= 2 &&
1407 (TLI->getMaxSupportedInterleaveFactor() == 4 &&
1409 return ST->getMVEVectorCostFactor(
CostKind) *
1410 std::max<InstructionCost>(1, LT.first / 4);
1417 (LT.second.getScalarSizeInBits() == 8 ||
1418 LT.second.getScalarSizeInBits() == 16 ||
1419 LT.second.getScalarSizeInBits() == 32) &&
1420 LT.second.getSizeInBits() == 128 &&
1421 ((TLI->getMaxSupportedInterleaveFactor() >= 2 &&
1423 Mask, 2, SrcTy->getElementCount().getKnownMinValue() * 2)) ||
1424 (TLI->getMaxSupportedInterleaveFactor() == 4 &&
1426 Mask, 4, SrcTy->getElementCount().getKnownMinValue() * 2))))
1427 return ST->getMVEVectorCostFactor(
CostKind) * LT.first;
1429 if (LT.second.isVector() &&
1430 Mask.size() <= LT.second.getVectorNumElements() &&
1433 return ST->getMVEVectorCostFactor(
CostKind) * LT.first;
1438 if (IsExtractSubvector)
1440 int BaseCost = ST->hasMVEIntegerOps() && SrcTy->isVectorTy()
1441 ? ST->getMVEVectorCostFactor(
CostKind)
  int ISDOpcode = TLI->InstructionOpcodeToISD(Opcode);

  switch (ISDOpcode) {

  if (ST->hasNEON()) {
    const unsigned FunctionCallDivCost = 20;
    const unsigned ReciprocalDivCost = 10;

        {ISD::SDIV, MVT::v1i64, 1 * FunctionCallDivCost},
        {ISD::UDIV, MVT::v1i64, 1 * FunctionCallDivCost},
        {ISD::SREM, MVT::v1i64, 1 * FunctionCallDivCost},
        {ISD::UREM, MVT::v1i64, 1 * FunctionCallDivCost},
        {ISD::SDIV, MVT::v2i32, 2 * FunctionCallDivCost},
        {ISD::UDIV, MVT::v2i32, 2 * FunctionCallDivCost},
        {ISD::SREM, MVT::v2i32, 2 * FunctionCallDivCost},
        {ISD::UREM, MVT::v2i32, 2 * FunctionCallDivCost},
        {ISD::SDIV, MVT::v4i16, ReciprocalDivCost},
        {ISD::UDIV, MVT::v4i16, ReciprocalDivCost},
        {ISD::SREM, MVT::v4i16, 4 * FunctionCallDivCost},
        {ISD::UREM, MVT::v4i16, 4 * FunctionCallDivCost},
        {ISD::SDIV, MVT::v8i8, ReciprocalDivCost},
        {ISD::UDIV, MVT::v8i8, ReciprocalDivCost},
        {ISD::SREM, MVT::v8i8, 8 * FunctionCallDivCost},
        {ISD::UREM, MVT::v8i8, 8 * FunctionCallDivCost},
        {ISD::SDIV, MVT::v2i64, 2 * FunctionCallDivCost},
        {ISD::UDIV, MVT::v2i64, 2 * FunctionCallDivCost},
        {ISD::SREM, MVT::v2i64, 2 * FunctionCallDivCost},
        {ISD::UREM, MVT::v2i64, 2 * FunctionCallDivCost},
        {ISD::SDIV, MVT::v4i32, 4 * FunctionCallDivCost},
        {ISD::UDIV, MVT::v4i32, 4 * FunctionCallDivCost},
        {ISD::SREM, MVT::v4i32, 4 * FunctionCallDivCost},
        {ISD::UREM, MVT::v4i32, 4 * FunctionCallDivCost},
        {ISD::SDIV, MVT::v8i16, 8 * FunctionCallDivCost},
        {ISD::UDIV, MVT::v8i16, 8 * FunctionCallDivCost},
        {ISD::SREM, MVT::v8i16, 8 * FunctionCallDivCost},
        {ISD::UREM, MVT::v8i16, 8 * FunctionCallDivCost},
        {ISD::SDIV, MVT::v16i8, 16 * FunctionCallDivCost},
        {ISD::UDIV, MVT::v16i8, 16 * FunctionCallDivCost},
        {ISD::SREM, MVT::v16i8, 16 * FunctionCallDivCost},
        {ISD::UREM, MVT::v16i8, 16 * FunctionCallDivCost},

    if (const auto *Entry = CostTableLookup(CostTbl, ISDOpcode, LT.second))
      return LT.first * Entry->Cost;
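    // Reading of the table above: NEON has no vector integer divide, so each
    // SDIV/UDIV/SREM/UREM lane is priced as a scalar library call
    // (FunctionCallDivCost per lane); the narrow v4i16/v8i8 divides get the
    // cheaper ReciprocalDivCost, presumably because a reciprocal-style
    // expansion is available for those element widths.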
                                               Opcode, Ty, CostKind, Op1Info, Op2Info);
  auto LooksLikeAFreeShift = [&]() {
    if (ST->isThumb1Only() || Ty->isVectorTy())

    case Instruction::Add:
    case Instruction::Sub:
    case Instruction::And:
    case Instruction::Xor:
    case Instruction::Or:
    case Instruction::ICmp:

  if (LooksLikeAFreeShift())

  auto MulInDSPMLALPattern = [&](const Instruction *I, unsigned Opcode,

    if (Opcode != Instruction::Mul)
    if (Ty->isVectorTy())

    auto ValueOpcodesEqual = [](const Value *LHS, const Value *RHS) -> bool {
    auto IsExtInst = [](const Value *V) -> bool {
    auto IsExtensionFromHalf = [](const Value *V) -> bool {

      Value *Op0 = BinOp->getOperand(0);
      Value *Op1 = BinOp->getOperand(1);
      if (IsExtInst(Op0) && IsExtInst(Op1) && ValueOpcodesEqual(Op0, Op1)) {
        if (!I->getType()->isIntegerTy(32) || !IsExtensionFromHalf(Op0) ||
            !IsExtensionFromHalf(Op1))
        for (auto *U : I->users())

  if (MulInDSPMLALPattern(CxtI, Opcode, Ty))

  if (ST->hasMVEIntegerOps() && Ty->isVectorTy())
    BaseCost = ST->getMVEVectorCostFactor(CostKind);

  if (TLI->isOperationLegalOrCustomOrPromote(ISDOpcode, LT.second))
    return LT.first * BaseCost;
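  // A hedged illustration of the shape MulInDSPMLALPattern looks for (the IR
  // names below are illustrative, not from the source): a 32-bit multiply
  // whose operands are both extensions from half width, feeding an
  // accumulating add, e.g.
  //   %a32 = sext i16 %a to i32
  //   %b32 = sext i16 %b to i32
  //   %mul = mul i32 %a32, %b32
  //   %acc = add i32 %sum, %mul
  // DSP-enabled cores can fold such a pattern into a single
  // multiply-accumulate (SMLA*/SMLAL-family) instruction, so the multiply is
  // given a reduced cost.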
  unsigned Num = VTy->getNumElements();
  if (TLI->getValueType(DL, Src, true) == MVT::Other)

  if (ST->hasNEON() && Src->isVectorTy() && Alignment != Align(16) &&
    return LT.first * 4;

      ((Opcode == Instruction::Load && I->hasOneUse() &&
        Opcode == Instruction::Load
            ? (*I->user_begin())->getType()
    return ST->getMVEVectorCostFactor(CostKind);

  int BaseCost = ST->hasMVEIntegerOps() && Src->isVectorTy()
                     ? ST->getMVEVectorCostFactor(CostKind)
  switch (MICA.getID()) {
  case Intrinsic::masked_scatter:
  case Intrinsic::masked_gather:
  case Intrinsic::masked_load:
  case Intrinsic::masked_store:

  unsigned IID = MICA.getID();

  if (ST->hasMVEIntegerOps()) {
    if (IID == Intrinsic::masked_load &&
      return ST->getMVEVectorCostFactor(CostKind);
    if (IID == Intrinsic::masked_store &&
      return ST->getMVEVectorCostFactor(CostKind);
                                     bool UseMaskForCond,
                                     bool UseMaskForGaps) const {
  assert(Factor >= 2 && "Invalid interleave factor");

  bool EltIs64Bits = DL.getTypeSizeInBits(VecTy->getScalarType()) == 64;

  if (Factor <= TLI->getMaxSupportedInterleaveFactor() && !EltIs64Bits &&
      !UseMaskForCond && !UseMaskForGaps) {

        ST->hasMVEIntegerOps() ? ST->getMVEVectorCostFactor(CostKind) : 1;
    if (NumElts % Factor == 0 &&
        TLI->isLegalInterleavedAccessType(Factor, SubVecTy, Alignment, DL))
      return Factor * BaseCost * TLI->getNumInterleavedAccesses(SubVecTy, DL);

    if (ST->hasMVEIntegerOps() && Factor == 2 && NumElts / Factor > 2 &&
        DL.getTypeSizeInBits(SubVecTy).getFixedValue() <= 64)
      return 2 * BaseCost;

                                           UseMaskForCond, UseMaskForGaps);
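  // Rough summary of the early returns above: when the interleave factor is
  // supported and there are no 64-bit elements and no masking, the access
  // maps onto VLDn/VSTn and is costed per interleaved access; the Factor == 2
  // case additionally gives small sub-64-bit MVE subvectors a flat cost of
  // two accesses.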
  unsigned NumElems = VTy->getNumElements();
  unsigned EltSize = VTy->getScalarSizeInBits();

      NumElems * LT.first * ST->getMVEVectorCostFactor(CostKind);

      NumElems * LT.first + (VariableMask ? NumElems * 5 : 0) +

  if (EltSize < 8 || Alignment < EltSize / 8)

  unsigned ExtSize = EltSize;

  if ((I->getOpcode() == Instruction::Load ||
    const User *Us = *I->users().begin();
      if (((TypeSize == 32 && (EltSize == 8 || EltSize == 16)) ||
           (TypeSize == 16 && EltSize == 8)) &&

  if ((I->getOpcode() == Instruction::Store ||
    unsigned TypeSize = T->getOperand(0)->getType()->getScalarSizeInBits();
    if (((EltSize == 16 && TypeSize == 32) ||

  if (ExtSize * NumElems != 128 || NumElems < 4)

  if (ExtSize != 8 && ExtSize != 16)

    Ptr = BC->getOperand(0);

  if (GEP->getNumOperands() != 2)

  unsigned Scale = DL.getTypeAllocSize(GEP->getResultElementType());

  if (Scale != 1 && Scale * 8 != ExtSize)

  if (ZExt->getOperand(0)->getType()->getScalarSizeInBits() <= ExtSize)
                                           std::optional<FastMathFlags> FMF,

  EVT ValVT = TLI->getValueType(DL, ValTy);
  int ISD = TLI->InstructionOpcodeToISD(Opcode);

      ((EltSize == 32 && ST->hasVFP2Base()) ||
       (EltSize == 64 && ST->hasFP64()) ||
       (EltSize == 16 && ST->hasFullFP16()))) {

    unsigned VecLimit = ST->hasMVEFloatOps() ? 128 : (ST->hasNEON() ? 64 : -1);
           NumElts * EltSize > VecLimit) {

      VecCost += ST->getMVEVectorCostFactor(CostKind) * 2;

      ExtractCost = NumElts / 2;

    return VecCost + ExtractCost +

      (EltSize == 64 || EltSize == 32 || EltSize == 16 || EltSize == 8)) {
        ST->hasMVEIntegerOps() ? 128 : (ST->hasNEON() ? 64 : -1);
    while (isPowerOf2_32(NumElts) && NumElts * EltSize > VecLimit) {

        NumElts * EltSize == 64) {
      VecCost += ST->getMVEVectorCostFactor(CostKind) +

    return VecCost + ExtractCost +
               Opcode, ValTy->getElementType(), CostKind);

    return Entry->Cost * ST->getMVEVectorCostFactor(CostKind) * LT.first;
    unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *ValTy,

  EVT ValVT = TLI->getValueType(DL, ValTy);
  EVT ResVT = TLI->getValueType(DL, ResTy);

  int ISD = TLI->InstructionOpcodeToISD(Opcode);

      ((LT.second == MVT::v16i8 && RevVTSize <= 32) ||
       (LT.second == MVT::v8i16 && RevVTSize <= 32) ||
       (LT.second == MVT::v4i32 && RevVTSize <= 64)))
    return ST->getMVEVectorCostFactor(CostKind) * LT.first;

  if (RedOpcode != Instruction::Add)

  EVT ValVT = TLI->getValueType(DL, ValTy);
  EVT ResVT = TLI->getValueType(DL, ResTy);

      ((LT.second == MVT::v16i8 && RevVTSize <= 32) ||
       (LT.second == MVT::v8i16 && RevVTSize <= 64) ||
       (LT.second == MVT::v4i32 && RevVTSize <= 64)))
    return ST->getMVEVectorCostFactor(CostKind) * LT.first;
  EVT ValVT = TLI->getValueType(DL, Ty);

  if ((IID == Intrinsic::minnum || IID == Intrinsic::maxnum) &&

    unsigned VecLimit = ST->hasMVEFloatOps() ? 128 : (ST->hasNEON() ? 64 : -1);
    while (isPowerOf2_32(NumElts) && NumElts * EltSize > VecLimit) {

      VecCost += ST->getMVEVectorCostFactor(CostKind) * 2;

                                  {Ty->getElementType(), Ty->getElementType()},

    return VecCost + ExtractCost +

  if (IID == Intrinsic::smin || IID == Intrinsic::smax ||
      IID == Intrinsic::umin || IID == Intrinsic::umax) {

      return Entry->Cost * ST->getMVEVectorCostFactor(CostKind) * LT.first;
  case Intrinsic::get_active_lane_mask:
    if (ST->hasMVEIntegerOps())

  case Intrinsic::sadd_sat:
  case Intrinsic::ssub_sat:
  case Intrinsic::uadd_sat:
  case Intrinsic::usub_sat: {
    bool IsAdd = (Opc == Intrinsic::sadd_sat || Opc == Intrinsic::uadd_sat);
    bool IsSigned = (Opc == Intrinsic::sadd_sat || Opc == Intrinsic::ssub_sat);

      if (IsSigned && ST->hasDSP() && ITy->getBitWidth() == 32)
      if (ST->hasDSP() && (ITy->getBitWidth() == 8 || ITy->getBitWidth() == 16))

    if (!ST->hasMVEIntegerOps())

    if (LT.second == MVT::v4i32 || LT.second == MVT::v8i16 ||
        LT.second == MVT::v16i8) {
      return LT.first * ST->getMVEVectorCostFactor(CostKind) * Instrs;
  case Intrinsic::abs:
  case Intrinsic::smin:
  case Intrinsic::smax:
  case Intrinsic::umin:
  case Intrinsic::umax: {
    if (!ST->hasMVEIntegerOps())

    if (LT.second == MVT::v4i32 || LT.second == MVT::v8i16 ||
        LT.second == MVT::v16i8)
      return LT.first * ST->getMVEVectorCostFactor(CostKind);

  case Intrinsic::minnum:
  case Intrinsic::maxnum: {
    if (!ST->hasMVEFloatOps())
    if (LT.second == MVT::v4f32 || LT.second == MVT::v8f16)
      return LT.first * ST->getMVEVectorCostFactor(CostKind);

  case Intrinsic::fptosi_sat:
  case Intrinsic::fptoui_sat: {
    bool IsSigned = Opc == Intrinsic::fptosi_sat;

    if ((ST->hasVFP2Base() && LT.second == MVT::f32 && MTy == MVT::i32) ||
        (ST->hasFP64() && LT.second == MVT::f64 && MTy == MVT::i32) ||
        (ST->hasFullFP16() && LT.second == MVT::f16 && MTy == MVT::i32))

    if (ST->hasMVEFloatOps() &&
        (LT.second == MVT::v4f32 || LT.second == MVT::v8f16) &&
      return LT.first * ST->getMVEVectorCostFactor(CostKind);

    if (((ST->hasVFP2Base() && LT.second == MVT::f32) ||
         (ST->hasFP64() && LT.second == MVT::f64) ||
         (ST->hasFullFP16() && LT.second == MVT::f16) ||
         (ST->hasMVEFloatOps() &&
          (LT.second == MVT::v4f32 || LT.second == MVT::v8f16))) &&
                                   LT.second.getScalarSizeInBits());
          LT.second.isVector() ? ST->getMVEVectorCostFactor(CostKind) : 1;
                                     LegalTy, {LegalTy, LegalTy});
                                     LegalTy, {LegalTy, LegalTy});
      return LT.first * Cost;
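  // Note on the fptosi_sat/fptoui_sat handling above: when a VCVT with the
  // right scalar or MVE vector type exists, the saturating conversion is
  // native and costed like a plain convert; otherwise it is priced as the
  // convert plus the clamping min/max intrinsics on the legalized type (the
  // two LegalTy cost queries above).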
  if (!F->isIntrinsic())

  if (F->getName().starts_with("llvm.arm"))

  switch (F->getIntrinsicID()) {
  case Intrinsic::powi:
  case Intrinsic::sin:
  case Intrinsic::cos:
  case Intrinsic::sincos:
  case Intrinsic::pow:
  case Intrinsic::log:
  case Intrinsic::log10:
  case Intrinsic::log2:
  case Intrinsic::exp:
  case Intrinsic::exp2:
  case Intrinsic::sqrt:
  case Intrinsic::fabs:
  case Intrinsic::copysign:
  case Intrinsic::floor:
  case Intrinsic::ceil:
  case Intrinsic::trunc:
  case Intrinsic::rint:
  case Intrinsic::nearbyint:
  case Intrinsic::round:
  case Intrinsic::canonicalize:
  case Intrinsic::lround:
  case Intrinsic::llround:
  case Intrinsic::lrint:
  case Intrinsic::llrint:
    if (F->getReturnType()->isDoubleTy() && !ST->hasFP64())
    if (F->getReturnType()->isHalfTy() && !ST->hasFullFP16())
    return !ST->hasFPARMv8Base() && !ST->hasVFP2Base();
  case Intrinsic::masked_store:
  case Intrinsic::masked_load:
  case Intrinsic::masked_gather:
  case Intrinsic::masked_scatter:
    return !ST->hasMVEIntegerOps();
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::usub_with_overflow:
  case Intrinsic::sadd_sat:
  case Intrinsic::uadd_sat:
  case Intrinsic::ssub_sat:
  case Intrinsic::usub_sat:
  unsigned ISD = TLI->InstructionOpcodeToISD(I.getOpcode());
  EVT VT = TLI->getValueType(DL, I.getType(), true);

    switch (II->getIntrinsicID()) {
    case Intrinsic::memcpy:
    case Intrinsic::memset:
    case Intrinsic::memmove:

  switch (I.getOpcode()) {
  case Instruction::FPToSI:
  case Instruction::FPToUI:
  case Instruction::SIToFP:
  case Instruction::UIToFP:
  case Instruction::FPTrunc:
  case Instruction::FPExt:
    return !ST->hasFPARMv8Base();

  if (TLI->useSoftFloat()) {
    switch (I.getOpcode()) {
    case Instruction::Alloca:
    case Instruction::Load:
    case Instruction::Store:
    case Instruction::Select:
    case Instruction::PHI:

  if (I.getType()->isDoubleTy() && !ST->hasFP64())

  if (I.getType()->isHalfTy() && !ST->hasFullFP16())
  const SCEV *TripCountSCEV =

    LLVM_DEBUG(dbgs() << "ARMHWLoops: Trip count does not fit into 32bits\n");

      switch (Call->getIntrinsicID()) {
      case Intrinsic::start_loop_iterations:
      case Intrinsic::test_start_loop_iterations:
      case Intrinsic::loop_decrement:
      case Intrinsic::loop_decrement_reg:

  bool IsTailPredLoop = false;
  auto ScanLoop = [&](Loop *L) {
    for (auto *BB : L->getBlocks()) {
      for (auto &I : *BB) {

            II->getIntrinsicID() == Intrinsic::get_active_lane_mask ||
            II->getIntrinsicID() == Intrinsic::arm_mve_vctp8 ||
            II->getIntrinsicID() == Intrinsic::arm_mve_vctp16 ||
            II->getIntrinsicID() == Intrinsic::arm_mve_vctp32 ||
            II->getIntrinsicID() == Intrinsic::arm_mve_vctp64;

  for (auto *Inner : *L)
    if (!ScanLoop(Inner))
    if ((II->getIntrinsicID() == Intrinsic::smin ||
         II->getIntrinsicID() == Intrinsic::smax ||
         II->getIntrinsicID() == Intrinsic::umin ||
         II->getIntrinsicID() == Intrinsic::umax) &&

  LLVM_DEBUG(dbgs() << "Tail-predication: checking allowed instructions\n");

  bool ReductionsDisabled =

  for (auto *I : LiveOuts) {
    if (!I->getType()->isIntegerTy() && !I->getType()->isFloatTy() &&
        !I->getType()->isHalfTy()) {
      LLVM_DEBUG(dbgs() << "Don't tail-predicate loop with non-integer/float "
                           "live-out value\n");
    if (ReductionsDisabled) {

    for (Instruction &I : BB->instructionsWithoutDebug()) {

      if (T->getScalarSizeInBits() > 32) {

      int64_t NextStride =
      if (NextStride == 1) {
      } else if (NextStride == -1 ||

                 << "Consecutive strides of 2 found, vld2/vstr2 can't "
                    "be tail-predicated\n.");

      const SCEV *Step = AR->getStepRecurrence(*PSE.getSE());

                 "tail-predicate\n.");

  LLVM_DEBUG(dbgs() << "tail-predication: all instructions allowed!\n");
  if (!ST->hasMVEIntegerOps())

  if (L->getNumBlocks() > 1) {
    LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: not a single block "

  assert(L->isInnermost() && "preferPredicateOverEpilogue: inner-loop expected");

    LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: hardware-loop is not "

    LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: hardware-loop is not "

    LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: hardware-loop is not "

    return isa<IntrinsicInst>(I) &&
           cast<IntrinsicInst>(I).getIntrinsicID() ==
               Intrinsic::get_active_lane_mask;
  if (!ST->isMClass())

  if (L->getHeader()->getParent()->hasOptSize())

  L->getExitingBlocks(ExitingBlocks);

             << "Blocks: " << L->getNumBlocks() << "\n"
             << "Exit blocks: " << ExitingBlocks.size() << "\n");

  if (ExitingBlocks.size() > 2)

  if (ST->hasBranchPredictor() && L->getNumBlocks() > 4)

  for (auto *BB : L->getBlocks()) {
    for (auto &I : *BB) {

      if (I.getType()->isVectorTy())

  if (ST->isThumb1Only()) {
    unsigned ExitingValues = 0;

    L->getExitBlocks(ExitBlocks);
    for (auto *Exit : ExitBlocks) {

      unsigned LiveOuts = count_if(Exit->phis(), [](auto &PH) {
        return PH.getNumOperands() != 1 ||
               !isa<GetElementPtrInst>(PH.getOperand(0));

      ExitingValues = ExitingValues < LiveOuts ? LiveOuts : ExitingValues;
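    // Gist of the Thumb1 heuristic above: count the values live out of each
    // exit block (PHIs that are more than a trivial single-operand GEP) and
    // keep the largest such count; the elided lines fold that count into the
    // unroll decision, since every extra live-out costs registers on Thumb1.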
  auto *Outer = L->getOutermostLoop();
  if ((L != Outer && Outer != L->getParentLoop()) ||
  if (!ST->hasMVEIntegerOps())

  unsigned ScalarBits = Ty->getScalarSizeInBits();

  return ScalarBits <= 64;

  if (!ST->hasMVEIntegerOps())

                                        bool HasBaseReg, int64_t Scale,
                                        unsigned AddrSpace) const {

  return AM.Scale < 0 ? 1 : 0;

  return ST->isThumb2() || ST->hasV8MBaselineOps();

  return ST->hasARMOps();

  return Ext->getType()->getScalarSizeInBits() ==
         2 * Ext->getOperand(0)->getType()->getScalarSizeInBits();
  if (!I->getType()->isVectorTy())

  if (ST->hasNEON()) {
    switch (I->getOpcode()) {
    case Instruction::Sub:
    case Instruction::Add: {

      Ops.push_back(&I->getOperandUse(0));
      Ops.push_back(&I->getOperandUse(1));

  if (!ST->hasMVEIntegerOps())

    if (!I->hasOneUse())

    return Sub->getOpcode() == Instruction::FSub && Sub->getOperand(1) == I;

    switch (I->getOpcode()) {
    case Instruction::Add:
    case Instruction::Mul:
    case Instruction::FAdd:
    case Instruction::ICmp:
    case Instruction::FCmp:
    case Instruction::FMul:
      return !IsFMSMul(I);
    case Instruction::Sub:
    case Instruction::FSub:
    case Instruction::Shl:
    case Instruction::LShr:
    case Instruction::AShr:
      return Operand == 1;
    case Instruction::Call:
        switch (II->getIntrinsicID()) {
        case Intrinsic::fma:
        case Intrinsic::sadd_sat:
        case Intrinsic::uadd_sat:
        case Intrinsic::arm_mve_add_predicated:
        case Intrinsic::arm_mve_mul_predicated:
        case Intrinsic::arm_mve_qadd_predicated:
        case Intrinsic::arm_mve_vhadd:
        case Intrinsic::arm_mve_hadd_predicated:
        case Intrinsic::arm_mve_vqdmull:
        case Intrinsic::arm_mve_vqdmull_predicated:
        case Intrinsic::arm_mve_vqdmulh:
        case Intrinsic::arm_mve_qdmulh_predicated:
        case Intrinsic::arm_mve_vqrdmulh:
        case Intrinsic::arm_mve_qrdmulh_predicated:
        case Intrinsic::arm_mve_fma_predicated:
        case Intrinsic::ssub_sat:
        case Intrinsic::usub_sat:
        case Intrinsic::arm_mve_sub_predicated:
        case Intrinsic::arm_mve_qsub_predicated:
        case Intrinsic::arm_mve_hsub_predicated:
        case Intrinsic::arm_mve_vhsub:
          return Operand == 1;

    if (Shuffle->getOpcode() == Instruction::BitCast)

    if (!IsSinker(I, OpIdx.index()))

    for (Use &U : Op->uses()) {
      if (!IsSinker(Insn, U.getOperandNo()))

    Ops.push_back(&Op->getOperandUse(0));
  unsigned NumBytesToPad = 4 - (Size % 4);
  unsigned NewSize = Size + NumBytesToPad;

  if (NewSize > MaxMemIntrinsicSize)

  return NumBytesToPad;
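  // Quick worked example of the padding computation above (assuming the
  // Size % 4 == 0 case is rejected earlier): a 6-byte string gets
  // NumBytesToPad = 4 - (6 % 4) = 2, so NewSize = 8 and the global is widened
  // to the next 4-byte boundary, unless the NewSize > MaxMemIntrinsicSize
  // check bails out first.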