23#include "llvm/IR/IntrinsicsAArch64.h"
35#define DEBUG_TYPE "aarch64tti"
41 "sve-prefer-fixed-over-scalable-if-equal",
cl::Hidden);
59 "Penalty of calling a function that requires a change to PSTATE.SM"));
63 cl::desc(
"Penalty of inlining a call that requires a change to PSTATE.SM"));
74 cl::desc(
"The cost of a histcnt instruction"));
78 cl::desc(
"The number of instructions to search for a redundant dmb"));
82 cl::desc(
"Threshold for forced unrolling of small loops in AArch64"));
85class TailFoldingOption {
100 bool NeedsDefault =
true;
104 void setNeedsDefault(
bool V) { NeedsDefault =
V; }
119 assert((InitialBits == TailFoldingOpts::Disabled || !NeedsDefault) &&
120 "Initial bits should only include one of "
121 "(disabled|all|simple|default)");
122 Bits = NeedsDefault ? DefaultBits : InitialBits;
124 Bits &= ~DisableBits;
130 errs() <<
"invalid argument '" << Opt
131 <<
"' to -sve-tail-folding=; the option should be of the form\n"
132 " (disabled|all|default|simple)[+(reductions|recurrences"
133 "|reverse|noreductions|norecurrences|noreverse)]\n";
139 void operator=(
const std::string &Val) {
148 setNeedsDefault(
false);
151 StringRef(Val).split(TailFoldTypes,
'+', -1,
false);
153 unsigned StartIdx = 1;
154 if (TailFoldTypes[0] ==
"disabled")
155 setInitialBits(TailFoldingOpts::Disabled);
156 else if (TailFoldTypes[0] ==
"all")
157 setInitialBits(TailFoldingOpts::All);
158 else if (TailFoldTypes[0] ==
"default")
159 setNeedsDefault(
true);
160 else if (TailFoldTypes[0] ==
"simple")
161 setInitialBits(TailFoldingOpts::Simple);
164 setInitialBits(TailFoldingOpts::Disabled);
167 for (
unsigned I = StartIdx;
I < TailFoldTypes.
size();
I++) {
168 if (TailFoldTypes[
I] ==
"reductions")
169 setEnableBit(TailFoldingOpts::Reductions);
170 else if (TailFoldTypes[
I] ==
"recurrences")
171 setEnableBit(TailFoldingOpts::Recurrences);
172 else if (TailFoldTypes[
I] ==
"reverse")
173 setEnableBit(TailFoldingOpts::Reverse);
174 else if (TailFoldTypes[
I] ==
"noreductions")
175 setDisableBit(TailFoldingOpts::Reductions);
176 else if (TailFoldTypes[
I] ==
"norecurrences")
177 setDisableBit(TailFoldingOpts::Recurrences);
178 else if (TailFoldTypes[
I] ==
"noreverse")
179 setDisableBit(TailFoldingOpts::Reverse);
196 "Control the use of vectorisation using tail-folding for SVE where the"
197 " option is specified in the form (Initial)[+(Flag1|Flag2|...)]:"
198 "\ndisabled (Initial) No loop types will vectorize using "
200 "\ndefault (Initial) Uses the default tail-folding settings for "
202 "\nall (Initial) All legal loop types will vectorize using "
204 "\nsimple (Initial) Use tail-folding for simple loops (not "
205 "reductions or recurrences)"
206 "\nreductions Use tail-folding for loops containing reductions"
207 "\nnoreductions Inverse of above"
208 "\nrecurrences Use tail-folding for loops containing fixed order "
210 "\nnorecurrences Inverse of above"
211 "\nreverse Use tail-folding for loops requiring reversed "
213 "\nnoreverse Inverse of above"),
258 TTI->isMultiversionedFunction(
F) ?
"fmv-features" :
"target-features";
259 StringRef FeatureStr =
F.getFnAttribute(AttributeStr).getValueAsString();
260 FeatureStr.
split(Features,
",");
276 return F.hasFnAttribute(
"fmv-features");
280 AArch64::FeatureExecuteOnly,
320 FeatureBitset EffectiveCallerBits = CallerBits ^ InlineInverseFeatures;
321 FeatureBitset EffectiveCalleeBits = CalleeBits ^ InlineInverseFeatures;
323 return (EffectiveCallerBits & EffectiveCalleeBits) == EffectiveCalleeBits;
341 auto FVTy = dyn_cast<FixedVectorType>(Ty);
343 FVTy->getScalarSizeInBits() * FVTy->getNumElements() > 128;
352 unsigned DefaultCallPenalty)
const {
377 if (
F ==
Call.getCaller())
383 return DefaultCallPenalty;
394 ST->isSVEorStreamingSVEAvailable() &&
395 !ST->disableMaximizeScalableBandwidth();
419 assert(Ty->isIntegerTy());
421 unsigned BitSize = Ty->getPrimitiveSizeInBits();
428 ImmVal = Imm.sext((BitSize + 63) & ~0x3fU);
433 for (
unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
439 return std::max<InstructionCost>(1,
Cost);
446 assert(Ty->isIntegerTy());
448 unsigned BitSize = Ty->getPrimitiveSizeInBits();
454 unsigned ImmIdx = ~0U;
458 case Instruction::GetElementPtr:
463 case Instruction::Store:
466 case Instruction::Add:
467 case Instruction::Sub:
468 case Instruction::Mul:
469 case Instruction::UDiv:
470 case Instruction::SDiv:
471 case Instruction::URem:
472 case Instruction::SRem:
473 case Instruction::And:
474 case Instruction::Or:
475 case Instruction::Xor:
476 case Instruction::ICmp:
480 case Instruction::Shl:
481 case Instruction::LShr:
482 case Instruction::AShr:
486 case Instruction::Trunc:
487 case Instruction::ZExt:
488 case Instruction::SExt:
489 case Instruction::IntToPtr:
490 case Instruction::PtrToInt:
491 case Instruction::BitCast:
492 case Instruction::PHI:
493 case Instruction::Call:
494 case Instruction::Select:
495 case Instruction::Ret:
496 case Instruction::Load:
501 int NumConstants = (BitSize + 63) / 64;
514 assert(Ty->isIntegerTy());
516 unsigned BitSize = Ty->getPrimitiveSizeInBits();
525 if (IID >= Intrinsic::aarch64_addg && IID <= Intrinsic::aarch64_udiv)
531 case Intrinsic::sadd_with_overflow:
532 case Intrinsic::uadd_with_overflow:
533 case Intrinsic::ssub_with_overflow:
534 case Intrinsic::usub_with_overflow:
535 case Intrinsic::smul_with_overflow:
536 case Intrinsic::umul_with_overflow:
538 int NumConstants = (BitSize + 63) / 64;
545 case Intrinsic::experimental_stackmap:
546 if ((Idx < 2) || (Imm.getBitWidth() <= 64 &&
isInt<64>(Imm.getSExtValue())))
549 case Intrinsic::experimental_patchpoint_void:
550 case Intrinsic::experimental_patchpoint:
551 if ((Idx < 4) || (Imm.getBitWidth() <= 64 &&
isInt<64>(Imm.getSExtValue())))
554 case Intrinsic::experimental_gc_statepoint:
555 if ((Idx < 5) || (Imm.getBitWidth() <= 64 &&
isInt<64>(Imm.getSExtValue())))
565 if (TyWidth == 32 || TyWidth == 64)
574 return ST->getSchedModel().MispredictPenalty;
595 unsigned TotalHistCnts = 1;
605 unsigned EC = VTy->getElementCount().getKnownMinValue();
610 unsigned LegalEltSize = EltSize <= 32 ? 32 : 64;
612 if (EC == 2 || (LegalEltSize == 32 && EC == 4))
616 TotalHistCnts = EC / NaturalVectorWidth;
636 switch (ICA.
getID()) {
637 case Intrinsic::experimental_vector_histogram_add: {
644 case Intrinsic::clmul: {
649 if (LT.second == MVT::v8i8 || LT.second == MVT::v16i8)
653 if (TLI->getValueType(
DL, RetTy,
true) == MVT::i8) {
658 -1,
nullptr,
nullptr) *
661 -1,
nullptr,
nullptr);
665 if (LT.second.SimpleTy == MVT::nxv2i64)
666 if (ST->hasSVEAES() && (ST->isSVEAvailable() || ST->hasSSVE_AES()))
669 if (ST->hasSVE2() || ST->hasSME()) {
670 switch (LT.second.SimpleTy) {
685 if (LT.second.SimpleTy == MVT::nxv2i64)
689 switch (LT.second.SimpleTy) {
699 -1,
nullptr,
nullptr) *
702 -1,
nullptr,
nullptr));
716 case Intrinsic::umin:
717 case Intrinsic::umax:
718 case Intrinsic::smin:
719 case Intrinsic::smax: {
720 static const auto ValidMinMaxTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16,
721 MVT::v8i16, MVT::v2i32, MVT::v4i32,
722 MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32,
726 if (LT.second == MVT::v2i64)
732 case Intrinsic::scmp:
733 case Intrinsic::ucmp: {
735 {Intrinsic::scmp, MVT::i32, 3},
736 {Intrinsic::scmp, MVT::i64, 3},
737 {Intrinsic::scmp, MVT::v8i8, 3},
738 {Intrinsic::scmp, MVT::v16i8, 3},
739 {Intrinsic::scmp, MVT::v4i16, 3},
740 {Intrinsic::scmp, MVT::v8i16, 3},
741 {Intrinsic::scmp, MVT::v2i32, 3},
742 {Intrinsic::scmp, MVT::v4i32, 3},
743 {Intrinsic::scmp, MVT::v1i64, 3},
744 {Intrinsic::scmp, MVT::v2i64, 3},
750 return Entry->Cost * LT.first;
753 case Intrinsic::sadd_sat:
754 case Intrinsic::ssub_sat:
755 case Intrinsic::uadd_sat:
756 case Intrinsic::usub_sat: {
757 static const auto ValidSatTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16,
758 MVT::v8i16, MVT::v2i32, MVT::v4i32,
764 LT.second.getScalarSizeInBits() == RetTy->getScalarSizeInBits() ? 1 : 4;
766 return LT.first * Instrs;
771 if (ST->isSVEAvailable() && VectorSize >= 128 &&
isPowerOf2_64(VectorSize))
772 return LT.first * Instrs;
776 case Intrinsic::abs: {
777 static const auto ValidAbsTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16,
778 MVT::v8i16, MVT::v2i32, MVT::v4i32,
779 MVT::v2i64, MVT::nxv16i8, MVT::nxv8i16,
780 MVT::nxv4i32, MVT::nxv2i64};
786 case Intrinsic::bswap: {
787 static const auto ValidAbsTys = {MVT::v4i16, MVT::v8i16, MVT::v2i32,
788 MVT::v4i32, MVT::v2i64};
791 LT.second.getScalarSizeInBits() == RetTy->getScalarSizeInBits())
796 case Intrinsic::fmuladd: {
801 (EltTy->
isHalfTy() && ST->hasFullFP16()))
805 case Intrinsic::stepvector: {
814 Cost += AddCost * (LT.first - 1);
818 case Intrinsic::vector_extract:
819 case Intrinsic::vector_insert: {
832 bool IsExtract = ICA.
getID() == Intrinsic::vector_extract;
833 EVT SubVecVT = IsExtract ? getTLI()->getValueType(
DL, RetTy)
841 getTLI()->getTypeConversion(
C, SubVecVT);
843 getTLI()->getTypeConversion(
C, VecVT);
851 case Intrinsic::bitreverse: {
853 {Intrinsic::bitreverse, MVT::i32, 1},
854 {Intrinsic::bitreverse, MVT::i64, 1},
855 {Intrinsic::bitreverse, MVT::v8i8, 1},
856 {Intrinsic::bitreverse, MVT::v16i8, 1},
857 {Intrinsic::bitreverse, MVT::v4i16, 2},
858 {Intrinsic::bitreverse, MVT::v8i16, 2},
859 {Intrinsic::bitreverse, MVT::v2i32, 2},
860 {Intrinsic::bitreverse, MVT::v4i32, 2},
861 {Intrinsic::bitreverse, MVT::v1i64, 2},
862 {Intrinsic::bitreverse, MVT::v2i64, 2},
870 if (TLI->getValueType(
DL, RetTy,
true) == MVT::i8 ||
871 TLI->getValueType(
DL, RetTy,
true) == MVT::i16)
872 return LegalisationCost.first * Entry->Cost + 1;
874 return LegalisationCost.first * Entry->Cost;
878 case Intrinsic::ctpop: {
879 if (!ST->hasNEON()) {
911 RetTy->getScalarSizeInBits()
914 return LT.first * Entry->Cost + ExtraCost;
918 case Intrinsic::sadd_with_overflow:
919 case Intrinsic::uadd_with_overflow:
920 case Intrinsic::ssub_with_overflow:
921 case Intrinsic::usub_with_overflow:
922 case Intrinsic::smul_with_overflow:
923 case Intrinsic::umul_with_overflow: {
925 {Intrinsic::sadd_with_overflow, MVT::i8, 3},
926 {Intrinsic::uadd_with_overflow, MVT::i8, 3},
927 {Intrinsic::sadd_with_overflow, MVT::i16, 3},
928 {Intrinsic::uadd_with_overflow, MVT::i16, 3},
929 {Intrinsic::sadd_with_overflow, MVT::i32, 1},
930 {Intrinsic::uadd_with_overflow, MVT::i32, 1},
931 {Intrinsic::sadd_with_overflow, MVT::i64, 1},
932 {Intrinsic::uadd_with_overflow, MVT::i64, 1},
933 {Intrinsic::ssub_with_overflow, MVT::i8, 3},
934 {Intrinsic::usub_with_overflow, MVT::i8, 3},
935 {Intrinsic::ssub_with_overflow, MVT::i16, 3},
936 {Intrinsic::usub_with_overflow, MVT::i16, 3},
937 {Intrinsic::ssub_with_overflow, MVT::i32, 1},
938 {Intrinsic::usub_with_overflow, MVT::i32, 1},
939 {Intrinsic::ssub_with_overflow, MVT::i64, 1},
940 {Intrinsic::usub_with_overflow, MVT::i64, 1},
941 {Intrinsic::smul_with_overflow, MVT::i8, 5},
942 {Intrinsic::umul_with_overflow, MVT::i8, 4},
943 {Intrinsic::smul_with_overflow, MVT::i16, 5},
944 {Intrinsic::umul_with_overflow, MVT::i16, 4},
945 {Intrinsic::smul_with_overflow, MVT::i32, 2},
946 {Intrinsic::umul_with_overflow, MVT::i32, 2},
947 {Intrinsic::smul_with_overflow, MVT::i64, 3},
948 {Intrinsic::umul_with_overflow, MVT::i64, 3},
950 EVT MTy = TLI->getValueType(
DL, RetTy->getContainedType(0),
true);
957 case Intrinsic::fptosi_sat:
958 case Intrinsic::fptoui_sat: {
961 bool IsSigned = ICA.
getID() == Intrinsic::fptosi_sat;
963 EVT MTy = TLI->getValueType(
DL, RetTy);
966 if ((LT.second == MVT::f32 || LT.second == MVT::f64 ||
967 LT.second == MVT::v2f32 || LT.second == MVT::v4f32 ||
968 LT.second == MVT::v2f64)) {
970 (LT.second == MVT::f64 && MTy == MVT::i32) ||
971 (LT.second == MVT::f32 && MTy == MVT::i64)))
980 if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16())
987 if ((LT.second == MVT::f16 && MTy == MVT::i32) ||
988 (LT.second == MVT::f16 && MTy == MVT::i64) ||
989 ((LT.second == MVT::v4f16 || LT.second == MVT::v8f16) &&
1003 if ((LT.second.getScalarType() == MVT::f32 ||
1004 LT.second.getScalarType() == MVT::f64 ||
1005 LT.second.getScalarType() == MVT::f16) &&
1008 Type::getIntNTy(RetTy->getContext(), LT.second.getScalarSizeInBits());
1009 if (LT.second.isVector())
1010 LegalTy =
VectorType::get(LegalTy, LT.second.getVectorElementCount());
1014 LegalTy, {LegalTy, LegalTy});
1018 LegalTy, {LegalTy, LegalTy});
1020 return LT.first *
Cost +
1021 ((LT.second.getScalarType() != MVT::f16 || ST->hasFullFP16()) ? 0
1027 RetTy = RetTy->getScalarType();
1028 if (LT.second.isVector()) {
1046 return LT.first *
Cost;
1048 case Intrinsic::fshl:
1049 case Intrinsic::fshr: {
1058 if (RetTy->isIntegerTy() && ICA.
getArgs()[0] == ICA.
getArgs()[1] &&
1059 (RetTy->getPrimitiveSizeInBits() == 32 ||
1060 RetTy->getPrimitiveSizeInBits() == 64)) {
1073 {Intrinsic::fshl, MVT::v4i32, 2},
1074 {Intrinsic::fshl, MVT::v2i64, 2}, {Intrinsic::fshl, MVT::v16i8, 2},
1075 {Intrinsic::fshl, MVT::v8i16, 2}, {Intrinsic::fshl, MVT::v2i32, 2},
1076 {Intrinsic::fshl, MVT::v8i8, 2}, {Intrinsic::fshl, MVT::v4i16, 2}};
1082 return LegalisationCost.first * Entry->Cost;
1086 if (!RetTy->isIntegerTy())
1091 bool HigherCost = (RetTy->getScalarSizeInBits() != 32 &&
1092 RetTy->getScalarSizeInBits() < 64) ||
1093 (RetTy->getScalarSizeInBits() % 64 != 0);
1094 unsigned ExtraCost = HigherCost ? 1 : 0;
1095 if (RetTy->getScalarSizeInBits() == 32 ||
1096 RetTy->getScalarSizeInBits() == 64)
1099 else if (HigherCost)
1103 return TyL.first + ExtraCost;
1105 case Intrinsic::get_active_lane_mask: {
1107 EVT RetVT = getTLI()->getValueType(
DL, RetTy);
1109 if (getTLI()->shouldExpandGetActiveLaneMask(RetVT, OpVT))
1112 if (RetTy->isScalableTy()) {
1113 if (TLI->getTypeAction(RetTy->getContext(), RetVT) !=
1123 if (ST->hasSVE2p1() || ST->hasSME2()) {
1138 return Cost + (SplitCost * (
Cost - 1));
1153 case Intrinsic::experimental_vector_match: {
1156 unsigned SearchSize = NeedleTy->getNumElements();
1157 if (!getTLI()->shouldExpandVectorMatch(SearchVT, SearchSize)) {
1170 case Intrinsic::cttz: {
1172 if (LT.second == MVT::v8i8 || LT.second == MVT::v16i8)
1173 return LT.first * 2;
1174 if (LT.second == MVT::v4i16 || LT.second == MVT::v8i16 ||
1175 LT.second == MVT::v2i32 || LT.second == MVT::v4i32)
1176 return LT.first * 3;
1179 case Intrinsic::experimental_cttz_elts: {
1181 if (!getTLI()->shouldExpandCttzElements(ArgVT)) {
1189 case Intrinsic::loop_dependence_raw_mask:
1190 case Intrinsic::loop_dependence_war_mask: {
1192 if (ST->hasSVE2() || ST->hasSME()) {
1193 EVT VecVT = getTLI()->getValueType(
DL, RetTy);
1194 unsigned EltSizeInBytes =
1204 case Intrinsic::experimental_vector_extract_last_active:
1205 if (ST->isSVEorStreamingSVEAvailable()) {
1211 case Intrinsic::pow: {
1214 EVT VT = getTLI()->getValueType(
DL, RetTy);
1216 bool HasLibcall = getTLI()->getLibcallImpl(LC) != RTLIB::Unsupported;
1231 bool Is025 = ExpF->getValueAPF().isExactlyValue(0.25);
1232 bool Is075 = ExpF->getValueAPF().isExactlyValue(0.75);
1242 return (Sqrt * 2) +
FMul;
1253 case Intrinsic::sqrt:
1254 case Intrinsic::fabs:
1255 case Intrinsic::ceil:
1256 case Intrinsic::floor:
1257 case Intrinsic::nearbyint:
1258 case Intrinsic::round:
1259 case Intrinsic::rint:
1260 case Intrinsic::roundeven:
1261 case Intrinsic::trunc:
1262 case Intrinsic::minnum:
1263 case Intrinsic::maxnum:
1264 case Intrinsic::minimum:
1265 case Intrinsic::maximum: {
1283 auto RequiredType =
II.getType();
1286 assert(PN &&
"Expected Phi Node!");
1289 if (!PN->hasOneUse())
1290 return std::nullopt;
1292 for (
Value *IncValPhi : PN->incoming_values()) {
1295 Reinterpret->getIntrinsicID() !=
1296 Intrinsic::aarch64_sve_convert_to_svbool ||
1297 RequiredType != Reinterpret->getArgOperand(0)->getType())
1298 return std::nullopt;
1306 for (
unsigned I = 0;
I < PN->getNumIncomingValues();
I++) {
1308 NPN->
addIncoming(Reinterpret->getOperand(0), PN->getIncomingBlock(
I));
1381 return GoverningPredicateIdx != std::numeric_limits<unsigned>::max();
1386 return GoverningPredicateIdx;
1391 GoverningPredicateIdx = Index;
1409 return UndefIntrinsic;
1414 UndefIntrinsic = IID;
1436 return ResultLanes == InactiveLanesTakenFromOperand;
1441 return OperandIdxForInactiveLanes;
1445 assert(ResultLanes == Uninitialized &&
"Cannot set property twice!");
1446 ResultLanes = InactiveLanesTakenFromOperand;
1447 OperandIdxForInactiveLanes = Index;
1452 return ResultLanes == InactiveLanesAreNotDefined;
1456 assert(ResultLanes == Uninitialized &&
"Cannot set property twice!");
1457 ResultLanes = InactiveLanesAreNotDefined;
1462 return ResultLanes == InactiveLanesAreUnused;
1466 assert(ResultLanes == Uninitialized &&
"Cannot set property twice!");
1467 ResultLanes = InactiveLanesAreUnused;
1477 ResultIsZeroInitialized =
true;
1488 return OperandIdxWithNoActiveLanes != std::numeric_limits<unsigned>::max();
1493 return OperandIdxWithNoActiveLanes;
1498 OperandIdxWithNoActiveLanes = Index;
1503 unsigned GoverningPredicateIdx = std::numeric_limits<unsigned>::max();
1506 unsigned IROpcode = 0;
1508 enum PredicationStyle {
1510 InactiveLanesTakenFromOperand,
1511 InactiveLanesAreNotDefined,
1512 InactiveLanesAreUnused
1515 bool ResultIsZeroInitialized =
false;
1516 unsigned OperandIdxForInactiveLanes = std::numeric_limits<unsigned>::max();
1517 unsigned OperandIdxWithNoActiveLanes = std::numeric_limits<unsigned>::max();
1525 return !isa<ScalableVectorType>(V->getType());
1533 case Intrinsic::aarch64_sve_fcvt_bf16f32_v2:
1534 case Intrinsic::aarch64_sve_fcvt_f16f32:
1535 case Intrinsic::aarch64_sve_fcvt_f16f64:
1536 case Intrinsic::aarch64_sve_fcvt_f32f16:
1537 case Intrinsic::aarch64_sve_fcvt_f32f64:
1538 case Intrinsic::aarch64_sve_fcvt_f64f16:
1539 case Intrinsic::aarch64_sve_fcvt_f64f32:
1540 case Intrinsic::aarch64_sve_fcvtlt_f32f16:
1541 case Intrinsic::aarch64_sve_fcvtlt_f64f32:
1542 case Intrinsic::aarch64_sve_fcvtx_f32f64:
1543 case Intrinsic::aarch64_sve_fcvtzs:
1544 case Intrinsic::aarch64_sve_fcvtzs_i32f16:
1545 case Intrinsic::aarch64_sve_fcvtzs_i32f64:
1546 case Intrinsic::aarch64_sve_fcvtzs_i64f16:
1547 case Intrinsic::aarch64_sve_fcvtzs_i64f32:
1548 case Intrinsic::aarch64_sve_fcvtzu:
1549 case Intrinsic::aarch64_sve_fcvtzu_i32f16:
1550 case Intrinsic::aarch64_sve_fcvtzu_i32f64:
1551 case Intrinsic::aarch64_sve_fcvtzu_i64f16:
1552 case Intrinsic::aarch64_sve_fcvtzu_i64f32:
1553 case Intrinsic::aarch64_sve_revb:
1554 case Intrinsic::aarch64_sve_revh:
1555 case Intrinsic::aarch64_sve_revw:
1556 case Intrinsic::aarch64_sve_revd:
1557 case Intrinsic::aarch64_sve_scvtf:
1558 case Intrinsic::aarch64_sve_scvtf_f16i32:
1559 case Intrinsic::aarch64_sve_scvtf_f16i64:
1560 case Intrinsic::aarch64_sve_scvtf_f32i64:
1561 case Intrinsic::aarch64_sve_scvtf_f64i32:
1562 case Intrinsic::aarch64_sve_ucvtf:
1563 case Intrinsic::aarch64_sve_ucvtf_f16i32:
1564 case Intrinsic::aarch64_sve_ucvtf_f16i64:
1565 case Intrinsic::aarch64_sve_ucvtf_f32i64:
1566 case Intrinsic::aarch64_sve_ucvtf_f64i32:
1569 case Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2:
1570 case Intrinsic::aarch64_sve_fcvtnt_f16f32:
1571 case Intrinsic::aarch64_sve_fcvtnt_f32f64:
1572 case Intrinsic::aarch64_sve_fcvtxnt_f32f64:
1575 case Intrinsic::aarch64_sve_fabd:
1577 case Intrinsic::aarch64_sve_fadd:
1580 case Intrinsic::aarch64_sve_fdiv:
1583 case Intrinsic::aarch64_sve_fmax:
1585 case Intrinsic::aarch64_sve_fmaxnm:
1587 case Intrinsic::aarch64_sve_fmin:
1589 case Intrinsic::aarch64_sve_fminnm:
1591 case Intrinsic::aarch64_sve_fmla:
1593 case Intrinsic::aarch64_sve_fmls:
1595 case Intrinsic::aarch64_sve_fmul:
1598 case Intrinsic::aarch64_sve_fmulx:
1600 case Intrinsic::aarch64_sve_fnmla:
1602 case Intrinsic::aarch64_sve_fnmls:
1604 case Intrinsic::aarch64_sve_fsub:
1607 case Intrinsic::aarch64_sve_add:
1610 case Intrinsic::aarch64_sve_mla:
1612 case Intrinsic::aarch64_sve_mls:
1614 case Intrinsic::aarch64_sve_mul:
1617 case Intrinsic::aarch64_sve_sabd:
1619 case Intrinsic::aarch64_sve_sdiv:
1622 case Intrinsic::aarch64_sve_smax:
1624 case Intrinsic::aarch64_sve_smin:
1626 case Intrinsic::aarch64_sve_smulh:
1628 case Intrinsic::aarch64_sve_sub:
1631 case Intrinsic::aarch64_sve_uabd:
1633 case Intrinsic::aarch64_sve_udiv:
1636 case Intrinsic::aarch64_sve_umax:
1638 case Intrinsic::aarch64_sve_umin:
1640 case Intrinsic::aarch64_sve_umulh:
1642 case Intrinsic::aarch64_sve_asr:
1645 case Intrinsic::aarch64_sve_lsl:
1648 case Intrinsic::aarch64_sve_lsr:
1651 case Intrinsic::aarch64_sve_and:
1654 case Intrinsic::aarch64_sve_bic:
1656 case Intrinsic::aarch64_sve_eor:
1659 case Intrinsic::aarch64_sve_orr:
1662 case Intrinsic::aarch64_sve_shsub:
1664 case Intrinsic::aarch64_sve_shsubr:
1666 case Intrinsic::aarch64_sve_sqrshl:
1668 case Intrinsic::aarch64_sve_sqshl:
1670 case Intrinsic::aarch64_sve_sqsub:
1672 case Intrinsic::aarch64_sve_srshl:
1674 case Intrinsic::aarch64_sve_uhsub:
1676 case Intrinsic::aarch64_sve_uhsubr:
1678 case Intrinsic::aarch64_sve_uqrshl:
1680 case Intrinsic::aarch64_sve_uqshl:
1682 case Intrinsic::aarch64_sve_uqsub:
1684 case Intrinsic::aarch64_sve_urshl:
1687 case Intrinsic::aarch64_sve_add_u:
1690 case Intrinsic::aarch64_sve_and_u:
1693 case Intrinsic::aarch64_sve_asr_u:
1696 case Intrinsic::aarch64_sve_eor_u:
1699 case Intrinsic::aarch64_sve_fadd_u:
1702 case Intrinsic::aarch64_sve_fdiv_u:
1705 case Intrinsic::aarch64_sve_fmul_u:
1708 case Intrinsic::aarch64_sve_fsub_u:
1711 case Intrinsic::aarch64_sve_lsl_u:
1714 case Intrinsic::aarch64_sve_lsr_u:
1717 case Intrinsic::aarch64_sve_mul_u:
1720 case Intrinsic::aarch64_sve_orr_u:
1723 case Intrinsic::aarch64_sve_sdiv_u:
1726 case Intrinsic::aarch64_sve_sub_u:
1729 case Intrinsic::aarch64_sve_udiv_u:
1733 case Intrinsic::aarch64_sve_addqv:
1734 case Intrinsic::aarch64_sve_and_z:
1735 case Intrinsic::aarch64_sve_bic_z:
1736 case Intrinsic::aarch64_sve_brka_z:
1737 case Intrinsic::aarch64_sve_brkb_z:
1738 case Intrinsic::aarch64_sve_brkn_z:
1739 case Intrinsic::aarch64_sve_brkpa_z:
1740 case Intrinsic::aarch64_sve_brkpb_z:
1741 case Intrinsic::aarch64_sve_cntp:
1742 case Intrinsic::aarch64_sve_compact:
1743 case Intrinsic::aarch64_sve_eor_z:
1744 case Intrinsic::aarch64_sve_eorv:
1745 case Intrinsic::aarch64_sve_eorqv:
1746 case Intrinsic::aarch64_sve_nand_z:
1747 case Intrinsic::aarch64_sve_nor_z:
1748 case Intrinsic::aarch64_sve_orn_z:
1749 case Intrinsic::aarch64_sve_orr_z:
1750 case Intrinsic::aarch64_sve_orv:
1751 case Intrinsic::aarch64_sve_orqv:
1752 case Intrinsic::aarch64_sve_pnext:
1753 case Intrinsic::aarch64_sve_rdffr_z:
1754 case Intrinsic::aarch64_sve_saddv:
1755 case Intrinsic::aarch64_sve_uaddv:
1756 case Intrinsic::aarch64_sve_umaxv:
1757 case Intrinsic::aarch64_sve_umaxqv:
1758 case Intrinsic::aarch64_sve_cmpeq:
1759 case Intrinsic::aarch64_sve_cmpeq_wide:
1760 case Intrinsic::aarch64_sve_cmpge:
1761 case Intrinsic::aarch64_sve_cmpge_wide:
1762 case Intrinsic::aarch64_sve_cmpgt:
1763 case Intrinsic::aarch64_sve_cmpgt_wide:
1764 case Intrinsic::aarch64_sve_cmphi:
1765 case Intrinsic::aarch64_sve_cmphi_wide:
1766 case Intrinsic::aarch64_sve_cmphs:
1767 case Intrinsic::aarch64_sve_cmphs_wide:
1768 case Intrinsic::aarch64_sve_cmple_wide:
1769 case Intrinsic::aarch64_sve_cmplo_wide:
1770 case Intrinsic::aarch64_sve_cmpls_wide:
1771 case Intrinsic::aarch64_sve_cmplt_wide:
1772 case Intrinsic::aarch64_sve_cmpne:
1773 case Intrinsic::aarch64_sve_cmpne_wide:
1774 case Intrinsic::aarch64_sve_facge:
1775 case Intrinsic::aarch64_sve_facgt:
1776 case Intrinsic::aarch64_sve_fcmpeq:
1777 case Intrinsic::aarch64_sve_fcmpge:
1778 case Intrinsic::aarch64_sve_fcmpgt:
1779 case Intrinsic::aarch64_sve_fcmpne:
1780 case Intrinsic::aarch64_sve_fcmpuo:
1781 case Intrinsic::aarch64_sve_ld1:
1782 case Intrinsic::aarch64_sve_ld1_gather:
1783 case Intrinsic::aarch64_sve_ld1_gather_index:
1784 case Intrinsic::aarch64_sve_ld1_gather_scalar_offset:
1785 case Intrinsic::aarch64_sve_ld1_gather_sxtw:
1786 case Intrinsic::aarch64_sve_ld1_gather_sxtw_index:
1787 case Intrinsic::aarch64_sve_ld1_gather_uxtw:
1788 case Intrinsic::aarch64_sve_ld1_gather_uxtw_index:
1789 case Intrinsic::aarch64_sve_ld1q_gather_index:
1790 case Intrinsic::aarch64_sve_ld1q_gather_scalar_offset:
1791 case Intrinsic::aarch64_sve_ld1q_gather_vector_offset:
1792 case Intrinsic::aarch64_sve_ld1ro:
1793 case Intrinsic::aarch64_sve_ld1rq:
1794 case Intrinsic::aarch64_sve_ld1udq:
1795 case Intrinsic::aarch64_sve_ld1uwq:
1796 case Intrinsic::aarch64_sve_ld2_sret:
1797 case Intrinsic::aarch64_sve_ld2q_sret:
1798 case Intrinsic::aarch64_sve_ld3_sret:
1799 case Intrinsic::aarch64_sve_ld3q_sret:
1800 case Intrinsic::aarch64_sve_ld4_sret:
1801 case Intrinsic::aarch64_sve_ld4q_sret:
1802 case Intrinsic::aarch64_sve_ldff1:
1803 case Intrinsic::aarch64_sve_ldff1_gather:
1804 case Intrinsic::aarch64_sve_ldff1_gather_index:
1805 case Intrinsic::aarch64_sve_ldff1_gather_scalar_offset:
1806 case Intrinsic::aarch64_sve_ldff1_gather_sxtw:
1807 case Intrinsic::aarch64_sve_ldff1_gather_sxtw_index:
1808 case Intrinsic::aarch64_sve_ldff1_gather_uxtw:
1809 case Intrinsic::aarch64_sve_ldff1_gather_uxtw_index:
1810 case Intrinsic::aarch64_sve_ldnf1:
1811 case Intrinsic::aarch64_sve_ldnt1:
1812 case Intrinsic::aarch64_sve_ldnt1_gather:
1813 case Intrinsic::aarch64_sve_ldnt1_gather_index:
1814 case Intrinsic::aarch64_sve_ldnt1_gather_scalar_offset:
1815 case Intrinsic::aarch64_sve_ldnt1_gather_uxtw:
1818 case Intrinsic::aarch64_sve_prf:
1819 case Intrinsic::aarch64_sve_prfb_gather_index:
1820 case Intrinsic::aarch64_sve_prfb_gather_scalar_offset:
1821 case Intrinsic::aarch64_sve_prfb_gather_sxtw_index:
1822 case Intrinsic::aarch64_sve_prfb_gather_uxtw_index:
1823 case Intrinsic::aarch64_sve_prfd_gather_index:
1824 case Intrinsic::aarch64_sve_prfd_gather_scalar_offset:
1825 case Intrinsic::aarch64_sve_prfd_gather_sxtw_index:
1826 case Intrinsic::aarch64_sve_prfd_gather_uxtw_index:
1827 case Intrinsic::aarch64_sve_prfh_gather_index:
1828 case Intrinsic::aarch64_sve_prfh_gather_scalar_offset:
1829 case Intrinsic::aarch64_sve_prfh_gather_sxtw_index:
1830 case Intrinsic::aarch64_sve_prfh_gather_uxtw_index:
1831 case Intrinsic::aarch64_sve_prfw_gather_index:
1832 case Intrinsic::aarch64_sve_prfw_gather_scalar_offset:
1833 case Intrinsic::aarch64_sve_prfw_gather_sxtw_index:
1834 case Intrinsic::aarch64_sve_prfw_gather_uxtw_index:
1837 case Intrinsic::aarch64_sve_st1_scatter:
1838 case Intrinsic::aarch64_sve_st1_scatter_scalar_offset:
1839 case Intrinsic::aarch64_sve_st1_scatter_sxtw:
1840 case Intrinsic::aarch64_sve_st1_scatter_sxtw_index:
1841 case Intrinsic::aarch64_sve_st1_scatter_uxtw:
1842 case Intrinsic::aarch64_sve_st1_scatter_uxtw_index:
1843 case Intrinsic::aarch64_sve_st1dq:
1844 case Intrinsic::aarch64_sve_st1q_scatter_index:
1845 case Intrinsic::aarch64_sve_st1q_scatter_scalar_offset:
1846 case Intrinsic::aarch64_sve_st1q_scatter_vector_offset:
1847 case Intrinsic::aarch64_sve_st1wq:
1848 case Intrinsic::aarch64_sve_stnt1:
1849 case Intrinsic::aarch64_sve_stnt1_scatter:
1850 case Intrinsic::aarch64_sve_stnt1_scatter_index:
1851 case Intrinsic::aarch64_sve_stnt1_scatter_scalar_offset:
1852 case Intrinsic::aarch64_sve_stnt1_scatter_uxtw:
1854 case Intrinsic::aarch64_sve_st2:
1855 case Intrinsic::aarch64_sve_st2q:
1857 case Intrinsic::aarch64_sve_st3:
1858 case Intrinsic::aarch64_sve_st3q:
1860 case Intrinsic::aarch64_sve_st4:
1861 case Intrinsic::aarch64_sve_st4q:
1869 Value *UncastedPred;
1875 Pred = UncastedPred;
1881 if (OrigPredTy->getMinNumElements() <=
1883 ->getMinNumElements())
1884 Pred = UncastedPred;
1888 return C &&
C->isAllOnesValue();
1895 if (Dup && Dup->getIntrinsicID() == Intrinsic::aarch64_sve_dup &&
1896 Dup->getOperand(1) == Pg &&
isa<Constant>(Dup->getOperand(2)))
1904static std::optional<Instruction *>
1911 Value *Op1 =
II.getOperand(1);
1912 Value *Op2 =
II.getOperand(2);
1938 return std::nullopt;
1946 if (SimpleII == Inactive)
1956static std::optional<Instruction *>
1960 return std::nullopt;
1989 II.setCalledFunction(NewDecl);
1999 return std::nullopt;
2011static std::optional<Instruction *>
2015 return std::nullopt;
2017 auto IntrinsicID = BinOp->getIntrinsicID();
2018 switch (IntrinsicID) {
2019 case Intrinsic::aarch64_sve_and_z:
2020 case Intrinsic::aarch64_sve_bic_z:
2021 case Intrinsic::aarch64_sve_eor_z:
2022 case Intrinsic::aarch64_sve_nand_z:
2023 case Intrinsic::aarch64_sve_nor_z:
2024 case Intrinsic::aarch64_sve_orn_z:
2025 case Intrinsic::aarch64_sve_orr_z:
2028 return std::nullopt;
2031 auto BinOpPred = BinOp->getOperand(0);
2032 auto BinOpOp1 = BinOp->getOperand(1);
2033 auto BinOpOp2 = BinOp->getOperand(2);
2037 PredIntr->getIntrinsicID() != Intrinsic::aarch64_sve_convert_to_svbool)
2038 return std::nullopt;
2040 auto PredOp = PredIntr->getOperand(0);
2042 if (PredOpTy !=
II.getType())
2043 return std::nullopt;
2047 Intrinsic::aarch64_sve_convert_from_svbool, {PredOpTy}, {BinOpOp1});
2048 NarrowedBinOpArgs.
push_back(NarrowBinOpOp1);
2049 if (BinOpOp1 == BinOpOp2)
2050 NarrowedBinOpArgs.
push_back(NarrowBinOpOp1);
2053 Intrinsic::aarch64_sve_convert_from_svbool, {PredOpTy}, {BinOpOp2}));
2055 auto NarrowedBinOp =
2060static std::optional<Instruction *>
2067 return BinOpCombine;
2072 return std::nullopt;
2075 Value *Cursor =
II.getOperand(0), *EarliestReplacement =
nullptr;
2084 if (CursorVTy->getElementCount().getKnownMinValue() <
2085 IVTy->getElementCount().getKnownMinValue())
2089 if (Cursor->getType() == IVTy)
2090 EarliestReplacement = Cursor;
2095 if (!IntrinsicCursor || !(IntrinsicCursor->getIntrinsicID() ==
2096 Intrinsic::aarch64_sve_convert_to_svbool ||
2097 IntrinsicCursor->getIntrinsicID() ==
2098 Intrinsic::aarch64_sve_convert_from_svbool))
2101 CandidatesForRemoval.
insert(CandidatesForRemoval.
begin(), IntrinsicCursor);
2102 Cursor = IntrinsicCursor->getOperand(0);
2107 if (!EarliestReplacement)
2108 return std::nullopt;
2116 auto *OpPredicate =
II.getOperand(0);
2133 II.getArgOperand(2));
2139 return std::nullopt;
2143 II.getArgOperand(0),
II.getArgOperand(2),
uint64_t(0));
2152 II.getArgOperand(0));
2162 return std::nullopt;
2167 if (!SplatValue || !SplatValue->isZero())
2168 return std::nullopt;
2173 DupQLane->getIntrinsicID() != Intrinsic::aarch64_sve_dupq_lane)
2174 return std::nullopt;
2178 if (!DupQLaneIdx || !DupQLaneIdx->isZero())
2179 return std::nullopt;
2182 if (!VecIns || VecIns->getIntrinsicID() != Intrinsic::vector_insert)
2183 return std::nullopt;
2188 return std::nullopt;
2191 return std::nullopt;
2195 return std::nullopt;
2199 if (!VecTy || !OutTy || VecTy->getNumElements() != OutTy->getMinNumElements())
2200 return std::nullopt;
2202 unsigned NumElts = VecTy->getNumElements();
2203 unsigned PredicateBits = 0;
2206 for (
unsigned I = 0;
I < NumElts; ++
I) {
2209 return std::nullopt;
2211 PredicateBits |= 1 << (
I * (16 / NumElts));
2215 if (PredicateBits == 0) {
2217 PFalse->takeName(&
II);
2223 for (
unsigned I = 0;
I < 16; ++
I)
2224 if ((PredicateBits & (1 <<
I)) != 0)
2227 unsigned PredSize = Mask & -Mask;
2232 for (
unsigned I = 0;
I < 16;
I += PredSize)
2233 if ((PredicateBits & (1 <<
I)) == 0)
2234 return std::nullopt;
2236 auto *ConvertToSVBool =
2239 auto *ConvertFromSVBool =
2241 II.getType(), ConvertToSVBool);
2249 Value *Pg =
II.getArgOperand(0);
2250 Value *Vec =
II.getArgOperand(1);
2251 auto IntrinsicID =
II.getIntrinsicID();
2252 bool IsAfter = IntrinsicID == Intrinsic::aarch64_sve_lasta;
2264 auto OpC = OldBinOp->getOpcode();
2270 OpC, NewLHS, NewRHS, OldBinOp, OldBinOp->getName(),
II.getIterator());
2276 if (IsAfter &&
C &&
C->isNullValue()) {
2280 Extract->insertBefore(
II.getIterator());
2281 Extract->takeName(&
II);
2287 return std::nullopt;
2289 if (IntrPG->getIntrinsicID() != Intrinsic::aarch64_sve_ptrue)
2290 return std::nullopt;
2292 const auto PTruePattern =
2298 return std::nullopt;
2300 unsigned Idx = MinNumElts - 1;
2310 if (Idx >= PgVTy->getMinNumElements())
2311 return std::nullopt;
2316 Extract->insertBefore(
II.getIterator());
2317 Extract->takeName(&
II);
2330 Value *Pg =
II.getArgOperand(0);
2332 Value *Vec =
II.getArgOperand(2);
2335 if (!Ty->isIntegerTy())
2336 return std::nullopt;
2341 return std::nullopt;
2358 II.getIntrinsicID(), {FPVec->getType()}, {Pg, FPFallBack, FPVec});
2373static std::optional<Instruction *>
2377 if (
Pattern == AArch64SVEPredPattern::all) {
2386 return MinNumElts && NumElts >= MinNumElts
2388 II, ConstantInt::get(
II.getType(), MinNumElts)))
2392static std::optional<Instruction *>
2395 if (!ST->isStreaming())
2396 return std::nullopt;
2408 Value *PgVal =
II.getArgOperand(0);
2409 Value *OpVal =
II.getArgOperand(1);
2413 if (PgVal == OpVal &&
2414 (
II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_first ||
2415 II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_last)) {
2430 return std::nullopt;
2434 if (Pg->
getIntrinsicID() == Intrinsic::aarch64_sve_convert_to_svbool &&
2435 OpIID == Intrinsic::aarch64_sve_convert_to_svbool &&
2449 if ((Pg ==
Op) && (
II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_any) &&
2450 ((OpIID == Intrinsic::aarch64_sve_brka_z) ||
2451 (OpIID == Intrinsic::aarch64_sve_brkb_z) ||
2452 (OpIID == Intrinsic::aarch64_sve_brkpa_z) ||
2453 (OpIID == Intrinsic::aarch64_sve_brkpb_z) ||
2454 (OpIID == Intrinsic::aarch64_sve_rdffr_z) ||
2455 (OpIID == Intrinsic::aarch64_sve_and_z) ||
2456 (OpIID == Intrinsic::aarch64_sve_bic_z) ||
2457 (OpIID == Intrinsic::aarch64_sve_eor_z) ||
2458 (OpIID == Intrinsic::aarch64_sve_nand_z) ||
2459 (OpIID == Intrinsic::aarch64_sve_nor_z) ||
2460 (OpIID == Intrinsic::aarch64_sve_orn_z) ||
2461 (OpIID == Intrinsic::aarch64_sve_orr_z))) {
2471 return std::nullopt;
2474template <Intrinsic::ID MulOpc, Intrinsic::ID FuseOpc>
2475static std::optional<Instruction *>
2477 bool MergeIntoAddendOp) {
2479 Value *MulOp0, *MulOp1, *AddendOp, *
Mul;
2480 if (MergeIntoAddendOp) {
2481 AddendOp =
II.getOperand(1);
2482 Mul =
II.getOperand(2);
2484 AddendOp =
II.getOperand(2);
2485 Mul =
II.getOperand(1);
2490 return std::nullopt;
2492 if (!
Mul->hasOneUse())
2493 return std::nullopt;
2496 if (
II.getType()->isFPOrFPVectorTy()) {
2501 return std::nullopt;
2503 return std::nullopt;
2508 if (MergeIntoAddendOp)
2518static std::optional<Instruction *>
2520 Value *Pred =
II.getOperand(0);
2521 Value *PtrOp =
II.getOperand(1);
2522 Type *VecTy =
II.getType();
2526 Load->copyMetadata(
II);
2537static std::optional<Instruction *>
2539 Value *VecOp =
II.getOperand(0);
2540 Value *Pred =
II.getOperand(1);
2541 Value *PtrOp =
II.getOperand(2);
2545 Store->copyMetadata(
II);
2557 case Intrinsic::aarch64_sve_fmul_u:
2558 return Instruction::BinaryOps::FMul;
2559 case Intrinsic::aarch64_sve_fadd_u:
2560 return Instruction::BinaryOps::FAdd;
2561 case Intrinsic::aarch64_sve_fsub_u:
2562 return Instruction::BinaryOps::FSub;
2564 return Instruction::BinaryOpsEnd;
2568static std::optional<Instruction *>
2571 if (
II.isStrictFP())
2572 return std::nullopt;
2574 auto *OpPredicate =
II.getOperand(0);
2576 if (BinOpCode == Instruction::BinaryOpsEnd ||
2578 return std::nullopt;
2580 BinOpCode,
II.getOperand(1),
II.getOperand(2),
II.getFastMathFlags());
2584static std::optional<Instruction *>
2586 assert(
II.getIntrinsicID() == Intrinsic::aarch64_sve_mla_u &&
2587 "Expected MLA_U intrinsic");
2588 Value *Acc =
II.getArgOperand(1);
2589 Value *MulOp0 =
II.getArgOperand(2);
2590 Value *MulOp1 =
II.getArgOperand(3);
2604 return std::nullopt;
2610 Intrinsic::aarch64_sve_mla>(
2614 Intrinsic::aarch64_sve_mad>(
2617 return std::nullopt;
2620static std::optional<Instruction *>
2624 Intrinsic::aarch64_sve_fmla>(IC,
II,
2629 Intrinsic::aarch64_sve_fmad>(IC,
II,
2634 Intrinsic::aarch64_sve_fmla>(IC,
II,
2637 return std::nullopt;
2640static std::optional<Instruction *>
2644 Intrinsic::aarch64_sve_fmla>(IC,
II,
2649 Intrinsic::aarch64_sve_fmad>(IC,
II,
2654 Intrinsic::aarch64_sve_fmla_u>(
2660static std::optional<Instruction *>
2664 Intrinsic::aarch64_sve_fmls>(IC,
II,
2669 Intrinsic::aarch64_sve_fnmsb>(
2674 Intrinsic::aarch64_sve_fmls>(IC,
II,
2677 return std::nullopt;
2680static std::optional<Instruction *>
2684 Intrinsic::aarch64_sve_fmls>(IC,
II,
2689 Intrinsic::aarch64_sve_fnmsb>(
2694 Intrinsic::aarch64_sve_fmls_u>(
2703 Intrinsic::aarch64_sve_mls>(
2706 return std::nullopt;
2711 Value *UnpackArg =
II.getArgOperand(0);
2713 bool IsSigned =
II.getIntrinsicID() == Intrinsic::aarch64_sve_sunpkhi ||
2714 II.getIntrinsicID() == Intrinsic::aarch64_sve_sunpklo;
2727 return std::nullopt;
2731 auto *OpVal =
II.getOperand(0);
2732 auto *OpIndices =
II.getOperand(1);
2739 SplatValue->getValue().uge(VTy->getElementCount().getKnownMinValue()))
2740 return std::nullopt;
2755 Type *RetTy =
II.getType();
2756 constexpr Intrinsic::ID FromSVB = Intrinsic::aarch64_sve_convert_from_svbool;
2757 constexpr Intrinsic::ID ToSVB = Intrinsic::aarch64_sve_convert_to_svbool;
2761 if ((
match(
II.getArgOperand(0),
2768 if (TyA ==
B->getType() &&
2773 TyA->getMinNumElements());
2779 return std::nullopt;
2787 if (
match(
II.getArgOperand(0),
2792 II, (
II.getIntrinsicID() == Intrinsic::aarch64_sve_zip1 ?
A :
B));
2794 return std::nullopt;
2797static std::optional<Instruction *>
2799 Value *Mask =
II.getOperand(0);
2800 Value *BasePtr =
II.getOperand(1);
2801 Value *Index =
II.getOperand(2);
2812 BasePtr->getPointerAlignment(
II.getDataLayout());
2815 BasePtr, IndexBase);
2822 return std::nullopt;
2825static std::optional<Instruction *>
2827 Value *Val =
II.getOperand(0);
2828 Value *Mask =
II.getOperand(1);
2829 Value *BasePtr =
II.getOperand(2);
2830 Value *Index =
II.getOperand(3);
2840 BasePtr->getPointerAlignment(
II.getDataLayout());
2843 BasePtr, IndexBase);
2849 return std::nullopt;
2855 Value *Pred =
II.getOperand(0);
2856 Value *Vec =
II.getOperand(1);
2857 Value *DivVec =
II.getOperand(2);
2861 if (!SplatConstantInt)
2862 return std::nullopt;
2866 if (DivisorValue == -1)
2867 return std::nullopt;
2868 if (DivisorValue == 1)
2874 Intrinsic::aarch64_sve_asrd, {
II.getType()}, {Pred, Vec, DivisorLog2});
2881 Intrinsic::aarch64_sve_asrd, {
II.getType()}, {Pred, Vec, DivisorLog2});
2883 Intrinsic::aarch64_sve_neg, {ASRD->getType()}, {ASRD, Pred, ASRD});
2887 return std::nullopt;
2891 size_t VecSize = Vec.
size();
2896 size_t HalfVecSize = VecSize / 2;
2900 if (*
LHS !=
nullptr && *
RHS !=
nullptr) {
2908 if (*
LHS ==
nullptr && *
RHS !=
nullptr)
2926 return std::nullopt;
2933 Elts[Idx->getValue().getZExtValue()] = InsertElt->getOperand(1);
2934 CurrentInsertElt = InsertElt->getOperand(0);
2940 return std::nullopt;
2944 for (
size_t I = 0;
I < Elts.
size();
I++) {
2945 if (Elts[
I] ==
nullptr)
2950 if (InsertEltChain ==
nullptr)
2951 return std::nullopt;
2957 unsigned PatternWidth = IIScalableTy->getScalarSizeInBits() * Elts.
size();
2958 unsigned PatternElementCount = IIScalableTy->getScalarSizeInBits() *
2959 IIScalableTy->getMinNumElements() /
2964 auto *WideShuffleMaskTy =
2975 auto NarrowBitcast =
2988 return std::nullopt;
2993 Value *Pred =
II.getOperand(0);
2994 Value *Vec =
II.getOperand(1);
2995 Value *Shift =
II.getOperand(2);
2998 Value *AbsPred, *MergedValue;
3004 return std::nullopt;
3012 return std::nullopt;
3017 return std::nullopt;
3020 {
II.getType()}, {Pred, Vec, Shift});
3027 Value *Vec =
II.getOperand(0);
3032 return std::nullopt;
3038 auto *NI =
II.getNextNode();
3041 return !
I->mayReadOrWriteMemory() && !
I->mayHaveSideEffects();
3043 while (LookaheadThreshold-- && CanSkipOver(NI)) {
3044 auto *NIBB = NI->getParent();
3045 NI = NI->getNextNode();
3047 if (
auto *SuccBB = NIBB->getUniqueSuccessor())
3048 NI = &*SuccBB->getFirstNonPHIOrDbgOrLifetime();
3054 if (NextII &&
II.isIdenticalTo(NextII))
3057 return std::nullopt;
3065 {II.getType(), II.getOperand(0)->getType()},
3066 {II.getOperand(0), II.getOperand(1)}));
3073 if (PredPattern == AArch64SVEPredPattern::all ||
3074 PredPattern == AArch64SVEPredPattern::pow2)
3076 return std::nullopt;
3082 Value *Passthru =
II.getOperand(0);
3090 auto *Mask = ConstantInt::get(Ty, MaskValue);
3096 return std::nullopt;
3099static std::optional<Instruction *>
3106 return std::nullopt;
3109std::optional<Instruction *>
3120 case Intrinsic::aarch64_dmb:
3122 case Intrinsic::aarch64_neon_fmaxnm:
3123 case Intrinsic::aarch64_neon_fminnm:
3125 case Intrinsic::aarch64_sve_convert_from_svbool:
3127 case Intrinsic::aarch64_sve_dup:
3129 case Intrinsic::aarch64_sve_dup_x:
3131 case Intrinsic::aarch64_sve_cmpne:
3132 case Intrinsic::aarch64_sve_cmpne_wide:
3134 case Intrinsic::aarch64_sve_rdffr:
3136 case Intrinsic::aarch64_sve_lasta:
3137 case Intrinsic::aarch64_sve_lastb:
3139 case Intrinsic::aarch64_sve_clasta_n:
3140 case Intrinsic::aarch64_sve_clastb_n:
3142 case Intrinsic::aarch64_sve_cntd:
3144 case Intrinsic::aarch64_sve_cntw:
3146 case Intrinsic::aarch64_sve_cnth:
3148 case Intrinsic::aarch64_sve_cntb:
3150 case Intrinsic::aarch64_sme_cntsd:
3152 case Intrinsic::aarch64_sve_ptest_any:
3153 case Intrinsic::aarch64_sve_ptest_first:
3154 case Intrinsic::aarch64_sve_ptest_last:
3156 case Intrinsic::aarch64_sve_fadd:
3158 case Intrinsic::aarch64_sve_fadd_u:
3160 case Intrinsic::aarch64_sve_fmul_u:
3162 case Intrinsic::aarch64_sve_fsub:
3164 case Intrinsic::aarch64_sve_fsub_u:
3166 case Intrinsic::aarch64_sve_add:
3168 case Intrinsic::aarch64_sve_add_u:
3170 Intrinsic::aarch64_sve_mla_u>(
3172 case Intrinsic::aarch64_sve_mla_u:
3174 case Intrinsic::aarch64_sve_sub:
3176 case Intrinsic::aarch64_sve_sub_u:
3178 Intrinsic::aarch64_sve_mls_u>(
3180 case Intrinsic::aarch64_sve_tbl:
3182 case Intrinsic::aarch64_sve_uunpkhi:
3183 case Intrinsic::aarch64_sve_uunpklo:
3184 case Intrinsic::aarch64_sve_sunpkhi:
3185 case Intrinsic::aarch64_sve_sunpklo:
3187 case Intrinsic::aarch64_sve_uzp1:
3189 case Intrinsic::aarch64_sve_zip1:
3190 case Intrinsic::aarch64_sve_zip2:
3192 case Intrinsic::aarch64_sve_ld1_gather_index:
3194 case Intrinsic::aarch64_sve_st1_scatter_index:
3196 case Intrinsic::aarch64_sve_ld1:
3198 case Intrinsic::aarch64_sve_st1:
3200 case Intrinsic::aarch64_sve_sdiv:
3202 case Intrinsic::aarch64_sve_sel:
3204 case Intrinsic::aarch64_sve_srshl:
3206 case Intrinsic::aarch64_sve_dupq_lane:
3208 case Intrinsic::aarch64_sve_insr:
3210 case Intrinsic::aarch64_sve_whilelo:
3212 case Intrinsic::aarch64_sve_ptrue:
3214 case Intrinsic::aarch64_sve_uxtb:
3216 case Intrinsic::aarch64_sve_uxth:
3218 case Intrinsic::aarch64_sve_uxtw:
3220 case Intrinsic::aarch64_sme_in_streaming_mode:
3224 return std::nullopt;
3231 SimplifyAndSetOp)
const {
3232 switch (
II.getIntrinsicID()) {
3235 case Intrinsic::aarch64_neon_fcvtxn:
3236 case Intrinsic::aarch64_neon_rshrn:
3237 case Intrinsic::aarch64_neon_sqrshrn:
3238 case Intrinsic::aarch64_neon_sqrshrun:
3239 case Intrinsic::aarch64_neon_sqshrn:
3240 case Intrinsic::aarch64_neon_sqshrun:
3241 case Intrinsic::aarch64_neon_sqxtn:
3242 case Intrinsic::aarch64_neon_sqxtun:
3243 case Intrinsic::aarch64_neon_uqrshrn:
3244 case Intrinsic::aarch64_neon_uqshrn:
3245 case Intrinsic::aarch64_neon_uqxtn:
3246 SimplifyAndSetOp(&
II, 0, OrigDemandedElts, UndefElts);
3250 return std::nullopt;
3254 return ST->isSVEAvailable() || (ST->isSVEorStreamingSVEAvailable() &&
3264 if (ST->useSVEForFixedLengthVectors() &&
3267 std::max(ST->getMinSVEVectorSizeInBits(), 128u));
3268 else if (ST->isNeonAvailable())
3273 if (ST->isSVEAvailable() || (ST->isSVEorStreamingSVEAvailable() &&
3282bool AArch64TTIImpl::isSingleExtWideningInstruction(
3284 Type *SrcOverrideTy)
const {
3299 (DstEltSize != 16 && DstEltSize != 32 && DstEltSize != 64))
3302 Type *SrcTy = SrcOverrideTy;
3304 case Instruction::Add:
3305 case Instruction::Sub: {
3314 if (Opcode == Instruction::Sub)
3338 assert(SrcTy &&
"Expected some SrcTy");
3340 unsigned SrcElTySize = SrcTyL.second.getScalarSizeInBits();
3346 DstTyL.first * DstTyL.second.getVectorMinNumElements();
3348 SrcTyL.first * SrcTyL.second.getVectorMinNumElements();
3352 return NumDstEls == NumSrcEls && 2 * SrcElTySize == DstEltSize;
3355Type *AArch64TTIImpl::isBinExtWideningInstruction(
unsigned Opcode,
Type *DstTy,
3357 Type *SrcOverrideTy)
const {
3358 if (Opcode != Instruction::Add && Opcode != Instruction::Sub &&
3359 Opcode != Instruction::Mul)
3369 (DstEltSize != 16 && DstEltSize != 32 && DstEltSize != 64))
3372 auto getScalarSizeWithOverride = [&](
const Value *
V) {
3378 ->getScalarSizeInBits();
3381 unsigned MaxEltSize = 0;
3384 unsigned EltSize0 = getScalarSizeWithOverride(Args[0]);
3385 unsigned EltSize1 = getScalarSizeWithOverride(Args[1]);
3386 MaxEltSize = std::max(EltSize0, EltSize1);
3389 unsigned EltSize0 = getScalarSizeWithOverride(Args[0]);
3390 unsigned EltSize1 = getScalarSizeWithOverride(Args[1]);
3393 if (EltSize0 >= DstEltSize / 2 || EltSize1 >= DstEltSize / 2)
3395 MaxEltSize = DstEltSize / 2;
3396 }
else if (Opcode == Instruction::Mul &&
3409 getScalarSizeWithOverride(
isa<ZExtInst>(Args[0]) ? Args[0] : Args[1]);
3413 if (MaxEltSize * 2 > DstEltSize)
3431 if (!Src->isVectorTy() || !TLI->isTypeLegal(TLI->getValueType(
DL, Src)) ||
3432 (Src->isScalableTy() && !ST->hasSVE2()))
3442 if (AddUser && AddUser->getOpcode() == Instruction::Add)
3446 if (!Shr || Shr->getOpcode() != Instruction::LShr)
3450 if (!Trunc || Trunc->getOpcode() != Instruction::Trunc ||
3451 Src->getScalarSizeInBits() !=
3475 int ISD = TLI->InstructionOpcodeToISD(Opcode);
3479 if (
I &&
I->hasOneUser()) {
3482 if (
Type *ExtTy = isBinExtWideningInstruction(
3483 SingleUser->getOpcode(), Dst, Operands,
3484 Src !=
I->getOperand(0)->getType() ? Src :
nullptr)) {
3497 if (isSingleExtWideningInstruction(
3498 SingleUser->getOpcode(), Dst, Operands,
3499 Src !=
I->getOperand(0)->getType() ? Src :
nullptr)) {
3503 if (SingleUser->getOpcode() == Instruction::Add) {
3504 if (
I == SingleUser->getOperand(1) ||
3506 cast<CastInst>(SingleUser->getOperand(1))->getOpcode() == Opcode))
3521 EVT SrcTy = TLI->getValueType(
DL, Src);
3522 EVT DstTy = TLI->getValueType(
DL, Dst);
3524 if (!SrcTy.isSimple() || !DstTy.
isSimple())
3529 if (!ST->hasSVE2() && !ST->isStreamingSVEAvailable() &&
3558 EVT WiderTy = SrcTy.
bitsGT(DstTy) ? SrcTy : DstTy;
3561 ST->useSVEForFixedLengthVectors(WiderTy)) {
3562 std::pair<InstructionCost, MVT> LT =
3564 unsigned NumElements =
3580 const unsigned int SVE_EXT_COST = 1;
3581 const unsigned int SVE_FCVT_COST = 1;
3582 const unsigned int SVE_UNPACK_ONCE = 4;
3583 const unsigned int SVE_UNPACK_TWICE = 16;
3712 SVE_EXT_COST + SVE_FCVT_COST},
3717 SVE_EXT_COST + SVE_FCVT_COST},
3724 SVE_EXT_COST + SVE_FCVT_COST},
3728 SVE_EXT_COST + SVE_FCVT_COST},
3734 SVE_EXT_COST + SVE_FCVT_COST},
3737 SVE_EXT_COST + SVE_FCVT_COST},
3742 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3744 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3754 SVE_EXT_COST + SVE_FCVT_COST},
3759 SVE_EXT_COST + SVE_FCVT_COST},
3772 SVE_EXT_COST + SVE_FCVT_COST},
3776 SVE_EXT_COST + SVE_FCVT_COST},
3788 SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3790 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3792 SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3794 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3798 SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3800 SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3816 SVE_EXT_COST + SVE_FCVT_COST},
3821 SVE_EXT_COST + SVE_FCVT_COST},
3832 SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3834 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3836 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3838 SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3840 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3842 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3846 SVE_EXT_COST + SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3848 SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3850 SVE_EXT_COST + SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3852 SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
4077 if (ST->hasFullFP16())
4089 Src->getScalarType(), CCH,
CostKind) +
4097 ST->isSVEorStreamingSVEAvailable() &&
4098 TLI->getTypeAction(Src->getContext(), SrcTy) ==
4100 TLI->getTypeAction(Dst->getContext(), DstTy) ==
4109 Opcode, LegalTy, Src, CCH,
CostKind,
I);
4112 return Part1 + Part2;
4119 ST->isSVEorStreamingSVEAvailable() && TLI->isTypeLegal(DstTy))
4131 assert((Opcode == Instruction::SExt || Opcode == Instruction::ZExt) &&
4144 CostKind, Index,
nullptr,
nullptr);
4148 auto DstVT = TLI->getValueType(
DL, Dst);
4149 auto SrcVT = TLI->getValueType(
DL, Src);
4154 if (!VecLT.second.isVector() || !TLI->isTypeLegal(DstVT))
4160 if (DstVT.getFixedSizeInBits() < SrcVT.getFixedSizeInBits())
4170 case Instruction::SExt:
4175 case Instruction::ZExt:
4176 if (DstVT.getSizeInBits() != 64u || SrcVT.getSizeInBits() == 32u)
4189 return Opcode == Instruction::PHI ? 0 : 1;
4198 ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx,
4207 if (!LT.second.isVector())
4212 if (LT.second.isFixedLengthVector()) {
4213 unsigned Width = LT.second.getVectorNumElements();
4214 Index = Index % Width;
4229 if (ST->hasFastLD1Single())
4241 : ST->getVectorInsertExtractBaseCost() + 1;
4265 auto ExtractCanFuseWithFmul = [&]() {
4272 auto IsAllowedScalarTy = [&](
const Type *
T) {
4273 return T->isFloatTy() ||
T->isDoubleTy() ||
4274 (
T->isHalfTy() && ST->hasFullFP16());
4278 auto IsUserFMulScalarTy = [](
const Value *EEUser) {
4281 return BO && BO->getOpcode() == BinaryOperator::FMul &&
4282 !BO->getType()->isVectorTy();
4287 auto IsExtractLaneEquivalentToZero = [&](
unsigned Idx,
unsigned EltSz) {
4291 return Idx == 0 || (RegWidth != 0 && (Idx * EltSz) % RegWidth == 0);
4300 DenseMap<User *, unsigned> UserToExtractIdx;
4301 for (
auto *U :
Scalar->users()) {
4302 if (!IsUserFMulScalarTy(U))
4306 UserToExtractIdx[
U];
4308 if (UserToExtractIdx.
empty())
4310 for (
auto &[S, U, L] : ScalarUserAndIdx) {
4311 for (
auto *U : S->users()) {
4312 if (UserToExtractIdx.
contains(U)) {
4314 auto *Op0 =
FMul->getOperand(0);
4315 auto *Op1 =
FMul->getOperand(1);
4316 if ((Op0 == S && Op1 == S) || Op0 != S || Op1 != S) {
4317 UserToExtractIdx[
U] =
L;
4323 for (
auto &[U, L] : UserToExtractIdx) {
4335 return !EE->users().empty() &&
all_of(EE->users(), [&](
const User *U) {
4336 if (!IsUserFMulScalarTy(U))
4341 const auto *BO = cast<BinaryOperator>(U);
4342 const auto *OtherEE = dyn_cast<ExtractElementInst>(
4343 BO->getOperand(0) == EE ? BO->getOperand(1) : BO->getOperand(0));
4345 const auto *IdxOp = dyn_cast<ConstantInt>(OtherEE->getIndexOperand());
4348 return IsExtractLaneEquivalentToZero(
4349 cast<ConstantInt>(OtherEE->getIndexOperand())
4352 OtherEE->getType()->getScalarSizeInBits());
4360 if (Opcode == Instruction::ExtractElement && (
I || Scalar) &&
4361 ExtractCanFuseWithFmul())
4366 :
ST->getVectorInsertExtractBaseCost();
4375 if (Opcode == Instruction::InsertElement && Index == 0 && Op0 &&
4378 return getVectorInstrCostHelper(Opcode, Val,
CostKind, Index,
nullptr,
4384 Value *Scalar,
ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx,
4386 return getVectorInstrCostHelper(Opcode, Val,
CostKind, Index,
nullptr, Scalar,
4387 ScalarUserAndIdx, VIC);
4394 return getVectorInstrCostHelper(
I.getOpcode(), Val,
CostKind, Index, &
I,
4401 unsigned Index)
const {
4413 : ST->getVectorInsertExtractBaseCost() + 1;
4422 if (Ty->getElementType()->isFloatingPointTy())
4425 unsigned VecInstCost =
4427 return DemandedElts.
popcount() * (Insert + Extract) * VecInstCost;
4434 if (!Ty->getScalarType()->isHalfTy() && !Ty->getScalarType()->isBFloatTy())
4435 return std::nullopt;
4436 if (Ty->getScalarType()->isHalfTy() && ST->hasFullFP16())
4437 return std::nullopt;
4439 if (CanUseSVE && ST->hasSVEB16B16() && ST->isNonStreamingSVEorSME2Available())
4440 return std::nullopt;
4447 Cost += InstCost(PromotedTy);
4470 Op2Info, Args, CxtI);
4474 int ISD = TLI->InstructionOpcodeToISD(Opcode);
4481 Ty,
CostKind, Op1Info, Op2Info,
true,
4484 [&](
Type *PromotedTy) {
4488 return *PromotedCost;
4491 if (Ty->getScalarType()->isFP128Ty())
4499 if (
Type *ExtTy = isBinExtWideningInstruction(Opcode, Ty, Args)) {
4515 if (LT.second == MVT::v2i64) {
4595 auto VT = TLI->getValueType(
DL, Ty);
4596 if (VT.isScalarInteger() && VT.getSizeInBits() <= 64) {
4600 : (3 * AsrCost + AddCost);
4602 return MulCost + AsrCost + 2 * AddCost;
4604 }
else if (VT.isVector()) {
4614 if (Ty->isScalableTy() && ST->hasSVE())
4615 Cost += 2 * AsrCost;
4620 ? (LT.second.getScalarType() == MVT::i64 ? 1 : 2) * AsrCost
4624 }
else if (LT.second == MVT::v2i64) {
4625 return VT.getVectorNumElements() *
4632 if (Ty->isScalableTy() && ST->hasSVE())
4633 return MulCost + 2 * AddCost + 2 * AsrCost;
4634 return 2 * MulCost + AddCost + AsrCost + UsraCost;
4639 LT.second.isFixedLengthVector()) {
4649 return ExtractCost + InsertCost +
4657 auto VT = TLI->getValueType(
DL, Ty);
4673 bool HasMULH = VT == MVT::i64 || LT.second == MVT::nxv2i64 ||
4674 LT.second == MVT::nxv4i32 || LT.second == MVT::nxv8i16 ||
4675 LT.second == MVT::nxv16i8;
4676 bool Is128bit = LT.second.is128BitVector();
4688 (HasMULH ? 0 : ShrCost) +
4689 AddCost * 2 + ShrCost;
4690 return DivCost + (
ISD ==
ISD::UREM ? MulCost + AddCost : 0);
4697 if (!VT.isVector() && VT.getSizeInBits() > 64)
4701 Opcode, Ty,
CostKind, Op1Info, Op2Info);
4703 if (TLI->isOperationLegalOrCustom(
ISD, LT.second) && ST->hasSVE()) {
4707 Ty->getPrimitiveSizeInBits().getFixedValue() < 128) {
4717 if (
nullptr != Entry)
4722 if (LT.second.getScalarType() == MVT::i8)
4724 else if (LT.second.getScalarType() == MVT::i16)
4736 Opcode, Ty->getScalarType(),
CostKind, Op1Info, Op2Info);
4737 return (4 + DivCost) * VTy->getNumElements();
4743 -1,
nullptr,
nullptr);
4766 if ((Ty->isFloatTy() || Ty->isDoubleTy() ||
4767 (Ty->isHalfTy() && ST->hasFullFP16())) &&
4776 if (!Ty->getScalarType()->isFP128Ty())
4783 if (!Ty->getScalarType()->isFP128Ty())
4784 return 2 * LT.first;
4791 if (!Ty->isVectorTy())
4807 int MaxMergeDistance = 64;
4811 return NumVectorInstToHideOverhead;
4821 unsigned Opcode1,
unsigned Opcode2)
const {
4824 if (!
Sched.hasInstrSchedModel())
4828 Sched.getSchedClassDesc(
TII->get(Opcode1).getSchedClass());
4830 Sched.getSchedClassDesc(
TII->get(Opcode2).getSchedClass());
4836 "Cannot handle variant scheduling classes without an MI");
4852 const int AmortizationCost = 20;
4860 VecPred = CurrentPred;
4868 static const auto ValidMinMaxTys = {
4869 MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32,
4870 MVT::v4i32, MVT::v2i64, MVT::v2f32, MVT::v4f32, MVT::v2f64};
4871 static const auto ValidFP16MinMaxTys = {MVT::v4f16, MVT::v8f16};
4875 (ST->hasFullFP16() &&
4881 {Instruction::Select, MVT::v2i1, MVT::v2f32, 2},
4882 {Instruction::Select, MVT::v2i1, MVT::v2f64, 2},
4883 {Instruction::Select, MVT::v4i1, MVT::v4f32, 2},
4884 {Instruction::Select, MVT::v4i1, MVT::v4f16, 2},
4885 {Instruction::Select, MVT::v8i1, MVT::v8f16, 2},
4886 {Instruction::Select, MVT::v16i1, MVT::v16i16, 16},
4887 {Instruction::Select, MVT::v8i1, MVT::v8i32, 8},
4888 {Instruction::Select, MVT::v16i1, MVT::v16i32, 16},
4889 {Instruction::Select, MVT::v4i1, MVT::v4i64, 4 * AmortizationCost},
4890 {Instruction::Select, MVT::v8i1, MVT::v8i64, 8 * AmortizationCost},
4891 {Instruction::Select, MVT::v16i1, MVT::v16i64, 16 * AmortizationCost}};
4893 EVT SelCondTy = TLI->getValueType(
DL, CondTy);
4894 EVT SelValTy = TLI->getValueType(
DL, ValTy);
4903 if (Opcode == Instruction::FCmp) {
4905 ValTy,
CostKind, Op1Info, Op2Info,
false,
4907 false, [&](
Type *PromotedTy) {
4919 return *PromotedCost;
4923 if (LT.second.getScalarType() != MVT::f64 &&
4924 LT.second.getScalarType() != MVT::f32 &&
4925 LT.second.getScalarType() != MVT::f16)
4930 unsigned Factor = 1;
4931 if (!CondTy->isVectorTy() &&
4945 AArch64::FCMEQv4f32))
4957 TLI->isTypeLegal(TLI->getValueType(
DL, ValTy)) &&
4976 Op1Info, Op2Info,
I);
4982 if (ST->requiresStrictAlign()) {
4987 Options.AllowOverlappingLoads =
true;
4988 Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
4993 Options.LoadSizes = {8, 4, 2, 1};
4994 Options.AllowedTailExpansions = {3, 5, 6};
4999 return ST->hasSVE();
5005 switch (MICA.
getID()) {
5006 case Intrinsic::masked_scatter:
5007 case Intrinsic::masked_gather:
5009 case Intrinsic::masked_load:
5010 case Intrinsic::masked_expandload:
5011 case Intrinsic::masked_store:
5025 if (!LT.first.isValid())
5030 if (VT->getElementType()->isIntegerTy(1))
5041 if (MICA.
getID() == Intrinsic::masked_expandload) {
5057 if (LT.first > 1 && LT.second.getScalarSizeInBits() > 8)
5058 return MemOpCost * 2;
5067 assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
5068 "Should be called on only load or stores.");
5070 case Instruction::Load:
5073 return ST->getGatherOverhead();
5075 case Instruction::Store:
5078 return ST->getScatterOverhead();
5089 unsigned Opcode = (MICA.
getID() == Intrinsic::masked_gather ||
5090 MICA.
getID() == Intrinsic::vp_gather)
5092 : Instruction::Store;
5102 if (!LT.first.isValid())
5106 if (!LT.second.isVector() ||
5108 VT->getElementType()->isIntegerTy(1))
5118 ElementCount LegalVF = LT.second.getVectorElementCount();
5121 {TTI::OK_AnyValue, TTI::OP_None},
I);
5137 EVT VT = TLI->getValueType(
DL, Ty,
true);
5139 if (VT == MVT::Other ||
5145 if (!LT.first.isValid())
5155 (VTy->getElementType()->isIntegerTy(1) &&
5156 !VTy->getElementCount().isKnownMultipleOf(
5167 if (ST->isMisaligned128StoreSlow() && Opcode == Instruction::Store &&
5168 LT.second.is128BitVector() && Alignment <
Align(16)) {
5174 const int AmortizationCost = 6;
5176 return LT.first * 2 * AmortizationCost;
5180 if (Ty->isPtrOrPtrVectorTy())
5185 if (Ty->getScalarSizeInBits() != LT.second.getScalarSizeInBits()) {
5187 if (VT == MVT::v4i8)
5194 if (!
isPowerOf2_32(EltSize) || EltSize < 8 || EltSize > 64 ||
5209 while (!TypeWorklist.
empty()) {
5231 bool UseMaskForCond,
bool UseMaskForGaps)
const {
5232 assert(Factor >= 2 &&
"Invalid interleave factor");
5247 if (!VecTy->
isScalableTy() && (UseMaskForCond || UseMaskForGaps))
5250 if (!UseMaskForGaps && Factor <= TLI->getMaxSupportedInterleaveFactor()) {
5251 unsigned MinElts = VecVTy->getElementCount().getKnownMinValue();
5254 VecVTy->getElementCount().divideCoefficientBy(Factor));
5260 if (MinElts % Factor == 0 &&
5261 TLI->isLegalInterleavedAccessType(SubVecTy,
DL, UseScalable))
5262 return Factor * TLI->getNumInterleavedAccesses(SubVecTy,
DL, UseScalable);
5267 UseMaskForCond, UseMaskForGaps);
5274 for (
auto *
I : Tys) {
5275 if (!
I->isVectorTy())
5286 Align Alignment)
const {
5293 return (ST->isSVEAvailable() && ST->hasSVE2p2()) ||
5294 (ST->isSVEorStreamingSVEAvailable() && ST->hasSME2p2());
5300 return ST->getMaxInterleaveFactor();
5310 enum { MaxStridedLoads = 7 };
5312 int StridedLoads = 0;
5315 for (
const auto BB : L->blocks()) {
5316 for (
auto &
I : *BB) {
5322 if (L->isLoopInvariant(PtrValue))
5327 if (!LSCEVAddRec || !LSCEVAddRec->
isAffine())
5336 if (StridedLoads > MaxStridedLoads / 2)
5337 return StridedLoads;
5340 return StridedLoads;
5343 int StridedLoads = countStridedLoads(L, SE);
5345 <<
" strided loads\n");
5361 unsigned *FinalSize) {
5365 for (
auto *BB : L->getBlocks()) {
5366 for (
auto &
I : *BB) {
5372 if (!Cost.isValid())
5376 if (LoopCost > Budget)
5398 if (MaxTC > 0 && MaxTC <= 32)
5409 if (Blocks.
size() != 2)
5431 if (!L->isInnermost() || L->getNumBlocks() > 8)
5435 if (!L->getExitBlock())
5441 bool HasParellelizableReductions =
5442 L->getNumBlocks() == 1 &&
5443 any_of(L->getHeader()->phis(),
5445 return canParallelizeReductionWhenUnrolling(Phi, L, &SE);
5448 if (HasParellelizableReductions &&
5470 if (HasParellelizableReductions) {
5481 if (Header == Latch) {
5484 unsigned Width = 10;
5490 unsigned MaxInstsPerLine = 16;
5492 unsigned BestUC = 1;
5493 unsigned SizeWithBestUC = BestUC *
Size;
5495 unsigned SizeWithUC = UC *
Size;
5496 if (SizeWithUC > 48)
5498 if ((SizeWithUC % MaxInstsPerLine) == 0 ||
5499 (SizeWithBestUC % MaxInstsPerLine) < (SizeWithUC % MaxInstsPerLine)) {
5501 SizeWithBestUC = BestUC *
Size;
5511 for (
auto *BB : L->blocks()) {
5512 for (
auto &
I : *BB) {
5522 for (
auto *U :
I.users())
5524 LoadedValuesPlus.
insert(U);
5531 return LoadedValuesPlus.
contains(
SI->getOperand(0));
5557 auto *I = dyn_cast<Instruction>(V);
5558 return I && DependsOnLoopLoad(I, Depth + 1);
5565 DependsOnLoopLoad(
I, 0)) {
5581 if (L->getLoopDepth() > 1)
5592 for (
auto *BB : L->getBlocks()) {
5593 for (
auto &
I : *BB) {
5597 if (IsVectorized &&
I.getType()->isVectorTy())
5614 if (ST->isAppleMLike())
5616 else if (ST->getProcFamily() == AArch64Subtarget::Falkor &&
5638 !ST->getSchedModel().isOutOfOrder()) {
5661 bool CanCreate)
const {
5665 case Intrinsic::aarch64_neon_st1x2:
5666 case Intrinsic::aarch64_neon_st1x3:
5667 case Intrinsic::aarch64_neon_st1x4:
5668 case Intrinsic::aarch64_neon_st2:
5669 case Intrinsic::aarch64_neon_st3:
5670 case Intrinsic::aarch64_neon_st4: {
5673 if (!CanCreate || !ST)
5675 unsigned NumElts = Inst->
arg_size() - 1;
5676 if (ST->getNumElements() != NumElts)
5678 for (
unsigned i = 0, e = NumElts; i != e; ++i) {
5684 for (
unsigned i = 0, e = NumElts; i != e; ++i) {
5686 Res = Builder.CreateInsertValue(Res, L, i);
5690 case Intrinsic::aarch64_neon_ld1x2:
5691 case Intrinsic::aarch64_neon_ld1x3:
5692 case Intrinsic::aarch64_neon_ld1x4:
5693 case Intrinsic::aarch64_neon_ld2:
5694 case Intrinsic::aarch64_neon_ld3:
5695 case Intrinsic::aarch64_neon_ld4:
5696 if (Inst->
getType() == ExpectedType)
5707 case Intrinsic::aarch64_neon_ld1x2:
5708 case Intrinsic::aarch64_neon_ld1x3:
5709 case Intrinsic::aarch64_neon_ld1x4:
5710 case Intrinsic::aarch64_neon_ld2:
5711 case Intrinsic::aarch64_neon_ld3:
5712 case Intrinsic::aarch64_neon_ld4:
5713 Info.ReadMem =
true;
5714 Info.WriteMem =
false;
5717 case Intrinsic::aarch64_neon_st1x2:
5718 case Intrinsic::aarch64_neon_st1x3:
5719 case Intrinsic::aarch64_neon_st1x4:
5720 case Intrinsic::aarch64_neon_st2:
5721 case Intrinsic::aarch64_neon_st3:
5722 case Intrinsic::aarch64_neon_st4:
5723 Info.ReadMem =
false;
5724 Info.WriteMem =
true;
5733 case Intrinsic::aarch64_neon_ld1x2:
5734 case Intrinsic::aarch64_neon_st1x2:
5735 Info.MatchingId = Intrinsic::aarch64_neon_ld1x2;
5737 case Intrinsic::aarch64_neon_ld1x3:
5738 case Intrinsic::aarch64_neon_st1x3:
5739 Info.MatchingId = Intrinsic::aarch64_neon_ld1x3;
5741 case Intrinsic::aarch64_neon_ld1x4:
5742 case Intrinsic::aarch64_neon_st1x4:
5743 Info.MatchingId = Intrinsic::aarch64_neon_ld1x4;
5745 case Intrinsic::aarch64_neon_ld2:
5746 case Intrinsic::aarch64_neon_st2:
5747 Info.MatchingId = Intrinsic::aarch64_neon_ld2;
5749 case Intrinsic::aarch64_neon_ld3:
5750 case Intrinsic::aarch64_neon_st3:
5751 Info.MatchingId = Intrinsic::aarch64_neon_ld3;
5753 case Intrinsic::aarch64_neon_ld4:
5754 case Intrinsic::aarch64_neon_st4:
5755 Info.MatchingId = Intrinsic::aarch64_neon_ld4;
5767 const Instruction &
I,
bool &AllowPromotionWithoutCommonHeader)
const {
5768 bool Considerable =
false;
5769 AllowPromotionWithoutCommonHeader =
false;
5772 Type *ConsideredSExtType =
5774 if (
I.getType() != ConsideredSExtType)
5778 for (
const User *U :
I.users()) {
5780 Considerable =
true;
5784 if (GEPInst->getNumOperands() > 2) {
5785 AllowPromotionWithoutCommonHeader =
true;
5790 return Considerable;
5841 if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16())
5851 return LegalizationCost + 2;
5861 LegalizationCost *= LT.first - 1;
5864 int ISD = TLI->InstructionOpcodeToISD(Opcode);
5873 return LegalizationCost + 2;
5881 std::optional<FastMathFlags> FMF,
5897 return BaseCost + FixedVTy->getNumElements();
5914 MVT MTy = LT.second;
5915 int ISD = TLI->InstructionOpcodeToISD(Opcode);
5963 MTy.
isVector() && (EltTy->isFloatTy() || EltTy->isDoubleTy() ||
5964 (EltTy->isHalfTy() && ST->hasFullFP16()))) {
5976 return (LT.first - 1) +
Log2_32(NElts);
5981 return (LT.first - 1) + Entry->Cost;
5993 if (LT.first != 1) {
5999 ExtraCost *= LT.first - 1;
6002 auto Cost = ValVTy->getElementType()->isIntegerTy(1) ? 2 : Entry->Cost;
6003 return Cost + ExtraCost;
6011 unsigned Opcode,
bool IsUnsigned,
Type *ResTy,
VectorType *VecTy,
6013 EVT VecVT = TLI->getValueType(
DL, VecTy);
6014 EVT ResVT = TLI->getValueType(
DL, ResTy);
6024 if (((LT.second == MVT::v8i8 || LT.second == MVT::v16i8) &&
6026 ((LT.second == MVT::v4i16 || LT.second == MVT::v8i16) &&
6028 ((LT.second == MVT::v2i32 || LT.second == MVT::v4i32) &&
6030 return (LT.first - 1) * 2 + 2;
6041 EVT VecVT = TLI->getValueType(
DL, VecTy);
6042 EVT ResVT = TLI->getValueType(
DL, ResTy);
6045 RedOpcode == Instruction::Add) {
6051 if ((LT.second == MVT::v8i8 || LT.second == MVT::v16i8) &&
6053 return LT.first + 2;
6088 EVT PromotedVT = LT.second.getScalarType() == MVT::i1
6089 ? TLI->getPromotedVTForPredicate(
EVT(LT.second))
6103 if (LT.second.getScalarType() == MVT::i1) {
6112 assert(Entry &&
"Illegal Type for Splice");
6113 LegalizationCost += Entry->Cost;
6114 return LegalizationCost * LT.first;
6118 unsigned Opcode,
Type *InputTypeA,
Type *InputTypeB,
Type *AccumType,
6127 if ((Opcode != Instruction::Add && Opcode != Instruction::Sub &&
6128 Opcode != Instruction::FAdd && Opcode != Instruction::FSub) ||
6135 assert(FMF &&
"Missing FastMathFlags for floating-point partial reduction");
6136 if (!FMF->allowReassoc() || !FMF->allowContract())
6140 "FastMathFlags only apply to floating-point partial reductions");
6144 (!BinOp || (OpBExtend !=
TTI::PR_None && InputTypeB)) &&
6145 "Unexpected values for OpBExtend or InputTypeB");
6149 if (BinOp && ((*BinOp != Instruction::Mul && *BinOp != Instruction::FMul) ||
6150 InputTypeA != InputTypeB))
6153 bool IsUSDot = OpBExtend !=
TTI::PR_None && OpAExtend != OpBExtend;
6156 if (IsUSDot && !ST->hasMatMulInt8() && !ST->hasDotProd())
6169 auto TC = TLI->getTypeConversion(AccumVectorType->
getContext(),
6178 if (TLI->getTypeAction(AccumVectorType->
getContext(), TC.second) !=
6184 std::pair<InstructionCost, MVT> AccumLT =
6186 std::pair<InstructionCost, MVT> InputLT =
6190 auto IsSupported = [&](
bool SVEPred,
bool NEONPred) ->
bool {
6191 return (ST->isSVEorStreamingSVEAvailable() && SVEPred) ||
6192 (AccumLT.second.isFixedLengthVector() &&
6193 AccumLT.second.getSizeInBits() <= 128 && ST->isNeonAvailable() &&
6197 bool IsSub = Opcode == Instruction::Sub || Opcode == Instruction::FSub;
6205 if (AccumLT.second.getScalarType() == MVT::i32 &&
6206 InputLT.second.getScalarType() == MVT::i8) {
6208 if (!IsUSDot && IsSupported(
true, ST->hasDotProd()))
6209 return Cost + INegCost;
6211 if (IsUSDot && IsSupported(ST->hasMatMulInt8(), ST->hasMatMulInt8()))
6212 return Cost + INegCost;
6217 if (IsUSDot && IsSupported(
false, ST->hasDotProd()))
6218 return Cost * 3 + INegCost;
6221 if (ST->isSVEorStreamingSVEAvailable() && !IsUSDot) {
6223 if (AccumLT.second.getScalarType() == MVT::i64 &&
6224 InputLT.second.getScalarType() == MVT::i16)
6225 return Cost + INegCost;
6228 if (AccumLT.second.getScalarType() == MVT::i32 &&
6229 InputLT.second.getScalarType() == MVT::i16 &&
6230 (ST->hasSVE2p1() || ST->hasSME2()) && !IsSub)
6233 if (AccumLT.second.getScalarType() == MVT::i64 &&
6234 InputLT.second.getScalarType() == MVT::i8)
6240 return Cost + INegCost;
6243 if (AccumLT.second.getScalarType() == MVT::i16 &&
6244 InputLT.second.getScalarType() == MVT::i8 &&
6245 (ST->hasSVE2p3() || ST->hasSME2p3()) && !IsSub)
6251 if (Opcode == Instruction::FAdd && !IsSub &&
6252 IsSupported(ST->hasSME2() || ST->hasSVE2p1(), ST->hasF16F32DOT()) &&
6253 AccumLT.second.getScalarType() == MVT::f32 &&
6254 InputLT.second.getScalarType() == MVT::f16)
6258 if (Ratio == 2 && !IsUSDot) {
6259 MVT InVT = InputLT.second.getScalarType();
6262 if (IsSupported(ST->hasSVE2() || ST->hasSME(),
true) &&
6267 if (IsSupported(ST->hasSVE2(), ST->hasFP16FML()) && InVT == MVT::f16)
6271 if (IsSupported(ST->hasSVE2p1() || ST->hasSME2(),
false) &&
6272 InVT == MVT::bf16 && IsSub)
6282 if (IsSupported(ST->hasBF16(), ST->hasBF16()) && InVT == MVT::bf16)
6283 return Cost * 2 + FNegCost;
6287 AccumType, VF, OpAExtend, OpBExtend,
6299 "Expected the Mask to match the return size if given");
6301 "Expected the same scalar types");
6307 LT.second.getScalarSizeInBits() * Mask.size() > 128 &&
6308 SrcTy->getScalarSizeInBits() == LT.second.getScalarSizeInBits() &&
6309 Mask.size() > LT.second.getVectorNumElements() && !Index && !SubTp) {
6317 return std::max<InstructionCost>(1, LT.first / 4);
6325 Mask, 4, SrcTy->getElementCount().getKnownMinValue() * 2) ||
6327 Mask, 3, SrcTy->getElementCount().getKnownMinValue() * 2)))
6330 unsigned TpNumElts = Mask.size();
6331 unsigned LTNumElts = LT.second.getVectorNumElements();
6332 unsigned NumVecs = (TpNumElts + LTNumElts - 1) / LTNumElts;
6334 LT.second.getVectorElementCount());
6336 std::map<std::tuple<unsigned, unsigned, SmallVector<int>>,
InstructionCost>
6338 for (
unsigned N = 0;
N < NumVecs;
N++) {
6342 unsigned Source1 = -1U, Source2 = -1U;
6343 unsigned NumSources = 0;
6344 for (
unsigned E = 0; E < LTNumElts; E++) {
6345 int MaskElt = (
N * LTNumElts + E < TpNumElts) ? Mask[
N * LTNumElts + E]
6354 unsigned Source = MaskElt / LTNumElts;
6355 if (NumSources == 0) {
6358 }
else if (NumSources == 1 && Source != Source1) {
6361 }
else if (NumSources >= 2 && Source != Source1 && Source != Source2) {
6367 if (Source == Source1)
6369 else if (Source == Source2)
6370 NMask.
push_back(MaskElt % LTNumElts + LTNumElts);
6379 PreviousCosts.insert({std::make_tuple(Source1, Source2, NMask), 0});
6390 NTp, NTp, NMask,
CostKind, 0,
nullptr, Args,
6393 Result.first->second = NCost;
6407 if (IsExtractSubvector && LT.second.isFixedLengthVector()) {
6408 if (LT.second.getFixedSizeInBits() >= 128 &&
6410 LT.second.getVectorNumElements() / 2) {
6413 if (Index == (
int)LT.second.getVectorNumElements() / 2)
6427 if (!Mask.empty() && LT.second.isFixedLengthVector() &&
6430 return M.value() < 0 || M.value() == (int)M.index();
6436 !Mask.empty() && SrcTy->getPrimitiveSizeInBits().isNonZero() &&
6437 SrcTy->getPrimitiveSizeInBits().isKnownMultipleOf(
6446 if ((ST->hasSVE2p1() || ST->hasSME2p1()) &&
6447 ST->isSVEorStreamingSVEAvailable() &&
6452 if (ST->isSVEorStreamingSVEAvailable() &&
6466 if (IsLoad && LT.second.isVector() &&
6468 LT.second.getVectorElementCount()))
6474 if (Mask.size() == 4 &&
6476 (SrcTy->getScalarSizeInBits() == 16 ||
6477 SrcTy->getScalarSizeInBits() == 32) &&
6478 all_of(Mask, [](
int E) {
return E < 8; }))
6484 if (LT.second.isFixedLengthVector() &&
6485 LT.second.getVectorNumElements() == Mask.size() &&
6491 (
isZIPMask(Mask, LT.second.getVectorNumElements(), Unused, Unused) ||
6492 isTRNMask(Mask, LT.second.getVectorNumElements(), Unused, Unused) ||
6493 isUZPMask(Mask, LT.second.getVectorNumElements(), Unused) ||
6494 isREVMask(Mask, LT.second.getScalarSizeInBits(),
6495 LT.second.getVectorNumElements(), 16) ||
6496 isREVMask(Mask, LT.second.getScalarSizeInBits(),
6497 LT.second.getVectorNumElements(), 32) ||
6498 isREVMask(Mask, LT.second.getScalarSizeInBits(),
6499 LT.second.getVectorNumElements(), 64) ||
6502 [&Mask](
int M) {
return M < 0 || M == Mask[0]; })))
6631 return LT.first * Entry->Cost;
6640 LT.second.getSizeInBits() <= 128 && SubTp) {
6642 if (SubLT.second.isVector()) {
6643 int NumElts = LT.second.getVectorNumElements();
6644 int NumSubElts = SubLT.second.getVectorNumElements();
6645 if ((Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
6651 if (IsExtractSubvector)
6668 if (
getPtrStride(*PSE, AccessTy, Ptr, TheLoop, DT, Strides,
6687 return ST->useFixedOverScalableIfEqualCost();
6691 return ST->getEpilogueVectorizationMinVF();
6726 unsigned NumInsns = 0;
6728 NumInsns += BB->size();
6738 int64_t Scale,
unsigned AddrSpace)
const {
6766 if (
I->getOpcode() == Instruction::Or &&
6770 if (
I->getOpcode() == Instruction::Add ||
6771 I->getOpcode() == Instruction::Sub)
6796 return all_equal(Shuf->getShuffleMask());
6803 bool AllowSplat =
false) {
6808 auto areTypesHalfed = [](
Value *FullV,
Value *HalfV) {
6809 auto *FullTy = FullV->
getType();
6810 auto *HalfTy = HalfV->getType();
6812 2 * HalfTy->getPrimitiveSizeInBits().getFixedValue();
6815 auto extractHalf = [](
Value *FullV,
Value *HalfV) {
6818 return FullVT->getNumElements() == 2 * HalfVT->getNumElements();
6822 Value *S1Op1 =
nullptr, *S2Op1 =
nullptr;
6836 if ((S1Op1 && (!areTypesHalfed(S1Op1, Op1) || !extractHalf(S1Op1, Op1))) ||
6837 (S2Op1 && (!areTypesHalfed(S2Op1, Op2) || !extractHalf(S2Op1, Op2))))
6851 if ((M1Start != 0 && M1Start != (NumElements / 2)) ||
6852 (M2Start != 0 && M2Start != (NumElements / 2)))
6854 if (S1Op1 && S2Op1 && M1Start != M2Start)
6864 return Ext->getType()->getScalarSizeInBits() ==
6865 2 * Ext->getOperand(0)->getType()->getScalarSizeInBits();
6879 Value *VectorOperand =
nullptr;
6896 if (!
GEP ||
GEP->getNumOperands() != 2)
6900 Value *Offsets =
GEP->getOperand(1);
6903 if (
Base->getType()->isVectorTy() || !Offsets->getType()->isVectorTy())
6909 if (OffsetsInst->getType()->getScalarSizeInBits() > 32 &&
6910 OffsetsInst->getOperand(0)->getType()->getScalarSizeInBits() <= 32)
6911 Ops.push_back(&
GEP->getOperandUse(1));
6947 switch (
II->getIntrinsicID()) {
6948 case Intrinsic::aarch64_neon_smull:
6949 case Intrinsic::aarch64_neon_umull:
6952 Ops.push_back(&
II->getOperandUse(0));
6953 Ops.push_back(&
II->getOperandUse(1));
6958 case Intrinsic::fma:
6959 case Intrinsic::fmuladd:
6966 Ops.push_back(&
II->getOperandUse(0));
6968 Ops.push_back(&
II->getOperandUse(1));
6971 case Intrinsic::aarch64_neon_sqdmull:
6972 case Intrinsic::aarch64_neon_sqdmulh:
6973 case Intrinsic::aarch64_neon_sqrdmulh:
6976 Ops.push_back(&
II->getOperandUse(0));
6978 Ops.push_back(&
II->getOperandUse(1));
6979 return !
Ops.empty();
6980 case Intrinsic::aarch64_neon_fmlal:
6981 case Intrinsic::aarch64_neon_fmlal2:
6982 case Intrinsic::aarch64_neon_fmlsl:
6983 case Intrinsic::aarch64_neon_fmlsl2:
6986 Ops.push_back(&
II->getOperandUse(1));
6988 Ops.push_back(&
II->getOperandUse(2));
6989 return !
Ops.empty();
6990 case Intrinsic::aarch64_sve_ptest_first:
6991 case Intrinsic::aarch64_sve_ptest_last:
6993 if (IIOp->getIntrinsicID() == Intrinsic::aarch64_sve_ptrue)
6994 Ops.push_back(&
II->getOperandUse(0));
6995 return !
Ops.empty();
6996 case Intrinsic::aarch64_sme_write_horiz:
6997 case Intrinsic::aarch64_sme_write_vert:
6998 case Intrinsic::aarch64_sme_writeq_horiz:
6999 case Intrinsic::aarch64_sme_writeq_vert: {
7001 if (!Idx || Idx->getOpcode() != Instruction::Add)
7003 Ops.push_back(&
II->getOperandUse(1));
7006 case Intrinsic::aarch64_sme_read_horiz:
7007 case Intrinsic::aarch64_sme_read_vert:
7008 case Intrinsic::aarch64_sme_readq_horiz:
7009 case Intrinsic::aarch64_sme_readq_vert:
7010 case Intrinsic::aarch64_sme_ld1b_vert:
7011 case Intrinsic::aarch64_sme_ld1h_vert:
7012 case Intrinsic::aarch64_sme_ld1w_vert:
7013 case Intrinsic::aarch64_sme_ld1d_vert:
7014 case Intrinsic::aarch64_sme_ld1q_vert:
7015 case Intrinsic::aarch64_sme_st1b_vert:
7016 case Intrinsic::aarch64_sme_st1h_vert:
7017 case Intrinsic::aarch64_sme_st1w_vert:
7018 case Intrinsic::aarch64_sme_st1d_vert:
7019 case Intrinsic::aarch64_sme_st1q_vert:
7020 case Intrinsic::aarch64_sme_ld1b_horiz:
7021 case Intrinsic::aarch64_sme_ld1h_horiz:
7022 case Intrinsic::aarch64_sme_ld1w_horiz:
7023 case Intrinsic::aarch64_sme_ld1d_horiz:
7024 case Intrinsic::aarch64_sme_ld1q_horiz:
7025 case Intrinsic::aarch64_sme_st1b_horiz:
7026 case Intrinsic::aarch64_sme_st1h_horiz:
7027 case Intrinsic::aarch64_sme_st1w_horiz:
7028 case Intrinsic::aarch64_sme_st1d_horiz:
7029 case Intrinsic::aarch64_sme_st1q_horiz: {
7031 if (!Idx || Idx->getOpcode() != Instruction::Add)
7033 Ops.push_back(&
II->getOperandUse(3));
7036 case Intrinsic::aarch64_neon_pmull:
7039 Ops.push_back(&
II->getOperandUse(0));
7040 Ops.push_back(&
II->getOperandUse(1));
7042 case Intrinsic::aarch64_neon_pmull64:
7044 II->getArgOperand(1)))
7046 Ops.push_back(&
II->getArgOperandUse(0));
7047 Ops.push_back(&
II->getArgOperandUse(1));
7049 case Intrinsic::masked_gather:
7052 Ops.push_back(&
II->getArgOperandUse(0));
7054 case Intrinsic::masked_scatter:
7057 Ops.push_back(&
II->getArgOperandUse(1));
7064 auto ShouldSinkCondition = [](
Value *
Cond,
7069 if (
II->getIntrinsicID() != Intrinsic::vector_reduce_or ||
7073 Ops.push_back(&
II->getOperandUse(0));
7077 switch (
I->getOpcode()) {
7078 case Instruction::GetElementPtr:
7079 case Instruction::Add:
7080 case Instruction::Sub:
7082 for (
unsigned Op = 0;
Op <
I->getNumOperands(); ++
Op) {
7084 Ops.push_back(&
I->getOperandUse(
Op));
7089 case Instruction::Select: {
7090 if (!ShouldSinkCondition(
I->getOperand(0),
Ops))
7093 Ops.push_back(&
I->getOperandUse(0));
7096 case Instruction::UncondBr:
7098 case Instruction::CondBr: {
7102 Ops.push_back(&
I->getOperandUse(0));
7105 case Instruction::FMul:
7110 Ops.push_back(&
I->getOperandUse(0));
7112 Ops.push_back(&
I->getOperandUse(1));
7122 case Instruction::Xor:
7125 if (
I->getType()->isVectorTy() && ST->isNeonAvailable()) {
7127 ST->isSVEorStreamingSVEAvailable() && (ST->hasSVE2() || ST->hasSME());
7132 case Instruction::And:
7133 case Instruction::Or:
7136 if (
I->getOpcode() == Instruction::Or &&
7141 if (!(
I->getType()->isVectorTy() && ST->hasNEON()) &&
7144 for (
auto &
Op :
I->operands()) {
7156 Ops.push_back(&Not);
7157 Ops.push_back(&InsertElt);
7167 if (!
I->getType()->isVectorTy())
7168 return !
Ops.empty();
7170 switch (
I->getOpcode()) {
7171 case Instruction::Sub:
7172 case Instruction::Add: {
7181 Ops.push_back(&Ext1->getOperandUse(0));
7182 Ops.push_back(&Ext2->getOperandUse(0));
7185 Ops.push_back(&
I->getOperandUse(0));
7186 Ops.push_back(&
I->getOperandUse(1));
7190 case Instruction::Or: {
7193 if (ST->hasNEON()) {
7207 if (
I->getParent() != MainAnd->
getParent() ||
7212 if (
I->getParent() != IA->getParent() ||
7213 I->getParent() != IB->getParent())
7218 Ops.push_back(&
I->getOperandUse(0));
7219 Ops.push_back(&
I->getOperandUse(1));
7228 case Instruction::Mul: {
7229 auto ShouldSinkSplatForIndexedVariant = [](
Value *V) {
7232 if (Ty->isScalableTy())
7236 return Ty->getScalarSizeInBits() == 16 || Ty->getScalarSizeInBits() == 32;
7239 int NumZExts = 0, NumSExts = 0;
7240 for (
auto &
Op :
I->operands()) {
7247 auto *ExtOp = Ext->getOperand(0);
7248 if (
isSplatShuffle(ExtOp) && ShouldSinkSplatForIndexedVariant(ExtOp))
7249 Ops.push_back(&Ext->getOperandUse(0));
7257 if (Ext->getOperand(0)->getType()->getScalarSizeInBits() * 2 <
7258 I->getType()->getScalarSizeInBits())
7295 if (!ElementConstant || !ElementConstant->
isZero())
7298 unsigned Opcode = OperandInstr->
getOpcode();
7299 if (Opcode == Instruction::SExt)
7301 else if (Opcode == Instruction::ZExt)
7306 unsigned Bitwidth =
I->getType()->getScalarSizeInBits();
7316 Ops.push_back(&Insert->getOperandUse(1));
7322 if (!
Ops.empty() && (NumSExts == 2 || NumZExts == 2))
7326 if (!ShouldSinkSplatForIndexedVariant(
I))
7331 Ops.push_back(&
I->getOperandUse(0));
7333 Ops.push_back(&
I->getOperandUse(1));
7335 return !
Ops.empty();
7337 case Instruction::FMul: {
7339 if (
I->getType()->isScalableTy())
7340 return !
Ops.empty();
7344 return !
Ops.empty();
7348 Ops.push_back(&
I->getOperandUse(0));
7350 Ops.push_back(&
I->getOperandUse(1));
7351 return !
Ops.empty();
static bool isAllActivePredicate(const SelectionDAG &DAG, SDValue N)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file provides a helper that implements much of the TTI interface in terms of the target-independ...
static Error reportError(StringRef Message)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
Cost tables and simple lookup functions.
This file defines the DenseMap class.
static Value * getCondition(Instruction *I)
const HexagonInstrInfo * TII
This file provides the interface for the instcombine pass implementation.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
This file defines the LoopVectorizationLegality class.
static const Function * getCalledFunction(const Value *V)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
const SmallVectorImpl< MachineOperand > & Cond
static uint64_t getBits(uint64_t Val, int Start, int End)
static SymbolRef::Type getType(const Symbol *Sym)
This file describes how to lower LLVM code to machine code.
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
unsigned getVectorInsertExtractBaseCost() const
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={}, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const override
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getCostOfKeepingLiveOverCall(ArrayRef< Type * > Tys) const override
unsigned getMaxInterleaveFactor(ElementCount VF) const override
InstructionCost getMaskedMemoryOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const
InstructionCost getGatherScatterOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const
bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const override
InstructionCost getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE, const SCEV *Ptr, TTI::TargetCostKind CostKind) const override
bool isExtPartOfAvgExpr(const Instruction *ExtUser, Type *Dst, Type *Src) const
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const override
InstructionCost getIntImmCost(int64_t Val) const
Calculate the cost of materializing a 64-bit value.
std::optional< InstructionCost > getFP16BF16PromoteCost(Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info, TTI::OperandValueInfo Op2Info, bool IncludeTrunc, bool CanUseSVE, std::function< InstructionCost(Type *)> InstCost) const
FP16 and BF16 operations are lowered to fptrunc(op(fpext, fpext) if the architecture features are not...
bool prefersVectorizedAddressing() const override
InstructionCost getIndexedVectorInstrCostFromEnd(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index) const override
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
InstructionCost getMulAccReductionCost(bool IsUnsigned, unsigned RedOpcode, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const override
InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr) const override
bool isElementTypeLegalForScalableVector(Type *Ty) const override
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
InstructionCost getPartialReductionCost(unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType, ElementCount VF, TTI::PartialReductionExtendKind OpAExtend, TTI::PartialReductionExtendKind OpBExtend, std::optional< unsigned > BinOp, TTI::TargetCostKind CostKind, std::optional< FastMathFlags > FMF) const override
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const override
bool preferTailFoldingOverEpilogue(TailFoldingInfo *TFI) const override
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
APInt getPriorityMask(const Function &F) const override
bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const override
bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2) const override
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const override
Check if sinking I's operands to I's basic block is profitable, because the operands can be folded in...
std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const override
bool useNeonVector(const Type *Ty) const
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *ValTy, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
bool isLegalMaskedExpandLoad(Type *DataTy, Align Alignment) const override
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override
InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index, TTI::TargetCostKind CostKind) const override
unsigned getInlineCallPenalty(const Function *F, const CallBase &Call, unsigned DefaultCallPenalty) const override
bool areInlineCompatible(const Function *Caller, const Function *Callee) const override
unsigned getMaxNumElements(ElementCount VF) const
Try to return an estimate cost factor that can be used as a multiplier when scalarizing an operation ...
bool shouldTreatInstructionLikeSelect(const Instruction *I) const override
bool isMultiversionedFunction(const Function &F) const override
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const override
bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, ElementCount VF) const override
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const override
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const override
bool isLegalMaskedGatherScatter(Type *DataType) const
InstructionCost getBranchMispredictPenalty() const override
bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const override
See if I should be considered for address type promotion.
APInt getFeatureMask(const Function &F) const override
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override
bool areTypesABICompatible(const Function *Caller, const Function *Callee, ArrayRef< Type * > Types) const override
bool enableScalableVectorization() const override
InstructionCost getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const override
Value * getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType, bool CanCreate=true) const override
bool hasKnownLowerThroughputFromSchedulingModel(unsigned Opcode1, unsigned Opcode2) const
Check whether Opcode1 has less throughput according to the scheduling model than Opcode2.
unsigned getEpilogueVectorizationMinVF() const override
InstructionCost getSpliceCost(VectorType *Tp, int Index, TTI::TargetCostKind CostKind) const
InstructionCost getArithmeticReductionCostSVE(unsigned Opcode, VectorType *ValTy, TTI::TargetCostKind CostKind) const
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, StackOffset BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) const override
Return the cost of the scaling factor used in the addressing mode represented by AM for this target,...
bool preferFixedOverScalableIfEqualCost(bool IsEpilogue) const override
Class for arbitrary precision integers.
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
unsigned popcount() const
Count the number of bits set.
unsigned countLeadingOnes() const
void negate()
Negate this APInt in place.
LLVM_ABI APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
unsigned logBase2() const
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
int64_t getSExtValue() const
Get sign extended value.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
Get the array size.
LLVM Basic Block Representation.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const override
TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind, ArrayRef< int > Mask, VectorType *SrcTy, int &Index, VectorType *&SubTy) const
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I=nullptr, int64_t ScalableOffset=0) const override
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getScalarizationOverhead(VectorType *InTy, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={}, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const override
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind) const override
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
InstructionCost getMulAccReductionCost(bool IsUnsigned, unsigned RedOpcode, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind) const override
InstructionCost getIndexedVectorInstrCostFromEnd(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index) const override
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const
InstructionCost getPartialReductionCost(unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType, ElementCount VF, TTI::PartialReductionExtendKind OpAExtend, TTI::PartialReductionExtendKind OpBExtend, std::optional< unsigned > BinOp, TTI::TargetCostKind CostKind, std::optional< FastMathFlags > FMF) const override
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
InstructionCost getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const override
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
bool isTypeLegal(Type *Ty) const override
static BinaryOperator * CreateWithCopiedFlags(BinaryOps Opc, Value *V1, Value *V2, Value *CopyO, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Value * getArgOperand(unsigned i) const
unsigned arg_size() const
This class represents a function call, abstracting a target machine's calling convention.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
@ ICMP_SGE
signed greater or equal
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
static bool isIntPredicate(Predicate P)
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
static LLVM_ABI ConstantAggregateZero * get(Type *Ty)
This is the shared class of boolean and integer constants.
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
bool isZero() const
This is just a convenience method to make client code smaller for a common case.
const APInt & getValue() const
Return the constant as an APInt value reference.
static LLVM_ABI ConstantInt * getBool(LLVMContext &Context, bool V)
static LLVM_ABI Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
This is an important base class in LLVM.
LLVM_ABI Constant * getSplatValue(bool AllowPoison=false) const
If all elements of the vector constant have the same value, return that value.
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string, and methods for querying it.
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
static constexpr ElementCount getScalable(ScalarTy MinVal)
static constexpr ElementCount getFixed(ScalarTy MinVal)
constexpr bool isScalar() const
Exactly one element.
This provides a helper for copying FMF from an instruction or setting specified flags.
Convenience struct for specifying and reasoning about fast-math flags.
bool noSignedZeros() const
bool allowContract() const
Container class for subtarget features.
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
IntegerType * getIntNTy(unsigned N)
Fetch the type representing an N-bit integer.
Type * getDoubleTy()
Fetch the type representing a 64-bit floating point value.
LLVM_ABI Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains V broadcast to NumElts elements.
LLVM_ABI CallInst * CreateMaskedLoad(Type *Ty, Value *Ptr, Align Alignment, Value *Mask, Value *PassThru=nullptr, const Twine &Name="")
Create a call to Masked Load intrinsic.
LLVM_ABI Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Type * getHalfTy()
Fetch the type representing a 16-bit floating point value.
Value * CreateGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
Value * CreateBitOrPointerCast(Value *V, Type *DestTy, const Twine &Name="")
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Value * CreateBinOpFMF(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, FMFSource FMFSource, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
LLVM_ABI Value * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > OverloadTypes, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="", ArrayRef< OperandBundleDef > OpBundles={}, function_ref< void(CallInst *)> SetFn=[](CallInst *) {})
Variant to create a possibly constant-folded intrinsic.
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
LLVM_ABI CallInst * CreateMaskedStore(Value *Val, Value *Ptr, Align Alignment, Value *Mask)
Create a call to Masked Store intrinsic.
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Type * getFloatTy()
Fetch the type representing a 32-bit floating point value.
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Value * CreateInsertVector(Type *DstType, Value *SrcVec, Value *SubVec, Value *Idx, const Twine &Name="")
Create a call to the vector.insert intrinsic.
LLVM_ABI Value * CreateElementCount(Type *Ty, ElementCount EC)
Create an expression which evaluates to the number of elements in EC at runtime.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
This instruction inserts a single (scalar) element into a VectorType value.
The core instruction combiner logic.
virtual Instruction * eraseInstFromFunction(Instruction &I)=0
Combiner aware instruction erasure.
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)
Replace operand of instruction and add old operand to the worklist.
static InstructionCost getInvalid(CostType Val=0)
CostType getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
LLVM_ABI bool isCommutative() const LLVM_READONLY
Return true if the instruction is commutative:
LLVM_ABI FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
Class to represent integer types.
bool hasGroups() const
Returns true if we have any interleave groups.
const SmallVectorImpl< Type * > & getArgTypes() const
Type * getReturnType() const
const SmallVectorImpl< const Value * > & getArgs() const
const IntrinsicInst * getInst() const
Intrinsic::ID getID() const
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
Value * getPointerOperand()
iterator_range< block_iterator > blocks() const
RecurrenceSet & getFixedOrderRecurrences()
Return the fixed-order recurrences found in the loop.
DominatorTree * getDominatorTree() const
PredicatedScalarEvolution * getPredicatedScalarEvolution() const
const ReductionList & getReductionVars() const
Returns the reduction variables found in the loop.
Represents a single loop in the control flow graph.
const FeatureBitset & getFeatureBits() const
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
static MVT getScalableVectorVT(MVT VT, unsigned NumElements)
bool isFixedLengthVector() const
MVT getVectorElementType() const
Information for memory intrinsic cost model.
Align getAlignment() const
Type * getDataType() const
Intrinsic::ID getID() const
const Instruction * getInst() const
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Type * getRecurrenceType() const
Returns the type of the recurrence.
RecurKind getRecurrenceKind() const
This node represents a polynomial recurrence on the trip count of the specified loop.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
This class represents an analyzed expression in the program.
SMEAttrs is a utility class to parse the SME ACLE attributes on functions.
bool hasNonStreamingInterfaceAndBody() const
bool hasStreamingCompatibleInterface() const
bool hasStreamingInterfaceOrBody() const
bool isSMEABIRoutine() const
bool hasStreamingBody() const
void set(unsigned M, bool Enable=true)
SMECallAttrs is a utility class to hold the SMEAttrs for a callsite.
bool requiresPreservingZT0() const
bool requiresSMChange() const
bool requiresLazySave() const
bool requiresPreservingAllZAState() const
static LLVM_ABI ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
static ScalableVectorType * getDoubleElementsVectorType(ScalableVectorType *VTy)
The main scalar evolution driver.
LLVM_ABI const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
LLVM_ABI unsigned getSmallConstantTripMultiple(const Loop *L, const SCEV *ExitCount)
Returns the largest constant divisor of the trip count as a normal unsigned value,...
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI unsigned getSmallConstantMaxTripCount(const Loop *L, SmallVectorImpl< const SCEVPredicate * > *Predicates=nullptr)
Returns the upper bound of the loop trip count as a normal unsigned value.
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
const SCEV * getSymbolicMaxBackedgeTakenCount(const Loop *L)
When successful, this returns a SCEV that is greater than or equal to (i.e.
This instruction constructs a fixed permutation of two input vectors.
static LLVM_ABI bool isDeInterleaveMaskOfFactor(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
Check if the mask is a DE-interleave mask of the given factor Factor like: <Index,...
static LLVM_ABI bool isExtractSubvectorMask(ArrayRef< int > Mask, int NumSrcElts, int &Index)
Return true if this shuffle mask is an extract subvector mask.
static LLVM_ABI bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
static StackOffset getScalable(int64_t Scalable)
static StackOffset getFixed(int64_t Fixed)
An instruction for storing to memory.
Represent a constant reference to a string, i.e.
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Class to represent struct types.
TargetInstrInfo - Interface to description of machine instruction set.
std::pair< LegalizeTypeAction, EVT > LegalizeKind
LegalizeKind holds the legalization kind that needs to happen to EVT in order to type-legalize it.
const RTLIB::RuntimeLibcallsInfo & getRuntimeLibcallsInfo() const
Primary interface to the complete machine description for the target machine.
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
bool isVectorTy() const
True if this is an instance of VectorType.
LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
bool isPointerTy() const
True if this is an instance of PointerType.
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVM_ABI Type * getWithNewBitWidth(unsigned NewBitWidth) const
Given an integer or vector type, change the lane bitwidth to NewBitwidth, whilst keeping the old numb...
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
LLVM_ABI Type * getWithNewType(Type *EltTy) const
Given vector type, change the element type, whilst keeping the old number of elements.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
const Use & getOperandUse(unsigned i) const
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Base class of all SIMD vector types.
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
static VectorType * getInteger(VectorType *VTy)
This static method gets a VectorType with the same number of elements as the input type,...
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
Type * getElementType() const
constexpr ScalarTy getFixedValue() const
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
const ParentTy * getParent() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
void expandMOVImm(uint64_t Imm, unsigned BitSize, SmallVectorImpl< ImmInsnModel > &Insn)
Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more real move-immediate instructions to...
LLVM_ABI APInt getCpuSupportsMask(ArrayRef< StringRef > Features)
static constexpr unsigned SVEBitsPerBlock
LLVM_ABI APInt getFMVPriority(ArrayRef< StringRef > Features)
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
@ C
The default llvm calling convention, compatible with C.
ISD namespace - This namespace contains an enum which represents all of the SelectionDAG node types a...
@ ADD
Simple integer binary arithmetic operators.
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
@ FADD
Simple binary floating point operators.
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
@ SIGN_EXTEND
Conversion operators.
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ SHL
Shift and rotation operations.
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
auto m_Cmp()
Matches any compare instruction and ignore it.
BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)
Matches an And with LHS and RHS in either order.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
BinaryOp_match< LHS, RHS, Instruction::FMul > m_FMul(const LHS &L, const RHS &R)
bool match(Val *V, const Pattern &P)
match_bind< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
cst_pred_ty< is_nonnegative > m_NonNegative()
Match an integer or vector of non-negative values.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
IntrinsicID_match m_VScale()
Matches a call to llvm.vscale().
auto m_BinOp()
Match an arbitrary binary operation and ignore it.
auto m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
AnyBinaryOp_match< LHS, RHS, true > m_c_BinOp(const LHS &L, const RHS &R)
Matches a BinaryOperator with LHS and RHS in either order.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
BinOpPred_match< LHS, RHS, is_shift_op > m_Shift(const LHS &L, const RHS &R)
Matches shift operations.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
brc_match< Cond_t, match_bind< BasicBlock >, match_bind< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
auto m_Undef()
Match an arbitrary undef constant.
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
auto m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
LLVM_ABI Libcall getPOW(EVT RetVT)
getPOW - Return the POW_* value for the given types, or UNKNOWN_LIBCALL if there is none.
initializer< Ty > init(const Ty &Val)
LocationClass< Ty > location(Ty &L)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
FunctionAddr VTableAddr Value
std::optional< unsigned > isDUPQMask(ArrayRef< int > Mask, unsigned Segments, unsigned SegmentSize)
isDUPQMask - matches a splat of equivalent lanes within segments of a given number of elements.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
const CostTblEntryT< CostType > * CostTableLookup(ArrayRef< CostTblEntryT< CostType > > Tbl, int ISD, MVT Ty)
Find in cost table.
LLVM_ABI bool getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name)
Returns true if Name is applied to TheLoop and enabled.
bool isZIPMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut, unsigned &OperandOrderOut)
Return true for zip1 or zip2 masks of the form: <0, 8, 1, 9, 2, 10, 3, 11> (WhichResultOut = 0,...
TailFoldingOpts
An enum to describe what types of loops we should attempt to tail-fold: Disabled: None Reductions: Lo...
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
bool isDUPFirstSegmentMask(ArrayRef< int > Mask, unsigned Segments, unsigned SegmentSize)
isDUPFirstSegmentMask - matches a splat of the first 128b segment.
TypeConversionCostTblEntryT< unsigned > TypeConversionCostTblEntry
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
FunctionAddr VTableAddr uintptr_t uintptr_t Int32Ty
LLVM_ABI std::optional< const MDOperand * > findStringMetadataForLoop(const Loop *TheLoop, StringRef Name)
Find string metadata for loop.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
constexpr auto equal_to(T &&Arg)
Functor variant of std::equal_to that can be used as a UnaryPredicate in functional algorithms like a...
LLVM_ABI bool MaskedValueIsZero(const Value *V, const APInt &Mask, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if 'V & Mask' is known to be zero.
unsigned M1(unsigned Val)
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
unsigned getPerfectShuffleCost(llvm::ArrayRef< int > M)
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
bool isUZPMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut)
Return true for uzp1 or uzp2 masks of the form: <0, 2, 4, 6, 8, 10, 12, 14> or <1,...
bool isREVMask(ArrayRef< int > M, unsigned EltSize, unsigned NumElts, unsigned BlockSize)
isREVMask - Check if a vector shuffle corresponds to a REV instruction with the specified blocksize.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
constexpr int PoisonMaskElem
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
LLVM_ABI Value * simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a BinaryOperator, fold the result or return null.
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ Or
Bitwise or logical OR of integers.
@ FSub
Subtraction of floats.
@ FAddChainWithSubs
A chain of fadds and fsubs.
@ AnyOf
AnyOf reduction with select(cmp(),x,y) where one of (x,y) is loop invariant, and both x and y are int...
@ Xor
Bitwise or logical XOR of integers.
@ FindLast
FindLast reduction with select(cmp(),x,y) where x and y.
@ FMax
FP max implemented in terms of select(cmp()).
@ FMulAdd
Sum of float products with llvm.fmuladd(a * b + sum).
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ And
Bitwise or logical AND of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ FMin
FP min implemented in terms of select(cmp()).
@ Sub
Subtraction of integers.
@ AddChainWithSubs
A chain of adds and subs.
@ UMax
Unsigned integer max implemented in terms of select(cmp()).
DWARFExpression::Operation Op
CostTblEntryT< unsigned > CostTblEntry
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
unsigned getNumElementsFromSVEPredPattern(unsigned Pattern)
Return the number of active elements for VL1 to VL256 predicate pattern, zero for all other patterns.
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer list are equal or the list is empty.
Type * toVectorTy(Type *Scalar, ElementCount EC)
A helper function for converting Scalar types to vector types.
LLVM_ABI std::optional< int64_t > getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const DominatorTree &DT, const DenseMap< Value *, const SCEV * > &StridesMap=DenseMap< Value *, const SCEV * >(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of the access type size.
const TypeConversionCostTblEntryT< CostType > * ConvertCostTableLookup(ArrayRef< TypeConversionCostTblEntryT< CostType > > Tbl, int ISD, MVT Dst, MVT Src)
Find in type conversion cost table.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
bool isTRNMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut, unsigned &OperandOrderOut)
Return true for trn1 or trn2 masks of the form: <0, 8, 2, 10, 4, 12, 6, 14> (WhichResultOut = 0,...
unsigned getMatchingIROpode() const
bool inactiveLanesAreUnused() const
bool inactiveLanesAreNotDefined() const
bool hasMatchingUndefIntrinsic() const
static SVEIntrinsicInfo defaultMergingUnaryNarrowingTopOp()
static SVEIntrinsicInfo defaultZeroingOp()
bool hasGoverningPredicate() const
SVEIntrinsicInfo & setOperandIdxInactiveLanesTakenFrom(unsigned Index)
static SVEIntrinsicInfo defaultMergingOp(Intrinsic::ID IID=Intrinsic::not_intrinsic)
SVEIntrinsicInfo & setOperandIdxWithNoActiveLanes(unsigned Index)
unsigned getOperandIdxWithNoActiveLanes() const
SVEIntrinsicInfo & setInactiveLanesAreUnused()
SVEIntrinsicInfo & setInactiveLanesAreNotDefined()
SVEIntrinsicInfo & setGoverningPredicateOperandIdx(unsigned Index)
bool inactiveLanesTakenFromOperand() const
static SVEIntrinsicInfo defaultUndefOp()
bool hasOperandWithNoActiveLanes() const
Intrinsic::ID getMatchingUndefIntrinsic() const
SVEIntrinsicInfo & setResultIsZeroInitialized()
static SVEIntrinsicInfo defaultMergingUnaryOp()
SVEIntrinsicInfo & setMatchingUndefIntrinsic(Intrinsic::ID IID)
unsigned getGoverningPredicateOperandIdx() const
bool hasMatchingIROpode() const
bool resultIsZeroInitialized() const
SVEIntrinsicInfo & setMatchingIROpcode(unsigned Opcode)
unsigned getOperandIdxInactiveLanesTakenFrom() const
static SVEIntrinsicInfo defaultVoidOp(unsigned GPIndex)
This struct is a compact representation of a valid (non-zero power of two) alignment.
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
uint64_t getScalarSizeInBits() const
static LLVM_ABI EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
bool isFixedLengthVector() const
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Machine model for scheduling, bundling, and heuristics.
static LLVM_ABI double getReciprocalThroughput(const MCSubtargetInfo &STI, const MCSchedClassDesc &SCDesc)
Information about a load/store intrinsic defined by the target.
InterleavedAccessInfo * IAI
LoopVectorizationLegality * LVL
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...