#include "llvm/IR/IntrinsicsAArch64.h"

#define DEBUG_TYPE "aarch64tti"
41 "sve-prefer-fixed-over-scalable-if-equal",
cl::Hidden);
59 "Penalty of calling a function that requires a change to PSTATE.SM"));
63 cl::desc(
"Penalty of inlining a call that requires a change to PSTATE.SM"));
74 cl::desc(
"The cost of a histcnt instruction"));
78 cl::desc(
"The number of instructions to search for a redundant dmb"));
82 cl::desc(
"Threshold for forced unrolling of small loops in AArch64"));
class TailFoldingOption {

  bool NeedsDefault = true;

  void setNeedsDefault(bool V) { NeedsDefault = V; }

    assert((InitialBits == TailFoldingOpts::Disabled || !NeedsDefault) &&
           "Initial bits should only include one of "
           "(disabled|all|simple|default)");
    Bits = NeedsDefault ? DefaultBits : InitialBits;
    Bits &= ~DisableBits;
    errs() << "invalid argument '" << Opt
           << "' to -sve-tail-folding=; the option should be of the form\n"
              "  (disabled|all|default|simple)[+(reductions|recurrences"
              "|reverse|noreductions|norecurrences|noreverse)]\n";
  void operator=(const std::string &Val) {

    setNeedsDefault(false);

    StringRef(Val).split(TailFoldTypes, '+', -1, false);

    unsigned StartIdx = 1;
    if (TailFoldTypes[0] == "disabled")
      setInitialBits(TailFoldingOpts::Disabled);
    else if (TailFoldTypes[0] == "all")
      setInitialBits(TailFoldingOpts::All);
    else if (TailFoldTypes[0] == "default")
      setNeedsDefault(true);
    else if (TailFoldTypes[0] == "simple")
      setInitialBits(TailFoldingOpts::Simple);

      setInitialBits(TailFoldingOpts::Disabled);
    for (unsigned I = StartIdx; I < TailFoldTypes.size(); I++) {
      if (TailFoldTypes[I] == "reductions")
        setEnableBit(TailFoldingOpts::Reductions);
      else if (TailFoldTypes[I] == "recurrences")
        setEnableBit(TailFoldingOpts::Recurrences);
      else if (TailFoldTypes[I] == "reverse")
        setEnableBit(TailFoldingOpts::Reverse);
      else if (TailFoldTypes[I] == "noreductions")
        setDisableBit(TailFoldingOpts::Reductions);
      else if (TailFoldTypes[I] == "norecurrences")
        setDisableBit(TailFoldingOpts::Recurrences);
      else if (TailFoldTypes[I] == "noreverse")
        setDisableBit(TailFoldingOpts::Reverse);
196 "Control the use of vectorisation using tail-folding for SVE where the"
197 " option is specified in the form (Initial)[+(Flag1|Flag2|...)]:"
198 "\ndisabled (Initial) No loop types will vectorize using "
200 "\ndefault (Initial) Uses the default tail-folding settings for "
202 "\nall (Initial) All legal loop types will vectorize using "
204 "\nsimple (Initial) Use tail-folding for simple loops (not "
205 "reductions or recurrences)"
206 "\nreductions Use tail-folding for loops containing reductions"
207 "\nnoreductions Inverse of above"
208 "\nrecurrences Use tail-folding for loops containing fixed order "
210 "\nnorecurrences Inverse of above"
211 "\nreverse Use tail-folding for loops requiring reversed "
213 "\nnoreverse Inverse of above"),
      TTI->isMultiversionedFunction(F) ? "fmv-features" : "target-features";
  StringRef FeatureStr = F.getFnAttribute(AttributeStr).getValueAsString();
  FeatureStr.split(Features, ",");

  return F.hasFnAttribute("fmv-features");
      AArch64::FeatureExecuteOnly,
  FeatureBitset EffectiveCallerBits = CallerBits ^ InlineInverseFeatures;
  FeatureBitset EffectiveCalleeBits = CalleeBits ^ InlineInverseFeatures;

  return (EffectiveCallerBits & EffectiveCalleeBits) == EffectiveCalleeBits;
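
// Note on the check below: a fixed-width vector is treated as "wide" when its
// total bit size exceeds the 128 bits of a NEON register. This reading is
// inferred from the comparison itself rather than stated in surrounding code.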
  auto FVTy = dyn_cast<FixedVectorType>(Ty);
         FVTy->getScalarSizeInBits() * FVTy->getNumElements() > 128;

                                     unsigned DefaultCallPenalty) const {

  if (F == Call.getCaller())

  return DefaultCallPenalty;

         ST->isSVEorStreamingSVEAvailable() &&
         !ST->disableMaximizeScalableBandwidth();
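
// Scalar immediate costing (next function): the immediate is sign-extended to
// the next multiple of 64 bits and walked in 64-bit chunks, which appears to
// mirror how AArch64 materializes wide constants piecewise; the total is
// clamped to a minimum cost of 1.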
  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();

    ImmVal = Imm.sext((BitSize + 63) & ~0x3fU);

  for (unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {

  return std::max<InstructionCost>(1, Cost);
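
// For instruction operands, the opcode switch below seems to pick out which
// operand index (ImmIdx) can encode an immediate directly, so such immediates
// can be treated as free rather than costed as materialized constants.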
  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();

  unsigned ImmIdx = ~0U;

  case Instruction::GetElementPtr:
  case Instruction::Store:
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::URem:
  case Instruction::SRem:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::ICmp:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::Trunc:
  case Instruction::ZExt:
  case Instruction::SExt:
  case Instruction::IntToPtr:
  case Instruction::PtrToInt:
  case Instruction::BitCast:
  case Instruction::PHI:
  case Instruction::Call:
  case Instruction::Select:
  case Instruction::Ret:
  case Instruction::Load:

    int NumConstants = (BitSize + 63) / 64;
  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();

  if (IID >= Intrinsic::aarch64_addg && IID <= Intrinsic::aarch64_udiv)

  case Intrinsic::sadd_with_overflow:
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::usub_with_overflow:
  case Intrinsic::smul_with_overflow:
  case Intrinsic::umul_with_overflow:
    int NumConstants = (BitSize + 63) / 64;

  case Intrinsic::experimental_stackmap:
    if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
  case Intrinsic::experimental_patchpoint_void:
  case Intrinsic::experimental_patchpoint:
    if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
  case Intrinsic::experimental_gc_statepoint:
    if ((Idx < 5) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
  if (TyWidth == 32 || TyWidth == 64)

  unsigned TotalHistCnts = 1;

    unsigned EC = VTy->getElementCount().getKnownMinValue();

    unsigned LegalEltSize = EltSize <= 32 ? 32 : 64;

    if (EC == 2 || (LegalEltSize == 32 && EC == 4))

    TotalHistCnts = EC / NaturalVectorWidth;
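
// Per-intrinsic cost modelling below: each case either consults a fixed cost
// table keyed by legal MVTs or derives a cost from the legalized type (LT).
// The table entries appear to encode instruction counts, not cycle counts.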
  switch (ICA.getID()) {
  case Intrinsic::experimental_vector_histogram_add: {

  case Intrinsic::umin:
  case Intrinsic::umax:
  case Intrinsic::smin:
  case Intrinsic::smax: {
    static const auto ValidMinMaxTys = {MVT::v8i8,    MVT::v16i8,   MVT::v4i16,
                                        MVT::v8i16,   MVT::v2i32,   MVT::v4i32,
                                        MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32,

    if (LT.second == MVT::v2i64)

  case Intrinsic::scmp:
  case Intrinsic::ucmp: {
        {Intrinsic::scmp, MVT::i32, 3},
        {Intrinsic::scmp, MVT::i64, 3},
        {Intrinsic::scmp, MVT::v8i8, 3},
        {Intrinsic::scmp, MVT::v16i8, 3},
        {Intrinsic::scmp, MVT::v4i16, 3},
        {Intrinsic::scmp, MVT::v8i16, 3},
        {Intrinsic::scmp, MVT::v2i32, 3},
        {Intrinsic::scmp, MVT::v4i32, 3},
        {Intrinsic::scmp, MVT::v1i64, 3},
        {Intrinsic::scmp, MVT::v2i64, 3},

    return Entry->Cost * LT.first;
  case Intrinsic::sadd_sat:
  case Intrinsic::ssub_sat:
  case Intrinsic::uadd_sat:
  case Intrinsic::usub_sat: {
    static const auto ValidSatTys = {MVT::v8i8,  MVT::v16i8, MVT::v4i16,
                                     MVT::v8i16, MVT::v2i32, MVT::v4i32,

        LT.second.getScalarSizeInBits() == RetTy->getScalarSizeInBits() ? 1 : 4;
      return LT.first * Instrs;

    if (ST->isSVEAvailable() && VectorSize >= 128 && isPowerOf2_64(VectorSize))
      return LT.first * Instrs;

  case Intrinsic::abs: {
    static const auto ValidAbsTys = {MVT::v8i8,  MVT::v16i8,   MVT::v4i16,
                                     MVT::v8i16, MVT::v2i32,   MVT::v4i32,
                                     MVT::v2i64, MVT::nxv16i8, MVT::nxv8i16,
                                     MVT::nxv4i32, MVT::nxv2i64};

  case Intrinsic::bswap: {
    static const auto ValidAbsTys = {MVT::v4i16, MVT::v8i16, MVT::v2i32,
                                     MVT::v4i32, MVT::v2i64};
        LT.second.getScalarSizeInBits() == RetTy->getScalarSizeInBits())

  case Intrinsic::fmuladd: {
        (EltTy->isHalfTy() && ST->hasFullFP16()))

  case Intrinsic::stepvector: {
      Cost += AddCost * (LT.first - 1);
  case Intrinsic::vector_extract:
  case Intrinsic::vector_insert: {

    bool IsExtract = ICA.getID() == Intrinsic::vector_extract;
    EVT SubVecVT = IsExtract ? getTLI()->getValueType(DL, RetTy)

        getTLI()->getTypeConversion(C, SubVecVT);
        getTLI()->getTypeConversion(C, VecVT);
  case Intrinsic::bitreverse: {
        {Intrinsic::bitreverse, MVT::i32, 1},
        {Intrinsic::bitreverse, MVT::i64, 1},
        {Intrinsic::bitreverse, MVT::v8i8, 1},
        {Intrinsic::bitreverse, MVT::v16i8, 1},
        {Intrinsic::bitreverse, MVT::v4i16, 2},
        {Intrinsic::bitreverse, MVT::v8i16, 2},
        {Intrinsic::bitreverse, MVT::v2i32, 2},
        {Intrinsic::bitreverse, MVT::v4i32, 2},
        {Intrinsic::bitreverse, MVT::v1i64, 2},
        {Intrinsic::bitreverse, MVT::v2i64, 2},

    if (TLI->getValueType(DL, RetTy, true) == MVT::i8 ||
        TLI->getValueType(DL, RetTy, true) == MVT::i16)
      return LegalisationCost.first * Entry->Cost + 1;

    return LegalisationCost.first * Entry->Cost;
  case Intrinsic::ctpop: {
    if (!ST->hasNEON()) {

        RetTy->getScalarSizeInBits()

    return LT.first * Entry->Cost + ExtraCost;
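
// Overflow-checking arithmetic: the table below charges 1 for i32/i64 add/sub
// (plausibly a flag-setting add/sub plus a cheap flag read), 3 for the i8/i16
// variants that need extra extends and compares, and more for multiplies.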
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::usub_with_overflow:
  case Intrinsic::smul_with_overflow:
  case Intrinsic::umul_with_overflow: {
        {Intrinsic::sadd_with_overflow, MVT::i8, 3},
        {Intrinsic::uadd_with_overflow, MVT::i8, 3},
        {Intrinsic::sadd_with_overflow, MVT::i16, 3},
        {Intrinsic::uadd_with_overflow, MVT::i16, 3},
        {Intrinsic::sadd_with_overflow, MVT::i32, 1},
        {Intrinsic::uadd_with_overflow, MVT::i32, 1},
        {Intrinsic::sadd_with_overflow, MVT::i64, 1},
        {Intrinsic::uadd_with_overflow, MVT::i64, 1},
        {Intrinsic::ssub_with_overflow, MVT::i8, 3},
        {Intrinsic::usub_with_overflow, MVT::i8, 3},
        {Intrinsic::ssub_with_overflow, MVT::i16, 3},
        {Intrinsic::usub_with_overflow, MVT::i16, 3},
        {Intrinsic::ssub_with_overflow, MVT::i32, 1},
        {Intrinsic::usub_with_overflow, MVT::i32, 1},
        {Intrinsic::ssub_with_overflow, MVT::i64, 1},
        {Intrinsic::usub_with_overflow, MVT::i64, 1},
        {Intrinsic::smul_with_overflow, MVT::i8, 5},
        {Intrinsic::umul_with_overflow, MVT::i8, 4},
        {Intrinsic::smul_with_overflow, MVT::i16, 5},
        {Intrinsic::umul_with_overflow, MVT::i16, 4},
        {Intrinsic::smul_with_overflow, MVT::i32, 2},
        {Intrinsic::umul_with_overflow, MVT::i32, 2},
        {Intrinsic::smul_with_overflow, MVT::i64, 3},
        {Intrinsic::umul_with_overflow, MVT::i64, 3},
    EVT MTy = TLI->getValueType(DL, RetTy->getContainedType(0), true);
  case Intrinsic::fptosi_sat:
  case Intrinsic::fptoui_sat: {

    bool IsSigned = ICA.getID() == Intrinsic::fptosi_sat;

    EVT MTy = TLI->getValueType(DL, RetTy);

    if ((LT.second == MVT::f32 || LT.second == MVT::f64 ||
         LT.second == MVT::v2f32 || LT.second == MVT::v4f32 ||
         LT.second == MVT::v2f64)) {
          (LT.second == MVT::f64 && MTy == MVT::i32) ||
          (LT.second == MVT::f32 && MTy == MVT::i64)))

    if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16())

    if ((LT.second == MVT::f16 && MTy == MVT::i32) ||
        (LT.second == MVT::f16 && MTy == MVT::i64) ||
        ((LT.second == MVT::v4f16 || LT.second == MVT::v8f16) &&

    if ((LT.second.getScalarType() == MVT::f32 ||
         LT.second.getScalarType() == MVT::f64 ||
         LT.second.getScalarType() == MVT::f16) &&
      if (LT.second.isVector())
                                    LegalTy, {LegalTy, LegalTy});
                                    LegalTy, {LegalTy, LegalTy});
      return LT.first * Cost +
             ((LT.second.getScalarType() != MVT::f16 || ST->hasFullFP16()) ? 0

    RetTy = RetTy->getScalarType();
    if (LT.second.isVector()) {

    return LT.first * Cost;
  case Intrinsic::fshl:
  case Intrinsic::fshr: {

    if (RetTy->isIntegerTy() && ICA.getArgs()[0] == ICA.getArgs()[1] &&
        (RetTy->getPrimitiveSizeInBits() == 32 ||
         RetTy->getPrimitiveSizeInBits() == 64)) {

        {Intrinsic::fshl, MVT::v4i32, 2},
        {Intrinsic::fshl, MVT::v2i64, 2}, {Intrinsic::fshl, MVT::v16i8, 2},
        {Intrinsic::fshl, MVT::v8i16, 2}, {Intrinsic::fshl, MVT::v2i32, 2},
        {Intrinsic::fshl, MVT::v8i8, 2},  {Intrinsic::fshl, MVT::v4i16, 2}};

      return LegalisationCost.first * Entry->Cost;

    if (!RetTy->isIntegerTy())

    bool HigherCost = (RetTy->getScalarSizeInBits() != 32 &&
                       RetTy->getScalarSizeInBits() < 64) ||
                      (RetTy->getScalarSizeInBits() % 64 != 0);
    unsigned ExtraCost = HigherCost ? 1 : 0;
    if (RetTy->getScalarSizeInBits() == 32 ||
        RetTy->getScalarSizeInBits() == 64)
    else if (HigherCost)

    return TyL.first + ExtraCost;
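
// get_active_lane_mask: cheap when the target can produce the predicate
// directly (e.g. an SVE while-style compare); otherwise the cost reflects the
// generic expansion, queried via shouldExpandGetActiveLaneMask below.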
  case Intrinsic::get_active_lane_mask: {

    EVT RetVT = getTLI()->getValueType(DL, RetTy);

    if (getTLI()->shouldExpandGetActiveLaneMask(RetVT, OpVT))

    if (RetTy->isScalableTy()) {
      if (TLI->getTypeAction(RetTy->getContext(), RetVT) !=

    if (ST->hasSVE2p1() || ST->hasSME2()) {

    return Cost + (SplitCost * (Cost - 1));
  case Intrinsic::experimental_vector_match: {
    unsigned SearchSize = NeedleTy->getNumElements();
    if (!getTLI()->shouldExpandVectorMatch(SearchVT, SearchSize)) {

  case Intrinsic::cttz: {
    if (LT.second == MVT::v8i8 || LT.second == MVT::v16i8)
      return LT.first * 2;
    if (LT.second == MVT::v4i16 || LT.second == MVT::v8i16 ||
        LT.second == MVT::v2i32 || LT.second == MVT::v4i32)
      return LT.first * 3;

  case Intrinsic::experimental_cttz_elts: {
    if (!getTLI()->shouldExpandCttzElements(ArgVT)) {

  case Intrinsic::loop_dependence_raw_mask:
  case Intrinsic::loop_dependence_war_mask: {
    if (ST->hasSVE2() || ST->hasSME()) {
      EVT VecVT = getTLI()->getValueType(DL, RetTy);
      unsigned EltSizeInBytes =

  case Intrinsic::experimental_vector_extract_last_active:
    if (ST->isSVEorStreamingSVEAvailable()) {

  case Intrinsic::pow: {
    EVT VT = getTLI()->getValueType(DL, RetTy);
    bool HasLibcall = getTLI()->getLibcallImpl(LC) != RTLIB::Unsupported;
    bool Is025 = ExpF->getValueAPF().isExactlyValue(0.25);
    bool Is075 = ExpF->getValueAPF().isExactlyValue(0.75);
      return (Sqrt * 2) + FMul;
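
// The (Sqrt * 2) + FMul cost above matches a quarter/three-quarter exponent
// decomposition: x^0.75 == sqrt(x) * sqrt(sqrt(x)), i.e. two square roots and
// one multiply, while x^0.25 == sqrt(sqrt(x)) needs the two square roots alone.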
  case Intrinsic::sqrt:
  case Intrinsic::fabs:
  case Intrinsic::ceil:
  case Intrinsic::floor:
  case Intrinsic::nearbyint:
  case Intrinsic::round:
  case Intrinsic::rint:
  case Intrinsic::roundeven:
  case Intrinsic::trunc:
  case Intrinsic::minnum:
  case Intrinsic::maxnum:
  case Intrinsic::minimum:
  case Intrinsic::maximum: {
  auto RequiredType = II.getType();

  assert(PN && "Expected Phi Node!");

  if (!PN->hasOneUse())
    return std::nullopt;

  for (Value *IncValPhi : PN->incoming_values()) {
        Reinterpret->getIntrinsicID() !=
            Intrinsic::aarch64_sve_convert_to_svbool ||
        RequiredType != Reinterpret->getArgOperand(0)->getType())
      return std::nullopt;

  for (unsigned I = 0; I < PN->getNumIncomingValues(); I++) {
    NPN->addIncoming(Reinterpret->getOperand(0), PN->getIncomingBlock(I));
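
// SVEIntrinsicInfo (below) records, per SVE intrinsic, how predication
// behaves: which operand is the governing predicate, whether inactive result
// lanes are taken from an operand, undefined, or unused, and whether an
// equivalent undef-inactive-lanes (_u) intrinsic or a plain IR opcode exists.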
  return GoverningPredicateIdx != std::numeric_limits<unsigned>::max();

  return GoverningPredicateIdx;

  GoverningPredicateIdx = Index;

  return UndefIntrinsic;

  UndefIntrinsic = IID;

  return ResultLanes == InactiveLanesTakenFromOperand;

  return OperandIdxForInactiveLanes;

  assert(ResultLanes == Uninitialized && "Cannot set property twice!");
  ResultLanes = InactiveLanesTakenFromOperand;
  OperandIdxForInactiveLanes = Index;

  return ResultLanes == InactiveLanesAreNotDefined;

  assert(ResultLanes == Uninitialized && "Cannot set property twice!");
  ResultLanes = InactiveLanesAreNotDefined;

  return ResultLanes == InactiveLanesAreUnused;

  assert(ResultLanes == Uninitialized && "Cannot set property twice!");
  ResultLanes = InactiveLanesAreUnused;

  ResultIsZeroInitialized = true;

  return OperandIdxWithNoActiveLanes != std::numeric_limits<unsigned>::max();

  return OperandIdxWithNoActiveLanes;

  OperandIdxWithNoActiveLanes = Index;

  unsigned GoverningPredicateIdx = std::numeric_limits<unsigned>::max();

  unsigned IROpcode = 0;

  enum PredicationStyle {
    InactiveLanesTakenFromOperand,
    InactiveLanesAreNotDefined,
    InactiveLanesAreUnused

  bool ResultIsZeroInitialized = false;
  unsigned OperandIdxForInactiveLanes = std::numeric_limits<unsigned>::max();
  unsigned OperandIdxWithNoActiveLanes = std::numeric_limits<unsigned>::max();
  return !isa<ScalableVectorType>(V->getType());

  case Intrinsic::aarch64_sve_fcvt_bf16f32_v2:
  case Intrinsic::aarch64_sve_fcvt_f16f32:
  case Intrinsic::aarch64_sve_fcvt_f16f64:
  case Intrinsic::aarch64_sve_fcvt_f32f16:
  case Intrinsic::aarch64_sve_fcvt_f32f64:
  case Intrinsic::aarch64_sve_fcvt_f64f16:
  case Intrinsic::aarch64_sve_fcvt_f64f32:
  case Intrinsic::aarch64_sve_fcvtlt_f32f16:
  case Intrinsic::aarch64_sve_fcvtlt_f64f32:
  case Intrinsic::aarch64_sve_fcvtx_f32f64:
  case Intrinsic::aarch64_sve_fcvtzs:
  case Intrinsic::aarch64_sve_fcvtzs_i32f16:
  case Intrinsic::aarch64_sve_fcvtzs_i32f64:
  case Intrinsic::aarch64_sve_fcvtzs_i64f16:
  case Intrinsic::aarch64_sve_fcvtzs_i64f32:
  case Intrinsic::aarch64_sve_fcvtzu:
  case Intrinsic::aarch64_sve_fcvtzu_i32f16:
  case Intrinsic::aarch64_sve_fcvtzu_i32f64:
  case Intrinsic::aarch64_sve_fcvtzu_i64f16:
  case Intrinsic::aarch64_sve_fcvtzu_i64f32:
  case Intrinsic::aarch64_sve_revb:
  case Intrinsic::aarch64_sve_revh:
  case Intrinsic::aarch64_sve_revw:
  case Intrinsic::aarch64_sve_revd:
  case Intrinsic::aarch64_sve_scvtf:
  case Intrinsic::aarch64_sve_scvtf_f16i32:
  case Intrinsic::aarch64_sve_scvtf_f16i64:
  case Intrinsic::aarch64_sve_scvtf_f32i64:
  case Intrinsic::aarch64_sve_scvtf_f64i32:
  case Intrinsic::aarch64_sve_ucvtf:
  case Intrinsic::aarch64_sve_ucvtf_f16i32:
  case Intrinsic::aarch64_sve_ucvtf_f16i64:
  case Intrinsic::aarch64_sve_ucvtf_f32i64:
  case Intrinsic::aarch64_sve_ucvtf_f64i32:

  case Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2:
  case Intrinsic::aarch64_sve_fcvtnt_f16f32:
  case Intrinsic::aarch64_sve_fcvtnt_f32f64:
  case Intrinsic::aarch64_sve_fcvtxnt_f32f64:

  case Intrinsic::aarch64_sve_fabd:
  case Intrinsic::aarch64_sve_fadd:
  case Intrinsic::aarch64_sve_fdiv:
  case Intrinsic::aarch64_sve_fmax:
  case Intrinsic::aarch64_sve_fmaxnm:
  case Intrinsic::aarch64_sve_fmin:
  case Intrinsic::aarch64_sve_fminnm:
  case Intrinsic::aarch64_sve_fmla:
  case Intrinsic::aarch64_sve_fmls:
  case Intrinsic::aarch64_sve_fmul:
  case Intrinsic::aarch64_sve_fmulx:
  case Intrinsic::aarch64_sve_fnmla:
  case Intrinsic::aarch64_sve_fnmls:
  case Intrinsic::aarch64_sve_fsub:
  case Intrinsic::aarch64_sve_add:
  case Intrinsic::aarch64_sve_mla:
  case Intrinsic::aarch64_sve_mls:
  case Intrinsic::aarch64_sve_mul:
  case Intrinsic::aarch64_sve_sabd:
  case Intrinsic::aarch64_sve_sdiv:
  case Intrinsic::aarch64_sve_smax:
  case Intrinsic::aarch64_sve_smin:
  case Intrinsic::aarch64_sve_smulh:
  case Intrinsic::aarch64_sve_sub:
  case Intrinsic::aarch64_sve_uabd:
  case Intrinsic::aarch64_sve_udiv:
  case Intrinsic::aarch64_sve_umax:
  case Intrinsic::aarch64_sve_umin:
  case Intrinsic::aarch64_sve_umulh:
  case Intrinsic::aarch64_sve_asr:
  case Intrinsic::aarch64_sve_lsl:
  case Intrinsic::aarch64_sve_lsr:
  case Intrinsic::aarch64_sve_and:
  case Intrinsic::aarch64_sve_bic:
  case Intrinsic::aarch64_sve_eor:
  case Intrinsic::aarch64_sve_orr:
  case Intrinsic::aarch64_sve_shsub:
  case Intrinsic::aarch64_sve_shsubr:
  case Intrinsic::aarch64_sve_sqrshl:
  case Intrinsic::aarch64_sve_sqshl:
  case Intrinsic::aarch64_sve_sqsub:
  case Intrinsic::aarch64_sve_srshl:
  case Intrinsic::aarch64_sve_uhsub:
  case Intrinsic::aarch64_sve_uhsubr:
  case Intrinsic::aarch64_sve_uqrshl:
  case Intrinsic::aarch64_sve_uqshl:
  case Intrinsic::aarch64_sve_uqsub:
  case Intrinsic::aarch64_sve_urshl:

  case Intrinsic::aarch64_sve_add_u:
  case Intrinsic::aarch64_sve_and_u:
  case Intrinsic::aarch64_sve_asr_u:
  case Intrinsic::aarch64_sve_eor_u:
  case Intrinsic::aarch64_sve_fadd_u:
  case Intrinsic::aarch64_sve_fdiv_u:
  case Intrinsic::aarch64_sve_fmul_u:
  case Intrinsic::aarch64_sve_fsub_u:
  case Intrinsic::aarch64_sve_lsl_u:
  case Intrinsic::aarch64_sve_lsr_u:
  case Intrinsic::aarch64_sve_mul_u:
  case Intrinsic::aarch64_sve_orr_u:
  case Intrinsic::aarch64_sve_sdiv_u:
  case Intrinsic::aarch64_sve_sub_u:
  case Intrinsic::aarch64_sve_udiv_u:

  case Intrinsic::aarch64_sve_addqv:
  case Intrinsic::aarch64_sve_and_z:
  case Intrinsic::aarch64_sve_bic_z:
  case Intrinsic::aarch64_sve_brka_z:
  case Intrinsic::aarch64_sve_brkb_z:
  case Intrinsic::aarch64_sve_brkn_z:
  case Intrinsic::aarch64_sve_brkpa_z:
  case Intrinsic::aarch64_sve_brkpb_z:
  case Intrinsic::aarch64_sve_cntp:
  case Intrinsic::aarch64_sve_compact:
  case Intrinsic::aarch64_sve_eor_z:
  case Intrinsic::aarch64_sve_eorv:
  case Intrinsic::aarch64_sve_eorqv:
  case Intrinsic::aarch64_sve_nand_z:
  case Intrinsic::aarch64_sve_nor_z:
  case Intrinsic::aarch64_sve_orn_z:
  case Intrinsic::aarch64_sve_orr_z:
  case Intrinsic::aarch64_sve_orv:
  case Intrinsic::aarch64_sve_orqv:
  case Intrinsic::aarch64_sve_pnext:
  case Intrinsic::aarch64_sve_rdffr_z:
  case Intrinsic::aarch64_sve_saddv:
  case Intrinsic::aarch64_sve_uaddv:
  case Intrinsic::aarch64_sve_umaxv:
  case Intrinsic::aarch64_sve_umaxqv:
  case Intrinsic::aarch64_sve_cmpeq:
  case Intrinsic::aarch64_sve_cmpeq_wide:
  case Intrinsic::aarch64_sve_cmpge:
  case Intrinsic::aarch64_sve_cmpge_wide:
  case Intrinsic::aarch64_sve_cmpgt:
  case Intrinsic::aarch64_sve_cmpgt_wide:
  case Intrinsic::aarch64_sve_cmphi:
  case Intrinsic::aarch64_sve_cmphi_wide:
  case Intrinsic::aarch64_sve_cmphs:
  case Intrinsic::aarch64_sve_cmphs_wide:
  case Intrinsic::aarch64_sve_cmple_wide:
  case Intrinsic::aarch64_sve_cmplo_wide:
  case Intrinsic::aarch64_sve_cmpls_wide:
  case Intrinsic::aarch64_sve_cmplt_wide:
  case Intrinsic::aarch64_sve_cmpne:
  case Intrinsic::aarch64_sve_cmpne_wide:
  case Intrinsic::aarch64_sve_facge:
  case Intrinsic::aarch64_sve_facgt:
  case Intrinsic::aarch64_sve_fcmpeq:
  case Intrinsic::aarch64_sve_fcmpge:
  case Intrinsic::aarch64_sve_fcmpgt:
  case Intrinsic::aarch64_sve_fcmpne:
  case Intrinsic::aarch64_sve_fcmpuo:
  case Intrinsic::aarch64_sve_ld1:
  case Intrinsic::aarch64_sve_ld1_gather:
  case Intrinsic::aarch64_sve_ld1_gather_index:
  case Intrinsic::aarch64_sve_ld1_gather_scalar_offset:
  case Intrinsic::aarch64_sve_ld1_gather_sxtw:
  case Intrinsic::aarch64_sve_ld1_gather_sxtw_index:
  case Intrinsic::aarch64_sve_ld1_gather_uxtw:
  case Intrinsic::aarch64_sve_ld1_gather_uxtw_index:
  case Intrinsic::aarch64_sve_ld1q_gather_index:
  case Intrinsic::aarch64_sve_ld1q_gather_scalar_offset:
  case Intrinsic::aarch64_sve_ld1q_gather_vector_offset:
  case Intrinsic::aarch64_sve_ld1ro:
  case Intrinsic::aarch64_sve_ld1rq:
  case Intrinsic::aarch64_sve_ld1udq:
  case Intrinsic::aarch64_sve_ld1uwq:
  case Intrinsic::aarch64_sve_ld2_sret:
  case Intrinsic::aarch64_sve_ld2q_sret:
  case Intrinsic::aarch64_sve_ld3_sret:
  case Intrinsic::aarch64_sve_ld3q_sret:
  case Intrinsic::aarch64_sve_ld4_sret:
  case Intrinsic::aarch64_sve_ld4q_sret:
  case Intrinsic::aarch64_sve_ldff1:
  case Intrinsic::aarch64_sve_ldff1_gather:
  case Intrinsic::aarch64_sve_ldff1_gather_index:
  case Intrinsic::aarch64_sve_ldff1_gather_scalar_offset:
  case Intrinsic::aarch64_sve_ldff1_gather_sxtw:
  case Intrinsic::aarch64_sve_ldff1_gather_sxtw_index:
  case Intrinsic::aarch64_sve_ldff1_gather_uxtw:
  case Intrinsic::aarch64_sve_ldff1_gather_uxtw_index:
  case Intrinsic::aarch64_sve_ldnf1:
  case Intrinsic::aarch64_sve_ldnt1:
  case Intrinsic::aarch64_sve_ldnt1_gather:
  case Intrinsic::aarch64_sve_ldnt1_gather_index:
  case Intrinsic::aarch64_sve_ldnt1_gather_scalar_offset:
  case Intrinsic::aarch64_sve_ldnt1_gather_uxtw:

  case Intrinsic::aarch64_sve_prf:
  case Intrinsic::aarch64_sve_prfb_gather_index:
  case Intrinsic::aarch64_sve_prfb_gather_scalar_offset:
  case Intrinsic::aarch64_sve_prfb_gather_sxtw_index:
  case Intrinsic::aarch64_sve_prfb_gather_uxtw_index:
  case Intrinsic::aarch64_sve_prfd_gather_index:
  case Intrinsic::aarch64_sve_prfd_gather_scalar_offset:
  case Intrinsic::aarch64_sve_prfd_gather_sxtw_index:
  case Intrinsic::aarch64_sve_prfd_gather_uxtw_index:
  case Intrinsic::aarch64_sve_prfh_gather_index:
  case Intrinsic::aarch64_sve_prfh_gather_scalar_offset:
  case Intrinsic::aarch64_sve_prfh_gather_sxtw_index:
  case Intrinsic::aarch64_sve_prfh_gather_uxtw_index:
  case Intrinsic::aarch64_sve_prfw_gather_index:
  case Intrinsic::aarch64_sve_prfw_gather_scalar_offset:
  case Intrinsic::aarch64_sve_prfw_gather_sxtw_index:
  case Intrinsic::aarch64_sve_prfw_gather_uxtw_index:

  case Intrinsic::aarch64_sve_st1_scatter:
  case Intrinsic::aarch64_sve_st1_scatter_scalar_offset:
  case Intrinsic::aarch64_sve_st1_scatter_sxtw:
  case Intrinsic::aarch64_sve_st1_scatter_sxtw_index:
  case Intrinsic::aarch64_sve_st1_scatter_uxtw:
  case Intrinsic::aarch64_sve_st1_scatter_uxtw_index:
  case Intrinsic::aarch64_sve_st1dq:
  case Intrinsic::aarch64_sve_st1q_scatter_index:
  case Intrinsic::aarch64_sve_st1q_scatter_scalar_offset:
  case Intrinsic::aarch64_sve_st1q_scatter_vector_offset:
  case Intrinsic::aarch64_sve_st1wq:
  case Intrinsic::aarch64_sve_stnt1:
  case Intrinsic::aarch64_sve_stnt1_scatter:
  case Intrinsic::aarch64_sve_stnt1_scatter_index:
  case Intrinsic::aarch64_sve_stnt1_scatter_scalar_offset:
  case Intrinsic::aarch64_sve_stnt1_scatter_uxtw:

  case Intrinsic::aarch64_sve_st2:
  case Intrinsic::aarch64_sve_st2q:
  case Intrinsic::aarch64_sve_st3:
  case Intrinsic::aarch64_sve_st3q:
  case Intrinsic::aarch64_sve_st4:
  case Intrinsic::aarch64_sve_st4q:
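
// The helpers below strip redundant reinterprets so combines can see the
// original predicate: a predicate cast through convert_to/from_svbool can be
// looked through when the uncast type is at least as wide, and an all-true
// predicate or a dup of a constant can be recognised directly.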
  Value *UncastedPred;
    Pred = UncastedPred;

    if (OrigPredTy->getMinNumElements() <=
            ->getMinNumElements())
      Pred = UncastedPred;

  return C && C->isAllOnesValue();

  if (Dup && Dup->getIntrinsicID() == Intrinsic::aarch64_sve_dup &&
      Dup->getOperand(1) == Pg && isa<Constant>(Dup->getOperand(2)))
static std::optional<Instruction *>

  Value *Op1 = II.getOperand(1);
  Value *Op2 = II.getOperand(2);

    return std::nullopt;

  if (SimpleII == Inactive)

static std::optional<Instruction *>

    return std::nullopt;

  II.setCalledFunction(NewDecl);

  return std::nullopt;
static std::optional<Instruction *>

    return std::nullopt;

  auto IntrinsicID = BinOp->getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::aarch64_sve_and_z:
  case Intrinsic::aarch64_sve_bic_z:
  case Intrinsic::aarch64_sve_eor_z:
  case Intrinsic::aarch64_sve_nand_z:
  case Intrinsic::aarch64_sve_nor_z:
  case Intrinsic::aarch64_sve_orn_z:
  case Intrinsic::aarch64_sve_orr_z:
    return std::nullopt;

  auto BinOpPred = BinOp->getOperand(0);
  auto BinOpOp1 = BinOp->getOperand(1);
  auto BinOpOp2 = BinOp->getOperand(2);

      PredIntr->getIntrinsicID() != Intrinsic::aarch64_sve_convert_to_svbool)
    return std::nullopt;

  auto PredOp = PredIntr->getOperand(0);
  if (PredOpTy != II.getType())
    return std::nullopt;

      Intrinsic::aarch64_sve_convert_from_svbool, {PredOpTy}, {BinOpOp1});
  NarrowedBinOpArgs.push_back(NarrowBinOpOp1);
  if (BinOpOp1 == BinOpOp2)
    NarrowedBinOpArgs.push_back(NarrowBinOpOp1);
      Intrinsic::aarch64_sve_convert_from_svbool, {PredOpTy}, {BinOpOp2}));

  auto NarrowedBinOp =
static std::optional<Instruction *>
    return BinOpCombine;

    return std::nullopt;

  Value *Cursor = II.getOperand(0), *EarliestReplacement = nullptr;

    if (CursorVTy->getElementCount().getKnownMinValue() <
        IVTy->getElementCount().getKnownMinValue())

    if (Cursor->getType() == IVTy)
      EarliestReplacement = Cursor;

    if (!IntrinsicCursor || !(IntrinsicCursor->getIntrinsicID() ==
                                  Intrinsic::aarch64_sve_convert_to_svbool ||
                              IntrinsicCursor->getIntrinsicID() ==
                                  Intrinsic::aarch64_sve_convert_from_svbool))

    CandidatesForRemoval.insert(CandidatesForRemoval.begin(), IntrinsicCursor);
    Cursor = IntrinsicCursor->getOperand(0);

  if (!EarliestReplacement)
    return std::nullopt;
  auto *OpPredicate = II.getOperand(0);

                           II.getArgOperand(2));

    return std::nullopt;

                           II.getArgOperand(0), II.getArgOperand(2),
                           uint64_t(0));

                           II.getArgOperand(0));

    return std::nullopt;

  if (!SplatValue || !SplatValue->isZero())
    return std::nullopt;

      DupQLane->getIntrinsicID() != Intrinsic::aarch64_sve_dupq_lane)
    return std::nullopt;

  if (!DupQLaneIdx || !DupQLaneIdx->isZero())
    return std::nullopt;

  if (!VecIns || VecIns->getIntrinsicID() != Intrinsic::vector_insert)
    return std::nullopt;

    return std::nullopt;

    return std::nullopt;

    return std::nullopt;

  if (!VecTy || !OutTy || VecTy->getNumElements() != OutTy->getMinNumElements())
    return std::nullopt;
  unsigned NumElts = VecTy->getNumElements();
  unsigned PredicateBits = 0;

  for (unsigned I = 0; I < NumElts; ++I) {
      return std::nullopt;
    PredicateBits |= 1 << (I * (16 / NumElts));

  if (PredicateBits == 0) {
    PFalse->takeName(&II);

  for (unsigned I = 0; I < 16; ++I)
    if ((PredicateBits & (1 << I)) != 0)

  unsigned PredSize = Mask & -Mask;

  for (unsigned I = 0; I < 16; I += PredSize)
    if ((PredicateBits & (1 << I)) == 0)
      return std::nullopt;

      {PredType}, {PTruePat});
      Intrinsic::aarch64_sve_convert_to_svbool, {PredType}, {PTrue});
  auto *ConvertFromSVBool =
      {II.getType()}, {ConvertToSVBool});
  Value *Pg = II.getArgOperand(0);
  Value *Vec = II.getArgOperand(1);
  auto IntrinsicID = II.getIntrinsicID();
  bool IsAfter = IntrinsicID == Intrinsic::aarch64_sve_lasta;

    auto OpC = OldBinOp->getOpcode();
        OpC, NewLHS, NewRHS, OldBinOp, OldBinOp->getName(), II.getIterator());

  if (IsAfter && C && C->isNullValue()) {
    Extract->insertBefore(II.getIterator());
    Extract->takeName(&II);

    return std::nullopt;

  if (IntrPG->getIntrinsicID() != Intrinsic::aarch64_sve_ptrue)
    return std::nullopt;

  const auto PTruePattern =
    return std::nullopt;

  unsigned Idx = MinNumElts - 1;

  if (Idx >= PgVTy->getMinNumElements())
    return std::nullopt;

  Extract->insertBefore(II.getIterator());
  Extract->takeName(&II);

  Value *Pg = II.getArgOperand(0);
  Value *Vec = II.getArgOperand(2);

  if (!Ty->isIntegerTy())
    return std::nullopt;

    return std::nullopt;

      II.getIntrinsicID(), {FPVec->getType()}, {Pg, FPFallBack, FPVec});

      {II.getType()}, {AllPat});
static std::optional<Instruction *>
  if (Pattern == AArch64SVEPredPattern::all) {

  return MinNumElts && NumElts >= MinNumElts
                    II, ConstantInt::get(II.getType(), MinNumElts)))

static std::optional<Instruction *>
  if (!ST->isStreaming())
    return std::nullopt;

  Value *PgVal = II.getArgOperand(0);
  Value *OpVal = II.getArgOperand(1);

  if (PgVal == OpVal &&
      (II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_first ||
       II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_last)) {

    return std::nullopt;
  if (Pg->getIntrinsicID() == Intrinsic::aarch64_sve_convert_to_svbool &&
      OpIID == Intrinsic::aarch64_sve_convert_to_svbool &&

  if ((Pg == Op) && (II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_any) &&
      ((OpIID == Intrinsic::aarch64_sve_brka_z) ||
       (OpIID == Intrinsic::aarch64_sve_brkb_z) ||
       (OpIID == Intrinsic::aarch64_sve_brkpa_z) ||
       (OpIID == Intrinsic::aarch64_sve_brkpb_z) ||
       (OpIID == Intrinsic::aarch64_sve_rdffr_z) ||
       (OpIID == Intrinsic::aarch64_sve_and_z) ||
       (OpIID == Intrinsic::aarch64_sve_bic_z) ||
       (OpIID == Intrinsic::aarch64_sve_eor_z) ||
       (OpIID == Intrinsic::aarch64_sve_nand_z) ||
       (OpIID == Intrinsic::aarch64_sve_nor_z) ||
       (OpIID == Intrinsic::aarch64_sve_orn_z) ||
       (OpIID == Intrinsic::aarch64_sve_orr_z))) {

  return std::nullopt;
template <Intrinsic::ID MulOpc, Intrinsic::ID FuseOpc>
static std::optional<Instruction *>
                                       bool MergeIntoAddendOp) {
  Value *MulOp0, *MulOp1, *AddendOp, *Mul;
  if (MergeIntoAddendOp) {
    AddendOp = II.getOperand(1);
    Mul = II.getOperand(2);
    AddendOp = II.getOperand(2);
    Mul = II.getOperand(1);

    return std::nullopt;

  if (!Mul->hasOneUse())
    return std::nullopt;

  if (II.getType()->isFPOrFPVectorTy()) {
      return std::nullopt;
    return std::nullopt;

  if (MergeIntoAddendOp)
static std::optional<Instruction *>
  Value *Pred = II.getOperand(0);
  Value *PtrOp = II.getOperand(1);
  Type *VecTy = II.getType();

  Load->copyMetadata(II);

static std::optional<Instruction *>
  Value *VecOp = II.getOperand(0);
  Value *Pred = II.getOperand(1);
  Value *PtrOp = II.getOperand(2);

  Store->copyMetadata(II);
  case Intrinsic::aarch64_sve_fmul_u:
    return Instruction::BinaryOps::FMul;
  case Intrinsic::aarch64_sve_fadd_u:
    return Instruction::BinaryOps::FAdd;
  case Intrinsic::aarch64_sve_fsub_u:
    return Instruction::BinaryOps::FSub;
    return Instruction::BinaryOpsEnd;
static std::optional<Instruction *>
  if (II.isStrictFP())
    return std::nullopt;

  auto *OpPredicate = II.getOperand(0);
  if (BinOpCode == Instruction::BinaryOpsEnd ||
    return std::nullopt;
      BinOpCode, II.getOperand(1), II.getOperand(2), II.getFastMathFlags());

                                        Intrinsic::aarch64_sve_mla>(
                                        Intrinsic::aarch64_sve_mad>(
  return std::nullopt;
static std::optional<Instruction *>
                                        Intrinsic::aarch64_sve_fmla>(IC, II,
                                        Intrinsic::aarch64_sve_fmad>(IC, II,
                                        Intrinsic::aarch64_sve_fmla>(IC, II,
  return std::nullopt;

static std::optional<Instruction *>
                                        Intrinsic::aarch64_sve_fmla>(IC, II,
                                        Intrinsic::aarch64_sve_fmad>(IC, II,
                                        Intrinsic::aarch64_sve_fmla_u>(

static std::optional<Instruction *>
                                        Intrinsic::aarch64_sve_fmls>(IC, II,
                                        Intrinsic::aarch64_sve_fnmsb>(
                                        Intrinsic::aarch64_sve_fmls>(IC, II,
  return std::nullopt;

static std::optional<Instruction *>
                                        Intrinsic::aarch64_sve_fmls>(IC, II,
                                        Intrinsic::aarch64_sve_fnmsb>(
                                        Intrinsic::aarch64_sve_fmls_u>(
                                        Intrinsic::aarch64_sve_mls>(
  return std::nullopt;
  Value *UnpackArg = II.getArgOperand(0);
  bool IsSigned = II.getIntrinsicID() == Intrinsic::aarch64_sve_sunpkhi ||
                  II.getIntrinsicID() == Intrinsic::aarch64_sve_sunpklo;

  return std::nullopt;

  auto *OpVal = II.getOperand(0);
  auto *OpIndices = II.getOperand(1);

      SplatValue->getValue().uge(VTy->getElementCount().getKnownMinValue()))
    return std::nullopt;
  Type *RetTy = II.getType();
  constexpr Intrinsic::ID FromSVB = Intrinsic::aarch64_sve_convert_from_svbool;
  constexpr Intrinsic::ID ToSVB = Intrinsic::aarch64_sve_convert_to_svbool;

  if ((match(II.getArgOperand(0),
    if (TyA == B->getType() &&
                                 TyA->getMinNumElements());

  return std::nullopt;

  if (match(II.getArgOperand(0),
        II, (II.getIntrinsicID() == Intrinsic::aarch64_sve_zip1 ? A : B));

  return std::nullopt;
static std::optional<Instruction *>
  Value *Mask = II.getOperand(0);
  Value *BasePtr = II.getOperand(1);
  Value *Index = II.getOperand(2);

        BasePtr->getPointerAlignment(II.getDataLayout());
        BasePtr, IndexBase);

  return std::nullopt;

static std::optional<Instruction *>
  Value *Val = II.getOperand(0);
  Value *Mask = II.getOperand(1);
  Value *BasePtr = II.getOperand(2);
  Value *Index = II.getOperand(3);

        BasePtr->getPointerAlignment(II.getDataLayout());
        BasePtr, IndexBase);

  return std::nullopt;
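
// sdiv-by-power-of-two: SVE's ASRD (arithmetic shift right for divide)
// performs a correctly rounded signed divide by 2^k under a predicate. The
// combine below emits ASRD for positive power-of-two divisors and adds a
// predicated negate for negative ones; a divisor of -1 is rejected, and a
// divisor of 1 appears to fold away entirely.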
  Value *Pred = II.getOperand(0);
  Value *Vec = II.getOperand(1);
  Value *DivVec = II.getOperand(2);

  if (!SplatConstantInt)
    return std::nullopt;

  if (DivisorValue == -1)
    return std::nullopt;
  if (DivisorValue == 1)

        Intrinsic::aarch64_sve_asrd, {II.getType()}, {Pred, Vec, DivisorLog2});

        Intrinsic::aarch64_sve_asrd, {II.getType()}, {Pred, Vec, DivisorLog2});
        Intrinsic::aarch64_sve_neg, {ASRD->getType()}, {ASRD, Pred, ASRD});

  return std::nullopt;
  size_t VecSize = Vec.size();

  size_t HalfVecSize = VecSize / 2;

    if (*LHS != nullptr && *RHS != nullptr) {

    if (*LHS == nullptr && *RHS != nullptr)

    return std::nullopt;

    Elts[Idx->getValue().getZExtValue()] = InsertElt->getOperand(1);
    CurrentInsertElt = InsertElt->getOperand(0);

    return std::nullopt;

  for (size_t I = 0; I < Elts.size(); I++) {
    if (Elts[I] == nullptr)

  if (InsertEltChain == nullptr)
    return std::nullopt;

  unsigned PatternWidth = IIScalableTy->getScalarSizeInBits() * Elts.size();
  unsigned PatternElementCount = IIScalableTy->getScalarSizeInBits() *
                                 IIScalableTy->getMinNumElements() /

  auto *WideShuffleMaskTy =

  auto NarrowBitcast =

  return std::nullopt;
  Value *Pred = II.getOperand(0);
  Value *Vec = II.getOperand(1);
  Value *Shift = II.getOperand(2);

  Value *AbsPred, *MergedValue;

    return std::nullopt;

    return std::nullopt;

    return std::nullopt;

                                 {II.getType()}, {Pred, Vec, Shift});

  Value *Vec = II.getOperand(0);

    return std::nullopt;

  auto *NI = II.getNextNode();
    return !I->mayReadOrWriteMemory() && !I->mayHaveSideEffects();
  while (LookaheadThreshold-- && CanSkipOver(NI)) {
    auto *NIBB = NI->getParent();
    NI = NI->getNextNode();
    if (auto *SuccBB = NIBB->getUniqueSuccessor())
      NI = &*SuccBB->getFirstNonPHIOrDbgOrLifetime();

  if (NextII && II.isIdenticalTo(NextII))

  return std::nullopt;
                           {II.getType(), II.getOperand(0)->getType()},
                           {II.getOperand(0), II.getOperand(1)}));

  return std::nullopt;

  Value *Passthru = II.getOperand(0);

  auto *Mask = ConstantInt::get(Ty, MaskValue);

  return std::nullopt;

static std::optional<Instruction *>

  return std::nullopt;
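
// Top-level instcombine dispatch: each AArch64/SVE intrinsic below is routed
// to the dedicated folder implemented above.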
std::optional<Instruction *>

  case Intrinsic::aarch64_dmb:
  case Intrinsic::aarch64_neon_fmaxnm:
  case Intrinsic::aarch64_neon_fminnm:
  case Intrinsic::aarch64_sve_convert_from_svbool:
  case Intrinsic::aarch64_sve_dup:
  case Intrinsic::aarch64_sve_dup_x:
  case Intrinsic::aarch64_sve_cmpne:
  case Intrinsic::aarch64_sve_cmpne_wide:
  case Intrinsic::aarch64_sve_rdffr:
  case Intrinsic::aarch64_sve_lasta:
  case Intrinsic::aarch64_sve_lastb:
  case Intrinsic::aarch64_sve_clasta_n:
  case Intrinsic::aarch64_sve_clastb_n:
  case Intrinsic::aarch64_sve_cntd:
  case Intrinsic::aarch64_sve_cntw:
  case Intrinsic::aarch64_sve_cnth:
  case Intrinsic::aarch64_sve_cntb:
  case Intrinsic::aarch64_sme_cntsd:
  case Intrinsic::aarch64_sve_ptest_any:
  case Intrinsic::aarch64_sve_ptest_first:
  case Intrinsic::aarch64_sve_ptest_last:
  case Intrinsic::aarch64_sve_fadd:
  case Intrinsic::aarch64_sve_fadd_u:
  case Intrinsic::aarch64_sve_fmul_u:
  case Intrinsic::aarch64_sve_fsub:
  case Intrinsic::aarch64_sve_fsub_u:
  case Intrinsic::aarch64_sve_add:
  case Intrinsic::aarch64_sve_add_u:
                                        Intrinsic::aarch64_sve_mla_u>(
  case Intrinsic::aarch64_sve_sub:
  case Intrinsic::aarch64_sve_sub_u:
                                        Intrinsic::aarch64_sve_mls_u>(
  case Intrinsic::aarch64_sve_tbl:
  case Intrinsic::aarch64_sve_uunpkhi:
  case Intrinsic::aarch64_sve_uunpklo:
  case Intrinsic::aarch64_sve_sunpkhi:
  case Intrinsic::aarch64_sve_sunpklo:
  case Intrinsic::aarch64_sve_uzp1:
  case Intrinsic::aarch64_sve_zip1:
  case Intrinsic::aarch64_sve_zip2:
  case Intrinsic::aarch64_sve_ld1_gather_index:
  case Intrinsic::aarch64_sve_st1_scatter_index:
  case Intrinsic::aarch64_sve_ld1:
  case Intrinsic::aarch64_sve_st1:
  case Intrinsic::aarch64_sve_sdiv:
  case Intrinsic::aarch64_sve_sel:
  case Intrinsic::aarch64_sve_srshl:
  case Intrinsic::aarch64_sve_dupq_lane:
  case Intrinsic::aarch64_sve_insr:
  case Intrinsic::aarch64_sve_whilelo:
  case Intrinsic::aarch64_sve_ptrue:
  case Intrinsic::aarch64_sve_uxtb:
  case Intrinsic::aarch64_sve_uxth:
  case Intrinsic::aarch64_sve_uxtw:
  case Intrinsic::aarch64_sme_in_streaming_mode:

  return std::nullopt;
                              SimplifyAndSetOp) const {
  switch (II.getIntrinsicID()) {
  case Intrinsic::aarch64_neon_fcvtxn:
  case Intrinsic::aarch64_neon_rshrn:
  case Intrinsic::aarch64_neon_sqrshrn:
  case Intrinsic::aarch64_neon_sqrshrun:
  case Intrinsic::aarch64_neon_sqshrn:
  case Intrinsic::aarch64_neon_sqshrun:
  case Intrinsic::aarch64_neon_sqxtn:
  case Intrinsic::aarch64_neon_sqxtun:
  case Intrinsic::aarch64_neon_uqrshrn:
  case Intrinsic::aarch64_neon_uqshrn:
  case Intrinsic::aarch64_neon_uqxtn:
    SimplifyAndSetOp(&II, 0, OrigDemandedElts, UndefElts);

  return std::nullopt;
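
// Widening-instruction detection: a "single extend" widening op is an add/sub
// where one extended operand folds into the instruction. The final check
// requires the source element size to be exactly half the destination's, with
// equal element counts after legalization.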
  return ST->isSVEAvailable() || (ST->isSVEorStreamingSVEAvailable() &&

  if (ST->useSVEForFixedLengthVectors() &&
        std::max(ST->getMinSVEVectorSizeInBits(), 128u));
  else if (ST->isNeonAvailable())

  if (ST->isSVEAvailable() || (ST->isSVEorStreamingSVEAvailable() &&

bool AArch64TTIImpl::isSingleExtWideningInstruction(
    Type *SrcOverrideTy) const {

      (DstEltSize != 16 && DstEltSize != 32 && DstEltSize != 64))

  Type *SrcTy = SrcOverrideTy;
  case Instruction::Add:
  case Instruction::Sub: {
      if (Opcode == Instruction::Sub)

  assert(SrcTy && "Expected some SrcTy");
  unsigned SrcElTySize = SrcTyL.second.getScalarSizeInBits();

      DstTyL.first * DstTyL.second.getVectorMinNumElements();
      SrcTyL.first * SrcTyL.second.getVectorMinNumElements();

  return NumDstEls == NumSrcEls && 2 * SrcElTySize == DstEltSize;
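
// The "bin ext" variant below handles add/sub/mul where both operands are
// extended: it compares the (possibly overridden) scalar sizes of the two
// arguments against half the destination element size to decide whether a
// single widening instruction can cover the operation.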
Type *AArch64TTIImpl::isBinExtWideningInstruction(unsigned Opcode, Type *DstTy,
                                                  Type *SrcOverrideTy) const {
  if (Opcode != Instruction::Add && Opcode != Instruction::Sub &&
      Opcode != Instruction::Mul)

      (DstEltSize != 16 && DstEltSize != 32 && DstEltSize != 64))

  auto getScalarSizeWithOverride = [&](const Value *V) {
        ->getScalarSizeInBits();

  unsigned MaxEltSize = 0;
    unsigned EltSize0 = getScalarSizeWithOverride(Args[0]);
    unsigned EltSize1 = getScalarSizeWithOverride(Args[1]);
    MaxEltSize = std::max(EltSize0, EltSize1);
    unsigned EltSize0 = getScalarSizeWithOverride(Args[0]);
    unsigned EltSize1 = getScalarSizeWithOverride(Args[1]);
    if (EltSize0 >= DstEltSize / 2 || EltSize1 >= DstEltSize / 2)
    MaxEltSize = DstEltSize / 2;
  } else if (Opcode == Instruction::Mul &&
        getScalarSizeWithOverride(isa<ZExtInst>(Args[0]) ? Args[0] : Args[1]);

  if (MaxEltSize * 2 > DstEltSize)
  if (!Src->isVectorTy() || !TLI->isTypeLegal(TLI->getValueType(DL, Src)) ||
      (Src->isScalableTy() && !ST->hasSVE2()))

  if (AddUser && AddUser->getOpcode() == Instruction::Add)

  if (!Shr || Shr->getOpcode() != Instruction::LShr)

  if (!Trunc || Trunc->getOpcode() != Instruction::Trunc ||
      Src->getScalarSizeInBits() !=

  int ISD = TLI->InstructionOpcodeToISD(Opcode);

  if (I && I->hasOneUser()) {
    if (Type *ExtTy = isBinExtWideningInstruction(
            SingleUser->getOpcode(), Dst, Operands,
            Src != I->getOperand(0)->getType() ? Src : nullptr)) {

    if (isSingleExtWideningInstruction(
            SingleUser->getOpcode(), Dst, Operands,
            Src != I->getOperand(0)->getType() ? Src : nullptr)) {

      if (SingleUser->getOpcode() == Instruction::Add) {
        if (I == SingleUser->getOperand(1) ||
            cast<CastInst>(SingleUser->getOperand(1))->getOpcode() == Opcode))

  EVT SrcTy = TLI->getValueType(DL, Src);
  EVT DstTy = TLI->getValueType(DL, Dst);

  if (!SrcTy.isSimple() || !DstTy.isSimple())

  if (!ST->hasSVE2() && !ST->isStreamingSVEAvailable() &&

  EVT WiderTy = SrcTy.bitsGT(DstTy) ? SrcTy : DstTy;
      ST->useSVEForFixedLengthVectors(WiderTy)) {
    std::pair<InstructionCost, MVT> LT =
    unsigned NumElements =

  const unsigned int SVE_EXT_COST = 1;
  const unsigned int SVE_FCVT_COST = 1;
  const unsigned int SVE_UNPACK_ONCE = 4;
  const unsigned int SVE_UNPACK_TWICE = 16;
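
// The SVE conversion costs below are composed from these unit costs: an
// optional extend (SVE_EXT_COST) plus one FCVT per generated value, with
// SVE_UNPACK_ONCE/SVE_UNPACK_TWICE covering conversions whose element count
// forces the input to be unpacked into two or four vectors first.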
     SVE_EXT_COST + SVE_FCVT_COST},
     SVE_EXT_COST + SVE_FCVT_COST},
     SVE_EXT_COST + SVE_FCVT_COST},
     SVE_EXT_COST + SVE_FCVT_COST},
     SVE_EXT_COST + SVE_FCVT_COST},
     SVE_EXT_COST + SVE_FCVT_COST},
     SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
     SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
     SVE_EXT_COST + SVE_FCVT_COST},
     SVE_EXT_COST + SVE_FCVT_COST},
     SVE_EXT_COST + SVE_FCVT_COST},
     SVE_EXT_COST + SVE_FCVT_COST},
     SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
     SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
     SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
     SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
     SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
     SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
     SVE_EXT_COST + SVE_FCVT_COST},
     SVE_EXT_COST + SVE_FCVT_COST},
     SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
     SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
     SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
     SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
     SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
     SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
     SVE_EXT_COST + SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
     SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
     SVE_EXT_COST + SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
     SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
  if (ST->hasFullFP16())

                               Src->getScalarType(), CCH, CostKind) +

      ST->isSVEorStreamingSVEAvailable() &&
      TLI->getTypeAction(Src->getContext(), SrcTy) ==
      TLI->getTypeAction(Dst->getContext(), DstTy) ==

        Opcode, LegalTy, Src, CCH, CostKind, I);
    return Part1 + Part2;

      ST->isSVEorStreamingSVEAvailable() && TLI->isTypeLegal(DstTy))

  assert((Opcode == Instruction::SExt || Opcode == Instruction::ZExt) &&

                            CostKind, Index, nullptr, nullptr);

  auto DstVT = TLI->getValueType(DL, Dst);
  auto SrcVT = TLI->getValueType(DL, Src);

  if (!VecLT.second.isVector() || !TLI->isTypeLegal(DstVT))

  if (DstVT.getFixedSizeInBits() < SrcVT.getFixedSizeInBits())

  case Instruction::SExt:
  case Instruction::ZExt:
    if (DstVT.getSizeInBits() != 64u || SrcVT.getSizeInBits() == 32u)

  return Opcode == Instruction::PHI ? 0 : 1;
    ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx,

  if (!LT.second.isVector())

  if (LT.second.isFixedLengthVector()) {
    unsigned Width = LT.second.getVectorNumElements();
    Index = Index % Width;

  if (ST->hasFastLD1Single())

        : ST->getVectorInsertExtractBaseCost() + 1;

  auto ExtractCanFuseWithFmul = [&]() {

    auto IsAllowedScalarTy = [&](const Type *T) {
      return T->isFloatTy() || T->isDoubleTy() ||
             (T->isHalfTy() && ST->hasFullFP16());

    auto IsUserFMulScalarTy = [](const Value *EEUser) {
      return BO && BO->getOpcode() == BinaryOperator::FMul &&
             !BO->getType()->isVectorTy();

    auto IsExtractLaneEquivalentToZero = [&](unsigned Idx, unsigned EltSz) {
      return Idx == 0 || (RegWidth != 0 && (Idx * EltSz) % RegWidth == 0);

      DenseMap<User *, unsigned> UserToExtractIdx;
      for (auto *U : Scalar->users()) {
        if (!IsUserFMulScalarTy(U))
        UserToExtractIdx[U];
      if (UserToExtractIdx.empty())
      for (auto &[S, U, L] : ScalarUserAndIdx) {
        for (auto *U : S->users()) {
          if (UserToExtractIdx.contains(U)) {
            auto *Op0 = FMul->getOperand(0);
            auto *Op1 = FMul->getOperand(1);
            if ((Op0 == S && Op1 == S) || Op0 != S || Op1 != S) {
              UserToExtractIdx[U] = L;
      for (auto &[U, L] : UserToExtractIdx) {

      return !EE->users().empty() && all_of(EE->users(), [&](const User *U) {
        if (!IsUserFMulScalarTy(U))

        const auto *BO = cast<BinaryOperator>(U);
        const auto *OtherEE = dyn_cast<ExtractElementInst>(
            BO->getOperand(0) == EE ? BO->getOperand(1) : BO->getOperand(0));
        const auto *IdxOp = dyn_cast<ConstantInt>(OtherEE->getIndexOperand());
        return IsExtractLaneEquivalentToZero(
            cast<ConstantInt>(OtherEE->getIndexOperand())
            OtherEE->getType()->getScalarSizeInBits());

  if (Opcode == Instruction::ExtractElement && (I || Scalar) &&
      ExtractCanFuseWithFmul())

             : ST->getVectorInsertExtractBaseCost();

  if (Opcode == Instruction::InsertElement && Index == 0 && Op0 &&
    return getVectorInstrCostHelper(Opcode, Val, CostKind, Index, nullptr,

    Value *Scalar,
    ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx,
  return getVectorInstrCostHelper(Opcode, Val, CostKind, Index, nullptr, Scalar,
                                  ScalarUserAndIdx, VIC);

  return getVectorInstrCostHelper(I.getOpcode(), Val, CostKind, Index, &I,

                                  unsigned Index) const {

        : ST->getVectorInsertExtractBaseCost() + 1;
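
// Scalarization overhead below: each demanded element pays for an insert
// and/or an extract at the per-lane vector-instruction cost, hence the
// popcount(DemandedElts) multiplier.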
  if (Ty->getElementType()->isFloatingPointTy())

  unsigned VecInstCost =

  return DemandedElts.popcount() * (Insert + Extract) * VecInstCost;
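
// FP16/BF16 promotion: when the half/bfloat type lacks native support the
// operation is costed as if promoted to f32. The checks below return
// std::nullopt when full FP16 or SVE B16B16 support makes promotion
// unnecessary.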
  if (!Ty->getScalarType()->isHalfTy() && !Ty->getScalarType()->isBFloatTy())
    return std::nullopt;
  if (Ty->getScalarType()->isHalfTy() && ST->hasFullFP16())
    return std::nullopt;

  if (CanUseSVE && ST->hasSVEB16B16() && ST->isNonStreamingSVEorSME2Available())
    return std::nullopt;

  Cost += InstCost(PromotedTy);

                             Op2Info, Args, CxtI);

  int ISD = TLI->InstructionOpcodeToISD(Opcode);

          Ty, CostKind, Op1Info, Op2Info, true,
          [&](Type *PromotedTy) {
    return *PromotedCost;
  if (Type *ExtTy = isBinExtWideningInstruction(Opcode, Ty, Args)) {

    auto VT = TLI->getValueType(DL, Ty);
    if (VT.isScalarInteger() && VT.getSizeInBits() <= 64) {
                 : (3 * AsrCost + AddCost);
        return MulCost + AsrCost + 2 * AddCost;
    } else if (VT.isVector()) {
      if (Ty->isScalableTy() && ST->hasSVE())
        Cost += 2 * AsrCost;
            ? (LT.second.getScalarType() == MVT::i64 ? 1 : 2) * AsrCost
      } else if (LT.second == MVT::v2i64) {
        return VT.getVectorNumElements() *

    if (Ty->isScalableTy() && ST->hasSVE())
      return MulCost + 2 * AddCost + 2 * AsrCost;
    return 2 * MulCost + AddCost + AsrCost + UsraCost;

      LT.second.isFixedLengthVector()) {
    return ExtractCost + InsertCost +

    auto VT = TLI->getValueType(DL, Ty);
    bool HasMULH = VT == MVT::i64 || LT.second == MVT::nxv2i64 ||
                   LT.second == MVT::nxv4i32 || LT.second == MVT::nxv8i16 ||
                   LT.second == MVT::nxv16i8;
    bool Is128bit = LT.second.is128BitVector();
                              (HasMULH ? 0 : ShrCost) + AddCost * 2 + ShrCost;
    return DivCost + (ISD == ISD::UREM ? MulCost + AddCost : 0);
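
// A remainder has no direct instruction here: the cost above models the
// remainder as the divide plus a multiply-and-subtract fix-up, which is
// where the extra MulCost + AddCost term on the UREM path comes from.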
  if (!VT.isVector() && VT.getSizeInBits() > 64)

        Opcode, Ty, CostKind, Op1Info, Op2Info);

  if (TLI->isOperationLegalOrCustom(ISD, LT.second) && ST->hasSVE()) {
        Ty->getPrimitiveSizeInBits().getFixedValue() < 128) {
      if (nullptr != Entry)

    if (LT.second.getScalarType() == MVT::i8)
    else if (LT.second.getScalarType() == MVT::i16)

          Opcode, Ty->getScalarType(), CostKind, Op1Info, Op2Info);
      return (4 + DivCost) * VTy->getNumElements();

                                 -1, nullptr, nullptr);

    if (LT.second == MVT::v2i64 && ST->hasSVE())

    if (LT.second != MVT::v2i64)

    if ((Ty->isFloatTy() || Ty->isDoubleTy() ||
         (Ty->isHalfTy() && ST->hasFullFP16())) &&

    if (!Ty->getScalarType()->isFP128Ty())

    if (!Ty->getScalarType()->isFP128Ty())
      return 2 * LT.first;

    if (!Ty->isVectorTy())

    int MaxMergeDistance = 64;

      return NumVectorInstToHideOverhead;
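
// The helper below compares two opcodes via their scheduling-class
// descriptors, so it is only meaningful when the subtarget provides an
// instruction-level scheduling model (hence the early bail-out).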
                                          unsigned Opcode1,
                                          unsigned Opcode2) const {

  if (!Sched.hasInstrSchedModel())

      Sched.getSchedClassDesc(TII->get(Opcode1).getSchedClass());
      Sched.getSchedClassDesc(TII->get(Opcode2).getSchedClass());

         "Cannot handle variant scheduling classes without an MI");
  const int AmortizationCost = 20;

    VecPred = CurrentPred;

    static const auto ValidMinMaxTys = {
        MVT::v8i8,  MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32,
        MVT::v4i32, MVT::v2i64, MVT::v2f32, MVT::v4f32, MVT::v2f64};
    static const auto ValidFP16MinMaxTys = {MVT::v4f16, MVT::v8f16};

        (ST->hasFullFP16() &&

        {Instruction::Select, MVT::v2i1, MVT::v2f32, 2},
        {Instruction::Select, MVT::v2i1, MVT::v2f64, 2},
        {Instruction::Select, MVT::v4i1, MVT::v4f32, 2},
        {Instruction::Select, MVT::v4i1, MVT::v4f16, 2},
        {Instruction::Select, MVT::v8i1, MVT::v8f16, 2},
        {Instruction::Select, MVT::v16i1, MVT::v16i16, 16},
        {Instruction::Select, MVT::v8i1, MVT::v8i32, 8},
        {Instruction::Select, MVT::v16i1, MVT::v16i32, 16},
        {Instruction::Select, MVT::v4i1, MVT::v4i64, 4 * AmortizationCost},
        {Instruction::Select, MVT::v8i1, MVT::v8i64, 8 * AmortizationCost},
        {Instruction::Select, MVT::v16i1, MVT::v16i64, 16 * AmortizationCost}};

    EVT SelCondTy = TLI->getValueType(DL, CondTy);
    EVT SelValTy = TLI->getValueType(DL, ValTy);

  if (Opcode == Instruction::FCmp) {
            ValTy, CostKind, Op1Info, Op2Info, false,
            false, [&](Type *PromotedTy) {
      return *PromotedCost;

    if (LT.second.getScalarType() != MVT::f64 &&
        LT.second.getScalarType() != MVT::f32 &&
        LT.second.getScalarType() != MVT::f16)

    unsigned Factor = 1;
    if (!CondTy->isVectorTy() &&

                                          AArch64::FCMEQv4f32))

      TLI->isTypeLegal(TLI->getValueType(DL, ValTy)) &&

                              Op1Info, Op2Info, I);
  if (ST->requiresStrictAlign()) {

  Options.AllowOverlappingLoads = true;
  Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);

  Options.LoadSizes = {8, 4, 2, 1};
  Options.AllowedTailExpansions = {3, 5, 6};
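
// Memcmp expansion: loads of 8/4/2/1 bytes are permitted and may overlap, so
// odd sizes remain coverable; the 3/5/6-byte tail expansions listed above
// appear to rely on such overlapping or mixed-width loads.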
  return ST->hasSVE();

  switch (MICA.getID()) {
  case Intrinsic::masked_scatter:
  case Intrinsic::masked_gather:
  case Intrinsic::masked_load:
  case Intrinsic::masked_expandload:
  case Intrinsic::masked_store:

  if (!LT.first.isValid())

  if (VT->getElementType()->isIntegerTy(1))

  if (MICA.getID() == Intrinsic::masked_expandload) {

  if (LT.first > 1 && LT.second.getScalarSizeInBits() > 8)
    return MemOpCost * 2;

  assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
         "Should be called on only load or stores.");
  case Instruction::Load:
      return ST->getGatherOverhead();
  case Instruction::Store:
      return ST->getScatterOverhead();
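
// Gather/scatter costing below scales a per-element memory-op cost by the
// legalized vector element count, on top of the subtarget's per-operation
// gather/scatter overhead returned above.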
  unsigned Opcode = (MICA.getID() == Intrinsic::masked_gather ||
                     MICA.getID() == Intrinsic::vp_gather)
                        : Instruction::Store;

  if (!LT.first.isValid())

  if (!LT.second.isVector() ||
      VT->getElementType()->isIntegerTy(1))

  ElementCount LegalVF = LT.second.getVectorElementCount();
      {TTI::OK_AnyValue, TTI::OP_None}, I);
  EVT VT = TLI->getValueType(DL, Ty, true);

  if (VT == MVT::Other)

  if (!LT.first.isValid())

      (VTy->getElementType()->isIntegerTy(1) &&
       !VTy->getElementCount().isKnownMultipleOf(

  if (ST->isMisaligned128StoreSlow() && Opcode == Instruction::Store &&
      LT.second.is128BitVector() && Alignment < Align(16)) {
    const int AmortizationCost = 6;
    return LT.first * 2 * AmortizationCost;

  if (Ty->isPtrOrPtrVectorTy())

  if (Ty->getScalarSizeInBits() != LT.second.getScalarSizeInBits()) {
    if (VT == MVT::v4i8)

  if (!isPowerOf2_32(EltSize) || EltSize < 8 || EltSize > 64 ||

  while (!TypeWorklist.empty()) {

                                     bool UseMaskForCond,
                                     bool UseMaskForGaps) const {
  assert(Factor >= 2 && "Invalid interleave factor");

  if (!VecTy->isScalableTy() && (UseMaskForCond || UseMaskForGaps))

  if (!UseMaskForGaps && Factor <= TLI->getMaxSupportedInterleaveFactor()) {
    unsigned MinElts = VecVTy->getElementCount().getKnownMinValue();
        VecVTy->getElementCount().divideCoefficientBy(Factor));

    if (MinElts % Factor == 0 &&
        TLI->isLegalInterleavedAccessType(SubVecTy, DL, UseScalable))
      return Factor * TLI->getNumInterleavedAccesses(SubVecTy, DL, UseScalable);

                                         UseMaskForCond, UseMaskForGaps);
  for (auto *I : Tys) {
    if (!I->isVectorTy())

                                         Align Alignment) const {

  return (ST->isSVEAvailable() && ST->hasSVE2p2()) ||
         (ST->isSVEorStreamingSVEAvailable() && ST->hasSME2p2());

  return ST->getMaxInterleaveFactor();
  // ...
  enum { MaxStridedLoads = 7 };
  // ...
  int StridedLoads = 0;
  // ...
  for (const auto BB : L->blocks()) {
    for (auto &I : *BB) {
      // ...
      if (L->isLoopInvariant(PtrValue))
        // ...
      if (!LSCEVAddRec || !LSCEVAddRec->isAffine())
        // ...
      if (StridedLoads > MaxStridedLoads / 2)
        return StridedLoads;
    }
  }
  return StridedLoads;

  // ...
  int StridedLoads = countStridedLoads(L, SE);
  // ...
                    << " strided loads\n");
    // ...
    unsigned *FinalSize) {
  // ...
  for (auto *BB : L->getBlocks()) {
    for (auto &I : *BB) {
      // ...
      if (!Cost.isValid())
        // ...
      if (LoopCost > Budget)
        // ...
    }
  }
  // ...
  if (MaxTC > 0 && MaxTC <= 32)
    // ...
  if (Blocks.size() != 2)
  // ...
  if (!L->isInnermost() || L->getNumBlocks() > 8)
    // ...
  if (!L->getExitBlock())
    // ...
  bool HasParallelizableReductions =
      L->getNumBlocks() == 1 &&
      any_of(L->getHeader()->phis(), [&](PHINode &Phi) {
        return canParallelizeReductionWhenUnrolling(Phi, L, &SE);
      });
  if (HasParallelizableReductions &&
  // ...
  if (HasParallelizableReductions) {
  // ...
  if (Header == Latch) {
    // ...
    unsigned Width = 10;
    // ...
    unsigned MaxInstsPerLine = 16;
    // ...
    unsigned BestUC = 1;
    unsigned SizeWithBestUC = BestUC * Size;
    // ...
      unsigned SizeWithUC = UC * Size;
      if (SizeWithUC > 48)
        // ...
      if ((SizeWithUC % MaxInstsPerLine) == 0 ||
          (SizeWithBestUC % MaxInstsPerLine) < (SizeWithUC % MaxInstsPerLine)) {
        BestUC = UC;
        SizeWithBestUC = BestUC * Size;
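// Illustrative sketch (not from this file): the single-block unroll-count
// choice above as a standalone function. It prefers the count whose
// unrolled body either fills 16-instruction lines exactly or wastes the
// least of its final line, under a 48-instruction budget. The loop bounds
// and direction are assumed, since the in-tree loop header is elided.

#include <cstdio>

static unsigned pickUnrollCount(unsigned Size) {
  const unsigned MaxInstsPerLine = 16;
  unsigned BestUC = 1;
  unsigned SizeWithBestUC = BestUC * Size;
  for (unsigned UC = 2; UC <= 10; ++UC) { // upper bound assumed
    unsigned SizeWithUC = UC * Size;
    if (SizeWithUC > 48)
      break;
    if ((SizeWithUC % MaxInstsPerLine) == 0 ||
        (SizeWithBestUC % MaxInstsPerLine) < (SizeWithUC % MaxInstsPerLine)) {
      BestUC = UC;
      SizeWithBestUC = BestUC * Size;
    }
  }
  return BestUC;
}

int main() {
  for (unsigned Size = 4; Size <= 12; ++Size)
    std::printf("body size %2u -> unroll count %u\n", Size,
                pickUnrollCount(Size));
}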
  // ...
  for (auto *BB : L->blocks()) {
    for (auto &I : *BB) {
      // ...
      for (auto *U : I.users())
        // ...
          LoadedValuesPlus.insert(U);
      // ...
    }
  }
  // ...
    return LoadedValuesPlus.contains(SI->getOperand(0));
  // ...
    auto *I = dyn_cast<Instruction>(V);
    return I && DependsOnLoopLoad(I, Depth + 1);
  // ...
      DependsOnLoopLoad(I, 0)) {
  // ...
  if (L->getLoopDepth() > 1)
    // ...
  for (auto *BB : L->getBlocks()) {
    for (auto &I : *BB) {
      // ...
      if (IsVectorized && I.getType()->isVectorTy())
        // ...
    }
  }
  // ...
  if (ST->isAppleMLike())
    // ...
  else if (ST->getProcFamily() == AArch64Subtarget::Falkor &&
  // ...
      !ST->getSchedModel().isOutOfOrder()) {
    // ...
    bool CanCreate) const {
  // ...
  switch (Inst->getIntrinsicID()) {
  // ...
  case Intrinsic::aarch64_neon_st2:
  case Intrinsic::aarch64_neon_st3:
  case Intrinsic::aarch64_neon_st4: {
    // ...
    if (!CanCreate || !ST)
      // ...
    unsigned NumElts = Inst->arg_size() - 1;
    if (ST->getNumElements() != NumElts)
      // ...
    for (unsigned i = 0, e = NumElts; i != e; ++i) {
      // ...
    }
    // ...
    for (unsigned i = 0, e = NumElts; i != e; ++i) {
      // ...
      Res = Builder.CreateInsertValue(Res, L, i);
    }
    // ...
  }
  case Intrinsic::aarch64_neon_ld2:
  case Intrinsic::aarch64_neon_ld3:
  case Intrinsic::aarch64_neon_ld4:
    if (Inst->getType() == ExpectedType)
      // ...

  // ...
  case Intrinsic::aarch64_neon_ld2:
  case Intrinsic::aarch64_neon_ld3:
  case Intrinsic::aarch64_neon_ld4:
    Info.ReadMem = true;
    Info.WriteMem = false;
    // ...
  case Intrinsic::aarch64_neon_st2:
  case Intrinsic::aarch64_neon_st3:
  case Intrinsic::aarch64_neon_st4:
    Info.ReadMem = false;
    Info.WriteMem = true;
    // ...
  case Intrinsic::aarch64_neon_ld2:
  case Intrinsic::aarch64_neon_st2:
    Info.MatchingId = VECTOR_LDST_TWO_ELEMENTS;
    // ...
  case Intrinsic::aarch64_neon_ld3:
  case Intrinsic::aarch64_neon_st3:
    Info.MatchingId = VECTOR_LDST_THREE_ELEMENTS;
    // ...
  case Intrinsic::aarch64_neon_ld4:
  case Intrinsic::aarch64_neon_st4:
    Info.MatchingId = VECTOR_LDST_FOUR_ELEMENTS;
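// MatchingId gives EarlyCSE a target-specific equivalence class: a
// structured load is only matched against a prior structured store of the
// same element count (ld2 with st2, ld3 with st3, ld4 with st4).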
    // ...
    const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
  bool Considerable = false;
  AllowPromotionWithoutCommonHeader = false;
  // ...
  Type *ConsideredSExtType =
      Type::getInt64Ty(I.getParent()->getParent()->getContext());
  if (I.getType() != ConsideredSExtType)
    // ...
  for (const User *U : I.users()) {
    // ...
      Considerable = true;
      // ...
      if (GEPInst->getNumOperands() > 2) {
        AllowPromotionWithoutCommonHeader = true;
        // ...
      }
  }
  return Considerable;
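// Sign extensions to i64 feeding a GEP are worth promoting because the
// extension can fold into the addressing mode (sxtw) of the eventual
// load or store; a GEP with more than two operands additionally permits
// promotion without a common header.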
  // ...
  if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16())
    // ...
  return LegalizationCost + 2;
  // ...
  LegalizationCost *= LT.first - 1;
  // ...
  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  // ...
  return LegalizationCost + 2;
    // ...
    std::optional<FastMathFlags> FMF,
    // ...
    return BaseCost + FixedVTy->getNumElements();
  // ...
  if (Opcode != Instruction::FAdd)
    // ...
  MVT MTy = LT.second;
  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  // ...
  if (MTy.isVector() && (EltTy->isFloatTy() || EltTy->isDoubleTy() ||
                         (EltTy->isHalfTy() && ST->hasFullFP16()))) {
    // ...
    return (LT.first - 1) + Log2_32(NElts);
  }
  // ...
  return (LT.first - 1) + Entry->Cost;
  // ...
  if (LT.first != 1) {
    // ...
    ExtraCost *= LT.first - 1;
  }
  auto Cost = ValVTy->getElementType()->isIntegerTy(1) ? 2 : Entry->Cost;
  return Cost + ExtraCost;
    // ...
    unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *VecTy,
    // ...
  EVT VecVT = TLI->getValueType(DL, VecTy);
  EVT ResVT = TLI->getValueType(DL, ResTy);
  // ...
  if (((LT.second == MVT::v8i8 || LT.second == MVT::v16i8) && /* ... */) ||
      ((LT.second == MVT::v4i16 || LT.second == MVT::v8i16) && /* ... */) ||
      ((LT.second == MVT::v2i32 || LT.second == MVT::v4i32) && /* ... */))
    return (LT.first - 1) * 2 + 2;
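// For these input/result combinations the extend folds into a widening
// reduction, so each legalized vector costs a flat 2 instructions:
// (LT.first - 1) * 2 + 2 overall.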
  // ...
  EVT VecVT = TLI->getValueType(DL, VecTy);
  EVT ResVT = TLI->getValueType(DL, ResTy);
  // ...
      RedOpcode == Instruction::Add) {
    // ...
    if ((LT.second == MVT::v8i8 || LT.second == MVT::v16i8) &&
        // ...
      return LT.first + 2;
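// An i8 multiply feeding an add reduction can lower to dot-product style
// instructions, collapsing the extend/multiply/accumulate chain, hence
// the near-flat LT.first + 2.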
  // ...
  EVT PromotedVT = LT.second.getScalarType() == MVT::i1
                       ? TLI->getPromotedVTForPredicate(EVT(LT.second))
                       : /* ... */;
  // ...
  if (LT.second.getScalarType() == MVT::i1) {
    // ...
  }
  // ...
  assert(Entry && "Illegal Type for Splice");
  LegalizationCost += Entry->Cost;
  return LegalizationCost * LT.first;
    // ...
    unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType,
    // ...
  if ((Opcode != Instruction::Add && Opcode != Instruction::Sub &&
       Opcode != Instruction::FAdd) ||
      // ...
    assert(FMF && "Missing FastMathFlags for floating-point partial reduction");
    if (!FMF->allowReassoc() || !FMF->allowContract())
      // ...
  // ...
         "FastMathFlags only apply to floating-point partial reductions");
  // ...
  assert((!BinOp || (OpBExtend != TTI::PR_None && InputTypeB)) &&
         "Unexpected values for OpBExtend or InputTypeB");
  // ...
  if (BinOp && ((*BinOp != Instruction::Mul && *BinOp != Instruction::FMul) ||
                InputTypeA != InputTypeB))
    // ...
  bool IsUSDot = OpBExtend != TTI::PR_None && OpAExtend != OpBExtend;
  if (IsUSDot && !ST->hasMatMulInt8())
    // ...
  auto TC = TLI->getTypeConversion(AccumVectorType->getContext(),
  // ...
  if (TLI->getTypeAction(AccumVectorType->getContext(), TC.second) !=
  // ...
  std::pair<InstructionCost, MVT> AccumLT =
      // ...
  std::pair<InstructionCost, MVT> InputLT =
      // ...
  auto IsSupported = [&](bool SVEPred, bool NEONPred) -> bool {
    return (ST->isSVEorStreamingSVEAvailable() && SVEPred) ||
           (AccumLT.second.isFixedLengthVector() &&
            AccumLT.second.getSizeInBits() <= 128 && ST->isNeonAvailable() &&
            NEONPred);
  };
  // ...
  bool IsSub = Opcode == Instruction::Sub;
  // ...
  if (AccumLT.second.getScalarType() == MVT::i32 &&
      InputLT.second.getScalarType() == MVT::i8) {
    // ...
    if (!IsUSDot && IsSupported(/*SVEPred=*/true, ST->hasDotProd()))
      return Cost + INegCost;
    // ...
    if (IsUSDot && IsSupported(ST->hasMatMulInt8(), ST->hasMatMulInt8()))
      return Cost + INegCost;
  }
  // ...
  if (ST->isSVEorStreamingSVEAvailable() && !IsUSDot) {
    // ...
    if (AccumLT.second.getScalarType() == MVT::i64 &&
        InputLT.second.getScalarType() == MVT::i16)
      return Cost + INegCost;
    // ...
    if (AccumLT.second.getScalarType() == MVT::i32 &&
        InputLT.second.getScalarType() == MVT::i16 &&
        (ST->hasSVE2p1() || ST->hasSME2()) && !IsSub)
      // ...
    if (AccumLT.second.getScalarType() == MVT::i64 &&
        InputLT.second.getScalarType() == MVT::i8)
      // ...
      return Cost + INegCost;
    // ...
    if (AccumLT.second.getScalarType() == MVT::i16 &&
        InputLT.second.getScalarType() == MVT::i8 &&
        (ST->hasSVE2p3() || ST->hasSME2p3()) && !IsSub)
      // ...
  }
  // ...
  if (Opcode == Instruction::FAdd && !IsSub &&
      IsSupported(ST->hasSME2() || ST->hasSVE2p1(), ST->hasF16F32DOT()) &&
      AccumLT.second.getScalarType() == MVT::f32 &&
      InputLT.second.getScalarType() == MVT::f16)
    // ...
  if (Ratio == 2 && !IsUSDot) {
    MVT InVT = InputLT.second.getScalarType();
    // ...
    if (IsSupported(ST->hasSVE2(), /*NEONPred=*/true) &&
    // ...
    if (IsSupported(ST->hasSVE2(), ST->hasFP16FML()) && InVT == MVT::f16)
      // ...
    if (IsSupported(ST->hasBF16(), ST->hasBF16()) && InVT == MVT::bf16)
      // ...
  }
  // ...
                                       AccumType, VF, OpAExtend, OpBExtend,
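// The chain of cases above follows one pattern: if the accumulator/input
// element-type pair lines up with a dot-product or widening-multiply
// instruction the subtarget actually has ([us]dot, usdot behind
// FEAT_I8MM, the SVE2p1/SME2 variants, f16->f32 fdot), the partial
// reduction is costed as cheap, plus INegCost when a subtraction needs
// the input negated first.
//
// Illustrative sketch (not from this file): the scalar work one
// usdot-style lane performs for the i8 -> i32 case - four byte products
// accumulated into one 32-bit lane. The helper name and signedness choice
// are illustrative.

#include <cstddef>
#include <cstdint>

static int32_t dotLane(const uint8_t A[4], const int8_t B[4], int32_t Acc) {
  for (size_t I = 0; I < 4; ++I)
    Acc += int32_t(A[I]) * int32_t(B[I]); // unsigned x signed products
  return Acc;
}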
6144 "Expected the Mask to match the return size if given");
6146 "Expected the same scalar types");
6152 LT.second.getScalarSizeInBits() * Mask.size() > 128 &&
6153 SrcTy->getScalarSizeInBits() == LT.second.getScalarSizeInBits() &&
6154 Mask.size() > LT.second.getVectorNumElements() && !Index && !SubTp) {
6162 return std::max<InstructionCost>(1, LT.first / 4);
6170 Mask, 4, SrcTy->getElementCount().getKnownMinValue() * 2) ||
6172 Mask, 3, SrcTy->getElementCount().getKnownMinValue() * 2)))
6175 unsigned TpNumElts = Mask.size();
6176 unsigned LTNumElts = LT.second.getVectorNumElements();
6177 unsigned NumVecs = (TpNumElts + LTNumElts - 1) / LTNumElts;
6179 LT.second.getVectorElementCount());
6181 std::map<std::tuple<unsigned, unsigned, SmallVector<int>>,
InstructionCost>
6183 for (
unsigned N = 0;
N < NumVecs;
N++) {
    // ...
    unsigned Source1 = -1U, Source2 = -1U;
    unsigned NumSources = 0;
    for (unsigned E = 0; E < LTNumElts; E++) {
      int MaskElt = (N * LTNumElts + E < TpNumElts) ? Mask[N * LTNumElts + E]
                                                    : /* ... */;
      // ...
      unsigned Source = MaskElt / LTNumElts;
      if (NumSources == 0) {
        // ...
      } else if (NumSources == 1 && Source != Source1) {
        // ...
      } else if (NumSources >= 2 && Source != Source1 && Source != Source2) {
        // ...
      }
      // ...
      if (Source == Source1)
        // ...
      else if (Source == Source2)
        NMask.push_back(MaskElt % LTNumElts + LTNumElts);
      // ...
    }
    // ...
    auto Result =
        PreviousCosts.insert({std::make_tuple(Source1, Source2, NMask), 0});
    // ...
        NTp, NTp, NMask, CostKind, 0, nullptr, Args,
    // ...
    Result.first->second = NCost;
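// Illustrative sketch (not from this file): the splitting scheme used by
// the loop above - a mask wider than the legal vector is cut into
// legal-width pieces, each recording up to two source registers and a
// re-based mask so identical pieces can be costed once. The fallback for
// pieces needing more than two sources is simplified here.

#include <cstdio>
#include <vector>

struct Piece {
  unsigned Source1 = -1U, Source2 = -1U;
  std::vector<int> NMask;
};

static std::vector<Piece> splitMask(const std::vector<int> &Mask,
                                    unsigned LTNumElts) {
  unsigned TpNumElts = Mask.size();
  unsigned NumVecs = (TpNumElts + LTNumElts - 1) / LTNumElts; // round up
  std::vector<Piece> Pieces(NumVecs);
  for (unsigned N = 0; N < NumVecs; ++N) {
    Piece &P = Pieces[N];
    unsigned NumSources = 0;
    for (unsigned E = 0; E < LTNumElts; ++E) {
      int MaskElt =
          (N * LTNumElts + E < TpNumElts) ? Mask[N * LTNumElts + E] : -1;
      if (MaskElt < 0) {
        P.NMask.push_back(-1); // undef lane
        continue;
      }
      unsigned Source = MaskElt / LTNumElts; // which legal-width input
      if (NumSources == 0) {
        P.Source1 = Source;
        NumSources = 1;
      } else if (NumSources == 1 && Source != P.Source1) {
        P.Source2 = Source;
        NumSources = 2;
      }
      if (Source == P.Source1)
        P.NMask.push_back(MaskElt % LTNumElts);
      else if (Source == P.Source2)
        P.NMask.push_back(MaskElt % LTNumElts + LTNumElts);
      else
        P.NMask.push_back(-1); // >2 sources: simplified fallback
    }
  }
  return Pieces;
}

int main() {
  // Zip of two 8-element vectors, split for a legal width of 4 lanes.
  std::vector<int> Mask = {0, 8, 1, 9, 2, 10, 3, 11};
  for (const Piece &P : splitMask(Mask, 4))
    std::printf("piece: sources %u and %u, %zu lanes\n", P.Source1, P.Source2,
                P.NMask.size());
}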
  // ...
  if (IsExtractSubvector && LT.second.isFixedLengthVector()) {
    if (LT.second.getFixedSizeInBits() >= 128 &&
        // ...
            LT.second.getVectorNumElements() / 2) {
      // ...
      if (Index == (int)LT.second.getVectorNumElements() / 2)
        // ...
    }
  }
  // ...
  // Identity masks are free.
  if (!Mask.empty() && LT.second.isFixedLengthVector() &&
      // ...
      all_of(enumerate(Mask), [](const auto &M) {
        return M.value() < 0 || M.value() == (int)M.index();
      }))
    // ...
  if (/* ... */ !Mask.empty() && SrcTy->getPrimitiveSizeInBits().isNonZero() &&
      SrcTy->getPrimitiveSizeInBits().isKnownMultipleOf(
  // ...
  if ((ST->hasSVE2p1() || ST->hasSME2p1()) &&
      ST->isSVEorStreamingSVEAvailable() &&
      // ...
  if (ST->isSVEorStreamingSVEAvailable() &&
      // ...
  if (IsLoad && LT.second.isVector() &&
      // ...
          LT.second.getVectorElementCount()))
    // ...
  if (Mask.size() == 4 &&
      // ...
      (SrcTy->getScalarSizeInBits() == 16 ||
       SrcTy->getScalarSizeInBits() == 32) &&
      all_of(Mask, [](int E) { return E < 8; }))
    // ...
  if (LT.second.isFixedLengthVector() &&
      LT.second.getVectorNumElements() == Mask.size() &&
      // ...
      (isZIPMask(Mask, LT.second.getVectorNumElements(), Unused, Unused) ||
       isTRNMask(Mask, LT.second.getVectorNumElements(), Unused, Unused) ||
       isUZPMask(Mask, LT.second.getVectorNumElements(), Unused) ||
       isREVMask(Mask, LT.second.getScalarSizeInBits(),
                 LT.second.getVectorNumElements(), 16) ||
       isREVMask(Mask, LT.second.getScalarSizeInBits(),
                 LT.second.getVectorNumElements(), 32) ||
       isREVMask(Mask, LT.second.getScalarSizeInBits(),
                 LT.second.getVectorNumElements(), 64) ||
       // ...
              [&Mask](int M) { return M < 0 || M == Mask[0]; })))
    // ...
  return LT.first * Entry->Cost;
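// Before paying the generic table cost, single-instruction NEON permutes
// are recognized directly: zip1/zip2, trn1/trn2, uzp1/uzp2, the rev
// family at 16/32/64-bit granularity, broadcast-of-lane masks, and small
// cross-register masks on 16/32-bit elements.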
  // ...
  if (/* ... */
      LT.second.getSizeInBits() <= 128 && SubTp) {
    // ...
    if (SubLT.second.isVector()) {
      int NumElts = LT.second.getVectorNumElements();
      int NumSubElts = SubLT.second.getVectorNumElements();
      if ((Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
        // ...
    }
  }
  // ...
  if (IsExtractSubvector)
    // ...
  if (getPtrStride(*PSE, AccessTy, Ptr, TheLoop, DT, Strides,
  // ...
  return ST->useFixedOverScalableIfEqualCost();
  // ...
  return ST->getEpilogueVectorizationMinVF();
  // ...
  unsigned NumInsns = 0;
  // ...
    NumInsns += BB->size();
    // ...
    int64_t Scale, unsigned AddrSpace) const {
  // ...
  if (I->getOpcode() == Instruction::Or &&
  // ...
  if (I->getOpcode() == Instruction::Add ||
      I->getOpcode() == Instruction::Sub)
  // ...
  return all_equal(Shuf->getShuffleMask());
                                // ...
                                bool AllowSplat = false) {
  // ...
  auto areTypesHalfed = [](Value *FullV, Value *HalfV) {
    auto *FullTy = FullV->getType();
    auto *HalfTy = HalfV->getType();
    // ...
           2 * HalfTy->getPrimitiveSizeInBits().getFixedValue();
  };
  auto extractHalf = [](Value *FullV, Value *HalfV) {
    // ...
    return FullVT->getNumElements() == 2 * HalfVT->getNumElements();
  };
  // ...
  Value *S1Op1 = nullptr, *S2Op1 = nullptr;
  // ...
  if ((S1Op1 && (!areTypesHalfed(S1Op1, Op1) || !extractHalf(S1Op1, Op1))) ||
      (S2Op1 && (!areTypesHalfed(S2Op1, Op2) || !extractHalf(S2Op1, Op2))))
    // ...
  if ((M1Start != 0 && M1Start != (NumElements / 2)) ||
      (M2Start != 0 && M2Start != (NumElements / 2)))
    // ...
  if (S1Op1 && S2Op1 && M1Start != M2Start)
    // ...
  return Ext->getType()->getScalarSizeInBits() ==
         2 * Ext->getOperand(0)->getType()->getScalarSizeInBits();
  // ...
  Value *VectorOperand = nullptr;
  // ...
  if (!GEP || GEP->getNumOperands() != 2)
    // ...
  Value *Offsets = GEP->getOperand(1);
  // ...
  if (Base->getType()->isVectorTy() || !Offsets->getType()->isVectorTy())
    // ...
  if (OffsetsInst->getType()->getScalarSizeInBits() > 32 &&
      OffsetsInst->getOperand(0)->getType()->getScalarSizeInBits() <= 32)
    Ops.push_back(&GEP->getOperandUse(1));
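// Sinking a wide extend of narrow offsets next to the GEP lets the
// gather/scatter use the narrower offset form directly (the extend folds
// into the addressing) instead of first materializing a 64-bit offset
// vector.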
  switch (II->getIntrinsicID()) {
  case Intrinsic::aarch64_neon_smull:
  case Intrinsic::aarch64_neon_umull:
    // ...
    Ops.push_back(&II->getOperandUse(0));
    Ops.push_back(&II->getOperandUse(1));
    // ...
  case Intrinsic::fma:
  case Intrinsic::fmuladd:
    // ...
    Ops.push_back(&II->getOperandUse(0));
    // ...
    Ops.push_back(&II->getOperandUse(1));
    // ...
  case Intrinsic::aarch64_neon_sqdmull:
  case Intrinsic::aarch64_neon_sqdmulh:
  case Intrinsic::aarch64_neon_sqrdmulh:
    // ...
    Ops.push_back(&II->getOperandUse(0));
    // ...
    Ops.push_back(&II->getOperandUse(1));
    return !Ops.empty();
  case Intrinsic::aarch64_neon_fmlal:
  case Intrinsic::aarch64_neon_fmlal2:
  case Intrinsic::aarch64_neon_fmlsl:
  case Intrinsic::aarch64_neon_fmlsl2:
    // ...
    Ops.push_back(&II->getOperandUse(1));
    // ...
    Ops.push_back(&II->getOperandUse(2));
    return !Ops.empty();
  case Intrinsic::aarch64_sve_ptest_first:
  case Intrinsic::aarch64_sve_ptest_last:
    // ...
    if (IIOp->getIntrinsicID() == Intrinsic::aarch64_sve_ptrue)
      Ops.push_back(&II->getOperandUse(0));
    return !Ops.empty();
  case Intrinsic::aarch64_sme_write_horiz:
  case Intrinsic::aarch64_sme_write_vert:
  case Intrinsic::aarch64_sme_writeq_horiz:
  case Intrinsic::aarch64_sme_writeq_vert: {
    // ...
    if (!Idx || Idx->getOpcode() != Instruction::Add)
      // ...
    Ops.push_back(&II->getOperandUse(1));
    // ...
  }
  case Intrinsic::aarch64_sme_read_horiz:
  case Intrinsic::aarch64_sme_read_vert:
  case Intrinsic::aarch64_sme_readq_horiz:
  case Intrinsic::aarch64_sme_readq_vert:
  case Intrinsic::aarch64_sme_ld1b_vert:
  case Intrinsic::aarch64_sme_ld1h_vert:
  case Intrinsic::aarch64_sme_ld1w_vert:
  case Intrinsic::aarch64_sme_ld1d_vert:
  case Intrinsic::aarch64_sme_ld1q_vert:
  case Intrinsic::aarch64_sme_st1b_vert:
  case Intrinsic::aarch64_sme_st1h_vert:
  case Intrinsic::aarch64_sme_st1w_vert:
  case Intrinsic::aarch64_sme_st1d_vert:
  case Intrinsic::aarch64_sme_st1q_vert:
  case Intrinsic::aarch64_sme_ld1b_horiz:
  case Intrinsic::aarch64_sme_ld1h_horiz:
  case Intrinsic::aarch64_sme_ld1w_horiz:
  case Intrinsic::aarch64_sme_ld1d_horiz:
  case Intrinsic::aarch64_sme_ld1q_horiz:
  case Intrinsic::aarch64_sme_st1b_horiz:
  case Intrinsic::aarch64_sme_st1h_horiz:
  case Intrinsic::aarch64_sme_st1w_horiz:
  case Intrinsic::aarch64_sme_st1d_horiz:
  case Intrinsic::aarch64_sme_st1q_horiz: {
    // ...
    if (!Idx || Idx->getOpcode() != Instruction::Add)
      // ...
    Ops.push_back(&II->getOperandUse(3));
    // ...
  }
  case Intrinsic::aarch64_neon_pmull:
    // ...
    Ops.push_back(&II->getOperandUse(0));
    Ops.push_back(&II->getOperandUse(1));
    // ...
  case Intrinsic::aarch64_neon_pmull64:
    // ...
                                     II->getArgOperand(1)))
      // ...
    Ops.push_back(&II->getArgOperandUse(0));
    Ops.push_back(&II->getArgOperandUse(1));
    // ...
  case Intrinsic::masked_gather:
    // ...
    Ops.push_back(&II->getArgOperandUse(0));
    // ...
  case Intrinsic::masked_scatter:
    // ...
    Ops.push_back(&II->getArgOperandUse(1));
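// Common thread in these intrinsic cases: sink splat operands and small
// add-immediate indices into the user's block so instruction selection
// can fold them - lane splats into indexed multiply variants, constant
// slice offsets into the SME tile read/write and load/store forms.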
  // ...
  auto ShouldSinkCondition = [](Value *Cond,
                                SmallVectorImpl<Use *> &Ops) {
    // ...
    if (II->getIntrinsicID() != Intrinsic::vector_reduce_or ||
    // ...
    Ops.push_back(&II->getOperandUse(0));
    // ...
  };
  switch (I->getOpcode()) {
  case Instruction::GetElementPtr:
  case Instruction::Add:
  case Instruction::Sub:
    // ...
    for (unsigned Op = 0; Op < I->getNumOperands(); ++Op) {
      // ...
      Ops.push_back(&I->getOperandUse(Op));
    }
    // ...
  case Instruction::Select: {
    if (!ShouldSinkCondition(I->getOperand(0), Ops))
      // ...
    Ops.push_back(&I->getOperandUse(0));
    // ...
  }
  case Instruction::UncondBr:
    // ...
  case Instruction::CondBr: {
    // ...
    Ops.push_back(&I->getOperandUse(0));
    // ...
  }
  case Instruction::FMul:
    // ...
    Ops.push_back(&I->getOperandUse(0));
    // ...
    Ops.push_back(&I->getOperandUse(1));
  case Instruction::Xor:
    // ...
    if (I->getType()->isVectorTy() && ST->isNeonAvailable()) {
      // ...
          ST->isSVEorStreamingSVEAvailable() && (ST->hasSVE2() || ST->hasSME());
      // ...
    }
    // ...
  case Instruction::And:
  case Instruction::Or:
    // ...
    if (I->getOpcode() == Instruction::Or &&
    // ...
    if (!(I->getType()->isVectorTy() && ST->hasNEON()) &&
    // ...
    for (auto &Op : I->operands()) {
      // ...
      Ops.push_back(&Not);
      Ops.push_back(&InsertElt);
      // ...
    }
  // ...
  if (!I->getType()->isVectorTy())
    return !Ops.empty();

  switch (I->getOpcode()) {
  case Instruction::Sub:
  case Instruction::Add: {
    // ...
    Ops.push_back(&Ext1->getOperandUse(0));
    Ops.push_back(&Ext2->getOperandUse(0));
    // ...
    Ops.push_back(&I->getOperandUse(0));
    Ops.push_back(&I->getOperandUse(1));
    // ...
  }
  case Instruction::Or: {
    // ...
    if (ST->hasNEON()) {
      // ...
      if (I->getParent() != MainAnd->getParent() ||
      // ...
      if (I->getParent() != IA->getParent() ||
          I->getParent() != IB->getParent())
        // ...
      Ops.push_back(&I->getOperandUse(0));
      Ops.push_back(&I->getOperandUse(1));
      // ...
    }
    // ...
  }
  case Instruction::Mul: {
    auto ShouldSinkSplatForIndexedVariant = [](Value *V) {
      // ...
      if (Ty->isScalableTy())
        // ...
      return Ty->getScalarSizeInBits() == 16 || Ty->getScalarSizeInBits() == 32;
    };
    // ...
    int NumZExts = 0, NumSExts = 0;
    for (auto &Op : I->operands()) {
      // ...
      auto *ExtOp = Ext->getOperand(0);
      if (isSplatShuffle(ExtOp) && ShouldSinkSplatForIndexedVariant(ExtOp))
        Ops.push_back(&Ext->getOperandUse(0));
      // ...
      if (Ext->getOperand(0)->getType()->getScalarSizeInBits() * 2 <
          I->getType()->getScalarSizeInBits())
        // ...
      if (!ElementConstant || !ElementConstant->isZero())
        // ...
      unsigned Opcode = OperandInstr->getOpcode();
      if (Opcode == Instruction::SExt)
        // ...
      else if (Opcode == Instruction::ZExt)
        // ...
      unsigned Bitwidth = I->getType()->getScalarSizeInBits();
      // ...
      Ops.push_back(&Insert->getOperandUse(1));
      // ...
    }
    // ...
    if (!Ops.empty() && (NumSExts == 2 || NumZExts == 2))
      // ...
    if (!ShouldSinkSplatForIndexedVariant(I))
      // ...
    Ops.push_back(&I->getOperandUse(0));
    // ...
    Ops.push_back(&I->getOperandUse(1));
    return !Ops.empty();
  }
  case Instruction::FMul: {
    // ...
    if (I->getType()->isScalableTy())
      return !Ops.empty();
    // ...
      return !Ops.empty();
    // ...
    Ops.push_back(&I->getOperandUse(0));
    // ...
    Ops.push_back(&I->getOperandUse(1));
    return !Ops.empty();
  }
static bool isAllActivePredicate(SelectionDAG &DAG, SDValue N)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file provides a helper that implements much of the TTI interface in terms of the target-independ...
static Error reportError(StringRef Message)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
Cost tables and simple lookup functions.
This file defines the DenseMap class.
static Value * getCondition(Instruction *I)
const HexagonInstrInfo * TII
This file provides the interface for the instcombine pass implementation.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
This file defines the LoopVectorizationLegality class.
static const Function * getCalledFunction(const Value *V)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
const SmallVectorImpl< MachineOperand > & Cond
static uint64_t getBits(uint64_t Val, int Start, int End)
static SymbolRef::Type getType(const Symbol *Sym)
This file describes how to lower LLVM code to machine code.
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
unsigned getVectorInsertExtractBaseCost() const
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={}, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const override
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getCostOfKeepingLiveOverCall(ArrayRef< Type * > Tys) const override
unsigned getMaxInterleaveFactor(ElementCount VF) const override
InstructionCost getMaskedMemoryOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const
InstructionCost getGatherScatterOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const
bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const override
InstructionCost getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE, const SCEV *Ptr, TTI::TargetCostKind CostKind) const override
bool isExtPartOfAvgExpr(const Instruction *ExtUser, Type *Dst, Type *Src) const
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const override
InstructionCost getIntImmCost(int64_t Val) const
Calculate the cost of materializing a 64-bit value.
std::optional< InstructionCost > getFP16BF16PromoteCost(Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info, TTI::OperandValueInfo Op2Info, bool IncludeTrunc, bool CanUseSVE, std::function< InstructionCost(Type *)> InstCost) const
FP16 and BF16 operations are lowered to fptrunc(op(fpext, fpext) if the architecture features are not...
bool prefersVectorizedAddressing() const override
InstructionCost getIndexedVectorInstrCostFromEnd(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index) const override
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
InstructionCost getMulAccReductionCost(bool IsUnsigned, unsigned RedOpcode, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const override
InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr) const override
bool isElementTypeLegalForScalableVector(Type *Ty) const override
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
InstructionCost getPartialReductionCost(unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType, ElementCount VF, TTI::PartialReductionExtendKind OpAExtend, TTI::PartialReductionExtendKind OpBExtend, std::optional< unsigned > BinOp, TTI::TargetCostKind CostKind, std::optional< FastMathFlags > FMF) const override
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const override
bool preferTailFoldingOverEpilogue(TailFoldingInfo *TFI) const override
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
APInt getPriorityMask(const Function &F) const override
bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const override
bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2) const override
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const override
Check if sinking I's operands to I's basic block is profitable, because the operands can be folded in...
std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const override
bool useNeonVector(const Type *Ty) const
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *ValTy, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
bool isLegalMaskedExpandLoad(Type *DataTy, Align Alignment) const override
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override
InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index, TTI::TargetCostKind CostKind) const override
unsigned getInlineCallPenalty(const Function *F, const CallBase &Call, unsigned DefaultCallPenalty) const override
bool areInlineCompatible(const Function *Caller, const Function *Callee) const override
unsigned getMaxNumElements(ElementCount VF) const
Try to return an estimate cost factor that can be used as a multiplier when scalarizing an operation ...
bool shouldTreatInstructionLikeSelect(const Instruction *I) const override
bool isMultiversionedFunction(const Function &F) const override
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const override
bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, ElementCount VF) const override
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const override
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const override
bool isLegalMaskedGatherScatter(Type *DataType) const
bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const override
See if I should be considered for address type promotion.
APInt getFeatureMask(const Function &F) const override
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override
bool areTypesABICompatible(const Function *Caller, const Function *Callee, ArrayRef< Type * > Types) const override
bool enableScalableVectorization() const override
InstructionCost getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const override
Value * getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType, bool CanCreate=true) const override
bool hasKnownLowerThroughputFromSchedulingModel(unsigned Opcode1, unsigned Opcode2) const
Check whether Opcode1 has less throughput according to the scheduling model than Opcode2.
unsigned getEpilogueVectorizationMinVF() const override
InstructionCost getSpliceCost(VectorType *Tp, int Index, TTI::TargetCostKind CostKind) const
InstructionCost getArithmeticReductionCostSVE(unsigned Opcode, VectorType *ValTy, TTI::TargetCostKind CostKind) const
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, StackOffset BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) const override
Return the cost of the scaling factor used in the addressing mode represented by AM for this target,...
bool preferFixedOverScalableIfEqualCost(bool IsEpilogue) const override
Class for arbitrary precision integers.
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
unsigned popcount() const
Count the number of bits set.
unsigned countLeadingOnes() const
void negate()
Negate this APInt in place.
LLVM_ABI APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
unsigned logBase2() const
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
int64_t getSExtValue() const
Get sign extended value.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
Get the array size.
LLVM Basic Block Representation.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const override
TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind, ArrayRef< int > Mask, VectorType *SrcTy, int &Index, VectorType *&SubTy) const
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I=nullptr, int64_t ScalableOffset=0) const override
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getScalarizationOverhead(VectorType *InTy, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={}, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const override
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind) const override
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
InstructionCost getMulAccReductionCost(bool IsUnsigned, unsigned RedOpcode, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind) const override
InstructionCost getIndexedVectorInstrCostFromEnd(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index) const override
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const
InstructionCost getPartialReductionCost(unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType, ElementCount VF, TTI::PartialReductionExtendKind OpAExtend, TTI::PartialReductionExtendKind OpBExtend, std::optional< unsigned > BinOp, TTI::TargetCostKind CostKind, std::optional< FastMathFlags > FMF) const override
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
InstructionCost getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const override
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
bool isTypeLegal(Type *Ty) const override
static BinaryOperator * CreateWithCopiedFlags(BinaryOps Opc, Value *V1, Value *V2, Value *CopyO, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Value * getArgOperand(unsigned i) const
unsigned arg_size() const
This class represents a function call, abstracting a target machine's calling convention.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
@ ICMP_SGE
signed greater or equal
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
static bool isIntPredicate(Predicate P)
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
static LLVM_ABI ConstantAggregateZero * get(Type *Ty)
This is the shared class of boolean and integer constants.
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
const APInt & getValue() const
Return the constant as an APInt value reference.
static LLVM_ABI ConstantInt * getBool(LLVMContext &Context, bool V)
static LLVM_ABI Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
This is an important base class in LLVM.
LLVM_ABI Constant * getSplatValue(bool AllowPoison=false) const
If all elements of the vector constant have the same value, return that value.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string in and methods for querying it.
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
static constexpr ElementCount getScalable(ScalarTy MinVal)
static constexpr ElementCount getFixed(ScalarTy MinVal)
This provides a helper for copying FMF from an instruction or setting specified flags.
Convenience struct for specifying and reasoning about fast-math flags.
bool noSignedZeros() const
bool allowContract() const
Container class for subtarget features.
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
CallInst * CreateInsertVector(Type *DstType, Value *SrcVec, Value *SubVec, Value *Idx, const Twine &Name="")
Create a call to the vector.insert intrinsic.
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
IntegerType * getIntNTy(unsigned N)
Fetch the type representing an N-bit integer.
Type * getDoubleTy()
Fetch the type representing a 64-bit floating point value.
LLVM_ABI Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains.
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > OverloadTypes, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="", ArrayRef< OperandBundleDef > OpBundles={})
Create a call to intrinsic ID with Args, mangled using OverloadTypes.
LLVM_ABI CallInst * CreateMaskedLoad(Type *Ty, Value *Ptr, Align Alignment, Value *Mask, Value *PassThru=nullptr, const Twine &Name="")
Create a call to Masked Load intrinsic.
LLVM_ABI Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Type * getHalfTy()
Fetch the type representing a 16-bit floating point value.
Value * CreateGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
Value * CreateBitOrPointerCast(Value *V, Type *DestTy, const Twine &Name="")
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Value * CreateBinOpFMF(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, FMFSource FMFSource, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
LLVM_ABI CallInst * CreateMaskedStore(Value *Val, Value *Ptr, Align Alignment, Value *Mask)
Create a call to Masked Store intrinsic.
Type * getFloatTy()
Fetch the type representing a 32-bit floating point value.
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
LLVM_ABI Value * CreateElementCount(Type *Ty, ElementCount EC)
Create an expression which evaluates to the number of elements in EC at runtime.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
This instruction inserts a single (scalar) element into a VectorType value.
The core instruction combiner logic.
virtual Instruction * eraseInstFromFunction(Instruction &I)=0
Combiner aware instruction erasure.
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)
Replace operand of instruction and add old operand to the worklist.
static InstructionCost getInvalid(CostType Val=0)
CostType getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
LLVM_ABI bool isCommutative() const LLVM_READONLY
Return true if the instruction is commutative:
LLVM_ABI FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
Class to represent integer types.
bool hasGroups() const
Returns true if we have any interleave groups.
const SmallVectorImpl< Type * > & getArgTypes() const
Type * getReturnType() const
const SmallVectorImpl< const Value * > & getArgs() const
const IntrinsicInst * getInst() const
Intrinsic::ID getID() const
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
Value * getPointerOperand()
iterator_range< block_iterator > blocks() const
RecurrenceSet & getFixedOrderRecurrences()
Return the fixed-order recurrences found in the loop.
DominatorTree * getDominatorTree() const
PredicatedScalarEvolution * getPredicatedScalarEvolution() const
const ReductionList & getReductionVars() const
Returns the reduction variables found in the loop.
Represents a single loop in the control flow graph.
const FeatureBitset & getFeatureBits() const
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
static MVT getScalableVectorVT(MVT VT, unsigned NumElements)
bool isFixedLengthVector() const
MVT getVectorElementType() const
Information for memory intrinsic cost model.
Align getAlignment() const
Type * getDataType() const
Intrinsic::ID getID() const
const Instruction * getInst() const
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Type * getRecurrenceType() const
Returns the type of the recurrence.
RecurKind getRecurrenceKind() const
This node represents a polynomial recurrence on the trip count of the specified loop.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
This class represents an analyzed expression in the program.
SMEAttrs is a utility class to parse the SME ACLE attributes on functions.
bool hasNonStreamingInterfaceAndBody() const
bool hasStreamingCompatibleInterface() const
bool hasStreamingInterfaceOrBody() const
bool isSMEABIRoutine() const
bool hasStreamingBody() const
void set(unsigned M, bool Enable=true)
SMECallAttrs is a utility class to hold the SMEAttrs for a callsite.
bool requiresPreservingZT0() const
bool requiresSMChange() const
bool requiresLazySave() const
bool requiresPreservingAllZAState() const
static LLVM_ABI ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
static ScalableVectorType * getDoubleElementsVectorType(ScalableVectorType *VTy)
The main scalar evolution driver.
LLVM_ABI const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
LLVM_ABI unsigned getSmallConstantTripMultiple(const Loop *L, const SCEV *ExitCount)
Returns the largest constant divisor of the trip count as a normal unsigned value,...
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI unsigned getSmallConstantMaxTripCount(const Loop *L, SmallVectorImpl< const SCEVPredicate * > *Predicates=nullptr)
Returns the upper bound of the loop trip count as a normal unsigned value.
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
const SCEV * getSymbolicMaxBackedgeTakenCount(const Loop *L)
When successful, this returns a SCEV that is greater than or equal to (i.e.
This instruction constructs a fixed permutation of two input vectors.
static LLVM_ABI bool isDeInterleaveMaskOfFactor(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
Check if the mask is a DE-interleave mask of the given factor Factor like: <Index,...
static LLVM_ABI bool isExtractSubvectorMask(ArrayRef< int > Mask, int NumSrcElts, int &Index)
Return true if this shuffle mask is an extract subvector mask.
static LLVM_ABI bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
static StackOffset getScalable(int64_t Scalable)
static StackOffset getFixed(int64_t Fixed)
An instruction for storing to memory.
Represent a constant reference to a string, i.e.
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Class to represent struct types.
TargetInstrInfo - Interface to description of machine instruction set.
std::pair< LegalizeTypeAction, EVT > LegalizeKind
LegalizeKind holds the legalization kind that needs to happen to EVT in order to type-legalize it.
const RTLIB::RuntimeLibcallsInfo & getRuntimeLibcallsInfo() const
Primary interface to the complete machine description for the target machine.
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
bool isVectorTy() const
True if this is an instance of VectorType.
LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVM_ABI Type * getWithNewBitWidth(unsigned NewBitWidth) const
Given an integer or vector type, change the lane bitwidth to NewBitwidth, whilst keeping the old numb...
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
LLVM_ABI Type * getWithNewType(Type *EltTy) const
Given vector type, change the element type, whilst keeping the old number of elements.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
const Use & getOperandUse(unsigned i) const
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Base class of all SIMD vector types.
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
static VectorType * getInteger(VectorType *VTy)
This static method gets a VectorType with the same number of elements as the input type,...
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
Type * getElementType() const
constexpr ScalarTy getFixedValue() const
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
const ParentTy * getParent() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
void expandMOVImm(uint64_t Imm, unsigned BitSize, SmallVectorImpl< ImmInsnModel > &Insn)
Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more real move-immediate instructions to...
LLVM_ABI APInt getCpuSupportsMask(ArrayRef< StringRef > Features)
static constexpr unsigned SVEBitsPerBlock
LLVM_ABI APInt getFMVPriority(ArrayRef< StringRef > Features)
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
@ C
The default llvm calling convention, compatible with C.
ISD namespace - This namespace contains an enum which represents all of the SelectionDAG node types a...
@ ADD
Simple integer binary arithmetic operators.
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
@ FADD
Simple binary floating point operators.
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
@ SIGN_EXTEND
Conversion operators.
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ SHL
Shift and rotation operations.
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
auto m_Cmp()
Matches any compare instruction and ignore it.
BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)
Matches an And with LHS and RHS in either order.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
BinaryOp_match< LHS, RHS, Instruction::FMul > m_FMul(const LHS &L, const RHS &R)
bool match(Val *V, const Pattern &P)
match_bind< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
cst_pred_ty< is_nonnegative > m_NonNegative()
Match an integer or vector of non-negative values.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
IntrinsicID_match m_VScale()
Matches a call to llvm.vscale().
auto m_BinOp()
Match an arbitrary binary operation and ignore it.
auto m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
AnyBinaryOp_match< LHS, RHS, true > m_c_BinOp(const LHS &L, const RHS &R)
Matches a BinaryOperator with LHS and RHS in either order.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
BinOpPred_match< LHS, RHS, is_shift_op > m_Shift(const LHS &L, const RHS &R)
Matches shift operations.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
brc_match< Cond_t, match_bind< BasicBlock >, match_bind< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
auto m_Undef()
Match an arbitrary undef constant.
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
auto m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
LLVM_ABI Libcall getPOW(EVT RetVT)
getPOW - Return the POW_* value for the given types, or UNKNOWN_LIBCALL if there is none.
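A small sketch of the typical query (the cost-model context is an assumption):

  // If no pow libcall exists for this type, the operation would need to be
  // handled some other way.
  RTLIB::Libcall LC = RTLIB::getPOW(EVT(MVT::f128));
  bool HasLibcall = (LC != RTLIB::UNKNOWN_LIBCALL);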
initializer< Ty > init(const Ty &Val)
LocationClass< Ty > location(Ty &L)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
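For example:

  SmallVector<int, 4> Mask = {0, 1, 2, 3};
  for (int Elt : drop_begin(Mask)) {
    // Visits 1, 2, 3; the first element is skipped.
  }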
std::optional< unsigned > isDUPQMask(ArrayRef< int > Mask, unsigned Segments, unsigned SegmentSize)
isDUPQMask - matches a splat of equivalent lanes within segments of a given number of elements.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
const CostTblEntryT< CostType > * CostTableLookup(ArrayRef< CostTblEntryT< CostType > > Tbl, int ISD, MVT Ty)
Find in cost table.
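A hedged sketch of the usual lookup pattern (the table entries are illustrative assumptions, not real AArch64 costs):

  static const CostTblEntry MulCostTbl[] = {
      {ISD::MUL, MVT::v4i32, 1},
      {ISD::MUL, MVT::v2i64, 2},
  };
  // Returns the matching entry, or nullptr if (ISD, type) is not in the table.
  if (const auto *Entry = CostTableLookup(MulCostTbl, ISD::MUL, MVT::v4i32))
    return Entry->Cost;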
LLVM_ABI bool getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name)
Returns true if Name is applied to TheLoop and enabled.
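For example, a sketch checking a standard loop hint (L is an assumed Loop*):

  // Skip loops the vectoriser has already processed.
  if (getBooleanLoopAttribute(L, "llvm.loop.isvectorized"))
    return false;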
bool isZIPMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut, unsigned &OperandOrderOut)
Return true for zip1 or zip2 masks of the form: <0, 8, 1, 9, 2, 10, 3, 11> (WhichResultOut = 0, zip1) or <4, 12, 5, 13, 6, 14, 7, 15> (WhichResultOut = 1, zip2).
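For example:

  int Mask[] = {0, 8, 1, 9, 2, 10, 3, 11};
  unsigned WhichResult, OperandOrder;
  bool IsZip = isZIPMask(Mask, 8, WhichResult, OperandOrder);
  // IsZip == true, WhichResult == 0 (a zip1 pattern)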
TailFoldingOpts
An enum to describe what types of loops we should attempt to tail-fold: Disabled: none; Reductions: loops containing reductions; Recurrences: loops containing fixed-order recurrences; Reverse: loops requiring reversed memory accesses.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B, C, ...), where A is the 0-based index of the item and B, C, ... are the corresponding values from each input range.
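For example (Mask is any assumed range of ints):

  for (auto [Idx, Elt] : enumerate(Mask)) {
    // Idx counts from 0; Elt is the element at that position.
  }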
bool isDUPFirstSegmentMask(ArrayRef< int > Mask, unsigned Segments, unsigned SegmentSize)
isDUPFirstSegmentMask - matches a splat of the first 128b segment.
TypeConversionCostTblEntryT< unsigned > TypeConversionCostTblEntry
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI std::optional< const MDOperand * > findStringMetadataForLoop(const Loop *TheLoop, StringRef Name)
Find string metadata for loop.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
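A sketch (Vec is an assumed vector Value*):

  if (Value *Scalar = getSplatValue(Vec)) {
    // Every lane of Vec is Scalar; the operation could be costed as a
    // scalar op plus a broadcast.
  }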
constexpr auto equal_to(T &&Arg)
Functor variant of std::equal_to that can be used as a UnaryPredicate in functional algorithms like all_of.
LLVM_ABI bool MaskedValueIsZero(const Value *V, const APInt &Mask, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if 'V & Mask' is known to be zero.
unsigned M1(unsigned Val)
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned element.
unsigned getPerfectShuffleCost(llvm::ArrayRef< int > M)
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOne bit sets.
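A sketch using the value-returning overload (V and DL are assumed to be a Value* and the module's DataLayout):

  KnownBits Known = computeKnownBits(V, DL);
  if (Known.isNonNegative()) {
    // The sign bit is known clear, so sext(V) behaves like zext(V).
  }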
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
bool isUZPMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut)
Return true for uzp1 or uzp2 masks of the form: <0, 2, 4, 6, 8, 10, 12, 14> or <1, 3, 5, 7, 9, 11, 13, 15>.
bool isREVMask(ArrayRef< int > M, unsigned EltSize, unsigned NumElts, unsigned BlockSize)
isREVMask - Check if a vector shuffle corresponds to a REV instruction with the specified blocksize.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference sizeof(SmallVector<T, 0>).
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
constexpr int PoisonMaskElem
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
LLVM_ABI Value * simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a BinaryOperator, fold the result or return null.
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ Or
Bitwise or logical OR of integers.
@ AnyOf
AnyOf reduction with select(cmp(),x,y) where one of (x,y) is loop invariant, and both x and y are integer type.
@ Xor
Bitwise or logical XOR of integers.
@ FindLast
FindLast reduction with select(cmp(),x,y) where x and y are integer type.
@ FMax
FP max implemented in terms of select(cmp()).
@ FMulAdd
Sum of float products with llvm.fmuladd(a * b + sum).
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ And
Bitwise or logical AND of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ FMin
FP min implemented in terms of select(cmp()).
@ Sub
Subtraction of integers.
@ AddChainWithSubs
A chain of adds and subs.
@ UMax
Unsigned integer max implemented in terms of select(cmp()).
CostTblEntryT< unsigned > CostTblEntry
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
unsigned getNumElementsFromSVEPredPattern(unsigned Pattern)
Return the number of active elements for VL1 to VL256 predicate pattern, zero for all other patterns.
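For example (the enumerator is assumed to come from the AArch64SVEPredPattern namespace):

  unsigned Lanes = getNumElementsFromSVEPredPattern(AArch64SVEPredPattern::vl8);
  // Lanes == 8; patterns such as 'all' yield 0.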
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
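These two helpers pair naturally; a sketch (I is assumed to be a LoadInst or StoreInst):

  const Value *Ptr = getLoadStorePointerOperand(I);
  Type *AccessTy = getLoadStoreType(I);
  // Ptr and AccessTy describe the memory access regardless of its direction.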
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list is empty.
Type * toVectorTy(Type *Scalar, ElementCount EC)
A helper function for converting Scalar types to vector types.
LLVM_ABI std::optional< int64_t > getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const DominatorTree &DT, const DenseMap< Value *, const SCEV * > &StridesMap=DenseMap< Value *, const SCEV * >(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of the access type size.
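A hedged sketch (PSE, AccessTy, Ptr, Lp, and a const DominatorTree &DT are assumed to be in scope):

  if (std::optional<int64_t> Stride =
          getPtrStride(PSE, AccessTy, Ptr, Lp, DT)) {
    bool Consecutive = (*Stride == 1); // unit stride, in access-type units
  }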
const TypeConversionCostTblEntryT< CostType > * ConvertCostTableLookup(ArrayRef< TypeConversionCostTblEntryT< CostType > > Tbl, int ISD, MVT Dst, MVT Src)
Find in type conversion cost table.
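A sketch mirroring the usual cast-cost pattern (the entry is an illustrative assumption):

  static const TypeConversionCostTblEntry ExtCostTbl[] = {
      {ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 1},
  };
  if (const auto *Entry = ConvertCostTableLookup(ExtCostTbl, ISD::ZERO_EXTEND,
                                                 MVT::v4i32, MVT::v4i16))
    return Entry->Cost;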
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
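For example:

  uint64_t N = 6;
  if (!isPowerOf2_64(N))
    N = NextPowerOf2(N); // N == 8; note NextPowerOf2(8) == 16 (strictly greater)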
bool isTRNMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut, unsigned &OperandOrderOut)
Return true for trn1 or trn2 masks of the form: <0, 8, 2, 10, 4, 12, 6, 14> (WhichResultOut = 0, trn1) or <1, 9, 3, 11, 5, 13, 7, 15> (WhichResultOut = 1, trn2).
unsigned getMatchingIROpode() const
bool inactiveLanesAreUnused() const
bool inactiveLanesAreNotDefined() const
bool hasMatchingUndefIntrinsic() const
static SVEIntrinsicInfo defaultMergingUnaryNarrowingTopOp()
static SVEIntrinsicInfo defaultZeroingOp()
bool hasGoverningPredicate() const
SVEIntrinsicInfo & setOperandIdxInactiveLanesTakenFrom(unsigned Index)
static SVEIntrinsicInfo defaultMergingOp(Intrinsic::ID IID=Intrinsic::not_intrinsic)
SVEIntrinsicInfo & setOperandIdxWithNoActiveLanes(unsigned Index)
unsigned getOperandIdxWithNoActiveLanes() const
SVEIntrinsicInfo & setInactiveLanesAreUnused()
SVEIntrinsicInfo & setInactiveLanesAreNotDefined()
SVEIntrinsicInfo & setGoverningPredicateOperandIdx(unsigned Index)
bool inactiveLanesTakenFromOperand() const
static SVEIntrinsicInfo defaultUndefOp()
bool hasOperandWithNoActiveLanes() const
Intrinsic::ID getMatchingUndefIntrinsic() const
SVEIntrinsicInfo & setResultIsZeroInitialized()
static SVEIntrinsicInfo defaultMergingUnaryOp()
SVEIntrinsicInfo & setMatchingUndefIntrinsic(Intrinsic::ID IID)
unsigned getGoverningPredicateOperandIdx() const
bool hasMatchingIROpode() const
bool resultIsZeroInitialized() const
SVEIntrinsicInfo & setMatchingIROpcode(unsigned Opcode)
unsigned getOperandIdxInactiveLanesTakenFrom() const
static SVEIntrinsicInfo defaultVoidOp(unsigned GPIndex)
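A hedged sketch of how these builders chain (the intrinsic ID and IR opcode are illustrative assumptions):

  SVEIntrinsicInfo Info =
      SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_fadd)
          .setMatchingIROpcode(Instruction::FAdd);
  if (Info.hasGoverningPredicate()) {
    unsigned GPIdx = Info.getGoverningPredicateOperandIdx();
    // Inspect the predicate operand at index GPIdx.
  }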
This struct is a compact representation of a valid (non-zero power of two) alignment.
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
uint64_t getScalarSizeInBits() const
static LLVM_ABI EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
bool isFixedLengthVector() const
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
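A short sketch (Ty and Ctx are assumed to be a fixed-width vector Type* and its LLVMContext):

  EVT VT = EVT::getEVT(Ty); // e.g. v4i16
  if (VT.isFixedLengthVector()) {
    // Widen the element type while keeping the lane count.
    EVT WideVT = EVT::getVectorVT(Ctx, MVT::i32, VT.getVectorNumElements());
    bool Widens = WideVT.bitsGT(VT); // true for v4i16 -> v4i32
  }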
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Machine model for scheduling, bundling, and heuristics.
static LLVM_ABI double getReciprocalThroughput(const MCSubtargetInfo &STI, const MCSchedClassDesc &SCDesc)
Information about a load/store intrinsic defined by the target.
InterleavedAccessInfo * IAI
LoopVectorizationLegality * LVL
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*vscale.