23#include "llvm/IR/IntrinsicsAArch64.h"
35#define DEBUG_TYPE "aarch64tti"
41 "sve-prefer-fixed-over-scalable-if-equal",
cl::Hidden);
59 "Penalty of calling a function that requires a change to PSTATE.SM"));
63 cl::desc(
"Penalty of inlining a call that requires a change to PSTATE.SM"));
74 cl::desc(
"The cost of a histcnt instruction"));
78 cl::desc(
"The number of instructions to search for a redundant dmb"));
82 cl::desc(
"Threshold for forced unrolling of small loops in AArch64"));
85class TailFoldingOption {
100 bool NeedsDefault =
true;
104 void setNeedsDefault(
bool V) { NeedsDefault =
V; }
119 assert((InitialBits == TailFoldingOpts::Disabled || !NeedsDefault) &&
120 "Initial bits should only include one of "
121 "(disabled|all|simple|default)");
122 Bits = NeedsDefault ? DefaultBits : InitialBits;
124 Bits &= ~DisableBits;
130 errs() <<
"invalid argument '" << Opt
131 <<
"' to -sve-tail-folding=; the option should be of the form\n"
132 " (disabled|all|default|simple)[+(reductions|recurrences"
133 "|reverse|noreductions|norecurrences|noreverse)]\n";
139 void operator=(
const std::string &Val) {
148 setNeedsDefault(
false);
151 StringRef(Val).split(TailFoldTypes,
'+', -1,
false);
153 unsigned StartIdx = 1;
154 if (TailFoldTypes[0] ==
"disabled")
155 setInitialBits(TailFoldingOpts::Disabled);
156 else if (TailFoldTypes[0] ==
"all")
157 setInitialBits(TailFoldingOpts::All);
158 else if (TailFoldTypes[0] ==
"default")
159 setNeedsDefault(
true);
160 else if (TailFoldTypes[0] ==
"simple")
161 setInitialBits(TailFoldingOpts::Simple);
164 setInitialBits(TailFoldingOpts::Disabled);
167 for (
unsigned I = StartIdx;
I < TailFoldTypes.
size();
I++) {
168 if (TailFoldTypes[
I] ==
"reductions")
169 setEnableBit(TailFoldingOpts::Reductions);
170 else if (TailFoldTypes[
I] ==
"recurrences")
171 setEnableBit(TailFoldingOpts::Recurrences);
172 else if (TailFoldTypes[
I] ==
"reverse")
173 setEnableBit(TailFoldingOpts::Reverse);
174 else if (TailFoldTypes[
I] ==
"noreductions")
175 setDisableBit(TailFoldingOpts::Reductions);
176 else if (TailFoldTypes[
I] ==
"norecurrences")
177 setDisableBit(TailFoldingOpts::Recurrences);
178 else if (TailFoldTypes[
I] ==
"noreverse")
179 setDisableBit(TailFoldingOpts::Reverse);
196 "Control the use of vectorisation using tail-folding for SVE where the"
197 " option is specified in the form (Initial)[+(Flag1|Flag2|...)]:"
198 "\ndisabled (Initial) No loop types will vectorize using "
200 "\ndefault (Initial) Uses the default tail-folding settings for "
202 "\nall (Initial) All legal loop types will vectorize using "
204 "\nsimple (Initial) Use tail-folding for simple loops (not "
205 "reductions or recurrences)"
206 "\nreductions Use tail-folding for loops containing reductions"
207 "\nnoreductions Inverse of above"
208 "\nrecurrences Use tail-folding for loops containing fixed order "
210 "\nnorecurrences Inverse of above"
211 "\nreverse Use tail-folding for loops requiring reversed "
213 "\nnoreverse Inverse of above"),
258 TTI->isMultiversionedFunction(
F) ?
"fmv-features" :
"target-features";
259 StringRef FeatureStr =
F.getFnAttribute(AttributeStr).getValueAsString();
260 FeatureStr.
split(Features,
",");
276 return F.hasFnAttribute(
"fmv-features");
280 AArch64::FeatureExecuteOnly,
320 FeatureBitset EffectiveCallerBits = CallerBits ^ InlineInverseFeatures;
321 FeatureBitset EffectiveCalleeBits = CalleeBits ^ InlineInverseFeatures;
323 return (EffectiveCallerBits & EffectiveCalleeBits) == EffectiveCalleeBits;
341 auto FVTy = dyn_cast<FixedVectorType>(Ty);
343 FVTy->getScalarSizeInBits() * FVTy->getNumElements() > 128;
352 unsigned DefaultCallPenalty)
const {
377 if (
F ==
Call.getCaller())
383 return DefaultCallPenalty;
394 ST->isSVEorStreamingSVEAvailable() &&
395 !ST->disableMaximizeScalableBandwidth();
419 assert(Ty->isIntegerTy());
421 unsigned BitSize = Ty->getPrimitiveSizeInBits();
428 ImmVal = Imm.sext((BitSize + 63) & ~0x3fU);
433 for (
unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
439 return std::max<InstructionCost>(1,
Cost);
446 assert(Ty->isIntegerTy());
448 unsigned BitSize = Ty->getPrimitiveSizeInBits();
454 unsigned ImmIdx = ~0U;
458 case Instruction::GetElementPtr:
463 case Instruction::Store:
466 case Instruction::Add:
467 case Instruction::Sub:
468 case Instruction::Mul:
469 case Instruction::UDiv:
470 case Instruction::SDiv:
471 case Instruction::URem:
472 case Instruction::SRem:
473 case Instruction::And:
474 case Instruction::Or:
475 case Instruction::Xor:
476 case Instruction::ICmp:
480 case Instruction::Shl:
481 case Instruction::LShr:
482 case Instruction::AShr:
486 case Instruction::Trunc:
487 case Instruction::ZExt:
488 case Instruction::SExt:
489 case Instruction::IntToPtr:
490 case Instruction::PtrToInt:
491 case Instruction::BitCast:
492 case Instruction::PHI:
493 case Instruction::Call:
494 case Instruction::Select:
495 case Instruction::Ret:
496 case Instruction::Load:
501 int NumConstants = (BitSize + 63) / 64;
514 assert(Ty->isIntegerTy());
516 unsigned BitSize = Ty->getPrimitiveSizeInBits();
525 if (IID >= Intrinsic::aarch64_addg && IID <= Intrinsic::aarch64_udiv)
531 case Intrinsic::sadd_with_overflow:
532 case Intrinsic::uadd_with_overflow:
533 case Intrinsic::ssub_with_overflow:
534 case Intrinsic::usub_with_overflow:
535 case Intrinsic::smul_with_overflow:
536 case Intrinsic::umul_with_overflow:
538 int NumConstants = (BitSize + 63) / 64;
545 case Intrinsic::experimental_stackmap:
546 if ((Idx < 2) || (Imm.getBitWidth() <= 64 &&
isInt<64>(Imm.getSExtValue())))
549 case Intrinsic::experimental_patchpoint_void:
550 case Intrinsic::experimental_patchpoint:
551 if ((Idx < 4) || (Imm.getBitWidth() <= 64 &&
isInt<64>(Imm.getSExtValue())))
554 case Intrinsic::experimental_gc_statepoint:
555 if ((Idx < 5) || (Imm.getBitWidth() <= 64 &&
isInt<64>(Imm.getSExtValue())))
565 if (TyWidth == 32 || TyWidth == 64)
589 unsigned TotalHistCnts = 1;
599 unsigned EC = VTy->getElementCount().getKnownMinValue();
604 unsigned LegalEltSize = EltSize <= 32 ? 32 : 64;
606 if (EC == 2 || (LegalEltSize == 32 && EC == 4))
610 TotalHistCnts = EC / NaturalVectorWidth;
630 switch (ICA.
getID()) {
631 case Intrinsic::experimental_vector_histogram_add: {
638 case Intrinsic::umin:
639 case Intrinsic::umax:
640 case Intrinsic::smin:
641 case Intrinsic::smax: {
642 static const auto ValidMinMaxTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16,
643 MVT::v8i16, MVT::v2i32, MVT::v4i32,
644 MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32,
648 if (LT.second == MVT::v2i64)
654 case Intrinsic::scmp:
655 case Intrinsic::ucmp: {
657 {Intrinsic::scmp, MVT::i32, 3},
658 {Intrinsic::scmp, MVT::i64, 3},
659 {Intrinsic::scmp, MVT::v8i8, 3},
660 {Intrinsic::scmp, MVT::v16i8, 3},
661 {Intrinsic::scmp, MVT::v4i16, 3},
662 {Intrinsic::scmp, MVT::v8i16, 3},
663 {Intrinsic::scmp, MVT::v2i32, 3},
664 {Intrinsic::scmp, MVT::v4i32, 3},
665 {Intrinsic::scmp, MVT::v1i64, 3},
666 {Intrinsic::scmp, MVT::v2i64, 3},
672 return Entry->Cost * LT.first;
675 case Intrinsic::sadd_sat:
676 case Intrinsic::ssub_sat:
677 case Intrinsic::uadd_sat:
678 case Intrinsic::usub_sat: {
679 static const auto ValidSatTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16,
680 MVT::v8i16, MVT::v2i32, MVT::v4i32,
686 LT.second.getScalarSizeInBits() == RetTy->getScalarSizeInBits() ? 1 : 4;
688 return LT.first * Instrs;
693 if (ST->isSVEAvailable() && VectorSize >= 128 &&
isPowerOf2_64(VectorSize))
694 return LT.first * Instrs;
698 case Intrinsic::abs: {
699 static const auto ValidAbsTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16,
700 MVT::v8i16, MVT::v2i32, MVT::v4i32,
701 MVT::v2i64, MVT::nxv16i8, MVT::nxv8i16,
702 MVT::nxv4i32, MVT::nxv2i64};
708 case Intrinsic::bswap: {
709 static const auto ValidAbsTys = {MVT::v4i16, MVT::v8i16, MVT::v2i32,
710 MVT::v4i32, MVT::v2i64};
713 LT.second.getScalarSizeInBits() == RetTy->getScalarSizeInBits())
718 case Intrinsic::fmuladd: {
723 (EltTy->
isHalfTy() && ST->hasFullFP16()))
727 case Intrinsic::stepvector: {
736 Cost += AddCost * (LT.first - 1);
740 case Intrinsic::vector_extract:
741 case Intrinsic::vector_insert: {
754 bool IsExtract = ICA.
getID() == Intrinsic::vector_extract;
755 EVT SubVecVT = IsExtract ? getTLI()->getValueType(
DL, RetTy)
763 getTLI()->getTypeConversion(
C, SubVecVT);
765 getTLI()->getTypeConversion(
C, VecVT);
773 case Intrinsic::bitreverse: {
775 {Intrinsic::bitreverse, MVT::i32, 1},
776 {Intrinsic::bitreverse, MVT::i64, 1},
777 {Intrinsic::bitreverse, MVT::v8i8, 1},
778 {Intrinsic::bitreverse, MVT::v16i8, 1},
779 {Intrinsic::bitreverse, MVT::v4i16, 2},
780 {Intrinsic::bitreverse, MVT::v8i16, 2},
781 {Intrinsic::bitreverse, MVT::v2i32, 2},
782 {Intrinsic::bitreverse, MVT::v4i32, 2},
783 {Intrinsic::bitreverse, MVT::v1i64, 2},
784 {Intrinsic::bitreverse, MVT::v2i64, 2},
792 if (TLI->getValueType(
DL, RetTy,
true) == MVT::i8 ||
793 TLI->getValueType(
DL, RetTy,
true) == MVT::i16)
794 return LegalisationCost.first * Entry->Cost + 1;
796 return LegalisationCost.first * Entry->Cost;
800 case Intrinsic::ctpop: {
801 if (!ST->hasNEON()) {
822 RetTy->getScalarSizeInBits()
825 return LT.first * Entry->Cost + ExtraCost;
829 case Intrinsic::sadd_with_overflow:
830 case Intrinsic::uadd_with_overflow:
831 case Intrinsic::ssub_with_overflow:
832 case Intrinsic::usub_with_overflow:
833 case Intrinsic::smul_with_overflow:
834 case Intrinsic::umul_with_overflow: {
836 {Intrinsic::sadd_with_overflow, MVT::i8, 3},
837 {Intrinsic::uadd_with_overflow, MVT::i8, 3},
838 {Intrinsic::sadd_with_overflow, MVT::i16, 3},
839 {Intrinsic::uadd_with_overflow, MVT::i16, 3},
840 {Intrinsic::sadd_with_overflow, MVT::i32, 1},
841 {Intrinsic::uadd_with_overflow, MVT::i32, 1},
842 {Intrinsic::sadd_with_overflow, MVT::i64, 1},
843 {Intrinsic::uadd_with_overflow, MVT::i64, 1},
844 {Intrinsic::ssub_with_overflow, MVT::i8, 3},
845 {Intrinsic::usub_with_overflow, MVT::i8, 3},
846 {Intrinsic::ssub_with_overflow, MVT::i16, 3},
847 {Intrinsic::usub_with_overflow, MVT::i16, 3},
848 {Intrinsic::ssub_with_overflow, MVT::i32, 1},
849 {Intrinsic::usub_with_overflow, MVT::i32, 1},
850 {Intrinsic::ssub_with_overflow, MVT::i64, 1},
851 {Intrinsic::usub_with_overflow, MVT::i64, 1},
852 {Intrinsic::smul_with_overflow, MVT::i8, 5},
853 {Intrinsic::umul_with_overflow, MVT::i8, 4},
854 {Intrinsic::smul_with_overflow, MVT::i16, 5},
855 {Intrinsic::umul_with_overflow, MVT::i16, 4},
856 {Intrinsic::smul_with_overflow, MVT::i32, 2},
857 {Intrinsic::umul_with_overflow, MVT::i32, 2},
858 {Intrinsic::smul_with_overflow, MVT::i64, 3},
859 {Intrinsic::umul_with_overflow, MVT::i64, 3},
861 EVT MTy = TLI->getValueType(
DL, RetTy->getContainedType(0),
true);
868 case Intrinsic::fptosi_sat:
869 case Intrinsic::fptoui_sat: {
872 bool IsSigned = ICA.
getID() == Intrinsic::fptosi_sat;
874 EVT MTy = TLI->getValueType(
DL, RetTy);
877 if ((LT.second == MVT::f32 || LT.second == MVT::f64 ||
878 LT.second == MVT::v2f32 || LT.second == MVT::v4f32 ||
879 LT.second == MVT::v2f64)) {
881 (LT.second == MVT::f64 && MTy == MVT::i32) ||
882 (LT.second == MVT::f32 && MTy == MVT::i64)))
891 if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16())
898 if ((LT.second == MVT::f16 && MTy == MVT::i32) ||
899 (LT.second == MVT::f16 && MTy == MVT::i64) ||
900 ((LT.second == MVT::v4f16 || LT.second == MVT::v8f16) &&
914 if ((LT.second.getScalarType() == MVT::f32 ||
915 LT.second.getScalarType() == MVT::f64 ||
916 LT.second.getScalarType() == MVT::f16) &&
920 if (LT.second.isVector())
925 LegalTy, {LegalTy, LegalTy});
929 LegalTy, {LegalTy, LegalTy});
931 return LT.first *
Cost +
932 ((LT.second.getScalarType() != MVT::f16 || ST->hasFullFP16()) ? 0
938 RetTy = RetTy->getScalarType();
939 if (LT.second.isVector()) {
957 return LT.first *
Cost;
959 case Intrinsic::fshl:
960 case Intrinsic::fshr: {
969 if (RetTy->isIntegerTy() && ICA.
getArgs()[0] == ICA.
getArgs()[1] &&
970 (RetTy->getPrimitiveSizeInBits() == 32 ||
971 RetTy->getPrimitiveSizeInBits() == 64)) {
984 {Intrinsic::fshl, MVT::v4i32, 2},
985 {Intrinsic::fshl, MVT::v2i64, 2}, {Intrinsic::fshl, MVT::v16i8, 2},
986 {Intrinsic::fshl, MVT::v8i16, 2}, {Intrinsic::fshl, MVT::v2i32, 2},
987 {Intrinsic::fshl, MVT::v8i8, 2}, {Intrinsic::fshl, MVT::v4i16, 2}};
993 return LegalisationCost.first * Entry->Cost;
997 if (!RetTy->isIntegerTy())
1002 bool HigherCost = (RetTy->getScalarSizeInBits() != 32 &&
1003 RetTy->getScalarSizeInBits() < 64) ||
1004 (RetTy->getScalarSizeInBits() % 64 != 0);
1005 unsigned ExtraCost = HigherCost ? 1 : 0;
1006 if (RetTy->getScalarSizeInBits() == 32 ||
1007 RetTy->getScalarSizeInBits() == 64)
1010 else if (HigherCost)
1014 return TyL.first + ExtraCost;
1016 case Intrinsic::get_active_lane_mask: {
1018 EVT RetVT = getTLI()->getValueType(
DL, RetTy);
1020 if (getTLI()->shouldExpandGetActiveLaneMask(RetVT, OpVT))
1023 if (RetTy->isScalableTy()) {
1024 if (TLI->getTypeAction(RetTy->getContext(), RetVT) !=
1034 if (ST->hasSVE2p1() || ST->hasSME2()) {
1049 return Cost + (SplitCost * (
Cost - 1));
1064 case Intrinsic::experimental_vector_match: {
1067 unsigned SearchSize = NeedleTy->getNumElements();
1068 if (!getTLI()->shouldExpandVectorMatch(SearchVT, SearchSize)) {
1081 case Intrinsic::cttz: {
1083 if (LT.second == MVT::v8i8 || LT.second == MVT::v16i8)
1084 return LT.first * 2;
1085 if (LT.second == MVT::v4i16 || LT.second == MVT::v8i16 ||
1086 LT.second == MVT::v2i32 || LT.second == MVT::v4i32)
1087 return LT.first * 3;
1090 case Intrinsic::experimental_cttz_elts: {
1092 if (!getTLI()->shouldExpandCttzElements(ArgVT)) {
1100 case Intrinsic::loop_dependence_raw_mask:
1101 case Intrinsic::loop_dependence_war_mask: {
1103 if (ST->hasSVE2() || ST->hasSME()) {
1104 EVT VecVT = getTLI()->getValueType(
DL, RetTy);
1105 unsigned EltSizeInBytes =
1115 case Intrinsic::experimental_vector_extract_last_active:
1116 if (ST->isSVEorStreamingSVEAvailable()) {
1122 case Intrinsic::pow: {
1125 EVT VT = getTLI()->getValueType(
DL, RetTy);
1127 bool HasLibcall = getTLI()->getLibcallImpl(LC) != RTLIB::Unsupported;
1142 bool Is025 = ExpF->getValueAPF().isExactlyValue(0.25);
1143 bool Is075 = ExpF->getValueAPF().isExactlyValue(0.75);
1153 return (Sqrt * 2) +
FMul;
1164 case Intrinsic::sqrt:
1165 case Intrinsic::fabs:
1166 case Intrinsic::ceil:
1167 case Intrinsic::floor:
1168 case Intrinsic::nearbyint:
1169 case Intrinsic::round:
1170 case Intrinsic::rint:
1171 case Intrinsic::roundeven:
1172 case Intrinsic::trunc:
1173 case Intrinsic::minnum:
1174 case Intrinsic::maxnum:
1175 case Intrinsic::minimum:
1176 case Intrinsic::maximum: {
1194 auto RequiredType =
II.getType();
1197 assert(PN &&
"Expected Phi Node!");
1200 if (!PN->hasOneUse())
1201 return std::nullopt;
1203 for (
Value *IncValPhi : PN->incoming_values()) {
1206 Reinterpret->getIntrinsicID() !=
1207 Intrinsic::aarch64_sve_convert_to_svbool ||
1208 RequiredType != Reinterpret->getArgOperand(0)->getType())
1209 return std::nullopt;
1217 for (
unsigned I = 0;
I < PN->getNumIncomingValues();
I++) {
1219 NPN->
addIncoming(Reinterpret->getOperand(0), PN->getIncomingBlock(
I));
1292 return GoverningPredicateIdx != std::numeric_limits<unsigned>::max();
1297 return GoverningPredicateIdx;
1302 GoverningPredicateIdx = Index;
1320 return UndefIntrinsic;
1325 UndefIntrinsic = IID;
1347 return ResultLanes == InactiveLanesTakenFromOperand;
1352 return OperandIdxForInactiveLanes;
1356 assert(ResultLanes == Uninitialized &&
"Cannot set property twice!");
1357 ResultLanes = InactiveLanesTakenFromOperand;
1358 OperandIdxForInactiveLanes = Index;
1363 return ResultLanes == InactiveLanesAreNotDefined;
1367 assert(ResultLanes == Uninitialized &&
"Cannot set property twice!");
1368 ResultLanes = InactiveLanesAreNotDefined;
1373 return ResultLanes == InactiveLanesAreUnused;
1377 assert(ResultLanes == Uninitialized &&
"Cannot set property twice!");
1378 ResultLanes = InactiveLanesAreUnused;
1388 ResultIsZeroInitialized =
true;
1399 return OperandIdxWithNoActiveLanes != std::numeric_limits<unsigned>::max();
1404 return OperandIdxWithNoActiveLanes;
1409 OperandIdxWithNoActiveLanes = Index;
1414 unsigned GoverningPredicateIdx = std::numeric_limits<unsigned>::max();
1417 unsigned IROpcode = 0;
1419 enum PredicationStyle {
1421 InactiveLanesTakenFromOperand,
1422 InactiveLanesAreNotDefined,
1423 InactiveLanesAreUnused
1426 bool ResultIsZeroInitialized =
false;
1427 unsigned OperandIdxForInactiveLanes = std::numeric_limits<unsigned>::max();
1428 unsigned OperandIdxWithNoActiveLanes = std::numeric_limits<unsigned>::max();
1436 return !isa<ScalableVectorType>(V->getType());
1444 case Intrinsic::aarch64_sve_fcvt_bf16f32_v2:
1445 case Intrinsic::aarch64_sve_fcvt_f16f32:
1446 case Intrinsic::aarch64_sve_fcvt_f16f64:
1447 case Intrinsic::aarch64_sve_fcvt_f32f16:
1448 case Intrinsic::aarch64_sve_fcvt_f32f64:
1449 case Intrinsic::aarch64_sve_fcvt_f64f16:
1450 case Intrinsic::aarch64_sve_fcvt_f64f32:
1451 case Intrinsic::aarch64_sve_fcvtlt_f32f16:
1452 case Intrinsic::aarch64_sve_fcvtlt_f64f32:
1453 case Intrinsic::aarch64_sve_fcvtx_f32f64:
1454 case Intrinsic::aarch64_sve_fcvtzs:
1455 case Intrinsic::aarch64_sve_fcvtzs_i32f16:
1456 case Intrinsic::aarch64_sve_fcvtzs_i32f64:
1457 case Intrinsic::aarch64_sve_fcvtzs_i64f16:
1458 case Intrinsic::aarch64_sve_fcvtzs_i64f32:
1459 case Intrinsic::aarch64_sve_fcvtzu:
1460 case Intrinsic::aarch64_sve_fcvtzu_i32f16:
1461 case Intrinsic::aarch64_sve_fcvtzu_i32f64:
1462 case Intrinsic::aarch64_sve_fcvtzu_i64f16:
1463 case Intrinsic::aarch64_sve_fcvtzu_i64f32:
1464 case Intrinsic::aarch64_sve_scvtf:
1465 case Intrinsic::aarch64_sve_scvtf_f16i32:
1466 case Intrinsic::aarch64_sve_scvtf_f16i64:
1467 case Intrinsic::aarch64_sve_scvtf_f32i64:
1468 case Intrinsic::aarch64_sve_scvtf_f64i32:
1469 case Intrinsic::aarch64_sve_ucvtf:
1470 case Intrinsic::aarch64_sve_ucvtf_f16i32:
1471 case Intrinsic::aarch64_sve_ucvtf_f16i64:
1472 case Intrinsic::aarch64_sve_ucvtf_f32i64:
1473 case Intrinsic::aarch64_sve_ucvtf_f64i32:
1476 case Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2:
1477 case Intrinsic::aarch64_sve_fcvtnt_f16f32:
1478 case Intrinsic::aarch64_sve_fcvtnt_f32f64:
1479 case Intrinsic::aarch64_sve_fcvtxnt_f32f64:
1482 case Intrinsic::aarch64_sve_fabd:
1484 case Intrinsic::aarch64_sve_fadd:
1487 case Intrinsic::aarch64_sve_fdiv:
1490 case Intrinsic::aarch64_sve_fmax:
1492 case Intrinsic::aarch64_sve_fmaxnm:
1494 case Intrinsic::aarch64_sve_fmin:
1496 case Intrinsic::aarch64_sve_fminnm:
1498 case Intrinsic::aarch64_sve_fmla:
1500 case Intrinsic::aarch64_sve_fmls:
1502 case Intrinsic::aarch64_sve_fmul:
1505 case Intrinsic::aarch64_sve_fmulx:
1507 case Intrinsic::aarch64_sve_fnmla:
1509 case Intrinsic::aarch64_sve_fnmls:
1511 case Intrinsic::aarch64_sve_fsub:
1514 case Intrinsic::aarch64_sve_add:
1517 case Intrinsic::aarch64_sve_mla:
1519 case Intrinsic::aarch64_sve_mls:
1521 case Intrinsic::aarch64_sve_mul:
1524 case Intrinsic::aarch64_sve_sabd:
1526 case Intrinsic::aarch64_sve_sdiv:
1529 case Intrinsic::aarch64_sve_smax:
1531 case Intrinsic::aarch64_sve_smin:
1533 case Intrinsic::aarch64_sve_smulh:
1535 case Intrinsic::aarch64_sve_sub:
1538 case Intrinsic::aarch64_sve_uabd:
1540 case Intrinsic::aarch64_sve_udiv:
1543 case Intrinsic::aarch64_sve_umax:
1545 case Intrinsic::aarch64_sve_umin:
1547 case Intrinsic::aarch64_sve_umulh:
1549 case Intrinsic::aarch64_sve_asr:
1552 case Intrinsic::aarch64_sve_lsl:
1555 case Intrinsic::aarch64_sve_lsr:
1558 case Intrinsic::aarch64_sve_and:
1561 case Intrinsic::aarch64_sve_bic:
1563 case Intrinsic::aarch64_sve_eor:
1566 case Intrinsic::aarch64_sve_orr:
1569 case Intrinsic::aarch64_sve_shsub:
1571 case Intrinsic::aarch64_sve_shsubr:
1573 case Intrinsic::aarch64_sve_sqrshl:
1575 case Intrinsic::aarch64_sve_sqshl:
1577 case Intrinsic::aarch64_sve_sqsub:
1579 case Intrinsic::aarch64_sve_srshl:
1581 case Intrinsic::aarch64_sve_uhsub:
1583 case Intrinsic::aarch64_sve_uhsubr:
1585 case Intrinsic::aarch64_sve_uqrshl:
1587 case Intrinsic::aarch64_sve_uqshl:
1589 case Intrinsic::aarch64_sve_uqsub:
1591 case Intrinsic::aarch64_sve_urshl:
1594 case Intrinsic::aarch64_sve_add_u:
1597 case Intrinsic::aarch64_sve_and_u:
1600 case Intrinsic::aarch64_sve_asr_u:
1603 case Intrinsic::aarch64_sve_eor_u:
1606 case Intrinsic::aarch64_sve_fadd_u:
1609 case Intrinsic::aarch64_sve_fdiv_u:
1612 case Intrinsic::aarch64_sve_fmul_u:
1615 case Intrinsic::aarch64_sve_fsub_u:
1618 case Intrinsic::aarch64_sve_lsl_u:
1621 case Intrinsic::aarch64_sve_lsr_u:
1624 case Intrinsic::aarch64_sve_mul_u:
1627 case Intrinsic::aarch64_sve_orr_u:
1630 case Intrinsic::aarch64_sve_sdiv_u:
1633 case Intrinsic::aarch64_sve_sub_u:
1636 case Intrinsic::aarch64_sve_udiv_u:
1640 case Intrinsic::aarch64_sve_addqv:
1641 case Intrinsic::aarch64_sve_and_z:
1642 case Intrinsic::aarch64_sve_bic_z:
1643 case Intrinsic::aarch64_sve_brka_z:
1644 case Intrinsic::aarch64_sve_brkb_z:
1645 case Intrinsic::aarch64_sve_brkn_z:
1646 case Intrinsic::aarch64_sve_brkpa_z:
1647 case Intrinsic::aarch64_sve_brkpb_z:
1648 case Intrinsic::aarch64_sve_cntp:
1649 case Intrinsic::aarch64_sve_compact:
1650 case Intrinsic::aarch64_sve_eor_z:
1651 case Intrinsic::aarch64_sve_eorv:
1652 case Intrinsic::aarch64_sve_eorqv:
1653 case Intrinsic::aarch64_sve_nand_z:
1654 case Intrinsic::aarch64_sve_nor_z:
1655 case Intrinsic::aarch64_sve_orn_z:
1656 case Intrinsic::aarch64_sve_orr_z:
1657 case Intrinsic::aarch64_sve_orv:
1658 case Intrinsic::aarch64_sve_orqv:
1659 case Intrinsic::aarch64_sve_pnext:
1660 case Intrinsic::aarch64_sve_rdffr_z:
1661 case Intrinsic::aarch64_sve_saddv:
1662 case Intrinsic::aarch64_sve_uaddv:
1663 case Intrinsic::aarch64_sve_umaxv:
1664 case Intrinsic::aarch64_sve_umaxqv:
1665 case Intrinsic::aarch64_sve_cmpeq:
1666 case Intrinsic::aarch64_sve_cmpeq_wide:
1667 case Intrinsic::aarch64_sve_cmpge:
1668 case Intrinsic::aarch64_sve_cmpge_wide:
1669 case Intrinsic::aarch64_sve_cmpgt:
1670 case Intrinsic::aarch64_sve_cmpgt_wide:
1671 case Intrinsic::aarch64_sve_cmphi:
1672 case Intrinsic::aarch64_sve_cmphi_wide:
1673 case Intrinsic::aarch64_sve_cmphs:
1674 case Intrinsic::aarch64_sve_cmphs_wide:
1675 case Intrinsic::aarch64_sve_cmple_wide:
1676 case Intrinsic::aarch64_sve_cmplo_wide:
1677 case Intrinsic::aarch64_sve_cmpls_wide:
1678 case Intrinsic::aarch64_sve_cmplt_wide:
1679 case Intrinsic::aarch64_sve_cmpne:
1680 case Intrinsic::aarch64_sve_cmpne_wide:
1681 case Intrinsic::aarch64_sve_facge:
1682 case Intrinsic::aarch64_sve_facgt:
1683 case Intrinsic::aarch64_sve_fcmpeq:
1684 case Intrinsic::aarch64_sve_fcmpge:
1685 case Intrinsic::aarch64_sve_fcmpgt:
1686 case Intrinsic::aarch64_sve_fcmpne:
1687 case Intrinsic::aarch64_sve_fcmpuo:
1688 case Intrinsic::aarch64_sve_ld1:
1689 case Intrinsic::aarch64_sve_ld1_gather:
1690 case Intrinsic::aarch64_sve_ld1_gather_index:
1691 case Intrinsic::aarch64_sve_ld1_gather_scalar_offset:
1692 case Intrinsic::aarch64_sve_ld1_gather_sxtw:
1693 case Intrinsic::aarch64_sve_ld1_gather_sxtw_index:
1694 case Intrinsic::aarch64_sve_ld1_gather_uxtw:
1695 case Intrinsic::aarch64_sve_ld1_gather_uxtw_index:
1696 case Intrinsic::aarch64_sve_ld1q_gather_index:
1697 case Intrinsic::aarch64_sve_ld1q_gather_scalar_offset:
1698 case Intrinsic::aarch64_sve_ld1q_gather_vector_offset:
1699 case Intrinsic::aarch64_sve_ld1ro:
1700 case Intrinsic::aarch64_sve_ld1rq:
1701 case Intrinsic::aarch64_sve_ld1udq:
1702 case Intrinsic::aarch64_sve_ld1uwq:
1703 case Intrinsic::aarch64_sve_ld2_sret:
1704 case Intrinsic::aarch64_sve_ld2q_sret:
1705 case Intrinsic::aarch64_sve_ld3_sret:
1706 case Intrinsic::aarch64_sve_ld3q_sret:
1707 case Intrinsic::aarch64_sve_ld4_sret:
1708 case Intrinsic::aarch64_sve_ld4q_sret:
1709 case Intrinsic::aarch64_sve_ldff1:
1710 case Intrinsic::aarch64_sve_ldff1_gather:
1711 case Intrinsic::aarch64_sve_ldff1_gather_index:
1712 case Intrinsic::aarch64_sve_ldff1_gather_scalar_offset:
1713 case Intrinsic::aarch64_sve_ldff1_gather_sxtw:
1714 case Intrinsic::aarch64_sve_ldff1_gather_sxtw_index:
1715 case Intrinsic::aarch64_sve_ldff1_gather_uxtw:
1716 case Intrinsic::aarch64_sve_ldff1_gather_uxtw_index:
1717 case Intrinsic::aarch64_sve_ldnf1:
1718 case Intrinsic::aarch64_sve_ldnt1:
1719 case Intrinsic::aarch64_sve_ldnt1_gather:
1720 case Intrinsic::aarch64_sve_ldnt1_gather_index:
1721 case Intrinsic::aarch64_sve_ldnt1_gather_scalar_offset:
1722 case Intrinsic::aarch64_sve_ldnt1_gather_uxtw:
1725 case Intrinsic::aarch64_sve_prf:
1726 case Intrinsic::aarch64_sve_prfb_gather_index:
1727 case Intrinsic::aarch64_sve_prfb_gather_scalar_offset:
1728 case Intrinsic::aarch64_sve_prfb_gather_sxtw_index:
1729 case Intrinsic::aarch64_sve_prfb_gather_uxtw_index:
1730 case Intrinsic::aarch64_sve_prfd_gather_index:
1731 case Intrinsic::aarch64_sve_prfd_gather_scalar_offset:
1732 case Intrinsic::aarch64_sve_prfd_gather_sxtw_index:
1733 case Intrinsic::aarch64_sve_prfd_gather_uxtw_index:
1734 case Intrinsic::aarch64_sve_prfh_gather_index:
1735 case Intrinsic::aarch64_sve_prfh_gather_scalar_offset:
1736 case Intrinsic::aarch64_sve_prfh_gather_sxtw_index:
1737 case Intrinsic::aarch64_sve_prfh_gather_uxtw_index:
1738 case Intrinsic::aarch64_sve_prfw_gather_index:
1739 case Intrinsic::aarch64_sve_prfw_gather_scalar_offset:
1740 case Intrinsic::aarch64_sve_prfw_gather_sxtw_index:
1741 case Intrinsic::aarch64_sve_prfw_gather_uxtw_index:
1744 case Intrinsic::aarch64_sve_st1_scatter:
1745 case Intrinsic::aarch64_sve_st1_scatter_scalar_offset:
1746 case Intrinsic::aarch64_sve_st1_scatter_sxtw:
1747 case Intrinsic::aarch64_sve_st1_scatter_sxtw_index:
1748 case Intrinsic::aarch64_sve_st1_scatter_uxtw:
1749 case Intrinsic::aarch64_sve_st1_scatter_uxtw_index:
1750 case Intrinsic::aarch64_sve_st1dq:
1751 case Intrinsic::aarch64_sve_st1q_scatter_index:
1752 case Intrinsic::aarch64_sve_st1q_scatter_scalar_offset:
1753 case Intrinsic::aarch64_sve_st1q_scatter_vector_offset:
1754 case Intrinsic::aarch64_sve_st1wq:
1755 case Intrinsic::aarch64_sve_stnt1:
1756 case Intrinsic::aarch64_sve_stnt1_scatter:
1757 case Intrinsic::aarch64_sve_stnt1_scatter_index:
1758 case Intrinsic::aarch64_sve_stnt1_scatter_scalar_offset:
1759 case Intrinsic::aarch64_sve_stnt1_scatter_uxtw:
1761 case Intrinsic::aarch64_sve_st2:
1762 case Intrinsic::aarch64_sve_st2q:
1764 case Intrinsic::aarch64_sve_st3:
1765 case Intrinsic::aarch64_sve_st3q:
1767 case Intrinsic::aarch64_sve_st4:
1768 case Intrinsic::aarch64_sve_st4q:
1776 Value *UncastedPred;
1782 Pred = UncastedPred;
1788 if (OrigPredTy->getMinNumElements() <=
1790 ->getMinNumElements())
1791 Pred = UncastedPred;
1795 return C &&
C->isAllOnesValue();
1802 if (Dup && Dup->getIntrinsicID() == Intrinsic::aarch64_sve_dup &&
1803 Dup->getOperand(1) == Pg &&
isa<Constant>(Dup->getOperand(2)))
1811static std::optional<Instruction *>
1818 Value *Op1 =
II.getOperand(1);
1819 Value *Op2 =
II.getOperand(2);
1845 return std::nullopt;
1853 if (SimpleII == Inactive)
1863static std::optional<Instruction *>
1867 return std::nullopt;
1896 II.setCalledFunction(NewDecl);
1906 return std::nullopt;
1918static std::optional<Instruction *>
1922 return std::nullopt;
1924 auto IntrinsicID = BinOp->getIntrinsicID();
1925 switch (IntrinsicID) {
1926 case Intrinsic::aarch64_sve_and_z:
1927 case Intrinsic::aarch64_sve_bic_z:
1928 case Intrinsic::aarch64_sve_eor_z:
1929 case Intrinsic::aarch64_sve_nand_z:
1930 case Intrinsic::aarch64_sve_nor_z:
1931 case Intrinsic::aarch64_sve_orn_z:
1932 case Intrinsic::aarch64_sve_orr_z:
1935 return std::nullopt;
1938 auto BinOpPred = BinOp->getOperand(0);
1939 auto BinOpOp1 = BinOp->getOperand(1);
1940 auto BinOpOp2 = BinOp->getOperand(2);
1944 PredIntr->getIntrinsicID() != Intrinsic::aarch64_sve_convert_to_svbool)
1945 return std::nullopt;
1947 auto PredOp = PredIntr->getOperand(0);
1949 if (PredOpTy !=
II.getType())
1950 return std::nullopt;
1954 Intrinsic::aarch64_sve_convert_from_svbool, {PredOpTy}, {BinOpOp1});
1955 NarrowedBinOpArgs.
push_back(NarrowBinOpOp1);
1956 if (BinOpOp1 == BinOpOp2)
1957 NarrowedBinOpArgs.
push_back(NarrowBinOpOp1);
1960 Intrinsic::aarch64_sve_convert_from_svbool, {PredOpTy}, {BinOpOp2}));
1962 auto NarrowedBinOp =
1967static std::optional<Instruction *>
1974 return BinOpCombine;
1979 return std::nullopt;
1982 Value *Cursor =
II.getOperand(0), *EarliestReplacement =
nullptr;
1991 if (CursorVTy->getElementCount().getKnownMinValue() <
1992 IVTy->getElementCount().getKnownMinValue())
1996 if (Cursor->getType() == IVTy)
1997 EarliestReplacement = Cursor;
2002 if (!IntrinsicCursor || !(IntrinsicCursor->getIntrinsicID() ==
2003 Intrinsic::aarch64_sve_convert_to_svbool ||
2004 IntrinsicCursor->getIntrinsicID() ==
2005 Intrinsic::aarch64_sve_convert_from_svbool))
2008 CandidatesForRemoval.
insert(CandidatesForRemoval.
begin(), IntrinsicCursor);
2009 Cursor = IntrinsicCursor->getOperand(0);
2014 if (!EarliestReplacement)
2015 return std::nullopt;
2023 auto *OpPredicate =
II.getOperand(0);
2040 II.getArgOperand(2));
2046 return std::nullopt;
2050 II.getArgOperand(0),
II.getArgOperand(2),
uint64_t(0));
2059 II.getArgOperand(0));
2069 return std::nullopt;
2074 if (!SplatValue || !SplatValue->isZero())
2075 return std::nullopt;
2080 DupQLane->getIntrinsicID() != Intrinsic::aarch64_sve_dupq_lane)
2081 return std::nullopt;
2085 if (!DupQLaneIdx || !DupQLaneIdx->isZero())
2086 return std::nullopt;
2089 if (!VecIns || VecIns->getIntrinsicID() != Intrinsic::vector_insert)
2090 return std::nullopt;
2095 return std::nullopt;
2098 return std::nullopt;
2102 return std::nullopt;
2106 if (!VecTy || !OutTy || VecTy->getNumElements() != OutTy->getMinNumElements())
2107 return std::nullopt;
2109 unsigned NumElts = VecTy->getNumElements();
2110 unsigned PredicateBits = 0;
2113 for (
unsigned I = 0;
I < NumElts; ++
I) {
2116 return std::nullopt;
2118 PredicateBits |= 1 << (
I * (16 / NumElts));
2122 if (PredicateBits == 0) {
2124 PFalse->takeName(&
II);
2130 for (
unsigned I = 0;
I < 16; ++
I)
2131 if ((PredicateBits & (1 <<
I)) != 0)
2134 unsigned PredSize = Mask & -Mask;
2139 for (
unsigned I = 0;
I < 16;
I += PredSize)
2140 if ((PredicateBits & (1 <<
I)) == 0)
2141 return std::nullopt;
2146 {PredType}, {PTruePat});
2148 Intrinsic::aarch64_sve_convert_to_svbool, {PredType}, {PTrue});
2149 auto *ConvertFromSVBool =
2151 {
II.getType()}, {ConvertToSVBool});
2159 Value *Pg =
II.getArgOperand(0);
2160 Value *Vec =
II.getArgOperand(1);
2161 auto IntrinsicID =
II.getIntrinsicID();
2162 bool IsAfter = IntrinsicID == Intrinsic::aarch64_sve_lasta;
2174 auto OpC = OldBinOp->getOpcode();
2180 OpC, NewLHS, NewRHS, OldBinOp, OldBinOp->getName(),
II.getIterator());
2186 if (IsAfter &&
C &&
C->isNullValue()) {
2190 Extract->insertBefore(
II.getIterator());
2191 Extract->takeName(&
II);
2197 return std::nullopt;
2199 if (IntrPG->getIntrinsicID() != Intrinsic::aarch64_sve_ptrue)
2200 return std::nullopt;
2202 const auto PTruePattern =
2208 return std::nullopt;
2210 unsigned Idx = MinNumElts - 1;
2220 if (Idx >= PgVTy->getMinNumElements())
2221 return std::nullopt;
2226 Extract->insertBefore(
II.getIterator());
2227 Extract->takeName(&
II);
2240 Value *Pg =
II.getArgOperand(0);
2242 Value *Vec =
II.getArgOperand(2);
2245 if (!Ty->isIntegerTy())
2246 return std::nullopt;
2251 return std::nullopt;
2268 II.getIntrinsicID(), {FPVec->getType()}, {Pg, FPFallBack, FPVec});
2281 {
II.getType()}, {AllPat});
2288static std::optional<Instruction *>
2292 if (
Pattern == AArch64SVEPredPattern::all) {
2301 return MinNumElts && NumElts >= MinNumElts
2303 II, ConstantInt::get(
II.getType(), MinNumElts)))
2307static std::optional<Instruction *>
2310 if (!ST->isStreaming())
2311 return std::nullopt;
2323 Value *PgVal =
II.getArgOperand(0);
2324 Value *OpVal =
II.getArgOperand(1);
2328 if (PgVal == OpVal &&
2329 (
II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_first ||
2330 II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_last)) {
2345 return std::nullopt;
2349 if (Pg->
getIntrinsicID() == Intrinsic::aarch64_sve_convert_to_svbool &&
2350 OpIID == Intrinsic::aarch64_sve_convert_to_svbool &&
2364 if ((Pg ==
Op) && (
II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_any) &&
2365 ((OpIID == Intrinsic::aarch64_sve_brka_z) ||
2366 (OpIID == Intrinsic::aarch64_sve_brkb_z) ||
2367 (OpIID == Intrinsic::aarch64_sve_brkpa_z) ||
2368 (OpIID == Intrinsic::aarch64_sve_brkpb_z) ||
2369 (OpIID == Intrinsic::aarch64_sve_rdffr_z) ||
2370 (OpIID == Intrinsic::aarch64_sve_and_z) ||
2371 (OpIID == Intrinsic::aarch64_sve_bic_z) ||
2372 (OpIID == Intrinsic::aarch64_sve_eor_z) ||
2373 (OpIID == Intrinsic::aarch64_sve_nand_z) ||
2374 (OpIID == Intrinsic::aarch64_sve_nor_z) ||
2375 (OpIID == Intrinsic::aarch64_sve_orn_z) ||
2376 (OpIID == Intrinsic::aarch64_sve_orr_z))) {
2386 return std::nullopt;
2389template <Intrinsic::ID MulOpc, Intrinsic::ID FuseOpc>
2390static std::optional<Instruction *>
2392 bool MergeIntoAddendOp) {
2394 Value *MulOp0, *MulOp1, *AddendOp, *
Mul;
2395 if (MergeIntoAddendOp) {
2396 AddendOp =
II.getOperand(1);
2397 Mul =
II.getOperand(2);
2399 AddendOp =
II.getOperand(2);
2400 Mul =
II.getOperand(1);
2405 return std::nullopt;
2407 if (!
Mul->hasOneUse())
2408 return std::nullopt;
2411 if (
II.getType()->isFPOrFPVectorTy()) {
2416 return std::nullopt;
2418 return std::nullopt;
2423 if (MergeIntoAddendOp)
2433static std::optional<Instruction *>
2435 Value *Pred =
II.getOperand(0);
2436 Value *PtrOp =
II.getOperand(1);
2437 Type *VecTy =
II.getType();
2441 Load->copyMetadata(
II);
2452static std::optional<Instruction *>
2454 Value *VecOp =
II.getOperand(0);
2455 Value *Pred =
II.getOperand(1);
2456 Value *PtrOp =
II.getOperand(2);
2460 Store->copyMetadata(
II);
2472 case Intrinsic::aarch64_sve_fmul_u:
2473 return Instruction::BinaryOps::FMul;
2474 case Intrinsic::aarch64_sve_fadd_u:
2475 return Instruction::BinaryOps::FAdd;
2476 case Intrinsic::aarch64_sve_fsub_u:
2477 return Instruction::BinaryOps::FSub;
2479 return Instruction::BinaryOpsEnd;
2483static std::optional<Instruction *>
2486 if (
II.isStrictFP())
2487 return std::nullopt;
2489 auto *OpPredicate =
II.getOperand(0);
2491 if (BinOpCode == Instruction::BinaryOpsEnd ||
2493 return std::nullopt;
2495 BinOpCode,
II.getOperand(1),
II.getOperand(2),
II.getFastMathFlags());
2502 Intrinsic::aarch64_sve_mla>(
2506 Intrinsic::aarch64_sve_mad>(
2509 return std::nullopt;
2512static std::optional<Instruction *>
2516 Intrinsic::aarch64_sve_fmla>(IC,
II,
2521 Intrinsic::aarch64_sve_fmad>(IC,
II,
2526 Intrinsic::aarch64_sve_fmla>(IC,
II,
2529 return std::nullopt;
2532static std::optional<Instruction *>
2536 Intrinsic::aarch64_sve_fmla>(IC,
II,
2541 Intrinsic::aarch64_sve_fmad>(IC,
II,
2546 Intrinsic::aarch64_sve_fmla_u>(
2552static std::optional<Instruction *>
2556 Intrinsic::aarch64_sve_fmls>(IC,
II,
2561 Intrinsic::aarch64_sve_fnmsb>(
2566 Intrinsic::aarch64_sve_fmls>(IC,
II,
2569 return std::nullopt;
2572static std::optional<Instruction *>
2576 Intrinsic::aarch64_sve_fmls>(IC,
II,
2581 Intrinsic::aarch64_sve_fnmsb>(
2586 Intrinsic::aarch64_sve_fmls_u>(
2595 Intrinsic::aarch64_sve_mls>(
2598 return std::nullopt;
2603 Value *UnpackArg =
II.getArgOperand(0);
2605 bool IsSigned =
II.getIntrinsicID() == Intrinsic::aarch64_sve_sunpkhi ||
2606 II.getIntrinsicID() == Intrinsic::aarch64_sve_sunpklo;
2619 return std::nullopt;
2623 auto *OpVal =
II.getOperand(0);
2624 auto *OpIndices =
II.getOperand(1);
2631 SplatValue->getValue().uge(VTy->getElementCount().getKnownMinValue()))
2632 return std::nullopt;
2647 Type *RetTy =
II.getType();
2648 constexpr Intrinsic::ID FromSVB = Intrinsic::aarch64_sve_convert_from_svbool;
2649 constexpr Intrinsic::ID ToSVB = Intrinsic::aarch64_sve_convert_to_svbool;
2653 if ((
match(
II.getArgOperand(0),
2660 if (TyA ==
B->getType() &&
2665 TyA->getMinNumElements());
2671 return std::nullopt;
2679 if (
match(
II.getArgOperand(0),
2684 II, (
II.getIntrinsicID() == Intrinsic::aarch64_sve_zip1 ?
A :
B));
2686 return std::nullopt;
2689static std::optional<Instruction *>
2691 Value *Mask =
II.getOperand(0);
2692 Value *BasePtr =
II.getOperand(1);
2693 Value *Index =
II.getOperand(2);
2704 BasePtr->getPointerAlignment(
II.getDataLayout());
2707 BasePtr, IndexBase);
2714 return std::nullopt;
2717static std::optional<Instruction *>
2719 Value *Val =
II.getOperand(0);
2720 Value *Mask =
II.getOperand(1);
2721 Value *BasePtr =
II.getOperand(2);
2722 Value *Index =
II.getOperand(3);
2732 BasePtr->getPointerAlignment(
II.getDataLayout());
2735 BasePtr, IndexBase);
2741 return std::nullopt;
2747 Value *Pred =
II.getOperand(0);
2748 Value *Vec =
II.getOperand(1);
2749 Value *DivVec =
II.getOperand(2);
2753 if (!SplatConstantInt)
2754 return std::nullopt;
2758 if (DivisorValue == -1)
2759 return std::nullopt;
2760 if (DivisorValue == 1)
2766 Intrinsic::aarch64_sve_asrd, {
II.getType()}, {Pred, Vec, DivisorLog2});
2773 Intrinsic::aarch64_sve_asrd, {
II.getType()}, {Pred, Vec, DivisorLog2});
2775 Intrinsic::aarch64_sve_neg, {ASRD->getType()}, {ASRD, Pred, ASRD});
2779 return std::nullopt;
2783 size_t VecSize = Vec.
size();
2788 size_t HalfVecSize = VecSize / 2;
2792 if (*
LHS !=
nullptr && *
RHS !=
nullptr) {
2800 if (*
LHS ==
nullptr && *
RHS !=
nullptr)
2818 return std::nullopt;
2825 Elts[Idx->getValue().getZExtValue()] = InsertElt->getOperand(1);
2826 CurrentInsertElt = InsertElt->getOperand(0);
2832 return std::nullopt;
2836 for (
size_t I = 0;
I < Elts.
size();
I++) {
2837 if (Elts[
I] ==
nullptr)
2842 if (InsertEltChain ==
nullptr)
2843 return std::nullopt;
2849 unsigned PatternWidth = IIScalableTy->getScalarSizeInBits() * Elts.
size();
2850 unsigned PatternElementCount = IIScalableTy->getScalarSizeInBits() *
2851 IIScalableTy->getMinNumElements() /
2856 auto *WideShuffleMaskTy =
2867 auto NarrowBitcast =
2880 return std::nullopt;
2885 Value *Pred =
II.getOperand(0);
2886 Value *Vec =
II.getOperand(1);
2887 Value *Shift =
II.getOperand(2);
2890 Value *AbsPred, *MergedValue;
2896 return std::nullopt;
2904 return std::nullopt;
2909 return std::nullopt;
2912 {
II.getType()}, {Pred, Vec, Shift});
2919 Value *Vec =
II.getOperand(0);
2924 return std::nullopt;
2930 auto *NI =
II.getNextNode();
2933 return !
I->mayReadOrWriteMemory() && !
I->mayHaveSideEffects();
2935 while (LookaheadThreshold-- && CanSkipOver(NI)) {
2936 auto *NIBB = NI->getParent();
2937 NI = NI->getNextNode();
2939 if (
auto *SuccBB = NIBB->getUniqueSuccessor())
2940 NI = &*SuccBB->getFirstNonPHIOrDbgOrLifetime();
2946 if (NextII &&
II.isIdenticalTo(NextII))
2949 return std::nullopt;
2957 {II.getType(), II.getOperand(0)->getType()},
2958 {II.getOperand(0), II.getOperand(1)}));
2965 return std::nullopt;
2971 Value *Passthru =
II.getOperand(0);
2979 auto *Mask = ConstantInt::get(Ty, MaskValue);
2985 return std::nullopt;
2988static std::optional<Instruction *>
2995 return std::nullopt;
2998std::optional<Instruction *>
3009 case Intrinsic::aarch64_dmb:
3011 case Intrinsic::aarch64_neon_fmaxnm:
3012 case Intrinsic::aarch64_neon_fminnm:
3014 case Intrinsic::aarch64_sve_convert_from_svbool:
3016 case Intrinsic::aarch64_sve_dup:
3018 case Intrinsic::aarch64_sve_dup_x:
3020 case Intrinsic::aarch64_sve_cmpne:
3021 case Intrinsic::aarch64_sve_cmpne_wide:
3023 case Intrinsic::aarch64_sve_rdffr:
3025 case Intrinsic::aarch64_sve_lasta:
3026 case Intrinsic::aarch64_sve_lastb:
3028 case Intrinsic::aarch64_sve_clasta_n:
3029 case Intrinsic::aarch64_sve_clastb_n:
3031 case Intrinsic::aarch64_sve_cntd:
3033 case Intrinsic::aarch64_sve_cntw:
3035 case Intrinsic::aarch64_sve_cnth:
3037 case Intrinsic::aarch64_sve_cntb:
3039 case Intrinsic::aarch64_sme_cntsd:
3041 case Intrinsic::aarch64_sve_ptest_any:
3042 case Intrinsic::aarch64_sve_ptest_first:
3043 case Intrinsic::aarch64_sve_ptest_last:
3045 case Intrinsic::aarch64_sve_fadd:
3047 case Intrinsic::aarch64_sve_fadd_u:
3049 case Intrinsic::aarch64_sve_fmul_u:
3051 case Intrinsic::aarch64_sve_fsub:
3053 case Intrinsic::aarch64_sve_fsub_u:
3055 case Intrinsic::aarch64_sve_add:
3057 case Intrinsic::aarch64_sve_add_u:
3059 Intrinsic::aarch64_sve_mla_u>(
3061 case Intrinsic::aarch64_sve_sub:
3063 case Intrinsic::aarch64_sve_sub_u:
3065 Intrinsic::aarch64_sve_mls_u>(
3067 case Intrinsic::aarch64_sve_tbl:
3069 case Intrinsic::aarch64_sve_uunpkhi:
3070 case Intrinsic::aarch64_sve_uunpklo:
3071 case Intrinsic::aarch64_sve_sunpkhi:
3072 case Intrinsic::aarch64_sve_sunpklo:
3074 case Intrinsic::aarch64_sve_uzp1:
3076 case Intrinsic::aarch64_sve_zip1:
3077 case Intrinsic::aarch64_sve_zip2:
3079 case Intrinsic::aarch64_sve_ld1_gather_index:
3081 case Intrinsic::aarch64_sve_st1_scatter_index:
3083 case Intrinsic::aarch64_sve_ld1:
3085 case Intrinsic::aarch64_sve_st1:
3087 case Intrinsic::aarch64_sve_sdiv:
3089 case Intrinsic::aarch64_sve_sel:
3091 case Intrinsic::aarch64_sve_srshl:
3093 case Intrinsic::aarch64_sve_dupq_lane:
3095 case Intrinsic::aarch64_sve_insr:
3097 case Intrinsic::aarch64_sve_whilelo:
3099 case Intrinsic::aarch64_sve_ptrue:
3101 case Intrinsic::aarch64_sve_uxtb:
3103 case Intrinsic::aarch64_sve_uxth:
3105 case Intrinsic::aarch64_sve_uxtw:
3107 case Intrinsic::aarch64_sme_in_streaming_mode:
3111 return std::nullopt;
3118 SimplifyAndSetOp)
const {
3119 switch (
II.getIntrinsicID()) {
3122 case Intrinsic::aarch64_neon_fcvtxn:
3123 case Intrinsic::aarch64_neon_rshrn:
3124 case Intrinsic::aarch64_neon_sqrshrn:
3125 case Intrinsic::aarch64_neon_sqrshrun:
3126 case Intrinsic::aarch64_neon_sqshrn:
3127 case Intrinsic::aarch64_neon_sqshrun:
3128 case Intrinsic::aarch64_neon_sqxtn:
3129 case Intrinsic::aarch64_neon_sqxtun:
3130 case Intrinsic::aarch64_neon_uqrshrn:
3131 case Intrinsic::aarch64_neon_uqshrn:
3132 case Intrinsic::aarch64_neon_uqxtn:
3133 SimplifyAndSetOp(&
II, 0, OrigDemandedElts, UndefElts);
3137 return std::nullopt;
3141 return ST->isSVEAvailable() || (ST->isSVEorStreamingSVEAvailable() &&
3151 if (ST->useSVEForFixedLengthVectors() &&
3154 std::max(ST->getMinSVEVectorSizeInBits(), 128u));
3155 else if (ST->isNeonAvailable())
3160 if (ST->isSVEAvailable() || (ST->isSVEorStreamingSVEAvailable() &&
3169bool AArch64TTIImpl::isSingleExtWideningInstruction(
3171 Type *SrcOverrideTy)
const {
3186 (DstEltSize != 16 && DstEltSize != 32 && DstEltSize != 64))
3189 Type *SrcTy = SrcOverrideTy;
3191 case Instruction::Add:
3192 case Instruction::Sub: {
3201 if (Opcode == Instruction::Sub)
3225 assert(SrcTy &&
"Expected some SrcTy");
3227 unsigned SrcElTySize = SrcTyL.second.getScalarSizeInBits();
3233 DstTyL.first * DstTyL.second.getVectorMinNumElements();
3235 SrcTyL.first * SrcTyL.second.getVectorMinNumElements();
3239 return NumDstEls == NumSrcEls && 2 * SrcElTySize == DstEltSize;
3242Type *AArch64TTIImpl::isBinExtWideningInstruction(
unsigned Opcode,
Type *DstTy,
3244 Type *SrcOverrideTy)
const {
3245 if (Opcode != Instruction::Add && Opcode != Instruction::Sub &&
3246 Opcode != Instruction::Mul)
3256 (DstEltSize != 16 && DstEltSize != 32 && DstEltSize != 64))
3259 auto getScalarSizeWithOverride = [&](
const Value *
V) {
3265 ->getScalarSizeInBits();
3268 unsigned MaxEltSize = 0;
3271 unsigned EltSize0 = getScalarSizeWithOverride(Args[0]);
3272 unsigned EltSize1 = getScalarSizeWithOverride(Args[1]);
3273 MaxEltSize = std::max(EltSize0, EltSize1);
3276 unsigned EltSize0 = getScalarSizeWithOverride(Args[0]);
3277 unsigned EltSize1 = getScalarSizeWithOverride(Args[1]);
3280 if (EltSize0 >= DstEltSize / 2 || EltSize1 >= DstEltSize / 2)
3282 MaxEltSize = DstEltSize / 2;
3283 }
else if (Opcode == Instruction::Mul &&
3296 getScalarSizeWithOverride(
isa<ZExtInst>(Args[0]) ? Args[0] : Args[1]);
3300 if (MaxEltSize * 2 > DstEltSize)
3318 if (!Src->isVectorTy() || !TLI->isTypeLegal(TLI->getValueType(
DL, Src)) ||
3319 (Src->isScalableTy() && !ST->hasSVE2()))
3329 if (AddUser && AddUser->getOpcode() == Instruction::Add)
3333 if (!Shr || Shr->getOpcode() != Instruction::LShr)
3337 if (!Trunc || Trunc->getOpcode() != Instruction::Trunc ||
3338 Src->getScalarSizeInBits() !=
3362 int ISD = TLI->InstructionOpcodeToISD(Opcode);
3366 if (
I &&
I->hasOneUser()) {
3369 if (
Type *ExtTy = isBinExtWideningInstruction(
3370 SingleUser->getOpcode(), Dst, Operands,
3371 Src !=
I->getOperand(0)->getType() ? Src :
nullptr)) {
3384 if (isSingleExtWideningInstruction(
3385 SingleUser->getOpcode(), Dst, Operands,
3386 Src !=
I->getOperand(0)->getType() ? Src :
nullptr)) {
3390 if (SingleUser->getOpcode() == Instruction::Add) {
3391 if (
I == SingleUser->getOperand(1) ||
3393 cast<CastInst>(SingleUser->getOperand(1))->getOpcode() == Opcode))
3408 EVT SrcTy = TLI->getValueType(
DL, Src);
3409 EVT DstTy = TLI->getValueType(
DL, Dst);
3411 if (!SrcTy.isSimple() || !DstTy.
isSimple())
3416 if (!ST->hasSVE2() && !ST->isStreamingSVEAvailable() &&
3445 EVT WiderTy = SrcTy.
bitsGT(DstTy) ? SrcTy : DstTy;
3448 ST->useSVEForFixedLengthVectors(WiderTy)) {
3449 std::pair<InstructionCost, MVT> LT =
3451 unsigned NumElements =
3467 const unsigned int SVE_EXT_COST = 1;
3468 const unsigned int SVE_FCVT_COST = 1;
3469 const unsigned int SVE_UNPACK_ONCE = 4;
3470 const unsigned int SVE_UNPACK_TWICE = 16;
3599 SVE_EXT_COST + SVE_FCVT_COST},
3604 SVE_EXT_COST + SVE_FCVT_COST},
3611 SVE_EXT_COST + SVE_FCVT_COST},
3615 SVE_EXT_COST + SVE_FCVT_COST},
3621 SVE_EXT_COST + SVE_FCVT_COST},
3624 SVE_EXT_COST + SVE_FCVT_COST},
3629 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3631 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3641 SVE_EXT_COST + SVE_FCVT_COST},
3646 SVE_EXT_COST + SVE_FCVT_COST},
3659 SVE_EXT_COST + SVE_FCVT_COST},
3663 SVE_EXT_COST + SVE_FCVT_COST},
3675 SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3677 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3679 SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3681 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3685 SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3687 SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3703 SVE_EXT_COST + SVE_FCVT_COST},
3708 SVE_EXT_COST + SVE_FCVT_COST},
3719 SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3721 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3723 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3725 SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3727 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3729 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3733 SVE_EXT_COST + SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3735 SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3737 SVE_EXT_COST + SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3739 SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3964 if (ST->hasFullFP16())
3976 Src->getScalarType(), CCH,
CostKind) +
3984 ST->isSVEorStreamingSVEAvailable() &&
3985 TLI->getTypeAction(Src->getContext(), SrcTy) ==
3987 TLI->getTypeAction(Dst->getContext(), DstTy) ==
3996 Opcode, LegalTy, Src, CCH,
CostKind,
I);
3999 return Part1 + Part2;
4006 ST->isSVEorStreamingSVEAvailable() && TLI->isTypeLegal(DstTy))
4018 assert((Opcode == Instruction::SExt || Opcode == Instruction::ZExt) &&
4031 CostKind, Index,
nullptr,
nullptr);
4035 auto DstVT = TLI->getValueType(
DL, Dst);
4036 auto SrcVT = TLI->getValueType(
DL, Src);
4041 if (!VecLT.second.isVector() || !TLI->isTypeLegal(DstVT))
4047 if (DstVT.getFixedSizeInBits() < SrcVT.getFixedSizeInBits())
4057 case Instruction::SExt:
4062 case Instruction::ZExt:
4063 if (DstVT.getSizeInBits() != 64u || SrcVT.getSizeInBits() == 32u)
4076 return Opcode == Instruction::PHI ? 0 : 1;
4085 ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx,
4094 if (!LT.second.isVector())
4099 if (LT.second.isFixedLengthVector()) {
4100 unsigned Width = LT.second.getVectorNumElements();
4101 Index = Index % Width;
4116 if (ST->hasFastLD1Single())
4128 : ST->getVectorInsertExtractBaseCost() + 1;
4152 auto ExtractCanFuseWithFmul = [&]() {
4159 auto IsAllowedScalarTy = [&](
const Type *
T) {
4160 return T->isFloatTy() ||
T->isDoubleTy() ||
4161 (
T->isHalfTy() && ST->hasFullFP16());
4165 auto IsUserFMulScalarTy = [](
const Value *EEUser) {
4168 return BO && BO->getOpcode() == BinaryOperator::FMul &&
4169 !BO->getType()->isVectorTy();
4174 auto IsExtractLaneEquivalentToZero = [&](
unsigned Idx,
unsigned EltSz) {
4178 return Idx == 0 || (RegWidth != 0 && (Idx * EltSz) % RegWidth == 0);
4187 DenseMap<User *, unsigned> UserToExtractIdx;
4188 for (
auto *U :
Scalar->users()) {
4189 if (!IsUserFMulScalarTy(U))
4193 UserToExtractIdx[
U];
4195 if (UserToExtractIdx.
empty())
4197 for (
auto &[S, U, L] : ScalarUserAndIdx) {
4198 for (
auto *U : S->users()) {
4199 if (UserToExtractIdx.
contains(U)) {
4201 auto *Op0 =
FMul->getOperand(0);
4202 auto *Op1 =
FMul->getOperand(1);
4203 if ((Op0 == S && Op1 == S) || Op0 != S || Op1 != S) {
4204 UserToExtractIdx[
U] =
L;
4210 for (
auto &[U, L] : UserToExtractIdx) {
4222 return !EE->users().empty() &&
all_of(EE->users(), [&](
const User *U) {
4223 if (!IsUserFMulScalarTy(U))
4228 const auto *BO = cast<BinaryOperator>(U);
4229 const auto *OtherEE = dyn_cast<ExtractElementInst>(
4230 BO->getOperand(0) == EE ? BO->getOperand(1) : BO->getOperand(0));
4232 const auto *IdxOp = dyn_cast<ConstantInt>(OtherEE->getIndexOperand());
4235 return IsExtractLaneEquivalentToZero(
4236 cast<ConstantInt>(OtherEE->getIndexOperand())
4239 OtherEE->getType()->getScalarSizeInBits());
4247 if (Opcode == Instruction::ExtractElement && (
I || Scalar) &&
4248 ExtractCanFuseWithFmul())
4253 :
ST->getVectorInsertExtractBaseCost();
4262 if (Opcode == Instruction::InsertElement && Index == 0 && Op0 &&
4265 return getVectorInstrCostHelper(Opcode, Val,
CostKind, Index,
nullptr,
4271 Value *Scalar,
ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx,
4273 return getVectorInstrCostHelper(Opcode, Val,
CostKind, Index,
nullptr, Scalar,
4274 ScalarUserAndIdx, VIC);
4281 return getVectorInstrCostHelper(
I.getOpcode(), Val,
CostKind, Index, &
I,
4288 unsigned Index)
const {
4300 : ST->getVectorInsertExtractBaseCost() + 1;
4309 if (Ty->getElementType()->isFloatingPointTy())
4312 unsigned VecInstCost =
4314 return DemandedElts.
popcount() * (Insert + Extract) * VecInstCost;
4321 if (!Ty->getScalarType()->isHalfTy() && !Ty->getScalarType()->isBFloatTy())
4322 return std::nullopt;
4323 if (Ty->getScalarType()->isHalfTy() && ST->hasFullFP16())
4324 return std::nullopt;
4326 if (CanUseSVE && ST->hasSVEB16B16() && ST->isNonStreamingSVEorSME2Available())
4327 return std::nullopt;
4334 Cost += InstCost(PromotedTy);
4357 Op2Info, Args, CxtI);
4361 int ISD = TLI->InstructionOpcodeToISD(Opcode);
4368 Ty,
CostKind, Op1Info, Op2Info,
true,
4371 [&](
Type *PromotedTy) {
4375 return *PromotedCost;
4381 if (
Type *ExtTy = isBinExtWideningInstruction(Opcode, Ty, Args)) {
4448 auto VT = TLI->getValueType(
DL, Ty);
4449 if (VT.isScalarInteger() && VT.getSizeInBits() <= 64) {
4453 : (3 * AsrCost + AddCost);
4455 return MulCost + AsrCost + 2 * AddCost;
4457 }
else if (VT.isVector()) {
4467 if (Ty->isScalableTy() && ST->hasSVE())
4468 Cost += 2 * AsrCost;
4473 ? (LT.second.getScalarType() == MVT::i64 ? 1 : 2) * AsrCost
4477 }
else if (LT.second == MVT::v2i64) {
4478 return VT.getVectorNumElements() *
4485 if (Ty->isScalableTy() && ST->hasSVE())
4486 return MulCost + 2 * AddCost + 2 * AsrCost;
4487 return 2 * MulCost + AddCost + AsrCost + UsraCost;
4492 LT.second.isFixedLengthVector()) {
4502 return ExtractCost + InsertCost +
4510 auto VT = TLI->getValueType(
DL, Ty);
4526 bool HasMULH = VT == MVT::i64 || LT.second == MVT::nxv2i64 ||
4527 LT.second == MVT::nxv4i32 || LT.second == MVT::nxv8i16 ||
4528 LT.second == MVT::nxv16i8;
4529 bool Is128bit = LT.second.is128BitVector();
4541 (HasMULH ? 0 : ShrCost) +
4542 AddCost * 2 + ShrCost;
4543 return DivCost + (
ISD ==
ISD::UREM ? MulCost + AddCost : 0);
4550 if (!VT.isVector() && VT.getSizeInBits() > 64)
4554 Opcode, Ty,
CostKind, Op1Info, Op2Info);
4556 if (TLI->isOperationLegalOrCustom(
ISD, LT.second) && ST->hasSVE()) {
4560 Ty->getPrimitiveSizeInBits().getFixedValue() < 128) {
4570 if (
nullptr != Entry)
4575 if (LT.second.getScalarType() == MVT::i8)
4577 else if (LT.second.getScalarType() == MVT::i16)
4589 Opcode, Ty->getScalarType(),
CostKind, Op1Info, Op2Info);
4590 return (4 + DivCost) * VTy->getNumElements();
4596 -1,
nullptr,
nullptr);
4610 if (LT.second == MVT::v2i64 && ST->hasSVE())
4623 if (LT.second != MVT::v2i64)
4645 if ((Ty->isFloatTy() || Ty->isDoubleTy() ||
4646 (Ty->isHalfTy() && ST->hasFullFP16())) &&
4655 if (!Ty->getScalarType()->isFP128Ty())
4662 if (!Ty->getScalarType()->isFP128Ty())
4663 return 2 * LT.first;
4670 if (!Ty->isVectorTy())
4686 int MaxMergeDistance = 64;
4690 return NumVectorInstToHideOverhead;
4700 unsigned Opcode1,
unsigned Opcode2)
const {
4703 if (!
Sched.hasInstrSchedModel())
4707 Sched.getSchedClassDesc(
TII->get(Opcode1).getSchedClass());
4709 Sched.getSchedClassDesc(
TII->get(Opcode2).getSchedClass());
4715 "Cannot handle variant scheduling classes without an MI");
4731 const int AmortizationCost = 20;
4739 VecPred = CurrentPred;
4747 static const auto ValidMinMaxTys = {
4748 MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32,
4749 MVT::v4i32, MVT::v2i64, MVT::v2f32, MVT::v4f32, MVT::v2f64};
4750 static const auto ValidFP16MinMaxTys = {MVT::v4f16, MVT::v8f16};
4754 (ST->hasFullFP16() &&
4760 {Instruction::Select, MVT::v2i1, MVT::v2f32, 2},
4761 {Instruction::Select, MVT::v2i1, MVT::v2f64, 2},
4762 {Instruction::Select, MVT::v4i1, MVT::v4f32, 2},
4763 {Instruction::Select, MVT::v4i1, MVT::v4f16, 2},
4764 {Instruction::Select, MVT::v8i1, MVT::v8f16, 2},
4765 {Instruction::Select, MVT::v16i1, MVT::v16i16, 16},
4766 {Instruction::Select, MVT::v8i1, MVT::v8i32, 8},
4767 {Instruction::Select, MVT::v16i1, MVT::v16i32, 16},
4768 {Instruction::Select, MVT::v4i1, MVT::v4i64, 4 * AmortizationCost},
4769 {Instruction::Select, MVT::v8i1, MVT::v8i64, 8 * AmortizationCost},
4770 {Instruction::Select, MVT::v16i1, MVT::v16i64, 16 * AmortizationCost}};
4772 EVT SelCondTy = TLI->getValueType(
DL, CondTy);
4773 EVT SelValTy = TLI->getValueType(
DL, ValTy);
4782 if (Opcode == Instruction::FCmp) {
4784 ValTy,
CostKind, Op1Info, Op2Info,
false,
4786 false, [&](
Type *PromotedTy) {
4798 return *PromotedCost;
4802 if (LT.second.getScalarType() != MVT::f64 &&
4803 LT.second.getScalarType() != MVT::f32 &&
4804 LT.second.getScalarType() != MVT::f16)
4809 unsigned Factor = 1;
4810 if (!CondTy->isVectorTy() &&
4824 AArch64::FCMEQv4f32))
4836 TLI->isTypeLegal(TLI->getValueType(
DL, ValTy)) &&
4855 Op1Info, Op2Info,
I);
4861 if (ST->requiresStrictAlign()) {
4866 Options.AllowOverlappingLoads =
true;
4867 Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
4872 Options.LoadSizes = {8, 4, 2, 1};
4873 Options.AllowedTailExpansions = {3, 5, 6};
4878 return ST->hasSVE();
4884 switch (MICA.
getID()) {
4885 case Intrinsic::masked_scatter:
4886 case Intrinsic::masked_gather:
4888 case Intrinsic::masked_load:
4889 case Intrinsic::masked_expandload:
4890 case Intrinsic::masked_store:
4904 if (!LT.first.isValid())
4909 if (VT->getElementType()->isIntegerTy(1))
4920 if (MICA.
getID() == Intrinsic::masked_expandload) {
4936 if (LT.first > 1 && LT.second.getScalarSizeInBits() > 8)
4937 return MemOpCost * 2;
4946 assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
4947 "Should be called on only load or stores.");
4949 case Instruction::Load:
4952 return ST->getGatherOverhead();
4954 case Instruction::Store:
4957 return ST->getScatterOverhead();
4968 unsigned Opcode = (MICA.
getID() == Intrinsic::masked_gather ||
4969 MICA.
getID() == Intrinsic::vp_gather)
4971 : Instruction::Store;
4981 if (!LT.first.isValid())
4985 if (!LT.second.isVector() ||
4987 VT->getElementType()->isIntegerTy(1))
4997 ElementCount LegalVF = LT.second.getVectorElementCount();
5000 {TTI::OK_AnyValue, TTI::OP_None},
I);
5016 EVT VT = TLI->getValueType(
DL, Ty,
true);
5018 if (VT == MVT::Other)
5023 if (!LT.first.isValid())
5033 (VTy->getElementType()->isIntegerTy(1) &&
5034 !VTy->getElementCount().isKnownMultipleOf(
5045 if (ST->isMisaligned128StoreSlow() && Opcode == Instruction::Store &&
5046 LT.second.is128BitVector() && Alignment <
Align(16)) {
5052 const int AmortizationCost = 6;
5054 return LT.first * 2 * AmortizationCost;
5058 if (Ty->isPtrOrPtrVectorTy())
5063 if (Ty->getScalarSizeInBits() != LT.second.getScalarSizeInBits()) {
5065 if (VT == MVT::v4i8)
5072 if (!
isPowerOf2_32(EltSize) || EltSize < 8 || EltSize > 64 ||
5087 while (!TypeWorklist.
empty()) {
5109 bool UseMaskForCond,
bool UseMaskForGaps)
const {
5110 assert(Factor >= 2 &&
"Invalid interleave factor");
5125 if (!VecTy->
isScalableTy() && (UseMaskForCond || UseMaskForGaps))
5128 if (!UseMaskForGaps && Factor <= TLI->getMaxSupportedInterleaveFactor()) {
5129 unsigned MinElts = VecVTy->getElementCount().getKnownMinValue();
5132 VecVTy->getElementCount().divideCoefficientBy(Factor));
5138 if (MinElts % Factor == 0 &&
5139 TLI->isLegalInterleavedAccessType(SubVecTy,
DL, UseScalable))
5140 return Factor * TLI->getNumInterleavedAccesses(SubVecTy,
DL, UseScalable);
5145 UseMaskForCond, UseMaskForGaps);
5152 for (
auto *
I : Tys) {
5153 if (!
I->isVectorTy())
5164 Align Alignment)
const {
5171 return (ST->isSVEAvailable() && ST->hasSVE2p2()) ||
5172 (ST->isSVEorStreamingSVEAvailable() && ST->hasSME2p2());
5176 return ST->getMaxInterleaveFactor();
5186 enum { MaxStridedLoads = 7 };
5188 int StridedLoads = 0;
5191 for (
const auto BB : L->blocks()) {
5192 for (
auto &
I : *BB) {
5198 if (L->isLoopInvariant(PtrValue))
5203 if (!LSCEVAddRec || !LSCEVAddRec->
isAffine())
5212 if (StridedLoads > MaxStridedLoads / 2)
5213 return StridedLoads;
5216 return StridedLoads;
5219 int StridedLoads = countStridedLoads(L, SE);
5221 <<
" strided loads\n");
5237 unsigned *FinalSize) {
5241 for (
auto *BB : L->getBlocks()) {
5242 for (
auto &
I : *BB) {
5248 if (!Cost.isValid())
5252 if (LoopCost > Budget)
5274 if (MaxTC > 0 && MaxTC <= 32)
5285 if (Blocks.
size() != 2)
5307 if (!L->isInnermost() || L->getNumBlocks() > 8)
5311 if (!L->getExitBlock())
5317 bool HasParellelizableReductions =
5318 L->getNumBlocks() == 1 &&
5319 any_of(L->getHeader()->phis(),
5321 return canParallelizeReductionWhenUnrolling(Phi, L, &SE);
5324 if (HasParellelizableReductions &&
5346 if (HasParellelizableReductions) {
5357 if (Header == Latch) {
5360 unsigned Width = 10;
5366 unsigned MaxInstsPerLine = 16;
5368 unsigned BestUC = 1;
5369 unsigned SizeWithBestUC = BestUC *
Size;
5371 unsigned SizeWithUC = UC *
Size;
5372 if (SizeWithUC > 48)
5374 if ((SizeWithUC % MaxInstsPerLine) == 0 ||
5375 (SizeWithBestUC % MaxInstsPerLine) < (SizeWithUC % MaxInstsPerLine)) {
5377 SizeWithBestUC = BestUC *
Size;
5387 for (
auto *BB : L->blocks()) {
5388 for (
auto &
I : *BB) {
5398 for (
auto *U :
I.users())
5400 LoadedValuesPlus.
insert(U);
5407 return LoadedValuesPlus.
contains(
SI->getOperand(0));
5433 auto *I = dyn_cast<Instruction>(V);
5434 return I && DependsOnLoopLoad(I, Depth + 1);
5441 DependsOnLoopLoad(
I, 0)) {
5457 if (L->getLoopDepth() > 1)
5468 for (
auto *BB : L->getBlocks()) {
5469 for (
auto &
I : *BB) {
5473 if (IsVectorized &&
I.getType()->isVectorTy())
5490 if (ST->isAppleMLike())
5492 else if (ST->getProcFamily() == AArch64Subtarget::Falkor &&
5514 !ST->getSchedModel().isOutOfOrder()) {
5537 bool CanCreate)
const {
5541 case Intrinsic::aarch64_neon_st2:
5542 case Intrinsic::aarch64_neon_st3:
5543 case Intrinsic::aarch64_neon_st4: {
5546 if (!CanCreate || !ST)
5548 unsigned NumElts = Inst->
arg_size() - 1;
5549 if (ST->getNumElements() != NumElts)
5551 for (
unsigned i = 0, e = NumElts; i != e; ++i) {
5557 for (
unsigned i = 0, e = NumElts; i != e; ++i) {
5559 Res = Builder.CreateInsertValue(Res, L, i);
5563 case Intrinsic::aarch64_neon_ld2:
5564 case Intrinsic::aarch64_neon_ld3:
5565 case Intrinsic::aarch64_neon_ld4:
5566 if (Inst->
getType() == ExpectedType)
5577 case Intrinsic::aarch64_neon_ld2:
5578 case Intrinsic::aarch64_neon_ld3:
5579 case Intrinsic::aarch64_neon_ld4:
5580 Info.ReadMem =
true;
5581 Info.WriteMem =
false;
5584 case Intrinsic::aarch64_neon_st2:
5585 case Intrinsic::aarch64_neon_st3:
5586 case Intrinsic::aarch64_neon_st4:
5587 Info.ReadMem =
false;
5588 Info.WriteMem =
true;
5596 case Intrinsic::aarch64_neon_ld2:
5597 case Intrinsic::aarch64_neon_st2:
5598 Info.MatchingId = VECTOR_LDST_TWO_ELEMENTS;
5600 case Intrinsic::aarch64_neon_ld3:
5601 case Intrinsic::aarch64_neon_st3:
5602 Info.MatchingId = VECTOR_LDST_THREE_ELEMENTS;
5604 case Intrinsic::aarch64_neon_ld4:
5605 case Intrinsic::aarch64_neon_st4:
5606 Info.MatchingId = VECTOR_LDST_FOUR_ELEMENTS;
5618 const Instruction &
I,
bool &AllowPromotionWithoutCommonHeader)
const {
5619 bool Considerable =
false;
5620 AllowPromotionWithoutCommonHeader =
false;
5623 Type *ConsideredSExtType =
5625 if (
I.getType() != ConsideredSExtType)
5629 for (
const User *U :
I.users()) {
5631 Considerable =
true;
5635 if (GEPInst->getNumOperands() > 2) {
5636 AllowPromotionWithoutCommonHeader =
true;
5641 return Considerable;
5690 if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16())
5700 return LegalizationCost + 2;
5710 LegalizationCost *= LT.first - 1;
5713 int ISD = TLI->InstructionOpcodeToISD(Opcode);
5722 return LegalizationCost + 2;
5730 std::optional<FastMathFlags> FMF,
5746 return BaseCost + FixedVTy->getNumElements();
5749 if (Opcode != Instruction::FAdd)
5763 MVT MTy = LT.second;
5764 int ISD = TLI->InstructionOpcodeToISD(Opcode);
5812 MTy.
isVector() && (EltTy->isFloatTy() || EltTy->isDoubleTy() ||
5813 (EltTy->isHalfTy() && ST->hasFullFP16()))) {
5825 return (LT.first - 1) +
Log2_32(NElts);
5830 return (LT.first - 1) + Entry->Cost;
5842 if (LT.first != 1) {
5848 ExtraCost *= LT.first - 1;
5851 auto Cost = ValVTy->getElementType()->isIntegerTy(1) ? 2 : Entry->Cost;
5852 return Cost + ExtraCost;
5860 unsigned Opcode,
bool IsUnsigned,
Type *ResTy,
VectorType *VecTy,
5862 EVT VecVT = TLI->getValueType(
DL, VecTy);
5863 EVT ResVT = TLI->getValueType(
DL, ResTy);
5873 if (((LT.second == MVT::v8i8 || LT.second == MVT::v16i8) &&
5875 ((LT.second == MVT::v4i16 || LT.second == MVT::v8i16) &&
5877 ((LT.second == MVT::v2i32 || LT.second == MVT::v4i32) &&
5879 return (LT.first - 1) * 2 + 2;
5890 EVT VecVT = TLI->getValueType(
DL, VecTy);
5891 EVT ResVT = TLI->getValueType(
DL, ResTy);
5894 RedOpcode == Instruction::Add) {
5900 if ((LT.second == MVT::v8i8 || LT.second == MVT::v16i8) &&
5902 return LT.first + 2;
5937 EVT PromotedVT = LT.second.getScalarType() == MVT::i1
5938 ? TLI->getPromotedVTForPredicate(
EVT(LT.second))
5952 if (LT.second.getScalarType() == MVT::i1) {
5961 assert(Entry &&
"Illegal Type for Splice");
5962 LegalizationCost += Entry->Cost;
5963 return LegalizationCost * LT.first;
5967 unsigned Opcode,
Type *InputTypeA,
Type *InputTypeB,
Type *AccumType,
5976 if ((Opcode != Instruction::Add && Opcode != Instruction::Sub &&
5977 Opcode != Instruction::FAdd) ||
5984 assert(FMF &&
"Missing FastMathFlags for floating-point partial reduction");
5985 if (!FMF->allowReassoc() || !FMF->allowContract())
5989 "FastMathFlags only apply to floating-point partial reductions");
5993 (!BinOp || (OpBExtend !=
TTI::PR_None && InputTypeB)) &&
5994 "Unexpected values for OpBExtend or InputTypeB");
5998 if (BinOp && ((*BinOp != Instruction::Mul && *BinOp != Instruction::FMul) ||
5999 InputTypeA != InputTypeB))
6002 bool IsUSDot = OpBExtend !=
TTI::PR_None && OpAExtend != OpBExtend;
6003 if (IsUSDot && !ST->hasMatMulInt8())
6016 auto TC = TLI->getTypeConversion(AccumVectorType->
getContext(),
6025 if (TLI->getTypeAction(AccumVectorType->
getContext(), TC.second) !=
6031 std::pair<InstructionCost, MVT> AccumLT =
6033 std::pair<InstructionCost, MVT> InputLT =
6037 auto IsSupported = [&](
bool SVEPred,
bool NEONPred) ->
bool {
6038 return (ST->isSVEorStreamingSVEAvailable() && SVEPred) ||
6039 (AccumLT.second.isFixedLengthVector() &&
6040 AccumLT.second.getSizeInBits() <= 128 && ST->isNeonAvailable() &&
6044 bool IsSub = Opcode == Instruction::Sub;
6047 if (AccumLT.second.getScalarType() == MVT::i32 &&
6048 InputLT.second.getScalarType() == MVT::i8 && !IsSub) {
6050 if (!IsUSDot && IsSupported(
true, ST->hasDotProd()))
6053 if (IsUSDot && IsSupported(ST->hasMatMulInt8(), ST->hasMatMulInt8()))
6057 if (ST->isSVEorStreamingSVEAvailable() && !IsUSDot && !IsSub) {
6059 if (AccumLT.second.getScalarType() == MVT::i64 &&
6060 InputLT.second.getScalarType() == MVT::i16)
6063 if (AccumLT.second.getScalarType() == MVT::i32 &&
6064 InputLT.second.getScalarType() == MVT::i16 &&
6065 (ST->hasSVE2p1() || ST->hasSME2()))
6068 if (AccumLT.second.getScalarType() == MVT::i64 &&
6069 InputLT.second.getScalarType() == MVT::i8)
6077 if (AccumLT.second.getScalarType() == MVT::i16 &&
6078 InputLT.second.getScalarType() == MVT::i8 &&
6079 (ST->hasSVE2p3() || ST->hasSME2p3()))
6085 if (Opcode == Instruction::FAdd && !IsSub &&
6086 IsSupported(ST->hasSME2() || ST->hasSVE2p1(), ST->hasF16F32DOT()) &&
6087 AccumLT.second.getScalarType() == MVT::f32 &&
6088 InputLT.second.getScalarType() == MVT::f16)
6092 if (Ratio == 2 && !IsSub) {
6093 MVT InVT = InputLT.second.getScalarType();
6096 if (IsSupported(ST->hasSVE2(),
true) &&
6101 if (IsSupported(ST->hasSVE2(), ST->hasFP16FML()) && InVT == MVT::f16)
6105 if (IsSupported(ST->hasBF16(), ST->hasBF16()) && InVT == MVT::bf16)
6110 Opcode, InputTypeA, InputTypeB, AccumType, VF, OpAExtend, OpBExtend,
6116 return ExpandCost.
isValid() && IsSub ? ((8 * ExpandCost) / 10) : ExpandCost;
6127 "Expected the Mask to match the return size if given");
6129 "Expected the same scalar types");
6135 LT.second.getScalarSizeInBits() * Mask.size() > 128 &&
6136 SrcTy->getScalarSizeInBits() == LT.second.getScalarSizeInBits() &&
6137 Mask.size() > LT.second.getVectorNumElements() && !Index && !SubTp) {
6145 return std::max<InstructionCost>(1, LT.first / 4);
6153 Mask, 4, SrcTy->getElementCount().getKnownMinValue() * 2) ||
6155 Mask, 3, SrcTy->getElementCount().getKnownMinValue() * 2)))
6158 unsigned TpNumElts = Mask.size();
6159 unsigned LTNumElts = LT.second.getVectorNumElements();
6160 unsigned NumVecs = (TpNumElts + LTNumElts - 1) / LTNumElts;
6162 LT.second.getVectorElementCount());
6164 std::map<std::tuple<unsigned, unsigned, SmallVector<int>>,
InstructionCost>
6166 for (
unsigned N = 0;
N < NumVecs;
N++) {
6170 unsigned Source1 = -1U, Source2 = -1U;
6171 unsigned NumSources = 0;
6172 for (
unsigned E = 0; E < LTNumElts; E++) {
6173 int MaskElt = (
N * LTNumElts + E < TpNumElts) ? Mask[
N * LTNumElts + E]
6182 unsigned Source = MaskElt / LTNumElts;
6183 if (NumSources == 0) {
6186 }
else if (NumSources == 1 && Source != Source1) {
6189 }
else if (NumSources >= 2 && Source != Source1 && Source != Source2) {
6195 if (Source == Source1)
6197 else if (Source == Source2)
6198 NMask.
push_back(MaskElt % LTNumElts + LTNumElts);
6207 PreviousCosts.insert({std::make_tuple(Source1, Source2, NMask), 0});
6218 NTp, NTp, NMask,
CostKind, 0,
nullptr, Args,
6221 Result.first->second = NCost;
6235 if (IsExtractSubvector && LT.second.isFixedLengthVector()) {
6236 if (LT.second.getFixedSizeInBits() >= 128 &&
6238 LT.second.getVectorNumElements() / 2) {
6241 if (Index == (
int)LT.second.getVectorNumElements() / 2)
6255 if (!Mask.empty() && LT.second.isFixedLengthVector() &&
6258 return M.value() < 0 || M.value() == (int)M.index();
6264 !Mask.empty() && SrcTy->getPrimitiveSizeInBits().isNonZero() &&
6265 SrcTy->getPrimitiveSizeInBits().isKnownMultipleOf(
6274 if ((ST->hasSVE2p1() || ST->hasSME2p1()) &&
6275 ST->isSVEorStreamingSVEAvailable() &&
6280 if (ST->isSVEorStreamingSVEAvailable() &&
6294 if (IsLoad && LT.second.isVector() &&
6296 LT.second.getVectorElementCount()))
6302 if (Mask.size() == 4 &&
6304 (SrcTy->getScalarSizeInBits() == 16 ||
6305 SrcTy->getScalarSizeInBits() == 32) &&
6306 all_of(Mask, [](
int E) {
return E < 8; }))
6312 if (LT.second.isFixedLengthVector() &&
6313 LT.second.getVectorNumElements() == Mask.size() &&
6319 (
isZIPMask(Mask, LT.second.getVectorNumElements(), Unused, Unused) ||
6320 isTRNMask(Mask, LT.second.getVectorNumElements(), Unused, Unused) ||
6321 isUZPMask(Mask, LT.second.getVectorNumElements(), Unused) ||
6322 isREVMask(Mask, LT.second.getScalarSizeInBits(),
6323 LT.second.getVectorNumElements(), 16) ||
6324 isREVMask(Mask, LT.second.getScalarSizeInBits(),
6325 LT.second.getVectorNumElements(), 32) ||
6326 isREVMask(Mask, LT.second.getScalarSizeInBits(),
6327 LT.second.getVectorNumElements(), 64) ||
6330 [&Mask](
int M) {
return M < 0 || M == Mask[0]; })))
6459 return LT.first * Entry->Cost;
6468 LT.second.getSizeInBits() <= 128 && SubTp) {
6470 if (SubLT.second.isVector()) {
6471 int NumElts = LT.second.getVectorNumElements();
6472 int NumSubElts = SubLT.second.getVectorNumElements();
6473 if ((Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
6479 if (IsExtractSubvector)
6496 if (
getPtrStride(*PSE, AccessTy, Ptr, TheLoop, DT, Strides,
6515 return ST->useFixedOverScalableIfEqualCost();
6519 return ST->getEpilogueVectorizationMinVF();
6554 unsigned NumInsns = 0;
6556 NumInsns += BB->size();
6566 int64_t Scale,
unsigned AddrSpace)
const {
6594 if (
I->getOpcode() == Instruction::Or &&
6598 if (
I->getOpcode() == Instruction::Add ||
6599 I->getOpcode() == Instruction::Sub)
6624 return all_equal(Shuf->getShuffleMask());
6631 bool AllowSplat =
false) {
6636 auto areTypesHalfed = [](
Value *FullV,
Value *HalfV) {
6637 auto *FullTy = FullV->
getType();
6638 auto *HalfTy = HalfV->getType();
6640 2 * HalfTy->getPrimitiveSizeInBits().getFixedValue();
6643 auto extractHalf = [](
Value *FullV,
Value *HalfV) {
6646 return FullVT->getNumElements() == 2 * HalfVT->getNumElements();
6650 Value *S1Op1 =
nullptr, *S2Op1 =
nullptr;
6664 if ((S1Op1 && (!areTypesHalfed(S1Op1, Op1) || !extractHalf(S1Op1, Op1))) ||
6665 (S2Op1 && (!areTypesHalfed(S2Op1, Op2) || !extractHalf(S2Op1, Op2))))
6679 if ((M1Start != 0 && M1Start != (NumElements / 2)) ||
6680 (M2Start != 0 && M2Start != (NumElements / 2)))
6682 if (S1Op1 && S2Op1 && M1Start != M2Start)
6692 return Ext->getType()->getScalarSizeInBits() ==
6693 2 * Ext->getOperand(0)->getType()->getScalarSizeInBits();
6707 Value *VectorOperand =
nullptr;
6724 if (!
GEP ||
GEP->getNumOperands() != 2)
6728 Value *Offsets =
GEP->getOperand(1);
6731 if (
Base->getType()->isVectorTy() || !Offsets->getType()->isVectorTy())
6737 if (OffsetsInst->getType()->getScalarSizeInBits() > 32 &&
6738 OffsetsInst->getOperand(0)->getType()->getScalarSizeInBits() <= 32)
6739 Ops.push_back(&
GEP->getOperandUse(1));
6775 switch (
II->getIntrinsicID()) {
6776 case Intrinsic::aarch64_neon_smull:
6777 case Intrinsic::aarch64_neon_umull:
6780 Ops.push_back(&
II->getOperandUse(0));
6781 Ops.push_back(&
II->getOperandUse(1));
6786 case Intrinsic::fma:
6787 case Intrinsic::fmuladd:
6794 Ops.push_back(&
II->getOperandUse(0));
6796 Ops.push_back(&
II->getOperandUse(1));
6799 case Intrinsic::aarch64_neon_sqdmull:
6800 case Intrinsic::aarch64_neon_sqdmulh:
6801 case Intrinsic::aarch64_neon_sqrdmulh:
6804 Ops.push_back(&
II->getOperandUse(0));
6806 Ops.push_back(&
II->getOperandUse(1));
6807 return !
Ops.empty();
6808 case Intrinsic::aarch64_neon_fmlal:
6809 case Intrinsic::aarch64_neon_fmlal2:
6810 case Intrinsic::aarch64_neon_fmlsl:
6811 case Intrinsic::aarch64_neon_fmlsl2:
6814 Ops.push_back(&
II->getOperandUse(1));
6816 Ops.push_back(&
II->getOperandUse(2));
6817 return !
Ops.empty();
6818 case Intrinsic::aarch64_sve_ptest_first:
6819 case Intrinsic::aarch64_sve_ptest_last:
6821 if (IIOp->getIntrinsicID() == Intrinsic::aarch64_sve_ptrue)
6822 Ops.push_back(&
II->getOperandUse(0));
6823 return !
Ops.empty();
6824 case Intrinsic::aarch64_sme_write_horiz:
6825 case Intrinsic::aarch64_sme_write_vert:
6826 case Intrinsic::aarch64_sme_writeq_horiz:
6827 case Intrinsic::aarch64_sme_writeq_vert: {
6829 if (!Idx || Idx->getOpcode() != Instruction::Add)
6831 Ops.push_back(&
II->getOperandUse(1));
6834 case Intrinsic::aarch64_sme_read_horiz:
6835 case Intrinsic::aarch64_sme_read_vert:
6836 case Intrinsic::aarch64_sme_readq_horiz:
6837 case Intrinsic::aarch64_sme_readq_vert:
6838 case Intrinsic::aarch64_sme_ld1b_vert:
6839 case Intrinsic::aarch64_sme_ld1h_vert:
6840 case Intrinsic::aarch64_sme_ld1w_vert:
6841 case Intrinsic::aarch64_sme_ld1d_vert:
6842 case Intrinsic::aarch64_sme_ld1q_vert:
6843 case Intrinsic::aarch64_sme_st1b_vert:
6844 case Intrinsic::aarch64_sme_st1h_vert:
6845 case Intrinsic::aarch64_sme_st1w_vert:
6846 case Intrinsic::aarch64_sme_st1d_vert:
6847 case Intrinsic::aarch64_sme_st1q_vert:
6848 case Intrinsic::aarch64_sme_ld1b_horiz:
6849 case Intrinsic::aarch64_sme_ld1h_horiz:
6850 case Intrinsic::aarch64_sme_ld1w_horiz:
6851 case Intrinsic::aarch64_sme_ld1d_horiz:
6852 case Intrinsic::aarch64_sme_ld1q_horiz:
6853 case Intrinsic::aarch64_sme_st1b_horiz:
6854 case Intrinsic::aarch64_sme_st1h_horiz:
6855 case Intrinsic::aarch64_sme_st1w_horiz:
6856 case Intrinsic::aarch64_sme_st1d_horiz:
6857 case Intrinsic::aarch64_sme_st1q_horiz: {
6859 if (!Idx || Idx->getOpcode() != Instruction::Add)
6861 Ops.push_back(&
II->getOperandUse(3));
6864 case Intrinsic::aarch64_neon_pmull:
6867 Ops.push_back(&
II->getOperandUse(0));
6868 Ops.push_back(&
II->getOperandUse(1));
6870 case Intrinsic::aarch64_neon_pmull64:
6872 II->getArgOperand(1)))
6874 Ops.push_back(&
II->getArgOperandUse(0));
6875 Ops.push_back(&
II->getArgOperandUse(1));
6877 case Intrinsic::masked_gather:
6880 Ops.push_back(&
II->getArgOperandUse(0));
6882 case Intrinsic::masked_scatter:
6885 Ops.push_back(&
II->getArgOperandUse(1));
6892 auto ShouldSinkCondition = [](
Value *
Cond,
6897 if (
II->getIntrinsicID() != Intrinsic::vector_reduce_or ||
6901 Ops.push_back(&
II->getOperandUse(0));
6905 switch (
I->getOpcode()) {
6906 case Instruction::GetElementPtr:
6907 case Instruction::Add:
6908 case Instruction::Sub:
6910 for (
unsigned Op = 0;
Op <
I->getNumOperands(); ++
Op) {
6912 Ops.push_back(&
I->getOperandUse(
Op));
6917 case Instruction::Select: {
6918 if (!ShouldSinkCondition(
I->getOperand(0),
Ops))
6921 Ops.push_back(&
I->getOperandUse(0));
6924 case Instruction::UncondBr:
6926 case Instruction::CondBr: {
6930 Ops.push_back(&
I->getOperandUse(0));
6933 case Instruction::FMul:
6938 Ops.push_back(&
I->getOperandUse(0));
6940 Ops.push_back(&
I->getOperandUse(1));
6950 case Instruction::Xor:
6953 if (
I->getType()->isVectorTy() && ST->isNeonAvailable()) {
6955 ST->isSVEorStreamingSVEAvailable() && (ST->hasSVE2() || ST->hasSME());
6960 case Instruction::And:
6961 case Instruction::Or:
6964 if (
I->getOpcode() == Instruction::Or &&
6969 if (!(
I->getType()->isVectorTy() && ST->hasNEON()) &&
6972 for (
auto &
Op :
I->operands()) {
6984 Ops.push_back(&Not);
6985 Ops.push_back(&InsertElt);
6995 if (!
I->getType()->isVectorTy())
6996 return !
Ops.empty();
6998 switch (
I->getOpcode()) {
6999 case Instruction::Sub:
7000 case Instruction::Add: {
7009 Ops.push_back(&Ext1->getOperandUse(0));
7010 Ops.push_back(&Ext2->getOperandUse(0));
7013 Ops.push_back(&
I->getOperandUse(0));
7014 Ops.push_back(&
I->getOperandUse(1));
7018 case Instruction::Or: {
7021 if (ST->hasNEON()) {
7035 if (
I->getParent() != MainAnd->
getParent() ||
7040 if (
I->getParent() != IA->getParent() ||
7041 I->getParent() != IB->getParent())
7046 Ops.push_back(&
I->getOperandUse(0));
7047 Ops.push_back(&
I->getOperandUse(1));
7056 case Instruction::Mul: {
7057 auto ShouldSinkSplatForIndexedVariant = [](
Value *V) {
7060 if (Ty->isScalableTy())
7064 return Ty->getScalarSizeInBits() == 16 || Ty->getScalarSizeInBits() == 32;
7067 int NumZExts = 0, NumSExts = 0;
7068 for (
auto &
Op :
I->operands()) {
7075 auto *ExtOp = Ext->getOperand(0);
7076 if (
isSplatShuffle(ExtOp) && ShouldSinkSplatForIndexedVariant(ExtOp))
7077 Ops.push_back(&Ext->getOperandUse(0));
7085 if (Ext->getOperand(0)->getType()->getScalarSizeInBits() * 2 <
7086 I->getType()->getScalarSizeInBits())
7123 if (!ElementConstant || !ElementConstant->
isZero())
7126 unsigned Opcode = OperandInstr->
getOpcode();
7127 if (Opcode == Instruction::SExt)
7129 else if (Opcode == Instruction::ZExt)
7134 unsigned Bitwidth =
I->getType()->getScalarSizeInBits();
7144 Ops.push_back(&Insert->getOperandUse(1));
7150 if (!
Ops.empty() && (NumSExts == 2 || NumZExts == 2))
7154 if (!ShouldSinkSplatForIndexedVariant(
I))
7159 Ops.push_back(&
I->getOperandUse(0));
7161 Ops.push_back(&
I->getOperandUse(1));
7163 return !
Ops.empty();
7165 case Instruction::FMul: {
7167 if (
I->getType()->isScalableTy())
7168 return !
Ops.empty();
7172 return !
Ops.empty();
7176 Ops.push_back(&
I->getOperandUse(0));
7178 Ops.push_back(&
I->getOperandUse(1));
7179 return !
Ops.empty();
static bool isAllActivePredicate(SelectionDAG &DAG, SDValue N)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file provides a helper that implements much of the TTI interface in terms of the target-independ...
static Error reportError(StringRef Message)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
Cost tables and simple lookup functions.
This file defines the DenseMap class.
static Value * getCondition(Instruction *I)
const HexagonInstrInfo * TII
This file provides the interface for the instcombine pass implementation.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
This file defines the LoopVectorizationLegality class.
static const Function * getCalledFunction(const Value *V)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
const SmallVectorImpl< MachineOperand > & Cond
static uint64_t getBits(uint64_t Val, int Start, int End)
static SymbolRef::Type getType(const Symbol *Sym)
This file describes how to lower LLVM code to machine code.
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
unsigned getVectorInsertExtractBaseCost() const
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={}, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const override
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getCostOfKeepingLiveOverCall(ArrayRef< Type * > Tys) const override
unsigned getMaxInterleaveFactor(ElementCount VF) const override
InstructionCost getMaskedMemoryOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const
InstructionCost getGatherScatterOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const
bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const override
InstructionCost getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE, const SCEV *Ptr, TTI::TargetCostKind CostKind) const override
bool isExtPartOfAvgExpr(const Instruction *ExtUser, Type *Dst, Type *Src) const
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const override
InstructionCost getIntImmCost(int64_t Val) const
Calculate the cost of materializing a 64-bit value.
std::optional< InstructionCost > getFP16BF16PromoteCost(Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info, TTI::OperandValueInfo Op2Info, bool IncludeTrunc, bool CanUseSVE, std::function< InstructionCost(Type *)> InstCost) const
FP16 and BF16 operations are lowered to fptrunc(op(fpext, fpext)) if the architecture features are not...
bool prefersVectorizedAddressing() const override
InstructionCost getIndexedVectorInstrCostFromEnd(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index) const override
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
InstructionCost getMulAccReductionCost(bool IsUnsigned, unsigned RedOpcode, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const override
InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr) const override
bool isElementTypeLegalForScalableVector(Type *Ty) const override
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
InstructionCost getPartialReductionCost(unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType, ElementCount VF, TTI::PartialReductionExtendKind OpAExtend, TTI::PartialReductionExtendKind OpBExtend, std::optional< unsigned > BinOp, TTI::TargetCostKind CostKind, std::optional< FastMathFlags > FMF) const override
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const override
bool preferTailFoldingOverEpilogue(TailFoldingInfo *TFI) const override
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
APInt getPriorityMask(const Function &F) const override
bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const override
bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2) const override
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const override
Check if sinking I's operands to I's basic block is profitable, because the operands can be folded in...
std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const override
bool useNeonVector(const Type *Ty) const
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *ValTy, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
bool isLegalMaskedExpandLoad(Type *DataTy, Align Alignment) const override
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override
InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index, TTI::TargetCostKind CostKind) const override
unsigned getInlineCallPenalty(const Function *F, const CallBase &Call, unsigned DefaultCallPenalty) const override
bool areInlineCompatible(const Function *Caller, const Function *Callee) const override
unsigned getMaxNumElements(ElementCount VF) const
Try to return an estimated cost factor that can be used as a multiplier when scalarizing an operation ...
bool shouldTreatInstructionLikeSelect(const Instruction *I) const override
bool isMultiversionedFunction(const Function &F) const override
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const override
bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, ElementCount VF) const override
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const override
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const override
bool isLegalMaskedGatherScatter(Type *DataType) const
bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const override
See if I should be considered for address type promotion.
APInt getFeatureMask(const Function &F) const override
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override
bool areTypesABICompatible(const Function *Caller, const Function *Callee, ArrayRef< Type * > Types) const override
bool enableScalableVectorization() const override
InstructionCost getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const override
Value * getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType, bool CanCreate=true) const override
bool hasKnownLowerThroughputFromSchedulingModel(unsigned Opcode1, unsigned Opcode2) const
Check whether Opcode1 has less throughput according to the scheduling model than Opcode2.
unsigned getEpilogueVectorizationMinVF() const override
InstructionCost getSpliceCost(VectorType *Tp, int Index, TTI::TargetCostKind CostKind) const
InstructionCost getArithmeticReductionCostSVE(unsigned Opcode, VectorType *ValTy, TTI::TargetCostKind CostKind) const
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, StackOffset BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) const override
Return the cost of the scaling factor used in the addressing mode represented by AM for this target,...
bool preferFixedOverScalableIfEqualCost(bool IsEpilogue) const override
Class for arbitrary precision integers.
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
unsigned popcount() const
Count the number of bits set.
unsigned countLeadingOnes() const
void negate()
Negate this APInt in place.
LLVM_ABI APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
unsigned logBase2() const
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
int64_t getSExtValue() const
Get sign extended value.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
LLVM Basic Block Representation.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const override
TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind, ArrayRef< int > Mask, VectorType *SrcTy, int &Index, VectorType *&SubTy) const
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I=nullptr, int64_t ScalableOffset=0) const override
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getScalarizationOverhead(VectorType *InTy, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={}, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const override
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind) const override
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
InstructionCost getMulAccReductionCost(bool IsUnsigned, unsigned RedOpcode, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind) const override
InstructionCost getIndexedVectorInstrCostFromEnd(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index) const override
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const
InstructionCost getPartialReductionCost(unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType, ElementCount VF, TTI::PartialReductionExtendKind OpAExtend, TTI::PartialReductionExtendKind OpBExtend, std::optional< unsigned > BinOp, TTI::TargetCostKind CostKind, std::optional< FastMathFlags > FMF) const override
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
InstructionCost getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const override
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
bool isTypeLegal(Type *Ty) const override
static BinaryOperator * CreateWithCopiedFlags(BinaryOps Opc, Value *V1, Value *V2, Value *CopyO, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Value * getArgOperand(unsigned i) const
unsigned arg_size() const
This class represents a function call, abstracting a target machine's calling convention.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
@ ICMP_SGE
signed greater or equal
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
static bool isIntPredicate(Predicate P)
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
static LLVM_ABI ConstantAggregateZero * get(Type *Ty)
This is the shared class of boolean and integer constants.
bool isZero() const
This is just a convenience method to make client code smaller for a common case.
const APInt & getValue() const
Return the constant as an APInt value reference.
static LLVM_ABI ConstantInt * getBool(LLVMContext &Context, bool V)
static LLVM_ABI Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
This is an important base class in LLVM.
LLVM_ABI Constant * getSplatValue(bool AllowPoison=false) const
If all elements of the vector constant have the same value, return that value.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string, and methods for querying it.
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
static constexpr ElementCount getScalable(ScalarTy MinVal)
static constexpr ElementCount getFixed(ScalarTy MinVal)
This provides a helper for copying FMF from an instruction or setting specified flags.
Convenience struct for specifying and reasoning about fast-math flags.
bool noSignedZeros() const
bool allowContract() const
Container class for subtarget features.
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
CallInst * CreateInsertVector(Type *DstType, Value *SrcVec, Value *SubVec, Value *Idx, const Twine &Name="")
Create a call to the vector.insert intrinsic.
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
IntegerType * getIntNTy(unsigned N)
Fetch the type representing an N-bit integer.
Type * getDoubleTy()
Fetch the type representing a 64-bit floating point value.
LLVM_ABI Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains V broadcast to NumElts elements.
LLVM_ABI CallInst * CreateMaskedLoad(Type *Ty, Value *Ptr, Align Alignment, Value *Mask, Value *PassThru=nullptr, const Twine &Name="")
Create a call to Masked Load intrinsic.
LLVM_ABI Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Type * getHalfTy()
Fetch the type representing a 16-bit floating point value.
Value * CreateGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > OverloadTypes, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using OverloadTypes.
Value * CreateBitOrPointerCast(Value *V, Type *DestTy, const Twine &Name="")
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Value * CreateBinOpFMF(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, FMFSource FMFSource, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
LLVM_ABI CallInst * CreateMaskedStore(Value *Val, Value *Ptr, Align Alignment, Value *Mask)
Create a call to Masked Store intrinsic.
Type * getFloatTy()
Fetch the type representing a 32-bit floating point value.
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
LLVM_ABI Value * CreateElementCount(Type *Ty, ElementCount EC)
Create an expression which evaluates to the number of elements in EC at runtime.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
This instruction inserts a single (scalar) element into a VectorType value.
The core instruction combiner logic.
virtual Instruction * eraseInstFromFunction(Instruction &I)=0
Combiner aware instruction erasure.
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)
Replace operand of instruction and add old operand to the worklist.
static InstructionCost getInvalid(CostType Val=0)
CostType getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
LLVM_ABI bool isCommutative() const LLVM_READONLY
Return true if the instruction is commutative:
LLVM_ABI FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
Class to represent integer types.
bool hasGroups() const
Returns true if we have any interleave groups.
const SmallVectorImpl< Type * > & getArgTypes() const
Type * getReturnType() const
const SmallVectorImpl< const Value * > & getArgs() const
const IntrinsicInst * getInst() const
Intrinsic::ID getID() const
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
Value * getPointerOperand()
iterator_range< block_iterator > blocks() const
RecurrenceSet & getFixedOrderRecurrences()
Return the fixed-order recurrences found in the loop.
DominatorTree * getDominatorTree() const
PredicatedScalarEvolution * getPredicatedScalarEvolution() const
const ReductionList & getReductionVars() const
Returns the reduction variables found in the loop.
Represents a single loop in the control flow graph.
const FeatureBitset & getFeatureBits() const
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
Information for memory intrinsic cost model.
Align getAlignment() const
Type * getDataType() const
Intrinsic::ID getID() const
const Instruction * getInst() const
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Type * getRecurrenceType() const
Returns the type of the recurrence.
RecurKind getRecurrenceKind() const
This node represents a polynomial recurrence on the trip count of the specified loop.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
This class represents an analyzed expression in the program.
SMEAttrs is a utility class to parse the SME ACLE attributes on functions.
bool hasNonStreamingInterfaceAndBody() const
bool hasStreamingCompatibleInterface() const
bool hasStreamingInterfaceOrBody() const
bool isSMEABIRoutine() const
bool hasStreamingBody() const
void set(unsigned M, bool Enable=true)
SMECallAttrs is a utility class to hold the SMEAttrs for a callsite.
bool requiresPreservingZT0() const
bool requiresSMChange() const
bool requiresLazySave() const
bool requiresPreservingAllZAState() const
static LLVM_ABI ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
static ScalableVectorType * getDoubleElementsVectorType(ScalableVectorType *VTy)
The main scalar evolution driver.
LLVM_ABI const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
LLVM_ABI unsigned getSmallConstantTripMultiple(const Loop *L, const SCEV *ExitCount)
Returns the largest constant divisor of the trip count as a normal unsigned value,...
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI unsigned getSmallConstantMaxTripCount(const Loop *L, SmallVectorImpl< const SCEVPredicate * > *Predicates=nullptr)
Returns the upper bound of the loop trip count as a normal unsigned value.
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
const SCEV * getSymbolicMaxBackedgeTakenCount(const Loop *L)
When successful, this returns a SCEV that is greater than or equal to (i.e.
This instruction constructs a fixed permutation of two input vectors.
static LLVM_ABI bool isDeInterleaveMaskOfFactor(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
Check if the mask is a DE-interleave mask of the given factor Factor like: <Index,...
static LLVM_ABI bool isExtractSubvectorMask(ArrayRef< int > Mask, int NumSrcElts, int &Index)
Return true if this shuffle mask is an extract subvector mask.
static LLVM_ABI bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
static StackOffset getScalable(int64_t Scalable)
static StackOffset getFixed(int64_t Fixed)
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e.
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Class to represent struct types.
TargetInstrInfo - Interface to description of machine instruction set.
std::pair< LegalizeTypeAction, EVT > LegalizeKind
LegalizeKind holds the legalization kind that needs to happen to EVT in order to type-legalize it.
const RTLIB::RuntimeLibcallsInfo & getRuntimeLibcallsInfo() const
Primary interface to the complete machine description for the target machine.
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
bool isVectorTy() const
True if this is an instance of VectorType.
LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVM_ABI Type * getWithNewBitWidth(unsigned NewBitWidth) const
Given an integer or vector type, change the lane bitwidth to NewBitwidth, whilst keeping the old numb...
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
LLVM_ABI Type * getWithNewType(Type *EltTy) const
Given vector type, change the element type, whilst keeping the old number of elements.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
const Use & getOperandUse(unsigned i) const
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Base class of all SIMD vector types.
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
static VectorType * getInteger(VectorType *VTy)
This static method gets a VectorType with the same number of elements as the input type,...
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
Type * getElementType() const
constexpr ScalarTy getFixedValue() const
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
const ParentTy * getParent() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
void expandMOVImm(uint64_t Imm, unsigned BitSize, SmallVectorImpl< ImmInsnModel > &Insn)
Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more real move-immediate instructions to...
LLVM_ABI APInt getCpuSupportsMask(ArrayRef< StringRef > Features)
static constexpr unsigned SVEBitsPerBlock
LLVM_ABI APInt getFMVPriority(ArrayRef< StringRef > Features)
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
@ C
The default llvm calling convention, compatible with C.
ISD namespace - This namespace contains an enum which represents all of the SelectionDAG node types a...
@ ADD
Simple integer binary arithmetic operators.
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
@ FADD
Simple binary floating point operators.
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
@ SIGN_EXTEND
Conversion operators.
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ SHL
Shift and rotation operations.
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
auto m_Cmp()
Matches any compare instruction and ignore it.
BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)
Matches an And with LHS and RHS in either order.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
BinaryOp_match< LHS, RHS, Instruction::FMul > m_FMul(const LHS &L, const RHS &R)
bool match(Val *V, const Pattern &P)
match_bind< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
cst_pred_ty< is_nonnegative > m_NonNegative()
Match an integer or vector of non-negative values.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
IntrinsicID_match m_VScale()
Matches a call to llvm.vscale().
auto m_BinOp()
Match an arbitrary binary operation and ignore it.
auto m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches an Add with LHS and RHS in either order.
AnyBinaryOp_match< LHS, RHS, true > m_c_BinOp(const LHS &L, const RHS &R)
Matches a BinaryOperator with LHS and RHS in either order.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
BinOpPred_match< LHS, RHS, is_shift_op > m_Shift(const LHS &L, const RHS &R)
Matches shift operations.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
brc_match< Cond_t, match_bind< BasicBlock >, match_bind< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
auto m_Undef()
Match an arbitrary undef constant.
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
auto m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
LLVM_ABI Libcall getPOW(EVT RetVT)
getPOW - Return the POW_* value for the given types, or UNKNOWN_LIBCALL if there is none.
initializer< Ty > init(const Ty &Val)
LocationClass< Ty > location(Ty &L)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
FunctionAddr VTableAddr Value
std::optional< unsigned > isDUPQMask(ArrayRef< int > Mask, unsigned Segments, unsigned SegmentSize)
isDUPQMask - matches a splat of equivalent lanes within segments of a given number of elements.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
const CostTblEntryT< CostType > * CostTableLookup(ArrayRef< CostTblEntryT< CostType > > Tbl, int ISD, MVT Ty)
Find in cost table.
LLVM_ABI bool getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name)
Returns true if Name is applied to TheLoop and enabled.
bool isZIPMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut, unsigned &OperandOrderOut)
Return true for zip1 or zip2 masks of the form: <0, 8, 1, 9, 2, 10, 3, 11> (WhichResultOut = 0,...
TailFoldingOpts
An enum to describe what types of loops we should attempt to tail-fold: Disabled: None Reductions: Lo...
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
bool isDUPFirstSegmentMask(ArrayRef< int > Mask, unsigned Segments, unsigned SegmentSize)
isDUPFirstSegmentMask - matches a splat of the first 128b segment.
TypeConversionCostTblEntryT< unsigned > TypeConversionCostTblEntry
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
FunctionAddr VTableAddr uintptr_t uintptr_t Int32Ty
LLVM_ABI std::optional< const MDOperand * > findStringMetadataForLoop(const Loop *TheLoop, StringRef Name)
Find string metadata for loop.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
constexpr auto equal_to(T &&Arg)
Functor variant of std::equal_to that can be used as a UnaryPredicate in functional algorithms like a...
LLVM_ABI bool MaskedValueIsZero(const Value *V, const APInt &Mask, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if 'V & Mask' is known to be zero.
unsigned M1(unsigned Val)
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
unsigned getPerfectShuffleCost(llvm::ArrayRef< int > M)
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
bool isUZPMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut)
Return true for uzp1 or uzp2 masks of the form: <0, 2, 4, 6, 8, 10, 12, 14> or <1,...
bool isREVMask(ArrayRef< int > M, unsigned EltSize, unsigned NumElts, unsigned BlockSize)
isREVMask - Check if a vector shuffle corresponds to a REV instruction with the specified blocksize.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
constexpr int PoisonMaskElem
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
LLVM_ABI Value * simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a BinaryOperator, fold the result or return null.
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ Or
Bitwise or logical OR of integers.
@ AnyOf
AnyOf reduction with select(cmp(),x,y) where one of (x,y) is loop invariant, and both x and y are int...
@ Xor
Bitwise or logical XOR of integers.
@ FindLast
FindLast reduction with select(cmp(),x,y) where x and y.
@ FMax
FP max implemented in terms of select(cmp()).
@ FMulAdd
Sum of float products with llvm.fmuladd(a * b + sum).
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ And
Bitwise or logical AND of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ FMin
FP min implemented in terms of select(cmp()).
@ Sub
Subtraction of integers.
@ AddChainWithSubs
A chain of adds and subs.
@ UMax
Unsigned integer max implemented in terms of select(cmp()).
DWARFExpression::Operation Op
CostTblEntryT< unsigned > CostTblEntry
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
unsigned getNumElementsFromSVEPredPattern(unsigned Pattern)
Return the number of active elements for VL1 to VL256 predicate pattern, zero for all other patterns.
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer list are equal or the list is empty.
Type * toVectorTy(Type *Scalar, ElementCount EC)
A helper function for converting Scalar types to vector types.
LLVM_ABI std::optional< int64_t > getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const DominatorTree &DT, const DenseMap< Value *, const SCEV * > &StridesMap=DenseMap< Value *, const SCEV * >(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of the access type size.
const TypeConversionCostTblEntryT< CostType > * ConvertCostTableLookup(ArrayRef< TypeConversionCostTblEntryT< CostType > > Tbl, int ISD, MVT Dst, MVT Src)
Find in type conversion cost table.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
bool isTRNMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut, unsigned &OperandOrderOut)
Return true for trn1 or trn2 masks of the form: <0, 8, 2, 10, 4, 12, 6, 14> (WhichResultOut = 0,...
unsigned getMatchingIROpode() const
bool inactiveLanesAreUnused() const
bool inactiveLanesAreNotDefined() const
bool hasMatchingUndefIntrinsic() const
static SVEIntrinsicInfo defaultMergingUnaryNarrowingTopOp()
static SVEIntrinsicInfo defaultZeroingOp()
bool hasGoverningPredicate() const
SVEIntrinsicInfo & setOperandIdxInactiveLanesTakenFrom(unsigned Index)
static SVEIntrinsicInfo defaultMergingOp(Intrinsic::ID IID=Intrinsic::not_intrinsic)
SVEIntrinsicInfo & setOperandIdxWithNoActiveLanes(unsigned Index)
unsigned getOperandIdxWithNoActiveLanes() const
SVEIntrinsicInfo & setInactiveLanesAreUnused()
SVEIntrinsicInfo & setInactiveLanesAreNotDefined()
SVEIntrinsicInfo & setGoverningPredicateOperandIdx(unsigned Index)
bool inactiveLanesTakenFromOperand() const
static SVEIntrinsicInfo defaultUndefOp()
bool hasOperandWithNoActiveLanes() const
Intrinsic::ID getMatchingUndefIntrinsic() const
SVEIntrinsicInfo & setResultIsZeroInitialized()
static SVEIntrinsicInfo defaultMergingUnaryOp()
SVEIntrinsicInfo & setMatchingUndefIntrinsic(Intrinsic::ID IID)
unsigned getGoverningPredicateOperandIdx() const
bool hasMatchingIROpode() const
bool resultIsZeroInitialized() const
SVEIntrinsicInfo & setMatchingIROpcode(unsigned Opcode)
unsigned getOperandIdxInactiveLanesTakenFrom() const
static SVEIntrinsicInfo defaultVoidOp(unsigned GPIndex)
This struct is a compact representation of a valid (non-zero power of two) alignment.
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
uint64_t getScalarSizeInBits() const
static LLVM_ABI EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
bool isFixedLengthVector() const
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Machine model for scheduling, bundling, and heuristics.
static LLVM_ABI double getReciprocalThroughput(const MCSubtargetInfo &STI, const MCSchedClassDesc &SCDesc)
Information about a load/store intrinsic defined by the target.
InterleavedAccessInfo * IAI
LoopVectorizationLegality * LVL
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...