#include "llvm/IR/IntrinsicsAArch64.h"

#define DEBUG_TYPE "aarch64tti"

    "sve-prefer-fixed-over-scalable-if-equal", cl::Hidden);

        "Penalty of calling a function that requires a change to PSTATE.SM"));

    cl::desc(
        "Penalty of inlining a call that requires a change to PSTATE.SM"));

    cl::desc("The cost of a histcnt instruction"));

    cl::desc("The number of instructions to search for a redundant dmb"));

    cl::desc("Threshold for forced unrolling of small loops in AArch64"));
class TailFoldingOption {

  bool NeedsDefault = true;

  void setNeedsDefault(bool V) { NeedsDefault = V; }

    assert((InitialBits == TailFoldingOpts::Disabled || !NeedsDefault) &&
           "Initial bits should only include one of "
           "(disabled|all|simple|default)");
    Bits = NeedsDefault ? DefaultBits : InitialBits;
    Bits &= ~DisableBits;
    errs() << "invalid argument '" << Opt
           << "' to -sve-tail-folding=; the option should be of the form\n"
              "  (disabled|all|default|simple)[+(reductions|recurrences"
              "|reverse|noreductions|norecurrences|noreverse)]\n";
  void operator=(const std::string &Val) {

    setNeedsDefault(false);

    StringRef(Val).split(TailFoldTypes, '+', -1, false);

    unsigned StartIdx = 1;
    if (TailFoldTypes[0] == "disabled")
      setInitialBits(TailFoldingOpts::Disabled);
    else if (TailFoldTypes[0] == "all")
      setInitialBits(TailFoldingOpts::All);
    else if (TailFoldTypes[0] == "default")
      setNeedsDefault(true);
    else if (TailFoldTypes[0] == "simple")
      setInitialBits(TailFoldingOpts::Simple);
    else
      setInitialBits(TailFoldingOpts::Disabled);
    for (unsigned I = StartIdx; I < TailFoldTypes.size(); I++) {
      if (TailFoldTypes[I] == "reductions")
        setEnableBit(TailFoldingOpts::Reductions);
      else if (TailFoldTypes[I] == "recurrences")
        setEnableBit(TailFoldingOpts::Recurrences);
      else if (TailFoldTypes[I] == "reverse")
        setEnableBit(TailFoldingOpts::Reverse);
      else if (TailFoldTypes[I] == "noreductions")
        setDisableBit(TailFoldingOpts::Reductions);
      else if (TailFoldTypes[I] == "norecurrences")
        setDisableBit(TailFoldingOpts::Recurrences);
      else if (TailFoldTypes[I] == "noreverse")
        setDisableBit(TailFoldingOpts::Reverse);
196 "Control the use of vectorisation using tail-folding for SVE where the"
197 " option is specified in the form (Initial)[+(Flag1|Flag2|...)]:"
198 "\ndisabled (Initial) No loop types will vectorize using "
200 "\ndefault (Initial) Uses the default tail-folding settings for "
202 "\nall (Initial) All legal loop types will vectorize using "
204 "\nsimple (Initial) Use tail-folding for simple loops (not "
205 "reductions or recurrences)"
206 "\nreductions Use tail-folding for loops containing reductions"
207 "\nnoreductions Inverse of above"
208 "\nrecurrences Use tail-folding for loops containing fixed order "
210 "\nnorecurrences Inverse of above"
211 "\nreverse Use tail-folding for loops requiring reversed "
213 "\nnoreverse Inverse of above"),
      TTI->isMultiversionedFunction(F) ? "fmv-features" : "target-features";
  StringRef FeatureStr = F.getFnAttribute(AttributeStr).getValueAsString();
  FeatureStr.split(Features, ",");

  return F.hasFnAttribute("fmv-features");
      AArch64::FeatureExecuteOnly,

  FeatureBitset EffectiveCallerBits = CallerBits ^ InlineInverseFeatures;
  FeatureBitset EffectiveCalleeBits = CalleeBits ^ InlineInverseFeatures;
  return (EffectiveCallerBits & EffectiveCalleeBits) == EffectiveCalleeBits;
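  // Sketch of the check above (my reading, not asserted by the original
  // comments): XOR-ing the "inverse" features into both bitsets turns
  // "callee must *not* require X" into an ordinary subset test, so e.g. an
  // execute-only callee can still be inlined into a caller without that
  // attribute, while a callee that needs SVE cannot be inlined into a
  // non-SVE caller.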
  auto FVTy = dyn_cast<FixedVectorType>(Ty);
         FVTy->getScalarSizeInBits() * FVTy->getNumElements() > 128;
                                     unsigned DefaultCallPenalty) const {

  if (F == Call.getCaller())

  return DefaultCallPenalty;

         ST->isSVEorStreamingSVEAvailable() &&
         !ST->disableMaximizeScalableBandwidth();
  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();

    ImmVal = Imm.sext((BitSize + 63) & ~0x3fU);

  for (unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {

  return std::max<InstructionCost>(1, Cost);
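  // Rough intuition for the loop above (my gloss): the immediate is costed in
  // 64-bit chunks, so a 128-bit constant is priced as two materialisation
  // sequences, one per 64-bit half, and the final std::max keeps the result at
  // least 1 even when every chunk folds to zero cost.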
  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();

  unsigned ImmIdx = ~0U;
  case Instruction::GetElementPtr:

  case Instruction::Store:

  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::URem:
  case Instruction::SRem:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::ICmp:

  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:

  case Instruction::Trunc:
  case Instruction::ZExt:
  case Instruction::SExt:
  case Instruction::IntToPtr:
  case Instruction::PtrToInt:
  case Instruction::BitCast:
  case Instruction::PHI:
  case Instruction::Call:
  case Instruction::Select:
  case Instruction::Ret:
  case Instruction::Load:

    int NumConstants = (BitSize + 63) / 64;
  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();
  if (IID >= Intrinsic::aarch64_addg && IID <= Intrinsic::aarch64_udiv)

  case Intrinsic::sadd_with_overflow:
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::usub_with_overflow:
  case Intrinsic::smul_with_overflow:
  case Intrinsic::umul_with_overflow:

    int NumConstants = (BitSize + 63) / 64;

  case Intrinsic::experimental_stackmap:
    if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))

  case Intrinsic::experimental_patchpoint_void:
  case Intrinsic::experimental_patchpoint:
    if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))

  case Intrinsic::experimental_gc_statepoint:
    if ((Idx < 5) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
  if (TyWidth == 32 || TyWidth == 64)

  unsigned TotalHistCnts = 1;

    unsigned EC = VTy->getElementCount().getKnownMinValue();

    unsigned LegalEltSize = EltSize <= 32 ? 32 : 64;

    if (EC == 2 || (LegalEltSize == 32 && EC == 4))

    TotalHistCnts = EC / NaturalVectorWidth;
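    // Worked example (illustrative, assuming a 128-bit minimum SVE vector
    // length): for <vscale x 8 x i32> the minimum element count is 8 and the
    // legalised element size is 32, so the natural vector width is 128/32 = 4
    // and the operation is costed as 8/4 = 2 histcnt instructions.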
  switch (ICA.getID()) {
  case Intrinsic::experimental_vector_histogram_add: {

  case Intrinsic::umin:
  case Intrinsic::umax:
  case Intrinsic::smin:
  case Intrinsic::smax: {
    static const auto ValidMinMaxTys = {MVT::v8i8,  MVT::v16i8, MVT::v4i16,
                                        MVT::v8i16, MVT::v2i32, MVT::v4i32,
                                        MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32,

    if (LT.second == MVT::v2i64)
    if (any_of(ValidMinMaxTys, [&LT](MVT M) { return M == LT.second; }))
  case Intrinsic::sadd_sat:
  case Intrinsic::ssub_sat:
  case Intrinsic::uadd_sat:
  case Intrinsic::usub_sat: {
    static const auto ValidSatTys = {MVT::v8i8,  MVT::v16i8, MVT::v4i16,
                                     MVT::v8i16, MVT::v2i32, MVT::v4i32,

        LT.second.getScalarSizeInBits() == RetTy->getScalarSizeInBits() ? 1 : 4;
    if (any_of(ValidSatTys, [&LT](MVT M) { return M == LT.second; }))
      return LT.first * Instrs;

    if (ST->isSVEAvailable() && VectorSize >= 128 && isPowerOf2_64(VectorSize))
      return LT.first * Instrs;
  case Intrinsic::abs: {
    static const auto ValidAbsTys = {MVT::v8i8,  MVT::v16i8, MVT::v4i16,
                                     MVT::v8i16, MVT::v2i32, MVT::v4i32,

    if (any_of(ValidAbsTys, [&LT](MVT M) { return M == LT.second; }))

  case Intrinsic::bswap: {
    static const auto ValidAbsTys = {MVT::v4i16, MVT::v8i16, MVT::v2i32,
                                     MVT::v4i32, MVT::v2i64};

    if (any_of(ValidAbsTys, [&LT](MVT M) { return M == LT.second; }) &&
        LT.second.getScalarSizeInBits() == RetTy->getScalarSizeInBits())
  case Intrinsic::fmuladd: {

        (EltTy->isHalfTy() && ST->hasFullFP16()))

  case Intrinsic::stepvector: {

    Cost += AddCost * (LT.first - 1);

  case Intrinsic::vector_extract:
  case Intrinsic::vector_insert: {

    bool IsExtract = ICA.getID() == Intrinsic::vector_extract;
    EVT SubVecVT = IsExtract ? getTLI()->getValueType(DL, RetTy)

        getTLI()->getTypeConversion(C, SubVecVT);
        getTLI()->getTypeConversion(C, VecVT);
  case Intrinsic::bitreverse: {
        {Intrinsic::bitreverse, MVT::i32, 1},
        {Intrinsic::bitreverse, MVT::i64, 1},
        {Intrinsic::bitreverse, MVT::v8i8, 1},
        {Intrinsic::bitreverse, MVT::v16i8, 1},
        {Intrinsic::bitreverse, MVT::v4i16, 2},
        {Intrinsic::bitreverse, MVT::v8i16, 2},
        {Intrinsic::bitreverse, MVT::v2i32, 2},
        {Intrinsic::bitreverse, MVT::v4i32, 2},
        {Intrinsic::bitreverse, MVT::v1i64, 2},
        {Intrinsic::bitreverse, MVT::v2i64, 2},

      if (TLI->getValueType(DL, RetTy, true) == MVT::i8 ||
          TLI->getValueType(DL, RetTy, true) == MVT::i16)
        return LegalisationCost.first * Entry->Cost + 1;

      return LegalisationCost.first * Entry->Cost;
  case Intrinsic::ctpop: {
    if (!ST->hasNEON()) {

                       RetTy->getScalarSizeInBits()

    return LT.first * Entry->Cost + ExtraCost;
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::usub_with_overflow:
  case Intrinsic::smul_with_overflow:
  case Intrinsic::umul_with_overflow: {
        {Intrinsic::sadd_with_overflow, MVT::i8, 3},
        {Intrinsic::uadd_with_overflow, MVT::i8, 3},
        {Intrinsic::sadd_with_overflow, MVT::i16, 3},
        {Intrinsic::uadd_with_overflow, MVT::i16, 3},
        {Intrinsic::sadd_with_overflow, MVT::i32, 1},
        {Intrinsic::uadd_with_overflow, MVT::i32, 1},
        {Intrinsic::sadd_with_overflow, MVT::i64, 1},
        {Intrinsic::uadd_with_overflow, MVT::i64, 1},
        {Intrinsic::ssub_with_overflow, MVT::i8, 3},
        {Intrinsic::usub_with_overflow, MVT::i8, 3},
        {Intrinsic::ssub_with_overflow, MVT::i16, 3},
        {Intrinsic::usub_with_overflow, MVT::i16, 3},
        {Intrinsic::ssub_with_overflow, MVT::i32, 1},
        {Intrinsic::usub_with_overflow, MVT::i32, 1},
        {Intrinsic::ssub_with_overflow, MVT::i64, 1},
        {Intrinsic::usub_with_overflow, MVT::i64, 1},
        {Intrinsic::smul_with_overflow, MVT::i8, 5},
        {Intrinsic::umul_with_overflow, MVT::i8, 4},
        {Intrinsic::smul_with_overflow, MVT::i16, 5},
        {Intrinsic::umul_with_overflow, MVT::i16, 4},
        {Intrinsic::smul_with_overflow, MVT::i32, 2},
        {Intrinsic::umul_with_overflow, MVT::i32, 2},
        {Intrinsic::smul_with_overflow, MVT::i64, 3},
        {Intrinsic::umul_with_overflow, MVT::i64, 3},

    EVT MTy = TLI->getValueType(DL, RetTy->getContainedType(0), true);
  case Intrinsic::fptosi_sat:
  case Intrinsic::fptoui_sat: {

    bool IsSigned = ICA.getID() == Intrinsic::fptosi_sat;

    EVT MTy = TLI->getValueType(DL, RetTy);

    if ((LT.second == MVT::f32 || LT.second == MVT::f64 ||
         LT.second == MVT::v2f32 || LT.second == MVT::v4f32 ||
         LT.second == MVT::v2f64)) {
           (LT.second == MVT::f64 && MTy == MVT::i32) ||
           (LT.second == MVT::f32 && MTy == MVT::i64)))

    if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16())

    if ((LT.second == MVT::f16 && MTy == MVT::i32) ||
        (LT.second == MVT::f16 && MTy == MVT::i64) ||
        ((LT.second == MVT::v4f16 || LT.second == MVT::v8f16) &&

    if ((LT.second.getScalarType() == MVT::f32 ||
         LT.second.getScalarType() == MVT::f64 ||
         LT.second.getScalarType() == MVT::f16) &&

      if (LT.second.isVector())
          LegalTy, {LegalTy, LegalTy});
          LegalTy, {LegalTy, LegalTy});
      return LT.first * Cost +
             ((LT.second.getScalarType() != MVT::f16 || ST->hasFullFP16()) ? 0

    RetTy = RetTy->getScalarType();
    if (LT.second.isVector()) {

    return LT.first * Cost;
  case Intrinsic::fshl:
  case Intrinsic::fshr: {

    if (RetTy->isIntegerTy() && ICA.getArgs()[0] == ICA.getArgs()[1] &&
        (RetTy->getPrimitiveSizeInBits() == 32 ||
         RetTy->getPrimitiveSizeInBits() == 64)) {

        {Intrinsic::fshl, MVT::v4i32, 2},
        {Intrinsic::fshl, MVT::v2i64, 2}, {Intrinsic::fshl, MVT::v16i8, 2},
        {Intrinsic::fshl, MVT::v8i16, 2}, {Intrinsic::fshl, MVT::v2i32, 2},
        {Intrinsic::fshl, MVT::v8i8, 2},  {Intrinsic::fshl, MVT::v4i16, 2}};

      return LegalisationCost.first * Entry->Cost;

    if (!RetTy->isIntegerTy())

    bool HigherCost = (RetTy->getScalarSizeInBits() != 32 &&
                       RetTy->getScalarSizeInBits() < 64) ||
                      (RetTy->getScalarSizeInBits() % 64 != 0);
    unsigned ExtraCost = HigherCost ? 1 : 0;
    if (RetTy->getScalarSizeInBits() == 32 ||
        RetTy->getScalarSizeInBits() == 64)

    return TyL.first + ExtraCost;
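    // Note (my gloss, not from the original comments): when both funnel-shift
    // inputs are the same value the operation is a rotate, which AArch64 can
    // do with a single ROR/EXTR on 32- and 64-bit scalars, hence the cheap
    // path guarded by ICA.getArgs()[0] == ICA.getArgs()[1] above; other widths
    // pay ExtraCost for the extra masking/legalisation work.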
  case Intrinsic::get_active_lane_mask: {
    EVT RetVT = getTLI()->getValueType(DL, RetTy);

    if (getTLI()->shouldExpandGetActiveLaneMask(RetVT, OpVT))

    if (RetTy->isScalableTy()) {
      if (TLI->getTypeAction(RetTy->getContext(), RetVT) !=

      if (ST->hasSVE2p1() || ST->hasSME2()) {

      return Cost + (SplitCost * (Cost - 1));
  case Intrinsic::experimental_vector_match: {
    unsigned SearchSize = NeedleTy->getNumElements();
    if (!getTLI()->shouldExpandVectorMatch(SearchVT, SearchSize)) {

  case Intrinsic::experimental_cttz_elts: {
    if (!getTLI()->shouldExpandCttzElements(ArgVT)) {

  case Intrinsic::experimental_vector_extract_last_active:
    if (ST->isSVEorStreamingSVEAvailable()) {
  auto RequiredType = II.getType();

  assert(PN && "Expected Phi Node!");

  if (!PN->hasOneUse())
    return std::nullopt;

  for (Value *IncValPhi : PN->incoming_values()) {
        Reinterpret->getIntrinsicID() !=
            Intrinsic::aarch64_sve_convert_to_svbool ||
        RequiredType != Reinterpret->getArgOperand(0)->getType())
      return std::nullopt;

  for (unsigned I = 0; I < PN->getNumIncomingValues(); I++) {
    NPN->addIncoming(Reinterpret->getOperand(0), PN->getIncomingBlock(I));
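  // Rough shape of the rewrite above (my summary): a phi whose incoming values
  // are all aarch64.sve.convert.to.svbool(x_i) and whose single use converts
  // back to the original predicate type is replaced with a phi over the x_i
  // values directly, dropping the round trip through <vscale x 16 x i1>.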
    return GoverningPredicateIdx != std::numeric_limits<unsigned>::max();

    return GoverningPredicateIdx;

    GoverningPredicateIdx = Index;

    return UndefIntrinsic;

    UndefIntrinsic = IID;

    return ResultLanes == InactiveLanesTakenFromOperand;

    return OperandIdxForInactiveLanes;

    assert(ResultLanes == Uninitialized && "Cannot set property twice!");
    ResultLanes = InactiveLanesTakenFromOperand;
    OperandIdxForInactiveLanes = Index;

    return ResultLanes == InactiveLanesAreNotDefined;

    assert(ResultLanes == Uninitialized && "Cannot set property twice!");
    ResultLanes = InactiveLanesAreNotDefined;

    return ResultLanes == InactiveLanesAreUnused;

    assert(ResultLanes == Uninitialized && "Cannot set property twice!");
    ResultLanes = InactiveLanesAreUnused;

    ResultIsZeroInitialized = true;

    return OperandIdxWithNoActiveLanes != std::numeric_limits<unsigned>::max();

    return OperandIdxWithNoActiveLanes;

    OperandIdxWithNoActiveLanes = Index;

  unsigned GoverningPredicateIdx = std::numeric_limits<unsigned>::max();

  unsigned IROpcode = 0;

  enum PredicationStyle {
    InactiveLanesTakenFromOperand,
    InactiveLanesAreNotDefined,
    InactiveLanesAreUnused

  bool ResultIsZeroInitialized = false;
  unsigned OperandIdxForInactiveLanes = std::numeric_limits<unsigned>::max();
  unsigned OperandIdxWithNoActiveLanes = std::numeric_limits<unsigned>::max();
    return !isa<ScalableVectorType>(V->getType());

  case Intrinsic::aarch64_sve_fcvt_bf16f32_v2:
  case Intrinsic::aarch64_sve_fcvt_f16f32:
  case Intrinsic::aarch64_sve_fcvt_f16f64:
  case Intrinsic::aarch64_sve_fcvt_f32f16:
  case Intrinsic::aarch64_sve_fcvt_f32f64:
  case Intrinsic::aarch64_sve_fcvt_f64f16:
  case Intrinsic::aarch64_sve_fcvt_f64f32:
  case Intrinsic::aarch64_sve_fcvtlt_f32f16:
  case Intrinsic::aarch64_sve_fcvtlt_f64f32:
  case Intrinsic::aarch64_sve_fcvtx_f32f64:
  case Intrinsic::aarch64_sve_fcvtzs:
  case Intrinsic::aarch64_sve_fcvtzs_i32f16:
  case Intrinsic::aarch64_sve_fcvtzs_i32f64:
  case Intrinsic::aarch64_sve_fcvtzs_i64f16:
  case Intrinsic::aarch64_sve_fcvtzs_i64f32:
  case Intrinsic::aarch64_sve_fcvtzu:
  case Intrinsic::aarch64_sve_fcvtzu_i32f16:
  case Intrinsic::aarch64_sve_fcvtzu_i32f64:
  case Intrinsic::aarch64_sve_fcvtzu_i64f16:
  case Intrinsic::aarch64_sve_fcvtzu_i64f32:
  case Intrinsic::aarch64_sve_scvtf:
  case Intrinsic::aarch64_sve_scvtf_f16i32:
  case Intrinsic::aarch64_sve_scvtf_f16i64:
  case Intrinsic::aarch64_sve_scvtf_f32i64:
  case Intrinsic::aarch64_sve_scvtf_f64i32:
  case Intrinsic::aarch64_sve_ucvtf:
  case Intrinsic::aarch64_sve_ucvtf_f16i32:
  case Intrinsic::aarch64_sve_ucvtf_f16i64:
  case Intrinsic::aarch64_sve_ucvtf_f32i64:
  case Intrinsic::aarch64_sve_ucvtf_f64i32:

  case Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2:
  case Intrinsic::aarch64_sve_fcvtnt_f16f32:
  case Intrinsic::aarch64_sve_fcvtnt_f32f64:
  case Intrinsic::aarch64_sve_fcvtxnt_f32f64:
  case Intrinsic::aarch64_sve_fabd:
  case Intrinsic::aarch64_sve_fadd:
  case Intrinsic::aarch64_sve_fdiv:
  case Intrinsic::aarch64_sve_fmax:
  case Intrinsic::aarch64_sve_fmaxnm:
  case Intrinsic::aarch64_sve_fmin:
  case Intrinsic::aarch64_sve_fminnm:
  case Intrinsic::aarch64_sve_fmla:
  case Intrinsic::aarch64_sve_fmls:
  case Intrinsic::aarch64_sve_fmul:
  case Intrinsic::aarch64_sve_fmulx:
  case Intrinsic::aarch64_sve_fnmla:
  case Intrinsic::aarch64_sve_fnmls:
  case Intrinsic::aarch64_sve_fsub:
  case Intrinsic::aarch64_sve_add:
  case Intrinsic::aarch64_sve_mla:
  case Intrinsic::aarch64_sve_mls:
  case Intrinsic::aarch64_sve_mul:
  case Intrinsic::aarch64_sve_sabd:
  case Intrinsic::aarch64_sve_sdiv:
  case Intrinsic::aarch64_sve_smax:
  case Intrinsic::aarch64_sve_smin:
  case Intrinsic::aarch64_sve_smulh:
  case Intrinsic::aarch64_sve_sub:
  case Intrinsic::aarch64_sve_uabd:
  case Intrinsic::aarch64_sve_udiv:
  case Intrinsic::aarch64_sve_umax:
  case Intrinsic::aarch64_sve_umin:
  case Intrinsic::aarch64_sve_umulh:
  case Intrinsic::aarch64_sve_asr:
  case Intrinsic::aarch64_sve_lsl:
  case Intrinsic::aarch64_sve_lsr:
  case Intrinsic::aarch64_sve_and:
  case Intrinsic::aarch64_sve_bic:
  case Intrinsic::aarch64_sve_eor:
  case Intrinsic::aarch64_sve_orr:
  case Intrinsic::aarch64_sve_sqrshl:
  case Intrinsic::aarch64_sve_sqshl:
  case Intrinsic::aarch64_sve_sqsub:
  case Intrinsic::aarch64_sve_srshl:
  case Intrinsic::aarch64_sve_uqrshl:
  case Intrinsic::aarch64_sve_uqshl:
  case Intrinsic::aarch64_sve_uqsub:
  case Intrinsic::aarch64_sve_urshl:
  case Intrinsic::aarch64_sve_add_u:
  case Intrinsic::aarch64_sve_and_u:
  case Intrinsic::aarch64_sve_asr_u:
  case Intrinsic::aarch64_sve_eor_u:
  case Intrinsic::aarch64_sve_fadd_u:
  case Intrinsic::aarch64_sve_fdiv_u:
  case Intrinsic::aarch64_sve_fmul_u:
  case Intrinsic::aarch64_sve_fsub_u:
  case Intrinsic::aarch64_sve_lsl_u:
  case Intrinsic::aarch64_sve_lsr_u:
  case Intrinsic::aarch64_sve_mul_u:
  case Intrinsic::aarch64_sve_orr_u:
  case Intrinsic::aarch64_sve_sdiv_u:
  case Intrinsic::aarch64_sve_sub_u:
  case Intrinsic::aarch64_sve_udiv_u:
  case Intrinsic::aarch64_sve_addqv:
  case Intrinsic::aarch64_sve_and_z:
  case Intrinsic::aarch64_sve_bic_z:
  case Intrinsic::aarch64_sve_brka_z:
  case Intrinsic::aarch64_sve_brkb_z:
  case Intrinsic::aarch64_sve_brkn_z:
  case Intrinsic::aarch64_sve_brkpa_z:
  case Intrinsic::aarch64_sve_brkpb_z:
  case Intrinsic::aarch64_sve_cntp:
  case Intrinsic::aarch64_sve_compact:
  case Intrinsic::aarch64_sve_eor_z:
  case Intrinsic::aarch64_sve_eorv:
  case Intrinsic::aarch64_sve_eorqv:
  case Intrinsic::aarch64_sve_nand_z:
  case Intrinsic::aarch64_sve_nor_z:
  case Intrinsic::aarch64_sve_orn_z:
  case Intrinsic::aarch64_sve_orr_z:
  case Intrinsic::aarch64_sve_orv:
  case Intrinsic::aarch64_sve_orqv:
  case Intrinsic::aarch64_sve_pnext:
  case Intrinsic::aarch64_sve_rdffr_z:
  case Intrinsic::aarch64_sve_saddv:
  case Intrinsic::aarch64_sve_uaddv:
  case Intrinsic::aarch64_sve_umaxv:
  case Intrinsic::aarch64_sve_umaxqv:
  case Intrinsic::aarch64_sve_cmpeq:
  case Intrinsic::aarch64_sve_cmpeq_wide:
  case Intrinsic::aarch64_sve_cmpge:
  case Intrinsic::aarch64_sve_cmpge_wide:
  case Intrinsic::aarch64_sve_cmpgt:
  case Intrinsic::aarch64_sve_cmpgt_wide:
  case Intrinsic::aarch64_sve_cmphi:
  case Intrinsic::aarch64_sve_cmphi_wide:
  case Intrinsic::aarch64_sve_cmphs:
  case Intrinsic::aarch64_sve_cmphs_wide:
  case Intrinsic::aarch64_sve_cmple_wide:
  case Intrinsic::aarch64_sve_cmplo_wide:
  case Intrinsic::aarch64_sve_cmpls_wide:
  case Intrinsic::aarch64_sve_cmplt_wide:
  case Intrinsic::aarch64_sve_cmpne:
  case Intrinsic::aarch64_sve_cmpne_wide:
  case Intrinsic::aarch64_sve_facge:
  case Intrinsic::aarch64_sve_facgt:
  case Intrinsic::aarch64_sve_fcmpeq:
  case Intrinsic::aarch64_sve_fcmpge:
  case Intrinsic::aarch64_sve_fcmpgt:
  case Intrinsic::aarch64_sve_fcmpne:
  case Intrinsic::aarch64_sve_fcmpuo:
  case Intrinsic::aarch64_sve_ld1:
  case Intrinsic::aarch64_sve_ld1_gather:
  case Intrinsic::aarch64_sve_ld1_gather_index:
  case Intrinsic::aarch64_sve_ld1_gather_scalar_offset:
  case Intrinsic::aarch64_sve_ld1_gather_sxtw:
  case Intrinsic::aarch64_sve_ld1_gather_sxtw_index:
  case Intrinsic::aarch64_sve_ld1_gather_uxtw:
  case Intrinsic::aarch64_sve_ld1_gather_uxtw_index:
  case Intrinsic::aarch64_sve_ld1q_gather_index:
  case Intrinsic::aarch64_sve_ld1q_gather_scalar_offset:
  case Intrinsic::aarch64_sve_ld1q_gather_vector_offset:
  case Intrinsic::aarch64_sve_ld1ro:
  case Intrinsic::aarch64_sve_ld1rq:
  case Intrinsic::aarch64_sve_ld1udq:
  case Intrinsic::aarch64_sve_ld1uwq:
  case Intrinsic::aarch64_sve_ld2_sret:
  case Intrinsic::aarch64_sve_ld2q_sret:
  case Intrinsic::aarch64_sve_ld3_sret:
  case Intrinsic::aarch64_sve_ld3q_sret:
  case Intrinsic::aarch64_sve_ld4_sret:
  case Intrinsic::aarch64_sve_ld4q_sret:
  case Intrinsic::aarch64_sve_ldff1:
  case Intrinsic::aarch64_sve_ldff1_gather:
  case Intrinsic::aarch64_sve_ldff1_gather_index:
  case Intrinsic::aarch64_sve_ldff1_gather_scalar_offset:
  case Intrinsic::aarch64_sve_ldff1_gather_sxtw:
  case Intrinsic::aarch64_sve_ldff1_gather_sxtw_index:
  case Intrinsic::aarch64_sve_ldff1_gather_uxtw:
  case Intrinsic::aarch64_sve_ldff1_gather_uxtw_index:
  case Intrinsic::aarch64_sve_ldnf1:
  case Intrinsic::aarch64_sve_ldnt1:
  case Intrinsic::aarch64_sve_ldnt1_gather:
  case Intrinsic::aarch64_sve_ldnt1_gather_index:
  case Intrinsic::aarch64_sve_ldnt1_gather_scalar_offset:
  case Intrinsic::aarch64_sve_ldnt1_gather_uxtw:

  case Intrinsic::aarch64_sve_prf:
  case Intrinsic::aarch64_sve_prfb_gather_index:
  case Intrinsic::aarch64_sve_prfb_gather_scalar_offset:
  case Intrinsic::aarch64_sve_prfb_gather_sxtw_index:
  case Intrinsic::aarch64_sve_prfb_gather_uxtw_index:
  case Intrinsic::aarch64_sve_prfd_gather_index:
  case Intrinsic::aarch64_sve_prfd_gather_scalar_offset:
  case Intrinsic::aarch64_sve_prfd_gather_sxtw_index:
  case Intrinsic::aarch64_sve_prfd_gather_uxtw_index:
  case Intrinsic::aarch64_sve_prfh_gather_index:
  case Intrinsic::aarch64_sve_prfh_gather_scalar_offset:
  case Intrinsic::aarch64_sve_prfh_gather_sxtw_index:
  case Intrinsic::aarch64_sve_prfh_gather_uxtw_index:
  case Intrinsic::aarch64_sve_prfw_gather_index:
  case Intrinsic::aarch64_sve_prfw_gather_scalar_offset:
  case Intrinsic::aarch64_sve_prfw_gather_sxtw_index:
  case Intrinsic::aarch64_sve_prfw_gather_uxtw_index:

  case Intrinsic::aarch64_sve_st1_scatter:
  case Intrinsic::aarch64_sve_st1_scatter_scalar_offset:
  case Intrinsic::aarch64_sve_st1_scatter_sxtw:
  case Intrinsic::aarch64_sve_st1_scatter_sxtw_index:
  case Intrinsic::aarch64_sve_st1_scatter_uxtw:
  case Intrinsic::aarch64_sve_st1_scatter_uxtw_index:
  case Intrinsic::aarch64_sve_st1dq:
  case Intrinsic::aarch64_sve_st1q_scatter_index:
  case Intrinsic::aarch64_sve_st1q_scatter_scalar_offset:
  case Intrinsic::aarch64_sve_st1q_scatter_vector_offset:
  case Intrinsic::aarch64_sve_st1wq:
  case Intrinsic::aarch64_sve_stnt1:
  case Intrinsic::aarch64_sve_stnt1_scatter:
  case Intrinsic::aarch64_sve_stnt1_scatter_index:
  case Intrinsic::aarch64_sve_stnt1_scatter_scalar_offset:
  case Intrinsic::aarch64_sve_stnt1_scatter_uxtw:

  case Intrinsic::aarch64_sve_st2:
  case Intrinsic::aarch64_sve_st2q:

  case Intrinsic::aarch64_sve_st3:
  case Intrinsic::aarch64_sve_st3q:

  case Intrinsic::aarch64_sve_st4:
  case Intrinsic::aarch64_sve_st4q:
  Value *UncastedPred;

    Pred = UncastedPred;

    if (OrigPredTy->getMinNumElements() <=
            ->getMinNumElements())
      Pred = UncastedPred;

  return C && C->isAllOnesValue();

  if (Dup && Dup->getIntrinsicID() == Intrinsic::aarch64_sve_dup &&
      Dup->getOperand(1) == Pg && isa<Constant>(Dup->getOperand(2)))
static std::optional<Instruction *>

  Value *Op1 = II.getOperand(1);
  Value *Op2 = II.getOperand(2);

    return std::nullopt;

  if (SimpleII == Inactive)

static std::optional<Instruction *>

    return std::nullopt;

  II.setCalledFunction(NewDecl);

  return std::nullopt;
static std::optional<Instruction *>

    return std::nullopt;

  auto IntrinsicID = BinOp->getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::aarch64_sve_and_z:
  case Intrinsic::aarch64_sve_bic_z:
  case Intrinsic::aarch64_sve_eor_z:
  case Intrinsic::aarch64_sve_nand_z:
  case Intrinsic::aarch64_sve_nor_z:
  case Intrinsic::aarch64_sve_orn_z:
  case Intrinsic::aarch64_sve_orr_z:

    return std::nullopt;

  auto BinOpPred = BinOp->getOperand(0);
  auto BinOpOp1 = BinOp->getOperand(1);
  auto BinOpOp2 = BinOp->getOperand(2);

      PredIntr->getIntrinsicID() != Intrinsic::aarch64_sve_convert_to_svbool)
    return std::nullopt;

  auto PredOp = PredIntr->getOperand(0);
  if (PredOpTy != II.getType())
    return std::nullopt;

      Intrinsic::aarch64_sve_convert_from_svbool, {PredOpTy}, {BinOpOp1});
  NarrowedBinOpArgs.push_back(NarrowBinOpOp1);
  if (BinOpOp1 == BinOpOp2)
    NarrowedBinOpArgs.push_back(NarrowBinOpOp1);
      Intrinsic::aarch64_sve_convert_from_svbool, {PredOpTy}, {BinOpOp2}));
  auto NarrowedBinOp =

static std::optional<Instruction *>

    return BinOpCombine;

    return std::nullopt;

  Value *Cursor = II.getOperand(0), *EarliestReplacement = nullptr;

    if (CursorVTy->getElementCount().getKnownMinValue() <
        IVTy->getElementCount().getKnownMinValue())

    if (Cursor->getType() == IVTy)
      EarliestReplacement = Cursor;

    if (!IntrinsicCursor || !(IntrinsicCursor->getIntrinsicID() ==
                                  Intrinsic::aarch64_sve_convert_to_svbool ||
                              IntrinsicCursor->getIntrinsicID() ==
                                  Intrinsic::aarch64_sve_convert_from_svbool))

    CandidatesForRemoval.insert(CandidatesForRemoval.begin(), IntrinsicCursor);
    Cursor = IntrinsicCursor->getOperand(0);

  if (!EarliestReplacement)
    return std::nullopt;
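  // Sketch of the walk above (my summary): starting from the operand of a
  // convert.from.svbool, keep stepping through stacked convert.to.svbool /
  // convert.from.svbool calls as long as no lanes are lost (the cursor never
  // gets narrower than the final type), remember the earliest value that
  // already has the required type, and use it to replace the whole chain; the
  // skipped converts become removal candidates.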
  auto *OpPredicate = II.getOperand(0);

                                     II.getArgOperand(2));

    return std::nullopt;

                                II.getArgOperand(0), II.getArgOperand(2), uint64_t(0));

                                II.getArgOperand(0));

    return std::nullopt;

  if (!SplatValue || !SplatValue->isZero())
    return std::nullopt;

      DupQLane->getIntrinsicID() != Intrinsic::aarch64_sve_dupq_lane)
    return std::nullopt;

  if (!DupQLaneIdx || !DupQLaneIdx->isZero())
    return std::nullopt;

  if (!VecIns || VecIns->getIntrinsicID() != Intrinsic::vector_insert)
    return std::nullopt;

    return std::nullopt;

    return std::nullopt;

    return std::nullopt;

  if (!VecTy || !OutTy || VecTy->getNumElements() != OutTy->getMinNumElements())
    return std::nullopt;
  unsigned NumElts = VecTy->getNumElements();
  unsigned PredicateBits = 0;

  for (unsigned I = 0; I < NumElts; ++I) {
      return std::nullopt;
      PredicateBits |= 1 << (I * (16 / NumElts));

  if (PredicateBits == 0) {
    PFalse->takeName(&II);

  for (unsigned I = 0; I < 16; ++I)
    if ((PredicateBits & (1 << I)) != 0)

  unsigned PredSize = Mask & -Mask;

  for (unsigned I = 0; I < 16; I += PredSize)
    if ((PredicateBits & (1 << I)) == 0)
      return std::nullopt;

                                        {PredType}, {PTruePat});
      Intrinsic::aarch64_sve_convert_to_svbool, {PredType}, {PTrue});
  auto *ConvertFromSVBool =
                              {II.getType()}, {ConvertToSVBool});
  Value *Pg = II.getArgOperand(0);
  Value *Vec = II.getArgOperand(1);
  auto IntrinsicID = II.getIntrinsicID();
  bool IsAfter = IntrinsicID == Intrinsic::aarch64_sve_lasta;

    auto OpC = OldBinOp->getOpcode();
        OpC, NewLHS, NewRHS, OldBinOp, OldBinOp->getName(), II.getIterator());

  if (IsAfter && C && C->isNullValue()) {
    Extract->insertBefore(II.getIterator());
    Extract->takeName(&II);

    return std::nullopt;

  if (IntrPG->getIntrinsicID() != Intrinsic::aarch64_sve_ptrue)
    return std::nullopt;

  const auto PTruePattern =

    return std::nullopt;

  unsigned Idx = MinNumElts - 1;

  if (Idx >= PgVTy->getMinNumElements())
    return std::nullopt;

  Extract->insertBefore(II.getIterator());
  Extract->takeName(&II);
  Value *Pg = II.getArgOperand(0);

  Value *Vec = II.getArgOperand(2);

  if (!Ty->isIntegerTy())
    return std::nullopt;

    return std::nullopt;

      II.getIntrinsicID(), {FPVec->getType()}, {Pg, FPFallBack, FPVec});

                                   {II.getType()}, {AllPat});
static std::optional<Instruction *>

  if (Pattern == AArch64SVEPredPattern::all) {

  return MinNumElts && NumElts >= MinNumElts
                 II, ConstantInt::get(II.getType(), MinNumElts)))

static std::optional<Instruction *>

  if (!ST->isStreaming())
    return std::nullopt;

  Value *PgVal = II.getArgOperand(0);
  Value *OpVal = II.getArgOperand(1);

  if (PgVal == OpVal &&
      (II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_first ||
       II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_last)) {

    return std::nullopt;

  if (Pg->getIntrinsicID() == Intrinsic::aarch64_sve_convert_to_svbool &&
      OpIID == Intrinsic::aarch64_sve_convert_to_svbool &&

  if ((Pg == Op) && (II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_any) &&
      ((OpIID == Intrinsic::aarch64_sve_brka_z) ||
       (OpIID == Intrinsic::aarch64_sve_brkb_z) ||
       (OpIID == Intrinsic::aarch64_sve_brkpa_z) ||
       (OpIID == Intrinsic::aarch64_sve_brkpb_z) ||
       (OpIID == Intrinsic::aarch64_sve_rdffr_z) ||
       (OpIID == Intrinsic::aarch64_sve_and_z) ||
       (OpIID == Intrinsic::aarch64_sve_bic_z) ||
       (OpIID == Intrinsic::aarch64_sve_eor_z) ||
       (OpIID == Intrinsic::aarch64_sve_nand_z) ||
       (OpIID == Intrinsic::aarch64_sve_nor_z) ||
       (OpIID == Intrinsic::aarch64_sve_orn_z) ||
       (OpIID == Intrinsic::aarch64_sve_orr_z))) {

  return std::nullopt;
template <Intrinsic::ID MulOpc, Intrinsic::ID FuseOpc>
static std::optional<Instruction *>
                         bool MergeIntoAddendOp) {
  Value *MulOp0, *MulOp1, *AddendOp, *Mul;
  if (MergeIntoAddendOp) {
    AddendOp = II.getOperand(1);
    Mul = II.getOperand(2);
  } else {
    AddendOp = II.getOperand(2);
    Mul = II.getOperand(1);

    return std::nullopt;

  if (!Mul->hasOneUse())
    return std::nullopt;

  if (II.getType()->isFPOrFPVectorTy()) {
      return std::nullopt;
    return std::nullopt;

  if (MergeIntoAddendOp)
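  // Design note (my reading of the combine above): when the multiply feeds the
  // addend operand the pair can become a single fused multiply-add/sub
  // intrinsic (e.g. sve.fmla / sve.mla), but only if the multiply has a single
  // use, and for the FP case only if the fast-math flags on both instructions
  // allow contraction; otherwise the original pair of intrinsics is kept.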
static std::optional<Instruction *>
  Value *Pred = II.getOperand(0);
  Value *PtrOp = II.getOperand(1);
  Type *VecTy = II.getType();

  Load->copyMetadata(II);

static std::optional<Instruction *>
  Value *VecOp = II.getOperand(0);
  Value *Pred = II.getOperand(1);
  Value *PtrOp = II.getOperand(2);

  Store->copyMetadata(II);

  case Intrinsic::aarch64_sve_fmul_u:
    return Instruction::BinaryOps::FMul;
  case Intrinsic::aarch64_sve_fadd_u:
    return Instruction::BinaryOps::FAdd;
  case Intrinsic::aarch64_sve_fsub_u:
    return Instruction::BinaryOps::FSub;

    return Instruction::BinaryOpsEnd;
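  // Illustrative effect of the mapping above (my summary): an
  // aarch64.sve.fadd.u whose predicate is all-active can be rewritten as a
  // plain IR fadd (keeping the call's fast-math flags), which later passes can
  // optimise like any other floating-point add; intrinsics with no listed
  // mapping return BinaryOpsEnd and are left untouched.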
static std::optional<Instruction *>
  if (II.isStrictFP())
    return std::nullopt;

  auto *OpPredicate = II.getOperand(0);
  if (BinOpCode == Instruction::BinaryOpsEnd ||
    return std::nullopt;
      BinOpCode, II.getOperand(1), II.getOperand(2), II.getFastMathFlags());

                                           Intrinsic::aarch64_sve_mla>(
                                           Intrinsic::aarch64_sve_mad>(
  return std::nullopt;
static std::optional<Instruction *>
                                            Intrinsic::aarch64_sve_fmla>(IC, II,
                                            Intrinsic::aarch64_sve_fmad>(IC, II,
                                            Intrinsic::aarch64_sve_fmla>(IC, II,
  return std::nullopt;

static std::optional<Instruction *>
                                            Intrinsic::aarch64_sve_fmla>(IC, II,
                                            Intrinsic::aarch64_sve_fmad>(IC, II,
                                            Intrinsic::aarch64_sve_fmla_u>(

static std::optional<Instruction *>
                                            Intrinsic::aarch64_sve_fmls>(IC, II,
                                            Intrinsic::aarch64_sve_fnmsb>(
                                            Intrinsic::aarch64_sve_fmls>(IC, II,
  return std::nullopt;

static std::optional<Instruction *>
                                            Intrinsic::aarch64_sve_fmls>(IC, II,
                                            Intrinsic::aarch64_sve_fnmsb>(
                                            Intrinsic::aarch64_sve_fmls_u>(

                                            Intrinsic::aarch64_sve_mls>(
  return std::nullopt;
  Value *UnpackArg = II.getArgOperand(0);

  bool IsSigned = II.getIntrinsicID() == Intrinsic::aarch64_sve_sunpkhi ||
                  II.getIntrinsicID() == Intrinsic::aarch64_sve_sunpklo;

  return std::nullopt;

  auto *OpVal = II.getOperand(0);
  auto *OpIndices = II.getOperand(1);

      SplatValue->getValue().uge(VTy->getElementCount().getKnownMinValue()))
    return std::nullopt;
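  // Sketch (illustrative): an aarch64.sve.tbl whose index vector is a splat of
  // a constant i smaller than the minimum element count simply broadcasts
  // lane i, so the combine below can replace it with an extract of that lane
  // followed by a vector splat, removing the table lookup entirely.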
  Type *RetTy = II.getType();
  constexpr Intrinsic::ID FromSVB = Intrinsic::aarch64_sve_convert_from_svbool;
  constexpr Intrinsic::ID ToSVB = Intrinsic::aarch64_sve_convert_to_svbool;

  if ((match(II.getArgOperand(0),

    if (TyA == B->getType() &&
                                   TyA->getMinNumElements());

  return std::nullopt;

  if (match(II.getArgOperand(0),
        II, (II.getIntrinsicID() == Intrinsic::aarch64_sve_zip1 ? A : B));

  return std::nullopt;
static std::optional<Instruction *>
  Value *Mask = II.getOperand(0);
  Value *BasePtr = II.getOperand(1);
  Value *Index = II.getOperand(2);

      BasePtr->getPointerAlignment(II.getDataLayout());
                                      BasePtr, IndexBase);

  return std::nullopt;

static std::optional<Instruction *>
  Value *Val = II.getOperand(0);
  Value *Mask = II.getOperand(1);
  Value *BasePtr = II.getOperand(2);
  Value *Index = II.getOperand(3);

      BasePtr->getPointerAlignment(II.getDataLayout());
                                      BasePtr, IndexBase);

  return std::nullopt;
  Value *Pred = II.getOperand(0);
  Value *Vec = II.getOperand(1);
  Value *DivVec = II.getOperand(2);

  if (!SplatConstantInt)
    return std::nullopt;

  if (DivisorValue == -1)
    return std::nullopt;
  if (DivisorValue == 1)

        Intrinsic::aarch64_sve_asrd, {II.getType()}, {Pred, Vec, DivisorLog2});

        Intrinsic::aarch64_sve_asrd, {II.getType()}, {Pred, Vec, DivisorLog2});
        Intrinsic::aarch64_sve_neg, {ASRD->getType()}, {ASRD, Pred, ASRD});

  return std::nullopt;
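  // Illustrative effect of the combine above: a predicated sdiv by +16 becomes
  // a single sve.asrd by log2(16) = 4, and an sdiv by -16 becomes the same
  // asrd followed by a predicated negate; non-power-of-two divisors (and the
  // degenerate +1/-1 cases handled earlier) fall through unchanged.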
  size_t VecSize = Vec.size();

  size_t HalfVecSize = VecSize / 2;

    if (*LHS != nullptr && *RHS != nullptr) {

    if (*LHS == nullptr && *RHS != nullptr)

    return std::nullopt;

    Elts[Idx->getValue().getZExtValue()] = InsertElt->getOperand(1);
    CurrentInsertElt = InsertElt->getOperand(0);

    return std::nullopt;

  for (size_t I = 0; I < Elts.size(); I++) {
    if (Elts[I] == nullptr)

  if (InsertEltChain == nullptr)
    return std::nullopt;

  unsigned PatternWidth = IIScalableTy->getScalarSizeInBits() * Elts.size();
  unsigned PatternElementCount = IIScalableTy->getScalarSizeInBits() *
                                 IIScalableTy->getMinNumElements() /

  auto *WideShuffleMaskTy =

  auto NarrowBitcast =

  return std::nullopt;
  Value *Pred = II.getOperand(0);
  Value *Vec = II.getOperand(1);
  Value *Shift = II.getOperand(2);

  Value *AbsPred, *MergedValue;

    return std::nullopt;

    return std::nullopt;

    return std::nullopt;

                                         {II.getType()}, {Pred, Vec, Shift});
  Value *Vec = II.getOperand(0);

    return std::nullopt;

  auto *NI = II.getNextNode();
    return !I->mayReadOrWriteMemory() && !I->mayHaveSideEffects();
  while (LookaheadThreshold-- && CanSkipOver(NI)) {
    auto *NIBB = NI->getParent();
    NI = NI->getNextNode();
    if (auto *SuccBB = NIBB->getUniqueSuccessor())
      NI = &*SuccBB->getFirstNonPHIOrDbgOrLifetime();

  if (NextII && II.isIdenticalTo(NextII))

  return std::nullopt;
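  // Sketch of the redundant-barrier check above (my reading, with the search
  // window bounded by the lookahead option declared near the top of the file):
  // starting at the instruction after this dmb, skip forward over instructions
  // that neither touch memory nor have side effects, following a unique
  // successor block if needed; if an identical dmb is found within the window
  // the two barriers are effectively back-to-back and one of them can be
  // removed.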
                              {II.getType(), II.getOperand(0)->getType()},
                              {II.getOperand(0), II.getOperand(1)}));

  return std::nullopt;

  Value *Passthru = II.getOperand(0);

  auto *Mask = ConstantInt::get(Ty, MaskValue);

  return std::nullopt;

static std::optional<Instruction *>

  return std::nullopt;
std::optional<Instruction *>

  case Intrinsic::aarch64_dmb:
  case Intrinsic::aarch64_neon_fmaxnm:
  case Intrinsic::aarch64_neon_fminnm:
  case Intrinsic::aarch64_sve_convert_from_svbool:
  case Intrinsic::aarch64_sve_dup:
  case Intrinsic::aarch64_sve_dup_x:
  case Intrinsic::aarch64_sve_cmpne:
  case Intrinsic::aarch64_sve_cmpne_wide:
  case Intrinsic::aarch64_sve_rdffr:
  case Intrinsic::aarch64_sve_lasta:
  case Intrinsic::aarch64_sve_lastb:
  case Intrinsic::aarch64_sve_clasta_n:
  case Intrinsic::aarch64_sve_clastb_n:
  case Intrinsic::aarch64_sve_cntd:
  case Intrinsic::aarch64_sve_cntw:
  case Intrinsic::aarch64_sve_cnth:
  case Intrinsic::aarch64_sve_cntb:
  case Intrinsic::aarch64_sme_cntsd:
  case Intrinsic::aarch64_sve_ptest_any:
  case Intrinsic::aarch64_sve_ptest_first:
  case Intrinsic::aarch64_sve_ptest_last:
  case Intrinsic::aarch64_sve_fadd:
  case Intrinsic::aarch64_sve_fadd_u:
  case Intrinsic::aarch64_sve_fmul_u:
  case Intrinsic::aarch64_sve_fsub:
  case Intrinsic::aarch64_sve_fsub_u:
  case Intrinsic::aarch64_sve_add:
  case Intrinsic::aarch64_sve_add_u:
                                             Intrinsic::aarch64_sve_mla_u>(
  case Intrinsic::aarch64_sve_sub:
  case Intrinsic::aarch64_sve_sub_u:
                                             Intrinsic::aarch64_sve_mls_u>(
  case Intrinsic::aarch64_sve_tbl:
  case Intrinsic::aarch64_sve_uunpkhi:
  case Intrinsic::aarch64_sve_uunpklo:
  case Intrinsic::aarch64_sve_sunpkhi:
  case Intrinsic::aarch64_sve_sunpklo:
  case Intrinsic::aarch64_sve_uzp1:
  case Intrinsic::aarch64_sve_zip1:
  case Intrinsic::aarch64_sve_zip2:
  case Intrinsic::aarch64_sve_ld1_gather_index:
  case Intrinsic::aarch64_sve_st1_scatter_index:
  case Intrinsic::aarch64_sve_ld1:
  case Intrinsic::aarch64_sve_st1:
  case Intrinsic::aarch64_sve_sdiv:
  case Intrinsic::aarch64_sve_sel:
  case Intrinsic::aarch64_sve_srshl:
  case Intrinsic::aarch64_sve_dupq_lane:
  case Intrinsic::aarch64_sve_insr:
  case Intrinsic::aarch64_sve_whilelo:
  case Intrinsic::aarch64_sve_ptrue:
  case Intrinsic::aarch64_sve_uxtb:
  case Intrinsic::aarch64_sve_uxth:
  case Intrinsic::aarch64_sve_uxtw:
  case Intrinsic::aarch64_sme_in_streaming_mode:

  return std::nullopt;

                                 SimplifyAndSetOp) const {
  switch (II.getIntrinsicID()) {
  case Intrinsic::aarch64_neon_fcvtxn:
  case Intrinsic::aarch64_neon_rshrn:
  case Intrinsic::aarch64_neon_sqrshrn:
  case Intrinsic::aarch64_neon_sqrshrun:
  case Intrinsic::aarch64_neon_sqshrn:
  case Intrinsic::aarch64_neon_sqshrun:
  case Intrinsic::aarch64_neon_sqxtn:
  case Intrinsic::aarch64_neon_sqxtun:
  case Intrinsic::aarch64_neon_uqrshrn:
  case Intrinsic::aarch64_neon_uqshrn:
  case Intrinsic::aarch64_neon_uqxtn:
    SimplifyAndSetOp(&II, 0, OrigDemandedElts, UndefElts);

  return std::nullopt;
  return ST->isSVEAvailable() || (ST->isSVEorStreamingSVEAvailable() &&

  if (ST->useSVEForFixedLengthVectors() &&
        std::max(ST->getMinSVEVectorSizeInBits(), 128u));
  else if (ST->isNeonAvailable())

  if (ST->isSVEAvailable() || (ST->isSVEorStreamingSVEAvailable() &&

bool AArch64TTIImpl::isSingleExtWideningInstruction(
    Type *SrcOverrideTy) const {
      (DstEltSize != 16 && DstEltSize != 32 && DstEltSize != 64))

  Type *SrcTy = SrcOverrideTy;
  case Instruction::Add:
  case Instruction::Sub: {
    if (Opcode == Instruction::Sub)

  assert(SrcTy && "Expected some SrcTy");
  unsigned SrcElTySize = SrcTyL.second.getScalarSizeInBits();

      DstTyL.first * DstTyL.second.getVectorMinNumElements();
      SrcTyL.first * SrcTyL.second.getVectorMinNumElements();

  return NumDstEls == NumSrcEls && 2 * SrcElTySize == DstEltSize;
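// Illustrative example for the check above: a v8i16 add whose operands are
// sign/zero-extended from v8i8 has NumDstEls == NumSrcEls == 8 and
// 2 * 8 == 16, so it can be selected as a single saddl/uaddl-style widening
// instruction and the extends are treated as free; extending i8 all the way to
// i32 in one step fails the 2x check and is costed normally.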
Type *AArch64TTIImpl::isBinExtWideningInstruction(unsigned Opcode, Type *DstTy,
                                                  Type *SrcOverrideTy) const {
  if (Opcode != Instruction::Add && Opcode != Instruction::Sub &&
      Opcode != Instruction::Mul)

      (DstEltSize != 16 && DstEltSize != 32 && DstEltSize != 64))

  auto getScalarSizeWithOverride = [&](const Value *V) {
        ->getScalarSizeInBits();

  unsigned MaxEltSize = 0;
    unsigned EltSize0 = getScalarSizeWithOverride(Args[0]);
    unsigned EltSize1 = getScalarSizeWithOverride(Args[1]);
    MaxEltSize = std::max(EltSize0, EltSize1);
    unsigned EltSize0 = getScalarSizeWithOverride(Args[0]);
    unsigned EltSize1 = getScalarSizeWithOverride(Args[1]);
    if (EltSize0 >= DstEltSize / 2 || EltSize1 >= DstEltSize / 2)
    MaxEltSize = DstEltSize / 2;
  } else if (Opcode == Instruction::Mul &&
        getScalarSizeWithOverride(isa<ZExtInst>(Args[0]) ? Args[0] : Args[1]);

  if (MaxEltSize * 2 > DstEltSize)

  if (!Src->isVectorTy() || !TLI->isTypeLegal(TLI->getValueType(DL, Src)) ||
      (Src->isScalableTy() && !ST->hasSVE2()))

  if (AddUser && AddUser->getOpcode() == Instruction::Add)

  if (!Shr || Shr->getOpcode() != Instruction::LShr)

  if (!Trunc || Trunc->getOpcode() != Instruction::Trunc ||
      Src->getScalarSizeInBits() !=

  int ISD = TLI->InstructionOpcodeToISD(Opcode);

  if (I && I->hasOneUser()) {
    if (Type *ExtTy = isBinExtWideningInstruction(
            SingleUser->getOpcode(), Dst, Operands,
            Src != I->getOperand(0)->getType() ? Src : nullptr)) {

    if (isSingleExtWideningInstruction(
            SingleUser->getOpcode(), Dst, Operands,
            Src != I->getOperand(0)->getType() ? Src : nullptr)) {

      if (SingleUser->getOpcode() == Instruction::Add) {
        if (I == SingleUser->getOperand(1) ||
            cast<CastInst>(SingleUser->getOperand(1))->getOpcode() == Opcode)

  return Cost == 0 ? 0 : 1;
  EVT SrcTy = TLI->getValueType(DL, Src);
  EVT DstTy = TLI->getValueType(DL, Dst);

  if (!SrcTy.isSimple() || !DstTy.isSimple())

  if (!ST->hasSVE2() && !ST->isStreamingSVEAvailable() &&

    return AdjustCost(Entry->Cost);

  const unsigned int SVE_EXT_COST = 1;
  const unsigned int SVE_FCVT_COST = 1;
  const unsigned int SVE_UNPACK_ONCE = 4;
  const unsigned int SVE_UNPACK_TWICE = 16;
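  // How these constants compose (my reading): a conversion whose source must
  // first be widened pays SVE_EXT_COST, each resulting fcvt pays
  // SVE_FCVT_COST, and conversions that must split the input pay
  // SVE_UNPACK_ONCE (one level of unpklo/unpkhi, then two fcvts) or
  // SVE_UNPACK_TWICE (two levels, then four fcvts), which matches the shape of
  // the table entries below.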
      {ISD::FP_EXTEND, MVT::f64, MVT::f32, 1},
      {ISD::FP_EXTEND, MVT::v2f64, MVT::v2f32, 1},
      {ISD::FP_EXTEND, MVT::v4f64, MVT::v4f32, 2},

      {ISD::FP_EXTEND, MVT::f32, MVT::f16, 1},
      {ISD::FP_EXTEND, MVT::f64, MVT::f16, 1},
      {ISD::FP_EXTEND, MVT::v4f32, MVT::v4f16, 1},
      {ISD::FP_EXTEND, MVT::v8f32, MVT::v8f16, 2},
      {ISD::FP_EXTEND, MVT::v2f64, MVT::v2f16, 2},
      {ISD::FP_EXTEND, MVT::v4f64, MVT::v4f16, 3},
      {ISD::FP_EXTEND, MVT::v8f64, MVT::v8f16, 6},

      {ISD::FP_EXTEND, MVT::f32, MVT::bf16, 1},
      {ISD::FP_EXTEND, MVT::f64, MVT::bf16, 2},
      {ISD::FP_EXTEND, MVT::v4f32, MVT::v4bf16, 1},
      {ISD::FP_EXTEND, MVT::v8f32, MVT::v8bf16, 2},
      {ISD::FP_EXTEND, MVT::v2f64, MVT::v2bf16, 2},
      {ISD::FP_EXTEND, MVT::v4f64, MVT::v4bf16, 3},
      {ISD::FP_EXTEND, MVT::v8f64, MVT::v8bf16, 6},

       SVE_EXT_COST + SVE_FCVT_COST},
       SVE_EXT_COST + SVE_FCVT_COST},
       SVE_EXT_COST + SVE_FCVT_COST},
       SVE_EXT_COST + SVE_FCVT_COST},
       SVE_EXT_COST + SVE_FCVT_COST},
       SVE_EXT_COST + SVE_FCVT_COST},
       SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
       SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
       SVE_EXT_COST + SVE_FCVT_COST},
       SVE_EXT_COST + SVE_FCVT_COST},
       SVE_EXT_COST + SVE_FCVT_COST},
       SVE_EXT_COST + SVE_FCVT_COST},
       SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
       SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
       SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
       SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
       SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
       SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
       SVE_EXT_COST + SVE_FCVT_COST},
       SVE_EXT_COST + SVE_FCVT_COST},
       SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
       SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
       SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
       SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
       SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
       SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
       SVE_EXT_COST + SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
       SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
       SVE_EXT_COST + SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
       SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},

      {ISD::FP_EXTEND, MVT::nxv2f32, MVT::nxv2f16, 1},
      {ISD::FP_EXTEND, MVT::nxv4f32, MVT::nxv4f16, 1},
      {ISD::FP_EXTEND, MVT::nxv8f32, MVT::nxv8f16, 2},

      {ISD::FP_EXTEND, MVT::nxv2f32, MVT::nxv2bf16, 1},
      {ISD::FP_EXTEND, MVT::nxv4f32, MVT::nxv4bf16, 1},
      {ISD::FP_EXTEND, MVT::nxv8f32, MVT::nxv8bf16, 4},

      {ISD::FP_EXTEND, MVT::nxv2f64, MVT::nxv2f16, 1},
      {ISD::FP_EXTEND, MVT::nxv4f64, MVT::nxv4f16, 2},
      {ISD::FP_EXTEND, MVT::nxv8f64, MVT::nxv8f16, 4},

      {ISD::FP_EXTEND, MVT::nxv2f64, MVT::nxv2bf16, 2},
      {ISD::FP_EXTEND, MVT::nxv4f64, MVT::nxv4bf16, 6},
      {ISD::FP_EXTEND, MVT::nxv8f64, MVT::nxv8bf16, 14},

      {ISD::FP_EXTEND, MVT::nxv2f64, MVT::nxv2f32, 1},
      {ISD::FP_EXTEND, MVT::nxv4f64, MVT::nxv4f32, 2},
      {ISD::FP_EXTEND, MVT::nxv8f64, MVT::nxv8f32, 6},

      {ISD::BITCAST, MVT::nxv2f16, MVT::nxv2i16, 0},
      {ISD::BITCAST, MVT::nxv4f16, MVT::nxv4i16, 0},
      {ISD::BITCAST, MVT::nxv2f32, MVT::nxv2i32, 0},

      {ISD::BITCAST, MVT::nxv2i16, MVT::nxv2f16, 0},
      {ISD::BITCAST, MVT::nxv4i16, MVT::nxv4f16, 0},
      {ISD::BITCAST, MVT::nxv2i32, MVT::nxv2f32, 0},
  EVT WiderTy = SrcTy.bitsGT(DstTy) ? SrcTy : DstTy;
      ST->useSVEForFixedLengthVectors(WiderTy)) {
    std::pair<InstructionCost, MVT> LT =
    unsigned NumElements =

    return AdjustCost(Entry->Cost);

    if (ST->hasFullFP16())
      return AdjustCost(Entry->Cost);

      ST->isSVEorStreamingSVEAvailable() &&
      TLI->getTypeAction(Src->getContext(), SrcTy) ==
      TLI->getTypeAction(Dst->getContext(), DstTy) ==
        Opcode, LegalTy, Src, CCH, CostKind, I);
    return Part1 + Part2;

      ST->isSVEorStreamingSVEAvailable() && TLI->isTypeLegal(DstTy))

  assert((Opcode == Instruction::SExt || Opcode == Instruction::ZExt) &&
                                 CostKind, Index, nullptr, nullptr);

  auto DstVT = TLI->getValueType(DL, Dst);
  auto SrcVT = TLI->getValueType(DL, Src);

  if (!VecLT.second.isVector() || !TLI->isTypeLegal(DstVT))

  if (DstVT.getFixedSizeInBits() < SrcVT.getFixedSizeInBits())

  case Instruction::SExt:

  case Instruction::ZExt:
    if (DstVT.getSizeInBits() != 64u || SrcVT.getSizeInBits() == 32u)

  return Opcode == Instruction::PHI ? 0 : 1;
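  // Intuition for the checks above (my reading): an extractelement feeding a
  // sign-extend maps onto the lane-indexed SMOV form, and a zero-extend is
  // free when the result is 32 bits or the element is already 32 bits (UMOV to
  // a W register clears the upper bits); only zero-extending a narrower lane
  // all the way to i64 is costed as a separate instruction.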
    ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx) const {

  if (!LT.second.isVector())

  if (LT.second.isFixedLengthVector()) {
    unsigned Width = LT.second.getVectorNumElements();
    Index = Index % Width;
  auto ExtractCanFuseWithFmul = [&]() {

    auto IsAllowedScalarTy = [&](const Type *T) {
      return T->isFloatTy() || T->isDoubleTy() ||
             (T->isHalfTy() && ST->hasFullFP16());

    auto IsUserFMulScalarTy = [](const Value *EEUser) {
      return BO && BO->getOpcode() == BinaryOperator::FMul &&
             !BO->getType()->isVectorTy();

    auto IsExtractLaneEquivalentToZero = [&](unsigned Idx, unsigned EltSz) {
      return Idx == 0 || (RegWidth != 0 && (Idx * EltSz) % RegWidth == 0);

      DenseMap<User *, unsigned> UserToExtractIdx;
      for (auto *U : Scalar->users()) {
        if (!IsUserFMulScalarTy(U))
        UserToExtractIdx[U];
      if (UserToExtractIdx.empty())
      for (auto &[S, U, L] : ScalarUserAndIdx) {
        for (auto *U : S->users()) {
          if (UserToExtractIdx.contains(U)) {
            auto *Op0 = FMul->getOperand(0);
            auto *Op1 = FMul->getOperand(1);
            if ((Op0 == S && Op1 == S) || Op0 != S || Op1 != S) {
              UserToExtractIdx[U] = L;
      for (auto &[U, L] : UserToExtractIdx) {

    return !EE->users().empty() &&
           all_of(EE->users(), [&](const User *U) {
             if (!IsUserFMulScalarTy(U))

             const auto *BO = cast<BinaryOperator>(U);
             const auto *OtherEE = dyn_cast<ExtractElementInst>(
                 BO->getOperand(0) == EE ? BO->getOperand(1) : BO->getOperand(0));
             const auto *IdxOp = dyn_cast<ConstantInt>(OtherEE->getIndexOperand());
             return IsExtractLaneEquivalentToZero(
                 cast<ConstantInt>(OtherEE->getIndexOperand())
                 OtherEE->getType()->getScalarSizeInBits());

  if (Opcode == Instruction::ExtractElement && (I || Scalar) &&
      ExtractCanFuseWithFmul())

             : ST->getVectorInsertExtractBaseCost();
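  // Rationale (my summary): AArch64 FMUL/FMLA have a by-element form, so an
  // extractelement whose only users are scalar fmuls and whose lane lines up
  // with the start of a 128-bit register segment (lane 0, or a multiple of
  // 128/EltSize) is effectively folded into the multiply and costed as free;
  // everything else pays the usual insert/extract base cost.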
                                                   const Value *Op1) const {

  if (Opcode == Instruction::InsertElement && Index == 0 && Op0 &&

  return getVectorInstrCostHelper(Opcode, Val, CostKind, Index);

    ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx) const {
  return getVectorInstrCostHelper(Opcode, Val, CostKind, Index, nullptr, Scalar,

                                               unsigned Index) const {
  return getVectorInstrCostHelper(I.getOpcode(), Val, CostKind, Index, &I);

                                                 unsigned Index) const {

                 : ST->getVectorInsertExtractBaseCost() + 1;

  if (Ty->getElementType()->isFloatingPointTy())

  unsigned VecInstCost =
  return DemandedElts.popcount() * (Insert + Extract) * VecInstCost;
  if (!Ty->getScalarType()->isHalfTy() && !Ty->getScalarType()->isBFloatTy())
    return std::nullopt;
  if (Ty->getScalarType()->isHalfTy() && ST->hasFullFP16())
    return std::nullopt;
  if (CanUseSVE && Ty->isScalableTy() && ST->hasSVEB16B16() &&
      ST->isNonStreamingSVEorSME2Available())
    return std::nullopt;

  Cost += InstCost(PromotedTy);
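  // In other words (my paraphrase): half/bfloat arithmetic the target cannot
  // do natively is costed as "extend to f32, do the operation there, truncate
  // back", so the caller receives the promoted-type cost plus the conversion
  // overhead rather than a bare f16/bf16 cost.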
                                                  Op2Info, Args, CxtI);

  int ISD = TLI->InstructionOpcodeToISD(Opcode);

          Ty, CostKind, Op1Info, Op2Info, true,
          [&](Type *PromotedTy) {
    return *PromotedCost;

  if (Type *ExtTy = isBinExtWideningInstruction(Opcode, Ty, Args)) {

    auto VT = TLI->getValueType(DL, Ty);
    if (VT.isScalarInteger() && VT.getSizeInBits() <= 64) {
                 : (3 * AsrCost + AddCost);
        return MulCost + AsrCost + 2 * AddCost;
    } else if (VT.isVector()) {
        if (Ty->isScalableTy() && ST->hasSVE())
          Cost += 2 * AsrCost;
                   ? (LT.second.getScalarType() == MVT::i64 ? 1 : 2) * AsrCost
      } else if (LT.second == MVT::v2i64) {
        return VT.getVectorNumElements() *
      if (Ty->isScalableTy() && ST->hasSVE())
        return MulCost + 2 * AddCost + 2 * AsrCost;
      return 2 * MulCost + AddCost + AsrCost + UsraCost;

        LT.second.isFixedLengthVector()) {
      return ExtractCost + InsertCost +

    auto VT = TLI->getValueType(DL, Ty);

    bool HasMULH = VT == MVT::i64 || LT.second == MVT::nxv2i64 ||
                   LT.second == MVT::nxv4i32 || LT.second == MVT::nxv8i16 ||
                   LT.second == MVT::nxv16i8;
    bool Is128bit = LT.second.is128BitVector();

                              (HasMULH ? 0 : ShrCost) +
                              AddCost * 2 + ShrCost;
    return DivCost + (ISD == ISD::UREM ? MulCost + AddCost : 0);

    if (!VT.isVector() && VT.getSizeInBits() > 64)
          Opcode, Ty, CostKind, Op1Info, Op2Info);

    if (TLI->isOperationLegalOrCustom(ISD, LT.second) && ST->hasSVE()) {
        Ty->getPrimitiveSizeInBits().getFixedValue() < 128) {

      if (nullptr != Entry)

      if (LT.second.getScalarType() == MVT::i8)
      else if (LT.second.getScalarType() == MVT::i16)
            Opcode, Ty->getScalarType(), CostKind, Op1Info, Op2Info);
        return (4 + DivCost) * VTy->getNumElements();

                                         -1, nullptr, nullptr);
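  // Example of what the division costing above models (my reading): a scalar
  // sdiv by a power of two is a short asr/add fixup sequence, while divisions
  // by other constants are priced as the multiply-by-magic-constant expansion
  // (mul or mulh plus shifts and adds), which is why the totals are assembled
  // from MulCost/AddCost/AsrCost/ShrCost/UsraCost rather than a single flat
  // number.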
    if (LT.second == MVT::v2i64 && ST->hasSVE())

    if (LT.second != MVT::v2i64)

    if ((Ty->isFloatTy() || Ty->isDoubleTy() ||
         (Ty->isHalfTy() && ST->hasFullFP16())) &&

    if (!Ty->getScalarType()->isFP128Ty())

    if (!Ty->getScalarType()->isFP128Ty())
      return 2 * LT.first;

  if (!Ty->isVectorTy())

    int MaxMergeDistance = 64;

      return NumVectorInstToHideOverhead;
                                           unsigned Opcode1,
                                           unsigned Opcode2) const {

  if (!Sched.hasInstrSchedModel())

      Sched.getSchedClassDesc(TII->get(Opcode1).getSchedClass());
      Sched.getSchedClassDesc(TII->get(Opcode2).getSchedClass());

         "Cannot handle variant scheduling classes without an MI");
  const int AmortizationCost = 20;

      VecPred = CurrentPred;

    static const auto ValidMinMaxTys = {
        MVT::v8i8,  MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32,
        MVT::v4i32, MVT::v2i64, MVT::v2f32, MVT::v4f32, MVT::v2f64};
    static const auto ValidFP16MinMaxTys = {MVT::v4f16, MVT::v8f16};

    if (any_of(ValidMinMaxTys, [&LT](MVT M) { return M == LT.second; }) ||
        (ST->hasFullFP16() &&
         any_of(ValidFP16MinMaxTys, [&LT](MVT M) { return M == LT.second; })))

      {Instruction::Select, MVT::v2i1, MVT::v2f32, 2},
      {Instruction::Select, MVT::v2i1, MVT::v2f64, 2},
      {Instruction::Select, MVT::v4i1, MVT::v4f32, 2},
      {Instruction::Select, MVT::v4i1, MVT::v4f16, 2},
      {Instruction::Select, MVT::v8i1, MVT::v8f16, 2},
      {Instruction::Select, MVT::v16i1, MVT::v16i16, 16},
      {Instruction::Select, MVT::v8i1, MVT::v8i32, 8},
      {Instruction::Select, MVT::v16i1, MVT::v16i32, 16},
      {Instruction::Select, MVT::v4i1, MVT::v4i64, 4 * AmortizationCost},
      {Instruction::Select, MVT::v8i1, MVT::v8i64, 8 * AmortizationCost},
      {Instruction::Select, MVT::v16i1, MVT::v16i64, 16 * AmortizationCost}};

    EVT SelCondTy = TLI->getValueType(DL, CondTy);
    EVT SelValTy = TLI->getValueType(DL, ValTy);
  if (Opcode == Instruction::FCmp) {
            ValTy, CostKind, Op1Info, Op2Info, false,
            false, [&](Type *PromotedTy) {
      return *PromotedCost;

    if (LT.second.getScalarType() != MVT::f64 &&
        LT.second.getScalarType() != MVT::f32 &&
        LT.second.getScalarType() != MVT::f16)

    unsigned Factor = 1;

                                  AArch64::FCMEQv4f32))

      TLI->isTypeLegal(TLI->getValueType(DL, ValTy)) &&

                                Op1Info, Op2Info, I);
  if (ST->requiresStrictAlign()) {

  Options.AllowOverlappingLoads = true;
  Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);

  Options.LoadSizes = {8, 4, 2, 1};
  Options.AllowedTailExpansions = {3, 5, 6};
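  // Rough intent of the options above (my reading): memcmp calls up to
  // MaxNumLoads loads are expanded inline using 8/4/2/1-byte accesses, loads
  // may overlap so an awkward length like 15 can be covered by two overlapping
  // 8-byte loads, and tails of 3, 5 or 6 bytes are still allowed to be
  // expanded rather than forcing a library call.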
  return ST->hasSVE();

  switch (MICA.getID()) {
  case Intrinsic::masked_scatter:
  case Intrinsic::masked_gather:
  case Intrinsic::masked_load:
  case Intrinsic::masked_store:

  if (!LT.first.isValid())

  if (VT->getElementType()->isIntegerTy(1))

  assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
         "Should be called on only load or stores.");
  case Instruction::Load:
      return ST->getGatherOverhead();
  case Instruction::Store:
      return ST->getScatterOverhead();
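  // Note (my summary): gathers and scatters are costed per element, so the
  // per-element load/store price is multiplied by the legalised element count
  // further down and then inflated by the subtarget's gather/scatter overhead
  // returned here; subtargets that model gathers as slow simply report a
  // larger overhead value.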
  unsigned Opcode = (MICA.getID() == Intrinsic::masked_gather ||
                     MICA.getID() == Intrinsic::vp_gather)
                        : Instruction::Store;

  if (!LT.first.isValid())

  if (!LT.second.isVector() ||
      VT->getElementType()->isIntegerTy(1))

  ElementCount LegalVF = LT.second.getVectorElementCount();
                                         {TTI::OK_AnyValue, TTI::OP_None}, I);

  EVT VT = TLI->getValueType(DL, Ty, true);

  if (VT == MVT::Other)
4891 if (!LT.first.isValid())
4901 (VTy->getElementType()->isIntegerTy(1) &&
4902 !VTy->getElementCount().isKnownMultipleOf(
4913 if (ST->isMisaligned128StoreSlow() && Opcode == Instruction::Store &&
4914 LT.second.is128BitVector() && Alignment <
Align(16)) {
4920 const int AmortizationCost = 6;
4922 return LT.first * 2 * AmortizationCost;
4926 if (Ty->isPtrOrPtrVectorTy())
4931 if (Ty->getScalarSizeInBits() != LT.second.getScalarSizeInBits()) {
4933 if (VT == MVT::v4i8)
4940 if (!
isPowerOf2_32(EltSize) || EltSize < 8 || EltSize > 64 ||
4955 while (!TypeWorklist.
empty()) {
4977 bool UseMaskForCond,
bool UseMaskForGaps)
const {
4978 assert(Factor >= 2 &&
"Invalid interleave factor");
4993 if (!VecTy->
isScalableTy() && (UseMaskForCond || UseMaskForGaps))
4996 if (!UseMaskForGaps && Factor <= TLI->getMaxSupportedInterleaveFactor()) {
4997 unsigned MinElts = VecVTy->getElementCount().getKnownMinValue();
5000 VecVTy->getElementCount().divideCoefficientBy(Factor));
5006 if (MinElts % Factor == 0 &&
5007 TLI->isLegalInterleavedAccessType(SubVecTy, DL, UseScalable))
5008 return Factor * TLI->getNumInterleavedAccesses(SubVecTy, DL, UseScalable);
5013 UseMaskForCond, UseMaskForGaps);
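// Illustrative sketch, not part of the original source: when the interleave factor
// is supported and the de-interleaved sub-vector is a legal interleaved-access
// type, the cost computed above is "Factor * number of ldN/stN accesses", where
// the number of accesses grows with how many 128-bit registers the sub-vector
// needs. The helper below is a hypothetical approximation of that shape.
#include <cstdio>

static unsigned approxInterleavedCost(unsigned Factor, unsigned NumElts,
                                      unsigned EltBits) {
  unsigned SubVecBits = (NumElts / Factor) * EltBits; // one de-interleaved group
  unsigned NumAccesses = (SubVecBits + 127) / 128;    // 128-bit NEON registers
  return Factor * NumAccesses;
}

int main() {
  // A stride-2 interleaved group over v16i32: each sub-vector is v8i32 (256 bits),
  // i.e. two 128-bit accesses, times a factor of 2 -> estimated cost 4.
  std::printf("%u\n", approxInterleavedCost(2, 16, 32));
  return 0;
}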
5020 for (auto *I : Tys) {
5021 if (!I->isVectorTy())
5032 return ST->getMaxInterleaveFactor();
5042 enum { MaxStridedLoads = 7 };
5044 int StridedLoads = 0;
5047 for (const auto BB : L->blocks()) {
5048 for (auto &I : *BB) {
5054 if (L->isLoopInvariant(PtrValue))
5059 if (!LSCEVAddRec || !LSCEVAddRec->isAffine())
5068 if (StridedLoads > MaxStridedLoads / 2)
5069 return StridedLoads;
5072 return StridedLoads;
5075 int StridedLoads = countStridedLoads(L, SE);
5077 << " strided loads\n");
5093 unsigned *FinalSize) {
5097 for (auto *BB : L->getBlocks()) {
5098 for (auto &I : *BB) {
5104 if (!Cost.isValid())
5108 if (LoopCost > Budget)
5130 if (MaxTC > 0 && MaxTC <= 32)
5141 if (Blocks.size() != 2)
5163 if (!L->isInnermost() || L->getNumBlocks() > 8)
5167 if (!L->getExitBlock())
5173 bool HasParellelizableReductions =
5174 L->getNumBlocks() == 1 &&
5175 any_of(L->getHeader()->phis(),
5177 return canParallelizeReductionWhenUnrolling(Phi, L, &SE);
5180 if (HasParellelizableReductions &&
5202 if (HasParellelizableReductions) {
5213 if (Header == Latch) {
5216 unsigned Width = 10;
5222 unsigned MaxInstsPerLine = 16;
5224 unsigned BestUC = 1;
5225 unsigned SizeWithBestUC = BestUC * Size;
5227 unsigned SizeWithUC = UC * Size;
5228 if (SizeWithUC > 48)
5230 if ((SizeWithUC % MaxInstsPerLine) == 0 ||
5231 (SizeWithBestUC % MaxInstsPerLine) < (SizeWithUC % MaxInstsPerLine)) {
5233 SizeWithBestUC = BestUC * Size;
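// Illustrative sketch, not part of the original source: the fragment above picks an
// unroll count for a small single-block loop so the unrolled body fills
// 16-instruction fetch lines as completely as possible while staying within a
// ~48-instruction budget. A simplified standalone rendering of that selection rule
// (the candidate range 2..8 here is an assumption):
#include <cstdio>

static unsigned pickUnrollCount(unsigned Size) {
  const unsigned MaxInstsPerLine = 16;
  unsigned BestUC = 1;
  unsigned SizeWithBestUC = BestUC * Size;
  for (unsigned UC = 2; UC <= 8; ++UC) {
    unsigned SizeWithUC = UC * Size;
    if (SizeWithUC > 48)
      break;
    if ((SizeWithUC % MaxInstsPerLine) == 0 ||
        (SizeWithBestUC % MaxInstsPerLine) < (SizeWithUC % MaxInstsPerLine)) {
      BestUC = UC;
      SizeWithBestUC = BestUC * Size;
    }
  }
  return BestUC;
}

int main() {
  for (unsigned Size : {5, 6, 7, 12})
    std::printf("body of %u instructions -> unroll count %u\n", Size,
                pickUnrollCount(Size));
  return 0;
}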
5243 for (auto *BB : L->blocks()) {
5244 for (auto &I : *BB) {
5254 for (auto *U : I.users())
5256 LoadedValuesPlus.insert(U);
5263 return LoadedValuesPlus.contains(SI->getOperand(0));
5276 if (!Term || !Term->isConditional() || Preds.size() == 1 ||
5290 auto *I = dyn_cast<Instruction>(V);
5291 return I && DependsOnLoopLoad(I, Depth + 1);
5298 DependsOnLoopLoad(I, 0)) {
5314 if (L->getLoopDepth() > 1)
5325 for (auto *BB : L->getBlocks()) {
5326 for (auto &I : *BB) {
5330 if (IsVectorized && I.getType()->isVectorTy())
5347 if (ST->isAppleMLike())
5349 else if (ST->getProcFamily() == AArch64Subtarget::Falkor &&
5371 !ST->getSchedModel().isOutOfOrder()) {
5394 bool CanCreate) const {
5398 case Intrinsic::aarch64_neon_st2:
5399 case Intrinsic::aarch64_neon_st3:
5400 case Intrinsic::aarch64_neon_st4: {
5403 if (!CanCreate || !ST)
5405 unsigned NumElts = Inst->arg_size() - 1;
5406 if (ST->getNumElements() != NumElts)
5408 for (unsigned i = 0, e = NumElts; i != e; ++i) {
5414 for (unsigned i = 0, e = NumElts; i != e; ++i) {
5416 Res = Builder.CreateInsertValue(Res, L, i);
5420 case Intrinsic::aarch64_neon_ld2:
5421 case Intrinsic::aarch64_neon_ld3:
5422 case Intrinsic::aarch64_neon_ld4:
5423 if (Inst->getType() == ExpectedType)
5434 case Intrinsic::aarch64_neon_ld2:
5435 case Intrinsic::aarch64_neon_ld3:
5436 case Intrinsic::aarch64_neon_ld4:
5437 Info.ReadMem = true;
5438 Info.WriteMem = false;
5441 case Intrinsic::aarch64_neon_st2:
5442 case Intrinsic::aarch64_neon_st3:
5443 case Intrinsic::aarch64_neon_st4:
5444 Info.ReadMem = false;
5445 Info.WriteMem = true;
5453 case Intrinsic::aarch64_neon_ld2:
5454 case Intrinsic::aarch64_neon_st2:
5455 Info.MatchingId = VECTOR_LDST_TWO_ELEMENTS;
5457 case Intrinsic::aarch64_neon_ld3:
5458 case Intrinsic::aarch64_neon_st3:
5459 Info.MatchingId = VECTOR_LDST_THREE_ELEMENTS;
5461 case Intrinsic::aarch64_neon_ld4:
5462 case Intrinsic::aarch64_neon_st4:
5463 Info.MatchingId = VECTOR_LDST_FOUR_ELEMENTS;
5475 const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
5476 bool Considerable = false;
5477 AllowPromotionWithoutCommonHeader = false;
5480 Type *ConsideredSExtType =
5482 if (I.getType() != ConsideredSExtType)
5486 for (const User *U : I.users()) {
5488 Considerable = true;
5492 if (GEPInst->getNumOperands() > 2) {
5493 AllowPromotionWithoutCommonHeader = true;
5498 return Considerable;
5546 if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16())
5556 return LegalizationCost + 2;
5566 LegalizationCost *= LT.first - 1;
5569 int ISD = TLI->InstructionOpcodeToISD(Opcode);
5578 return LegalizationCost + 2;
5586 std::optional<FastMathFlags> FMF,
5602 return BaseCost + FixedVTy->getNumElements();
5605 if (Opcode != Instruction::FAdd)
5619 MVT MTy = LT.second;
5620 int ISD = TLI->InstructionOpcodeToISD(Opcode);
5668 MTy.isVector() && (EltTy->isFloatTy() || EltTy->isDoubleTy() ||
5669 (EltTy->isHalfTy() && ST->hasFullFP16()))) {
5671 if (ValTy->getElementCount().getFixedValue() >= 2 && NElts >= 2 &&
5681 return (LT.first - 1) + Log2_32(NElts);
5686 return (LT.first - 1) + Entry->Cost;
5698 if (LT.first != 1) {
5704 ExtraCost *= LT.first - 1;
5707 auto Cost = ValVTy->getElementType()->isIntegerTy(1) ? 2 : Entry->Cost;
5708 return Cost + ExtraCost;
5716 unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *VecTy,
5718 EVT VecVT = TLI->getValueType(DL, VecTy);
5719 EVT ResVT = TLI->getValueType(DL, ResTy);
5729 if (((LT.second == MVT::v8i8 || LT.second == MVT::v16i8) &&
5731 ((LT.second == MVT::v4i16 || LT.second == MVT::v8i16) &&
5733 ((LT.second == MVT::v2i32 || LT.second == MVT::v4i32) &&
5735 return (LT.first - 1) * 2 + 2;
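// Illustrative worked example, not part of the original source: the formula above
// charges (LT.first - 1) * 2 + 2 for an extend-then-add reduction that maps onto
// the NEON widening reduce patterns. "Parts" below stands in for LT.first, the
// number of legal registers after type legalization.
#include <cstdio>

static unsigned extendedAddReductionCost(unsigned Parts) {
  return (Parts - 1) * 2 + 2;
}

int main() {
  // One legal v16i8 register -> 2; an input split over two registers -> 4.
  std::printf("%u %u\n", extendedAddReductionCost(1), extendedAddReductionCost(2));
  return 0;
}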
5746 EVT VecVT = TLI->getValueType(DL, VecTy);
5747 EVT ResVT = TLI->getValueType(DL, ResTy);
5750 RedOpcode == Instruction::Add) {
5756 if ((LT.second == MVT::v8i8 || LT.second == MVT::v16i8) &&
5758 return LT.first + 2;
5793 EVT PromotedVT = LT.second.getScalarType() == MVT::i1
5794 ? TLI->getPromotedVTForPredicate(EVT(LT.second))
5808 if (LT.second.getScalarType() == MVT::i1) {
5817 assert(Entry && "Illegal Type for Splice");
5818 LegalizationCost += Entry->Cost;
5819 return LegalizationCost * LT.first;
5823 unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType,
5832 if (VF.isFixed() && !ST->isSVEorStreamingSVEAvailable() &&
5833 (!ST->isNeonAvailable() || !ST->hasDotProd()))
5836 if ((Opcode != Instruction::Add && Opcode != Instruction::Sub) ||
5841 (!BinOp || (OpBExtend != TTI::PR_None && InputTypeB)) &&
5842 "Unexpected values for OpBExtend or InputTypeB");
5846 if (BinOp && (*BinOp != Instruction::Mul || InputTypeA != InputTypeB))
5849 bool IsUSDot = OpBExtend != TTI::PR_None && OpAExtend != OpBExtend;
5850 if (IsUSDot && !ST->hasMatMulInt8())
5862 auto TC = TLI->getTypeConversion(AccumVectorType->getContext(),
5871 if (TLI->getTypeAction(AccumVectorType->getContext(), TC.second) !=
5877 std::pair<InstructionCost, MVT> AccumLT =
5879 std::pair<InstructionCost, MVT> InputLT =
5892 if (ST->isSVEorStreamingSVEAvailable() && !IsUSDot) {
5894 if (AccumLT.second.getScalarType() == MVT::i64 &&
5895 InputLT.second.getScalarType() == MVT::i16)
5898 if (AccumLT.second.getScalarType() == MVT::i64 &&
5899 InputLT.second.getScalarType() == MVT::i8)
5909 if (ST->isSVEorStreamingSVEAvailable() ||
5910 (AccumLT.second.isFixedLengthVector() && ST->isNeonAvailable() &&
5911 ST->hasDotProd())) {
5912 if (AccumLT.second.getScalarType() == MVT::i32 &&
5913 InputLT.second.getScalarType() == MVT::i8)
5929 "Expected the Mask to match the return size if given");
5931 "Expected the same scalar types");
5937 LT.second.getScalarSizeInBits() * Mask.size() > 128 &&
5938 SrcTy->getScalarSizeInBits() == LT.second.getScalarSizeInBits() &&
5939 Mask.size() > LT.second.getVectorNumElements() && !Index && !SubTp) {
5947 return std::max<InstructionCost>(1, LT.first / 4);
5955 Mask, 4, SrcTy->getElementCount().getKnownMinValue() * 2) ||
5957 Mask, 3, SrcTy->getElementCount().getKnownMinValue() * 2)))
5960 unsigned TpNumElts = Mask.size();
5961 unsigned LTNumElts = LT.second.getVectorNumElements();
5962 unsigned NumVecs = (TpNumElts + LTNumElts - 1) / LTNumElts;
5964 LT.second.getVectorElementCount());
5966 std::map<std::tuple<unsigned, unsigned, SmallVector<int>>, InstructionCost>
5968 for (unsigned N = 0; N < NumVecs; N++) {
5972 unsigned Source1 = -1U, Source2 = -1U;
5973 unsigned NumSources = 0;
5974 for (unsigned E = 0; E < LTNumElts; E++) {
5975 int MaskElt = (N * LTNumElts + E < TpNumElts) ? Mask[N * LTNumElts + E]
5984 unsigned Source = MaskElt / LTNumElts;
5985 if (NumSources == 0) {
5988 } else if (NumSources == 1 && Source != Source1) {
5991 } else if (NumSources >= 2 && Source != Source1 && Source != Source2) {
5997 if (Source == Source1)
5999 else if (Source == Source2)
6000 NMask.push_back(MaskElt % LTNumElts + LTNumElts);
6009 PreviousCosts.insert({std::make_tuple(Source1, Source2, NMask), 0});
6020 NTp, NTp, NMask, CostKind, 0, nullptr, Args,
6023 Result.first->second = NCost;
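// Illustrative sketch, not part of the original source: the loop above splits a
// shuffle mask that is wider than one legal register into LTNumElts-sized chunks,
// records which source registers feed each chunk, and normalises the chunk's mask
// so identical chunks can share one cached cost. The standalone helper below
// (hypothetical) reproduces just the per-chunk "how many distinct sources" part.
#include <cstdio>
#include <set>
#include <vector>

static std::vector<unsigned> sourcesPerChunk(const std::vector<int> &Mask,
                                             unsigned LegalNumElts) {
  std::vector<unsigned> Result;
  for (unsigned Base = 0; Base < Mask.size(); Base += LegalNumElts) {
    std::set<int> Sources;
    for (unsigned E = 0; E < LegalNumElts && Base + E < Mask.size(); ++E) {
      int Elt = Mask[Base + E];
      if (Elt >= 0) // ignore undef lanes
        Sources.insert(Elt / int(LegalNumElts)); // which legal register it reads
    }
    // Chunks needing one or two sources map onto real shuffles; more than two is
    // costed via inserts/extracts in the original code.
    Result.push_back(Sources.size());
  }
  return Result;
}

int main() {
  // An 8-element mask lowered with 4-element registers: the first chunk reads
  // registers 0 and 1, the second chunk reads registers 1 and 2.
  std::vector<int> Mask = {0, 4, 1, 5, 6, 8, 7, 9};
  for (unsigned S : sourcesPerChunk(Mask, 4))
    std::printf("%u ", S);
  std::printf("\n"); // prints: 2 2
  return 0;
}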
6037 if (IsExtractSubvector && LT.second.isFixedLengthVector()) {
6038 if (LT.second.getFixedSizeInBits() >= 128 &&
6040 LT.second.getVectorNumElements() / 2) {
6043 if (Index == (int)LT.second.getVectorNumElements() / 2)
6057 if (!Mask.empty() && LT.second.isFixedLengthVector() &&
6060 return M.value() < 0 || M.value() == (int)M.index();
6066 !Mask.empty() && SrcTy->getPrimitiveSizeInBits().isNonZero() &&
6067 SrcTy->getPrimitiveSizeInBits().isKnownMultipleOf(
6076 if ((ST->hasSVE2p1() || ST->hasSME2p1()) &&
6077 ST->isSVEorStreamingSVEAvailable() &&
6082 if (ST->isSVEorStreamingSVEAvailable() &&
6096 if (IsLoad && LT.second.isVector() &&
6098 LT.second.getVectorElementCount()))
6104 if (Mask.size() == 4 &&
6106 (SrcTy->getScalarSizeInBits() == 16 ||
6107 SrcTy->getScalarSizeInBits() == 32) &&
6108 all_of(Mask, [](int E) { return E < 8; }))
6114 if (LT.second.isFixedLengthVector() &&
6115 LT.second.getVectorNumElements() == Mask.size() &&
6117 (isZIPMask(Mask, LT.second.getVectorNumElements(), Unused, Unused) ||
6118 isUZPMask(Mask, LT.second.getVectorNumElements(), Unused) ||
6119 isREVMask(Mask, LT.second.getScalarSizeInBits(),
6120 LT.second.getVectorNumElements(), 16) ||
6121 isREVMask(Mask, LT.second.getScalarSizeInBits(),
6122 LT.second.getVectorNumElements(), 32) ||
6123 isREVMask(Mask, LT.second.getScalarSizeInBits(),
6124 LT.second.getVectorNumElements(), 64) ||
6127 [&Mask](int M) { return M < 0 || M == Mask[0]; })))
6256 return LT.first * Entry->Cost;
6265 LT.second.getSizeInBits() <= 128 && SubTp) {
6267 if (SubLT.second.isVector()) {
6268 int NumElts = LT.second.getVectorNumElements();
6269 int NumSubElts = SubLT.second.getVectorNumElements();
6270 if ((Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
6276 if (IsExtractSubvector)
6293 if (getPtrStride(*PSE, AccessTy, Ptr, TheLoop, DT, Strides,
6312 return ST->useFixedOverScalableIfEqualCost();
6316 return ST->getEpilogueVectorizationMinVF();
6351 unsigned NumInsns = 0;
6353 NumInsns += BB->sizeWithoutDebug();
6363 int64_t Scale, unsigned AddrSpace) const {
6391 if (I->getOpcode() == Instruction::Or &&
6396 if (I->getOpcode() == Instruction::Add ||
6397 I->getOpcode() == Instruction::Sub)
6422 return all_equal(Shuf->getShuffleMask());
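// Illustrative sketch, not part of the original source: the all_equal check above
// treats a shufflevector as a splat when every mask element selects the same source
// lane. A standalone equivalent of that mask test:
#include <algorithm>
#include <cstdio>
#include <vector>

static bool isSplatMask(const std::vector<int> &Mask) {
  return std::all_of(Mask.begin(), Mask.end(),
                     [&Mask](int M) { return M == Mask.front(); });
}

int main() {
  std::printf("%d %d\n", isSplatMask({3, 3, 3, 3}) ? 1 : 0,
              isSplatMask({0, 1, 0, 1}) ? 1 : 0); // prints: 1 0
  return 0;
}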
6429 bool AllowSplat = false) {
6434 auto areTypesHalfed = [](Value *FullV, Value *HalfV) {
6435 auto *FullTy = FullV->getType();
6436 auto *HalfTy = HalfV->getType();
6438 2 * HalfTy->getPrimitiveSizeInBits().getFixedValue();
6441 auto extractHalf = [](Value *FullV, Value *HalfV) {
6444 return FullVT->getNumElements() == 2 * HalfVT->getNumElements();
6448 Value *S1Op1 = nullptr, *S2Op1 = nullptr;
6462 if ((S1Op1 && (!areTypesHalfed(S1Op1, Op1) || !extractHalf(S1Op1, Op1))) ||
6463 (S2Op1 && (!areTypesHalfed(S2Op1, Op2) || !extractHalf(S2Op1, Op2))))
6477 if ((M1Start != 0 && M1Start != (NumElements / 2)) ||
6478 (M2Start != 0 && M2Start != (NumElements / 2)))
6480 if (S1Op1 && S2Op1 && M1Start != M2Start)
6490 return Ext->getType()->getScalarSizeInBits() ==
6491 2 * Ext->getOperand(0)->getType()->getScalarSizeInBits();
6505 Value *VectorOperand = nullptr;
6522 if (!GEP || GEP->getNumOperands() != 2)
6526 Value *Offsets = GEP->getOperand(1);
6529 if (Base->getType()->isVectorTy() || !Offsets->getType()->isVectorTy())
6535 if (OffsetsInst->getType()->getScalarSizeInBits() > 32 &&
6536 OffsetsInst->getOperand(0)->getType()->getScalarSizeInBits() <= 32)
6537 Ops.push_back(&GEP->getOperandUse(1));
6571 switch (II->getIntrinsicID()) {
6572 case Intrinsic::aarch64_neon_smull:
6573 case Intrinsic::aarch64_neon_umull:
6576 Ops.push_back(&II->getOperandUse(0));
6577 Ops.push_back(&II->getOperandUse(1));
6582 case Intrinsic::fma:
6583 case Intrinsic::fmuladd:
6589 case Intrinsic::aarch64_neon_sqdmull:
6590 case Intrinsic::aarch64_neon_sqdmulh:
6591 case Intrinsic::aarch64_neon_sqrdmulh:
6594 Ops.push_back(&II->getOperandUse(0));
6596 Ops.push_back(&II->getOperandUse(1));
6597 return !Ops.empty();
6598 case Intrinsic::aarch64_neon_fmlal:
6599 case Intrinsic::aarch64_neon_fmlal2:
6600 case Intrinsic::aarch64_neon_fmlsl:
6601 case Intrinsic::aarch64_neon_fmlsl2:
6604 Ops.push_back(&II->getOperandUse(1));
6606 Ops.push_back(&II->getOperandUse(2));
6607 return !Ops.empty();
6608 case Intrinsic::aarch64_sve_ptest_first:
6609 case Intrinsic::aarch64_sve_ptest_last:
6611 if (IIOp->getIntrinsicID() == Intrinsic::aarch64_sve_ptrue)
6612 Ops.push_back(&II->getOperandUse(0));
6613 return !Ops.empty();
6614 case Intrinsic::aarch64_sme_write_horiz:
6615 case Intrinsic::aarch64_sme_write_vert:
6616 case Intrinsic::aarch64_sme_writeq_horiz:
6617 case Intrinsic::aarch64_sme_writeq_vert: {
6619 if (!Idx || Idx->getOpcode() != Instruction::Add)
6621 Ops.push_back(&II->getOperandUse(1));
6624 case Intrinsic::aarch64_sme_read_horiz:
6625 case Intrinsic::aarch64_sme_read_vert:
6626 case Intrinsic::aarch64_sme_readq_horiz:
6627 case Intrinsic::aarch64_sme_readq_vert:
6628 case Intrinsic::aarch64_sme_ld1b_vert:
6629 case Intrinsic::aarch64_sme_ld1h_vert:
6630 case Intrinsic::aarch64_sme_ld1w_vert:
6631 case Intrinsic::aarch64_sme_ld1d_vert:
6632 case Intrinsic::aarch64_sme_ld1q_vert:
6633 case Intrinsic::aarch64_sme_st1b_vert:
6634 case Intrinsic::aarch64_sme_st1h_vert:
6635 case Intrinsic::aarch64_sme_st1w_vert:
6636 case Intrinsic::aarch64_sme_st1d_vert:
6637 case Intrinsic::aarch64_sme_st1q_vert:
6638 case Intrinsic::aarch64_sme_ld1b_horiz:
6639 case Intrinsic::aarch64_sme_ld1h_horiz:
6640 case Intrinsic::aarch64_sme_ld1w_horiz:
6641 case Intrinsic::aarch64_sme_ld1d_horiz:
6642 case Intrinsic::aarch64_sme_ld1q_horiz:
6643 case Intrinsic::aarch64_sme_st1b_horiz:
6644 case Intrinsic::aarch64_sme_st1h_horiz:
6645 case Intrinsic::aarch64_sme_st1w_horiz:
6646 case Intrinsic::aarch64_sme_st1d_horiz:
6647 case Intrinsic::aarch64_sme_st1q_horiz: {
6649 if (!Idx || Idx->getOpcode() != Instruction::Add)
6651 Ops.push_back(&II->getOperandUse(3));
6654 case Intrinsic::aarch64_neon_pmull:
6657 Ops.push_back(&II->getOperandUse(0));
6658 Ops.push_back(&II->getOperandUse(1));
6660 case Intrinsic::aarch64_neon_pmull64:
6662 II->getArgOperand(1)))
6664 Ops.push_back(&II->getArgOperandUse(0));
6665 Ops.push_back(&II->getArgOperandUse(1));
6667 case Intrinsic::masked_gather:
6670 Ops.push_back(&II->getArgOperandUse(0));
6672 case Intrinsic::masked_scatter:
6675 Ops.push_back(&II->getArgOperandUse(1));
6682 auto ShouldSinkCondition = [](Value *Cond,
6687 if (II->getIntrinsicID() != Intrinsic::vector_reduce_or ||
6691 Ops.push_back(&II->getOperandUse(0));
6695 switch (I->getOpcode()) {
6696 case Instruction::GetElementPtr:
6697 case Instruction::Add:
6698 case Instruction::Sub:
6700 for (unsigned Op = 0; Op < I->getNumOperands(); ++Op) {
6702 Ops.push_back(&I->getOperandUse(Op));
6707 case Instruction::Select: {
6708 if (!ShouldSinkCondition(I->getOperand(0), Ops))
6711 Ops.push_back(&I->getOperandUse(0));
6714 case Instruction::Br: {
6721 Ops.push_back(&I->getOperandUse(0));
6728 if (!I->getType()->isVectorTy())
6731 switch (I->getOpcode()) {
6732 case Instruction::Sub:
6733 case Instruction::Add: {
6742 Ops.push_back(&Ext1->getOperandUse(0));
6743 Ops.push_back(&Ext2->getOperandUse(0));
6746 Ops.push_back(&I->getOperandUse(0));
6747 Ops.push_back(&I->getOperandUse(1));
6751 case Instruction::Or: {
6754 if (ST->hasNEON()) {
6768 if (I->getParent() != MainAnd->getParent() ||
6773 if (I->getParent() != IA->getParent() ||
6774 I->getParent() != IB->getParent())
6779 Ops.push_back(&I->getOperandUse(0));
6780 Ops.push_back(&I->getOperandUse(1));
6789 case Instruction::Mul: {
6790 auto ShouldSinkSplatForIndexedVariant = [](Value *V) {
6793 if (Ty->isScalableTy())
6797 return Ty->getScalarSizeInBits() == 16 || Ty->getScalarSizeInBits() == 32;
6800 int NumZExts = 0, NumSExts = 0;
6801 for (auto &Op : I->operands()) {
6808 auto *ExtOp = Ext->getOperand(0);
6809 if (isSplatShuffle(ExtOp) && ShouldSinkSplatForIndexedVariant(ExtOp))
6810 Ops.push_back(&Ext->getOperandUse(0));
6818 if (Ext->getOperand(0)->getType()->getScalarSizeInBits() * 2 <
6819 I->getType()->getScalarSizeInBits())
6856 if (!ElementConstant || !ElementConstant->isZero())
6859 unsigned Opcode = OperandInstr->getOpcode();
6860 if (Opcode == Instruction::SExt)
6862 else if (Opcode == Instruction::ZExt)
6867 unsigned Bitwidth = I->getType()->getScalarSizeInBits();
6877 Ops.push_back(&Insert->getOperandUse(1));
6883 if (!Ops.empty() && (NumSExts == 2 || NumZExts == 2))
6887 if (!ShouldSinkSplatForIndexedVariant(I))
6892 Ops.push_back(&I->getOperandUse(0));
6894 Ops.push_back(&I->getOperandUse(1));
6896 return !Ops.empty();
6898 case Instruction::FMul: {
6900 if (I->getType()->isScalableTy())
6909 Ops.push_back(&I->getOperandUse(0));
6911 Ops.push_back(&I->getOperandUse(1));
6912 return !Ops.empty();
static bool isAllActivePredicate(SelectionDAG &DAG, SDValue N)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file provides a helper that implements much of the TTI interface in terms of the target-independ...
static Error reportError(StringRef Message)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
Cost tables and simple lookup functions.
This file defines the DenseMap class.
static Value * getCondition(Instruction *I)
const HexagonInstrInfo * TII
This file provides the interface for the instcombine pass implementation.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
This file defines the LoopVectorizationLegality class.
static const Function * getCalledFunction(const Value *V)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
const SmallVectorImpl< MachineOperand > & Cond
static uint64_t getBits(uint64_t Val, int Start, int End)
static unsigned getNumElements(Type *Ty)
static SymbolRef::Type getType(const Symbol *Sym)
This file describes how to lower LLVM code to machine code.
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
unsigned getVectorInsertExtractBaseCost() const
InstructionCost getPartialReductionCost(unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType, ElementCount VF, TTI::PartialReductionExtendKind OpAExtend, TTI::PartialReductionExtendKind OpBExtend, std::optional< unsigned > BinOp, TTI::TargetCostKind CostKind) const override
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getCostOfKeepingLiveOverCall(ArrayRef< Type * > Tys) const override
unsigned getMaxInterleaveFactor(ElementCount VF) const override
InstructionCost getMaskedMemoryOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const
InstructionCost getGatherScatterOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const
bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const override
InstructionCost getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE, const SCEV *Ptr, TTI::TargetCostKind CostKind) const override
bool isExtPartOfAvgExpr(const Instruction *ExtUser, Type *Dst, Type *Src) const
InstructionCost getIntImmCost(int64_t Val) const
Calculate the cost of materializing a 64-bit value.
std::optional< InstructionCost > getFP16BF16PromoteCost(Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info, TTI::OperandValueInfo Op2Info, bool IncludeTrunc, bool CanUseSVE, std::function< InstructionCost(Type *)> InstCost) const
FP16 and BF16 operations are lowered to fptrunc(op(fpext, fpext) if the architecture features are not...
bool prefersVectorizedAddressing() const override
InstructionCost getIndexedVectorInstrCostFromEnd(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index) const override
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
InstructionCost getMulAccReductionCost(bool IsUnsigned, unsigned RedOpcode, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const override
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1) const override
InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr) const override
bool isElementTypeLegalForScalableVector(Type *Ty) const override
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const override
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
APInt getPriorityMask(const Function &F) const override
bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const override
bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2) const override
InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={}) const override
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const override
Check if sinking I's operands to I's basic block is profitable, because the operands can be folded in...
std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const override
bool useNeonVector(const Type *Ty) const
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) const override
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *ValTy, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override
InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index, TTI::TargetCostKind CostKind) const override
unsigned getInlineCallPenalty(const Function *F, const CallBase &Call, unsigned DefaultCallPenalty) const override
bool areInlineCompatible(const Function *Caller, const Function *Callee) const override
unsigned getMaxNumElements(ElementCount VF) const
Try to return an estimate cost factor that can be used as a multiplier when scalarizing an operation ...
bool shouldTreatInstructionLikeSelect(const Instruction *I) const override
bool isMultiversionedFunction(const Function &F) const override
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const override
bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, ElementCount VF) const override
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const override
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const override
bool isLegalMaskedGatherScatter(Type *DataType) const
bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const override
See if I should be considered for address type promotion.
APInt getFeatureMask(const Function &F) const override
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override
bool areTypesABICompatible(const Function *Caller, const Function *Callee, ArrayRef< Type * > Types) const override
bool enableScalableVectorization() const override
InstructionCost getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const override
Value * getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType, bool CanCreate=true) const override
bool hasKnownLowerThroughputFromSchedulingModel(unsigned Opcode1, unsigned Opcode2) const
Check whether Opcode1 has less throughput according to the scheduling model than Opcode2.
unsigned getEpilogueVectorizationMinVF() const override
InstructionCost getSpliceCost(VectorType *Tp, int Index, TTI::TargetCostKind CostKind) const
InstructionCost getArithmeticReductionCostSVE(unsigned Opcode, VectorType *ValTy, TTI::TargetCostKind CostKind) const
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, StackOffset BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) const override
Return the cost of the scaling factor used in the addressing mode represented by AM for this target,...
bool preferFixedOverScalableIfEqualCost(bool IsEpilogue) const override
Class for arbitrary precision integers.
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
unsigned popcount() const
Count the number of bits set.
unsigned countLeadingOnes() const
void negate()
Negate this APInt in place.
LLVM_ABI APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
unsigned logBase2() const
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
int64_t getSExtValue() const
Get sign extended value.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
LLVM Basic Block Representation.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getScalarizationOverhead(VectorType *InTy, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={}) const override
TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind, ArrayRef< int > Mask, VectorType *SrcTy, int &Index, VectorType *&SubTy) const
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I=nullptr, int64_t ScalableOffset=0) const override
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind) const override
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
InstructionCost getMulAccReductionCost(bool IsUnsigned, unsigned RedOpcode, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind) const override
InstructionCost getIndexedVectorInstrCostFromEnd(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index) const override
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
InstructionCost getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const override
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
bool isTypeLegal(Type *Ty) const override
static BinaryOperator * CreateWithCopiedFlags(BinaryOps Opc, Value *V1, Value *V2, Value *CopyO, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Value * getArgOperand(unsigned i) const
unsigned arg_size() const
This class represents a function call, abstracting a target machine's calling convention.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
@ ICMP_SGE
signed greater or equal
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
static bool isIntPredicate(Predicate P)
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
static LLVM_ABI ConstantAggregateZero * get(Type *Ty)
This is the shared class of boolean and integer constants.
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
const APInt & getValue() const
Return the constant as an APInt value reference.
static LLVM_ABI ConstantInt * getBool(LLVMContext &Context, bool V)
static LLVM_ABI Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
This is an important base class in LLVM.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string in and methods for querying it.
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
static constexpr ElementCount getScalable(ScalarTy MinVal)
static constexpr ElementCount getFixed(ScalarTy MinVal)
This provides a helper for copying FMF from an instruction or setting specified flags.
Convenience struct for specifying and reasoning about fast-math flags.
bool allowContract() const
Container class for subtarget features.
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
CallInst * CreateInsertVector(Type *DstType, Value *SrcVec, Value *SubVec, Value *Idx, const Twine &Name="")
Create a call to the vector.insert intrinsic.
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
IntegerType * getIntNTy(unsigned N)
Fetch the type representing an N-bit integer.
Type * getDoubleTy()
Fetch the type representing a 64-bit floating point value.
LLVM_ABI Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains.
LLVM_ABI CallInst * CreateMaskedLoad(Type *Ty, Value *Ptr, Align Alignment, Value *Mask, Value *PassThru=nullptr, const Twine &Name="")
Create a call to Masked Load intrinsic.
LLVM_ABI Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Type * getHalfTy()
Fetch the type representing a 16-bit floating point value.
Value * CreateGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Value * CreateBitOrPointerCast(Value *V, Type *DestTy, const Twine &Name="")
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Value * CreateBinOpFMF(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, FMFSource FMFSource, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
LLVM_ABI CallInst * CreateMaskedStore(Value *Val, Value *Ptr, Align Alignment, Value *Mask)
Create a call to Masked Store intrinsic.
Type * getFloatTy()
Fetch the type representing a 32-bit floating point value.
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
LLVM_ABI Value * CreateElementCount(Type *Ty, ElementCount EC)
Create an expression which evaluates to the number of elements in EC at runtime.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
This instruction inserts a single (scalar) element into a VectorType value.
The core instruction combiner logic.
virtual Instruction * eraseInstFromFunction(Instruction &I)=0
Combiner aware instruction erasure.
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)
Replace operand of instruction and add old operand to the worklist.
static InstructionCost getInvalid(CostType Val=0)
CostType getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
LLVM_ABI bool isCommutative() const LLVM_READONLY
Return true if the instruction is commutative:
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
Class to represent integer types.
bool hasGroups() const
Returns true if we have any interleave groups.
const SmallVectorImpl< Type * > & getArgTypes() const
Type * getReturnType() const
const SmallVectorImpl< const Value * > & getArgs() const
Intrinsic::ID getID() const
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
Value * getPointerOperand()
iterator_range< block_iterator > blocks() const
RecurrenceSet & getFixedOrderRecurrences()
Return the fixed-order recurrences found in the loop.
DominatorTree * getDominatorTree() const
PredicatedScalarEvolution * getPredicatedScalarEvolution() const
const ReductionList & getReductionVars() const
Returns the reduction variables found in the loop.
Represents a single loop in the control flow graph.
const FeatureBitset & getFeatureBits() const
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
Information for memory intrinsic cost model.
Align getAlignment() const
Type * getDataType() const
Intrinsic::ID getID() const
const Instruction * getInst() const
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Type * getRecurrenceType() const
Returns the type of the recurrence.
RecurKind getRecurrenceKind() const
This node represents a polynomial recurrence on the trip count of the specified loop.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
This class represents an analyzed expression in the program.
SMEAttrs is a utility class to parse the SME ACLE attributes on functions.
bool hasNonStreamingInterfaceAndBody() const
bool hasStreamingCompatibleInterface() const
bool hasStreamingInterfaceOrBody() const
bool isSMEABIRoutine() const
bool hasStreamingBody() const
void set(unsigned M, bool Enable=true)
SMECallAttrs is a utility class to hold the SMEAttrs for a callsite.
bool requiresPreservingZT0() const
bool requiresSMChange() const
bool requiresLazySave() const
bool requiresPreservingAllZAState() const
static LLVM_ABI ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
static ScalableVectorType * getDoubleElementsVectorType(ScalableVectorType *VTy)
The main scalar evolution driver.
LLVM_ABI const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
LLVM_ABI unsigned getSmallConstantTripMultiple(const Loop *L, const SCEV *ExitCount)
Returns the largest constant divisor of the trip count as a normal unsigned value,...
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI unsigned getSmallConstantMaxTripCount(const Loop *L, SmallVectorImpl< const SCEVPredicate * > *Predicates=nullptr)
Returns the upper bound of the loop trip count as a normal unsigned value.
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
const SCEV * getSymbolicMaxBackedgeTakenCount(const Loop *L)
When successful, this returns a SCEV that is greater than or equal to (i.e.
This instruction constructs a fixed permutation of two input vectors.
static LLVM_ABI bool isDeInterleaveMaskOfFactor(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
Check if the mask is a DE-interleave mask of the given factor Factor like: <Index,...
static LLVM_ABI bool isExtractSubvectorMask(ArrayRef< int > Mask, int NumSrcElts, int &Index)
Return true if this shuffle mask is an extract subvector mask.
static LLVM_ABI bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
static StackOffset getScalable(int64_t Scalable)
static StackOffset getFixed(int64_t Fixed)
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e.
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Class to represent struct types.
TargetInstrInfo - Interface to description of machine instruction set.
std::pair< LegalizeTypeAction, EVT > LegalizeKind
LegalizeKind holds the legalization kind that needs to happen to EVT in order to type-legalize it.
const RTLIB::RuntimeLibcallsInfo & getRuntimeLibcallsInfo() const
Primary interface to the complete machine description for the target machine.
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
bool isVectorTy() const
True if this is an instance of VectorType.
LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVM_ABI Type * getWithNewBitWidth(unsigned NewBitWidth) const
Given an integer or vector type, change the lane bitwidth to NewBitwidth, whilst keeping the old numb...
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
LLVM_ABI Type * getWithNewType(Type *EltTy) const
Given vector type, change the element type, whilst keeping the old number of elements.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
const Use & getOperandUse(unsigned i) const
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Base class of all SIMD vector types.
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
static VectorType * getInteger(VectorType *VTy)
This static method gets a VectorType with the same number of elements as the input type,...
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
Type * getElementType() const
constexpr ScalarTy getFixedValue() const
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr bool isFixed() const
Returns true if the quantity is not scaled by vscale.
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
const ParentTy * getParent() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
void expandMOVImm(uint64_t Imm, unsigned BitSize, SmallVectorImpl< ImmInsnModel > &Insn)
Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more real move-immediate instructions to...
LLVM_ABI APInt getCpuSupportsMask(ArrayRef< StringRef > Features)
static constexpr unsigned SVEBitsPerBlock
LLVM_ABI APInt getFMVPriority(ArrayRef< StringRef > Features)
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
@ C
The default llvm calling convention, compatible with C.
ISD namespace - This namespace contains an enum which represents all of the SelectionDAG node types a...
@ ADD
Simple integer binary arithmetic operators.
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
@ FADD
Simple binary floating point operators.
@ SIGN_EXTEND
Conversion operators.
@ SHL
Shift and rotation operations.
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)
Matches an And with LHS and RHS in either order.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
BinaryOp_match< LHS, RHS, Instruction::FMul > m_FMul(const LHS &L, const RHS &R)
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
cst_pred_ty< is_nonnegative > m_NonNegative()
Match an integer or vector of non-negative values.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
IntrinsicID_match m_VScale()
Matches a call to llvm.vscale().
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
auto m_Undef()
Match an arbitrary undef constant.
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
initializer< Ty > init(const Ty &Val)
LocationClass< Ty > location(Ty &L)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
FunctionAddr VTableAddr Value
std::optional< unsigned > isDUPQMask(ArrayRef< int > Mask, unsigned Segments, unsigned SegmentSize)
isDUPQMask - matches a splat of equivalent lanes within segments of a given number of elements.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
const CostTblEntryT< CostType > * CostTableLookup(ArrayRef< CostTblEntryT< CostType > > Tbl, int ISD, MVT Ty)
Find in cost table.
LLVM_ABI bool getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name)
Returns true if Name is applied to TheLoop and enabled.
bool isZIPMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut, unsigned &OperandOrderOut)
Return true for zip1 or zip2 masks of the form: <0, 8, 1, 9, 2, 10, 3, 11> (WhichResultOut = 0,...
TailFoldingOpts
An enum to describe what types of loops we should attempt to tail-fold: Disabled: None Reductions: Lo...
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
bool isDUPFirstSegmentMask(ArrayRef< int > Mask, unsigned Segments, unsigned SegmentSize)
isDUPFirstSegmentMask - matches a splat of the first 128b segment.
TypeConversionCostTblEntryT< unsigned > TypeConversionCostTblEntry
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
FunctionAddr VTableAddr uintptr_t uintptr_t Int32Ty
LLVM_ABI std::optional< const MDOperand * > findStringMetadataForLoop(const Loop *TheLoop, StringRef Name)
Find string metadata for loop.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
LLVM_ABI bool MaskedValueIsZero(const Value *V, const APInt &Mask, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if 'V & Mask' is known to be zero.
unsigned M1(unsigned Val)
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
unsigned getPerfectShuffleCost(llvm::ArrayRef< int > M)
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
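A hedged sketch of the value-tracking helpers above; checking that the low bits of a value are known zero is an assumed example.

#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/Support/KnownBits.h"
using namespace llvm;

// Returns true if the low NumBits bits of V are known to be zero.
static bool lowBitsKnownZero(const Value *V, unsigned NumBits,
                             const DataLayout &DL) {
  KnownBits Known = computeKnownBits(V, DL);
  return Known.countMinTrailingZeros() >= NumBits;
}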
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
bool isUZPMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut)
Return true for uzp1 or uzp2 masks of the form: <0, 2, 4, 6, 8, 10, 12, 14> or <1,...
bool isREVMask(ArrayRef< int > M, unsigned EltSize, unsigned NumElts, unsigned BlockSize)
isREVMask - Check if a vector shuffle corresponds to a REV instruction with the specified blocksize.
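A hedged sketch of the mask classifiers above, assuming it sits somewhere in the AArch64 backend where they are declared; the masks are illustrative.

#include "llvm/ADT/ArrayRef.h"

static void classifyIllustrativeMasks() {
  unsigned WhichResult = 0, OperandOrder = 0;
  // <0, 8, 1, 9, 2, 10, 3, 11> interleaves the low halves of two v8 inputs.
  const int ZipLo[] = {0, 8, 1, 9, 2, 10, 3, 11};
  bool IsZip = llvm::isZIPMask(ZipLo, /*NumElts=*/8, WhichResult, OperandOrder);
  // <0, 2, 4, 6, 8, 10, 12, 14> keeps the even lanes of the concatenation.
  const int UzpEven[] = {0, 2, 4, 6, 8, 10, 12, 14};
  bool IsUzp = llvm::isUZPMask(UzpEven, /*NumElts=*/8, WhichResult);
  (void)IsZip;
  (void)IsUzp;
}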
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
constexpr int PoisonMaskElem
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
LLVM_ABI Value * simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a BinaryOperator, fold the result or return null.
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ Or
Bitwise or logical OR of integers.
@ AnyOf
AnyOf reduction with select(cmp(),x,y) where one of (x,y) is loop invariant, and both x and y are int...
@ Xor
Bitwise or logical XOR of integers.
@ FMax
FP max implemented in terms of select(cmp()).
@ FMulAdd
Sum of float products with llvm.fmuladd(a * b + sum).
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ And
Bitwise or logical AND of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ FMin
FP min implemented in terms of select(cmp()).
@ Sub
Subtraction of integers.
@ AddChainWithSubs
A chain of adds and subs.
@ UMax
Unsigned integer max implemented in terms of select(cmp()).
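A hedged sketch of dispatching on the reduction kinds above to an ISD-level min/max opcode; the mapping is an illustrative assumption, not this file's cost logic.

#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include <optional>
using namespace llvm;

// Maps min/max reduction kinds to the matching ISD node, if any.
static std::optional<unsigned> toMinMaxISD(RecurKind Kind) {
  switch (Kind) {
  case RecurKind::SMin: return ISD::SMIN;
  case RecurKind::SMax: return ISD::SMAX;
  case RecurKind::UMin: return ISD::UMIN;
  case RecurKind::UMax: return ISD::UMAX;
  case RecurKind::FMin: return ISD::FMINNUM;
  case RecurKind::FMax: return ISD::FMAXNUM;
  default: return std::nullopt;
  }
}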
CostTblEntryT< unsigned > CostTblEntry
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
unsigned getNumElementsFromSVEPredPattern(unsigned Pattern)
Return the number of active elements for VL1 to VL256 predicate pattern, zero for all other patterns.
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
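A small sketch combining the load/store helpers above with the isa<> casting utilities; the vector-only filter is an assumed example.

#include "llvm/IR/Instructions.h"
using namespace llvm;

// Returns the pointer operand if I is a load or store of a vector type,
// otherwise nullptr.
static const Value *vectorAccessPointer(const Instruction *I) {
  if (!isa<LoadInst>(I) && !isa<StoreInst>(I))
    return nullptr;
  if (!isa<VectorType>(getLoadStoreType(I)))
    return nullptr;
  return getLoadStorePointerOperand(I);
}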
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer list are equal or the list is empty.
Type * toVectorTy(Type *Scalar, ElementCount EC)
A helper function for converting Scalar types to vector types.
LLVM_ABI std::optional< int64_t > getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const DominatorTree &DT, const DenseMap< Value *, const SCEV * > &StridesMap=DenseMap< Value *, const SCEV * >(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of the access type size.
const TypeConversionCostTblEntryT< CostType > * ConvertCostTableLookup(ArrayRef< TypeConversionCostTblEntryT< CostType > > Tbl, int ISD, MVT Dst, MVT Src)
Find in type conversion cost table.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
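A short illustration of the bit-math helpers above; rounding an element count up to a power of two is an assumed example.

#include "llvm/Support/MathExtras.h"
#include <cstdint>
#include <utility>

// Rounds NumElts up to a power of two and also returns its log2,
// e.g. 5 -> {8, 3}.
static std::pair<uint64_t, unsigned> roundUpToPow2(uint64_t NumElts) {
  uint64_t Padded =
      llvm::isPowerOf2_64(NumElts) ? NumElts : llvm::NextPowerOf2(NumElts);
  return {Padded, llvm::Log2_64(Padded)};
}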
unsigned getMatchingIROpode() const
bool inactiveLanesAreUnused() const
bool inactiveLanesAreNotDefined() const
bool hasMatchingUndefIntrinsic() const
static SVEIntrinsicInfo defaultMergingUnaryNarrowingTopOp()
static SVEIntrinsicInfo defaultZeroingOp()
bool hasGoverningPredicate() const
SVEIntrinsicInfo & setOperandIdxInactiveLanesTakenFrom(unsigned Index)
static SVEIntrinsicInfo defaultMergingOp(Intrinsic::ID IID=Intrinsic::not_intrinsic)
SVEIntrinsicInfo & setOperandIdxWithNoActiveLanes(unsigned Index)
unsigned getOperandIdxWithNoActiveLanes() const
SVEIntrinsicInfo & setInactiveLanesAreUnused()
SVEIntrinsicInfo & setInactiveLanesAreNotDefined()
SVEIntrinsicInfo & setGoverningPredicateOperandIdx(unsigned Index)
bool inactiveLanesTakenFromOperand() const
static SVEIntrinsicInfo defaultUndefOp()
bool hasOperandWithNoActiveLanes() const
Intrinsic::ID getMatchingUndefIntrinsic() const
SVEIntrinsicInfo & setResultIsZeroInitialized()
static SVEIntrinsicInfo defaultMergingUnaryOp()
SVEIntrinsicInfo & setMatchingUndefIntrinsic(Intrinsic::ID IID)
unsigned getGoverningPredicateOperandIdx() const
bool hasMatchingIROpode() const
bool resultIsZeroInitialized() const
SVEIntrinsicInfo & setMatchingIROpcode(unsigned Opcode)
unsigned getOperandIdxInactiveLanesTakenFrom() const
static SVEIntrinsicInfo defaultVoidOp(unsigned GPIndex)
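SVEIntrinsicInfo is a helper local to this file, so the following is only a sketch of how its builder-style setters compose, based on the declarations above; the intrinsic and IR opcode chosen are illustrative.

// A merging SVE binary op whose "undef" variant is aarch64_sve_add_u and
// whose IR-level equivalent is integer add.
static SVEIntrinsicInfo illustrativeAddInfo() {
  return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_add_u)
      .setMatchingIROpcode(Instruction::Add);
}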
This struct is a compact representation of a valid (non-zero power of two) alignment.
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
uint64_t getScalarSizeInBits() const
static LLVM_ABI EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
bool isFixedLengthVector() const
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
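A hedged sketch of the EVT queries above; deciding whether a type fits a 128-bit NEON register is an assumed example, and the include path assumes a recent LLVM tree.

#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Type.h"
using namespace llvm;

// Returns true if Ty maps to a simple fixed-length vector EVT of at most
// 128 bits.
static bool fitsInNeonRegister(Type *Ty) {
  EVT VT = EVT::getEVT(Ty, /*HandleUnknown=*/true);
  return VT.isSimple() && VT.isFixedLengthVector() &&
         VT.getSizeInBits().getFixedValue() <= 128;
}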
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Machine model for scheduling, bundling, and heuristics.
static LLVM_ABI double getReciprocalThroughput(const MCSubtargetInfo &STI, const MCSchedClassDesc &SCDesc)
Information about a load/store intrinsic defined by the target.
InterleavedAccessInfo * IAI
LoopVectorizationLegality * LVL
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...