23#include "llvm/IR/IntrinsicsAArch64.h"
35#define DEBUG_TYPE "aarch64tti"
41 "sve-prefer-fixed-over-scalable-if-equal",
cl::Hidden);
59 "Penalty of calling a function that requires a change to PSTATE.SM"));
63 cl::desc(
"Penalty of inlining a call that requires a change to PSTATE.SM"));
74 cl::desc(
"The cost of a histcnt instruction"));
78 cl::desc(
"The number of instructions to search for a redundant dmb"));
82 cl::desc(
"Threshold for forced unrolling of small loops in AArch64"));
85class TailFoldingOption {
100 bool NeedsDefault =
true;
104 void setNeedsDefault(
bool V) { NeedsDefault =
V; }
119 assert((InitialBits == TailFoldingOpts::Disabled || !NeedsDefault) &&
120 "Initial bits should only include one of "
121 "(disabled|all|simple|default)");
122 Bits = NeedsDefault ? DefaultBits : InitialBits;
124 Bits &= ~DisableBits;
130 errs() <<
"invalid argument '" << Opt
131 <<
"' to -sve-tail-folding=; the option should be of the form\n"
132 " (disabled|all|default|simple)[+(reductions|recurrences"
133 "|reverse|noreductions|norecurrences|noreverse)]\n";
139 void operator=(
const std::string &Val) {
148 setNeedsDefault(
false);
151 StringRef(Val).split(TailFoldTypes,
'+', -1,
false);
153 unsigned StartIdx = 1;
154 if (TailFoldTypes[0] ==
"disabled")
155 setInitialBits(TailFoldingOpts::Disabled);
156 else if (TailFoldTypes[0] ==
"all")
157 setInitialBits(TailFoldingOpts::All);
158 else if (TailFoldTypes[0] ==
"default")
159 setNeedsDefault(
true);
160 else if (TailFoldTypes[0] ==
"simple")
161 setInitialBits(TailFoldingOpts::Simple);
164 setInitialBits(TailFoldingOpts::Disabled);
167 for (
unsigned I = StartIdx;
I < TailFoldTypes.
size();
I++) {
168 if (TailFoldTypes[
I] ==
"reductions")
169 setEnableBit(TailFoldingOpts::Reductions);
170 else if (TailFoldTypes[
I] ==
"recurrences")
171 setEnableBit(TailFoldingOpts::Recurrences);
172 else if (TailFoldTypes[
I] ==
"reverse")
173 setEnableBit(TailFoldingOpts::Reverse);
174 else if (TailFoldTypes[
I] ==
"noreductions")
175 setDisableBit(TailFoldingOpts::Reductions);
176 else if (TailFoldTypes[
I] ==
"norecurrences")
177 setDisableBit(TailFoldingOpts::Recurrences);
178 else if (TailFoldTypes[
I] ==
"noreverse")
179 setDisableBit(TailFoldingOpts::Reverse);
196 "Control the use of vectorisation using tail-folding for SVE where the"
197 " option is specified in the form (Initial)[+(Flag1|Flag2|...)]:"
198 "\ndisabled (Initial) No loop types will vectorize using "
200 "\ndefault (Initial) Uses the default tail-folding settings for "
202 "\nall (Initial) All legal loop types will vectorize using "
204 "\nsimple (Initial) Use tail-folding for simple loops (not "
205 "reductions or recurrences)"
206 "\nreductions Use tail-folding for loops containing reductions"
207 "\nnoreductions Inverse of above"
208 "\nrecurrences Use tail-folding for loops containing fixed order "
210 "\nnorecurrences Inverse of above"
211 "\nreverse Use tail-folding for loops requiring reversed "
213 "\nnoreverse Inverse of above"),
258 TTI->isMultiversionedFunction(
F) ?
"fmv-features" :
"target-features";
259 StringRef FeatureStr =
F.getFnAttribute(AttributeStr).getValueAsString();
260 FeatureStr.
split(Features,
",");
276 return F.hasFnAttribute(
"fmv-features");
280 AArch64::FeatureExecuteOnly,
320 FeatureBitset EffectiveCallerBits = CallerBits ^ InlineInverseFeatures;
321 FeatureBitset EffectiveCalleeBits = CalleeBits ^ InlineInverseFeatures;
323 return (EffectiveCallerBits & EffectiveCalleeBits) == EffectiveCalleeBits;
341 auto FVTy = dyn_cast<FixedVectorType>(Ty);
343 FVTy->getScalarSizeInBits() * FVTy->getNumElements() > 128;
352 unsigned DefaultCallPenalty)
const {
377 if (
F ==
Call.getCaller())
383 return DefaultCallPenalty;
394 ST->isSVEorStreamingSVEAvailable() &&
395 !ST->disableMaximizeScalableBandwidth();
419 assert(Ty->isIntegerTy());
421 unsigned BitSize = Ty->getPrimitiveSizeInBits();
428 ImmVal = Imm.sext((BitSize + 63) & ~0x3fU);
433 for (
unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
439 return std::max<InstructionCost>(1,
Cost);
446 assert(Ty->isIntegerTy());
448 unsigned BitSize = Ty->getPrimitiveSizeInBits();
454 unsigned ImmIdx = ~0U;
458 case Instruction::GetElementPtr:
463 case Instruction::Store:
466 case Instruction::Add:
467 case Instruction::Sub:
468 case Instruction::Mul:
469 case Instruction::UDiv:
470 case Instruction::SDiv:
471 case Instruction::URem:
472 case Instruction::SRem:
473 case Instruction::And:
474 case Instruction::Or:
475 case Instruction::Xor:
476 case Instruction::ICmp:
480 case Instruction::Shl:
481 case Instruction::LShr:
482 case Instruction::AShr:
486 case Instruction::Trunc:
487 case Instruction::ZExt:
488 case Instruction::SExt:
489 case Instruction::IntToPtr:
490 case Instruction::PtrToInt:
491 case Instruction::BitCast:
492 case Instruction::PHI:
493 case Instruction::Call:
494 case Instruction::Select:
495 case Instruction::Ret:
496 case Instruction::Load:
501 int NumConstants = (BitSize + 63) / 64;
514 assert(Ty->isIntegerTy());
516 unsigned BitSize = Ty->getPrimitiveSizeInBits();
525 if (IID >= Intrinsic::aarch64_addg && IID <= Intrinsic::aarch64_udiv)
531 case Intrinsic::sadd_with_overflow:
532 case Intrinsic::uadd_with_overflow:
533 case Intrinsic::ssub_with_overflow:
534 case Intrinsic::usub_with_overflow:
535 case Intrinsic::smul_with_overflow:
536 case Intrinsic::umul_with_overflow:
538 int NumConstants = (BitSize + 63) / 64;
545 case Intrinsic::experimental_stackmap:
546 if ((Idx < 2) || (Imm.getBitWidth() <= 64 &&
isInt<64>(Imm.getSExtValue())))
549 case Intrinsic::experimental_patchpoint_void:
550 case Intrinsic::experimental_patchpoint:
551 if ((Idx < 4) || (Imm.getBitWidth() <= 64 &&
isInt<64>(Imm.getSExtValue())))
554 case Intrinsic::experimental_gc_statepoint:
555 if ((Idx < 5) || (Imm.getBitWidth() <= 64 &&
isInt<64>(Imm.getSExtValue())))
565 if (TyWidth == 32 || TyWidth == 64)
589 unsigned TotalHistCnts = 1;
599 unsigned EC = VTy->getElementCount().getKnownMinValue();
604 unsigned LegalEltSize = EltSize <= 32 ? 32 : 64;
606 if (EC == 2 || (LegalEltSize == 32 && EC == 4))
610 TotalHistCnts = EC / NaturalVectorWidth;
630 switch (ICA.
getID()) {
631 case Intrinsic::experimental_vector_histogram_add: {
638 case Intrinsic::umin:
639 case Intrinsic::umax:
640 case Intrinsic::smin:
641 case Intrinsic::smax: {
642 static const auto ValidMinMaxTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16,
643 MVT::v8i16, MVT::v2i32, MVT::v4i32,
644 MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32,
648 if (LT.second == MVT::v2i64)
650 if (
any_of(ValidMinMaxTys, [<](
MVT M) {
return M == LT.second; }))
654 case Intrinsic::sadd_sat:
655 case Intrinsic::ssub_sat:
656 case Intrinsic::uadd_sat:
657 case Intrinsic::usub_sat: {
658 static const auto ValidSatTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16,
659 MVT::v8i16, MVT::v2i32, MVT::v4i32,
665 LT.second.getScalarSizeInBits() == RetTy->getScalarSizeInBits() ? 1 : 4;
666 if (
any_of(ValidSatTys, [<](
MVT M) {
return M == LT.second; }))
667 return LT.first * Instrs;
672 if (ST->isSVEAvailable() && VectorSize >= 128 &&
isPowerOf2_64(VectorSize))
673 return LT.first * Instrs;
677 case Intrinsic::abs: {
678 static const auto ValidAbsTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16,
679 MVT::v8i16, MVT::v2i32, MVT::v4i32,
682 if (
any_of(ValidAbsTys, [<](
MVT M) {
return M == LT.second; }))
686 case Intrinsic::bswap: {
687 static const auto ValidAbsTys = {MVT::v4i16, MVT::v8i16, MVT::v2i32,
688 MVT::v4i32, MVT::v2i64};
690 if (
any_of(ValidAbsTys, [<](
MVT M) {
return M == LT.second; }) &&
691 LT.second.getScalarSizeInBits() == RetTy->getScalarSizeInBits())
696 case Intrinsic::fmuladd: {
701 (EltTy->
isHalfTy() && ST->hasFullFP16()))
705 case Intrinsic::stepvector: {
714 Cost += AddCost * (LT.first - 1);
718 case Intrinsic::vector_extract:
719 case Intrinsic::vector_insert: {
732 bool IsExtract = ICA.
getID() == Intrinsic::vector_extract;
733 EVT SubVecVT = IsExtract ? getTLI()->getValueType(
DL, RetTy)
741 getTLI()->getTypeConversion(
C, SubVecVT);
743 getTLI()->getTypeConversion(
C, VecVT);
751 case Intrinsic::bitreverse: {
753 {Intrinsic::bitreverse, MVT::i32, 1},
754 {Intrinsic::bitreverse, MVT::i64, 1},
755 {Intrinsic::bitreverse, MVT::v8i8, 1},
756 {Intrinsic::bitreverse, MVT::v16i8, 1},
757 {Intrinsic::bitreverse, MVT::v4i16, 2},
758 {Intrinsic::bitreverse, MVT::v8i16, 2},
759 {Intrinsic::bitreverse, MVT::v2i32, 2},
760 {Intrinsic::bitreverse, MVT::v4i32, 2},
761 {Intrinsic::bitreverse, MVT::v1i64, 2},
762 {Intrinsic::bitreverse, MVT::v2i64, 2},
770 if (TLI->getValueType(
DL, RetTy,
true) == MVT::i8 ||
771 TLI->getValueType(
DL, RetTy,
true) == MVT::i16)
772 return LegalisationCost.first * Entry->Cost + 1;
774 return LegalisationCost.first * Entry->Cost;
778 case Intrinsic::ctpop: {
779 if (!ST->hasNEON()) {
800 RetTy->getScalarSizeInBits()
803 return LT.first * Entry->Cost + ExtraCost;
807 case Intrinsic::sadd_with_overflow:
808 case Intrinsic::uadd_with_overflow:
809 case Intrinsic::ssub_with_overflow:
810 case Intrinsic::usub_with_overflow:
811 case Intrinsic::smul_with_overflow:
812 case Intrinsic::umul_with_overflow: {
814 {Intrinsic::sadd_with_overflow, MVT::i8, 3},
815 {Intrinsic::uadd_with_overflow, MVT::i8, 3},
816 {Intrinsic::sadd_with_overflow, MVT::i16, 3},
817 {Intrinsic::uadd_with_overflow, MVT::i16, 3},
818 {Intrinsic::sadd_with_overflow, MVT::i32, 1},
819 {Intrinsic::uadd_with_overflow, MVT::i32, 1},
820 {Intrinsic::sadd_with_overflow, MVT::i64, 1},
821 {Intrinsic::uadd_with_overflow, MVT::i64, 1},
822 {Intrinsic::ssub_with_overflow, MVT::i8, 3},
823 {Intrinsic::usub_with_overflow, MVT::i8, 3},
824 {Intrinsic::ssub_with_overflow, MVT::i16, 3},
825 {Intrinsic::usub_with_overflow, MVT::i16, 3},
826 {Intrinsic::ssub_with_overflow, MVT::i32, 1},
827 {Intrinsic::usub_with_overflow, MVT::i32, 1},
828 {Intrinsic::ssub_with_overflow, MVT::i64, 1},
829 {Intrinsic::usub_with_overflow, MVT::i64, 1},
830 {Intrinsic::smul_with_overflow, MVT::i8, 5},
831 {Intrinsic::umul_with_overflow, MVT::i8, 4},
832 {Intrinsic::smul_with_overflow, MVT::i16, 5},
833 {Intrinsic::umul_with_overflow, MVT::i16, 4},
834 {Intrinsic::smul_with_overflow, MVT::i32, 2},
835 {Intrinsic::umul_with_overflow, MVT::i32, 2},
836 {Intrinsic::smul_with_overflow, MVT::i64, 3},
837 {Intrinsic::umul_with_overflow, MVT::i64, 3},
839 EVT MTy = TLI->getValueType(
DL, RetTy->getContainedType(0),
true);
846 case Intrinsic::fptosi_sat:
847 case Intrinsic::fptoui_sat: {
850 bool IsSigned = ICA.
getID() == Intrinsic::fptosi_sat;
852 EVT MTy = TLI->getValueType(
DL, RetTy);
855 if ((LT.second == MVT::f32 || LT.second == MVT::f64 ||
856 LT.second == MVT::v2f32 || LT.second == MVT::v4f32 ||
857 LT.second == MVT::v2f64)) {
859 (LT.second == MVT::f64 && MTy == MVT::i32) ||
860 (LT.second == MVT::f32 && MTy == MVT::i64)))
869 if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16())
876 if ((LT.second == MVT::f16 && MTy == MVT::i32) ||
877 (LT.second == MVT::f16 && MTy == MVT::i64) ||
878 ((LT.second == MVT::v4f16 || LT.second == MVT::v8f16) &&
892 if ((LT.second.getScalarType() == MVT::f32 ||
893 LT.second.getScalarType() == MVT::f64 ||
894 LT.second.getScalarType() == MVT::f16) &&
898 if (LT.second.isVector())
902 LegalTy, {LegalTy, LegalTy});
905 LegalTy, {LegalTy, LegalTy});
907 return LT.first *
Cost +
908 ((LT.second.getScalarType() != MVT::f16 || ST->hasFullFP16()) ? 0
914 RetTy = RetTy->getScalarType();
915 if (LT.second.isVector()) {
933 return LT.first *
Cost;
935 case Intrinsic::fshl:
936 case Intrinsic::fshr: {
945 if (RetTy->isIntegerTy() && ICA.
getArgs()[0] == ICA.
getArgs()[1] &&
946 (RetTy->getPrimitiveSizeInBits() == 32 ||
947 RetTy->getPrimitiveSizeInBits() == 64)) {
960 {Intrinsic::fshl, MVT::v4i32, 2},
961 {Intrinsic::fshl, MVT::v2i64, 2}, {Intrinsic::fshl, MVT::v16i8, 2},
962 {Intrinsic::fshl, MVT::v8i16, 2}, {Intrinsic::fshl, MVT::v2i32, 2},
963 {Intrinsic::fshl, MVT::v8i8, 2}, {Intrinsic::fshl, MVT::v4i16, 2}};
969 return LegalisationCost.first * Entry->Cost;
973 if (!RetTy->isIntegerTy())
978 bool HigherCost = (RetTy->getScalarSizeInBits() != 32 &&
979 RetTy->getScalarSizeInBits() < 64) ||
980 (RetTy->getScalarSizeInBits() % 64 != 0);
981 unsigned ExtraCost = HigherCost ? 1 : 0;
982 if (RetTy->getScalarSizeInBits() == 32 ||
983 RetTy->getScalarSizeInBits() == 64)
990 return TyL.first + ExtraCost;
992 case Intrinsic::get_active_lane_mask: {
994 EVT RetVT = getTLI()->getValueType(
DL, RetTy);
996 if (getTLI()->shouldExpandGetActiveLaneMask(RetVT, OpVT))
999 if (RetTy->isScalableTy()) {
1000 if (TLI->getTypeAction(RetTy->getContext(), RetVT) !=
1010 if (ST->hasSVE2p1() || ST->hasSME2()) {
1025 return Cost + (SplitCost * (
Cost - 1));
1040 case Intrinsic::experimental_vector_match: {
1043 unsigned SearchSize = NeedleTy->getNumElements();
1044 if (!getTLI()->shouldExpandVectorMatch(SearchVT, SearchSize)) {
1057 case Intrinsic::experimental_cttz_elts: {
1059 if (!getTLI()->shouldExpandCttzElements(ArgVT)) {
1067 case Intrinsic::loop_dependence_raw_mask:
1068 case Intrinsic::loop_dependence_war_mask: {
1070 if (ST->hasSVE2() || ST->hasSME()) {
1071 EVT VecVT = getTLI()->getValueType(
DL, RetTy);
1072 unsigned EltSizeInBytes =
1080 case Intrinsic::experimental_vector_extract_last_active:
1081 if (ST->isSVEorStreamingSVEAvailable()) {
1098 auto RequiredType =
II.getType();
1101 assert(PN &&
"Expected Phi Node!");
1104 if (!PN->hasOneUse())
1105 return std::nullopt;
1107 for (
Value *IncValPhi : PN->incoming_values()) {
1110 Reinterpret->getIntrinsicID() !=
1111 Intrinsic::aarch64_sve_convert_to_svbool ||
1112 RequiredType != Reinterpret->getArgOperand(0)->getType())
1113 return std::nullopt;
1121 for (
unsigned I = 0;
I < PN->getNumIncomingValues();
I++) {
1123 NPN->
addIncoming(Reinterpret->getOperand(0), PN->getIncomingBlock(
I));
1196 return GoverningPredicateIdx != std::numeric_limits<unsigned>::max();
1201 return GoverningPredicateIdx;
1206 GoverningPredicateIdx = Index;
1224 return UndefIntrinsic;
1229 UndefIntrinsic = IID;
1251 return ResultLanes == InactiveLanesTakenFromOperand;
1256 return OperandIdxForInactiveLanes;
1260 assert(ResultLanes == Uninitialized &&
"Cannot set property twice!");
1261 ResultLanes = InactiveLanesTakenFromOperand;
1262 OperandIdxForInactiveLanes = Index;
1267 return ResultLanes == InactiveLanesAreNotDefined;
1271 assert(ResultLanes == Uninitialized &&
"Cannot set property twice!");
1272 ResultLanes = InactiveLanesAreNotDefined;
1277 return ResultLanes == InactiveLanesAreUnused;
1281 assert(ResultLanes == Uninitialized &&
"Cannot set property twice!");
1282 ResultLanes = InactiveLanesAreUnused;
1292 ResultIsZeroInitialized =
true;
1303 return OperandIdxWithNoActiveLanes != std::numeric_limits<unsigned>::max();
1308 return OperandIdxWithNoActiveLanes;
1313 OperandIdxWithNoActiveLanes = Index;
1318 unsigned GoverningPredicateIdx = std::numeric_limits<unsigned>::max();
1321 unsigned IROpcode = 0;
1323 enum PredicationStyle {
1325 InactiveLanesTakenFromOperand,
1326 InactiveLanesAreNotDefined,
1327 InactiveLanesAreUnused
1330 bool ResultIsZeroInitialized =
false;
1331 unsigned OperandIdxForInactiveLanes = std::numeric_limits<unsigned>::max();
1332 unsigned OperandIdxWithNoActiveLanes = std::numeric_limits<unsigned>::max();
1340 return !isa<ScalableVectorType>(V->getType());
1348 case Intrinsic::aarch64_sve_fcvt_bf16f32_v2:
1349 case Intrinsic::aarch64_sve_fcvt_f16f32:
1350 case Intrinsic::aarch64_sve_fcvt_f16f64:
1351 case Intrinsic::aarch64_sve_fcvt_f32f16:
1352 case Intrinsic::aarch64_sve_fcvt_f32f64:
1353 case Intrinsic::aarch64_sve_fcvt_f64f16:
1354 case Intrinsic::aarch64_sve_fcvt_f64f32:
1355 case Intrinsic::aarch64_sve_fcvtlt_f32f16:
1356 case Intrinsic::aarch64_sve_fcvtlt_f64f32:
1357 case Intrinsic::aarch64_sve_fcvtx_f32f64:
1358 case Intrinsic::aarch64_sve_fcvtzs:
1359 case Intrinsic::aarch64_sve_fcvtzs_i32f16:
1360 case Intrinsic::aarch64_sve_fcvtzs_i32f64:
1361 case Intrinsic::aarch64_sve_fcvtzs_i64f16:
1362 case Intrinsic::aarch64_sve_fcvtzs_i64f32:
1363 case Intrinsic::aarch64_sve_fcvtzu:
1364 case Intrinsic::aarch64_sve_fcvtzu_i32f16:
1365 case Intrinsic::aarch64_sve_fcvtzu_i32f64:
1366 case Intrinsic::aarch64_sve_fcvtzu_i64f16:
1367 case Intrinsic::aarch64_sve_fcvtzu_i64f32:
1368 case Intrinsic::aarch64_sve_scvtf:
1369 case Intrinsic::aarch64_sve_scvtf_f16i32:
1370 case Intrinsic::aarch64_sve_scvtf_f16i64:
1371 case Intrinsic::aarch64_sve_scvtf_f32i64:
1372 case Intrinsic::aarch64_sve_scvtf_f64i32:
1373 case Intrinsic::aarch64_sve_ucvtf:
1374 case Intrinsic::aarch64_sve_ucvtf_f16i32:
1375 case Intrinsic::aarch64_sve_ucvtf_f16i64:
1376 case Intrinsic::aarch64_sve_ucvtf_f32i64:
1377 case Intrinsic::aarch64_sve_ucvtf_f64i32:
1380 case Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2:
1381 case Intrinsic::aarch64_sve_fcvtnt_f16f32:
1382 case Intrinsic::aarch64_sve_fcvtnt_f32f64:
1383 case Intrinsic::aarch64_sve_fcvtxnt_f32f64:
1386 case Intrinsic::aarch64_sve_fabd:
1388 case Intrinsic::aarch64_sve_fadd:
1391 case Intrinsic::aarch64_sve_fdiv:
1394 case Intrinsic::aarch64_sve_fmax:
1396 case Intrinsic::aarch64_sve_fmaxnm:
1398 case Intrinsic::aarch64_sve_fmin:
1400 case Intrinsic::aarch64_sve_fminnm:
1402 case Intrinsic::aarch64_sve_fmla:
1404 case Intrinsic::aarch64_sve_fmls:
1406 case Intrinsic::aarch64_sve_fmul:
1409 case Intrinsic::aarch64_sve_fmulx:
1411 case Intrinsic::aarch64_sve_fnmla:
1413 case Intrinsic::aarch64_sve_fnmls:
1415 case Intrinsic::aarch64_sve_fsub:
1418 case Intrinsic::aarch64_sve_add:
1421 case Intrinsic::aarch64_sve_mla:
1423 case Intrinsic::aarch64_sve_mls:
1425 case Intrinsic::aarch64_sve_mul:
1428 case Intrinsic::aarch64_sve_sabd:
1430 case Intrinsic::aarch64_sve_sdiv:
1433 case Intrinsic::aarch64_sve_smax:
1435 case Intrinsic::aarch64_sve_smin:
1437 case Intrinsic::aarch64_sve_smulh:
1439 case Intrinsic::aarch64_sve_sub:
1442 case Intrinsic::aarch64_sve_uabd:
1444 case Intrinsic::aarch64_sve_udiv:
1447 case Intrinsic::aarch64_sve_umax:
1449 case Intrinsic::aarch64_sve_umin:
1451 case Intrinsic::aarch64_sve_umulh:
1453 case Intrinsic::aarch64_sve_asr:
1456 case Intrinsic::aarch64_sve_lsl:
1459 case Intrinsic::aarch64_sve_lsr:
1462 case Intrinsic::aarch64_sve_and:
1465 case Intrinsic::aarch64_sve_bic:
1467 case Intrinsic::aarch64_sve_eor:
1470 case Intrinsic::aarch64_sve_orr:
1473 case Intrinsic::aarch64_sve_shsub:
1475 case Intrinsic::aarch64_sve_shsubr:
1477 case Intrinsic::aarch64_sve_sqrshl:
1479 case Intrinsic::aarch64_sve_sqshl:
1481 case Intrinsic::aarch64_sve_sqsub:
1483 case Intrinsic::aarch64_sve_srshl:
1485 case Intrinsic::aarch64_sve_uhsub:
1487 case Intrinsic::aarch64_sve_uhsubr:
1489 case Intrinsic::aarch64_sve_uqrshl:
1491 case Intrinsic::aarch64_sve_uqshl:
1493 case Intrinsic::aarch64_sve_uqsub:
1495 case Intrinsic::aarch64_sve_urshl:
1498 case Intrinsic::aarch64_sve_add_u:
1501 case Intrinsic::aarch64_sve_and_u:
1504 case Intrinsic::aarch64_sve_asr_u:
1507 case Intrinsic::aarch64_sve_eor_u:
1510 case Intrinsic::aarch64_sve_fadd_u:
1513 case Intrinsic::aarch64_sve_fdiv_u:
1516 case Intrinsic::aarch64_sve_fmul_u:
1519 case Intrinsic::aarch64_sve_fsub_u:
1522 case Intrinsic::aarch64_sve_lsl_u:
1525 case Intrinsic::aarch64_sve_lsr_u:
1528 case Intrinsic::aarch64_sve_mul_u:
1531 case Intrinsic::aarch64_sve_orr_u:
1534 case Intrinsic::aarch64_sve_sdiv_u:
1537 case Intrinsic::aarch64_sve_sub_u:
1540 case Intrinsic::aarch64_sve_udiv_u:
1544 case Intrinsic::aarch64_sve_addqv:
1545 case Intrinsic::aarch64_sve_and_z:
1546 case Intrinsic::aarch64_sve_bic_z:
1547 case Intrinsic::aarch64_sve_brka_z:
1548 case Intrinsic::aarch64_sve_brkb_z:
1549 case Intrinsic::aarch64_sve_brkn_z:
1550 case Intrinsic::aarch64_sve_brkpa_z:
1551 case Intrinsic::aarch64_sve_brkpb_z:
1552 case Intrinsic::aarch64_sve_cntp:
1553 case Intrinsic::aarch64_sve_compact:
1554 case Intrinsic::aarch64_sve_eor_z:
1555 case Intrinsic::aarch64_sve_eorv:
1556 case Intrinsic::aarch64_sve_eorqv:
1557 case Intrinsic::aarch64_sve_nand_z:
1558 case Intrinsic::aarch64_sve_nor_z:
1559 case Intrinsic::aarch64_sve_orn_z:
1560 case Intrinsic::aarch64_sve_orr_z:
1561 case Intrinsic::aarch64_sve_orv:
1562 case Intrinsic::aarch64_sve_orqv:
1563 case Intrinsic::aarch64_sve_pnext:
1564 case Intrinsic::aarch64_sve_rdffr_z:
1565 case Intrinsic::aarch64_sve_saddv:
1566 case Intrinsic::aarch64_sve_uaddv:
1567 case Intrinsic::aarch64_sve_umaxv:
1568 case Intrinsic::aarch64_sve_umaxqv:
1569 case Intrinsic::aarch64_sve_cmpeq:
1570 case Intrinsic::aarch64_sve_cmpeq_wide:
1571 case Intrinsic::aarch64_sve_cmpge:
1572 case Intrinsic::aarch64_sve_cmpge_wide:
1573 case Intrinsic::aarch64_sve_cmpgt:
1574 case Intrinsic::aarch64_sve_cmpgt_wide:
1575 case Intrinsic::aarch64_sve_cmphi:
1576 case Intrinsic::aarch64_sve_cmphi_wide:
1577 case Intrinsic::aarch64_sve_cmphs:
1578 case Intrinsic::aarch64_sve_cmphs_wide:
1579 case Intrinsic::aarch64_sve_cmple_wide:
1580 case Intrinsic::aarch64_sve_cmplo_wide:
1581 case Intrinsic::aarch64_sve_cmpls_wide:
1582 case Intrinsic::aarch64_sve_cmplt_wide:
1583 case Intrinsic::aarch64_sve_cmpne:
1584 case Intrinsic::aarch64_sve_cmpne_wide:
1585 case Intrinsic::aarch64_sve_facge:
1586 case Intrinsic::aarch64_sve_facgt:
1587 case Intrinsic::aarch64_sve_fcmpeq:
1588 case Intrinsic::aarch64_sve_fcmpge:
1589 case Intrinsic::aarch64_sve_fcmpgt:
1590 case Intrinsic::aarch64_sve_fcmpne:
1591 case Intrinsic::aarch64_sve_fcmpuo:
1592 case Intrinsic::aarch64_sve_ld1:
1593 case Intrinsic::aarch64_sve_ld1_gather:
1594 case Intrinsic::aarch64_sve_ld1_gather_index:
1595 case Intrinsic::aarch64_sve_ld1_gather_scalar_offset:
1596 case Intrinsic::aarch64_sve_ld1_gather_sxtw:
1597 case Intrinsic::aarch64_sve_ld1_gather_sxtw_index:
1598 case Intrinsic::aarch64_sve_ld1_gather_uxtw:
1599 case Intrinsic::aarch64_sve_ld1_gather_uxtw_index:
1600 case Intrinsic::aarch64_sve_ld1q_gather_index:
1601 case Intrinsic::aarch64_sve_ld1q_gather_scalar_offset:
1602 case Intrinsic::aarch64_sve_ld1q_gather_vector_offset:
1603 case Intrinsic::aarch64_sve_ld1ro:
1604 case Intrinsic::aarch64_sve_ld1rq:
1605 case Intrinsic::aarch64_sve_ld1udq:
1606 case Intrinsic::aarch64_sve_ld1uwq:
1607 case Intrinsic::aarch64_sve_ld2_sret:
1608 case Intrinsic::aarch64_sve_ld2q_sret:
1609 case Intrinsic::aarch64_sve_ld3_sret:
1610 case Intrinsic::aarch64_sve_ld3q_sret:
1611 case Intrinsic::aarch64_sve_ld4_sret:
1612 case Intrinsic::aarch64_sve_ld4q_sret:
1613 case Intrinsic::aarch64_sve_ldff1:
1614 case Intrinsic::aarch64_sve_ldff1_gather:
1615 case Intrinsic::aarch64_sve_ldff1_gather_index:
1616 case Intrinsic::aarch64_sve_ldff1_gather_scalar_offset:
1617 case Intrinsic::aarch64_sve_ldff1_gather_sxtw:
1618 case Intrinsic::aarch64_sve_ldff1_gather_sxtw_index:
1619 case Intrinsic::aarch64_sve_ldff1_gather_uxtw:
1620 case Intrinsic::aarch64_sve_ldff1_gather_uxtw_index:
1621 case Intrinsic::aarch64_sve_ldnf1:
1622 case Intrinsic::aarch64_sve_ldnt1:
1623 case Intrinsic::aarch64_sve_ldnt1_gather:
1624 case Intrinsic::aarch64_sve_ldnt1_gather_index:
1625 case Intrinsic::aarch64_sve_ldnt1_gather_scalar_offset:
1626 case Intrinsic::aarch64_sve_ldnt1_gather_uxtw:
1629 case Intrinsic::aarch64_sve_prf:
1630 case Intrinsic::aarch64_sve_prfb_gather_index:
1631 case Intrinsic::aarch64_sve_prfb_gather_scalar_offset:
1632 case Intrinsic::aarch64_sve_prfb_gather_sxtw_index:
1633 case Intrinsic::aarch64_sve_prfb_gather_uxtw_index:
1634 case Intrinsic::aarch64_sve_prfd_gather_index:
1635 case Intrinsic::aarch64_sve_prfd_gather_scalar_offset:
1636 case Intrinsic::aarch64_sve_prfd_gather_sxtw_index:
1637 case Intrinsic::aarch64_sve_prfd_gather_uxtw_index:
1638 case Intrinsic::aarch64_sve_prfh_gather_index:
1639 case Intrinsic::aarch64_sve_prfh_gather_scalar_offset:
1640 case Intrinsic::aarch64_sve_prfh_gather_sxtw_index:
1641 case Intrinsic::aarch64_sve_prfh_gather_uxtw_index:
1642 case Intrinsic::aarch64_sve_prfw_gather_index:
1643 case Intrinsic::aarch64_sve_prfw_gather_scalar_offset:
1644 case Intrinsic::aarch64_sve_prfw_gather_sxtw_index:
1645 case Intrinsic::aarch64_sve_prfw_gather_uxtw_index:
1648 case Intrinsic::aarch64_sve_st1_scatter:
1649 case Intrinsic::aarch64_sve_st1_scatter_scalar_offset:
1650 case Intrinsic::aarch64_sve_st1_scatter_sxtw:
1651 case Intrinsic::aarch64_sve_st1_scatter_sxtw_index:
1652 case Intrinsic::aarch64_sve_st1_scatter_uxtw:
1653 case Intrinsic::aarch64_sve_st1_scatter_uxtw_index:
1654 case Intrinsic::aarch64_sve_st1dq:
1655 case Intrinsic::aarch64_sve_st1q_scatter_index:
1656 case Intrinsic::aarch64_sve_st1q_scatter_scalar_offset:
1657 case Intrinsic::aarch64_sve_st1q_scatter_vector_offset:
1658 case Intrinsic::aarch64_sve_st1wq:
1659 case Intrinsic::aarch64_sve_stnt1:
1660 case Intrinsic::aarch64_sve_stnt1_scatter:
1661 case Intrinsic::aarch64_sve_stnt1_scatter_index:
1662 case Intrinsic::aarch64_sve_stnt1_scatter_scalar_offset:
1663 case Intrinsic::aarch64_sve_stnt1_scatter_uxtw:
1665 case Intrinsic::aarch64_sve_st2:
1666 case Intrinsic::aarch64_sve_st2q:
1668 case Intrinsic::aarch64_sve_st3:
1669 case Intrinsic::aarch64_sve_st3q:
1671 case Intrinsic::aarch64_sve_st4:
1672 case Intrinsic::aarch64_sve_st4q:
1680 Value *UncastedPred;
1686 Pred = UncastedPred;
1692 if (OrigPredTy->getMinNumElements() <=
1694 ->getMinNumElements())
1695 Pred = UncastedPred;
1699 return C &&
C->isAllOnesValue();
1706 if (Dup && Dup->getIntrinsicID() == Intrinsic::aarch64_sve_dup &&
1707 Dup->getOperand(1) == Pg &&
isa<Constant>(Dup->getOperand(2)))
1715static std::optional<Instruction *>
1722 Value *Op1 =
II.getOperand(1);
1723 Value *Op2 =
II.getOperand(2);
1749 return std::nullopt;
1757 if (SimpleII == Inactive)
1767static std::optional<Instruction *>
1771 return std::nullopt;
1800 II.setCalledFunction(NewDecl);
1810 return std::nullopt;
1822static std::optional<Instruction *>
1826 return std::nullopt;
1828 auto IntrinsicID = BinOp->getIntrinsicID();
1829 switch (IntrinsicID) {
1830 case Intrinsic::aarch64_sve_and_z:
1831 case Intrinsic::aarch64_sve_bic_z:
1832 case Intrinsic::aarch64_sve_eor_z:
1833 case Intrinsic::aarch64_sve_nand_z:
1834 case Intrinsic::aarch64_sve_nor_z:
1835 case Intrinsic::aarch64_sve_orn_z:
1836 case Intrinsic::aarch64_sve_orr_z:
1839 return std::nullopt;
1842 auto BinOpPred = BinOp->getOperand(0);
1843 auto BinOpOp1 = BinOp->getOperand(1);
1844 auto BinOpOp2 = BinOp->getOperand(2);
1848 PredIntr->getIntrinsicID() != Intrinsic::aarch64_sve_convert_to_svbool)
1849 return std::nullopt;
1851 auto PredOp = PredIntr->getOperand(0);
1853 if (PredOpTy !=
II.getType())
1854 return std::nullopt;
1858 Intrinsic::aarch64_sve_convert_from_svbool, {PredOpTy}, {BinOpOp1});
1859 NarrowedBinOpArgs.
push_back(NarrowBinOpOp1);
1860 if (BinOpOp1 == BinOpOp2)
1861 NarrowedBinOpArgs.
push_back(NarrowBinOpOp1);
1864 Intrinsic::aarch64_sve_convert_from_svbool, {PredOpTy}, {BinOpOp2}));
1866 auto NarrowedBinOp =
1871static std::optional<Instruction *>
1878 return BinOpCombine;
1883 return std::nullopt;
1886 Value *Cursor =
II.getOperand(0), *EarliestReplacement =
nullptr;
1895 if (CursorVTy->getElementCount().getKnownMinValue() <
1896 IVTy->getElementCount().getKnownMinValue())
1900 if (Cursor->getType() == IVTy)
1901 EarliestReplacement = Cursor;
1906 if (!IntrinsicCursor || !(IntrinsicCursor->getIntrinsicID() ==
1907 Intrinsic::aarch64_sve_convert_to_svbool ||
1908 IntrinsicCursor->getIntrinsicID() ==
1909 Intrinsic::aarch64_sve_convert_from_svbool))
1912 CandidatesForRemoval.
insert(CandidatesForRemoval.
begin(), IntrinsicCursor);
1913 Cursor = IntrinsicCursor->getOperand(0);
1918 if (!EarliestReplacement)
1919 return std::nullopt;
1927 auto *OpPredicate =
II.getOperand(0);
1944 II.getArgOperand(2));
1950 return std::nullopt;
1954 II.getArgOperand(0),
II.getArgOperand(2),
uint64_t(0));
1963 II.getArgOperand(0));
1973 return std::nullopt;
1978 if (!SplatValue || !SplatValue->isZero())
1979 return std::nullopt;
1984 DupQLane->getIntrinsicID() != Intrinsic::aarch64_sve_dupq_lane)
1985 return std::nullopt;
1989 if (!DupQLaneIdx || !DupQLaneIdx->isZero())
1990 return std::nullopt;
1993 if (!VecIns || VecIns->getIntrinsicID() != Intrinsic::vector_insert)
1994 return std::nullopt;
1999 return std::nullopt;
2002 return std::nullopt;
2006 return std::nullopt;
2010 if (!VecTy || !OutTy || VecTy->getNumElements() != OutTy->getMinNumElements())
2011 return std::nullopt;
2013 unsigned NumElts = VecTy->getNumElements();
2014 unsigned PredicateBits = 0;
2017 for (
unsigned I = 0;
I < NumElts; ++
I) {
2020 return std::nullopt;
2022 PredicateBits |= 1 << (
I * (16 / NumElts));
2026 if (PredicateBits == 0) {
2028 PFalse->takeName(&
II);
2034 for (
unsigned I = 0;
I < 16; ++
I)
2035 if ((PredicateBits & (1 <<
I)) != 0)
2038 unsigned PredSize = Mask & -Mask;
2043 for (
unsigned I = 0;
I < 16;
I += PredSize)
2044 if ((PredicateBits & (1 <<
I)) == 0)
2045 return std::nullopt;
2050 {PredType}, {PTruePat});
2052 Intrinsic::aarch64_sve_convert_to_svbool, {PredType}, {PTrue});
2053 auto *ConvertFromSVBool =
2055 {
II.getType()}, {ConvertToSVBool});
2063 Value *Pg =
II.getArgOperand(0);
2064 Value *Vec =
II.getArgOperand(1);
2065 auto IntrinsicID =
II.getIntrinsicID();
2066 bool IsAfter = IntrinsicID == Intrinsic::aarch64_sve_lasta;
2078 auto OpC = OldBinOp->getOpcode();
2084 OpC, NewLHS, NewRHS, OldBinOp, OldBinOp->getName(),
II.getIterator());
2090 if (IsAfter &&
C &&
C->isNullValue()) {
2094 Extract->insertBefore(
II.getIterator());
2095 Extract->takeName(&
II);
2101 return std::nullopt;
2103 if (IntrPG->getIntrinsicID() != Intrinsic::aarch64_sve_ptrue)
2104 return std::nullopt;
2106 const auto PTruePattern =
2112 return std::nullopt;
2114 unsigned Idx = MinNumElts - 1;
2124 if (Idx >= PgVTy->getMinNumElements())
2125 return std::nullopt;
2130 Extract->insertBefore(
II.getIterator());
2131 Extract->takeName(&
II);
2144 Value *Pg =
II.getArgOperand(0);
2146 Value *Vec =
II.getArgOperand(2);
2149 if (!Ty->isIntegerTy())
2150 return std::nullopt;
2155 return std::nullopt;
2172 II.getIntrinsicID(), {FPVec->getType()}, {Pg, FPFallBack, FPVec});
2185 {
II.getType()}, {AllPat});
2192static std::optional<Instruction *>
2196 if (
Pattern == AArch64SVEPredPattern::all) {
2205 return MinNumElts && NumElts >= MinNumElts
2207 II, ConstantInt::get(
II.getType(), MinNumElts)))
2211static std::optional<Instruction *>
2214 if (!ST->isStreaming())
2215 return std::nullopt;
2227 Value *PgVal =
II.getArgOperand(0);
2228 Value *OpVal =
II.getArgOperand(1);
2232 if (PgVal == OpVal &&
2233 (
II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_first ||
2234 II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_last)) {
2249 return std::nullopt;
2253 if (Pg->
getIntrinsicID() == Intrinsic::aarch64_sve_convert_to_svbool &&
2254 OpIID == Intrinsic::aarch64_sve_convert_to_svbool &&
2268 if ((Pg ==
Op) && (
II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_any) &&
2269 ((OpIID == Intrinsic::aarch64_sve_brka_z) ||
2270 (OpIID == Intrinsic::aarch64_sve_brkb_z) ||
2271 (OpIID == Intrinsic::aarch64_sve_brkpa_z) ||
2272 (OpIID == Intrinsic::aarch64_sve_brkpb_z) ||
2273 (OpIID == Intrinsic::aarch64_sve_rdffr_z) ||
2274 (OpIID == Intrinsic::aarch64_sve_and_z) ||
2275 (OpIID == Intrinsic::aarch64_sve_bic_z) ||
2276 (OpIID == Intrinsic::aarch64_sve_eor_z) ||
2277 (OpIID == Intrinsic::aarch64_sve_nand_z) ||
2278 (OpIID == Intrinsic::aarch64_sve_nor_z) ||
2279 (OpIID == Intrinsic::aarch64_sve_orn_z) ||
2280 (OpIID == Intrinsic::aarch64_sve_orr_z))) {
2290 return std::nullopt;
2293template <Intrinsic::ID MulOpc, Intrinsic::ID FuseOpc>
2294static std::optional<Instruction *>
2296 bool MergeIntoAddendOp) {
2298 Value *MulOp0, *MulOp1, *AddendOp, *
Mul;
2299 if (MergeIntoAddendOp) {
2300 AddendOp =
II.getOperand(1);
2301 Mul =
II.getOperand(2);
2303 AddendOp =
II.getOperand(2);
2304 Mul =
II.getOperand(1);
2309 return std::nullopt;
2311 if (!
Mul->hasOneUse())
2312 return std::nullopt;
2315 if (
II.getType()->isFPOrFPVectorTy()) {
2320 return std::nullopt;
2322 return std::nullopt;
2327 if (MergeIntoAddendOp)
2337static std::optional<Instruction *>
2339 Value *Pred =
II.getOperand(0);
2340 Value *PtrOp =
II.getOperand(1);
2341 Type *VecTy =
II.getType();
2345 Load->copyMetadata(
II);
2356static std::optional<Instruction *>
2358 Value *VecOp =
II.getOperand(0);
2359 Value *Pred =
II.getOperand(1);
2360 Value *PtrOp =
II.getOperand(2);
2364 Store->copyMetadata(
II);
2376 case Intrinsic::aarch64_sve_fmul_u:
2377 return Instruction::BinaryOps::FMul;
2378 case Intrinsic::aarch64_sve_fadd_u:
2379 return Instruction::BinaryOps::FAdd;
2380 case Intrinsic::aarch64_sve_fsub_u:
2381 return Instruction::BinaryOps::FSub;
2383 return Instruction::BinaryOpsEnd;
2387static std::optional<Instruction *>
2390 if (
II.isStrictFP())
2391 return std::nullopt;
2393 auto *OpPredicate =
II.getOperand(0);
2395 if (BinOpCode == Instruction::BinaryOpsEnd ||
2397 return std::nullopt;
2399 BinOpCode,
II.getOperand(1),
II.getOperand(2),
II.getFastMathFlags());
2406 Intrinsic::aarch64_sve_mla>(
2410 Intrinsic::aarch64_sve_mad>(
2413 return std::nullopt;
2416static std::optional<Instruction *>
2420 Intrinsic::aarch64_sve_fmla>(IC,
II,
2425 Intrinsic::aarch64_sve_fmad>(IC,
II,
2430 Intrinsic::aarch64_sve_fmla>(IC,
II,
2433 return std::nullopt;
2436static std::optional<Instruction *>
2440 Intrinsic::aarch64_sve_fmla>(IC,
II,
2445 Intrinsic::aarch64_sve_fmad>(IC,
II,
2450 Intrinsic::aarch64_sve_fmla_u>(
2456static std::optional<Instruction *>
2460 Intrinsic::aarch64_sve_fmls>(IC,
II,
2465 Intrinsic::aarch64_sve_fnmsb>(
2470 Intrinsic::aarch64_sve_fmls>(IC,
II,
2473 return std::nullopt;
2476static std::optional<Instruction *>
2480 Intrinsic::aarch64_sve_fmls>(IC,
II,
2485 Intrinsic::aarch64_sve_fnmsb>(
2490 Intrinsic::aarch64_sve_fmls_u>(
2499 Intrinsic::aarch64_sve_mls>(
2502 return std::nullopt;
2507 Value *UnpackArg =
II.getArgOperand(0);
2509 bool IsSigned =
II.getIntrinsicID() == Intrinsic::aarch64_sve_sunpkhi ||
2510 II.getIntrinsicID() == Intrinsic::aarch64_sve_sunpklo;
2523 return std::nullopt;
2527 auto *OpVal =
II.getOperand(0);
2528 auto *OpIndices =
II.getOperand(1);
2535 SplatValue->getValue().uge(VTy->getElementCount().getKnownMinValue()))
2536 return std::nullopt;
2551 Type *RetTy =
II.getType();
2552 constexpr Intrinsic::ID FromSVB = Intrinsic::aarch64_sve_convert_from_svbool;
2553 constexpr Intrinsic::ID ToSVB = Intrinsic::aarch64_sve_convert_to_svbool;
2557 if ((
match(
II.getArgOperand(0),
2564 if (TyA ==
B->getType() &&
2569 TyA->getMinNumElements());
2575 return std::nullopt;
2583 if (
match(
II.getArgOperand(0),
2588 II, (
II.getIntrinsicID() == Intrinsic::aarch64_sve_zip1 ?
A :
B));
2590 return std::nullopt;
2593static std::optional<Instruction *>
2595 Value *Mask =
II.getOperand(0);
2596 Value *BasePtr =
II.getOperand(1);
2597 Value *Index =
II.getOperand(2);
2608 BasePtr->getPointerAlignment(
II.getDataLayout());
2611 BasePtr, IndexBase);
2618 return std::nullopt;
2621static std::optional<Instruction *>
2623 Value *Val =
II.getOperand(0);
2624 Value *Mask =
II.getOperand(1);
2625 Value *BasePtr =
II.getOperand(2);
2626 Value *Index =
II.getOperand(3);
2636 BasePtr->getPointerAlignment(
II.getDataLayout());
2639 BasePtr, IndexBase);
2645 return std::nullopt;
2651 Value *Pred =
II.getOperand(0);
2652 Value *Vec =
II.getOperand(1);
2653 Value *DivVec =
II.getOperand(2);
2657 if (!SplatConstantInt)
2658 return std::nullopt;
2662 if (DivisorValue == -1)
2663 return std::nullopt;
2664 if (DivisorValue == 1)
2670 Intrinsic::aarch64_sve_asrd, {
II.getType()}, {Pred, Vec, DivisorLog2});
2677 Intrinsic::aarch64_sve_asrd, {
II.getType()}, {Pred, Vec, DivisorLog2});
2679 Intrinsic::aarch64_sve_neg, {ASRD->getType()}, {ASRD, Pred, ASRD});
2683 return std::nullopt;
2687 size_t VecSize = Vec.
size();
2692 size_t HalfVecSize = VecSize / 2;
2696 if (*
LHS !=
nullptr && *
RHS !=
nullptr) {
2704 if (*
LHS ==
nullptr && *
RHS !=
nullptr)
2722 return std::nullopt;
2729 Elts[Idx->getValue().getZExtValue()] = InsertElt->getOperand(1);
2730 CurrentInsertElt = InsertElt->getOperand(0);
2736 return std::nullopt;
2740 for (
size_t I = 0;
I < Elts.
size();
I++) {
2741 if (Elts[
I] ==
nullptr)
2746 if (InsertEltChain ==
nullptr)
2747 return std::nullopt;
2753 unsigned PatternWidth = IIScalableTy->getScalarSizeInBits() * Elts.
size();
2754 unsigned PatternElementCount = IIScalableTy->getScalarSizeInBits() *
2755 IIScalableTy->getMinNumElements() /
2760 auto *WideShuffleMaskTy =
2771 auto NarrowBitcast =
2784 return std::nullopt;
2789 Value *Pred =
II.getOperand(0);
2790 Value *Vec =
II.getOperand(1);
2791 Value *Shift =
II.getOperand(2);
2794 Value *AbsPred, *MergedValue;
2800 return std::nullopt;
2808 return std::nullopt;
2813 return std::nullopt;
2816 {
II.getType()}, {Pred, Vec, Shift});
2823 Value *Vec =
II.getOperand(0);
2828 return std::nullopt;
2834 auto *NI =
II.getNextNode();
2837 return !
I->mayReadOrWriteMemory() && !
I->mayHaveSideEffects();
2839 while (LookaheadThreshold-- && CanSkipOver(NI)) {
2840 auto *NIBB = NI->getParent();
2841 NI = NI->getNextNode();
2843 if (
auto *SuccBB = NIBB->getUniqueSuccessor())
2844 NI = &*SuccBB->getFirstNonPHIOrDbgOrLifetime();
2850 if (NextII &&
II.isIdenticalTo(NextII))
2853 return std::nullopt;
2861 {II.getType(), II.getOperand(0)->getType()},
2862 {II.getOperand(0), II.getOperand(1)}));
2869 return std::nullopt;
2875 Value *Passthru =
II.getOperand(0);
2883 auto *Mask = ConstantInt::get(Ty, MaskValue);
2889 return std::nullopt;
2892static std::optional<Instruction *>
2899 return std::nullopt;
2902std::optional<Instruction *>
2913 case Intrinsic::aarch64_dmb:
2915 case Intrinsic::aarch64_neon_fmaxnm:
2916 case Intrinsic::aarch64_neon_fminnm:
2918 case Intrinsic::aarch64_sve_convert_from_svbool:
2920 case Intrinsic::aarch64_sve_dup:
2922 case Intrinsic::aarch64_sve_dup_x:
2924 case Intrinsic::aarch64_sve_cmpne:
2925 case Intrinsic::aarch64_sve_cmpne_wide:
2927 case Intrinsic::aarch64_sve_rdffr:
2929 case Intrinsic::aarch64_sve_lasta:
2930 case Intrinsic::aarch64_sve_lastb:
2932 case Intrinsic::aarch64_sve_clasta_n:
2933 case Intrinsic::aarch64_sve_clastb_n:
2935 case Intrinsic::aarch64_sve_cntd:
2937 case Intrinsic::aarch64_sve_cntw:
2939 case Intrinsic::aarch64_sve_cnth:
2941 case Intrinsic::aarch64_sve_cntb:
2943 case Intrinsic::aarch64_sme_cntsd:
2945 case Intrinsic::aarch64_sve_ptest_any:
2946 case Intrinsic::aarch64_sve_ptest_first:
2947 case Intrinsic::aarch64_sve_ptest_last:
2949 case Intrinsic::aarch64_sve_fadd:
2951 case Intrinsic::aarch64_sve_fadd_u:
2953 case Intrinsic::aarch64_sve_fmul_u:
2955 case Intrinsic::aarch64_sve_fsub:
2957 case Intrinsic::aarch64_sve_fsub_u:
2959 case Intrinsic::aarch64_sve_add:
2961 case Intrinsic::aarch64_sve_add_u:
2963 Intrinsic::aarch64_sve_mla_u>(
2965 case Intrinsic::aarch64_sve_sub:
2967 case Intrinsic::aarch64_sve_sub_u:
2969 Intrinsic::aarch64_sve_mls_u>(
2971 case Intrinsic::aarch64_sve_tbl:
2973 case Intrinsic::aarch64_sve_uunpkhi:
2974 case Intrinsic::aarch64_sve_uunpklo:
2975 case Intrinsic::aarch64_sve_sunpkhi:
2976 case Intrinsic::aarch64_sve_sunpklo:
2978 case Intrinsic::aarch64_sve_uzp1:
2980 case Intrinsic::aarch64_sve_zip1:
2981 case Intrinsic::aarch64_sve_zip2:
2983 case Intrinsic::aarch64_sve_ld1_gather_index:
2985 case Intrinsic::aarch64_sve_st1_scatter_index:
2987 case Intrinsic::aarch64_sve_ld1:
2989 case Intrinsic::aarch64_sve_st1:
2991 case Intrinsic::aarch64_sve_sdiv:
2993 case Intrinsic::aarch64_sve_sel:
2995 case Intrinsic::aarch64_sve_srshl:
2997 case Intrinsic::aarch64_sve_dupq_lane:
2999 case Intrinsic::aarch64_sve_insr:
3001 case Intrinsic::aarch64_sve_whilelo:
3003 case Intrinsic::aarch64_sve_ptrue:
3005 case Intrinsic::aarch64_sve_uxtb:
3007 case Intrinsic::aarch64_sve_uxth:
3009 case Intrinsic::aarch64_sve_uxtw:
3011 case Intrinsic::aarch64_sme_in_streaming_mode:
3015 return std::nullopt;
3022 SimplifyAndSetOp)
const {
3023 switch (
II.getIntrinsicID()) {
3026 case Intrinsic::aarch64_neon_fcvtxn:
3027 case Intrinsic::aarch64_neon_rshrn:
3028 case Intrinsic::aarch64_neon_sqrshrn:
3029 case Intrinsic::aarch64_neon_sqrshrun:
3030 case Intrinsic::aarch64_neon_sqshrn:
3031 case Intrinsic::aarch64_neon_sqshrun:
3032 case Intrinsic::aarch64_neon_sqxtn:
3033 case Intrinsic::aarch64_neon_sqxtun:
3034 case Intrinsic::aarch64_neon_uqrshrn:
3035 case Intrinsic::aarch64_neon_uqshrn:
3036 case Intrinsic::aarch64_neon_uqxtn:
3037 SimplifyAndSetOp(&
II, 0, OrigDemandedElts, UndefElts);
3041 return std::nullopt;
3045 return ST->isSVEAvailable() || (ST->isSVEorStreamingSVEAvailable() &&
3055 if (ST->useSVEForFixedLengthVectors() &&
3058 std::max(ST->getMinSVEVectorSizeInBits(), 128u));
3059 else if (ST->isNeonAvailable())
3064 if (ST->isSVEAvailable() || (ST->isSVEorStreamingSVEAvailable() &&
3073bool AArch64TTIImpl::isSingleExtWideningInstruction(
3075 Type *SrcOverrideTy)
const {
3090 (DstEltSize != 16 && DstEltSize != 32 && DstEltSize != 64))
3093 Type *SrcTy = SrcOverrideTy;
3095 case Instruction::Add:
3096 case Instruction::Sub: {
3105 if (Opcode == Instruction::Sub)
3129 assert(SrcTy &&
"Expected some SrcTy");
3131 unsigned SrcElTySize = SrcTyL.second.getScalarSizeInBits();
3137 DstTyL.first * DstTyL.second.getVectorMinNumElements();
3139 SrcTyL.first * SrcTyL.second.getVectorMinNumElements();
3143 return NumDstEls == NumSrcEls && 2 * SrcElTySize == DstEltSize;
3146Type *AArch64TTIImpl::isBinExtWideningInstruction(
unsigned Opcode,
Type *DstTy,
3148 Type *SrcOverrideTy)
const {
3149 if (Opcode != Instruction::Add && Opcode != Instruction::Sub &&
3150 Opcode != Instruction::Mul)
3160 (DstEltSize != 16 && DstEltSize != 32 && DstEltSize != 64))
3163 auto getScalarSizeWithOverride = [&](
const Value *
V) {
3169 ->getScalarSizeInBits();
3172 unsigned MaxEltSize = 0;
3175 unsigned EltSize0 = getScalarSizeWithOverride(Args[0]);
3176 unsigned EltSize1 = getScalarSizeWithOverride(Args[1]);
3177 MaxEltSize = std::max(EltSize0, EltSize1);
3180 unsigned EltSize0 = getScalarSizeWithOverride(Args[0]);
3181 unsigned EltSize1 = getScalarSizeWithOverride(Args[1]);
3184 if (EltSize0 >= DstEltSize / 2 || EltSize1 >= DstEltSize / 2)
3186 MaxEltSize = DstEltSize / 2;
3187 }
else if (Opcode == Instruction::Mul &&
3200 getScalarSizeWithOverride(
isa<ZExtInst>(Args[0]) ? Args[0] : Args[1]);
3204 if (MaxEltSize * 2 > DstEltSize)
3222 if (!Src->isVectorTy() || !TLI->isTypeLegal(TLI->getValueType(
DL, Src)) ||
3223 (Src->isScalableTy() && !ST->hasSVE2()))
3233 if (AddUser && AddUser->getOpcode() == Instruction::Add)
3237 if (!Shr || Shr->getOpcode() != Instruction::LShr)
3241 if (!Trunc || Trunc->getOpcode() != Instruction::Trunc ||
3242 Src->getScalarSizeInBits() !=
3266 int ISD = TLI->InstructionOpcodeToISD(Opcode);
3270 if (
I &&
I->hasOneUser()) {
3273 if (
Type *ExtTy = isBinExtWideningInstruction(
3274 SingleUser->getOpcode(), Dst, Operands,
3275 Src !=
I->getOperand(0)->getType() ? Src :
nullptr)) {
3288 if (isSingleExtWideningInstruction(
3289 SingleUser->getOpcode(), Dst, Operands,
3290 Src !=
I->getOperand(0)->getType() ? Src :
nullptr)) {
3294 if (SingleUser->getOpcode() == Instruction::Add) {
3295 if (
I == SingleUser->getOperand(1) ||
3297 cast<CastInst>(SingleUser->getOperand(1))->getOpcode() == Opcode))
3315 return Cost == 0 ? 0 : 1;
3319 EVT SrcTy = TLI->getValueType(
DL, Src);
3320 EVT DstTy = TLI->getValueType(
DL, Dst);
3322 if (!SrcTy.isSimple() || !DstTy.
isSimple())
3328 if (!ST->hasSVE2() && !ST->isStreamingSVEAvailable() &&
3352 return AdjustCost(Entry->Cost);
3360 const unsigned int SVE_EXT_COST = 1;
3361 const unsigned int SVE_FCVT_COST = 1;
3362 const unsigned int SVE_UNPACK_ONCE = 4;
3363 const unsigned int SVE_UNPACK_TWICE = 16;
3492 SVE_EXT_COST + SVE_FCVT_COST},
3497 SVE_EXT_COST + SVE_FCVT_COST},
3504 SVE_EXT_COST + SVE_FCVT_COST},
3508 SVE_EXT_COST + SVE_FCVT_COST},
3514 SVE_EXT_COST + SVE_FCVT_COST},
3517 SVE_EXT_COST + SVE_FCVT_COST},
3522 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3524 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3534 SVE_EXT_COST + SVE_FCVT_COST},
3539 SVE_EXT_COST + SVE_FCVT_COST},
3552 SVE_EXT_COST + SVE_FCVT_COST},
3556 SVE_EXT_COST + SVE_FCVT_COST},
3568 SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3570 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3572 SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3574 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3578 SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3580 SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3596 SVE_EXT_COST + SVE_FCVT_COST},
3601 SVE_EXT_COST + SVE_FCVT_COST},
3612 SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3614 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3616 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3618 SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3620 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3622 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3626 SVE_EXT_COST + SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3628 SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3630 SVE_EXT_COST + SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3632 SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3831 EVT WiderTy = SrcTy.
bitsGT(DstTy) ? SrcTy : DstTy;
3834 ST->useSVEForFixedLengthVectors(WiderTy)) {
3835 std::pair<InstructionCost, MVT> LT =
3837 unsigned NumElements =
3849 return AdjustCost(Entry->Cost);
3876 if (ST->hasFullFP16())
3879 return AdjustCost(Entry->Cost);
3897 ST->isSVEorStreamingSVEAvailable() &&
3898 TLI->getTypeAction(Src->getContext(), SrcTy) ==
3900 TLI->getTypeAction(Dst->getContext(), DstTy) ==
3909 Opcode, LegalTy, Src, CCH,
CostKind,
I);
3912 return Part1 + Part2;
3919 ST->isSVEorStreamingSVEAvailable() && TLI->isTypeLegal(DstTy))
3932 assert((Opcode == Instruction::SExt || Opcode == Instruction::ZExt) &&
3945 CostKind, Index,
nullptr,
nullptr);
3949 auto DstVT = TLI->getValueType(
DL, Dst);
3950 auto SrcVT = TLI->getValueType(
DL, Src);
3955 if (!VecLT.second.isVector() || !TLI->isTypeLegal(DstVT))
3961 if (DstVT.getFixedSizeInBits() < SrcVT.getFixedSizeInBits())
3971 case Instruction::SExt:
3976 case Instruction::ZExt:
3977 if (DstVT.getSizeInBits() != 64u || SrcVT.getSizeInBits() == 32u)
3990 return Opcode == Instruction::PHI ? 0 : 1;
3999 ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx)
const {
4007 if (!LT.second.isVector())
4012 if (LT.second.isFixedLengthVector()) {
4013 unsigned Width = LT.second.getVectorNumElements();
4014 Index = Index % Width;
4062 auto ExtractCanFuseWithFmul = [&]() {
4069 auto IsAllowedScalarTy = [&](
const Type *
T) {
4070 return T->isFloatTy() ||
T->isDoubleTy() ||
4071 (
T->isHalfTy() && ST->hasFullFP16());
4075 auto IsUserFMulScalarTy = [](
const Value *EEUser) {
4078 return BO && BO->getOpcode() == BinaryOperator::FMul &&
4079 !BO->getType()->isVectorTy();
4084 auto IsExtractLaneEquivalentToZero = [&](
unsigned Idx,
unsigned EltSz) {
4088 return Idx == 0 || (RegWidth != 0 && (Idx * EltSz) % RegWidth == 0);
4097 DenseMap<User *, unsigned> UserToExtractIdx;
4098 for (
auto *U :
Scalar->users()) {
4099 if (!IsUserFMulScalarTy(U))
4103 UserToExtractIdx[
U];
4105 if (UserToExtractIdx.
empty())
4107 for (
auto &[S, U, L] : ScalarUserAndIdx) {
4108 for (
auto *U : S->users()) {
4109 if (UserToExtractIdx.
contains(U)) {
4111 auto *Op0 =
FMul->getOperand(0);
4112 auto *Op1 =
FMul->getOperand(1);
4113 if ((Op0 == S && Op1 == S) || Op0 != S || Op1 != S) {
4114 UserToExtractIdx[
U] =
L;
4120 for (
auto &[U, L] : UserToExtractIdx) {
4132 return !EE->users().empty() &&
all_of(EE->users(), [&](
const User *U) {
4133 if (!IsUserFMulScalarTy(U))
4138 const auto *BO = cast<BinaryOperator>(U);
4139 const auto *OtherEE = dyn_cast<ExtractElementInst>(
4140 BO->getOperand(0) == EE ? BO->getOperand(1) : BO->getOperand(0));
4142 const auto *IdxOp = dyn_cast<ConstantInt>(OtherEE->getIndexOperand());
4145 return IsExtractLaneEquivalentToZero(
4146 cast<ConstantInt>(OtherEE->getIndexOperand())
4149 OtherEE->getType()->getScalarSizeInBits());
4157 if (Opcode == Instruction::ExtractElement && (
I || Scalar) &&
4158 ExtractCanFuseWithFmul())
4163 :
ST->getVectorInsertExtractBaseCost();
4170 const Value *Op1)
const {
4174 if (Opcode == Instruction::InsertElement && Index == 0 && Op0 &&
4177 return getVectorInstrCostHelper(Opcode, Val,
CostKind, Index);
4183 ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx)
const {
4184 return getVectorInstrCostHelper(Opcode, Val,
CostKind, Index,
nullptr, Scalar,
4191 unsigned Index)
const {
4192 return getVectorInstrCostHelper(
I.getOpcode(), Val,
CostKind, Index, &
I);
4198 unsigned Index)
const {
4210 : ST->getVectorInsertExtractBaseCost() + 1;
4219 if (Ty->getElementType()->isFloatingPointTy())
4222 unsigned VecInstCost =
4224 return DemandedElts.
popcount() * (Insert + Extract) * VecInstCost;
4231 if (!Ty->getScalarType()->isHalfTy() && !Ty->getScalarType()->isBFloatTy())
4232 return std::nullopt;
4233 if (Ty->getScalarType()->isHalfTy() && ST->hasFullFP16())
4234 return std::nullopt;
4235 if (CanUseSVE && Ty->isScalableTy() && ST->hasSVEB16B16() &&
4236 ST->isNonStreamingSVEorSME2Available())
4237 return std::nullopt;
4244 Cost += InstCost(PromotedTy);
4267 Op2Info, Args, CxtI);
4271 int ISD = TLI->InstructionOpcodeToISD(Opcode);
4278 Ty,
CostKind, Op1Info, Op2Info,
true,
4281 [&](
Type *PromotedTy) {
4285 return *PromotedCost;
4291 if (
Type *ExtTy = isBinExtWideningInstruction(Opcode, Ty, Args)) {
4358 auto VT = TLI->getValueType(
DL, Ty);
4359 if (VT.isScalarInteger() && VT.getSizeInBits() <= 64) {
4363 : (3 * AsrCost + AddCost);
4365 return MulCost + AsrCost + 2 * AddCost;
4367 }
else if (VT.isVector()) {
4377 if (Ty->isScalableTy() && ST->hasSVE())
4378 Cost += 2 * AsrCost;
4383 ? (LT.second.getScalarType() == MVT::i64 ? 1 : 2) * AsrCost
4387 }
else if (LT.second == MVT::v2i64) {
4388 return VT.getVectorNumElements() *
4395 if (Ty->isScalableTy() && ST->hasSVE())
4396 return MulCost + 2 * AddCost + 2 * AsrCost;
4397 return 2 * MulCost + AddCost + AsrCost + UsraCost;
4402 LT.second.isFixedLengthVector()) {
4412 return ExtractCost + InsertCost +
4420 auto VT = TLI->getValueType(
DL, Ty);
4436 bool HasMULH = VT == MVT::i64 || LT.second == MVT::nxv2i64 ||
4437 LT.second == MVT::nxv4i32 || LT.second == MVT::nxv8i16 ||
4438 LT.second == MVT::nxv16i8;
4439 bool Is128bit = LT.second.is128BitVector();
4451 (HasMULH ? 0 : ShrCost) +
4452 AddCost * 2 + ShrCost;
4453 return DivCost + (
ISD ==
ISD::UREM ? MulCost + AddCost : 0);
4460 if (!VT.isVector() && VT.getSizeInBits() > 64)
4464 Opcode, Ty,
CostKind, Op1Info, Op2Info);
4466 if (TLI->isOperationLegalOrCustom(
ISD, LT.second) && ST->hasSVE()) {
4470 Ty->getPrimitiveSizeInBits().getFixedValue() < 128) {
4480 if (
nullptr != Entry)
4485 if (LT.second.getScalarType() == MVT::i8)
4487 else if (LT.second.getScalarType() == MVT::i16)
4499 Opcode, Ty->getScalarType(),
CostKind, Op1Info, Op2Info);
4500 return (4 + DivCost) * VTy->getNumElements();
4506 -1,
nullptr,
nullptr);
4520 if (LT.second == MVT::v2i64 && ST->hasSVE())
4533 if (LT.second != MVT::v2i64)
4555 if ((Ty->isFloatTy() || Ty->isDoubleTy() ||
4556 (Ty->isHalfTy() && ST->hasFullFP16())) &&
4565 if (!Ty->getScalarType()->isFP128Ty())
4572 if (!Ty->getScalarType()->isFP128Ty())
4573 return 2 * LT.first;
4580 if (!Ty->isVectorTy())
4596 int MaxMergeDistance = 64;
4600 return NumVectorInstToHideOverhead;
4610 unsigned Opcode1,
unsigned Opcode2)
const {
4613 if (!
Sched.hasInstrSchedModel())
4617 Sched.getSchedClassDesc(
TII->get(Opcode1).getSchedClass());
4619 Sched.getSchedClassDesc(
TII->get(Opcode2).getSchedClass());
4625 "Cannot handle variant scheduling classes without an MI");
4641 const int AmortizationCost = 20;
4649 VecPred = CurrentPred;
4657 static const auto ValidMinMaxTys = {
4658 MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32,
4659 MVT::v4i32, MVT::v2i64, MVT::v2f32, MVT::v4f32, MVT::v2f64};
4660 static const auto ValidFP16MinMaxTys = {MVT::v4f16, MVT::v8f16};
4663 if (
any_of(ValidMinMaxTys, [<](
MVT M) {
return M == LT.second; }) ||
4664 (ST->hasFullFP16() &&
4665 any_of(ValidFP16MinMaxTys, [<](
MVT M) {
return M == LT.second; })))
4670 {Instruction::Select, MVT::v2i1, MVT::v2f32, 2},
4671 {Instruction::Select, MVT::v2i1, MVT::v2f64, 2},
4672 {Instruction::Select, MVT::v4i1, MVT::v4f32, 2},
4673 {Instruction::Select, MVT::v4i1, MVT::v4f16, 2},
4674 {Instruction::Select, MVT::v8i1, MVT::v8f16, 2},
4675 {Instruction::Select, MVT::v16i1, MVT::v16i16, 16},
4676 {Instruction::Select, MVT::v8i1, MVT::v8i32, 8},
4677 {Instruction::Select, MVT::v16i1, MVT::v16i32, 16},
4678 {Instruction::Select, MVT::v4i1, MVT::v4i64, 4 * AmortizationCost},
4679 {Instruction::Select, MVT::v8i1, MVT::v8i64, 8 * AmortizationCost},
4680 {Instruction::Select, MVT::v16i1, MVT::v16i64, 16 * AmortizationCost}};
4682 EVT SelCondTy = TLI->getValueType(
DL, CondTy);
4683 EVT SelValTy = TLI->getValueType(
DL, ValTy);
4692 if (Opcode == Instruction::FCmp) {
4694 ValTy,
CostKind, Op1Info, Op2Info,
false,
4696 false, [&](
Type *PromotedTy) {
4708 return *PromotedCost;
4712 if (LT.second.getScalarType() != MVT::f64 &&
4713 LT.second.getScalarType() != MVT::f32 &&
4714 LT.second.getScalarType() != MVT::f16)
4719 unsigned Factor = 1;
4734 AArch64::FCMEQv4f32))
4746 TLI->isTypeLegal(TLI->getValueType(
DL, ValTy)) &&
4765 Op1Info, Op2Info,
I);
4771 if (ST->requiresStrictAlign()) {
4776 Options.AllowOverlappingLoads =
true;
4777 Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
4782 Options.LoadSizes = {8, 4, 2, 1};
4783 Options.AllowedTailExpansions = {3, 5, 6};
4788 return ST->hasSVE();
4794 switch (MICA.
getID()) {
4795 case Intrinsic::masked_scatter:
4796 case Intrinsic::masked_gather:
4798 case Intrinsic::masked_load:
4799 case Intrinsic::masked_store:
4813 if (!LT.first.isValid())
4818 if (VT->getElementType()->isIntegerTy(1))
4835 assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
4836 "Should be called on only load or stores.");
4838 case Instruction::Load:
4841 return ST->getGatherOverhead();
4843 case Instruction::Store:
4846 return ST->getScatterOverhead();
4857 unsigned Opcode = (MICA.
getID() == Intrinsic::masked_gather ||
4858 MICA.
getID() == Intrinsic::vp_gather)
4860 : Instruction::Store;
4870 if (!LT.first.isValid())
4874 if (!LT.second.isVector() ||
4876 VT->getElementType()->isIntegerTy(1))
4886 ElementCount LegalVF = LT.second.getVectorElementCount();
4889 {TTI::OK_AnyValue, TTI::OP_None},
I);
4905 EVT VT = TLI->getValueType(
DL, Ty,
true);
4907 if (VT == MVT::Other)
4912 if (!LT.first.isValid())
4922 (VTy->getElementType()->isIntegerTy(1) &&
4923 !VTy->getElementCount().isKnownMultipleOf(
4934 if (ST->isMisaligned128StoreSlow() && Opcode == Instruction::Store &&
4935 LT.second.is128BitVector() && Alignment <
Align(16)) {
4941 const int AmortizationCost = 6;
4943 return LT.first * 2 * AmortizationCost;
4947 if (Ty->isPtrOrPtrVectorTy())
4952 if (Ty->getScalarSizeInBits() != LT.second.getScalarSizeInBits()) {
4954 if (VT == MVT::v4i8)
4961 if (!
isPowerOf2_32(EltSize) || EltSize < 8 || EltSize > 64 ||
4976 while (!TypeWorklist.
empty()) {
4998 bool UseMaskForCond,
bool UseMaskForGaps)
const {
4999 assert(Factor >= 2 &&
"Invalid interleave factor");
5014 if (!VecTy->
isScalableTy() && (UseMaskForCond || UseMaskForGaps))
5017 if (!UseMaskForGaps && Factor <= TLI->getMaxSupportedInterleaveFactor()) {
5018 unsigned MinElts = VecVTy->getElementCount().getKnownMinValue();
5021 VecVTy->getElementCount().divideCoefficientBy(Factor));
5027 if (MinElts % Factor == 0 &&
5028 TLI->isLegalInterleavedAccessType(SubVecTy,
DL, UseScalable))
5029 return Factor * TLI->getNumInterleavedAccesses(SubVecTy,
DL, UseScalable);
5034 UseMaskForCond, UseMaskForGaps);
5041 for (
auto *
I : Tys) {
5042 if (!
I->isVectorTy())
5053 return ST->getMaxInterleaveFactor();
5063 enum { MaxStridedLoads = 7 };
5065 int StridedLoads = 0;
5068 for (
const auto BB : L->blocks()) {
5069 for (
auto &
I : *BB) {
5075 if (L->isLoopInvariant(PtrValue))
5080 if (!LSCEVAddRec || !LSCEVAddRec->
isAffine())
5089 if (StridedLoads > MaxStridedLoads / 2)
5090 return StridedLoads;
5093 return StridedLoads;
5096 int StridedLoads = countStridedLoads(L, SE);
5098 <<
" strided loads\n");
5114 unsigned *FinalSize) {
5118 for (
auto *BB : L->getBlocks()) {
5119 for (
auto &
I : *BB) {
5125 if (!Cost.isValid())
5129 if (LoopCost > Budget)
5151 if (MaxTC > 0 && MaxTC <= 32)
5162 if (Blocks.
size() != 2)
5184 if (!L->isInnermost() || L->getNumBlocks() > 8)
5188 if (!L->getExitBlock())
5194 bool HasParellelizableReductions =
5195 L->getNumBlocks() == 1 &&
5196 any_of(L->getHeader()->phis(),
5198 return canParallelizeReductionWhenUnrolling(Phi, L, &SE);
5201 if (HasParellelizableReductions &&
5223 if (HasParellelizableReductions) {
5234 if (Header == Latch) {
5237 unsigned Width = 10;
5243 unsigned MaxInstsPerLine = 16;
5245 unsigned BestUC = 1;
5246  unsigned SizeWithBestUC = BestUC * Size;
5248    unsigned SizeWithUC = UC * Size;
5249    if (SizeWithUC > 48)
5251    if ((SizeWithUC % MaxInstsPerLine) == 0 ||
5252        (SizeWithBestUC % MaxInstsPerLine) < (SizeWithUC % MaxInstsPerLine)) {
5254      SizeWithBestUC = BestUC * Size;
5264  for (auto *BB : L->blocks()) {
5265    for (auto &I : *BB) {
5275        for (auto *U : I.users())
5277          LoadedValuesPlus.insert(U);
5284    return LoadedValuesPlus.contains(SI->getOperand(0));
5297  if (!Term || !Term->isConditional() || Preds.size() == 1 ||
5311    auto *I = dyn_cast<Instruction>(V);
5312    return I && DependsOnLoopLoad(I, Depth + 1);
5319      DependsOnLoopLoad(I, 0)) {
5335  if (L->getLoopDepth() > 1)
5346  for (auto *BB : L->getBlocks()) {
5347    for (auto &I : *BB) {
5351      if (IsVectorized && I.getType()->isVectorTy())
5368 if (ST->isAppleMLike())
5370 else if (ST->getProcFamily() == AArch64Subtarget::Falkor &&
5392 !ST->getSchedModel().isOutOfOrder()) {
5415      bool CanCreate) const {
5419  case Intrinsic::aarch64_neon_st2:
5420  case Intrinsic::aarch64_neon_st3:
5421  case Intrinsic::aarch64_neon_st4: {
5424    if (!CanCreate || !ST)
5426    unsigned NumElts = Inst->arg_size() - 1;
5427    if (ST->getNumElements() != NumElts)
5429    for (unsigned i = 0, e = NumElts; i != e; ++i) {
5435    for (unsigned i = 0, e = NumElts; i != e; ++i) {
5437      Res = Builder.CreateInsertValue(Res, L, i);
5441  case Intrinsic::aarch64_neon_ld2:
5442  case Intrinsic::aarch64_neon_ld3:
5443  case Intrinsic::aarch64_neon_ld4:
5444    if (Inst->getType() == ExpectedType)
5455  case Intrinsic::aarch64_neon_ld2:
5456  case Intrinsic::aarch64_neon_ld3:
5457  case Intrinsic::aarch64_neon_ld4:
5458    Info.ReadMem = true;
5459    Info.WriteMem = false;
5462  case Intrinsic::aarch64_neon_st2:
5463  case Intrinsic::aarch64_neon_st3:
5464  case Intrinsic::aarch64_neon_st4:
5465    Info.ReadMem = false;
5466    Info.WriteMem = true;
5474 case Intrinsic::aarch64_neon_ld2:
5475 case Intrinsic::aarch64_neon_st2:
5476 Info.MatchingId = VECTOR_LDST_TWO_ELEMENTS;
5478 case Intrinsic::aarch64_neon_ld3:
5479 case Intrinsic::aarch64_neon_st3:
5480 Info.MatchingId = VECTOR_LDST_THREE_ELEMENTS;
5482 case Intrinsic::aarch64_neon_ld4:
5483 case Intrinsic::aarch64_neon_st4:
5484 Info.MatchingId = VECTOR_LDST_FOUR_ELEMENTS;
5496      const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
5497  bool Considerable = false;
5498  AllowPromotionWithoutCommonHeader = false;
5501  Type *ConsideredSExtType =
5503  if (I.getType() != ConsideredSExtType)
5507  for (const User *U : I.users()) {
5509      Considerable = true;
5513      if (GEPInst->getNumOperands() > 2) {
5514        AllowPromotionWithoutCommonHeader = true;
5519 return Considerable;
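shouldConsiderAddressTypePromotion reports a sign-extension as worth promoting when it feeds address computation, and additionally allows promotion without a common header once a consumer is a GEP with more than two operands. A standalone sketch of that classification over an abstract list of users; the IR classes are replaced by a plain struct, and treating only address-computing users as interesting is an assumption of this sketch:

// Standalone sketch (assumed shapes, not LLVM's IR classes) of the
// "considerable / allow promotion without common header" classification.
#include <cstdio>
#include <vector>

struct UseSite {
  bool IsAddressCompute; // stands in for "user is a GEP"
  unsigned NumOperands;
};

void classifySExt(const std::vector<UseSite> &Users, bool &Considerable,
                  bool &AllowPromotionWithoutCommonHeader) {
  Considerable = false;
  AllowPromotionWithoutCommonHeader = false;
  for (const UseSite &U : Users) {
    if (!U.IsAddressCompute)
      continue;
    Considerable = true;
    if (U.NumOperands > 2)
      AllowPromotionWithoutCommonHeader = true;
  }
}

int main() {
  bool C, A;
  classifySExt({{true, 3}, {false, 2}}, C, A);
  std::printf("%d %d\n", C, A); // 1 1
}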
5567 if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16())
5577 return LegalizationCost + 2;
5587 LegalizationCost *= LT.first - 1;
5590 int ISD = TLI->InstructionOpcodeToISD(Opcode);
5599 return LegalizationCost + 2;
5607 std::optional<FastMathFlags> FMF,
5623 return BaseCost + FixedVTy->getNumElements();
5626 if (Opcode != Instruction::FAdd)
5640 MVT MTy = LT.second;
5641 int ISD = TLI->InstructionOpcodeToISD(Opcode);
5689      MTy.isVector() && (EltTy->isFloatTy() || EltTy->isDoubleTy() ||
5690                         (EltTy->isHalfTy() && ST->hasFullFP16()))) {
5692    if (ValTy->getElementCount().getFixedValue() >= 2 && NElts >= 2 &&
5702      return (LT.first - 1) + Log2_32(NElts);
5707 return (LT.first - 1) + Entry->Cost;
5719 if (LT.first != 1) {
5725 ExtraCost *= LT.first - 1;
5728 auto Cost = ValVTy->getElementType()->isIntegerTy(1) ? 2 : Entry->Cost;
5729 return Cost + ExtraCost;
5737      unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *VecTy,
5739  EVT VecVT = TLI->getValueType(DL, VecTy);
5740  EVT ResVT = TLI->getValueType(DL, ResTy);
5750 if (((LT.second == MVT::v8i8 || LT.second == MVT::v16i8) &&
5752 ((LT.second == MVT::v4i16 || LT.second == MVT::v8i16) &&
5754 ((LT.second == MVT::v2i32 || LT.second == MVT::v4i32) &&
5756 return (LT.first - 1) * 2 + 2;
5767  EVT VecVT = TLI->getValueType(DL, VecTy);
5768  EVT ResVT = TLI->getValueType(DL, ResTy);
5771      RedOpcode == Instruction::Add) {
5777    if ((LT.second == MVT::v8i8 || LT.second == MVT::v16i8) &&
5779      return LT.first + 2;
5814  EVT PromotedVT = LT.second.getScalarType() == MVT::i1
5815                       ? TLI->getPromotedVTForPredicate(EVT(LT.second))
5829  if (LT.second.getScalarType() == MVT::i1) {
5838  assert(Entry && "Illegal Type for Splice");
5839 LegalizationCost += Entry->Cost;
5840 return LegalizationCost * LT.first;
5844      unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType,
5853  if (VF.isFixed() && !ST->isSVEorStreamingSVEAvailable() &&
5854      (!ST->isNeonAvailable() || !ST->hasDotProd()))
5857  if ((Opcode != Instruction::Add && Opcode != Instruction::Sub) ||
5862          (!BinOp || (OpBExtend != TTI::PR_None && InputTypeB)) &&
5863         "Unexpected values for OpBExtend or InputTypeB");
5867  if (BinOp && (*BinOp != Instruction::Mul || InputTypeA != InputTypeB))
5870  bool IsUSDot = OpBExtend != TTI::PR_None && OpAExtend != OpBExtend;
5871  if (IsUSDot && !ST->hasMatMulInt8())
5883  auto TC = TLI->getTypeConversion(AccumVectorType->getContext(),
5892  if (TLI->getTypeAction(AccumVectorType->getContext(), TC.second) !=
5898 std::pair<InstructionCost, MVT> AccumLT =
5900 std::pair<InstructionCost, MVT> InputLT =
5913 if (ST->isSVEorStreamingSVEAvailable() && !IsUSDot) {
5915 if (AccumLT.second.getScalarType() == MVT::i64 &&
5916 InputLT.second.getScalarType() == MVT::i16)
5919 if (AccumLT.second.getScalarType() == MVT::i64 &&
5920 InputLT.second.getScalarType() == MVT::i8)
5930 if (ST->isSVEorStreamingSVEAvailable() ||
5931 (AccumLT.second.isFixedLengthVector() && ST->isNeonAvailable() &&
5932 ST->hasDotProd())) {
5933 if (AccumLT.second.getScalarType() == MVT::i32 &&
5934 InputLT.second.getScalarType() == MVT::i8)
5950 "Expected the Mask to match the return size if given");
5952 "Expected the same scalar types");
5958 LT.second.getScalarSizeInBits() * Mask.size() > 128 &&
5959 SrcTy->getScalarSizeInBits() == LT.second.getScalarSizeInBits() &&
5960 Mask.size() > LT.second.getVectorNumElements() && !Index && !SubTp) {
5968 return std::max<InstructionCost>(1, LT.first / 4);
5976 Mask, 4, SrcTy->getElementCount().getKnownMinValue() * 2) ||
5978 Mask, 3, SrcTy->getElementCount().getKnownMinValue() * 2)))
5981 unsigned TpNumElts = Mask.size();
5982 unsigned LTNumElts = LT.second.getVectorNumElements();
5983 unsigned NumVecs = (TpNumElts + LTNumElts - 1) / LTNumElts;
5985 LT.second.getVectorElementCount());
5987  std::map<std::tuple<unsigned, unsigned, SmallVector<int>>, InstructionCost>
5989  for (unsigned N = 0; N < NumVecs; N++) {
5993    unsigned Source1 = -1U, Source2 = -1U;
5994    unsigned NumSources = 0;
5995    for (unsigned E = 0; E < LTNumElts; E++) {
5996      int MaskElt = (N * LTNumElts + E < TpNumElts) ? Mask[N * LTNumElts + E]
6005      unsigned Source = MaskElt / LTNumElts;
6006      if (NumSources == 0) {
6009      } else if (NumSources == 1 && Source != Source1) {
6012      } else if (NumSources >= 2 && Source != Source1 && Source != Source2) {
6018      if (Source == Source1)
6020      else if (Source == Source2)
6021        NMask.push_back(MaskElt % LTNumElts + LTNumElts);
6030        PreviousCosts.insert({std::make_tuple(Source1, Source2, NMask), 0});
6041                          NTp, NTp, NMask, CostKind, 0, nullptr, Args,
6044      Result.first->second = NCost;
6058  if (IsExtractSubvector && LT.second.isFixedLengthVector()) {
6059    if (LT.second.getFixedSizeInBits() >= 128 &&
6061            LT.second.getVectorNumElements() / 2) {
6064      if (Index == (int)LT.second.getVectorNumElements() / 2)
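The loop above splits a shuffle mask that is wider than the legal vector into LTNumElts-sized chunks, remaps each chunk onto at most two source sub-vectors, and memoizes the per-chunk cost. A standalone sketch of the chunk-classification step only; it is illustrative, and undef lanes plus the too-many-sources case are simplified:

// Standalone sketch (illustrative): classify one LTNumElts-wide chunk of a
// wide shuffle mask by how many distinct source sub-vectors it references and
// rewrite its lanes against those (at most two) sources.
#include <cstdio>
#include <vector>

struct ChunkInfo {
  unsigned NumSources = 0;    // 0, 1, 2, or more distinct sources
  std::vector<int> LocalMask; // mask rewritten against the chunk's sources
};

ChunkInfo classifyChunk(const std::vector<int> &Mask, unsigned Chunk,
                        unsigned LTNumElts) {
  ChunkInfo Info;
  unsigned Source1 = ~0u, Source2 = ~0u;
  for (unsigned E = 0; E < LTNumElts; ++E) {
    unsigned Idx = Chunk * LTNumElts + E;
    int MaskElt = Idx < Mask.size() ? Mask[Idx] : -1;
    if (MaskElt < 0) { // undef lane
      Info.LocalMask.push_back(-1);
      continue;
    }
    unsigned Source = MaskElt / LTNumElts;
    if (Info.NumSources == 0) {
      Source1 = Source;
      Info.NumSources = 1;
    } else if (Info.NumSources == 1 && Source != Source1) {
      Source2 = Source;
      Info.NumSources = 2;
    } else if (Info.NumSources >= 2 && Source != Source1 && Source != Source2) {
      Info.NumSources++;
    }
    if (Source == Source1)
      Info.LocalMask.push_back(MaskElt % LTNumElts);
    else if (Source == Source2)
      Info.LocalMask.push_back(MaskElt % LTNumElts + LTNumElts);
    else
      Info.LocalMask.push_back(-1); // too many sources for one two-input shuffle
  }
  return Info;
}

int main() {
  std::vector<int> Mask = {0, 8, 1, 9, 4, 12, 5, 13}; // zip-like, two sources
  ChunkInfo C = classifyChunk(Mask, 0, 4);
  std::printf("chunk 0 uses %u source(s)\n", C.NumSources);
}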
6078 if (!Mask.empty() && LT.second.isFixedLengthVector() &&
6081 return M.value() < 0 || M.value() == (int)M.index();
6087 !Mask.empty() && SrcTy->getPrimitiveSizeInBits().isNonZero() &&
6088 SrcTy->getPrimitiveSizeInBits().isKnownMultipleOf(
6097 if ((ST->hasSVE2p1() || ST->hasSME2p1()) &&
6098 ST->isSVEorStreamingSVEAvailable() &&
6103 if (ST->isSVEorStreamingSVEAvailable() &&
6117 if (IsLoad && LT.second.isVector() &&
6119 LT.second.getVectorElementCount()))
6125 if (Mask.size() == 4 &&
6127 (SrcTy->getScalarSizeInBits() == 16 ||
6128 SrcTy->getScalarSizeInBits() == 32) &&
6129      all_of(Mask, [](int E) { return E < 8; }))
6135  if (LT.second.isFixedLengthVector() &&
6136      LT.second.getVectorNumElements() == Mask.size() &&
6138      (isZIPMask(Mask, LT.second.getVectorNumElements(), Unused, Unused) ||
6139       isUZPMask(Mask, LT.second.getVectorNumElements(), Unused) ||
6140       isREVMask(Mask, LT.second.getScalarSizeInBits(),
6141                 LT.second.getVectorNumElements(), 16) ||
6142       isREVMask(Mask, LT.second.getScalarSizeInBits(),
6143                 LT.second.getVectorNumElements(), 32) ||
6144       isREVMask(Mask, LT.second.getScalarSizeInBits(),
6145                 LT.second.getVectorNumElements(), 64) ||
6148       [&Mask](int M) { return M < 0 || M == Mask[0]; })))
6277 return LT.first * Entry->Cost;
6286 LT.second.getSizeInBits() <= 128 && SubTp) {
6288 if (SubLT.second.isVector()) {
6289 int NumElts = LT.second.getVectorNumElements();
6290 int NumSubElts = SubLT.second.getVectorNumElements();
6291 if ((Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
6297 if (IsExtractSubvector)
6314  if (getPtrStride(*PSE, AccessTy, Ptr, TheLoop, DT, Strides,
6333  return ST->useFixedOverScalableIfEqualCost();
6337  return ST->getEpilogueVectorizationMinVF();
6372  unsigned NumInsns = 0;
6374    NumInsns += BB->sizeWithoutDebug();
6384      int64_t Scale, unsigned AddrSpace) const {
6412  if (I->getOpcode() == Instruction::Or &&
6417  if (I->getOpcode() == Instruction::Add ||
6418      I->getOpcode() == Instruction::Sub)
6443 return all_equal(Shuf->getShuffleMask());
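The splat check above reduces to verifying that every shuffle mask element is identical. A standalone equivalent over a plain vector of mask indices; undef handling is omitted, which is a simplification of what the listing's all_equal-based check accepts:

// Standalone sketch (illustrative): a shuffle mask is a splat when every
// element selects the same source lane.
#include <algorithm>
#include <cstdio>
#include <vector>

bool isSplatMask(const std::vector<int> &Mask) {
  return !Mask.empty() &&
         std::all_of(Mask.begin(), Mask.end(),
                     [&](int M) { return M == Mask.front(); });
}

int main() {
  std::printf("%d\n", isSplatMask({3, 3, 3, 3})); // 1
  std::printf("%d\n", isSplatMask({0, 1, 2, 3})); // 0
}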
6450                               bool AllowSplat = false) {
6455  auto areTypesHalfed = [](Value *FullV, Value *HalfV) {
6456    auto *FullTy = FullV->getType();
6457    auto *HalfTy = HalfV->getType();
6459           2 * HalfTy->getPrimitiveSizeInBits().getFixedValue();
6462  auto extractHalf = [](Value *FullV, Value *HalfV) {
6465    return FullVT->getNumElements() == 2 * HalfVT->getNumElements();
6469  Value *S1Op1 = nullptr, *S2Op1 = nullptr;
6483 if ((S1Op1 && (!areTypesHalfed(S1Op1, Op1) || !extractHalf(S1Op1, Op1))) ||
6484 (S2Op1 && (!areTypesHalfed(S2Op1, Op2) || !extractHalf(S2Op1, Op2))))
6498 if ((M1Start != 0 && M1Start != (NumElements / 2)) ||
6499 (M2Start != 0 && M2Start != (NumElements / 2)))
6501 if (S1Op1 && S2Op1 && M1Start != M2Start)
6511 return Ext->getType()->getScalarSizeInBits() ==
6512 2 * Ext->getOperand(0)->getType()->getScalarSizeInBits();
6526  Value *VectorOperand = nullptr;
6543  if (!GEP || GEP->getNumOperands() != 2)
6547  Value *Offsets = GEP->getOperand(1);
6550  if (Base->getType()->isVectorTy() || !Offsets->getType()->isVectorTy())
6556  if (OffsetsInst->getType()->getScalarSizeInBits() > 32 &&
6557      OffsetsInst->getOperand(0)->getType()->getScalarSizeInBits() <= 32)
6558    Ops.push_back(&GEP->getOperandUse(1));
6592  switch (II->getIntrinsicID()) {
6593 case Intrinsic::aarch64_neon_smull:
6594 case Intrinsic::aarch64_neon_umull:
6597      Ops.push_back(&II->getOperandUse(0));
6598      Ops.push_back(&II->getOperandUse(1));
6603  case Intrinsic::fma:
6604  case Intrinsic::fmuladd:
6610  case Intrinsic::aarch64_neon_sqdmull:
6611  case Intrinsic::aarch64_neon_sqdmulh:
6612  case Intrinsic::aarch64_neon_sqrdmulh:
6615      Ops.push_back(&II->getOperandUse(0));
6617      Ops.push_back(&II->getOperandUse(1));
6618    return !Ops.empty();
6619  case Intrinsic::aarch64_neon_fmlal:
6620  case Intrinsic::aarch64_neon_fmlal2:
6621  case Intrinsic::aarch64_neon_fmlsl:
6622  case Intrinsic::aarch64_neon_fmlsl2:
6625      Ops.push_back(&II->getOperandUse(1));
6627      Ops.push_back(&II->getOperandUse(2));
6628    return !Ops.empty();
6629  case Intrinsic::aarch64_sve_ptest_first:
6630  case Intrinsic::aarch64_sve_ptest_last:
6632      if (IIOp->getIntrinsicID() == Intrinsic::aarch64_sve_ptrue)
6633        Ops.push_back(&II->getOperandUse(0));
6634    return !Ops.empty();
6635  case Intrinsic::aarch64_sme_write_horiz:
6636  case Intrinsic::aarch64_sme_write_vert:
6637  case Intrinsic::aarch64_sme_writeq_horiz:
6638  case Intrinsic::aarch64_sme_writeq_vert: {
6640    if (!Idx || Idx->getOpcode() != Instruction::Add)
6642    Ops.push_back(&II->getOperandUse(1));
6645 case Intrinsic::aarch64_sme_read_horiz:
6646 case Intrinsic::aarch64_sme_read_vert:
6647 case Intrinsic::aarch64_sme_readq_horiz:
6648 case Intrinsic::aarch64_sme_readq_vert:
6649 case Intrinsic::aarch64_sme_ld1b_vert:
6650 case Intrinsic::aarch64_sme_ld1h_vert:
6651 case Intrinsic::aarch64_sme_ld1w_vert:
6652 case Intrinsic::aarch64_sme_ld1d_vert:
6653 case Intrinsic::aarch64_sme_ld1q_vert:
6654 case Intrinsic::aarch64_sme_st1b_vert:
6655 case Intrinsic::aarch64_sme_st1h_vert:
6656 case Intrinsic::aarch64_sme_st1w_vert:
6657 case Intrinsic::aarch64_sme_st1d_vert:
6658 case Intrinsic::aarch64_sme_st1q_vert:
6659 case Intrinsic::aarch64_sme_ld1b_horiz:
6660 case Intrinsic::aarch64_sme_ld1h_horiz:
6661 case Intrinsic::aarch64_sme_ld1w_horiz:
6662 case Intrinsic::aarch64_sme_ld1d_horiz:
6663 case Intrinsic::aarch64_sme_ld1q_horiz:
6664 case Intrinsic::aarch64_sme_st1b_horiz:
6665 case Intrinsic::aarch64_sme_st1h_horiz:
6666 case Intrinsic::aarch64_sme_st1w_horiz:
6667 case Intrinsic::aarch64_sme_st1d_horiz:
6668 case Intrinsic::aarch64_sme_st1q_horiz: {
6670 if (!Idx || Idx->getOpcode() != Instruction::Add)
6672    Ops.push_back(&II->getOperandUse(3));
6675  case Intrinsic::aarch64_neon_pmull:
6678    Ops.push_back(&II->getOperandUse(0));
6679    Ops.push_back(&II->getOperandUse(1));
6681  case Intrinsic::aarch64_neon_pmull64:
6683        II->getArgOperand(1)))
6685    Ops.push_back(&II->getArgOperandUse(0));
6686    Ops.push_back(&II->getArgOperandUse(1));
6688  case Intrinsic::masked_gather:
6691    Ops.push_back(&II->getArgOperandUse(0));
6693  case Intrinsic::masked_scatter:
6696    Ops.push_back(&II->getArgOperandUse(1));
6703  auto ShouldSinkCondition = [](Value *Cond,
6708    if (II->getIntrinsicID() != Intrinsic::vector_reduce_or ||
6712    Ops.push_back(&II->getOperandUse(0));
6716  switch (I->getOpcode()) {
6717 case Instruction::GetElementPtr:
6718 case Instruction::Add:
6719 case Instruction::Sub:
6721    for (unsigned Op = 0; Op < I->getNumOperands(); ++Op) {
6723        Ops.push_back(&I->getOperandUse(Op));
6728  case Instruction::Select: {
6729    if (!ShouldSinkCondition(I->getOperand(0), Ops))
6732    Ops.push_back(&I->getOperandUse(0));
6735  case Instruction::Br: {
6742    Ops.push_back(&I->getOperandUse(0));
6749  if (!I->getType()->isVectorTy())
6752  switch (I->getOpcode()) {
6753  case Instruction::Sub:
6754  case Instruction::Add: {
6763      Ops.push_back(&Ext1->getOperandUse(0));
6764      Ops.push_back(&Ext2->getOperandUse(0));
6767      Ops.push_back(&I->getOperandUse(0));
6768      Ops.push_back(&I->getOperandUse(1));
6772 case Instruction::Or: {
6775 if (ST->hasNEON()) {
6789      if (I->getParent() != MainAnd->getParent() ||
6794      if (I->getParent() != IA->getParent() ||
6795          I->getParent() != IB->getParent())
6800      Ops.push_back(&I->getOperandUse(0));
6801      Ops.push_back(&I->getOperandUse(1));
6810 case Instruction::Mul: {
6811  auto ShouldSinkSplatForIndexedVariant = [](Value *V) {
6814    if (Ty->isScalableTy())
6818    return Ty->getScalarSizeInBits() == 16 || Ty->getScalarSizeInBits() == 32;
6821  int NumZExts = 0, NumSExts = 0;
6822  for (auto &Op : I->operands()) {
6829      auto *ExtOp = Ext->getOperand(0);
6830      if (isSplatShuffle(ExtOp) && ShouldSinkSplatForIndexedVariant(ExtOp))
6831 Ops.push_back(&Ext->getOperandUse(0));
6839 if (Ext->getOperand(0)->getType()->getScalarSizeInBits() * 2 <
6840 I->getType()->getScalarSizeInBits())
6877      if (!ElementConstant || !ElementConstant->isZero())
6880      unsigned Opcode = OperandInstr->getOpcode();
6881      if (Opcode == Instruction::SExt)
6883      else if (Opcode == Instruction::ZExt)
6888        unsigned Bitwidth = I->getType()->getScalarSizeInBits();
6898      Ops.push_back(&Insert->getOperandUse(1));
6904    if (!Ops.empty() && (NumSExts == 2 || NumZExts == 2))
6908    if (!ShouldSinkSplatForIndexedVariant(I))
6913    Ops.push_back(&I->getOperandUse(0));
6915    Ops.push_back(&I->getOperandUse(1));
6917    return !Ops.empty();
6919  case Instruction::FMul: {
6921    if (I->getType()->isScalableTy())
6930    Ops.push_back(&I->getOperandUse(0));
6932    Ops.push_back(&I->getOperandUse(1));
6933    return !Ops.empty();
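The Mul and FMul cases above collect operand uses that are worth sinking next to the multiply so that instruction selection can fold splats and extends into indexed or widening forms. A toy standalone sketch of the "all operands extend the same way" part of that decision; generalising from exactly two operands is an assumption of this sketch, not behaviour taken from the listing:

// Standalone sketch (illustrative, simplified): report which multiply operands
// are worth sinking when every input is a sign extend, or every input is a
// zero extend, mirroring the NumSExts/NumZExts counting above.
#include <cstdio>
#include <vector>

enum class OpKind { SExt, ZExt, Other };

std::vector<unsigned> operandsWorthSinking(const std::vector<OpKind> &Ops) {
  unsigned NumSExts = 0, NumZExts = 0;
  for (OpKind K : Ops) {
    if (K == OpKind::SExt)
      ++NumSExts;
    else if (K == OpKind::ZExt)
      ++NumZExts;
  }
  std::vector<unsigned> Worth;
  if (NumSExts == Ops.size() || NumZExts == Ops.size())
    for (unsigned I = 0; I < Ops.size(); ++I)
      Worth.push_back(I); // every extend operand is a sinking candidate
  return Worth;
}

int main() {
  auto W = operandsWorthSinking({OpKind::SExt, OpKind::SExt});
  std::printf("%zu operands worth sinking\n", W.size());
}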
Class for arbitrary precision integers.
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
unsigned popcount() const
Count the number of bits set.
unsigned countLeadingOnes() const
void negate()
Negate this APInt in place.
LLVM_ABI APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
unsigned logBase2() const
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
int64_t getSExtValue() const
Get sign extended value.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
LLVM Basic Block Representation.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getScalarizationOverhead(VectorType *InTy, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={}) const override
TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind, ArrayRef< int > Mask, VectorType *SrcTy, int &Index, VectorType *&SubTy) const
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I=nullptr, int64_t ScalableOffset=0) const override
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind) const override
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
InstructionCost getMulAccReductionCost(bool IsUnsigned, unsigned RedOpcode, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind) const override
InstructionCost getIndexedVectorInstrCostFromEnd(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index) const override
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
InstructionCost getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const override
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
bool isTypeLegal(Type *Ty) const override
static BinaryOperator * CreateWithCopiedFlags(BinaryOps Opc, Value *V1, Value *V2, Value *CopyO, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Value * getArgOperand(unsigned i) const
unsigned arg_size() const
This class represents a function call, abstracting a target machine's calling convention.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
@ ICMP_SGE
signed greater or equal
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
static bool isIntPredicate(Predicate P)
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
static LLVM_ABI ConstantAggregateZero * get(Type *Ty)
This is the shared class of boolean and integer constants.
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
const APInt & getValue() const
Return the constant as an APInt value reference.
static LLVM_ABI ConstantInt * getBool(LLVMContext &Context, bool V)
static LLVM_ABI Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
This is an important base class in LLVM.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string in and methods for querying it.
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
static constexpr ElementCount getScalable(ScalarTy MinVal)
static constexpr ElementCount getFixed(ScalarTy MinVal)
This provides a helper for copying FMF from an instruction or setting specified flags.
Convenience struct for specifying and reasoning about fast-math flags.
bool allowContract() const
Container class for subtarget features.
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
CallInst * CreateInsertVector(Type *DstType, Value *SrcVec, Value *SubVec, Value *Idx, const Twine &Name="")
Create a call to the vector.insert intrinsic.
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
IntegerType * getIntNTy(unsigned N)
Fetch the type representing an N-bit integer.
Type * getDoubleTy()
Fetch the type representing a 64-bit floating point value.
LLVM_ABI Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains.
LLVM_ABI CallInst * CreateMaskedLoad(Type *Ty, Value *Ptr, Align Alignment, Value *Mask, Value *PassThru=nullptr, const Twine &Name="")
Create a call to Masked Load intrinsic.
LLVM_ABI Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Type * getHalfTy()
Fetch the type representing a 16-bit floating point value.
Value * CreateGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Value * CreateBitOrPointerCast(Value *V, Type *DestTy, const Twine &Name="")
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Value * CreateBinOpFMF(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, FMFSource FMFSource, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
LLVM_ABI CallInst * CreateMaskedStore(Value *Val, Value *Ptr, Align Alignment, Value *Mask)
Create a call to Masked Store intrinsic.
Type * getFloatTy()
Fetch the type representing a 32-bit floating point value.
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
LLVM_ABI Value * CreateElementCount(Type *Ty, ElementCount EC)
Create an expression which evaluates to the number of elements in EC at runtime.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
This instruction inserts a single (scalar) element into a VectorType value.
The core instruction combiner logic.
virtual Instruction * eraseInstFromFunction(Instruction &I)=0
Combiner aware instruction erasure.
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)
Replace operand of instruction and add old operand to the worklist.
static InstructionCost getInvalid(CostType Val=0)
CostType getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
LLVM_ABI bool isCommutative() const LLVM_READONLY
Return true if the instruction is commutative:
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
Class to represent integer types.
bool hasGroups() const
Returns true if we have any interleave groups.
const SmallVectorImpl< Type * > & getArgTypes() const
Type * getReturnType() const
const SmallVectorImpl< const Value * > & getArgs() const
Intrinsic::ID getID() const
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
Value * getPointerOperand()
iterator_range< block_iterator > blocks() const
RecurrenceSet & getFixedOrderRecurrences()
Return the fixed-order recurrences found in the loop.
DominatorTree * getDominatorTree() const
PredicatedScalarEvolution * getPredicatedScalarEvolution() const
const ReductionList & getReductionVars() const
Returns the reduction variables found in the loop.
Represents a single loop in the control flow graph.
const FeatureBitset & getFeatureBits() const
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
Information for memory intrinsic cost model.
Align getAlignment() const
Type * getDataType() const
Intrinsic::ID getID() const
const Instruction * getInst() const
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Type * getRecurrenceType() const
Returns the type of the recurrence.
RecurKind getRecurrenceKind() const
This node represents a polynomial recurrence on the trip count of the specified loop.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
This class represents an analyzed expression in the program.
SMEAttrs is a utility class to parse the SME ACLE attributes on functions.
bool hasNonStreamingInterfaceAndBody() const
bool hasStreamingCompatibleInterface() const
bool hasStreamingInterfaceOrBody() const
bool isSMEABIRoutine() const
bool hasStreamingBody() const
void set(unsigned M, bool Enable=true)
SMECallAttrs is a utility class to hold the SMEAttrs for a callsite.
bool requiresPreservingZT0() const
bool requiresSMChange() const
bool requiresLazySave() const
bool requiresPreservingAllZAState() const
static LLVM_ABI ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
static ScalableVectorType * getDoubleElementsVectorType(ScalableVectorType *VTy)
The main scalar evolution driver.
LLVM_ABI const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
LLVM_ABI unsigned getSmallConstantTripMultiple(const Loop *L, const SCEV *ExitCount)
Returns the largest constant divisor of the trip count as a normal unsigned value,...
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI unsigned getSmallConstantMaxTripCount(const Loop *L, SmallVectorImpl< const SCEVPredicate * > *Predicates=nullptr)
Returns the upper bound of the loop trip count as a normal unsigned value.
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
const SCEV * getSymbolicMaxBackedgeTakenCount(const Loop *L)
When successful, this returns a SCEV that is greater than or equal to (i.e.
This instruction constructs a fixed permutation of two input vectors.
static LLVM_ABI bool isDeInterleaveMaskOfFactor(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
Check if the mask is a DE-interleave mask of the given factor Factor like: <Index,...
static LLVM_ABI bool isExtractSubvectorMask(ArrayRef< int > Mask, int NumSrcElts, int &Index)
Return true if this shuffle mask is an extract subvector mask.
static LLVM_ABI bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
static StackOffset getScalable(int64_t Scalable)
static StackOffset getFixed(int64_t Fixed)
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e.
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Class to represent struct types.
TargetInstrInfo - Interface to description of machine instruction set.
std::pair< LegalizeTypeAction, EVT > LegalizeKind
LegalizeKind holds the legalization kind that needs to happen to EVT in order to type-legalize it.
const RTLIB::RuntimeLibcallsInfo & getRuntimeLibcallsInfo() const
Primary interface to the complete machine description for the target machine.
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
bool isVectorTy() const
True if this is an instance of VectorType.
LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVM_ABI Type * getWithNewBitWidth(unsigned NewBitWidth) const
Given an integer or vector type, change the lane bitwidth to NewBitwidth, whilst keeping the old numb...
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
LLVM_ABI Type * getWithNewType(Type *EltTy) const
Given vector type, change the element type, whilst keeping the old number of elements.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
const Use & getOperandUse(unsigned i) const
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Base class of all SIMD vector types.
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
static VectorType * getInteger(VectorType *VTy)
This static method gets a VectorType with the same number of elements as the input type,...
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
Type * getElementType() const
constexpr ScalarTy getFixedValue() const
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr bool isFixed() const
Returns true if the quantity is not scaled by vscale.
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
const ParentTy * getParent() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
void expandMOVImm(uint64_t Imm, unsigned BitSize, SmallVectorImpl< ImmInsnModel > &Insn)
Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more real move-immediate instructions to...
LLVM_ABI APInt getCpuSupportsMask(ArrayRef< StringRef > Features)
static constexpr unsigned SVEBitsPerBlock
LLVM_ABI APInt getFMVPriority(ArrayRef< StringRef > Features)
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
@ C
The default llvm calling convention, compatible with C.
ISD namespace - This namespace contains an enum which represents all of the SelectionDAG node types a...
@ ADD
Simple integer binary arithmetic operators.
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
@ FADD
Simple binary floating point operators.
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
@ SIGN_EXTEND
Conversion operators.
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ SHL
Shift and rotation operations.
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)
Matches an And with LHS and RHS in either order.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
BinaryOp_match< LHS, RHS, Instruction::FMul > m_FMul(const LHS &L, const RHS &R)
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
cst_pred_ty< is_nonnegative > m_NonNegative()
Match an integer or vector of non-negative values.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
IntrinsicID_match m_VScale()
Matches a call to llvm.vscale().
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
auto m_Undef()
Match an arbitrary undef constant.
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
initializer< Ty > init(const Ty &Val)
LocationClass< Ty > location(Ty &L)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
FunctionAddr VTableAddr Value
std::optional< unsigned > isDUPQMask(ArrayRef< int > Mask, unsigned Segments, unsigned SegmentSize)
isDUPQMask - matches a splat of equivalent lanes within segments of a given number of elements.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
const CostTblEntryT< CostType > * CostTableLookup(ArrayRef< CostTblEntryT< CostType > > Tbl, int ISD, MVT Ty)
Find in cost table.
LLVM_ABI bool getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name)
Returns true if Name is applied to TheLoop and enabled.
bool isZIPMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut, unsigned &OperandOrderOut)
Return true for zip1 or zip2 masks of the form: <0, 8, 1, 9, 2, 10, 3, 11> (WhichResultOut = 0,...
TailFoldingOpts
An enum to describe what types of loops we should attempt to tail-fold: Disabled: None Reductions: Lo...
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
bool isDUPFirstSegmentMask(ArrayRef< int > Mask, unsigned Segments, unsigned SegmentSize)
isDUPFirstSegmentMask - matches a splat of the first 128b segment.
TypeConversionCostTblEntryT< unsigned > TypeConversionCostTblEntry
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
FunctionAddr VTableAddr uintptr_t uintptr_t Int32Ty
LLVM_ABI std::optional< const MDOperand * > findStringMetadataForLoop(const Loop *TheLoop, StringRef Name)
Find string metadata for loop.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
LLVM_ABI bool MaskedValueIsZero(const Value *V, const APInt &Mask, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if 'V & Mask' is known to be zero.
unsigned M1(unsigned Val)
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
unsigned getPerfectShuffleCost(llvm::ArrayRef< int > M)
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
bool isUZPMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut)
Return true for uzp1 or uzp2 masks of the form: <0, 2, 4, 6, 8, 10, 12, 14> or <1,...
bool isREVMask(ArrayRef< int > M, unsigned EltSize, unsigned NumElts, unsigned BlockSize)
isREVMask - Check if a vector shuffle corresponds to a REV instruction with the specified blocksize.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
constexpr int PoisonMaskElem
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
LLVM_ABI Value * simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a BinaryOperator, fold the result or return null.
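A minimal sketch, assuming a Value *X and a DataLayout DL in scope, of folding a trivial add without materialising an instruction.

  #include "llvm/Analysis/InstructionSimplify.h"
  #include "llvm/IR/Constants.h"
  #include "llvm/IR/Instruction.h"

  llvm::Value *Zero = llvm::Constant::getNullValue(X->getType());
  if (llvm::Value *Folded =
          llvm::simplifyBinOp(llvm::Instruction::Add, X, Zero,
                              llvm::SimplifyQuery(DL))) {
    // Folded is simply X; a null return means no simplification was found.
  }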
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ Or
Bitwise or logical OR of integers.
@ AnyOf
AnyOf reduction with select(cmp(),x,y) where one of (x,y) is loop invariant, and both x and y are integer type.
@ Xor
Bitwise or logical XOR of integers.
@ FMax
FP max implemented in terms of select(cmp()).
@ FMulAdd
Sum of float products with llvm.fmuladd(a * b + sum).
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ And
Bitwise or logical AND of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ FMin
FP min implemented in terms of select(cmp()).
@ Sub
Subtraction of integers.
@ AddChainWithSubs
A chain of adds and subs.
@ UMax
Unsigned integer max implemented in terms of select(cmp()).
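The "implemented in terms of select(cmp())" wording above refers to the canonical IR idiom these min/max kinds describe; a hedged IRBuilder sketch, where InsertPt, A and Acc are placeholder values supplied by the caller.

  #include "llvm/IR/IRBuilder.h"

  llvm::IRBuilder<> B(InsertPt);                 // InsertPt: some Instruction*
  // umin(A, Acc) expressed as select(cmp()) -- roughly the pattern UMin matches.
  llvm::Value *Cmp = B.CreateICmpULT(A, Acc);
  llvm::Value *UMinVal = B.CreateSelect(Cmp, A, Acc);
  // SMin/SMax/UMax use the corresponding signed/unsigned predicates;
  // FMin/FMax use fcmp in place of icmp.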
DWARFExpression::Operation Op
CostTblEntryT< unsigned > CostTblEntry
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
unsigned getNumElementsFromSVEPredPattern(unsigned Pattern)
Return the number of active elements for VL1 to VL256 predicate pattern, zero for all other patterns.
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list is empty.
Type * toVectorTy(Type *Scalar, ElementCount EC)
A helper function for converting Scalar types to vector types.
LLVM_ABI std::optional< int64_t > getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const DominatorTree &DT, const DenseMap< Value *, const SCEV * > &StridesMap=DenseMap< Value *, const SCEV * >(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of the access type size.
const TypeConversionCostTblEntryT< CostType > * ConvertCostTableLookup(ArrayRef< TypeConversionCostTblEntryT< CostType > > Tbl, int ISD, MVT Dst, MVT Src)
Find in type conversion cost table.
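The conversion-table counterpart keys on both destination and source types; an illustrative sketch, assuming the same includes and using-directive as the CostTableLookup sketch earlier, with placeholder entries and a hypothetical helper name.

  static unsigned lookupExtendCost(MVT Dst, MVT Src) {
    // {ISD opcode, destination type, source type, cost} -- numbers are placeholders.
    static const TypeConversionCostTblEntry ExtCostTbl[] = {
        {ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 1},
    };
    if (const auto *Entry =
            ConvertCostTableLookup(ExtCostTbl, ISD::ZERO_EXTEND, Dst, Src))
      return Entry->Cost;
    return 1;
  }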
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
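A few compile-time checks illustrating the integer helpers listed in this section (isInt, isPowerOf2_32/64, NextPowerOf2); Log2_32 is shown only as a comment since the listing does not mark it constexpr.

  #include "llvm/Support/MathExtras.h"

  static_assert(llvm::isPowerOf2_32(64u) && !llvm::isPowerOf2_32(96u));
  static_assert(llvm::isPowerOf2_64(uint64_t(1) << 40));
  static_assert(llvm::NextPowerOf2(64) == 128);   // strictly greater than the input
  static_assert(llvm::isInt<8>(-128) && !llvm::isInt<8>(128));
  // Log2_32(64) == 6 (floor log base 2); Log2_32(0) is -1 per the note above.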
unsigned getMatchingIROpode() const
bool inactiveLanesAreUnused() const
bool inactiveLanesAreNotDefined() const
bool hasMatchingUndefIntrinsic() const
static SVEIntrinsicInfo defaultMergingUnaryNarrowingTopOp()
static SVEIntrinsicInfo defaultZeroingOp()
bool hasGoverningPredicate() const
SVEIntrinsicInfo & setOperandIdxInactiveLanesTakenFrom(unsigned Index)
static SVEIntrinsicInfo defaultMergingOp(Intrinsic::ID IID=Intrinsic::not_intrinsic)
SVEIntrinsicInfo & setOperandIdxWithNoActiveLanes(unsigned Index)
unsigned getOperandIdxWithNoActiveLanes() const
SVEIntrinsicInfo & setInactiveLanesAreUnused()
SVEIntrinsicInfo & setInactiveLanesAreNotDefined()
SVEIntrinsicInfo & setGoverningPredicateOperandIdx(unsigned Index)
bool inactiveLanesTakenFromOperand() const
static SVEIntrinsicInfo defaultUndefOp()
bool hasOperandWithNoActiveLanes() const
Intrinsic::ID getMatchingUndefIntrinsic() const
SVEIntrinsicInfo & setResultIsZeroInitialized()
static SVEIntrinsicInfo defaultMergingUnaryOp()
SVEIntrinsicInfo & setMatchingUndefIntrinsic(Intrinsic::ID IID)
unsigned getGoverningPredicateOperandIdx() const
bool hasMatchingIROpode() const
bool resultIsZeroInitialized() const
SVEIntrinsicInfo & setMatchingIROpcode(unsigned Opcode)
unsigned getOperandIdxInactiveLanesTakenFrom() const
static SVEIntrinsicInfo defaultVoidOp(unsigned GPIndex)
This struct is a compact representation of a valid (non-zero power of two) alignment.
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
uint64_t getScalarSizeInBits() const
static LLVM_ABI EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
bool isFixedLengthVector() const
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
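A hedged sketch tying several of the EVT queries above together; Ctx is an LLVMContext and SomeIRType a Type*, both placeholders.

  #include "llvm/CodeGen/ValueTypes.h"

  llvm::EVT VT = llvm::EVT::getEVT(SomeIRType);
  if (VT.isSimple() && VT.isFixedLengthVector()) {
    llvm::MVT SimpleVT = VT.getSimpleVT();
    unsigned NumElts = VT.getVectorNumElements();
    llvm::EVT EltVT = VT.getVectorElementType();
    (void)SimpleVT; (void)NumElts; (void)EltVT;
  }
  // Building a vector type directly, e.g. <vscale x 4 x i32>:
  llvm::EVT NxV4I32 =
      llvm::EVT::getVectorVT(Ctx, llvm::MVT::i32, 4, /*IsScalable=*/true);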
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Machine model for scheduling, bundling, and heuristics.
static LLVM_ABI double getReciprocalThroughput(const MCSubtargetInfo &STI, const MCSchedClassDesc &SCDesc)
Information about a load/store intrinsic defined by the target.
InterleavedAccessInfo * IAI
LoopVectorizationLegality * LVL
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*vscale.
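A hedged sketch of filling in such an addressing mode and asking the target whether it is legal; TLI, DL and AccessTy are placeholders for a TargetLoweringBase pointer, the module DataLayout and the memory access type.

  #include "llvm/CodeGen/TargetLowering.h"

  // Describe [BaseReg + 16 + 2*IndexReg]; unset fields keep their defaults.
  llvm::TargetLoweringBase::AddrMode AM;
  AM.HasBaseReg = true;
  AM.BaseOffs = 16;
  AM.Scale = 2;
  bool Legal = TLI->isLegalAddressingMode(DL, AM, AccessTy, /*AddrSpace=*/0);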