#include "llvm/IR/IntrinsicsAArch64.h"

#define DEBUG_TYPE "aarch64tti"
52 "Penalty of calling a function that requires a change to PSTATE.SM"));
56 cl::desc(
"Penalty of inlining a call that requires a change to PSTATE.SM"));
class TailFoldingOption {

  bool NeedsDefault = true;

  void setNeedsDefault(bool V) { NeedsDefault = V; }
    assert((InitialBits == TailFoldingOpts::Disabled || !NeedsDefault) &&
           "Initial bits should only include one of "
           "(disabled|all|simple|default)");
    Bits = NeedsDefault ? DefaultBits : InitialBits;
    Bits |= EnableBits;
    Bits &= ~DisableBits;
    errs() << "invalid argument '" << Opt
           << "' to -sve-tail-folding=; the option should be of the form\n"
              "  (disabled|all|default|simple)[+(reductions|recurrences"
              "|reverse|noreductions|norecurrences|noreverse)]\n";
  void operator=(const std::string &Val) {

    setNeedsDefault(false);

    unsigned StartIdx = 1;
    if (TailFoldTypes[0] == "disabled")
      setInitialBits(TailFoldingOpts::Disabled);
    else if (TailFoldTypes[0] == "all")
      setInitialBits(TailFoldingOpts::All);
    else if (TailFoldTypes[0] == "default")
      setNeedsDefault(true);
    else if (TailFoldTypes[0] == "simple")
      setInitialBits(TailFoldingOpts::Simple);
    else {
      StartIdx = 0;
      setInitialBits(TailFoldingOpts::Disabled);
    }

    for (unsigned I = StartIdx; I < TailFoldTypes.size(); I++) {
      if (TailFoldTypes[I] == "reductions")
        setEnableBit(TailFoldingOpts::Reductions);
      else if (TailFoldTypes[I] == "recurrences")
        setEnableBit(TailFoldingOpts::Recurrences);
      else if (TailFoldTypes[I] == "reverse")
        setEnableBit(TailFoldingOpts::Reverse);
      else if (TailFoldTypes[I] == "noreductions")
        setDisableBit(TailFoldingOpts::Reductions);
      else if (TailFoldTypes[I] == "norecurrences")
        setDisableBit(TailFoldingOpts::Recurrences);
      else if (TailFoldTypes[I] == "noreverse")
        setDisableBit(TailFoldingOpts::Reverse);
173 "Control the use of vectorisation using tail-folding for SVE where the"
174 " option is specified in the form (Initial)[+(Flag1|Flag2|...)]:"
175 "\ndisabled (Initial) No loop types will vectorize using "
177 "\ndefault (Initial) Uses the default tail-folding settings for "
179 "\nall (Initial) All legal loop types will vectorize using "
181 "\nsimple (Initial) Use tail-folding for simple loops (not "
182 "reductions or recurrences)"
183 "\nreductions Use tail-folding for loops containing reductions"
184 "\nnoreductions Inverse of above"
185 "\nrecurrences Use tail-folding for loops containing fixed order "
187 "\nnorecurrences Inverse of above"
188 "\nreverse Use tail-folding for loops requiring reversed "
190 "\nnoreverse Inverse of above"),
                  .Case("__arm_sme_state", true)
                  .Case("__arm_tpidr2_save", true)
                  .Case("__arm_tpidr2_restore", true)
                  .Case("__arm_za_disable", true)
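// The names above are the SME ABI support routines (the streaming-mode state
// query and the TPIDR2 lazy-save helpers); matching them by name lets the
// caller special-case these calls when scanning for operations that are
// incompatible with streaming mode.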
    if (isa<CallInst>(I) && !I.isDebugOrPseudoInst() &&
        (cast<CallInst>(I).isInlineAsm() || isa<IntrinsicInst>(I) ||
  SMEAttrs CallerAttrs(*Caller), CalleeAttrs(*Callee);

  if (CallerAttrs.requiresLazySave(CalleeAttrs) ||
      CallerAttrs.requiresSMChange(CalleeAttrs)) {

  const FeatureBitset &CallerBits =
      TM.getSubtargetImpl(*Caller)->getFeatureBits();
  const FeatureBitset &CalleeBits =
      TM.getSubtargetImpl(*Callee)->getFeatureBits();

  return (CallerBits & CalleeBits) == CalleeBits;
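// The subset test permits inlining only when every feature required by the
// callee is also present in the caller: for example, a callee built with
// just {+neon} can be inlined into a {+neon,+sve} caller, but a callee
// requiring {+sve2} cannot be inlined into a caller without SVE2.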
    auto FVTy = dyn_cast<FixedVectorType>(Ty);
    return FVTy &&
           FVTy->getScalarSizeInBits() * FVTy->getNumElements() > 128;
                                              unsigned DefaultCallPenalty) const {

  if (F == Call.getCaller())

  return DefaultCallPenalty;
    ImmVal = Imm.sext((BitSize + 63) & ~0x3fU);

  for (unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {

  return std::max<InstructionCost>(1, Cost);
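// Above, each 64-bit chunk of the sign-extended immediate is costed as the
// number of move/insert instructions (a MOVZ/MOVK-style expansion) needed to
// materialise it; the total is clamped so that even a "free" immediate still
// counts as one instruction.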
  unsigned ImmIdx = ~0U;
  switch (Opcode) {
  case Instruction::GetElementPtr:

  case Instruction::Store:

  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::URem:
  case Instruction::SRem:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::ICmp:

  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:

  case Instruction::Trunc:
  case Instruction::ZExt:
  case Instruction::SExt:
  case Instruction::IntToPtr:
  case Instruction::PtrToInt:
  case Instruction::BitCast:
  case Instruction::PHI:
  case Instruction::Call:
  case Instruction::Select:
  case Instruction::Ret:
  case Instruction::Load:

    int NumConstants = (BitSize + 63) / 64;
  if (IID >= Intrinsic::aarch64_addg && IID <= Intrinsic::aarch64_udiv)

  case Intrinsic::sadd_with_overflow:
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::usub_with_overflow:
  case Intrinsic::smul_with_overflow:
  case Intrinsic::umul_with_overflow:

    int NumConstants = (BitSize + 63) / 64;

  case Intrinsic::experimental_stackmap:
    if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))

  case Intrinsic::experimental_patchpoint_void:
  case Intrinsic::experimental_patchpoint_i64:
    if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))

  case Intrinsic::experimental_gc_statepoint:
    if ((Idx < 5) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
  if (TyWidth == 32 || TyWidth == 64)

  switch (ICA.getID()) {
  case Intrinsic::umin:
  case Intrinsic::umax:
  case Intrinsic::smin:
  case Intrinsic::smax: {
    static const auto ValidMinMaxTys = {MVT::v8i8,    MVT::v16i8,   MVT::v4i16,
                                        MVT::v8i16,   MVT::v2i32,   MVT::v4i32,
                                        MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32,

    if (LT.second == MVT::v2i64)

    if (any_of(ValidMinMaxTys, [&](MVT M) { return M == LT.second; }))
  case Intrinsic::sadd_sat:
  case Intrinsic::ssub_sat:
  case Intrinsic::uadd_sat:
  case Intrinsic::usub_sat: {
    static const auto ValidSatTys = {MVT::v8i8,  MVT::v16i8, MVT::v4i16,
                                     MVT::v8i16, MVT::v2i32, MVT::v4i32,

        LT.second.getScalarSizeInBits() == RetTy->getScalarSizeInBits() ? 1 : 4;
    if (any_of(ValidSatTys, [&](MVT M) { return M == LT.second; }))
      return LT.first * Instrs;
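// NEON/SVE provide saturating add/sub directly (SQADD, UQADD, SQSUB, UQSUB),
// so a legal saturating type costs one instruction per legalised vector; when
// the legalised scalar size no longer matches the return type, the cost falls
// back to the small expansion modelled by the "4" above.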
  case Intrinsic::abs: {
    static const auto ValidAbsTys = {MVT::v8i8,  MVT::v16i8, MVT::v4i16,
                                     MVT::v8i16, MVT::v2i32, MVT::v4i32,

    if (any_of(ValidAbsTys, [&](MVT M) { return M == LT.second; }))

  case Intrinsic::bswap: {
    static const auto ValidAbsTys = {MVT::v4i16, MVT::v8i16, MVT::v2i32,
                                     MVT::v4i32, MVT::v2i64};

    if (any_of(ValidAbsTys, [&](MVT M) { return M == LT.second; }) &&
        LT.second.getScalarSizeInBits() == RetTy->getScalarSizeInBits())
  case Intrinsic::experimental_stepvector: {

    Cost += AddCost * (LT.first - 1);

  case Intrinsic::vector_extract:
  case Intrinsic::vector_insert: {

    bool IsExtract = ICA.getID() == Intrinsic::vector_extract;
  case Intrinsic::bitreverse: {
    static const CostTblEntry BitreverseTbl[] = {
        {Intrinsic::bitreverse, MVT::i32, 1},
        {Intrinsic::bitreverse, MVT::i64, 1},
        {Intrinsic::bitreverse, MVT::v8i8, 1},
        {Intrinsic::bitreverse, MVT::v16i8, 1},
        {Intrinsic::bitreverse, MVT::v4i16, 2},
        {Intrinsic::bitreverse, MVT::v8i16, 2},
        {Intrinsic::bitreverse, MVT::v2i32, 2},
        {Intrinsic::bitreverse, MVT::v4i32, 2},
        {Intrinsic::bitreverse, MVT::v1i64, 2},
        {Intrinsic::bitreverse, MVT::v2i64, 2},

      return LegalisationCost.first * Entry->Cost + 1;

    return LegalisationCost.first * Entry->Cost;
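// The costs mirror the lowering: scalar i32/i64 and byte vectors are a single
// RBIT, wider vector elements also need a byte-reverse (REV16/REV32/REV64)
// after the bit-reverse, and scalar i8/i16 are legalised to i32 and pay one
// extra instruction, hence the "+ 1" path above.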
  case Intrinsic::ctpop: {
    if (!ST->hasNEON()) {

                        RetTy->getScalarSizeInBits()

    return LT.first * Entry->Cost + ExtraCost;
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::usub_with_overflow:
  case Intrinsic::smul_with_overflow:
  case Intrinsic::umul_with_overflow: {
    static const CostTblEntry WithOverflowCostTbl[] = {
        {Intrinsic::sadd_with_overflow, MVT::i8, 3},
        {Intrinsic::uadd_with_overflow, MVT::i8, 3},
        {Intrinsic::sadd_with_overflow, MVT::i16, 3},
        {Intrinsic::uadd_with_overflow, MVT::i16, 3},
        {Intrinsic::sadd_with_overflow, MVT::i32, 1},
        {Intrinsic::uadd_with_overflow, MVT::i32, 1},
        {Intrinsic::sadd_with_overflow, MVT::i64, 1},
        {Intrinsic::uadd_with_overflow, MVT::i64, 1},
        {Intrinsic::ssub_with_overflow, MVT::i8, 3},
        {Intrinsic::usub_with_overflow, MVT::i8, 3},
        {Intrinsic::ssub_with_overflow, MVT::i16, 3},
        {Intrinsic::usub_with_overflow, MVT::i16, 3},
        {Intrinsic::ssub_with_overflow, MVT::i32, 1},
        {Intrinsic::usub_with_overflow, MVT::i32, 1},
        {Intrinsic::ssub_with_overflow, MVT::i64, 1},
        {Intrinsic::usub_with_overflow, MVT::i64, 1},
        {Intrinsic::smul_with_overflow, MVT::i8, 5},
        {Intrinsic::umul_with_overflow, MVT::i8, 4},
        {Intrinsic::smul_with_overflow, MVT::i16, 5},
        {Intrinsic::umul_with_overflow, MVT::i16, 4},
        {Intrinsic::smul_with_overflow, MVT::i32, 2},
        {Intrinsic::umul_with_overflow, MVT::i32, 2},
        {Intrinsic::smul_with_overflow, MVT::i64, 3},
        {Intrinsic::umul_with_overflow, MVT::i64, 3},
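// For i32/i64 the overflow flag falls out of the flag-setting ADDS/SUBS forms
// (cost 1), while i8/i16 must be widened and checked explicitly (cost 3).
// Multiplies need a separate high-half check, so i32 uses a widening multiply
// (cost 2) and i64 additionally needs SMULH/UMULH (cost 3).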
  case Intrinsic::fptosi_sat:
  case Intrinsic::fptoui_sat: {

    bool IsSigned = ICA.getID() == Intrinsic::fptosi_sat;

    if ((LT.second == MVT::f32 || LT.second == MVT::f64 ||
         LT.second == MVT::v2f32 || LT.second == MVT::v4f32 ||
         LT.second == MVT::v2f64) &&

        (LT.second == MVT::f64 && MTy == MVT::i32) ||
        (LT.second == MVT::f32 && MTy == MVT::i64)))

    if (ST->hasFullFP16() &&
        ((LT.second == MVT::f16 && MTy == MVT::i32) ||
         ((LT.second == MVT::v4f16 || LT.second == MVT::v8f16) &&

    if ((LT.second.getScalarType() == MVT::f32 ||
         LT.second.getScalarType() == MVT::f64 ||
         (ST->hasFullFP16() && LT.second.getScalarType() == MVT::f16)) &&

    if (LT.second.isVector())

        LegalTy, {LegalTy, LegalTy});

        LegalTy, {LegalTy, LegalTy});

    return LT.first * Cost;
  case Intrinsic::fshl:
  case Intrinsic::fshr: {

    static const CostTblEntry FshlTbl[] = {
        {Intrinsic::fshl, MVT::v4i32, 3},
        {Intrinsic::fshl, MVT::v2i64, 3}, {Intrinsic::fshl, MVT::v16i8, 4},
        {Intrinsic::fshl, MVT::v8i16, 4}, {Intrinsic::fshl, MVT::v2i32, 3},
        {Intrinsic::fshl, MVT::v8i8, 4},  {Intrinsic::fshl, MVT::v4i16, 4}};

      return LegalisationCost.first * Entry->Cost;
    if (!RetTy->isIntegerTy())

    bool HigherCost = (RetTy->getScalarSizeInBits() != 32 &&
                       RetTy->getScalarSizeInBits() < 64) ||
                      (RetTy->getScalarSizeInBits() % 64 != 0);
    unsigned ExtraCost = HigherCost ? 1 : 0;
    if (RetTy->getScalarSizeInBits() == 32 ||
        RetTy->getScalarSizeInBits() == 64)

    return TyL.first + ExtraCost;
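// A 32- or 64-bit scalar funnel shift lowers to a single EXTR-style
// instruction, so it costs just the legalisation factor; other widths need
// one extra instruction to fix things up, which is what ExtraCost models.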
  auto RequiredType = II.getType();

  assert(PN && "Expected Phi Node!");

  if (!PN->hasOneUse())

  for (Value *IncValPhi : PN->incoming_values()) {
    auto *Reinterpret = dyn_cast<IntrinsicInst>(IncValPhi);
    if (!Reinterpret ||
        Reinterpret->getIntrinsicID() !=
            Intrinsic::aarch64_sve_convert_to_svbool ||
        RequiredType != Reinterpret->getArgOperand(0)->getType())

  for (unsigned I = 0; I < PN->getNumIncomingValues(); I++) {
    auto *Reinterpret = cast<Instruction>(PN->getIncomingValue(I));
    NPN->addIncoming(Reinterpret->getOperand(0), PN->getIncomingBlock(I));
static std::optional<Instruction *>

  auto BinOp = dyn_cast<IntrinsicInst>(II.getOperand(0));

  auto IntrinsicID = BinOp->getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::aarch64_sve_and_z:
  case Intrinsic::aarch64_sve_bic_z:
  case Intrinsic::aarch64_sve_eor_z:
  case Intrinsic::aarch64_sve_nand_z:
  case Intrinsic::aarch64_sve_nor_z:
  case Intrinsic::aarch64_sve_orn_z:
  case Intrinsic::aarch64_sve_orr_z:

  auto BinOpPred = BinOp->getOperand(0);
  auto BinOpOp1 = BinOp->getOperand(1);
  auto BinOpOp2 = BinOp->getOperand(2);

  auto PredIntr = dyn_cast<IntrinsicInst>(BinOpPred);
  if (!PredIntr ||
      PredIntr->getIntrinsicID() != Intrinsic::aarch64_sve_convert_to_svbool)

  auto PredOp = PredIntr->getOperand(0);
  auto PredOpTy = cast<VectorType>(PredOp->getType());

      Intrinsic::aarch64_sve_convert_from_svbool, {PredOpTy}, {BinOpOp1});
  NarrowedBinOpArgs.push_back(NarrowBinOpOp1);
  if (BinOpOp1 == BinOpOp2)
    NarrowedBinOpArgs.push_back(NarrowBinOpOp1);

      Intrinsic::aarch64_sve_convert_from_svbool, {PredOpTy}, {BinOpOp2}));
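// The combine above removes a predicate round-trip: instead of widening both
// operands to svbool, applying the logical op, and then narrowing the result,
// it narrows the operands first and applies the op directly at the original
// (narrower) predicate type.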
static std::optional<Instruction *>

      isa<TargetExtType>(II.getType()))

  const auto *IVTy = cast<VectorType>(II.getType());

    const auto *CursorVTy = cast<VectorType>(Cursor->getType());
    if (CursorVTy->getElementCount().getKnownMinValue() <
        IVTy->getElementCount().getKnownMinValue())

      EarliestReplacement = Cursor;

    auto *IntrinsicCursor = dyn_cast<IntrinsicInst>(Cursor);

    if (!IntrinsicCursor || !(IntrinsicCursor->getIntrinsicID() ==
                                  Intrinsic::aarch64_sve_convert_to_svbool ||
                              IntrinsicCursor->getIntrinsicID() ==
                                  Intrinsic::aarch64_sve_convert_from_svbool))

    CandidatesForRemoval.insert(CandidatesForRemoval.begin(), IntrinsicCursor);
    Cursor = IntrinsicCursor->getOperand(0);
  if (!EarliestReplacement)

  if (match(Pred, m_Intrinsic<Intrinsic::aarch64_sve_convert_from_svbool>(
                      m_Intrinsic<Intrinsic::aarch64_sve_convert_to_svbool>(

    if (cast<ScalableVectorType>(Pred->getType())->getMinNumElements() <=
        cast<ScalableVectorType>(UncastedPred->getType())->getMinNumElements())

  return match(Pred, m_Intrinsic<Intrinsic::aarch64_sve_ptrue>(
                         m_ConstantInt<AArch64SVEPredPattern::all>()));
  const auto PTruePattern =
      cast<ConstantInt>(Pg->getOperand(0))->getZExtValue();
  if (PTruePattern != AArch64SVEPredPattern::vl1)

  Insert->insertBefore(&II);
  Insert->takeName(&II);

  Splat->takeName(&II);
  if (!Pg || Pg->getIntrinsicID() != Intrinsic::aarch64_sve_ptrue)
    return std::nullopt;

  const auto PTruePattern =
      cast<ConstantInt>(Pg->getOperand(0))->getZExtValue();
  if (PTruePattern != AArch64SVEPredPattern::all)
    return std::nullopt;

  if (!SplatValue || !SplatValue->isZero())
    return std::nullopt;

  auto *DupQLane = dyn_cast<IntrinsicInst>(II.getArgOperand(1));
  if (!DupQLane ||
      DupQLane->getIntrinsicID() != Intrinsic::aarch64_sve_dupq_lane)
    return std::nullopt;

  if (!cast<ConstantInt>(DupQLane->getArgOperand(1))->isZero())
    return std::nullopt;

  auto *VecIns = dyn_cast<IntrinsicInst>(DupQLane->getArgOperand(0));
  if (!VecIns || VecIns->getIntrinsicID() != Intrinsic::vector_insert)
    return std::nullopt;

  if (!isa<UndefValue>(VecIns->getArgOperand(0)))
    return std::nullopt;

  if (!cast<ConstantInt>(VecIns->getArgOperand(2))->isZero())
    return std::nullopt;

  auto *ConstVec = dyn_cast<Constant>(VecIns->getArgOperand(1));
  if (!ConstVec)
    return std::nullopt;

  auto *VecTy = dyn_cast<FixedVectorType>(ConstVec->getType());
  auto *OutTy = dyn_cast<ScalableVectorType>(II.getType());
  if (!VecTy || !OutTy || VecTy->getNumElements() != OutTy->getMinNumElements())
    return std::nullopt;
  unsigned NumElts = VecTy->getNumElements();
  unsigned PredicateBits = 0;

  for (unsigned I = 0; I < NumElts; ++I) {
    auto *Arg = dyn_cast<ConstantInt>(ConstVec->getAggregateElement(I));
    if (!Arg)
      return std::nullopt;

    PredicateBits |= 1 << (I * (16 / NumElts));

  if (PredicateBits == 0) {

    PFalse->takeName(&II);

  for (unsigned I = 0; I < 16; ++I)
    if ((PredicateBits & (1 << I)) != 0)

  unsigned PredSize = Mask & -Mask;

  for (unsigned I = 0; I < 16; I += PredSize)
    if ((PredicateBits & (1 << I)) == 0)
      return std::nullopt;

      {PredType}, {PTruePat});

      Intrinsic::aarch64_sve_convert_to_svbool, {PredType}, {PTrue});
  auto *ConvertFromSVBool =

      {II.getType()}, {ConvertToSVBool});
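// Net effect: an sve.cmpne of a dupq-splatted constant predicate pattern
// against zero is replaced by a single ptrue with the equivalent native
// pattern (reinterpreted through svbool), avoiding the constant
// materialisation and the compare entirely.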
  bool IsAfter = IntrinsicID == Intrinsic::aarch64_sve_lasta;

    auto *OldBinOp = cast<BinaryOperator>(Vec);
    auto OpC = OldBinOp->getOpcode();

        OpC, NewLHS, NewRHS, OldBinOp, OldBinOp->getName(), &II);

  auto *C = dyn_cast<Constant>(Pg);
  if (IsAfter && C && C->isNullValue()) {

    Extract->insertBefore(&II);
    Extract->takeName(&II);

  auto *IntrPG = dyn_cast<IntrinsicInst>(Pg);
  if (!IntrPG)
    return std::nullopt;

  if (IntrPG->getIntrinsicID() != Intrinsic::aarch64_sve_ptrue)
    return std::nullopt;

  const auto PTruePattern =
      cast<ConstantInt>(IntrPG->getOperand(0))->getZExtValue();

    return std::nullopt;

  unsigned Idx = MinNumElts - 1;

  auto *PgVTy = cast<ScalableVectorType>(Pg->getType());
  if (Idx >= PgVTy->getMinNumElements())
    return std::nullopt;

  Extract->insertBefore(&II);
  Extract->takeName(&II);

    return std::nullopt;

    return std::nullopt;

      FPTy, cast<VectorType>(Vec->getType())->getElementCount());

      II.getIntrinsicID(), {FPVec->getType()}, {Pg, FPFallBack, FPVec});
static std::optional<Instruction *>

  if (Pattern == AArch64SVEPredPattern::all) {

  return MinNumElts && NumElts >= MinNumElts
             ? std::optional<Instruction *>(IC.replaceInstUsesWith(
                   II, ConstantInt::get(II.getType(), MinNumElts)))
             : std::nullopt;

  if (PgVal == OpVal &&

    Value *Ops[] = {PgVal, OpVal};

  return std::nullopt;

  if (Pg->getIntrinsicID() == Intrinsic::aarch64_sve_convert_to_svbool &&
      OpIID == Intrinsic::aarch64_sve_convert_to_svbool &&

  if ((Pg == Op) && (II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_any) &&
      ((OpIID == Intrinsic::aarch64_sve_brka_z) ||
       (OpIID == Intrinsic::aarch64_sve_brkb_z) ||
       (OpIID == Intrinsic::aarch64_sve_brkpa_z) ||
       (OpIID == Intrinsic::aarch64_sve_brkpb_z) ||
       (OpIID == Intrinsic::aarch64_sve_rdffr_z) ||
       (OpIID == Intrinsic::aarch64_sve_and_z) ||
       (OpIID == Intrinsic::aarch64_sve_bic_z) ||
       (OpIID == Intrinsic::aarch64_sve_eor_z) ||
       (OpIID == Intrinsic::aarch64_sve_nand_z) ||
       (OpIID == Intrinsic::aarch64_sve_nor_z) ||
       (OpIID == Intrinsic::aarch64_sve_orn_z) ||
       (OpIID == Intrinsic::aarch64_sve_orr_z))) {

  return std::nullopt;
template <Intrinsic::ID MulOpc, typename Intrinsic::ID FuseOpc>
static std::optional<Instruction *>
instCombineSVEVectorFuseMulAddSub(InstCombiner &IC, IntrinsicInst &II,
                                  bool MergeIntoAddendOp) {

  Value *MulOp0, *MulOp1, *AddendOp, *Mul;
  if (MergeIntoAddendOp) {

      return std::nullopt;

  if (!Mul->hasOneUse())
    return std::nullopt;

    return std::nullopt;
    return std::nullopt;

  if (MergeIntoAddendOp)
        {P, AddendOp, MulOp0, MulOp1}, FMFSource);
        {P, MulOp0, MulOp1, AddendOp}, FMFSource);
static std::optional<Instruction *>

  Load->copyMetadata(II);

static std::optional<Instruction *>

  Store->copyMetadata(II);
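// These two combines rewrite sve.ld1/sve.st1 as the generic masked
// load/store (or a plain load/store once the predicate is known all-true),
// copying the original instruction metadata (e.g. TBAA) across.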
  switch (Intrinsic) {
  case Intrinsic::aarch64_sve_fmul_u:
    return Instruction::BinaryOps::FMul;
  case Intrinsic::aarch64_sve_fadd_u:
    return Instruction::BinaryOps::FAdd;
  case Intrinsic::aarch64_sve_fsub_u:
    return Instruction::BinaryOps::FSub;
  default:
    return Instruction::BinaryOpsEnd;
  }

static std::optional<Instruction *>

    return std::nullopt;

  if (BinOpCode == Instruction::BinaryOpsEnd ||
      !match(OpPredicate, m_Intrinsic<Intrinsic::aarch64_sve_ptrue>(
                              m_ConstantInt<AArch64SVEPredPattern::all>())))
    return std::nullopt;

  if (!match(OpPredicate, m_Intrinsic<Intrinsic::aarch64_sve_ptrue>(
                              m_ConstantInt<AArch64SVEPredPattern::all>())))
    return std::nullopt;
static std::optional<Instruction *>

                                                    Intrinsic::aarch64_sve_mla>(

                                                    Intrinsic::aarch64_sve_mad>(

  return std::nullopt;

static std::optional<Instruction *>

                       Intrinsic::aarch64_sve_fmla>(IC, II,

                       Intrinsic::aarch64_sve_fmad>(IC, II,

                       Intrinsic::aarch64_sve_fmla>(IC, II,

  return std::nullopt;

static std::optional<Instruction *>

                       Intrinsic::aarch64_sve_fmla>(IC, II,

                       Intrinsic::aarch64_sve_fmad>(IC, II,

                                                    Intrinsic::aarch64_sve_fmla_u>(

static std::optional<Instruction *>

                       Intrinsic::aarch64_sve_fmls>(IC, II,

                                                    Intrinsic::aarch64_sve_fnmsb>(

                       Intrinsic::aarch64_sve_fmls>(IC, II,

  return std::nullopt;

static std::optional<Instruction *>

                       Intrinsic::aarch64_sve_fmls>(IC, II,

                                                    Intrinsic::aarch64_sve_fnmsb>(

                                                    Intrinsic::aarch64_sve_fmls_u>(

                                                    Intrinsic::aarch64_sve_mls>(

  return std::nullopt;
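// Each wrapper above tries to fuse a one-use multiply feeding an add/sub
// (in either operand order) into the corresponding SVE multiply-accumulate
// form - mla/mad, fmla/fmad, mls/fmls, fnmsb, or their unpredicated "_u"
// variants - choosing between accumulator-destructive and
// multiplicand-destructive forms via MergeIntoAddendOp.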
  auto IsUnitSplat = [](auto *I) {

  auto IsUnitDup = [](auto *I) {
    auto *IntrI = dyn_cast<IntrinsicInst>(I);
    if (!IntrI || IntrI->getIntrinsicID() != Intrinsic::aarch64_sve_dup)

    auto *SplatValue = IntrI->getOperand(2);

  if (IsUnitSplat(OpMultiplier)) {

    OpMultiplicand->takeName(&II);

  } else if (IsUnitDup(OpMultiplier)) {

    auto *DupInst = cast<IntrinsicInst>(OpMultiplier);
    auto *DupPg = DupInst->getOperand(1);

    if (OpPredicate == DupPg) {
      OpMultiplicand->takeName(&II);

  bool IsSigned = II.getIntrinsicID() == Intrinsic::aarch64_sve_sunpkhi ||

  return std::nullopt;

  auto *SplatValue = dyn_cast_or_null<ConstantInt>(getSplatValue(OpIndices));
  if (!SplatValue ||
      SplatValue->getValue().uge(VTy->getElementCount().getKnownMinValue()))
    return std::nullopt;
  constexpr Intrinsic::ID FromSVB = Intrinsic::aarch64_sve_convert_from_svbool;
  constexpr Intrinsic::ID ToSVB = Intrinsic::aarch64_sve_convert_to_svbool;

       m_Intrinsic<FromSVB>(m_Intrinsic<ToSVB>(m_Value(A)))) &&
       m_Intrinsic<FromSVB>(m_Intrinsic<ToSVB>(m_Value(B))))) ||

    auto *TyA = cast<ScalableVectorType>(A->getType());
    if (TyA == B->getType() &&

  return std::nullopt;

  return std::nullopt;
static std::optional<Instruction *>

  if (match(Index, m_Intrinsic<Intrinsic::aarch64_sve_index>(

        BasePtr, IndexBase);

  return std::nullopt;

static std::optional<Instruction *>

  if (match(Index, m_Intrinsic<Intrinsic::aarch64_sve_index>(

        BasePtr, IndexBase);

  return std::nullopt;
  ConstantInt *SplatConstantInt = dyn_cast_or_null<ConstantInt>(SplatValue);
  if (!SplatConstantInt)
    return std::nullopt;

        Intrinsic::aarch64_sve_asrd, {II.getType()}, {Pred, Vec, DivisorLog2});

        Intrinsic::aarch64_sve_asrd, {II.getType()}, {Pred, Vec, DivisorLog2});
        Intrinsic::aarch64_sve_neg, {ASRD->getType()}, {ASRD, Pred, ASRD});

  return std::nullopt;
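// A predicated sdiv by a splatted power of two becomes a single ASRD
// (arithmetic shift right for divide, which rounds towards zero); for a
// negative power-of-two divisor the same ASRD is emitted and the result is
// then negated under the same predicate.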
  size_t VecSize = Vec.size();

  size_t HalfVecSize = VecSize / 2;

    if (*LHS != nullptr && *RHS != nullptr) {

    if (*LHS == nullptr && *RHS != nullptr)

          m_Intrinsic<Intrinsic::vector_insert>(

      !isa<FixedVectorType>(CurrentInsertElt->getType()))
    return std::nullopt;
  auto IIScalableTy = cast<ScalableVectorType>(II.getType());

  while (auto InsertElt = dyn_cast<InsertElementInst>(CurrentInsertElt)) {
    auto Idx = cast<ConstantInt>(InsertElt->getOperand(2));
    Elts[Idx->getValue().getZExtValue()] = InsertElt->getOperand(1);
    CurrentInsertElt = InsertElt->getOperand(0);

      isa<PoisonValue>(CurrentInsertElt) && isa<PoisonValue>(Default);
    return std::nullopt;

  for (size_t I = 0; I < Elts.size(); I++) {
    if (Elts[I] == nullptr)

  if (InsertEltChain == nullptr)
    return std::nullopt;

  unsigned PatternWidth = IIScalableTy->getScalarSizeInBits() * Elts.size();
  unsigned PatternElementCount = IIScalableTy->getScalarSizeInBits() *
                                 IIScalableTy->getMinNumElements() /

  auto *WideShuffleMaskTy =

  auto NarrowBitcast =

  return std::nullopt;
  Value *AbsPred, *MergedValue;
  if (!match(Vec, m_Intrinsic<Intrinsic::aarch64_sve_sqabs>(

      !match(Vec, m_Intrinsic<Intrinsic::aarch64_sve_abs>(

    return std::nullopt;

    return std::nullopt;

    return std::nullopt;

      {II.getType()}, {Pred, Vec, Shift});
std::optional<Instruction *>

  case Intrinsic::aarch64_neon_fmaxnm:
  case Intrinsic::aarch64_neon_fminnm:
  case Intrinsic::aarch64_sve_convert_from_svbool:
  case Intrinsic::aarch64_sve_dup:
  case Intrinsic::aarch64_sve_dup_x:
  case Intrinsic::aarch64_sve_cmpne:
  case Intrinsic::aarch64_sve_cmpne_wide:
  case Intrinsic::aarch64_sve_rdffr:
  case Intrinsic::aarch64_sve_lasta:
  case Intrinsic::aarch64_sve_lastb:
  case Intrinsic::aarch64_sve_clasta_n:
  case Intrinsic::aarch64_sve_clastb_n:
  case Intrinsic::aarch64_sve_cntd:
  case Intrinsic::aarch64_sve_cntw:
  case Intrinsic::aarch64_sve_cnth:
  case Intrinsic::aarch64_sve_cntb:
  case Intrinsic::aarch64_sve_ptest_any:
  case Intrinsic::aarch64_sve_ptest_first:
  case Intrinsic::aarch64_sve_ptest_last:
  case Intrinsic::aarch64_sve_fabd:
  case Intrinsic::aarch64_sve_fadd:
  case Intrinsic::aarch64_sve_fadd_u:
  case Intrinsic::aarch64_sve_fdiv:
  case Intrinsic::aarch64_sve_fmax:
  case Intrinsic::aarch64_sve_fmaxnm:
  case Intrinsic::aarch64_sve_fmin:
  case Intrinsic::aarch64_sve_fminnm:
  case Intrinsic::aarch64_sve_fmla:
  case Intrinsic::aarch64_sve_fmls:
  case Intrinsic::aarch64_sve_fmul:
  case Intrinsic::aarch64_sve_fmul_u:
  case Intrinsic::aarch64_sve_fmulx:
  case Intrinsic::aarch64_sve_fnmla:
  case Intrinsic::aarch64_sve_fnmls:
  case Intrinsic::aarch64_sve_fsub:
  case Intrinsic::aarch64_sve_fsub_u:
  case Intrinsic::aarch64_sve_add:
  case Intrinsic::aarch64_sve_add_u:
    return instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_mul_u,
                                             Intrinsic::aarch64_sve_mla_u>(
        IC, II, true);
  case Intrinsic::aarch64_sve_mla:
  case Intrinsic::aarch64_sve_mls:
  case Intrinsic::aarch64_sve_mul:
  case Intrinsic::aarch64_sve_mul_u:
  case Intrinsic::aarch64_sve_sabd:
  case Intrinsic::aarch64_sve_smax:
  case Intrinsic::aarch64_sve_smin:
  case Intrinsic::aarch64_sve_smulh:
  case Intrinsic::aarch64_sve_sub:
  case Intrinsic::aarch64_sve_sub_u:
    return instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_mul_u,
                                             Intrinsic::aarch64_sve_mls_u>(
        IC, II, true);
  case Intrinsic::aarch64_sve_uabd:
  case Intrinsic::aarch64_sve_umax:
  case Intrinsic::aarch64_sve_umin:
  case Intrinsic::aarch64_sve_umulh:
  case Intrinsic::aarch64_sve_asr:
  case Intrinsic::aarch64_sve_lsl:
  case Intrinsic::aarch64_sve_lsr:
  case Intrinsic::aarch64_sve_and:
  case Intrinsic::aarch64_sve_bic:
  case Intrinsic::aarch64_sve_eor:
  case Intrinsic::aarch64_sve_orr:
  case Intrinsic::aarch64_sve_sqsub:
  case Intrinsic::aarch64_sve_uqsub:
  case Intrinsic::aarch64_sve_tbl:
  case Intrinsic::aarch64_sve_uunpkhi:
  case Intrinsic::aarch64_sve_uunpklo:
  case Intrinsic::aarch64_sve_sunpkhi:
  case Intrinsic::aarch64_sve_sunpklo:
  case Intrinsic::aarch64_sve_uzp1:
  case Intrinsic::aarch64_sve_zip1:
  case Intrinsic::aarch64_sve_zip2:
  case Intrinsic::aarch64_sve_ld1_gather_index:
  case Intrinsic::aarch64_sve_st1_scatter_index:
  case Intrinsic::aarch64_sve_ld1:
  case Intrinsic::aarch64_sve_st1:
  case Intrinsic::aarch64_sve_sdiv:
  case Intrinsic::aarch64_sve_sel:
  case Intrinsic::aarch64_sve_srshl:
  case Intrinsic::aarch64_sve_dupq_lane:

  return std::nullopt;
    SimplifyAndSetOp) const {

  case Intrinsic::aarch64_neon_fcvtxn:
  case Intrinsic::aarch64_neon_rshrn:
  case Intrinsic::aarch64_neon_sqrshrn:
  case Intrinsic::aarch64_neon_sqrshrun:
  case Intrinsic::aarch64_neon_sqshrn:
  case Intrinsic::aarch64_neon_sqshrun:
  case Intrinsic::aarch64_neon_sqxtn:
  case Intrinsic::aarch64_neon_sqxtun:
  case Intrinsic::aarch64_neon_uqrshrn:
  case Intrinsic::aarch64_neon_uqshrn:
  case Intrinsic::aarch64_neon_uqxtn:
    SimplifyAndSetOp(&II, 0, OrigDemandedElts, UndefElts);

  return std::nullopt;
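// The narrowing NEON intrinsics listed above read only their single wide
// source operand, so the demanded-element mask for the result can be
// forwarded unchanged to operand 0.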
bool AArch64TTIImpl::isWideningInstruction(Type *DstTy, unsigned Opcode,
                                           ArrayRef<const Value *> Args,
                                           Type *SrcOverrideTy) {

  auto toVectorTy = [&](Type *ArgTy) {
    return VectorType::get(ArgTy->getScalarType(),
                           cast<VectorType>(DstTy)->getElementCount());
  };

      (DstEltSize != 16 && DstEltSize != 32 && DstEltSize != 64))

  Type *SrcTy = SrcOverrideTy;
  switch (Opcode) {
  case Instruction::Add:
  case Instruction::Sub:

    if (isa<SExtInst>(Args[1]) || isa<ZExtInst>(Args[1])) {

          toVectorTy(cast<Instruction>(Args[1])->getOperand(0)->getType());

  case Instruction::Mul: {

    if ((isa<SExtInst>(Args[0]) && isa<SExtInst>(Args[1])) ||
        (isa<ZExtInst>(Args[0]) && isa<ZExtInst>(Args[1]))) {

          toVectorTy(cast<Instruction>(Args[0])->getOperand(0)->getType());
    } else if (isa<ZExtInst>(Args[0]) || isa<ZExtInst>(Args[1])) {

  assert(SrcTy && "Expected some SrcTy");

  unsigned SrcElTySize = SrcTyL.second.getScalarSizeInBits();

      DstTyL.first * DstTyL.second.getVectorMinNumElements();
      SrcTyL.first * SrcTyL.second.getVectorMinNumElements();

  return NumDstEls == NumSrcEls && 2 * SrcElTySize == DstEltSize;
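// In other words, the cast is "widening" when the legalised source and
// destination have the same total element count and each destination element
// is exactly twice the source width: the shape that SMULL/UMULL/SADDL-style
// instructions handle with the extend folded in for free.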
      (Src->isScalableTy() && !ST->hasSVE2()))

  auto *AddUser =
      dyn_cast_or_null<Instruction>(Add->getUniqueUndroppableUser());
  if (AddUser && AddUser->getOpcode() == Instruction::Add)

  auto *Shr = dyn_cast_or_null<Instruction>(Add->getUniqueUndroppableUser());
  if (!Shr || Shr->getOpcode() != Instruction::LShr)

  auto *Trunc = dyn_cast_or_null<Instruction>(Shr->getUniqueUndroppableUser());
  if (!Trunc || Trunc->getOpcode() != Instruction::Trunc ||
      Src->getScalarSizeInBits() !=
          cast<CastInst>(Trunc)->getDestTy()->getScalarSizeInBits())
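// This matches the ext -> add [-> add] -> lshr -> trunc chain of a
// (rounding) halving add, which lowers to a single S/UHADD or S/URHADD, so
// the extend feeding such a chain can be treated as free.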
  assert(ISD && "Invalid opcode");

  if (I && I->hasOneUser()) {
    auto *SingleUser = cast<Instruction>(*I->user_begin());

    if (isWideningInstruction(Dst, SingleUser->getOpcode(), Operands, Src)) {

      if (SingleUser->getOpcode() == Instruction::Add) {
        if (I == SingleUser->getOperand(1) ||
            (isa<CastInst>(SingleUser->getOperand(1)) &&
             cast<CastInst>(SingleUser->getOperand(1))->getOpcode() == Opcode))

  if ((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&

    return Cost == 0 ? 0 : 1;

  EVT WiderTy = SrcTy.bitsGT(DstTy) ? SrcTy : DstTy;

  std::pair<InstructionCost, MVT> LT =

      LT.second.getVectorElementType().getSizeInBits();

    return AdjustCost(Entry->Cost);

  if (ST->hasFullFP16())

  return AdjustCost(Entry->Cost);

      Opcode, LegalTy, Src, CCH, CostKind, I);

  return Part1 + Part2;
  assert((Opcode == Instruction::SExt || Opcode == Instruction::ZExt) &&

  assert(isa<IntegerType>(Dst) && isa<IntegerType>(Src) && "Invalid type");

  if (!VecLT.second.isVector() || !TLI->isTypeLegal(DstVT))

  if (DstVT.getFixedSizeInBits() < SrcVT.getFixedSizeInBits())

  case Instruction::SExt:

  case Instruction::ZExt:
    if (DstVT.getSizeInBits() != 64u || SrcVT.getSizeInBits() == 32u)

  return Opcode == Instruction::PHI ? 0 : 1;
  if (!LT.second.isVector())

  if (LT.second.isFixedLengthVector()) {
    unsigned Width = LT.second.getVectorNumElements();

  if (I && dyn_cast<LoadInst>(I->getOperand(1)))

      Opcode == Instruction::InsertElement && Op0 && !isa<UndefValue>(Op0);
  return getVectorInstrCostHelper(nullptr, Val, Index, HasRealUse);

  return getVectorInstrCostHelper(&I, Val, Index, /*HasRealUse=*/true);

  if (isa<ScalableVectorType>(Ty))

  return DemandedElts.popcount() * (Insert + Extract) *

                                       Op2Info, Args, CxtI);
  return MulCost * 2 + AddCost * 2 + ShrCost * 2 + 1;

                                         Opcode, Ty, CostKind, Op1Info, Op2Info);

    if (isa<FixedVectorType>(Ty) && cast<FixedVectorType>(Ty)
                                            ->getPrimitiveSizeInBits()
                                            .getFixedValue() < 128) {

      if (nullptr != Entry)

      if (LT.second.getScalarType() == MVT::i8)
      else if (LT.second.getScalarType() == MVT::i16)

      if (auto *VTy = dyn_cast<FixedVectorType>(Ty)) {

        return (4 + DivCost) * VTy->getNumElements();

    if (LT.second == MVT::v2i64 && ST->hasSVE())

    if (LT.second != MVT::v2i64 || isWideningInstruction(Ty, Opcode, Args))

    return LT.first * 14;

    return 2 * LT.first;

    return 2 * LT.first;

  int MaxMergeDistance = 64;

    return NumVectorInstToHideOverhead;
  if (isa<FixedVectorType>(ValTy) && ISD == ISD::SELECT) {

    const int AmortizationCost = 20;

      VecPred = CurrentPred;

    static const auto ValidMinMaxTys = {
        MVT::v8i8,  MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32,
        MVT::v4i32, MVT::v2i64, MVT::v2f32, MVT::v4f32, MVT::v2f64};
    static const auto ValidFP16MinMaxTys = {MVT::v4f16, MVT::v8f16};

    if (any_of(ValidMinMaxTys, [&](MVT M) { return M == LT.second; }) ||
        (ST->hasFullFP16() &&
         any_of(ValidFP16MinMaxTys, [&](MVT M) { return M == LT.second; })))

  static const TypeConversionCostTblEntry VectorSelectTbl[] = {

      {ISD::SELECT, MVT::v4i1, MVT::v4i64, 4 * AmortizationCost},
      {ISD::SELECT, MVT::v8i1, MVT::v8i64, 8 * AmortizationCost},
      {ISD::SELECT, MVT::v16i1, MVT::v16i64, 16 * AmortizationCost}

  if (isa<FixedVectorType>(ValTy) && ISD == ISD::SETCC) {

    if (LT.second == MVT::v4f16 && !ST->hasFullFP16())
      return LT.first * 4;
  if (ST->requiresStrictAlign()) {

  Options.AllowOverlappingLoads = true;

  Options.LoadSizes = {8, 4, 2, 1};
  Options.AllowedTailExpansions = {3, 5, 6};
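// LoadSizes lists the access widths memcmp expansion may use; the
// AllowedTailExpansions entries (3, 5 and 6 bytes) let an odd-sized tail be
// covered by a short load combination (e.g. 2+1 or 4+2) rather than forcing
// a byte-by-byte fallback.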
  return ST->hasSVE();

  if (!LT.first.isValid())

    unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,

  auto *VT = cast<VectorType>(DataTy);

  if (!LT.first.isValid())

  if (!LT.second.isVector() ||

  if (cast<VectorType>(DataTy)->getElementCount() ==

  ElementCount LegalVF = LT.second.getVectorElementCount();

      {TTI::OK_AnyValue, TTI::OP_None}, I);

  if (VT == MVT::Other)

  if (!LT.first.isValid())

  if (auto *VTy = dyn_cast<ScalableVectorType>(Ty))

  if (ST->isMisaligned128StoreSlow() && Opcode == Instruction::Store &&
      LT.second.is128BitVector() && (!Alignment || *Alignment < Align(16))) {

    const int AmortizationCost = 6;

    return LT.first * 2 * AmortizationCost;

  if (VT == MVT::v4i8)

  return cast<FixedVectorType>(Ty)->getNumElements() * 2;

  if (!isPowerOf2_32(EltSize) || EltSize < 8 || EltSize > 64 ||
      *Alignment != Align(1))

  while (!TypeWorklist.empty()) {
    bool UseMaskForCond, bool UseMaskForGaps) {
  assert(Factor >= 2 && "Invalid interleave factor");
  auto *VecVTy = cast<VectorType>(VecTy);

  if (VecTy->isScalableTy() && (!ST->hasSVE() || Factor != 2))

  if (!VecTy->isScalableTy() && (UseMaskForCond || UseMaskForGaps))

  if (!UseMaskForGaps && Factor <= TLI->getMaxSupportedInterleaveFactor()) {
    unsigned MinElts = VecVTy->getElementCount().getKnownMinValue();

        VecVTy->getElementCount().divideCoefficientBy(Factor));

    if (MinElts % Factor == 0 &&

                              UseMaskForCond, UseMaskForGaps);

  for (auto *I : Tys) {
    if (!I->isVectorTy())

    if (I->getScalarSizeInBits() * cast<FixedVectorType>(I)->getNumElements() ==
  enum { MaxStridedLoads = 7 };

    int StridedLoads = 0;

    for (const auto BB : L->blocks()) {
      for (auto &I : *BB) {
        LoadInst *LMemI = dyn_cast<LoadInst>(&I);

        if (L->isLoopInvariant(PtrValue))

        const SCEVAddRecExpr *LSCEVAddRec = dyn_cast<SCEVAddRecExpr>(LSCEV);
        if (!LSCEVAddRec || !LSCEVAddRec->isAffine())

        if (StridedLoads > MaxStridedLoads / 2)
          return StridedLoads;

    return StridedLoads;

  int StridedLoads = countStridedLoads(L, SE);
  LLVM_DEBUG(dbgs() << "falkor-hwpf: detected " << StridedLoads
                    << " strided loads\n");
  if (L->getLoopDepth() > 1)

  for (auto *BB : L->getBlocks()) {
    for (auto &I : *BB) {

      if (I.getType()->isVectorTy())

      if (isa<CallInst>(I) || isa<InvokeInst>(I)) {

      !ST->getSchedModel().isOutOfOrder()) {
                                           Type *ExpectedType) {

  case Intrinsic::aarch64_neon_st2:
  case Intrinsic::aarch64_neon_st3:
  case Intrinsic::aarch64_neon_st4: {

    StructType *ST = dyn_cast<StructType>(ExpectedType);

    unsigned NumElts = Inst->arg_size() - 1;
    if (ST->getNumElements() != NumElts)

    for (unsigned i = 0, e = NumElts; i != e; ++i) {

    for (unsigned i = 0, e = NumElts; i != e; ++i) {

  case Intrinsic::aarch64_neon_ld2:
  case Intrinsic::aarch64_neon_ld3:
  case Intrinsic::aarch64_neon_ld4:
    if (Inst->getType() == ExpectedType)
  case Intrinsic::aarch64_neon_ld2:
  case Intrinsic::aarch64_neon_ld3:
  case Intrinsic::aarch64_neon_ld4:
    Info.ReadMem = true;
    Info.WriteMem = false;

  case Intrinsic::aarch64_neon_st2:
  case Intrinsic::aarch64_neon_st3:
  case Intrinsic::aarch64_neon_st4:
    Info.ReadMem = false;
    Info.WriteMem = true;

  case Intrinsic::aarch64_neon_ld2:
  case Intrinsic::aarch64_neon_st2:
    Info.MatchingId = VECTOR_LDST_TWO_ELEMENTS;

  case Intrinsic::aarch64_neon_ld3:
  case Intrinsic::aarch64_neon_st3:
    Info.MatchingId = VECTOR_LDST_THREE_ELEMENTS;

  case Intrinsic::aarch64_neon_ld4:
  case Intrinsic::aarch64_neon_st4:
    Info.MatchingId = VECTOR_LDST_FOUR_ELEMENTS;
    const Instruction &I, bool &AllowPromotionWithoutCommonHeader) {
  bool Considerable = false;
  AllowPromotionWithoutCommonHeader = false;
  if (!isa<SExtInst>(&I))

  Type *ConsideredSExtType =

  if (I.getType() != ConsideredSExtType)

  for (const User *U : I.users()) {

      Considerable = true;

      if (GEPInst->getNumOperands() > 2) {
        AllowPromotionWithoutCommonHeader = true;

  return Considerable;
  if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16())

  return LegalizationCost + 2;

  LegalizationCost *= LT.first - 1;

  assert(ISD && "Invalid opcode");

  return LegalizationCost + 2;

    std::optional<FastMathFlags> FMF,

  if (auto *FixedVTy = dyn_cast<FixedVectorType>(ValTy)) {

    return BaseCost + FixedVTy->getNumElements();

  if (Opcode != Instruction::FAdd)

  auto *VTy = cast<ScalableVectorType>(ValTy);

  if (isa<ScalableVectorType>(ValTy))

  MVT MTy = LT.second;

  assert(ISD && "Invalid opcode");

    return (LT.first - 1) + Entry->Cost;

  auto *ValVTy = cast<FixedVectorType>(ValTy);

  if (LT.first != 1) {

    ExtraCost *= LT.first - 1;

  auto Cost = ValVTy->getElementType()->isIntegerTy(1) ? 2 : Entry->Cost;
  return Cost + ExtraCost;

  EVT PromotedVT = LT.second.getScalarType() == MVT::i1

  if (LT.second.getScalarType() == MVT::i1) {

  assert(Entry && "Illegal Type for Splice");
  LegalizationCost += Entry->Cost;
  return LegalizationCost * LT.first;
  if (!Mask.empty() && isa<FixedVectorType>(Tp) && LT.second.isVector() &&
      Mask.size() > LT.second.getVectorNumElements() && !Index && !SubTp) {
    unsigned TpNumElts = Mask.size();
    unsigned LTNumElts = LT.second.getVectorNumElements();
    unsigned NumVecs = (TpNumElts + LTNumElts - 1) / LTNumElts;

    for (unsigned N = 0; N < NumVecs; N++) {

      unsigned Source1, Source2;
      unsigned NumSources = 0;
      for (unsigned E = 0; E < LTNumElts; E++) {
        int MaskElt = (N * LTNumElts + E < TpNumElts) ? Mask[N * LTNumElts + E]
                                                      : PoisonMaskElem;

        unsigned Source = MaskElt / LTNumElts;
        if (NumSources == 0) {

        } else if (NumSources == 1 && Source != Source1) {

        } else if (NumSources >= 2 && Source != Source1 && Source != Source2) {

        if (Source == Source1)

        else if (Source == Source2)
          NMask.push_back(MaskElt % LTNumElts + LTNumElts);

      if (NumSources <= 2)

            NTp, NMask, CostKind, 0, nullptr, Args);

        return ME.value() % LTNumElts == ME.index();

        Cost += LTNumElts - 1;

  if (IsExtractSubvector && LT.second.isFixedLengthVector())

  bool IsLoad = !Args.empty() && isa<LoadInst>(Args[0]);
  if (IsLoad && LT.second.isVector() &&
      isLegalBroadcastLoad(Tp->getElementType(),
                           LT.second.getVectorElementCount()))

      all_of(Mask, [](int E) { return E < 8; }))
    return LT.first * Entry->Cost;

      LT.second.getSizeInBits() <= 128 && SubTp) {

    if (SubLT.second.isVector()) {
      int NumElts = LT.second.getVectorNumElements();
      int NumSubElts = SubLT.second.getVectorNumElements();
      if ((Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)

  if (IsExtractSubvector)

  if (isa<LoadInst>(&I) || isa<StoreInst>(&I)) {
  unsigned NumInsns = 0;
  for (BasicBlock *BB : L->getBlocks())
    NumInsns += BB->sizeWithoutDebug();

                                               int64_t BaseOffset, bool HasBaseReg,
                                               int64_t Scale, unsigned AddrSpace) const {

      isa<BranchInst>(I->getNextNode()) &&
      cast<BranchInst>(I->getNextNode())->isUnconditional())
static bool isAllActivePredicate(SelectionDAG &DAG, SDValue N)
SmallVector< AArch64_IMM::ImmInsnModel, 4 > Insn
static unsigned getPerfectShuffleCost(llvm::ArrayRef< int > M)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
amdgpu AMDGPU Register Bank Select
This file provides a helper that implements much of the TTI interface in terms of the target-independ...
static Error reportError(StringRef Message)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Analysis containing CSE Info
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
Cost tables and simple lookup functions.
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file provides the interface for the instcombine pass implementation.
This file defines the LoopVectorizationLegality class.
mir Rename Register Operands
static const Function * getCalledFunction(const Value *V, bool &IsNoBuiltin)
const char LLVMTargetMachineRef TM
static uint64_t getBits(uint64_t Val, int Start, int End)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static unsigned getFastMathFlags(const MachineInstr &I)
static SymbolRef::Type getType(const Symbol *Sym)
This file describes how to lower LLVM code to machine code.
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
bool isNeonAvailable() const
Returns true if the target has NEON and the function at runtime is known to have NEON enabled (e....
unsigned getVectorInsertExtractBaseCost() const
ARMProcFamilyEnum getProcFamily() const
Returns ARM processor family.
unsigned getMaxInterleaveFactor() const
TailFoldingOpts getSVETailFoldingDefaultOpts() const
bool useSVEForFixedLengthVectors() const
bool isSVEAvailable() const
Returns true if the target has SVE and can use the full range of SVE instructions,...
unsigned getMinSVEVectorSizeInBits() const
InstructionCost getSpliceCost(VectorType *Tp, int Index)
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr)
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind)
bool shouldTreatInstructionLikeSelect(const Instruction *I)
InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE, const SCEV *Ptr)
bool prefersVectorizedAddressing() const
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind)
InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index)
unsigned getInlineCallPenalty(const Function *F, const CallBase &Call, unsigned DefaultCallPenalty) const
bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, ElementCount VF) const
Value * getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType)
bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const
bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader)
See if I should be considered for address type promotion.
InstructionCost getArithmeticReductionCostSVE(unsigned Opcode, VectorType *ValTy, TTI::TargetCostKind CostKind)
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)
InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind)
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const
bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
bool isElementTypeLegalForScalableVector(Type *Ty) const
InstructionCost getCostOfKeepingLiveOverCall(ArrayRef< Type * > Tys)
bool areInlineCompatible(const Function *Caller, const Function *Callee) const
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
bool useNeonVector(const Type *Ty) const
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Op0, Value *Op1)
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth)
bool areTypesABICompatible(const Function *Caller, const Function *Callee, const ArrayRef< Type * > &Types) const
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
unsigned getMaxNumElements(ElementCount VF) const
Try to return an estimate cost factor that can be used as a multiplier when scalarizing an operation ...
bool preferPredicateOverEpilogue(TailFoldingInfo *TFI)
bool isLegalMaskedGatherScatter(Type *DataType) const
unsigned getMaxInterleaveFactor(ElementCount VF)
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP)
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getIntImmCost(int64_t Val)
Calculate the cost of materializing a 64-bit value.
std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE)
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info)
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind)
bool isExtPartOfAvgExpr(const Instruction *ExtUser, Type *Dst, Type *Src)
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind)
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) const
Return the cost of the scaling factor used in the addressing mode represented by AM for this target,...
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args=ArrayRef< const Value * >(), const Instruction *CxtI=nullptr)
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args=std::nullopt)
EVT getPromotedVTForPredicate(EVT VT) const
unsigned getNumInterleavedAccesses(VectorType *VecTy, const DataLayout &DL, bool UseScalable) const
Returns the number of interleaved accesses that will be generated when lowering accesses of the given...
bool isLegalInterleavedAccessType(VectorType *VecTy, const DataLayout &DL, bool &UseScalable) const
Returns true if VecTy is a legal interleaved access type.
Class for arbitrary precision integers.
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
unsigned popcount() const
Count the number of bits set.
unsigned countLeadingOnes() const
void negate()
Negate this APInt in place.
APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
unsigned logBase2() const
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
int64_t getSExtValue() const
Get sign extended value.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
LLVM Basic Block Representation.
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
Get intrinsic cost based on arguments.
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE)
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *DataTy, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind)
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args=ArrayRef< const Value * >(), const Instruction *CxtI=nullptr)
TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind, ArrayRef< int > Mask, VectorType *Ty, int &Index, VectorType *&SubTy) const
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind)
Try to calculate op costs for min/max reduction operations.
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args=std::nullopt)
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getScalarizationOverhead(VectorType *InTy, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind)
Estimate the overhead of scalarizing an instruction.
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I=nullptr)
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP)
InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind)
Compute a cost of the given call instruction.
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind)
std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const
Estimate the cost of type-legalization and the legalized type.
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
static BinaryOperator * CreateWithCopiedFlags(BinaryOps Opc, Value *V1, Value *V2, Value *CopyO, const Twine &Name, BasicBlock::iterator InsertBefore)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
bool isStrictFP() const
Determine if the call requires strict floating point semantics.
Value * getArgOperand(unsigned i) const
unsigned arg_size() const
void setCalledFunction(Function *Fn)
Sets the function called, including updating the function type.
This class represents a function call, abstracting a target machine's calling convention.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
bool isIntPredicate() const
static ConstantAggregateZero * get(Type *Ty)
This is the shared class of boolean and integer constants.
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
const APInt & getValue() const
Return the constant as an APInt value reference.
This is an important base class in LLVM.
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
static constexpr ElementCount getScalable(ScalarTy MinVal)
static constexpr ElementCount getFixed(ScalarTy MinVal)
Convenience struct for specifying and reasoning about fast-math flags.
bool allowContract() const
Container class for subtarget features.
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
bool isEquality() const
Return true if this predicate is either EQ or NE.
Value * CreateVScale(Constant *Scaling, const Twine &Name="")
Create a call to llvm.vscale, multiplied by Scaling.
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Value * CreateInsertValue(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &Name="")
CallInst * CreateInsertVector(Type *DstType, Value *SrcVec, Value *SubVec, Value *Idx, const Twine &Name="")
Create a call to the vector.insert intrinsic.
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
IntegerType * getIntNTy(unsigned N)
Fetch the type representing an N-bit integer.
Type * getDoubleTy()
Fetch the type representing a 64-bit floating point value.
Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains.
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, Instruction *FMFSource=nullptr, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
CallInst * CreateMaskedLoad(Type *Ty, Value *Ptr, Align Alignment, Value *Mask, Value *PassThru=nullptr, const Twine &Name="")
Create a call to Masked Load intrinsic.
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Type * getHalfTy()
Fetch the type representing a 16-bit floating point value.
void setFastMathFlags(FastMathFlags NewFMF)
Set the fast-math flags to be used with generated fp-math operators.
IntegerType * getInt64Ty()
Fetch the type representing a 64-bit integer.
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
Value * CreateBitOrPointerCast(Value *V, Type *DestTy, const Twine &Name="")
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
CallInst * CreateMaskedStore(Value *Val, Value *Ptr, Align Alignment, Value *Mask)
Create a call to Masked Store intrinsic.
Type * getFloatTy()
Fetch the type representing a 32-bit floating point value.
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Value * CreateGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="", bool IsInBounds=false)
This provides a uniform API for creating instructions and inserting them into a basic block: either at the end of a BasicBlock, or at a specific iterator location in a block.
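To ground the builder entries above, a minimal sketch (helper name illustrative, alignment chosen arbitrarily) that emits a masked load guarded by an all-true mask:

#include "llvm/IR/IRBuilder.h"
using namespace llvm;

// Splat an i1 true across the vector width with CreateVectorSplat, then
// emit a call to the masked-load intrinsic via CreateMaskedLoad.
static Value *emitAllTrueMaskedLoad(IRBuilder<> &B, FixedVectorType *VecTy,
                                    Value *Ptr) {
  Value *Mask = B.CreateVectorSplat(VecTy->getNumElements(), B.getTrue());
  return B.CreateMaskedLoad(VecTy, Ptr, Align(16), Mask);
}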
static InsertElementInst * Create(Value *Vec, Value *NewElt, Value *Idx, const Twine &NameStr, BasicBlock::iterator InsertBefore)
The core instruction combiner logic.
virtual Instruction * eraseInstFromFunction(Instruction &I)=0
Combiner aware instruction erasure.
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
static InstructionCost getInvalid(CostType Val=0)
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not have a module.
FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports these flags.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
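A sketch of the combiner-aware replacement idiom these hooks follow; the simplification itself is invented for illustration:

#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
using namespace llvm;

// Replace an intrinsic call with its first argument in a combiner-aware
// way, so instcombine revisits the affected users.
static Instruction *foldToFirstArg(InstCombiner &IC, IntrinsicInst &II) {
  return IC.replaceInstUsesWith(II, II.getArgOperand(0));
}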
Class to represent integer types.
bool hasGroups() const
Returns true if we have any interleave groups.
const SmallVectorImpl< Type * > & getArgTypes() const
Type * getReturnType() const
const SmallVectorImpl< const Value * > & getArgs() const
Intrinsic::ID getID() const
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
Value * getPointerOperand()
iterator_range< block_iterator > blocks() const
RecurrenceSet & getFixedOrderRecurrences()
Return the fixed-order recurrences found in the loop.
PredicatedScalarEvolution * getPredicatedScalarEvolution() const
const ReductionList & getReductionVars() const
Returns the reduction variables found in the loop.
Represents a single loop in the control flow graph.
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address space zero).
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of existing predicates.
The RecurrenceDescriptor is used to identify recurrence variables in a loop.
Type * getRecurrenceType() const
Returns the type of the recurrence.
RecurKind getRecurrenceKind() const
This node represents a polynomial recurrence on the trip count of the specified loop.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
This class represents an analyzed expression in the program.
SMEAttrs is a utility class to parse the SME ACLE attributes on functions.
bool requiresSMChange(const SMEAttrs &Callee) const
void set(unsigned M, bool Enable=true)
bool hasStreamingBody() const
static ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
static ScalableVectorType * getDoubleElementsVectorType(ScalableVectorType *VTy)
The main scalar evolution driver.
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
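The two string utilities compose naturally when parsing option text of the form "initial+flags"; a sketch with made-up option names:

#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
using namespace llvm;

// Split at the first '+' (Rest is empty when no '+' is present), then
// map the leading word to a value without chained string compares.
static int parseMode(StringRef Opt) {
  auto [Mode, Rest] = Opt.split('+');
  return StringSwitch<int>(Mode)
      .Case("none", 0)
      .Case("basic", 1)
      .Case("full", 2)
      .Default(-1);
}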
Class to represent struct types.
int InstructionOpcodeToISD(unsigned Opcode) const
Get the ISD node that corresponds to the Instruction class opcode.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
const TargetMachine & getTargetMachine() const
unsigned getMaxExpandSizeMemcmp(bool OptSize) const
Get maximum # of load operations permitted for memcmp.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lowering.
LegalizeKind getTypeConversion(LLVMContext &Context, EVT VT) const
Return pair that represents the legalization kind (first) that needs to happen to EVT (second) in order to type-legalize it.
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal'), we need to promote it to a larger type (return 'Promote'), or we need to expand it into multiple registers of smaller integer type (return 'Expand').
std::pair< LegalizeTypeAction, EVT > LegalizeKind
LegalizeKind holds the legalization kind that needs to happen to EVT in order to type-legalize it.
Primary interface to the complete machine description for the target machine.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
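As with ElementCount, the scalable form records a known minimum that is implicitly multiplied by vscale; a short sketch:

#include "llvm/Support/TypeSize.h"
using namespace llvm;

// A scalable 128 bits reads as "128 bits per vscale granule", matching
// the 128-bit SVE block size (SVEBitsPerBlock).
constexpr TypeSize FixedBits = TypeSize::getFixed(128);
constexpr TypeSize ScalBits = TypeSize::getScalable(128);
static_assert(ScalBits.isScalable() && ScalBits.getKnownMinValue() == 128);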
The instances of the Type class are immutable: once they are created, they are never changed.
bool isVectorTy() const
True if this is an instance of VectorType.
static IntegerType * getInt1Ty(LLVMContext &C)
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
bool isFP128Ty() const
Return true if this is 'fp128'.
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
bool isScalableTy() const
Return true if this is a type whose size is a known multiple of vscale.
static IntegerType * getInt32Ty(LLVMContext &C)
static IntegerType * getInt64Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
LLVMContext & getContext() const
All values hold a context through their type.
void takeName(Value *V)
Transfer the name from V to this value.
Base class of all SIMD vector types.
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
static VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
Type * getElementType() const
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
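A sketch tying VectorType::get to ElementCount (the helper is hypothetical):

#include "llvm/IR/DerivedTypes.h"
using namespace llvm;

// Build <4 x EltTy> or <vscale x 4 x EltTy> through the same factory.
static VectorType *makeVec4(Type *EltTy, bool Scalable) {
  ElementCount EC = Scalable ? ElementCount::getScalable(4)
                             : ElementCount::getFixed(4);
  return VectorType::get(EltTy, EC);
}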
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the given register size.
void expandMOVImm(uint64_t Imm, unsigned BitSize, SmallVectorImpl< ImmInsnModel > &Insn)
Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more real move-immediate instructions to synthesize the immediate.
static constexpr unsigned SVEBitsPerBlock
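A sketch of how the two immediate helpers combine to measure materialization cost. Both live in AArch64-internal headers (MCTargetDesc/AArch64AddressingModes.h and AArch64ExpandImm.h), so this only builds inside the target directory; the helper name is illustrative:

#include "AArch64ExpandImm.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/SmallVector.h"
using namespace llvm;

// One ORR covers any valid 64-bit logical immediate; otherwise count the
// real MOVZ/MOVN/MOVK sequence expandMOVImm would emit.
static unsigned movImmCost(uint64_t Imm) {
  if (AArch64_AM::isLogicalImmediate(Imm, 64))
    return 1;
  SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
  AArch64_IMM::expandMOVImm(Imm, 64, Insn);
  return Insn.size();
}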
@ C
The default llvm calling convention, compatible with C.
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
@ ADD
Simple integer binary arithmetic operators.
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter) to floating point.
@ FADD
Simple binary floating point operators.
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to memory with one type and loaded from the same address with the other type.
@ SIGN_EXTEND
Conversion operators.
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of type i[2*N], then return the top part.
@ SHL
Shift and rotation operations.
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the destination VT.
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
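A sketch of declaring and calling an overloaded intrinsic; llvm.cttz is chosen arbitrarily:

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
using namespace llvm;

// The type list drives the name mangling, e.g. llvm.cttz.i32 for an i32
// argument; the trailing i1 is the intrinsic's is_zero_poison flag.
static Value *emitCttz(IRBuilder<> &B, Value *X) {
  Module *M = B.GetInsertBlock()->getModule();
  Function *Decl =
      Intrinsic::getDeclaration(M, Intrinsic::cttz, {X->getType()});
  return B.CreateCall(Decl, {X, B.getFalse()});
}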
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
specific_intval< false > m_SpecificInt(APInt V)
Match a specific integer value or vector with all elements equal to the value.
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
cst_pred_ty< is_nonnegative > m_NonNegative()
Match an integer or vector of non-negative values.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
OneUse_match< T > m_OneUse(const T &SubPattern)
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
specific_fpval m_FPOne()
Match a float 1.0 or vector with all elements equal to 1.0.
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
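A sketch of the combinator style these matchers enable; the matched shape is arbitrary:

#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace llvm::PatternMatch;

// Match "select(cmp ..., X, 0)" and capture X; m_Cmp() accepts any
// compare, m_ZeroInt() any integer zero (or zero splat).
static bool isSelectOfCmpAndZero(Value *V, Value *&X) {
  return match(V, m_Select(m_Cmp(), m_Value(X), m_ZeroInt()));
}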
initializer< Ty > init(const Ty &Val)
LocationClass< Ty > location(Ty &L)
This is an optimization pass for GlobalISel generic memory operations.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
const CostTblEntryT< CostType > * CostTableLookup(ArrayRef< CostTblEntryT< CostType > > Tbl, int ISD, MVT Ty)
Find in cost table.
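A sketch of the table-driven costing pattern (entries invented, not real AArch64 numbers); ConvertCostTableLookup works the same way for cast tables keyed by source and destination MVT:

#include "llvm/CodeGen/CostTable.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/Support/InstructionCost.h"
using namespace llvm;

static const CostTblEntry ExampleCostTbl[] = {
    {ISD::ADD, MVT::v4i32, 1},
    {ISD::MUL, MVT::v4i32, 4},
};

// Fall back to an invalid cost when the (opcode, type) pair is absent.
static InstructionCost lookupExampleCost(int ISD, MVT Ty) {
  if (const auto *Entry = CostTableLookup(ExampleCostTbl, ISD, Ty))
    return Entry->Cost;
  return InstructionCost::getInvalid();
}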
TailFoldingOpts
An enum to describe what types of loops we should attempt to tail-fold: Disabled: None. Reductions: Loops containing reductions. Recurrences: Loops containing fixed-order recurrences. Reverse: Loops requiring reversed predicates. Simple: Loops with neither reductions nor recurrences. All: All legal loop types.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B, C, ...), where A is the 0-based index of the item in the sequence, and B, C, ... are the values from the input ranges.
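A short sketch of enumerate():

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
using namespace llvm;

// Walk values together with their 0-based index.
static int firstNonZeroIndex(const SmallVectorImpl<int> &Vals) {
  for (const auto &En : enumerate(Vals))
    if (En.value() != 0)
      return static_cast<int>(En.index());
  return -1;
}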
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
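A sketch of the range wrappers (the predicate is arbitrary); all_of is used the same way:

#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

// any_of takes the range directly instead of begin()/end().
static bool hasConstantOperand(const Instruction &I) {
  return any_of(I.operands(),
                [](const Use &U) { return isa<Constant>(U.get()); });
}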
bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned element.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
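A sketch of the power-of-two math these helpers support, e.g. when widening an awkward element count:

#include "llvm/Support/MathExtras.h"
using namespace llvm;

// Round a count up to a power of two, then take its exact log2.
// NextPowerOf2 returns a value strictly greater than its input.
static unsigned log2RoundedUp(uint32_t N) {
  uint64_t P2 = isPowerOf2_32(N) ? N : NextPowerOf2(N);
  return Log2_32(static_cast<uint32_t>(P2));
}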
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
std::optional< int64_t > getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const DenseMap< Value *, const SCEV * > &StridesMap=DenseMap< Value *, const SCEV * >(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of the access type size.
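A sketch of a stride query (the helper name is hypothetical):

#include "llvm/Analysis/LoopAccessAnalysis.h"
#include <optional>
using namespace llvm;

// Stride +1, in units of the access type, is the contiguous case; a
// stride of -1 is the reversed-access case SVE can predicate.
static bool isContiguousAccess(PredicatedScalarEvolution &PSE, Type *AccessTy,
                               Value *Ptr, const Loop *L) {
  std::optional<int64_t> Stride = getPtrStride(PSE, AccessTy, Ptr, L);
  return Stride && *Stride == 1;
}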
constexpr int PoisonMaskElem
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
@ Mod
The access may modify the value stored in memory.
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ FAnyOf
Any_of reduction with select(fcmp(),x,y) where one of (x,y) is loop invariant, and both x and y are integer type.
@ Or
Bitwise or logical OR of integers.
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ FMax
FP max implemented in terms of select(cmp()).
@ FMulAdd
Sum of float products with llvm.fmuladd(a * b + sum).
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ And
Bitwise or logical AND of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ FMin
FP min implemented in terms of select(cmp()).
@ IAnyOf
Any_of reduction with select(icmp(),x,y) where one of (x,y) is loop invariant, and both x and y are integer type.
@ UMax
Unsigned integer max implemented in terms of select(cmp()).
void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOne bit sets.
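A sketch of a known-bits query (the evenness test is arbitrary):

#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Support/KnownBits.h"
using namespace llvm;

// True when the low bit of V is provably zero, i.e. V is known even.
static bool isKnownEven(const Value *V, const DataLayout &DL) {
  KnownBits Known = computeKnownBits(V, DL);
  return Known.Zero[0];
}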
unsigned getNumElementsFromSVEPredPattern(unsigned Pattern)
Return the number of active elements for VL1 to VL256 predicate pattern, zero for all other patterns.
@ Default
The result values are uniform if and only if all operands are uniform.
Type * getLoadStoreType(Value *I)
A helper function that returns the type of a load or store instruction.
const TypeConversionCostTblEntryT< CostType > * ConvertCostTableLookup(ArrayRef< TypeConversionCostTblEntryT< CostType > > Tbl, int ISD, MVT Dst, MVT Src)
Find in type conversion cost table.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
This struct is a compact representation of a valid (non-zero power of two) alignment.
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
uint64_t getScalarSizeInBits() const
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
bool isFixedLengthVector() const
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
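A sketch composing the EVT queries above (the 128-bit threshold mirrors the SVE block size):

#include "llvm/CodeGen/ValueTypes.h"
using namespace llvm;

// Scalable and wider than one 128-bit block at its known minimum, so
// legalization would have to split it.
static bool widerThanOneSVEBlock(EVT VT) {
  return VT.isScalableVector() &&
         VT.getSizeInBits().getKnownMinValue() > 128;
}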
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Information about a load/store intrinsic defined by the target.
InterleavedAccessInfo * IAI
LoopVectorizationLegality * LVL
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg. If BaseGV is null, there is no BaseGV.
Type Conversion Cost Table.