#include "llvm/IR/IntrinsicsRISCV.h"
#define DEBUG_TYPE "riscvtti"
    "riscv-v-register-bit-width-lmul",
        "The LMUL to use for getRegisterBitWidth queries. Affects LMUL used "
        "by autovectorized code. Fractional LMULs are not supported."),
        "Overrides result used for getMaximumVF query which is used "
        "exclusively by SLP vectorizer."),
    cl::desc("Set the lower bound of a trip count to decide on "
             "vectorization while tail-folding."),
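// Illustrative usage (not part of the original source; assumes the usual
// -mllvm plumbing for cl::opt flags): passing
//   -mllvm -riscv-v-register-bit-width-lmul=2
// makes getRegisterBitWidth() report two vector registers' worth of bits,
// biasing the LMUL used by autovectorized code, while the second flag above
// overrides the getMaximumVF() answer consumed by the SLP vectorizer.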
  size_t NumInstr = OpCodes.size();
    return LMULCost * NumInstr;
  for (auto Op : OpCodes) {
    case RISCV::VRGATHER_VI:
    case RISCV::VRGATHER_VV:
    case RISCV::VSLIDEUP_VI:
    case RISCV::VSLIDEDOWN_VI:
    case RISCV::VSLIDEUP_VX:
    case RISCV::VSLIDEDOWN_VX:
    case RISCV::VREDMAX_VS:
    case RISCV::VREDMIN_VS:
    case RISCV::VREDMAXU_VS:
    case RISCV::VREDMINU_VS:
    case RISCV::VREDSUM_VS:
    case RISCV::VREDAND_VS:
    case RISCV::VREDOR_VS:
    case RISCV::VREDXOR_VS:
    case RISCV::VFREDMAX_VS:
    case RISCV::VFREDMIN_VS:
    case RISCV::VFREDUSUM_VS: {
    case RISCV::VFREDOSUM_VS: {
    case RISCV::VFMV_F_S:
    case RISCV::VFMV_S_F:
    case RISCV::VMXOR_MM:
    case RISCV::VMAND_MM:
    case RISCV::VMANDN_MM:
    case RISCV::VMNAND_MM:
    case RISCV::VFIRST_M:
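// Reading of the fragment above (a sketch, not authoritative): the early
// `LMULCost * NumInstr` return is the coarse estimate, and the per-opcode
// switch refines it for instructions whose cost does not simply scale with
// LMUL -- gathers, slides, reductions, mask-register ops, and scalar<->vector
// moves; opcodes not handled in the switch appear to fall back to one
// LMULCost each.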
  assert(Ty->isIntegerTy() &&
         "getIntImmCost can only estimate cost of materialising integers");
  if (!BO || !BO->hasOneUse())
  if (BO->getOpcode() != Instruction::Shl)
  if (ShAmt == Trailing)
  if (!Cmp || !Cmp->isEquality())
  if ((CmpC & Mask) != CmpC)
  return NewCmpC >= -2048 && NewCmpC <= 2048;
  assert(Ty->isIntegerTy() &&
         "getIntImmCost can only estimate cost of materialising integers");
  bool Takes12BitImm = false;
  unsigned ImmArgIdx = ~0U;
  case Instruction::GetElementPtr:
  case Instruction::Store: {
    if (Idx == 1 || !Inst)
    if (!getTLI()->allowsMemoryAccessForAlignment(
  case Instruction::Load:
  case Instruction::And:
    if (Imm == UINT64_C(0xffff) && ST->hasStdExtZbb())
    if (Imm == UINT64_C(0xffffffff) &&
        ((ST->hasStdExtZba() && ST->isRV64()) || ST->isRV32()))
    if (ST->hasStdExtZbs() && (~Imm).isPowerOf2())
    if (Inst && Idx == 1 && Imm.getBitWidth() <= ST->getXLen() &&
    if (Inst && Idx == 1 && Imm.getBitWidth() == 64 &&
    Takes12BitImm = true;
  case Instruction::Add:
    Takes12BitImm = true;
  case Instruction::Or:
  case Instruction::Xor:
    if (ST->hasStdExtZbs() && Imm.isPowerOf2())
    Takes12BitImm = true;
  case Instruction::Mul:
    if (Imm.isPowerOf2() || Imm.isNegatedPowerOf2())
    if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2())
    Takes12BitImm = true;
  case Instruction::Sub:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    Takes12BitImm = true;
  if (Imm.getSignificantBits() <= 64 &&
  return ST->hasVInstructions();
      unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType,
  if (!ST->hasStdExtZvqdotq() || ST->getELen() < 64 ||
      Opcode != Instruction::Add || !BinOp || *BinOp != Instruction::Mul ||
      InputTypeA != InputTypeB || !InputTypeA->isIntegerTy(8) ||
         getRISCVInstructionCost(RISCV::VQDOT_VV, LT.second, CostKind);
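// Sketch of the guard above: a partial reduction is only given a cheap cost
// when it is an i8 x i8 multiply accumulated by an add and the Zvqdotq
// extension (with ELEN >= 64) is available; it is then priced as a vqdot.vv
// on the legalized type rather than as a widen + multiply + reduce sequence.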
  switch (II->getIntrinsicID()) {
  case Intrinsic::vector_reduce_mul:
  case Intrinsic::vector_reduce_fmul:
  if (ST->hasVInstructions())
  if (ST->hasVInstructions())
    if (unsigned MinVLen = ST->getRealMinVLen();
      ST->useRVVForFixedLengthVectors() ? LMUL * ST->getRealMinVLen() : 0);
      (ST->hasVInstructions() &&
  return (ST->hasAUIPCADDIFusion() && ST->hasLUIADDIFusion()) ? 1 : 2;
RISCVTTIImpl::getConstantPoolLoadCost(Type *Ty,
  return getStaticDataAddrGenerationCost(CostKind) +
  unsigned Size = Mask.size();
  for (unsigned I = 0; I != Size; ++I) {
    if (static_cast<unsigned>(Mask[I]) == I)
    for (unsigned J = I + 1; J != Size; ++J)
      if (static_cast<unsigned>(Mask[J]) != J % I)
         "Expected fixed vector type and non-empty mask");
  unsigned NumOfDests = divideCeil(Mask.size(), LegalNumElts);
  if (NumOfDests <= 1 ||
          Tp->getElementType()->getPrimitiveSizeInBits() ||
      LegalNumElts >= Tp->getElementCount().getFixedValue())
  unsigned VecTySize = TTI.getDataLayout().getTypeStoreSize(Tp);
  unsigned NumOfSrcs = divideCeil(VecTySize, LegalVTSize);
  unsigned NormalizedVF = LegalNumElts * std::max(NumOfSrcs, NumOfDests);
  unsigned NumOfSrcRegs = NormalizedVF / LegalNumElts;
  unsigned NumOfDestRegs = NormalizedVF / LegalNumElts;
  assert(NormalizedVF >= Mask.size() &&
         "Normalized mask expected to be not shorter than original mask.");
      NormalizedMask, NumOfSrcRegs, NumOfDestRegs, NumOfDestRegs, []() {},
      [&](ArrayRef<int> RegMask, unsigned SrcReg, unsigned DestReg) {
        if (!ReusedSingleSrcShuffles.insert(std::make_pair(RegMask, SrcReg))
        Cost += TTI.getShuffleCost(
            SingleOpTy, RegMask, CostKind, 0, nullptr);
      [&](ArrayRef<int> RegMask, unsigned Idx1, unsigned Idx2, bool NewReg) {
        Cost += TTI.getShuffleCost(
            SingleOpTy, RegMask, CostKind, 0, nullptr);
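// A rough reading of the block above (hedged; intermediate lines are elided
// in this excerpt): an over-wide fixed-length shuffle mask is normalized to
// whole legal registers and split into one sub-mask per destination register;
// each unique single-source sub-mask is costed once (the
// ReusedSingleSrcShuffles set de-duplicates repeats), while two-source
// sub-masks each add another per-register shuffle to Cost.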
  if (!VLen || Mask.empty())
  LegalVT = TTI.getTypeLegalizationCost(
  if (NumOfDests <= 1 ||
          Tp->getElementType()->getPrimitiveSizeInBits() ||
  unsigned VecTySize = TTI.getDataLayout().getTypeStoreSize(Tp);
  unsigned NumOfSrcs = divideCeil(VecTySize, LegalVTSize);
  unsigned NormalizedVF =
  assert(NormalizedVF >= Mask.size() &&
         "Normalized mask expected to be not shorter than original mask.");
      NormalizedMask, NumOfSrcRegs, NumOfDestRegs, NumOfDestRegs, []() {},
      [&](ArrayRef<int> RegMask, unsigned SrcReg, unsigned DestReg) {
        if (!ReusedSingleSrcShuffles.insert(std::make_pair(RegMask, SrcReg))
            SingleOpTy, RegMask, CostKind, 0, nullptr);
      [&](ArrayRef<int> RegMask, unsigned Idx1, unsigned Idx2, bool NewReg) {
            SingleOpTy, RegMask, CostKind, 0, nullptr);
  if ((NumOfDestRegs > 2 && NumShuffles <= static_cast<int>(NumOfDestRegs)) ||
      (NumOfDestRegs <= 2 && NumShuffles < 4))
  if (!LT.second.isFixedLengthVector())
  auto GetSlideOpcode = [&](int SlideAmt) {
    bool IsVI = isUInt<5>(std::abs(SlideAmt));
      return IsVI ? RISCV::VSLIDEDOWN_VI : RISCV::VSLIDEDOWN_VX;
    return IsVI ? RISCV::VSLIDEUP_VI : RISCV::VSLIDEUP_VX;
  std::array<std::pair<int, int>, 2> SrcInfo;
  if (SrcInfo[1].second == 0)
  if (SrcInfo[0].second != 0) {
    unsigned Opcode = GetSlideOpcode(SrcInfo[0].second);
    FirstSlideCost = getRISCVInstructionCost(Opcode, LT.second, CostKind);
  if (SrcInfo[1].first == -1)
    return FirstSlideCost;
  if (SrcInfo[1].second != 0) {
    unsigned Opcode = GetSlideOpcode(SrcInfo[1].second);
    SecondSlideCost = getRISCVInstructionCost(Opcode, LT.second, CostKind);
      getRISCVInstructionCost(RISCV::VMERGE_VVM, LT.second, CostKind);
  return FirstSlideCost + SecondSlideCost + MaskCost;
         "Expected the Mask to match the return size if given");
         "Expected the same scalar types");
      FVTp && ST->hasVInstructions() && LT.second.isFixedLengthVector()) {
        *this, LT.second, ST->getRealVLen(),
    if (VRegSplittingCost.isValid())
      return VRegSplittingCost;
  if (Mask.size() >= 2) {
    MVT EltTp = LT.second.getVectorElementType();
        return 2 * LT.first * TLI->getLMULCost(LT.second);
      if (Mask[0] == 0 || Mask[0] == 1) {
        if (equal(DeinterleaveMask, Mask))
          return LT.first * getRISCVInstructionCost(RISCV::VNSRL_WI,
      if (LT.second.getScalarSizeInBits() != 1 &&
        unsigned NumSlides = Log2_32(Mask.size() / SubVectorSize);
        for (unsigned I = 0; I != NumSlides; ++I) {
          unsigned InsertIndex = SubVectorSize * (1 << I);
          std::pair<InstructionCost, MVT> DestLT =
          Cost += DestLT.first * TLI->getLMULCost(DestLT.second);
    if (LT.first == 1 && (LT.second.getScalarSizeInBits() != 8 ||
                          LT.second.getVectorNumElements() <= 256)) {
             getRISCVInstructionCost(RISCV::VRGATHER_VV, LT.second, CostKind);
    if (LT.first == 1 && (LT.second.getScalarSizeInBits() != 8 ||
                          LT.second.getVectorNumElements() <= 256)) {
      auto &C = SrcTy->getContext();
      auto EC = SrcTy->getElementCount();
      return 2 * IndexCost +
             getRISCVInstructionCost({RISCV::VRGATHER_VV, RISCV::VRGATHER_VV},
  if (!Mask.empty() && LT.first.isValid() && LT.first != 1 &&
        SubLT.second.isValid() && SubLT.second.isFixedLengthVector()) {
      if (std::optional<unsigned> VLen = ST->getRealVLen();
          VLen && SubLT.second.getScalarSizeInBits() * Index % *VLen == 0 &&
          SubLT.second.getSizeInBits() <= *VLen)
           getRISCVInstructionCost(RISCV::VSLIDEDOWN_VI, LT.second, CostKind);
           getRISCVInstructionCost(RISCV::VSLIDEUP_VI, LT.second, CostKind);
        (1 + getRISCVInstructionCost({RISCV::VMV_S_X, RISCV::VMERGE_VVM},
                                     Instruction::InsertElement);
    if (LT.second.getScalarSizeInBits() == 1) {
          (1 + getRISCVInstructionCost({RISCV::VMV_V_X, RISCV::VMSNE_VI},
          (1 + getRISCVInstructionCost({RISCV::VMV_V_I, RISCV::VMERGE_VIM,
                                        RISCV::VMV_X_S, RISCV::VMV_V_X,
           getRISCVInstructionCost(RISCV::VMV_V_X, LT.second, CostKind);
           getRISCVInstructionCost(RISCV::VRGATHER_VI, LT.second, CostKind);
    unsigned Opcodes[2] = {RISCV::VSLIDEDOWN_VX, RISCV::VSLIDEUP_VX};
    if (Index >= 0 && Index < 32)
      Opcodes[0] = RISCV::VSLIDEDOWN_VI;
    else if (Index < 0 && Index > -32)
      Opcodes[1] = RISCV::VSLIDEUP_VI;
    return LT.first * getRISCVInstructionCost(Opcodes, LT.second, CostKind);
  if (!LT.second.isVector())
    if (SrcTy->getElementType()->isIntegerTy(1)) {
    MVT ContainerVT = LT.second;
    if (LT.second.isFixedLengthVector())
      ContainerVT = TLI->getContainerForFixedLengthVector(LT.second);
    if (ContainerVT.bitsLE(M1VT)) {
      if (LT.second.isFixedLengthVector())
        LenCost = isInt<5>(LT.second.getVectorNumElements() - 1) ? 0 : 1;
      unsigned Opcodes[] = {RISCV::VID_V, RISCV::VRSUB_VX, RISCV::VRGATHER_VV};
      if (LT.second.isFixedLengthVector() &&
          isInt<5>(LT.second.getVectorNumElements() - 1))
        Opcodes[1] = RISCV::VRSUB_VI;
          getRISCVInstructionCost(Opcodes, LT.second, CostKind);
      return LT.first * (LenCost + GatherCost);
    unsigned M1Opcodes[] = {RISCV::VID_V, RISCV::VRSUB_VX};
        getRISCVInstructionCost(M1Opcodes, M1VT, CostKind) + 3;
        getRISCVInstructionCost({RISCV::VRGATHER_VV}, M1VT, CostKind) * Ratio;
        getRISCVInstructionCost({RISCV::VSLIDEDOWN_VX}, LT.second, CostKind);
    return FixedCost + LT.first * (GatherCost + SlideCost);
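// Illustrative example (a sketch derived from the opcode lists above, not
// authoritative): reversing a single-register fixed vector such as <16 x i8>
// is modeled roughly as
//   vid.v       v8             ; 0,1,...,15
//   vrsub.vi    v8, v8, 15     ; 15,14,...,0 (vrsub.vx when VL-1 exceeds simm5)
//   vrgather.vv v9, vSrc, v8
// i.e. LenCost for materializing VL-1 when needed plus the vid/vrsub/vrgather
// sequence; types wider than one register instead pay per-M1-chunk gathers
// (scaled by Ratio) plus a vslidedown.vx to recombine the chunks.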
      Ty, DemandedElts, Insert, Extract, CostKind);
  if (Insert && !Extract && LT.first.isValid() && LT.second.isVector()) {
    if (Ty->getScalarSizeInBits() == 1) {
    assert(LT.second.isFixedLengthVector());
    MVT ContainerVT = TLI->getContainerForFixedLengthVector(LT.second);
        getRISCVInstructionCost(RISCV::VSLIDE1DOWN_VX, LT.second, CostKind);
  switch (MICA.getID()) {
  case Intrinsic::vp_load_ff: {
    EVT DataTypeVT = TLI->getValueType(DL, DataTy);
    if (!TLI->isLegalFirstFaultLoad(DataTypeVT, Alignment))
  case Intrinsic::experimental_vp_strided_load:
  case Intrinsic::experimental_vp_strided_store:
  case Intrinsic::masked_compressstore:
  case Intrinsic::masked_expandload:
  case Intrinsic::vp_scatter:
  case Intrinsic::vp_gather:
  case Intrinsic::masked_scatter:
  case Intrinsic::masked_gather:
  case Intrinsic::vp_load:
  case Intrinsic::vp_store:
  case Intrinsic::masked_load:
  case Intrinsic::masked_store:
  unsigned Opcode = MICA.getID() == Intrinsic::masked_load ? Instruction::Load
                                                           : Instruction::Store;
    bool UseMaskForCond, bool UseMaskForGaps) const {
  if (!UseMaskForGaps && Factor <= TLI->getMaxSupportedInterleaveFactor()) {
    if (LT.second.isVector()) {
          VTy->getElementCount().divideCoefficientBy(Factor));
      if (VTy->getElementCount().isKnownMultipleOf(Factor) &&
          TLI->isLegalInterleavedAccessType(SubVecTy, Factor, Alignment,
        if (ST->hasOptimizedSegmentLoadStore(Factor)) {
          MVT SubVecVT = getTLI()->getValueType(DL, SubVecTy).getSimpleVT();
          Cost += Factor * TLI->getLMULCost(SubVecVT);
          return LT.first * Cost;
      CostKind, {TTI::OK_AnyValue, TTI::OP_None});
  unsigned NumLoads = getEstimatedVLFor(VTy);
  return NumLoads * MemOpCost;
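// When the segment load/store fast path above does not apply, the fallback
// simply multiplies the per-operation memory cost by getEstimatedVLFor(VTy),
// i.e. it appears to treat the interleaved access as roughly one memory
// operation per estimated vector-length element (a pessimistic, effectively
// scalarized model).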
  unsigned VF = FVTy->getNumElements() / Factor;
  if (Opcode == Instruction::Load) {
    for (unsigned Index : Indices) {
      Mask.resize(VF * Factor, -1);
      Cost += ShuffleCost;
                                       UseMaskForCond, UseMaskForGaps);
  assert(Opcode == Instruction::Store && "Opcode must be a store");
  return MemCost + ShuffleCost;
  bool IsLoad = MICA.getID() == Intrinsic::masked_gather ||
                MICA.getID() == Intrinsic::vp_gather;
  unsigned Opcode = IsLoad ? Instruction::Load : Instruction::Store;
  if ((Opcode == Instruction::Load &&
      (Opcode == Instruction::Store &&
                                        {TTI::OK_AnyValue, TTI::OP_None}, I);
  unsigned NumLoads = getEstimatedVLFor(&VTy);
  return NumLoads * MemOpCost;
  unsigned Opcode = MICA.getID() == Intrinsic::masked_expandload
                        ? Instruction::Load
                        : Instruction::Store;
  bool IsLegal = (Opcode == Instruction::Store &&
                 (Opcode == Instruction::Load &&
  if (Opcode == Instruction::Store)
    Opcodes.append({RISCV::VCOMPRESS_VM});
    Opcodes.append({RISCV::VSETIVLI, RISCV::VIOTA_M, RISCV::VRGATHER_VV});
         LT.first * getRISCVInstructionCost(Opcodes, LT.second, CostKind);
  unsigned Opcode = MICA.getID() == Intrinsic::experimental_vp_strided_load
                        ? Instruction::Load
                        : Instruction::Store;
                                        {TTI::OK_AnyValue, TTI::OP_None}, I);
  unsigned NumLoads = getEstimatedVLFor(&VTy);
  return NumLoads * MemOpCost;
  for (auto *Ty : Tys) {
    if (!Ty->isVectorTy())
    {Intrinsic::floor, MVT::f32, 9},
    {Intrinsic::floor, MVT::f64, 9},
    {Intrinsic::ceil, MVT::f32, 9},
    {Intrinsic::ceil, MVT::f64, 9},
    {Intrinsic::trunc, MVT::f32, 7},
    {Intrinsic::trunc, MVT::f64, 7},
    {Intrinsic::round, MVT::f32, 9},
    {Intrinsic::round, MVT::f64, 9},
    {Intrinsic::roundeven, MVT::f32, 9},
    {Intrinsic::roundeven, MVT::f64, 9},
    {Intrinsic::rint, MVT::f32, 7},
    {Intrinsic::rint, MVT::f64, 7},
    {Intrinsic::nearbyint, MVT::f32, 9},
    {Intrinsic::nearbyint, MVT::f64, 9},
    {Intrinsic::bswap, MVT::i16, 3},
    {Intrinsic::bswap, MVT::i32, 12},
    {Intrinsic::bswap, MVT::i64, 31},
    {Intrinsic::vp_bswap, MVT::i16, 3},
    {Intrinsic::vp_bswap, MVT::i32, 12},
    {Intrinsic::vp_bswap, MVT::i64, 31},
    {Intrinsic::vp_fshl, MVT::i8, 7},
    {Intrinsic::vp_fshl, MVT::i16, 7},
    {Intrinsic::vp_fshl, MVT::i32, 7},
    {Intrinsic::vp_fshl, MVT::i64, 7},
    {Intrinsic::vp_fshr, MVT::i8, 7},
    {Intrinsic::vp_fshr, MVT::i16, 7},
    {Intrinsic::vp_fshr, MVT::i32, 7},
    {Intrinsic::vp_fshr, MVT::i64, 7},
    {Intrinsic::bitreverse, MVT::i8, 17},
    {Intrinsic::bitreverse, MVT::i16, 24},
    {Intrinsic::bitreverse, MVT::i32, 33},
    {Intrinsic::bitreverse, MVT::i64, 52},
    {Intrinsic::vp_bitreverse, MVT::i8, 17},
    {Intrinsic::vp_bitreverse, MVT::i16, 24},
    {Intrinsic::vp_bitreverse, MVT::i32, 33},
    {Intrinsic::vp_bitreverse, MVT::i64, 52},
    {Intrinsic::ctpop, MVT::i8, 12},
    {Intrinsic::ctpop, MVT::i16, 19},
    {Intrinsic::ctpop, MVT::i32, 20},
    {Intrinsic::ctpop, MVT::i64, 21},
    {Intrinsic::ctlz, MVT::i8, 19},
    {Intrinsic::ctlz, MVT::i16, 28},
    {Intrinsic::ctlz, MVT::i32, 31},
    {Intrinsic::ctlz, MVT::i64, 35},
    {Intrinsic::cttz, MVT::i8, 16},
    {Intrinsic::cttz, MVT::i16, 23},
    {Intrinsic::cttz, MVT::i32, 24},
    {Intrinsic::cttz, MVT::i64, 25},
    {Intrinsic::vp_ctpop, MVT::i8, 12},
    {Intrinsic::vp_ctpop, MVT::i16, 19},
    {Intrinsic::vp_ctpop, MVT::i32, 20},
    {Intrinsic::vp_ctpop, MVT::i64, 21},
    {Intrinsic::vp_ctlz, MVT::i8, 19},
    {Intrinsic::vp_ctlz, MVT::i16, 28},
    {Intrinsic::vp_ctlz, MVT::i32, 31},
    {Intrinsic::vp_ctlz, MVT::i64, 35},
    {Intrinsic::vp_cttz, MVT::i8, 16},
    {Intrinsic::vp_cttz, MVT::i16, 23},
    {Intrinsic::vp_cttz, MVT::i32, 24},
    {Intrinsic::vp_cttz, MVT::i64, 25},
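// The table above records hand-counted instruction sequence lengths for
// vector intrinsics that lack a single dedicated instruction, keyed by the
// vector element type; later in this excerpt the table is consulted with the
// (intrinsic ID, element type) pair and the entry is scaled by LT.first.
// For example, @llvm.bswap on <4 x i32> is charged 12 per legalized register
// when no cheaper lowering applies.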
  switch (ICA.getID()) {
  case Intrinsic::lrint:
  case Intrinsic::llrint:
  case Intrinsic::lround:
  case Intrinsic::llround: {
    if (ST->hasVInstructions() && LT.second.isVector()) {
      unsigned SrcEltSz = DL.getTypeSizeInBits(SrcTy->getScalarType());
      unsigned DstEltSz = DL.getTypeSizeInBits(RetTy->getScalarType());
      if (LT.second.getVectorElementType() == MVT::bf16) {
        if (!ST->hasVInstructionsBF16Minimal())
          Ops = {RISCV::VFWCVTBF16_F_F_V, RISCV::VFCVT_X_F_V};
          Ops = {RISCV::VFWCVTBF16_F_F_V, RISCV::VFWCVT_X_F_V};
      } else if (LT.second.getVectorElementType() == MVT::f16 &&
                 !ST->hasVInstructionsF16()) {
        if (!ST->hasVInstructionsF16Minimal())
          Ops = {RISCV::VFWCVT_F_F_V, RISCV::VFCVT_X_F_V};
          Ops = {RISCV::VFWCVT_F_F_V, RISCV::VFWCVT_X_F_V};
      } else if (SrcEltSz > DstEltSz) {
        Ops = {RISCV::VFNCVT_X_F_W};
      } else if (SrcEltSz < DstEltSz) {
        Ops = {RISCV::VFWCVT_X_F_V};
        Ops = {RISCV::VFCVT_X_F_V};
      if (SrcEltSz > DstEltSz)
        return SrcLT.first *
               getRISCVInstructionCost(Ops, SrcLT.second, CostKind);
      return LT.first * getRISCVInstructionCost(Ops, LT.second, CostKind);
  case Intrinsic::ceil:
  case Intrinsic::floor:
  case Intrinsic::trunc:
  case Intrinsic::rint:
  case Intrinsic::round:
  case Intrinsic::roundeven: {
    if (!LT.second.isVector() && TLI->isOperationCustom(ISD::FCEIL, LT.second))
      return LT.first * 8;
  case Intrinsic::umin:
  case Intrinsic::umax:
  case Intrinsic::smin:
  case Intrinsic::smax: {
    if (LT.second.isScalarInteger() && ST->hasStdExtZbb())
    if (ST->hasVInstructions() && LT.second.isVector()) {
      switch (ICA.getID()) {
      case Intrinsic::umin:
        Op = RISCV::VMINU_VV;
      case Intrinsic::umax:
        Op = RISCV::VMAXU_VV;
      case Intrinsic::smin:
        Op = RISCV::VMIN_VV;
      case Intrinsic::smax:
        Op = RISCV::VMAX_VV;
      return LT.first * getRISCVInstructionCost(Op, LT.second, CostKind);
  case Intrinsic::sadd_sat:
  case Intrinsic::ssub_sat:
  case Intrinsic::uadd_sat:
  case Intrinsic::usub_sat: {
    if (ST->hasVInstructions() && LT.second.isVector()) {
      switch (ICA.getID()) {
      case Intrinsic::sadd_sat:
        Op = RISCV::VSADD_VV;
      case Intrinsic::ssub_sat:
        Op = RISCV::VSSUBU_VV;
      case Intrinsic::uadd_sat:
        Op = RISCV::VSADDU_VV;
      case Intrinsic::usub_sat:
        Op = RISCV::VSSUBU_VV;
      return LT.first * getRISCVInstructionCost(Op, LT.second, CostKind);
  case Intrinsic::fma:
  case Intrinsic::fmuladd: {
    if (ST->hasVInstructions() && LT.second.isVector())
          getRISCVInstructionCost(RISCV::VFMADD_VV, LT.second, CostKind);
  case Intrinsic::fabs: {
    if (ST->hasVInstructions() && LT.second.isVector()) {
      if (LT.second.getVectorElementType() == MVT::bf16 ||
          (LT.second.getVectorElementType() == MVT::f16 &&
           !ST->hasVInstructionsF16()))
        return LT.first * getRISCVInstructionCost(RISCV::VAND_VX, LT.second,
             getRISCVInstructionCost(RISCV::VFSGNJX_VV, LT.second, CostKind);
  case Intrinsic::sqrt: {
    if (ST->hasVInstructions() && LT.second.isVector()) {
      MVT ConvType = LT.second;
      MVT FsqrtType = LT.second;
      if (LT.second.getVectorElementType() == MVT::bf16) {
        if (LT.second == MVT::nxv32bf16) {
          ConvOp = {RISCV::VFWCVTBF16_F_F_V, RISCV::VFWCVTBF16_F_F_V,
                    RISCV::VFNCVTBF16_F_F_W, RISCV::VFNCVTBF16_F_F_W};
          FsqrtOp = {RISCV::VFSQRT_V, RISCV::VFSQRT_V};
          ConvType = MVT::nxv16f16;
          FsqrtType = MVT::nxv16f32;
          ConvOp = {RISCV::VFWCVTBF16_F_F_V, RISCV::VFNCVTBF16_F_F_W};
          FsqrtOp = {RISCV::VFSQRT_V};
          FsqrtType = TLI->getTypeToPromoteTo(ISD::FSQRT, FsqrtType);
      } else if (LT.second.getVectorElementType() == MVT::f16 &&
                 !ST->hasVInstructionsF16()) {
        if (LT.second == MVT::nxv32f16) {
          ConvOp = {RISCV::VFWCVT_F_F_V, RISCV::VFWCVT_F_F_V,
                    RISCV::VFNCVT_F_F_W, RISCV::VFNCVT_F_F_W};
          FsqrtOp = {RISCV::VFSQRT_V, RISCV::VFSQRT_V};
          ConvType = MVT::nxv16f16;
          FsqrtType = MVT::nxv16f32;
          ConvOp = {RISCV::VFWCVT_F_F_V, RISCV::VFNCVT_F_F_W};
          FsqrtOp = {RISCV::VFSQRT_V};
          FsqrtType = TLI->getTypeToPromoteTo(ISD::FSQRT, FsqrtType);
        FsqrtOp = {RISCV::VFSQRT_V};
      return LT.first * (getRISCVInstructionCost(FsqrtOp, FsqrtType, CostKind) +
                         getRISCVInstructionCost(ConvOp, ConvType, CostKind));
  case Intrinsic::cttz:
  case Intrinsic::ctlz:
  case Intrinsic::ctpop: {
    if (ST->hasStdExtZvbb() && LT.second.isVector()) {
      switch (ICA.getID()) {
      case Intrinsic::cttz:
      case Intrinsic::ctlz:
      case Intrinsic::ctpop:
        Op = RISCV::VCPOP_V;
      return LT.first * getRISCVInstructionCost(Op, LT.second, CostKind);
  case Intrinsic::abs: {
    if (ST->hasVInstructions() && LT.second.isVector()) {
             getRISCVInstructionCost({RISCV::VRSUB_VI, RISCV::VMAX_VV},
  case Intrinsic::fshl:
  case Intrinsic::fshr: {
    if ((ST->hasStdExtZbb() || ST->hasStdExtZbkb()) && RetTy->isIntegerTy() &&
        (RetTy->getIntegerBitWidth() == 32 ||
         RetTy->getIntegerBitWidth() == 64) &&
        RetTy->getIntegerBitWidth() <= ST->getXLen()) {
  case Intrinsic::get_active_lane_mask: {
    if (ST->hasVInstructions()) {
             getRISCVInstructionCost({RISCV::VSADDU_VX, RISCV::VMSLTU_VX},
  case Intrinsic::stepvector: {
    if (ST->hasVInstructions())
      return getRISCVInstructionCost(RISCV::VID_V, LT.second, CostKind) +
                 getRISCVInstructionCost(RISCV::VADD_VX, LT.second, CostKind);
    return 1 + (LT.first - 1);
  case Intrinsic::experimental_cttz_elts: {
    EVT ArgType = TLI->getValueType(DL, ArgTy, true);
    if (getTLI()->shouldExpandCttzElements(ArgType))
  case Intrinsic::experimental_vp_splice: {
  case Intrinsic::fptoui_sat:
  case Intrinsic::fptosi_sat: {
    bool IsSigned = ICA.getID() == Intrinsic::fptosi_sat;
    if (!SrcTy->isVectorTy())
    if (!SrcLT.first.isValid() || !DstLT.first.isValid())
  if (ST->hasVInstructions() && RetTy->isVectorTy()) {
        LT.second.isVector()) {
      MVT EltTy = LT.second.getVectorElementType();
                                              ICA.getID(), EltTy))
        return LT.first * Entry->Cost;
  if (ST->hasVInstructions() && PtrTy->isVectorTy())
  if (ST->enablePExtSIMDCodeGen() &&
  if (!ST->hasVInstructions() || Src->getScalarSizeInBits() > ST->getELen() ||
      Dst->getScalarSizeInBits() > ST->getELen())
  int ISD = TLI->InstructionOpcodeToISD(Opcode);
    if (Src->getScalarSizeInBits() == 1) {
      return getRISCVInstructionCost(RISCV::VMV_V_I, DstLT.second, CostKind) +
             DstLT.first * getRISCVInstructionCost(RISCV::VMERGE_VIM,
    if (Dst->getScalarSizeInBits() == 1) {
      return SrcLT.first *
                 getRISCVInstructionCost({RISCV::VAND_VI, RISCV::VMSNE_VI},
  if (!SrcLT.second.isVector() || !DstLT.second.isVector() ||
      !SrcLT.first.isValid() || !DstLT.first.isValid() ||
          SrcLT.second.getSizeInBits()) ||
          DstLT.second.getSizeInBits()) ||
      SrcLT.first > 1 || DstLT.first > 1)
  assert((SrcLT.first == 1) && (DstLT.first == 1) && "Illegal type");
  int PowDiff = (int)Log2_32(DstLT.second.getScalarSizeInBits()) -
                (int)Log2_32(SrcLT.second.getScalarSizeInBits());
    if ((PowDiff < 1) || (PowDiff > 3))
    unsigned SExtOp[] = {RISCV::VSEXT_VF2, RISCV::VSEXT_VF4, RISCV::VSEXT_VF8};
    unsigned ZExtOp[] = {RISCV::VZEXT_VF2, RISCV::VZEXT_VF4, RISCV::VZEXT_VF8};
    return getRISCVInstructionCost(Op, DstLT.second, CostKind);
    unsigned SrcEltSize = SrcLT.second.getScalarSizeInBits();
    unsigned DstEltSize = DstLT.second.getScalarSizeInBits();
                       : RISCV::VFNCVT_F_F_W;
    for (; SrcEltSize != DstEltSize;) {
      MVT DstMVT = DstLT.second.changeVectorElementType(ElementMVT);
          (DstEltSize > SrcEltSize) ? DstEltSize >> 1 : DstEltSize << 1;
    unsigned FCVT = IsSigned ? RISCV::VFCVT_RTZ_X_F_V : RISCV::VFCVT_RTZ_XU_F_V;
        IsSigned ? RISCV::VFWCVT_RTZ_X_F_V : RISCV::VFWCVT_RTZ_XU_F_V;
        IsSigned ? RISCV::VFNCVT_RTZ_X_F_W : RISCV::VFNCVT_RTZ_XU_F_W;
    unsigned SrcEltSize = Src->getScalarSizeInBits();
    unsigned DstEltSize = Dst->getScalarSizeInBits();
    if ((SrcEltSize == 16) &&
        (!ST->hasVInstructionsF16() || ((DstEltSize / 2) > SrcEltSize))) {
      std::pair<InstructionCost, MVT> VecF32LT =
          VecF32LT.first * getRISCVInstructionCost(RISCV::VFWCVT_F_F_V,
    if (DstEltSize == SrcEltSize)
      Cost += getRISCVInstructionCost(FCVT, DstLT.second, CostKind);
    else if (DstEltSize > SrcEltSize)
      Cost += getRISCVInstructionCost(FWCVT, DstLT.second, CostKind);
      MVT VecVT = DstLT.second.changeVectorElementType(ElementVT);
      Cost += getRISCVInstructionCost(FNCVT, VecVT, CostKind);
      if ((SrcEltSize / 2) > DstEltSize) {
    unsigned FCVT = IsSigned ? RISCV::VFCVT_F_X_V : RISCV::VFCVT_F_XU_V;
    unsigned FWCVT = IsSigned ? RISCV::VFWCVT_F_X_V : RISCV::VFWCVT_F_XU_V;
    unsigned FNCVT = IsSigned ? RISCV::VFNCVT_F_X_W : RISCV::VFNCVT_F_XU_W;
    unsigned SrcEltSize = Src->getScalarSizeInBits();
    unsigned DstEltSize = Dst->getScalarSizeInBits();
    if ((DstEltSize == 16) &&
        (!ST->hasVInstructionsF16() || ((SrcEltSize / 2) > DstEltSize))) {
      std::pair<InstructionCost, MVT> VecF32LT =
      Cost += VecF32LT.first * getRISCVInstructionCost(RISCV::VFNCVT_F_F_W,
    if (DstEltSize == SrcEltSize)
      Cost += getRISCVInstructionCost(FCVT, DstLT.second, CostKind);
    else if (DstEltSize > SrcEltSize) {
      if ((DstEltSize / 2) > SrcEltSize) {
        unsigned Op = IsSigned ? Instruction::SExt : Instruction::ZExt;
      Cost += getRISCVInstructionCost(FWCVT, DstLT.second, CostKind);
      Cost += getRISCVInstructionCost(FNCVT, DstLT.second, CostKind);
unsigned RISCVTTIImpl::getEstimatedVLFor(VectorType *Ty) const {
  const unsigned EltSize = DL.getTypeSizeInBits(Ty->getElementType());
  const unsigned MinSize = DL.getTypeSizeInBits(Ty).getKnownMinValue();
  if (Ty->getScalarSizeInBits() > ST->getELen())
  if (Ty->getElementType()->isIntegerTy(1)) {
  if (IID == Intrinsic::umax || IID == Intrinsic::smin)
  if (IID == Intrinsic::maximum || IID == Intrinsic::minimum) {
    case Intrinsic::maximum:
        Opcodes = {RISCV::VFREDMAX_VS, RISCV::VFMV_F_S};
        Opcodes = {RISCV::VMFNE_VV, RISCV::VCPOP_M, RISCV::VFREDMAX_VS,
    case Intrinsic::minimum:
        Opcodes = {RISCV::VFREDMIN_VS, RISCV::VFMV_F_S};
        Opcodes = {RISCV::VMFNE_VV, RISCV::VCPOP_M, RISCV::VFREDMIN_VS,
    const unsigned EltTyBits = DL.getTypeSizeInBits(DstTy);
    return ExtraCost + getRISCVInstructionCost(Opcodes, LT.second, CostKind);
  case Intrinsic::smax:
    SplitOp = RISCV::VMAX_VV;
    Opcodes = {RISCV::VREDMAX_VS, RISCV::VMV_X_S};
  case Intrinsic::smin:
    SplitOp = RISCV::VMIN_VV;
    Opcodes = {RISCV::VREDMIN_VS, RISCV::VMV_X_S};
  case Intrinsic::umax:
    SplitOp = RISCV::VMAXU_VV;
    Opcodes = {RISCV::VREDMAXU_VS, RISCV::VMV_X_S};
  case Intrinsic::umin:
    SplitOp = RISCV::VMINU_VV;
    Opcodes = {RISCV::VREDMINU_VS, RISCV::VMV_X_S};
  case Intrinsic::maxnum:
    SplitOp = RISCV::VFMAX_VV;
    Opcodes = {RISCV::VFREDMAX_VS, RISCV::VFMV_F_S};
  case Intrinsic::minnum:
    SplitOp = RISCV::VFMIN_VV;
    Opcodes = {RISCV::VFREDMIN_VS, RISCV::VFMV_F_S};
      (LT.first > 1) ? (LT.first - 1) *
                           getRISCVInstructionCost(SplitOp, LT.second, CostKind)
  return SplitCost + getRISCVInstructionCost(Opcodes, LT.second, CostKind);
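// Worked example of the mapping above (a sketch): @llvm.vector.reduce.smax on
// a type that legalizes to LT.first registers is costed as (LT.first - 1)
// vmax.vv instructions to fold the split registers together, plus a final
// vredmax.vs and a vmv.x.s to move the scalar result out of the vector
// register file.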
    std::optional<FastMathFlags> FMF,
  if (Ty->getScalarSizeInBits() > ST->getELen())
  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  Type *ElementTy = Ty->getElementType();
    if (LT.second == MVT::v1i1)
      return getRISCVInstructionCost(RISCV::VFIRST_M, LT.second, CostKind) +
      return ((LT.first > 2) ? (LT.first - 2) : 0) *
                 getRISCVInstructionCost(RISCV::VMAND_MM, LT.second, CostKind) +
             getRISCVInstructionCost(RISCV::VMNAND_MM, LT.second, CostKind) +
             getRISCVInstructionCost(RISCV::VCPOP_M, LT.second, CostKind) +
      return (LT.first - 1) *
                 getRISCVInstructionCost(RISCV::VMXOR_MM, LT.second, CostKind) +
             getRISCVInstructionCost(RISCV::VCPOP_M, LT.second, CostKind) + 1;
      return (LT.first - 1) *
                 getRISCVInstructionCost(RISCV::VMOR_MM, LT.second, CostKind) +
             getRISCVInstructionCost(RISCV::VCPOP_M, LT.second, CostKind) +
    SplitOp = RISCV::VADD_VV;
    Opcodes = {RISCV::VMV_S_X, RISCV::VREDSUM_VS, RISCV::VMV_X_S};
    SplitOp = RISCV::VOR_VV;
    Opcodes = {RISCV::VREDOR_VS, RISCV::VMV_X_S};
    SplitOp = RISCV::VXOR_VV;
    Opcodes = {RISCV::VMV_S_X, RISCV::VREDXOR_VS, RISCV::VMV_X_S};
    SplitOp = RISCV::VAND_VV;
    Opcodes = {RISCV::VREDAND_VS, RISCV::VMV_X_S};
    if ((LT.second.getScalarType() == MVT::f16 && !ST->hasVInstructionsF16()) ||
        LT.second.getScalarType() == MVT::bf16)
    for (unsigned i = 0; i < LT.first.getValue(); i++)
    return getRISCVInstructionCost(Opcodes, LT.second, CostKind);
    SplitOp = RISCV::VFADD_VV;
    Opcodes = {RISCV::VFMV_S_F, RISCV::VFREDUSUM_VS, RISCV::VFMV_F_S};
      (LT.first > 1) ? (LT.first - 1) *
                           getRISCVInstructionCost(SplitOp, LT.second, CostKind)
  return SplitCost + getRISCVInstructionCost(Opcodes, LT.second, CostKind);
    unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *ValTy,
  if (Opcode != Instruction::Add && Opcode != Instruction::FAdd)
  if (IsUnsigned && Opcode == Instruction::Add &&
      LT.second.isFixedLengthVector() && LT.second.getScalarType() == MVT::i1) {
        getRISCVInstructionCost(RISCV::VCPOP_M, LT.second, CostKind);
  return (LT.first - 1) +
  assert(OpInfo.isConstant() && "non constant operand?");
  if (OpInfo.isUniform())
  return getConstantPoolLoadCost(Ty, CostKind);
  EVT VT = TLI->getValueType(DL, Src, true);
  if (VT == MVT::Other)
  if (Opcode == Instruction::Store && OpInfo.isConstant())
  if (Src->isVectorTy() && LT.second.isVector() &&
                              LT.second.getSizeInBits()))
  if (ST->hasVInstructions() && LT.second.isVector() &&
    BaseCost *= TLI->getLMULCost(LT.second);
  return Cost + BaseCost;
                                  Op1Info, Op2Info, I);
                                  Op1Info, Op2Info, I);
  if (ValTy->isVectorTy() && ValTy->getScalarSizeInBits() > ST->getELen())
                                  Op1Info, Op2Info, I);
  auto GetConstantMatCost =
    if (OpInfo.isUniform())
    return getConstantPoolLoadCost(ValTy, CostKind);
    ConstantMatCost += GetConstantMatCost(Op1Info);
    ConstantMatCost += GetConstantMatCost(Op2Info);
  if (Opcode == Instruction::Select && ValTy->isVectorTy()) {
    if (ValTy->getScalarSizeInBits() == 1) {
      return ConstantMatCost +
             getRISCVInstructionCost(
                 {RISCV::VMANDN_MM, RISCV::VMAND_MM, RISCV::VMOR_MM},
    return ConstantMatCost +
           LT.first * getRISCVInstructionCost(RISCV::VMERGE_VVM, LT.second,
    if (ValTy->getScalarSizeInBits() == 1) {
      MVT InterimVT = LT.second.changeVectorElementType(MVT::i8);
      return ConstantMatCost +
             getRISCVInstructionCost({RISCV::VMV_V_X, RISCV::VMSNE_VI},
             LT.first * getRISCVInstructionCost(
                            {RISCV::VMANDN_MM, RISCV::VMAND_MM, RISCV::VMOR_MM},
    return ConstantMatCost +
           LT.first * getRISCVInstructionCost(
                          {RISCV::VMV_V_X, RISCV::VMSNE_VI, RISCV::VMERGE_VVM},
  if ((Opcode == Instruction::ICmp) && ValTy->isVectorTy() &&
    return ConstantMatCost + LT.first * getRISCVInstructionCost(RISCV::VMSLT_VV,
  if ((Opcode == Instruction::FCmp) && ValTy->isVectorTy() &&
      return ConstantMatCost +
             getRISCVInstructionCost(RISCV::VMXOR_MM, LT.second, CostKind);
    if ((ValTy->getScalarSizeInBits() == 16 && !ST->hasVInstructionsF16()) ||
        (ValTy->getScalarSizeInBits() == 32 && !ST->hasVInstructionsF32()) ||
        (ValTy->getScalarSizeInBits() == 64 && !ST->hasVInstructionsF64()))
                                    Op1Info, Op2Info, I);
      return ConstantMatCost +
             LT.first * getRISCVInstructionCost(
                            {RISCV::VMFLT_VV, RISCV::VMFLT_VV, RISCV::VMOR_MM},
      return ConstantMatCost +
             getRISCVInstructionCost({RISCV::VMFLT_VV, RISCV::VMNAND_MM},
      return ConstantMatCost +
             getRISCVInstructionCost(RISCV::VMFLT_VV, LT.second, CostKind);
      ValTy->isIntegerTy() && !I->user_empty()) {
      return match(U, m_Select(m_Specific(I), m_Value(), m_Value())) &&
             U->getType()->isIntegerTy() &&
             !isa<ConstantData>(U->getOperand(1)) &&
             !isa<ConstantData>(U->getOperand(2));
                                  Op1Info, Op2Info, I);
  return Opcode == Instruction::PHI ? 0 : 1;
                                            const Value *Op1) const {
  if (Opcode != Instruction::ExtractElement &&
      Opcode != Instruction::InsertElement)
  if (!LT.second.isVector()) {
    Type *ElemTy = FixedVecTy->getElementType();
    auto NumElems = FixedVecTy->getNumElements();
    auto Align = DL.getPrefTypeAlign(ElemTy);
    return Opcode == Instruction::ExtractElement
               ? StoreCost * NumElems + LoadCost
               : (StoreCost + LoadCost) * NumElems + StoreCost;
  if (LT.second.isScalableVector() && !LT.first.isValid())
  if (Opcode == Instruction::ExtractElement) {
    return ExtendCost + ExtractCost;
  return ExtendCost + InsertCost + TruncCost;
  unsigned BaseCost = 1;
  unsigned SlideCost = Opcode == Instruction::InsertElement ? 2 : 1;
  if (LT.second.isFixedLengthVector()) {
    unsigned Width = LT.second.getVectorNumElements();
    Index = Index % Width;
  if (auto VLEN = ST->getRealVLen()) {
    unsigned EltSize = LT.second.getScalarSizeInBits();
    unsigned M1Max = *VLEN / EltSize;
    Index = Index % M1Max;
  else if (ST->hasVendorXRivosVisni() && isUInt<5>(Index) &&
  else if (Opcode == Instruction::InsertElement)
      ((Index == -1U) || (Index >= LT.second.getVectorMinNumElements() &&
                          LT.second.isScalableVector()))) {
    Align VecAlign = DL.getPrefTypeAlign(Val);
    Align SclAlign = DL.getPrefTypeAlign(ScalarType);
    if (Opcode == Instruction::ExtractElement)
    BaseCost = Opcode == Instruction::InsertElement ? 3 : 4;
  return BaseCost + SlideCost;
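// Rough model implemented above (hedged; several branches are elided in this
// excerpt): an insert/extract of a known lane costs a scalar<->vector move
// (BaseCost) plus one or two slides (SlideCost) to bring the lane to
// position 0, with the index first folded modulo the register width when
// VLEN is known; unknown or beyond-minimum indices on scalable vectors are
// instead costed as a spill of the vector plus a scalar access at the
// element's preferred alignment.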
                                             unsigned Index) const {
  assert(Index < EC.getKnownMinValue() && "Unexpected reverse index");
                            EC.getKnownMinValue() - 1 - Index, nullptr,
  if (!LT.second.isVector())
  unsigned ISDOpcode = TLI->InstructionOpcodeToISD(Opcode);
  if ((LT.second.getVectorElementType() == MVT::f16 ||
       LT.second.getVectorElementType() == MVT::bf16) &&
      TLI->getOperationAction(ISDOpcode, LT.second) ==
    MVT PromotedVT = TLI->getTypeToPromoteTo(ISDOpcode, LT.second);
    CastCost += LT.first * Args.size() *
    LT.second = PromotedVT;
  auto getConstantMatCost =
    return getConstantPoolLoadCost(Ty, CostKind);
    ConstantMatCost += getConstantMatCost(0, Op1Info);
    ConstantMatCost += getConstantMatCost(1, Op2Info);
  switch (ISDOpcode) {
    Op = RISCV::VADD_VV;
    Op = RISCV::VSLL_VV;
    Op = (Ty->getScalarSizeInBits() == 1) ? RISCV::VMAND_MM : RISCV::VAND_VV;
    Op = RISCV::VMUL_VV;
    Op = RISCV::VDIV_VV;
    Op = RISCV::VREM_VV;
    Op = RISCV::VFADD_VV;
    Op = RISCV::VFMUL_VV;
    Op = RISCV::VFDIV_VV;
    Op = RISCV::VFSGNJN_VV;
  return CastCost + ConstantMatCost +
  if (Ty->isFPOrFPVectorTy())
  return CastCost + ConstantMatCost + LT.first * InstrCost;
  if (Info.isSameBase() && V != Base) {
    if (GEP->hasAllConstantIndices())
    unsigned Stride = DL.getTypeStoreSize(AccessTy);
    if (Info.isUnitStride() &&
            GEP->getType()->getPointerAddressSpace()))
          {TTI::OK_AnyValue, TTI::OP_None},
          {TTI::OK_AnyValue, TTI::OP_None}, {});
  if (ST->enableDefaultUnroll())
  if (L->getHeader()->getParent()->hasOptSize())
  L->getExitingBlocks(ExitingBlocks);
             << "Blocks: " << L->getNumBlocks() << "\n"
             << "Exit blocks: " << ExitingBlocks.size() << "\n");
  if (ExitingBlocks.size() > 2)
  if (L->getNumBlocks() > 4)
  for (auto *BB : L->getBlocks()) {
    for (auto &I : *BB) {
      if (IsVectorized && (I.getType()->isVectorTy() ||
            return V->getType()->isVectorTy();
  bool HasMask = false;
                        bool IsWrite) -> int64_t {
    if (auto *TarExtTy =
      return TarExtTy->getIntParameter(0);
  case Intrinsic::riscv_vle_mask:
  case Intrinsic::riscv_vse_mask:
  case Intrinsic::riscv_vlseg2_mask:
  case Intrinsic::riscv_vlseg3_mask:
  case Intrinsic::riscv_vlseg4_mask:
  case Intrinsic::riscv_vlseg5_mask:
  case Intrinsic::riscv_vlseg6_mask:
  case Intrinsic::riscv_vlseg7_mask:
  case Intrinsic::riscv_vlseg8_mask:
  case Intrinsic::riscv_vsseg2_mask:
  case Intrinsic::riscv_vsseg3_mask:
  case Intrinsic::riscv_vsseg4_mask:
  case Intrinsic::riscv_vsseg5_mask:
  case Intrinsic::riscv_vsseg6_mask:
  case Intrinsic::riscv_vsseg7_mask:
  case Intrinsic::riscv_vsseg8_mask:
  case Intrinsic::riscv_vle:
  case Intrinsic::riscv_vse:
  case Intrinsic::riscv_vlseg2:
  case Intrinsic::riscv_vlseg3:
  case Intrinsic::riscv_vlseg4:
  case Intrinsic::riscv_vlseg5:
  case Intrinsic::riscv_vlseg6:
  case Intrinsic::riscv_vlseg7:
  case Intrinsic::riscv_vlseg8:
  case Intrinsic::riscv_vsseg2:
  case Intrinsic::riscv_vsseg3:
  case Intrinsic::riscv_vsseg4:
  case Intrinsic::riscv_vsseg5:
  case Intrinsic::riscv_vsseg6:
  case Intrinsic::riscv_vsseg7:
  case Intrinsic::riscv_vsseg8: {
      Ty = TarExtTy->getTypeParameter(0U);
    const auto *RVVIInfo = RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IID);
    unsigned VLIndex = RVVIInfo->VLOperand;
    unsigned PtrOperandNo = VLIndex - 1 - HasMask;
    unsigned SegNum = getSegNum(Inst, PtrOperandNo, IsWrite);
    unsigned ElemSize = Ty->getScalarSizeInBits();
    Info.InterestingOperands.emplace_back(Inst, PtrOperandNo, IsWrite, Ty,
                                          Alignment, Mask, EVL);
  case Intrinsic::riscv_vlse_mask:
  case Intrinsic::riscv_vsse_mask:
  case Intrinsic::riscv_vlsseg2_mask:
  case Intrinsic::riscv_vlsseg3_mask:
  case Intrinsic::riscv_vlsseg4_mask:
  case Intrinsic::riscv_vlsseg5_mask:
  case Intrinsic::riscv_vlsseg6_mask:
  case Intrinsic::riscv_vlsseg7_mask:
  case Intrinsic::riscv_vlsseg8_mask:
  case Intrinsic::riscv_vssseg2_mask:
  case Intrinsic::riscv_vssseg3_mask:
  case Intrinsic::riscv_vssseg4_mask:
  case Intrinsic::riscv_vssseg5_mask:
  case Intrinsic::riscv_vssseg6_mask:
  case Intrinsic::riscv_vssseg7_mask:
  case Intrinsic::riscv_vssseg8_mask:
  case Intrinsic::riscv_vlse:
  case Intrinsic::riscv_vsse:
  case Intrinsic::riscv_vlsseg2:
  case Intrinsic::riscv_vlsseg3:
  case Intrinsic::riscv_vlsseg4:
  case Intrinsic::riscv_vlsseg5:
  case Intrinsic::riscv_vlsseg6:
  case Intrinsic::riscv_vlsseg7:
  case Intrinsic::riscv_vlsseg8:
  case Intrinsic::riscv_vssseg2:
  case Intrinsic::riscv_vssseg3:
  case Intrinsic::riscv_vssseg4:
  case Intrinsic::riscv_vssseg5:
  case Intrinsic::riscv_vssseg6:
  case Intrinsic::riscv_vssseg7:
  case Intrinsic::riscv_vssseg8: {
      Ty = TarExtTy->getTypeParameter(0U);
    const auto *RVVIInfo = RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IID);
    unsigned VLIndex = RVVIInfo->VLOperand;
    unsigned PtrOperandNo = VLIndex - 2 - HasMask;
      Alignment = Align(1);
    unsigned SegNum = getSegNum(Inst, PtrOperandNo, IsWrite);
    unsigned ElemSize = Ty->getScalarSizeInBits();
    Info.InterestingOperands.emplace_back(Inst, PtrOperandNo, IsWrite, Ty,
                                          Alignment, Mask, EVL, Stride);
  case Intrinsic::riscv_vloxei_mask:
  case Intrinsic::riscv_vluxei_mask:
  case Intrinsic::riscv_vsoxei_mask:
  case Intrinsic::riscv_vsuxei_mask:
  case Intrinsic::riscv_vloxseg2_mask:
  case Intrinsic::riscv_vloxseg3_mask:
  case Intrinsic::riscv_vloxseg4_mask:
  case Intrinsic::riscv_vloxseg5_mask:
  case Intrinsic::riscv_vloxseg6_mask:
  case Intrinsic::riscv_vloxseg7_mask:
  case Intrinsic::riscv_vloxseg8_mask:
  case Intrinsic::riscv_vluxseg2_mask:
  case Intrinsic::riscv_vluxseg3_mask:
  case Intrinsic::riscv_vluxseg4_mask:
  case Intrinsic::riscv_vluxseg5_mask:
  case Intrinsic::riscv_vluxseg6_mask:
  case Intrinsic::riscv_vluxseg7_mask:
  case Intrinsic::riscv_vluxseg8_mask:
  case Intrinsic::riscv_vsoxseg2_mask:
  case Intrinsic::riscv_vsoxseg3_mask:
  case Intrinsic::riscv_vsoxseg4_mask:
  case Intrinsic::riscv_vsoxseg5_mask:
  case Intrinsic::riscv_vsoxseg6_mask:
  case Intrinsic::riscv_vsoxseg7_mask:
  case Intrinsic::riscv_vsoxseg8_mask:
  case Intrinsic::riscv_vsuxseg2_mask:
  case Intrinsic::riscv_vsuxseg3_mask:
  case Intrinsic::riscv_vsuxseg4_mask:
  case Intrinsic::riscv_vsuxseg5_mask:
  case Intrinsic::riscv_vsuxseg6_mask:
  case Intrinsic::riscv_vsuxseg7_mask:
  case Intrinsic::riscv_vsuxseg8_mask:
  case Intrinsic::riscv_vloxei:
  case Intrinsic::riscv_vluxei:
  case Intrinsic::riscv_vsoxei:
  case Intrinsic::riscv_vsuxei:
  case Intrinsic::riscv_vloxseg2:
  case Intrinsic::riscv_vloxseg3:
  case Intrinsic::riscv_vloxseg4:
  case Intrinsic::riscv_vloxseg5:
  case Intrinsic::riscv_vloxseg6:
  case Intrinsic::riscv_vloxseg7:
  case Intrinsic::riscv_vloxseg8:
  case Intrinsic::riscv_vluxseg2:
  case Intrinsic::riscv_vluxseg3:
  case Intrinsic::riscv_vluxseg4:
  case Intrinsic::riscv_vluxseg5:
  case Intrinsic::riscv_vluxseg6:
  case Intrinsic::riscv_vluxseg7:
  case Intrinsic::riscv_vluxseg8:
  case Intrinsic::riscv_vsoxseg2:
  case Intrinsic::riscv_vsoxseg3:
  case Intrinsic::riscv_vsoxseg4:
  case Intrinsic::riscv_vsoxseg5:
  case Intrinsic::riscv_vsoxseg6:
  case Intrinsic::riscv_vsoxseg7:
  case Intrinsic::riscv_vsoxseg8:
  case Intrinsic::riscv_vsuxseg2:
  case Intrinsic::riscv_vsuxseg3:
  case Intrinsic::riscv_vsuxseg4:
  case Intrinsic::riscv_vsuxseg5:
  case Intrinsic::riscv_vsuxseg6:
  case Intrinsic::riscv_vsuxseg7:
  case Intrinsic::riscv_vsuxseg8: {
      Ty = TarExtTy->getTypeParameter(0U);
    const auto *RVVIInfo = RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IID);
    unsigned VLIndex = RVVIInfo->VLOperand;
    unsigned PtrOperandNo = VLIndex - 2 - HasMask;
    unsigned SegNum = getSegNum(Inst, PtrOperandNo, IsWrite);
    unsigned ElemSize = Ty->getScalarSizeInBits();
    Info.InterestingOperands.emplace_back(Inst, PtrOperandNo, IsWrite, Ty,
                                          Align(1), Mask, EVL,
  if (Ty->isVectorTy()) {
    if ((EltTy->isHalfTy() && !ST->hasVInstructionsF16()) ||
  if (Size.isScalable() && ST->hasVInstructions())
  if (ST->useRVVForFixedLengthVectors())
  return std::max<unsigned>(1U, RegWidth.getFixedValue() / ElemWidth);
  return ST->enableUnalignedVectorMem();
  if (ST->hasVendorXCVmem() && !ST->is64Bit())
                                                 Align Alignment) const {
  if (!VTy || VTy->isScalableTy())
  if (VTy->getElementType()->isIntegerTy(8))
    if (VTy->getElementCount().getFixedValue() > 256)
  return VTy->getPrimitiveSizeInBits() / ST->getRealMinVLen() <
         ST->getMaxLMULForFixedLengthVectors();
                                               Align Alignment) const {
  if (!VTy || VTy->isScalableTy())
    const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
  bool Considerable = false;
  AllowPromotionWithoutCommonHeader = false;
  Type *ConsideredSExtType =
  if (I.getType() != ConsideredSExtType)
  for (const User *U : I.users()) {
    Considerable = true;
    if (GEPInst->getNumOperands() > 2) {
      AllowPromotionWithoutCommonHeader = true;
  return Considerable;
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::FAdd:
  case Instruction::FSub:
  case Instruction::FMul:
  case Instruction::FDiv:
  case Instruction::ICmp:
  case Instruction::FCmp:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::URem:
  case Instruction::SRem:
  case Instruction::Select:
    return Operand == 1;
  if (!I->getType()->isVectorTy() || !ST->hasVInstructions())
  switch (II->getIntrinsicID()) {
  case Intrinsic::fma:
  case Intrinsic::vp_fma:
  case Intrinsic::fmuladd:
  case Intrinsic::vp_fmuladd:
    return Operand == 0 || Operand == 1;
  case Intrinsic::vp_shl:
  case Intrinsic::vp_lshr:
  case Intrinsic::vp_ashr:
  case Intrinsic::vp_udiv:
  case Intrinsic::vp_sdiv:
  case Intrinsic::vp_urem:
  case Intrinsic::vp_srem:
  case Intrinsic::ssub_sat:
  case Intrinsic::vp_ssub_sat:
  case Intrinsic::usub_sat:
  case Intrinsic::vp_usub_sat:
  case Intrinsic::vp_select:
    return Operand == 1;
  case Intrinsic::vp_add:
  case Intrinsic::vp_mul:
  case Intrinsic::vp_and:
  case Intrinsic::vp_or:
  case Intrinsic::vp_xor:
  case Intrinsic::vp_fadd:
  case Intrinsic::vp_fmul:
  case Intrinsic::vp_icmp:
  case Intrinsic::vp_fcmp:
  case Intrinsic::smin:
  case Intrinsic::vp_smin:
  case Intrinsic::umin:
  case Intrinsic::vp_umin:
  case Intrinsic::smax:
  case Intrinsic::vp_smax:
  case Intrinsic::umax:
  case Intrinsic::vp_umax:
  case Intrinsic::sadd_sat:
  case Intrinsic::vp_sadd_sat:
  case Intrinsic::uadd_sat:
  case Intrinsic::vp_uadd_sat:
  case Intrinsic::vp_sub:
  case Intrinsic::vp_fsub:
  case Intrinsic::vp_fdiv:
    return Operand == 0 || Operand == 1;
  if (I->isBitwiseLogicOp()) {
    if (!I->getType()->isVectorTy()) {
      if (ST->hasStdExtZbb() || ST->hasStdExtZbkb()) {
        for (auto &Op : I->operands()) {
    } else if (I->getOpcode() == Instruction::And && ST->hasStdExtZvkb()) {
      for (auto &Op : I->operands()) {
  Ops.push_back(&Not);
  Ops.push_back(&InsertElt);
  if (!I->getType()->isVectorTy() || !ST->hasVInstructions())
  if (!ST->sinkSplatOperands())
  for (Use &U : Op->uses()) {
    Use *InsertEltUse = &Op->getOperandUse(0);
    Ops.push_back(&InsertElt->getOperandUse(1));
    Ops.push_back(InsertEltUse);
  if (!ST->enableUnalignedScalarMem())
  if (!ST->hasStdExtZbb() && !ST->hasStdExtZbkb() && !IsZeroCmp)
  Options.AllowOverlappingLoads = true;
  Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
  if (ST->is64Bit()) {
    Options.LoadSizes = {8, 4, 2, 1};
    Options.AllowedTailExpansions = {3, 5, 6};
    Options.LoadSizes = {4, 2, 1};
    Options.AllowedTailExpansions = {3};
  if (IsZeroCmp && ST->hasVInstructions()) {
    unsigned VLenB = ST->getRealMinVLen() / 8;
    unsigned MinSize = ST->getXLen() / 8 + 1;
    unsigned MaxSize = VLenB * ST->getMaxLMULForFixedLengthVectors();
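// Illustrative effect of the expansion options above (a sketch; exact sizes
// depend on the subtarget): on RV64 with unaligned scalar access enabled,
// memcmp/bcmp expansion may use 8/4/2/1-byte loads with the 3/5/6-byte
// overlapping tails listed in AllowedTailExpansions, and comparisons against
// zero additionally appear to become candidates for vectorized expansion for
// sizes from XLen/8 + 1 bytes up to VLenB * getMaxLMULForFixedLengthVectors()
// bytes when vector instructions are present.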
bool isLegalMaskedScatter(Type *DataType, Align Alignment) const override
bool isLegalMaskedCompressStore(Type *DataTy, Align Alignment) const override
InstructionCost getGatherScatterOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const
InstructionCost getExpandCompressMemoryOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const
bool preferAlternateOpcodeVectorization() const override
bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const override
Check if sinking I's operands to I's basic block is profitable, because the operands can be folded in...
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1) const override
std::optional< unsigned > getMaxVScale() const override
bool shouldExpandReduction(const IntrinsicInst *II) const override
std::optional< unsigned > getVScaleForTuning() const override
InstructionCost getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const override
Get memory intrinsic cost based on arguments.
bool isLegalMaskedGather(Type *DataType, Align Alignment) const override
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const override
InstructionCost getPointersChainCost(ArrayRef< const Value * > Ptrs, const Value *Base, const TTI::PointersChainInfo &Info, Type *AccessTy, TTI::TargetCostKind CostKind) const override
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const override
InstructionCost getPartialReductionCost(unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType, ElementCount VF, TTI::PartialReductionExtendKind OpAExtend, TTI::PartialReductionExtendKind OpBExtend, std::optional< unsigned > BinOp, TTI::TargetCostKind CostKind) const override
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpdInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
Get intrinsic cost based on arguments.
InstructionCost getMaskedMemoryOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const override
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const override
See if I should be considered for address type promotion.
InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const override
TargetTransformInfo::PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override
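The overrides above are normally reached through the generic TargetTransformInfo wrapper rather than called directly; a hedged client-side sketch (hypothetical function name, surrounding pass plumbing assumed):

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Type.h"
using namespace llvm;

InstructionCost costOfVectorAdd(const TargetTransformInfo &TTI, Type *VecTy) {
  // Reciprocal-throughput cost of an add of type VecTy on this target.
  return TTI.getArithmeticInstrCost(Instruction::Add, VecTy,
                                    TargetTransformInfo::TCK_RecipThroughput);
}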
static MVT getM1VT(MVT VT)
Given a vector (either fixed or scalable), return the scalable vector corresponding to a vector regis...
InstructionCost getVRGatherVVCost(MVT VT) const
Return the cost of a vrgather.vv instruction for the type VT.
InstructionCost getVRGatherVICost(MVT VT) const
Return the cost of a vrgather.vi (or vx) instruction for the type VT.
static unsigned computeVLMAX(unsigned VectorBits, unsigned EltSize, unsigned MinSize)
InstructionCost getLMULCost(MVT VT) const
Return the cost of LMUL for linear operations.
InstructionCost getVSlideVICost(MVT VT) const
Return the cost of a vslidedown.vi or vslideup.vi instruction for the type VT.
InstructionCost getVSlideVXCost(MVT VT) const
Return the cost of a vslidedown.vx or vslideup.vx instruction for the type VT.
static RISCVVType::VLMUL getLMUL(MVT VT)
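For context on these LMUL/VLMAX helpers, the usual RVV relation is VLMAX = (VLEN / SEW) * LMUL; a standalone arithmetic sketch of that relation (illustration only, not the exact body of computeVLMAX):

// Standard RVV relation, shown for illustration.
constexpr unsigned vlmax(unsigned VLenBits, unsigned SewBits, unsigned Lmul) {
  return (VLenBits / SewBits) * Lmul;
}

static_assert(vlmax(/*VLEN=*/128, /*SEW=*/32, /*LMUL=*/2) == 8, "e32, m2, VLEN=128");
static_assert(vlmax(/*VLEN=*/256, /*SEW=*/64, /*LMUL=*/1) == 4, "e64, m1, VLEN=256");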
This class represents an analyzed expression in the program.
static LLVM_ABI ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
The main scalar evolution driver.
static LLVM_ABI bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
static LLVM_ABI bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
Implements a dense probed hash-table based set with some number of buckets stored inline.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
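A minimal sketch of the SmallVector operations listed above (hypothetical function name):

#include "llvm/ADT/SmallVector.h"
#include <iterator>
using namespace llvm;

SmallVector<int, 8> collectSmall() {
  SmallVector<int, 8> V;        // inline storage for up to 8 elements
  V.push_back(1);               // append a single element
  const int More[] = {2, 3, 4};
  V.append(std::begin(More), std::end(More)); // append a range
  return V;
}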
An instruction for storing to memory.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
bool isVectorTy() const
True if this is an instance of VectorType.
LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM_ABI Type * getWithNewBitWidth(unsigned NewBitWidth) const
Given an integer or vector type, change the lane bitwidth to NewBitwidth, whilst keeping the old numb...
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
LLVM_ABI Type * getWithNewType(Type *EltTy) const
Given vector type, change the element type, whilst keeping the old number of elements.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
bool isVoidTy() const
Return true if this is 'void'.
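A sketch combining a few of the Type queries above (hypothetical helper name; the calls themselves are the listed APIs):

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Type.h"
using namespace llvm;

// Double the lane width of an integer (or integer vector) type,
// e.g. i16 -> i32 or <4 x i16> -> <4 x i32>.
Type *widenLanes(Type *Ty) {
  if (!Ty->getScalarType()->isIntegerTy())
    return Ty;
  return Ty->getWithNewBitWidth(Ty->getScalarSizeInBits() * 2);
}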
A Use represents the edge between a Value definition and its users.
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
Base class of all SIMD vector types.
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
std::pair< iterator, bool > insert(const ValueT &V)
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
constexpr ScalarTy getFixedValue() const
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
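A sketch of the ElementCount arithmetic above (ElementCount is a FixedOrScalableQuantity; hypothetical function name):

#include "llvm/Support/TypeSize.h"
using namespace llvm;

void demoElementCount() {
  ElementCount EC = ElementCount::getScalable(8);   // vscale x 8 lanes
  bool DivBy4 = EC.isKnownMultipleOf(4);            // true
  ElementCount Half = EC.divideCoefficientBy(2);    // vscale x 4 lanes
  bool Smaller = ElementCount::isKnownLT(Half, EC); // true
  (void)DivBy4; (void)Half; (void)Smaller;
}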
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
ISD namespace - This namespace contains an enum which represents all of the SelectionDAG node types a...
@ ADD
Simple integer binary arithmetic operators.
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
@ FADD
Simple binary floating point operators.
@ SIGN_EXTEND
Conversion operators.
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
@ SHL
Shift and rotation operations.
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
bool match(Val *V, const Pattern &P)
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
int getIntMatCost(const APInt &Val, unsigned Size, const MCSubtargetInfo &STI, bool CompressionCost, bool FreeZeroes)
static constexpr unsigned RVVBitsPerBlock
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
const CostTblEntryT< CostType > * CostTableLookup(ArrayRef< CostTblEntryT< CostType > > Tbl, int ISD, MVT Ty)
Find in cost table.
LLVM_ABI bool getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name)
Returns true if Name is applied to TheLoop and enabled.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
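A sketch of the integer bit helpers above, from llvm/Support/MathExtras.h and llvm/ADT/bit.h (hypothetical function name):

#include "llvm/ADT/bit.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
using namespace llvm;

void demoBitMath() {
  assert(Log2_32(20) == 4);         // floor log2
  assert(Log2_32_Ceil(20) == 5);    // ceil log2
  assert(isPowerOf2_32(32));        // power of two > 0
  assert(countr_zero(40u) == 3);    // 40 == 0b101000
  assert(isShiftedMask_64(0x0ff0)); // contiguous run of ones
}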
LLVM_ABI llvm::SmallVector< int, 16 > createStrideMask(unsigned Start, unsigned Stride, unsigned VF)
Create a stride shuffle mask.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
constexpr int PoisonMaskElem
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
LLVM_ABI bool isMaskedSlidePair(ArrayRef< int > Mask, int NumElts, std::array< std::pair< int, int >, 2 > &SrcInfo)
Does this shuffle mask represent either one slide shuffle or a pair of two slide shuffles,...
LLVM_ABI llvm::SmallVector< int, 16 > createInterleaveMask(unsigned VF, unsigned NumVecs)
Create an interleave shuffle mask.
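A sketch of the two mask builders above and the shuffles they describe (hypothetical function name):

#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/VectorUtils.h"
using namespace llvm;

void demoMasks() {
  // Stride-2 mask starting at element 0 for VF=4: {0, 2, 4, 6}
  SmallVector<int, 16> Stride =
      createStrideMask(/*Start=*/0, /*Stride=*/2, /*VF=*/4);
  // Interleave 2 vectors of VF=4: {0, 4, 1, 5, 2, 6, 3, 7}
  SmallVector<int, 16> Ileave =
      createInterleaveMask(/*VF=*/4, /*NumVecs=*/2);
  (void)Stride; (void)Ileave;
}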
DWARFExpression::Operation Op
CostTblEntryT< unsigned > CostTblEntry
OutputIt copy(R &&Range, OutputIt Out)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
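A sketch of the range and sign-extension helpers above, using the RISC-V 12-bit signed immediate as the example width (hypothetical function name):

#include "llvm/Support/MathExtras.h"
#include <cassert>
using namespace llvm;

void demoImmRange() {
  assert(isInt<12>(-2048) && !isInt<12>(2048)); // simm12 range [-2048, 2047]
  assert(isUInt<5>(31) && !isUInt<5>(32));      // uimm5 range [0, 31]
  assert(SignExtend64<12>(0xFFF) == -1);        // sign-extend the low 12 bits
}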
LLVM_ABI void processShuffleMasks(ArrayRef< int > Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs, unsigned NumOfUsedRegs, function_ref< void()> NoInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned)> SingleInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned, bool)> ManyInputsAction)
Splits and processes shuffle mask depending on the number of input and output registers.
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
This struct is a compact representation of a valid (non-zero power of two) alignment.
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
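A sketch of Align and MaybeAlign, the two alignment structs described above (hypothetical function name):

#include "llvm/Support/Alignment.h"
#include <cassert>
using namespace llvm;

void demoAlign() {
  Align A(16);                        // must be a non-zero power of two
  MaybeAlign M;                       // undefined (0) alignment
  assert(A.value() == 16);
  assert(M.valueOrOne() == Align(1)); // defaults to 1 when undefined
}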
Information about a load/store intrinsic defined by the target.