18#include "llvm/IR/IntrinsicsRISCV.h"
26#define DEBUG_TYPE "riscvtti"
29 "riscv-v-register-bit-width-lmul",
31 "The LMUL to use for getRegisterBitWidth queries. Affects LMUL used "
32 "by autovectorized code. Fractional LMULs are not supported."),
38 "Overrides result used for getMaximumVF query which is used "
39 "exclusively by SLP vectorizer."),
44 cl::desc(
"Set the lower bound of a trip count to decide on "
45 "vectorization while tail-folding."),
57 size_t NumInstr = OpCodes.size();
62 return LMULCost * NumInstr;
64 for (
auto Op : OpCodes) {
66 case RISCV::VRGATHER_VI:
69 case RISCV::VRGATHER_VV:
72 case RISCV::VSLIDEUP_VI:
73 case RISCV::VSLIDEDOWN_VI:
76 case RISCV::VSLIDEUP_VX:
77 case RISCV::VSLIDEDOWN_VX:
80 case RISCV::VREDMAX_VS:
81 case RISCV::VREDMIN_VS:
82 case RISCV::VREDMAXU_VS:
83 case RISCV::VREDMINU_VS:
84 case RISCV::VREDSUM_VS:
85 case RISCV::VREDAND_VS:
86 case RISCV::VREDOR_VS:
87 case RISCV::VREDXOR_VS:
88 case RISCV::VFREDMAX_VS:
89 case RISCV::VFREDMIN_VS:
90 case RISCV::VFREDUSUM_VS: {
97 case RISCV::VFREDOSUM_VS: {
106 case RISCV::VFMV_F_S:
107 case RISCV::VFMV_S_F:
109 case RISCV::VMXOR_MM:
110 case RISCV::VMAND_MM:
111 case RISCV::VMANDN_MM:
112 case RISCV::VMNAND_MM:
114 case RISCV::VFIRST_M:
133 assert(Ty->isIntegerTy() &&
134 "getIntImmCost can only estimate cost of materialising integers");
157 if (!BO || !BO->hasOneUse())
160 if (BO->getOpcode() != Instruction::Shl)
171 if (ShAmt == Trailing)
188 if (!Cmp || !Cmp->isEquality())
204 if ((CmpC & Mask) != CmpC)
211 return NewCmpC >= -2048 && NewCmpC <= 2048;
218 assert(Ty->isIntegerTy() &&
219 "getIntImmCost can only estimate cost of materialising integers");
227 bool Takes12BitImm =
false;
228 unsigned ImmArgIdx = ~0U;
231 case Instruction::GetElementPtr:
236 case Instruction::Store: {
241 if (Idx == 1 || !Inst)
246 if (!getTLI()->allowsMemoryAccessForAlignment(
254 case Instruction::Load:
257 case Instruction::And:
259 if (Imm == UINT64_C(0xffff) && ST->hasStdExtZbb())
262 if (Imm == UINT64_C(0xffffffff) &&
263 ((ST->hasStdExtZba() && ST->isRV64()) || ST->isRV32()))
266 if (ST->hasStdExtZbs() && (~Imm).isPowerOf2())
268 if (Inst && Idx == 1 && Imm.getBitWidth() <= ST->getXLen() &&
271 if (Inst && Idx == 1 && Imm.getBitWidth() == 64 &&
274 Takes12BitImm =
true;
276 case Instruction::Add:
277 Takes12BitImm =
true;
279 case Instruction::Or:
280 case Instruction::Xor:
282 if (ST->hasStdExtZbs() && Imm.isPowerOf2())
284 Takes12BitImm =
true;
286 case Instruction::Mul:
288 if (Imm.isPowerOf2() || Imm.isNegatedPowerOf2())
291 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2())
294 Takes12BitImm =
true;
296 case Instruction::Sub:
297 case Instruction::Shl:
298 case Instruction::LShr:
299 case Instruction::AShr:
300 Takes12BitImm =
true;
311 if (Imm.getSignificantBits() <= 64 &&
334 return ST->hasVInstructions();
344 unsigned Opcode,
Type *InputTypeA,
Type *InputTypeB,
Type *AccumType,
348 if (Opcode == Instruction::FAdd)
353 if (!ST->hasStdExtZvdot4a8i() || ST->getELen() < 64 ||
354 Opcode != Instruction::Add || !BinOp || *BinOp != Instruction::Mul ||
355 InputTypeA != InputTypeB || !InputTypeA->
isIntegerTy(8) ||
363 getRISCVInstructionCost(RISCV::VDOT4A_VV, LT.second,
CostKind);
370 switch (
II->getIntrinsicID()) {
374 case Intrinsic::vector_reduce_mul:
375 case Intrinsic::vector_reduce_fmul:
381 if (ST->hasVInstructions())
387 if (ST->hasVInstructions())
388 if (
unsigned MinVLen = ST->getRealMinVLen();
403 ST->useRVVForFixedLengthVectors() ? LMUL * ST->getRealMinVLen() : 0);
406 (ST->hasVInstructions() &&
429 return (ST->hasAUIPCADDIFusion() && ST->hasLUIADDIFusion()) ? 1 : 2;
435RISCVTTIImpl::getConstantPoolLoadCost(
Type *Ty,
440 return getStaticDataAddrGenerationCost(
CostKind) +
446 unsigned Size = Mask.size();
449 for (
unsigned I = 0;
I !=
Size; ++
I) {
450 if (
static_cast<unsigned>(Mask[
I]) ==
I)
456 for (
unsigned J =
I + 1; J !=
Size; ++J)
458 if (
static_cast<unsigned>(Mask[J]) != J %
I)
486 "Expected fixed vector type and non-empty mask");
489 unsigned NumOfDests =
divideCeil(Mask.size(), LegalNumElts);
493 if (NumOfDests <= 1 ||
495 Tp->getElementType()->getPrimitiveSizeInBits() ||
496 LegalNumElts >= Tp->getElementCount().getFixedValue())
499 unsigned VecTySize =
TTI.getDataLayout().getTypeStoreSize(Tp);
502 unsigned NumOfSrcs =
divideCeil(VecTySize, LegalVTSize);
506 unsigned NormalizedVF = LegalNumElts * std::max(NumOfSrcs, NumOfDests);
507 unsigned NumOfSrcRegs = NormalizedVF / LegalNumElts;
508 unsigned NumOfDestRegs = NormalizedVF / LegalNumElts;
510 assert(NormalizedVF >= Mask.size() &&
511 "Normalized mask expected to be not shorter than original mask.");
516 NormalizedMask, NumOfSrcRegs, NumOfDestRegs, NumOfDestRegs, []() {},
517 [&](
ArrayRef<int> RegMask,
unsigned SrcReg,
unsigned DestReg) {
520 if (!ReusedSingleSrcShuffles.
insert(std::make_pair(RegMask, SrcReg))
523 Cost +=
TTI.getShuffleCost(
526 SingleOpTy, RegMask,
CostKind, 0,
nullptr);
528 [&](
ArrayRef<int> RegMask,
unsigned Idx1,
unsigned Idx2,
bool NewReg) {
529 Cost +=
TTI.getShuffleCost(
532 SingleOpTy, RegMask,
CostKind, 0,
nullptr);
555 if (!VLen || Mask.empty())
559 LegalVT =
TTI.getTypeLegalizationCost(
565 if (NumOfDests <= 1 ||
567 Tp->getElementType()->getPrimitiveSizeInBits() ||
571 unsigned VecTySize =
TTI.getDataLayout().getTypeStoreSize(Tp);
574 unsigned NumOfSrcs =
divideCeil(VecTySize, LegalVTSize);
580 unsigned NormalizedVF =
585 assert(NormalizedVF >= Mask.size() &&
586 "Normalized mask expected to be not shorter than original mask.");
592 NormalizedMask, NumOfSrcRegs, NumOfDestRegs, NumOfDestRegs, []() {},
593 [&](
ArrayRef<int> RegMask,
unsigned SrcReg,
unsigned DestReg) {
596 if (!ReusedSingleSrcShuffles.
insert(std::make_pair(RegMask, SrcReg))
601 SingleOpTy, RegMask,
CostKind, 0,
nullptr);
603 [&](
ArrayRef<int> RegMask,
unsigned Idx1,
unsigned Idx2,
bool NewReg) {
605 SingleOpTy, RegMask,
CostKind, 0,
nullptr);
612 if ((NumOfDestRegs > 2 && NumShuffles <=
static_cast<int>(NumOfDestRegs)) ||
613 (NumOfDestRegs <= 2 && NumShuffles < 4))
628 if (!
LT.second.isFixedLengthVector())
636 auto GetSlideOpcode = [&](
int SlideAmt) {
638 bool IsVI =
isUInt<5>(std::abs(SlideAmt));
640 return IsVI ? RISCV::VSLIDEDOWN_VI : RISCV::VSLIDEDOWN_VX;
641 return IsVI ? RISCV::VSLIDEUP_VI : RISCV::VSLIDEUP_VX;
644 std::array<std::pair<int, int>, 2> SrcInfo;
648 if (SrcInfo[1].second == 0)
652 if (SrcInfo[0].second != 0) {
653 unsigned Opcode = GetSlideOpcode(SrcInfo[0].second);
654 FirstSlideCost = getRISCVInstructionCost(Opcode,
LT.second,
CostKind);
657 if (SrcInfo[1].first == -1)
658 return FirstSlideCost;
661 if (SrcInfo[1].second != 0) {
662 unsigned Opcode = GetSlideOpcode(SrcInfo[1].second);
663 SecondSlideCost = getRISCVInstructionCost(Opcode,
LT.second,
CostKind);
666 getRISCVInstructionCost(RISCV::VMERGE_VVM,
LT.second,
CostKind);
673 return FirstSlideCost + SecondSlideCost + MaskCost;
684 "Expected the Mask to match the return size if given");
686 "Expected the same scalar types");
702 FVTp && ST->hasVInstructions() && LT.second.isFixedLengthVector()) {
704 *
this, LT.second, ST->getRealVLen(),
706 if (VRegSplittingCost.
isValid())
707 return VRegSplittingCost;
712 if (Mask.size() >= 2) {
713 MVT EltTp = LT.second.getVectorElementType();
724 return 2 * LT.first * TLI->getLMULCost(LT.second);
726 if (Mask[0] == 0 || Mask[0] == 1) {
730 if (
equal(DeinterleaveMask, Mask))
731 return LT.first * getRISCVInstructionCost(RISCV::VNSRL_WI,
736 if (LT.second.getScalarSizeInBits() != 1 &&
739 unsigned NumSlides =
Log2_32(Mask.size() / SubVectorSize);
741 for (
unsigned I = 0;
I != NumSlides; ++
I) {
742 unsigned InsertIndex = SubVectorSize * (1 <<
I);
747 std::pair<InstructionCost, MVT> DestLT =
752 Cost += DestLT.first * TLI->getLMULCost(DestLT.second);
766 if (LT.first == 1 && (LT.second.getScalarSizeInBits() != 8 ||
767 LT.second.getVectorNumElements() <= 256)) {
772 getRISCVInstructionCost(RISCV::VRGATHER_VV, LT.second,
CostKind);
786 if (LT.first == 1 && (LT.second.getScalarSizeInBits() != 8 ||
787 LT.second.getVectorNumElements() <= 256)) {
788 auto &
C = SrcTy->getContext();
789 auto EC = SrcTy->getElementCount();
794 return 2 * IndexCost +
795 getRISCVInstructionCost({RISCV::VRGATHER_VV, RISCV::VRGATHER_VV},
814 if (!Mask.empty() && LT.first.isValid() && LT.first != 1 &&
842 SubLT.second.isValid() && SubLT.second.isFixedLengthVector()) {
843 if (std::optional<unsigned> VLen = ST->getRealVLen();
844 VLen && SubLT.second.getScalarSizeInBits() * Index % *VLen == 0 &&
845 SubLT.second.getSizeInBits() <= *VLen)
853 getRISCVInstructionCost(RISCV::VSLIDEDOWN_VI, LT.second,
CostKind);
860 getRISCVInstructionCost(RISCV::VSLIDEUP_VI, LT.second,
CostKind);
872 (1 + getRISCVInstructionCost({RISCV::VMV_S_X, RISCV::VMERGE_VVM},
879 if (IsLoad && LT.second.isVector() &&
881 LT.second.getVectorElementCount()))
885 Instruction::InsertElement);
886 if (LT.second.getScalarSizeInBits() == 1) {
894 (1 + getRISCVInstructionCost({RISCV::VMV_V_X, RISCV::VMSNE_VI},
907 (1 + getRISCVInstructionCost({RISCV::VMV_V_I, RISCV::VMERGE_VIM,
908 RISCV::VMV_X_S, RISCV::VMV_V_X,
917 getRISCVInstructionCost(RISCV::VMV_V_X, LT.second,
CostKind);
923 getRISCVInstructionCost(RISCV::VRGATHER_VI, LT.second,
CostKind);
929 unsigned Opcodes[2] = {RISCV::VSLIDEDOWN_VX, RISCV::VSLIDEUP_VX};
930 if (Index >= 0 && Index < 32)
931 Opcodes[0] = RISCV::VSLIDEDOWN_VI;
932 else if (Index < 0 && Index > -32)
933 Opcodes[1] = RISCV::VSLIDEUP_VI;
934 return LT.first * getRISCVInstructionCost(Opcodes, LT.second,
CostKind);
938 if (!LT.second.isVector())
944 if (SrcTy->getElementType()->isIntegerTy(1)) {
956 MVT ContainerVT = LT.second;
957 if (LT.second.isFixedLengthVector())
958 ContainerVT = TLI->getContainerForFixedLengthVector(LT.second);
960 if (ContainerVT.
bitsLE(M1VT)) {
970 if (LT.second.isFixedLengthVector())
972 LenCost =
isInt<5>(LT.second.getVectorNumElements() - 1) ? 0 : 1;
973 unsigned Opcodes[] = {RISCV::VID_V, RISCV::VRSUB_VX, RISCV::VRGATHER_VV};
974 if (LT.second.isFixedLengthVector() &&
975 isInt<5>(LT.second.getVectorNumElements() - 1))
976 Opcodes[1] = RISCV::VRSUB_VI;
978 getRISCVInstructionCost(Opcodes, LT.second,
CostKind);
979 return LT.first * (LenCost + GatherCost);
986 unsigned M1Opcodes[] = {RISCV::VID_V, RISCV::VRSUB_VX};
988 getRISCVInstructionCost(M1Opcodes, M1VT,
CostKind) + 3;
992 getRISCVInstructionCost({RISCV::VRGATHER_VV}, M1VT,
CostKind) * Ratio;
994 getRISCVInstructionCost({RISCV::VSLIDEDOWN_VX}, LT.second,
CostKind);
995 return FixedCost + LT.first * (GatherCost + SlideCost);
1029 Ty, DemandedElts, Insert, Extract,
CostKind);
1031 if (Insert && !Extract && LT.first.isValid() && LT.second.isVector()) {
1032 if (Ty->getScalarSizeInBits() == 1) {
1042 assert(LT.second.isFixedLengthVector());
1043 MVT ContainerVT = TLI->getContainerForFixedLengthVector(LT.second);
1047 getRISCVInstructionCost(RISCV::VSLIDE1DOWN_VX, LT.second,
CostKind);
1060 switch (MICA.
getID()) {
1061 case Intrinsic::vp_load_ff: {
1062 EVT DataTypeVT = TLI->getValueType(
DL, DataTy);
1063 if (!TLI->isLegalFirstFaultLoad(DataTypeVT, Alignment))
1070 case Intrinsic::experimental_vp_strided_load:
1071 case Intrinsic::experimental_vp_strided_store:
1073 case Intrinsic::masked_compressstore:
1074 case Intrinsic::masked_expandload:
1076 case Intrinsic::vp_scatter:
1077 case Intrinsic::vp_gather:
1078 case Intrinsic::masked_scatter:
1079 case Intrinsic::masked_gather:
1081 case Intrinsic::vp_load:
1082 case Intrinsic::vp_store:
1083 case Intrinsic::masked_load:
1084 case Intrinsic::masked_store:
1093 unsigned Opcode = MICA.
getID() == Intrinsic::masked_load ? Instruction::Load
1094 : Instruction::Store;
1109 bool UseMaskForCond,
bool UseMaskForGaps)
const {
1115 if (!UseMaskForGaps && Factor <= TLI->getMaxSupportedInterleaveFactor()) {
1119 if (LT.second.isVector()) {
1122 VTy->getElementCount().divideCoefficientBy(Factor));
1123 if (VTy->getElementCount().isKnownMultipleOf(Factor) &&
1124 TLI->isLegalInterleavedAccessType(SubVecTy, Factor, Alignment,
1129 if (ST->hasOptimizedSegmentLoadStore(Factor)) {
1132 MVT SubVecVT = getTLI()->getValueType(
DL, SubVecTy).getSimpleVT();
1133 Cost += Factor * TLI->getLMULCost(SubVecVT);
1134 return LT.first *
Cost;
1141 CostKind, {TTI::OK_AnyValue, TTI::OP_None});
1142 unsigned NumLoads = getEstimatedVLFor(VTy);
1143 return NumLoads * MemOpCost;
1158 if (UseMaskForGaps) {
1161 "Indices should not contain duplicate elements");
1162 unsigned NumOfFields = Indices.
size();
1163 bool IsTailGapOnly = NumOfFields > 1 && (NumOfFields == Indices.
back() + 1);
1164 if (IsTailGapOnly &&
1165 NumOfFields <= TLI->getMaxSupportedInterleaveFactor()) {
1167 if (LT.second.isVector() &&
1168 FVTy->getElementCount().isKnownMultipleOf(Factor)) {
1170 FVTy->getElementType(),
1171 FVTy->getElementCount().divideCoefficientBy(Factor));
1172 if (TLI->isLegalInterleavedAccessType(SubVecTy, NumOfFields, Alignment,
1175 unsigned NumAccesses = getEstimatedVLFor(FVTy);
1184 unsigned VF = FVTy->getNumElements() / Factor;
1191 if (Opcode == Instruction::Load) {
1193 for (
unsigned Index : Indices) {
1197 Mask.resize(VF * Factor, -1);
1201 Cost += ShuffleCost;
1219 UseMaskForCond, UseMaskForGaps);
1221 assert(Opcode == Instruction::Store &&
"Opcode must be a store");
1228 return MemCost + ShuffleCost;
1235 bool IsLoad = MICA.
getID() == Intrinsic::masked_gather ||
1236 MICA.
getID() == Intrinsic::vp_gather;
1237 unsigned Opcode = IsLoad ? Instruction::Load : Instruction::Store;
1243 if ((Opcode == Instruction::Load &&
1245 (Opcode == Instruction::Store &&
1253 unsigned NumLoads = getEstimatedVLFor(&VTy);
1260 unsigned Opcode = MICA.
getID() == Intrinsic::masked_expandload
1262 : Instruction::Store;
1266 bool IsLegal = (Opcode == Instruction::Store &&
1268 (Opcode == Instruction::Load &&
1292 if (Opcode == Instruction::Store)
1293 Opcodes.
append({RISCV::VCOMPRESS_VM});
1295 Opcodes.
append({RISCV::VSETIVLI, RISCV::VIOTA_M, RISCV::VRGATHER_VV});
1297 LT.first * getRISCVInstructionCost(Opcodes, LT.second,
CostKind);
1304 unsigned Opcode = MICA.
getID() == Intrinsic::experimental_vp_strided_load
1306 : Instruction::Store;
1325 {TTI::OK_AnyValue, TTI::OP_None},
I);
1326 unsigned NumLoads = getEstimatedVLFor(&VTy);
1327 return NumLoads * MemOpCost;
1337 for (
auto *Ty : Tys) {
1338 if (!Ty->isVectorTy())
1352 {Intrinsic::floor, MVT::f32, 9},
1353 {Intrinsic::floor, MVT::f64, 9},
1354 {Intrinsic::ceil, MVT::f32, 9},
1355 {Intrinsic::ceil, MVT::f64, 9},
1356 {Intrinsic::trunc, MVT::f32, 7},
1357 {Intrinsic::trunc, MVT::f64, 7},
1358 {Intrinsic::round, MVT::f32, 9},
1359 {Intrinsic::round, MVT::f64, 9},
1360 {Intrinsic::roundeven, MVT::f32, 9},
1361 {Intrinsic::roundeven, MVT::f64, 9},
1362 {Intrinsic::rint, MVT::f32, 7},
1363 {Intrinsic::rint, MVT::f64, 7},
1364 {Intrinsic::nearbyint, MVT::f32, 9},
1365 {Intrinsic::nearbyint, MVT::f64, 9},
1366 {Intrinsic::bswap, MVT::i16, 3},
1367 {Intrinsic::bswap, MVT::i32, 12},
1368 {Intrinsic::bswap, MVT::i64, 31},
1369 {Intrinsic::vp_bswap, MVT::i16, 3},
1370 {Intrinsic::vp_bswap, MVT::i32, 12},
1371 {Intrinsic::vp_bswap, MVT::i64, 31},
1372 {Intrinsic::vp_fshl, MVT::i8, 7},
1373 {Intrinsic::vp_fshl, MVT::i16, 7},
1374 {Intrinsic::vp_fshl, MVT::i32, 7},
1375 {Intrinsic::vp_fshl, MVT::i64, 7},
1376 {Intrinsic::vp_fshr, MVT::i8, 7},
1377 {Intrinsic::vp_fshr, MVT::i16, 7},
1378 {Intrinsic::vp_fshr, MVT::i32, 7},
1379 {Intrinsic::vp_fshr, MVT::i64, 7},
1380 {Intrinsic::bitreverse, MVT::i8, 17},
1381 {Intrinsic::bitreverse, MVT::i16, 24},
1382 {Intrinsic::bitreverse, MVT::i32, 33},
1383 {Intrinsic::bitreverse, MVT::i64, 52},
1384 {Intrinsic::vp_bitreverse, MVT::i8, 17},
1385 {Intrinsic::vp_bitreverse, MVT::i16, 24},
1386 {Intrinsic::vp_bitreverse, MVT::i32, 33},
1387 {Intrinsic::vp_bitreverse, MVT::i64, 52},
1388 {Intrinsic::ctpop, MVT::i8, 12},
1389 {Intrinsic::ctpop, MVT::i16, 19},
1390 {Intrinsic::ctpop, MVT::i32, 20},
1391 {Intrinsic::ctpop, MVT::i64, 21},
1392 {Intrinsic::ctlz, MVT::i8, 19},
1393 {Intrinsic::ctlz, MVT::i16, 28},
1394 {Intrinsic::ctlz, MVT::i32, 31},
1395 {Intrinsic::ctlz, MVT::i64, 35},
1396 {Intrinsic::cttz, MVT::i8, 16},
1397 {Intrinsic::cttz, MVT::i16, 23},
1398 {Intrinsic::cttz, MVT::i32, 24},
1399 {Intrinsic::cttz, MVT::i64, 25},
1400 {Intrinsic::vp_ctpop, MVT::i8, 12},
1401 {Intrinsic::vp_ctpop, MVT::i16, 19},
1402 {Intrinsic::vp_ctpop, MVT::i32, 20},
1403 {Intrinsic::vp_ctpop, MVT::i64, 21},
1404 {Intrinsic::vp_ctlz, MVT::i8, 19},
1405 {Intrinsic::vp_ctlz, MVT::i16, 28},
1406 {Intrinsic::vp_ctlz, MVT::i32, 31},
1407 {Intrinsic::vp_ctlz, MVT::i64, 35},
1408 {Intrinsic::vp_cttz, MVT::i8, 16},
1409 {Intrinsic::vp_cttz, MVT::i16, 23},
1410 {Intrinsic::vp_cttz, MVT::i32, 24},
1411 {Intrinsic::vp_cttz, MVT::i64, 25},
1418 switch (ICA.
getID()) {
1419 case Intrinsic::lrint:
1420 case Intrinsic::llrint:
1421 case Intrinsic::lround:
1422 case Intrinsic::llround: {
1426 if (ST->hasVInstructions() && LT.second.isVector()) {
1428 unsigned SrcEltSz =
DL.getTypeSizeInBits(SrcTy->getScalarType());
1429 unsigned DstEltSz =
DL.getTypeSizeInBits(RetTy->getScalarType());
1430 if (LT.second.getVectorElementType() == MVT::bf16) {
1431 if (!ST->hasVInstructionsBF16Minimal())
1434 Ops = {RISCV::VFWCVTBF16_F_F_V, RISCV::VFCVT_X_F_V};
1436 Ops = {RISCV::VFWCVTBF16_F_F_V, RISCV::VFWCVT_X_F_V};
1437 }
else if (LT.second.getVectorElementType() == MVT::f16 &&
1438 !ST->hasVInstructionsF16()) {
1439 if (!ST->hasVInstructionsF16Minimal())
1442 Ops = {RISCV::VFWCVT_F_F_V, RISCV::VFCVT_X_F_V};
1444 Ops = {RISCV::VFWCVT_F_F_V, RISCV::VFWCVT_X_F_V};
1446 }
else if (SrcEltSz > DstEltSz) {
1447 Ops = {RISCV::VFNCVT_X_F_W};
1448 }
else if (SrcEltSz < DstEltSz) {
1449 Ops = {RISCV::VFWCVT_X_F_V};
1451 Ops = {RISCV::VFCVT_X_F_V};
1456 if (SrcEltSz > DstEltSz)
1457 return SrcLT.first *
1458 getRISCVInstructionCost(
Ops, SrcLT.second,
CostKind);
1459 return LT.first * getRISCVInstructionCost(
Ops, LT.second,
CostKind);
1463 case Intrinsic::ceil:
1464 case Intrinsic::floor:
1465 case Intrinsic::trunc:
1466 case Intrinsic::rint:
1467 case Intrinsic::round:
1468 case Intrinsic::roundeven: {
1471 if (!LT.second.isVector() && TLI->isOperationCustom(
ISD::FCEIL, LT.second))
1472 return LT.first * 8;
1475 case Intrinsic::umin:
1476 case Intrinsic::umax:
1477 case Intrinsic::smin:
1478 case Intrinsic::smax: {
1480 if (LT.second.isScalarInteger() && ST->hasStdExtZbb())
1483 if (ST->hasVInstructions() && LT.second.isVector()) {
1485 switch (ICA.
getID()) {
1486 case Intrinsic::umin:
1487 Op = RISCV::VMINU_VV;
1489 case Intrinsic::umax:
1490 Op = RISCV::VMAXU_VV;
1492 case Intrinsic::smin:
1493 Op = RISCV::VMIN_VV;
1495 case Intrinsic::smax:
1496 Op = RISCV::VMAX_VV;
1499 return LT.first * getRISCVInstructionCost(
Op, LT.second,
CostKind);
1503 case Intrinsic::sadd_sat:
1504 case Intrinsic::ssub_sat:
1505 case Intrinsic::uadd_sat:
1506 case Intrinsic::usub_sat: {
1508 if (ST->hasVInstructions() && LT.second.isVector()) {
1510 switch (ICA.
getID()) {
1511 case Intrinsic::sadd_sat:
1512 Op = RISCV::VSADD_VV;
1514 case Intrinsic::ssub_sat:
1515 Op = RISCV::VSSUB_VV;
1517 case Intrinsic::uadd_sat:
1518 Op = RISCV::VSADDU_VV;
1520 case Intrinsic::usub_sat:
1521 Op = RISCV::VSSUBU_VV;
1524 return LT.first * getRISCVInstructionCost(
Op, LT.second,
CostKind);
1528 case Intrinsic::fma:
1529 case Intrinsic::fmuladd: {
1532 if (ST->hasVInstructions() && LT.second.isVector())
1534 getRISCVInstructionCost(RISCV::VFMADD_VV, LT.second,
CostKind);
1537 case Intrinsic::fabs: {
1539 if (ST->hasVInstructions() && LT.second.isVector()) {
1545 if (LT.second.getVectorElementType() == MVT::bf16 ||
1546 (LT.second.getVectorElementType() == MVT::f16 &&
1547 !ST->hasVInstructionsF16()))
1548 return LT.first * getRISCVInstructionCost(RISCV::VAND_VX, LT.second,
1553 getRISCVInstructionCost(RISCV::VFSGNJX_VV, LT.second,
CostKind);
1557 case Intrinsic::sqrt: {
1559 if (ST->hasVInstructions() && LT.second.isVector()) {
1562 MVT ConvType = LT.second;
1563 MVT FsqrtType = LT.second;
1566 if (LT.second.getVectorElementType() == MVT::bf16) {
1567 if (LT.second == MVT::nxv32bf16) {
1568 ConvOp = {RISCV::VFWCVTBF16_F_F_V, RISCV::VFWCVTBF16_F_F_V,
1569 RISCV::VFNCVTBF16_F_F_W, RISCV::VFNCVTBF16_F_F_W};
1570 FsqrtOp = {RISCV::VFSQRT_V, RISCV::VFSQRT_V};
1571 ConvType = MVT::nxv16f16;
1572 FsqrtType = MVT::nxv16f32;
1574 ConvOp = {RISCV::VFWCVTBF16_F_F_V, RISCV::VFNCVTBF16_F_F_W};
1575 FsqrtOp = {RISCV::VFSQRT_V};
1576 FsqrtType = TLI->getTypeToPromoteTo(
ISD::FSQRT, FsqrtType);
1578 }
else if (LT.second.getVectorElementType() == MVT::f16 &&
1579 !ST->hasVInstructionsF16()) {
1580 if (LT.second == MVT::nxv32f16) {
1581 ConvOp = {RISCV::VFWCVT_F_F_V, RISCV::VFWCVT_F_F_V,
1582 RISCV::VFNCVT_F_F_W, RISCV::VFNCVT_F_F_W};
1583 FsqrtOp = {RISCV::VFSQRT_V, RISCV::VFSQRT_V};
1584 ConvType = MVT::nxv16f16;
1585 FsqrtType = MVT::nxv16f32;
1587 ConvOp = {RISCV::VFWCVT_F_F_V, RISCV::VFNCVT_F_F_W};
1588 FsqrtOp = {RISCV::VFSQRT_V};
1589 FsqrtType = TLI->getTypeToPromoteTo(
ISD::FSQRT, FsqrtType);
1592 FsqrtOp = {RISCV::VFSQRT_V};
1595 return LT.first * (getRISCVInstructionCost(FsqrtOp, FsqrtType,
CostKind) +
1596 getRISCVInstructionCost(ConvOp, ConvType,
CostKind));
1600 case Intrinsic::cttz:
1601 case Intrinsic::ctlz:
1602 case Intrinsic::ctpop: {
1604 if (ST->hasStdExtZvbb() && LT.second.isVector()) {
1606 switch (ICA.
getID()) {
1607 case Intrinsic::cttz:
1610 case Intrinsic::ctlz:
1613 case Intrinsic::ctpop:
1614 Op = RISCV::VCPOP_V;
1617 return LT.first * getRISCVInstructionCost(
Op, LT.second,
CostKind);
1621 case Intrinsic::abs: {
1623 if (ST->hasVInstructions() && LT.second.isVector()) {
1625 if (ST->hasStdExtZvabd())
1627 getRISCVInstructionCost({RISCV::VABS_V}, LT.second,
CostKind);
1632 getRISCVInstructionCost({RISCV::VRSUB_VI, RISCV::VMAX_VV},
1637 case Intrinsic::fshl:
1638 case Intrinsic::fshr: {
1645 if ((ST->hasStdExtZbb() || ST->hasStdExtZbkb()) && RetTy->isIntegerTy() &&
1647 (RetTy->getIntegerBitWidth() == 32 ||
1648 RetTy->getIntegerBitWidth() == 64) &&
1649 RetTy->getIntegerBitWidth() <= ST->getXLen()) {
1654 case Intrinsic::masked_udiv:
1657 case Intrinsic::masked_sdiv:
1660 case Intrinsic::masked_urem:
1663 case Intrinsic::masked_srem:
1666 case Intrinsic::get_active_lane_mask: {
1667 if (ST->hasVInstructions()) {
1676 getRISCVInstructionCost({RISCV::VSADDU_VX, RISCV::VMSLTU_VX},
1682 case Intrinsic::stepvector: {
1686 if (ST->hasVInstructions())
1687 return getRISCVInstructionCost(RISCV::VID_V, LT.second,
CostKind) +
1689 getRISCVInstructionCost(RISCV::VADD_VX, LT.second,
CostKind);
1690 return 1 + (LT.first - 1);
1692 case Intrinsic::vector_splice_left:
1693 case Intrinsic::vector_splice_right: {
1698 if (ST->hasVInstructions() && LT.second.isVector()) {
1700 getRISCVInstructionCost({RISCV::VSLIDEDOWN_VX, RISCV::VSLIDEUP_VX},
1705 case Intrinsic::experimental_cttz_elts: {
1707 EVT ArgType = TLI->getValueType(
DL, ArgTy,
true);
1708 if (getTLI()->shouldExpandCttzElements(ArgType))
1725 case Intrinsic::experimental_vp_splice: {
1733 case Intrinsic::fptoui_sat:
1734 case Intrinsic::fptosi_sat: {
1736 bool IsSigned = ICA.
getID() == Intrinsic::fptosi_sat;
1741 if (!SrcTy->isVectorTy())
1744 if (!SrcLT.first.isValid() || !DstLT.first.isValid())
1761 case Intrinsic::experimental_vector_extract_last_active: {
1783 unsigned EltWidth = getTLI()->getBitWidthForCttzElements(
1784 TLI->getVectorIdxTy(
getDataLayout()), MaskTy->getElementCount(),
1785 true, &VScaleRange);
1786 EltWidth = std::max(EltWidth, MaskTy->getScalarSizeInBits());
1794 if (StepLT.first > 1)
1798 unsigned Opcodes[] = {RISCV::VID_V, RISCV::VREDMAXU_VS, RISCV::VMV_X_S};
1800 Cost += MaskLT.first *
1801 getRISCVInstructionCost(RISCV::VCPOP_M, MaskLT.second,
CostKind);
1803 Cost += StepLT.first *
1804 getRISCVInstructionCost(Opcodes, StepLT.second,
CostKind);
1808 Cost += ValLT.first *
1809 getRISCVInstructionCost({RISCV::VSLIDEDOWN_VI, RISCV::VMV_X_S},
1815 if (ST->hasVInstructions() && RetTy->isVectorTy()) {
1817 LT.second.isVector()) {
1818 MVT EltTy = LT.second.getVectorElementType();
1820 ICA.
getID(), EltTy))
1821 return LT.first * Entry->Cost;
1834 if (ST->hasVInstructions() && PtrTy->
isVectorTy())
1852 if (ST->hasStdExtP() &&
1860 if (!ST->hasVInstructions() || Src->getScalarSizeInBits() > ST->getELen() ||
1861 Dst->getScalarSizeInBits() > ST->getELen())
1864 int ISD = TLI->InstructionOpcodeToISD(Opcode);
1879 if (Src->getScalarSizeInBits() == 1) {
1884 return getRISCVInstructionCost(RISCV::VMV_V_I, DstLT.second,
CostKind) +
1885 DstLT.first * getRISCVInstructionCost(RISCV::VMERGE_VIM,
1891 if (Dst->getScalarSizeInBits() == 1) {
1897 return SrcLT.first *
1898 getRISCVInstructionCost({RISCV::VAND_VI, RISCV::VMSNE_VI},
1910 if (!SrcLT.second.isVector() || !DstLT.second.isVector() ||
1911 !SrcLT.first.isValid() || !DstLT.first.isValid() ||
1913 SrcLT.second.getSizeInBits()) ||
1915 DstLT.second.getSizeInBits()) ||
1916 SrcLT.first > 1 || DstLT.first > 1)
1920 assert((SrcLT.first == 1) && (DstLT.first == 1) &&
"Illegal type");
1922 int PowDiff = (int)
Log2_32(DstLT.second.getScalarSizeInBits()) -
1923 (int)
Log2_32(SrcLT.second.getScalarSizeInBits());
1927 if ((PowDiff < 1) || (PowDiff > 3))
1929 unsigned SExtOp[] = {RISCV::VSEXT_VF2, RISCV::VSEXT_VF4, RISCV::VSEXT_VF8};
1930 unsigned ZExtOp[] = {RISCV::VZEXT_VF2, RISCV::VZEXT_VF4, RISCV::VZEXT_VF8};
1933 return getRISCVInstructionCost(
Op, DstLT.second,
CostKind);
1939 unsigned SrcEltSize = SrcLT.second.getScalarSizeInBits();
1940 unsigned DstEltSize = DstLT.second.getScalarSizeInBits();
1944 : RISCV::VFNCVT_F_F_W;
1946 for (; SrcEltSize != DstEltSize;) {
1950 MVT DstMVT = DstLT.second.changeVectorElementType(ElementMVT);
1952 (DstEltSize > SrcEltSize) ? DstEltSize >> 1 : DstEltSize << 1;
1960 unsigned FCVT = IsSigned ? RISCV::VFCVT_RTZ_X_F_V : RISCV::VFCVT_RTZ_XU_F_V;
1962 IsSigned ? RISCV::VFWCVT_RTZ_X_F_V : RISCV::VFWCVT_RTZ_XU_F_V;
1964 IsSigned ? RISCV::VFNCVT_RTZ_X_F_W : RISCV::VFNCVT_RTZ_XU_F_W;
1965 unsigned SrcEltSize = Src->getScalarSizeInBits();
1966 unsigned DstEltSize = Dst->getScalarSizeInBits();
1968 if ((SrcEltSize == 16) &&
1969 (!ST->hasVInstructionsF16() || ((DstEltSize / 2) > SrcEltSize))) {
1975 std::pair<InstructionCost, MVT> VecF32LT =
1978 VecF32LT.first * getRISCVInstructionCost(RISCV::VFWCVT_F_F_V,
1983 if (DstEltSize == SrcEltSize)
1984 Cost += getRISCVInstructionCost(FCVT, DstLT.second,
CostKind);
1985 else if (DstEltSize > SrcEltSize)
1986 Cost += getRISCVInstructionCost(FWCVT, DstLT.second,
CostKind);
1991 MVT VecVT = DstLT.second.changeVectorElementType(ElementVT);
1992 Cost += getRISCVInstructionCost(FNCVT, VecVT,
CostKind);
1993 if ((SrcEltSize / 2) > DstEltSize) {
2004 unsigned FCVT = IsSigned ? RISCV::VFCVT_F_X_V : RISCV::VFCVT_F_XU_V;
2005 unsigned FWCVT = IsSigned ? RISCV::VFWCVT_F_X_V : RISCV::VFWCVT_F_XU_V;
2006 unsigned FNCVT = IsSigned ? RISCV::VFNCVT_F_X_W : RISCV::VFNCVT_F_XU_W;
2007 unsigned SrcEltSize = Src->getScalarSizeInBits();
2008 unsigned DstEltSize = Dst->getScalarSizeInBits();
2011 if ((DstEltSize == 16) &&
2012 (!ST->hasVInstructionsF16() || ((SrcEltSize / 2) > DstEltSize))) {
2018 std::pair<InstructionCost, MVT> VecF32LT =
2021 Cost += VecF32LT.first * getRISCVInstructionCost(RISCV::VFNCVT_F_F_W,
2026 if (DstEltSize == SrcEltSize)
2027 Cost += getRISCVInstructionCost(FCVT, DstLT.second,
CostKind);
2028 else if (DstEltSize > SrcEltSize) {
2029 if ((DstEltSize / 2) > SrcEltSize) {
2033 unsigned Op = IsSigned ? Instruction::SExt : Instruction::ZExt;
2036 Cost += getRISCVInstructionCost(FWCVT, DstLT.second,
CostKind);
2038 Cost += getRISCVInstructionCost(FNCVT, DstLT.second,
CostKind);
2045unsigned RISCVTTIImpl::getEstimatedVLFor(
VectorType *Ty)
const {
2047 const unsigned EltSize =
DL.getTypeSizeInBits(Ty->getElementType());
2048 const unsigned MinSize =
DL.getTypeSizeInBits(Ty).getKnownMinValue();
2063 if (Ty->getScalarSizeInBits() > ST->getELen())
2067 if (Ty->getElementType()->isIntegerTy(1)) {
2071 if (IID == Intrinsic::umax || IID == Intrinsic::smin)
2077 if (IID == Intrinsic::maximum || IID == Intrinsic::minimum) {
2081 case Intrinsic::maximum:
2083 Opcodes = {RISCV::VFREDMAX_VS, RISCV::VFMV_F_S};
2085 Opcodes = {RISCV::VMFNE_VV, RISCV::VCPOP_M, RISCV::VFREDMAX_VS,
2100 case Intrinsic::minimum:
2102 Opcodes = {RISCV::VFREDMIN_VS, RISCV::VFMV_F_S};
2104 Opcodes = {RISCV::VMFNE_VV, RISCV::VCPOP_M, RISCV::VFREDMIN_VS,
2110 const unsigned EltTyBits =
DL.getTypeSizeInBits(DstTy);
2119 return ExtraCost + getRISCVInstructionCost(Opcodes, LT.second,
CostKind);
2128 case Intrinsic::smax:
2129 SplitOp = RISCV::VMAX_VV;
2130 Opcodes = {RISCV::VREDMAX_VS, RISCV::VMV_X_S};
2132 case Intrinsic::smin:
2133 SplitOp = RISCV::VMIN_VV;
2134 Opcodes = {RISCV::VREDMIN_VS, RISCV::VMV_X_S};
2136 case Intrinsic::umax:
2137 SplitOp = RISCV::VMAXU_VV;
2138 Opcodes = {RISCV::VREDMAXU_VS, RISCV::VMV_X_S};
2140 case Intrinsic::umin:
2141 SplitOp = RISCV::VMINU_VV;
2142 Opcodes = {RISCV::VREDMINU_VS, RISCV::VMV_X_S};
2144 case Intrinsic::maxnum:
2145 SplitOp = RISCV::VFMAX_VV;
2146 Opcodes = {RISCV::VFREDMAX_VS, RISCV::VFMV_F_S};
2148 case Intrinsic::minnum:
2149 SplitOp = RISCV::VFMIN_VV;
2150 Opcodes = {RISCV::VFREDMIN_VS, RISCV::VFMV_F_S};
2155 (LT.first > 1) ? (LT.first - 1) *
2156 getRISCVInstructionCost(SplitOp, LT.second,
CostKind)
2158 return SplitCost + getRISCVInstructionCost(Opcodes, LT.second,
CostKind);
2163 std::optional<FastMathFlags> FMF,
2169 if (Ty->getScalarSizeInBits() > ST->getELen())
2172 int ISD = TLI->InstructionOpcodeToISD(Opcode);
2180 Type *ElementTy = Ty->getElementType();
2185 if (LT.second == MVT::v1i1)
2186 return getRISCVInstructionCost(RISCV::VFIRST_M, LT.second,
CostKind) +
2204 return ((LT.first > 2) ? (LT.first - 2) : 0) *
2205 getRISCVInstructionCost(RISCV::VMAND_MM, LT.second,
CostKind) +
2206 getRISCVInstructionCost(RISCV::VMNAND_MM, LT.second,
CostKind) +
2207 getRISCVInstructionCost(RISCV::VCPOP_M, LT.second,
CostKind) +
2216 return (LT.first - 1) *
2217 getRISCVInstructionCost(RISCV::VMXOR_MM, LT.second,
CostKind) +
2218 getRISCVInstructionCost(RISCV::VCPOP_M, LT.second,
CostKind) + 1;
2226 return (LT.first - 1) *
2227 getRISCVInstructionCost(RISCV::VMOR_MM, LT.second,
CostKind) +
2228 getRISCVInstructionCost(RISCV::VCPOP_M, LT.second,
CostKind) +
2241 SplitOp = RISCV::VADD_VV;
2242 Opcodes = {RISCV::VMV_S_X, RISCV::VREDSUM_VS, RISCV::VMV_X_S};
2245 SplitOp = RISCV::VOR_VV;
2246 Opcodes = {RISCV::VREDOR_VS, RISCV::VMV_X_S};
2249 SplitOp = RISCV::VXOR_VV;
2250 Opcodes = {RISCV::VMV_S_X, RISCV::VREDXOR_VS, RISCV::VMV_X_S};
2253 SplitOp = RISCV::VAND_VV;
2254 Opcodes = {RISCV::VREDAND_VS, RISCV::VMV_X_S};
2258 if ((LT.second.getScalarType() == MVT::f16 && !ST->hasVInstructionsF16()) ||
2259 LT.second.getScalarType() == MVT::bf16)
2263 for (
unsigned i = 0; i < LT.first.getValue(); i++)
2266 return getRISCVInstructionCost(Opcodes, LT.second,
CostKind);
2268 SplitOp = RISCV::VFADD_VV;
2269 Opcodes = {RISCV::VFMV_S_F, RISCV::VFREDUSUM_VS, RISCV::VFMV_F_S};
2274 (LT.first > 1) ? (LT.first - 1) *
2275 getRISCVInstructionCost(SplitOp, LT.second,
CostKind)
2277 return SplitCost + getRISCVInstructionCost(Opcodes, LT.second,
CostKind);
2281 unsigned Opcode,
bool IsUnsigned,
Type *ResTy,
VectorType *ValTy,
2292 if (Opcode != Instruction::Add && Opcode != Instruction::FAdd)
2298 if (IsUnsigned && Opcode == Instruction::Add &&
2299 LT.second.isFixedLengthVector() && LT.second.getScalarType() == MVT::i1) {
2303 getRISCVInstructionCost(RISCV::VCPOP_M, LT.second,
CostKind);
2310 return (LT.first - 1) +
2317 assert(OpInfo.isConstant() &&
"non constant operand?");
2324 if (OpInfo.isUniform())
2330 return getConstantPoolLoadCost(Ty,
CostKind);
2339 EVT VT = TLI->getValueType(
DL, Src,
true);
2341 if (VT == MVT::Other ||
2347 if (Opcode == Instruction::Store && OpInfo.isConstant())
2362 if (Src->
isVectorTy() && LT.second.isVector() &&
2364 LT.second.getSizeInBits()))
2374 if (ST->hasVInstructions() && LT.second.isVector() &&
2376 BaseCost *= TLI->getLMULCost(LT.second);
2377 return Cost + BaseCost;
2386 Op1Info, Op2Info,
I);
2390 Op1Info, Op2Info,
I);
2395 Op1Info, Op2Info,
I);
2397 auto GetConstantMatCost =
2399 if (OpInfo.isUniform())
2404 return getConstantPoolLoadCost(ValTy,
CostKind);
2409 ConstantMatCost += GetConstantMatCost(Op1Info);
2411 ConstantMatCost += GetConstantMatCost(Op2Info);
2414 if (Opcode == Instruction::Select && ValTy->
isVectorTy()) {
2415 if (CondTy->isVectorTy()) {
2420 return ConstantMatCost +
2422 getRISCVInstructionCost(
2423 {RISCV::VMANDN_MM, RISCV::VMAND_MM, RISCV::VMOR_MM},
2427 return ConstantMatCost +
2428 LT.first * getRISCVInstructionCost(RISCV::VMERGE_VVM, LT.second,
2438 MVT InterimVT = LT.second.changeVectorElementType(MVT::i8);
2439 return ConstantMatCost +
2441 getRISCVInstructionCost({RISCV::VMV_V_X, RISCV::VMSNE_VI},
2443 LT.first * getRISCVInstructionCost(
2444 {RISCV::VMANDN_MM, RISCV::VMAND_MM, RISCV::VMOR_MM},
2451 return ConstantMatCost +
2452 LT.first * getRISCVInstructionCost(
2453 {RISCV::VMV_V_X, RISCV::VMSNE_VI, RISCV::VMERGE_VVM},
2457 if ((Opcode == Instruction::ICmp) && ValTy->
isVectorTy() &&
2461 return ConstantMatCost + LT.first * getRISCVInstructionCost(RISCV::VMSLT_VV,
2466 if ((Opcode == Instruction::FCmp) && ValTy->
isVectorTy() &&
2471 return ConstantMatCost +
2472 getRISCVInstructionCost(RISCV::VMXOR_MM, LT.second,
CostKind);
2482 Op1Info, Op2Info,
I);
2491 return ConstantMatCost +
2492 LT.first * getRISCVInstructionCost(
2493 {RISCV::VMFLT_VV, RISCV::VMFLT_VV, RISCV::VMOR_MM},
2500 return ConstantMatCost +
2502 getRISCVInstructionCost({RISCV::VMFLT_VV, RISCV::VMNAND_MM},
2511 return ConstantMatCost +
2513 getRISCVInstructionCost(RISCV::VMFLT_VV, LT.second,
CostKind);
2526 return match(U, m_Select(m_Specific(I), m_Value(), m_Value())) &&
2527 U->getType()->isIntegerTy() &&
2528 !isa<ConstantData>(U->getOperand(1)) &&
2529 !isa<ConstantData>(U->getOperand(2));
2537 Op1Info, Op2Info,
I);
2544 return Opcode == Instruction::PHI ? 0 : 1;
2561 if (Opcode != Instruction::ExtractElement &&
2562 Opcode != Instruction::InsertElement)
2570 if (!LT.second.isVector()) {
2579 Type *ElemTy = FixedVecTy->getElementType();
2580 auto NumElems = FixedVecTy->getNumElements();
2581 auto Align =
DL.getPrefTypeAlign(ElemTy);
2586 return Opcode == Instruction::ExtractElement
2587 ? StoreCost * NumElems + LoadCost
2588 : (StoreCost + LoadCost) * NumElems + StoreCost;
2592 if (LT.second.isScalableVector() && !LT.first.isValid())
2600 if (Opcode == Instruction::ExtractElement) {
2606 return ExtendCost + ExtractCost;
2616 return ExtendCost + InsertCost + TruncCost;
2622 unsigned BaseCost = 1;
2624 unsigned SlideCost = Opcode == Instruction::InsertElement ? 2 : 1;
2629 if (LT.second.isFixedLengthVector()) {
2630 unsigned Width = LT.second.getVectorNumElements();
2631 Index = Index % Width;
2636 if (
auto VLEN = ST->getRealVLen()) {
2637 unsigned EltSize = LT.second.getScalarSizeInBits();
2638 unsigned M1Max = *VLEN / EltSize;
2639 Index = Index % M1Max;
2645 else if (Opcode == Instruction::InsertElement)
2653 ((Index == -1U) || (Index >= LT.second.getVectorMinNumElements() &&
2654 LT.second.isScalableVector()))) {
2656 Align VecAlign =
DL.getPrefTypeAlign(Val);
2657 Align SclAlign =
DL.getPrefTypeAlign(ScalarType);
2662 if (Opcode == Instruction::ExtractElement)
2698 BaseCost = Opcode == Instruction::InsertElement ? 3 : 4;
2700 return BaseCost + SlideCost;
2706 unsigned Index)
const {
2715 assert(Index < EC.getKnownMinValue() &&
"Unexpected reverse index");
2717 EC.getKnownMinValue() - 1 - Index,
nullptr,
2726std::optional<InstructionCost>
2732 if ((Opcode == Instruction::UDiv || Opcode == Instruction::URem) &&
2734 if (Opcode == Instruction::UDiv)
2741 return std::nullopt;
2763 if (std::optional<InstructionCost> CombinedCost =
2765 Op2Info, Args, CxtI))
2766 return *CombinedCost;
2770 unsigned ISDOpcode = TLI->InstructionOpcodeToISD(Opcode);
2773 if (!LT.second.isVector()) {
2783 if (TLI->isOperationLegalOrPromote(ISDOpcode, LT.second))
2784 if (
const auto *Entry =
CostTableLookup(DivTbl, ISDOpcode, LT.second))
2785 return Entry->Cost * LT.first;
2794 if ((LT.second.getVectorElementType() == MVT::f16 ||
2795 LT.second.getVectorElementType() == MVT::bf16) &&
2796 TLI->getOperationAction(ISDOpcode, LT.second) ==
2798 MVT PromotedVT = TLI->getTypeToPromoteTo(ISDOpcode, LT.second);
2802 CastCost += LT.first * Args.size() *
2810 LT.second = PromotedVT;
2813 auto getConstantMatCost =
2823 return getConstantPoolLoadCost(Ty,
CostKind);
2829 ConstantMatCost += getConstantMatCost(0, Op1Info);
2831 ConstantMatCost += getConstantMatCost(1, Op2Info);
2834 switch (ISDOpcode) {
2837 Op = RISCV::VADD_VV;
2842 Op = RISCV::VSLL_VV;
2847 Op = (Ty->getScalarSizeInBits() == 1) ? RISCV::VMAND_MM : RISCV::VAND_VV;
2852 Op = RISCV::VMUL_VV;
2856 Op = RISCV::VDIV_VV;
2860 Op = RISCV::VREM_VV;
2864 Op = RISCV::VFADD_VV;
2867 Op = RISCV::VFMUL_VV;
2870 Op = RISCV::VFDIV_VV;
2873 Op = RISCV::VFSGNJN_VV;
2878 return CastCost + ConstantMatCost +
2887 if (Ty->isFPOrFPVectorTy())
2889 return CastCost + ConstantMatCost + LT.first *
InstrCost;
2912 if (Info.isSameBase() && V !=
Base) {
2913 if (
GEP->hasAllConstantIndices())
2919 unsigned Stride =
DL.getTypeStoreSize(AccessTy);
2920 if (Info.isUnitStride() &&
2926 GEP->getType()->getPointerAddressSpace()))
2929 {TTI::OK_AnyValue, TTI::OP_None},
2930 {TTI::OK_AnyValue, TTI::OP_None}, {});
2947 if (ST->enableDefaultUnroll())
2957 if (L->getHeader()->getParent()->hasOptSize())
2961 L->getExitingBlocks(ExitingBlocks);
2963 <<
"Blocks: " << L->getNumBlocks() <<
"\n"
2964 <<
"Exit blocks: " << ExitingBlocks.
size() <<
"\n");
2968 if (ExitingBlocks.
size() > 2)
2973 if (L->getNumBlocks() > 4)
2981 for (
auto *BB : L->getBlocks()) {
2982 for (
auto &
I : *BB) {
2986 if (IsVectorized && (
I.getType()->isVectorTy() ||
2988 return V->getType()->isVectorTy();
3029 bool HasMask =
false;
3032 bool IsWrite) -> int64_t {
3033 if (
auto *TarExtTy =
3035 return TarExtTy->getIntParameter(0);
3041 case Intrinsic::riscv_vle_mask:
3042 case Intrinsic::riscv_vse_mask:
3043 case Intrinsic::riscv_vlseg2_mask:
3044 case Intrinsic::riscv_vlseg3_mask:
3045 case Intrinsic::riscv_vlseg4_mask:
3046 case Intrinsic::riscv_vlseg5_mask:
3047 case Intrinsic::riscv_vlseg6_mask:
3048 case Intrinsic::riscv_vlseg7_mask:
3049 case Intrinsic::riscv_vlseg8_mask:
3050 case Intrinsic::riscv_vsseg2_mask:
3051 case Intrinsic::riscv_vsseg3_mask:
3052 case Intrinsic::riscv_vsseg4_mask:
3053 case Intrinsic::riscv_vsseg5_mask:
3054 case Intrinsic::riscv_vsseg6_mask:
3055 case Intrinsic::riscv_vsseg7_mask:
3056 case Intrinsic::riscv_vsseg8_mask:
3059 case Intrinsic::riscv_vle:
3060 case Intrinsic::riscv_vse:
3061 case Intrinsic::riscv_vlseg2:
3062 case Intrinsic::riscv_vlseg3:
3063 case Intrinsic::riscv_vlseg4:
3064 case Intrinsic::riscv_vlseg5:
3065 case Intrinsic::riscv_vlseg6:
3066 case Intrinsic::riscv_vlseg7:
3067 case Intrinsic::riscv_vlseg8:
3068 case Intrinsic::riscv_vsseg2:
3069 case Intrinsic::riscv_vsseg3:
3070 case Intrinsic::riscv_vsseg4:
3071 case Intrinsic::riscv_vsseg5:
3072 case Intrinsic::riscv_vsseg6:
3073 case Intrinsic::riscv_vsseg7:
3074 case Intrinsic::riscv_vsseg8: {
3091 Ty = TarExtTy->getTypeParameter(0U);
3096 const auto *RVVIInfo = RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IID);
3097 unsigned VLIndex = RVVIInfo->VLOperand;
3098 unsigned PtrOperandNo = VLIndex - 1 - HasMask;
3106 unsigned SegNum = getSegNum(Inst, PtrOperandNo, IsWrite);
3109 unsigned ElemSize = Ty->getScalarSizeInBits();
3113 Info.InterestingOperands.emplace_back(Inst, PtrOperandNo, IsWrite, Ty,
3114 Alignment, Mask, EVL);
3117 case Intrinsic::riscv_vlse_mask:
3118 case Intrinsic::riscv_vsse_mask:
3119 case Intrinsic::riscv_vlsseg2_mask:
3120 case Intrinsic::riscv_vlsseg3_mask:
3121 case Intrinsic::riscv_vlsseg4_mask:
3122 case Intrinsic::riscv_vlsseg5_mask:
3123 case Intrinsic::riscv_vlsseg6_mask:
3124 case Intrinsic::riscv_vlsseg7_mask:
3125 case Intrinsic::riscv_vlsseg8_mask:
3126 case Intrinsic::riscv_vssseg2_mask:
3127 case Intrinsic::riscv_vssseg3_mask:
3128 case Intrinsic::riscv_vssseg4_mask:
3129 case Intrinsic::riscv_vssseg5_mask:
3130 case Intrinsic::riscv_vssseg6_mask:
3131 case Intrinsic::riscv_vssseg7_mask:
3132 case Intrinsic::riscv_vssseg8_mask:
3135 case Intrinsic::riscv_vlse:
3136 case Intrinsic::riscv_vsse:
3137 case Intrinsic::riscv_vlsseg2:
3138 case Intrinsic::riscv_vlsseg3:
3139 case Intrinsic::riscv_vlsseg4:
3140 case Intrinsic::riscv_vlsseg5:
3141 case Intrinsic::riscv_vlsseg6:
3142 case Intrinsic::riscv_vlsseg7:
3143 case Intrinsic::riscv_vlsseg8:
3144 case Intrinsic::riscv_vssseg2:
3145 case Intrinsic::riscv_vssseg3:
3146 case Intrinsic::riscv_vssseg4:
3147 case Intrinsic::riscv_vssseg5:
3148 case Intrinsic::riscv_vssseg6:
3149 case Intrinsic::riscv_vssseg7:
3150 case Intrinsic::riscv_vssseg8: {
3167 Ty = TarExtTy->getTypeParameter(0U);
3172 const auto *RVVIInfo = RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IID);
3173 unsigned VLIndex = RVVIInfo->VLOperand;
3174 unsigned PtrOperandNo = VLIndex - 2 - HasMask;
3186 Alignment =
Align(1);
3193 unsigned SegNum = getSegNum(Inst, PtrOperandNo, IsWrite);
3196 unsigned ElemSize = Ty->getScalarSizeInBits();
3200 Info.InterestingOperands.emplace_back(Inst, PtrOperandNo, IsWrite, Ty,
3201 Alignment, Mask, EVL, Stride);
3204 case Intrinsic::riscv_vloxei_mask:
3205 case Intrinsic::riscv_vluxei_mask:
3206 case Intrinsic::riscv_vsoxei_mask:
3207 case Intrinsic::riscv_vsuxei_mask:
3208 case Intrinsic::riscv_vloxseg2_mask:
3209 case Intrinsic::riscv_vloxseg3_mask:
3210 case Intrinsic::riscv_vloxseg4_mask:
3211 case Intrinsic::riscv_vloxseg5_mask:
3212 case Intrinsic::riscv_vloxseg6_mask:
3213 case Intrinsic::riscv_vloxseg7_mask:
3214 case Intrinsic::riscv_vloxseg8_mask:
3215 case Intrinsic::riscv_vluxseg2_mask:
3216 case Intrinsic::riscv_vluxseg3_mask:
3217 case Intrinsic::riscv_vluxseg4_mask:
3218 case Intrinsic::riscv_vluxseg5_mask:
3219 case Intrinsic::riscv_vluxseg6_mask:
3220 case Intrinsic::riscv_vluxseg7_mask:
3221 case Intrinsic::riscv_vluxseg8_mask:
3222 case Intrinsic::riscv_vsoxseg2_mask:
3223 case Intrinsic::riscv_vsoxseg3_mask:
3224 case Intrinsic::riscv_vsoxseg4_mask:
3225 case Intrinsic::riscv_vsoxseg5_mask:
3226 case Intrinsic::riscv_vsoxseg6_mask:
3227 case Intrinsic::riscv_vsoxseg7_mask:
3228 case Intrinsic::riscv_vsoxseg8_mask:
3229 case Intrinsic::riscv_vsuxseg2_mask:
3230 case Intrinsic::riscv_vsuxseg3_mask:
3231 case Intrinsic::riscv_vsuxseg4_mask:
3232 case Intrinsic::riscv_vsuxseg5_mask:
3233 case Intrinsic::riscv_vsuxseg6_mask:
3234 case Intrinsic::riscv_vsuxseg7_mask:
3235 case Intrinsic::riscv_vsuxseg8_mask:
3238 case Intrinsic::riscv_vloxei:
3239 case Intrinsic::riscv_vluxei:
3240 case Intrinsic::riscv_vsoxei:
3241 case Intrinsic::riscv_vsuxei:
3242 case Intrinsic::riscv_vloxseg2:
3243 case Intrinsic::riscv_vloxseg3:
3244 case Intrinsic::riscv_vloxseg4:
3245 case Intrinsic::riscv_vloxseg5:
3246 case Intrinsic::riscv_vloxseg6:
3247 case Intrinsic::riscv_vloxseg7:
3248 case Intrinsic::riscv_vloxseg8:
3249 case Intrinsic::riscv_vluxseg2:
3250 case Intrinsic::riscv_vluxseg3:
3251 case Intrinsic::riscv_vluxseg4:
3252 case Intrinsic::riscv_vluxseg5:
3253 case Intrinsic::riscv_vluxseg6:
3254 case Intrinsic::riscv_vluxseg7:
3255 case Intrinsic::riscv_vluxseg8:
3256 case Intrinsic::riscv_vsoxseg2:
3257 case Intrinsic::riscv_vsoxseg3:
3258 case Intrinsic::riscv_vsoxseg4:
3259 case Intrinsic::riscv_vsoxseg5:
3260 case Intrinsic::riscv_vsoxseg6:
3261 case Intrinsic::riscv_vsoxseg7:
3262 case Intrinsic::riscv_vsoxseg8:
3263 case Intrinsic::riscv_vsuxseg2:
3264 case Intrinsic::riscv_vsuxseg3:
3265 case Intrinsic::riscv_vsuxseg4:
3266 case Intrinsic::riscv_vsuxseg5:
3267 case Intrinsic::riscv_vsuxseg6:
3268 case Intrinsic::riscv_vsuxseg7:
3269 case Intrinsic::riscv_vsuxseg8: {
3286 Ty = TarExtTy->getTypeParameter(0U);
3291 const auto *RVVIInfo = RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IID);
3292 unsigned VLIndex = RVVIInfo->VLOperand;
3293 unsigned PtrOperandNo = VLIndex - 2 - HasMask;
3306 unsigned SegNum = getSegNum(Inst, PtrOperandNo, IsWrite);
3309 unsigned ElemSize = Ty->getScalarSizeInBits();
3314 Info.InterestingOperands.emplace_back(Inst, PtrOperandNo, IsWrite, Ty,
3315 Align(1), Mask, EVL,
3324 if (Ty->isVectorTy()) {
3327 if ((EltTy->
isHalfTy() && !ST->hasVInstructionsF16()) ||
3333 if (
Size.isScalable() && ST->hasVInstructions())
3336 if (ST->useRVVForFixedLengthVectors())
3356 return std::max<unsigned>(1U, RegWidth.
getFixedValue() / ElemWidth);
3364 return ST->enableUnalignedVectorMem();
3370 if (ST->hasVendorXCVmem() && !ST->is64Bit())
3392 Align Alignment)
const {
3394 if (!VTy || VTy->isScalableTy())
3402 if (VTy->getElementType()->isIntegerTy(8))
3403 if (VTy->getElementCount().getFixedValue() > 256)
3404 return VTy->getPrimitiveSizeInBits() / ST->getRealMinVLen() <
3405 ST->getMaxLMULForFixedLengthVectors();
3410 Align Alignment)
const {
3412 if (!VTy || VTy->isScalableTy())
3423 if (!ST->hasVInstructions() || !ST->hasOptimizedZeroStrideLoad())
3426 return TLI->isLegalElementTypeForRVV(TLI->getValueType(
DL, ElementTy));
3435 const Instruction &
I,
bool &AllowPromotionWithoutCommonHeader)
const {
3436 bool Considerable =
false;
3437 AllowPromotionWithoutCommonHeader =
false;
3440 Type *ConsideredSExtType =
3442 if (
I.getType() != ConsideredSExtType)
3446 for (
const User *U :
I.users()) {
3448 Considerable =
true;
3452 if (GEPInst->getNumOperands() > 2) {
3453 AllowPromotionWithoutCommonHeader =
true;
3458 return Considerable;
3463 case Instruction::Add:
3464 case Instruction::Sub:
3465 case Instruction::Mul:
3466 case Instruction::And:
3467 case Instruction::Or:
3468 case Instruction::Xor:
3469 case Instruction::FAdd:
3470 case Instruction::FSub:
3471 case Instruction::FMul:
3472 case Instruction::FDiv:
3473 case Instruction::ICmp:
3474 case Instruction::FCmp:
3476 case Instruction::Shl:
3477 case Instruction::LShr:
3478 case Instruction::AShr:
3479 case Instruction::UDiv:
3480 case Instruction::SDiv:
3481 case Instruction::URem:
3482 case Instruction::SRem:
3483 case Instruction::Select:
3484 return Operand == 1;
3491 if (!
I->getType()->isVectorTy() || !ST->hasVInstructions())
3501 switch (
II->getIntrinsicID()) {
3502 case Intrinsic::fma:
3503 case Intrinsic::vp_fma:
3504 case Intrinsic::fmuladd:
3505 case Intrinsic::vp_fmuladd:
3506 return Operand == 0 || Operand == 1;
3507 case Intrinsic::vp_shl:
3508 case Intrinsic::vp_lshr:
3509 case Intrinsic::vp_ashr:
3510 case Intrinsic::vp_udiv:
3511 case Intrinsic::vp_sdiv:
3512 case Intrinsic::vp_urem:
3513 case Intrinsic::vp_srem:
3514 case Intrinsic::ssub_sat:
3515 case Intrinsic::vp_ssub_sat:
3516 case Intrinsic::usub_sat:
3517 case Intrinsic::vp_usub_sat:
3518 case Intrinsic::vp_select:
3519 return Operand == 1;
3521 case Intrinsic::vp_add:
3522 case Intrinsic::vp_mul:
3523 case Intrinsic::vp_and:
3524 case Intrinsic::vp_or:
3525 case Intrinsic::vp_xor:
3526 case Intrinsic::vp_fadd:
3527 case Intrinsic::vp_fmul:
3528 case Intrinsic::vp_icmp:
3529 case Intrinsic::vp_fcmp:
3530 case Intrinsic::smin:
3531 case Intrinsic::vp_smin:
3532 case Intrinsic::umin:
3533 case Intrinsic::vp_umin:
3534 case Intrinsic::smax:
3535 case Intrinsic::vp_smax:
3536 case Intrinsic::umax:
3537 case Intrinsic::vp_umax:
3538 case Intrinsic::sadd_sat:
3539 case Intrinsic::vp_sadd_sat:
3540 case Intrinsic::uadd_sat:
3541 case Intrinsic::vp_uadd_sat:
3543 case Intrinsic::vp_sub:
3544 case Intrinsic::vp_fsub:
3545 case Intrinsic::vp_fdiv:
3546 return Operand == 0 || Operand == 1;
3559 if (
I->isBitwiseLogicOp()) {
3560 if (!
I->getType()->isVectorTy()) {
3561 if (ST->hasStdExtZbb() || ST->hasStdExtZbkb()) {
3562 for (
auto &
Op :
I->operands()) {
3570 }
else if (
I->getOpcode() == Instruction::And && ST->hasStdExtZvkb()) {
3571 for (
auto &
Op :
I->operands()) {
3583 Ops.push_back(&Not);
3584 Ops.push_back(&InsertElt);
3592 if (!
I->getType()->isVectorTy() || !ST->hasVInstructions())
3600 if (!ST->sinkSplatOperands())
3623 for (
Use &U :
Op->uses()) {
3630 Use *InsertEltUse = &
Op->getOperandUse(0);
3633 Ops.push_back(&InsertElt->getOperandUse(1));
3634 Ops.push_back(InsertEltUse);
3645 if (!ST->enableUnalignedScalarMem())
3648 if (!ST->hasStdExtZbb() && !ST->hasStdExtZbkb() && !IsZeroCmp)
3651 Options.AllowOverlappingLoads =
true;
3652 Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
3654 if (ST->is64Bit()) {
3655 Options.LoadSizes = {8, 4, 2, 1};
3656 Options.AllowedTailExpansions = {3, 5, 6};
3658 Options.LoadSizes = {4, 2, 1};
3659 Options.AllowedTailExpansions = {3};
3662 if (IsZeroCmp && ST->hasVInstructions()) {
3663 unsigned VLenB = ST->getRealMinVLen() / 8;
3666 unsigned MinSize = ST->getXLen() / 8 + 1;
3667 unsigned MaxSize = VLenB * ST->getMaxLMULForFixedLengthVectors();
3681 if (
I->getOpcode() == Instruction::Or &&
3685 if (
I->getOpcode() == Instruction::Add ||
3686 I->getOpcode() == Instruction::Sub)
3704std::optional<Instruction *>
3710 if (
II.user_empty())
3715 const APInt *Scalar;
3720 return U->getType() == TargetVecTy && match(U, m_BitCast(m_Value()));
3724 unsigned TargetEltBW =
DL.getTypeSizeInBits(TargetVecTy->getElementType());
3725 unsigned SourceEltBW =
DL.getTypeSizeInBits(SourceVecTy->getElementType());
3726 if (TargetEltBW % SourceEltBW)
3728 unsigned TargetScale = TargetEltBW / SourceEltBW;
3729 if (VL % TargetScale || TargetScale == 1)
3731 Type *VLTy =
II.getOperand(2)->getType();
3732 ElementCount SourceEC = SourceVecTy->getElementCount();
3733 unsigned NewEltBW = SourceEltBW * TargetScale;
3735 !
DL.fitsInLegalInteger(NewEltBW))
3738 if (!TLI->isLegalElementTypeForRVV(TLI->getValueType(
DL, NewEltTy)))
3742 assert(SourceVecTy->canLosslesslyBitCastTo(RetTy) &&
3743 "Lossless bitcast between types expected");
3749 RetTy, Intrinsic::riscv_vmv_v_x,
3750 {PoisonValue::get(RetTy), ConstantInt::get(NewEltTy, NewScalar),
3751 ConstantInt::get(VLTy, VL / TargetScale)}),
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file provides a helper that implements much of the TTI interface in terms of the target-independ...
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static bool shouldSplit(Instruction *InsertPoint, DenseSet< Value * > &PrevConditionValues, DenseSet< Value * > &ConditionValues, DominatorTree &DT, DenseSet< Instruction * > &Unhoistables)
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
Cost tables and simple lookup functions.
static cl::opt< int > InstrCost("inline-instr-cost", cl::Hidden, cl::init(5), cl::desc("Cost of a single instruction when inlining"))
std::pair< Instruction::BinaryOps, Value * > OffsetOp
Find all possible pairs (BinOp, RHS) that BinOp V, RHS can be simplified.
This file provides the interface for the instcombine pass implementation.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static const Function * getCalledFunction(const Value *V)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
static Type * getValueType(Value *V, bool LookThroughCmp=false)
Returns the "element type" of the given value/instruction V.
This file describes how to lower LLVM code to machine code.
Class for arbitrary precision integers.
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & back() const
Get the last element.
size_t size() const
Get the array size.
Functions, function parameters, and return types can have attributes to indicate how they should be t...
LLVM_ABI bool isStringAttribute() const
Return true if the attribute is a string (target-dependent) attribute.
LLVM_ABI StringRef getKindAsString() const
Return the attribute's kind as a string.
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value * > Operands, Type *AccessType, TTI::TargetCostKind CostKind) const override
TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind, ArrayRef< int > Mask, VectorType *SrcTy, int &Index, VectorType *&SubTy) const
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I=nullptr, int64_t ScalableOffset=0) const override
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getScalarizationOverhead(VectorType *InTy, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={}, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const override
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
std::optional< unsigned > getMaxVScale() const override
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
InstructionCost getIndexedVectorInstrCostFromEnd(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index) const override
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const
bool isLegalAddImmediate(int64_t imm) const override
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const override
std::optional< unsigned > getVScaleForTuning() const override
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
InstructionCost getAddressComputationCost(Type *PtrTy, ScalarEvolution *, const SCEV *, TTI::TargetCostKind) const override
unsigned getRegUsageForType(Type *Ty) const override
InstructionCost getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const override
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
Value * getArgOperand(unsigned i) const
unsigned arg_size() const
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
@ FCMP_TRUE
1 1 1 1 Always true (always folded)
@ ICMP_SLT
signed less than
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
@ FCMP_ULT
1 1 0 0 True if unordered or less than
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
@ FCMP_FALSE
0 0 0 0 Always false (always folded)
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
static bool isFPPredicate(Predicate P)
static bool isIntPredicate(Predicate P)
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
This class represents a range of values.
A parsed version of the target data layout string in and methods for querying it.
Convenience struct for specifying and reasoning about fast-math flags.
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static FixedVectorType * getDoubleElementsVectorType(FixedVectorType *VTy)
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > OverloadTypes, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="", ArrayRef< OperandBundleDef > OpBundles={})
Create a call to intrinsic ID with Args, mangled using OverloadTypes.
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
The core instruction combiner logic.
const DataLayout & getDataLayout() const
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
static InstructionCost getInvalid(CostType Val=0)
CostType getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
LLVM_ABI bool isCommutative() const LLVM_READONLY
Return true if the instruction is commutative:
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
const SmallVectorImpl< Type * > & getArgTypes() const
Type * getReturnType() const
const SmallVectorImpl< const Value * > & getArgs() const
Intrinsic::ID getID() const
bool isTypeBasedOnly() const
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
This is an important class for using LLVM in a threaded context.
Represents a single loop in the control flow graph.
static MVT getFloatingPointVT(unsigned BitWidth)
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
uint64_t getScalarSizeInBits() const
MVT changeVectorElementType(MVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
bool bitsLE(MVT VT) const
Return true if this has no more bits than VT.
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
MVT changeTypeToInteger()
Return the type converted to an equivalently sized integer or vector with integer element type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
bool bitsGT(MVT VT) const
Return true if this has more bits than VT.
bool isFixedLengthVector() const
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
MVT getVectorElementType() const
static MVT getIntegerVT(unsigned BitWidth)
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
Information for memory intrinsic cost model.
Align getAlignment() const
unsigned getAddressSpace() const
Type * getDataType() const
bool getVariableMask() const
Intrinsic::ID getID() const
const Instruction * getInst() const
unsigned getOpcode() const
Return the opcode for this Instruction or ConstantExpr.
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *ValTy, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
bool shouldCopyAttributeWhenOutliningFrom(const Function *Caller, const Attribute &Attr) const override
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const override
bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment) const override
InstructionCost getStridedMemoryOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const
bool isLegalMaskedLoadStore(Type *DataType, Align Alignment) const
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const override
unsigned getMinTripCountTailFoldingThreshold() const override
TTI::AddressingModeKind getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const override
InstructionCost getAddressComputationCost(Type *PTy, ScalarEvolution *SE, const SCEV *Ptr, TTI::TargetCostKind CostKind) const override
InstructionCost getStoreImmCost(Type *VecTy, TTI::OperandValueInfo OpInfo, TTI::TargetCostKind CostKind) const
Return the cost of materializing an immediate for a value operand of a store instruction.
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const override
InstructionCost getCostOfKeepingLiveOverCall(ArrayRef< Type * > Tys) const override
std::optional< InstructionCost > getCombinedArithmeticInstructionCost(unsigned ISDOpcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info, TTI::OperandValueInfo Opd2Info, ArrayRef< const Value * > Args, const Instruction *CxtI) const
Check to see if this instruction is expected to be combined to a simpler operation during/before lowe...
bool hasActiveVectorLength() const override
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
InstructionCost getIndexedVectorInstrCostFromEnd(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index) const override
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const override
InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr) const override
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const override
Try to calculate op costs for min/max reduction operations.
bool canSplatOperand(Instruction *I, int Operand) const
Return true if the (vector) instruction I will be lowered to an instruction with a scalar splat operand for the given Operand number.
bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2) const override
bool isLegalStridedLoadStore(Type *DataType, Align Alignment) const override
unsigned getRegUsageForType(Type *Ty) const override
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override
bool isLegalMaskedScatter(Type *DataType, Align Alignment) const override
bool isLegalMaskedCompressStore(Type *DataTy, Align Alignment) const override
InstructionCost getGatherScatterOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const
InstructionCost getPartialReductionCost(unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType, ElementCount VF, TTI::PartialReductionExtendKind OpAExtend, TTI::PartialReductionExtendKind OpBExtend, std::optional< unsigned > BinOp, TTI::TargetCostKind CostKind, std::optional< FastMathFlags > FMF) const override
bool shouldTreatInstructionLikeSelect(const Instruction *I) const override
InstructionCost getExpandCompressMemoryOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const
bool preferAlternateOpcodeVectorization() const override
bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const override
Check if sinking I's operands to I's basic block is profitable, because the operands can be folded into a target instruction.
std::optional< unsigned > getMaxVScale() const override
bool shouldExpandReduction(const IntrinsicInst *II) const override
std::optional< unsigned > getVScaleForTuning() const override
InstructionCost getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const override
Get memory intrinsic cost based on arguments.
bool isLegalMaskedGather(Type *DataType, Align Alignment) const override
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const override
InstructionCost getPointersChainCost(ArrayRef< const Value * > Ptrs, const Value *Base, const TTI::PointersChainInfo &Info, Type *AccessTy, TTI::TargetCostKind CostKind) const override
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const override
InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={}, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const override
Estimate the overhead of scalarizing an instruction.
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpdInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
Get intrinsic cost based on arguments.
InstructionCost getMaskedMemoryOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const override
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override
bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const override
See if I should be considered for address type promotion.
InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const override
TargetTransformInfo::PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override
static MVT getM1VT(MVT VT)
Given a vector (either fixed or scalable), return the scalable vector corresponding to a vector register (i.e. an m1 register group).
InstructionCost getVRGatherVVCost(MVT VT) const
Return the cost of a vrgather.vv instruction for the type VT.
InstructionCost getVRGatherVICost(MVT VT) const
Return the cost of a vrgather.vi (or vx) instruction for the type VT.
static unsigned computeVLMAX(unsigned VectorBits, unsigned EltSize, unsigned MinSize)
InstructionCost getLMULCost(MVT VT) const
Return the cost of LMUL for linear operations.
InstructionCost getVSlideVICost(MVT VT) const
Return the cost of a vslidedown.vi or vslideup.vi instruction for the type VT.
InstructionCost getVSlideVXCost(MVT VT) const
Return the cost of a vslidedown.vx or vslideup.vx instruction for the type VT.
static RISCVVType::VLMUL getLMUL(MVT VT)
This class represents an analyzed expression in the program.
static LLVM_ABI ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
The main scalar evolution driver.
static LLVM_ABI bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossings.
static LLVM_ABI bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
Implements a dense probed hash-table based set with some number of buckets stored inline.
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
bool isVectorTy() const
True if this is an instance of VectorType.
LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM_ABI Type * getWithNewBitWidth(unsigned NewBitWidth) const
Given an integer or vector type, change the lane bitwidth to NewBitwidth, whilst keeping the old number of lanes.
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
LLVM_ABI Type * getWithNewType(Type *EltTy) const
Given vector type, change the element type, whilst keeping the old number of elements.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
bool isVoidTy() const
Return true if this is 'void'.
A Use represents the edge between a Value definition and its users.
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVMContext & getContext() const
All values hold a context through their type.
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Base class of all SIMD vector types.
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector.
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
std::pair< iterator, bool > insert(const ValueT &V)
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of the scalar value RHS.
constexpr ScalarTy getFixedValue() const
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the same way as for normal integer types.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
ISD namespace - This namespace contains an enum which represents all of the SelectionDAG node types and value types.
@ ADD
Simple integer binary arithmetic operators.
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter) to floating point.
@ FADD
Simple binary floating point operators.
@ SIGN_EXTEND
Conversion operators.
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of type i[2*N], then return the top part.
@ SHL
Shift and rotation operations.
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the destination VT.
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
auto m_Poison()
Match an arbitrary poison constant.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
bool match(Val *V, const Pattern &P)
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
auto m_Value()
Match an arbitrary value and ignore it.
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
auto m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
int getIntMatCost(const APInt &Val, unsigned Size, const MCSubtargetInfo &STI, bool CompressionCost, bool FreeZeroes)
static constexpr unsigned RVVBitsPerBlock
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
const CostTblEntryT< CostType > * CostTableLookup(ArrayRef< CostTblEntryT< CostType > > Tbl, int ISD, MVT Ty)
Find in cost table.
LLVM_ABI bool getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name)
Returns true if Name is applied to TheLoop and enabled.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
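Given two or more input ranges, returns a new range whose values are tuples (A, B, C, ...), such that A is the 0-based index of the item in the sequence, and B, C, ..., are the values from the original input ranges.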
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
auto adjacent_find(R &&Range)
Provide wrappers to std::adjacent_find which finds the first pair of adjacent elements that are equal.
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit version).
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
LLVM_ABI llvm::SmallVector< int, 16 > createStrideMask(unsigned Start, unsigned Stride, unsigned VF)
Create a stride shuffle mask.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool is_sorted(R &&Range, Compare C)
Wrapper function around std::is_sorted to check if elements in a range R are sorted with respect to a comparator C.
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
constexpr int PoisonMaskElem
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
LLVM_ABI bool isMaskedSlidePair(ArrayRef< int > Mask, int NumElts, std::array< std::pair< int, int >, 2 > &SrcInfo)
Does this shuffle mask represent either one slide shuffle or a pair of two slide shuffles, combined with a select on some constant vector mask?
LLVM_ABI llvm::SmallVector< int, 16 > createInterleaveMask(unsigned VF, unsigned NumVecs)
Create an interleave shuffle mask.
DWARFExpression::Operation Op
CostTblEntryT< unsigned > CostTblEntry
OutputIt copy(R &&Range, OutputIt Out)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
LLVM_ABI void processShuffleMasks(ArrayRef< int > Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs, unsigned NumOfUsedRegs, function_ref< void()> NoInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned)> SingleInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned, bool)> ManyInputsAction)
Splits and processes shuffle mask depending on the number of input and output registers.
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
This struct is a compact representation of a valid (non-zero power of two) alignment.
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
Information about a load/store intrinsic defined by the target.