63#define DEBUG_TYPE "x86tti"
79 std::optional<unsigned>
164 bool Vector = (ClassID == 1);
183 if (ST->
hasAVX512() && PreferVectorWidth >= 512)
185 if (ST->
hasAVX() && PreferVectorWidth >= 256)
187 if (ST->
hasSSE1() && PreferVectorWidth >= 128)
227 if (Opcode == Instruction::Mul && Ty->
isVectorTy() &&
244 assert(ISD &&
"Invalid opcode");
246 if (ISD ==
ISD::MUL && Args.size() == 2 && LT.second.isVector() &&
247 LT.second.getScalarType() ==
MVT::i32) {
249 bool Op1Signed =
false, Op2Signed =
false;
252 unsigned OpMinSize = std::max(Op1MinSize, Op2MinSize);
253 bool SignedMode = Op1Signed || Op2Signed;
258 if (OpMinSize <= 15 && !ST->isPMADDWDSlow()) {
260 isa<ConstantDataVector>(Args[0]) || isa<ConstantVector>(Args[0]);
262 isa<ConstantDataVector>(Args[1]) || isa<ConstantVector>(Args[1]);
263 bool Op1Sext = isa<SExtInst>(Args[0]) &&
264 (Op1MinSize == 15 || (Op1MinSize < 15 && !ST->
hasSSE41()));
265 bool Op2Sext = isa<SExtInst>(Args[1]) &&
266 (Op2MinSize == 15 || (Op2MinSize < 15 && !ST->
hasSSE41()));
268 bool IsZeroExtended = !Op1Signed || !Op2Signed;
269 bool IsConstant = Op1Constant || Op2Constant;
270 bool IsSext = Op1Sext || Op2Sext;
271 if (IsConstant || IsZeroExtended || IsSext)
279 if (ST->useSLMArithCosts() && LT.second ==
MVT::v4i32) {
282 if (!SignedMode && OpMinSize <= 8)
286 if (!SignedMode && OpMinSize <= 16)
359 if (
const auto *Entry =
361 if (
auto KindCost = Entry->Cost[
CostKind])
362 return LT.first * *KindCost;
395 if (
const auto *Entry =
397 if (
auto KindCost = Entry->Cost[
CostKind])
398 return LT.first * *KindCost;
436 if (
const auto *Entry =
438 if (
auto KindCost = Entry->Cost[
CostKind])
439 return LT.first * *KindCost;
478 (!ST->hasXOP() || LT.second.getScalarSizeInBits() != 8))
479 if (
const auto *Entry =
481 if (
auto KindCost = Entry->Cost[
CostKind])
482 return LT.first * *KindCost;
509 (!ST->hasXOP() || LT.second.getScalarSizeInBits() != 8))
510 if (
const auto *Entry =
512 if (
auto KindCost = Entry->Cost[
CostKind])
513 return LT.first * *KindCost;
528 if (
const auto *Entry =
530 if (
auto KindCost = Entry->Cost[
CostKind])
531 return LT.first * *KindCost;
551 if (
const auto *Entry =
553 if (
auto KindCost = Entry->Cost[
CostKind])
554 return LT.first * *KindCost;
574 if (
const auto *Entry =
CostTableLookup(AVX2ConstCostTable, ISD, LT.second))
575 if (
auto KindCost = Entry->Cost[
CostKind])
576 return LT.first * *KindCost;
596 if (
const auto *Entry =
CostTableLookup(AVXConstCostTable, ISD, LT.second))
597 if (
auto KindCost = Entry->Cost[
CostKind])
598 return LT.first * *KindCost;
606 if (
const auto *Entry =
608 if (
auto KindCost = Entry->Cost[
CostKind])
609 return LT.first * *KindCost;
629 if (
const auto *Entry =
CostTableLookup(SSE2ConstCostTable, ISD, LT.second))
630 if (
auto KindCost = Entry->Cost[
CostKind])
631 return LT.first * *KindCost;
650 if (
const auto *Entry =
652 if (
auto KindCost = Entry->Cost[
CostKind])
653 return LT.first * *KindCost;
674 if (
const auto *Entry =
676 if (
auto KindCost = Entry->Cost[
CostKind])
677 return LT.first * *KindCost;
711 if (
const auto *Entry =
713 if (
auto KindCost = Entry->Cost[
CostKind])
714 return LT.first * *KindCost;
748 (!ST->hasXOP() || LT.second.getScalarSizeInBits() != 8))
749 if (
const auto *Entry =
751 if (
auto KindCost = Entry->Cost[
CostKind])
752 return LT.first * *KindCost;
774 (!ST->hasXOP() || LT.second.getScalarSizeInBits() != 8))
775 if (
const auto *Entry =
777 if (
auto KindCost = Entry->Cost[
CostKind])
778 return LT.first * *KindCost;
788 if (
const auto *Entry =
CostTableLookup(AVX512DQCostTable, ISD, LT.second))
789 if (
auto KindCost = Entry->Cost[
CostKind])
790 return LT.first * *KindCost;
834 if (
const auto *Entry =
CostTableLookup(AVX512BWCostTable, ISD, LT.second))
835 if (
auto KindCost = Entry->Cost[
CostKind])
836 return LT.first * *KindCost;
926 if (
const auto *Entry =
CostTableLookup(AVX512CostTable, ISD, LT.second))
927 if (
auto KindCost = Entry->Cost[
CostKind])
928 return LT.first * *KindCost;
962 if (
const auto *Entry =
CostTableLookup(AVX2ShiftCostTable, ISD, LT.second))
963 if (
auto KindCost = Entry->Cost[
CostKind])
964 return LT.first * *KindCost;
1003 if (
const auto *Entry =
1005 if (
auto KindCost = Entry->Cost[
CostKind])
1006 return LT.first * *KindCost;
1025 if (ST->useGLMDivSqrtCosts())
1026 if (
const auto *Entry =
CostTableLookup(GLMCostTable, ISD, LT.second))
1027 if (
auto KindCost = Entry->Cost[
CostKind])
1028 return LT.first * *KindCost;
1054 if (ST->useSLMArithCosts())
1055 if (
const auto *Entry =
CostTableLookup(SLMCostTable, ISD, LT.second))
1056 if (
auto KindCost = Entry->Cost[
CostKind])
1057 return LT.first * *KindCost;
1126 if (
const auto *Entry =
CostTableLookup(AVX2CostTable, ISD, LT.second))
1127 if (
auto KindCost = Entry->Cost[
CostKind])
1128 return LT.first * *KindCost;
1225 if (
const auto *Entry =
CostTableLookup(AVX1CostTable, ISD, LT.second))
1226 if (
auto KindCost = Entry->Cost[
CostKind])
1227 return LT.first * *KindCost;
1254 if (
const auto *Entry =
CostTableLookup(SSE42CostTable, ISD, LT.second))
1255 if (
auto KindCost = Entry->Cost[
CostKind])
1256 return LT.first * *KindCost;
1277 if (
const auto *Entry =
CostTableLookup(SSE41CostTable, ISD, LT.second))
1278 if (
auto KindCost = Entry->Cost[
CostKind])
1279 return LT.first * *KindCost;
1344 if (
const auto *Entry =
CostTableLookup(SSE2CostTable, ISD, LT.second))
1345 if (
auto KindCost = Entry->Cost[
CostKind])
1346 return LT.first * *KindCost;
1366 if (
const auto *Entry =
CostTableLookup(SSE1CostTable, ISD, LT.second))
1367 if (
auto KindCost = Entry->Cost[
CostKind])
1368 return LT.first * *KindCost;
1378 if (
auto KindCost = Entry->Cost[
CostKind])
1379 return LT.first * *KindCost;
1398 if (
auto KindCost = Entry->Cost[
CostKind])
1399 return LT.first * *KindCost;
1413 return 20 * LT.first * LT.second.getVectorNumElements() * ScalarCost;
1462 int NumElts = LT.second.getVectorNumElements();
1463 if ((
Index % NumElts) == 0)
1466 if (SubLT.second.isVector()) {
1467 int NumSubElts = SubLT.second.getVectorNumElements();
1468 if ((
Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
1476 int OrigSubElts = cast<FixedVectorType>(SubTp)->getNumElements();
1477 if (NumSubElts > OrigSubElts && (
Index % OrigSubElts) == 0 &&
1478 (NumSubElts % OrigSubElts) == 0 &&
1479 LT.second.getVectorElementType() ==
1480 SubLT.second.getVectorElementType() &&
1481 LT.second.getVectorElementType().getSizeInBits() ==
1483 assert(NumElts >= NumSubElts && NumElts > OrigSubElts &&
1484 "Unexpected number of elements!");
1486 LT.second.getVectorNumElements());
1488 SubLT.second.getVectorNumElements());
1497 return ExtractCost + 1;
1500 "Unexpected vector size");
1502 return ExtractCost + 2;
1511 int NumElts = LT.second.getVectorNumElements();
1513 if (SubLT.second.isVector()) {
1514 int NumSubElts = SubLT.second.getVectorNumElements();
1515 if ((
Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
1528 static const CostTblEntry SSE2SubVectorShuffleTbl[] = {
1559 if (
const auto *Entry =
1568 MVT LegalVT = LT.second;
1573 cast<FixedVectorType>(BaseTp)->getNumElements()) {
1578 unsigned NumOfSrcs = (VecTySize + LegalVTSize - 1) / LegalVTSize;
1585 if (!Mask.empty() && NumOfDests.
isValid()) {
1600 unsigned NormalizedVF =
1606 unsigned PrevSrcReg = 0;
1610 NormalizedMask, NumOfSrcRegs, NumOfDestRegs, NumOfDestRegs, []() {},
1611 [
this, SingleOpTy,
CostKind, &PrevSrcReg, &PrevRegMask,
1616 if (PrevRegMask.
empty() || PrevSrcReg != SrcReg ||
1617 PrevRegMask != RegMask)
1625 if (SrcReg != DestReg &&
1630 PrevSrcReg = SrcReg;
1631 PrevRegMask = RegMask;
1644 std::nullopt,
CostKind, 0,
nullptr);
1655 LT.first = NumOfDests * NumOfShufflesPerDest;
1671 if (
const auto *Entry =
1673 return LT.first * Entry->Cost;
1706 if (
const auto *Entry =
1708 return LT.first * Entry->Cost;
1785 if (
const auto *Entry =
CostTableLookup(AVX512ShuffleTbl, Kind, LT.second))
1786 if (
auto KindCost = Entry->Cost[
CostKind])
1787 return LT.first * *KindCost;
1840 if (
const auto *Entry =
CostTableLookup(AVX2ShuffleTbl, Kind, LT.second))
1841 return LT.first * Entry->Cost;
1862 if (
const auto *Entry =
CostTableLookup(XOPShuffleTbl, Kind, LT.second))
1863 return LT.first * Entry->Cost;
1925 if (
const auto *Entry =
CostTableLookup(AVX1ShuffleTbl, Kind, LT.second))
1926 return LT.first * Entry->Cost;
1939 if (
const auto *Entry =
CostTableLookup(SSE41ShuffleTbl, Kind, LT.second))
1940 return LT.first * Entry->Cost;
1971 if (
const auto *Entry =
CostTableLookup(SSSE3ShuffleTbl, Kind, LT.second))
1972 return LT.first * Entry->Cost;
2028 llvm::any_of(Args, [](
const auto &V) {
return isa<LoadInst>(V); });
2030 if (
const auto *Entry =
2033 LT.second.getVectorElementCount()) &&
2034 "Table entry missing from isLegalBroadcastLoad()");
2035 return LT.first * Entry->Cost;
2038 if (
const auto *Entry =
CostTableLookup(SSE2ShuffleTbl, Kind, LT.second))
2039 return LT.first * Entry->Cost;
2052 if (
const auto *Entry =
CostTableLookup(SSE1ShuffleTbl, Kind, LT.second))
2053 return LT.first * Entry->Cost;
2064 assert(ISD &&
"Invalid opcode");
2069 return Cost == 0 ? 0 : 1;
2883 AVX512BWConversionTbl, ISD, SimpleDstTy, SimpleSrcTy))
2884 return AdjustCost(Entry->Cost);
2888 AVX512DQConversionTbl, ISD, SimpleDstTy, SimpleSrcTy))
2889 return AdjustCost(Entry->Cost);
2893 AVX512FConversionTbl, ISD, SimpleDstTy, SimpleSrcTy))
2894 return AdjustCost(Entry->Cost);
2899 AVX512BWVLConversionTbl, ISD, SimpleDstTy, SimpleSrcTy))
2900 return AdjustCost(Entry->Cost);
2904 AVX512DQVLConversionTbl, ISD, SimpleDstTy, SimpleSrcTy))
2905 return AdjustCost(Entry->Cost);
2909 SimpleDstTy, SimpleSrcTy))
2910 return AdjustCost(Entry->Cost);
2914 SimpleDstTy, SimpleSrcTy))
2915 return AdjustCost(Entry->Cost);
2920 SimpleDstTy, SimpleSrcTy))
2921 return AdjustCost(Entry->Cost);
2926 SimpleDstTy, SimpleSrcTy))
2927 return AdjustCost(Entry->Cost);
2932 SimpleDstTy, SimpleSrcTy))
2933 return AdjustCost(Entry->Cost);
2948 AVX512BWConversionTbl, ISD, LTDest.second, LTSrc.second))
2949 return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost);
2953 AVX512DQConversionTbl, ISD, LTDest.second, LTSrc.second))
2954 return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost);
2958 AVX512FConversionTbl, ISD, LTDest.second, LTSrc.second))
2959 return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost);
2964 LTDest.second, LTSrc.second))
2965 return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost);
2969 LTDest.second, LTSrc.second))
2970 return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost);
2974 LTDest.second, LTSrc.second))
2975 return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost);
2979 LTDest.second, LTSrc.second))
2980 return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost);
2984 LTDest.second, LTSrc.second))
2985 return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost);
2989 LTDest.second, LTSrc.second))
2990 return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost);
2994 LTDest.second, LTSrc.second))
2995 return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost);